tesseract 4.1.1
Loading...
Searching...
No Matches
colpartitionset.h
Go to the documentation of this file.
1
2// File: colpartitionset.h
3// Description: Class to hold a list of ColPartitions of the page that
4// correspond roughly to columns.
5// Author: Ray Smith
6// Created: Thu Aug 14 10:50:01 PDT 2008
7//
8// (C) Copyright 2008, Google Inc.
9// Licensed under the Apache License, Version 2.0 (the "License");
10// you may not use this file except in compliance with the License.
11// You may obtain a copy of the License at
12// http://www.apache.org/licenses/LICENSE-2.0
13// Unless required by applicable law or agreed to in writing, software
14// distributed under the License is distributed on an "AS IS" BASIS,
15// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16// See the License for the specific language governing permissions and
17// limitations under the License.
18//
20
21#ifndef TESSERACT_TEXTORD_COLPARTITIONSET_H_
22#define TESSERACT_TEXTORD_COLPARTITIONSET_H_
23
24#include "colpartition.h" // For ColPartition_LIST.
25#include "genericvector.h" // For GenericVector.
26#include "rect.h" // For TBOX.
27#include "tabvector.h" // For BLOBNBOX_CLIST.
28
29namespace tesseract {
30
31class WorkingPartSet_LIST;
32class ColSegment_LIST;
33class ColPartitionSet;
35
36// ColPartitionSet is a class that holds a list of ColPartitions.
37// Its main use is in holding a candidate partitioning of the width of the
38// image into columns, where each member ColPartition is a single column.
39// ColPartitionSets are used in building the column layout of a page.
41 public:
42 ColPartitionSet() = default;
43 explicit ColPartitionSet(ColPartition_LIST* partitions);
44 explicit ColPartitionSet(ColPartition* partition);
45
46 ~ColPartitionSet() = default;
47
48 // Simple accessors.
49 const TBOX& bounding_box() const {
50 return bounding_box_;
51 }
52 bool Empty() const {
53 return parts_.empty();
54 }
55 int ColumnCount() const {
56 return parts_.length();
57 }
58
59 // Returns the number of columns of good width.
60 int GoodColumnCount() const;
61
62 // Return an element of the parts_ list from its index.
64
65 // Return the ColPartition that contains the given coords, if any, else nullptr.
66 ColPartition* ColumnContaining(int x, int y);
67
68 // Return the bounding boxes of columns at the given y-range
69 void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments);
70
71 // Extract all the parts from the list, relinquishing ownership.
72 void RelinquishParts();
73
74 // Attempt to improve this by adding partitions or expanding partitions.
76
77 // If this set is good enough to represent a new partitioning into columns,
78 // add it to the vector of sets, otherwise delete it.
80
81 // Return true if the partitions in other are all compatible with the columns
82 // in this.
83 bool CompatibleColumns(bool debug, ColPartitionSet* other, WidthCallback* cb);
84
85 // Returns the total width of all blobs in the part_set that do not lie
86 // within an approved column. Used as a cost measure for using this
87 // column set over another that might be compatible.
88 int UnmatchedWidth(ColPartitionSet* part_set);
89
90 // Return true if this ColPartitionSet makes a legal column candidate by
91 // having legal individual partitions and non-overlapping adjacent pairs.
93
94 // Return a copy of this. If good_only is true, only the good ColPartitions are copied.
95 ColPartitionSet* Copy(bool good_only);
96
97 // Display the edges of the columns at the given y coords.
98 void DisplayColumnEdges(int y_bottom, int y_top, ScrollView* win);
99
100 // Return the ColumnSpanningType that best explains the columns overlapped
101 // by the given coords (left, right, y), with the given margins.
102 // Also return the first and last column index touched by the coords and
103 // the leftmost spanned column.
104 // Column indices are 2n + 1 for real columns (0 based) and even values
105 // represent the gaps in between columns, with 0 being left of the leftmost.
106 // resolution refers to the ppi resolution of the image. It may be 0 if only
107 // the first_col and last_col are required.
108 ColumnSpanningType SpanningType(int resolution,
109 int left, int right, int height, int y,
110 int left_margin, int right_margin,
111 int* first_col, int* last_col,
112 int* first_spanned_col);
113
114 // The column_set has changed. Close down all in-progress WorkingPartSets in
115 // columns that do not match and start new ones for the new columns in this.
116 // As ColPartitions are turned into BLOCKs, the used ones are put in
117 // used_parts, as they still need to be referenced in the grid.
118 void ChangeWorkColumns(const ICOORD& bleft, const ICOORD& tright,
119 int resolution, ColPartition_LIST* used_parts,
120 WorkingPartSet_LIST* working_set);
121
122 // Accumulate the widths and gaps into the given variables.
123 void AccumulateColumnWidthsAndGaps(int* total_width, int* width_samples,
124 int* total_gap, int* gap_samples);
125
126 // Provide debug output for this ColPartitionSet and all the ColPartitions.
127 void Print();
128
129 private:
130 // Add the given partition to the list in the appropriate place.
131 void AddPartition(ColPartition* new_part, ColPartition_IT* it);
132
133 // Compute the coverage and good column count. Coverage is the amount of the
134 // width of the page (in pixels) that is covered by ColPartitions, which are
135 // used to provide candidate column layouts.
136 // Coverage is split into good and bad. Good coverage is provided by
137 // ColPartitions of a frequent width (according to the callback function
138 // provided by TabFinder::WidthCB, which accesses stored statistics on the
139 // widths of ColPartitions) and bad coverage is provided by all other
140 // ColPartitions, even if they have tab vectors at both sides. Thus:
141 // |-----------------------------------------------------------------|
142 // | Double width heading |
143 // |-----------------------------------------------------------------|
144 // |-------------------------------| |-------------------------------|
145 // | Common width ColPartition | | Common width ColPartition |
146 // |-------------------------------| |-------------------------------|
147 // the layout with two common-width columns has better coverage than the
148 // double width heading, because the coverage is "good," even though less in
149 // total coverage than the heading, because the heading coverage is "bad."
150 void ComputeCoverage();
151
152 // Adds the coverage, column count and box for a single partition,
153 // without adding it to the list. (Helper factored from ComputeCoverage.)
154 void AddPartitionCoverageAndBox(const ColPartition& part);
155
156 // The partitions in this column candidate.
157 ColPartition_LIST parts_;
158 // The number of partitions that have a frequent column width.
159 int good_column_count_;
160 // Total width of all the good ColPartitions.
161 int good_coverage_;
162 // Total width of all the bad ColPartitions.
163 int bad_coverage_;
164 // Bounding box of all partitions in the set.
165 TBOX bounding_box_;
166};
167
168ELISTIZEH(ColPartitionSet)
169
170} // namespace tesseract.
171
172#endif // TESSERACT_TEXTORD_COLPARTITIONSET_H_
#define ELISTIZEH(CLASSNAME)
Definition: elst.h:918
integer coordinate
Definition: points.h:32
Definition: rect.h:34
void AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples, int *total_gap, int *gap_samples)
void AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthCallback *cb)
void DisplayColumnEdges(int y_bottom, int y_top, ScrollView *win)
ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y, int left_margin, int right_margin, int *first_col, int *last_col, int *first_spanned_col)
bool CompatibleColumns(bool debug, ColPartitionSet *other, WidthCallback *cb)
ColPartition * GetColumnByIndex(int index)
void ImproveColumnCandidate(WidthCallback *cb, PartSetVector *src_sets)
void ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
ColPartitionSet * Copy(bool good_only)
ColPartition * ColumnContaining(int x, int y)
const TBOX & bounding_box() const
int UnmatchedWidth(ColPartitionSet *part_set)
void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments)