tesseract 4.1.1
Loading...
Searching...
No Matches
colfind.h
Go to the documentation of this file.
1
2// File: colfind.h
3// Description: Class to find columns in the grid of BLOBNBOXes.
4// Author: Ray Smith
5//
6// (C) Copyright 2008, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#ifndef TESSERACT_TEXTORD_COLFIND_H_
20#define TESSERACT_TEXTORD_COLFIND_H_
21
22#include "colpartitiongrid.h"
23#include "colpartitionset.h"
24#include "debugpixa.h"
25#include "imagefind.h"
26#include "ocrblock.h"
27#include "tabfind.h"
28#include "textlineprojection.h"
29
30class BLOCK_LIST;
31struct Boxa;
32struct Pixa;
33class DENORM;
34class ScrollView;
35class STATS;
36class TO_BLOCK;
37
38namespace tesseract {
39
40class ColPartitionSet;
41class ColPartitionSet_LIST;
42class ColSegment_LIST;
43class ColumnGroup_LIST;
44class LineSpacing;
45class StrokeWidth;
46class TempColumn_LIST;
47class EquationDetectBase;
48
49// The ColumnFinder class finds columns in the grid.
50class ColumnFinder : public TabFind {
51 public:
52 // Gridsize is an estimate of the text size in the image. A suitable value
53 // is in TO_BLOCK::line_size after find_components has been used to make
54 // the blobs.
55 // bleft and tright are the bounds of the image (rectangle) being processed.
56 // vlines is a (possibly empty) list of TabVector and vertical_x and y are
57 // the sum logical vertical vector produced by LineFinder::FindVerticalLines.
58 // If cjk_script is true, then broken CJK characters are fixed during
59 // layout analysis to assist in detecting horizontal vs vertically written
60 // textlines.
61 ColumnFinder(int gridsize, const ICOORD& bleft, const ICOORD& tright,
62 int resolution, bool cjk_script, double aligned_gap_fraction,
63 TabVector_LIST* vlines, TabVector_LIST* hlines,
64 int vertical_x, int vertical_y);
65 ~ColumnFinder() override;
66
67 // Accessors for testing
68 const DENORM* denorm() const {
69 return denorm_;
70 }
71 const TextlineProjection* projection() const {
72 return &projection_;
73 }
74 void set_cjk_script(bool is_cjk) {
75 cjk_script_ = is_cjk;
76 }
77
78 // ======================================================================
79 // The main function of ColumnFinder is broken into pieces to facilitate
80 // optional insertion of orientation and script detection in an efficient
81 // way. The calling sequence IS MANDATORY however, whether or not
82 // OSD is being used:
83 // 1. Construction.
84 // 2. SetupAndFilterNoise.
85 // 3. IsVerticallyAlignedText.
86 // 4. CorrectOrientation.
87 // 5. FindBlocks.
88 // 6. Destruction. Use of a single column finder for multiple images does not
89 // make sense.
90 // Throughout these steps, the ColPartitions are owned by part_grid_, which
91 // means that it must be kept correct. Exception: big_parts_ owns its
92 // own ColPartitions.
93 // The BLOBNBOXes are owned by the input TO_BLOCK for the whole time, except
94 // for a phase in FindBlocks before TransformToBlocks, when they become
95 // owned by the ColPartitions. The owner() ColPartition of a BLOBNBOX
96 // indicates more of a betrothal for the majority of layout analysis, ie
97 // which ColPartition will take ownership when the blobs are released from
98 // the input TO_BLOCK. Exception: image_bblobs_ owns the fake blobs that
99 // are part of the image regions, as they are not on any TO_BLOCK list.
100 // TODO(rays) break up column finder further into smaller classes, as
101 // there is a lot more to it than column finding now.
102 // ======================================================================
103
104 // Performs initial processing on the blobs in the input_block:
105 // Setup the part_grid, stroke_width_, nontext_map_.
106 // Obvious noise blobs are filtered out and used to mark the nontext_map_.
107 // Initial stroke-width analysis is used to get local text alignment
108 // direction, so the textline projection_ map can be setup.
109 // On return, IsVerticallyAlignedText may be called (now optionally) to
110 // determine the gross textline alignment of the page.
111 void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix* photo_mask_pix,
112 TO_BLOCK* input_block);
113
114 // Tests for vertical alignment of text (returning true if so), and generates
115 // a list of blobs (in osd_blobs) for orientation and script detection.
116 // block is the single block for the whole page or rectangle to be OCRed.
117 // Note that the vertical alignment may be due to text whose writing direction
118 // is vertical, like say Japanese, or due to text whose writing direction is
119 // horizontal but whose text appears vertically aligned because the image is
120 // not the right way up.
121 // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio.
122 bool IsVerticallyAlignedText(double find_vertical_text_ratio,
123 TO_BLOCK* block, BLOBNBOX_CLIST* osd_blobs);
124
125 // Rotates the blobs and the TabVectors so that the gross writing direction
126 // (text lines) are horizontal and lines are read down the page.
127 // Applied rotation stored in rotation_.
128 // A second rotation is calculated for application during recognition to
129 // make the rotated blobs upright for recognition.
130 // Subsequent rotation stored in text_rotation_.
131 //
132 // Arguments:
133 // vertical_text_lines is true if the text lines are vertical.
134 // recognition_rotation [0..3] is the number of anti-clockwise 90 degree
135 // rotations from osd required for the text to be upright and readable.
136 void CorrectOrientation(TO_BLOCK* block, bool vertical_text_lines,
137 int recognition_rotation);
138
139 // Finds blocks of text, image, rule line, table etc, returning them in the
140 // blocks and to_blocks.
141 // (Each TO_BLOCK points to the basic BLOCK and adds more information.)
142 // Image blocks are generated by a combination of photo_mask_pix (which may
143 // NOT be nullptr) and the rejected text found during preliminary textline
144 // finding.
145 // The input_block is the result of a call to find_components, and contains
146 // the blobs found in the image or rectangle to be OCRed. These blobs will be
147 // removed and placed in the output blocks, while unused ones will be deleted.
148 // If single_column is true, the input is treated as single column, but
149 // it is still divided into blocks of equal line spacing/text size.
150 // scaled_color is scaled down by scaled_factor from the input color image,
151 // and may be nullptr if the input was not color.
152 // grey_pix is optional, but if present must match the photo_mask_pix in size,
153 // and must be a *real* grey image instead of binary_pix * 255.
154 // thresholds_pix is expected to be present iff grey_pix is present and
155 // can be an integer factor reduction of the grey_pix. It represents the
156 // thresholds that were used to create the binary_pix from the grey_pix.
157 // Small blobs that confuse the segmentation into lines are placed into
158 // diacritic_blobs, with the intention that they be put into the most
159 // appropriate word after the rest of layout analysis.
160 // Returns -1 if the user hits the 'd' key in the blocks window while running
161 // in debug mode, which requests a retry with more debug info.
162 int FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, int scaled_factor,
163 TO_BLOCK* block, Pix* photo_mask_pix, Pix* thresholds_pix,
164 Pix* grey_pix, DebugPixa* pixa_debug, BLOCK_LIST* blocks,
165 BLOBNBOX_LIST* diacritic_blobs, TO_BLOCK_LIST* to_blocks);
166
167 // Get the rotation required to deskew, and its inverse rotation.
168 void GetDeskewVectors(FCOORD* deskew, FCOORD* reskew);
169
170 // Set the equation detection pointer.
171 void SetEquationDetect(EquationDetectBase* detect);
172
173 private:
174 // Displays the blob and block bounding boxes in a window called Blocks.
175 void DisplayBlocks(BLOCK_LIST* blocks);
176 // Displays the column edges at each grid y coordinate defined by
177 // best_columns_.
178 void DisplayColumnBounds(PartSetVector* sets);
179
181
182 // Sets up column_sets_ (the determined column layout at each horizontal
183 // slice). Returns false if the page is empty.
184 bool MakeColumns(bool single_column);
185 // Attempt to improve the column_candidates by expanding the columns
186 // and adding new partitions from the partition sets in src_sets.
187 // Src_sets may be equal to column_candidates, in which case it will
188 // use them as a source to improve themselves.
189 void ImproveColumnCandidates(PartSetVector* src_sets,
190 PartSetVector* column_sets);
191 // Prints debug information on the column candidates.
192 void PrintColumnCandidates(const char* title);
193 // Finds the optimal set of columns that cover the entire image with as
194 // few changes in column partition as possible.
195 // Returns true if any part of the page is multi-column.
196 bool AssignColumns(const PartSetVector& part_sets);
197 // Finds the biggest range in part_sets_ that has no assigned column, but
198 // column assignment is possible.
199 bool BiggestUnassignedRange(int set_count, const bool* any_columns_possible,
200 int* start, int* end);
201 // Finds the modal compatible column_set_ index within the given range.
202 int RangeModalColumnSet(int** column_set_costs, const int* assigned_costs,
203 int start, int end);
204 // Given that there are many column_set_id compatible columns in the range,
205 // shrinks the range to the longest contiguous run of compatibility, allowing
206 // gaps where no columns are possible, but not where competing columns are
207 // possible.
208 void ShrinkRangeToLongestRun(int** column_set_costs,
209 const int* assigned_costs,
210 const bool* any_columns_possible,
211 int column_set_id,
212 int* best_start, int* best_end);
213 // Moves start in the direction of step, up to, but not including end while
214 // the only incompatible regions are no more than kMaxIncompatibleColumnCount
215 // in size, and the compatible regions beyond are bigger.
216 void ExtendRangePastSmallGaps(int** column_set_costs,
217 const int* assigned_costs,
218 const bool* any_columns_possible,
219 int column_set_id,
220 int step, int end, int* start);
221 // Assigns the given column_set_id to the part_sets_ in the given range.
222 void AssignColumnToRange(int column_set_id, int start, int end,
223 int** column_set_costs, int* assigned_costs);
224
225 // Computes the mean_column_gap_.
226 void ComputeMeanColumnGap(bool any_multi_column);
227
230
231 // Hoovers up all un-owned blobs and deletes them.
232 // The rest get released from the block so the ColPartitions can pass
233 // ownership to the output blocks.
234 void ReleaseBlobsAndCleanupUnused(TO_BLOCK* block);
235 // Splits partitions that cross columns where they have nothing in the gap.
236 void GridSplitPartitions();
237 // Merges partitions where there is vertical overlap, within a single column,
238 // and the horizontal gap is small enough.
239 void GridMergePartitions();
240 // Inserts remaining noise blobs into the most applicable partition if any.
241 // If there is no applicable partition, then the blobs are deleted.
242 void InsertRemainingNoise(TO_BLOCK* block);
243 // Remove partitions that come from horizontal lines that look like
244 // underlines, but are not part of a table.
245 void GridRemoveUnderlinePartitions();
246 // Add horizontal line separators as partitions.
247 void GridInsertHLinePartitions();
248 // Add vertical line separators as partitions.
249 void GridInsertVLinePartitions();
250 // For every ColPartition in the grid, sets its type based on position
251 // in the columns.
252 void SetPartitionTypes();
253 // Only images remain with multiple types in a run of partners.
254 // Sets the type of all in the group to the maximum of the group.
255 void SmoothPartnerRuns();
256
258
259 // Helper functions for TransformToBlocks.
260 // Add the part to the temp list in the correct order.
261 void AddToTempPartList(ColPartition* part, ColPartition_CLIST* temp_list);
262 // Add everything from the temp list to the work_set assuming correct order.
263 void EmptyTempPartList(ColPartition_CLIST* temp_list,
264 WorkingPartSet_LIST* work_set);
265
266 // Transform the grid of partitions to the output blocks.
267 void TransformToBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
268
269 // Reflect the blob boxes (but not the outlines) in the y-axis so that
270 // the blocks get created in the correct RTL order. Rotates the blobs
271 // in the input_block and the bblobs list.
272 // The reflection is undone in RotateAndReskewBlocks by
273 // reflecting the blocks themselves, and then recomputing the blob bounding
274 // boxes.
275 void ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs);
276
277 // Undo the deskew that was done in FindTabVectors, as recognition is done
278 // without correcting blobs or blob outlines for skew.
279 // Reskew the completed blocks to put them back to the original rotated coords
280 // that were created by CorrectOrientation.
281 // If the input_is_rtl, then reflect the blocks in the y-axis to undo the
282 // reflection that was done before FindTabVectors.
283 // Blocks that were identified as vertical text (relative to the rotated
284 // coordinates) are further rotated so the text lines are horizontal.
285 // blob polygonal outlines are rotated to match the position of the blocks
286 // that they are in, and their bounding boxes are recalculated to be accurate.
287 // Record appropriate inverse transformations and required
288 // classifier transformation in the blocks.
289 void RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST* to_blocks);
290
291 // Computes the rotations for the block (to make textlines horizontal) and
292 // for the blobs (for classification) and sets the appropriate members
293 // of the given block.
294 // Returns the rotation that needs to be applied to the blobs to make
295 // them sit in the rotated block.
296 FCOORD ComputeBlockAndClassifyRotation(BLOCK* block);
297
298 // If true then the page language is cjk, so it is safe to perform
299 // FixBrokenCJK.
300 bool cjk_script_;
301 // The minimum gutter width to apply for finding columns.
302 // Modified when vertical text is detected to prevent detection of
303 // vertical text lines as columns.
304 int min_gutter_width_;
305 // The mean gap between columns over the page.
306 int mean_column_gap_;
307 // Config param saved at construction time. Modifies min_gutter_width_ with
308 // vertical text to prevent detection of vertical text as columns.
309 double tabfind_aligned_gap_fraction_;
310 // The rotation vector needed to convert original coords to deskewed.
311 FCOORD deskew_;
312 // The rotation vector needed to convert deskewed back to original coords.
313 FCOORD reskew_;
314 // The rotation vector used to rotate vertically oriented pages.
315 FCOORD rotation_;
316 // The rotation vector needed to convert the rotated back to original coords.
317 FCOORD rerotate_;
318 // The additional rotation vector needed to rotate text for recognition.
319 FCOORD text_rotation_;
320 // The column_sets_ contain the ordered candidate ColPartitionSets that
321 // define the possible divisions of the page into columns.
322 PartSetVector column_sets_;
323 // A simple array of pointers to the best assigned column division at
324 // each grid y coordinate.
325 ColPartitionSet** best_columns_;
326 // The grid used for creating initial partitions with strokewidth.
327 StrokeWidth* stroke_width_;
328 // The grid used to hold ColPartitions after the columns have been determined.
329 ColPartitionGrid part_grid_;
330 // List of ColPartitions that are no longer needed after they have been
331 // turned into regions, but are kept around because they are referenced
332 // by the part_grid_.
333 ColPartition_LIST good_parts_;
334 // List of ColPartitions that are big and might be dropcap or vertically
335 // joined.
336 ColPartition_LIST big_parts_;
337 // List of ColPartitions that have been declared noise.
338 ColPartition_LIST noise_parts_;
339 // The fake blobs that are made from the images.
340 BLOBNBOX_LIST image_bblobs_;
341 // Horizontal line separators.
342 TabVector_LIST horizontal_lines_;
343 // Image map of photo/noise areas on the page.
344 Pix* nontext_map_;
345 // Textline projection map.
346 TextlineProjection projection_;
347 // Sequence of DENORMS that indicate how to get back to the original image
348 // coordinate space. The destructor must delete all the DENORMs in the chain.
349 DENORM* denorm_;
350
351 // Various debug windows that automatically go away on completion.
352 ScrollView* input_blobs_win_;
353
354 // The equation region detector pointer. Note: This pointer is passed in by
355 // member function SetEquationDetect and is NOT owned by this class, so
356 // this class does not release it.
357 EquationDetectBase* equation_detect_;
358
359 // Allow a subsequent instance to reuse the blocks window.
360 // Not thread-safe, but multiple threads shouldn't be using windows anyway.
361 static ScrollView* blocks_win_;
362};
363
364} // namespace tesseract.
365
366#endif // TESSERACT_TEXTORD_COLFIND_H_
Definition: ocrblock.h:31
integer coordinate
Definition: points.h:32
Definition: points.h:189
Definition: statistc.h:31
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
const ICOORD & tright() const
Definition: bbgrid.h:75
int FindBlocks(PageSegMode pageseg_mode, Pix *scaled_color, int scaled_factor, TO_BLOCK *block, Pix *photo_mask_pix, Pix *thresholds_pix, Pix *grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks)
Definition: colfind.cpp:285
const TextlineProjection * projection() const
Definition: colfind.h:71
void GetDeskewVectors(FCOORD *deskew, FCOORD *reskew)
Definition: colfind.cpp:496
void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix *photo_mask_pix, TO_BLOCK *input_block)
Definition: colfind.cpp:142
void set_cjk_script(bool is_cjk)
Definition: colfind.h:74
bool IsVerticallyAlignedText(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
Definition: colfind.cpp:179
void SetEquationDetect(EquationDetectBase *detect)
Definition: colfind.cpp:502
const DENORM * denorm() const
Definition: colfind.h:68
void CorrectOrientation(TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation)
Definition: colfind.cpp:197
~ColumnFinder() override
Definition: colfind.cpp:97