tesseract 4.1.1
Loading...
Searching...
No Matches
tabfind.cpp
Go to the documentation of this file.
1
2// File: tabfind.cpp
3// Description: Subclass of BBGrid to find vertically aligned blobs.
4// Author: Ray Smith
5//
6// (C) Copyright 2008, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#ifdef HAVE_CONFIG_H
20#include "config_auto.h"
21#endif
22
23#include "tabfind.h"
24#include "alignedblob.h"
25#include "colpartitiongrid.h"
26#include "detlinefit.h"
27#include "host.h" // for NearlyEqual
28#include "linefind.h"
29
30#include <algorithm>
31
32namespace tesseract {
33
// Search radius for initial gaps, expressed as a multiple of the box size.
constexpr int kTabRadiusFactor = 5;
// Smallest and largest multiples of height to search vertically when
// extrapolating.
constexpr int kMinVerticalSearch = 3;
constexpr int kMaxVerticalSearch = 12;
constexpr int kMaxRaggedSearch = 25;
// A column width needs at least this many lines to be interesting.
constexpr int kMinLinesInColumn = 10;
// A column needs at least this width to be interesting.
constexpr int kMinColumnWidth = 200;
// A column needs at least this fraction of the total column lines to be
// interesting.
constexpr double kMinFractionalLinesInColumn = 0.125;
// Fraction used as the alignment tolerance for aligned tabs.
constexpr double kAlignedFraction = 0.03125;
// Widest gutter (in absolute inches) that we care about.
constexpr double kMaxGutterWidthAbsolute = 2.00;
50// Multiplier of gridsize for min gutter width of TT_MAYBE_RAGGED blobs.
// Minimum aspect ratio for a tall object to be treated as a separator line.
// (Such objects are ignored when searching the gutter for obstructions.)
constexpr double kLineFragmentAspectRatio = 10.0;
// Minimum number of points to accept after evaluation.
constexpr int kMinEvaluatedTabs = 3;
// Rotations of diacritic blobs of up to 30 degrees are allowed.
// This value must stay slightly larger than kCosSmallAngle in blobbox.cpp
// so that the assert there never fails.
constexpr double kCosMaxSkewAngle = 0.866025;
61
62static BOOL_VAR(textord_tabfind_show_initialtabs, false, "Show tab candidates");
63static BOOL_VAR(textord_tabfind_show_finaltabs, false, "Show tab vectors");
64
65TabFind::TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
66 TabVector_LIST* vlines, int vertical_x, int vertical_y,
67 int resolution)
68 : AlignedBlob(gridsize, bleft, tright),
69 resolution_(resolution),
70 image_origin_(0, tright.y() - 1),
71 v_it_(&vectors_) {
72 width_cb_ = nullptr;
73 v_it_.add_list_after(vlines);
74 SetVerticalSkewAndParallelize(vertical_x, vertical_y);
76}
77
79 delete width_cb_;
80}
81
83
84// Insert a list of blobs into the given grid (not necessarily this).
85// If take_ownership is true, then the blobs are removed from the source list.
86// See InsertBlob for the other arguments.
87// It would seem to make more sense to swap this and grid, but this way
88// around allows grid to not be derived from TabFind, eg a ColPartitionGrid,
89// while the grid that provides the tab stops(this) has to be derived from
90// TabFind.
91void TabFind::InsertBlobsToGrid(bool h_spread, bool v_spread,
92 BLOBNBOX_LIST* blobs,
93 BBGrid<BLOBNBOX, BLOBNBOX_CLIST,
94 BLOBNBOX_C_IT>* grid) {
95 BLOBNBOX_IT blob_it(blobs);
96 int b_count = 0;
97 int reject_count = 0;
98 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
99 BLOBNBOX* blob = blob_it.data();
100// if (InsertBlob(true, true, blob, grid)) {
101 if (InsertBlob(h_spread, v_spread, blob, grid)) {
102 ++b_count;
103 } else {
104 ++reject_count;
105 }
106 }
108 tprintf("Inserted %d blobs into grid, %d rejected.\n",
109 b_count, reject_count);
110 }
111}
112
113// Insert a single blob into the given grid (not necessarily this).
114// If h_spread, then all cells covered horizontally by the box are
115// used, otherwise, just the bottom-left. Similarly for v_spread.
116// A side effect is that the left and right rule edges of the blob are
117// set according to the tab vectors in this (not grid).
118bool TabFind::InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob,
119 BBGrid<BLOBNBOX, BLOBNBOX_CLIST,
120 BLOBNBOX_C_IT>* grid) {
121 TBOX box = blob->bounding_box();
122 blob->set_left_rule(LeftEdgeForBox(box, false, false));
123 blob->set_right_rule(RightEdgeForBox(box, false, false));
124 blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
125 blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
126 if (blob->joined_to_prev())
127 return false;
128 grid->InsertBBox(h_spread, v_spread, blob);
129 return true;
130}
131
132// Calls SetBlobRuleEdges for all the blobs in the given block.
134 SetBlobRuleEdges(&block->blobs);
138}
139
140// Sets the left and right rule and crossing_rules for the blobs in the given
141// list by fiding the next outermost tabvectors for each blob.
142void TabFind::SetBlobRuleEdges(BLOBNBOX_LIST* blobs) {
143 BLOBNBOX_IT blob_it(blobs);
144 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
145 BLOBNBOX* blob = blob_it.data();
146 TBOX box = blob->bounding_box();
147 blob->set_left_rule(LeftEdgeForBox(box, false, false));
148 blob->set_right_rule(RightEdgeForBox(box, false, false));
149 blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
150 blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
151 }
152}
153
154// Returns the gutter width of the given TabVector between the given y limits.
155// Also returns x-shift to be added to the vector to clear any intersecting
156// blobs. The shift is deducted from the returned gutter.
157// If ignore_unmergeables is true, then blobs of UnMergeableType are
158// ignored as if they don't exist. (Used for text on image.)
159// max_gutter_width is used as the maximum width worth searching for in case
160// there is nothing near the TabVector.
161int TabFind::GutterWidth(int bottom_y, int top_y, const TabVector& v,
162 bool ignore_unmergeables, int max_gutter_width,
163 int* required_shift) {
164 bool right_to_left = v.IsLeftTab();
165 int bottom_x = v.XAtY(bottom_y);
166 int top_x = v.XAtY(top_y);
167 int start_x = right_to_left ? std::max(top_x, bottom_x) : std::min(top_x, bottom_x);
168 BlobGridSearch sidesearch(this);
169 sidesearch.StartSideSearch(start_x, bottom_y, top_y);
170 int min_gap = max_gutter_width;
171 *required_shift = 0;
172 BLOBNBOX* blob = nullptr;
173 while ((blob = sidesearch.NextSideSearch(right_to_left)) != nullptr) {
174 const TBOX& box = blob->bounding_box();
175 if (box.bottom() >= top_y || box.top() <= bottom_y)
176 continue; // Doesn't overlap enough.
177 if (box.height() >= gridsize() * 2 &&
178 box.height() > box.width() * kLineFragmentAspectRatio) {
179 // Skip likely separator line residue.
180 continue;
181 }
182 if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type()))
183 continue; // Skip non-text if required.
184 int mid_y = (box.bottom() + box.top()) / 2;
185 // We use the x at the mid-y so that the required_shift guarantees
186 // to clear all the blobs on the tab-stop. If we use the min/max
187 // of x at top/bottom of the blob, then exactness would be required,
188 // which is not a good thing.
189 int tab_x = v.XAtY(mid_y);
190 int gap;
191 if (right_to_left) {
192 gap = tab_x - box.right();
193 if (gap < 0 && box.left() - tab_x < *required_shift)
194 *required_shift = box.left() - tab_x;
195 } else {
196 gap = box.left() - tab_x;
197 if (gap < 0 && box.right() - tab_x > *required_shift)
198 *required_shift = box.right() - tab_x;
199 }
200 if (gap > 0 && gap < min_gap)
201 min_gap = gap;
202 }
203 // Result may be negative, in which case, this is a really bad tabstop.
204 return min_gap - abs(*required_shift);
205}
206
207// Find the gutter width and distance to inner neighbour for the given blob.
208void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height,
209 int max_gutter, bool left,
210 BLOBNBOX* bbox, int* gutter_width,
211 int* neighbour_gap) {
212 const TBOX& box = bbox->bounding_box();
213 // The gutter and internal sides of the box.
214 int gutter_x = left ? box.left() : box.right();
215 int internal_x = left ? box.right() : box.left();
216 // On ragged edges, the gutter side of the box is away from the tabstop.
217 int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
218 *gutter_width = max_gutter;
219 // If the box is away from the tabstop, we need to increase
220 // the allowed gutter width.
221 if (tab_gap > 0)
222 *gutter_width += tab_gap;
223 bool debug = WithinTestRegion(2, box.left(), box.bottom());
224 if (debug)
225 tprintf("Looking in gutter\n");
226 // Find the nearest blob on the outside of the column.
227 BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left,
228 bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
229 *gutter_width, box.top(), box.bottom());
230 if (gutter_bbox != nullptr) {
231 const TBOX& gutter_box = gutter_bbox->bounding_box();
232 *gutter_width = left ? tab_x - gutter_box.right()
233 : gutter_box.left() - tab_x;
234 }
235 if (*gutter_width >= max_gutter) {
236 // If there is no box because a tab was in the way, get the tab coord.
237 TBOX gutter_box(box);
238 if (left) {
239 gutter_box.set_left(tab_x - max_gutter - 1);
240 gutter_box.set_right(tab_x - max_gutter);
241 int tab_gutter = RightEdgeForBox(gutter_box, true, false);
242 if (tab_gutter < tab_x - 1)
243 *gutter_width = tab_x - tab_gutter;
244 } else {
245 gutter_box.set_left(tab_x + max_gutter);
246 gutter_box.set_right(tab_x + max_gutter + 1);
247 int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
248 if (tab_gutter > tab_x + 1)
249 *gutter_width = tab_gutter - tab_x;
250 }
251 }
252 if (*gutter_width > max_gutter)
253 *gutter_width = max_gutter;
254 // Now look for a neighbour on the inside.
255 if (debug)
256 tprintf("Looking for neighbour\n");
257 BLOBNBOX* neighbour = AdjacentBlob(bbox, !left,
258 bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
259 *gutter_width, box.top(), box.bottom());
260 int neighbour_edge = left ? RightEdgeForBox(box, true, false)
261 : LeftEdgeForBox(box, true, false);
262 if (neighbour != nullptr) {
263 const TBOX& n_box = neighbour->bounding_box();
264 if (debug) {
265 tprintf("Found neighbour:");
266 n_box.print();
267 }
268 if (left && n_box.left() < neighbour_edge)
269 neighbour_edge = n_box.left();
270 else if (!left && n_box.right() > neighbour_edge)
271 neighbour_edge = n_box.right();
272 }
273 *neighbour_gap = left ? neighbour_edge - internal_x
274 : internal_x - neighbour_edge;
275}
276
277// Return the x-coord that corresponds to the right edge for the given
278// box. If there is a rule line to the right that vertically overlaps it,
279// then return the x-coord of the rule line, otherwise return the right
280// edge of the page. For details see RightTabForBox below.
281int TabFind::RightEdgeForBox(const TBOX& box, bool crossing, bool extended) {
282 TabVector* v = RightTabForBox(box, crossing, extended);
283 return v == nullptr ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
284}
285// As RightEdgeForBox, but finds the left Edge instead.
286int TabFind::LeftEdgeForBox(const TBOX& box, bool crossing, bool extended) {
287 TabVector* v = LeftTabForBox(box, crossing, extended);
288 return v == nullptr ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
289}
290
291// This comment documents how this function works.
292// For its purpose and arguments, see the comment in tabfind.h.
293// TabVectors are stored sorted by perpendicular distance of middle from
294// the global mean vertical vector. Since the individual vectors can have
295// differing directions, their XAtY for a given y is not necessarily in the
296// right order. Therefore the search has to be run with a margin.
297// The middle of a vector that passes through (x,y) cannot be higher than
298// halfway from y to the top, or lower than halfway from y to the bottom
299// of the coordinate range; therefore, the search margin is the range of
300// sort keys between these halfway points. Any vector with a sort key greater
301// than the upper margin must be to the right of x at y, and likewise any
302// vector with a sort key less than the lower margin must pass to the left
303// of x at y.
304TabVector* TabFind::RightTabForBox(const TBOX& box, bool crossing,
305 bool extended) {
306 if (v_it_.empty())
307 return nullptr;
308 int top_y = box.top();
309 int bottom_y = box.bottom();
310 int mid_y = (top_y + bottom_y) / 2;
311 int right = crossing ? (box.left() + box.right()) / 2 : box.right();
312 int min_key, max_key;
313 SetupTabSearch(right, mid_y, &min_key, &max_key);
314 // Position the iterator at the first TabVector with sort_key >= min_key.
315 while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
316 v_it_.backward();
317 while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
318 v_it_.forward();
319 // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
320 TabVector* best_v = nullptr;
321 int best_x = -1;
322 int key_limit = -1;
323 do {
324 TabVector* v = v_it_.data();
325 int x = v->XAtY(mid_y);
326 if (x >= right &&
327 (v->VOverlap(top_y, bottom_y) > 0 ||
328 (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
329 if (best_v == nullptr || x < best_x) {
330 best_v = v;
331 best_x = x;
332 // We can guarantee that no better vector can be found if the
333 // sort key exceeds that of the best by max_key - min_key.
334 key_limit = v->sort_key() + max_key - min_key;
335 }
336 }
337 // Break when the search is done to avoid wrapping the iterator and
338 // thereby potentially slowing the next search.
339 if (v_it_.at_last() ||
340 (best_v != nullptr && v->sort_key() > key_limit))
341 break; // Prevent restarting list for next call.
342 v_it_.forward();
343 } while (!v_it_.at_first());
344 return best_v;
345}
346
347// As RightTabForBox, but finds the left TabVector instead.
348TabVector* TabFind::LeftTabForBox(const TBOX& box, bool crossing,
349 bool extended) {
350 if (v_it_.empty())
351 return nullptr;
352 int top_y = box.top();
353 int bottom_y = box.bottom();
354 int mid_y = (top_y + bottom_y) / 2;
355 int left = crossing ? (box.left() + box.right()) / 2 : box.left();
356 int min_key, max_key;
357 SetupTabSearch(left, mid_y, &min_key, &max_key);
358 // Position the iterator at the last TabVector with sort_key <= max_key.
359 while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
360 v_it_.forward();
361 while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
362 v_it_.backward();
363 }
364 // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
365 TabVector* best_v = nullptr;
366 int best_x = -1;
367 int key_limit = -1;
368 do {
369 TabVector* v = v_it_.data();
370 int x = v->XAtY(mid_y);
371 if (x <= left &&
372 (v->VOverlap(top_y, bottom_y) > 0 ||
373 (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
374 if (best_v == nullptr || x > best_x) {
375 best_v = v;
376 best_x = x;
377 // We can guarantee that no better vector can be found if the
378 // sort key is less than that of the best by max_key - min_key.
379 key_limit = v->sort_key() - (max_key - min_key);
380 }
381 }
382 // Break when the search is done to avoid wrapping the iterator and
383 // thereby potentially slowing the next search.
384 if (v_it_.at_first() ||
385 (best_v != nullptr && v->sort_key() < key_limit))
386 break; // Prevent restarting list for next call.
387 v_it_.backward();
388 } while (!v_it_.at_last());
389 return best_v;
390}
391
392// Return true if the given width is close to one of the common
393// widths in column_widths_.
394bool TabFind::CommonWidth(int width) {
395 width /= kColumnWidthFactor;
396 ICOORDELT_IT it(&column_widths_);
397 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
398 ICOORDELT* w = it.data();
399 if (w->x() - 1 <= width && width <= w->y() + 1)
400 return true;
401 }
402 return false;
403}
404
405// Return true if the sizes are more than a
406// factor of 2 different.
407bool TabFind::DifferentSizes(int size1, int size2) {
408 return size1 > size2 * 2 || size2 > size1 * 2;
409}
410
411// Return true if the sizes are more than a
412// factor of 5 different.
413bool TabFind::VeryDifferentSizes(int size1, int size2) {
414 return size1 > size2 * 5 || size2 > size1 * 5;
415}
416
418
419// Top-level function to find TabVectors in an input page block.
420// Returns false if the detected skew angle is impossible.
421// Applies the detected skew angle to deskew the tabs, blobs and part_grid.
422bool TabFind::FindTabVectors(TabVector_LIST* hlines,
423 BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
424 int min_gutter_width,
425 double tabfind_aligned_gap_fraction,
426 ColPartitionGrid* part_grid,
427 FCOORD* deskew, FCOORD* reskew) {
428 ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width,
429 tabfind_aligned_gap_fraction,
430 block);
431 ComputeColumnWidths(tab_win, part_grid);
433 SortVectors();
434 CleanupTabs();
435 if (!Deskew(hlines, image_blobs, block, deskew, reskew))
436 return false; // Skew angle is too large.
437 part_grid->Deskew(*deskew);
438 ApplyTabConstraints();
439 #ifndef GRAPHICS_DISABLED
440 if (textord_tabfind_show_finaltabs) {
441 tab_win = MakeWindow(640, 50, "FinalTabs");
442 DisplayBoxes(tab_win);
443 DisplayTabs("FinalTabs", tab_win);
444 tab_win = DisplayTabVectors(tab_win);
445 }
446 #endif // GRAPHICS_DISABLED
447 return true;
448}
449
450// Top-level function to not find TabVectors in an input page block,
451// but setup for single column mode.
452void TabFind::DontFindTabVectors(BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
453 FCOORD* deskew, FCOORD* reskew) {
454 InsertBlobsToGrid(false, false, image_blobs, this);
455 InsertBlobsToGrid(true, false, &block->blobs, this);
456 deskew->set_x(1.0f);
457 deskew->set_y(0.0f);
458 reskew->set_x(1.0f);
459 reskew->set_y(0.0f);
460}
461
462// Cleans up the lists of blobs in the block ready for use by TabFind.
463// Large blobs that look like text are moved to the main blobs list.
464// Main blobs that are superseded by the image blobs are deleted.
466 BLOBNBOX_IT large_it = &block->large_blobs;
467 BLOBNBOX_IT blob_it = &block->blobs;
468 int b_count = 0;
469 for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
470 BLOBNBOX* large_blob = large_it.data();
471 if (large_blob->owner() != nullptr) {
472 blob_it.add_to_end(large_it.extract());
473 ++b_count;
474 }
475 }
477 tprintf("Moved %d large blobs to normal list\n",
478 b_count);
479 #ifndef GRAPHICS_DISABLED
480 ScrollView* rej_win = MakeWindow(500, 300, "Image blobs");
481 block->plot_graded_blobs(rej_win);
482 block->plot_noise_blobs(rej_win);
483 rej_win->Update();
484 #endif // GRAPHICS_DISABLED
485 }
486 block->DeleteUnownedNoise();
487}
488
489// Helper function to setup search limits for *TabForBox.
490void TabFind::SetupTabSearch(int x, int y, int* min_key, int* max_key) {
491 int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
492 int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
493 *min_key = std::min(key1, key2);
494 *max_key = std::max(key1, key2);
495}
496
498#ifndef GRAPHICS_DISABLED
499 // For every vector, display it.
500 TabVector_IT it(&vectors_);
501 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
502 TabVector* vector = it.data();
503 vector->Display(tab_win);
504 }
505 tab_win->Update();
506#endif
507 return tab_win;
508}
509
510// PRIVATE CODE.
511//
512// First part of FindTabVectors, which may be used twice if the text
513// is mostly of vertical alignment.
514ScrollView* TabFind::FindInitialTabVectors(BLOBNBOX_LIST* image_blobs,
515 int min_gutter_width,
516 double tabfind_aligned_gap_fraction,
517 TO_BLOCK* block) {
518 if (textord_tabfind_show_initialtabs) {
519 ScrollView* line_win = MakeWindow(0, 0, "VerticalLines");
520 line_win = DisplayTabVectors(line_win);
521 }
522 // Prepare the grid.
523 if (image_blobs != nullptr)
524 InsertBlobsToGrid(true, false, image_blobs, this);
525 InsertBlobsToGrid(true, false, &block->blobs, this);
526 ScrollView* initial_win = FindTabBoxes(min_gutter_width,
527 tabfind_aligned_gap_fraction);
528 FindAllTabVectors(min_gutter_width);
529
531 SortVectors();
532 EvaluateTabs();
533 if (textord_tabfind_show_initialtabs && initial_win != nullptr)
534 initial_win = DisplayTabVectors(initial_win);
535 MarkVerticalText();
536 return initial_win;
537}
538
539// Helper displays all the boxes in the given vector on the given window.
540static void DisplayBoxVector(const GenericVector<BLOBNBOX*>& boxes,
541 ScrollView* win) {
542 #ifndef GRAPHICS_DISABLED
543 for (int i = 0; i < boxes.size(); ++i) {
544 TBOX box = boxes[i]->bounding_box();
545 int left_x = box.left();
546 int right_x = box.right();
547 int top_y = box.top();
548 int bottom_y = box.bottom();
549 ScrollView::Color box_color = boxes[i]->BoxColor();
550 win->Pen(box_color);
551 win->Rectangle(left_x, bottom_y, right_x, top_y);
552 }
553 win->Update();
554 #endif // GRAPHICS_DISABLED
555}
556
557// For each box in the grid, decide whether it is a candidate tab-stop,
558// and if so add it to the left/right tab boxes.
559ScrollView* TabFind::FindTabBoxes(int min_gutter_width,
560 double tabfind_aligned_gap_fraction) {
561 left_tab_boxes_.clear();
562 right_tab_boxes_.clear();
563 // For every bbox in the grid, determine whether it uses a tab on an edge.
564 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this);
565 gsearch.StartFullSearch();
566 BLOBNBOX* bbox;
567 while ((bbox = gsearch.NextFullSearch()) != nullptr) {
568 if (TestBoxForTabs(bbox, min_gutter_width, tabfind_aligned_gap_fraction)) {
569 // If it is any kind of tab, insert it into the vectors.
570 if (bbox->left_tab_type() != TT_NONE)
571 left_tab_boxes_.push_back(bbox);
572 if (bbox->right_tab_type() != TT_NONE)
573 right_tab_boxes_.push_back(bbox);
574 }
575 }
576 // Sort left tabs by left and right by right to see the outermost one first
577 // on a ragged tab.
578 left_tab_boxes_.sort(SortByBoxLeft<BLOBNBOX>);
579 right_tab_boxes_.sort(SortRightToLeft<BLOBNBOX>);
580 ScrollView* tab_win = nullptr;
581 #ifndef GRAPHICS_DISABLED
582 if (textord_tabfind_show_initialtabs) {
583 tab_win = MakeWindow(0, 100, "InitialTabs");
584 tab_win->Pen(ScrollView::BLUE);
585 tab_win->Brush(ScrollView::NONE);
586 // Display the left and right tab boxes.
587 DisplayBoxVector(left_tab_boxes_, tab_win);
588 DisplayBoxVector(right_tab_boxes_, tab_win);
589 tab_win = DisplayTabs("Tabs", tab_win);
590 }
591 #endif // GRAPHICS_DISABLED
592 return tab_win;
593}
594
595bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width,
596 double tabfind_aligned_gap_fraction) {
597 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> radsearch(this);
598 TBOX box = bbox->bounding_box();
599 // If there are separator lines, get the column edges.
600 int left_column_edge = bbox->left_rule();
601 int right_column_edge = bbox->right_rule();
602 // The edges of the bounding box of the blob being processed.
603 int left_x = box.left();
604 int right_x = box.right();
605 int top_y = box.top();
606 int bottom_y = box.bottom();
607 int height = box.height();
608 bool debug = WithinTestRegion(3, left_x, top_y);
609 if (debug) {
610 tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
611 left_x, top_y, right_x, bottom_y,
612 left_column_edge, right_column_edge);
613 }
614 // Compute a search radius based on a multiple of the height.
615 int radius = (height * kTabRadiusFactor + gridsize_ - 1) / gridsize_;
616 radsearch.StartRadSearch((left_x + right_x)/2, (top_y + bottom_y)/2, radius);
617 // In Vertical Page mode, once we have an estimate of the vertical line
618 // spacing, the minimum amount of gutter space before a possible tab is
619 // increased under the assumption that column partition is always larger
620 // than line spacing.
621 int min_spacing =
622 static_cast<int>(height * tabfind_aligned_gap_fraction);
623 if (min_gutter_width > min_spacing)
624 min_spacing = min_gutter_width;
625 int min_ragged_gutter = kRaggedGutterMultiple * gridsize();
626 if (min_gutter_width > min_ragged_gutter)
627 min_ragged_gutter = min_gutter_width;
628 int target_right = left_x - min_spacing;
629 int target_left = right_x + min_spacing;
630 // We will be evaluating whether the left edge could be a left tab, and
631 // whether the right edge could be a right tab.
632 // A box can be a tab if its bool is_(left/right)_tab remains true, meaning
633 // that no blobs have been found in the gutter during the radial search.
634 // A box can also be a tab if there are objects in the gutter only above
635 // or only below, and there are aligned objects on the opposite side, but
636 // not too many unaligned objects. The maybe_(left/right)_tab_up counts
637 // aligned objects above and negatively counts unaligned objects above,
638 // and is set to -INT32_MAX if a gutter object is found above.
639 // The other 3 maybe ints work similarly for the other sides.
640 // These conditions are very strict, to minimize false positives, and really
641 // only aligned tabs and outermost ragged tab blobs will qualify, so we
642 // also have maybe_ragged_left/right with less stringent rules.
643 // A blob that is maybe_ragged_left/right will be further qualified later,
644 // using the min_ragged_gutter.
645 bool is_left_tab = true;
646 bool is_right_tab = true;
647 bool maybe_ragged_left = true;
648 bool maybe_ragged_right = true;
649 int maybe_left_tab_up = 0;
650 int maybe_right_tab_up = 0;
651 int maybe_left_tab_down = 0;
652 int maybe_right_tab_down = 0;
653 if (bbox->leader_on_left()) {
654 is_left_tab = false;
655 maybe_ragged_left = false;
656 maybe_left_tab_up = -INT32_MAX;
657 maybe_left_tab_down = -INT32_MAX;
658 }
659 if (bbox->leader_on_right()) {
660 is_right_tab = false;
661 maybe_ragged_right = false;
662 maybe_right_tab_up = -INT32_MAX;
663 maybe_right_tab_down = -INT32_MAX;
664 }
665 int alignment_tolerance = static_cast<int>(resolution_ * kAlignedFraction);
666 BLOBNBOX* neighbour = nullptr;
667 while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
668 if (neighbour == bbox)
669 continue;
670 TBOX nbox = neighbour->bounding_box();
671 int n_left = nbox.left();
672 int n_right = nbox.right();
673 if (debug)
674 tprintf("Neighbour at (%d,%d)->(%d,%d)\n",
675 n_left, nbox.bottom(), n_right, nbox.top());
676 // If the neighbouring blob is the wrong side of a separator line, then it
677 // "doesn't exist" as far as we are concerned.
678 if (n_right > right_column_edge || n_left < left_column_edge ||
679 left_x < neighbour->left_rule() || right_x > neighbour->right_rule())
680 continue; // Separator line in the way.
681 int n_mid_x = (n_left + n_right) / 2;
682 int n_mid_y = (nbox.top() + nbox.bottom()) / 2;
683 if (n_mid_x <= left_x && n_right >= target_right) {
684 if (debug)
685 tprintf("Not a left tab\n");
686 is_left_tab = false;
687 if (n_mid_y < top_y)
688 maybe_left_tab_down = -INT32_MAX;
689 if (n_mid_y > bottom_y)
690 maybe_left_tab_up = -INT32_MAX;
691 } else if (NearlyEqual(left_x, n_left, alignment_tolerance)) {
692 if (debug)
693 tprintf("Maybe a left tab\n");
694 if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX)
695 ++maybe_left_tab_up;
696 if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX)
697 ++maybe_left_tab_down;
698 } else if (n_left < left_x && n_right >= left_x) {
699 // Overlaps but not aligned so negative points on a maybe.
700 if (debug)
701 tprintf("Maybe Not a left tab\n");
702 if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX)
703 --maybe_left_tab_up;
704 if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX)
705 --maybe_left_tab_down;
706 }
707 if (n_left < left_x && nbox.y_overlap(box) && n_right >= target_right) {
708 maybe_ragged_left = false;
709 if (debug)
710 tprintf("Not a ragged left\n");
711 }
712 if (n_mid_x >= right_x && n_left <= target_left) {
713 if (debug)
714 tprintf("Not a right tab\n");
715 is_right_tab = false;
716 if (n_mid_y < top_y)
717 maybe_right_tab_down = -INT32_MAX;
718 if (n_mid_y > bottom_y)
719 maybe_right_tab_up = -INT32_MAX;
720 } else if (NearlyEqual(right_x, n_right, alignment_tolerance)) {
721 if (debug)
722 tprintf("Maybe a right tab\n");
723 if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX)
724 ++maybe_right_tab_up;
725 if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX)
726 ++maybe_right_tab_down;
727 } else if (n_right > right_x && n_left <= right_x) {
728 // Overlaps but not aligned so negative points on a maybe.
729 if (debug)
730 tprintf("Maybe Not a right tab\n");
731 if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX)
732 --maybe_right_tab_up;
733 if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX)
734 --maybe_right_tab_down;
735 }
736 if (n_right > right_x && nbox.y_overlap(box) && n_left <= target_left) {
737 maybe_ragged_right = false;
738 if (debug)
739 tprintf("Not a ragged right\n");
740 }
741 if (maybe_left_tab_down == -INT32_MAX && maybe_left_tab_up == -INT32_MAX &&
742 maybe_right_tab_down == -INT32_MAX && maybe_right_tab_up == -INT32_MAX)
743 break;
744 }
745 if (is_left_tab || maybe_left_tab_up > 1 || maybe_left_tab_down > 1) {
747 } else if (maybe_ragged_left && ConfirmRaggedLeft(bbox, min_ragged_gutter)) {
749 } else {
751 }
752 if (is_right_tab || maybe_right_tab_up > 1 || maybe_right_tab_down > 1) {
754 } else if (maybe_ragged_right &&
755 ConfirmRaggedRight(bbox, min_ragged_gutter)) {
757 } else {
759 }
760 if (debug) {
761 tprintf("Left result = %s, Right result=%s\n",
762 bbox->left_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" :
763 (bbox->left_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"),
764 bbox->right_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" :
765 (bbox->right_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"));
766 }
767 return bbox->left_tab_type() != TT_NONE || bbox->right_tab_type() != TT_NONE;
768}
769
770// Returns true if there is nothing in the rectangle of width min_gutter to
771// the left of bbox.
772bool TabFind::ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter) {
773 TBOX search_box(bbox->bounding_box());
774 search_box.set_right(search_box.left());
775 search_box.set_left(search_box.left() - min_gutter);
776 return NothingYOverlapsInBox(search_box, bbox->bounding_box());
777}
778
779// Returns true if there is nothing in the rectangle of width min_gutter to
780// the right of bbox.
781bool TabFind::ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter) {
782 TBOX search_box(bbox->bounding_box());
783 search_box.set_left(search_box.right());
784 search_box.set_right(search_box.right() + min_gutter);
785 return NothingYOverlapsInBox(search_box, bbox->bounding_box());
786}
787
788// Returns true if there is nothing in the given search_box that vertically
789// overlaps target_box other than target_box itself.
790bool TabFind::NothingYOverlapsInBox(const TBOX& search_box,
791 const TBOX& target_box) {
792 BlobGridSearch rsearch(this);
793 rsearch.StartRectSearch(search_box);
794 BLOBNBOX* blob;
795 while ((blob = rsearch.NextRectSearch()) != nullptr) {
796 const TBOX& box = blob->bounding_box();
797 if (box.y_overlap(target_box) && !(box == target_box))
798 return false;
799 }
800 return true;
801}
802
// Finds the tab vectors in two phases and hands them to vectors_.
// Phase 1 calls FindTabVectors for left- and right-aligned tabs with a search
// size that grows in steps of kMinVerticalSearch (while it stays below
// kMaxVerticalSearch), stopping at the first size that yields any vectors;
// this only serves to estimate the vertical direction in
// vertical_x/vertical_y. The trial vectors are then discarded.
// Phase 2 (the "real" search) collects vectors in dummy_vectors, appends
// them to vectors_ and finally calls SetVerticalSkewAndParallelize with the
// accumulated vertical estimate.
// min_gutter_width is passed through unchanged to FindTabVectors.
// NOTE(review): this copy of the function is incomplete - the embedded source
// numbering skips 829, 834, 836, 843, 845, 847 and 849, so several statements
// are missing (see the notes below). Restore them from the upstream file
// before compiling.
803void TabFind::FindAllTabVectors(int min_gutter_width) {
804 // A list of vectors that will be created in estimating the skew.
805 TabVector_LIST dummy_vectors;
806 // An estimate of the vertical direction, revised as more lines are added.
807 int vertical_x = 0;
808 int vertical_y = 1;
809 // Find an estimate of the vertical direction by finding some tab vectors.
810 // Slowly up the search size until we get some vectors.
811 for (int search_size = kMinVerticalSearch; search_size < kMaxVerticalSearch;
812 search_size += kMinVerticalSearch) {
813 int vector_count = FindTabVectors(search_size, TA_LEFT_ALIGNED,
814 min_gutter_width,
815 &dummy_vectors,
816 &vertical_x, &vertical_y);
817 vector_count += FindTabVectors(search_size, TA_RIGHT_ALIGNED,
818 min_gutter_width,
819 &dummy_vectors,
820 &vertical_x, &vertical_y);
821 if (vector_count > 0)
822 break;
823 }
824 // Get rid of the test vectors and reset the types of the tabs.
825 dummy_vectors.clear();
826 for (int i = 0; i < left_tab_boxes_.size(); ++i) {
827 BLOBNBOX* bbox = left_tab_boxes_[i];
828 if (bbox->left_tab_type() == TT_CONFIRMED)
 // NOTE(review): embedded line 829 (the statement controlled by the `if`
 // above) is missing. Given the "reset the types" comment it presumably
 // resets the left tab type of bbox - confirm against upstream.
830 }
831 for (int i = 0; i < right_tab_boxes_.size(); ++i) {
832 BLOBNBOX* bbox = right_tab_boxes_[i];
833 if (bbox->right_tab_type() == TT_CONFIRMED)
 // NOTE(review): embedded line 834 is missing; presumably the right-hand
 // counterpart of the reset above - confirm against upstream.
835 }
 // NOTE(review): embedded line 836 is missing; the closing brace on line 839
 // implies it opened a block around this tprintf (presumably a debug guard).
837 tprintf("Beginning real tab search with vertical = %d,%d...\n",
838 vertical_x, vertical_y);
839 }
840 // Now do the real thing, but keep the vectors in the dummy_vectors list
841 // until they are all done, so we don't get the tab vectors confused with
842 // the rule line vectors.
 // NOTE(review): the first line of each of the four calls below (embedded
 // lines 843, 845, 847, 849) is missing; only the trailing argument lines
 // survive. They look like FindTabVectors calls, one per alignment type -
 // TODO confirm the search sizes and alignments against upstream.
844 &dummy_vectors, &vertical_x, &vertical_y);
846 &dummy_vectors, &vertical_x, &vertical_y);
848 &dummy_vectors, &vertical_x, &vertical_y);
850 &dummy_vectors, &vertical_x, &vertical_y);
851 // Now add the vectors to the vectors_ list.
852 TabVector_IT v_it(&vectors_);
853 v_it.add_list_after(&dummy_vectors);
854 // Now use the summed (mean) vertical vector as the direction for everything.
855 SetVerticalSkewAndParallelize(vertical_x, vertical_y);
856}
857
858// Helper for FindAllTabVectors finds the vectors of a particular type.
859int TabFind::FindTabVectors(int search_size_multiple, TabAlignment alignment,
860 int min_gutter_width, TabVector_LIST* vectors,
861 int* vertical_x, int* vertical_y) {
862 TabVector_IT vector_it(vectors);
863 int vector_count = 0;
864 // Search the right or left tab boxes, looking for tab vectors.
865 bool right = alignment == TA_RIGHT_ALIGNED || alignment == TA_RIGHT_RAGGED;
866 const GenericVector<BLOBNBOX*>& boxes = right ? right_tab_boxes_
867 : left_tab_boxes_;
868 for (int i = 0; i < boxes.size(); ++i) {
869 BLOBNBOX* bbox = boxes[i];
870 if ((!right && bbox->left_tab_type() == TT_MAYBE_ALIGNED) ||
871 (right && bbox->right_tab_type() == TT_MAYBE_ALIGNED)) {
872 TabVector* vector = FindTabVector(search_size_multiple, min_gutter_width,
873 alignment,
874 bbox, vertical_x, vertical_y);
875 if (vector != nullptr) {
876 ++vector_count;
877 vector_it.add_to_end(vector);
878 }
879 }
880 }
881 return vector_count;
882}
883
884// Finds a vector corresponding to a tabstop running through the
885// given box of the given alignment type.
886// search_size_multiple is a multiple of height used to control
887// the size of the search.
888// vertical_x and y are updated with an estimate of the real
889// vertical direction. (skew finding.)
890// Returns nullptr if no decent tabstop can be found.
891TabVector* TabFind::FindTabVector(int search_size_multiple,
892 int min_gutter_width,
893 TabAlignment alignment,
894 BLOBNBOX* bbox,
895 int* vertical_x, int* vertical_y) {
896 int height = std::max(static_cast<int>(bbox->bounding_box().height()), gridsize());
897 AlignedBlobParams align_params(*vertical_x, *vertical_y,
898 height,
899 search_size_multiple, min_gutter_width,
900 resolution_, alignment);
901 // FindVerticalAlignment is in the parent (AlignedBlob) class.
902 return FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y);
903}
904
905// Set the vertical_skew_ member from the given vector and refit
906// all vectors parallel to the skew vector.
// Every vector in vectors_ is refitted with Fit(vertical_skew_, true) and the
// list is then re-sorted with SortVectors(). v_it_ is left pointing at
// vectors_.
// NOTE(review): this copy is incomplete - embedded lines 910 and 912 are
// missing, which leaves the tprintf call below without its arguments and
// closing parenthesis. Restore from upstream before compiling.
907void TabFind::SetVerticalSkewAndParallelize(int vertical_x, int vertical_y) {
908 // Fit the vertical vector into an ICOORD, which is 16 bit.
909 vertical_skew_.set_with_shrink(vertical_x, vertical_y);
 // NOTE(review): embedded line 910 is missing (presumably a debug guard for
 // the tprintf) - confirm against upstream.
911 tprintf("Vertical skew vector=(%d,%d)\n",
 // NOTE(review): embedded line 912 is missing; it must supply the two %d
 // arguments (presumably the components of vertical_skew_) and close the call.
913 v_it_.set_to_list(&vectors_);
914 for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
915 TabVector* v = v_it_.data();
916 v->Fit(vertical_skew_, true);
917 }
918 // Now sort the vectors as their direction has potentially changed.
919 SortVectors();
920}
921
922// Sort all the current vectors using the given vertical direction vector.
923void TabFind::SortVectors() {
924 vectors_.sort(TabVector::SortVectorsByKey);
925 v_it_.set_to_list(&vectors_);
926}
927
928// Evaluate all the current tab vectors.
929void TabFind::EvaluateTabs() {
930 TabVector_IT rule_it(&vectors_);
931 for (rule_it.mark_cycle_pt(); !rule_it.cycled_list(); rule_it.forward()) {
932 TabVector* tab = rule_it.data();
933 if (!tab->IsSeparator()) {
934 tab->Evaluate(vertical_skew_, this);
935 if (tab->BoxCount() < kMinEvaluatedTabs) {
936 if (textord_debug_tabfind > 2)
937 tab->Print("Too few boxes");
938 delete rule_it.extract();
939 v_it_.set_to_list(&vectors_);
940 } else if (WithinTestRegion(3, tab->startpt().x(), tab->startpt().y())) {
941 tab->Print("Evaluated tab");
942 }
943 }
944 }
945}
946
947// Trace textlines from one side to the other of each tab vector, saving
948// the most frequent column widths found in a list so that a given width
949// can be tested for being a common width with a simple callback function.
950void TabFind::ComputeColumnWidths(ScrollView* tab_win,
951 ColPartitionGrid* part_grid) {
952 #ifndef GRAPHICS_DISABLED
953 if (tab_win != nullptr)
954 tab_win->Pen(ScrollView::WHITE);
955 #endif // GRAPHICS_DISABLED
956 // Accumulate column sections into a STATS
957 int col_widths_size = (tright_.x() - bleft_.x()) / kColumnWidthFactor;
958 STATS col_widths(0, col_widths_size + 1);
959 ApplyPartitionsToColumnWidths(part_grid, &col_widths);
960 #ifndef GRAPHICS_DISABLED
961 if (tab_win != nullptr) {
962 tab_win->Update();
963 }
964 #endif // GRAPHICS_DISABLED
965 if (textord_debug_tabfind > 1)
966 col_widths.print();
967 // Now make a list of column widths.
968 MakeColumnWidths(col_widths_size, &col_widths);
969 // Turn the column width into a range.
970 ApplyPartitionsToColumnWidths(part_grid, nullptr);
971}
972
973// Finds column width and:
974// if col_widths is not null (pass1):
975// pair-up tab vectors with existing ColPartitions and accumulate widths.
976// else (pass2):
977// find the largest real partition width for each recorded column width,
978// to be used as the minimum acceptable width.
979void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
980 STATS* col_widths) {
981 // For every ColPartition in the part_grid, add partners to the tabvectors
982 // and accumulate the column widths.
983 ColPartitionGridSearch gsearch(part_grid);
984 gsearch.StartFullSearch();
985 ColPartition* part;
986 while ((part = gsearch.NextFullSearch()) != nullptr) {
987 BLOBNBOX_C_IT blob_it(part->boxes());
988 if (blob_it.empty())
989 continue;
990 BLOBNBOX* left_blob = blob_it.data();
991 blob_it.move_to_last();
992 BLOBNBOX* right_blob = blob_it.data();
993 TabVector* left_vector = LeftTabForBox(left_blob->bounding_box(),
994 true, false);
995 if (left_vector == nullptr || left_vector->IsRightTab())
996 continue;
997 TabVector* right_vector = RightTabForBox(right_blob->bounding_box(),
998 true, false);
999 if (right_vector == nullptr || right_vector->IsLeftTab())
1000 continue;
1001
1002 int line_left = left_vector->XAtY(left_blob->bounding_box().bottom());
1003 int line_right = right_vector->XAtY(right_blob->bounding_box().bottom());
1004 // Add to STATS of measurements if the width is significant.
1005 int width = line_right - line_left;
1006 if (col_widths != nullptr) {
1007 AddPartnerVector(left_blob, right_blob, left_vector, right_vector);
1008 if (width >= kMinColumnWidth)
1009 col_widths->add(width / kColumnWidthFactor, 1);
1010 } else {
1011 width /= kColumnWidthFactor;
1012 ICOORDELT_IT it(&column_widths_);
1013 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1014 ICOORDELT* w = it.data();
1015 if (NearlyEqual<int>(width, w->y(), 1)) {
1016 int true_width = part->bounding_box().width() / kColumnWidthFactor;
1017 if (true_width <= w->y() && true_width > w->x())
1018 w->set_x(true_width);
1019 break;
1020 }
1021 }
1022 }
1023 }
1024}
1025
1026// Helper makes the list of common column widths in column_widths_ from the
1027// input col_widths. Destroys the content of col_widths by repeatedly
1028// finding the mode and erasing the peak.
// col_widths_size is the exclusive upper limit used when scanning to the
// right of a mode. A peak is kept (as a new ICOORDELT(0, width) appended to
// column_widths_) only if its total count exceeds both kMinLinesInColumn and
// kMinFractionalLinesInColumn times the initial total of col_widths.
// NOTE(review): embedded line 1055 is missing from this copy (see below).
1029void TabFind::MakeColumnWidths(int col_widths_size, STATS* col_widths) {
1030 ICOORDELT_IT w_it(&column_widths_);
 // Remember the total before the histogram is emptied; it is the
 // denominator for the fractional test and the percentage printed below.
1031 int total_col_count = col_widths->get_total();
1032 while (col_widths->get_total() > 0) {
1033 int width = col_widths->mode();
1034 int col_count = col_widths->pile_count(width);
 // Erase the mode bucket so the next iteration finds a different peak.
1035 col_widths->add(width, -col_count);
1036 // Get the entire peak.
 // Absorb contiguous non-empty buckets to the left of the mode. The scan
 // stops before bucket 0 (left > 0).
1037 for (int left = width - 1; left > 0 &&
1038 col_widths->pile_count(left) > 0;
1039 --left) {
1040 int new_count = col_widths->pile_count(left);
1041 col_count += new_count;
1042 col_widths->add(left, -new_count);
1043 }
 // Absorb contiguous non-empty buckets to the right of the mode.
1044 for (int right = width + 1; right < col_widths_size &&
1045 col_widths->pile_count(right) > 0;
1046 ++right) {
1047 int new_count = col_widths->pile_count(right);
1048 col_count += new_count;
1049 col_widths->add(right, -new_count);
1050 }
1051 if (col_count > kMinLinesInColumn &&
1052 col_count > kMinFractionalLinesInColumn * total_col_count) {
 // x is initialised to 0 and y holds the (scaled) mode width.
1053 auto* w = new ICOORDELT(0, width);
1054 w_it.add_after_then_move(w);
 // NOTE(review): embedded line 1055 is missing here; presumably a debug
 // guard for the tprintf below - confirm against upstream.
1056 tprintf("Column of width %d has %d = %.2f%% lines\n",
1057 width * kColumnWidthFactor, col_count,
1058 100.0 * col_count / total_col_count);
1059 }
1060 }
1061}
1062
1063// Mark blobs as being in a vertical text line where that is the case.
1064// Visits every blob in this grid; the function itself returns nothing.
// NOTE(review): this copy is incomplete - embedded lines 1066 and 1075 are
// missing (see below). Restore from upstream before relying on it.
1065void TabFind::MarkVerticalText() {
 // NOTE(review): embedded line 1066 is missing; presumably a debug guard for
 // the tprintf below - confirm against upstream.
1067 tprintf("Checking for vertical lines\n");
1068 BlobGridSearch gsearch(this);
1069 gsearch.StartFullSearch();
1070 BLOBNBOX* blob = nullptr;
1071 while ((blob = gsearch.NextFullSearch()) != nullptr) {
 // Blobs with a region type below BRT_UNKNOWN are left alone.
1072 if (blob->region_type() < BRT_UNKNOWN)
1073 continue;
1074 if (blob->UniquelyVertical()) {
 // NOTE(review): embedded line 1075 (the body of this `if`) is missing;
 // presumably it marks the blob as vertical text - confirm against upstream.
1076 }
1077 }
1078}
1079
// Estimates the median horizontal gap between successive partnered left tabs
// in lines. Only vectors that are left tabs, are not separators and have a
// single partner (GetSinglePartner() != nullptr) are used. The gap is the
// distance from the previous pair's partner start x to this vector's start x.
// Returns 0 if fewer than kMinLinesInColumn gaps were collected, otherwise
// the median gap truncated to int.
// NOTE(review): embedded line 1096 is missing from this copy (see below).
1080int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) {
1081 TabVector_IT it(lines);
1082 int prev_right = -1;
 // Upper limit of both histograms: kMaxGutterWidthAbsolute scaled by
 // resolution_.
1083 int max_gap = static_cast<int>(kMaxGutterWidthAbsolute * resolution_);
1084 STATS gaps(0, max_gap);
1085 STATS heights(0, max_gap);
1086 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1087 TabVector* v = it.data();
1088 TabVector* partner = v->GetSinglePartner();
1089 if (!v->IsLeftTab() || v->IsSeparator() || !partner) continue;
 // Despite its name, `heights` accumulates the x distance between the
 // vector and its partner; it is only used in the debug print below.
1090 heights.add(partner->startpt().x() - v->startpt().x(), 1);
1091 if (prev_right > 0 && v->startpt().x() > prev_right) {
1092 gaps.add(v->startpt().x() - prev_right, 1);
1093 }
1094 prev_right = partner->startpt().x();
1095 }
 // NOTE(review): embedded line 1096 is missing here; presumably a debug guard
 // for the tprintf below - confirm against upstream.
1097 tprintf("TabGutter total %d median_gap %.2f median_hgt %.2f\n",
1098 gaps.get_total(), gaps.median(), heights.median());
1099 if (gaps.get_total() < kMinLinesInColumn) return 0;
1100 return static_cast<int>(gaps.median());
1101}
1102
1103// Find the next adjacent (looking to the left or right) blob on this text
1104// line, with the constraint that it must vertically significantly overlap
1105// the [top_y, bottom_y] range.
1106// If ignore_images is true, then blobs with aligned_text() < 0 are treated
1107// as if they do not exist.
1108BLOBNBOX* TabFind::AdjacentBlob(const BLOBNBOX* bbox,
1109 bool look_left, bool ignore_images,
1110 double min_overlap_fraction,
1111 int gap_limit, int top_y, int bottom_y) {
1112 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> sidesearch(this);
1113 const TBOX& box = bbox->bounding_box();
1114 int left = box.left();
1115 int right = box.right();
1116 int mid_x = (left + right) / 2;
1117 sidesearch.StartSideSearch(mid_x, bottom_y, top_y);
1118 int best_gap = 0;
1119 bool debug = WithinTestRegion(3, left, bottom_y);
1120 BLOBNBOX* result = nullptr;
1121 BLOBNBOX* neighbour = nullptr;
1122 while ((neighbour = sidesearch.NextSideSearch(look_left)) != nullptr) {
1123 if (debug) {
1124 tprintf("Adjacent blob: considering box:");
1125 neighbour->bounding_box().print();
1126 }
1127 if (neighbour == bbox ||
1128 (ignore_images && neighbour->region_type() < BRT_UNKNOWN))
1129 continue;
1130 const TBOX& nbox = neighbour->bounding_box();
1131 int n_top_y = nbox.top();
1132 int n_bottom_y = nbox.bottom();
1133 int v_overlap = std::min(n_top_y, top_y) - std::max(n_bottom_y, bottom_y);
1134 int height = top_y - bottom_y;
1135 int n_height = n_top_y - n_bottom_y;
1136 if (v_overlap > min_overlap_fraction * std::min(height, n_height) &&
1137 (min_overlap_fraction == 0.0 || !DifferentSizes(height, n_height))) {
1138 int n_left = nbox.left();
1139 int n_right = nbox.right();
1140 int h_gap = std::max(n_left, left) - std::min(n_right, right);
1141 int n_mid_x = (n_left + n_right) / 2;
1142 if (look_left == (n_mid_x < mid_x) && n_mid_x != mid_x) {
1143 if (h_gap > gap_limit) {
1144 // Hit a big gap before next tab so don't return anything.
1145 if (debug)
1146 tprintf("Giving up due to big gap = %d vs %d\n",
1147 h_gap, gap_limit);
1148 return result;
1149 }
1150 if (h_gap > 0 && (look_left ? neighbour->right_tab_type()
1151 : neighbour->left_tab_type()) >= TT_CONFIRMED) {
1152 // Hit a tab facing the wrong way. Stop in case we are crossing
1153 // the column boundary.
1154 if (debug)
1155 tprintf("Collision with like tab of type %d at %d,%d\n",
1156 look_left ? neighbour->right_tab_type()
1157 : neighbour->left_tab_type(),
1158 n_left, nbox.bottom());
1159 return result;
1160 }
1161 // This is a good fit to the line. Continue with this
1162 // neighbour as the bbox if the best gap.
1163 if (result == nullptr || h_gap < best_gap) {
1164 if (debug)
1165 tprintf("Good result\n");
1166 result = neighbour;
1167 best_gap = h_gap;
1168 } else {
1169 // The new one is worse, so we probably already have the best result.
1170 return result;
1171 }
1172 } else if (debug) {
1173 tprintf("Wrong way\n");
1174 }
1175 } else if (debug) {
1176 tprintf("Insufficient overlap\n");
1177 }
1178 }
1179 if (WithinTestRegion(3, left, box.top()))
1180 tprintf("Giving up due to end of search\n");
1181 return result; // Hit the edge and found nothing.
1182}
1183
1184// Add a bi-directional partner relationship between the left
1185// and the right. If one (or both) of the vectors is a separator,
1186// extend a nearby extendable vector or create a new one of the
1187// correct type, using the given left or right blob as a guide.
1188void TabFind::AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
1189 TabVector* left, TabVector* right) {
1190 const TBOX& left_box = left_blob->bounding_box();
1191 const TBOX& right_box = right_blob->bounding_box();
1192 if (left->IsSeparator()) {
1193 // Try to find a nearby left edge to extend.
1194 TabVector* v = LeftTabForBox(left_box, true, true);
1195 if (v != nullptr && v != left && v->IsLeftTab() &&
1196 v->XAtY(left_box.top()) > left->XAtY(left_box.top())) {
1197 left = v; // Found a good replacement.
1198 left->ExtendToBox(left_blob);
1199 } else {
1200 // Fake a vector.
1201 left = new TabVector(*left, TA_LEFT_RAGGED, vertical_skew_, left_blob);
1202 vectors_.add_sorted(TabVector::SortVectorsByKey, left);
1203 v_it_.move_to_first();
1204 }
1205 }
1206 if (right->IsSeparator()) {
1207 // Try to find a nearby left edge to extend.
1208 if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1209 tprintf("Box edge (%d,%d-%d)",
1210 right_box.right(), right_box.bottom(), right_box.top());
1211 right->Print(" looking for improvement for");
1212 }
1213 TabVector* v = RightTabForBox(right_box, true, true);
1214 if (v != nullptr && v != right && v->IsRightTab() &&
1215 v->XAtY(right_box.top()) < right->XAtY(right_box.top())) {
1216 right = v; // Found a good replacement.
1217 right->ExtendToBox(right_blob);
1218 if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1219 right->Print("Extended vector");
1220 }
1221 } else {
1222 // Fake a vector.
1223 right = new TabVector(*right, TA_RIGHT_RAGGED, vertical_skew_,
1224 right_blob);
1225 vectors_.add_sorted(TabVector::SortVectorsByKey, right);
1226 v_it_.move_to_first();
1227 if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1228 right->Print("Created new vector");
1229 }
1230 }
1231 }
1232 left->AddPartner(right);
1233 right->AddPartner(left);
1234}
1235
1236// Remove separators and unused tabs from the main vectors_ list
1237// to the dead_vectors_ list.
1238void TabFind::CleanupTabs() {
1239 // TODO(rays) Before getting rid of separators and unused vectors, it
1240 // would be useful to try moving ragged vectors outwards to see if this
1241 // allows useful extension. Could be combined with checking ends of partners.
1242 TabVector_IT it(&vectors_);
1243 TabVector_IT dead_it(&dead_vectors_);
1244 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1245 TabVector* v = it.data();
1246 if (v->IsSeparator() || v->Partnerless()) {
1247 dead_it.add_after_then_move(it.extract());
1248 v_it_.set_to_list(&vectors_);
1249 } else {
1250 v->FitAndEvaluateIfNeeded(vertical_skew_, this);
1251 }
1252 }
1253}
1254
1255// Apply the given rotation to the given list of blobs.
1256void TabFind::RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs) {
1257 BLOBNBOX_IT it(blobs);
1258 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1259 it.data()->rotate_box(rotation);
1260 }
1261}
1262
1263// Recreate the grid with deskewed BLOBNBOXes.
1264// Returns false if the detected skew angle is impossible.
1265bool TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
1266 TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew) {
1267 ComputeDeskewVectors(deskew, reskew);
1268 if (deskew->x() < kCosMaxSkewAngle)
1269 return false;
1270 RotateBlobList(*deskew, image_blobs);
1271 RotateBlobList(*deskew, &block->blobs);
1272 RotateBlobList(*deskew, &block->small_blobs);
1273 RotateBlobList(*deskew, &block->noise_blobs);
1274
1275 // Rotate the horizontal vectors. The vertical vectors don't need
1276 // rotating as they can just be refitted.
1277 TabVector_IT h_it(hlines);
1278 for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1279 TabVector* h = h_it.data();
1280 h->Rotate(*deskew);
1281 }
1282 TabVector_IT d_it(&dead_vectors_);
1283 for (d_it.mark_cycle_pt(); !d_it.cycled_list(); d_it.forward()) {
1284 TabVector* d = d_it.data();
1285 d->Rotate(*deskew);
1286 }
1287 SetVerticalSkewAndParallelize(0, 1);
1288 // Rebuild the grid to the new size.
1289 TBOX grid_box(bleft_, tright_);
1290 grid_box.rotate_large(*deskew);
1291 Init(gridsize(), grid_box.botleft(), grid_box.topright());
1292 InsertBlobsToGrid(false, false, image_blobs, this);
1293 InsertBlobsToGrid(true, false, &block->blobs, this);
1294 return true;
1295}
1296
1297// Flip the vertical and horizontal lines and rotate the grid ready
1298// for working on the rotated image.
1299// This also makes parameter adjustments for FindInitialTabVectors().
1300void TabFind::ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate,
1301 TabVector_LIST* horizontal_lines,
1302 int* min_gutter_width) {
1303 // Rotate the horizontal and vertical vectors and swap them over.
1304 // Only the separators are kept and rotated; other tabs are used
1305 // to estimate the gutter width then thrown away.
1306 TabVector_LIST ex_verticals;
1307 TabVector_IT ex_v_it(&ex_verticals);
1308 TabVector_LIST vlines;
1309 TabVector_IT v_it(&vlines);
1310 while (!v_it_.empty()) {
1311 TabVector* v = v_it_.extract();
1312 if (v->IsSeparator()) {
1313 v->Rotate(rotate);
1314 ex_v_it.add_after_then_move(v);
1315 } else {
1316 v_it.add_after_then_move(v);
1317 }
1318 v_it_.forward();
1319 }
1320
1321 // Adjust the min gutter width for better tabbox selection
1322 // in 2nd call to FindInitialTabVectors().
1323 int median_gutter = FindMedianGutterWidth(&vlines);
1324 if (median_gutter > *min_gutter_width)
1325 *min_gutter_width = median_gutter;
1326
1327 TabVector_IT h_it(horizontal_lines);
1328 for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1329 TabVector* h = h_it.data();
1330 h->Rotate(rotate);
1331 }
1332 v_it_.add_list_after(horizontal_lines);
1333 v_it_.move_to_first();
1334 h_it.set_to_list(horizontal_lines);
1335 h_it.add_list_after(&ex_verticals);
1336
1337 // Rebuild the grid to the new size.
1338 TBOX grid_box(bleft(), tright());
1339 grid_box.rotate_large(rotate);
1340 Init(gridsize(), grid_box.botleft(), grid_box.topright());
1341}
1342
1343// Clear the grid and get rid of the tab vectors, but not separators,
1344// ready to start again.
// NOTE(review): the function signature (embedded line 1345) is missing from
// this copy; from the comment on the following function this is presumably
// TabFind::Reset - confirm against upstream and restore before compiling.
1346 v_it_.move_to_first();
 // Delete every vector that is not a separator; separators stay in the list.
1347 for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1348 if (!v_it_.data()->IsSeparator())
1349 delete v_it_.extract();
1350 }
1351 Clear();
1352}
1353
1354// Reflect the separator tab vectors and the grids in the y-axis.
1355// Can only be called after Reset!
// NOTE(review): the function signature (embedded line 1356) is missing from
// this copy; presumably TabFind::ReflectInYAxis - confirm against upstream
// and restore before compiling.
1357 TabVector_LIST temp_list;
1358 TabVector_IT temp_it(&temp_list);
1359 v_it_.move_to_first();
1360 // The TabVector list only contains vertical lines, but they need to be
1361 // reflected and the list needs to be reversed, so they are still in
1362 // sort_key order.
1363 while (!v_it_.empty()) {
1364 TabVector* v = v_it_.extract();
1365 v_it_.forward();
1366 v->ReflectInYAxis();
 // Adding before the current element builds temp_list in reverse order.
1367 temp_it.add_before_then_move(v);
1368 }
1369 v_it_.add_list_after(&temp_list);
1370 v_it_.move_to_first();
1371 // Reset this grid with reflected bounding boxes.
1372 TBOX grid_box(bleft(), tright());
 // Negate and swap left/right: the new left is -right, the new right is
 // -left (saved in tmp before left is overwritten).
1373 int tmp = grid_box.left();
1374 grid_box.set_left(-grid_box.right());
1375 grid_box.set_right(-tmp);
1376 Init(gridsize(), grid_box.botleft(), grid_box.topright());
1377}
1378
1379// Compute the rotation required to deskew, and its inverse rotation.
1380void TabFind::ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew) {
1381 double length = vertical_skew_ % vertical_skew_;
1382 length = sqrt(length);
1383 deskew->set_x(static_cast<float>(vertical_skew_.y() / length));
1384 deskew->set_y(static_cast<float>(vertical_skew_.x() / length));
1385 reskew->set_x(deskew->x());
1386 reskew->set_y(-deskew->y());
1387}
1388
1389// Compute and apply constraints to the end positions of TabVectors so
1390// that where possible partners end at the same y coordinate.
1391void TabFind::ApplyTabConstraints() {
1392 TabVector_IT it(&vectors_);
1393 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1394 TabVector* v = it.data();
1395 v->SetupConstraints();
1396 }
1397 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1398 TabVector* v = it.data();
1399 // With the first and last partner, we want a common bottom and top,
1400 // respectively, and for each change of partner, we want a common
1401 // top of first with bottom of next.
1402 v->SetupPartnerConstraints();
1403 }
1404 // TODO(rays) The back-to-back pairs should really be done like the
1405 // front-to-front pairs, but there is no convenient way of producing the
1406 // list of partners like there is with the front-to-front.
1407 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1408 TabVector* v = it.data();
1409 if (!v->IsRightTab())
1410 continue;
1411 // For each back-to-back pair of vectors, try for common top and bottom.
1412 TabVector_IT partner_it(it);
1413 for (partner_it.forward(); !partner_it.at_first(); partner_it.forward()) {
1414 TabVector* partner = partner_it.data();
1415 if (!partner->IsLeftTab() || !v->VOverlap(*partner))
1416 continue;
1417 v->SetupPartnerConstraints(partner);
1418 }
1419 }
1420 // Now actually apply the constraints to get common start/end points.
1421 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1422 TabVector* v = it.data();
1423 if (!v->IsSeparator())
1424 v->ApplyConstraints();
1425 }
1426 // TODO(rays) Where constraint application fails, it would be good to try
1427 // checking the ends to see if they really should be moved.
1428}
1429
1430} // namespace tesseract.
@ TT_MAYBE_RAGGED
Definition: blobbox.h:62
@ TT_CONFIRMED
Definition: blobbox.h:64
@ TT_MAYBE_ALIGNED
Definition: blobbox.h:63
@ TT_NONE
Definition: blobbox.h:60
@ BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:120
@ BRT_UNKNOWN
Definition: blobbox.h:78
@ BRT_VERT_TEXT
Definition: blobbox.h:79
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:37
#define BOOL_VAR(name, val, comment)
Definition: params.h:306
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int textord_debug_tabfind
Definition: alignedblob.cpp:27
const int kColumnWidthFactor
Definition: tabfind.h:42
const int kTabRadiusFactor
Definition: tabfind.cpp:35
const int kMinVerticalSearch
Definition: tabfind.cpp:37
const int kRaggedGutterMultiple
Definition: tabfind.cpp:51
const double kMaxGutterWidthAbsolute
Definition: tabfind.cpp:49
const double kCosMaxSkewAngle
Definition: tabfind.cpp:60
const int kMaxVerticalSearch
Definition: tabfind.cpp:38
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
const int kMaxRaggedSearch
Definition: tabfind.cpp:39
const double kLineFragmentAspectRatio
Definition: tabfind.cpp:54
const double kMinFractionalLinesInColumn
Definition: tabfind.cpp:45
@ TA_RIGHT_ALIGNED
Definition: tabvector.h:48
@ TA_RIGHT_RAGGED
Definition: tabvector.h:49
@ TA_LEFT_ALIGNED
Definition: tabvector.h:45
@ TA_LEFT_RAGGED
Definition: tabvector.h:46
const double kAlignedFraction
Definition: alignedblob.cpp:38
const int kMinLinesInColumn
Definition: tabfind.cpp:41
const double kMinColumnWidth
const int kMinEvaluatedTabs
Definition: tabfind.cpp:56
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:31
int push_back(T object)
int size() const
Definition: genericvector.h:72
void set_left_tab_type(TabType new_type)
Definition: blobbox.h:274
bool leader_on_right() const
Definition: blobbox.h:364
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:334
void set_right_tab_type(TabType new_type)
Definition: blobbox.h:280
bool leader_on_left() const
Definition: blobbox.h:358
void set_left_rule(int new_left)
Definition: blobbox.h:316
BlobRegionType region_type() const
Definition: blobbox.h:283
TabType right_tab_type() const
Definition: blobbox.h:277
TabType left_tab_type() const
Definition: blobbox.h:271
int right_rule() const
Definition: blobbox.h:319
int left_rule() const
Definition: blobbox.h:313
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:286
const TBOX & bounding_box() const
Definition: blobbox.h:230
tesseract::ColPartition * owner() const
Definition: blobbox.h:352
bool joined_to_prev() const
Definition: blobbox.h:256
BlobTextFlowType flow() const
Definition: blobbox.h:295
void set_right_rule(int new_right)
Definition: blobbox.h:322
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:430
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:328
bool UniquelyVertical() const
Definition: blobbox.h:410
BLOBNBOX_LIST blobs
Definition: blobbox.h:772
void DeleteUnownedNoise()
Definition: blobbox.cpp:1037
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:774
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:776
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:775
void plot_noise_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1063
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1071
integer coordinate
Definition: points.h:32
void set_x(int16_t xin)
rewrite function
Definition: points.h:61
void set_with_shrink(int x, int y)
Set from the given x,y, shrinking the vector to fit if needed.
Definition: points.cpp:41
int16_t y() const
access_function
Definition: points.h:56
int16_t x() const
access function
Definition: points.h:52
Definition: points.h:189
float y() const
Definition: points.h:210
void set_y(float yin)
rewrite function
Definition: points.h:218
void set_x(float xin)
rewrite function
Definition: points.h:214
float x() const
Definition: points.h:207
Definition: rect.h:34
const ICOORD & botleft() const
Definition: rect.h:92
void set_right(int x)
Definition: rect.h:82
void rotate_large(const FCOORD &vec)
Definition: rect.cpp:72
int16_t top() const
Definition: rect.h:58
void print() const
Definition: rect.h:278
int16_t width() const
Definition: rect.h:115
int16_t height() const
Definition: rect.h:108
bool y_overlap(const TBOX &box) const
Definition: rect.h:428
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
const ICOORD & topright() const
Definition: rect.h:104
void set_left(int x)
Definition: rect.h:75
int16_t right() const
Definition: rect.h:79
Definition: statistc.h:31
int32_t pile_count(int32_t value) const
Definition: statistc.h:76
void add(int32_t value, int32_t count)
Definition: statistc.cpp:93
int32_t get_total() const
Definition: statistc.h:84
int32_t mode() const
Definition: statistc.cpp:107
static bool WithinTestRegion(int detail_level, int x, int y)
TabVector * FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
BBC * NextSideSearch(bool right_to_left)
Definition: bbgrid.h:761
void StartSideSearch(int x, int ymin, int ymax)
Definition: bbgrid.h:746
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
ICOORD tright_
Definition: bbgrid.h:91
const ICOORD & tright() const
Definition: bbgrid.h:75
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:445
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:589
void Deskew(const FCOORD &deskew)
static void RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:1256
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:407
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:91
~TabFind() override
Definition: tabfind.cpp:78
void ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
Definition: tabfind.cpp:1300
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:286
bool CommonWidth(int width)
Definition: tabfind.cpp:394
void DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:452
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:281
int resolution_
Of source image in pixels per inch.
Definition: tabfind.h:368
bool FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:422
void GutterWidthAndNeighbourGap(int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
Definition: tabfind.cpp:208
void TidyBlobs(TO_BLOCK *block)
Definition: tabfind.cpp:465
static bool VeryDifferentSizes(int size1, int size2)
Definition: tabfind.cpp:413
void SetBlockRuleEdges(TO_BLOCK *block)
Definition: tabfind.cpp:133
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:490
TabVector_LIST * vectors()
Definition: tabfind.h:173
TabFind(int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
Definition: tabfind.cpp:65
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:118
ICOORD vertical_skew_
Estimate of true vertical in this image.
Definition: tabfind.h:367
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:142
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:514
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:304
int GutterWidth(int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
Definition: tabfind.cpp:161
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:348
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:497
int XAtY(int y) const
Definition: tabvector.h:188
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:279
void Rotate(const FCOORD &rotation)
Definition: tabvector.cpp:273
bool IsSeparator() const
Definition: tabvector.h:220
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:353
int ExtendedOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:207
int sort_key() const
Definition: tabvector.h:157
bool IsLeftTab() const
Definition: tabvector.h:212
void Display(ScrollView *tab_win)
Definition: tabvector.cpp:539
int VOverlap(const TabVector &other) const
Definition: tabvector.h:198
static int SortVectorsByKey(const void *v1, const void *v2)
Definition: tabvector.h:293
static void Update()
Definition: scrollview.cpp:709
void Pen(Color color)
Definition: scrollview.cpp:719
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:600
void Brush(Color color)
Definition: scrollview.cpp:725