tesseract 4.1.1
Loading...
Searching...
No Matches
tesseract::TabFind Class Reference

#include <tabfind.h>

Inheritance diagram for tesseract::TabFind:
tesseract::AlignedBlob tesseract::BlobGrid tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > tesseract::GridBase tesseract::ColumnFinder

Public Member Functions

 TabFind (int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
 
 ~TabFind () override
 
void InsertBlobsToGrid (bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
bool InsertBlob (bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
void SetBlockRuleEdges (TO_BLOCK *block)
 
void SetBlobRuleEdges (BLOBNBOX_LIST *blobs)
 
int GutterWidth (int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
 
void GutterWidthAndNeighbourGap (int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
 
int RightEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
int LeftEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * RightTabForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * LeftTabForBox (const TBOX &box, bool crossing, bool extended)
 
bool CommonWidth (int width)
 
WidthCallback * WidthCB ()
 
const ICOORD & image_origin () const
 
- Public Member Functions inherited from tesseract::AlignedBlob
 AlignedBlob (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~AlignedBlob () override
 
ScrollView * DisplayTabs (const char *window_name, ScrollView *tab_win)
 
TabVector * FindVerticalAlignment (AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
 
- Public Member Functions inherited from tesseract::BlobGrid
 BlobGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BlobGrid () override
 
void InsertBlobList (BLOBNBOX_LIST *blobs)
 
- Public Member Functions inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BBGrid () override
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(BLOBNBOX *))
 
void InsertBBox (bool h_spread, bool v_spread, BLOBNBOX *bbox)
 
void InsertPixPtBBox (int left, int bottom, Pix *pix, BLOBNBOX *bbox)
 
void RemoveBBox (BLOBNBOX *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()=default
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Static Public Member Functions

static bool DifferentSizes (int size1, int size2)
 
static bool VeryDifferentSizes (int size1, int size2)
 
- Static Public Member Functions inherited from tesseract::AlignedBlob
static bool WithinTestRegion (int detail_level, int x, int y)
 

Protected Member Functions

TabVector_LIST * vectors ()
 
TabVector_LIST * dead_vectors ()
 
bool FindTabVectors (TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
 
void DontFindTabVectors (BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
 
void TidyBlobs (TO_BLOCK *block)
 
void SetupTabSearch (int x, int y, int *min_key, int *max_key)
 
ScrollView * DisplayTabVectors (ScrollView *tab_win)
 
ScrollView * FindInitialTabVectors (BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
 
void ResetForVerticalText (const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
 
void Reset ()
 
void ReflectInYAxis ()
 

Static Protected Member Functions

static void RotateBlobList (const FCOORD &rotation, BLOBNBOX_LIST *blobs)
 

Protected Attributes

ICOORD vertical_skew_
 Estimate of true vertical in this image. More...
 
int resolution_
 Of source image in pixels per inch. More...
 
- Protected Attributes inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
BLOBNBOX_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

The TabFind class contains code to find tab-stops and maintain the vectors_ list of tab vectors. It also provides an interface to find neighbouring blobs in the grid of BLOBNBOXes that is used by multiple subclasses. Searching is a complex operation because of the need to enforce rule/separator lines and tab-stop boundaries (when available), so, as the holder of the list of TabVectors, this class provides the search functions.

Definition at line 53 of file tabfind.h.

Constructor & Destructor Documentation

◆ TabFind()

tesseract::TabFind::TabFind ( int  gridsize,
const ICOORD & bleft,
const ICOORD & tright,
TabVector_LIST *  vlines,
int  vertical_x,
int  vertical_y,
int  resolution 
)

Definition at line 65 of file tabfind.cpp.

69 resolution_(resolution),
70 image_origin_(0, tright.y() - 1),
71 v_it_(&vectors_) {
72 width_cb_ = nullptr;
73 v_it_.add_list_after(vlines);
74 SetVerticalSkewAndParallelize(vertical_x, vertical_y);
75 width_cb_ = NewPermanentTessCallback(this, &TabFind::CommonWidth);
76}
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258
int16_t y() const
access function
Definition: points.h:56
AlignedBlob(int gridsize, const ICOORD &bleft, const ICOORD &tright)
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
const ICOORD & tright() const
Definition: bbgrid.h:75
bool CommonWidth(int width)
Definition: tabfind.cpp:394
int resolution_
Of source image in pixels per inch.
Definition: tabfind.h:368

◆ ~TabFind()

tesseract::TabFind::~TabFind ( )
override

Definition at line 78 of file tabfind.cpp.

78 {
79 delete width_cb_;
80}

Member Function Documentation

◆ CommonWidth()

bool tesseract::TabFind::CommonWidth ( int  width)

Return true if the given width is close to one of the common widths in column_widths_.

Definition at line 394 of file tabfind.cpp.

394 {
395 width /= kColumnWidthFactor;
396 ICOORDELT_IT it(&column_widths_);
397 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
398 ICOORDELT* w = it.data();
399 if (w->x() - 1 <= width && width <= w->y() + 1)
400 return true;
401 }
402 return false;
403}
const int kColumnWidthFactor
Definition: tabfind.h:42
int16_t x() const
access function
Definition: points.h:52

◆ dead_vectors()

TabVector_LIST * tesseract::TabFind::dead_vectors ( )
inline protected

Definition at line 176 of file tabfind.h.

176 {
177 return &dead_vectors_;
178 }

◆ DifferentSizes()

bool tesseract::TabFind::DifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 2 different.

Definition at line 407 of file tabfind.cpp.

407 {
408 return size1 > size2 * 2 || size2 > size1 * 2;
409}

◆ DisplayTabVectors()

ScrollView * tesseract::TabFind::DisplayTabVectors ( ScrollView * tab_win)
protected

Display the tab vectors found in this grid.

Definition at line 497 of file tabfind.cpp.

497 {
498#ifndef GRAPHICS_DISABLED
499 // For every vector, display it.
500 TabVector_IT it(&vectors_);
501 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
502 TabVector* vector = it.data();
503 vector->Display(tab_win);
504 }
505 tab_win->Update();
506#endif
507 return tab_win;
508}
static void Update()
Definition: scrollview.cpp:709

◆ DontFindTabVectors()

void tesseract::TabFind::DontFindTabVectors ( BLOBNBOX_LIST *  image_blobs,
TO_BLOCK * block,
FCOORD * deskew,
FCOORD * reskew 
)
protected

Definition at line 452 of file tabfind.cpp.

453 {
454 InsertBlobsToGrid(false, false, image_blobs, this);
455 InsertBlobsToGrid(true, false, &block->blobs, this);
456 deskew->set_x(1.0f);
457 deskew->set_y(0.0f);
458 reskew->set_x(1.0f);
459 reskew->set_y(0.0f);
460}
BLOBNBOX_LIST blobs
Definition: blobbox.h:772
void set_y(float yin)
rewrite function
Definition: points.h:218
void set_x(float xin)
rewrite function
Definition: points.h:214
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:91

◆ FindInitialTabVectors()

ScrollView * tesseract::TabFind::FindInitialTabVectors ( BLOBNBOX_LIST *  image_blobs,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
TO_BLOCK * block 
)
protected

Definition at line 514 of file tabfind.cpp.

517 {
518 if (textord_tabfind_show_initialtabs) {
519 ScrollView* line_win = MakeWindow(0, 0, "VerticalLines");
520 line_win = DisplayTabVectors(line_win);
521 }
522 // Prepare the grid.
523 if (image_blobs != nullptr)
524 InsertBlobsToGrid(true, false, image_blobs, this);
525 InsertBlobsToGrid(true, false, &block->blobs, this);
526 ScrollView* initial_win = FindTabBoxes(min_gutter_width,
527 tabfind_aligned_gap_fraction);
528 FindAllTabVectors(min_gutter_width);
529
530 TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
531 SortVectors();
532 EvaluateTabs();
533 if (textord_tabfind_show_initialtabs && initial_win != nullptr)
534 initial_win = DisplayTabVectors(initial_win);
535 MarkVerticalText();
536 return initial_win;
537}
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:589
ICOORD vertical_skew_
Estimate of true vertical in this image.
Definition: tabfind.h:367
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:497
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:353

◆ FindTabVectors()

bool tesseract::TabFind::FindTabVectors ( TabVector_LIST *  hlines,
BLOBNBOX_LIST *  image_blobs,
TO_BLOCK * block,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
ColPartitionGrid * part_grid,
FCOORD * deskew,
FCOORD * reskew 
)
protected

Top-level function to find TabVectors in an input page block. Returns false if the detected skew angle is impossible. Applies the detected skew angle to deskew the tabs, blobs and part_grid. tabfind_aligned_gap_fraction should be the value of the parameter textord_tabfind_aligned_gap_fraction.

Definition at line 422 of file tabfind.cpp.

427 {
428 ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width,
429 tabfind_aligned_gap_fraction,
430 block);
431 ComputeColumnWidths(tab_win, part_grid);
432 TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
433 SortVectors();
434 CleanupTabs();
435 if (!Deskew(hlines, image_blobs, block, deskew, reskew))
436 return false; // Skew angle is too large.
437 part_grid->Deskew(*deskew);
438 ApplyTabConstraints();
439 #ifndef GRAPHICS_DISABLED
440 if (textord_tabfind_show_finaltabs) {
441 tab_win = MakeWindow(640, 50, "FinalTabs");
442 DisplayBoxes(tab_win);
443 DisplayTabs("FinalTabs", tab_win);
444 tab_win = DisplayTabVectors(tab_win);
445 }
446 #endif // GRAPHICS_DISABLED
447 return true;
448}
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:514

◆ GutterWidth()

int tesseract::TabFind::GutterWidth ( int  bottom_y,
int  top_y,
const TabVector & v,
bool  ignore_unmergeables,
int  max_gutter_width,
int *  required_shift 
)

Definition at line 161 of file tabfind.cpp.

163 {
164 bool right_to_left = v.IsLeftTab();
165 int bottom_x = v.XAtY(bottom_y);
166 int top_x = v.XAtY(top_y);
167 int start_x = right_to_left ? std::max(top_x, bottom_x) : std::min(top_x, bottom_x);
168 BlobGridSearch sidesearch(this);
169 sidesearch.StartSideSearch(start_x, bottom_y, top_y);
170 int min_gap = max_gutter_width;
171 *required_shift = 0;
172 BLOBNBOX* blob = nullptr;
173 while ((blob = sidesearch.NextSideSearch(right_to_left)) != nullptr) {
174 const TBOX& box = blob->bounding_box();
175 if (box.bottom() >= top_y || box.top() <= bottom_y)
176 continue; // Doesn't overlap enough.
177 if (box.height() >= gridsize() * 2 &&
178 box.height() > box.width() * kLineFragmentAspectRatio) {
179 // Skip likely separator line residue.
180 continue;
181 }
182 if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type()))
183 continue; // Skip non-text if required.
184 int mid_y = (box.bottom() + box.top()) / 2;
185 // We use the x at the mid-y so that the required_shift guarantees
186 // to clear all the blobs on the tab-stop. If we use the min/max
187 // of x at top/bottom of the blob, then exactness would be required,
188 // which is not a good thing.
189 int tab_x = v.XAtY(mid_y);
190 int gap;
191 if (right_to_left) {
192 gap = tab_x - box.right();
193 if (gap < 0 && box.left() - tab_x < *required_shift)
194 *required_shift = box.left() - tab_x;
195 } else {
196 gap = box.left() - tab_x;
197 if (gap < 0 && box.right() - tab_x > *required_shift)
198 *required_shift = box.right() - tab_x;
199 }
200 if (gap > 0 && gap < min_gap)
201 min_gap = gap;
202 }
203 // Result may be negative, in which case, this is a really bad tabstop.
204 return min_gap - abs(*required_shift);
205}
const double kLineFragmentAspectRatio
Definition: tabfind.cpp:54
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:31
BlobRegionType region_type() const
Definition: blobbox.h:283
const TBOX & bounding_box() const
Definition: blobbox.h:230
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:430
TBOX
Definition: rect.h:34
int16_t top() const
Definition: rect.h:58
int16_t width() const
Definition: rect.h:115
int16_t height() const
Definition: rect.h:108
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
int16_t right() const
Definition: rect.h:79

◆ GutterWidthAndNeighbourGap()

void tesseract::TabFind::GutterWidthAndNeighbourGap ( int  tab_x,
int  mean_height,
int  max_gutter,
bool  left,
BLOBNBOX * bbox,
int *  gutter_width,
int *  neighbour_gap 
)

Find the gutter width and distance to inner neighbour for the given blob.

Definition at line 208 of file tabfind.cpp.

211 {
212 const TBOX& box = bbox->bounding_box();
213 // The gutter and internal sides of the box.
214 int gutter_x = left ? box.left() : box.right();
215 int internal_x = left ? box.right() : box.left();
216 // On ragged edges, the gutter side of the box is away from the tabstop.
217 int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
218 *gutter_width = max_gutter;
219 // If the box is away from the tabstop, we need to increase
220 // the allowed gutter width.
221 if (tab_gap > 0)
222 *gutter_width += tab_gap;
223 bool debug = WithinTestRegion(2, box.left(), box.bottom());
224 if (debug)
225 tprintf("Looking in gutter\n");
226 // Find the nearest blob on the outside of the column.
227 BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left,
228 bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
229 *gutter_width, box.top(), box.bottom());
230 if (gutter_bbox != nullptr) {
231 const TBOX& gutter_box = gutter_bbox->bounding_box();
232 *gutter_width = left ? tab_x - gutter_box.right()
233 : gutter_box.left() - tab_x;
234 }
235 if (*gutter_width >= max_gutter) {
236 // If there is no box because a tab was in the way, get the tab coord.
237 TBOX gutter_box(box);
238 if (left) {
239 gutter_box.set_left(tab_x - max_gutter - 1);
240 gutter_box.set_right(tab_x - max_gutter);
241 int tab_gutter = RightEdgeForBox(gutter_box, true, false);
242 if (tab_gutter < tab_x - 1)
243 *gutter_width = tab_x - tab_gutter;
244 } else {
245 gutter_box.set_left(tab_x + max_gutter);
246 gutter_box.set_right(tab_x + max_gutter + 1);
247 int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
248 if (tab_gutter > tab_x + 1)
249 *gutter_width = tab_gutter - tab_x;
250 }
251 }
252 if (*gutter_width > max_gutter)
253 *gutter_width = max_gutter;
254 // Now look for a neighbour on the inside.
255 if (debug)
256 tprintf("Looking for neighbour\n");
257 BLOBNBOX* neighbour = AdjacentBlob(bbox, !left,
258 bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
259 *gutter_width, box.top(), box.bottom());
260 int neighbour_edge = left ? RightEdgeForBox(box, true, false)
261 : LeftEdgeForBox(box, true, false);
262 if (neighbour != nullptr) {
263 const TBOX& n_box = neighbour->bounding_box();
264 if (debug) {
265 tprintf("Found neighbour:");
266 n_box.print();
267 }
268 if (left && n_box.left() < neighbour_edge)
269 neighbour_edge = n_box.left();
270 else if (!left && n_box.right() > neighbour_edge)
271 neighbour_edge = n_box.right();
272 }
273 *neighbour_gap = left ? neighbour_edge - internal_x
274 : internal_x - neighbour_edge;
275}
@ BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:120
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
BlobTextFlowType flow() const
Definition: blobbox.h:295
void set_right(int x)
Definition: rect.h:82
void print() const
Definition: rect.h:278
void set_left(int x)
Definition: rect.h:75
static bool WithinTestRegion(int detail_level, int x, int y)
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:286
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:281

◆ image_origin()

const ICOORD & tesseract::TabFind::image_origin ( ) const
inline

Return the coords at which to draw the image backdrop.

Definition at line 165 of file tabfind.h.

165 {
166 return image_origin_;
167 }

◆ InsertBlob()

bool tesseract::TabFind::InsertBlob ( bool  h_spread,
bool  v_spread,
BLOBNBOX * blob,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a single blob into the given grid (not necessarily this). If h_spread, then all cells covered horizontally by the box are used, otherwise, just the bottom-left. Similarly for v_spread. A side effect is that the left and right rule edges of the blob are set according to the tab vectors in this (not grid).

Definition at line 118 of file tabfind.cpp.

120 {
121 TBOX box = blob->bounding_box();
122 blob->set_left_rule(LeftEdgeForBox(box, false, false));
123 blob->set_right_rule(RightEdgeForBox(box, false, false));
124 blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
125 blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
126 if (blob->joined_to_prev())
127 return false;
128 grid->InsertBBox(h_spread, v_spread, blob);
129 return true;
130}
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:334
void set_left_rule(int new_left)
Definition: blobbox.h:316
bool joined_to_prev() const
Definition: blobbox.h:256
void set_right_rule(int new_right)
Definition: blobbox.h:322
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:328
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:486

◆ InsertBlobsToGrid()

void tesseract::TabFind::InsertBlobsToGrid ( bool  h_spread,
bool  v_spread,
BLOBNBOX_LIST *  blobs,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a list of blobs into the given grid (not necessarily this). See InsertBlob for the other arguments. It would seem to make more sense to swap this and grid, but this way around allows grid to not be derived from TabFind, eg a ColPartitionGrid, while the grid that provides the tab stops(this) has to be derived from TabFind.

Definition at line 91 of file tabfind.cpp.

94 {
95 BLOBNBOX_IT blob_it(blobs);
96 int b_count = 0;
97 int reject_count = 0;
98 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
99 BLOBNBOX* blob = blob_it.data();
100// if (InsertBlob(true, true, blob, grid)) {
101 if (InsertBlob(h_spread, v_spread, blob, grid)) {
102 ++b_count;
103 } else {
104 ++reject_count;
105 }
106 }
107 if (textord_debug_tabfind) {
108 tprintf("Inserted %d blobs into grid, %d rejected.\n",
109 b_count, reject_count);
110 }
111}
int textord_debug_tabfind
Definition: alignedblob.cpp:27
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:118

◆ LeftEdgeForBox()

int tesseract::TabFind::LeftEdgeForBox ( const TBOX & box,
bool  crossing,
bool  extended 
)

As RightEdgeForBox, but finds the left Edge instead.

Definition at line 286 of file tabfind.cpp.

286 {
287 TabVector* v = LeftTabForBox(box, crossing, extended);
288 return v == nullptr ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
289}
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:348

◆ LeftTabForBox()

TabVector * tesseract::TabFind::LeftTabForBox ( const TBOX & box,
bool  crossing,
bool  extended 
)

As RightTabForBox, but finds the left TabVector instead.

Definition at line 348 of file tabfind.cpp.

349 {
350 if (v_it_.empty())
351 return nullptr;
352 int top_y = box.top();
353 int bottom_y = box.bottom();
354 int mid_y = (top_y + bottom_y) / 2;
355 int left = crossing ? (box.left() + box.right()) / 2 : box.left();
356 int min_key, max_key;
357 SetupTabSearch(left, mid_y, &min_key, &max_key);
358 // Position the iterator at the last TabVector with sort_key <= max_key.
359 while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
360 v_it_.forward();
361 while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
362 v_it_.backward();
363 }
364 // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
365 TabVector* best_v = nullptr;
366 int best_x = -1;
367 int key_limit = -1;
368 do {
369 TabVector* v = v_it_.data();
370 int x = v->XAtY(mid_y);
371 if (x <= left &&
372 (v->VOverlap(top_y, bottom_y) > 0 ||
373 (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
374 if (best_v == nullptr || x > best_x) {
375 best_v = v;
376 best_x = x;
377 // We can guarantee that no better vector can be found if the
378 // sort key is less than that of the best by max_key - min_key.
379 key_limit = v->sort_key() - (max_key - min_key);
380 }
381 }
382 // Break when the search is done to avoid wrapping the iterator and
383 // thereby potentially slowing the next search.
384 if (v_it_.at_first() ||
385 (best_v != nullptr && v->sort_key() < key_limit))
386 break; // Prevent restarting list for next call.
387 v_it_.backward();
388 } while (!v_it_.at_last());
389 return best_v;
390}
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:490

◆ ReflectInYAxis()

void tesseract::TabFind::ReflectInYAxis ( )
protected

Definition at line 1356 of file tabfind.cpp.

1356 {
1357 TabVector_LIST temp_list;
1358 TabVector_IT temp_it(&temp_list);
1359 v_it_.move_to_first();
1360 // The TabVector list only contains vertical lines, but they need to be
1361 // reflected and the list needs to be reversed, so they are still in
1362 // sort_key order.
1363 while (!v_it_.empty()) {
1364 TabVector* v = v_it_.extract();
1365 v_it_.forward();
1366 v->ReflectInYAxis();
1367 temp_it.add_before_then_move(v);
1368 }
1369 v_it_.add_list_after(&temp_list);
1370 v_it_.move_to_first();
1371 // Reset this grid with reflected bounding boxes.
1372 TBOX grid_box(bleft(), tright());
1373 int tmp = grid_box.left();
1374 grid_box.set_left(-grid_box.right());
1375 grid_box.set_right(-tmp);
1376 Init(gridsize(), grid_box.botleft(), grid_box.topright());
1377}
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:445

◆ Reset()

void tesseract::TabFind::Reset ( )
protected

Definition at line 1345 of file tabfind.cpp.

1345 {
1346 v_it_.move_to_first();
1347 for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1348 if (!v_it_.data()->IsSeparator())
1349 delete v_it_.extract();
1350 }
1351 Clear();
1352}

◆ ResetForVerticalText()

void tesseract::TabFind::ResetForVerticalText ( const FCOORD & rotate,
const FCOORD & rerotate,
TabVector_LIST *  horizontal_lines,
int *  min_gutter_width 
)
protected

Definition at line 1300 of file tabfind.cpp.

1302 {
1303 // Rotate the horizontal and vertical vectors and swap them over.
1304 // Only the separators are kept and rotated; other tabs are used
1305 // to estimate the gutter width then thrown away.
1306 TabVector_LIST ex_verticals;
1307 TabVector_IT ex_v_it(&ex_verticals);
1308 TabVector_LIST vlines;
1309 TabVector_IT v_it(&vlines);
1310 while (!v_it_.empty()) {
1311 TabVector* v = v_it_.extract();
1312 if (v->IsSeparator()) {
1313 v->Rotate(rotate);
1314 ex_v_it.add_after_then_move(v);
1315 } else {
1316 v_it.add_after_then_move(v);
1317 }
1318 v_it_.forward();
1319 }
1320
1321 // Adjust the min gutter width for better tabbox selection
1322 // in 2nd call to FindInitialTabVectors().
1323 int median_gutter = FindMedianGutterWidth(&vlines);
1324 if (median_gutter > *min_gutter_width)
1325 *min_gutter_width = median_gutter;
1326
1327 TabVector_IT h_it(horizontal_lines);
1328 for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1329 TabVector* h = h_it.data();
1330 h->Rotate(rotate);
1331 }
1332 v_it_.add_list_after(horizontal_lines);
1333 v_it_.move_to_first();
1334 h_it.set_to_list(horizontal_lines);
1335 h_it.add_list_after(&ex_verticals);
1336
1337 // Rebuild the grid to the new size.
1338 TBOX grid_box(bleft(), tright());
1339 grid_box.rotate_large(rotate);
1340 Init(gridsize(), grid_box.botleft(), grid_box.topright());
1341}

◆ RightEdgeForBox()

int tesseract::TabFind::RightEdgeForBox ( const TBOX & box,
bool  crossing,
bool  extended 
)

Return the x-coord that corresponds to the right edge for the given box. If there is a rule line to the right that vertically overlaps it, then return the x-coord of the rule line, otherwise return the right edge of the page. For details see RightTabForBox below.

Definition at line 281 of file tabfind.cpp.

281 {
282 TabVector* v = RightTabForBox(box, crossing, extended);
283 return v == nullptr ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
284}
ICOORD tright_
Definition: bbgrid.h:91
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:304

◆ RightTabForBox()

TabVector * tesseract::TabFind::RightTabForBox ( const TBOX & box,
bool  crossing,
bool  extended 
)

Return the TabVector that corresponds to the right edge for the given box. If there is a TabVector to the right that vertically overlaps it, then return it, otherwise return nullptr. Note that Right and Left refer to the position of the TabVector, not its type, ie RightTabForBox returns the nearest TabVector to the right of the box, regardless of its type. If a TabVector crosses right through the box (as opposed to grazing one edge or missing entirely), then crossing false will ignore such a line. Crossing true will return the line for BOTH left and right edges. If extended is true, then TabVectors are considered to extend to their extended_start/end_y, otherwise, just the startpt_ and endpt_. These functions make use of an internal iterator to the vectors_ list for speed when used repeatedly on neighbouring boxes. The caveat is that the iterator must be updated whenever the list is modified.

Definition at line 304 of file tabfind.cpp.

305 {
306 if (v_it_.empty())
307 return nullptr;
308 int top_y = box.top();
309 int bottom_y = box.bottom();
310 int mid_y = (top_y + bottom_y) / 2;
311 int right = crossing ? (box.left() + box.right()) / 2 : box.right();
312 int min_key, max_key;
313 SetupTabSearch(right, mid_y, &min_key, &max_key);
314 // Position the iterator at the first TabVector with sort_key >= min_key.
315 while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
316 v_it_.backward();
317 while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
318 v_it_.forward();
319 // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
320 TabVector* best_v = nullptr;
321 int best_x = -1;
322 int key_limit = -1;
323 do {
324 TabVector* v = v_it_.data();
325 int x = v->XAtY(mid_y);
326 if (x >= right &&
327 (v->VOverlap(top_y, bottom_y) > 0 ||
328 (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
329 if (best_v == nullptr || x < best_x) {
330 best_v = v;
331 best_x = x;
332 // We can guarantee that no better vector can be found if the
333 // sort key exceeds that of the best by max_key - min_key.
334 key_limit = v->sort_key() + max_key - min_key;
335 }
336 }
337 // Break when the search is done to avoid wrapping the iterator and
338 // thereby potentially slowing the next search.
339 if (v_it_.at_last() ||
340 (best_v != nullptr && v->sort_key() > key_limit))
341 break; // Prevent restarting list for next call.
342 v_it_.forward();
343 } while (!v_it_.at_first());
344 return best_v;
345}

◆ RotateBlobList()

void tesseract::TabFind::RotateBlobList ( const FCOORD & rotation,
BLOBNBOX_LIST *  blobs 
)
static protected

Definition at line 1256 of file tabfind.cpp.

1256 {
1257 BLOBNBOX_IT it(blobs);
1258 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1259 it.data()->rotate_box(rotation);
1260 }
1261}

◆ SetBlobRuleEdges()

void tesseract::TabFind::SetBlobRuleEdges ( BLOBNBOX_LIST *  blobs)

Definition at line 142 of file tabfind.cpp.

142 {
143 BLOBNBOX_IT blob_it(blobs);
144 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
145 BLOBNBOX* blob = blob_it.data();
146 TBOX box = blob->bounding_box();
147 blob->set_left_rule(LeftEdgeForBox(box, false, false));
148 blob->set_right_rule(RightEdgeForBox(box, false, false));
149 blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
150 blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
151 }
152}

◆ SetBlockRuleEdges()

void tesseract::TabFind::SetBlockRuleEdges ( TO_BLOCK * block)

Definition at line 133 of file tabfind.cpp.

133 {
134 SetBlobRuleEdges(&block->blobs);
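135 SetBlobRuleEdges(&block->small_blobs);
136 SetBlobRuleEdges(&block->noise_blobs);
137 SetBlobRuleEdges(&block->large_blobs);
138}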
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:774
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:776
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:775
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:142

◆ SetupTabSearch()

void tesseract::TabFind::SetupTabSearch ( int  x,
int  y,
int *  min_key,
int *  max_key 
)
protected

Definition at line 490 of file tabfind.cpp.

490 {
491 int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
492 int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
493 *min_key = std::min(key1, key2);
494 *max_key = std::max(key1, key2);
495}
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:279

◆ TidyBlobs()

void tesseract::TabFind::TidyBlobs ( TO_BLOCK * block)
protected

Definition at line 465 of file tabfind.cpp.

465 {
466 BLOBNBOX_IT large_it = &block->large_blobs;
467 BLOBNBOX_IT blob_it = &block->blobs;
468 int b_count = 0;
469 for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
470 BLOBNBOX* large_blob = large_it.data();
471 if (large_blob->owner() != nullptr) {
472 blob_it.add_to_end(large_it.extract());
473 ++b_count;
474 }
475 }
476 if (textord_debug_tabfind) {
477 tprintf("Moved %d large blobs to normal list\n",
478 b_count);
479 #ifndef GRAPHICS_DISABLED
480 ScrollView* rej_win = MakeWindow(500, 300, "Image blobs");
481 block->plot_graded_blobs(rej_win);
482 block->plot_noise_blobs(rej_win);
483 rej_win->Update();
484 #endif // GRAPHICS_DISABLED
485 }
486 block->DeleteUnownedNoise();
487}
tesseract::ColPartition * owner() const
Definition: blobbox.h:352
void DeleteUnownedNoise()
Definition: blobbox.cpp:1037
void plot_noise_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1063
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1071

◆ vectors()

TabVector_LIST * tesseract::TabFind::vectors ( )
inline protected

Accessors

Definition at line 173 of file tabfind.h.

173 {
174 return &vectors_;
175 }

◆ VeryDifferentSizes()

bool tesseract::TabFind::VeryDifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 5 different.

Definition at line 413 of file tabfind.cpp.

413 {
414 return size1 > size2 * 5 || size2 > size1 * 5;
415}

◆ WidthCB()

WidthCallback * tesseract::TabFind::WidthCB ( )
inline

Return a callback for testing CommonWidth.

Definition at line 158 of file tabfind.h.

158 {
159 return width_cb_;
160 }

Member Data Documentation

◆ resolution_

int tesseract::TabFind::resolution_
protected

Of source image in pixels per inch.

Definition at line 368 of file tabfind.h.

◆ vertical_skew_

ICOORD tesseract::TabFind::vertical_skew_
protected

Estimate of true vertical in this image.

Definition at line 367 of file tabfind.h.


The documentation for this class was generated from the following files: