tesseract 4.1.1
Loading...
Searching...
No Matches
tesseract::ColumnFinder Class Reference

#include <colfind.h>

Inheritance diagram for tesseract::ColumnFinder:
tesseract::TabFind tesseract::AlignedBlob tesseract::BlobGrid tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > tesseract::GridBase

Public Member Functions

 ColumnFinder (int gridsize, const ICOORD &bleft, const ICOORD &tright, int resolution, bool cjk_script, double aligned_gap_fraction, TabVector_LIST *vlines, TabVector_LIST *hlines, int vertical_x, int vertical_y)
 
 ~ColumnFinder () override
 
const DENORM * denorm () const
 
const TextlineProjection * projection () const
 
void set_cjk_script (bool is_cjk)
 
void SetupAndFilterNoise (PageSegMode pageseg_mode, Pix *photo_mask_pix, TO_BLOCK *input_block)
 
bool IsVerticallyAlignedText (double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
 
void CorrectOrientation (TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation)
 
int FindBlocks (PageSegMode pageseg_mode, Pix *scaled_color, int scaled_factor, TO_BLOCK *block, Pix *photo_mask_pix, Pix *thresholds_pix, Pix *grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks)
 
void GetDeskewVectors (FCOORD *deskew, FCOORD *reskew)
 
void SetEquationDetect (EquationDetectBase *detect)
 
- Public Member Functions inherited from tesseract::TabFind
 TabFind (int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
 
 ~TabFind () override
 
void InsertBlobsToGrid (bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
bool InsertBlob (bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
void SetBlockRuleEdges (TO_BLOCK *block)
 
void SetBlobRuleEdges (BLOBNBOX_LIST *blobs)
 
int GutterWidth (int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
 
void GutterWidthAndNeighbourGap (int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
 
int RightEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
int LeftEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * RightTabForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * LeftTabForBox (const TBOX &box, bool crossing, bool extended)
 
bool CommonWidth (int width)
 
WidthCallback * WidthCB ()
 
const ICOORD & image_origin () const
 
- Public Member Functions inherited from tesseract::AlignedBlob
 AlignedBlob (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~AlignedBlob () override
 
ScrollView * DisplayTabs (const char *window_name, ScrollView *tab_win)
 
TabVector * FindVerticalAlignment (AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
 
- Public Member Functions inherited from tesseract::BlobGrid
 BlobGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BlobGrid () override
 
void InsertBlobList (BLOBNBOX_LIST *blobs)
 
- Public Member Functions inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
 ~BBGrid () override
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(BLOBNBOX *))
 
void InsertBBox (bool h_spread, bool v_spread, BLOBNBOX *bbox)
 
void InsertPixPtBBox (int left, int bottom, Pix *pix, BLOBNBOX *bbox)
 
void RemoveBBox (BLOBNBOX *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()=default
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Additional Inherited Members

- Static Public Member Functions inherited from tesseract::TabFind
static bool DifferentSizes (int size1, int size2)
 
static bool VeryDifferentSizes (int size1, int size2)
 
- Static Public Member Functions inherited from tesseract::AlignedBlob
static bool WithinTestRegion (int detail_level, int x, int y)
 
- Protected Member Functions inherited from tesseract::TabFind
TabVector_LIST * vectors ()
 
TabVector_LIST * dead_vectors ()
 
bool FindTabVectors (TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
 
void DontFindTabVectors (BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
 
void TidyBlobs (TO_BLOCK *block)
 
void SetupTabSearch (int x, int y, int *min_key, int *max_key)
 
ScrollView * DisplayTabVectors (ScrollView *tab_win)
 
ScrollView * FindInitialTabVectors (BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
 
void ResetForVerticalText (const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
 
void Reset ()
 
void ReflectInYAxis ()
 
- Static Protected Member Functions inherited from tesseract::TabFind
static void RotateBlobList (const FCOORD &rotation, BLOBNBOX_LIST *blobs)
 
- Protected Attributes inherited from tesseract::TabFind
ICOORD vertical_skew_
 Estimate of true vertical in this image. More...
 
int resolution_
 Of source image in pixels per inch. More...
 
- Protected Attributes inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
BLOBNBOX_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

Definition at line 50 of file colfind.h.

Constructor & Destructor Documentation

◆ ColumnFinder()

tesseract::ColumnFinder::ColumnFinder ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright,
int  resolution,
bool  cjk_script,
double  aligned_gap_fraction,
TabVector_LIST *  vlines,
TabVector_LIST *  hlines,
int  vertical_x,
int  vertical_y 
)

Definition at line 74 of file colfind.cpp.

80 : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y,
81 resolution),
82 cjk_script_(cjk_script),
83 min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize)),
84 mean_column_gap_(tright.x() - bleft.x()),
85 tabfind_aligned_gap_fraction_(aligned_gap_fraction),
86 deskew_(0.0f, 0.0f),
87 reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f),
88 text_rotation_(0.0f, 0.0f),
89 best_columns_(nullptr), stroke_width_(nullptr),
90 part_grid_(gridsize, bleft, tright), nontext_map_(nullptr),
91 projection_(resolution),
92 denorm_(nullptr), input_blobs_win_(nullptr), equation_detect_(nullptr) {
93 TabVector_IT h_it(&horizontal_lines_);
94 h_it.add_list_after(hlines);
95}
const double kMinGutterWidthGrid
Definition: colfind.cpp:51
int16_t x() const
access function
Definition: points.h:52
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
const ICOORD & tright() const
Definition: bbgrid.h:75
TabFind(int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
Definition: tabfind.cpp:65

◆ ~ColumnFinder()

tesseract::ColumnFinder::~ColumnFinder ( )
override

Definition at line 97 of file colfind.cpp.

97 {
98 column_sets_.delete_data_pointers();
99 delete [] best_columns_;
100 delete stroke_width_;
101 delete input_blobs_win_;
102 pixDestroy(&nontext_map_);
103 while (denorm_ != nullptr) {
104 DENORM* dead_denorm = denorm_;
105 denorm_ = const_cast<DENORM*>(denorm_->predecessor());
106 delete dead_denorm;
107 }
108
109 // The ColPartitions are destroyed automatically, but any boxes in
110 // the noise_parts_ list are owned and need to be deleted explicitly.
111 ColPartition_IT part_it(&noise_parts_);
112 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
113 ColPartition* part = part_it.data();
114 part->DeleteBoxes();
115 }
116 // Likewise any boxes in the good_parts_ list need to be deleted.
117 // These are just the image parts. Text parts have already given their
118 // boxes on to the TO_BLOCK, and have empty lists.
119 part_it.set_to_list(&good_parts_);
120 for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
121 ColPartition* part = part_it.data();
122 part->DeleteBoxes();
123 }
124 // Also, any blobs on the image_bblobs_ list need to have their cblobs
125 // deleted. This only happens if there has been an early return from
126 // FindColumns, as in a normal return, the blobs go into the grid and
127 // end up in noise_parts_, good_parts_ or the output blocks.
128 BLOBNBOX_IT bb_it(&image_bblobs_);
129 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
130 BLOBNBOX* bblob = bb_it.data();
131 delete bblob->cblob();
132 }
133}
void delete_data_pointers()
C_BLOB * cblob() const
Definition: blobbox.h:268
const DENORM * predecessor() const
Definition: normalis.h:263

Member Function Documentation

◆ CorrectOrientation()

void tesseract::ColumnFinder::CorrectOrientation ( TO_BLOCK *  block,
bool  vertical_text_lines,
int  recognition_rotation 
)

Definition at line 197 of file colfind.cpp.

199 {
200 const FCOORD anticlockwise90(0.0f, 1.0f);
201 const FCOORD clockwise90(0.0f, -1.0f);
202 const FCOORD rotation180(-1.0f, 0.0f);
203 const FCOORD norotation(1.0f, 0.0f);
204
205 text_rotation_ = norotation;
206 // Rotate the page to make the text upright, as implied by
207 // recognition_rotation.
208 rotation_ = norotation;
209 if (recognition_rotation == 1) {
210 rotation_ = anticlockwise90;
211 } else if (recognition_rotation == 2) {
212 rotation_ = rotation180;
213 } else if (recognition_rotation == 3) {
214 rotation_ = clockwise90;
215 }
216 // We infer text writing direction to be vertical if there are several
217 // vertical text lines detected, and horizontal if not. But if the page
218 // orientation was determined to be 90 or 270 degrees, the true writing
219 // direction is the opposite of what we inferred.
220 if (recognition_rotation & 1) {
221 vertical_text_lines = !vertical_text_lines;
222 }
223 // If we still believe the writing direction is vertical, we use the
224 // convention of rotating the page ccw 90 degrees to make the text lines
225 // horizontal, and mark the blobs for rotation cw 90 degrees for
226 // classification so that the text order is correct after recognition.
227 if (vertical_text_lines) {
228 rotation_.rotate(anticlockwise90);
229 text_rotation_.rotate(clockwise90);
230 }
231 // Set rerotate_ to the inverse of rotation_.
232 rerotate_ = FCOORD(rotation_.x(), -rotation_.y());
233 if (rotation_.x() != 1.0f || rotation_.y() != 0.0f) {
234 // Rotate all the blobs and tab vectors.
235 RotateBlobList(rotation_, &block->large_blobs);
236 RotateBlobList(rotation_, &block->blobs);
237 RotateBlobList(rotation_, &block->small_blobs);
238 RotateBlobList(rotation_, &block->noise_blobs);
239 TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_,
240 &min_gutter_width_);
241 part_grid_.Init(gridsize(), bleft(), tright());
242 // Reset all blobs to initial state and filter by size.
243 // Since they have rotated, the list they belong on could have changed.
244 block->ReSetAndReFilterBlobs();
245 SetBlockRuleEdges(block);
246 stroke_width_->CorrectForRotation(rerotate_, &part_grid_);
247 }
248 if (textord_debug_tabfind) {
249 tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n",
250 vertical_text_lines, recognition_rotation,
251 rotation_.x(), rotation_.y(),
252 text_rotation_.x(), text_rotation_.y());
253 }
254 // Setup the denormalization.
255 ASSERT_HOST(denorm_ == nullptr);
256 denorm_ = new DENORM;
257 denorm_->SetupNormalization(nullptr, &rotation_, nullptr,
258 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
259}
#define ASSERT_HOST(x)
Definition: errcode.h:88
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int textord_debug_tabfind
Definition: alignedblob.cpp:27
BLOBNBOX_LIST blobs
Definition: blobbox.h:772
void ReSetAndReFilterBlobs()
Definition: blobbox.cpp:1011
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:774
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:776
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:775
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
Definition: normalis.cpp:96
Definition: points.h:189
void rotate(const FCOORD vec)
Definition: points.h:763
float y() const
Definition: points.h:210
float x() const
Definition: points.h:207
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:445
void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid)
static void RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:1256
void ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
Definition: tabfind.cpp:1300
void SetBlockRuleEdges(TO_BLOCK *block)
Definition: tabfind.cpp:133

◆ denorm()

const DENORM * tesseract::ColumnFinder::denorm ( ) const
inline

Definition at line 68 of file colfind.h.

68 {
69 return denorm_;
70 }

◆ FindBlocks()

int tesseract::ColumnFinder::FindBlocks ( PageSegMode  pageseg_mode,
Pix *  scaled_color,
int  scaled_factor,
TO_BLOCK *  block,
Pix *  photo_mask_pix,
Pix *  thresholds_pix,
Pix *  grey_pix,
DebugPixa *  pixa_debug,
BLOCK_LIST *  blocks,
BLOBNBOX_LIST *  diacritic_blobs,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 285 of file colfind.cpp.

290 {
291 pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);
292 stroke_width_->FindLeaderPartitions(input_block, &part_grid_);
293 stroke_width_->RemoveLineResidue(&big_parts_);
294 FindInitialTabVectors(nullptr, min_gutter_width_, tabfind_aligned_gap_fraction_,
295 input_block);
296 SetBlockRuleEdges(input_block);
297 stroke_width_->GradeBlobsIntoPartitions(
298 pageseg_mode, rerotate_, input_block, nontext_map_, denorm_, cjk_script_,
299 &projection_, diacritic_blobs, &part_grid_, &big_parts_);
300 if (!PSM_SPARSE(pageseg_mode)) {
301 ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
302 input_block, this, pixa_debug, &part_grid_,
303 &big_parts_);
304 ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_,
305 photo_mask_pix);
306 ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
307 input_block, this, pixa_debug, &part_grid_,
308 &big_parts_);
309 }
310 part_grid_.ReTypeBlobs(&image_bblobs_);
311 TidyBlobs(input_block);
312 Reset();
313 // TODO(rays) need to properly handle big_parts_.
314 ColPartition_IT p_it(&big_parts_);
315 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward())
316 p_it.data()->DisownBoxesNoAssert();
317 big_parts_.clear();
318 delete stroke_width_;
319 stroke_width_ = nullptr;
320 // Compute the edge offsets whether or not there is a grey_pix. It is done
321 // here as the c_blobs haven't been touched by rotation or anything yet,
322 // so no denorm is required, yet the text has been separated from image, so
323 // no time is wasted running it on image blobs.
324 input_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
325
326 // A note about handling right-to-left scripts (Hebrew/Arabic):
327 // The columns must be reversed and come out in right-to-left instead of
328 // the normal left-to-right order. Because the left-to-right ordering
329 // is implicit in many data structures, it is simpler to fool the algorithms
330 // into thinking they are dealing with left-to-right text.
331 // To do this, we reflect the needed data in the y-axis and then reflect
332 // the blocks back after they have been created. This is a temporary
333 // arrangement that is confined to this function only, so the reflection
334 // is completely invisible in the output blocks.
335 // The only objects reflected are:
336 // The vertical separator lines that have already been found;
337 // The bounding boxes of all BLOBNBOXES on all lists on the input_block
338 // plus the image_bblobs. The outlines are not touched, since they are
339 // not looked at.
340 bool input_is_rtl = input_block->block->right_to_left();
341 if (input_is_rtl) {
342 // Reflect the vertical separator lines (member of TabFind).
343 ReflectInYAxis();
344 // Reflect the blob boxes.
345 ReflectForRtl(input_block, &image_bblobs_);
346 part_grid_.ReflectInYAxis();
347 }
348
349 if (!PSM_SPARSE(pageseg_mode)) {
350 if (!PSM_COL_FIND_ENABLED(pageseg_mode)) {
351 // No tab stops needed. Just the grid that FindTabVectors makes.
352 DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_);
353 } else {
354 SetBlockRuleEdges(input_block);
355 // Find the tab stops, estimate skew, and deskew the tabs, blobs and
356 // part_grid_.
357 FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block,
358 min_gutter_width_, tabfind_aligned_gap_fraction_,
359 &part_grid_, &deskew_, &reskew_);
360 // Add the deskew to the denorm_.
361 auto* new_denorm = new DENORM;
362 new_denorm->SetupNormalization(nullptr, &deskew_, denorm_,
363 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
364 denorm_ = new_denorm;
365 }
366 SetBlockRuleEdges(input_block);
367 part_grid_.SetTabStops(this);
368
369 // Make the column_sets_.
370 if (!MakeColumns(false)) {
371 tprintf("Empty page!!\n");
372 part_grid_.DeleteParts();
373 return 0; // This is an empty page.
374 }
375
376 // Refill the grid using rectangular spreading, and get the benefit
377 // of the completed tab vectors marking the rule edges of each blob.
378 Clear();
379 #ifndef GRAPHICS_DISABLED
380 if (textord_tabfind_show_reject_blobs) {
381 ScrollView* rej_win = MakeWindow(500, 300, "Rejected blobs");
382 input_block->plot_graded_blobs(rej_win);
383 }
384 #endif // GRAPHICS_DISABLED
385 InsertBlobsToGrid(false, false, &image_bblobs_, this);
386 InsertBlobsToGrid(true, true, &input_block->blobs, this);
387
388 part_grid_.GridFindMargins(best_columns_);
389 // Split and merge the partitions by looking at local neighbours.
390 GridSplitPartitions();
391 // Resolve unknown partitions by adding to an existing partition, fixing
392 // the type, or declaring them noise.
393 part_grid_.GridFindMargins(best_columns_);
394 GridMergePartitions();
395 // Insert any unused noise blobs that are close enough to an appropriate
396 // partition.
397 InsertRemainingNoise(input_block);
398 // Add horizontal line separators as partitions.
399 GridInsertHLinePartitions();
400 GridInsertVLinePartitions();
401 // Recompute margins based on a local neighbourhood search.
402 part_grid_.GridFindMargins(best_columns_);
403 SetPartitionTypes();
404 }
405 if (textord_tabfind_show_initial_partitions) {
406 ScrollView* part_win = MakeWindow(100, 300, "InitialPartitions");
407 part_grid_.DisplayBoxes(part_win);
408 DisplayTabVectors(part_win);
409 }
410
411 if (!PSM_SPARSE(pageseg_mode)) {
412 if (equation_detect_) {
413 equation_detect_->FindEquationParts(&part_grid_, best_columns_);
414 }
415 if (textord_tabfind_find_tables) {
416 TableFinder table_finder;
417 table_finder.Init(gridsize(), bleft(), tright());
418 table_finder.set_resolution(resolution_);
419 table_finder.set_left_to_right_language(
420 !input_block->block->right_to_left());
421 // Copy cleaned partitions from part_grid_ to clean_part_grid_ and
422 // insert dot-like noise into period_grid_
423 table_finder.InsertCleanPartitions(&part_grid_, input_block);
424 // Get Table Regions
425 table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_);
426 }
427 GridRemoveUnderlinePartitions();
428 part_grid_.DeleteUnknownParts(input_block);
429
430 // Build the partitions into chains that belong in the same block and
431 // refine into one-to-one links, then smooth the types within each chain.
432 part_grid_.FindPartitionPartners();
433 part_grid_.FindFigureCaptions();
434 part_grid_.RefinePartitionPartners(true);
435 SmoothPartnerRuns();
436
437 #ifndef GRAPHICS_DISABLED
438 if (textord_tabfind_show_partitions) {
439 ScrollView* window = MakeWindow(400, 300, "Partitions");
440 if (window != nullptr) {
441 part_grid_.DisplayBoxes(window);
442 if (!textord_debug_printable)
443 DisplayTabVectors(window);
444 if (window != nullptr && textord_tabfind_show_partitions > 1) {
445 delete window->AwaitEvent(SVET_DESTROY);
446 }
447 }
448 }
449 #endif // GRAPHICS_DISABLED
450 part_grid_.AssertNoDuplicates();
451 }
452 // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here,
453 // and ownership of the BLOBNBOXes moves to the ColPartitions.
454 // (They were previously owned by the block or the image_bblobs list.)
455 ReleaseBlobsAndCleanupUnused(input_block);
456 // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and
457 // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves
458 // from the ColPartitions to the output TO_BLOCK. In non-text, the
459 // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor.
460 if (PSM_SPARSE(pageseg_mode))
461 part_grid_.ExtractPartitionsAsBlocks(blocks, to_blocks);
462 else
463 TransformToBlocks(blocks, to_blocks);
464 if (textord_debug_tabfind) {
465 tprintf("Found %d blocks, %d to_blocks\n",
466 blocks->length(), to_blocks->length());
467 }
468
469 DisplayBlocks(blocks);
470 RotateAndReskewBlocks(input_is_rtl, to_blocks);
471 int result = 0;
472 #ifndef GRAPHICS_DISABLED
473 if (blocks_win_ != nullptr) {
474 bool waiting = false;
475 do {
476 waiting = false;
477 SVEvent* event = blocks_win_->AwaitEvent(SVET_ANY);
478 if (event->type == SVET_INPUT && event->parameter != nullptr) {
479 if (*event->parameter == 'd')
480 result = -1;
481 else
482 blocks->clear();
483 } else if (event->type == SVET_DESTROY) {
484 blocks_win_ = nullptr;
485 } else {
486 waiting = true;
487 }
488 delete event;
489 } while (waiting);
490 }
491 #endif // GRAPHICS_DISABLED
492 return result;
493}
bool textord_debug_printable
Definition: alignedblob.cpp:33
@ SVET_ANY
Definition: scrollview.h:56
@ SVET_DESTROY
Definition: scrollview.h:46
@ SVET_INPUT
Definition: scrollview.h:50
bool PSM_COL_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:197
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:200
void DisplayBoxes(ScrollView *window)
Definition: bbgrid.h:613
void AssertNoDuplicates()
Definition: bbgrid.h:638
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:589
void ExtractPartitionsAsBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void SetTabStops(TabFind *tabgrid)
void RefinePartitionPartners(bool get_desperate)
void GridFindMargins(ColPartitionSet **best_columns)
void DeleteUnknownParts(TO_BLOCK *block)
void ReTypeBlobs(BLOBNBOX_LIST *im_blobs)
virtual int FindEquationParts(ColPartitionGrid *part_grid, ColPartitionSet **best_columns)=0
static void FindImagePartitions(Pix *image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, DebugPixa *pixa_debug, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
Definition: imagefind.cpp:1298
static void TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid, Pix *image_mask)
Definition: imagefind.cpp:1245
void RemoveLineResidue(ColPartition_LIST *big_part_list)
void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid)
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Pix *nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:91
void DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:452
int resolution_
Of source image in pixels per inch.
Definition: tabfind.h:368
bool FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
Definition: tabfind.cpp:422
void TidyBlobs(TO_BLOCK *block)
Definition: tabfind.cpp:465
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:514
WidthCallback * WidthCB()
Definition: tabfind.h:158
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:497
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:443

◆ GetDeskewVectors()

void tesseract::ColumnFinder::GetDeskewVectors ( FCOORD *  deskew,
FCOORD *  reskew 
)

Definition at line 496 of file colfind.cpp.

496 {
497 *reskew = reskew_;
498 *deskew = reskew_;
499 deskew->set_y(-deskew->y());
500}
void set_y(float yin)
rewrite function
Definition: points.h:218

◆ IsVerticallyAlignedText()

bool tesseract::ColumnFinder::IsVerticallyAlignedText ( double  find_vertical_text_ratio,
TO_BLOCK *  block,
BLOBNBOX_CLIST *  osd_blobs 
)

Definition at line 179 of file colfind.cpp.

181 {
182 return stroke_width_->TestVerticalTextDirection(find_vertical_text_ratio,
183 block, osd_blobs);
184}
bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)

◆ projection()

const TextlineProjection * tesseract::ColumnFinder::projection ( ) const
inline

Definition at line 71 of file colfind.h.

71 {
72 return &projection_;
73 }

◆ set_cjk_script()

void tesseract::ColumnFinder::set_cjk_script ( bool  is_cjk)
inline

Definition at line 74 of file colfind.h.

74 {
75 cjk_script_ = is_cjk;
76 }

◆ SetEquationDetect()

void tesseract::ColumnFinder::SetEquationDetect ( EquationDetectBase *  detect)

Definition at line 502 of file colfind.cpp.

502 {
503 equation_detect_ = detect;
504}

◆ SetupAndFilterNoise()

void tesseract::ColumnFinder::SetupAndFilterNoise ( PageSegMode  pageseg_mode,
Pix *  photo_mask_pix,
TO_BLOCK *  input_block 
)

Definition at line 142 of file colfind.cpp.

144 {
145 part_grid_.Init(gridsize(), bleft(), tright());
146 delete stroke_width_;
147 stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright());
148 min_gutter_width_ = static_cast<int>(kMinGutterWidthGrid * gridsize());
149 input_block->ReSetAndReFilterBlobs();
150 #ifndef GRAPHICS_DISABLED
151 if (textord_tabfind_show_blocks) {
152 input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs");
153 input_block->plot_graded_blobs(input_blobs_win_);
154 }
155 #endif // GRAPHICS_DISABLED
156 SetBlockRuleEdges(input_block);
157 pixDestroy(&nontext_map_);
158 // Run a preliminary strokewidth neighbour detection on the medium blobs.
159 stroke_width_->SetNeighboursOnMediumBlobs(input_block);
160 CCNonTextDetect nontext_detect(gridsize(), bleft(), tright());
161 // Remove obvious noise and make the initial non-text map.
162 nontext_map_ = nontext_detect.ComputeNonTextMask(textord_debug_tabfind,
163 photo_mask_pix, input_block);
164 stroke_width_->FindTextlineDirectionAndFixBrokenCJK(pageseg_mode, cjk_script_,
165 input_block);
166 // Clear the strokewidth grid ready for rotation or leader finding.
167 stroke_width_->Clear();
168}
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1071
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
void SetNeighboursOnMediumBlobs(TO_BLOCK *block)

The documentation for this class was generated from the following files: