tesseract 4.1.1
Loading...
Searching...
No Matches
tesseract::ColPartitionSet Class Reference

#include <colpartitionset.h>

Inheritance diagram for tesseract::ColPartitionSet:
ELIST_LINK

Public Member Functions

 ColPartitionSet ()=default
 
 ColPartitionSet (ColPartition_LIST *partitions)
 
 ColPartitionSet (ColPartition *partition)
 
 ~ColPartitionSet ()=default
 
const TBOX & bounding_box () const
 
bool Empty () const
 
int ColumnCount () const
 
int GoodColumnCount () const
 
ColPartition * GetColumnByIndex (int index)
 
ColPartition * ColumnContaining (int x, int y)
 
void GetColumnBoxes (int y_bottom, int y_top, ColSegment_LIST *segments)
 
void RelinquishParts ()
 
void ImproveColumnCandidate (WidthCallback *cb, PartSetVector *src_sets)
 
void AddToColumnSetsIfUnique (PartSetVector *column_sets, WidthCallback *cb)
 
bool CompatibleColumns (bool debug, ColPartitionSet *other, WidthCallback *cb)
 
int UnmatchedWidth (ColPartitionSet *part_set)
 
bool LegalColumnCandidate ()
 
ColPartitionSet * Copy (bool good_only)
 
void DisplayColumnEdges (int y_bottom, int y_top, ScrollView *win)
 
ColumnSpanningType SpanningType (int resolution, int left, int right, int height, int y, int left_margin, int right_margin, int *first_col, int *last_col, int *first_spanned_col)
 
void ChangeWorkColumns (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
 
void AccumulateColumnWidthsAndGaps (int *total_width, int *width_samples, int *total_gap, int *gap_samples)
 
void Print ()
 
- Public Member Functions inherited from ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Detailed Description

Definition at line 40 of file colpartitionset.h.

Constructor & Destructor Documentation

◆ ColPartitionSet() [1/3]

tesseract::ColPartitionSet::ColPartitionSet ( )
default

◆ ColPartitionSet() [2/3]

tesseract::ColPartitionSet::ColPartitionSet ( ColPartition_LIST *  partitions)
explicit

Definition at line 36 of file colpartitionset.cpp.

36 {
37 ColPartition_IT it(&parts_);
38 it.add_list_after(partitions);
39 ComputeCoverage();
40}

◆ ColPartitionSet() [3/3]

tesseract::ColPartitionSet::ColPartitionSet ( ColPartition *  partition)
explicit

Definition at line 42 of file colpartitionset.cpp.

42 {
43 ColPartition_IT it(&parts_);
44 it.add_after_then_move(part);
45 ComputeCoverage();
46}

◆ ~ColPartitionSet()

tesseract::ColPartitionSet::~ColPartitionSet ( )
default

Member Function Documentation

◆ AccumulateColumnWidthsAndGaps()

void tesseract::ColPartitionSet::AccumulateColumnWidthsAndGaps ( int *  total_width,
int *  width_samples,
int *  total_gap,
int *  gap_samples 
)

Definition at line 572 of file colpartitionset.cpp.

575 {
576 ColPartition_IT it(&parts_);
577 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
578 ColPartition* part = it.data();
579 *total_width += part->ColumnWidth();
580 ++*width_samples;
581 if (!it.at_last()) {
582 ColPartition* next_part = it.data_relative(1);
583 int part_left = part->right_key();
584 int part_right = next_part->left_key();
585 int gap = part->KeyWidth(part_left, part_right);
586 *total_gap += gap;
587 ++*gap_samples;
588 }
589 }
590}

◆ AddToColumnSetsIfUnique()

void tesseract::ColPartitionSet::AddToColumnSetsIfUnique ( PartSetVector *  column_sets,
WidthCallback *  cb 
)

Definition at line 175 of file colpartitionset.cpp.

176 {
177 bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
178 bounding_box_.bottom());
179 if (debug) {
180 tprintf("Considering new column candidate:\n");
181 Print();
182 }
183 if (!LegalColumnCandidate()) {
184 if (debug) {
185 tprintf("Not a legal column candidate:\n");
186 Print();
187 }
188 delete this;
189 return;
190 }
191 for (int i = 0; i < column_sets->size(); ++i) {
192 ColPartitionSet* columns = column_sets->get(i);
193 // In ordering the column set candidates, good_coverage_ is king,
194 // followed by good_column_count_ and then bad_coverage_.
195 bool better = good_coverage_ > columns->good_coverage_;
196 if (good_coverage_ == columns->good_coverage_) {
197 better = good_column_count_ > columns->good_column_count_;
198 if (good_column_count_ == columns->good_column_count_) {
199 better = bad_coverage_ > columns->bad_coverage_;
200 }
201 }
202 if (better) {
203 // The new one is better so add it.
204 if (debug)
205 tprintf("Good one\n");
206 column_sets->insert(this, i);
207 return;
208 }
209 if (columns->CompatibleColumns(false, this, cb)) {
210 if (debug)
211 tprintf("Duplicate\n");
212 delete this;
213 return; // It is not unique.
214 }
215 }
216 if (debug)
217 tprintf("Added to end\n");
218 column_sets->push_back(this);
219}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
static bool WithinTestRegion(int detail_level, int x, int y)

◆ bounding_box()

const TBOX & tesseract::ColPartitionSet::bounding_box ( ) const
inline

Definition at line 49 of file colpartitionset.h.

49 {
50 return bounding_box_;
51 }

◆ ChangeWorkColumns()

void tesseract::ColPartitionSet::ChangeWorkColumns ( const ICOORD &  bleft,
const ICOORD &  tright,
int  resolution,
ColPartition_LIST *  used_parts,
WorkingPartSet_LIST *  working_set 
)

Definition at line 500 of file colpartitionset.cpp.

504 {
505 // Move the input list to a temporary location so we can delete its elements
506 // as we add them to the output working_set.
507 WorkingPartSet_LIST work_src;
508 WorkingPartSet_IT src_it(&work_src);
509 src_it.add_list_after(working_set_list);
510 src_it.move_to_first();
511 WorkingPartSet_IT dest_it(working_set_list);
512 // Completed blocks and to_blocks are accumulated and given to the first new
513 // one whenever we keep a column, or at the end.
514 BLOCK_LIST completed_blocks;
515 TO_BLOCK_LIST to_blocks;
516 WorkingPartSet* first_new_set = nullptr;
517 WorkingPartSet* working_set = nullptr;
518 ColPartition_IT col_it(&parts_);
519 for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
520 ColPartition* column = col_it.data();
521 // Any existing column to the left of column is completed.
522 while (!src_it.empty() &&
523 ((working_set = src_it.data())->column() == nullptr ||
524 working_set->column()->right_key() <= column->left_key())) {
525 src_it.extract();
526 working_set->ExtractCompletedBlocks(bleft, tright, resolution,
527 used_parts, &completed_blocks,
528 &to_blocks);
529 delete working_set;
530 src_it.forward();
531 }
532 // Make a new between-column WorkingSet for before the current column.
533 working_set = new WorkingPartSet(nullptr);
534 dest_it.add_after_then_move(working_set);
535 if (first_new_set == nullptr)
536 first_new_set = working_set;
537 // A matching column gets to stay, and first_new_set gets all the
538 // completed_sets.
539 working_set = src_it.empty() ? nullptr : src_it.data();
540 if (working_set != nullptr &&
541 working_set->column()->MatchingColumns(*column)) {
542 working_set->set_column(column);
543 dest_it.add_after_then_move(src_it.extract());
544 src_it.forward();
545 first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
546 first_new_set = nullptr;
547 } else {
548 // Just make a new working set for the current column.
549 working_set = new WorkingPartSet(column);
550 dest_it.add_after_then_move(working_set);
551 }
552 }
553 // Complete any remaining src working sets.
554 while (!src_it.empty()) {
555 working_set = src_it.extract();
556 working_set->ExtractCompletedBlocks(bleft, tright, resolution,
557 used_parts, &completed_blocks,
558 &to_blocks);
559 delete working_set;
560 src_it.forward();
561 }
562 // Make a new between-column WorkingSet for after the last column.
563 working_set = new WorkingPartSet(nullptr);
564 dest_it.add_after_then_move(working_set);
565 if (first_new_set == nullptr)
566 first_new_set = working_set;
567 // The first_new_set now gets any accumulated completed_parts/blocks.
568 first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
569}

◆ ColumnContaining()

ColPartition * tesseract::ColPartitionSet::ColumnContaining ( int  x,
int  y 
)

Definition at line 70 of file colpartitionset.cpp.

70 {
71 ColPartition_IT it(&parts_);
72 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
73 ColPartition* part = it.data();
74 if (part->ColumnContains(x, y))
75 return part;
76 }
77 return nullptr;
78}

◆ ColumnCount()

int tesseract::ColPartitionSet::ColumnCount ( ) const
inline

Definition at line 55 of file colpartitionset.h.

55 {
56 return parts_.length();
57 }

◆ CompatibleColumns()

bool tesseract::ColPartitionSet::CompatibleColumns ( bool  debug,
ColPartitionSet *  other,
WidthCallback *  cb 
)

Definition at line 223 of file colpartitionset.cpp.

224 {
225 if (debug) {
226 tprintf("CompatibleColumns testing compatibility\n");
227 Print();
228 other->Print();
229 }
230 if (other->parts_.empty()) {
231 if (debug)
232 tprintf("CompatibleColumns true due to empty other\n");
233 return true;
234 }
235 ColPartition_IT it(&other->parts_);
236 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
237 ColPartition* part = it.data();
238 if (part->blob_type() < BRT_UNKNOWN) {
239 if (debug) {
240 tprintf("CompatibleColumns ignoring image partition\n");
241 part->Print();
242 }
243 continue; // Image partitions are irrelevant to column compatibility.
244 }
245 int y = part->MidY();
246 int left = part->bounding_box().left();
247 int right = part->bounding_box().right();
248 ColPartition* left_col = ColumnContaining(left, y);
249 ColPartition* right_col = ColumnContaining(right, y);
250 if (right_col == nullptr || left_col == nullptr) {
251 if (debug) {
252 tprintf("CompatibleColumns false due to partition edge outside\n");
253 part->Print();
254 }
255 return false; // A partition edge lies outside of all columns
256 }
257 if (right_col != left_col && cb->Run(right - left)) {
258 if (debug) {
259 tprintf("CompatibleColumns false due to good width in multiple cols\n");
260 part->Print();
261 }
262 return false; // Partition with a good width must be in a single column.
263 }
264
265 ColPartition_IT it2= it;
266 while (!it2.at_last()) {
267 it2.forward();
268 ColPartition* next_part = it2.data();
269 if (!BLOBNBOX::IsTextType(next_part->blob_type()))
270 continue; // Non-text partitions are irrelevant.
271 int next_left = next_part->bounding_box().left();
272 if (next_left == right) {
273 break; // They share the same edge, so one must be a pull-out.
274 }
275 // Search to see if right and next_left fall within a single column.
276 ColPartition* next_left_col = ColumnContaining(next_left, y);
277 if (right_col == next_left_col) {
278 // There is a column break in this column.
279 // This can be due to a figure caption within a column, a pull-out
280 // block, or a simple broken textline that remains to be merged:
281 // all allowed, or a change in column layout: not allowed.
282 // If both partitions are of good width, then it is likely
283 // a change in column layout, otherwise probably an allowed situation.
284 if (part->good_width() && next_part->good_width()) {
285 if (debug) {
286 int next_right = next_part->bounding_box().right();
287 tprintf("CompatibleColumns false due to 2 parts of good width\n");
288 tprintf("part1 %d-%d, part2 %d-%d\n",
289 left, right, next_left, next_right);
290 right_col->Print();
291 }
292 return false;
293 }
294 }
295 break;
296 }
297 }
298 if (debug)
299 tprintf("CompatibleColumns true!\n");
300 return true;
301}
@ BRT_UNKNOWN
Definition: blobbox.h:78
static bool IsTextType(BlobRegionType type)
Definition: blobbox.h:418
ColPartition * ColumnContaining(int x, int y)

◆ Copy()

ColPartitionSet * tesseract::ColPartitionSet::Copy ( bool  good_only)

Definition at line 354 of file colpartitionset.cpp.

354 {
355 ColPartition_LIST copy_parts;
356 ColPartition_IT src_it(&parts_);
357 ColPartition_IT dest_it(&copy_parts);
358 for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
359 ColPartition* part = src_it.data();
360 if (BLOBNBOX::IsTextType(part->blob_type()) &&
361 (!good_only || part->good_width() || part->good_column()))
362 dest_it.add_after_then_move(part->ShallowCopy());
363 }
364 if (dest_it.empty())
365 return nullptr;
366 return new ColPartitionSet(&copy_parts);
367}

◆ DisplayColumnEdges()

void tesseract::ColPartitionSet::DisplayColumnEdges ( int  y_bottom,
int  y_top,
ScrollView *  win 
)

Definition at line 386 of file colpartitionset.cpp.

387 {
388#ifndef GRAPHICS_DISABLED
389 ColPartition_IT it(&parts_);
390 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
391 ColPartition* part = it.data();
392 win->Line(part->LeftAtY(y_top), y_top, part->LeftAtY(y_bottom), y_bottom);
393 win->Line(part->RightAtY(y_top), y_top, part->RightAtY(y_bottom), y_bottom);
394 }
395#endif // GRAPHICS_DISABLED
396}
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:532

◆ Empty()

bool tesseract::ColPartitionSet::Empty ( ) const
inline

Definition at line 52 of file colpartitionset.h.

52 {
53 return parts_.empty();
54 }

◆ GetColumnBoxes()

void tesseract::ColPartitionSet::GetColumnBoxes ( int  y_bottom,
int  y_top,
ColSegment_LIST *  segments 
)

Definition at line 370 of file colpartitionset.cpp.

371 {
372 ColPartition_IT it(&parts_);
373 ColSegment_IT col_it(segments);
374 col_it.move_to_last();
375 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
376 ColPartition* part = it.data();
377 ICOORD bot_left(part->LeftAtY(y_top), y_bottom);
378 ICOORD top_right(part->RightAtY(y_bottom), y_top);
379 auto *col_seg = new ColSegment();
380 col_seg->InsertBox(TBOX(bot_left, top_right));
381 col_it.add_after_then_move(col_seg);
382 }
383}
integer coordinate
Definition: points.h:32
Definition: rect.h:34

◆ GetColumnByIndex()

ColPartition * tesseract::ColPartitionSet::GetColumnByIndex ( int  index)

Definition at line 60 of file colpartitionset.cpp.

60 {
61 ColPartition_IT it(&parts_);
62 it.mark_cycle_pt();
63 for (int i = 0; i < index && !it.cycled_list(); ++i, it.forward());
64 if (it.cycled_list())
65 return nullptr;
66 return it.data();
67}

◆ GoodColumnCount()

int tesseract::ColPartitionSet::GoodColumnCount ( ) const

Definition at line 49 of file colpartitionset.cpp.

49 {
50 int num_good_cols = 0;
51 // This is a read-only iteration of the list.
52 ColPartition_IT it(const_cast<ColPartition_LIST*>(&parts_));
53 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
54 if (it.data()->good_width()) ++num_good_cols;
55 }
56 return num_good_cols;
57}

◆ ImproveColumnCandidate()

void tesseract::ColPartitionSet::ImproveColumnCandidate ( WidthCallback *  cb,
PartSetVector *  src_sets 
)

Definition at line 90 of file colpartitionset.cpp.

91 {
92 int set_size = src_sets->size();
93 // Iterate over the provided column sets, as each one may have something
94 // to improve this.
95 for (int i = 0; i < set_size; ++i) {
96 ColPartitionSet* column_set = src_sets->get(i);
97 if (column_set == nullptr)
98 continue;
99 // Iterate over the parts in this and column_set, adding bigger or
100 // new parts in column_set to this.
101 ColPartition_IT part_it(&parts_);
102 ASSERT_HOST(!part_it.empty());
103 int prev_right = INT32_MIN;
104 part_it.mark_cycle_pt();
105 ColPartition_IT col_it(&column_set->parts_);
106 for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
107 ColPartition* col_part = col_it.data();
108 if (col_part->blob_type() < BRT_UNKNOWN)
109 continue; // Ignore image partitions.
110 int col_left = col_part->left_key();
111 int col_right = col_part->right_key();
112 // Sync-up part_it (in this) so it matches the col_part in column_set.
113 ColPartition* part = part_it.data();
114 while (!part_it.at_last() && part->right_key() < col_left) {
115 prev_right = part->right_key();
116 part_it.forward();
117 part = part_it.data();
118 }
119 int part_left = part->left_key();
120 int part_right = part->right_key();
121 if (part_right < col_left || col_right < part_left) {
122 // There is no overlap so this is a new partition.
123 AddPartition(col_part->ShallowCopy(), &part_it);
124 continue;
125 }
126 // Check the edges of col_part to see if they can improve part.
127 bool part_width_ok = cb->Run(part->KeyWidth(part_left, part_right));
128 if (col_left < part_left && col_left > prev_right) {
129 // The left edge of the column is better and it doesn't overlap,
130 // so we can potentially expand it.
131 int col_box_left = col_part->BoxLeftKey();
132 bool tab_width_ok = cb->Run(part->KeyWidth(col_left, part_right));
133 bool box_width_ok = cb->Run(part->KeyWidth(col_box_left, part_right));
134 if (tab_width_ok || (!part_width_ok)) {
135 // The tab is leaving the good column metric at least as good as
136 // it was before, so use the tab.
137 part->CopyLeftTab(*col_part, false);
138 part->SetColumnGoodness(cb);
139 } else if (col_box_left < part_left &&
140 (box_width_ok || !part_width_ok)) {
141 // The box is leaving the good column metric at least as good as
142 // it was before, so use the box.
143 part->CopyLeftTab(*col_part, true);
144 part->SetColumnGoodness(cb);
145 }
146 part_left = part->left_key();
147 }
148 if (col_right > part_right &&
149 (part_it.at_last() ||
150 part_it.data_relative(1)->left_key() > col_right)) {
151 // The right edge is better, so we can possibly expand it.
152 int col_box_right = col_part->BoxRightKey();
153 bool tab_width_ok = cb->Run(part->KeyWidth(part_left, col_right));
154 bool box_width_ok = cb->Run(part->KeyWidth(part_left, col_box_right));
155 if (tab_width_ok || (!part_width_ok)) {
156 // The tab is leaving the good column metric at least as good as
157 // it was before, so use the tab.
158 part->CopyRightTab(*col_part, false);
159 part->SetColumnGoodness(cb);
160 } else if (col_box_right > part_right &&
161 (box_width_ok || !part_width_ok)) {
162 // The box is leaving the good column metric at least as good as
163 // it was before, so use the box.
164 part->CopyRightTab(*col_part, true);
165 part->SetColumnGoodness(cb);
166 }
167 }
168 }
169 }
170 ComputeCoverage();
171}
#define ASSERT_HOST(x)
Definition: errcode.h:88

◆ LegalColumnCandidate()

bool tesseract::ColPartitionSet::LegalColumnCandidate ( )

Definition at line 331 of file colpartitionset.cpp.

331 {
332 ColPartition_IT it(&parts_);
333 if (it.empty())
334 return false;
335 bool any_text_parts = false;
336 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
337 ColPartition* part = it.data();
338 if (BLOBNBOX::IsTextType(part->blob_type())) {
339 if (!part->IsLegal())
340 return false; // Individual partition is illegal.
341 any_text_parts = true;
342 }
343 if (!it.at_last()) {
344 ColPartition* next_part = it.data_relative(1);
345 if (next_part->left_key() < part->right_key()) {
346 return false;
347 }
348 }
349 }
350 return any_text_parts;
351}

◆ Print()

void tesseract::ColPartitionSet::Print ( )

Definition at line 593 of file colpartitionset.cpp.

593 {
594 ColPartition_IT it(&parts_);
595 tprintf("Partition set of %d parts, %d good, coverage=%d+%d"
596 " (%d,%d)->(%d,%d)\n",
597 it.length(), good_column_count_, good_coverage_, bad_coverage_,
598 bounding_box_.left(), bounding_box_.bottom(),
599 bounding_box_.right(), bounding_box_.top());
600 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
601 ColPartition* part = it.data();
602 part->Print();
603 }
604}
int16_t top() const
Definition: rect.h:58
int16_t right() const
Definition: rect.h:79

◆ RelinquishParts()

void tesseract::ColPartitionSet::RelinquishParts ( )

Definition at line 81 of file colpartitionset.cpp.

81 {
82 ColPartition_IT it(&parts_);
83 while (!it.empty()) {
84 it.extract();
85 it.forward();
86 }
87}

◆ SpanningType()

ColumnSpanningType tesseract::ColPartitionSet::SpanningType ( int  resolution,
int  left,
int  right,
int  height,
int  y,
int  left_margin,
int  right_margin,
int *  first_col,
int *  last_col,
int *  first_spanned_col 
)

Definition at line 405 of file colpartitionset.cpp.

412 {
413 *first_col = -1;
414 *last_col = -1;
415 *first_spanned_col = -1;
416 int margin_columns = 0;
417 ColPartition_IT it(&parts_);
418 int col_index = 1;
419 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) {
420 ColPartition* part = it.data();
421 if (part->ColumnContains(left, y) ||
422 (it.at_first() && part->ColumnContains(left + height, y))) {
423 // In the default case, first_col is set, but columns_spanned remains
424 // zero, so first_col will get reset in the first column genuinely
425 // spanned, but we can tell the difference from a noise partition
426 // that touches no column.
427 *first_col = col_index;
428 if (part->ColumnContains(right, y) ||
429 (it.at_last() && part->ColumnContains(right - height, y))) {
430 // Both within a single column.
431 *last_col = col_index;
432 return CST_FLOWING;
433 }
434 if (left_margin <= part->LeftAtY(y)) {
435 // It completely spans this column.
436 *first_spanned_col = col_index;
437 margin_columns = 1;
438 }
439 } else if (part->ColumnContains(right, y) ||
440 (it.at_last() && part->ColumnContains(right - height, y))) {
441 if (*first_col < 0) {
442 // It started in-between.
443 *first_col = col_index - 1;
444 }
445 if (right_margin >= part->RightAtY(y)) {
446 // It completely spans this column.
447 if (margin_columns == 0)
448 *first_spanned_col = col_index;
449 ++margin_columns;
450 }
451 *last_col = col_index;
452 break;
453 } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) {
454 // Neither left nor right are contained within, so it spans this
455 // column.
456 if (*first_col < 0) {
457 // It started in between the previous column and the current column.
458 *first_col = col_index - 1;
459 }
460 if (margin_columns == 0)
461 *first_spanned_col = col_index;
462 *last_col = col_index;
463 } else if (right < part->LeftAtY(y)) {
464 // We have gone past the end.
465 *last_col = col_index - 1;
466 if (*first_col < 0) {
467 // It must lie completely between columns =>noise.
468 *first_col = col_index - 1;
469 }
470 break;
471 }
472 }
473 if (*first_col < 0)
474 *first_col = col_index - 1; // The last in-between.
475 if (*last_col < 0)
476 *last_col = col_index - 1; // The last in-between.
477 ASSERT_HOST(*first_col >= 0 && *last_col >= 0);
478 ASSERT_HOST(*first_col <= *last_col);
479 if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) {
480 // Neither end was in a column, and it didn't span any, so it lies
481 // entirely between columns, therefore noise.
482 return CST_NOISE;
483 } else if (margin_columns <= 1) {
484 // An exception for headings that stick outside of single-column text.
485 if (margin_columns == 1 && parts_.singleton()) {
486 return CST_HEADING;
487 }
488 // It is a pullout, as left and right were not in the same column, but
489 // it doesn't go to the edge of its start and end.
490 return CST_PULLOUT;
491 }
492 // Its margins went to the edges of first and last columns => heading.
493 return CST_HEADING;
494}
const double kMinColumnWidth

◆ UnmatchedWidth()

int tesseract::ColPartitionSet::UnmatchedWidth ( ColPartitionSet *  part_set)

Definition at line 306 of file colpartitionset.cpp.

306 {
307 int total_width = 0;
308 ColPartition_IT it(&part_set->parts_);
309 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
310 ColPartition* part = it.data();
311 if (!BLOBNBOX::IsTextType(part->blob_type())) {
312 continue; // Non-text partitions are irrelevant to column compatibility.
313 }
314 int y = part->MidY();
315 BLOBNBOX_C_IT box_it(part->boxes());
316 for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
317 const TBOX& box = it.data()->bounding_box();
318 // Assume that the whole blob is outside any column iff its x-middle
319 // is outside.
320 int x = (box.left() + box.right()) / 2;
321 ColPartition* col = ColumnContaining(x, y);
322 if (col == nullptr)
323 total_width += box.width();
324 }
325 }
326 return total_width;
327}
int16_t width() const
Definition: rect.h:115

The documentation for this class was generated from the following files: