tesseract 4.1.1
Loading...
Searching...
No Matches
tesseract::ColPartition Class Reference

#include <colpartition.h>

Inheritance diagram for tesseract::ColPartition:
ELIST2_LINK

Public Member Functions

 ColPartition ()=default
 
 ColPartition (BlobRegionType blob_type, const ICOORD &vertical)
 
 ~ColPartition ()
 
const TBOX & bounding_box () const
 
int left_margin () const
 
void set_left_margin (int margin)
 
int right_margin () const
 
void set_right_margin (int margin)
 
int median_top () const
 
int median_bottom () const
 
int median_left () const
 
int median_right () const
 
int median_height () const
 
void set_median_height (int height)
 
int median_width () const
 
void set_median_width (int width)
 
BlobRegionType blob_type () const
 
void set_blob_type (BlobRegionType t)
 
BlobTextFlowType flow () const
 
void set_flow (BlobTextFlowType f)
 
int good_blob_score () const
 
bool good_width () const
 
bool good_column () const
 
bool left_key_tab () const
 
int left_key () const
 
bool right_key_tab () const
 
int right_key () const
 
PolyBlockType type () const
 
void set_type (PolyBlockType t)
 
BLOBNBOX_CLIST * boxes ()
 
int boxes_count () const
 
void set_vertical (const ICOORD &v)
 
ColPartition_CLIST * upper_partners ()
 
ColPartition_CLIST * lower_partners ()
 
void set_working_set (WorkingPartSet *working_set)
 
bool block_owned () const
 
void set_block_owned (bool owned)
 
bool desperately_merged () const
 
ColPartitionSet * column_set () const
 
void set_side_step (int step)
 
int bottom_spacing () const
 
void set_bottom_spacing (int spacing)
 
int top_spacing () const
 
void set_top_spacing (int spacing)
 
void set_table_type ()
 
void clear_table_type ()
 
bool inside_table_column ()
 
void set_inside_table_column (bool val)
 
ColPartition * nearest_neighbor_above () const
 
void set_nearest_neighbor_above (ColPartition *part)
 
ColPartition * nearest_neighbor_below () const
 
void set_nearest_neighbor_below (ColPartition *part)
 
int space_above () const
 
void set_space_above (int space)
 
int space_below () const
 
void set_space_below (int space)
 
int space_to_left () const
 
void set_space_to_left (int space)
 
int space_to_right () const
 
void set_space_to_right (int space)
 
uint8_t * color1 ()
 
uint8_t * color2 ()
 
bool owns_blobs () const
 
void set_owns_blobs (bool owns_blobs)
 
int MidY () const
 
int MedianY () const
 
int MidX () const
 
int SortKey (int x, int y) const
 
int XAtY (int sort_key, int y) const
 
int KeyWidth (int left_key, int right_key) const
 
int ColumnWidth () const
 
int BoxLeftKey () const
 
int BoxRightKey () const
 
int LeftAtY (int y) const
 
int RightAtY (int y) const
 
bool IsLeftOf (const ColPartition &other) const
 
bool ColumnContains (int x, int y) const
 
bool IsEmpty () const
 
bool IsSingleton () const
 
bool HOverlaps (const ColPartition &other) const
 
bool VOverlaps (const ColPartition &other) const
 
int VCoreOverlap (const ColPartition &other) const
 
int HCoreOverlap (const ColPartition &other) const
 
bool VSignificantCoreOverlap (const ColPartition &other) const
 
bool WithinSameMargins (const ColPartition &other) const
 
bool TypesMatch (const ColPartition &other) const
 
bool IsLineType () const
 
bool IsImageType () const
 
bool IsTextType () const
 
bool IsPulloutType () const
 
bool IsVerticalType () const
 
bool IsHorizontalType () const
 
bool IsUnMergeableType () const
 
bool IsVerticalLine () const
 
bool IsHorizontalLine () const
 
void AddBox (BLOBNBOX *box)
 
void RemoveBox (BLOBNBOX *box)
 
BLOBNBOX * BiggestBox ()
 
TBOX BoundsWithoutBox (BLOBNBOX *box)
 
void ClaimBoxes ()
 
void DisownBoxes ()
 
void DisownBoxesNoAssert ()
 
bool ReleaseNonLeaderBoxes ()
 
void DeleteBoxes ()
 
void ReflectInYAxis ()
 
bool IsLegal ()
 
bool MatchingColumns (const ColPartition &other) const
 
bool MatchingTextColor (const ColPartition &other) const
 
bool MatchingSizes (const ColPartition &other) const
 
bool ConfirmNoTabViolation (const ColPartition &other) const
 
bool MatchingStrokeWidth (const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
 
bool OKDiacriticMerge (const ColPartition &candidate, bool debug) const
 
void SetLeftTab (const TabVector *tab_vector)
 
void SetRightTab (const TabVector *tab_vector)
 
void CopyLeftTab (const ColPartition &src, bool take_box)
 
void CopyRightTab (const ColPartition &src, bool take_box)
 
int LeftBlobRule () const
 
int RightBlobRule () const
 
float SpecialBlobsDensity (const BlobSpecialTextType type) const
 
int SpecialBlobsCount (const BlobSpecialTextType type)
 
void SetSpecialBlobsDensity (const BlobSpecialTextType type, const float density)
 
void ComputeSpecialBlobsDensity ()
 
void AddPartner (bool upper, ColPartition *partner)
 
void RemovePartner (bool upper, ColPartition *partner)
 
ColPartition * SingletonPartner (bool upper)
 
void Absorb (ColPartition *other, WidthCallback *cb)
 
bool OKMergeOverlap (const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
 
BLOBNBOX * OverlapSplitBlob (const TBOX &box)
 
ColPartition * SplitAtBlob (BLOBNBOX *split_blob)
 
ColPartition * SplitAt (int split_x)
 
void ComputeLimits ()
 
int CountOverlappingBoxes (const TBOX &box)
 
void SetPartitionType (int resolution, ColPartitionSet *columns)
 
PolyBlockType PartitionType (ColumnSpanningType flow) const
 
void ColumnRange (int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
 
void SetColumnGoodness (WidthCallback *cb)
 
bool MarkAsLeaderIfMonospaced ()
 
void SetRegionAndFlowTypesFromProjectionValue (int value)
 
void SetBlobTypes ()
 
bool HasGoodBaseline ()
 
void AddToWorkingSet (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
 
TO_ROW * MakeToRow ()
 
ColPartition * ShallowCopy () const
 
ColPartition * CopyButDontOwnBlobs ()
 
ScrollView::Color BoxColor () const
 
void Print () const
 
void PrintColors ()
 
void SmoothPartnerRun (int working_set_count)
 
void RefinePartners (PolyBlockType type, bool get_desperate, ColPartitionGrid *grid)
 
bool IsInSameColumnAs (const ColPartition &part) const
 
void set_first_column (int column)
 
void set_last_column (int column)
 
- Public Member Functions inherited from ELIST2_LINK
 ELIST2_LINK ()
 
 ELIST2_LINK (const ELIST2_LINK &)
 
void operator= (const ELIST2_LINK &)
 

Static Public Member Functions

static ColPartition * MakeLinePartition (BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
 
static ColPartition * FakePartition (const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
 
static ColPartition * MakeBigPartition (BLOBNBOX *box, ColPartition_LIST *big_part_list)
 
static bool TypesMatch (BlobRegionType type1, BlobRegionType type2)
 
static bool TypesSimilar (PolyBlockType type1, PolyBlockType type2)
 
static void LineSpacingBlocks (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
 
static TO_BLOCK * MakeBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static TO_BLOCK * MakeVerticalTextBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static int SortByBBox (const void *p1, const void *p2)
 

Detailed Description

ColPartition is a partition of a horizontal slice of the page. It starts out as a collection of blobs at a particular y-coord in the grid, but ends up (after merging and uniquing) as an approximate text line. ColPartitions are also used to hold a partitioning of the page into columns, each representing one column. Although a ColPartition applies to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions emerges, which represents the columns over a wide y-coordinate range.

Definition at line 68 of file colpartition.h.

Constructor & Destructor Documentation

◆ ColPartition() [1/2]

tesseract::ColPartition::ColPartition ( )
default

◆ ColPartition() [2/2]

tesseract::ColPartition::ColPartition ( BlobRegionType  blob_type,
const ICOORD & vertical
)
Parameters
blob_type: is the blob_region_type_ of the blobs in this partition.
vertical: is the direction of logical vertical on the possibly skewed image.

Definition at line 81 of file colpartition.cpp.

82 : left_margin_(-INT32_MAX), right_margin_(INT32_MAX),
83 median_bottom_(INT32_MAX), median_top_(-INT32_MAX),
84 median_left_(INT32_MAX), median_right_(-INT32_MAX),
85 blob_type_(blob_type),
86 vertical_(vertical) {
87 memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
88}
BlobRegionType blob_type() const
Definition: colpartition.h:149

◆ ~ColPartition()

tesseract::ColPartition::~ColPartition ( )

Definition at line 133 of file colpartition.cpp.

133 {
134 // Remove this as a partner of all partners, as we don't want them
135 // referring to a deleted object.
136 ColPartition_C_IT it(&upper_partners_);
137 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
138 it.data()->RemovePartner(false, this);
139 }
140 it.set_to_list(&lower_partners_);
141 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
142 it.data()->RemovePartner(true, this);
143 }
144}

Member Function Documentation

◆ Absorb()

void tesseract::ColPartition::Absorb ( ColPartition * other,
WidthCallback * cb
)

Definition at line 638 of file colpartition.cpp.

638 {
639 // The result has to either own all of the blobs or none of them.
640 // Verify the flag is consistent.
641 ASSERT_HOST(owns_blobs() == other->owns_blobs());
642 // TODO(nbeato): check owns_blobs better. Right now owns_blobs
643 // should always be true when this is called. So there is no issues.
644 if (TabFind::WithinTestRegion(2, bounding_box_.left(),
645 bounding_box_.bottom()) ||
646 TabFind::WithinTestRegion(2, other->bounding_box_.left(),
647 other->bounding_box_.bottom())) {
648 tprintf("Merging:");
649 Print();
650 other->Print();
651 }
652
653 // Update the special_blobs_densities_.
654 memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
655 for (int type = 0; type < BSTT_COUNT; ++type) {
656 unsigned w1 = boxes_.length();
657 unsigned w2 = other->boxes_.length();
658 float new_val = special_blobs_densities_[type] * w1 +
659 other->special_blobs_densities_[type] * w2;
660 if (!w1 || !w2) {
661 ASSERT_HOST((w1 + w2) > 0);
662 special_blobs_densities_[type] = new_val / (w1 + w2);
663 }
664 }
665
666 // Merge the two sorted lists.
667 BLOBNBOX_C_IT it(&boxes_);
668 BLOBNBOX_C_IT it2(&other->boxes_);
669 for (; !it2.empty(); it2.forward()) {
670 BLOBNBOX* bbox2 = it2.extract();
671 ColPartition* prev_owner = bbox2->owner();
672 if (prev_owner != other && prev_owner != nullptr) {
673 // A blob on other's list is owned by someone else; let them have it.
674 continue;
675 }
676 ASSERT_HOST(prev_owner == other || prev_owner == nullptr);
677 if (prev_owner == other)
678 bbox2->set_owner(this);
679 it.add_to_end(bbox2);
680 }
681 left_margin_ = std::min(left_margin_, other->left_margin_);
682 right_margin_ = std::max(right_margin_, other->right_margin_);
683 if (other->left_key_ < left_key_) {
684 left_key_ = other->left_key_;
685 left_key_tab_ = other->left_key_tab_;
686 }
687 if (other->right_key_ > right_key_) {
688 right_key_ = other->right_key_;
689 right_key_tab_ = other->right_key_tab_;
690 }
691 // Combine the flow and blob_type in a sensible way.
692 // Dominant flows stay.
693 if (!DominatesInMerge(flow_, other->flow_)) {
694 flow_ = other->flow_;
695 blob_type_ = other->blob_type_;
696 }
697 SetBlobTypes();
698 if (IsVerticalType()) {
699 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
700 last_add_was_vertical_ = true;
701 } else {
702 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
703 last_add_was_vertical_ = false;
704 }
706 // Fix partner lists. other is going away, so remove it as a
707 // partner of all its partners and add this in its place.
708 for (int upper = 0; upper < 2; ++upper) {
709 ColPartition_CLIST partners;
710 ColPartition_C_IT part_it(&partners);
711 part_it.add_list_after(upper ? &other->upper_partners_
712 : &other->lower_partners_);
713 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
714 ColPartition* partner = part_it.extract();
715 partner->RemovePartner(!upper, other);
716 partner->RemovePartner(!upper, this);
717 partner->AddPartner(!upper, this);
718 }
719 }
720 delete other;
721 if (cb != nullptr) {
722 SetColumnGoodness(cb);
723 }
724}
@ BSTT_COUNT
Definition: blobbox.h:103
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
Definition: blobbox.h:129
#define ASSERT_HOST(x)
Definition: errcode.h:88
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
tesseract::ColPartition * owner() const
Definition: blobbox.h:352
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:355
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
static bool WithinTestRegion(int detail_level, int x, int y)
PolyBlockType type() const
Definition: colpartition.h:182
bool IsVerticalType() const
Definition: colpartition.h:442
void SetColumnGoodness(WidthCallback *cb)

◆ AddBox()

void tesseract::ColPartition::AddBox ( BLOBNBOX * box )

Definition at line 169 of file colpartition.cpp.

169 {
170 TBOX box = bbox->bounding_box();
171 // Update the partition limits.
172 if (boxes_.length() == 0) {
173 bounding_box_ = box;
174 } else {
175 bounding_box_ += box;
176 }
177
178 if (IsVerticalType()) {
179 if (!last_add_was_vertical_) {
180 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
181 last_add_was_vertical_ = true;
182 }
183 boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
184 } else {
185 if (last_add_was_vertical_) {
186 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
187 last_add_was_vertical_ = false;
188 }
189 boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
190 }
191 if (!left_key_tab_)
192 left_key_ = BoxLeftKey();
193 if (!right_key_tab_)
194 right_key_ = BoxRightKey();
195 if (TabFind::WithinTestRegion(2, box.left(), box.bottom()))
196 tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
197 box.left(), box.bottom(), box.right(), box.top(),
198 bounding_box_.left(), bounding_box_.right());
199}
const TBOX & bounding_box() const
Definition: blobbox.h:230
Definition: rect.h:34
int16_t top() const
Definition: rect.h:58
int16_t right() const
Definition: rect.h:79

◆ AddPartner()

void tesseract::ColPartition::AddPartner ( bool  upper,
ColPartition * partner
)

Definition at line 603 of file colpartition.cpp.

603 {
604 if (upper) {
605 partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
606 true, this);
607 upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
608 } else {
609 partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
610 true, this);
611 lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
612 }
613}

◆ AddToWorkingSet()

void tesseract::ColPartition::AddToWorkingSet ( const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
ColPartition_LIST *  used_parts,
WorkingPartSet_LIST *  working_set 
)

Definition at line 1347 of file colpartition.cpp.

1350 {
1351 if (block_owned_)
1352 return; // Done it already.
1353 block_owned_ = true;
1354 WorkingPartSet_IT it(working_sets);
1355 // If there is an upper partner use its working_set_ directly.
1356 ColPartition* partner = SingletonPartner(true);
1357 if (partner != nullptr && partner->working_set_ != nullptr) {
1358 working_set_ = partner->working_set_;
1359 working_set_->AddPartition(this);
1360 return;
1361 }
1362 if (partner != nullptr && textord_debug_bugs) {
1363 tprintf("Partition with partner has no working set!:");
1364 Print();
1365 partner->Print();
1366 }
1367 // Search for the column that the left edge fits in.
1368 WorkingPartSet* work_set = nullptr;
1369 it.move_to_first();
1370 int col_index = 0;
1371 for (it.mark_cycle_pt(); !it.cycled_list() &&
1372 col_index != first_column_;
1373 it.forward(), ++col_index);
1374 if (textord_debug_tabfind >= 2) {
1375 tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");
1376 Print();
1377 }
1378 if (it.cycled_list() && textord_debug_bugs) {
1379 tprintf("Target column=%d, only had %d\n", first_column_, col_index);
1380 }
1381 ASSERT_HOST(!it.cycled_list());
1382 work_set = it.data();
1383 // If last_column_ != first_column, then we need to scoop up all blocks
1384 // between here and the last_column_ and put back in work_set.
1385 if (!it.cycled_list() && last_column_ != first_column_ && !IsPulloutType()) {
1386 // Find the column that the right edge falls in.
1387 BLOCK_LIST completed_blocks;
1388 TO_BLOCK_LIST to_blocks;
1389 for (; !it.cycled_list() && col_index <= last_column_;
1390 it.forward(), ++col_index) {
1391 WorkingPartSet* end_set = it.data();
1392 end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
1393 &completed_blocks, &to_blocks);
1394 }
1395 work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1396 }
1397 working_set_ = work_set;
1398 work_set->AddPartition(this);
1399}
int textord_debug_bugs
Definition: alignedblob.cpp:28
int textord_debug_tabfind
Definition: alignedblob.cpp:27
bool IsPulloutType() const
Definition: colpartition.h:438
ColPartition * SingletonPartner(bool upper)
void AddPartition(ColPartition *part)

◆ BiggestBox()

BLOBNBOX * tesseract::ColPartition::BiggestBox ( )

Definition at line 215 of file colpartition.cpp.

215 {
216 BLOBNBOX* biggest = nullptr;
217 BLOBNBOX_C_IT bb_it(&boxes_);
218 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
219 BLOBNBOX* bbox = bb_it.data();
220 if (IsVerticalType()) {
221 if (biggest == nullptr ||
222 bbox->bounding_box().width() > biggest->bounding_box().width())
223 biggest = bbox;
224 } else {
225 if (biggest == nullptr ||
226 bbox->bounding_box().height() > biggest->bounding_box().height())
227 biggest = bbox;
228 }
229 }
230 return biggest;
231}
int16_t width() const
Definition: rect.h:115
int16_t height() const
Definition: rect.h:108

◆ blob_type()

BlobRegionType tesseract::ColPartition::blob_type ( ) const
inline

Definition at line 149 of file colpartition.h.

149 {
150 return blob_type_;
151 }

◆ block_owned()

bool tesseract::ColPartition::block_owned ( ) const
inline

Definition at line 206 of file colpartition.h.

206 {
207 return block_owned_;
208 }

◆ bottom_spacing()

int tesseract::ColPartition::bottom_spacing ( ) const
inline

Definition at line 221 of file colpartition.h.

221 {
222 return bottom_spacing_;
223 }

◆ bounding_box()

const TBOX & tesseract::ColPartition::bounding_box ( ) const
inline

Definition at line 110 of file colpartition.h.

110 {
111 return bounding_box_;
112 }

◆ BoundsWithoutBox()

TBOX tesseract::ColPartition::BoundsWithoutBox ( BLOBNBOX * box )

Definition at line 234 of file colpartition.cpp.

234 {
235 TBOX result;
236 BLOBNBOX_C_IT bb_it(&boxes_);
237 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
238 if (box != bb_it.data()) {
239 result += bb_it.data()->bounding_box();
240 }
241 }
242 return result;
243}

◆ BoxColor()

ScrollView::Color tesseract::ColPartition::BoxColor ( ) const

Definition at line 1771 of file colpartition.cpp.

1771 {
1772 if (type_ == PT_UNKNOWN)
1773 return BLOBNBOX::TextlineColor(blob_type_, flow_);
1774 return POLY_BLOCK::ColorForPolyBlockType(type_);
1775}
@ PT_UNKNOWN
Definition: capi.h:129
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
Definition: blobbox.cpp:444
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
Definition: polyblk.cpp:393

◆ boxes()

BLOBNBOX_CLIST * tesseract::ColPartition::boxes ( )
inline

Definition at line 188 of file colpartition.h.

188 {
189 return &boxes_;
190 }

◆ boxes_count()

int tesseract::ColPartition::boxes_count ( ) const
inline

Definition at line 191 of file colpartition.h.

191 {
192 return boxes_.length();
193 }

◆ BoxLeftKey()

int tesseract::ColPartition::BoxLeftKey ( ) const
inline

Definition at line 333 of file colpartition.h.

333 {
334 return SortKey(bounding_box_.left(), MidY());
335 }
int SortKey(int x, int y) const
Definition: colpartition.h:317

◆ BoxRightKey()

int tesseract::ColPartition::BoxRightKey ( ) const
inline

Definition at line 337 of file colpartition.h.

337 {
338 return SortKey(bounding_box_.right(), MidY());
339 }

◆ ClaimBoxes()

void tesseract::ColPartition::ClaimBoxes ( )

Definition at line 247 of file colpartition.cpp.

247 {
248 BLOBNBOX_C_IT bb_it(&boxes_);
249 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
250 BLOBNBOX* bblob = bb_it.data();
251 ColPartition* other = bblob->owner();
252 if (other == nullptr) {
253 // Normal case: ownership is available.
254 bblob->set_owner(this);
255 } else {
256 ASSERT_HOST(other == this);
257 }
258 }
259}

◆ clear_table_type()

void tesseract::ColPartition::clear_table_type ( )
inline

Definition at line 240 of file colpartition.h.

240 {
241 if (type_ == PT_TABLE)
242 type_ = type_before_table_;
243 }
@ PT_TABLE
Definition: capi.h:135

◆ color1()

uint8_t * tesseract::ColPartition::color1 ( )
inline

Definition at line 286 of file colpartition.h.

286 {
287 return color1_;
288 }

◆ color2()

uint8_t * tesseract::ColPartition::color2 ( )
inline

Definition at line 289 of file colpartition.h.

289 {
290 return color2_;
291 }

◆ column_set()

ColPartitionSet * tesseract::ColPartition::column_set ( ) const
inline

Definition at line 215 of file colpartition.h.

215 {
216 return column_set_;
217 }

◆ ColumnContains()

bool tesseract::ColPartition::ColumnContains ( int  x,
int  y 
) const
inline

Definition at line 354 of file colpartition.h.

354 {
355 return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
356 }
int LeftAtY(int y) const
Definition: colpartition.h:341
int RightAtY(int y) const
Definition: colpartition.h:345

◆ ColumnRange()

void tesseract::ColPartition::ColumnRange ( int  resolution,
ColPartitionSet * columns,
int *  first_col,
int *  last_col 
)

Definition at line 1056 of file colpartition.cpp.

1057 {
1058 int first_spanned_col = -1;
1059 ColumnSpanningType span_type =
1060 columns->SpanningType(resolution,
1061 bounding_box_.left(), bounding_box_.right(),
1062 std::min(bounding_box_.height(), bounding_box_.width()),
1063 MidY(), left_margin_, right_margin_,
1064 first_col, last_col,
1065 &first_spanned_col);
1066 type_ = PartitionType(span_type);
1067}
PolyBlockType PartitionType(ColumnSpanningType flow) const

◆ ColumnWidth()

int tesseract::ColPartition::ColumnWidth ( ) const
inline

Definition at line 329 of file colpartition.h.

329 {
330 return KeyWidth(left_key_, right_key_);
331 }
int KeyWidth(int left_key, int right_key) const
Definition: colpartition.h:325

◆ ComputeLimits()

void tesseract::ColPartition::ComputeLimits ( )

Definition at line 861 of file colpartition.cpp.

861 {
862 bounding_box_ = TBOX(); // Clear it
863 BLOBNBOX_C_IT it(&boxes_);
864 BLOBNBOX* bbox = nullptr;
865 int non_leader_count = 0;
866 if (it.empty()) {
867 bounding_box_.set_left(left_margin_);
868 bounding_box_.set_right(right_margin_);
869 bounding_box_.set_bottom(0);
870 bounding_box_.set_top(0);
871 } else {
872 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
873 bbox = it.data();
874 bounding_box_ += bbox->bounding_box();
875 if (bbox->flow() != BTFT_LEADER)
876 ++non_leader_count;
877 }
878 }
879 if (!left_key_tab_)
880 left_key_ = BoxLeftKey();
881 if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
882 // TODO(rays) investigate the causes of these error messages, to find
883 // out if they are genuinely harmful, or just indicative of junk input.
884 tprintf("Computed left-illegal partition\n");
885 Print();
886 }
887 if (!right_key_tab_)
888 right_key_ = BoxRightKey();
889 if (right_key_ < BoxRightKey() && textord_debug_bugs) {
890 tprintf("Computed right-illegal partition\n");
891 Print();
892 }
893 if (it.empty())
894 return;
895 if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
896 blob_type() == BRT_POLYIMAGE) {
897 median_top_ = bounding_box_.top();
898 median_bottom_ = bounding_box_.bottom();
899 median_height_ = bounding_box_.height();
900 median_left_ = bounding_box_.left();
901 median_right_ = bounding_box_.right();
902 median_width_ = bounding_box_.width();
903 } else {
904 STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
905 STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
906 STATS height_stats(0, bounding_box_.height() + 1);
907 STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
908 STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
909 STATS width_stats(0, bounding_box_.width() + 1);
910 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
911 bbox = it.data();
912 if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
913 const TBOX& box = bbox->bounding_box();
914 int area = box.area();
915 top_stats.add(box.top(), area);
916 bottom_stats.add(box.bottom(), area);
917 height_stats.add(box.height(), area);
918 left_stats.add(box.left(), area);
919 right_stats.add(box.right(), area);
920 width_stats.add(box.width(), area);
921 }
922 }
923 median_top_ = static_cast<int>(top_stats.median() + 0.5);
924 median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
925 median_height_ = static_cast<int>(height_stats.median() + 0.5);
926 median_left_ = static_cast<int>(left_stats.median() + 0.5);
927 median_right_ = static_cast<int>(right_stats.median() + 0.5);
928 median_width_ = static_cast<int>(width_stats.median() + 0.5);
929 }
930
931 if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
932 tprintf("Made partition with bad right coords");
933 Print();
934 }
935 if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
936 tprintf("Made partition with bad left coords");
937 Print();
938 }
939 // Fix partner lists. The bounding box has changed and partners are stored
940 // in bounding box order, so remove and reinsert this as a partner
941 // of all its partners.
942 for (int upper = 0; upper < 2; ++upper) {
943 ColPartition_CLIST partners;
944 ColPartition_C_IT part_it(&partners);
945 part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
946 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
947 ColPartition* partner = part_it.extract();
948 partner->RemovePartner(!upper, this);
949 partner->AddPartner(!upper, this);
950 }
951 }
952 if (TabFind::WithinTestRegion(2, bounding_box_.left(),
953 bounding_box_.bottom())) {
954 tprintf("Recomputed box for partition %p\n", this);
955 Print();
956 }
957}
@ BTFT_LEADER
Definition: blobbox.h:121
@ BRT_RECTIMAGE
Definition: blobbox.h:76
@ BRT_POLYIMAGE
Definition: blobbox.h:77
BlobTextFlowType flow() const
Definition: blobbox.h:295
void set_right(int x)
Definition: rect.h:82
void set_bottom(int y)
Definition: rect.h:68
int32_t area() const
Definition: rect.h:122
void set_top(int y)
Definition: rect.h:61
void set_left(int x)
Definition: rect.h:75
Definition: statistc.h:31
bool IsImageType() const
Definition: colpartition.h:430

◆ ComputeSpecialBlobsDensity()

void tesseract::ColPartition::ComputeSpecialBlobsDensity ( )

Definition at line 582 of file colpartition.cpp.

582 {
583 memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
584 if (boxes_.empty()) {
585 return;
586 }
587
588 BLOBNBOX_C_IT blob_it(&boxes_);
589 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
590 BLOBNBOX* blob = blob_it.data();
591 BlobSpecialTextType type = blob->special_text_type();
592 special_blobs_densities_[type]++;
593 }
594
595 for (float& special_blobs_density : special_blobs_densities_) {
596 special_blobs_density /= boxes_.length();
597 }
598}
BlobSpecialTextType
Definition: blobbox.h:96
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:289

◆ ConfirmNoTabViolation()

bool tesseract::ColPartition::ConfirmNoTabViolation ( const ColPartition & other ) const

Definition at line 413 of file colpartition.cpp.

413 {
414 if (bounding_box_.right() < other.bounding_box_.left() &&
415 bounding_box_.right() < other.LeftBlobRule())
416 return false;
417 if (other.bounding_box_.right() < bounding_box_.left() &&
418 other.bounding_box_.right() < LeftBlobRule())
419 return false;
420 if (bounding_box_.left() > other.bounding_box_.right() &&
421 bounding_box_.left() > other.RightBlobRule())
422 return false;
423 if (other.bounding_box_.left() > bounding_box_.right() &&
424 other.bounding_box_.left() > RightBlobRule())
425 return false;
426 return true;
427}

◆ CopyButDontOwnBlobs()

ColPartition * tesseract::ColPartition::CopyButDontOwnBlobs ( )

Definition at line 1758 of file colpartition.cpp.

1758 {
1759 ColPartition* copy = ShallowCopy();
1760 copy->set_owns_blobs(false);
1761 BLOBNBOX_C_IT inserter(copy->boxes());
1762 BLOBNBOX_C_IT traverser(boxes());
1763 for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward())
1764 inserter.add_after_then_move(traverser.data());
1765 return copy;
1766}
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:188
ColPartition * ShallowCopy() const

◆ CopyLeftTab()

void tesseract::ColPartition::CopyLeftTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 519 of file colpartition.cpp.

519 {
520 left_key_tab_ = take_box ? false : src.left_key_tab_;
521 if (left_key_tab_) {
522 left_key_ = src.left_key_;
523 } else {
524 bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY()));
525 left_key_ = BoxLeftKey();
526 }
527 if (left_margin_ > bounding_box_.left())
528 left_margin_ = src.left_margin_;
529}
int XAtY(int sort_key, int y) const
Definition: colpartition.h:321

◆ CopyRightTab()

void tesseract::ColPartition::CopyRightTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 532 of file colpartition.cpp.

532 {
533 right_key_tab_ = take_box ? false : src.right_key_tab_;
534 if (right_key_tab_) {
535 right_key_ = src.right_key_;
536 } else {
537 bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY()));
538 right_key_ = BoxRightKey();
539 }
540 if (right_margin_ < bounding_box_.right())
541 right_margin_ = src.right_margin_;
542}

◆ CountOverlappingBoxes()

int tesseract::ColPartition::CountOverlappingBoxes ( const TBOX & box )

Definition at line 960 of file colpartition.cpp.

960 {
961 BLOBNBOX_C_IT it(&boxes_);
962 int overlap_count = 0;
963 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
964 BLOBNBOX* bbox = it.data();
965 if (box.overlap(bbox->bounding_box()))
966 ++overlap_count;
967 }
968 return overlap_count;
969}
bool overlap(const TBOX &box) const
Definition: rect.h:355

◆ DeleteBoxes()

void tesseract::ColPartition::DeleteBoxes ( )

Definition at line 305 of file colpartition.cpp.

305 {
306 // Although the boxes_ list is a C_LIST, in some cases it owns the
307 // BLOBNBOXes, as the ColPartition takes ownership from the grid,
308 // and the BLOBNBOXes own the underlying C_BLOBs.
309 for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
310 BLOBNBOX* bblob = bb_it.extract();
311 delete bblob->cblob();
312 delete bblob;
313 }
314}
C_BLOB * cblob() const
Definition: blobbox.h:268

◆ desperately_merged()

bool tesseract::ColPartition::desperately_merged ( ) const
inline

Definition at line 212 of file colpartition.h.

212 {
213 return desperately_merged_;
214 }

◆ DisownBoxes()

void tesseract::ColPartition::DisownBoxes ( )

Definition at line 263 of file colpartition.cpp.

263 {
264 BLOBNBOX_C_IT bb_it(&boxes_);
265 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
266 BLOBNBOX* bblob = bb_it.data();
267 ASSERT_HOST(bblob->owner() == this || bblob->owner() == nullptr);
268 bblob->set_owner(nullptr);
269 }
270}

◆ DisownBoxesNoAssert()

void tesseract::ColPartition::DisownBoxesNoAssert ( )

Definition at line 276 of file colpartition.cpp.

276 {
277 BLOBNBOX_C_IT bb_it(&boxes_);
278 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
279 BLOBNBOX* bblob = bb_it.data();
280 if (bblob->owner() == this)
281 bblob->set_owner(nullptr);
282 }
283}

◆ FakePartition()

ColPartition * tesseract::ColPartition::FakePartition ( const TBOX box,
PolyBlockType  block_type,
BlobRegionType  blob_type,
BlobTextFlowType  flow 
)
static

Definition at line 95 of file colpartition.cpp.

98 {
99 ColPartition* part = new ColPartition(blob_type, ICOORD(0, 1));
100 part->set_type(block_type);
101 part->set_flow(flow);
102 part->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(box)));
103 part->set_left_margin(box.left());
104 part->set_right_margin(box.right());
105 part->SetBlobTypes();
106 part->ComputeLimits();
107 part->ClaimBoxes();
108 return part;
109}
integer coordinate
Definition: points.h:32
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:241
BlobTextFlowType flow() const
Definition: colpartition.h:155

◆ flow()

BlobTextFlowType tesseract::ColPartition::flow ( ) const
inline

Definition at line 155 of file colpartition.h.

155 {
156 return flow_;
157 }

◆ good_blob_score()

int tesseract::ColPartition::good_blob_score ( ) const
inline

Definition at line 161 of file colpartition.h.

161 {
162 return good_blob_score_;
163 }

◆ good_column()

bool tesseract::ColPartition::good_column ( ) const
inline

Definition at line 167 of file colpartition.h.

167 {
168 return good_column_;
169 }

◆ good_width()

bool tesseract::ColPartition::good_width ( ) const
inline

Definition at line 164 of file colpartition.h.

164 {
165 return good_width_;
166 }

◆ HasGoodBaseline()

bool tesseract::ColPartition::HasGoodBaseline ( )

Definition at line 1280 of file colpartition.cpp.

1280 {
1281 // Approximation of the baseline.
1282 DetLineFit linepoints;
1283 // Calculation of the mean height on this line segment. Note that these
1284 // variable names apply to the context of a horizontal line, and work
1285 // analogously, rather than literally in the case of a vertical line.
1286 int total_height = 0;
1287 int coverage = 0;
1288 int height_count = 0;
1289 int width = 0;
1290 BLOBNBOX_C_IT it(&boxes_);
1291 TBOX box(it.data()->bounding_box());
1292 // Accumulate points representing the baseline at the middle of each blob,
1293 // but add an additional point for each end of the line. This makes it
1294 // harder to fit a severe skew angle, as it is most likely not right.
1295 if (IsVerticalType()) {
1296 // For a vertical line, use the right side as the baseline.
1297 ICOORD first_pt(box.right(), box.bottom());
1298 // Use the bottom-right of the first (bottom) box, the top-right of the
1299 // last, and the middle-right of all others.
1300 linepoints.Add(first_pt);
1301 for (it.forward(); !it.at_last(); it.forward()) {
1302 BLOBNBOX* blob = it.data();
1303 box = blob->bounding_box();
1304 ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1305 linepoints.Add(box_pt);
1306 total_height += box.width();
1307 coverage += box.height();
1308 ++height_count;
1309 }
1310 box = it.data()->bounding_box();
1311 ICOORD last_pt(box.right(), box.top());
1312 linepoints.Add(last_pt);
1313 width = last_pt.y() - first_pt.y();
1314
1315 } else {
1316 // Horizontal lines use the bottom as the baseline.
1317 TBOX box(it.data()->bounding_box());
1318 // Use the bottom-left of the first box, the bottom-right of the last,
1319 // and the middle of all others.
1320 ICOORD first_pt(box.left(), box.bottom());
1321 linepoints.Add(first_pt);
1322 for (it.forward(); !it.at_last(); it.forward()) {
1323 BLOBNBOX* blob = it.data();
1324 box = blob->bounding_box();
1325 ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1326 linepoints.Add(box_pt);
1327 total_height += box.height();
1328 coverage += box.width();
1329 ++height_count;
1330 }
1331 box = it.data()->bounding_box();
1332 ICOORD last_pt(box.right(), box.bottom());
1333 linepoints.Add(last_pt);
1334 width = last_pt.x() - first_pt.x();
1335 }
1336 // Maximum median error allowed to be a good text line.
1337 if (height_count == 0)
1338 return false;
1339 double max_error = kMaxBaselineError * total_height / height_count;
1340 ICOORD start_pt, end_pt;
1341 double error = linepoints.Fit(&start_pt, &end_pt);
1342 return error < max_error && coverage >= kMinBaselineCoverage * width;
1343}
const double kMinBaselineCoverage
const double kMaxBaselineError

◆ HCoreOverlap()

int tesseract::ColPartition::HCoreOverlap ( const ColPartition other) const
inline

Definition at line 385 of file colpartition.h.

385 {
386 return std::min(median_right_, other.median_right_) -
387 std::max(median_left_, other.median_left_);
388 }

◆ HOverlaps()

bool tesseract::ColPartition::HOverlaps ( const ColPartition other) const
inline

Definition at line 366 of file colpartition.h.

366 {
367 return bounding_box_.x_overlap(other.bounding_box_);
368 }
bool x_overlap(const TBOX &box) const
Definition: rect.h:401

◆ inside_table_column()

bool tesseract::ColPartition::inside_table_column ( )
inline

Definition at line 244 of file colpartition.h.

244 {
245 return inside_table_column_;
246 }

◆ IsEmpty()

bool tesseract::ColPartition::IsEmpty ( ) const
inline

Definition at line 358 of file colpartition.h.

358 {
359 return boxes_.empty();
360 }

◆ IsHorizontalLine()

bool tesseract::ColPartition::IsHorizontalLine ( ) const
inline

Definition at line 460 of file colpartition.h.

460 {
461 return IsHorizontalType() && IsLineType();
462 }
bool IsHorizontalType() const
Definition: colpartition.h:446

◆ IsHorizontalType()

bool tesseract::ColPartition::IsHorizontalType ( ) const
inline

Definition at line 446 of file colpartition.h.

446 {
447 return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE;
448 }
@ BRT_TEXT
Definition: blobbox.h:80
@ BRT_HLINE
Definition: blobbox.h:74

◆ IsImageType()

bool tesseract::ColPartition::IsImageType ( ) const
inline

Definition at line 430 of file colpartition.h.

430 {
431 return PTIsImageType(type_);
432 }
bool PTIsImageType(PolyBlockType type)
Definition: publictypes.h:77

◆ IsInSameColumnAs()

bool tesseract::ColPartition::IsInSameColumnAs ( const ColPartition part) const

Definition at line 2175 of file colpartition.cpp.

2175 {
2176 // Overlap does not occur when last < part.first or first > part.last.
2177 // In other words, one is completely to the side of the other.
2178 // This is just DeMorgan's law applied to that so the function returns true.
2179 return (last_column_ >= part.first_column_) &&
2180 (first_column_ <= part.last_column_);
2181}

◆ IsLeftOf()

bool tesseract::ColPartition::IsLeftOf ( const ColPartition other) const
inline

Definition at line 350 of file colpartition.h.

350 {
351 return bounding_box_.right() < other.bounding_box_.right();
352 }

◆ IsLegal()

bool tesseract::ColPartition::IsLegal ( )

Definition at line 342 of file colpartition.cpp.

342 {
343 if (bounding_box_.left() > bounding_box_.right()) {
344 if (textord_debug_bugs) {
345 tprintf("Bounding box invalid\n");
346 Print();
347 }
348 return false; // Bounding box invalid.
349 }
350 if (left_margin_ > bounding_box_.left() ||
351 right_margin_ < bounding_box_.right()) {
352 if (textord_debug_bugs) {
353 tprintf("Margins invalid\n");
354 Print();
355 }
356 return false; // Margins invalid.
357 }
358 if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) {
359 if (textord_debug_bugs) {
360 tprintf("Key inside box: %d v %d or %d v %d\n",
361 left_key_, BoxLeftKey(), right_key_, BoxRightKey());
362 Print();
363 }
364 return false; // Keys inside the box.
365 }
366 return true;
367}

◆ IsLineType()

bool tesseract::ColPartition::IsLineType ( ) const
inline

Definition at line 426 of file colpartition.h.

426 {
427 return PTIsLineType(type_);
428 }
bool PTIsLineType(PolyBlockType type)
Definition: publictypes.h:73

◆ IsPulloutType()

bool tesseract::ColPartition::IsPulloutType ( ) const
inline

Definition at line 438 of file colpartition.h.

438 {
439 return PTIsPulloutType(type_);
440 }
bool PTIsPulloutType(PolyBlockType type)
Definition: publictypes.h:89

◆ IsSingleton()

bool tesseract::ColPartition::IsSingleton ( ) const
inline

Definition at line 362 of file colpartition.h.

362 {
363 return boxes_.singleton();
364 }

◆ IsTextType()

bool tesseract::ColPartition::IsTextType ( ) const
inline

Definition at line 434 of file colpartition.h.

434 {
435 return PTIsTextType(type_);
436 }
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:82

◆ IsUnMergeableType()

bool tesseract::ColPartition::IsUnMergeableType ( ) const
inline

Definition at line 450 of file colpartition.h.

450 {
451 return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;
452 }
@ PT_NOISE
Definition: capi.h:143
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:430

◆ IsVerticalLine()

bool tesseract::ColPartition::IsVerticalLine ( ) const
inline

Definition at line 455 of file colpartition.h.

455 {
456 return IsVerticalType() && IsLineType();
457 }

◆ IsVerticalType()

bool tesseract::ColPartition::IsVerticalType ( ) const
inline

Definition at line 442 of file colpartition.h.

442 {
443 return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
444 }
@ BRT_VLINE
Definition: blobbox.h:75
@ BRT_VERT_TEXT
Definition: blobbox.h:79

◆ KeyWidth()

int tesseract::ColPartition::KeyWidth ( int  left_key,
int  right_key 
) const
inline

Definition at line 325 of file colpartition.h.

325 {
326 return (right_key - left_key) / vertical_.y();
327 }
int16_t y() const
access function
Definition: points.h:56

◆ left_key()

int tesseract::ColPartition::left_key ( ) const
inline

Definition at line 173 of file colpartition.h.

173 {
174 return left_key_;
175 }

◆ left_key_tab()

bool tesseract::ColPartition::left_key_tab ( ) const
inline

Definition at line 170 of file colpartition.h.

170 {
171 return left_key_tab_;
172 }

◆ left_margin()

int tesseract::ColPartition::left_margin ( ) const
inline

Definition at line 113 of file colpartition.h.

113 {
114 return left_margin_;
115 }

◆ LeftAtY()

int tesseract::ColPartition::LeftAtY ( int  y) const
inline

Definition at line 341 of file colpartition.h.

341 {
342 return XAtY(left_key_, y);
343 }

◆ LeftBlobRule()

int tesseract::ColPartition::LeftBlobRule ( ) const

Definition at line 545 of file colpartition.cpp.

545 {
546 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
547 return it.data()->left_rule();
548}

◆ LineSpacingBlocks()

void tesseract::ColPartition::LineSpacingBlocks ( const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts,
BLOCK_LIST *  completed_blocks,
TO_BLOCK_LIST *  to_blocks 
)
static

Definition at line 1407 of file colpartition.cpp.

1412 {
1413 int page_height = tright.y() - bleft.y();
1414 // Compute the initial spacing stats.
1415 ColPartition_IT it(block_parts);
1416 int part_count = 0;
1417 int max_line_height = 0;
1418
1419 // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
1420 // because their line spacing with their neighbors maybe smaller and their
1421 // height may be slightly larger.
1422
1423 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1424 ColPartition* part = it.data();
1425 ASSERT_HOST(!part->boxes()->empty());
1426 STATS side_steps(0, part->bounding_box().height());
1427 if (part->bounding_box().height() > max_line_height)
1428 max_line_height = part->bounding_box().height();
1429 BLOBNBOX_C_IT blob_it(part->boxes());
1430 int prev_bottom = blob_it.data()->bounding_box().bottom();
1431 for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1432 BLOBNBOX* blob = blob_it.data();
1433 int bottom = blob->bounding_box().bottom();
1434 int step = bottom - prev_bottom;
1435 if (step < 0)
1436 step = -step;
1437 side_steps.add(step, 1);
1438 prev_bottom = bottom;
1439 }
1440 part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
1441 if (!it.at_last()) {
1442 ColPartition* next_part = it.data_relative(1);
1443 part->set_bottom_spacing(part->median_bottom() -
1444 next_part->median_bottom());
1445 part->set_top_spacing(part->median_top() - next_part->median_top());
1446 } else {
1447 part->set_bottom_spacing(page_height);
1448 part->set_top_spacing(page_height);
1449 }
1450 if (textord_debug_tabfind) {
1451 part->Print();
1452 tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1453 side_steps.median(), part->top_spacing(), part->bottom_spacing());
1454 }
1455 ++part_count;
1456 }
1457 if (part_count == 0)
1458 return;
1459
1460 SmoothSpacings(resolution, page_height, block_parts);
1461
1462 // Move the partitions into individual block lists and make the blocks.
1463 BLOCK_IT block_it(completed_blocks);
1464 TO_BLOCK_IT to_block_it(to_blocks);
1465 ColPartition_LIST spacing_parts;
1466 ColPartition_IT sp_block_it(&spacing_parts);
1467 int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
1468 for (it.mark_cycle_pt(); !it.empty();) {
1469 ColPartition* part = it.extract();
1470 sp_block_it.add_to_end(part);
1471 it.forward();
1472 if (it.empty() || part->bottom_spacing() > same_block_threshold ||
1473 !part->SpacingsEqual(*it.data(), resolution)) {
1474 // There is a spacing boundary. Check to see if it.data() belongs
1475 // better in the current block or the next one.
1476 if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
1477 ColPartition* next_part = it.data();
1478 // If there is a size match one-way, then the middle line goes with
1479 // its matched size, otherwise it goes with the smallest spacing.
1480 ColPartition* third_part = it.at_last() ? nullptr : it.data_relative(1);
1481 if (textord_debug_tabfind) {
1482 tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d,"
1483 " sizes %d %d %d\n",
1484 part->top_spacing(), part->bottom_spacing(),
1485 next_part->top_spacing(), next_part->bottom_spacing(),
1486 part->median_height(), next_part->median_height(),
1487 third_part != nullptr ? third_part->median_height() : 0);
1488 }
1489 // We can only consider adding the next line to the block if the sizes
1490 // match and the lines are close enough for their size.
1491 if (part->SizesSimilar(*next_part) &&
1492 next_part->median_height() * kMaxSameBlockLineSpacing >
1493 part->bottom_spacing() &&
1494 part->median_height() * kMaxSameBlockLineSpacing >
1495 part->top_spacing()) {
1496 // Even now, we can only add it as long as the third line doesn't
1497 // match in the same way and have a smaller bottom spacing.
1498 if (third_part == nullptr ||
1499 !next_part->SizesSimilar(*third_part) ||
1500 third_part->median_height() * kMaxSameBlockLineSpacing <=
1501 next_part->bottom_spacing() ||
1502 next_part->median_height() * kMaxSameBlockLineSpacing <=
1503 next_part->top_spacing() ||
1504 next_part->bottom_spacing() > part->bottom_spacing()) {
1505 // Add to the current block.
1506 sp_block_it.add_to_end(it.extract());
1507 it.forward();
1508 if (textord_debug_tabfind) {
1509 tprintf("Added line to current block.\n");
1510 }
1511 }
1512 }
1513 }
1514 TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
1515 if (to_block != nullptr) {
1516 to_block_it.add_to_end(to_block);
1517 block_it.add_to_end(to_block->block);
1518 }
1519 sp_block_it.set_to_list(&spacing_parts);
1520 } else {
1521 if (textord_debug_tabfind && !it.empty()) {
1522 ColPartition* next_part = it.data();
1523 tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
1524 part->top_spacing(), part->bottom_spacing(),
1525 next_part->top_spacing(), next_part->bottom_spacing(),
1526 part->median_height(), next_part->median_height());
1527 }
1528 }
1529 }
1530}
const double kMaxSameBlockLineSpacing
BLOCK * block
Definition: blobbox.h:777
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)

◆ lower_partners()

ColPartition_CLIST * tesseract::ColPartition::lower_partners ( )
inline

Definition at line 200 of file colpartition.h.

200 {
201 return &lower_partners_;
202 }

◆ MakeBigPartition()

ColPartition * tesseract::ColPartition::MakeBigPartition ( BLOBNBOX * box,
ColPartition_LIST *  big_part_list 
)
static

Definition at line 116 of file colpartition.cpp.

117 {
118 box->set_owner(nullptr);
119 ColPartition* single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
120 single->set_flow(BTFT_NONE);
121 single->AddBox(box);
122 single->ComputeLimits();
123 single->ClaimBoxes();
124 single->SetBlobTypes();
125 single->set_block_owned(true);
126 if (big_part_list != nullptr) {
127 ColPartition_IT part_it(big_part_list);
128 part_it.add_to_end(single);
129 }
130 return single;
131}
@ BTFT_NONE
Definition: blobbox.h:115
@ BRT_UNKNOWN
Definition: blobbox.h:78

◆ MakeBlock()

TO_BLOCK * tesseract::ColPartition::MakeBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1623 of file colpartition.cpp.

1625 {
1626 if (block_parts->empty())
1627 return nullptr; // Nothing to do.
1628 // If the block_parts are not in reading order, then it will make an invalid
1629 // block polygon and bounding_box, so sort by bounding box now just to make
1630 // sure.
1631 block_parts->sort(&ColPartition::SortByBBox);
1632 ColPartition_IT it(block_parts);
1633 ColPartition* part = it.data();
1634 PolyBlockType type = part->type();
1635 if (type == PT_VERTICAL_TEXT)
1636 return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);
1637 // LineSpacingBlocks has handed us a collection of evenly spaced lines and
1638 // put the average spacing in each partition, so we can just take the
1639 // linespacing from the first partition.
1640 int line_spacing = part->bottom_spacing();
1641 if (line_spacing < part->median_height())
1642 line_spacing = part->bounding_box().height();
1643 ICOORDELT_LIST vertices;
1644 ICOORDELT_IT vert_it(&vertices);
1645 ICOORD start, end;
1646 int min_x = INT32_MAX;
1647 int max_x = -INT32_MAX;
1648 int min_y = INT32_MAX;
1649 int max_y = -INT32_MAX;
1650 int iteration = 0;
1651 do {
1652 if (iteration == 0)
1653 ColPartition::LeftEdgeRun(&it, &start, &end);
1654 else
1655 ColPartition::RightEdgeRun(&it, &start, &end);
1656 ClipCoord(bleft, tright, &start);
1657 ClipCoord(bleft, tright, &end);
1658 vert_it.add_after_then_move(new ICOORDELT(start));
1659 vert_it.add_after_then_move(new ICOORDELT(end));
1660 UpdateRange(start.x(), &min_x, &max_x);
1661 UpdateRange(end.x(), &min_x, &max_x);
1662 UpdateRange(start.y(), &min_y, &max_y);
1663 UpdateRange(end.y(), &min_y, &max_y);
1664 if ((iteration == 0 && it.at_first()) ||
1665 (iteration == 1 && it.at_last())) {
1666 ++iteration;
1667 it.move_to_last();
1668 }
1669 } while (iteration < 2);
1670 if (textord_debug_tabfind)
1671 tprintf("Making block at (%d,%d)->(%d,%d)\n",
1672 min_x, min_y, max_x, max_y);
1673 auto* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y);
1674 block->pdblk.set_poly_block(new POLY_BLOCK(&vertices, type));
1675 return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts);
1676}
@ PT_VERTICAL_TEXT
Definition: capi.h:136
PolyBlockType
Definition: publictypes.h:53
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:120
Definition: ocrblock.h:31
int16_t x() const
access function
Definition: points.h:52
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
static int SortByBBox(const void *p1, const void *p2)
Definition: colpartition.h:715

◆ MakeLinePartition()

ColPartition * tesseract::ColPartition::MakeLinePartition ( BlobRegionType  blob_type,
const ICOORD vertical,
int  left,
int  bottom,
int  right,
int  top 
)
static

Constructs a fake ColPartition with no BLOBNBOXes to represent a horizontal or vertical line, given a type and a bounding box.

Definition at line 148 of file colpartition.cpp.

151 {
152 auto* part = new ColPartition(blob_type, vertical);
153 part->bounding_box_ = TBOX(left, bottom, right, top);
154 part->median_bottom_ = bottom;
155 part->median_top_ = top;
156 part->median_height_ = top - bottom;
157 part->median_left_ = left;
158 part->median_right_ = right;
159 part->median_width_ = right - left;
160 part->left_key_ = part->BoxLeftKey();
161 part->right_key_ = part->BoxRightKey();
162 return part;
163}

◆ MakeToRow()

TO_ROW * tesseract::ColPartition::MakeToRow ( )

Definition at line 1706 of file colpartition.cpp.

1706 {
1707 BLOBNBOX_C_IT blob_it(&boxes_);
1708 TO_ROW* row = nullptr;
1709 int line_size = IsVerticalType() ? median_width_ : median_height_;
1710 // Add all the blobs to a single TO_ROW.
1711 for (; !blob_it.empty(); blob_it.forward()) {
1712 BLOBNBOX* blob = blob_it.extract();
1713// blob->compute_bounding_box();
1714 int top = blob->bounding_box().top();
1715 int bottom = blob->bounding_box().bottom();
1716 if (row == nullptr) {
1717 row = new TO_ROW(blob, static_cast<float>(top),
1718 static_cast<float>(bottom),
1719 static_cast<float>(line_size));
1720 } else {
1721 row->add_blob(blob, static_cast<float>(top),
1722 static_cast<float>(bottom),
1723 static_cast<float>(line_size));
1724 }
1725 }
1726 return row;
1727}
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
Definition: blobbox.cpp:733

◆ MakeVerticalTextBlock()

TO_BLOCK * tesseract::ColPartition::MakeVerticalTextBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1680 of file colpartition.cpp.

1683 {
1684 if (block_parts->empty())
1685 return nullptr; // Nothing to do.
1686 ColPartition_IT it(block_parts);
1687 ColPartition* part = it.data();
1688 TBOX block_box = part->bounding_box();
1689 int line_spacing = block_box.width();
1690 PolyBlockType type = it.data()->type();
1691 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1692 block_box += it.data()->bounding_box();
1693 }
1694 if (textord_debug_tabfind) {
1695 tprintf("Making block at:");
1696 block_box.print();
1697 }
1698 auto* block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(),
1699 block_box.right(), block_box.top());
1700 block->pdblk.set_poly_block(new POLY_BLOCK(block_box, type));
1701 return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
1702}
void print() const
Definition: rect.h:278

◆ MarkAsLeaderIfMonospaced()

bool tesseract::ColPartition::MarkAsLeaderIfMonospaced ( )

Definition at line 1083 of file colpartition.cpp.

1083 {
1084 bool result = false;
1085 // Gather statistics on the gaps between blobs and the widths of the blobs.
1086 int part_width = bounding_box_.width();
1087 STATS gap_stats(0, part_width);
1088 STATS width_stats(0, part_width);
1089 BLOBNBOX_C_IT it(&boxes_);
1090 BLOBNBOX* prev_blob = it.data();
1091 prev_blob->set_flow(BTFT_NEIGHBOURS);
1092 width_stats.add(prev_blob->bounding_box().width(), 1);
1093 int blob_count = 1;
1094 for (it.forward(); !it.at_first(); it.forward()) {
1095 BLOBNBOX* blob = it.data();
1096 int left = blob->bounding_box().left();
1097 int right = blob->bounding_box().right();
1098 gap_stats.add(left - prev_blob->bounding_box().right(), 1);
1099 width_stats.add(right - left, 1);
1100 blob->set_flow(BTFT_NEIGHBOURS);
1101 prev_blob = blob;
1102 ++blob_count;
1103 }
1104 double median_gap = gap_stats.median();
1105 double median_width = width_stats.median();
1106 double max_width = std::max(median_gap, median_width);
1107 double min_width = std::min(median_gap, median_width);
1108 double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
1109 if (textord_debug_tabfind >= 4) {
1110 tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n",
1111 gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax,
1112 min_width * kMaxLeaderGapFractionOfMin);
1113 }
1114 if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
1115 gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
1116 blob_count >= kMinLeaderCount) {
1117 // This is stable enough to be called a leader, so check the widths.
1118 // Since leader dashes can join, run a dp cutting algorithm and go
1119 // on the cost.
1120 int offset = static_cast<int>(ceil(gap_iqr * 2));
1121 int min_step = static_cast<int>(median_gap + median_width + 0.5);
1122 int max_step = min_step + offset;
1123 min_step -= offset;
1124 // Pad the buffer with min_step/2 on each end.
1125 int part_left = bounding_box_.left() - min_step / 2;
1126 part_width += min_step;
1127 auto* projection = new DPPoint[part_width];
1128 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1129 BLOBNBOX* blob = it.data();
1130 int left = blob->bounding_box().left();
1131 int right = blob->bounding_box().right();
1132 int height = blob->bounding_box().height();
1133 for (int x = left; x < right; ++x) {
1134 projection[left - part_left].AddLocalCost(height);
1135 }
1136 }
1137 DPPoint* best_end = DPPoint::Solve(min_step, max_step, false,
1138 &DPPoint::CostWithVariance,
1139 part_width, projection);
1140 if (best_end != nullptr && best_end->total_cost() < blob_count) {
1141 // Good enough. Call it a leader.
1142 result = true;
1143 bool modified_blob_list = false;
1144 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1145 BLOBNBOX* blob = it.data();
1146 // If the first or last blob is spaced too much, don't mark it.
1147 if (it.at_first()) {
1148 int gap = it.data_relative(1)->bounding_box().left() -
1149 blob->bounding_box().right();
1150 if (blob->bounding_box().width() + gap > max_step) {
1151 it.extract();
1152 modified_blob_list = true;
1153 continue;
1154 }
1155 }
1156 if (it.at_last()) {
1157 int gap = blob->bounding_box().left() -
1158 it.data_relative(-1)->bounding_box().right();
1159 if (blob->bounding_box().width() + gap > max_step) {
1160 it.extract();
1161 modified_blob_list = true;
1162 break;
1163 }
1164 }
1165 blob->set_region_type(BRT_TEXT);
1166 blob->set_flow(BTFT_LEADER);
1167 }
1168 if (modified_blob_list) ComputeLimits();
1169 blob_type_ = BRT_TEXT;
1170 flow_ = BTFT_LEADER;
1171 } else if (textord_debug_tabfind) {
1172 if (best_end == nullptr) {
1173 tprintf("No path\n");
1174 } else {
1175 tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(),
1176 blob_count);
1177 }
1178 }
1179 delete [] projection;
1180 }
1181 return result;
1182}
@ BTFT_NEIGHBOURS
Definition: blobbox.h:117
const double kMaxLeaderGapFractionOfMin
const int kMinLeaderCount
const double kMaxLeaderGapFractionOfMax
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:298
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:286
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
Definition: dppoint.cpp:31
int64_t CostWithVariance(const DPPoint *prev)
Definition: dppoint.cpp:69

◆ MatchingColumns()

bool tesseract::ColPartition::MatchingColumns ( const ColPartition other) const

Definition at line 370 of file colpartition.cpp.

370 {
371 int y = (MidY() + other.MidY()) / 2;
372 if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor,
373 LeftAtY(y) / kColumnWidthFactor, 1))
374 return false;
375 if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor,
376 RightAtY(y) / kColumnWidthFactor, 1))
377 return false;
378 return true;
379}
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:37
const int kColumnWidthFactor
Definition: tabfind.h:42

◆ MatchingSizes()

bool tesseract::ColPartition::MatchingSizes ( const ColPartition other) const

Definition at line 405 of file colpartition.cpp.

405 {
406 if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT)
407 return !TabFind::DifferentSizes(median_width_, other.median_width_);
408 else
409 return !TabFind::DifferentSizes(median_height_, other.median_height_);
410}
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:407

◆ MatchingStrokeWidth()

bool tesseract::ColPartition::MatchingStrokeWidth ( const ColPartition other,
double  fractional_tolerance,
double  constant_tolerance 
) const

Definition at line 430 of file colpartition.cpp.

432 {
433 int match_count = 0;
434 int nonmatch_count = 0;
435 BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
436 BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
437 box_it.mark_cycle_pt();
438 other_it.mark_cycle_pt();
439 while (!box_it.cycled_list() && !other_it.cycled_list()) {
440 if (box_it.data()->MatchingStrokeWidth(*other_it.data(),
441 fractional_tolerance,
442 constant_tolerance))
443 ++match_count;
444 else
445 ++nonmatch_count;
446 box_it.forward();
447 other_it.forward();
448 }
449 return match_count > nonmatch_count;
450}

◆ MatchingTextColor()

bool tesseract::ColPartition::MatchingTextColor ( const ColPartition other) const

Definition at line 382 of file colpartition.cpp.

382 {
383 if (color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise &&
384 other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise)
385 return false; // Too noisy.
386
387 // Colors must match for other to count.
388 double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_,
389 other.color2_,
390 color1_);
391 double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_,
392 other.color2_,
393 color2_);
394 double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
395 other.color1_);
396 double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
397 other.color2_);
398// All 4 distances must be small enough.
399 return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance &&
400 d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance;
401}
const int kMaxColorDistance
const int kMaxRMSColorNoise
static double ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
Definition: imagefind.cpp:355

◆ median_bottom()

int tesseract::ColPartition::median_bottom ( ) const
inline

Definition at line 128 of file colpartition.h.

128 {
129 return median_bottom_;
130 }

◆ median_height()

int tesseract::ColPartition::median_height ( ) const
inline

Definition at line 137 of file colpartition.h.

137 {
138 return median_height_;
139 }

◆ median_left()

int tesseract::ColPartition::median_left ( ) const
inline

Definition at line 131 of file colpartition.h.

131 {
132 return median_left_;
133 }

◆ median_right()

int tesseract::ColPartition::median_right ( ) const
inline

Definition at line 134 of file colpartition.h.

134 {
135 return median_right_;
136 }

◆ median_top()

int tesseract::ColPartition::median_top ( ) const
inline

Definition at line 125 of file colpartition.h.

125 {
126 return median_top_;
127 }

◆ median_width()

int tesseract::ColPartition::median_width ( ) const
inline

Definition at line 143 of file colpartition.h.

143 {
144 return median_width_;
145 }

◆ MedianY()

int tesseract::ColPartition::MedianY ( ) const
inline

Definition at line 309 of file colpartition.h.

309 {
310 return (median_top_ + median_bottom_) / 2;
311 }

◆ MidX()

int tesseract::ColPartition::MidX ( ) const
inline

Definition at line 313 of file colpartition.h.

313 {
314 return (bounding_box_.left() + bounding_box_.right()) / 2;
315 }

◆ MidY()

int tesseract::ColPartition::MidY ( ) const
inline

Definition at line 305 of file colpartition.h.

305 {
306 return (bounding_box_.top() + bounding_box_.bottom()) / 2;
307 }

◆ nearest_neighbor_above()

ColPartition * tesseract::ColPartition::nearest_neighbor_above ( ) const
inline

Definition at line 250 of file colpartition.h.

250 {
251 return nearest_neighbor_above_;
252 }

◆ nearest_neighbor_below()

ColPartition * tesseract::ColPartition::nearest_neighbor_below ( ) const
inline

Definition at line 256 of file colpartition.h.

256 {
257 return nearest_neighbor_below_;
258 }

◆ OKDiacriticMerge()

bool tesseract::ColPartition::OKDiacriticMerge ( const ColPartition & candidate,
bool  debug 
) const

Definition at line 458 of file colpartition.cpp.

459 {
460 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
461 int min_top = INT32_MAX;
462 int max_bottom = -INT32_MAX;
463 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
464 BLOBNBOX* blob = it.data();
465 if (!blob->IsDiacritic()) {
466 if (debug) {
467 tprintf("Blob is not a diacritic:");
468 blob->bounding_box().print();
469 }
470 return false; // All blobs must have diacritic bases.
471 }
472 if (blob->base_char_top() < min_top)
473 min_top = blob->base_char_top();
474 if (blob->base_char_bottom() > max_bottom)
475 max_bottom = blob->base_char_bottom();
476 }
477 // If the intersection of all vertical ranges of all base characters
478 // overlaps the median range of this, then it is OK.
479 bool result = min_top > candidate.median_bottom_ &&
480 max_bottom < candidate.median_top_;
481 if (debug) {
482 if (result)
483 tprintf("OKDiacritic!\n");
484 else
485 tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n",
486 max_bottom, min_top, median_bottom_, median_top_);
487 }
488 return result;
489}
bool IsDiacritic() const
Definition: blobbox.h:380
int base_char_bottom() const
Definition: blobbox.h:386
int base_char_top() const
Definition: blobbox.h:383

◆ OKMergeOverlap()

bool tesseract::ColPartition::OKMergeOverlap ( const ColPartition & merge1,
const ColPartition & merge2,
int  ok_box_overlap,
bool  debug 
)

Definition at line 736 of file colpartition.cpp.

738 {
739 // Vertical partitions are not allowed to be involved.
740 if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) {
741 if (debug)
742 tprintf("Vertical partition\n");
743 return false;
744 }
745 // The merging partitions must strongly overlap each other.
746 if (!merge1.VSignificantCoreOverlap(merge2)) {
747 if (debug)
748 tprintf("Voverlap %d (%d)\n",
749 merge1.VCoreOverlap(merge2),
750 merge1.VSignificantCoreOverlap(merge2));
751 return false;
752 }
753 // The merged box must not overlap the median bounds of this.
754 TBOX merged_box(merge1.bounding_box());
755 merged_box += merge2.bounding_box();
756 if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
757 merged_box.bottom() < bounding_box_.top() - ok_box_overlap &&
758 merged_box.top() > bounding_box_.bottom() + ok_box_overlap) {
759 if (debug)
760 tprintf("Excessive box overlap\n");
761 return false;
762 }
763 // Looks OK!
764 return true;
765}

◆ OverlapSplitBlob()

BLOBNBOX * tesseract::ColPartition::OverlapSplitBlob ( const TBOX & box)

Definition at line 769 of file colpartition.cpp.

769 {
770 if (boxes_.empty() || boxes_.singleton())
771 return nullptr;
772 BLOBNBOX_C_IT it(&boxes_);
773 TBOX left_box(it.data()->bounding_box());
774 for (it.forward(); !it.at_first(); it.forward()) {
775 BLOBNBOX* bbox = it.data();
776 left_box += bbox->bounding_box();
777 if (left_box.overlap(box))
778 return bbox;
779 }
780 return nullptr;
781}

◆ owns_blobs()

bool tesseract::ColPartition::owns_blobs ( ) const
inline

Definition at line 292 of file colpartition.h.

292 {
293 return owns_blobs_;
294 }

◆ PartitionType()

PolyBlockType tesseract::ColPartition::PartitionType ( ColumnSpanningType  flow) const

Definition at line 1006 of file colpartition.cpp.

1006 {
1007 if (flow == CST_NOISE) {
1008 if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE &&
1009 blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT)
1010 return PT_NOISE;
1011 flow = CST_FLOWING;
1012 }
1013
1014 switch (blob_type_) {
1015 case BRT_NOISE:
1016 return PT_NOISE;
1017 case BRT_HLINE:
1018 return PT_HORZ_LINE;
1019 case BRT_VLINE:
1020 return PT_VERT_LINE;
1021 case BRT_RECTIMAGE:
1022 case BRT_POLYIMAGE:
1023 switch (flow) {
1024 case CST_FLOWING:
1025 return PT_FLOWING_IMAGE;
1026 case CST_HEADING:
1027 return PT_HEADING_IMAGE;
1028 case CST_PULLOUT:
1029 return PT_PULLOUT_IMAGE;
1030 default:
1031 ASSERT_HOST(!"Undefined flow type for image!");
1032 }
1033 break;
1034 case BRT_VERT_TEXT:
1035 return PT_VERTICAL_TEXT;
1036 case BRT_TEXT:
1037 case BRT_UNKNOWN:
1038 default:
1039 switch (flow) {
1040 case CST_FLOWING:
1041 return PT_FLOWING_TEXT;
1042 case CST_HEADING:
1043 return PT_HEADING_TEXT;
1044 case CST_PULLOUT:
1045 return PT_PULLOUT_TEXT;
1046 default:
1047 ASSERT_HOST(!"Undefined flow type for text!");
1048 }
1049 }
1050 ASSERT_HOST(!"Should never get here!");
1051 return PT_NOISE;
1052}
@ PT_VERT_LINE
Definition: capi.h:142
@ PT_PULLOUT_TEXT
Definition: capi.h:132
@ PT_HEADING_TEXT
Definition: capi.h:131
@ PT_PULLOUT_IMAGE
Definition: capi.h:140
@ PT_HEADING_IMAGE
Definition: capi.h:139
@ PT_FLOWING_TEXT
Definition: capi.h:130
@ PT_HORZ_LINE
Definition: capi.h:141
@ PT_FLOWING_IMAGE
Definition: capi.h:138
@ BRT_NOISE
Definition: blobbox.h:73

◆ Print()

void tesseract::ColPartition::Print ( ) const

Definition at line 1782 of file colpartition.cpp.

1782 {
1783 int y = MidY();
1784 tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
1785 " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
1786 " ts=%d bs=%d ls=%d rs=%d\n",
1787 boxes_.empty() ? 'E' : ' ',
1788 left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y),
1789 bounding_box_.left(), median_left_,
1790 bounding_box_.bottom(), median_bottom_,
1791 bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B',
1792 right_margin_, median_right_, bounding_box_.top(), median_top_,
1793 good_width_, good_column_, type_,
1794 kBlobTypes[blob_type_], flow_,
1795 first_column_, last_column_, boxes_.length(),
1796 space_above_, space_below_, space_to_left_, space_to_right_);
1797}

◆ PrintColors()

void tesseract::ColPartition::PrintColors ( )

Definition at line 1800 of file colpartition.cpp.

1800 {
1801 tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
1802 color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
1803 color1_[L_ALPHA_CHANNEL],
1804 color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1805}

◆ RefinePartners()

void tesseract::ColPartition::RefinePartners ( PolyBlockType  type,
bool  get_desperate,
ColPartitionGrid * grid 
)

Definition at line 1877 of file colpartition.cpp.

1878 {
1879 if (TypesSimilar(type_, type)) {
1880 RefinePartnersInternal(true, get_desperate, grid);
1881 RefinePartnersInternal(false, get_desperate, grid);
1882 } else if (type == PT_COUNT) {
1883 // This is the final pass. Make sure only the correctly typed
1884 // partners survive, however many there are.
1885 RefinePartnersByType(true, &upper_partners_);
1886 RefinePartnersByType(false, &lower_partners_);
1887 // It is possible for a merge to have given a partition multiple
1888 // partners again, so the last resort is to use overlap which is
1889 // guaranteed to leave at most one partner left.
1890 if (!upper_partners_.empty() && !upper_partners_.singleton())
1891 RefinePartnersByOverlap(true, &upper_partners_);
1892 if (!lower_partners_.empty() && !lower_partners_.singleton())
1893 RefinePartnersByOverlap(false, &lower_partners_);
1894 }
1895}
@ PT_COUNT
Definition: capi.h:144
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
Definition: colpartition.h:419

◆ ReflectInYAxis()

void tesseract::ColPartition::ReflectInYAxis ( )

Definition at line 320 of file colpartition.cpp.

320 {
321 BLOBNBOX_CLIST reversed_boxes;
322 BLOBNBOX_C_IT reversed_it(&reversed_boxes);
323 // Reverse the order of the boxes_.
324 BLOBNBOX_C_IT bb_it(&boxes_);
325 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
326 reversed_it.add_before_then_move(bb_it.extract());
327 }
328 bb_it.add_list_after(&reversed_boxes);
329 ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
330 int tmp = left_margin_;
331 left_margin_ = -right_margin_;
332 right_margin_ = -tmp;
333 ComputeLimits();
334}

◆ ReleaseNonLeaderBoxes()

bool tesseract::ColPartition::ReleaseNonLeaderBoxes ( )

Definition at line 289 of file colpartition.cpp.

289 {
290 BLOBNBOX_C_IT bb_it(&boxes_);
291 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
292 BLOBNBOX* bblob = bb_it.data();
293 if (bblob->flow() != BTFT_LEADER) {
294 if (bblob->owner() == this) bblob->set_owner(nullptr);
295 bb_it.extract();
296 }
297 }
298 if (bb_it.empty()) return false;
299 flow_ = BTFT_LEADER;
300 ComputeLimits();
301 return true;
302}

◆ RemoveBox()

void tesseract::ColPartition::RemoveBox ( BLOBNBOX * box)

Definition at line 202 of file colpartition.cpp.

202 {
203 BLOBNBOX_C_IT bb_it(&boxes_);
204 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
205 if (box == bb_it.data()) {
206 bb_it.extract();
207 ComputeLimits();
208 return;
209 }
210 }
211}

◆ RemovePartner()

void tesseract::ColPartition::RemovePartner ( bool  upper,
ColPartition * partner 
)

Definition at line 618 of file colpartition.cpp.

618 {
619 ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
620 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
621 if (it.data() == partner) {
622 it.extract();
623 break;
624 }
625 }
626}

◆ right_key()

int tesseract::ColPartition::right_key ( ) const
inline

Definition at line 179 of file colpartition.h.

179 {
180 return right_key_;
181 }

◆ right_key_tab()

bool tesseract::ColPartition::right_key_tab ( ) const
inline

Definition at line 176 of file colpartition.h.

176 {
177 return right_key_tab_;
178 }

◆ right_margin()

int tesseract::ColPartition::right_margin ( ) const
inline

Definition at line 119 of file colpartition.h.

119 {
120 return right_margin_;
121 }

◆ RightAtY()

int tesseract::ColPartition::RightAtY ( int  y) const
inline

Definition at line 345 of file colpartition.h.

345 {
346 return XAtY(right_key_, y);
347 }

◆ RightBlobRule()

int tesseract::ColPartition::RightBlobRule ( ) const

Definition at line 550 of file colpartition.cpp.

550 {
551 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
552 it.move_to_last();
553 return it.data()->right_rule();
554}

◆ set_blob_type()

void tesseract::ColPartition::set_blob_type ( BlobRegionType  t)
inline

Definition at line 152 of file colpartition.h.

152 {
153 blob_type_ = t;
154 }

◆ set_block_owned()

void tesseract::ColPartition::set_block_owned ( bool  owned)
inline

Definition at line 209 of file colpartition.h.

209 {
210 block_owned_ = owned;
211 }

◆ set_bottom_spacing()

void tesseract::ColPartition::set_bottom_spacing ( int  spacing)
inline

Definition at line 224 of file colpartition.h.

224 {
225 bottom_spacing_ = spacing;
226 }

◆ set_first_column()

void tesseract::ColPartition::set_first_column ( int  column)
inline

Definition at line 732 of file colpartition.h.

732 {
733 first_column_ = column;
734 }

◆ set_flow()

void tesseract::ColPartition::set_flow ( BlobTextFlowType  f)
inline

Definition at line 158 of file colpartition.h.

158 {
159 flow_ = f;
160 }

◆ set_inside_table_column()

void tesseract::ColPartition::set_inside_table_column ( bool  val)
inline

Definition at line 247 of file colpartition.h.

247 {
248 inside_table_column_ = val;
249 }

◆ set_last_column()

void tesseract::ColPartition::set_last_column ( int  column)
inline

Definition at line 735 of file colpartition.h.

735 {
736 last_column_ = column;
737 }

◆ set_left_margin()

void tesseract::ColPartition::set_left_margin ( int  margin)
inline

Definition at line 116 of file colpartition.h.

116 {
117 left_margin_ = margin;
118 }

◆ set_median_height()

void tesseract::ColPartition::set_median_height ( int  height)
inline

Definition at line 140 of file colpartition.h.

140 {
141 median_height_ = height;
142 }

◆ set_median_width()

void tesseract::ColPartition::set_median_width ( int  width)
inline

Definition at line 146 of file colpartition.h.

146 {
147 median_width_ = width;
148 }

◆ set_nearest_neighbor_above()

void tesseract::ColPartition::set_nearest_neighbor_above ( ColPartition * part)
inline

Definition at line 253 of file colpartition.h.

253 {
254 nearest_neighbor_above_ = part;
255 }

◆ set_nearest_neighbor_below()

void tesseract::ColPartition::set_nearest_neighbor_below ( ColPartition * part)
inline

Definition at line 259 of file colpartition.h.

259 {
260 nearest_neighbor_below_ = part;
261 }

◆ set_owns_blobs()

void tesseract::ColPartition::set_owns_blobs ( bool  owns_blobs)
inline

Definition at line 295 of file colpartition.h.

295 {
296 // Do NOT change ownership flag when there are blobs in the list.
297 // Immediately set the ownership flag when creating copies.
298 ASSERT_HOST(boxes_.empty());
299 owns_blobs_ = owns_blobs;
300 }

◆ set_right_margin()

void tesseract::ColPartition::set_right_margin ( int  margin)
inline

Definition at line 122 of file colpartition.h.

122 {
123 right_margin_ = margin;
124 }

◆ set_side_step()

void tesseract::ColPartition::set_side_step ( int  step)
inline

Definition at line 218 of file colpartition.h.

218 {
219 side_step_ = step;
220 }

◆ set_space_above()

void tesseract::ColPartition::set_space_above ( int  space)
inline

Definition at line 265 of file colpartition.h.

265 {
266 space_above_ = space;
267 }

◆ set_space_below()

void tesseract::ColPartition::set_space_below ( int  space)
inline

Definition at line 271 of file colpartition.h.

271 {
272 space_below_ = space;
273 }

◆ set_space_to_left()

void tesseract::ColPartition::set_space_to_left ( int  space)
inline

Definition at line 277 of file colpartition.h.

277 {
278 space_to_left_ = space;
279 }

◆ set_space_to_right()

void tesseract::ColPartition::set_space_to_right ( int  space)
inline

Definition at line 283 of file colpartition.h.

283 {
284 space_to_right_ = space;
285 }

◆ set_table_type()

void tesseract::ColPartition::set_table_type ( )
inline

Definition at line 234 of file colpartition.h.

234 {
235 if (type_ != PT_TABLE) {
236 type_before_table_ = type_;
237 type_ = PT_TABLE;
238 }
239 }

◆ set_top_spacing()

void tesseract::ColPartition::set_top_spacing ( int  spacing)
inline

Definition at line 230 of file colpartition.h.

230 {
231 top_spacing_ = spacing;
232 }

◆ set_type()

void tesseract::ColPartition::set_type ( PolyBlockType  t)
inline

Definition at line 185 of file colpartition.h.

185 {
186 type_ = t;
187 }

◆ set_vertical()

void tesseract::ColPartition::set_vertical ( const ICOORD & v)
inline

Definition at line 194 of file colpartition.h.

194 {
195 vertical_ = v;
196 }

◆ set_working_set()

void tesseract::ColPartition::set_working_set ( WorkingPartSet * working_set)
inline

Definition at line 203 of file colpartition.h.

203 {
204 working_set_ = working_set;
205 }

◆ SetBlobTypes()

void tesseract::ColPartition::SetBlobTypes ( )

Definition at line 1265 of file colpartition.cpp.

1265 {
1266 if (!owns_blobs())
1267 return;
1268 BLOBNBOX_C_IT it(&boxes_);
1269 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1270 BLOBNBOX* blob = it.data();
1271 if (blob->flow() != BTFT_LEADER)
1272 blob->set_flow(flow_);
1273 blob->set_region_type(blob_type_);
1274 ASSERT_HOST(blob->owner() == nullptr || blob->owner() == this);
1275 }
1276}

◆ SetColumnGoodness()

void tesseract::ColPartition::SetColumnGoodness ( WidthCallback * cb)

Definition at line 1070 of file colpartition.cpp.

1070 {
1071 int y = MidY();
1072 int width = RightAtY(y) - LeftAtY(y);
1073 good_width_ = cb->Run(width);
1074 good_column_ = blob_type_ == BRT_TEXT && left_key_tab_ && right_key_tab_;
1075}

◆ SetLeftTab()

void tesseract::ColPartition::SetLeftTab ( const TabVector * tab_vector)

Definition at line 494 of file colpartition.cpp.

494 {
495 if (tab_vector != nullptr) {
496 left_key_ = tab_vector->sort_key();
497 left_key_tab_ = left_key_ <= BoxLeftKey();
498 } else {
499 left_key_tab_ = false;
500 }
501 if (!left_key_tab_)
502 left_key_ = BoxLeftKey();
503}

◆ SetPartitionType()

void tesseract::ColPartition::SetPartitionType ( int  resolution,
ColPartitionSet * columns 
)

Definition at line 973 of file colpartition.cpp.

973 {
974 int first_spanned_col = -1;
975 ColumnSpanningType span_type =
976 columns->SpanningType(resolution,
977 bounding_box_.left(), bounding_box_.right(),
978 std::min(bounding_box_.height(), bounding_box_.width()),
979 MidY(), left_margin_, right_margin_,
980 &first_column_, &last_column_,
981 &first_spanned_col);
982 column_set_ = columns;
983 if (first_column_ < last_column_ && span_type == CST_PULLOUT &&
984 !IsLineType()) {
985 // Unequal columns may indicate that the pullout spans one of the columns
986 // it lies in, so force it to be allocated to just that column.
987 if (first_spanned_col >= 0) {
988 first_column_ = first_spanned_col;
989 last_column_ = first_spanned_col;
990 } else {
991 if ((first_column_ & 1) == 0)
992 last_column_ = first_column_;
993 else if ((last_column_ & 1) == 0)
994 first_column_ = last_column_;
995 else
996 first_column_ = last_column_ = (first_column_ + last_column_) / 2;
997 }
998 }
999 type_ = PartitionType(span_type);
1000}

◆ SetRegionAndFlowTypesFromProjectionValue()

void tesseract::ColPartition::SetRegionAndFlowTypesFromProjectionValue ( int  value)

Definition at line 1191 of file colpartition.cpp.

1191 {
1192 int blob_count = 0; // Total # blobs.
1193 int good_blob_score_ = 0; // Total # good strokewidth neighbours.
1194 int noisy_count = 0; // Total # neighbours marked as noise.
1195 int hline_count = 0;
1196 int vline_count = 0;
1197 BLOBNBOX_C_IT it(&boxes_);
1198 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1199 BLOBNBOX* blob = it.data();
1200 ++blob_count;
1201 noisy_count += blob->NoisyNeighbours();
1202 good_blob_score_ += blob->GoodTextBlob();
1203 if (blob->region_type() == BRT_HLINE) ++hline_count;
1204 if (blob->region_type() == BRT_VLINE) ++vline_count;
1205 }
1206 flow_ = BTFT_NEIGHBOURS;
1207 blob_type_ = BRT_UNKNOWN;
1208 if (hline_count > vline_count) {
1209 flow_ = BTFT_NONE;
1210 blob_type_ = BRT_HLINE;
1211 } else if (vline_count > hline_count) {
1212 flow_ = BTFT_NONE;
1213 blob_type_ = BRT_VLINE;
1214 } else if (value < -1 || 1 < value) {
1215 int long_side;
1216 int short_side;
1217 if (value > 0) {
1218 long_side = bounding_box_.width();
1219 short_side = bounding_box_.height();
1220 blob_type_ = BRT_TEXT;
1221 } else {
1222 long_side = bounding_box_.height();
1223 short_side = bounding_box_.width();
1224 blob_type_ = BRT_VERT_TEXT;
1225 }
1226 // We will combine the old metrics using aspect ratio and blob counts
1227 // with the input value by allowing a strong indication to flip the
1228 // STRONG_CHAIN/CHAIN flow values.
1229 int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0;
1230 if (short_side > kHorzStrongTextlineHeight) ++strong_score;
1231 if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score;
1232 if (abs(value) >= kMinStrongTextValue)
1233 flow_ = BTFT_STRONG_CHAIN;
1234 else if (abs(value) >= kMinChainTextValue)
1235 flow_ = BTFT_CHAIN;
1236 else
1237 flow_ = BTFT_NEIGHBOURS;
1238 // Upgrade chain to strong chain if the other indicators are good
1239 if (flow_ == BTFT_CHAIN && strong_score == 3)
1240 flow_ = BTFT_STRONG_CHAIN;
1241 // Downgrade strong vertical text to chain if the indicators are bad.
1242 if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2)
1243 flow_ = BTFT_CHAIN;
1244 }
1245 if (flow_ == BTFT_NEIGHBOURS) {
1246 // Check for noisy neighbours.
1247 if (noisy_count >= blob_count) {
1248 flow_ = BTFT_NONTEXT;
1249 blob_type_= BRT_NOISE;
1250 }
1251 }
1252 if (TabFind::WithinTestRegion(2, bounding_box_.left(),
1253 bounding_box_.bottom())) {
1254 tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1255 blob_count, noisy_count, good_blob_score_);
1256 tprintf(" Projection value=%d, flow=%d, blob_type=%d\n",
1257 value, flow_, blob_type_);
1258 Print();
1259 }
1260 SetBlobTypes();
1261}
@ BTFT_CHAIN
Definition: blobbox.h:118
@ BTFT_STRONG_CHAIN
Definition: blobbox.h:119
@ BTFT_NONTEXT
Definition: blobbox.h:116
const int kMinChainTextValue
const int kHorzStrongTextlineCount
const int kHorzStrongTextlineHeight
const int kHorzStrongTextlineAspect
const int kMinStrongTextValue
int NoisyNeighbours() const
Definition: blobbox.cpp:237
BlobRegionType region_type() const
Definition: blobbox.h:283
int GoodTextBlob() const
Definition: blobbox.cpp:226

◆ SetRightTab()

void tesseract::ColPartition::SetRightTab ( const TabVector * tab_vector)

Definition at line 506 of file colpartition.cpp.

506 {
507 if (tab_vector != nullptr) {
508 right_key_ = tab_vector->sort_key();
509 right_key_tab_ = right_key_ >= BoxRightKey();
510 } else {
511 right_key_tab_ = false;
512 }
513 if (!right_key_tab_)
514 right_key_ = BoxRightKey();
515}

◆ SetSpecialBlobsDensity()

void tesseract::ColPartition::SetSpecialBlobsDensity ( const BlobSpecialTextType  type,
const float  density 
)

Definition at line 576 of file colpartition.cpp.

577 {
578 ASSERT_HOST(type < BSTT_COUNT);
579 special_blobs_densities_[type] = density;
580}

◆ ShallowCopy()

ColPartition * tesseract::ColPartition::ShallowCopy ( ) const

Definition at line 1731 of file colpartition.cpp.

1731 {
1732 auto* part = new ColPartition(blob_type_, vertical_);
1733 part->left_margin_ = left_margin_;
1734 part->right_margin_ = right_margin_;
1735 part->bounding_box_ = bounding_box_;
1736 memcpy(part->special_blobs_densities_, special_blobs_densities_,
1737 sizeof(special_blobs_densities_));
1738 part->median_bottom_ = median_bottom_;
1739 part->median_top_ = median_top_;
1740 part->median_height_ = median_height_;
1741 part->median_left_ = median_left_;
1742 part->median_right_ = median_right_;
1743 part->median_width_ = median_width_;
1744 part->good_width_ = good_width_;
1745 part->good_column_ = good_column_;
1746 part->left_key_tab_ = left_key_tab_;
1747 part->right_key_tab_ = right_key_tab_;
1748 part->type_ = type_;
1749 part->flow_ = flow_;
1750 part->left_key_ = left_key_;
1751 part->right_key_ = right_key_;
1752 part->first_column_ = first_column_;
1753 part->last_column_ = last_column_;
1754 part->owns_blobs_ = false;
1755 return part;
1756}

◆ SingletonPartner()

ColPartition * tesseract::ColPartition::SingletonPartner ( bool  upper)

Definition at line 629 of file colpartition.cpp.

629 {
630 ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
631 if (!partners->singleton())
632 return nullptr;
633 ColPartition_C_IT it(partners);
634 return it.data();
635}

◆ SmoothPartnerRun()

void tesseract::ColPartition::SmoothPartnerRun ( int  working_set_count)

Definition at line 1808 of file colpartition.cpp.

1808 {
1809 STATS left_stats(0, working_set_count);
1810 STATS right_stats(0, working_set_count);
1811 PolyBlockType max_type = type_;
1812 ColPartition* partner;
1813 for (partner = SingletonPartner(false); partner != nullptr;
1814 partner = partner->SingletonPartner(false)) {
1815 if (partner->type_ > max_type)
1816 max_type = partner->type_;
1817 if (column_set_ == partner->column_set_) {
1818 left_stats.add(partner->first_column_, 1);
1819 right_stats.add(partner->last_column_, 1);
1820 }
1821 }
1822 type_ = max_type;
1823 // TODO(rays) Either establish that it isn't necessary to set the columns,
1824 // or find a way to do it that does not cause an assert failure in
1825 // AddToWorkingSet.
1826#if 0
1827 first_column_ = left_stats.mode();
1828 last_column_ = right_stats.mode();
1829 if (last_column_ < first_column_)
1830 last_column_ = first_column_;
1831#endif
1832
1833 for (partner = SingletonPartner(false); partner != nullptr;
1834 partner = partner->SingletonPartner(false)) {
1835 partner->type_ = max_type;
1836#if 0 // See TODO above
1837 if (column_set_ == partner->column_set_) {
1838 partner->first_column_ = first_column_;
1839 partner->last_column_ = last_column_;
1840 }
1841#endif
1842 }
1843}

◆ SortByBBox()

static int tesseract::ColPartition::SortByBBox ( const void *  p1,
const void *  p2 
)
inline static

Definition at line 715 of file colpartition.h.

715 {
716 const ColPartition* part1 = *static_cast<const ColPartition* const*>(p1);
717 const ColPartition* part2 = *static_cast<const ColPartition* const*>(p2);
718 int mid_y1 = part1->bounding_box_.y_middle();
719 int mid_y2 = part2->bounding_box_.y_middle();
720 if ((part2->bounding_box_.bottom() <= mid_y1 &&
721 mid_y1 <= part2->bounding_box_.top()) ||
722 (part1->bounding_box_.bottom() <= mid_y2 &&
723 mid_y2 <= part1->bounding_box_.top())) {
724 // Sort by increasing x.
725 return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle();
726 }
727 // Sort by decreasing y.
728 return mid_y2 - mid_y1;
729 }

◆ SortKey()

int tesseract::ColPartition::SortKey ( int  x,
int  y 
) const
inline

Definition at line 317 of file colpartition.h.

317 {
318 return TabVector::SortKey(vertical_, x, y);
319 }
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:279

◆ space_above()

int tesseract::ColPartition::space_above ( ) const
inline

Definition at line 262 of file colpartition.h.

262 {
263 return space_above_;
264 }

◆ space_below()

int tesseract::ColPartition::space_below ( ) const
inline

Definition at line 268 of file colpartition.h.

268 {
269 return space_below_;
270 }

◆ space_to_left()

int tesseract::ColPartition::space_to_left ( ) const
inline

Definition at line 274 of file colpartition.h.

274 {
275 return space_to_left_;
276 }

◆ space_to_right()

int tesseract::ColPartition::space_to_right ( ) const
inline

Definition at line 280 of file colpartition.h.

280 {
281 return space_to_right_;
282 }

◆ SpecialBlobsCount()

int tesseract::ColPartition::SpecialBlobsCount ( const BlobSpecialTextType  type)

Definition at line 561 of file colpartition.cpp.

561 {
562 ASSERT_HOST(type < BSTT_COUNT);
563 BLOBNBOX_C_IT blob_it(&boxes_);
564 int count = 0;
565 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
566 BLOBNBOX* blob = blob_it.data();
567 BlobSpecialTextType blob_type = blob->special_text_type();
568 if (blob_type == type) {
569 count++;
570 }
571 }
572
573 return count;
574}
int count(LIST var_list)
Definition: oldlist.cpp:95

◆ SpecialBlobsDensity()

float tesseract::ColPartition::SpecialBlobsDensity ( const BlobSpecialTextType  type) const

Definition at line 556 of file colpartition.cpp.

556 {
557 ASSERT_HOST(type < BSTT_COUNT);
558 return special_blobs_densities_[type];
559}

◆ SplitAt()

ColPartition * tesseract::ColPartition::SplitAt ( int  split_x)

Definition at line 823 of file colpartition.cpp.

823 {
824 if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right())
825 return nullptr; // There will be no change.
826 ColPartition* split_part = ShallowCopy();
827 split_part->set_owns_blobs(owns_blobs());
828 BLOBNBOX_C_IT it(&boxes_);
829 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
830 BLOBNBOX* bbox = it.data();
831 ColPartition* prev_owner = bbox->owner();
832 ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
833 const TBOX& box = bbox->bounding_box();
834 if (box.left() >= split_x) {
835 split_part->AddBox(it.extract());
836 if (owns_blobs() && prev_owner != nullptr)
837 bbox->set_owner(split_part);
838 }
839 }
840 if (it.empty()) {
841 // Possible if split-x passes through the first blob.
842 it.add_list_after(&split_part->boxes_);
843 }
844 ASSERT_HOST(!it.empty());
845 if (split_part->IsEmpty()) {
846 // Split part ended up with nothing. Possible if split_x passes
847 // through the last blob.
848 delete split_part;
849 return nullptr;
850 }
851 right_key_tab_ = false;
852 split_part->left_key_tab_ = false;
853 right_margin_ = split_x;
854 split_part->left_margin_ = split_x;
855 ComputeLimits();
856 split_part->ComputeLimits();
857 return split_part;
858}

◆ SplitAtBlob()

ColPartition * tesseract::ColPartition::SplitAtBlob ( BLOBNBOX * split_blob)

Definition at line 787 of file colpartition.cpp.

787 {
788 ColPartition* split_part = ShallowCopy();
789 split_part->set_owns_blobs(owns_blobs());
790 BLOBNBOX_C_IT it(&boxes_);
791 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
792 BLOBNBOX* bbox = it.data();
793 ColPartition* prev_owner = bbox->owner();
794 ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
795 if (bbox == split_blob || !split_part->boxes_.empty()) {
796 split_part->AddBox(it.extract());
797 if (owns_blobs() && prev_owner != nullptr)
798 bbox->set_owner(split_part);
799 }
800 }
801 ASSERT_HOST(!it.empty());
802 if (split_part->IsEmpty()) {
803 // Split part ended up with nothing. Possible if split_blob is not
804 // in the list of blobs.
805 delete split_part;
806 return nullptr;
807 }
808 right_key_tab_ = false;
809 split_part->left_key_tab_ = false;
810 ComputeLimits();
811 // TODO(nbeato) Merge Ray's CL like this:
812 // if (owns_blobs())
813 // SetBlobTextlineGoodness();
814 split_part->ComputeLimits();
815 // TODO(nbeato) Merge Ray's CL like this:
816 // if (split_part->owns_blobs())
817 // split_part->SetBlobTextlineGoodness();
818 return split_part;
819}

◆ top_spacing()

int tesseract::ColPartition::top_spacing ( ) const
inline

Definition at line 227 of file colpartition.h.

227 {
228 return top_spacing_;
229 }

◆ type()

PolyBlockType tesseract::ColPartition::type ( ) const
inline

Definition at line 182 of file colpartition.h.

182 {
183 return type_;
184 }

◆ TypesMatch() [1/2]

static bool tesseract::ColPartition::TypesMatch ( BlobRegionType  type1,
BlobRegionType  type2 
)
inline static

Definition at line 413 of file colpartition.h.

413 {
414 return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) &&
415 !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
416 }
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:426

◆ TypesMatch() [2/2]

bool tesseract::ColPartition::TypesMatch ( const ColPartition & other) const
inline

Definition at line 410 of file colpartition.h.

410 {
411 return TypesMatch(blob_type_, other.blob_type_);
412 }
bool TypesMatch(const ColPartition &other) const
Definition: colpartition.h:410

◆ TypesSimilar()

static bool tesseract::ColPartition::TypesSimilar ( PolyBlockType  type1,
PolyBlockType  type2 
)
inline static

Definition at line 419 of file colpartition.h.

419 {
420 return (type1 == type2 ||
421 (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
422 (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
423 }
@ PT_INLINE_EQUATION
Definition: capi.h:134

◆ upper_partners()

ColPartition_CLIST * tesseract::ColPartition::upper_partners ( )
inline

Definition at line 197 of file colpartition.h.

197 {
198 return &upper_partners_;
199 }

◆ VCoreOverlap()

int tesseract::ColPartition::VCoreOverlap ( const ColPartition & other) const
inline

Definition at line 376 of file colpartition.h.

376 {
377 if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
378 return 0;
379 }
380 return std::min(median_top_, other.median_top_) -
381 std::max(median_bottom_, other.median_bottom_);
382 }

◆ VOverlaps()

bool tesseract::ColPartition::VOverlaps ( const ColPartition & other) const
inline

Definition at line 371 of file colpartition.h.

371 {
372 return bounding_box_.y_gap(other.bounding_box_) < 0;
373 }
int y_gap(const TBOX &box) const
Definition: rect.h:233

◆ VSignificantCoreOverlap()

bool tesseract::ColPartition::VSignificantCoreOverlap ( const ColPartition & other) const
inline

Definition at line 391 of file colpartition.h.

391 {
392 if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
393 return false;
394 }
395 int overlap = VCoreOverlap(other);
396 int height = std::min(median_top_ - median_bottom_,
397 other.median_top_ - other.median_bottom_);
398 return overlap * 3 > height;
399 }
int VCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:376

◆ WithinSameMargins()

bool tesseract::ColPartition::WithinSameMargins ( const ColPartition & other) const
inline

Definition at line 402 of file colpartition.h.

402 {
403 return left_margin_ <= other.bounding_box_.left() &&
404 bounding_box_.left() >= other.left_margin_ &&
405 bounding_box_.right() <= other.right_margin_ &&
406 right_margin_ >= other.bounding_box_.right();
407 }

◆ XAtY()

int tesseract::ColPartition::XAtY ( int  sort_key,
int  y 
) const
inline

Definition at line 321 of file colpartition.h.

321 {
322 return TabVector::XAtY(vertical_, sort_key, y);
323 }
int XAtY(int y) const
Definition: tabvector.h:188

The documentation for this class was generated from the following files: