tesseract 4.1.1
Loading...
Searching...
No Matches
tesseract::TabVector Class Reference

#include <tabvector.h>

Inheritance diagram for tesseract::TabVector:
ELIST2_LINK

Public Member Functions

 TabVector ()=default
 
 ~TabVector ()=default
 
 TabVector (const TabVector &src, TabAlignment alignment, const ICOORD &vertical_skew, BLOBNBOX *blob)
 
TabVectorShallowCopy () const
 
const ICOORDstartpt () const
 
const ICOORDendpt () const
 
int extended_ymax () const
 
int extended_ymin () const
 
int sort_key () const
 
int mean_width () const
 
void set_top_constraints (TabConstraint_LIST *constraints)
 
void set_bottom_constraints (TabConstraint_LIST *constraints)
 
TabVector_CLIST * partners ()
 
void set_startpt (const ICOORD &start)
 
void set_endpt (const ICOORD &end)
 
bool intersects_other_lines () const
 
void set_intersects_other_lines (bool value)
 
int XAtY (int y) const
 
int VOverlap (const TabVector &other) const
 
int VOverlap (int top_y, int bottom_y) const
 
int ExtendedOverlap (int top_y, int bottom_y) const
 
bool IsLeftTab () const
 
bool IsRightTab () const
 
bool IsSeparator () const
 
bool IsCenterTab () const
 
bool IsRagged () const
 
bool IsLeftOf (const TabVector &other) const
 
bool Partnerless ()
 
int BoxCount ()
 
void Freeze ()
 
void XYFlip ()
 
void ReflectInYAxis ()
 
void ExtendToBox (BLOBNBOX *blob)
 
void SetYStart (int start_y)
 
void SetYEnd (int end_y)
 
void Rotate (const FCOORD &rotation)
 
void SetupConstraints ()
 
void SetupPartnerConstraints ()
 
void SetupPartnerConstraints (TabVector *partner)
 
void ApplyConstraints ()
 
bool SimilarTo (const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const
 
void MergeWith (const ICOORD &vertical, TabVector *other)
 
void AddPartner (TabVector *partner)
 
bool IsAPartner (const TabVector *other)
 
void Print (const char *prefix)
 
void Debug (const char *prefix)
 
void Display (ScrollView *tab_win)
 
void FitAndEvaluateIfNeeded (const ICOORD &vertical, TabFind *finder)
 
void Evaluate (const ICOORD &vertical, TabFind *finder)
 
bool Fit (ICOORD vertical, bool force_parallel)
 
TabVectorVerticalTextlinePartner ()
 
TabVectorGetSinglePartner ()
 
- Public Member Functions inherited from ELIST2_LINK
 ELIST2_LINK ()
 
 ELIST2_LINK (const ELIST2_LINK &)
 
void operator= (const ELIST2_LINK &)
 

Static Public Member Functions

static TabVectorFitVector (TabAlignment alignment, ICOORD vertical, int extended_start_y, int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x, int *vertical_y)
 
static int SortKey (const ICOORD &vertical, int x, int y)
 
static int XAtY (const ICOORD &vertical, int sort_key, int y)
 
static int SortVectorsByKey (const void *v1, const void *v2)
 
static void MergeSimilarTabVectors (const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
 

Detailed Description

Definition at line 111 of file tabvector.h.

Constructor & Destructor Documentation

◆ TabVector() [1/2]

tesseract::TabVector::TabVector ( )
default

◆ ~TabVector()

tesseract::TabVector::~TabVector ( )
default

◆ TabVector() [2/2]

tesseract::TabVector::TabVector ( const TabVector src,
TabAlignment  alignment,
const ICOORD vertical_skew,
BLOBNBOX blob 
)

Definition at line 198 of file tabvector.cpp.

200 : extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_),
201 needs_refit_(true), needs_evaluation_(true),
202 alignment_(alignment) {
203 BLOBNBOX_C_IT it(&boxes_);
204 it.add_to_end(blob);
205 TBOX box = blob->bounding_box();
206 if (IsLeftTab()) {
207 startpt_ = box.botleft();
208 endpt_ = box.topleft();
209 } else {
210 startpt_ = box.botright();
211 endpt_ = box.topright();
212 }
213 sort_key_ = SortKey(vertical_skew,
214 (startpt_.x() + endpt_.x()) / 2,
215 (startpt_.y() + endpt_.y()) / 2);
216 if (textord_debug_tabfind > 3)
217 Print("Constructed a new tab vector:");
218}
int textord_debug_tabfind
Definition: alignedblob.cpp:27
const TBOX & bounding_box() const
Definition: blobbox.h:230
int16_t y() const
access_function
Definition: points.h:56
int16_t x() const
access function
Definition: points.h:52
Definition: rect.h:34
const ICOORD & botleft() const
Definition: rect.h:92
ICOORD botright() const
Definition: rect.h:96
ICOORD topleft() const
Definition: rect.h:100
const ICOORD & topright() const
Definition: rect.h:104
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:279
bool IsLeftTab() const
Definition: tabvector.h:212
void Print(const char *prefix)
Definition: tabvector.cpp:517

Member Function Documentation

◆ AddPartner()

void tesseract::TabVector::AddPartner ( TabVector partner)

Definition at line 484 of file tabvector.cpp.

484 {
485 if (IsSeparator() || partner->IsSeparator())
486 return;
487 TabVector_C_IT it(&partners_);
488 if (!it.empty()) {
489 it.move_to_last();
490 if (it.data() == partner)
491 return;
492 }
493 it.add_after_then_move(partner);
494}
bool IsSeparator() const
Definition: tabvector.h:220

◆ ApplyConstraints()

void tesseract::TabVector::ApplyConstraints ( )

Definition at line 345 of file tabvector.cpp.

345 {
346 if (top_constraints_ != nullptr)
347 TabConstraint::ApplyConstraints(top_constraints_);
348 if (bottom_constraints_ != nullptr)
349 TabConstraint::ApplyConstraints(bottom_constraints_);
350}
static void ApplyConstraints(TabConstraint_LIST *constraints)
Definition: tabvector.cpp:116

◆ BoxCount()

int tesseract::TabVector::BoxCount ( )
inline

Definition at line 244 of file tabvector.h.

244 {
245 return boxes_.length();
246 }

◆ Debug()

void tesseract::TabVector::Debug ( const char *  prefix)

Definition at line 527 of file tabvector.cpp.

527 {
528 Print(prefix);
529 BLOBNBOX_C_IT it(&boxes_);
530 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
531 BLOBNBOX* bbox = it.data();
532 const TBOX& box = bbox->bounding_box();
533 tprintf("Box at (%d,%d)->(%d,%d)\n",
534 box.left(), box.bottom(), box.right(), box.top());
535 }
536}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int16_t top() const
Definition: rect.h:58
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
int16_t right() const
Definition: rect.h:79

◆ Display()

void tesseract::TabVector::Display ( ScrollView tab_win)

Definition at line 539 of file tabvector.cpp.

539 {
540#ifndef GRAPHICS_DISABLED
542 tab_win->Pen(ScrollView::BLUE);
543 else if (alignment_ == TA_LEFT_ALIGNED)
544 tab_win->Pen(ScrollView::LIME_GREEN);
545 else if (alignment_ == TA_LEFT_RAGGED)
546 tab_win->Pen(ScrollView::DARK_GREEN);
547 else if (alignment_ == TA_RIGHT_ALIGNED)
548 tab_win->Pen(ScrollView::PINK);
549 else if (alignment_ == TA_RIGHT_RAGGED)
550 tab_win->Pen(ScrollView::CORAL);
551 else
552 tab_win->Pen(ScrollView::WHITE);
553 tab_win->Line(startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y());
554 tab_win->Pen(ScrollView::GREY);
555 tab_win->Line(startpt_.x(), startpt_.y(), startpt_.x(), extended_ymin_);
556 tab_win->Line(endpt_.x(), extended_ymax_, endpt_.x(), endpt_.y());
557 char score_buf[64];
558 snprintf(score_buf, sizeof(score_buf), "%d", percent_score_);
559 tab_win->TextAttributes("Times", 50, false, false, false);
560 tab_win->Text(startpt_.x(), startpt_.y(), score_buf);
561#endif
562}
bool textord_debug_printable
Definition: alignedblob.cpp:33
@ TA_RIGHT_ALIGNED
Definition: tabvector.h:48
@ TA_RIGHT_RAGGED
Definition: tabvector.h:49
@ TA_LEFT_ALIGNED
Definition: tabvector.h:45
@ TA_LEFT_RAGGED
Definition: tabvector.h:46
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:532
void TextAttributes(const char *font, int pixel_size, bool bold, bool italic, bool underlined)
Definition: scrollview.cpp:635
void Text(int x, int y, const char *mystring)
Definition: scrollview.cpp:652
void Pen(Color color)
Definition: scrollview.cpp:719

◆ endpt()

const ICOORD & tesseract::TabVector::endpt ( ) const
inline

Definition at line 148 of file tabvector.h.

148 {
149 return endpt_;
150 }

◆ Evaluate()

void tesseract::TabVector::Evaluate ( const ICOORD vertical,
TabFind finder 
)

Definition at line 579 of file tabvector.cpp.

579 {
580 bool debug = false;
581 needs_evaluation_ = false;
582 int length = endpt_.y() - startpt_.y();
583 if (length == 0 || boxes_.empty()) {
584 percent_score_ = 0;
585 Print("Zero length in evaluate");
586 return;
587 }
588 // Compute the mean box height.
589 BLOBNBOX_C_IT it(&boxes_);
590 int mean_height = 0;
591 int height_count = 0;
592 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
593 BLOBNBOX* bbox = it.data();
594 const TBOX& box = bbox->bounding_box();
595 int height = box.height();
596 mean_height += height;
597 ++height_count;
598 }
599 if (height_count > 0) mean_height /= height_count;
600 int max_gutter = kGutterMultiple * mean_height;
601 if (IsRagged()) {
602 // Ragged edges face a tougher test in that the gap must always be within
603 // the height of the blob.
604 max_gutter = kGutterToNeighbourRatio * mean_height;
605 }
606
607 STATS gutters(0, max_gutter + 1);
608 // Evaluate the boxes for their goodness, calculating the coverage as we go.
609 // Remove boxes that are not good and shorten the list to the first and
610 // last good boxes.
611 int num_deleted_boxes = 0;
612 bool text_on_image = false;
613 int good_length = 0;
614 const TBOX* prev_good_box = nullptr;
615 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
616 BLOBNBOX* bbox = it.data();
617 const TBOX& box = bbox->bounding_box();
618 int mid_y = (box.top() + box.bottom()) / 2;
619 if (TabFind::WithinTestRegion(2, XAtY(box.bottom()), box.bottom())) {
620 if (!debug) {
621 tprintf("After already deleting %d boxes, ", num_deleted_boxes);
622 Print("Starting evaluation");
623 }
624 debug = true;
625 }
626 // A good box is one where the nearest neighbour on the inside is closer
627 // than half the distance to the nearest neighbour on the outside
628 // (of the putative column).
629 bool left = IsLeftTab();
630 int tab_x = XAtY(mid_y);
631 int gutter_width;
632 int neighbour_gap;
633 finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
634 bbox, &gutter_width, &neighbour_gap);
635 if (debug) {
636 tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n",
637 box.left(), box.bottom(), box.right(), box.top(),
638 gutter_width, neighbour_gap);
639 }
640 // Now we can make the test.
641 if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) {
642 // A good box contributes its height to the good_length.
643 good_length += box.top() - box.bottom();
644 gutters.add(gutter_width, 1);
645 // Two good boxes together contribute the gap between them
646 // to the good_length as well, as long as the gap is not
647 // too big.
648 if (prev_good_box != nullptr) {
649 int vertical_gap = box.bottom() - prev_good_box->top();
650 double size1 = sqrt(static_cast<double>(prev_good_box->area()));
651 double size2 = sqrt(static_cast<double>(box.area()));
652 if (vertical_gap < kMaxFillinMultiple * std::min(size1, size2))
653 good_length += vertical_gap;
654 if (debug) {
655 tprintf("Box and prev good, gap=%d, target %g, goodlength=%d\n",
656 vertical_gap, kMaxFillinMultiple * std::min(size1, size2),
657 good_length);
658 }
659 } else {
660 // Adjust the start to the first good box.
661 SetYStart(box.bottom());
662 }
663 prev_good_box = &box;
664 if (bbox->flow() == BTFT_TEXT_ON_IMAGE)
665 text_on_image = true;
666 } else {
667 // Get rid of boxes that are not good.
668 if (debug) {
669 tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n",
670 box.left(), box.bottom(), box.right(), box.top(),
671 gutter_width, neighbour_gap);
672 }
673 it.extract();
674 ++num_deleted_boxes;
675 }
676 }
677 if (debug) {
678 Print("Evaluating:");
679 }
680 // If there are any good boxes, do it again, except this time get rid of
681 // boxes that have a gutter that is a small fraction of the mean gutter.
682 // This filters out ends that run into a coincidental gap in the text.
683 int search_top = endpt_.y();
684 int search_bottom = startpt_.y();
685 int median_gutter = IntCastRounded(gutters.median());
686 if (gutters.get_total() > 0) {
687 prev_good_box = nullptr;
688 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
689 BLOBNBOX* bbox = it.data();
690 const TBOX& box = bbox->bounding_box();
691 int mid_y = (box.top() + box.bottom()) / 2;
692 // A good box is one where the gutter width is at least some constant
693 // fraction of the mean gutter width.
694 bool left = IsLeftTab();
695 int tab_x = XAtY(mid_y);
696 int max_gutter = kGutterMultiple * mean_height;
697 if (IsRagged()) {
698 // Ragged edges face a tougher test in that the gap must always be
699 // within the height of the blob.
700 max_gutter = kGutterToNeighbourRatio * mean_height;
701 }
702 int gutter_width;
703 int neighbour_gap;
704 finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
705 bbox, &gutter_width, &neighbour_gap);
706 // Now we can make the test.
707 if (gutter_width >= median_gutter * kMinGutterFraction) {
708 if (prev_good_box == nullptr) {
709 // Adjust the start to the first good box.
710 SetYStart(box.bottom());
711 search_bottom = box.top();
712 }
713 prev_good_box = &box;
714 search_top = box.bottom();
715 } else {
716 // Get rid of boxes that are not good.
717 if (debug) {
718 tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n",
719 box.left(), box.bottom(), box.right(), box.top(),
720 gutter_width, median_gutter);
721 }
722 it.extract();
723 ++num_deleted_boxes;
724 }
725 }
726 }
727 // If there has been a good box, adjust the end.
728 if (prev_good_box != nullptr) {
729 SetYEnd(prev_good_box->top());
730 // Compute the percentage of the vector that is occupied by good boxes.
731 int length = endpt_.y() - startpt_.y();
732 percent_score_ = 100 * good_length / length;
733 if (num_deleted_boxes > 0) {
734 needs_refit_ = true;
735 FitAndEvaluateIfNeeded(vertical, finder);
736 if (boxes_.empty())
737 return;
738 }
739 // Test the gutter over the whole vector, instead of just at the boxes.
740 int required_shift;
741 if (search_bottom > search_top) {
742 search_bottom = startpt_.y();
743 search_top = endpt_.y();
744 }
745 double min_gutter_width = kLineCountReciprocal / boxes_.length();
746 min_gutter_width += IsRagged() ? kMinRaggedGutter : kMinAlignedGutter;
747 min_gutter_width *= mean_height;
748 int max_gutter_width = IntCastRounded(min_gutter_width) + 1;
749 if (median_gutter > max_gutter_width)
750 max_gutter_width = median_gutter;
751 int gutter_width = finder->GutterWidth(search_bottom, search_top, *this,
752 text_on_image, max_gutter_width,
753 &required_shift);
754 if (gutter_width < min_gutter_width) {
755 if (debug) {
756 tprintf("Rejecting bad tab Vector with %d gutter vs %g min\n",
757 gutter_width, min_gutter_width);
758 }
759 boxes_.shallow_clear();
760 percent_score_ = 0;
761 } else if (debug) {
762 tprintf("Final gutter %d, vs limit of %g, required shift = %d\n",
763 gutter_width, min_gutter_width, required_shift);
764 }
765 } else {
766 // There are no good boxes left, so score is 0.
767 percent_score_ = 0;
768 }
769
770 if (debug) {
771 Print("Evaluation complete:");
772 }
773}
@ BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:120
int IntCastRounded(double x)
Definition: helpers.h:175
const double kMinAlignedGutter
Definition: tabvector.cpp:50
const int kGutterMultiple
Definition: tabvector.cpp:35
const double kMinRaggedGutter
Definition: tabvector.cpp:52
const double kLineCountReciprocal
Definition: tabvector.cpp:48
const int kMaxFillinMultiple
Definition: tabvector.cpp:44
const double kMinGutterFraction
Definition: tabvector.cpp:46
const int kGutterToNeighbourRatio
Definition: tabvector.cpp:37
BlobTextFlowType flow() const
Definition: blobbox.h:295
int32_t area() const
Definition: rect.h:122
int16_t height() const
Definition: rect.h:108
Definition: statistc.h:31
static bool WithinTestRegion(int detail_level, int x, int y)
int XAtY(int y) const
Definition: tabvector.h:188
void SetYEnd(int end_y)
Definition: tabvector.cpp:267
bool IsRagged() const
Definition: tabvector.h:228
void SetYStart(int start_y)
Definition: tabvector.cpp:262
void FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder)
Definition: tabvector.cpp:565

◆ extended_ymax()

int tesseract::TabVector::extended_ymax ( ) const
inline

Definition at line 151 of file tabvector.h.

151 {
152 return extended_ymax_;
153 }

◆ extended_ymin()

int tesseract::TabVector::extended_ymin ( ) const
inline

Definition at line 154 of file tabvector.h.

154 {
155 return extended_ymin_;
156 }

◆ ExtendedOverlap()

int tesseract::TabVector::ExtendedOverlap ( int  top_y,
int  bottom_y 
) const
inline

Definition at line 207 of file tabvector.h.

207 {
208 return std::min(top_y, extended_ymax_) - std::max(bottom_y, extended_ymin_);
209 }

◆ ExtendToBox()

void tesseract::TabVector::ExtendToBox ( BLOBNBOX blob)

Definition at line 238 of file tabvector.cpp.

238 {
239 TBOX new_box = new_blob->bounding_box();
240 BLOBNBOX_C_IT it(&boxes_);
241 if (!it.empty()) {
242 BLOBNBOX* blob = it.data();
243 TBOX box = blob->bounding_box();
244 while (!it.at_last() && box.top() <= new_box.top()) {
245 if (blob == new_blob)
246 return; // We have it already.
247 it.forward();
248 blob = it.data();
249 box = blob->bounding_box();
250 }
251 if (box.top() >= new_box.top()) {
252 it.add_before_stay_put(new_blob);
253 needs_refit_ = true;
254 return;
255 }
256 }
257 needs_refit_ = true;
258 it.add_after_stay_put(new_blob);
259}

◆ Fit()

bool tesseract::TabVector::Fit ( ICOORD  vertical,
bool  force_parallel 
)

Definition at line 780 of file tabvector.cpp.

780 {
781 needs_refit_ = false;
782 if (boxes_.empty()) {
783 // Don't refit something with no boxes, as that only happens
784 // in Evaluate, and we don't want to end up with a zero vector.
785 if (!force_parallel)
786 return false;
787 // If we are forcing parallel, then we just need to set the sort_key_.
788 ICOORD midpt = startpt_;
789 midpt += endpt_;
790 midpt /= 2;
791 sort_key_ = SortKey(vertical, midpt.x(), midpt.y());
792 return startpt_.y() != endpt_.y();
793 }
794 if (!force_parallel && !IsRagged()) {
795 // Use a fitted line as the vertical.
796 DetLineFit linepoints;
797 BLOBNBOX_C_IT it(&boxes_);
798 // Fit a line to all the boxes in the list.
799 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
800 BLOBNBOX* bbox = it.data();
801 const TBOX& box = bbox->bounding_box();
802 int x1 = IsRightTab() ? box.right() : box.left();
803 ICOORD boxpt(x1, box.bottom());
804 linepoints.Add(boxpt);
805 if (it.at_last()) {
806 ICOORD top_pt(x1, box.top());
807 linepoints.Add(top_pt);
808 }
809 }
810 linepoints.Fit(&startpt_, &endpt_);
811 if (startpt_.y() != endpt_.y()) {
812 vertical = endpt_;
813 vertical -= startpt_;
814 }
815 }
816 int start_y = startpt_.y();
817 int end_y = endpt_.y();
818 sort_key_ = IsLeftTab() ? INT32_MAX : -INT32_MAX;
819 BLOBNBOX_C_IT it(&boxes_);
820 // Choose a line parallel to the vertical such that all boxes are on the
821 // correct side of it.
822 mean_width_ = 0;
823 int width_count = 0;
824 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
825 BLOBNBOX* bbox = it.data();
826 const TBOX& box = bbox->bounding_box();
827 mean_width_ += box.width();
828 ++width_count;
829 int x1 = IsRightTab() ? box.right() : box.left();
830 // Test both the bottom and the top, as one will be more extreme, depending
831 // on the direction of skew.
832 int bottom_y = box.bottom();
833 int top_y = box.top();
834 int key = SortKey(vertical, x1, bottom_y);
835 if (IsLeftTab() == (key < sort_key_)) {
836 sort_key_ = key;
837 startpt_ = ICOORD(x1, bottom_y);
838 }
839 key = SortKey(vertical, x1, top_y);
840 if (IsLeftTab() == (key < sort_key_)) {
841 sort_key_ = key;
842 startpt_ = ICOORD(x1, top_y);
843 }
844 if (it.at_first())
845 start_y = bottom_y;
846 if (it.at_last())
847 end_y = top_y;
848 }
849 if (width_count > 0) {
850 mean_width_ = (mean_width_ + width_count - 1) / width_count;
851 }
852 endpt_ = startpt_ + vertical;
853 needs_evaluation_ = true;
854 if (start_y != end_y) {
855 // Set the ends of the vector to fully include the first and last blobs.
856 startpt_.set_x(XAtY(vertical, sort_key_, start_y));
857 startpt_.set_y(start_y);
858 endpt_.set_x(XAtY(vertical, sort_key_, end_y));
859 endpt_.set_y(end_y);
860 return true;
861 }
862 return false;
863}
integer coordinate
Definition: points.h:32
void set_x(int16_t xin)
rewrite function
Definition: points.h:61
void set_y(int16_t yin)
rewrite function
Definition: points.h:65
int16_t width() const
Definition: rect.h:115
bool IsRightTab() const
Definition: tabvector.h:216

◆ FitAndEvaluateIfNeeded()

void tesseract::TabVector::FitAndEvaluateIfNeeded ( const ICOORD vertical,
TabFind finder 
)

Definition at line 565 of file tabvector.cpp.

566 {
567 if (needs_refit_)
568 Fit(vertical, true);
569 if (needs_evaluation_)
570 Evaluate(vertical, finder);
571}
void Evaluate(const ICOORD &vertical, TabFind *finder)
Definition: tabvector.cpp:579
bool Fit(ICOORD vertical, bool force_parallel)
Definition: tabvector.cpp:780

◆ FitVector()

TabVector * tesseract::TabVector::FitVector ( TabAlignment  alignment,
ICOORD  vertical,
int  extended_start_y,
int  extended_end_y,
BLOBNBOX_CLIST *  good_points,
int *  vertical_x,
int *  vertical_y 
)
static

Definition at line 176 of file tabvector.cpp.

179 {
180 auto* vector = new TabVector(extended_start_y, extended_end_y,
181 alignment, good_points);
182 if (!vector->Fit(vertical, false)) {
183 delete vector;
184 return nullptr;
185 }
186 if (!vector->IsRagged()) {
187 vertical = vector->endpt_ - vector->startpt_;
188 int weight = vector->BoxCount();
189 *vertical_x += vertical.x() * weight;
190 *vertical_y += vertical.y() * weight;
191 }
192 return vector;
193}

◆ Freeze()

void tesseract::TabVector::Freeze ( )
inline

Definition at line 249 of file tabvector.h.

249 {
250 boxes_.shallow_clear();
251 }

◆ GetSinglePartner()

TabVector * tesseract::TabVector::GetSinglePartner ( )

Definition at line 866 of file tabvector.cpp.

866 {
867 if (!partners_.singleton())
868 return nullptr;
869 TabVector_C_IT partner_it(&partners_);
870 TabVector* partner = partner_it.data();
871 return partner;
872}

◆ intersects_other_lines()

bool tesseract::TabVector::intersects_other_lines ( ) const
inline

Definition at line 178 of file tabvector.h.

178 {
179 return intersects_other_lines_;
180 }

◆ IsAPartner()

bool tesseract::TabVector::IsAPartner ( const TabVector other)

Definition at line 497 of file tabvector.cpp.

497 {
498 TabVector_C_IT it(&partners_);
499 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
500 if (it.data() == other)
501 return true;
502 }
503 return false;
504}

◆ IsCenterTab()

bool tesseract::TabVector::IsCenterTab ( ) const
inline

Definition at line 224 of file tabvector.h.

224 {
225 return alignment_ == TA_CENTER_JUSTIFIED;
226 }
@ TA_CENTER_JUSTIFIED
Definition: tabvector.h:47

◆ IsLeftOf()

bool tesseract::TabVector::IsLeftOf ( const TabVector other) const
inline

Definition at line 234 of file tabvector.h.

234 {
235 return sort_key_ < other.sort_key_;
236 }

◆ IsLeftTab()

bool tesseract::TabVector::IsLeftTab ( ) const
inline

Definition at line 212 of file tabvector.h.

212 {
213 return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED;
214 }

◆ IsRagged()

bool tesseract::TabVector::IsRagged ( ) const
inline

Definition at line 228 of file tabvector.h.

228 {
229 return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED;
230 }

◆ IsRightTab()

bool tesseract::TabVector::IsRightTab ( ) const
inline

Definition at line 216 of file tabvector.h.

216 {
217 return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED;
218 }

◆ IsSeparator()

bool tesseract::TabVector::IsSeparator ( ) const
inline

Definition at line 220 of file tabvector.h.

220 {
221 return alignment_ == TA_SEPARATOR;
222 }
@ TA_SEPARATOR
Definition: tabvector.h:50

◆ mean_width()

int tesseract::TabVector::mean_width ( ) const
inline

Definition at line 160 of file tabvector.h.

160 {
161 return mean_width_;
162 }

◆ MergeSimilarTabVectors()

void tesseract::TabVector::MergeSimilarTabVectors ( const ICOORD vertical,
TabVector_LIST *  vectors,
BlobGrid grid 
)
static

Definition at line 353 of file tabvector.cpp.

355 {
356 TabVector_IT it1(vectors);
357 for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
358 TabVector* v1 = it1.data();
359 TabVector_IT it2(it1);
360 for (it2.forward(); !it2.at_first(); it2.forward()) {
361 TabVector* v2 = it2.data();
362 if (v2->SimilarTo(vertical, *v1, grid)) {
363 // Merge into the forward one, in case the combined vector now
364 // overlaps one in between.
366 v2->Print("Merging");
367 v1->Print("by deleting");
368 }
369 v2->MergeWith(vertical, it1.extract());
371 v2->Print("Producing");
372 }
373 ICOORD merged_vector = v2->endpt();
374 merged_vector -= v2->startpt();
375 if (textord_debug_tabfind && abs(merged_vector.x()) > 100) {
376 v2->Print("Garbage result of merge?");
377 }
378 break;
379 }
380 }
381 }
382}

◆ MergeWith()

void tesseract::TabVector::MergeWith ( const ICOORD vertical,
TabVector other 
)

Definition at line 450 of file tabvector.cpp.

450 {
451 extended_ymin_ = std::min(extended_ymin_, other->extended_ymin_);
452 extended_ymax_ = std::max(extended_ymax_, other->extended_ymax_);
453 if (other->IsRagged()) {
454 alignment_ = other->alignment_;
455 }
456 // Merge sort the two lists of boxes.
457 BLOBNBOX_C_IT it1(&boxes_);
458 BLOBNBOX_C_IT it2(&other->boxes_);
459 while (!it2.empty()) {
460 BLOBNBOX* bbox2 = it2.extract();
461 it2.forward();
462 TBOX box2 = bbox2->bounding_box();
463 BLOBNBOX* bbox1 = it1.data();
464 TBOX box1 = bbox1->bounding_box();
465 while (box1.bottom() < box2.bottom() && !it1.at_last()) {
466 it1.forward();
467 bbox1 = it1.data();
468 box1 = bbox1->bounding_box();
469 }
470 if (box1.bottom() < box2.bottom()) {
471 it1.add_to_end(bbox2);
472 } else if (bbox1 != bbox2) {
473 it1.add_before_stay_put(bbox2);
474 }
475 }
476 Fit(vertical, true);
477 other->Delete(this);
478}

◆ Partnerless()

bool tesseract::TabVector::Partnerless ( )
inline

Definition at line 239 of file tabvector.h.

239 {
240 return partners_.empty();
241 }

◆ partners()

TabVector_CLIST * tesseract::TabVector::partners ( )
inline

Definition at line 169 of file tabvector.h.

169 {
170 return &partners_;
171 }

◆ Print()

void tesseract::TabVector::Print ( const char *  prefix)

Definition at line 517 of file tabvector.cpp.

517 {
518 tprintf(
519 "%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d,"
520 " partners=%d\n",
521 prefix, kAlignmentNames[alignment_], startpt_.x(), startpt_.y(),
522 endpt_.x(), endpt_.y(), mean_width_, percent_score_, sort_key_,
523 boxes_.length(), partners_.length());
524}

◆ ReflectInYAxis()

void tesseract::TabVector::ReflectInYAxis ( )
inline

Definition at line 264 of file tabvector.h.

264 {
265 startpt_.set_x(-startpt_.x());
266 endpt_.set_x(-endpt_.x());
267 sort_key_ = -sort_key_;
268 if (alignment_ == TA_LEFT_ALIGNED)
269 alignment_ = TA_RIGHT_ALIGNED;
270 else if (alignment_ == TA_RIGHT_ALIGNED)
271 alignment_ = TA_LEFT_ALIGNED;
272 if (alignment_ == TA_LEFT_RAGGED)
273 alignment_ = TA_RIGHT_RAGGED;
274 else if (alignment_ == TA_RIGHT_RAGGED)
275 alignment_ = TA_LEFT_RAGGED;
276 }

◆ Rotate()

void tesseract::TabVector::Rotate ( const FCOORD rotation)

Definition at line 273 of file tabvector.cpp.

273 {
274 startpt_.rotate(rotation);
275 endpt_.rotate(rotation);
276 int dx = endpt_.x() - startpt_.x();
277 int dy = endpt_.y() - startpt_.y();
278 if ((dy < 0 && abs(dy) > abs(dx)) || (dx < 0 && abs(dx) > abs(dy))) {
279 // Need to flip start/end.
280 ICOORD tmp = startpt_;
281 startpt_ = endpt_;
282 endpt_ = tmp;
283 }
284}
void rotate(const FCOORD &vec)
Definition: points.h:536

◆ set_bottom_constraints()

void tesseract::TabVector::set_bottom_constraints ( TabConstraint_LIST *  constraints)
inline

Definition at line 166 of file tabvector.h.

166 {
167 bottom_constraints_ = constraints;
168 }

◆ set_endpt()

void tesseract::TabVector::set_endpt ( const ICOORD end)
inline

Definition at line 175 of file tabvector.h.

175 {
176 endpt_ = end;
177 }

◆ set_intersects_other_lines()

void tesseract::TabVector::set_intersects_other_lines ( bool  value)
inline

Definition at line 181 of file tabvector.h.

181 {
182 intersects_other_lines_ = value;
183 }

◆ set_startpt()

void tesseract::TabVector::set_startpt ( const ICOORD start)
inline

Definition at line 172 of file tabvector.h.

172 {
173 startpt_ = start;
174 }

◆ set_top_constraints()

void tesseract::TabVector::set_top_constraints ( TabConstraint_LIST *  constraints)
inline

Definition at line 163 of file tabvector.h.

163 {
164 top_constraints_ = constraints;
165 }

◆ SetupConstraints()

void tesseract::TabVector::SetupConstraints ( )

Definition at line 288 of file tabvector.cpp.

288 {
291}
static void CreateConstraint(TabVector *vector, bool is_top)
Definition: tabvector.cpp:63

◆ SetupPartnerConstraints() [1/2]

void tesseract::TabVector::SetupPartnerConstraints ( )

Definition at line 294 of file tabvector.cpp.

294 {
295 // With the first and last partner, we want a common bottom and top,
296 // respectively, and for each change of partner, we want a common
297 // top of first with bottom of next.
298 TabVector_C_IT it(&partners_);
299 TabVector* prev_partner = nullptr;
300 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
301 TabVector* partner = it.data();
302 if (partner->top_constraints_ == nullptr ||
303 partner->bottom_constraints_ == nullptr) {
304 partner->Print("Impossible: has no constraints");
305 Print("This vector has it as a partner");
306 continue;
307 }
308 if (prev_partner == nullptr) {
309 // This is the first partner, so common bottom.
310 if (TabConstraint::CompatibleConstraints(bottom_constraints_,
311 partner->bottom_constraints_))
312 TabConstraint::MergeConstraints(bottom_constraints_,
313 partner->bottom_constraints_);
314 } else {
315 // We need prev top to be common with partner bottom.
316 if (TabConstraint::CompatibleConstraints(prev_partner->top_constraints_,
317 partner->bottom_constraints_))
318 TabConstraint::MergeConstraints(prev_partner->top_constraints_,
319 partner->bottom_constraints_);
320 }
321 prev_partner = partner;
322 if (it.at_last()) {
323 // This is the last partner, so common top.
324 if (TabConstraint::CompatibleConstraints(top_constraints_,
325 partner->top_constraints_))
326 TabConstraint::MergeConstraints(top_constraints_,
327 partner->top_constraints_);
328 }
329 }
330}
static void MergeConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2)
Definition: tabvector.cpp:92
static bool CompatibleConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2)
Definition: tabvector.cpp:75

◆ SetupPartnerConstraints() [2/2]

void tesseract::TabVector::SetupPartnerConstraints ( TabVector partner)

Definition at line 333 of file tabvector.cpp.

333 {
334 if (TabConstraint::CompatibleConstraints(bottom_constraints_,
335 partner->bottom_constraints_))
336 TabConstraint::MergeConstraints(bottom_constraints_,
337 partner->bottom_constraints_);
338 if (TabConstraint::CompatibleConstraints(top_constraints_,
339 partner->top_constraints_))
340 TabConstraint::MergeConstraints(top_constraints_,
341 partner->top_constraints_);
342}

◆ SetYEnd()

void tesseract::TabVector::SetYEnd ( int  end_y)

Definition at line 267 of file tabvector.cpp.

267 {
268 endpt_.set_x(XAtY(end_y));
269 endpt_.set_y(end_y);
270}

◆ SetYStart()

void tesseract::TabVector::SetYStart ( int  start_y)

Definition at line 262 of file tabvector.cpp.

262 {
263 startpt_.set_x(XAtY(start_y));
264 startpt_.set_y(start_y);
265}

◆ ShallowCopy()

TabVector * tesseract::TabVector::ShallowCopy ( ) const

Definition at line 225 of file tabvector.cpp.

225 {
226 auto* copy = new TabVector();
227 copy->startpt_ = startpt_;
228 copy->endpt_ = endpt_;
229 copy->alignment_ = alignment_;
230 copy->extended_ymax_ = extended_ymax_;
231 copy->extended_ymin_ = extended_ymin_;
232 copy->intersects_other_lines_ = intersects_other_lines_;
233 return copy;
234}

◆ SimilarTo()

bool tesseract::TabVector::SimilarTo ( const ICOORD vertical,
const TabVector other,
BlobGrid grid 
) const

Definition at line 386 of file tabvector.cpp.

387 {
388 if ((IsRightTab() && other.IsRightTab()) ||
389 (IsLeftTab() && other.IsLeftTab())) {
390 // If they don't overlap, at least in extensions, then there is no chance.
391 if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0)
392 return false;
393 // A fast approximation to the scale factor of the sort_key_.
394 int v_scale = abs(vertical.y());
395 if (v_scale == 0)
396 v_scale = 1;
397 // If they are close enough, then OK.
398 if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ &&
399 sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_)
400 return true;
401 // Ragged tabs get a bigger threshold.
402 if (!IsRagged() || !other.IsRagged() ||
403 sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ ||
404 sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_)
405 return false;
406 if (grid == nullptr) {
407 // There is nothing else to test!
408 return true;
409 }
410 // If there is nothing in the rectangle between the vector that is going to
411 // move, and the place it is moving to, then they can be merged.
412 // Setup a vertical search for any blob.
413 const TabVector* mover = (IsRightTab() &&
414 sort_key_ < other.sort_key_) ? this : &other;
415 int top_y = mover->endpt_.y();
416 int bottom_y = mover->startpt_.y();
417 int left = std::min(mover->XAtY(top_y), mover->XAtY(bottom_y));
418 int right = std::max(mover->XAtY(top_y), mover->XAtY(bottom_y));
419 int shift = abs(sort_key_ - other.sort_key_) / v_scale;
420 if (IsRightTab()) {
421 right += shift;
422 } else {
423 left -= shift;
424 }
425
426 GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(grid);
427 vsearch.StartVerticalSearch(left, right, top_y);
428 BLOBNBOX* blob;
429 while ((blob = vsearch.NextVerticalSearch(true)) != nullptr) {
430 const TBOX& box = blob->bounding_box();
431 if (box.top() > bottom_y)
432 return true; // Nothing found.
433 if (box.bottom() < top_y)
434 continue; // Doesn't overlap.
435 int left_at_box = XAtY(box.bottom());
436 int right_at_box = left_at_box;
437 if (IsRightTab())
438 right_at_box += shift;
439 else
440 left_at_box -= shift;
441 if (std::min(right_at_box, static_cast<int>(box.right())) > std::max(left_at_box, static_cast<int>(box.left())))
442 return false;
443 }
444 return true; // Nothing found.
445 }
446 return false;
447}
const int kSimilarVectorDist
Definition: tabvector.cpp:39
const int kSimilarRaggedDist
Definition: tabvector.cpp:42
int ExtendedOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:207

◆ sort_key()

int tesseract::TabVector::sort_key ( ) const
inline

Definition at line 157 of file tabvector.h.

157 {
158 return sort_key_;
159 }

◆ SortKey()

static int tesseract::TabVector::SortKey ( const ICOORD vertical,
int  x,
int  y 
)
inlinestatic

Definition at line 279 of file tabvector.h.

279 {
280 ICOORD pt(x, y);
281 return pt * vertical;
282 }

◆ SortVectorsByKey()

static int tesseract::TabVector::SortVectorsByKey ( const void *  v1,
const void *  v2 
)
inlinestatic

Definition at line 293 of file tabvector.h.

293 {
294 const TabVector* tv1 = *static_cast<const TabVector* const*>(v1);
295 const TabVector* tv2 = *static_cast<const TabVector* const*>(v2);
296 return tv1->sort_key_ - tv2->sort_key_;
297 }

◆ startpt()

const ICOORD & tesseract::TabVector::startpt ( ) const
inline

Definition at line 145 of file tabvector.h.

145 {
146 return startpt_;
147 }

◆ VerticalTextlinePartner()

TabVector * tesseract::TabVector::VerticalTextlinePartner ( )

Definition at line 876 of file tabvector.cpp.

876 {
877 if (!partners_.singleton())
878 return nullptr;
879 TabVector_C_IT partner_it(&partners_);
880 TabVector* partner = partner_it.data();
881 BLOBNBOX_C_IT box_it1(&boxes_);
882 BLOBNBOX_C_IT box_it2(&partner->boxes_);
883 // Count how many boxes are also in the other list.
884 // At the same time, gather the mean width and median vertical gap.
885 if (textord_debug_tabfind > 1) {
886 Print("Testing for vertical text");
887 partner->Print(" partner");
888 }
889 int num_matched = 0;
890 int num_unmatched = 0;
891 int total_widths = 0;
892 int width = startpt().x() - partner->startpt().x();
893 if (width < 0)
894 width = -width;
895 STATS gaps(0, width * 2);
896 BLOBNBOX* prev_bbox = nullptr;
897 box_it2.mark_cycle_pt();
898 for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
899 BLOBNBOX* bbox = box_it1.data();
900 TBOX box = bbox->bounding_box();
901 if (prev_bbox != nullptr) {
902 gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1);
903 }
904 while (!box_it2.cycled_list() && box_it2.data() != bbox &&
905 box_it2.data()->bounding_box().bottom() < box.bottom()) {
906 box_it2.forward();
907 }
908 if (!box_it2.cycled_list() && box_it2.data() == bbox &&
909 bbox->region_type() >= BRT_UNKNOWN &&
910 (prev_bbox == nullptr || prev_bbox->region_type() >= BRT_UNKNOWN))
911 ++num_matched;
912 else
913 ++num_unmatched;
914 total_widths += box.width();
915 prev_bbox = bbox;
916 }
917 if (num_unmatched + num_matched == 0) return nullptr;
918 double avg_width = total_widths * 1.0 / (num_unmatched + num_matched);
919 double max_gap = textord_tabvector_vertical_gap_fraction * avg_width;
920 int min_box_match = static_cast<int>((num_matched + num_unmatched) *
922 bool is_vertical = (gaps.get_total() > 0 &&
923 num_matched >= min_box_match &&
924 gaps.median() <= max_gap);
925 if (textord_debug_tabfind > 1) {
926 tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d "
927 "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n",
928 gaps.get_total(), num_matched, num_unmatched, min_box_match,
929 gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No");
930 }
931 return (is_vertical) ? partner : nullptr;
932}
@ BRT_UNKNOWN
Definition: blobbox.h:78
double textord_tabvector_vertical_gap_fraction
Definition: tabvector.cpp:55
double textord_tabvector_vertical_box_ratio
Definition: tabvector.cpp:58
BlobRegionType region_type() const
Definition: blobbox.h:283
const ICOORD & startpt() const
Definition: tabvector.h:145

◆ VOverlap() [1/2]

int tesseract::TabVector::VOverlap ( const TabVector other) const
inline

Definition at line 198 of file tabvector.h.

198 {
199 return std::min(other.endpt_.y(), endpt_.y()) -
200 std::max(other.startpt_.y(), startpt_.y());
201 }

◆ VOverlap() [2/2]

int tesseract::TabVector::VOverlap ( int  top_y,
int  bottom_y 
) const
inline

Definition at line 203 of file tabvector.h.

203 {
204 return std::min(top_y, static_cast<int>(endpt_.y())) - std::max(bottom_y, static_cast<int>(startpt_.y()));
205 }

◆ XAtY() [1/2]

static int tesseract::TabVector::XAtY ( const ICOORD vertical,
int  sort_key,
int  y 
)
inlinestatic

Definition at line 285 of file tabvector.h.

285 {
286 if (vertical.y() != 0)
287 return (vertical.x() * y + sort_key) / vertical.y();
288 else
289 return sort_key;
290 }
int sort_key() const
Definition: tabvector.h:157

◆ XAtY() [2/2]

int tesseract::TabVector::XAtY ( int  y) const
inline

Definition at line 188 of file tabvector.h.

188 {
189 int height = endpt_.y() - startpt_.y();
190 if (height != 0)
191 return (y - startpt_.y()) * (endpt_.x() - startpt_.x()) / height +
192 startpt_.x();
193 else
194 return startpt_.x();
195 }

◆ XYFlip()

void tesseract::TabVector::XYFlip ( )
inline

Definition at line 254 of file tabvector.h.

254 {
255 int x = startpt_.y();
256 startpt_.set_y(startpt_.x());
257 startpt_.set_x(x);
258 x = endpt_.y();
259 endpt_.set_y(endpt_.x());
260 endpt_.set_x(x);
261 }

The documentation for this class was generated from the following files: