tesseract 4.1.1
Loading...
Searching...
No Matches
tabvector.h
Go to the documentation of this file.
1
2// File: tabvector.h
3// Description: Class to hold a near-vertical vector representing a tab-stop.
4// Author: Ray Smith
5//
6// (C) Copyright 2008, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#ifndef TESSERACT_TEXTORD_TABVECTOR_H_
20#define TESSERACT_TEXTORD_TABVECTOR_H_
21
22#include "blobgrid.h"
23#include "clst.h"
24#include "elst.h"
25#include "elst2.h"
26#include "rect.h"
27#include "bbgrid.h"
28
29#include <algorithm>
30
31class BLOBNBOX;
32class ScrollView;
33
34namespace tesseract {
35
36
38 "Max fraction of mean blob width allowed for vertical gaps in vertical text");
40 "Fraction of box matches required to declare a line vertical");
41
42// The alignment type that a tab vector represents.
43// Keep this enum synced with kAlignmentNames in tabvector.cpp.
52};
53
54// Forward declarations. The classes use their own list types, so we
55// need to make the list types first.
56class TabFind;
57class TabVector;
58class TabConstraint;
59
60ELIST2IZEH(TabVector)
61CLISTIZEH(TabVector)
62ELISTIZEH(TabConstraint)
63
64// TabConstraint is a totally self-contained class to maintain
65// a list of [min,max] constraints, each referring to a TabVector.
66// The constraints are manipulated through static methods that act
67// on a list of constraints. The list itself is cooperatively owned
68// by the TabVectors of the constraints on the list and managed
69// by implicit reference counting via the elements of the list.
70class TabConstraint : public ELIST_LINK {
71 public:
72 // This empty constructor is here only so that the class can be ELISTIZED.
73 // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
74 // and eliminate CLASSNAME##_copier.
75 TabConstraint() = default;
76
77 // Create a constraint for the top or bottom of this TabVector.
78 static void CreateConstraint(TabVector* vector, bool is_top);
79
80 // Test to see if the constraints are compatible enough to merge.
81 static bool CompatibleConstraints(TabConstraint_LIST* list1,
82 TabConstraint_LIST* list2);
83
84 // Merge the lists of constraints and update the TabVector pointers.
85 // The second list is deleted.
86 static void MergeConstraints(TabConstraint_LIST* list1,
87 TabConstraint_LIST* list2);
88
89 // Set all the tops and bottoms as appropriate to a mean of the
90 // constrained range. Delete all the constraints and list.
91 static void ApplyConstraints(TabConstraint_LIST* constraints);
92
93 private:
94 TabConstraint(TabVector* vector, bool is_top);
95
96 // Get the max of the mins and the min of the maxes.
97 static void GetConstraints(TabConstraint_LIST* constraints,
98 int* y_min, int* y_max);
99
100 // The TabVector this constraint applies to.
101 TabVector* vector_;
102 // If true then we refer to the top of the vector_.
103 bool is_top_;
104 // The allowed range of this vector_.
105 int y_min_;
106 int y_max_;
107};
108
109// Class to hold information about a single vector
110// that represents a tab stop or a rule line.
111class TabVector : public ELIST2_LINK {
112 public:
113 // TODO(rays) fix this in elst.h line 1076, where it should use the
114 // copy constructor instead of operator=.
115 TabVector() = default;
116 ~TabVector() = default;
117
118 // Public factory to build a TabVector from a list of boxes.
119 // The TabVector will be of the given alignment type.
120 // The input vertical vector is used in fitting, and the output
121 // vertical_x, vertical_y have the resulting line vector added to them
122 // if the alignment is not ragged.
123 // The extended_start_y and extended_end_y are the maximum possible
124 // extension to the line segment that can be used to align with others.
125 // The input CLIST of BLOBNBOX good_points is consumed and taken over.
126 static TabVector* FitVector(TabAlignment alignment, ICOORD vertical,
127 int extended_start_y, int extended_end_y,
128 BLOBNBOX_CLIST* good_points,
129 int* vertical_x, int* vertical_y);
130
131 // Build a ragged TabVector by copying another's direction, shifting it
132 // to match the given blob, and making its initial extent the height
133 // of the blob, but its extended bounds from the bounds of the original.
134 TabVector(const TabVector& src, TabAlignment alignment,
135 const ICOORD& vertical_skew, BLOBNBOX* blob);
136
137 // Copies basic attributes of a tab vector for simple operations.
138 // Copies things such as startpt, endpt, range, width.
139 // Does not copy things such as partners, boxes, or constraints.
140 // This is useful if you only need vector information for processing, such
141 // as in the table detection code.
142 TabVector* ShallowCopy() const;
143
144 // Simple accessors.
145 const ICOORD& startpt() const {
146 return startpt_;
147 }
148 const ICOORD& endpt() const {
149 return endpt_;
150 }
151 int extended_ymax() const {
152 return extended_ymax_;
153 }
154 int extended_ymin() const {
155 return extended_ymin_;
156 }
157 int sort_key() const {
158 return sort_key_;
159 }
160 int mean_width() const {
161 return mean_width_;
162 }
163 void set_top_constraints(TabConstraint_LIST* constraints) {
164 top_constraints_ = constraints;
165 }
166 void set_bottom_constraints(TabConstraint_LIST* constraints) {
167 bottom_constraints_ = constraints;
168 }
169 TabVector_CLIST* partners() {
170 return &partners_;
171 }
172 void set_startpt(const ICOORD& start) {
173 startpt_ = start;
174 }
175 void set_endpt(const ICOORD& end) {
176 endpt_ = end;
177 }
179 return intersects_other_lines_;
180 }
181 void set_intersects_other_lines(bool value) {
182 intersects_other_lines_ = value;
183 }
184
185 // Inline quasi-accessors that require some computation.
186
187 // Compute the x coordinate at the given y coordinate.
188 int XAtY(int y) const {
189 const int height = endpt_.y() - startpt_.y();
190 // With zero height there is no slope to follow, so use the start x.
191 if (height == 0)
192 return startpt_.x();
193 const int width = endpt_.x() - startpt_.x();
194 return (y - startpt_.y()) * width / height + startpt_.x();
195 }
196
197 // Compute the vertical overlap with the other TabVector.
198 int VOverlap(const TabVector& other) const {
199 return std::min(other.endpt_.y(), endpt_.y()) -
200 std::max(other.startpt_.y(), startpt_.y());
201 }
202 // Compute the vertical overlap with the given y bounds.
203 int VOverlap(int top_y, int bottom_y) const {
204 return std::min(top_y, static_cast<int>(endpt_.y())) - std::max(bottom_y, static_cast<int>(startpt_.y()));
205 }
206 // Compute the extended vertical overlap with the given y bounds.
207 int ExtendedOverlap(int top_y, int bottom_y) const {
208 return std::min(top_y, extended_ymax_) - std::max(bottom_y, extended_ymin_);
209 }
210
211 // Return true if this is a left tab stop, either aligned, or ragged.
212 bool IsLeftTab() const {
213 return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED;
214 }
215 // Return true if this is a right tab stop, either aligned, or ragged.
216 bool IsRightTab() const {
217 return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED;
218 }
219 // Return true if this is a separator.
220 bool IsSeparator() const {
221 return alignment_ == TA_SEPARATOR;
222 }
223 // Return true if this is a center aligned tab stop.
224 bool IsCenterTab() const {
225 return alignment_ == TA_CENTER_JUSTIFIED;
226 }
227 // Return true if this is a ragged tab stop, either left or right.
228 bool IsRagged() const {
229 return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED;
230 }
231
232 // Return true if this vector is to the left of the other in terms
233 // of sort_key_.
234 bool IsLeftOf(const TabVector& other) const {
235 return sort_key_ < other.sort_key_;
236 }
237
238 // Return true if the vector has no partners.
239 bool Partnerless() {
240 return partners_.empty();
241 }
242
243 // Return the number of tab boxes in this vector.
244 int BoxCount() {
245 return boxes_.length();
246 }
247
248 // Lock the vector from refits by clearing the boxes_ list.
249 void Freeze() {
250 boxes_.shallow_clear();
251 }
252
253 // Flip x and y on the ends so a vector can be created from flipped input.
254 void XYFlip() {
255 int x = startpt_.y();
256 startpt_.set_y(startpt_.x());
257 startpt_.set_x(x);
258 x = endpt_.y();
259 endpt_.set_y(endpt_.x());
260 endpt_.set_x(x);
261 }
262
263 // Reflect the tab vector in the y-axis.
265 startpt_.set_x(-startpt_.x());
266 endpt_.set_x(-endpt_.x());
267 sort_key_ = -sort_key_;
268 if (alignment_ == TA_LEFT_ALIGNED)
269 alignment_ = TA_RIGHT_ALIGNED;
270 else if (alignment_ == TA_RIGHT_ALIGNED)
271 alignment_ = TA_LEFT_ALIGNED;
272 if (alignment_ == TA_LEFT_RAGGED)
273 alignment_ = TA_RIGHT_RAGGED;
274 else if (alignment_ == TA_RIGHT_RAGGED)
275 alignment_ = TA_LEFT_RAGGED;
276 }
277
278 // Separate function to compute the sort key for a given coordinate pair.
279 static int SortKey(const ICOORD& vertical, int x, int y) {
280 ICOORD pt(x, y);
281 return pt * vertical;
282 }
283
284 // Return the x at the given y for the given sort key.
285 static int XAtY(const ICOORD& vertical, int sort_key, int y) {
286 // Guard the division: with a zero y component the key is returned as-is.
287 if (vertical.y() == 0)
288 return sort_key;
289 return (vertical.x() * y + sort_key) / vertical.y();
290 }
291
292 // Sort function for E2LIST::sort to sort by sort_key_.
293 static int SortVectorsByKey(const void* v1, const void* v2) {
294 const int key1 = (*static_cast<const TabVector* const*>(v1))->sort_key_;
295 const int key2 = (*static_cast<const TabVector* const*>(v2))->sort_key_;
296 return (key1 > key2) - (key1 < key2);  // -1/0/1; no subtraction, so no int overflow.
297 }
298
299 // More complex members.
300
301 // Extend this vector to include the supplied blob if it doesn't
302 // already have it.
303 void ExtendToBox(BLOBNBOX* blob);
304
305 // Set the ycoord of the start and move the xcoord to match.
306 void SetYStart(int start_y);
307 // Set the ycoord of the end and move the xcoord to match.
308 void SetYEnd(int end_y);
309
310 // Rotate the ends by the given vector.
311 void Rotate(const FCOORD& rotation);
312
313 // Setup the initial constraints, being the limits of
314 // the vector and the extended ends.
315 void SetupConstraints();
316
317 // Setup the constraints between the partners of this TabVector.
319
320 // Setup the constraints between this and its partner.
321 void SetupPartnerConstraints(TabVector* partner);
322
323 // Use the constraints to modify the top and bottom.
324 void ApplyConstraints();
325
326 // Merge close tab vectors of the same side that overlap.
327 static void MergeSimilarTabVectors(const ICOORD& vertical,
328 TabVector_LIST* vectors, BlobGrid* grid);
329
330 // Return true if this vector is the same side, overlaps, and close
331 // enough to the other to be merged.
332 bool SimilarTo(const ICOORD& vertical,
333 const TabVector& other, BlobGrid* grid) const;
334
335 // Eat the other TabVector into this and delete it.
336 void MergeWith(const ICOORD& vertical, TabVector* other);
337
338 // Add a new element to the list of partner TabVectors.
339 // Partners must be added in order of increasing y coordinate of the text line
340 // that makes them partners.
341 // Groups of identical partners are merged into one.
342 void AddPartner(TabVector* partner);
343
344 // Return true if other is a partner of this.
345 bool IsAPartner(const TabVector* other);
346
347 // Print basic information about this tab vector.
348 void Print(const char* prefix);
349
350 // Print basic information about this tab vector and every box in it.
351 void Debug(const char* prefix);
352
353 // Draw this tabvector in place in the given window.
354 void Display(ScrollView* tab_win);
355
356 // Refit the line and/or re-evaluate the vector if the dirty flags are set.
357 void FitAndEvaluateIfNeeded(const ICOORD& vertical, TabFind* finder);
358
359 // Evaluate the vector in terms of coverage of its length by good-looking
360 // box edges. A good looking box is one where its nearest neighbour on the
361 // inside is nearer than half the distance its nearest neighbour on the
362 // outside of the putative column. Bad boxes are removed from the line.
363 // A second pass then further filters boxes by requiring that the gutter
364 // width be a minimum fraction of the mean gutter along the line.
365 void Evaluate(const ICOORD& vertical, TabFind* finder);
366
367 // (Re)Fit a line to the stored points. Returns false if the line
368 // is degenerate. Although the TabVector code mostly doesn't care about the
369 // direction of lines, XAtY would give silly results for a horizontal line.
370 // The class is mostly aimed at use for vertical lines representing
371 // horizontal tab stops.
372 bool Fit(ICOORD vertical, bool force_parallel);
373
374 // Return the partner of this TabVector if the vector qualifies as
375 // being a vertical text line, otherwise nullptr.
377
378 // Return the matching tabvector if there is exactly one partner, or
379 // nullptr otherwise. This can be used after matching is done, eg. by
380 // VerticalTextlinePartner(), without checking if the line is vertical.
382
383 private:
384 // Constructor is private as the static factory is the external way
385 // to build a TabVector.
387 TabAlignment alignment, BLOBNBOX_CLIST* boxes);
388
389 // Delete this, but first, repoint all the partners to point to
390 // replacement. If replacement is nullptr, then partner relationships
391 // are removed.
392 void Delete(TabVector* replacement);
393
394 private:
395 // The bottom of the tab line.
396 ICOORD startpt_;
397 // The top of the tab line.
398 ICOORD endpt_;
399 // The lowest y that the vector might extend to.
400 int extended_ymin_ = 0;
401 // The highest y that the vector might extend to.
402 int extended_ymax_ = 0;
403 // Perpendicular distance of vector from a given vertical for sorting.
404 int sort_key_ = 0;
405 // Result of Evaluate 0-100. Coverage of line with good boxes.
406 int percent_score_ = 0;
407 // The mean width of the blobs. Meaningful only for separator lines.
408 int mean_width_ = 0;
409 // True if the boxes_ list has been modified, so a refit is needed.
410 bool needs_refit_ = false;
411 // True if a fit has been done, so re-evaluation is needed.
412 bool needs_evaluation_ = false;
413 // True if a separator line intersects at least 2 other lines.
414 bool intersects_other_lines_ = false;
415 // The type of this TabVector.
416 TabAlignment alignment_ = TA_LEFT_ALIGNED;
417 // The list of boxes whose edges are aligned at this TabVector.
418 BLOBNBOX_CLIST boxes_;
419 // List of TabVectors that have a connection with this via a text line.
420 TabVector_CLIST partners_;
421 // Constraints used to resolve the exact location of the top and bottom
422 // of the tab line.
423 TabConstraint_LIST* top_constraints_ = nullptr;
424 TabConstraint_LIST* bottom_constraints_ = nullptr;
425};
426
427} // namespace tesseract.
428
429#endif // TESSERACT_TEXTORD_TABVECTOR_H_
#define CLISTIZEH(CLASSNAME)
Definition: clst.h:879
#define ELISTIZEH(CLASSNAME)
Definition: elst.h:918
#define ELIST2IZEH(CLASSNAME)
Definition: elst2.h:927
#define double_VAR_H(name, val, comment)
Definition: params.h:301
double textord_tabvector_vertical_gap_fraction
Definition: tabvector.cpp:55
@ TA_COUNT
Definition: tabvector.h:51
@ TA_RIGHT_ALIGNED
Definition: tabvector.h:48
@ TA_RIGHT_RAGGED
Definition: tabvector.h:49
@ TA_LEFT_ALIGNED
Definition: tabvector.h:45
@ TA_SEPARATOR
Definition: tabvector.h:50
@ TA_LEFT_RAGGED
Definition: tabvector.h:46
@ TA_CENTER_JUSTIFIED
Definition: tabvector.h:47
double textord_tabvector_vertical_box_ratio
Definition: tabvector.cpp:58
integer coordinate
Definition: points.h:32
void set_x(int16_t xin)
rewrite function
Definition: points.h:61
int16_t y() const
access_function
Definition: points.h:56
void set_y(int16_t yin)
rewrite function
Definition: points.h:65
int16_t x() const
access function
Definition: points.h:52
Definition: points.h:189
int XAtY(int y) const
Definition: tabvector.h:188
void set_bottom_constraints(TabConstraint_LIST *constraints)
Definition: tabvector.h:166
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:279
const ICOORD & endpt() const
Definition: tabvector.h:148
void Rotate(const FCOORD &rotation)
Definition: tabvector.cpp:273
void set_startpt(const ICOORD &start)
Definition: tabvector.h:172
void AddPartner(TabVector *partner)
Definition: tabvector.cpp:484
bool IsSeparator() const
Definition: tabvector.h:220
void set_endpt(const ICOORD &end)
Definition: tabvector.h:175
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:353
int ExtendedOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:207
bool IsCenterTab() const
Definition: tabvector.h:224
int extended_ymin() const
Definition: tabvector.h:154
void set_intersects_other_lines(bool value)
Definition: tabvector.h:181
bool IsAPartner(const TabVector *other)
Definition: tabvector.cpp:497
int sort_key() const
Definition: tabvector.h:157
bool intersects_other_lines() const
Definition: tabvector.h:178
static TabVector * FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y, int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x, int *vertical_y)
Definition: tabvector.cpp:176
TabVector_CLIST * partners()
Definition: tabvector.h:169
bool IsRightTab() const
Definition: tabvector.h:216
TabVector * VerticalTextlinePartner()
Definition: tabvector.cpp:876
static int XAtY(const ICOORD &vertical, int sort_key, int y)
Definition: tabvector.h:285
void Evaluate(const ICOORD &vertical, TabFind *finder)
Definition: tabvector.cpp:579
int extended_ymax() const
Definition: tabvector.h:151
void SetupPartnerConstraints()
Definition: tabvector.cpp:294
void SetYEnd(int end_y)
Definition: tabvector.cpp:267
int mean_width() const
Definition: tabvector.h:160
void set_top_constraints(TabConstraint_LIST *constraints)
Definition: tabvector.h:163
bool Fit(ICOORD vertical, bool force_parallel)
Definition: tabvector.cpp:780
bool IsRagged() const
Definition: tabvector.h:228
void SetYStart(int start_y)
Definition: tabvector.cpp:262
bool IsLeftTab() const
Definition: tabvector.h:212
void Print(const char *prefix)
Definition: tabvector.cpp:517
void FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder)
Definition: tabvector.cpp:565
const ICOORD & startpt() const
Definition: tabvector.h:145
void Display(ScrollView *tab_win)
Definition: tabvector.cpp:539
int VOverlap(const TabVector &other) const
Definition: tabvector.h:198
TabVector * ShallowCopy() const
Definition: tabvector.cpp:225
void MergeWith(const ICOORD &vertical, TabVector *other)
Definition: tabvector.cpp:450
bool IsLeftOf(const TabVector &other) const
Definition: tabvector.h:234
int VOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:203
static int SortVectorsByKey(const void *v1, const void *v2)
Definition: tabvector.h:293
void Debug(const char *prefix)
Definition: tabvector.cpp:527
TabVector * GetSinglePartner()
Definition: tabvector.cpp:866
void ExtendToBox(BLOBNBOX *blob)
Definition: tabvector.cpp:238
bool SimilarTo(const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const
Definition: tabvector.cpp:386