tesseract 4.1.1
Loading...
Searching...
No Matches
tesseract::BaselineRow Class Reference

#include <baselinedetect.h>

Public Member Functions

 BaselineRow (double line_size, TO_ROW *to_row)
 
const TBOX & bounding_box () const
 
void SetupOldLineParameters (TO_ROW *row) const
 
void Print () const
 
double BaselineAngle () const
 
double SpaceBetween (const BaselineRow &other) const
 
double PerpDisp (const FCOORD &direction) const
 
double StraightYAtX (double x) const
 
bool FitBaseline (bool use_box_bottoms)
 
void AdjustBaselineToParallel (int debug, const FCOORD &direction)
 
double AdjustBaselineToGrid (int debug, const FCOORD &direction, double line_spacing, double line_offset)
 

Detailed Description

Definition at line 40 of file baselinedetect.h.

Constructor & Destructor Documentation

◆ BaselineRow()

tesseract::BaselineRow::BaselineRow ( double  line_size,
TO_ROW * to_row 
)

Definition at line 64 of file baselinedetect.cpp.

65 : blobs_(to_row->blob_list()),
66 baseline_pt1_(0.0f, 0.0f), baseline_pt2_(0.0f, 0.0f),
67 baseline_error_(0.0), good_baseline_(false) {
68 ComputeBoundingBox();
69 // Compute a scale factor for rounding to ints.
70 disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;
71 fit_halfrange_ = kFitHalfrangeFactor * line_spacing;
72 max_baseline_error_ = kMaxBaselineError * line_spacing;
73}
const double kFitHalfrangeFactor
const double kOffsetQuantizationFactor
const double kMaxBaselineError
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:600

Member Function Documentation

◆ AdjustBaselineToGrid()

double tesseract::BaselineRow::AdjustBaselineToGrid ( int  debug,
const FCOORD & direction,
double  line_spacing,
double  line_offset 
)

Definition at line 228 of file baselinedetect.cpp.

231 {
232 if (blobs_->empty()) {
233 if (debug > 1) {
234 tprintf("Row empty at:");
235 bounding_box_.print();
236 }
237 return line_offset;
238 }
239 // Find the displacement_modes_ entry nearest to the grid.
240 double best_error = 0.0;
241 int best_index = -1;
242 for (int i = 0; i < displacement_modes_.size(); ++i) {
243 double blob_y = displacement_modes_[i];
244 double error = BaselineBlock::SpacingModelError(blob_y, line_spacing,
245 line_offset);
246 if (debug > 1) {
247 tprintf("Mode at %g has error %g from model \n", blob_y, error);
248 }
249 if (best_index < 0 || error < best_error) {
250 best_error = error;
251 best_index = i;
252 }
253 }
254 // We will move the baseline only if the chosen mode is close enough to the
255 // model.
256 double model_margin = max_baseline_error_ - best_error;
257 if (best_index >= 0 && model_margin > 0.0) {
258 // But if the current baseline is already close to the mode there is no
259 // point, and only the potential to damage accuracy by changing its angle.
260 double perp_disp = PerpDisp(direction);
261 double shift = displacement_modes_[best_index] - perp_disp;
262 if (fabs(shift) > max_baseline_error_) {
263 if (debug > 1) {
264 tprintf("Attempting linespacing model fit with mode %g to row at:",
265 displacement_modes_[best_index]);
266 bounding_box_.print();
267 }
268 FitConstrainedIfBetter(debug, direction, model_margin,
269 displacement_modes_[best_index]);
270 } else if (debug > 1) {
271 tprintf("Linespacing model only moves current line by %g for row at:",
272 shift);
273 bounding_box_.print();
274 }
275 } else if (debug > 1) {
276 tprintf("Linespacing model not close enough to any mode for row at:");
277 bounding_box_.print();
278 }
279 return fmod(PerpDisp(direction), line_spacing);
280}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int size() const
Definition: genericvector.h:72
void print() const
Definition: rect.h:278
double PerpDisp(const FCOORD &direction) const
static double SpacingModelError(double perp_disp, double line_spacing, double line_offset)

◆ AdjustBaselineToParallel()

void tesseract::BaselineRow::AdjustBaselineToParallel ( int  debug,
const FCOORD & direction 
)

Definition at line 213 of file baselinedetect.cpp.

214 {
215 SetupBlobDisplacements(direction);
216 if (displacement_modes_.empty())
217 return;
218#ifdef kDebugYCoord
219 if (bounding_box_.bottom() < kDebugYCoord &&
220 bounding_box_.top() > kDebugYCoord && debug < 3)
221 debug = 3;
222#endif
223 FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
224}
bool empty() const
Definition: genericvector.h:91
int16_t top() const
Definition: rect.h:58
int16_t bottom() const
Definition: rect.h:65

◆ BaselineAngle()

double tesseract::BaselineRow::BaselineAngle ( ) const

Definition at line 97 of file baselinedetect.cpp.

97 {
98 FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_);
99 double angle = baseline_dir.angle();
100 // Baseline directions are only unique in a range of pi so constrain to
101 // [-pi/2, pi/2].
102 return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5;
103}
FCOORD
Definition: points.h:189

◆ bounding_box()

const TBOX & tesseract::BaselineRow::bounding_box ( ) const
inline

Definition at line 44 of file baselinedetect.h.

44 {
45 return bounding_box_;
46 }

◆ FitBaseline()

bool tesseract::BaselineRow::FitBaseline ( bool  use_box_bottoms)

Definition at line 141 of file baselinedetect.cpp.

141 {
142 // Deterministic fitting is used wherever possible.
143 fitter_.Clear();
144 // Linear least squares is a backup if the DetLineFit produces a bad line.
145 LLSQ llsq;
146 BLOBNBOX_IT blob_it(blobs_);
147
148 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
149 BLOBNBOX* blob = blob_it.data();
150 if (!use_box_bottoms) blob->EstimateBaselinePosition();
151 const TBOX& box = blob->bounding_box();
152 int x_middle = (box.left() + box.right()) / 2;
153#ifdef kDebugYCoord
154 if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
155 tprintf("Box bottom = %d, baseline pos=%d for box at:",
156 box.bottom(), blob->baseline_position());
157 box.print();
158 }
159#endif
160 fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
161 llsq.add(x_middle, blob->baseline_position());
162 }
163 // Fit the line.
164 ICOORD pt1, pt2;
165 baseline_error_ = fitter_.Fit(&pt1, &pt2);
166 baseline_pt1_ = pt1;
167 baseline_pt2_ = pt2;
168 if (baseline_error_ > max_baseline_error_ &&
169 fitter_.SufficientPointsForIndependentFit()) {
170 // The fit was bad but there were plenty of points, so try skipping
171 // the first and last few, and use the new line if it dramatically improves
172 // the error of fit.
173 double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
174 if (error < baseline_error_ / 2.0) {
175 baseline_error_ = error;
176 baseline_pt1_ = pt1;
177 baseline_pt2_ = pt2;
178 }
179 }
180 int debug = 0;
181#ifdef kDebugYCoord
182 Print();
183 debug = bounding_box_.bottom() < kDebugYCoord &&
184 bounding_box_.top() > kDebugYCoord
185 ? 3 : 2;
186#endif
187 // Now we obtained a direction from that fit, see if we can improve the
188 // fit using the same direction and some other start point.
189 FCOORD direction(pt2 - pt1);
190 double target_offset = direction * pt1;
191 good_baseline_ = false;
192 FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
193 // Wild lines can be produced because DetLineFit allows vertical lines, but
194 // vertical text has been rotated so angles over pi/4 should be disallowed.
195 // Near vertical lines can still be produced by vertically aligned components
196 // on very short lines.
197 double angle = BaselineAngle();
198 if (fabs(angle) > M_PI * 0.25) {
199 // Use the llsq fit as a backup.
200 baseline_pt1_ = llsq.mean_point();
201 baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
202 // TODO(rays) get rid of this when m and c are no longer used.
203 double m = llsq.m();
204 double c = llsq.c(m);
205 baseline_error_ = llsq.rms(m, c);
206 good_baseline_ = false;
207 }
208 return good_baseline_;
209}
const int kNumSkipPoints
void EstimateBaselinePosition()
Definition: blobbox.cpp:357
const TBOX & bounding_box() const
Definition: blobbox.h:230
int baseline_position() const
Definition: blobbox.h:389
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:51
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
bool SufficientPointsForIndependentFit() const
Definition: detlinefit.cpp:162
LLSQ
Definition: linlsq.h:28
double m() const
Definition: linlsq.cpp:100
double c(double m) const
Definition: linlsq.cpp:116
double rms(double m, double c) const
Definition: linlsq.cpp:130
void add(double x, double y)
Definition: linlsq.cpp:48
FCOORD mean_point() const
Definition: linlsq.cpp:166
ICOORD
integer coordinate
Definition: points.h:32
TBOX
Definition: rect.h:34
int16_t width() const
Definition: rect.h:115
int16_t left() const
Definition: rect.h:72
int16_t right() const
Definition: rect.h:79
double BaselineAngle() const

◆ PerpDisp()

double tesseract::BaselineRow::PerpDisp ( const FCOORD & direction) const

Definition at line 120 of file baselinedetect.cpp.

120 {
121 float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f;
122 FCOORD middle_pos(middle_x, StraightYAtX(middle_x));
123 return direction * middle_pos / direction.length();
124}
float length() const
find length
Definition: points.h:228
double StraightYAtX(double x) const

◆ Print()

void tesseract::BaselineRow::Print ( ) const

Definition at line 86 of file baselinedetect.cpp.

86 {
87 tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
88 baseline_pt1_.x(), baseline_pt1_.y(),
89 baseline_pt2_.x(), baseline_pt2_.y(),
90 BaselineAngle(), StraightYAtX(0.0));
91 tprintf("Quant factor=%g, error=%g, good=%d, box:",
92 disp_quant_factor_, baseline_error_, good_baseline_);
93 bounding_box_.print();
94}
float y() const
Definition: points.h:210
float x() const
Definition: points.h:207

◆ SetupOldLineParameters()

void tesseract::BaselineRow::SetupOldLineParameters ( TO_ROW * row) const

Definition at line 76 of file baselinedetect.cpp.

76 {
77 // TODO(rays) get rid of this when m and c are no longer used.
78 double gradient = tan(BaselineAngle());
79 // para_c is the actual intercept of the baseline on the y-axis.
80 float para_c = StraightYAtX(0.0);
81 row->set_line(gradient, para_c, baseline_error_);
82 row->set_parallel_line(gradient, para_c, baseline_error_);
83}
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:604
void set_parallel_line(float gradient, float new_c, float new_error)
Definition: blobbox.h:612

◆ SpaceBetween()

double tesseract::BaselineRow::SpaceBetween ( const BaselineRow & other) const

Definition at line 107 of file baselinedetect.cpp.

107 {
108 // Find the x-centre of overlap of the lines.
109 float x = (std::max(bounding_box_.left(), other.bounding_box_.left()) +
110 std::min(bounding_box_.right(), other.bounding_box_.right())) / 2.0f;
111 // Find the vertical centre between them.
112 float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f;
113 // Find the perpendicular distance of (x,y) from each line.
114 FCOORD pt(x, y);
115 return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt);
116}

◆ StraightYAtX()

double tesseract::BaselineRow::StraightYAtX ( double  x) const

Definition at line 128 of file baselinedetect.cpp.

128 {
129 double denominator = baseline_pt2_.x() - baseline_pt1_.x();
130 if (denominator == 0.0)
131 return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
132 return baseline_pt1_.y() +
133 (x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) /
134 denominator;
135}

The documentation for this class was generated from the following files: