tesseract 4.1.1
Loading...
Searching...
No Matches
baselinedetect.cpp
Go to the documentation of this file.
1
2// File: baselinedetect.cpp
3// Description: Initial Baseline Determination.
4// Copyright 2012 Google Inc. All Rights Reserved.
5// Author: rays@google.com (Ray Smith)
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#define _USE_MATH_DEFINES // for M_PI
20
21#ifdef HAVE_CONFIG_H
22#include "config_auto.h"
23#endif
24
25#include "baselinedetect.h"
26
27#include <algorithm>
28#include <cfloat> // for FLT_MAX
29#include <cmath> // for M_PI
30#include "allheaders.h"
31#include "blobbox.h"
32#include "detlinefit.h"
33#include "drawtord.h"
34#include "helpers.h"
35#include "linlsq.h"
36#include "makerow.h"
37#include "textord.h"
38#include "tprintf.h"
39#include "underlin.h"
40
41// Number of displacement modes kept in displacement_modes_;
43// Number of points to skip when retrying initial fit.
44const int kNumSkipPoints = 3;
45// Max angle deviation (in radians) allowed to keep the independent baseline.
46const double kMaxSkewDeviation = 1.0 / 64;
47// Fraction of line spacing estimate for quantization of blob displacements.
48const double kOffsetQuantizationFactor = 3.0 / 64;
49// Fraction of line spacing estimate for computing blob fit error.
50const double kFitHalfrangeFactor = 6.0 / 64;
51// Max fraction of line spacing allowed before a baseline counts as badly fitting.
52const double kMaxBaselineError = 3.0 / 64;
53// Multiple of linespacing that sets max_blob_size in TO_BLOCK.
54// Copied from textord_excess_blobsize.
55const double kMaxBlobSizeMultiple = 1.3;
56// Min fraction of linespacing gaps that should be close to the model before
57// we will force the linespacing model on all the lines.
58const double kMinFittingLinespacings = 0.25;
59// A y-coordinate within a textline that is to be debugged.
60//#define kDebugYCoord 1525
61
62namespace tesseract {
63
64BaselineRow::BaselineRow(double line_spacing, TO_ROW* to_row)
65 : blobs_(to_row->blob_list()),
66 baseline_pt1_(0.0f, 0.0f), baseline_pt2_(0.0f, 0.0f),
67 baseline_error_(0.0), good_baseline_(false) {
68 ComputeBoundingBox();
69 // Compute a scale factor for rounding to ints.
70 disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;
71 fit_halfrange_ = kFitHalfrangeFactor * line_spacing;
72 max_baseline_error_ = kMaxBaselineError * line_spacing;
73}
74
75// Sets the TO_ROW with the output straight line.
77 // TODO(rays) get rid of this when m and c are no longer used.
78 double gradient = tan(BaselineAngle());
79 // para_c is the actual intercept of the baseline on the y-axis.
80 float para_c = StraightYAtX(0.0);
81 row->set_line(gradient, para_c, baseline_error_);
82 row->set_parallel_line(gradient, para_c, baseline_error_);
83}
84
85// Outputs diagnostic information.
86void BaselineRow::Print() const {
87 tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
88 baseline_pt1_.x(), baseline_pt1_.y(),
89 baseline_pt2_.x(), baseline_pt2_.y(),
91 tprintf("Quant factor=%g, error=%g, good=%d, box:",
92 disp_quant_factor_, baseline_error_, good_baseline_);
93 bounding_box_.print();
94}
95
96// Returns the skew angle (in radians) of the current baseline in [-pi,pi].
98 FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_);
99 double angle = baseline_dir.angle();
100 // Baseline directions are only unique in a range of pi so constrain to
101 // [-pi/2, pi/2].
102 return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5;
103}
104
105// Computes and returns the linespacing at the middle of the overlap
106// between this and other.
107double BaselineRow::SpaceBetween(const BaselineRow& other) const {
108 // Find the x-centre of overlap of the lines.
109 float x = (std::max(bounding_box_.left(), other.bounding_box_.left()) +
110 std::min(bounding_box_.right(), other.bounding_box_.right())) / 2.0f;
111 // Find the vertical centre between them.
112 float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f;
113 // Find the perpendicular distance of (x,y) from each line.
114 FCOORD pt(x, y);
115 return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt);
116}
117
118// Computes and returns the displacement of the center of the line
119// perpendicular to the given direction.
120double BaselineRow::PerpDisp(const FCOORD& direction) const {
121 float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f;
122 FCOORD middle_pos(middle_x, StraightYAtX(middle_x));
123 return direction * middle_pos / direction.length();
124}
125
126// Computes the y coordinate at the given x using the straight baseline
127// defined by baseline_pt1_ and baseline_pt2__.
128double BaselineRow::StraightYAtX(double x) const {
129 double denominator = baseline_pt2_.x() - baseline_pt1_.x();
130 if (denominator == 0.0)
131 return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
132 return baseline_pt1_.y() +
133 (x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) /
134 denominator;
135}
136
137// Fits a straight baseline to the points. Returns true if it had enough
138// points to be reasonably sure of the fitted baseline.
139// If use_box_bottoms is false, baselines positions are formed by
140// considering the outlines of the blobs.
141bool BaselineRow::FitBaseline(bool use_box_bottoms) {
142 // Deterministic fitting is used wherever possible.
143 fitter_.Clear();
144 // Linear least squares is a backup if the DetLineFit produces a bad line.
145 LLSQ llsq;
146 BLOBNBOX_IT blob_it(blobs_);
147
148 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
149 BLOBNBOX* blob = blob_it.data();
150 if (!use_box_bottoms) blob->EstimateBaselinePosition();
151 const TBOX& box = blob->bounding_box();
152 int x_middle = (box.left() + box.right()) / 2;
153#ifdef kDebugYCoord
154 if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
155 tprintf("Box bottom = %d, baseline pos=%d for box at:",
156 box.bottom(), blob->baseline_position());
157 box.print();
158 }
159#endif
160 fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
161 llsq.add(x_middle, blob->baseline_position());
162 }
163 // Fit the line.
164 ICOORD pt1, pt2;
165 baseline_error_ = fitter_.Fit(&pt1, &pt2);
166 baseline_pt1_ = pt1;
167 baseline_pt2_ = pt2;
168 if (baseline_error_ > max_baseline_error_ &&
170 // The fit was bad but there were plenty of points, so try skipping
171 // the first and last few, and use the new line if it dramatically improves
172 // the error of fit.
173 double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
174 if (error < baseline_error_ / 2.0) {
175 baseline_error_ = error;
176 baseline_pt1_ = pt1;
177 baseline_pt2_ = pt2;
178 }
179 }
180 int debug = 0;
181#ifdef kDebugYCoord
182 Print();
183 debug = bounding_box_.bottom() < kDebugYCoord &&
184 bounding_box_.top() > kDebugYCoord
185 ? 3 : 2;
186#endif
187 // Now we obtained a direction from that fit, see if we can improve the
188 // fit using the same direction and some other start point.
189 FCOORD direction(pt2 - pt1);
190 double target_offset = direction * pt1;
191 good_baseline_ = false;
192 FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
193 // Wild lines can be produced because DetLineFit allows vertical lines, but
194 // vertical text has been rotated so angles over pi/4 should be disallowed.
195 // Near vertical lines can still be produced by vertically aligned components
196 // on very short lines.
197 double angle = BaselineAngle();
198 if (fabs(angle) > M_PI * 0.25) {
199 // Use the llsq fit as a backup.
200 baseline_pt1_ = llsq.mean_point();
201 baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
202 // TODO(rays) get rid of this when m and c are no longer used.
203 double m = llsq.m();
204 double c = llsq.c(m);
205 baseline_error_ = llsq.rms(m, c);
206 good_baseline_ = false;
207 }
208 return good_baseline_;
209}
210
211// Modifies an existing result of FitBaseline to be parallel to the given
212// direction vector if that produces a better result.
214 const FCOORD& direction) {
215 SetupBlobDisplacements(direction);
216 if (displacement_modes_.empty())
217 return;
218#ifdef kDebugYCoord
219 if (bounding_box_.bottom() < kDebugYCoord &&
220 bounding_box_.top() > kDebugYCoord && debug < 3)
221 debug = 3;
222#endif
223 FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
224}
225
226// Modifies the baseline to snap to the textline grid if the existing
227// result is not good enough.
229 const FCOORD& direction,
230 double line_spacing,
231 double line_offset) {
232 if (blobs_->empty()) {
233 if (debug > 1) {
234 tprintf("Row empty at:");
235 bounding_box_.print();
236 }
237 return line_offset;
238 }
239 // Find the displacement_modes_ entry nearest to the grid.
240 double best_error = 0.0;
241 int best_index = -1;
242 for (int i = 0; i < displacement_modes_.size(); ++i) {
243 double blob_y = displacement_modes_[i];
244 double error = BaselineBlock::SpacingModelError(blob_y, line_spacing,
245 line_offset);
246 if (debug > 1) {
247 tprintf("Mode at %g has error %g from model \n", blob_y, error);
248 }
249 if (best_index < 0 || error < best_error) {
250 best_error = error;
251 best_index = i;
252 }
253 }
254 // We will move the baseline only if the chosen mode is close enough to the
255 // model.
256 double model_margin = max_baseline_error_ - best_error;
257 if (best_index >= 0 && model_margin > 0.0) {
258 // But if the current baseline is already close to the mode there is no
259 // point, and only the potential to damage accuracy by changing its angle.
260 double perp_disp = PerpDisp(direction);
261 double shift = displacement_modes_[best_index] - perp_disp;
262 if (fabs(shift) > max_baseline_error_) {
263 if (debug > 1) {
264 tprintf("Attempting linespacing model fit with mode %g to row at:",
265 displacement_modes_[best_index]);
266 bounding_box_.print();
267 }
268 FitConstrainedIfBetter(debug, direction, model_margin,
269 displacement_modes_[best_index]);
270 } else if (debug > 1) {
271 tprintf("Linespacing model only moves current line by %g for row at:",
272 shift);
273 bounding_box_.print();
274 }
275 } else if (debug > 1) {
276 tprintf("Linespacing model not close enough to any mode for row at:");
277 bounding_box_.print();
278 }
279 return fmod(PerpDisp(direction), line_spacing);
280}
281
282// Sets up displacement_modes_ with the top few modes of the perpendicular
283// distance of each blob from the given direction vector, after rounding.
284void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) {
285 // Set of perpendicular displacements of the blob bottoms from the required
286 // baseline direction.
287 GenericVector<double> perp_blob_dists;
288 displacement_modes_.truncate(0);
289 // Gather the skew-corrected position of every blob.
290 double min_dist = FLT_MAX;
291 double max_dist = -FLT_MAX;
292 BLOBNBOX_IT blob_it(blobs_);
293#ifdef kDebugYCoord
294 bool debug = false;
295#endif
296 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
297 BLOBNBOX* blob = blob_it.data();
298 const TBOX& box = blob->bounding_box();
299#ifdef kDebugYCoord
300 if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true;
301#endif
302 FCOORD blob_pos((box.left() + box.right()) / 2.0f,
303 blob->baseline_position());
304 double offset = direction * blob_pos;
305 perp_blob_dists.push_back(offset);
306#ifdef kDebugYCoord
307 if (debug) {
308 tprintf("Displacement %g for blob at:", offset);
309 box.print();
310 }
311#endif
312 UpdateRange(offset, &min_dist, &max_dist);
313 }
314 // Set up a histogram using disp_quant_factor_ as the bucket size.
315 STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),
316 IntCastRounded(max_dist / disp_quant_factor_) + 1);
317 for (int i = 0; i < perp_blob_dists.size(); ++i) {
318 dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1);
319 }
321 dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes);
322#ifdef kDebugYCoord
323 if (debug) {
324 for (int i = 0; i < scaled_modes.size(); ++i) {
325 tprintf("Top mode = %g * %d\n",
326 scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data);
327 }
328 }
329#endif
330 for (int i = 0; i < scaled_modes.size(); ++i)
331 displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key);
332}
333
334// Fits a line in the given direction to blobs that are close to the given
335// target_offset perpendicular displacement from the direction. The fit
336// error is allowed to be cheat_allowance worse than the existing fit, and
337// will still be used.
338// If cheat_allowance > 0, the new fit will be good and replace the current
339// fit if it has better fit (with cheat) OR its error is below
340// max_baseline_error_ and the old fit is marked bad.
341// Otherwise the new fit will only replace the old if it is really better,
342// or the old fit is marked bad and the new fit has sufficient points, as
343// well as being within the max_baseline_error_.
344void BaselineRow::FitConstrainedIfBetter(int debug,
345 const FCOORD& direction,
346 double cheat_allowance,
347 double target_offset) {
348 double halfrange = fit_halfrange_ * direction.length();
349 double min_dist = target_offset - halfrange;
350 double max_dist = target_offset + halfrange;
351 ICOORD line_pt;
352 double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist,
353 debug > 2, &line_pt);
354 // Allow cheat_allowance off the new error
355 new_error -= cheat_allowance;
356 double old_angle = BaselineAngle();
357 double new_angle = direction.angle();
358 if (debug > 1) {
359 tprintf("Constrained error = %g, original = %g",
360 new_error, baseline_error_);
361 tprintf(" angles = %g, %g, delta=%g vs threshold %g\n",
362 old_angle, new_angle,
363 new_angle - old_angle, kMaxSkewDeviation);
364 }
365 bool new_good_baseline = new_error <= max_baseline_error_ &&
366 (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit());
367 // The new will replace the old if any are true:
368 // 1. the new error is better
369 // 2. the old is NOT good, but the new is
370 // 3. there is a wild angular difference between them (assuming that the new
371 // is a better guess at the angle.)
372 if (new_error <= baseline_error_ ||
373 (!good_baseline_ && new_good_baseline) ||
374 fabs(new_angle - old_angle) > kMaxSkewDeviation) {
375 baseline_error_ = new_error;
376 baseline_pt1_ = line_pt;
377 baseline_pt2_ = baseline_pt1_ + direction;
378 good_baseline_ = new_good_baseline;
379 if (debug > 1) {
380 tprintf("Replacing with constrained baseline, good = %d\n",
381 good_baseline_);
382 }
383 } else if (debug > 1) {
384 tprintf("Keeping old baseline\n");
385 }
386}
387
388// Returns the perpendicular distance of the point from the straight
389// baseline.
390double BaselineRow::PerpDistanceFromBaseline(const FCOORD& pt) const {
391 FCOORD baseline_vector(baseline_pt2_ - baseline_pt1_);
392 FCOORD offset_vector(pt - baseline_pt1_);
393 double distance = baseline_vector * offset_vector;
394 return sqrt(distance * distance / baseline_vector.sqlength());
395}
396
397// Computes the bounding box of the row.
398void BaselineRow::ComputeBoundingBox() {
399 BLOBNBOX_IT it(blobs_);
400 TBOX box;
401 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
402 box += it.data()->bounding_box();
403 }
404 bounding_box_ = box;
405}
406
407
408BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block)
409 : block_(block), debug_level_(debug_level), non_text_block_(non_text),
410 good_skew_angle_(false), skew_angle_(0.0),
411 line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) {
412 TO_ROW_IT row_it(block_->get_rows());
413 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
414 // Sort the blobs on the rows.
415 row_it.data()->blob_list()->sort(blob_x_order);
416 rows_.push_back(new BaselineRow(block->line_spacing, row_it.data()));
417 }
418}
419
420// Computes and returns the absolute error of the given perp_disp from the
421// given linespacing model.
422double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing,
423 double line_offset) {
424 // Round to the nearest multiple of line_spacing + line offset.
425 int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);
426 double model_y = line_spacing * multiple + line_offset;
427 return fabs(perp_disp - model_y);
428}
429
430// Fits straight line baselines and computes the skew angle from the
431// median angle. Returns true if a good angle is found.
432// If use_box_bottoms is false, baseline positions are formed by
433// considering the outlines of the blobs.
434bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) {
435 if (non_text_block_) return false;
437 for (int r = 0; r < rows_.size(); ++r) {
438 BaselineRow* row = rows_[r];
439 if (row->FitBaseline(use_box_bottoms)) {
440 double angle = row->BaselineAngle();
441 angles.push_back(angle);
442 }
443 if (debug_level_ > 1)
444 row->Print();
445 }
446
447 if (!angles.empty()) {
448 skew_angle_ = MedianOfCircularValues(M_PI, &angles);
449 good_skew_angle_ = true;
450 } else {
451 skew_angle_ = 0.0f;
452 good_skew_angle_ = false;
453 }
454 if (debug_level_ > 0) {
455 tprintf("Initial block skew angle = %g, good = %d\n",
456 skew_angle_, good_skew_angle_);
457 }
458 return good_skew_angle_;
459}
460
461// Refits the baseline to a constrained angle, using the stored block
462// skew if good enough, otherwise the supplied default skew.
463void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
464 if (non_text_block_) return;
465 if (!good_skew_angle_) skew_angle_ = default_block_skew;
466 if (debug_level_ > 0)
467 tprintf("Adjusting block to skew angle %g\n", skew_angle_);
468 FCOORD direction(cos(skew_angle_), sin(skew_angle_));
469 for (int r = 0; r < rows_.size(); ++r) {
470 BaselineRow* row = rows_[r];
471 row->AdjustBaselineToParallel(debug_level_, direction);
472 if (debug_level_ > 1)
473 row->Print();
474 }
475 if (rows_.size() < 3 || !ComputeLineSpacing())
476 return;
477 // Enforce the line spacing model on all lines that don't yet have a good
478 // baseline.
479 // Start by finding the row that is best fitted to the model.
480 int best_row = 0;
481 double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
482 line_spacing_, line_offset_);
483 for (int r = 1; r < rows_.size(); ++r) {
484 double error = SpacingModelError(rows_[r]->PerpDisp(direction),
485 line_spacing_, line_offset_);
486 if (error < best_error) {
487 best_error = error;
488 best_row = r;
489 }
490 }
491 // Starting at the best fitting row, work outwards, syncing the offset.
492 double offset = line_offset_;
493 for (int r = best_row + 1; r < rows_.size(); ++r) {
494 offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
495 line_spacing_, offset);
496 }
497 offset = line_offset_;
498 for (int r = best_row - 1; r >= 0; --r) {
499 offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
500 line_spacing_, offset);
501 }
502}
503
504// Sets the parameters in TO_BLOCK that are needed by subsequent processes.
506 if (line_spacing_ > 0.0) {
507 // Where was block_line_spacing set before?
508 float min_spacing = std::min(block_->line_spacing, static_cast<float>(line_spacing_));
509 if (min_spacing < block_->line_size)
510 block_->line_size = min_spacing;
511 block_->line_spacing = line_spacing_;
512 block_->baseline_offset = line_offset_;
513 block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
514 }
515 // Setup the parameters on all the rows.
516 TO_ROW_IT row_it(block_->get_rows());
517 for (int r = 0; r < rows_.size(); ++r, row_it.forward()) {
518 BaselineRow* row = rows_[r];
519 TO_ROW* to_row = row_it.data();
520 row->SetupOldLineParameters(to_row);
521 }
522}
523
524// Processing that is required before fitting baseline splines, but requires
525// linear baselines in order to be successful:
526// Removes noise if required
527// Separates out underlines
528// Pre-associates blob fragments.
529// TODO(rays/joeliu) This entire section of code is inherited from the past
530// and could be improved/eliminated.
531// page_tr is used to size a debug window.
532void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) {
533 if (non_text_block_) return;
534 if (remove_noise) {
536 }
537 FCOORD rotation(1.0f, 0.0f);
538 double gradient = tan(skew_angle_);
539 separate_underlines(block_, gradient, rotation, true);
540 pre_associate_blobs(page_tr, block_, rotation, true);
541}
542
543// Fits splines to the textlines, or creates fake QSPLINES from the straight
544// baselines that are already on the TO_ROWs.
545// As a side-effect, computes the xheights of the rows and the block.
546// Although x-height estimation is conceptually separate, it is part of
547// detecting perspective distortion and therefore baseline fitting.
548void BaselineBlock::FitBaselineSplines(bool enable_splines,
549 bool show_final_rows,
550 Textord* textord) {
551 double gradient = tan(skew_angle_);
552 FCOORD rotation(1.0f, 0.0f);
553
554 if (enable_splines) {
555 textord->make_spline_rows(block_, gradient, show_final_rows);
556 } else {
557 // Make a fake spline from the existing line.
558 TBOX block_box= block_->block->pdblk.bounding_box();
559 TO_ROW_IT row_it = block_->get_rows();
560 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
561 TO_ROW* row = row_it.data();
562 int32_t xstarts[2] = { block_box.left(), block_box.right() };
563 double coeffs[3] = { 0.0, row->line_m(), row->line_c() };
564 row->baseline = QSPLINE(1, xstarts, coeffs);
565 textord->compute_row_xheight(row, block_->block->classify_rotation(),
566 row->line_m(), block_->line_size);
567 }
568 }
569 textord->compute_block_xheight(block_, gradient);
570 block_->block->set_xheight(block_->xheight);
571 if (textord_restore_underlines) // fix underlines
573}
574
575// Draws the (straight) baselines and final blobs colored according to
576// what was discarded as noise and what is associated with each row.
578#ifndef GRAPHICS_DISABLED
579 if (non_text_block_) return;
580 double gradient = tan(skew_angle_);
581 FCOORD rotation(1.0f, 0.0f);
582 int left_edge = block_->block->pdblk.bounding_box().left();
583 ScrollView* win = create_to_win(page_tr);
585 TO_ROW_IT row_it = block_->get_rows();
586 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
587 plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
588 colour = static_cast<ScrollView::Color>(colour + 1);
589 if (colour > ScrollView::MAGENTA)
590 colour = ScrollView::RED;
591 }
593 // Show discarded blobs.
594 plot_blob_list(win, &block_->underlines,
596 if (block_->blobs.length() > 0)
597 tprintf("%d blobs discarded as noise\n", block_->blobs.length());
598 draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
599#endif
600}
601
603 if (non_text_block_) return;
604 TO_ROW_IT row_it = block_->get_rows();
605 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
606 row_it.data()->baseline.plot(pix_in);
607 }
608}
609
610// Top-level line-spacing calculation. Computes an estimate of the line-
611// spacing, using the current baselines in the TO_ROWS of the block, and
612// then refines it by fitting a regression line to the baseline positions
613// as a function of their integer index.
614// Returns true if it seems that the model is a reasonable fit to the
615// observations.
616bool BaselineBlock::ComputeLineSpacing() {
617 FCOORD direction(cos(skew_angle_), sin(skew_angle_));
618 GenericVector<double> row_positions;
619 ComputeBaselinePositions(direction, &row_positions);
620 if (row_positions.size() < 2) return false;
621 EstimateLineSpacing();
622 RefineLineSpacing(row_positions);
623 // Verify that the model is reasonable.
624 double max_baseline_error = kMaxBaselineError * line_spacing_;
625 int non_trivial_gaps = 0;
626 int fitting_gaps = 0;
627 for (int i = 1; i < row_positions.size(); ++i) {
628 double row_gap = fabs(row_positions[i - 1] - row_positions[i]);
629 if (row_gap > max_baseline_error) {
630 ++non_trivial_gaps;
631 if (fabs(row_gap - line_spacing_) <= max_baseline_error)
632 ++fitting_gaps;
633 }
634 }
635 if (debug_level_ > 0) {
636 tprintf("Spacing %g, in %d rows, %d gaps fitted out of %d non-trivial\n",
637 line_spacing_, row_positions.size(), fitting_gaps,
638 non_trivial_gaps);
639 }
640 return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings;
641}
642
643// Computes the deskewed vertical position of each baseline in the block and
644// stores them in the given vector.
645// This is calculated as the perpendicular distance of the middle of each
646// baseline (in case it has a different skew angle) from the line passing
647// through the origin parallel to the block baseline angle.
648// NOTE that "distance" above is a signed quantity so we can tell which side
649// of the block baseline a line sits, hence the function and argument name
650// positions not distances.
651void BaselineBlock::ComputeBaselinePositions(const FCOORD& direction,
652 GenericVector<double>* positions) {
653 positions->clear();
654 for (int r = 0; r < rows_.size(); ++r) {
655 BaselineRow* row = rows_[r];
656 const TBOX& row_box = row->bounding_box();
657 float x_middle = (row_box.left() + row_box.right()) / 2.0f;
658 FCOORD row_pos(x_middle, static_cast<float>(row->StraightYAtX(x_middle)));
659 float offset = direction * row_pos;
660 positions->push_back(offset);
661 }
662}
663
664// Computes an estimate of the line spacing of the block from the median
665// of the spacings between adjacent overlapping textlines.
666void BaselineBlock::EstimateLineSpacing() {
667 GenericVector<float> spacings;
668 for (int r = 0; r < rows_.size(); ++r) {
669 BaselineRow* row = rows_[r];
670 // Exclude silly lines.
671 if (fabs(row->BaselineAngle()) > M_PI * 0.25) continue;
672 // Find the first row after row that overlaps it significantly.
673 const TBOX& row_box = row->bounding_box();
674 int r2;
675 for (r2 = r + 1; r2 < rows_.size() &&
676 !row_box.major_x_overlap(rows_[r2]->bounding_box());
677 ++r2);
678 if (r2 < rows_.size()) {
679 BaselineRow* row2 = rows_[r2];
680 // Exclude silly lines.
681 if (fabs(row2->BaselineAngle()) > M_PI * 0.25) continue;
682 float spacing = row->SpaceBetween(*row2);
683 spacings.push_back(spacing);
684 }
685 }
686 // If we have at least one value, use it, otherwise leave the previous
687 // value unchanged.
688 if (!spacings.empty()) {
689 line_spacing_ = spacings[spacings.choose_nth_item(spacings.size() / 2)];
690 if (debug_level_ > 1)
691 tprintf("Estimate of linespacing = %g\n", line_spacing_);
692 }
693}
694
695// Refines the line spacing of the block by fitting a regression
696// line to the deskewed y-position of each baseline as a function of its
697// estimated line index, allowing for a small error in the initial linespacing
698// and choosing the best available model.
699void BaselineBlock::RefineLineSpacing(const GenericVector<double>& positions) {
700 double spacings[3], offsets[3], errors[3];
701 int index_range;
702 errors[0] = FitLineSpacingModel(positions, line_spacing_,
703 &spacings[0], &offsets[0], &index_range);
704 if (index_range > 1) {
705 double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range);
706 // Try the hypotheses that there might be index_range +/- 1 line spaces.
707 errors[1] = FitLineSpacingModel(positions, spacing_plus,
708 &spacings[1], &offsets[1], nullptr);
709 double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range);
710 errors[2] = FitLineSpacingModel(positions, spacing_minus,
711 &spacings[2], &offsets[2], nullptr);
712 for (int i = 1; i <= 2; ++i) {
713 if (errors[i] < errors[0]) {
714 spacings[0] = spacings[i];
715 offsets[0] = offsets[i];
716 errors[0] = errors[i];
717 }
718 }
719 }
720 if (spacings[0] > 0.0) {
721 line_spacing_ = spacings[0];
722 line_offset_ = offsets[0];
723 model_error_ = errors[0];
724 if (debug_level_ > 0) {
725 tprintf("Final linespacing model = %g + offset %g, error %g\n",
726 line_spacing_, line_offset_, model_error_);
727 }
728 }
729}
730
731// Given an initial estimate of line spacing (m_in) and the positions of each
732// baseline, computes the line spacing of the block more accurately in m_out,
733// and the corresponding intercept in c_out, and the number of spacings seen
734// in index_delta. Returns the error of fit to the line spacing model.
735// Uses a simple linear regression, but optimized the offset using the median.
736double BaselineBlock::FitLineSpacingModel(
737 const GenericVector<double>& positions, double m_in,
738 double* m_out, double* c_out, int* index_delta) {
739 if (m_in == 0.0f || positions.size() < 2) {
740 *m_out = m_in;
741 *c_out = 0.0;
742 if (index_delta != nullptr) *index_delta = 0;
743 return 0.0;
744 }
745 GenericVector<double> offsets;
746 // Get the offset (remainder) linespacing for each line and choose the median.
747 for (int i = 0; i < positions.size(); ++i)
748 offsets.push_back(fmod(positions[i], m_in));
749 // Get the median offset.
750 double median_offset = MedianOfCircularValues(m_in, &offsets);
751 // Now fit a line to quantized line number and offset.
752 LLSQ llsq;
753 int min_index = INT32_MAX;
754 int max_index = -INT32_MAX;
755 for (int i = 0; i < positions.size(); ++i) {
756 double y_pos = positions[i];
757 int row_index = IntCastRounded((y_pos - median_offset) / m_in);
758 UpdateRange(row_index, &min_index, &max_index);
759 llsq.add(row_index, y_pos);
760 }
761 // Get the refined line spacing.
762 *m_out = llsq.m();
763 // Use the median offset rather than the mean.
764 offsets.truncate(0);
765 for (int i = 0; i < positions.size(); ++i)
766 offsets.push_back(fmod(positions[i], *m_out));
767 // Get the median offset.
768 if (debug_level_ > 2) {
769 for (int i = 0; i < offsets.size(); ++i)
770 tprintf("%d: %g\n", i, offsets[i]);
771 }
772 *c_out = MedianOfCircularValues(*m_out, &offsets);
773 if (debug_level_ > 1) {
774 tprintf("Median offset = %g, compared to mean of %g.\n",
775 *c_out, llsq.c(*m_out));
776 }
777 // Index_delta is the number of hypothesized line gaps present.
778 if (index_delta != nullptr)
779 *index_delta = max_index - min_index;
780 // Use the regression model's intercept to compute the error, as it may be
781 // a full line-spacing in disagreement with the median.
782 double rms_error = llsq.rms(*m_out, llsq.c(*m_out));
783 if (debug_level_ > 1) {
784 tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n",
785 m_in, median_offset, *m_out, *c_out, rms_error);
786 }
787 return rms_error;
788}
789
790BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew,
791 TO_BLOCK_LIST* blocks)
792 : page_skew_(page_skew), debug_level_(debug_level) {
793 TO_BLOCK_IT it(blocks);
794 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
795 TO_BLOCK* to_block = it.data();
796 BLOCK* block = to_block->block;
797 POLY_BLOCK* pb = block->pdblk.poly_block();
798 // A note about non-text blocks.
799 // On output, non-text blocks are supposed to contain a single empty word
800 // in each incoming text line. These mark out the polygonal bounds of the
801 // block. Ideally no baselines should be required, but currently
802 // make_words crashes if a baseline and xheight are not provided, so we
803 // include non-text blocks here, but flag them for special treatment.
804 bool non_text = pb != nullptr && !pb->IsText();
805 blocks_.push_back(new BaselineBlock(debug_level_, non_text, to_block));
806 }
807}
808
809// Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
810// block-wise and page-wise data to smooth small blocks/rows, and applies
811// smoothing based on block/page-level skew and block-level linespacing.
813 GenericVector<double> block_skew_angles;
814 for (int i = 0; i < blocks_.size(); ++i) {
815 BaselineBlock* bl_block = blocks_[i];
816 if (debug_level_ > 0)
817 tprintf("Fitting initial baselines...\n");
818 if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) {
819 block_skew_angles.push_back(bl_block->skew_angle());
820 }
821 }
822 // Compute a page-wide default skew for blocks with too little information.
823 double default_block_skew = page_skew_.angle();
824 if (!block_skew_angles.empty()) {
825 default_block_skew = MedianOfCircularValues(M_PI, &block_skew_angles);
826 }
827 if (debug_level_ > 0) {
828 tprintf("Page skew angle = %g\n", default_block_skew);
829 }
830 // Set bad lines in each block to the default block skew and then force fit
831 // a linespacing model where it makes sense to do so.
832 for (int i = 0; i < blocks_.size(); ++i) {
833 BaselineBlock* bl_block = blocks_[i];
834 bl_block->ParallelizeBaselines(default_block_skew);
835 bl_block->SetupBlockParameters(); // This replaced compute_row_stats.
836 }
837}
838
839// Computes the baseline splines for each TO_ROW in each TO_BLOCK and
840// other associated side-effects, including pre-associating blobs, computing
841// x-heights and displaying debug information.
842// NOTE that ComputeStraightBaselines must have been called first as this
843// sets up data in the TO_ROWs upon which this function depends.
845 bool enable_splines,
846 bool remove_noise,
847 bool show_final_rows,
848 Textord* textord) {
849 for (int i = 0; i < blocks_.size(); ++i) {
850 BaselineBlock* bl_block = blocks_[i];
851 if (enable_splines)
852 bl_block->PrepareForSplineFitting(page_tr, remove_noise);
853 bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord);
854 if (show_final_rows) {
855 bl_block->DrawFinalRows(page_tr);
856 }
857 }
858}
859
860} // namespace tesseract.
void plot_blob_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour, ScrollView::Color child_colour)
Definition: blobbox.cpp:1086
T MedianOfCircularValues(T modulus, GenericVector< T > *v)
Definition: linlsq.h:113
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:120
int IntCastRounded(double x)
Definition: helpers.h:175
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
const double kMaxBaselineError
const double kMaxBlobSizeMultiple
const double kFitHalfrangeFactor
const double kOffsetQuantizationFactor
const int kMaxDisplacementsModes
const double kMaxSkewDeviation
const double kMinFittingLinespacings
const int kNumSkipPoints
void draw_meanlines(TO_BLOCK *block, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:207
void plot_parallel_row(TO_ROW *row, float gradient, int32_t left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:122
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:44
void pre_associate_blobs(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1845
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2573
void separate_underlines(TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1772
void vigorous_noise_removal(TO_BLOCK *block)
Definition: makerow.cpp:466
void restore_underlined_blobs(TO_BLOCK *block)
Definition: underlin.cpp:30
bool textord_restore_underlines
Definition: underlin.cpp:22
const double kMaxBaselineError
int push_back(T object)
bool empty() const
Definition: genericvector.h:91
int size() const
Definition: genericvector.h:72
int choose_nth_item(int target_index)
void truncate(int size)
void EstimateBaselinePosition()
Definition: blobbox.cpp:357
const TBOX & bounding_box() const
Definition: blobbox.h:230
int baseline_position() const
Definition: blobbox.h:389
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:604
void set_parallel_line(float gradient, float new_c, float new_error)
Definition: blobbox.h:612
float line_c() const
Definition: blobbox.h:574
QSPLINE baseline
Definition: blobbox.h:670
float line_m() const
Definition: blobbox.h:571
BLOCK * block
Definition: blobbox.h:777
float xheight
Definition: blobbox.h:788
BLOBNBOX_LIST blobs
Definition: blobbox.h:772
TO_ROW_LIST * get_rows()
Definition: blobbox.h:704
float line_size
Definition: blobbox.h:785
float baseline_offset
Definition: blobbox.h:787
float max_blob_size
Definition: blobbox.h:786
BLOBNBOX_LIST underlines
Definition: blobbox.h:773
float line_spacing
Definition: blobbox.h:779
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:51
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
Definition: detlinefit.cpp:130
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
bool SufficientPointsForIndependentFit() const
Definition: detlinefit.cpp:162
Definition: linlsq.h:28
double m() const
Definition: linlsq.cpp:100
double c(double m) const
Definition: linlsq.cpp:116
double rms(double m, double c) const
Definition: linlsq.cpp:130
void add(double x, double y)
Definition: linlsq.cpp:48
FCOORD mean_point() const
Definition: linlsq.cpp:166
Definition: ocrblock.h:31
void set_xheight(int32_t height)
set char size
Definition: ocrblock.h:68
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:190
FCOORD classify_rotation() const
Definition: ocrblock.h:140
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
integer coordinate
Definition: points.h:32
Definition: points.h:189
float length() const
find length
Definition: points.h:228
float angle() const
find angle
Definition: points.h:247
float y() const
Definition: points.h:210
float x() const
Definition: points.h:207
bool IsText() const
Definition: polyblk.h:49
Definition: rect.h:34
int16_t top() const
Definition: rect.h:58
void print() const
Definition: rect.h:278
int16_t width() const
Definition: rect.h:115
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:412
int16_t right() const
Definition: rect.h:79
Definition: statistc.h:31
bool FitBaseline(bool use_box_bottoms)
double PerpDisp(const FCOORD &direction) const
double BaselineAngle() const
void AdjustBaselineToParallel(int debug, const FCOORD &direction)
double SpaceBetween(const BaselineRow &other) const
BaselineRow(double line_size, TO_ROW *to_row)
double StraightYAtX(double x) const
double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing, double line_offset)
void SetupOldLineParameters(TO_ROW *row) const
void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord)
bool FitBaselinesAndFindSkew(bool use_box_bottoms)
BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block)
void DrawFinalRows(const ICOORD &page_tr)
void DrawPixSpline(Pix *pix_in)
void ParallelizeBaselines(double default_block_skew)
void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise)
static double SpacingModelError(double perp_disp, double line_spacing, double line_offset)
TO_BLOCK * block() const
BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks)
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
void ComputeStraightBaselines(bool use_box_bottoms)
void compute_row_xheight(TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
Definition: makerow.cpp:1366
void make_spline_rows(TO_BLOCK *block, float gradient, bool testing_on)
Definition: makerow.cpp:2003
void compute_block_xheight(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:1254