tesseract 4.1.1
Loading...
Searching...
No Matches
textlineprojection.cpp
Go to the documentation of this file.
1// Copyright 2011 Google Inc. All Rights Reserved.
2// Author: rays@google.com (Ray Smith)
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7// http://www.apache.org/licenses/LICENSE-2.0
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14#ifdef HAVE_CONFIG_H
15#include "config_auto.h"
16#endif
17
18#include "textlineprojection.h"
19#include "allheaders.h"
20#include "bbgrid.h" // Base class.
21#include "blobbox.h" // BlobNeighourDir.
22#include "blobs.h"
23#include "colpartition.h"
24#include "normalis.h"
25
26#include <algorithm>
27
// Padding factor to use on definitely oriented blobs.
const int kOrientedPadFactor = 8;
// Padding factor to use on not definitely oriented blobs.
const int kDefaultPadFactor = 2;
// Penalty factor for going away from the line center.
const int kWrongWayPenalty = 4;
// Ratio between parallel gap and perpendicular gap used to measure total
// distance of a box from a target box in curved textline space.
// The parallel gap is treated more favorably by this factor to allow catching
// quotes and ellipses at the end of textlines.
const int kParaPerpDistRatio = 4;
// Multiple of scale_factor_ that the inter-line gap must be before we start
// padding the increment box perpendicular to the text line.
// NOTE(review): the defining line was absent from this listing although the
// constant is used by PadBlobBox; the value 4 is taken from upstream
// tesseract 4.1.1 -- confirm.
const int kMinLineSpacingFactor = 4;
// Maximum tab-stop overrun for horizontal padding, in projection pixels.
const int kMaxTabStopOverrun = 6;
44
45namespace tesseract {
46
48 : x_origin_(0), y_origin_(0), pix_(nullptr) {
49 // The projection map should be about 100 ppi, whatever the input.
50 scale_factor_ = IntCastRounded(resolution / 100.0);
51 if (scale_factor_ < 1) scale_factor_ = 1;
52}
54 pixDestroy(&pix_);
55}
56
57// Build the projection profile given the input_block containing lists of
58// blobs, a rotation to convert to image coords,
59// and a full-resolution nontext_map, marking out areas to avoid.
60// During construction, we have the following assumptions:
61// The rotation is a multiple of 90 degrees, ie no deskew yet.
62// The blobs have had their left and right rules set to also limit
63// the range of projection.
65 const FCOORD& rotation,
66 Pix* nontext_map) {
67 pixDestroy(&pix_);
68 TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map));
69 x_origin_ = 0;
70 y_origin_ = image_box.height();
71 int width = (image_box.width() + scale_factor_ - 1) / scale_factor_;
72 int height = (image_box.height() + scale_factor_ - 1) / scale_factor_;
73
74 pix_ = pixCreate(width, height, 8);
75 ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
76 ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
77 Pix* final_pix = pixBlockconv(pix_, 1, 1);
78// Pix* final_pix = pixBlockconv(pix_, 2, 2);
79 pixDestroy(&pix_);
80 pix_ = final_pix;
81}
82
83// Display the blobs in the window colored according to textline quality.
84void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST* blobs,
85 ScrollView* win) {
86#ifndef GRAPHICS_DISABLED
87 BLOBNBOX_IT it(blobs);
88 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
89 BLOBNBOX* blob = it.data();
90 const TBOX& box = blob->bounding_box();
91 bool bad_box = BoxOutOfHTextline(box, nullptr, false);
92 if (blob->UniquelyVertical())
94 else
95 win->Pen(bad_box ? ScrollView::RED : ScrollView::BLUE);
96 win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
97 }
98 win->Update();
99#endif // GRAPHICS_DISABLED
100}
101
102// Moves blobs that look like they don't sit well on a textline from the
103// input blobs list to the output small_blobs list.
104// This gets them away from initial textline finding to stop diacritics
105// from forming incorrect textlines. (Introduced mainly to fix Thai.)
107 BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const {
108 BLOBNBOX_IT it(blobs);
109 BLOBNBOX_IT small_it(small_blobs);
110 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
111 BLOBNBOX* blob = it.data();
112 const TBOX& box = blob->bounding_box();
113 bool debug = AlignedBlob::WithinTestRegion(2, box.left(),
114 box.bottom());
115 if (BoxOutOfHTextline(box, nullptr, debug) && !blob->UniquelyVertical()) {
116 blob->ClearNeighbours();
117 small_it.add_to_end(it.extract());
118 }
119 }
120}
121
122// Create a window and display the projection in it.
124#ifndef GRAPHICS_DISABLED
125 int width = pixGetWidth(pix_);
126 int height = pixGetHeight(pix_);
127 Pix* pixc = pixCreate(width, height, 32);
128 int src_wpl = pixGetWpl(pix_);
129 int col_wpl = pixGetWpl(pixc);
130 uint32_t* src_data = pixGetData(pix_);
131 uint32_t* col_data = pixGetData(pixc);
132 for (int y = 0; y < height; ++y, src_data += src_wpl, col_data += col_wpl) {
133 for (int x = 0; x < width; ++x) {
134 int pixel = GET_DATA_BYTE(src_data, x);
135 l_uint32 result;
136 if (pixel <= 17)
137 composeRGBPixel(0, 0, pixel * 15, &result);
138 else if (pixel <= 145)
139 composeRGBPixel(0, (pixel - 17) * 2, 255, &result);
140 else
141 composeRGBPixel((pixel - 145) * 2, 255, 255, &result);
142 col_data[x] = result;
143 }
144 }
145 auto* win = new ScrollView("Projection", 0, 0,
146 width, height, width, height);
147 win->Image(pixc, 0, 0);
148 win->Update();
149 pixDestroy(&pixc);
150#endif // GRAPHICS_DISABLED
151}
152
153// Compute the distance of the box from the partition using curved projection
154// space. As DistanceOfBoxFromBox, except that the direction is taken from
155// the ColPartition and the median bounds of the ColPartition are used as
156// the to_box.
158 const ColPartition& part,
159 const DENORM* denorm,
160 bool debug) const {
161 // Compute a partition box that uses the median top/bottom of the blobs
162 // within and median left/right for vertical.
163 TBOX part_box = part.bounding_box();
164 if (part.IsHorizontalType()) {
165 part_box.set_top(part.median_top());
166 part_box.set_bottom(part.median_bottom());
167 } else {
168 part_box.set_left(part.median_left());
169 part_box.set_right(part.median_right());
170 }
171 // Now use DistanceOfBoxFromBox to make the actual calculation.
172 return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(),
173 denorm, debug);
174}
175
176// Compute the distance from the from_box to the to_box using curved
177// projection space. Separation that involves a decrease in projection
178// density (moving from the from_box to the to_box) is weighted more heavily
179// than constant density, and an increase is weighted less.
180// If horizontal_textline is true, then curved space is used vertically,
181// as for a diacritic on the edge of a textline.
182// The projection uses original image coords, so denorm is used to get
183// back to the image coords from box/part space.
184// How the calculation works: Think of a diacritic near a textline.
185// Distance is measured from the far side of the from_box to the near side of
186// the to_box. Shown is the horizontal textline case.
187// |------^-----|
188// | from | box |
189// |------|-----|
190// perpendicular |
191// <------v-------->|--------------------|
192// parallel | to box |
193// |--------------------|
194// Perpendicular distance uses "curved space" See VerticalDistance below.
195// Parallel distance is linear.
196// Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio.
198 const TBOX& to_box,
199 bool horizontal_textline,
200 const DENORM* denorm,
201 bool debug) const {
202 // The parallel_gap is the horizontal gap between a horizontal textline and
203 // the box. Analogous for vertical.
204 int parallel_gap = 0;
205 // start_pt is the box end of the line to be modified for curved space.
206 TPOINT start_pt;
207 // end_pt is the partition end of the line to be modified for curved space.
208 TPOINT end_pt;
209 if (horizontal_textline) {
210 parallel_gap = from_box.x_gap(to_box) + from_box.width();
211 start_pt.x = (from_box.left() + from_box.right()) / 2;
212 end_pt.x = start_pt.x;
213 if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) {
214 start_pt.y = from_box.top();
215 end_pt.y = std::min(to_box.top(), start_pt.y);
216 } else {
217 start_pt.y = from_box.bottom();
218 end_pt.y = std::max(to_box.bottom(), start_pt.y);
219 }
220 } else {
221 parallel_gap = from_box.y_gap(to_box) + from_box.height();
222 if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) {
223 start_pt.x = from_box.right();
224 end_pt.x = std::min(to_box.right(), start_pt.x);
225 } else {
226 start_pt.x = from_box.left();
227 end_pt.x = std::max(to_box.left(), start_pt.x);
228 }
229 start_pt.y = (from_box.bottom() + from_box.top()) / 2;
230 end_pt.y = start_pt.y;
231 }
232 // The perpendicular gap is the max vertical distance gap out of:
233 // top of from_box to to_box top and bottom of from_box to to_box bottom.
234 // This value is then modified for curved projection space.
235 // Analogous for vertical.
236 int perpendicular_gap = 0;
237 // If start_pt == end_pt, then the from_box lies entirely within the to_box
238 // (in the perpendicular direction), so we don't need to calculate the
239 // perpendicular_gap.
240 if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
241 if (denorm != nullptr) {
242 // Denormalize the start and end.
243 denorm->DenormTransform(nullptr, start_pt, &start_pt);
244 denorm->DenormTransform(nullptr, end_pt, &end_pt);
245 }
246 if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
247 perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y,
248 end_pt.y);
249 } else {
250 perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x,
251 start_pt.y);
252 }
253 }
254 // The parallel_gap weighs less than the perpendicular_gap.
255 return perpendicular_gap + parallel_gap / kParaPerpDistRatio;
256}
257
258// Compute the distance between (x, y1) and (x, y2) using the rule that
259// a decrease in textline density is weighted more heavily than an increase.
260// The coordinates are in source image space, ie processed by any denorm
261// already, but not yet scaled by scale_factor_.
262// Going from the outside of a textline to the inside should measure much
263// less distance than going from the inside of a textline to the outside.
264// How it works:
265// An increase is cheap (getting closer to a textline).
266// Constant costs unity.
267// A decrease is expensive (getting further from a textline).
268// Pixels in projection map Counted distance
269// 2
270// 3 1/x
271// 3 1
272// 2 x
273// 5 1/x
274// 7 1/x
275// Total: 1 + x + 3/x where x = kWrongWayPenalty.
277 int y1, int y2) const {
278 x = ImageXToProjectionX(x);
279 y1 = ImageYToProjectionY(y1);
280 y2 = ImageYToProjectionY(y2);
281 if (y1 == y2) return 0;
282 int wpl = pixGetWpl(pix_);
283 int step = y1 < y2 ? 1 : -1;
284 uint32_t* data = pixGetData(pix_) + y1 * wpl;
285 wpl *= step;
286 int prev_pixel = GET_DATA_BYTE(data, x);
287 int distance = 0;
288 int right_way_steps = 0;
289 for (int y = y1; y != y2; y += step) {
290 data += wpl;
291 int pixel = GET_DATA_BYTE(data, x);
292 if (debug)
293 tprintf("At (%d,%d), pix = %d, prev=%d\n",
294 x, y + step, pixel, prev_pixel);
295 if (pixel < prev_pixel)
296 distance += kWrongWayPenalty;
297 else if (pixel > prev_pixel)
298 ++right_way_steps;
299 else
300 ++distance;
301 prev_pixel = pixel;
302 }
303 return distance * scale_factor_ +
304 right_way_steps * scale_factor_ / kWrongWayPenalty;
305}
306
307// Compute the distance between (x1, y) and (x2, y) using the rule that
308// a decrease in textline density is weighted more heavily than an increase.
309int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2,
310 int y) const {
311 x1 = ImageXToProjectionX(x1);
312 x2 = ImageXToProjectionX(x2);
313 y = ImageYToProjectionY(y);
314 if (x1 == x2) return 0;
315 int wpl = pixGetWpl(pix_);
316 int step = x1 < x2 ? 1 : -1;
317 uint32_t* data = pixGetData(pix_) + y * wpl;
318 int prev_pixel = GET_DATA_BYTE(data, x1);
319 int distance = 0;
320 int right_way_steps = 0;
321 for (int x = x1; x != x2; x += step) {
322 int pixel = GET_DATA_BYTE(data, x + step);
323 if (debug)
324 tprintf("At (%d,%d), pix = %d, prev=%d\n",
325 x + step, y, pixel, prev_pixel);
326 if (pixel < prev_pixel)
327 distance += kWrongWayPenalty;
328 else if (pixel > prev_pixel)
329 ++right_way_steps;
330 else
331 ++distance;
332 prev_pixel = pixel;
333 }
334 return distance * scale_factor_ +
335 right_way_steps * scale_factor_ / kWrongWayPenalty;
336}
337
338// Returns true if the blob appears to be outside of a textline.
339// Such blobs are potentially diacritics (even if large in Thai) and should
340// be kept away from initial textline finding.
342 const DENORM* denorm,
343 bool debug) const {
344 int grad1 = 0;
345 int grad2 = 0;
346 EvaluateBoxInternal(box, denorm, debug, &grad1, &grad2, nullptr, nullptr);
347 int worst_result = std::min(grad1, grad2);
348 int total_result = grad1 + grad2;
349 if (total_result >= 6) return false; // Strongly in textline.
350 // Medium strength: if either gradient is negative, it is likely outside
351 // the body of the textline.
352 if (worst_result < 0)
353 return true;
354 return false;
355}
356
357// Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
358// but uses the median top/bottom for horizontal and median left/right for
359// vertical instead of the bounding box edges.
360// Evaluates for both horizontal and vertical and returns the best result,
361// with a positive value for horizontal and a negative value for vertical.
363 const DENORM* denorm,
364 bool debug) const {
365 if (part.IsSingleton())
366 return EvaluateBox(part.bounding_box(), denorm, debug);
367 // Test vertical orientation.
368 TBOX box = part.bounding_box();
369 // Use the partition median for left/right.
370 box.set_left(part.median_left());
371 box.set_right(part.median_right());
372 int vresult = EvaluateBox(box, denorm, debug);
373
374 // Test horizontal orientation.
375 box = part.bounding_box();
376 // Use the partition median for top/bottom.
377 box.set_top(part.median_top());
378 box.set_bottom(part.median_bottom());
379 int hresult = EvaluateBox(box, denorm, debug);
380 if (debug) {
381 tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult);
382 part.bounding_box().print();
383 part.Print();
384 }
385 return hresult >= -vresult ? hresult : vresult;
386}
387
388// Computes the mean projection gradients over the horizontal and vertical
389// edges of the box:
390// -h-h-h-h-h-h
391// |------------| mean=htop -v|+v--------+v|-v
392// |+h+h+h+h+h+h| -v|+v +v|-v
393// | | -v|+v +v|-v
394// | box | -v|+v box +v|-v
395// | | -v|+v +v|-v
396// |+h+h+h+h+h+h| -v|+v +v|-v
397// |------------| mean=hbot -v|+v--------+v|-v
398// -h-h-h-h-h-h
399// mean=vleft mean=vright
400//
401// Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
402// for a horizontal textline, a negative number for a vertical textline,
403// and near zero for undecided. Undecided is most likely non-text.
404// All the gradients are truncated to remain non-negative, since negative
405// horizontal gradients don't give any indication of being vertical and
406// vice versa.
407// Additional complexity: The coordinates have to be transformed to original
408// image coordinates with denorm (if not null), scaled to match the projection
409// pix, and THEN step out 2 pixels each way from the edge to compute the
410// gradient, and tries 3 positions, each measuring the gradient over a
411// 4-pixel spread: (+3/-1), (+2/-2), (+1/-3). This complexity is handled by
412// several layers of helpers below.
413int TextlineProjection::EvaluateBox(const TBOX& box, const DENORM* denorm,
414 bool debug) const {
415 return EvaluateBoxInternal(box, denorm, debug, nullptr, nullptr, nullptr, nullptr);
416}
417
418// Internal version of EvaluateBox returns the unclipped gradients as well
419// as the result of EvaluateBox.
420// hgrad1 and hgrad2 are the gradients for the horizontal textline.
421int TextlineProjection::EvaluateBoxInternal(const TBOX& box,
422 const DENORM* denorm, bool debug,
423 int* hgrad1, int* hgrad2,
424 int* vgrad1, int* vgrad2) const {
425 int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(),
426 box.top(), true);
427 int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(),
428 box.bottom(), false);
429 int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(),
430 box.top(), true);
431 int right_gradient = -BestMeanGradientInColumn(denorm, box.right(),
432 box.bottom(), box.top(),
433 false);
434 int top_clipped = std::max(top_gradient, 0);
435 int bottom_clipped = std::max(bottom_gradient, 0);
436 int left_clipped = std::max(left_gradient, 0);
437 int right_clipped = std::max(right_gradient, 0);
438 if (debug) {
439 tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:",
440 top_gradient, bottom_gradient, left_gradient, right_gradient);
441 box.print();
442 }
443 int result = std::max(top_clipped, bottom_clipped) -
444 std::max(left_clipped, right_clipped);
445 if (hgrad1 != nullptr && hgrad2 != nullptr) {
446 *hgrad1 = top_gradient;
447 *hgrad2 = bottom_gradient;
448 }
449 if (vgrad1 != nullptr && vgrad2 != nullptr) {
450 *vgrad1 = left_gradient;
451 *vgrad2 = right_gradient;
452 }
453 return result;
454}
455
456// Helper returns the mean gradient value for the horizontal row at the given
457// y, (in the external coordinates) by subtracting the mean of the transformed
458// row 2 pixels above from the mean of the transformed row 2 pixels below.
459// This gives a positive value for a good top edge and negative for bottom.
460// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
461int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm,
462 int16_t min_x, int16_t max_x, int16_t y,
463 bool best_is_max) const {
464 TPOINT start_pt(min_x, y);
465 TPOINT end_pt(max_x, y);
466 int upper = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
467 int lower = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
468 int best_gradient = lower - upper;
469 upper = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
470 lower = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
471 int gradient = lower - upper;
472 if ((gradient > best_gradient) == best_is_max)
473 best_gradient = gradient;
474 upper = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
475 lower = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
476 gradient = lower - upper;
477 if ((gradient > best_gradient) == best_is_max)
478 best_gradient = gradient;
479 return best_gradient;
480}
481
482// Helper returns the mean gradient value for the vertical column at the
483// given x, (in the external coordinates) by subtracting the mean of the
484// transformed column 2 pixels left from the mean of the transformed column
485// 2 pixels to the right.
486// This gives a positive value for a good left edge and negative for right.
487// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
488int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, int16_t x,
489 int16_t min_y, int16_t max_y,
490 bool best_is_max) const {
491 TPOINT start_pt(x, min_y);
492 TPOINT end_pt(x, max_y);
493 int left = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt);
494 int right = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt);
495 int best_gradient = right - left;
496 left = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt);
497 right = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt);
498 int gradient = right - left;
499 if ((gradient > best_gradient) == best_is_max)
500 best_gradient = gradient;
501 left = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt);
502 right = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt);
503 gradient = right - left;
504 if ((gradient > best_gradient) == best_is_max)
505 best_gradient = gradient;
506 return best_gradient;
507}
508
509// Helper returns the mean pixel value over the line between the start_pt and
510// end_pt (inclusive), but shifted perpendicular to the line in the projection
511// image by offset pixels. For simplicity, it is assumed that the vector is
512// either nearly horizontal or nearly vertical. It works on skewed textlines!
513// The end points are in external coordinates, and will be denormalized with
514// the denorm if not nullptr before further conversion to pix coordinates.
515// After all the conversions, the offset is added to the direction
516// perpendicular to the line direction. The offset is thus in projection image
517// coordinates, which allows the caller to get a guaranteed displacement
518// between pixels used to calculate gradients.
519int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm,
520 int offset,
521 TPOINT start_pt,
522 TPOINT end_pt) const {
523 TransformToPixCoords(denorm, &start_pt);
524 TransformToPixCoords(denorm, &end_pt);
525 TruncateToImageBounds(&start_pt);
526 TruncateToImageBounds(&end_pt);
527 int wpl = pixGetWpl(pix_);
528 uint32_t* data = pixGetData(pix_);
529 int total = 0;
530 int count = 0;
531 int x_delta = end_pt.x - start_pt.x;
532 int y_delta = end_pt.y - start_pt.y;
533 if (abs(x_delta) >= abs(y_delta)) {
534 if (x_delta == 0)
535 return 0;
536 // Horizontal line. Add the offset vertically.
537 int x_step = x_delta > 0 ? 1 : -1;
538 // Correct offset for rotation, keeping it anti-clockwise of the delta.
539 offset *= x_step;
540 start_pt.y += offset;
541 end_pt.y += offset;
542 TruncateToImageBounds(&start_pt);
543 TruncateToImageBounds(&end_pt);
544 x_delta = end_pt.x - start_pt.x;
545 y_delta = end_pt.y - start_pt.y;
546 count = x_delta * x_step + 1;
547 for (int x = start_pt.x; x != end_pt.x; x += x_step) {
548 int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta);
549 total += GET_DATA_BYTE(data + wpl * y, x);
550 }
551 } else {
552 // Vertical line. Add the offset horizontally.
553 int y_step = y_delta > 0 ? 1 : -1;
554 // Correct offset for rotation, keeping it anti-clockwise of the delta.
555 // Pix holds the image with y=0 at the top, so the offset is negated.
556 offset *= -y_step;
557 start_pt.x += offset;
558 end_pt.x += offset;
559 TruncateToImageBounds(&start_pt);
560 TruncateToImageBounds(&end_pt);
561 x_delta = end_pt.x - start_pt.x;
562 y_delta = end_pt.y - start_pt.y;
563 count = y_delta * y_step + 1;
564 for (int y = start_pt.y; y != end_pt.y; y += y_step) {
565 int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta);
566 total += GET_DATA_BYTE(data + wpl * y, x);
567 }
568 }
569 return DivRounded(total, count);
570}
571
572// Given an input pix, and a box, the sides of the box are shrunk inwards until
573// they bound any black pixels found within the original box.
574// The function converts between tesseract coords and the pix coords assuming
575// that this pix is full resolution equal in size to the original image.
576// Returns an empty box if there are no black pixels in the source box.
577static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) {
578 int im_height = pixGetHeight(pix);
579 Box* input_box = boxCreate(box.left(), im_height - box.top(),
580 box.width(), box.height());
581 Box* output_box = nullptr;
582 pixClipBoxToForeground(pix, input_box, nullptr, &output_box);
583 TBOX result_box;
584 if (output_box != nullptr) {
585 l_int32 x, y, width, height;
586 boxGetGeometry(output_box, &x, &y, &width, &height);
587 result_box.set_left(x);
588 result_box.set_right(x + width);
589 result_box.set_top(im_height - y);
590 result_box.set_bottom(result_box.top() - height);
591 boxDestroy(&output_box);
592 }
593 boxDestroy(&input_box);
594 return result_box;
595}
596
597// Splits the given box in half at x_middle or y_middle according to split_on_x
598// and checks for nontext_map pixels in each half. Reduces the bbox so that it
599// still includes the middle point, but does not touch any fg pixels in
600// nontext_map. An empty box may be returned if there is no such box.
601static void TruncateBoxToMissNonText(int x_middle, int y_middle,
602 bool split_on_x, Pix* nontext_map,
603 TBOX* bbox) {
604 TBOX box1(*bbox);
605 TBOX box2(*bbox);
606 TBOX im_box;
607 if (split_on_x) {
608 box1.set_right(x_middle);
609 im_box = BoundsWithinBox(nontext_map, box1);
610 if (!im_box.null_box()) box1.set_left(im_box.right());
611 box2.set_left(x_middle);
612 im_box = BoundsWithinBox(nontext_map, box2);
613 if (!im_box.null_box()) box2.set_right(im_box.left());
614 } else {
615 box1.set_bottom(y_middle);
616 im_box = BoundsWithinBox(nontext_map, box1);
617 if (!im_box.null_box()) box1.set_top(im_box.bottom());
618 box2.set_top(y_middle);
619 im_box = BoundsWithinBox(nontext_map, box2);
620 if (!im_box.null_box()) box2.set_bottom(im_box.top());
621 }
622 box1 += box2;
623 *bbox = box1;
624}
625
626
627// Helper function to add 1 to a rectangle in source image coords to the
628// internal projection pix_.
629void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) {
630 int scaled_left = ImageXToProjectionX(box.left());
631 int scaled_top = ImageYToProjectionY(box.top());
632 int scaled_right = ImageXToProjectionX(box.right());
633 int scaled_bottom = ImageYToProjectionY(box.bottom());
634 int wpl = pixGetWpl(pix_);
635 uint32_t* data = pixGetData(pix_) + scaled_top * wpl;
636 for (int y = scaled_top; y <= scaled_bottom; ++y) {
637 for (int x = scaled_left; x <= scaled_right; ++x) {
638 int pixel = GET_DATA_BYTE(data, x);
639 if (pixel < 255)
640 SET_DATA_BYTE(data, x, pixel + 1);
641 }
642 data += wpl;
643 }
644}
645
646// Inserts a list of blobs into the projection.
647// Rotation is a multiple of 90 degrees to get from blob coords to
648// nontext_map coords, nontext_map_box is the bounds of the nontext_map.
649// Blobs are spread horizontally or vertically according to their internal
650// flags, but the spreading is truncated by set pixels in the nontext_map
651// and also by the horizontal rule line limits on the blobs.
652void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs,
653 const FCOORD& rotation,
654 const TBOX& nontext_map_box,
655 Pix* nontext_map) {
656 BLOBNBOX_IT blob_it(blobs);
657 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
658 BLOBNBOX* blob = blob_it.data();
659 TBOX bbox = blob->bounding_box();
660 ICOORD middle((bbox.left() + bbox.right()) / 2,
661 (bbox.bottom() + bbox.top()) / 2);
662 bool spreading_horizontally = PadBlobBox(blob, &bbox);
663 // Rotate to match the nontext_map.
664 bbox.rotate(rotation);
665 middle.rotate(rotation);
666 if (rotation.x() == 0.0f)
667 spreading_horizontally = !spreading_horizontally;
668 // Clip to the image before applying the increments.
669 bbox &= nontext_map_box; // This is in-place box intersection.
670 // Check for image pixels before spreading.
671 TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally,
672 nontext_map, &bbox);
673 if (bbox.area() > 0) {
674 IncrementRectangle8Bit(bbox);
675 }
676 }
677}
678
679// Pads the bounding box of the given blob according to whether it is on
680// a horizontal or vertical text line, taking into account tab-stops near
681// the blob. Returns true if padding was in the horizontal direction.
682bool TextlineProjection::PadBlobBox(BLOBNBOX* blob, TBOX* bbox) {
683 // Determine which direction to spread.
684 // If text is well spaced out, it can be useful to pad perpendicular to
685 // the textline direction, so as to ensure diacritics get absorbed
686 // correctly, but if the text is tightly spaced, this will destroy the
687 // blank space between textlines in the projection map, and that would
688 // be very bad.
689 int pad_limit = scale_factor_ * kMinLineSpacingFactor;
690 int xpad = 0;
691 int ypad = 0;
692 bool padding_horizontally = false;
693 if (blob->UniquelyHorizontal()) {
694 xpad = bbox->height() * kOrientedPadFactor;
695 padding_horizontally = true;
696 // If the text appears to be very well spaced, pad the other direction by a
697 // single pixel in the projection profile space to help join diacritics to
698 // the textline.
699 if ((blob->neighbour(BND_ABOVE) == nullptr ||
700 bbox->y_gap(blob->neighbour(BND_ABOVE)->bounding_box()) > pad_limit) &&
701 (blob->neighbour(BND_BELOW) == nullptr ||
702 bbox->y_gap(blob->neighbour(BND_BELOW)->bounding_box()) > pad_limit)) {
703 ypad = scale_factor_;
704 }
705 } else if (blob->UniquelyVertical()) {
706 ypad = bbox->width() * kOrientedPadFactor;
707 if ((blob->neighbour(BND_LEFT) == nullptr ||
708 bbox->x_gap(blob->neighbour(BND_LEFT)->bounding_box()) > pad_limit) &&
709 (blob->neighbour(BND_RIGHT) == nullptr ||
710 bbox->x_gap(blob->neighbour(BND_RIGHT)->bounding_box()) > pad_limit)) {
711 xpad = scale_factor_;
712 }
713 } else {
714 if ((blob->neighbour(BND_ABOVE) != nullptr &&
715 blob->neighbour(BND_ABOVE)->neighbour(BND_BELOW) == blob) ||
716 (blob->neighbour(BND_BELOW) != nullptr &&
717 blob->neighbour(BND_BELOW)->neighbour(BND_ABOVE) == blob)) {
718 ypad = bbox->width() * kDefaultPadFactor;
719 }
720 if ((blob->neighbour(BND_RIGHT) != nullptr &&
721 blob->neighbour(BND_RIGHT)->neighbour(BND_LEFT) == blob) ||
722 (blob->neighbour(BND_LEFT) != nullptr &&
723 blob->neighbour(BND_LEFT)->neighbour(BND_RIGHT) == blob)) {
724 xpad = bbox->height() * kDefaultPadFactor;
725 padding_horizontally = true;
726 }
727 }
728 bbox->pad(xpad, ypad);
729 pad_limit = scale_factor_ * kMaxTabStopOverrun;
730 // Now shrink horizontally to avoid stepping more than pad_limit over a
731 // tab-stop.
732 if (bbox->left() < blob->left_rule() - pad_limit) {
733 bbox->set_left(blob->left_rule() - pad_limit);
734 }
735 if (bbox->right() > blob->right_rule() + pad_limit) {
736 bbox->set_right(blob->right_rule() + pad_limit);
737 }
738 return padding_horizontally;
739}
740
741// Helper denormalizes the TPOINT with the denorm if not nullptr, then
742// converts to pix_ coordinates.
743void TextlineProjection::TransformToPixCoords(const DENORM* denorm,
744 TPOINT* pt) const {
745 if (denorm != nullptr) {
746 // Denormalize the point.
747 denorm->DenormTransform(nullptr, *pt, pt);
748 }
749 pt->x = ImageXToProjectionX(pt->x);
750 pt->y = ImageYToProjectionY(pt->y);
751}
752
753#if defined(_MSC_VER) && !defined(__clang__)
754#pragma optimize("g", off)
755#endif // _MSC_VER
756// Helper truncates the TPOINT to be within the pix_.
757void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const {
758 pt->x = ClipToRange<int>(pt->x, 0, pixGetWidth(pix_) - 1);
759 pt->y = ClipToRange<int>(pt->y, 0, pixGetHeight(pix_) - 1);
760}
761#if defined(_MSC_VER) && !defined(__clang__)
762#pragma optimize("", on)
763#endif // _MSC_VER
764
765// Transform tesseract image coordinates to coordinates used in the projection.
766int TextlineProjection::ImageXToProjectionX(int x) const {
767 x = ClipToRange((x - x_origin_) / scale_factor_, 0, pixGetWidth(pix_) - 1);
768 return x;
769}
770int TextlineProjection::ImageYToProjectionY(int y) const {
771 y = ClipToRange((y_origin_ - y) / scale_factor_, 0, pixGetHeight(pix_) - 1);
772 return y;
773}
774
775} // namespace tesseract.
@ BND_ABOVE
Definition: blobbox.h:91
@ BND_LEFT
Definition: blobbox.h:88
@ BND_BELOW
Definition: blobbox.h:89
@ BND_RIGHT
Definition: blobbox.h:90
int DivRounded(int a, int b)
Definition: helpers.h:167
int IntCastRounded(double x)
Definition: helpers.h:175
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:108
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int count(LIST var_list)
Definition: oldlist.cpp:95
const int kWrongWayPenalty
const int kMaxTabStopOverrun
const int kParaPerpDistRatio
const int kMinLineSpacingFactor
const int kOrientedPadFactor
const int kDefaultPadFactor
BLOBNBOX * neighbour(BlobNeighbourDir n) const
Definition: blobbox.h:370
int right_rule() const
Definition: blobbox.h:319
int left_rule() const
Definition: blobbox.h:313
const TBOX & bounding_box() const
Definition: blobbox.h:230
void ClearNeighbours()
Definition: blobbox.h:499
bool UniquelyHorizontal() const
Definition: blobbox.h:413
bool UniquelyVertical() const
Definition: blobbox.h:410
BLOBNBOX_LIST blobs
Definition: blobbox.h:772
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:776
Definition: blobs.h:51
int16_t x
Definition: blobs.h:93
int16_t y
Definition: blobs.h:94
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:390
integer coordinate
Definition: points.h:32
Definition: points.h:189
float x() const
Definition: points.h:207
Definition: rect.h:34
void set_right(int x)
Definition: rect.h:82
void rotate(const FCOORD &vec)
Definition: rect.h:197
int16_t top() const
Definition: rect.h:58
void print() const
Definition: rect.h:278
void set_bottom(int y)
Definition: rect.h:68
int16_t width() const
Definition: rect.h:115
int32_t area() const
Definition: rect.h:122
int16_t height() const
Definition: rect.h:108
void set_top(int y)
Definition: rect.h:61
int16_t left() const
Definition: rect.h:72
int y_gap(const TBOX &box) const
Definition: rect.h:233
int16_t bottom() const
Definition: rect.h:65
void pad(int xpad, int ypad)
Definition: rect.h:131
bool null_box() const
Definition: rect.h:50
void set_left(int x)
Definition: rect.h:75
int x_gap(const TBOX &box) const
Definition: rect.h:225
int16_t right() const
Definition: rect.h:79
static bool WithinTestRegion(int detail_level, int x, int y)
const TBOX & bounding_box() const
Definition: colpartition.h:110
bool IsSingleton() const
Definition: colpartition.h:362
bool IsHorizontalType() const
Definition: colpartition.h:446
bool BoxOutOfHTextline(const TBOX &box, const DENORM *denorm, bool debug) const
int DistanceOfBoxFromPartition(const TBOX &box, const ColPartition &part, const DENORM *denorm, bool debug) const
int EvaluateBox(const TBOX &box, const DENORM *denorm, bool debug) const
void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const
int EvaluateColPartition(const ColPartition &part, const DENORM *denorm, bool debug) const
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Pix *nontext_map)
int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, const DENORM *denorm, bool debug) const
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win)
int HorizontalDistance(bool debug, int x1, int x2, int y) const
int VerticalDistance(bool debug, int x, int y1, int y2) const
static void Update()
Definition: scrollview.cpp:709
void Pen(Color color)
Definition: scrollview.cpp:719
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:600