tesseract 4.1.1
Loading...
Searching...
No Matches
imagefind.cpp
Go to the documentation of this file.
1
2// File: imagefind.cpp
3// Description: Function to find image and drawing regions in an image
4// and create a corresponding list of empty blobs.
5// Author: Ray Smith
6//
7// (C) Copyright 2008, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20#ifdef HAVE_CONFIG_H
21#include "config_auto.h"
22#endif
23
24#include "imagefind.h"
25#include "colpartitiongrid.h"
26#include "linlsq.h"
27#include "statistc.h"
28#include "params.h"
29
30#include "allheaders.h"
31
32#include <algorithm>
33
34static INT_VAR(textord_tabfind_show_images, false, "Show image blobs");
35
36namespace tesseract {
37
38// Fraction of width or height of on pixels that can be discarded from a
39// roughly rectangular image.
40const double kMinRectangularFraction = 0.125;
41// Fraction of width or height to consider image completely used.
42const double kMaxRectangularFraction = 0.75;
43// Fraction of width or height to allow transition from kMinRectangularFraction
44// to kMaxRectangularFraction, equivalent to a dy/dx skew.
45const double kMaxRectangularGradient = 0.1; // About 6 degrees.
46// Minimum image size to be worth looking for images on.
47const int kMinImageFindSize = 100;
48// Scale factor for the rms color fit error.
49const double kRMSFitScaling = 8.0;
50// Min color difference to call it two colors.
51const int kMinColorDifference = 16;
52// Pixel padding for noise blobs and partitions when rendering on the image
53// mask to encourage them to join together. Make it too big and images
54// will fatten out too much and have to be clipped to text.
55const int kNoisePadding = 4;
56
57// Finds image regions within the BINARY source pix (page image) and returns
58// the image regions as a mask image.
59// The returned pix may be nullptr, meaning no images found.
60// If not nullptr, it must be PixDestroyed by the caller.
61// If textord_tabfind_show_images, debug images are appended to pixa_debug.
62Pix* ImageFind::FindImages(Pix* pix, DebugPixa* pixa_debug) {
63 // Not worth looking at small images.
64 if (pixGetWidth(pix) < kMinImageFindSize ||
65 pixGetHeight(pix) < kMinImageFindSize)
66 return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
67
68 // Reduce by factor 2.
69 Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
70 if (textord_tabfind_show_images && pixa_debug != nullptr)
71 pixa_debug->AddPix(pixr, "CascadeReduced");
72
73 // Get the halftone mask directly from Leptonica.
74 //
75 // Leptonica will print an error message and return nullptr if we call
76 // pixGenHalftoneMask(pixr, nullptr, ...) with too small image, so we
77 // want to bypass that.
78 if (pixGetWidth(pixr) < kMinImageFindSize ||
79 pixGetHeight(pixr) < kMinImageFindSize) {
80 pixDestroy(&pixr);
81 return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
82 }
83 // Get the halftone mask.
84 l_int32 ht_found = 0;
85 Pixa* pixadb = (textord_tabfind_show_images && pixa_debug != nullptr)
86 ? pixaCreate(0)
87 : nullptr;
88 Pix* pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb);
89 if (pixadb) {
90 Pix* pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2);
91 if (textord_tabfind_show_images && pixa_debug != nullptr)
92 pixa_debug->AddPix(pixdb, "HalftoneMask");
93 pixDestroy(&pixdb);
94 pixaDestroy(&pixadb);
95 }
96 pixDestroy(&pixr);
97 if (!ht_found && pixht2 != nullptr)
98 pixDestroy(&pixht2);
99 if (pixht2 == nullptr)
100 return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
101
102 // Expand back up again.
103 Pix *pixht = pixExpandReplicate(pixht2, 2);
104 if (textord_tabfind_show_images && pixa_debug != nullptr)
105 pixa_debug->AddPix(pixht, "HalftoneReplicated");
106 pixDestroy(&pixht2);
107
108 // Fill to capture pixels near the mask edges that were missed
109 Pix *pixt = pixSeedfillBinary(nullptr, pixht, pix, 8);
110 pixOr(pixht, pixht, pixt);
111 pixDestroy(&pixt);
112
113 // Eliminate lines and bars that may be joined to images.
114 Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
115 pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
116 if (textord_tabfind_show_images && pixa_debug != nullptr)
117 pixa_debug->AddPix(pixfinemask, "FineMask");
118 Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
119 Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
120 pixDestroy(&pixreduced);
121 pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
122 Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
123 pixDestroy(&pixreduced2);
124 if (textord_tabfind_show_images && pixa_debug != nullptr)
125 pixa_debug->AddPix(pixcoarsemask, "CoarseMask");
126 // Combine the coarse and fine image masks.
127 pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
128 pixDestroy(&pixfinemask);
129 // Dilate a bit to make sure we get everything.
130 pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
131 Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16);
132 pixDestroy(&pixcoarsemask);
133 if (textord_tabfind_show_images && pixa_debug != nullptr)
134 pixa_debug->AddPix(pixmask, "MaskDilated");
135 // And the image mask with the line and bar remover.
136 pixAnd(pixht, pixht, pixmask);
137 pixDestroy(&pixmask);
138 if (textord_tabfind_show_images && pixa_debug != nullptr)
139 pixa_debug->AddPix(pixht, "FinalMask");
140 // Make the result image the same size as the input.
141 Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
142 pixOr(result, result, pixht);
143 pixDestroy(&pixht);
144 return result;
145}
146
147// Generates a Boxa, Pixa pair from the input binary (image mask) pix,
148// analgous to pixConnComp, except that connected components which are nearly
149// rectangular are replaced with solid rectangles.
150// The returned boxa, pixa may be nullptr, meaning no images found.
151// If not nullptr, they must be destroyed by the caller.
152// Resolution of pix should match the source image (Tesseract::pix_binary_)
153// so the output coordinate systems match.
155 Boxa** boxa, Pixa** pixa) {
156 *boxa = nullptr;
157 *pixa = nullptr;
158
159 if (textord_tabfind_show_images && pixa_debug != nullptr)
160 pixa_debug->AddPix(pix, "Conncompimage");
161 // Find the individual image regions in the mask image.
162 *boxa = pixConnComp(pix, pixa, 8);
163 // Rectangularize the individual images. If a sharp edge in vertical and/or
164 // horizontal occupancy can be found, it indicates a probably rectangular
165 // image with unwanted bits merged on, so clip to the approximate rectangle.
166 int npixes = 0;
167 if (*boxa != nullptr && *pixa != nullptr) npixes = pixaGetCount(*pixa);
168 for (int i = 0; i < npixes; ++i) {
169 int x_start, x_end, y_start, y_end;
170 Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE);
171 if (textord_tabfind_show_images && pixa_debug != nullptr)
172 pixa_debug->AddPix(img_pix, "A component");
176 &x_start, &y_start, &x_end, &y_end)) {
177 Pix* simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1);
178 pixSetAll(simple_pix);
179 pixDestroy(&img_pix);
180 // pixaReplacePix takes ownership of the simple_pix.
181 pixaReplacePix(*pixa, i, simple_pix, nullptr);
182 img_pix = pixaGetPix(*pixa, i, L_CLONE);
183 // Fix the box to match the new pix.
184 l_int32 x, y, width, height;
185 boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height);
186 Box* simple_box = boxCreate(x + x_start, y + y_start,
187 x_end - x_start, y_end - y_start);
188 boxaReplaceBox(*boxa, i, simple_box);
189 }
190 pixDestroy(&img_pix);
191 }
192}
193
194// Scans horizontally on x=[x_start,x_end), starting with y=*y_start,
195// stepping y+=y_step, until y=y_end. *ystart is input/output.
196// If the number of black pixels in a row, pix_count fits this pattern:
197// 0 or more rows with pix_count < min_count then
198// <= mid_width rows with min_count <= pix_count <= max_count then
199// a row with pix_count > max_count then
200// true is returned, and *y_start = the first y with pix_count >= min_count.
201static bool HScanForEdge(uint32_t* data, int wpl, int x_start, int x_end,
202 int min_count, int mid_width, int max_count,
203 int y_end, int y_step, int* y_start) {
204 int mid_rows = 0;
205 for (int y = *y_start; y != y_end; y += y_step) {
206 // Need pixCountPixelsInRow(pix, y, &pix_count, nullptr) to count in a subset.
207 int pix_count = 0;
208 uint32_t* line = data + wpl * y;
209 for (int x = x_start; x < x_end; ++x) {
210 if (GET_DATA_BIT(line, x))
211 ++pix_count;
212 }
213 if (mid_rows == 0 && pix_count < min_count)
214 continue; // In the min phase.
215 if (mid_rows == 0)
216 *y_start = y; // Save the y_start where we came out of the min phase.
217 if (pix_count > max_count)
218 return true; // Found the pattern.
219 ++mid_rows;
220 if (mid_rows > mid_width)
221 break; // Middle too big.
222 }
223 return false; // Never found max_count.
224}
225
226// Scans vertically on y=[y_start,y_end), starting with x=*x_start,
227// stepping x+=x_step, until x=x_end. *x_start is input/output.
228// If the number of black pixels in a column, pix_count fits this pattern:
229// 0 or more cols with pix_count < min_count then
230// <= mid_width cols with min_count <= pix_count <= max_count then
231// a column with pix_count > max_count then
232// true is returned, and *x_start = the first x with pix_count >= min_count.
233static bool VScanForEdge(uint32_t* data, int wpl, int y_start, int y_end,
234 int min_count, int mid_width, int max_count,
235 int x_end, int x_step, int* x_start) {
236 int mid_cols = 0;
237 for (int x = *x_start; x != x_end; x += x_step) {
238 int pix_count = 0;
239 uint32_t* line = data + y_start * wpl;
240 for (int y = y_start; y < y_end; ++y, line += wpl) {
241 if (GET_DATA_BIT(line, x))
242 ++pix_count;
243 }
244 if (mid_cols == 0 && pix_count < min_count)
245 continue; // In the min phase.
246 if (mid_cols == 0)
247 *x_start = x; // Save the place where we came out of the min phase.
248 if (pix_count > max_count)
249 return true; // found the pattern.
250 ++mid_cols;
251 if (mid_cols > mid_width)
252 break; // Middle too big.
253 }
254 return false; // Never found max_count.
255}
256
257// Returns true if there is a rectangle in the source pix, such that all
258// pixel rows and column slices outside of it have less than
259// min_fraction of the pixels black, and within max_skew_gradient fraction
260// of the pixels on the inside, there are at least max_fraction of the
261// pixels black. In other words, the inside of the rectangle looks roughly
262// rectangular, and the outside of it looks like extra bits.
263// On return, the rectangle is defined by x_start, y_start, x_end and y_end.
264// Note: the algorithm is iterative, allowing it to slice off pixels from
265// one edge, allowing it to then slice off more pixels from another edge.
267 double min_fraction, double max_fraction,
268 double max_skew_gradient,
269 int* x_start, int* y_start,
270 int* x_end, int* y_end) {
271 ASSERT_HOST(pix != nullptr);
272 *x_start = 0;
273 *x_end = pixGetWidth(pix);
274 *y_start = 0;
275 *y_end = pixGetHeight(pix);
276
277 uint32_t* data = pixGetData(pix);
278 int wpl = pixGetWpl(pix);
279 bool any_cut = false;
280 bool left_done = false;
281 bool right_done = false;
282 bool top_done = false;
283 bool bottom_done = false;
284 do {
285 any_cut = false;
286 // Find the top/bottom edges.
287 int width = *x_end - *x_start;
288 int min_count = static_cast<int>(width * min_fraction);
289 int max_count = static_cast<int>(width * max_fraction);
290 int edge_width = static_cast<int>(width * max_skew_gradient);
291 if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
292 max_count, *y_end, 1, y_start) && !top_done) {
293 top_done = true;
294 any_cut = true;
295 }
296 --(*y_end);
297 if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
298 max_count, *y_start, -1, y_end) && !bottom_done) {
299 bottom_done = true;
300 any_cut = true;
301 }
302 ++(*y_end);
303
304 // Find the left/right edges.
305 int height = *y_end - *y_start;
306 min_count = static_cast<int>(height * min_fraction);
307 max_count = static_cast<int>(height * max_fraction);
308 edge_width = static_cast<int>(height * max_skew_gradient);
309 if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
310 max_count, *x_end, 1, x_start) && !left_done) {
311 left_done = true;
312 any_cut = true;
313 }
314 --(*x_end);
315 if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
316 max_count, *x_start, -1, x_end) && !right_done) {
317 right_done = true;
318 any_cut = true;
319 }
320 ++(*x_end);
321 } while (any_cut);
322
323 // All edges must satisfy the condition of sharp gradient in pixel density
324 // in order for the full rectangle to be present.
325 return left_done && right_done && top_done && bottom_done;
326}
327
328// Given an input pix, and a bounding rectangle, the sides of the rectangle
329// are shrunk inwards until they bound any black pixels found within the
330// original rectangle. Returns false if the rectangle contains no black
331// pixels at all.
332bool ImageFind::BoundsWithinRect(Pix* pix, int* x_start, int* y_start,
333 int* x_end, int* y_end) {
334 Box* input_box = boxCreate(*x_start, *y_start, *x_end - *x_start,
335 *y_end - *y_start);
336 Box* output_box = nullptr;
337 pixClipBoxToForeground(pix, input_box, nullptr, &output_box);
338 bool result = output_box != nullptr;
339 if (result) {
340 l_int32 x, y, width, height;
341 boxGetGeometry(output_box, &x, &y, &width, &height);
342 *x_start = x;
343 *y_start = y;
344 *x_end = x + width;
345 *y_end = y + height;
346 boxDestroy(&output_box);
347 }
348 boxDestroy(&input_box);
349 return result;
350}
351
352// Given a point in 3-D (RGB) space, returns the squared Euclidean distance
353// of the point from the given line, defined by a pair of points in the 3-D
354// (RGB) space, line1 and line2.
355double ImageFind::ColorDistanceFromLine(const uint8_t* line1,
356 const uint8_t* line2,
357 const uint8_t* point) {
358 int line_vector[kRGBRMSColors];
359 int point_vector[kRGBRMSColors];
360 for (int i = 0; i < kRGBRMSColors; ++i) {
361 line_vector[i] = static_cast<int>(line2[i]) - static_cast<int>(line1[i]);
362 point_vector[i] = static_cast<int>(point[i]) - static_cast<int>(line1[i]);
363 }
364 line_vector[L_ALPHA_CHANNEL] = 0;
365 // Now the cross product in 3d.
366 int cross[kRGBRMSColors];
367 cross[COLOR_RED] = line_vector[COLOR_GREEN] * point_vector[COLOR_BLUE]
368 - line_vector[COLOR_BLUE] * point_vector[COLOR_GREEN];
369 cross[COLOR_GREEN] = line_vector[COLOR_BLUE] * point_vector[COLOR_RED]
370 - line_vector[COLOR_RED] * point_vector[COLOR_BLUE];
371 cross[COLOR_BLUE] = line_vector[COLOR_RED] * point_vector[COLOR_GREEN]
372 - line_vector[COLOR_GREEN] * point_vector[COLOR_RED];
373 cross[L_ALPHA_CHANNEL] = 0;
374 // Now the sums of the squares.
375 double cross_sq = 0.0;
376 double line_sq = 0.0;
377 for (int j = 0; j < kRGBRMSColors; ++j) {
378 cross_sq += static_cast<double>(cross[j]) * cross[j];
379 line_sq += static_cast<double>(line_vector[j]) * line_vector[j];
380 }
381 if (line_sq == 0.0) {
382 return 0.0;
383 }
384 return cross_sq / line_sq; // This is the squared distance.
385}
386
387
388// Returns the leptonica combined code for the given RGB triplet.
389uint32_t ImageFind::ComposeRGB(uint32_t r, uint32_t g, uint32_t b) {
390 l_uint32 result;
391 composeRGBPixel(r, g, b, &result);
392 return result;
393}
394
395// Returns the input value clipped to a uint8_t.
396uint8_t ImageFind::ClipToByte(double pixel) {
397 if (pixel < 0.0)
398 return 0;
399 else if (pixel >= 255.0)
400 return 255;
401 return static_cast<uint8_t>(pixel);
402}
403
404// Computes the light and dark extremes of color in the given rectangle of
405// the given pix, which is factor smaller than the coordinate system in rect.
406// The light and dark points are taken to be the upper and lower 8th-ile of
407// the most deviant of R, G and B. The value of the other 2 channels are
408// computed by linear fit against the most deviant.
409// The colors of the two points are returned in color1 and color2, with the
410// alpha channel set to a scaled mean rms of the fits.
411// If color_map1 is not null then it and color_map2 get rect pasted in them
412// with the two calculated colors, and rms map gets a pasted rect of the rms.
413// color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
414void ImageFind::ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
415 Pix* color_map1, Pix* color_map2,
416 Pix* rms_map,
417 uint8_t* color1, uint8_t* color2) {
418 ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32);
419 // Pad the rectangle outwards by 2 (scaled) pixels if possible to get more
420 // background.
421 int width = pixGetWidth(pix);
422 int height = pixGetHeight(pix);
423 int left_pad = std::max(rect.left() - 2 * factor, 0) / factor;
424 int top_pad = (rect.top() + 2 * factor + (factor - 1)) / factor;
425 top_pad = std::min(height, top_pad);
426 int right_pad = (rect.right() + 2 * factor + (factor - 1)) / factor;
427 right_pad = std::min(width, right_pad);
428 int bottom_pad = std::max(rect.bottom() - 2 * factor, 0) / factor;
429 int width_pad = right_pad - left_pad;
430 int height_pad = top_pad - bottom_pad;
431 if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4)
432 return;
433 // Now crop the pix to the rectangle.
434 Box* scaled_box = boxCreate(left_pad, height - top_pad,
435 width_pad, height_pad);
436 Pix* scaled = pixClipRectangle(pix, scaled_box, nullptr);
437
438 // Compute stats over the whole image.
439 STATS red_stats(0, 256);
440 STATS green_stats(0, 256);
441 STATS blue_stats(0, 256);
442 uint32_t* data = pixGetData(scaled);
443 ASSERT_HOST(pixGetWpl(scaled) == width_pad);
444 for (int y = 0; y < height_pad; ++y) {
445 for (int x = 0; x < width_pad; ++x, ++data) {
446 int r = GET_DATA_BYTE(data, COLOR_RED);
447 int g = GET_DATA_BYTE(data, COLOR_GREEN);
448 int b = GET_DATA_BYTE(data, COLOR_BLUE);
449 red_stats.add(r, 1);
450 green_stats.add(g, 1);
451 blue_stats.add(b, 1);
452 }
453 }
454 // Find the RGB component with the greatest 8th-ile-range.
455 // 8th-iles are used instead of quartiles to get closer to the true
456 // foreground color, which is going to be faint at best because of the
457 // pre-scaling of the input image.
458 int best_l8 = static_cast<int>(red_stats.ile(0.125f));
459 int best_u8 = static_cast<int>(ceil(red_stats.ile(0.875f)));
460 int best_i8r = best_u8 - best_l8;
461 int x_color = COLOR_RED;
462 int y1_color = COLOR_GREEN;
463 int y2_color = COLOR_BLUE;
464 int l8 = static_cast<int>(green_stats.ile(0.125f));
465 int u8 = static_cast<int>(ceil(green_stats.ile(0.875f)));
466 if (u8 - l8 > best_i8r) {
467 best_i8r = u8 - l8;
468 best_l8 = l8;
469 best_u8 = u8;
470 x_color = COLOR_GREEN;
471 y1_color = COLOR_RED;
472 }
473 l8 = static_cast<int>(blue_stats.ile(0.125f));
474 u8 = static_cast<int>(ceil(blue_stats.ile(0.875f)));
475 if (u8 - l8 > best_i8r) {
476 best_i8r = u8 - l8;
477 best_l8 = l8;
478 best_u8 = u8;
479 x_color = COLOR_BLUE;
480 y1_color = COLOR_GREEN;
481 y2_color = COLOR_RED;
482 }
483 if (best_i8r >= kMinColorDifference) {
484 LLSQ line1;
485 LLSQ line2;
486 uint32_t* data = pixGetData(scaled);
487 for (int im_y = 0; im_y < height_pad; ++im_y) {
488 for (int im_x = 0; im_x < width_pad; ++im_x, ++data) {
489 int x = GET_DATA_BYTE(data, x_color);
490 int y1 = GET_DATA_BYTE(data, y1_color);
491 int y2 = GET_DATA_BYTE(data, y2_color);
492 line1.add(x, y1);
493 line2.add(x, y2);
494 }
495 }
496 double m1 = line1.m();
497 double c1 = line1.c(m1);
498 double m2 = line2.m();
499 double c2 = line2.c(m2);
500 double rms = line1.rms(m1, c1) + line2.rms(m2, c2);
501 rms *= kRMSFitScaling;
502 // Save the results.
503 color1[x_color] = ClipToByte(best_l8);
504 color1[y1_color] = ClipToByte(m1 * best_l8 + c1 + 0.5);
505 color1[y2_color] = ClipToByte(m2 * best_l8 + c2 + 0.5);
506 color1[L_ALPHA_CHANNEL] = ClipToByte(rms);
507 color2[x_color] = ClipToByte(best_u8);
508 color2[y1_color] = ClipToByte(m1 * best_u8 + c1 + 0.5);
509 color2[y2_color] = ClipToByte(m2 * best_u8 + c2 + 0.5);
510 color2[L_ALPHA_CHANNEL] = ClipToByte(rms);
511 } else {
512 // There is only one color.
513 color1[COLOR_RED] = ClipToByte(red_stats.median());
514 color1[COLOR_GREEN] = ClipToByte(green_stats.median());
515 color1[COLOR_BLUE] = ClipToByte(blue_stats.median());
516 color1[L_ALPHA_CHANNEL] = 0;
517 memcpy(color2, color1, 4);
518 }
519 if (color_map1 != nullptr) {
520 pixSetInRectArbitrary(color_map1, scaled_box,
521 ComposeRGB(color1[COLOR_RED],
522 color1[COLOR_GREEN],
523 color1[COLOR_BLUE]));
524 pixSetInRectArbitrary(color_map2, scaled_box,
525 ComposeRGB(color2[COLOR_RED],
526 color2[COLOR_GREEN],
527 color2[COLOR_BLUE]));
528 pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
529 }
530 pixDestroy(&scaled);
531 boxDestroy(&scaled_box);
532}
533
534// ================ CUTTING POLYGONAL IMAGES FROM A RECTANGLE ================
535// The following functions are responsible for cutting a polygonal image from
536// a rectangle: CountPixelsInRotatedBox, AttemptToShrinkBox, CutChunkFromParts
537// with DivideImageIntoParts as the master.
538// Problem statement:
539// We start with a single connected component from the image mask: we get
540// a Pix of the component, and its location on the page (im_box).
541// The objective of cutting a polygonal image from its rectangle is to avoid
542// interfering text, but not text that completely overlaps the image.
543// ------------------------------ ------------------------------
544// | Single input partition | | 1 Cut up output partitions |
545// | | ------------------------------
546// Av|oid | Avoid | |
547// | | |________________________|
548// Int|erfering | Interfering | |
549// | | _____|__________________|
550// T|ext | Text | |
551// | Text-on-image | | Text-on-image |
552// ------------------------------ --------------------------
553// DivideImageIntoParts does this by building a ColPartition_LIST (not in the
554// grid) with each ColPartition representing one of the rectangles needed,
555// starting with a single rectangle for the whole image component, and cutting
556// bits out of it with CutChunkFromParts as needed to avoid text. The output
557// ColPartitions are supposed to be ordered from top to bottom.
558
559// The problem is complicated by the fact that we have rotated the coordinate
560// system to make text lines horizontal, so if we need to look at the component
561// image, we have to rotate the coordinates. Throughout the functions in this
562// section im_box is the rectangle representing the image component in the
563// rotated page coordinates (where we are building our output ColPartitions),
564// rotation is the rotation that we used to get there, and rerotation is the
565// rotation required to get back to original page image coordinates.
566// To get to coordinates in the component image, pix, we rotate the im_box,
567// the point we want to locate, and subtract the rotated point from the top-left
568// of the rotated im_box.
569// im_box is therefore essential to calculating coordinates within the pix.
570
571// Returns true if there are no black pixels in between the boxes.
572// The im_box must represent the bounding box of the pix in tesseract
573// coordinates, which may be negative, due to rotations to make the textlines
574// horizontal. The boxes are rotated by rotation, which should undo such
575// rotations, before mapping them onto the pix.
576bool ImageFind::BlankImageInBetween(const TBOX& box1, const TBOX& box2,
577 const TBOX& im_box, const FCOORD& rotation,
578 Pix* pix) {
579 TBOX search_box(box1);
580 search_box += box2;
581 if (box1.x_gap(box2) >= box1.y_gap(box2)) {
582 if (box1.x_gap(box2) <= 0)
583 return true;
584 search_box.set_left(std::min(box1.right(), box2.right()));
585 search_box.set_right(std::max(box1.left(), box2.left()));
586 } else {
587 if (box1.y_gap(box2) <= 0)
588 return true;
589 search_box.set_top(std::max(box1.bottom(), box2.bottom()));
590 search_box.set_bottom(std::min(box1.top(), box2.top()));
591 }
592 return CountPixelsInRotatedBox(search_box, im_box, rotation, pix) == 0;
593}
594
595// Returns the number of pixels in box in the pix.
596// rotation, pix and im_box are defined in the large comment above.
598 const FCOORD& rotation, Pix* pix) {
599 // Intersect it with the image box.
600 box &= im_box; // This is in-place box intersection.
601 if (box.null_box())
602 return 0;
603 box.rotate(rotation);
604 TBOX rotated_im_box(im_box);
605 rotated_im_box.rotate(rotation);
606 Pix* rect_pix = pixCreate(box.width(), box.height(), 1);
607 pixRasterop(rect_pix, 0, 0, box.width(), box.height(),
608 PIX_SRC, pix, box.left() - rotated_im_box.left(),
609 rotated_im_box.top() - box.top());
610 l_int32 result;
611 pixCountPixels(rect_pix, &result, nullptr);
612 pixDestroy(&rect_pix);
613 return result;
614}
615
616// The box given by slice contains some black pixels, but not necessarily
617// over the whole box. Shrink the x bounds of slice, but not the y bounds
618// until there is at least one black pixel in the outermost columns.
619// rotation, rerotation, pix and im_box are defined in the large comment above.
620static void AttemptToShrinkBox(const FCOORD& rotation, const FCOORD& rerotation,
621 const TBOX& im_box, Pix* pix, TBOX* slice) {
622 TBOX rotated_box(*slice);
623 rotated_box.rotate(rerotation);
624 TBOX rotated_im_box(im_box);
625 rotated_im_box.rotate(rerotation);
626 int left = rotated_box.left() - rotated_im_box.left();
627 int right = rotated_box.right() - rotated_im_box.left();
628 int top = rotated_im_box.top() - rotated_box.top();
629 int bottom = rotated_im_box.top() - rotated_box.bottom();
630 ImageFind::BoundsWithinRect(pix, &left, &top, &right, &bottom);
631 top = rotated_im_box.top() - top;
632 bottom = rotated_im_box.top() - bottom;
633 left += rotated_im_box.left();
634 right += rotated_im_box.left();
635 rotated_box.set_to_given_coords(left, bottom, right, top);
636 rotated_box.rotate(rotation);
637 slice->set_left(rotated_box.left());
638 slice->set_right(rotated_box.right());
639}
640
641// The meat of cutting a polygonal image around text.
642// This function covers the general case of cutting a box out of a box
643// as shown:
644// Input Output
645// ------------------------------ ------------------------------
646// | Single input partition | | 1 Cut up output partitions |
647// | | ------------------------------
648// | ---------- | --------- ----------
649// | | box | | | 2 | box | 3 |
650// | | | | | | is cut | |
651// | ---------- | --------- out ----------
652// | | ------------------------------
653// | | | 4 |
654// ------------------------------ ------------------------------
655// In the context that this function is used, at most 3 of the above output
656// boxes will be created, as the overlapping box is never contained by the
657// input.
658// The above cutting operation is executed for each element of part_list that
659// is overlapped by the input box. Each modified ColPartition is replaced
660// in place in the list by the output of the cutting operation in the order
661// shown above, so iff no holes are ever created, the output will be in
662// top-to-bottom order, but in extreme cases, hole creation is possible.
663// In such cases, the output order may cause strange block polygons.
664// rotation, rerotation, pix and im_box are defined in the large comment above.
665static void CutChunkFromParts(const TBOX& box, const TBOX& im_box,
666 const FCOORD& rotation, const FCOORD& rerotation,
667 Pix* pix, ColPartition_LIST* part_list) {
668 ASSERT_HOST(!part_list->empty());
669 ColPartition_IT part_it(part_list);
670 do {
671 ColPartition* part = part_it.data();
672 TBOX part_box = part->bounding_box();
673 if (part_box.overlap(box)) {
674 // This part must be cut and replaced with the remains. There are
675 // up to 4 pieces to be made. Start with the first one and use
676 // add_before_stay_put. For each piece if it has no black pixels
677 // left, just don't make the box.
678 // Above box.
679 if (box.top() < part_box.top()) {
680 TBOX slice(part_box);
681 slice.set_bottom(box.top());
682 if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
683 pix) > 0) {
684 AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
685 part_it.add_before_stay_put(
687 BTFT_NONTEXT));
688 }
689 }
690 // Left of box.
691 if (box.left() > part_box.left()) {
692 TBOX slice(part_box);
693 slice.set_right(box.left());
694 if (box.top() < part_box.top())
695 slice.set_top(box.top());
696 if (box.bottom() > part_box.bottom())
697 slice.set_bottom(box.bottom());
698 if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
699 pix) > 0) {
700 AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
701 part_it.add_before_stay_put(
703 BTFT_NONTEXT));
704 }
705 }
706 // Right of box.
707 if (box.right() < part_box.right()) {
708 TBOX slice(part_box);
709 slice.set_left(box.right());
710 if (box.top() < part_box.top())
711 slice.set_top(box.top());
712 if (box.bottom() > part_box.bottom())
713 slice.set_bottom(box.bottom());
714 if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
715 pix) > 0) {
716 AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
717 part_it.add_before_stay_put(
719 BTFT_NONTEXT));
720 }
721 }
722 // Below box.
723 if (box.bottom() > part_box.bottom()) {
724 TBOX slice(part_box);
725 slice.set_top(box.bottom());
726 if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
727 pix) > 0) {
728 AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
729 part_it.add_before_stay_put(
731 BTFT_NONTEXT));
732 }
733 }
734 part->DeleteBoxes();
735 delete part_it.extract();
736 }
737 part_it.forward();
738 } while (!part_it.at_first());
739}
740
741// Starts with the bounding box of the image component and cuts it up
742// so that it doesn't intersect text where possible.
743// Strong fully contained horizontal text is marked as text on image,
744// and does not cause a division of the image.
745// For more detail see the large comment above on cutting polygonal images
746// from a rectangle.
747// rotation, rerotation, pix and im_box are defined in the large comment above.
748static void DivideImageIntoParts(const TBOX& im_box, const FCOORD& rotation,
749 const FCOORD& rerotation, Pix* pix,
750 ColPartitionGridSearch* rectsearch,
751 ColPartition_LIST* part_list) {
752 // Add the full im_box partition to the list to begin with.
753 ColPartition* pix_part = ColPartition::FakePartition(im_box, PT_UNKNOWN,
756 ColPartition_IT part_it(part_list);
757 part_it.add_after_then_move(pix_part);
758
759 rectsearch->StartRectSearch(im_box);
760 ColPartition* part;
761 while ((part = rectsearch->NextRectSearch()) != nullptr) {
762 TBOX part_box = part->bounding_box();
763 if (part_box.contains(im_box) && part->flow() >= BTFT_CHAIN) {
764 // This image is completely covered by an existing text partition.
765 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
766 ColPartition* pix_part = part_it.extract();
767 pix_part->DeleteBoxes();
768 delete pix_part;
769 }
770 } else if (part->flow() == BTFT_STRONG_CHAIN) {
771 // Text intersects the box.
772 TBOX overlap_box = part_box.intersection(im_box);
773 // Intersect it with the image box.
774 int black_area = ImageFind::CountPixelsInRotatedBox(overlap_box, im_box,
775 rerotation, pix);
776 if (black_area * 2 < part_box.area() || !im_box.contains(part_box)) {
777 // Eat a piece out of the image.
778 // Pad it so that pieces eaten out look decent.
779 int padding = part->blob_type() == BRT_VERT_TEXT
780 ? part_box.width() : part_box.height();
781 part_box.set_top(part_box.top() + padding / 2);
782 part_box.set_bottom(part_box.bottom() - padding / 2);
783 CutChunkFromParts(part_box, im_box, rotation, rerotation,
784 pix, part_list);
785 } else {
786 // Strong overlap with the black area, so call it text on image.
787 part->set_flow(BTFT_TEXT_ON_IMAGE);
788 }
789 }
790 if (part_list->empty()) {
791 break;
792 }
793 }
794}
795
796// Search for the rightmost text that overlaps vertically and is to the left
797// of the given box, but within the given left limit.
798static int ExpandImageLeft(const TBOX& box, int left_limit,
799 ColPartitionGrid* part_grid) {
801 ColPartition* part;
802 // Search right to left for any text that overlaps.
803 search.StartSideSearch(box.left(), box.bottom(), box.top());
804 while ((part = search.NextSideSearch(true)) != nullptr) {
805 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
806 const TBOX& part_box(part->bounding_box());
807 if (part_box.y_gap(box) < 0) {
808 if (part_box.right() > left_limit && part_box.right() < box.left())
809 left_limit = part_box.right();
810 break;
811 }
812 }
813 }
814 if (part != nullptr) {
815 // Search for the nearest text up to the one we already found.
816 TBOX search_box(left_limit, box.bottom(), box.left(), box.top());
817 search.StartRectSearch(search_box);
818 while ((part = search.NextRectSearch()) != nullptr) {
819 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
820 const TBOX& part_box(part->bounding_box());
821 if (part_box.y_gap(box) < 0) {
822 if (part_box.right() > left_limit && part_box.right() < box.left()) {
823 left_limit = part_box.right();
824 }
825 }
826 }
827 }
828 }
829 return left_limit;
830}
831
832// Search for the leftmost text that overlaps vertically and is to the right
833// of the given box, but within the given right limit.
834static int ExpandImageRight(const TBOX& box, int right_limit,
835 ColPartitionGrid* part_grid) {
837 ColPartition* part;
838 // Search left to right for any text that overlaps.
839 search.StartSideSearch(box.right(), box.bottom(), box.top());
840 while ((part = search.NextSideSearch(false)) != nullptr) {
841 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
842 const TBOX& part_box(part->bounding_box());
843 if (part_box.y_gap(box) < 0) {
844 if (part_box.left() < right_limit && part_box.left() > box.right())
845 right_limit = part_box.left();
846 break;
847 }
848 }
849 }
850 if (part != nullptr) {
851 // Search for the nearest text up to the one we already found.
852 TBOX search_box(box.left(), box.bottom(), right_limit, box.top());
853 search.StartRectSearch(search_box);
854 while ((part = search.NextRectSearch()) != nullptr) {
855 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
856 const TBOX& part_box(part->bounding_box());
857 if (part_box.y_gap(box) < 0) {
858 if (part_box.left() < right_limit && part_box.left() > box.right())
859 right_limit = part_box.left();
860 }
861 }
862 }
863 }
864 return right_limit;
865}
866
867// Search for the topmost text that overlaps horizontally and is below
868// the given box, but within the given bottom limit.
869static int ExpandImageBottom(const TBOX& box, int bottom_limit,
870 ColPartitionGrid* part_grid) {
872 ColPartition* part;
873 // Search right to left for any text that overlaps.
874 search.StartVerticalSearch(box.left(), box.right(), box.bottom());
875 while ((part = search.NextVerticalSearch(true)) != nullptr) {
876 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
877 const TBOX& part_box(part->bounding_box());
878 if (part_box.x_gap(box) < 0) {
879 if (part_box.top() > bottom_limit && part_box.top() < box.bottom())
880 bottom_limit = part_box.top();
881 break;
882 }
883 }
884 }
885 if (part != nullptr) {
886 // Search for the nearest text up to the one we already found.
887 TBOX search_box(box.left(), bottom_limit, box.right(), box.bottom());
888 search.StartRectSearch(search_box);
889 while ((part = search.NextRectSearch()) != nullptr) {
890 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
891 const TBOX& part_box(part->bounding_box());
892 if (part_box.x_gap(box) < 0) {
893 if (part_box.top() > bottom_limit && part_box.top() < box.bottom())
894 bottom_limit = part_box.top();
895 }
896 }
897 }
898 }
899 return bottom_limit;
900}
901
902// Search for the bottommost text that overlaps horizontally and is above
903// the given box, but within the given top limit.
904static int ExpandImageTop(const TBOX& box, int top_limit,
905 ColPartitionGrid* part_grid) {
907 ColPartition* part;
908 // Search right to left for any text that overlaps.
909 search.StartVerticalSearch(box.left(), box.right(), box.top());
910 while ((part = search.NextVerticalSearch(false)) != nullptr) {
911 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
912 const TBOX& part_box(part->bounding_box());
913 if (part_box.x_gap(box) < 0) {
914 if (part_box.bottom() < top_limit && part_box.bottom() > box.top())
915 top_limit = part_box.bottom();
916 break;
917 }
918 }
919 }
920 if (part != nullptr) {
921 // Search for the nearest text up to the one we already found.
922 TBOX search_box(box.left(), box.top(), box.right(), top_limit);
923 search.StartRectSearch(search_box);
924 while ((part = search.NextRectSearch()) != nullptr) {
925 if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
926 const TBOX& part_box(part->bounding_box());
927 if (part_box.x_gap(box) < 0) {
928 if (part_box.bottom() < top_limit && part_box.bottom() > box.top())
929 top_limit = part_box.bottom();
930 }
931 }
932 }
933 }
934 return top_limit;
935}
936
937// Expands the image box in the given direction until it hits text,
938// limiting the expansion to the given limit box, returning the result
939// in the expanded box, and
940// returning the increase in area resulting from the expansion.
941static int ExpandImageDir(BlobNeighbourDir dir, const TBOX& im_box,
942 const TBOX& limit_box,
943 ColPartitionGrid* part_grid, TBOX* expanded_box) {
944 *expanded_box = im_box;
945 switch (dir) {
946 case BND_LEFT:
947 expanded_box->set_left(ExpandImageLeft(im_box, limit_box.left(),
948 part_grid));
949 break;
950 case BND_RIGHT:
951 expanded_box->set_right(ExpandImageRight(im_box, limit_box.right(),
952 part_grid));
953 break;
954 case BND_ABOVE:
955 expanded_box->set_top(ExpandImageTop(im_box, limit_box.top(), part_grid));
956 break;
957 case BND_BELOW:
958 expanded_box->set_bottom(ExpandImageBottom(im_box, limit_box.bottom(),
959 part_grid));
960 break;
961 default:
962 return 0;
963 }
964 return expanded_box->area() - im_box.area();
965}
966
967// Expands the image partition into any non-text until it touches text.
968// The expansion proceeds in the order of increasing increase in area
969// as a heuristic to find the best rectangle by expanding in the most
970// constrained direction first.
971static void MaximalImageBoundingBox(ColPartitionGrid* part_grid, TBOX* im_box) {
972 bool dunnit[BND_COUNT];
973 memset(dunnit, 0, sizeof(dunnit));
974 TBOX limit_box(part_grid->bleft().x(), part_grid->bleft().y(),
975 part_grid->tright().x(), part_grid->tright().y());
976 TBOX text_box(*im_box);
977 for (int iteration = 0; iteration < BND_COUNT; ++iteration) {
978 // Find the direction with least area increase.
979 int best_delta = -1;
980 BlobNeighbourDir best_dir = BND_LEFT;
981 TBOX expanded_boxes[BND_COUNT];
982 for (int dir = 0; dir < BND_COUNT; ++dir) {
983 auto bnd = static_cast<BlobNeighbourDir>(dir);
984 if (!dunnit[bnd]) {
985 TBOX expanded_box;
986 int area_delta = ExpandImageDir(bnd, text_box, limit_box, part_grid,
987 &expanded_boxes[bnd]);
988 if (best_delta < 0 || area_delta < best_delta) {
989 best_delta = area_delta;
990 best_dir = bnd;
991 }
992 }
993 }
994 // Run the best and remember the direction.
995 dunnit[best_dir] = true;
996 text_box = expanded_boxes[best_dir];
997 }
998 *im_box = text_box;
999}
1000
1001// Helper deletes the given partition but first marks up all the blobs as
1002// noise, so they get deleted later, and disowns them.
1003// If the initial type of the partition is image, then it actually deletes
1004// the blobs, as the partition owns them in that case.
1005static void DeletePartition(ColPartition* part) {
1006 BlobRegionType type = part->blob_type();
1007 if (type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
1008 // The partition owns the boxes of these types, so just delete them.
1009 part->DeleteBoxes(); // From a previous iteration.
1010 } else {
1011 // Once marked, the blobs will be swept up by TidyBlobs.
1012 part->set_flow(BTFT_NONTEXT);
1013 part->set_blob_type(BRT_NOISE);
1014 part->SetBlobTypes();
1015 part->DisownBoxes(); // Created before FindImagePartitions.
1016 }
1017 delete part;
1018}
1019
1020// The meat of joining fragmented images and consuming ColPartitions of
1021// uncertain type.
1022// *part_ptr is an input/output BRT_RECTIMAGE ColPartition that is to be
1023// expanded to consume overlapping and nearby ColPartitions of uncertain type
1024// and other BRT_RECTIMAGE partitions, but NOT to be expanded beyond
1025// max_image_box. *part_ptr is NOT in the part_grid.
1026// rectsearch is already constructed on the part_grid, and is used for
1027// searching for overlapping and nearby ColPartitions.
1028// ExpandImageIntoParts is called iteratively until it returns false. Each
1029// time it absorbs the nearest non-contained candidate, and everything that
1030// is fully contained within part_ptr's bounding box.
1031// TODO(rays) what if it just eats everything inside max_image_box in one go?
1032static bool ExpandImageIntoParts(const TBOX& max_image_box,
1033 ColPartitionGridSearch* rectsearch,
1034 ColPartitionGrid* part_grid,
1035 ColPartition** part_ptr) {
1036 ColPartition* image_part = *part_ptr;
1037 TBOX im_part_box = image_part->bounding_box();
1038 if (textord_tabfind_show_images > 1) {
1039 tprintf("Searching for merge with image part:");
1040 im_part_box.print();
1041 tprintf("Text box=");
1042 max_image_box.print();
1043 }
1044 rectsearch->StartRectSearch(max_image_box);
1045 ColPartition* part;
1046 ColPartition* best_part = nullptr;
1047 int best_dist = 0;
1048 while ((part = rectsearch->NextRectSearch()) != nullptr) {
1049 if (textord_tabfind_show_images > 1) {
1050 tprintf("Considering merge with part:");
1051 part->Print();
1052 if (im_part_box.contains(part->bounding_box()))
1053 tprintf("Fully contained\n");
1054 else if (!max_image_box.contains(part->bounding_box()))
1055 tprintf("Not within text box\n");
1056 else if (part->flow() == BTFT_STRONG_CHAIN)
1057 tprintf("Too strong text\n");
1058 else
1059 tprintf("Real candidate\n");
1060 }
1061 if (part->flow() == BTFT_STRONG_CHAIN ||
1062 part->flow() == BTFT_TEXT_ON_IMAGE ||
1063 part->blob_type() == BRT_POLYIMAGE)
1064 continue;
1065 TBOX box = part->bounding_box();
1066 if (max_image_box.contains(box) && part->blob_type() != BRT_NOISE) {
1067 if (im_part_box.contains(box)) {
1068 // Eat it completely.
1069 rectsearch->RemoveBBox();
1070 DeletePartition(part);
1071 continue;
1072 }
1073 int x_dist = std::max(0, box.x_gap(im_part_box));
1074 int y_dist = std::max(0, box.y_gap(im_part_box));
1075 int dist = x_dist * x_dist + y_dist * y_dist;
1076 if (dist > box.area() || dist > im_part_box.area())
1077 continue; // Not close enough.
1078 if (best_part == nullptr || dist < best_dist) {
1079 // We keep the nearest qualifier, which is not necessarily the nearest.
1080 best_part = part;
1081 best_dist = dist;
1082 }
1083 }
1084 }
1085 if (best_part != nullptr) {
1086 // It needs expanding. We can do it without touching text.
1087 TBOX box = best_part->bounding_box();
1088 if (textord_tabfind_show_images > 1) {
1089 tprintf("Merging image part:");
1090 im_part_box.print();
1091 tprintf("with part:");
1092 box.print();
1093 }
1094 im_part_box += box;
1095 *part_ptr = ColPartition::FakePartition(im_part_box, PT_UNKNOWN,
1097 BTFT_NONTEXT);
1098 DeletePartition(image_part);
1099 part_grid->RemoveBBox(best_part);
1100 DeletePartition(best_part);
1101 rectsearch->RepositionIterator();
1102 return true;
1103 }
1104 return false;
1105}
1106
1107// Helper function to compute the overlap area between the box and the
1108// given list of partitions.
1109static int IntersectArea(const TBOX& box, ColPartition_LIST* part_list) {
1110 int intersect_area = 0;
1111 ColPartition_IT part_it(part_list);
1112 // Iterate the parts and subtract intersecting area.
1113 for (part_it.mark_cycle_pt(); !part_it.cycled_list();
1114 part_it.forward()) {
1115 ColPartition* image_part = part_it.data();
1116 TBOX intersect = box.intersection(image_part->bounding_box());
1117 intersect_area += intersect.area();
1118 }
1119 return intersect_area;
1120}
1121
1122// part_list is a set of ColPartitions representing a polygonal image, and
1123// im_box is the union of the bounding boxes of all the parts in part_list.
1124// Tests whether part is to be consumed by the polygonal image.
1125// Returns true if part is weak text and more than half of its area is
1126// intersected by parts from the part_list, and it is contained within im_box.
1127static bool TestWeakIntersectedPart(const TBOX& im_box,
1128 ColPartition_LIST* part_list,
1129 ColPartition* part) {
1130 if (part->flow() < BTFT_STRONG_CHAIN) {
1131 // A weak partition intersects the box.
1132 const TBOX& part_box = part->bounding_box();
1133 if (im_box.contains(part_box)) {
1134 int area = part_box.area();
1135 int intersect_area = IntersectArea(part_box, part_list);
1136 if (area < 2 * intersect_area) {
1137 return true;
1138 }
1139 }
1140 }
1141 return false;
1142}
1143
1144// A rectangular or polygonal image has been completed, in part_list, bounding
1145// box in im_box. We want to eliminate weak text or other uncertain partitions
1146// (basically anything that is not BRT_STRONG_CHAIN or better) from both the
1147// part_grid and the big_parts list that are contained within im_box and
1148// overlapped enough by the possibly polygonal image.
1149static void EliminateWeakParts(const TBOX& im_box,
1150 ColPartitionGrid* part_grid,
1151 ColPartition_LIST* big_parts,
1152 ColPartition_LIST* part_list) {
1153 ColPartitionGridSearch rectsearch(part_grid);
1154 ColPartition* part;
1155 rectsearch.StartRectSearch(im_box);
1156 while ((part = rectsearch.NextRectSearch()) != nullptr) {
1157 if (TestWeakIntersectedPart(im_box, part_list, part)) {
1158 BlobRegionType type = part->blob_type();
1159 if (type == BRT_POLYIMAGE || type == BRT_RECTIMAGE) {
1160 rectsearch.RemoveBBox();
1161 DeletePartition(part);
1162 } else {
1163 // The part is mostly covered, so mark it. Non-image partitions are
1164 // kept hanging around to mark the image for pass2
1165 part->set_flow(BTFT_NONTEXT);
1166 part->set_blob_type(BRT_NOISE);
1167 part->SetBlobTypes();
1168 }
1169 }
1170 }
1171 ColPartition_IT big_it(big_parts);
1172 for (big_it.mark_cycle_pt(); !big_it.cycled_list(); big_it.forward()) {
1173 part = big_it.data();
1174 if (TestWeakIntersectedPart(im_box, part_list, part)) {
1175 // Once marked, the blobs will be swept up by TidyBlobs.
1176 DeletePartition(big_it.extract());
1177 }
1178 }
1179}
1180
1181// Helper scans for good text partitions overlapping the given box.
1182// If there are no good text partitions overlapping an expanded box, then
1183// the box is expanded, otherwise, the original box is returned.
1184// If good text overlaps the box, true is returned.
1185static bool ScanForOverlappingText(ColPartitionGrid* part_grid, TBOX* box) {
1186 ColPartitionGridSearch rectsearch(part_grid);
1187 TBOX padded_box(*box);
1188 padded_box.pad(kNoisePadding, kNoisePadding);
1189 rectsearch.StartRectSearch(padded_box);
1190 ColPartition* part;
1191 bool any_text_in_padded_rect = false;
1192 while ((part = rectsearch.NextRectSearch()) != nullptr) {
1193 if (part->flow() == BTFT_CHAIN ||
1194 part->flow() == BTFT_STRONG_CHAIN) {
1195 // Text intersects the box.
1196 any_text_in_padded_rect = true;
1197 const TBOX& part_box = part->bounding_box();
1198 if (box->overlap(part_box)) {
1199 return true;
1200 }
1201 }
1202 }
1203 if (!any_text_in_padded_rect)
1204 *box = padded_box;
1205 return false;
1206}
1207
1208// Renders the boxes of image parts from the supplied list onto the image_pix,
1209// except where they interfere with existing strong text in the part_grid,
1210// and then deletes them.
1211// Box coordinates are rotated by rerotate to match the image.
1212static void MarkAndDeleteImageParts(const FCOORD& rerotate,
1213 ColPartitionGrid* part_grid,
1214 ColPartition_LIST* image_parts,
1215 Pix* image_pix) {
1216 if (image_pix == nullptr)
1217 return;
1218 int imageheight = pixGetHeight(image_pix);
1219 ColPartition_IT part_it(image_parts);
1220 for (; !part_it.empty(); part_it.forward()) {
1221 ColPartition* part = part_it.extract();
1222 TBOX part_box = part->bounding_box();
1223 BlobRegionType type = part->blob_type();
1224 if (!ScanForOverlappingText(part_grid, &part_box) ||
1225 type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
1226 // Mark the box on the image.
1227 // All coords need to be rotated to match the image.
1228 part_box.rotate(rerotate);
1229 int left = part_box.left();
1230 int top = part_box.top();
1231 pixRasterop(image_pix, left, imageheight - top,
1232 part_box.width(), part_box.height(), PIX_SET, nullptr, 0, 0);
1233 }
1234 DeletePartition(part);
1235 }
1236}
1237
1238// Locates all the image partitions in the part_grid, that were found by a
1239// previous call to FindImagePartitions, marks them in the image_mask,
1240// removes them from the grid, and deletes them. This makes it possible to
1241// call FindImagePartitions again to produce less broken-up and less
1242// overlapping image partitions.
1243// rerotation specifies how to rotate the partition coords to match
1244// the image_mask, since this function is used after orientation correction.
1246 ColPartitionGrid* part_grid,
1247 Pix* image_mask) {
1248 // Extract the noise parts from the grid and put them on a temporary list.
1249 ColPartition_LIST parts_list;
1250 ColPartition_IT part_it(&parts_list);
1251 ColPartitionGridSearch gsearch(part_grid);
1252 gsearch.StartFullSearch();
1253 ColPartition* part;
1254 while ((part = gsearch.NextFullSearch()) != nullptr) {
1255 BlobRegionType type = part->blob_type();
1256 if (type == BRT_NOISE || type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) {
1257 part_it.add_after_then_move(part);
1258 gsearch.RemoveBBox();
1259 }
1260 }
1261 // Render listed noise partitions to the image mask.
1262 MarkAndDeleteImageParts(rerotation, part_grid, &parts_list, image_mask);
1263}
1264
1265// Removes and deletes all image partitions that are too small to be worth
1266// keeping. We have to do this as a separate phase after creating the image
1267// partitions as the small images are needed to join the larger ones together.
1268static void DeleteSmallImages(ColPartitionGrid* part_grid) {
1269 if (part_grid != nullptr) return;
1270 ColPartitionGridSearch gsearch(part_grid);
1271 gsearch.StartFullSearch();
1272 ColPartition* part;
1273 while ((part = gsearch.NextFullSearch()) != nullptr) {
1274 // Only delete rectangular images, since if it became a poly image, it
1275 // is more evidence that it is somehow important.
1276 if (part->blob_type() == BRT_RECTIMAGE) {
1277 const TBOX& part_box = part->bounding_box();
1278 if (part_box.width() < kMinImageFindSize ||
1279 part_box.height() < kMinImageFindSize) {
1280 // It is too small to keep. Just make it disappear.
1281 gsearch.RemoveBBox();
1282 DeletePartition(part);
1283 }
1284 }
1285 }
1286}
1287
1288// Runs a CC analysis on the image_pix mask image, and creates
1289// image partitions from them, cutting out strong text, and merging with
1290// nearby image regions such that they don't interfere with text.
1291// Rotation and rerotation specify how to rotate image coords to match
1292// the blob and partition coords and back again.
1293// The input/output part_grid owns all the created partitions, and
1294// the partitions own all the fake blobs that belong in the partitions.
1295// Since the other blobs in the other partitions will be owned by the block,
1296// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
1297// situation and collect the image blobs.
1298void ImageFind::FindImagePartitions(Pix* image_pix, const FCOORD& rotation,
1299 const FCOORD& rerotation, TO_BLOCK* block,
1300 TabFind* tab_grid, DebugPixa* pixa_debug,
1301 ColPartitionGrid* part_grid,
1302 ColPartition_LIST* big_parts) {
1303 int imageheight = pixGetHeight(image_pix);
1304 Boxa* boxa;
1305 Pixa* pixa;
1306 ConnCompAndRectangularize(image_pix, pixa_debug, &boxa, &pixa);
1307 // Iterate the connected components in the image regions mask.
1308 int nboxes = 0;
1309 if (boxa != nullptr && pixa != nullptr) nboxes = boxaGetCount(boxa);
1310 for (int i = 0; i < nboxes; ++i) {
1311 l_int32 x, y, width, height;
1312 boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
1313 Pix* pix = pixaGetPix(pixa, i, L_CLONE);
1314 TBOX im_box(x, imageheight -y - height, x + width, imageheight - y);
1315 im_box.rotate(rotation); // Now matches all partitions and blobs.
1316 ColPartitionGridSearch rectsearch(part_grid);
1317 rectsearch.SetUniqueMode(true);
1318 ColPartition_LIST part_list;
1319 DivideImageIntoParts(im_box, rotation, rerotation, pix,
1320 &rectsearch, &part_list);
1321 if (textord_tabfind_show_images && pixa_debug != nullptr) {
1322 pixa_debug->AddPix(pix, "ImageComponent");
1323 tprintf("Component has %d parts\n", part_list.length());
1324 }
1325 pixDestroy(&pix);
1326 if (!part_list.empty()) {
1327 ColPartition_IT part_it(&part_list);
1328 if (part_list.singleton()) {
1329 // We didn't have to chop it into a polygon to fit around text, so
1330 // try expanding it to merge fragmented image parts, as long as it
1331 // doesn't touch strong text.
1332 ColPartition* part = part_it.extract();
1333 TBOX text_box(im_box);
1334 MaximalImageBoundingBox(part_grid, &text_box);
1335 while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part));
1336 part_it.set_to_list(&part_list);
1337 part_it.add_after_then_move(part);
1338 im_box = part->bounding_box();
1339 }
1340 EliminateWeakParts(im_box, part_grid, big_parts, &part_list);
1341 // Iterate the part_list and put the parts into the grid.
1342 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
1343 ColPartition* image_part = part_it.extract();
1344 im_box = image_part->bounding_box();
1345 part_grid->InsertBBox(true, true, image_part);
1346 if (!part_it.at_last()) {
1347 ColPartition* neighbour = part_it.data_relative(1);
1348 image_part->AddPartner(false, neighbour);
1349 neighbour->AddPartner(true, image_part);
1350 }
1351 }
1352 }
1353 }
1354 boxaDestroy(&boxa);
1355 pixaDestroy(&pixa);
1356 DeleteSmallImages(part_grid);
1357 if (textord_tabfind_show_images) {
1358 ScrollView* images_win_ = part_grid->MakeWindow(1000, 400, "With Images");
1359 part_grid->DisplayBoxes(images_win_);
1360 }
1361}
1362
1363
1364} // namespace tesseract.
@ PT_UNKNOWN
Definition: capi.h:129
BlobNeighbourDir
Definition: blobbox.h:87
@ BND_COUNT
Definition: blobbox.h:92
@ BND_ABOVE
Definition: blobbox.h:91
@ BND_LEFT
Definition: blobbox.h:88
@ BND_BELOW
Definition: blobbox.h:89
@ BND_RIGHT
Definition: blobbox.h:90
@ BTFT_TEXT_ON_IMAGE
Definition: blobbox.h:120
@ BTFT_CHAIN
Definition: blobbox.h:118
@ BTFT_STRONG_CHAIN
Definition: blobbox.h:119
@ BTFT_NONTEXT
Definition: blobbox.h:116
BlobRegionType
Definition: blobbox.h:72
@ BRT_RECTIMAGE
Definition: blobbox.h:76
@ BRT_POLYIMAGE
Definition: blobbox.h:77
@ BRT_NOISE
Definition: blobbox.h:73
@ BRT_VERT_TEXT
Definition: blobbox.h:79
#define ASSERT_HOST(x)
Definition: errcode.h:88
#define INT_VAR(name, val, comment)
Definition: params.h:303
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
LIST search(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:258
const double kMinRectangularFraction
Definition: imagefind.cpp:40
const double kRMSFitScaling
Definition: imagefind.cpp:49
const int kNoisePadding
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
Definition: colpartition.h:936
const int kMinColorDifference
Definition: imagefind.cpp:51
const int kRGBRMSColors
Definition: colpartition.h:37
const double kMaxRectangularGradient
Definition: imagefind.cpp:45
const int kMinImageFindSize
Definition: imagefind.cpp:47
const double kMaxRectangularFraction
Definition: imagefind.cpp:42
void AddPix(const Pix *pix, const char *caption)
Definition: debugpixa.h:26
Definition: linlsq.h:28
double m() const
Definition: linlsq.cpp:100
double c(double m) const
Definition: linlsq.cpp:116
double rms(double m, double c) const
Definition: linlsq.cpp:130
void add(double x, double y)
Definition: linlsq.cpp:48
Definition: points.h:189
Definition: rect.h:34
void set_right(int x)
Definition: rect.h:82
void rotate(const FCOORD &vec)
Definition: rect.h:197
int16_t top() const
Definition: rect.h:58
void print() const
Definition: rect.h:278
void set_bottom(int y)
Definition: rect.h:68
int16_t width() const
Definition: rect.h:115
int32_t area() const
Definition: rect.h:122
int16_t height() const
Definition: rect.h:108
bool overlap(const TBOX &box) const
Definition: rect.h:355
void set_top(int y)
Definition: rect.h:61
int16_t left() const
Definition: rect.h:72
int y_gap(const TBOX &box) const
Definition: rect.h:233
int16_t bottom() const
Definition: rect.h:65
TBOX intersection(const TBOX &box) const
Definition: rect.cpp:87
bool contains(const FCOORD pt) const
Definition: rect.h:333
bool null_box() const
Definition: rect.h:50
void set_left(int x)
Definition: rect.h:75
int x_gap(const TBOX &box) const
Definition: rect.h:225
int16_t right() const
Definition: rect.h:79
Definition: statistc.h:31
void add(int32_t value, int32_t count)
Definition: statistc.cpp:93
double median() const
Definition: statistc.cpp:231
double ile(double frac) const
Definition: statistc.cpp:166
void SetUniqueMode(bool mode)
Definition: bbgrid.h:253
void StartFullSearch()
Definition: bbgrid.h:665
BBC * NextFullSearch()
Definition: bbgrid.h:675
void DisplayBoxes(ScrollView *window)
Definition: bbgrid.h:613
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:486
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:589
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
BlobRegionType blob_type() const
Definition: colpartition.h:149
const TBOX & bounding_box() const
Definition: colpartition.h:110
void AddPartner(bool upper, ColPartition *partner)
static void FindImagePartitions(Pix *image_pix, const FCOORD &rotation, const FCOORD &rerotation, TO_BLOCK *block, TabFind *tab_grid, DebugPixa *pixa_debug, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
Definition: imagefind.cpp:1298
static void ComputeRectangleColors(const TBOX &rect, Pix *pix, int factor, Pix *color_map1, Pix *color_map2, Pix *rms_map, uint8_t *color1, uint8_t *color2)
Definition: imagefind.cpp:414
static bool pixNearlyRectangular(Pix *pix, double min_fraction, double max_fraction, double max_skew_gradient, int *x_start, int *y_start, int *x_end, int *y_end)
Definition: imagefind.cpp:266
static int CountPixelsInRotatedBox(TBOX box, const TBOX &im_box, const FCOORD &rotation, Pix *pix)
Definition: imagefind.cpp:597
static bool BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Pix *pix)
Definition: imagefind.cpp:576
static void ConnCompAndRectangularize(Pix *pix, DebugPixa *pixa_debug, Boxa **boxa, Pixa **pixa)
Definition: imagefind.cpp:154
static bool BoundsWithinRect(Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end)
Definition: imagefind.cpp:332
static Pix * FindImages(Pix *pix, DebugPixa *pixa_debug)
Definition: imagefind.cpp:62
static uint32_t ComposeRGB(uint32_t r, uint32_t g, uint32_t b)
Definition: imagefind.cpp:389
static uint8_t ClipToByte(double pixel)
Definition: imagefind.cpp:396
static double ColorDistanceFromLine(const uint8_t *line1, const uint8_t *line2, const uint8_t *point)
Definition: imagefind.cpp:355
static void TransferImagePartsToImageMask(const FCOORD &rerotation, ColPartitionGrid *part_grid, Pix *image_mask)
Definition: imagefind.cpp:1245