tesseract 4.1.1
Loading...
Searching...
No Matches
ccnontextdetect.cpp
Go to the documentation of this file.
1
2// File: ccnontextdetect.cpp
3// Description: Connected-Component-based photo (non-text) detection.
4// Copyright 2011 Google Inc. All Rights Reserved.
5// Author: rays@google.com (Ray Smith)
6// Created: Sat Jun 11 10:12:01 PST 2011
7//
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20#ifdef HAVE_CONFIG_H
21#include "config_auto.h"
22#endif
23
24#include "ccnontextdetect.h"
25#include "imagefind.h"
26#include "strokewidth.h"
27
28namespace tesseract {
29
30// Max number of neighbour small objects per squared gridsize before a grid
31// cell becomes image.
32const double kMaxSmallNeighboursPerPix = 1.0 / 32;
33// Max number of small blobs a large blob may overlap before it is rejected
34// and determined to be image.
36// Max number of small blobs a medium blob may overlap before it is rejected
37// and determined to be image. Larger than for large blobs as medium blobs
38// may be complex Chinese characters. Very large Chinese characters are going
39// to overlap more medium blobs than small.
41// Max number of normal blobs a large blob may overlap before it is rejected
42// and determined to be image. This is set higher to allow for drop caps, which
43// may overlap a lot of good text blobs.
45// Multiplier of original noise_count used to test for the case of spreading
46// noise beyond where it should really be.
48// Pixel padding for noise blobs when rendering on the image
49// mask to encourage them to join together. Make it too big and images
50// will fatten out too much and have to be clipped to text.
51const int kNoisePadding = 4;
52// Fraction of max_noise_count_ to be added to the noise count if there is
53// photo mask in the background.
54const double kPhotoOffsetFraction = 0.375;
55// Min ratio of perimeter^2/16area for a "good" blob in estimating noise
56// density. Good blobs are supposed to be highly likely real text.
57// We consider a square to have unit ratio, where A=(p/4)^2, hence the factor
58// of 16. Digital circles are weird and have a minimum ratio of pi/64, not
59// the 1/(4pi) that you would expect.
60const double kMinGoodTextPARatio = 1.5;
61
63 const ICOORD& bleft, const ICOORD& tright)
64 : BlobGrid(gridsize, bleft, tright),
65 max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix *
66 gridsize * gridsize)),
67 noise_density_(nullptr) {
68 // TODO(rays) break max_noise_count_ out into an area-proportional
69 // value, as now plus an additive constant for the number of text blobs
70 // in the 3x3 neighbourhood - maybe 9.
71}
72
74 delete noise_density_;
75}
76
77// Creates and returns a Pix with the same resolution as the original
78// in which 1 (black) pixels represent likely non text (photo, line drawing)
79// areas of the page, deleting from the blob_block the blobs that were
80// determined to be non-text.
81// The photo_map is used to bias the decision towards non-text, rather than
82// supplying definite decision.
83// The blob_block is the usual result of connected component analysis,
84// holding the detected blobs.
85// The returned Pix should be PixDestroyed after use.
86Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map,
87 TO_BLOCK* blob_block) {
88 // Insert the smallest blobs into the grid.
89 InsertBlobList(&blob_block->small_blobs);
90 InsertBlobList(&blob_block->noise_blobs);
91 // Add the medium blobs that don't have a good strokewidth neighbour.
92 // Those that do go into good_grid as an antidote to spreading beyond the
93 // real reaches of a noise region.
94 BlobGrid good_grid(gridsize(), bleft(), tright());
95 BLOBNBOX_IT blob_it(&blob_block->blobs);
96 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
97 BLOBNBOX* blob = blob_it.data();
98 double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0;
99 perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area();
100 if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio)
101 InsertBBox(true, true, blob);
102 else
103 good_grid.InsertBBox(true, true, blob);
104 }
105 noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid);
106 good_grid.Clear(); // Not needed any more.
107 Pix* pix = noise_density_->ThresholdToPix(max_noise_count_);
108 if (debug) {
109 pixWrite("junknoisemask.png", pix, IFF_PNG);
110 }
111 ScrollView* win = nullptr;
112 #ifndef GRAPHICS_DISABLED
113 if (debug) {
114 win = MakeWindow(0, 400, "Photo Mask Blobs");
115 }
116 #endif // GRAPHICS_DISABLED
117 // Large and medium blobs are not text if they overlap with "a lot" of small
118 // blobs.
119 MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
121 win, ScrollView::DARK_GREEN, pix);
122 MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall,
123 win, ScrollView::WHITE, pix);
124 // Clear the grid of small blobs and insert the medium blobs.
125 Clear();
126 InsertBlobList(&blob_block->blobs);
127 MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
129 win, ScrollView::DARK_GREEN, pix);
130 // Clear again before we start deleting the blobs in the grid.
131 Clear();
132 MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1,
133 win, ScrollView::CORAL, pix);
134 MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1,
135 win, ScrollView::GOLDENROD, pix);
136 MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1,
137 win, ScrollView::WHITE, pix);
138 if (debug) {
139 #ifndef GRAPHICS_DISABLED
140 win->Update();
141 #endif // GRAPHICS_DISABLED
142 pixWrite("junkccphotomask.png", pix, IFF_PNG);
143 #ifndef GRAPHICS_DISABLED
144 delete win->AwaitEvent(SVET_DESTROY);
145 delete win;
146 #endif // GRAPHICS_DISABLED
147 }
148 return pix;
149}
150
151// Computes and returns the noise_density IntGrid, at the same gridsize as
152// this by summing the number of small elements in a 3x3 neighbourhood of
153// each grid cell. good_grid is filled with blobs that are considered most
154// likely good text, and this is filled with small and medium blobs that are
155// more likely non-text.
156// The photo_map is used to bias the decision towards non-text, rather than
157// supplying definite decision.
158IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map,
159 BlobGrid* good_grid) {
160 IntGrid* noise_counts = CountCellElements();
161 IntGrid* noise_density = noise_counts->NeighbourhoodSum();
162 IntGrid* good_counts = good_grid->CountCellElements();
163 // Now increase noise density in photo areas, to bias the decision and
164 // minimize hallucinated text on image, but trim the noise_density where
165 // there are good blobs and the original count is low in non-photo areas,
166 // indicating that most of the result came from neighbouring cells.
167 int height = pixGetHeight(photo_map);
168 int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction);
169 for (int y = 0; y < gridheight(); ++y) {
170 for (int x = 0; x < gridwidth(); ++x) {
171 int noise = noise_density->GridCellValue(x, y);
172 if (max_noise_count_ < noise + photo_offset &&
173 noise <= max_noise_count_) {
174 // Test for photo.
175 int left = x * gridsize();
176 int right = left + gridsize();
177 int bottom = height - y * gridsize();
178 int top = bottom - gridsize();
179 if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right,
180 &bottom)) {
181 noise_density->SetGridCell(x, y, noise + photo_offset);
182 }
183 }
184 if (debug && noise > max_noise_count_ &&
185 good_counts->GridCellValue(x, y) > 0) {
186 tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n",
187 x * gridsize(), y * gridsize(),
188 noise_density->GridCellValue(x, y),
189 good_counts->GridCellValue(x, y),
190 noise_counts->GridCellValue(x, y), max_noise_count_);
191 }
192 if (noise > max_noise_count_ &&
193 good_counts->GridCellValue(x, y) > 0 &&
194 noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <=
195 max_noise_count_) {
196 noise_density->SetGridCell(x, y, 0);
197 }
198 }
199 }
200 delete noise_counts;
201 delete good_counts;
202 return noise_density;
203}
204
205// Helper to expand a box in one of the 4 directions by the given pad,
206// provided it does not expand into any cell with a zero noise density.
207// If that is not possible, try expanding all round by a small constant.
208static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density,
209 int pad) {
210 TBOX expanded_box(box);
211 expanded_box.set_right(box.right() + pad);
212 if (!noise_density.AnyZeroInRect(expanded_box))
213 return expanded_box;
214 expanded_box = box;
215 expanded_box.set_left(box.left() - pad);
216 if (!noise_density.AnyZeroInRect(expanded_box))
217 return expanded_box;
218 expanded_box = box;
219 expanded_box.set_top(box.top() + pad);
220 if (!noise_density.AnyZeroInRect(expanded_box))
221 return expanded_box;
222 expanded_box = box;
223 expanded_box.set_bottom(box.bottom() + pad);
224 if (!noise_density.AnyZeroInRect(expanded_box))
225 return expanded_box;
226 expanded_box = box;
227 expanded_box.pad(kNoisePadding, kNoisePadding);
228 if (!noise_density.AnyZeroInRect(expanded_box))
229 return expanded_box;
230 return box;
231}
232
233// Tests each blob in the list to see if it is certain non-text using 2
234// conditions:
235// 1. blob overlaps a cell with high value in noise_density_ (previously set
236// by ComputeNoiseDensity).
237// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
238// condition is disabled with max_blob_overlaps == -1.
239// If it does, the blob is declared non-text, and is used to mark up the
240// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
241// neighbours reset, as they may now point to deleted data.
242// WARNING: The blobs list blobs may be in the *this grid, but they are
243// not removed. If any deleted blobs might be in *this, then this must be
244// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
245// If the win is not nullptr, deleted blobs are drawn on it in red, and kept
246// blobs are drawn on it in ok_color.
247void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
248 int max_blob_overlaps,
249 ScrollView* win,
250 ScrollView::Color ok_color,
251 Pix* nontext_mask) {
252 int imageheight = tright().y() - bleft().x();
253 BLOBNBOX_IT blob_it(blobs);
254 BLOBNBOX_LIST dead_blobs;
255 BLOBNBOX_IT dead_it(&dead_blobs);
256 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
257 BLOBNBOX* blob = blob_it.data();
258 TBOX box = blob->bounding_box();
259 if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) &&
260 (max_blob_overlaps < 0 ||
261 !BlobOverlapsTooMuch(blob, max_blob_overlaps))) {
262 blob->ClearNeighbours();
263 #ifndef GRAPHICS_DISABLED
264 if (win != nullptr)
265 blob->plot(win, ok_color, ok_color);
266 #endif // GRAPHICS_DISABLED
267 } else {
268 if (noise_density_->AnyZeroInRect(box)) {
269 // There is a danger that the bounding box may overlap real text, so
270 // we need to render the outline.
271 Pix* blob_pix = blob->cblob()->render_outline();
272 pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
273 box.width(), box.height(), PIX_SRC | PIX_DST,
274 blob_pix, 0, 0);
275 pixDestroy(&blob_pix);
276 } else {
277 if (box.area() < gridsize() * gridsize()) {
278 // It is a really bad idea to make lots of small components in the
279 // photo mask, so try to join it to a bigger area by expanding the
280 // box in a way that does not touch any zero noise density cell.
281 box = AttemptBoxExpansion(box, *noise_density_, gridsize());
282 }
283 // All overlapped cells are non-zero, so just mark the rectangle.
284 pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
285 box.width(), box.height(), PIX_SET, nullptr, 0, 0);
286 }
287 #ifndef GRAPHICS_DISABLED
288 if (win != nullptr)
290 #endif // GRAPHICS_DISABLED
291 // It is safe to delete the cblob now, as it isn't used by the grid
292 // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the
293 // dead_blobs list.
294 // TODO(rays) delete the delete when the BLOBNBOX destructor deletes
295 // the cblob.
296 delete blob->cblob();
297 dead_it.add_to_end(blob_it.extract());
298 }
299 }
300}
301
302// Returns true if the given blob overlaps more than max_overlaps blobs
303// in the current grid.
304bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) {
305 // Search the grid to see what intersects it.
306 // Setup a Rectangle search for overlapping this blob.
307 BlobGridSearch rsearch(this);
308 const TBOX& box = blob->bounding_box();
309 rsearch.StartRectSearch(box);
310 rsearch.SetUniqueMode(true);
311 BLOBNBOX* neighbour;
312 int overlap_count = 0;
313 while (overlap_count <= max_overlaps &&
314 (neighbour = rsearch.NextRectSearch()) != nullptr) {
315 if (box.major_overlap(neighbour->bounding_box())) {
316 ++overlap_count;
317 if (overlap_count > max_overlaps)
318 return true;
319 }
320 }
321 return false;
322}
323
324} // namespace tesseract.
int IntCastRounded(double x)
Definition: helpers.h:175
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
@ SVET_DESTROY
Definition: scrollview.h:46
const double kMinGoodTextPARatio
const int kMaxMediumOverlapsWithSmall
const int kOriginalNoiseMultiple
const int kMaxLargeOverlapsWithMedium
const int kMaxLargeOverlapsWithSmall
const int kNoisePadding
const double kMaxSmallNeighboursPerPix
const double kPhotoOffsetFraction
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:31
void plot(ScrollView *window, ScrollView::Color blob_colour, ScrollView::Color child_colour)
Definition: blobbox.cpp:485
const TBOX & bounding_box() const
Definition: blobbox.h:230
C_BLOB * cblob() const
Definition: blobbox.h:268
int GoodTextBlob() const
Definition: blobbox.cpp:226
int32_t enclosed_area() const
Definition: blobbox.h:253
void ClearNeighbours()
Definition: blobbox.h:499
BLOBNBOX_LIST blobs
Definition: blobbox.h:772
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:774
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:776
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:775
integer coordinate
Definition: points.h:32
int16_t y() const
access_function
Definition: points.h:56
int16_t x() const
access function
Definition: points.h:52
Definition: rect.h:34
int16_t top() const
Definition: rect.h:58
void set_bottom(int y)
Definition: rect.h:68
int16_t width() const
Definition: rect.h:115
int32_t area() const
Definition: rect.h:122
int16_t height() const
Definition: rect.h:108
bool major_overlap(const TBOX &box) const
Definition: rect.h:368
void set_top(int y)
Definition: rect.h:61
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
void pad(int xpad, int ypad)
Definition: rect.h:131
void set_left(int x)
Definition: rect.h:75
int16_t right() const
Definition: rect.h:79
Pix * render_outline()
Definition: stepblob.cpp:522
int32_t perimeter()
Definition: stepblob.cpp:292
int gridsize() const
Definition: bbgrid.h:63
int gridheight() const
Definition: bbgrid.h:69
const ICOORD & bleft() const
Definition: bbgrid.h:72
int gridwidth() const
Definition: bbgrid.h:66
const ICOORD & tright() const
Definition: bbgrid.h:75
IntGrid * NeighbourhoodSum() const
Definition: bbgrid.cpp:132
Pix * ThresholdToPix(int threshold) const
Definition: bbgrid.cpp:190
int GridCellValue(int grid_x, int grid_y) const
Definition: bbgrid.h:120
bool RectMostlyOverThreshold(const TBOX &rect, int threshold) const
Definition: bbgrid.cpp:154
void SetGridCell(int grid_x, int grid_y, int value)
Definition: bbgrid.h:124
bool AnyZeroInRect(const TBOX &rect) const
Definition: bbgrid.cpp:174
void Clear()
Definition: bbgrid.h:455
void InsertBBox(bool h_spread, bool v_spread, BLOBNBOX *bbox)
Definition: bbgrid.h:486
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:589
void InsertBlobList(BLOBNBOX_LIST *blobs)
Definition: blobgrid.cpp:36
CCNonTextDetect(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Pix * ComputeNonTextMask(bool debug, Pix *photo_map, TO_BLOCK *blob_block)
static bool BoundsWithinRect(Pix *pix, int *x_start, int *y_start, int *x_end, int *y_end)
Definition: imagefind.cpp:332
static void Update()
Definition: scrollview.cpp:709
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:443