tesseract 4.1.1
Loading...
Searching...
No Matches
OL_BUCKETS Class Reference

#include <edgblob.h>

Public Member Functions

 ~OL_BUCKETS ()=default
 
C_OUTLINE_LIST * start_scan ()
 
C_OUTLINE_LIST * scan_next ()
 
OL_BUCKETS::OL_BUCKETS

Construct an array of buckets for associating outlines into blobs.

 OL_BUCKETS (ICOORD bleft, ICOORD tright)
 
OL_BUCKETS::operator()

Return a pointer to a list of C_OUTLINEs corresponding to the given pixel coordinates.

C_OUTLINE_LIST * operator() (int16_t x, int16_t y)
 
OL_BUCKETS::count_children

Find number of descendants of this outline.

int32_t count_children (C_OUTLINE *outline, int32_t max_count)
 
OL_BUCKETS::outline_complexity

This is the new version of count_children.

The goal of this function is to determine whether an outline and its interiors could be part of a character blob. This is done by computing a "complexity" index for the outline, which is the return value of this function, and checking it against a threshold. The max_count is used for short-circuiting the recursion and forcing a rejection that is guaranteed to fail the threshold test. The complexity F for outline X with N children X[i] is F(X) = N + edges_children_per_grandchild * sum_i F(X[i]), so each layer of nesting increases complexity exponentially. An outline can be rejected as a text blob candidate if its complexity is too high, if it has too many children (likely a container), or if it has too many layers of nested inner loops. This has the side-effect of flattening out boxed or reversed video text regions.

int32_t outline_complexity (C_OUTLINE *outline, int32_t max_count, int16_t depth)
 
OL_BUCKETS::extract_children

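Extract the children of this outline (the outlines in the buckets covered by its bounding box that compare as *child < *outline) from their bucket lists and add them to the list via the given iterator.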

void extract_children (C_OUTLINE *outline, C_OUTLINE_IT *it)
 

Detailed Description

Definition at line 32 of file edgblob.h.

Constructor & Destructor Documentation

◆ OL_BUCKETS()

OL_BUCKETS::OL_BUCKETS ( ICOORD  bleft,
ICOORD  tright 
)

Definition at line 63 of file edgblob.cpp.

65 : bl(bleft), tr(tright) {
66 bxdim =(tright.x() - bleft.x()) / BUCKETSIZE + 1;
67 bydim =(tright.y() - bleft.y()) / BUCKETSIZE + 1;
68 // make array
69 buckets.reset(new C_OUTLINE_LIST[bxdim * bydim]);
70 index = 0;
71}
#define BUCKETSIZE
Definition: edgblob.h:30
int16_t y() const
access function
Definition: points.h:56
int16_t x() const
access function
Definition: points.h:52

◆ ~OL_BUCKETS()

OL_BUCKETS::~OL_BUCKETS ( )
default

Member Function Documentation

◆ count_children()

int32_t OL_BUCKETS::count_children ( C_OUTLINE *  outline,
int32_t  max_count 
)

Definition at line 178 of file edgblob.cpp.

181 {
182 bool parent_box; // could it be boxy
183 int16_t xmin, xmax; // coord limits
184 int16_t ymin, ymax;
185 int16_t xindex, yindex; // current bucket
186 C_OUTLINE *child; // current child
187 int32_t child_count; // no of children
188 int32_t grandchild_count; // no of grandchildren
189 int32_t parent_area; // potential box
190 float max_parent_area; // potential box
191 int32_t child_area; // current child
192 int32_t child_length; // current child
193 TBOX olbox;
194 C_OUTLINE_IT child_it; // search iterator
195
196 olbox = outline->bounding_box();
197 xmin =(olbox.left() - bl.x()) / BUCKETSIZE;
198 xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
199 ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
200 ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
201 child_count = 0;
202 grandchild_count = 0;
203 parent_area = 0;
204 max_parent_area = 0;
205 parent_box = true;
206 for (yindex = ymin; yindex <= ymax; yindex++) {
207 for (xindex = xmin; xindex <= xmax; xindex++) {
208 child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
209 if (child_it.empty())
210 continue;
211 for (child_it.mark_cycle_pt(); !child_it.cycled_list();
212 child_it.forward()) {
213 child = child_it.data();
214 if (child != outline && *child < *outline) {
215 child_count++;
216 if (child_count <= max_count) {
217 int max_grand =(max_count - child_count) /
218 edges_children_per_grandchild;
219 if (max_grand > 0)
220 grandchild_count += count_children(child, max_grand) *
221 edges_children_per_grandchild;
222 else
223 grandchild_count += count_children(child, 1);
224 }
225 if (child_count + grandchild_count > max_count) {
226 if (edges_debug)
227 tprintf("Discarding parent with child count=%d, gc=%d\n",
228 child_count,grandchild_count);
229 return child_count + grandchild_count;
230 }
231 if (parent_area == 0) {
232 parent_area = outline->outer_area();
233 if (parent_area < 0)
234 parent_area = -parent_area;
235 max_parent_area = outline->bounding_box().area() * edges_boxarea;
236 if (parent_area < max_parent_area)
237 parent_box = false;
238 }
239 if (parent_box &&
240 (!edges_children_fix ||
241 child->bounding_box().height() > edges_min_nonhole)) {
242 child_area = child->outer_area();
243 if (child_area < 0)
244 child_area = -child_area;
245 if (edges_children_fix) {
246 if (parent_area - child_area < max_parent_area) {
247 parent_box = false;
248 continue;
249 }
250 if (grandchild_count > 0) {
251 if (edges_debug)
252 tprintf("Discarding parent of area %d, child area=%d, max%g "
253 "with gc=%d\n",
254 parent_area, child_area, max_parent_area,
255 grandchild_count);
256 return max_count + 1;
257 }
258 child_length = child->pathlength();
259 if (child_length * child_length >
260 child_area * edges_patharea_ratio) {
261 if (edges_debug)
262 tprintf("Discarding parent of area %d, child area=%d, max%g "
263 "with child length=%d\n",
264 parent_area, child_area, max_parent_area,
265 child_length);
266 return max_count + 1;
267 }
268 }
269 if (child_area < child->bounding_box().area() * edges_childarea) {
270 if (edges_debug)
271 tprintf("Discarding parent of area %d, child area=%d, max%g "
272 "with child rect=%d\n",
273 parent_area, child_area, max_parent_area,
274 child->bounding_box().area());
275 return max_count + 1;
276 }
277 }
278 }
279 }
280 }
281 }
282 return child_count + grandchild_count;
283}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
const TBOX & bounding_box() const
Definition: coutln.h:113
int32_t outer_area() const
Definition: coutln.cpp:308
int32_t pathlength() const
Definition: coutln.h:135
Definition: rect.h:34
int16_t top() const
Definition: rect.h:58
int32_t area() const
Definition: rect.h:122
int16_t height() const
Definition: rect.h:108
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
int16_t right() const
Definition: rect.h:79
int32_t count_children(C_OUTLINE *outline, int32_t max_count)
Definition: edgblob.cpp:178

◆ extract_children()

void OL_BUCKETS::extract_children ( C_OUTLINE *  outline,
C_OUTLINE_IT *  it 
)

Definition at line 294 of file edgblob.cpp.

297 {
298 int16_t xmin, xmax; // coord limits
299 int16_t ymin, ymax;
300 int16_t xindex, yindex; // current bucket
301 TBOX olbox;
302 C_OUTLINE_IT child_it; // search iterator
303
304 olbox = outline->bounding_box();
305 xmin =(olbox.left() - bl.x()) / BUCKETSIZE;
306 xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
307 ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
308 ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
309 for (yindex = ymin; yindex <= ymax; yindex++) {
310 for (xindex = xmin; xindex <= xmax; xindex++) {
311 child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
312 for (child_it.mark_cycle_pt(); !child_it.cycled_list();
313 child_it.forward()) {
314 if (*child_it.data() < *outline) {
315 it->add_after_then_move(child_it.extract());
316 }
317 }
318 }
319 }
320}

◆ operator()()

C_OUTLINE_LIST * OL_BUCKETS::operator() ( int16_t  x,
int16_t  y 
)

Definition at line 82 of file edgblob.cpp.

84 {
85 return &buckets[(y-bl.y()) / BUCKETSIZE * bxdim + (x-bl.x()) / BUCKETSIZE];
86}

◆ outline_complexity()

int32_t OL_BUCKETS::outline_complexity ( C_OUTLINE *  outline,
int32_t  max_count,
int16_t  depth 
)

Definition at line 109 of file edgblob.cpp.

113 {
114 int16_t xmin, xmax; // coord limits
115 int16_t ymin, ymax;
116 int16_t xindex, yindex; // current bucket
117 C_OUTLINE *child; // current child
118 int32_t child_count; // no of children
119 int32_t grandchild_count; // no of grandchildren
120 C_OUTLINE_IT child_it; // search iterator
121
122 TBOX olbox = outline->bounding_box();
123 xmin =(olbox.left() - bl.x()) / BUCKETSIZE;
124 xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
125 ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
126 ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
127 child_count = 0;
128 grandchild_count = 0;
129 if (++depth > edges_max_children_layers) // nested loops are too deep
130 return max_count + depth;
131
132 for (yindex = ymin; yindex <= ymax; yindex++) {
133 for (xindex = xmin; xindex <= xmax; xindex++) {
134 child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
135 if (child_it.empty())
136 continue;
137 for (child_it.mark_cycle_pt(); !child_it.cycled_list();
138 child_it.forward()) {
139 child = child_it.data();
140 if (child == outline || !(*child < *outline))
141 continue;
142 child_count++;
143
144 if (child_count > edges_max_children_per_outline) { // too fragmented
145 if (edges_debug)
146 tprintf("Discard outline on child_count=%d > "
147 "max_children_per_outline=%d\n",
148 child_count,
149 static_cast<int32_t>(edges_max_children_per_outline));
150 return max_count + child_count;
151 }
152
153 // Compute the "complexity" of each child recursively
154 int32_t remaining_count = max_count - child_count - grandchild_count;
155 if (remaining_count > 0)
156 grandchild_count += edges_children_per_grandchild *
157 outline_complexity(child, remaining_count, depth);
158 if (child_count + grandchild_count > max_count) { // too complex
159 if (edges_debug)
160 tprintf("Disgard outline on child_count=%d + grandchild_count=%d "
161 "> max_count=%d\n",
162 child_count, grandchild_count, max_count);
163 return child_count + grandchild_count;
164 }
165 }
166 }
167 }
168 return child_count + grandchild_count;
169}
int32_t outline_complexity(C_OUTLINE *outline, int32_t max_count, int16_t depth)
Definition: edgblob.cpp:109

◆ scan_next()

C_OUTLINE_LIST * OL_BUCKETS::scan_next ( )
inline

Definition at line 51 of file edgblob.h.

51 {
52 for (; buckets[index].empty () && index < bxdim * bydim - 1; index++);
53 return &buckets[index];
54 }

◆ start_scan()

C_OUTLINE_LIST * OL_BUCKETS::start_scan ( )
inline

Definition at line 45 of file edgblob.h.

45 {
46 for (index = 0; buckets[index].empty () && index < bxdim * bydim - 1;
47 index++);
48 return &buckets[index];
49 }

The documentation for this class was generated from the following files: