tesseract 4.1.1
Loading...
Searching...
No Matches
split.cpp
Go to the documentation of this file.
1/* -*-C-*-
2 ********************************************************************************
3 *
4 * File: split.cpp (Formerly split.c)
5 * Author: Mark Seaman, OCR Technology
6 *
7 * (c) Copyright 1987, Hewlett-Packard Company.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 *************************************************************************/
19/*----------------------------------------------------------------------
20 I n c l u d e s
21----------------------------------------------------------------------*/
22// Include automatically generated configuration file if running autoconf.
23#ifdef HAVE_CONFIG_H
24#include "config_auto.h"
25#endif
26
27#include "split.h"
28#include "coutln.h"
29#include "tprintf.h"
30
31#include <algorithm>
32
33/*----------------------------------------------------------------------
34 V a r i a b l e s
35----------------------------------------------------------------------*/
36// Limit on the amount of penalty for the chop being off-center.
37const int kCenterGradeCap = 25;
38// Ridiculously large priority for splits that are no use.
39const double kBadPriority = 999.0;
40
41BOOL_VAR(wordrec_display_splits, 0, "Display splits");
42
43// Returns the bounding box of all the points in the split.
45 return TBOX(
46 std::min(point1->pos.x, point2->pos.x), std::min(point1->pos.y, point2->pos.y),
47 std::max(point1->pos.x, point2->pos.x), std::max(point1->pos.y, point2->pos.y));
48}
49
50// Hides the SPLIT so the outlines appear not to be cut by it.
51void SPLIT::Hide() const {
52 EDGEPT* edgept = point1;
53 do {
54 edgept->Hide();
55 edgept = edgept->next;
56 } while (!edgept->EqualPos(*point2) && edgept != point1);
57 edgept = point2;
58 do {
59 edgept->Hide();
60 edgept = edgept->next;
61 } while (!edgept->EqualPos(*point1) && edgept != point2);
62}
63
64// Undoes hide, so the outlines are cut by the SPLIT.
65void SPLIT::Reveal() const {
66 EDGEPT* edgept = point1;
67 do {
68 edgept->Reveal();
69 edgept = edgept->next;
70 } while (!edgept->EqualPos(*point2) && edgept != point1);
71 edgept = point2;
72 do {
73 edgept->Reveal();
74 edgept = edgept->next;
75 } while (!edgept->EqualPos(*point1) && edgept != point2);
76}
77
78// Compute a split priority based on the bounding boxes of the parts.
79// The arguments here are config parameters defined in Wordrec. Add chop_
80// to the beginning of the name.
81float SPLIT::FullPriority(int xmin, int xmax, double overlap_knob,
82 int centered_maxwidth, double center_knob,
83 double width_change_knob) const {
84 TBOX box1 = Box12();
85 TBOX box2 = Box21();
86 int min_left = std::min(box1.left(), box2.left());
87 int max_right = std::max(box1.right(), box2.right());
88 if (xmin < min_left && xmax > max_right) return kBadPriority;
89
90 float grade = 0.0f;
91 // grade_overlap.
92 int width1 = box1.width();
93 int width2 = box2.width();
94 int min_width = std::min(width1, width2);
95 int overlap = -box1.x_gap(box2);
96 if (overlap == min_width) {
97 grade += 100.0f; // Total overlap.
98 } else {
99 if (2 * overlap > min_width) overlap += 2 * overlap - min_width;
100 if (overlap > 0) grade += overlap_knob * overlap;
101 }
102 // grade_center_of_blob.
103 if (width1 <= centered_maxwidth || width2 <= centered_maxwidth) {
104 grade += std::min(static_cast<double>(kCenterGradeCap), center_knob * abs(width1 - width2));
105 }
106 // grade_width_change.
107 float width_change_grade = 20 - (max_right - min_left - std::max(width1, width2));
108 if (width_change_grade > 0.0f)
109 grade += width_change_grade * width_change_knob;
110 return grade;
111}
112
113// Returns true if *this SPLIT appears OK in the sense that it does not cross
114// any outlines and does not chop off any ridiculously small pieces.
115bool SPLIT::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
116 return !IsLittleChunk(min_points, min_area) &&
118}
119
120// Returns true if the split generates a small chunk in terms of either area
121// or number of points.
122bool SPLIT::IsLittleChunk(int min_points, int min_area) const {
123 if (point1->ShortNonCircularSegment(min_points, point2) &&
124 point1->SegmentArea(point2) < min_area) {
125 return true;
126 }
127 if (point2->ShortNonCircularSegment(min_points, point1) &&
128 point2->SegmentArea(point1) < min_area) {
129 return true;
130 }
131 return false;
132}
133
134/**********************************************************************
135 * make_edgept
136 *
137 * Create an EDGEPT and hook it into an existing list of edge points.
138 **********************************************************************/
139EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev) {
140 EDGEPT *this_edgept;
141 /* Create point */
142 this_edgept = new EDGEPT;
143 this_edgept->pos.x = x;
144 this_edgept->pos.y = y;
145 // Now deal with the src_outline steps.
146 C_OUTLINE* prev_ol = prev->src_outline;
147 if (prev_ol != nullptr && prev->next == next) {
148 // Compute the fraction of the segment that is being cut.
149 FCOORD segment_vec(next->pos.x - prev->pos.x, next->pos.y - prev->pos.y);
150 FCOORD target_vec(x - prev->pos.x, y - prev->pos.y);
151 double cut_fraction = target_vec.length() / segment_vec.length();
152 // Get the start and end at the step level.
153 ICOORD step_start = prev_ol->position_at_index(prev->start_step);
154 int end_step = prev->start_step + prev->step_count;
155 int step_length = prev_ol->pathlength();
156 ICOORD step_end = prev_ol->position_at_index(end_step % step_length);
157 ICOORD step_vec = step_end - step_start;
158 double target_length = step_vec.length() * cut_fraction;
159 // Find the point on the segment that gives the length nearest to target.
160 int best_step = prev->start_step;
161 ICOORD total_step(0, 0);
162 double best_dist = target_length;
163 for (int s = prev->start_step; s < end_step; ++s) {
164 total_step += prev_ol->step(s % step_length);
165 double dist = fabs(target_length - total_step.length());
166 if (dist < best_dist) {
167 best_dist = dist;
168 best_step = s + 1;
169 }
170 }
171 // The new point is an intermediate point.
172 this_edgept->src_outline = prev_ol;
173 this_edgept->step_count = end_step - best_step;
174 this_edgept->start_step = best_step % step_length;
175 prev->step_count = best_step - prev->start_step;
176 } else {
177 // The new point is poly only.
178 this_edgept->src_outline = nullptr;
179 this_edgept->step_count = 0;
180 this_edgept->start_step = 0;
181 }
182 /* Hook it up */
183 this_edgept->next = next;
184 this_edgept->prev = prev;
185 prev->next = this_edgept;
186 next->prev = this_edgept;
187 /* Set up vec entries */
188 this_edgept->vec.x = this_edgept->next->pos.x - x;
189 this_edgept->vec.y = this_edgept->next->pos.y - y;
190 this_edgept->prev->vec.x = x - this_edgept->prev->pos.x;
191 this_edgept->prev->vec.y = y - this_edgept->prev->pos.y;
192 return this_edgept;
193}
194
195/**********************************************************************
196 * remove_edgept
197 *
198 * Remove a given EDGEPT from its list and delete it.
199 **********************************************************************/
200void remove_edgept(EDGEPT *point) {
201 EDGEPT *prev = point->prev;
202 EDGEPT *next = point->next;
203 // Add point's steps onto prev's steps if they are from the same outline.
204 if (prev->src_outline == point->src_outline && prev->src_outline != nullptr) {
205 prev->step_count += point->step_count;
206 }
207 prev->next = next;
208 next->prev = prev;
209 prev->vec.x = next->pos.x - prev->pos.x;
210 prev->vec.y = next->pos.y - prev->pos.y;
211 delete point;
212}
213
214/**********************************************************************
215 * Print
216 *
217 * Shows the coordinates of both points in a split.
218 **********************************************************************/
219void SPLIT::Print() const {
220 tprintf("(%d,%d)--(%d,%d)", point1->pos.x, point1->pos.y, point2->pos.x,
221 point2->pos.y);
222}
223
224#ifndef GRAPHICS_DISABLED
225// Draws the split in the given window.
226void SPLIT::Mark(ScrollView* window) const {
227 window->Pen(ScrollView::GREEN);
228 window->Line(point1->pos.x, point1->pos.y, point2->pos.x, point2->pos.y);
229 window->UpdateWindow();
230}
231#endif
232
233// Creates two outlines out of one by splitting the original one in half.
234// Inserts the resulting outlines into the given list.
235void SPLIT::SplitOutlineList(TESSLINE* outlines) const {
236 SplitOutline();
237 while (outlines->next != nullptr) outlines = outlines->next;
238
239 outlines->next = new TESSLINE;
240 outlines->next->loop = point1;
241 outlines->next->ComputeBoundingBox();
242
243 outlines = outlines->next;
244
245 outlines->next = new TESSLINE;
246 outlines->next->loop = point2;
247 outlines->next->ComputeBoundingBox();
248
249 outlines->next->next = nullptr;
250}
251
252// Makes a split between these two edge points, but does not affect the
253// outlines to which they belong.
255 EDGEPT* temp2 = point2->next;
256 EDGEPT* temp1 = point1->next;
257 /* Create two new points */
258 EDGEPT* new_point1 = make_edgept(point1->pos.x, point1->pos.y, temp1, point2);
259 EDGEPT* new_point2 = make_edgept(point2->pos.x, point2->pos.y, temp2, point1);
260 // point1 and 2 are now cross-over points, so they must have nullptr
261 // src_outlines and give their src_outline information their new
262 // replacements.
263 new_point1->src_outline = point1->src_outline;
264 new_point1->start_step = point1->start_step;
265 new_point1->step_count = point1->step_count;
266 new_point2->src_outline = point2->src_outline;
267 new_point2->start_step = point2->start_step;
268 new_point2->step_count = point2->step_count;
269 point1->src_outline = nullptr;
270 point1->start_step = 0;
271 point1->step_count = 0;
272 point2->src_outline = nullptr;
273 point2->start_step = 0;
274 point2->step_count = 0;
275}
276
277// Undoes the effect of SplitOutlineList, correcting the outlines for undoing
278// the split, but possibly leaving some duplicate outlines.
280 /* Modify edge points */
282
283 auto* outline1 = new TESSLINE;
284 outline1->next = blob->outlines;
285 blob->outlines = outline1;
286 outline1->loop = point1;
287
288 auto* outline2 = new TESSLINE;
289 outline2->next = blob->outlines;
290 blob->outlines = outline2;
291 outline2->loop = point2;
292}
293
294// Removes the split that was put between these two points.
296 EDGEPT* tmp1 = point1->next;
297 EDGEPT* tmp2 = point2->next;
298
299 tmp1->next->prev = point2;
300 tmp2->next->prev = point1;
301
302 // tmp2 is coincident with point1. point1 takes tmp2's place as tmp2 is
303 // deleted.
304 point1->next = tmp2->next;
308 // Likewise point2 takes tmp1's place.
309 point2->next = tmp1->next;
313
314 delete tmp1;
315 delete tmp2;
316
317 point1->vec.x = point1->next->pos.x - point1->pos.x;
318 point1->vec.y = point1->next->pos.y - point1->pos.y;
319
320 point2->vec.x = point2->next->pos.x - point2->pos.x;
321 point2->vec.y = point2->next->pos.y - point2->pos.y;
322}
const double kBadPriority
Definition: split.cpp:39
EDGEPT * make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev)
Definition: split.cpp:139
const int kCenterGradeCap
Definition: split.cpp:37
bool wordrec_display_splits
Definition: split.cpp:41
void remove_edgept(EDGEPT *point)
Definition: split.cpp:200
EDGEPT * make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev)
Definition: split.cpp:139
#define BOOL_VAR(name, val, comment)
Definition: params.h:306
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int16_t x
Definition: blobs.h:93
int16_t y
Definition: blobs.h:94
Definition: blobs.h:99
bool ShortNonCircularSegment(int min_points, const EDGEPT *end) const
Definition: blobs.h:158
int start_step
Definition: blobs.h:196
void Hide()
Definition: blobs.h:170
EDGEPT * next
Definition: blobs.h:192
int step_count
Definition: blobs.h:197
void Reveal()
Definition: blobs.h:173
int SegmentArea(const EDGEPT *end) const
Definition: blobs.h:145
C_OUTLINE * src_outline
Definition: blobs.h:194
VECTOR vec
Definition: blobs.h:187
EDGEPT * prev
Definition: blobs.h:193
bool EqualPos(const EDGEPT &other) const
Definition: blobs.h:128
TPOINT pos
Definition: blobs.h:186
EDGEPT * loop
Definition: blobs.h:280
TESSLINE * next
Definition: blobs.h:281
void ComputeBoundingBox()
Definition: blobs.cpp:213
Definition: blobs.h:284
TESSLINE * outlines
Definition: blobs.h:400
bool SegmentCrossesOutline(const TPOINT &pt1, const TPOINT &pt2) const
Definition: blobs.h:339
ICOORD step(int index) const
Definition: coutln.h:144
ICOORD position_at_index(int index) const
Definition: coutln.h:153
int32_t pathlength() const
Definition: coutln.h:135
integer coordinate
Definition: points.h:32
float length() const
find length
Definition: points.h:78
Definition: points.h:189
float length() const
find length
Definition: points.h:228
Definition: rect.h:34
int16_t width() const
Definition: rect.h:115
int16_t left() const
Definition: rect.h:72
int x_gap(const TBOX &box) const
Definition: rect.h:225
int16_t right() const
Definition: rect.h:79
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: split.cpp:81
bool IsLittleChunk(int min_points, int min_area) const
Definition: split.cpp:122
TBOX Box12() const
Definition: split.h:44
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: split.cpp:115
TBOX Box21() const
Definition: split.h:46
void Reveal() const
Definition: split.cpp:65
EDGEPT * point1
Definition: split.h:103
void SplitOutlineList(TESSLINE *outlines) const
Definition: split.cpp:235
EDGEPT * point2
Definition: split.h:104
TBOX bounding_box() const
Definition: split.cpp:44
void Hide() const
Definition: split.cpp:51
void Print() const
Definition: split.cpp:219
void SplitOutline() const
Definition: split.cpp:254
void UnsplitOutlineList(TBLOB *blob) const
Definition: split.cpp:279
void Mark(ScrollView *window) const
Definition: split.cpp:226
void UnsplitOutlines() const
Definition: split.cpp:295
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:532
void UpdateWindow()
Definition: scrollview.cpp:704
void Pen(Color color)
Definition: scrollview.cpp:719