tesseract 4.1.1
Loading...
Searching...
No Matches
underlin.cpp
Go to the documentation of this file.
1/**********************************************************************
2 * File: underlin.cpp (Formerly undrline.c)
3 * Description: Code to chop blobs apart from underlines.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1994, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19#include "underlin.h"
20
// Tunable double parameter, default 0.1, defined through the double_VAR macro
// with the help text "Fraction of x to ignore".
// NOTE(review): presumably multiplied by a row's x-height to form the
// baseline_offset handed to find_underlined_blobs -- confirm at the call site.
21double_VAR (textord_underline_offset, 0.1, "Fraction of x to ignore");
// Tunable boolean parameter, default true, defined through the BOOL_VAR macro.
// None of the functions in this file read it; presumably callers consult it
// before invoking restore_underlined_blobs -- TODO confirm.
22BOOL_VAR (textord_restore_underlines, true, "Chop underlines & put back");
23
24/**********************************************************************
25 * restore_underlined_blobs
26 *
27 * Find underlined blobs and put them back in the row.
28 **********************************************************************/
29
30void restore_underlined_blobs( //get chop points
31 TO_BLOCK *block //block to do
32 ) {
33 int16_t chop_coord; //chop boundary
34 TBOX blob_box; //of underline
35 BLOBNBOX *u_line; //underline bit
36 TO_ROW *row; //best row for blob
37 ICOORDELT_LIST chop_cells; //blobs to cut out
38 //real underlines
39 BLOBNBOX_LIST residual_underlines;
40 C_OUTLINE_LIST left_coutlines;
41 C_OUTLINE_LIST right_coutlines;
42 ICOORDELT_IT cell_it = &chop_cells;
43 //under lines
44 BLOBNBOX_IT under_it = &block->underlines;
45 BLOBNBOX_IT ru_it = &residual_underlines;
46
47 if (block->get_rows()->empty())
48 return; // Don't crash if there are no rows.
49 for (under_it.mark_cycle_pt (); !under_it.cycled_list ();
50 under_it.forward ()) {
51 u_line = under_it.extract ();
52 blob_box = u_line->bounding_box ();
53 row = most_overlapping_row (block->get_rows (), u_line);
54 if (row == nullptr)
55 return; // Don't crash if there is no row.
56 find_underlined_blobs (u_line, &row->baseline, row->xheight,
58 &chop_cells);
59 cell_it.set_to_list (&chop_cells);
60 for (cell_it.mark_cycle_pt (); !cell_it.cycled_list ();
61 cell_it.forward ()) {
62 chop_coord = cell_it.data ()->x ();
63 if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) {
64 split_to_blob (u_line, chop_coord,
66 &left_coutlines,
67 &right_coutlines);
68 if (!left_coutlines.empty()) {
69 ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
70 }
71 chop_coord = cell_it.data ()->y ();
72 split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5,
73 &left_coutlines, &right_coutlines);
74 if (!left_coutlines.empty()) {
75 row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
76 }
77 u_line = nullptr; //no more blobs to add
78 }
79 delete cell_it.extract();
80 }
81 if (!right_coutlines.empty ()) {
82 split_to_blob(nullptr, blob_box.right(), textord_fp_chop_error + 0.5,
83 &left_coutlines, &right_coutlines);
84 if (!left_coutlines.empty())
85 ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
86 }
87 if (u_line != nullptr) {
88 delete u_line->cblob();
89 delete u_line;
90 }
91 }
92 if (!ru_it.empty()) {
93 ru_it.move_to_first();
94 for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
95 under_it.add_after_then_move(ru_it.extract());
96 }
97 }
98}
99
100
101/**********************************************************************
102 * most_overlapping_row
103 *
104 * Return the row which most overlaps the blob.
105 **********************************************************************/
106
108 TO_ROW_LIST *rows, //list of rows
109 BLOBNBOX *blob //blob to place
110 ) {
111 int16_t x = (blob->bounding_box ().left ()
112 + blob->bounding_box ().right ()) / 2;
113 TO_ROW_IT row_it = rows; //row iterator
114 TO_ROW *row; //current row
115 TO_ROW *best_row; //output row
116 float overlap; //of blob & row
117 float bestover; //best overlap
118
119 best_row = nullptr;
120 bestover = static_cast<float>(-INT32_MAX);
121 if (row_it.empty ())
122 return nullptr;
123 row = row_it.data ();
124 row_it.mark_cycle_pt ();
125 while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top ()
126 && !row_it.cycled_list ()) {
127 best_row = row;
128 bestover =
129 blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop;
130 row_it.forward ();
131 row = row_it.data ();
132 }
133 while (row->baseline.y (x) + row->xheight + row->ascrise
134 >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) {
135 overlap = row->baseline.y (x) + row->xheight + row->ascrise;
136 if (blob->bounding_box ().top () < overlap)
137 overlap = blob->bounding_box ().top ();
138 if (blob->bounding_box ().bottom () >
139 row->baseline.y (x) + row->descdrop)
140 overlap -= blob->bounding_box ().bottom ();
141 else
142 overlap -= row->baseline.y (x) + row->descdrop;
143 if (overlap > bestover) {
144 bestover = overlap;
145 best_row = row;
146 }
147 row_it.forward ();
148 row = row_it.data ();
149 }
150 if (bestover < 0
151 && row->baseline.y (x) + row->xheight + row->ascrise
152 - blob->bounding_box ().bottom () > bestover)
153 best_row = row;
154 return best_row;
155}
156
157
158/**********************************************************************
159 * find_underlined_blobs
160 *
161 * Find the start and end coords of blobs in the underline.
162 **********************************************************************/
163
164void find_underlined_blobs( //get chop points
165 BLOBNBOX *u_line, //underlined unit
166 QSPLINE *baseline, //actual baseline
167 float xheight, //height of line
168 float baseline_offset, //amount to shrinke it
169 ICOORDELT_LIST *chop_cells //places to chop
170 ) {
171 int16_t x, y; //sides of blob
172 ICOORD blob_chop; //sides of blob
173 TBOX blob_box = u_line->bounding_box ();
174 //cell iterator
175 ICOORDELT_IT cell_it = chop_cells;
176 STATS upper_proj (blob_box.left (), blob_box.right () + 1);
177 STATS middle_proj (blob_box.left (), blob_box.right () + 1);
178 STATS lower_proj (blob_box.left (), blob_box.right () + 1);
179 C_OUTLINE_IT out_it; //outlines of blob
180
181 ASSERT_HOST (u_line->cblob () != nullptr);
182
183 out_it.set_to_list (u_line->cblob ()->out_list ());
184 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
185 vertical_cunderline_projection (out_it.data (),
186 baseline, xheight, baseline_offset,
187 &lower_proj, &middle_proj, &upper_proj);
188 }
189
190 for (x = blob_box.left (); x < blob_box.right (); x++) {
191 if (middle_proj.pile_count (x) > 0) {
192 for (y = x + 1;
193 y < blob_box.right () && middle_proj.pile_count (y) > 0; y++);
194 blob_chop = ICOORD (x, y);
195 cell_it.add_after_then_move (new ICOORDELT (blob_chop));
196 x = y;
197 }
198 }
199}
200
201
202/**********************************************************************
203 * vertical_cunderline_projection
204 *
205 * Compute the vertical projection of a outline from its outlines
206 * and add to the given STATS.
207 **********************************************************************/
208
209void vertical_cunderline_projection( //project outlines
210 C_OUTLINE *outline, //outline to project
211 QSPLINE *baseline, //actual baseline
212 float xheight, //height of line
213 float baseline_offset, //amount to shrinke it
214 STATS *lower_proj, //below baseline
215 STATS *middle_proj, //centre region
216 STATS *upper_proj //top region
217 ) {
218 ICOORD pos; //current point
219 ICOORD step; //edge step
220 int16_t lower_y, upper_y; //region limits
221 int32_t length; //of outline
222 int16_t stepindex; //current step
223 C_OUTLINE_IT out_it = outline->child ();
224
225 pos = outline->start_pos ();
226 length = outline->pathlength ();
227 for (stepindex = 0; stepindex < length; stepindex++) {
228 step = outline->step (stepindex);
229 if (step.x () > 0) {
230 lower_y =
231 static_cast<int16_t>(floor (baseline->y (pos.x ()) + baseline_offset + 0.5));
232 upper_y =
233 static_cast<int16_t>(floor (baseline->y (pos.x ()) + baseline_offset +
234 xheight + 0.5));
235 if (pos.y () >= lower_y) {
236 lower_proj->add (pos.x (), -lower_y);
237 if (pos.y () >= upper_y) {
238 middle_proj->add (pos.x (), lower_y - upper_y);
239 upper_proj->add (pos.x (), upper_y - pos.y ());
240 }
241 else
242 middle_proj->add (pos.x (), lower_y - pos.y ());
243 }
244 else
245 lower_proj->add (pos.x (), -pos.y ());
246 }
247 else if (step.x () < 0) {
248 lower_y =
249 static_cast<int16_t>(floor (baseline->y (pos.x () - 1) + baseline_offset +
250 0.5));
251 upper_y =
252 static_cast<int16_t>(floor (baseline->y (pos.x () - 1) + baseline_offset +
253 xheight + 0.5));
254 if (pos.y () >= lower_y) {
255 lower_proj->add (pos.x () - 1, lower_y);
256 if (pos.y () >= upper_y) {
257 middle_proj->add (pos.x () - 1, upper_y - lower_y);
258 upper_proj->add (pos.x () - 1, pos.y () - upper_y);
259 }
260 else
261 middle_proj->add (pos.x () - 1, pos.y () - lower_y);
262 }
263 else
264 lower_proj->add (pos.x () - 1, pos.y ());
265 }
266 pos += step;
267 }
268
269 for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
270 vertical_cunderline_projection (out_it.data (),
271 baseline, xheight, baseline_offset,
272 lower_proj, middle_proj, upper_proj);
273 }
274}
#define ASSERT_HOST(x)
Definition: errcode.h:88
#define BOOL_VAR(name, val, comment)
Definition: params.h:306
#define double_VAR(name, val, comment)
Definition: params.h:312
@ baseline
Definition: mfoutline.h:63
int textord_fp_chop_error
Definition: fpchop.cpp:32
void split_to_blob(BLOBNBOX *blob, int16_t chop_coord, float pitch_error, C_OUTLINE_LIST *left_coutlines, C_OUTLINE_LIST *right_coutlines)
Definition: fpchop.cpp:236
void restore_underlined_blobs(TO_BLOCK *block)
Definition: underlin.cpp:30
TO_ROW * most_overlapping_row(TO_ROW_LIST *rows, BLOBNBOX *blob)
Definition: underlin.cpp:107
double textord_underline_offset
Definition: underlin.cpp:21
void vertical_cunderline_projection(C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj)
Definition: underlin.cpp:209
bool textord_restore_underlines
Definition: underlin.cpp:22
void find_underlined_blobs(BLOBNBOX *u_line, QSPLINE *baseline, float xheight, float baseline_offset, ICOORDELT_LIST *chop_cells)
Definition: underlin.cpp:164
const TBOX & bounding_box() const
Definition: blobbox.h:230
C_BLOB * cblob() const
Definition: blobbox.h:268
void insert_blob(BLOBNBOX *blob)
Definition: blobbox.cpp:769
QSPLINE baseline
Definition: blobbox.h:670
float xheight
Definition: blobbox.h:657
float descdrop
Definition: blobbox.h:660
float ascrise
Definition: blobbox.h:659
TO_ROW_LIST * get_rows()
Definition: blobbox.h:704
BLOBNBOX_LIST underlines
Definition: blobbox.h:773
C_OUTLINE_LIST * child()
Definition: coutln.h:108
ICOORD step(int index) const
Definition: coutln.h:144
const ICOORD & start_pos() const
Definition: coutln.h:148
int32_t pathlength() const
Definition: coutln.h:135
integer coordinate
Definition: points.h:32
int16_t y() const
access_function
Definition: points.h:56
int16_t x() const
access function
Definition: points.h:52
double y(double x) const
Definition: quspline.cpp:209
Definition: rect.h:34
int16_t top() const
Definition: rect.h:58
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
int16_t right() const
Definition: rect.h:79
Definition: statistc.h:31
int32_t pile_count(int32_t value) const
Definition: statistc.h:76
void add(int32_t value, int32_t count)
Definition: statistc.cpp:93
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70