tesseract 4.1.1
Loading...
Searching...
No Matches
makerow.h
Go to the documentation of this file.
1/**********************************************************************
2 * File: makerow.h (Formerly makerows.h)
3 * Description: Code to arrange blobs into rows of text.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1992, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19#ifndef MAKEROW_H
20#define MAKEROW_H
21
22#include "params.h"
23#include "ocrblock.h"
24#include "blobs.h"
25#include "blobbox.h"
26#include "statistc.h"
27
29{
30 ASSIGN, //assign it to row
31 REJECT, //reject it - dual overlap
33};
34
40};
41
42extern BOOL_VAR_H(textord_heavy_nr, false, "Vigorously remove noise");
44"Display row accumulation");
46"Display page correlated rows");
48"Display rows after expanding");
50"Display rows after final fitting");
52"Display blob bounds after pre-ass");
53extern BOOL_VAR_H (textord_test_landscape, false, "Tests refer to land/port");
55"Force parallel baselines");
57"Force straight baselines");
59"Use quadratic splines");
60extern BOOL_VAR_H (textord_old_baselines, true, "Use old baseline algorithm");
61extern BOOL_VAR_H (textord_old_xheight, true, "Use old xheight algorithm");
62extern BOOL_VAR_H (textord_fix_xheight_bug, true, "Use spline baseline");
64"Prevent multiple baselines");
66"Use new projection for underlines");
67extern BOOL_VAR_H (textord_debug_xheights, false, "Test xheight algorithms");
68extern INT_VAR_H (textord_test_x, -INT32_MAX, "coord of test pt");
69extern INT_VAR_H (textord_test_y, -INT32_MAX, "coord of test pt");
71"Min blobs before gradient counted");
73"Min blobs in each spline segment");
75"Size of window for spline segmentation");
76extern INT_VAR_H (textord_min_xheight, 10, "Min credible pixel xheight");
78"Fraction of line spacing for quad");
80"Fraction of line spacing for outlier");
81extern double_VAR_H (textord_skew_ile, 0.5, "Ile of gradients for page skew");
82extern double_VAR_H (textord_skew_lag, 0.75,
83"Lag for skew on row accumulation");
85"Max iqr/median for linespace");
87"Max width of blobs to make rows");
88extern double_VAR_H (textord_chop_width, 1.5, "Max width before chopping");
89extern double_VAR_H (textord_minxh, 0.25,
90"fraction of linesize for min xheight");
92"* blob height for initial linesize");
94"New row made if blob makes row this big");
96"Fraction of neighbourhood");
98"Multiple of line_size for underline");
100"Min blob height/top to include blob top into xheight stats");
102"Min pile height to make xheight");
104"Min pile height to make ascheight");
105extern double_VAR_H (textord_ascx_ratio_min, 1.2, "Min cap/xheight");
106extern double_VAR_H (textord_ascx_ratio_max, 1.7, "Max cap/xheight");
107extern double_VAR_H (textord_descx_ratio_min, 0.15, "Min desc/xheight");
108extern double_VAR_H (textord_descx_ratio_max, 0.6, "Max desc/xheight");
109extern double_VAR_H (textord_xheight_error_margin, 0.1, "Accepted variation");
110extern INT_VAR_H (textord_lms_line_trials, 12, "Number of linew fits to do");
112"Use test xheight mechanism");
113extern BOOL_VAR_H(textord_debug_blob, false, "Print test blob information");
114
115inline void get_min_max_xheight(int block_linesize,
116 int *min_height, int *max_height) {
117 *min_height = static_cast<int32_t>(floor(block_linesize * textord_minxh));
118 if (*min_height < textord_min_xheight) *min_height = textord_min_xheight;
119 *max_height = static_cast<int32_t>(ceil(block_linesize * 3.0));
120}
121
123 if (row->xheight <= 0) return ROW_INVALID;
124 return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND :
126}
127
/**
 * Tests whether a value lies within a relative tolerance of a reference.
 *
 * @param test    value being checked.
 * @param num     reference value.
 * @param margin  tolerance expressed as a fraction of num.
 * @return true when test lies between num * (1 - margin) and
 *         num * (1 + margin), both ends inclusive.
 *
 * The two limits are put in ascending order before the comparison, so a
 * negative num (for which num * (1 - margin) is the larger of the two)
 * still yields a usable interval instead of an empty one.
 */
inline bool within_error_margin(float test, float num, float margin) {
  const float bound_a = num * (1 - margin);
  const float bound_b = num * (1 + margin);
  const float lower = (bound_a < bound_b) ? bound_a : bound_b;
  const float upper = (bound_a < bound_b) ? bound_b : bound_a;
  return test >= lower && test <= upper;
}
131
132void fill_heights(TO_ROW *row, float gradient, int min_height,
133 int max_height, STATS *heights, STATS *floating_heights);
134
135float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK* block,
136 TO_BLOCK_LIST* blocks);
137float make_rows(ICOORD page_tr, // top right
138 TO_BLOCK_LIST *port_blocks);
139void make_initial_textrows(ICOORD page_tr,
140 TO_BLOCK* block, // block to do
141 FCOORD rotation, // for drawing
142 bool testing_on); // correct orientation
143void fit_lms_line(TO_ROW *row);
144void compute_page_skew(TO_BLOCK_LIST *blocks, // list of blocks
145 float &page_m, // average gradient
146 float &page_err); // average error
148void cleanup_rows_making(ICOORD page_tr, // top right
149 TO_BLOCK* block, // block to do
150 float gradient, // gradient to fit
151 FCOORD rotation, // for drawing
152 int32_t block_edge, // edge of block
153 bool testing_on); // correct orientation
154void delete_non_dropout_rows( //find lines
155 TO_BLOCK* block, //block to do
156 float gradient, //global skew
157 FCOORD rotation, //deskew vector
158 int32_t block_edge, //left edge
159 bool testing_on //correct orientation
160);
161bool find_best_dropout_row( //find neighbours
162 TO_ROW* row, //row to test
163 int32_t distance, //dropout dist
164 float dist_limit, //threshold distance
165 int32_t line_index, //index of row
166 TO_ROW_IT* row_it, //current position
167 bool testing_on //correct orientation
168);
169TBOX deskew_block_coords( //block box
170 TO_BLOCK *block, //block to do
171 float gradient //global skew
172 );
173void compute_line_occupation( //project blobs
174 TO_BLOCK *block, //block to do
175 float gradient, //global skew
176 int32_t min_y, //min coord in block
177 int32_t max_y, //in block
178 int32_t *occupation, //output projection
179 int32_t *deltas //derivative
180 );
181void compute_occupation_threshold( //project blobs
182 int32_t low_window, //below result point
183 int32_t high_window, //above result point
184 int32_t line_count, //array sizes
185 int32_t *occupation, //input projection
186 int32_t *thresholds //output thresholds
187 );
188void compute_dropout_distances( //project blobs
189 int32_t *occupation, //input projection
190 int32_t *thresholds, //output thresholds
191 int32_t line_count //array sizes
192 );
193void expand_rows( //find lines
194 ICOORD page_tr, //top right
195 TO_BLOCK* block, //block to do
196 float gradient, //gradient to fit
197 FCOORD rotation, //for drawing
198 int32_t block_edge, //edge of block
199 bool testing_on //correct orientation
200);
201void adjust_row_limits( //tidy limits
202 TO_BLOCK *block //block to do
203 );
204void compute_row_stats( //find lines
205 TO_BLOCK* block, //block to do
206 bool testing_on //correct orientation
207);
208float median_block_xheight( //find lines
209 TO_BLOCK *block, //block to do
210 float gradient //global skew
211 );
212
214 STATS *heights, STATS *floating_heights, bool cap_only, int min_height,
215 int max_height, float *xheight, float *ascrise);
216
217int32_t compute_row_descdrop(TO_ROW *row, // row to do
218 float gradient, // global skew
219 int xheight_blob_count,
220 STATS *heights);
221int32_t compute_height_modes(STATS *heights, // stats to search
222 int32_t min_height, // bottom of range
223 int32_t max_height, // top of range
224 int32_t *modes, // output array
225 int32_t maxmodes); // size of modes
226void correct_row_xheight(TO_ROW *row, // row to fix
227 float xheight, // average values
228 float ascrise,
229 float descdrop);
230void separate_underlines(TO_BLOCK* block, // block to do
231 float gradient, // skew angle
232 FCOORD rotation, // inverse landscape
233 bool testing_on); // correct orientation
234void pre_associate_blobs(ICOORD page_tr, // top right
235 TO_BLOCK* block, // block to do
236 FCOORD rotation, // inverse landscape
237 bool testing_on); // correct orientation
238void fit_parallel_rows(TO_BLOCK* block, // block to do
239 float gradient, // gradient to fit
240 FCOORD rotation, // for drawing
241 int32_t block_edge, // edge of block
242 bool testing_on); // correct orientation
243void fit_parallel_lms(float gradient, // forced gradient
244 TO_ROW *row); // row to fit
245void make_baseline_spline(TO_ROW *row, // row to fit
246 TO_BLOCK *block); // block it came from
// Splits a row's baseline into segments. Declaration only; the page index
// places the definition in makerow.cpp.
247bool segment_baseline( //split baseline
248 TO_ROW* row, //row to fit
249 TO_BLOCK* block, //block it came from
250 int32_t& segments, //number of segments
251 int32_t* xstarts //coords of segments
252);
// Declaration only; the page index places the definition in makerow.cpp.
// Takes the same parameters as segment_baseline but returns a double*.
253double *linear_spline_baseline ( //split baseline
254TO_ROW * row, //row to fit
255TO_BLOCK * block, //block it came from
256int32_t & segments, //number of segments
257int32_t xstarts[] //coords of segments
258);
259void assign_blobs_to_rows( //find lines
260 TO_BLOCK* block, //block to do
261 float* gradient, //block skew
262 int pass, //identification
263 bool reject_misses, //chuck big ones out
264 bool make_new_rows, //add rows for unmatched
265 bool drawing_skew //draw smoothed skew
266);
267 //find best row
268OVERLAP_STATE most_overlapping_row(TO_ROW_IT* row_it, //iterator
269 TO_ROW*& best_row, //output row
270 float top, //top of blob
271 float bottom, //bottom of blob
272 float rowsize, //max row size
273 bool testing_blob //test stuff
274 );
275int blob_x_order( //sort function
276 const void *item1, //items to compare
277 const void *item2);
278int row_y_order( //sort function
279 const void *item1, //items to compare
280 const void *item2);
281int row_spacing_order( //sort function
282 const void *item1, //items to compare
283 const void *item2);
284
285void mark_repeated_chars(TO_ROW *row);
286#endif
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:297
#define INT_VAR_H(name, val, comment)
Definition: params.h:295
#define double_VAR_H(name, val, comment)
Definition: params.h:301
int textord_spline_minblobs
Definition: makerow.cpp:63
void compute_dropout_distances(int32_t *occupation, int32_t *thresholds, int32_t line_count)
Definition: makerow.cpp:902
bool find_best_dropout_row(TO_ROW *row, int32_t distance, float dist_limit, int32_t line_index, TO_ROW_IT *row_it, bool testing_on)
Definition: makerow.cpp:652
double textord_chop_width
Definition: makerow.cpp:76
bool within_error_margin(float test, float num, float margin)
Definition: makerow.h:128
double textord_xheight_mode_fraction
Definition: makerow.cpp:89
void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
Definition: makerow.cpp:1406
double textord_skew_ile
Definition: makerow.cpp:72
void cleanup_rows_making(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:517
double textord_min_blob_height_fraction
Definition: makerow.cpp:87
bool segment_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t *xstarts)
Definition: makerow.cpp:2088
void delete_non_dropout_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:570
float median_block_xheight(TO_BLOCK *block, float gradient)
void pre_associate_blobs(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1845
bool textord_cblob_blockocc
double textord_ascx_ratio_max
Definition: makerow.cpp:95
int32_t compute_height_modes(STATS *heights, int32_t min_height, int32_t max_height, int32_t *modes, int32_t maxmodes)
Definition: makerow.cpp:1623
void mark_repeated_chars(TO_ROW *row)
Definition: makerow.cpp:2639
int textord_test_x
Definition: makerow.cpp:60
void make_initial_textrows(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:226
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:1931
bool textord_debug_blob
Definition: makerow.cpp:101
int textord_min_blobs_in_row
Definition: makerow.cpp:62
bool textord_show_final_blobs
Definition: makerow.cpp:47
int textord_lms_line_trials
Definition: makerow.cpp:99
double textord_linespace_iqrlimit
Definition: makerow.cpp:74
bool textord_show_parallel_rows
Definition: makerow.cpp:44
int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
Definition: makerow.cpp:1467
double textord_spline_outlier_fraction
Definition: makerow.cpp:71
void expand_rows(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, int32_t block_edge, bool testing_on)
Definition: makerow.cpp:949
bool textord_test_landscape
Definition: makerow.cpp:48
bool textord_quadratic_baselines
double textord_descx_ratio_max
Definition: makerow.cpp:97
double textord_minxh
Definition: makerow.cpp:80
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2573
bool textord_show_final_rows
Definition: makerow.cpp:46
bool textord_old_xheight
Definition: makerow.cpp:52
void fit_parallel_lms(float gradient, TO_ROW *row)
Definition: makerow.cpp:1973
void correct_row_xheight(TO_ROW *row, float xheight, float ascrise, float descdrop)
Definition: makerow.cpp:1685
void compute_occupation_threshold(int32_t low_window, int32_t high_window, int32_t line_count, int32_t *occupation, int32_t *thresholds)
Definition: makerow.cpp:821
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:163
void get_min_max_xheight(int block_linesize, int *min_height, int *max_height)
Definition: makerow.h:115
void separate_underlines(TO_BLOCK *block, float gradient, FCOORD rotation, bool testing_on)
Definition: makerow.cpp:1772
double textord_ascx_ratio_min
Definition: makerow.cpp:94
void compute_row_stats(TO_BLOCK *block, bool testing_on)
Definition: makerow.cpp:1143
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, bool testing_blob)
Definition: makerow.cpp:2478
int textord_min_xheight
Definition: makerow.cpp:67
double textord_skew_lag
Definition: makerow.cpp:73
bool textord_show_initial_rows
Definition: makerow.cpp:43
double * linear_spline_baseline(TO_ROW *row, TO_BLOCK *block, int32_t &segments, int32_t xstarts[])
Definition: makerow.cpp:2187
double textord_excess_blobsize
Definition: makerow.cpp:83
double textord_descx_ratio_min
Definition: makerow.cpp:96
int row_spacing_order(const void *item1, const void *item2)
Definition: makerow.cpp:2617
int textord_spline_medianwin
Definition: makerow.cpp:64
double textord_xheight_error_margin
Definition: makerow.cpp:98
bool textord_fix_xheight_bug
Definition: makerow.cpp:53
bool textord_debug_xheights
Definition: makerow.cpp:55
bool textord_heavy_nr
Definition: makerow.cpp:42
double textord_width_limit
Definition: makerow.cpp:75
bool textord_fix_makerow_bug
Definition: makerow.cpp:54
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, bool reject_misses, bool make_new_rows, bool drawing_skew)
Definition: makerow.cpp:2278
int row_y_order(const void *item1, const void *item2)
Definition: makerow.cpp:2595
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:266
double textord_spline_shift_fraction
Definition: makerow.cpp:69
void compute_line_occupation(TO_BLOCK *block, float gradient, int32_t min_y, int32_t max_y, int32_t *occupation, int32_t *deltas)
Definition: makerow.cpp:768
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:286
bool textord_show_expanded_rows
Definition: makerow.cpp:45
OVERLAP_STATE
Definition: makerow.h:29
@ ASSIGN
Definition: makerow.h:30
@ REJECT
Definition: makerow.h:31
@ NEW_ROW
Definition: makerow.h:32
bool textord_parallel_baselines
Definition: makerow.cpp:49
double textord_underline_width
Definition: makerow.cpp:85
int32_t compute_row_descdrop(TO_ROW *row, float gradient, int xheight_blob_count, STATS *heights)
Definition: makerow.cpp:1563
int textord_test_y
Definition: makerow.cpp:61
double textord_min_linesize
Definition: makerow.cpp:81
bool textord_new_initial_xheight
Definition: makerow.cpp:100
bool textord_old_baselines
Definition: makerow.cpp:51
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:200
void vigorous_noise_removal(TO_BLOCK *block)
Definition: makerow.cpp:466
double textord_ascheight_mode_fraction
Definition: makerow.cpp:91
double textord_occupancy_threshold
Definition: makerow.cpp:84
void adjust_row_limits(TO_BLOCK *block)
Definition: makerow.cpp:1107
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:122
bool textord_straight_baselines
Definition: makerow.cpp:50
TBOX deskew_block_coords(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:732
ROW_CATEGORY
Definition: makerow.h:35
@ ROW_DESCENDERS_FOUND
Definition: makerow.h:37
@ ROW_UNKNOWN
Definition: makerow.h:38
@ ROW_ASCENDERS_FOUND
Definition: makerow.h:36
@ ROW_INVALID
Definition: makerow.h:39
void make_baseline_spline(TO_ROW *row, TO_BLOCK *block)
Definition: makerow.cpp:2056
float xheight
Definition: blobbox.h:657
float descdrop
Definition: blobbox.h:660
float ascrise
Definition: blobbox.h:659
integer coordinate
Definition: points.h:32
Definition: points.h:189
Definition: rect.h:34
Definition: statistc.h:31