tesseract 4.1.1
Loading...
Searching...
No Matches
textord.h
Go to the documentation of this file.
///////////////////////////////////////////////////////////////////////
// File:        textord.h
// Description: The Textord class definition gathers text line and word
//              finding functionality.
// Author:      Ray Smith
// Created:     Fri Mar 13 14:29:01 PDT 2009
//
// (C) Copyright 2009, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

21#ifndef TESSERACT_TEXTORD_TEXTORD_H_
22#define TESSERACT_TEXTORD_TEXTORD_H_
23
24#include "ccstruct.h"
25#include "bbgrid.h"
26#include "blobbox.h"
27#include "gap_map.h"
28#include "publictypes.h" // For PageSegMode.
29
30class FCOORD;
31class BLOCK_LIST;
32class PAGE_RES;
33class TO_BLOCK;
34class TO_BLOCK_LIST;
35class ScrollView;
36
37namespace tesseract {
38
39// A simple class that can be used by BBGrid to hold a word and an expanded
40// bounding box that makes it easy to find words to put diacritics.
42 public:
43 WordWithBox() : word_(nullptr) {}
45 : word_(word), bounding_box_(word->bounding_box()) {
46 int height = bounding_box_.height();
47 bounding_box_.pad(height, height);
48 }
49
50 const TBOX &bounding_box() const { return bounding_box_; }
51 // Returns the bounding box of only the good blobs.
52 TBOX true_bounding_box() const { return word_->true_bounding_box(); }
53 C_BLOB_LIST *RejBlobs() const { return word_->rej_cblob_list(); }
54 const WERD *word() const { return word_; }
55
56 private:
57 // Borrowed pointer to a real word somewhere that must outlive this class.
58 WERD *word_;
59 // Cached expanded bounding box of the word, padded all round by its height.
60 TBOX bounding_box_;
61};
62
63// Make it usable by BBGrid.
64CLISTIZEH(WordWithBox)
65using WordGrid = BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>;
66using WordSearch = GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>;
67
68class Textord {
69 public:
70 explicit Textord(CCStruct* ccstruct);
71 ~Textord() = default;
72
73 // Make the textlines and words inside each block.
74 // binary_pix is mandatory and is the binarized input after line removal.
75 // grey_pix is optional, but if present must match the binary_pix in size,
76 // and must be a *real* grey image instead of binary_pix * 255.
77 // thresholds_pix is expected to be present iff grey_pix is present and
78 // can be an integer factor reduction of the grey_pix. It represents the
79 // thresholds that were used to create the binary_pix from the grey_pix.
80 // diacritic_blobs contain small confusing components that should be added
81 // to the appropriate word(s) in case they are really diacritics.
82 void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width,
83 int height, Pix *binary_pix, Pix *thresholds_pix,
84 Pix *grey_pix, bool use_box_bottoms,
85 BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
86 TO_BLOCK_LIST *to_blocks);
87
88 // If we were supposed to return only a single textline, and there is more
89 // than one, clean up and leave only the best.
90 void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES* page_res);
91
92 bool use_cjk_fp_model() const {
93 return use_cjk_fp_model_;
94 }
95 void set_use_cjk_fp_model(bool flag) {
96 use_cjk_fp_model_ = flag;
97 }
98
99 // tospace.cpp ///////////////////////////////////////////
100 void to_spacing(
101 ICOORD page_tr, //topright of page
102 TO_BLOCK_LIST *blocks //blocks on page
103 );
104 ROW *make_prop_words(TO_ROW *row, // row to make
105 FCOORD rotation // for drawing
106 );
107 ROW *make_blob_words(TO_ROW *row, // row to make
108 FCOORD rotation // for drawing
109 );
110 // tordmain.cpp ///////////////////////////////////////////
111 void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
112 void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST* blocks, bool testing_on);
113
114 private:
115 // For underlying memory management and other utilities.
116 CCStruct* ccstruct_;
117
118 // The size of the input image.
119 ICOORD page_tr_;
120
121 bool use_cjk_fp_model_;
122
123 // makerow.cpp ///////////////////////////////////////////
124 // Make the textlines inside each block.
125 void MakeRows(PageSegMode pageseg_mode, const FCOORD& skew,
126 int width, int height, TO_BLOCK_LIST* to_blocks);
127 // Make the textlines inside a single block.
128 void MakeBlockRows(int min_spacing, int max_spacing,
129 const FCOORD& skew, TO_BLOCK* block,
130 ScrollView* win);
131
132 public:
133 void compute_block_xheight(TO_BLOCK *block, float gradient);
134 void compute_row_xheight(TO_ROW *row, // row to do
135 const FCOORD& rotation,
136 float gradient, // global skew
137 int block_line_size);
138 void make_spline_rows(TO_BLOCK* block, // block to do
139 float gradient, // gradient to fit
140 bool testing_on);
141 private:
143 void make_old_baselines(TO_BLOCK* block, // block to do
144 bool testing_on, // correct orientation
145 float gradient);
146 void correlate_lines(TO_BLOCK *block, float gradient);
147 void correlate_neighbours(TO_BLOCK *block, // block rows are in.
148 TO_ROW **rows, // rows of block.
149 int rowcount); // no of rows to do.
150 int correlate_with_stats(TO_ROW **rows, // rows of block.
151 int rowcount, // no of rows to do.
152 TO_BLOCK* block);
153 void find_textlines(TO_BLOCK *block, // block row is in
154 TO_ROW *row, // row to do
155 int degree, // required approximation
156 QSPLINE *spline); // starting spline
157 // tospace.cpp ///////////////////////////////////////////
158 //DEBUG USE ONLY
159 void block_spacing_stats(TO_BLOCK* block,
160 GAPMAP* gapmap,
161 bool& old_text_ord_proportional,
162 //resulting estimate
163 int16_t& block_space_gap_width,
164 //resulting estimate
165 int16_t& block_non_space_gap_width
166 );
167 void row_spacing_stats(TO_ROW *row,
168 GAPMAP *gapmap,
169 int16_t block_idx,
170 int16_t row_idx,
171 //estimate for block
172 int16_t block_space_gap_width,
173 //estimate for block
174 int16_t block_non_space_gap_width
175 );
176 void old_to_method(TO_ROW *row,
177 STATS *all_gap_stats,
178 STATS *space_gap_stats,
179 STATS *small_gap_stats,
180 int16_t block_space_gap_width,
181 //estimate for block
182 int16_t block_non_space_gap_width
183 );
184 bool isolated_row_stats(TO_ROW* row,
185 GAPMAP* gapmap,
186 STATS* all_gap_stats,
187 bool suspected_table,
188 int16_t block_idx,
189 int16_t row_idx);
190 int16_t stats_count_under(STATS *stats, int16_t threshold);
191 void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats);
192 bool make_a_word_break(TO_ROW* row, // row being made
193 TBOX blob_box, // for next_blob // how many blanks?
194 int16_t prev_gap,
195 TBOX prev_blob_box,
196 int16_t real_current_gap,
197 int16_t within_xht_current_gap,
198 TBOX next_blob_box,
199 int16_t next_gap,
200 uint8_t& blanks,
201 bool& fuzzy_sp,
202 bool& fuzzy_non,
203 bool& prev_gap_was_a_space,
204 bool& break_at_next_gap);
205 bool narrow_blob(TO_ROW* row, TBOX blob_box);
206 bool wide_blob(TO_ROW* row, TBOX blob_box);
207 bool suspected_punct_blob(TO_ROW* row, TBOX box);
208 void peek_at_next_gap(TO_ROW *row,
209 BLOBNBOX_IT box_it,
210 TBOX &next_blob_box,
211 int16_t &next_gap,
212 int16_t &next_within_xht_gap);
213 void mark_gap(TBOX blob, //blob following gap
214 int16_t rule, // heuristic id
215 int16_t prev_gap,
216 int16_t prev_blob_width,
217 int16_t current_gap,
218 int16_t next_blob_width,
219 int16_t next_gap);
220 float find_mean_blob_spacing(WERD *word);
221 bool ignore_big_gap(TO_ROW* row,
222 int32_t row_length,
223 GAPMAP* gapmap,
224 int16_t left,
225 int16_t right);
226 //get bounding box
227 TBOX reduced_box_next(TO_ROW *row, //current row
228 BLOBNBOX_IT *it //iterator to blobds
229 );
230 TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, int16_t *left_above_xht);
231 // tordmain.cpp ///////////////////////////////////////////
232 float filter_noise_blobs(BLOBNBOX_LIST *src_list,
233 BLOBNBOX_LIST *noise_list,
234 BLOBNBOX_LIST *small_list,
235 BLOBNBOX_LIST *large_list);
236 // Fixes the block so it obeys all the rules:
237 // Must have at least one ROW.
238 // Must have at least one WERD.
239 // WERDs contain a fake blob.
240 void cleanup_nontext_block(BLOCK* block);
241 void cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks);
242 bool clean_noise_from_row(ROW* row);
243 void clean_noise_from_words(ROW *row);
244 // Remove outlines that are a tiny fraction in either width or height
245 // of the word height.
246 void clean_small_noise_from_words(ROW *row);
247 // Groups blocks by rotation, then, for each group, makes a WordGrid and calls
248 // TransferDiacriticsToWords to copy the diacritic blobs to the most
249 // appropriate words in the group of blocks. Source blobs are not touched.
250 void TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs,
251 BLOCK_LIST* blocks);
252 // Places a copy of blobs that are near a word (after applying rotation to the
253 // blob) in the most appropriate word, unless there is doubt, in which case a
254 // blob can end up in two words. Source blobs are not touched.
255 void TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs,
256 const FCOORD &rotation, WordGrid *word_grid);
257
258 public:
259 // makerow.cpp ///////////////////////////////////////////
260 BOOL_VAR_H(textord_single_height_mode, false,
261 "Script has no xheight, so use a single mode for horizontal text");
262 // tospace.cpp ///////////////////////////////////////////
263 BOOL_VAR_H(tosp_old_to_method, false, "Space stats use prechopping?");
264 BOOL_VAR_H(tosp_old_to_constrain_sp_kn, false,
265 "Constrain relative values of inter and intra-word gaps for "
266 "old_to_method.");
267 BOOL_VAR_H(tosp_only_use_prop_rows, true,
268 "Block stats to use fixed pitch rows?");
269 BOOL_VAR_H(tosp_force_wordbreak_on_punct, false,
270 "Force word breaks on punct to break long lines in non-space "
271 "delimited langs");
272 BOOL_VAR_H(tosp_use_pre_chopping, false,
273 "Space stats use prechopping?");
274 BOOL_VAR_H(tosp_old_to_bug_fix, false,
275 "Fix suspected bug in old code");
276 BOOL_VAR_H(tosp_block_use_cert_spaces, true,
277 "Only stat OBVIOUS spaces");
278 BOOL_VAR_H(tosp_row_use_cert_spaces, true,
279 "Only stat OBVIOUS spaces");
280 BOOL_VAR_H(tosp_narrow_blobs_not_cert, true,
281 "Only stat OBVIOUS spaces");
282 BOOL_VAR_H(tosp_row_use_cert_spaces1, true,
283 "Only stat OBVIOUS spaces");
284 BOOL_VAR_H(tosp_recovery_isolated_row_stats, true,
285 "Use row alone when inadequate cert spaces");
286 BOOL_VAR_H(tosp_only_small_gaps_for_kern, false, "Better guess");
287 BOOL_VAR_H(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?");
288 BOOL_VAR_H(tosp_fuzzy_limit_all, true,
289 "Don't restrict kn->sp fuzzy limit to tables");
290 BOOL_VAR_H(tosp_stats_use_xht_gaps, true,
291 "Use within xht gap for wd breaks");
292 BOOL_VAR_H(tosp_use_xht_gaps, true,
293 "Use within xht gap for wd breaks");
294 BOOL_VAR_H(tosp_only_use_xht_gaps, false,
295 "Only use within xht gap for wd breaks");
296 BOOL_VAR_H(tosp_rule_9_test_punct, false,
297 "Don't chng kn to space next to punct");
298 BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp, true, "Default flip");
299 BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn, true, "Default flip");
300 BOOL_VAR_H(tosp_improve_thresh, false,
301 "Enable improvement heuristic");
302 INT_VAR_H(tosp_debug_level, 0, "Debug data");
303 INT_VAR_H(tosp_enough_space_samples_for_median, 3,
304 "or should we use mean");
305 INT_VAR_H(tosp_redo_kern_limit, 10,
306 "No.samples reqd to reestimate for row");
307 INT_VAR_H(tosp_few_samples, 40,
308 "No.gaps reqd with 1 large gap to treat as a table");
309 INT_VAR_H(tosp_short_row, 20,
310 "No.gaps reqd with few cert spaces to use certs");
311 INT_VAR_H(tosp_sanity_method, 1, "How to avoid being silly");
312 double_VAR_H(tosp_old_sp_kn_th_factor, 2.0,
313 "Factor for defining space threshold in terms of space and "
314 "kern sizes");
315 double_VAR_H(tosp_threshold_bias1, 0,
316 "how far between kern and space?");
317 double_VAR_H(tosp_threshold_bias2, 0,
318 "how far between kern and space?");
319 double_VAR_H(tosp_narrow_fraction, 0.3,
320 "Fract of xheight for narrow");
321 double_VAR_H(tosp_narrow_aspect_ratio, 0.48,
322 "narrow if w/h less than this");
323 double_VAR_H(tosp_wide_fraction, 0.52, "Fract of xheight for wide");
324 double_VAR_H(tosp_wide_aspect_ratio, 0.0,
325 "wide if w/h less than this");
326 double_VAR_H(tosp_fuzzy_space_factor, 0.6,
327 "Fract of xheight for fuzz sp");
328 double_VAR_H(tosp_fuzzy_space_factor1, 0.5,
329 "Fract of xheight for fuzz sp");
330 double_VAR_H(tosp_fuzzy_space_factor2, 0.72,
331 "Fract of xheight for fuzz sp");
332 double_VAR_H(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern");
333 double_VAR_H(tosp_kern_gap_factor1, 2.0,
334 "gap ratio to flip kern->sp");
335 double_VAR_H(tosp_kern_gap_factor2, 1.3,
336 "gap ratio to flip kern->sp");
337 double_VAR_H(tosp_kern_gap_factor3, 2.5,
338 "gap ratio to flip kern->sp");
339 double_VAR_H(tosp_ignore_big_gaps, -1, "xht multiplier");
340 double_VAR_H(tosp_ignore_very_big_gaps, 3.5, "xht multiplier");
341 double_VAR_H(tosp_rep_space, 1.6, "rep gap multiplier for space");
342 double_VAR_H(tosp_enough_small_gaps, 0.65,
343 "Fract of kerns reqd for isolated row stats");
344 double_VAR_H(tosp_table_kn_sp_ratio, 2.25,
345 "Min difference of kn & sp in table");
346 double_VAR_H(tosp_table_xht_sp_ratio, 0.33,
347 "Expect spaces bigger than this");
348 double_VAR_H(tosp_table_fuzzy_kn_sp_ratio, 3.0,
349 "Fuzzy if less than this");
350 double_VAR_H(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg");
351 double_VAR_H(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg");
352 double_VAR_H(tosp_min_sane_kn_sp, 1.5,
353 "Don't trust spaces less than this time kn");
354 double_VAR_H(tosp_init_guess_kn_mult, 2.2,
355 "Thresh guess - mult kn by this");
356 double_VAR_H(tosp_init_guess_xht_mult, 0.28,
357 "Thresh guess - mult xht by this");
358 double_VAR_H(tosp_max_sane_kn_thresh, 5.0,
359 "Multiplier on kn to limit thresh");
360 double_VAR_H(tosp_flip_caution, 0.0,
361 "Don't autoflip kn to sp when large separation");
362 double_VAR_H(tosp_large_kerning, 0.19,
363 "Limit use of xht gap with large kns");
364 double_VAR_H(tosp_dont_fool_with_small_kerns, -1,
365 "Limit use of xht gap with odd small kns");
366 double_VAR_H(tosp_near_lh_edge, 0,
367 "Don't reduce box if the top left is non blank");
368 double_VAR_H(tosp_silly_kn_sp_gap, 0.2,
369 "Don't let sp minus kn get too small");
370 double_VAR_H(tosp_pass_wide_fuzz_sp_to_context, 0.75,
371 "How wide fuzzies need context");
372 // tordmain.cpp ///////////////////////////////////////////
373 BOOL_VAR_H(textord_no_rejects, false, "Don't remove noise blobs");
374 BOOL_VAR_H(textord_show_blobs, false, "Display unsorted blobs");
375 BOOL_VAR_H(textord_show_boxes, false, "Display boxes");
376 INT_VAR_H(textord_max_noise_size, 7, "Pixel size of noise");
377 INT_VAR_H(textord_baseline_debug, 0, "Baseline debug level");
378 double_VAR_H(textord_noise_area_ratio, 0.7,
379 "Fraction of bounding box for noise");
380 double_VAR_H(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess");
381 double_VAR_H(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess");
382 INT_VAR_H(textord_noise_sizefraction, 10, "Fraction of size for maxima");
383 double_VAR_H(textord_noise_sizelimit, 0.5, "Fraction of x for big t count");
384 INT_VAR_H(textord_noise_translimit, 16, "Transitions for normal blob");
385 double_VAR_H(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion");
386 BOOL_VAR_H(textord_noise_rejwords, true, "Reject noise-like words");
387 BOOL_VAR_H(textord_noise_rejrows, true, "Reject noise-like rows");
388 double_VAR_H(textord_noise_syfract, 0.2, "xh fract error for norm blobs");
389 double_VAR_H(textord_noise_sxfract, 0.4,
390 "xh fract width error for norm blobs");
391 double_VAR_H(textord_noise_hfract, 1.0/64,
392 "Height fraction to discard outlines as speckle noise");
393 INT_VAR_H(textord_noise_sncount, 1, "super norm blobs to save row");
394 double_VAR_H(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion");
395 BOOL_VAR_H(textord_noise_debug, false, "Debug row garbage detector");
396 double_VAR_H(textord_blshift_maxshift, 0.00, "Max baseline shift");
397 double_VAR_H(textord_blshift_xfraction, 9.99, "Min size of baseline shift");
398};
399} // namespace tesseract.
400
401#endif // TESSERACT_TEXTORD_TEXTORD_H_
#define CLISTIZEH(CLASSNAME)
Definition: clst.h:879
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:297
#define INT_VAR_H(name, val, comment)
Definition: params.h:295
#define double_VAR_H(name, val, comment)
Definition: params.h:301
Definition: ocrblock.h:31
Definition: ocrrow.h:37
integer coordinate
Definition: points.h:32
Definition: points.h:189
Definition: rect.h:34
int16_t height() const
Definition: rect.h:108
void pad(int xpad, int ypad)
Definition: rect.h:131
Definition: statistc.h:31
Definition: werd.h:56
C_BLOB_LIST * rej_cblob_list()
Definition: werd.h:90
TBOX true_bounding_box() const
Definition: werd.cpp:169
Definition: gap_map.h:17
const TBOX & bounding_box() const
Definition: textord.h:50
C_BLOB_LIST * RejBlobs() const
Definition: textord.h:53
WordWithBox(WERD *word)
Definition: textord.h:44
const WERD * word() const
Definition: textord.h:54
TBOX true_bounding_box() const
Definition: textord.h:52
bool use_cjk_fp_model() const
Definition: textord.h:92
void set_use_cjk_fp_model(bool flag)
Definition: textord.h:95