tesseract 4.1.1
Loading...
Searching...
No Matches
textord.cpp
Go to the documentation of this file.
///////////////////////////////////////////////////////////////////////
// File:        textord.cpp
// Description: The top-level text line and word finding functionality.
// Author:      Ray Smith
// Created:     Fri Mar 13 14:43:01 PDT 2009
//
// (C) Copyright 2009, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////
19
20// Include automatically generated configuration file if running autoconf.
21#ifdef HAVE_CONFIG_H
22#include "config_auto.h"
23#endif
24
25#include "baselinedetect.h"
26#include "drawtord.h"
27#include "textord.h"
28#include "makerow.h"
29#include "pageres.h"
30#include "tordmain.h"
31#include "wordseg.h"
32
33namespace tesseract {
34
36 : ccstruct_(ccstruct),
37 use_cjk_fp_model_(false),
38 // makerow.cpp ///////////////////////////////////////////
39 BOOL_MEMBER(textord_single_height_mode, false,
40 "Script has no xheight, so use a single mode",
41 ccstruct_->params()),
42 // tospace.cpp ///////////////////////////////////////////
43 BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?",
44 ccstruct_->params()),
45 BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false,
46 "Constrain relative values of inter and intra-word gaps for "
47 "old_to_method.",
48 ccstruct_->params()),
49 BOOL_MEMBER(tosp_only_use_prop_rows, true,
50 "Block stats to use fixed pitch rows?", ccstruct_->params()),
51 BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
52 "Force word breaks on punct to break long lines in non-space "
53 "delimited langs",
54 ccstruct_->params()),
55 BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?",
56 ccstruct_->params()),
57 BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
58 ccstruct_->params()),
59 BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces",
60 ccstruct_->params()),
61 BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
62 ccstruct_->params()),
63 BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces",
64 ccstruct_->params()),
65 BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
66 ccstruct_->params()),
67 BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
68 "Use row alone when inadequate cert spaces",
69 ccstruct_->params()),
70 BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess",
71 ccstruct_->params()),
72 BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
73 ccstruct_->params()),
74 BOOL_MEMBER(tosp_fuzzy_limit_all, true,
75 "Don't restrict kn->sp fuzzy limit to tables",
76 ccstruct_->params()),
77 BOOL_MEMBER(tosp_stats_use_xht_gaps, true,
78 "Use within xht gap for wd breaks", ccstruct_->params()),
79 BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
80 ccstruct_->params()),
81 BOOL_MEMBER(tosp_only_use_xht_gaps, false,
82 "Only use within xht gap for wd breaks", ccstruct_->params()),
83 BOOL_MEMBER(tosp_rule_9_test_punct, false,
84 "Don't chng kn to space next to punct", ccstruct_->params()),
85 BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
86 ccstruct_->params()),
87 BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
88 ccstruct_->params()),
89 BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
90 ccstruct_->params()),
91 INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params()),
92 INT_MEMBER(tosp_enough_space_samples_for_median, 3,
93 "or should we use mean", ccstruct_->params()),
94 INT_MEMBER(tosp_redo_kern_limit, 10,
95 "No.samples reqd to reestimate for row", ccstruct_->params()),
96 INT_MEMBER(tosp_few_samples, 40,
97 "No.gaps reqd with 1 large gap to treat as a table",
98 ccstruct_->params()),
99 INT_MEMBER(tosp_short_row, 20,
100 "No.gaps reqd with few cert spaces to use certs",
101 ccstruct_->params()),
102 INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly",
103 ccstruct_->params()),
104 double_MEMBER(tosp_old_sp_kn_th_factor, 2.0,
105 "Factor for defining space threshold in terms of space and "
106 "kern sizes",
107 ccstruct_->params()),
108 double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?",
109 ccstruct_->params()),
110 double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?",
111 ccstruct_->params()),
112 double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
113 ccstruct_->params()),
114 double_MEMBER(tosp_narrow_aspect_ratio, 0.48,
115 "narrow if w/h less than this", ccstruct_->params()),
116 double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
117 ccstruct_->params()),
118 double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
119 ccstruct_->params()),
120 double_MEMBER(tosp_fuzzy_space_factor, 0.6,
121 "Fract of xheight for fuzz sp", ccstruct_->params()),
122 double_MEMBER(tosp_fuzzy_space_factor1, 0.5,
123 "Fract of xheight for fuzz sp", ccstruct_->params()),
124 double_MEMBER(tosp_fuzzy_space_factor2, 0.72,
125 "Fract of xheight for fuzz sp", ccstruct_->params()),
126 double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
127 ccstruct_->params()),
128 double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
129 ccstruct_->params()),
130 double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp",
131 ccstruct_->params()),
132 double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp",
133 ccstruct_->params()),
134 double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier",
135 ccstruct_->params()),
136 double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier",
137 ccstruct_->params()),
138 double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space",
139 ccstruct_->params()),
140 double_MEMBER(tosp_enough_small_gaps, 0.65,
141 "Fract of kerns reqd for isolated row stats",
142 ccstruct_->params()),
143 double_MEMBER(tosp_table_kn_sp_ratio, 2.25,
144 "Min difference of kn & sp in table", ccstruct_->params()),
145 double_MEMBER(tosp_table_xht_sp_ratio, 0.33,
146 "Expect spaces bigger than this", ccstruct_->params()),
147 double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0,
148 "Fuzzy if less than this", ccstruct_->params()),
149 double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
150 ccstruct_->params()),
151 double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
152 ccstruct_->params()),
153 double_MEMBER(tosp_min_sane_kn_sp, 1.5,
154 "Don't trust spaces less than this time kn",
155 ccstruct_->params()),
156 double_MEMBER(tosp_init_guess_kn_mult, 2.2,
157 "Thresh guess - mult kn by this", ccstruct_->params()),
158 double_MEMBER(tosp_init_guess_xht_mult, 0.28,
159 "Thresh guess - mult xht by this", ccstruct_->params()),
160 double_MEMBER(tosp_max_sane_kn_thresh, 5.0,
161 "Multiplier on kn to limit thresh", ccstruct_->params()),
162 double_MEMBER(tosp_flip_caution, 0.0,
163 "Don't autoflip kn to sp when large separation",
164 ccstruct_->params()),
165 double_MEMBER(tosp_large_kerning, 0.19,
166 "Limit use of xht gap with large kns", ccstruct_->params()),
167 double_MEMBER(tosp_dont_fool_with_small_kerns, -1,
168 "Limit use of xht gap with odd small kns",
169 ccstruct_->params()),
170 double_MEMBER(tosp_near_lh_edge, 0,
171 "Don't reduce box if the top left is non blank",
172 ccstruct_->params()),
173 double_MEMBER(tosp_silly_kn_sp_gap, 0.2,
174 "Don't let sp minus kn get too small", ccstruct_->params()),
175 double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75,
176 "How wide fuzzies need context", ccstruct_->params()),
177 // tordmain.cpp ///////////////////////////////////////////
178 BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
179 ccstruct_->params()),
180 BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs",
181 ccstruct_->params()),
182 BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs",
183 ccstruct_->params()),
184 INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
185 ccstruct_->params()),
186 INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level",
187 ccstruct_->params()),
188 double_MEMBER(textord_noise_area_ratio, 0.7,
189 "Fraction of bounding box for noise", ccstruct_->params()),
190 double_MEMBER(textord_initialx_ile, 0.75,
191 "Ile of sizes for xheight guess", ccstruct_->params()),
192 double_MEMBER(textord_initialasc_ile, 0.90,
193 "Ile of sizes for xheight guess", ccstruct_->params()),
194 INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima",
195 ccstruct_->params()),
196 double_MEMBER(textord_noise_sizelimit, 0.5,
197 "Fraction of x for big t count", ccstruct_->params()),
198 INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
199 ccstruct_->params()),
200 double_MEMBER(textord_noise_normratio, 2.0,
201 "Dot to norm ratio for deletion", ccstruct_->params()),
202 BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
203 ccstruct_->params()),
204 BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
205 ccstruct_->params()),
206 double_MEMBER(textord_noise_syfract, 0.2,
207 "xh fract height error for norm blobs",
208 ccstruct_->params()),
209 double_MEMBER(textord_noise_sxfract, 0.4,
210 "xh fract width error for norm blobs", ccstruct_->params()),
211 double_MEMBER(textord_noise_hfract, 1.0 / 64,
212 "Height fraction to discard outlines as speckle noise",
213 ccstruct_->params()),
214 INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
215 ccstruct_->params()),
216 double_MEMBER(textord_noise_rowratio, 6.0,
217 "Dot to norm ratio for deletion", ccstruct_->params()),
218 BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
219 ccstruct_->params()),
220 double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
221 ccstruct_->params()),
222 double_MEMBER(textord_blshift_xfraction, 9.99,
223 "Min size of baseline shift", ccstruct_->params()) {}
224
225// Make the textlines and words inside each block.
226void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew,
227 int width, int height, Pix* binary_pix,
228 Pix* thresholds_pix, Pix* grey_pix,
229 bool use_box_bottoms, BLOBNBOX_LIST* diacritic_blobs,
230 BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
231 page_tr_.set_x(width);
232 page_tr_.set_y(height);
233 if (to_blocks->empty()) {
234 // AutoPageSeg was not used, so we need to find_components first.
235 find_components(binary_pix, blocks, to_blocks);
236 TO_BLOCK_IT it(to_blocks);
237 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
238 TO_BLOCK* to_block = it.data();
239 // Compute the edge offsets whether or not there is a grey_pix.
240 // We have by-passed auto page seg, so we have to run it here.
241 // By page segmentation mode there is no non-text to avoid running on.
242 to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
243 }
244 } else if (!PSM_SPARSE(pageseg_mode)) {
245 // AutoPageSeg does not need to find_components as it did that already.
246 // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
247 filter_blobs(page_tr_, to_blocks, true);
248 }
249
250 ASSERT_HOST(!to_blocks->empty());
251 if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
252 const FCOORD anticlockwise90(0.0f, 1.0f);
253 const FCOORD clockwise90(0.0f, -1.0f);
254 TO_BLOCK_IT it(to_blocks);
255 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
256 TO_BLOCK* to_block = it.data();
257 BLOCK* block = to_block->block;
258 // Create a fake poly_block in block from its bounding box.
261 // Rotate the to_block along with its contained block and blobnbox lists.
262 to_block->rotate(anticlockwise90);
263 // Set the block's rotation values to obey the convention followed in
264 // layout analysis for vertical text.
265 block->set_re_rotation(clockwise90);
266 block->set_classify_rotation(clockwise90);
267 }
268 }
269
270 TO_BLOCK_IT to_block_it(to_blocks);
271 TO_BLOCK* to_block = to_block_it.data();
272 // Make the rows in the block.
273 float gradient;
274 // Do it the old fashioned way.
275 if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
276 gradient = make_rows(page_tr_, to_blocks);
277 } else if (!PSM_SPARSE(pageseg_mode)) {
278 // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
279 gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE,
280 to_block, to_blocks);
281 } else {
282 gradient = 0.0f;
283 }
284 BaselineDetect baseline_detector(textord_baseline_debug,
285 reskew, to_blocks);
286 baseline_detector.ComputeStraightBaselines(use_box_bottoms);
287 baseline_detector.ComputeBaselineSplinesAndXheights(
288 page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr,
290 // Now make the words in the lines.
291 if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
292 // SINGLE_LINE uses the old word maker on the single line.
293 make_words(this, page_tr_, gradient, blocks, to_blocks);
294 } else {
295 // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
296 // single word, and in SINGLE_CHAR mode, all the outlines
297 // go in a single blob.
298 TO_BLOCK* to_block = to_block_it.data();
299 make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
300 to_block->get_rows(), to_block->block->row_list());
301 }
302 // Remove empties.
303 cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
304 TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
305 // Compute the margins for each row in the block, to be used later for
306 // paragraph detection.
307 BLOCK_IT b_it(blocks);
308 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
309 b_it.data()->compute_row_margins();
310 }
311#ifndef GRAPHICS_DISABLED
312 close_to_win();
313#endif
314}
315
316// If we were supposed to return only a single textline, and there is more
317// than one, clean up and leave only the best.
319 PAGE_RES* page_res) {
320 if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode))
321 return; // No cleanup required.
322 PAGE_RES_IT it(page_res);
323 // Find the best row, being the greatest mean word conf.
324 float row_total_conf = 0.0f;
325 int row_word_count = 0;
326 ROW_RES* best_row = nullptr;
327 float best_conf = 0.0f;
328 for (it.restart_page(); it.word() != nullptr; it.forward()) {
329 WERD_RES* word = it.word();
330 row_total_conf += word->best_choice->certainty();
331 ++row_word_count;
332 if (it.next_row() != it.row()) {
333 row_total_conf /= row_word_count;
334 if (best_row == nullptr || best_conf < row_total_conf) {
335 best_row = it.row();
336 best_conf = row_total_conf;
337 }
338 row_total_conf = 0.0f;
339 row_word_count = 0;
340 }
341 }
342 // Now eliminate any word not in the best row.
343 for (it.restart_page(); it.word() != nullptr; it.forward()) {
344 if (it.row() != best_row)
346 }
347}
348
349} // namespace tesseract.
@ PT_VERTICAL_TEXT
Definition: capi.h:136
#define ASSERT_HOST(x)
Definition: errcode.h:88
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:315
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:324
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:318
void close_to_win()
Definition: drawtord.cpp:53
bool textord_show_final_rows
Definition: makerow.cpp:46
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:163
bool textord_heavy_nr
Definition: makerow.cpp:42
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:200
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: wordseg.cpp:96
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition: wordseg.cpp:52
@ PSM_SINGLE_BLOCK_VERT_TEXT
aligned text.
Definition: publictypes.h:170
@ PSM_SINGLE_CHAR
Treat the image as a single character.
Definition: publictypes.h:176
@ PSM_RAW_LINE
hacks that are Tesseract-specific.
Definition: publictypes.h:179
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:206
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:200
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:209
BLOCK * block
Definition: blobbox.h:777
TO_ROW_LIST * get_rows()
Definition: blobbox.h:704
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey)
Definition: blobbox.cpp:1055
void rotate(const FCOORD &rotation)
Definition: blobbox.h:710
Definition: ocrblock.h:31
void set_re_rotation(const FCOORD &rotation)
Definition: ocrblock.h:137
void set_classify_rotation(const FCOORD &rotation)
Definition: ocrblock.h:143
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:116
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:190
WERD_CHOICE * best_choice
Definition: pageres.h:241
WERD_RES * word() const
Definition: pageres.h:754
ROW_RES * row() const
Definition: pageres.h:757
WERD_RES * restart_page()
Definition: pageres.h:701
WERD_RES * forward()
Definition: pageres.h:734
ROW_RES * next_row() const
Definition: pageres.h:766
void DeleteCurrentWord()
Definition: pageres.cpp:1440
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:57
void set_x(int16_t xin)
rewrite function
Definition: points.h:61
void set_y(int16_t yin)
rewrite function
Definition: points.h:65
Definition: points.h:189
float certainty() const
Definition: ratngs.h:320
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
void ComputeStraightBaselines(bool use_box_bottoms)
int textord_baseline_debug
Definition: textord.h:377
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
Definition: tordmain.cpp:250
void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res)
Definition: textord.cpp:318
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: tordmain.cpp:219
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: textord.cpp:226
Textord(CCStruct *ccstruct)
Definition: textord.cpp:35