tesseract 4.1.1
Loading...
Searching...
No Matches
tesseract::Textord Class Reference

#include <textord.h>

Public Member Functions

 Textord (CCStruct *ccstruct)
 
 ~Textord ()=default
 
void TextordPage (PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void CleanupSingleRowResult (PageSegMode pageseg_mode, PAGE_RES *page_res)
 
bool use_cjk_fp_model () const
 
void set_use_cjk_fp_model (bool flag)
 
void to_spacing (ICOORD page_tr, TO_BLOCK_LIST *blocks)
 
ROW * make_prop_words (TO_ROW *row, FCOORD rotation)
 
ROW * make_blob_words (TO_ROW *row, FCOORD rotation)
 
void find_components (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void filter_blobs (ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
 
void compute_block_xheight (TO_BLOCK *block, float gradient)
 
void make_spline_rows (TO_BLOCK *block, float gradient, bool testing_on)
 
compute_row_xheight

Estimate the xheight of this row. Compute the ascender rise and descender drop at the same time. Set xheight_evidence to the number of blobs with the chosen xheight that appear in this row.

void compute_row_xheight (TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
 

Public Attributes

bool textord_single_height_mode = false
 
bool tosp_old_to_method = false
 
bool tosp_old_to_constrain_sp_kn = false
 
bool tosp_only_use_prop_rows = true
 
bool tosp_force_wordbreak_on_punct = false
 
bool tosp_use_pre_chopping = false
 
bool tosp_old_to_bug_fix = false
 
bool tosp_block_use_cert_spaces = true
 
bool tosp_row_use_cert_spaces = true
 
bool tosp_narrow_blobs_not_cert = true
 
bool tosp_row_use_cert_spaces1 = true
 
bool tosp_recovery_isolated_row_stats = true
 
bool tosp_only_small_gaps_for_kern = false
 
bool tosp_all_flips_fuzzy = false
 
bool tosp_fuzzy_limit_all = true
 
bool tosp_stats_use_xht_gaps = true
 
bool tosp_use_xht_gaps = true
 
bool tosp_only_use_xht_gaps = false
 
bool tosp_rule_9_test_punct = false
 
bool tosp_flip_fuzz_kn_to_sp = true
 
bool tosp_flip_fuzz_sp_to_kn = true
 
bool tosp_improve_thresh = false
 
int tosp_debug_level = 0
 
int tosp_enough_space_samples_for_median = 3
 
int tosp_redo_kern_limit = 10
 
int tosp_few_samples = 40
 
int tosp_short_row = 20
 
int tosp_sanity_method = 1
 
double tosp_old_sp_kn_th_factor = 2.0
 
double tosp_threshold_bias1 = 0
 
double tosp_threshold_bias2 = 0
 
double tosp_narrow_fraction = 0.3
 
double tosp_narrow_aspect_ratio = 0.48
 
double tosp_wide_fraction = 0.52
 
double tosp_wide_aspect_ratio = 0.0
 
double tosp_fuzzy_space_factor = 0.6
 
double tosp_fuzzy_space_factor1 = 0.5
 
double tosp_fuzzy_space_factor2 = 0.72
 
double tosp_gap_factor = 0.83
 
double tosp_kern_gap_factor1 = 2.0
 
double tosp_kern_gap_factor2 = 1.3
 
double tosp_kern_gap_factor3 = 2.5
 
double tosp_ignore_big_gaps = -1
 
double tosp_ignore_very_big_gaps = 3.5
 
double tosp_rep_space = 1.6
 
double tosp_enough_small_gaps = 0.65
 
double tosp_table_kn_sp_ratio = 2.25
 
double tosp_table_xht_sp_ratio = 0.33
 
double tosp_table_fuzzy_kn_sp_ratio = 3.0
 
double tosp_fuzzy_kn_fraction = 0.5
 
double tosp_fuzzy_sp_fraction = 0.5
 
double tosp_min_sane_kn_sp = 1.5
 
double tosp_init_guess_kn_mult = 2.2
 
double tosp_init_guess_xht_mult = 0.28
 
double tosp_max_sane_kn_thresh = 5.0
 
double tosp_flip_caution = 0.0
 
double tosp_large_kerning = 0.19
 
double tosp_dont_fool_with_small_kerns = -1
 
double tosp_near_lh_edge = 0
 
double tosp_silly_kn_sp_gap = 0.2
 
double tosp_pass_wide_fuzz_sp_to_context = 0.75
 
bool textord_no_rejects = false
 
bool textord_show_blobs = false
 
bool textord_show_boxes = false
 
int textord_max_noise_size = 7
 
int textord_baseline_debug = 0
 
double textord_noise_area_ratio = 0.7
 
double textord_initialx_ile = 0.75
 
double textord_initialasc_ile = 0.90
 
int textord_noise_sizefraction = 10
 
double textord_noise_sizelimit = 0.5
 
int textord_noise_translimit = 16
 
double textord_noise_normratio = 2.0
 
bool textord_noise_rejwords = true
 
bool textord_noise_rejrows = true
 
double textord_noise_syfract = 0.2
 
double textord_noise_sxfract = 0.4
 
double textord_noise_hfract = 1.0/64
 
int textord_noise_sncount = 1
 
double textord_noise_rowratio = 6.0
 
bool textord_noise_debug = false
 
double textord_blshift_maxshift = 0.00
 
double textord_blshift_xfraction = 9.99
 

Detailed Description

Definition at line 68 of file textord.h.

Constructor & Destructor Documentation

◆ Textord()

tesseract::Textord::Textord ( CCStruct * ccstruct)
explicit

Definition at line 35 of file textord.cpp.

36 : ccstruct_(ccstruct),
37 use_cjk_fp_model_(false),
38 // makerow.cpp ///////////////////////////////////////////
40 "Script has no xheight, so use a single mode",
41 ccstruct_->params()),
42 // tospace.cpp ///////////////////////////////////////////
43 BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?",
44 ccstruct_->params()),
46 "Constrain relative values of inter and intra-word gaps for "
47 "old_to_method.",
48 ccstruct_->params()),
50 "Block stats to use fixed pitch rows?", ccstruct_->params()),
52 "Force word breaks on punct to break long lines in non-space "
53 "delimited langs",
54 ccstruct_->params()),
55 BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?",
56 ccstruct_->params()),
57 BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
58 ccstruct_->params()),
59 BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces",
60 ccstruct_->params()),
61 BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
62 ccstruct_->params()),
63 BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces",
64 ccstruct_->params()),
65 BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
66 ccstruct_->params()),
68 "Use row alone when inadequate cert spaces",
69 ccstruct_->params()),
70 BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess",
71 ccstruct_->params()),
72 BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
73 ccstruct_->params()),
75 "Don't restrict kn->sp fuzzy limit to tables",
76 ccstruct_->params()),
78 "Use within xht gap for wd breaks", ccstruct_->params()),
79 BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
80 ccstruct_->params()),
82 "Only use within xht gap for wd breaks", ccstruct_->params()),
84 "Don't chng kn to space next to punct", ccstruct_->params()),
85 BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
86 ccstruct_->params()),
87 BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
88 ccstruct_->params()),
89 BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
90 ccstruct_->params()),
91 INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params()),
93 "or should we use mean", ccstruct_->params()),
95 "No.samples reqd to reestimate for row", ccstruct_->params()),
97 "No.gaps reqd with 1 large gap to treat as a table",
98 ccstruct_->params()),
100 "No.gaps reqd with few cert spaces to use certs",
101 ccstruct_->params()),
102 INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly",
103 ccstruct_->params()),
105 "Factor for defining space threshold in terms of space and "
106 "kern sizes",
107 ccstruct_->params()),
108 double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?",
109 ccstruct_->params()),
110 double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?",
111 ccstruct_->params()),
112 double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
113 ccstruct_->params()),
115 "narrow if w/h less than this", ccstruct_->params()),
116 double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
117 ccstruct_->params()),
118 double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
119 ccstruct_->params()),
121 "Fract of xheight for fuzz sp", ccstruct_->params()),
123 "Fract of xheight for fuzz sp", ccstruct_->params()),
125 "Fract of xheight for fuzz sp", ccstruct_->params()),
126 double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
127 ccstruct_->params()),
128 double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
129 ccstruct_->params()),
130 double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp",
131 ccstruct_->params()),
132 double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp",
133 ccstruct_->params()),
134 double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier",
135 ccstruct_->params()),
136 double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier",
137 ccstruct_->params()),
138 double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space",
139 ccstruct_->params()),
141 "Fract of kerns reqd for isolated row stats",
142 ccstruct_->params()),
144 "Min difference of kn & sp in table", ccstruct_->params()),
146 "Expect spaces bigger than this", ccstruct_->params()),
148 "Fuzzy if less than this", ccstruct_->params()),
149 double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
150 ccstruct_->params()),
151 double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
152 ccstruct_->params()),
154 "Don't trust spaces less than this time kn",
155 ccstruct_->params()),
157 "Thresh guess - mult kn by this", ccstruct_->params()),
159 "Thresh guess - mult xht by this", ccstruct_->params()),
161 "Multiplier on kn to limit thresh", ccstruct_->params()),
163 "Don't autoflip kn to sp when large separation",
164 ccstruct_->params()),
166 "Limit use of xht gap with large kns", ccstruct_->params()),
168 "Limit use of xht gap with odd small kns",
169 ccstruct_->params()),
171 "Don't reduce box if the top left is non blank",
172 ccstruct_->params()),
174 "Don't let sp minus kn get too small", ccstruct_->params()),
176 "How wide fuzzies need context", ccstruct_->params()),
177 // tordmain.cpp ///////////////////////////////////////////
178 BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
179 ccstruct_->params()),
180 BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs",
181 ccstruct_->params()),
182 BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs",
183 ccstruct_->params()),
184 INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
185 ccstruct_->params()),
186 INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level",
187 ccstruct_->params()),
189 "Fraction of bounding box for noise", ccstruct_->params()),
191 "Ile of sizes for xheight guess", ccstruct_->params()),
193 "Ile of sizes for xheight guess", ccstruct_->params()),
194 INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima",
195 ccstruct_->params()),
197 "Fraction of x for big t count", ccstruct_->params()),
198 INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
199 ccstruct_->params()),
201 "Dot to norm ratio for deletion", ccstruct_->params()),
202 BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
203 ccstruct_->params()),
204 BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
205 ccstruct_->params()),
207 "xh fract height error for norm blobs",
208 ccstruct_->params()),
210 "xh fract width error for norm blobs", ccstruct_->params()),
212 "Height fraction to discard outlines as speckle noise",
213 ccstruct_->params()),
214 INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
215 ccstruct_->params()),
217 "Dot to norm ratio for deletion", ccstruct_->params()),
218 BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
219 ccstruct_->params()),
220 double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
221 ccstruct_->params()),
223 "Min size of baseline shift", ccstruct_->params()) {}
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:315
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:324
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:318
ParamsVectors * params()
Definition: ccutil.h:67
bool tosp_only_use_xht_gaps
Definition: textord.h:295
bool tosp_old_to_constrain_sp_kn
Definition: textord.h:266
double tosp_min_sane_kn_sp
Definition: textord.h:353
double tosp_wide_fraction
Definition: textord.h:323
double tosp_table_fuzzy_kn_sp_ratio
Definition: textord.h:349
bool tosp_only_small_gaps_for_kern
Definition: textord.h:286
bool tosp_block_use_cert_spaces
Definition: textord.h:277
double tosp_pass_wide_fuzz_sp_to_context
Definition: textord.h:371
double tosp_init_guess_xht_mult
Definition: textord.h:357
double tosp_fuzzy_kn_fraction
Definition: textord.h:350
double tosp_kern_gap_factor1
Definition: textord.h:334
double tosp_fuzzy_space_factor2
Definition: textord.h:331
double tosp_table_xht_sp_ratio
Definition: textord.h:347
double tosp_wide_aspect_ratio
Definition: textord.h:325
int textord_baseline_debug
Definition: textord.h:377
bool tosp_all_flips_fuzzy
Definition: textord.h:287
double tosp_large_kerning
Definition: textord.h:363
bool textord_no_rejects
Definition: textord.h:373
bool tosp_only_use_prop_rows
Definition: textord.h:268
bool tosp_old_to_method
Definition: textord.h:263
bool tosp_force_wordbreak_on_punct
Definition: textord.h:271
bool tosp_recovery_isolated_row_stats
Definition: textord.h:285
bool tosp_use_pre_chopping
Definition: textord.h:273
int tosp_sanity_method
Definition: textord.h:311
double tosp_kern_gap_factor2
Definition: textord.h:336
bool textord_noise_rejrows
Definition: textord.h:387
double tosp_threshold_bias1
Definition: textord.h:316
double tosp_dont_fool_with_small_kerns
Definition: textord.h:365
int textord_noise_translimit
Definition: textord.h:384
double tosp_threshold_bias2
Definition: textord.h:318
double tosp_rep_space
Definition: textord.h:341
bool tosp_rule_9_test_punct
Definition: textord.h:297
bool tosp_narrow_blobs_not_cert
Definition: textord.h:281
bool tosp_flip_fuzz_sp_to_kn
Definition: textord.h:299
bool tosp_row_use_cert_spaces1
Definition: textord.h:283
bool tosp_improve_thresh
Definition: textord.h:301
double tosp_silly_kn_sp_gap
Definition: textord.h:369
bool tosp_stats_use_xht_gaps
Definition: textord.h:291
double textord_noise_sxfract
Definition: textord.h:390
bool tosp_flip_fuzz_kn_to_sp
Definition: textord.h:298
bool textord_show_boxes
Definition: textord.h:375
double textord_noise_syfract
Definition: textord.h:388
double tosp_max_sane_kn_thresh
Definition: textord.h:359
double textord_noise_sizelimit
Definition: textord.h:383
double tosp_fuzzy_space_factor
Definition: textord.h:327
double textord_initialasc_ile
Definition: textord.h:381
double tosp_enough_small_gaps
Definition: textord.h:343
bool tosp_use_xht_gaps
Definition: textord.h:293
int tosp_redo_kern_limit
Definition: textord.h:306
int tosp_enough_space_samples_for_median
Definition: textord.h:304
bool textord_noise_debug
Definition: textord.h:395
double textord_blshift_maxshift
Definition: textord.h:396
double tosp_init_guess_kn_mult
Definition: textord.h:355
int textord_noise_sizefraction
Definition: textord.h:382
double tosp_table_kn_sp_ratio
Definition: textord.h:345
bool tosp_fuzzy_limit_all
Definition: textord.h:289
double textord_noise_normratio
Definition: textord.h:385
double textord_noise_area_ratio
Definition: textord.h:379
bool tosp_old_to_bug_fix
Definition: textord.h:275
bool textord_single_height_mode
Definition: textord.h:261
double textord_blshift_xfraction
Definition: textord.h:397
bool textord_show_blobs
Definition: textord.h:374
double tosp_kern_gap_factor3
Definition: textord.h:338
double textord_noise_rowratio
Definition: textord.h:394
double tosp_fuzzy_sp_fraction
Definition: textord.h:351
double textord_noise_hfract
Definition: textord.h:392
double tosp_fuzzy_space_factor1
Definition: textord.h:329
double tosp_ignore_very_big_gaps
Definition: textord.h:340
double textord_initialx_ile
Definition: textord.h:380
double tosp_gap_factor
Definition: textord.h:332
int textord_noise_sncount
Definition: textord.h:393
bool tosp_row_use_cert_spaces
Definition: textord.h:279
double tosp_flip_caution
Definition: textord.h:361
bool textord_noise_rejwords
Definition: textord.h:386
double tosp_ignore_big_gaps
Definition: textord.h:339
double tosp_near_lh_edge
Definition: textord.h:367
int textord_max_noise_size
Definition: textord.h:376
double tosp_old_sp_kn_th_factor
Definition: textord.h:314
double tosp_narrow_aspect_ratio
Definition: textord.h:322
double tosp_narrow_fraction
Definition: textord.h:320

◆ ~Textord()

tesseract::Textord::~Textord ( )
default

Member Function Documentation

◆ CleanupSingleRowResult()

void tesseract::Textord::CleanupSingleRowResult ( PageSegMode  pageseg_mode,
PAGE_RES * page_res 
)

Definition at line 318 of file textord.cpp.

319 {
320 if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode))
321 return; // No cleanup required.
322 PAGE_RES_IT it(page_res);
323 // Find the best row, being the greatest mean word conf.
324 float row_total_conf = 0.0f;
325 int row_word_count = 0;
326 ROW_RES* best_row = nullptr;
327 float best_conf = 0.0f;
328 for (it.restart_page(); it.word() != nullptr; it.forward()) {
329 WERD_RES* word = it.word();
330 row_total_conf += word->best_choice->certainty();
331 ++row_word_count;
332 if (it.next_row() != it.row()) {
333 row_total_conf /= row_word_count;
334 if (best_row == nullptr || best_conf < row_total_conf) {
335 best_row = it.row();
336 best_conf = row_total_conf;
337 }
338 row_total_conf = 0.0f;
339 row_word_count = 0;
340 }
341 }
342 // Now eliminate any word not in the best row.
343 for (it.restart_page(); it.word() != nullptr; it.forward()) {
344 if (it.row() != best_row)
345 it.DeleteCurrentWord();
346 }
347}
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:206
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:200
ROW * row
Definition: pageres.h:140
WERD_CHOICE * best_choice
Definition: pageres.h:241
WERD * word
Definition: pageres.h:186
float certainty() const
Definition: ratngs.h:320

◆ compute_block_xheight()

void tesseract::Textord::compute_block_xheight ( TO_BLOCK * block,
float  gradient 
)

Definition at line 1254 of file makerow.cpp.

1254 {
1255 TO_ROW *row; // current row
1256 float asc_frac_xheight = CCStruct::kAscenderFraction /
1258 float desc_frac_xheight = CCStruct::kDescenderFraction /
1260 int32_t min_height, max_height; // limits on xheight
1261 TO_ROW_IT row_it = block->get_rows();
1262 if (row_it.empty()) return; // no rows
1263
1264 // Compute the best guess of xheight of each row individually.
1265 // Use xheight and ascrise values of the rows where ascenders were found.
1266 get_min_max_xheight(block->line_size, &min_height, &max_height);
1267 STATS row_asc_xheights(min_height, max_height + 1);
1268 STATS row_asc_ascrise(static_cast<int>(min_height * asc_frac_xheight),
1269 static_cast<int>(max_height * asc_frac_xheight) + 1);
1270 int min_desc_height = static_cast<int>(min_height * desc_frac_xheight);
1271 int max_desc_height = static_cast<int>(max_height * desc_frac_xheight);
1272 STATS row_asc_descdrop(min_desc_height, max_desc_height + 1);
1273 STATS row_desc_xheights(min_height, max_height + 1);
1274 STATS row_desc_descdrop(min_desc_height, max_desc_height + 1);
1275 STATS row_cap_xheights(min_height, max_height + 1);
1276 STATS row_cap_floating_xheights(min_height, max_height + 1);
1277 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1278 row = row_it.data();
1279 // Compute the xheight of this row if it has not been computed before.
1280 if (row->xheight <= 0.0) {
1282 gradient, block->line_size);
1283 }
1284 ROW_CATEGORY row_category = get_row_category(row);
1285 if (row_category == ROW_ASCENDERS_FOUND) {
1286 row_asc_xheights.add(static_cast<int32_t>(row->xheight),
1287 row->xheight_evidence);
1288 row_asc_ascrise.add(static_cast<int32_t>(row->ascrise),
1289 row->xheight_evidence);
1290 row_asc_descdrop.add(static_cast<int32_t>(-row->descdrop),
1291 row->xheight_evidence);
1292 } else if (row_category == ROW_DESCENDERS_FOUND) {
1293 row_desc_xheights.add(static_cast<int32_t>(row->xheight),
1294 row->xheight_evidence);
1295 row_desc_descdrop.add(static_cast<int32_t>(-row->descdrop),
1296 row->xheight_evidence);
1297 } else if (row_category == ROW_UNKNOWN) {
1298 fill_heights(row, gradient, min_height, max_height,
1299 &row_cap_xheights, &row_cap_floating_xheights);
1300 }
1301 }
1302
1303 float xheight = 0.0;
1304 float ascrise = 0.0;
1305 float descdrop = 0.0;
1306 // Compute our best guess of xheight of this block.
1307 if (row_asc_xheights.get_total() > 0) {
1308 // Determine xheight from rows where ascenders were found.
1309 xheight = row_asc_xheights.median();
1310 ascrise = row_asc_ascrise.median();
1311 descdrop = -row_asc_descdrop.median();
1312 } else if (row_desc_xheights.get_total() > 0) {
1313 // Determine xheight from rows where descenders were found.
1314 xheight = row_desc_xheights.median();
1315 descdrop = -row_desc_descdrop.median();
1316 } else if (row_cap_xheights.get_total() > 0) {
1317 // All the rows in the block were (a/de)scenderless.
1318 // Try to search for two modes in row_cap_heights that could
1319 // be the xheight and the capheight (e.g. some of the rows
1320 // were lowercase, but did not have enough (a/de)scenders.
1321 // If such two modes can not be found, this block is most
1322 // likely all caps (or all small caps, in which case the code
1323 // still works as intended).
1324 compute_xheight_from_modes(&row_cap_xheights, &row_cap_floating_xheights,
1326 block->block->classify_rotation().y() == 0.0,
1327 min_height, max_height, &(xheight), &(ascrise));
1328 if (ascrise == 0) { // assume only caps in the whole block
1329 xheight = row_cap_xheights.median() * CCStruct::kXHeightCapRatio;
1330 }
1331 } else { // default block sizes
1332 xheight = block->line_size * CCStruct::kXHeightFraction;
1333 }
1334 // Correct xheight, ascrise and descdrop if necessary.
1335 bool corrected_xheight = false;
1336 if (xheight < textord_min_xheight) {
1337 xheight = static_cast<float>(textord_min_xheight);
1338 corrected_xheight = true;
1339 }
1340 if (corrected_xheight || ascrise <= 0.0) {
1341 ascrise = xheight * asc_frac_xheight;
1342 }
1343 if (corrected_xheight || descdrop >= 0.0) {
1344 descdrop = -(xheight * desc_frac_xheight);
1345 }
1346 block->xheight = xheight;
1347
1349 tprintf("Block average xheight=%.4f, ascrise=%.4f, descdrop=%.4f\n",
1350 xheight, ascrise, descdrop);
1351 }
1352 // Correct xheight, ascrise, descdrop of rows based on block averages.
1353 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1354 correct_row_xheight(row_it.data(), xheight, ascrise, descdrop);
1355 }
1356}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
void fill_heights(TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
Definition: makerow.cpp:1406
int compute_xheight_from_modes(STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
Definition: makerow.cpp:1467
void correct_row_xheight(TO_ROW *row, float xheight, float ascrise, float descdrop)
Definition: makerow.cpp:1685
int textord_min_xheight
Definition: makerow.cpp:67
bool textord_debug_xheights
Definition: makerow.cpp:55
void get_min_max_xheight(int block_linesize, int *min_height, int *max_height)
Definition: makerow.h:115
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:122
ROW_CATEGORY
Definition: makerow.h:35
@ ROW_DESCENDERS_FOUND
Definition: makerow.h:37
@ ROW_UNKNOWN
Definition: makerow.h:38
@ ROW_ASCENDERS_FOUND
Definition: makerow.h:36
int xheight_evidence
Definition: blobbox.h:658
float xheight
Definition: blobbox.h:657
float descdrop
Definition: blobbox.h:660
float ascrise
Definition: blobbox.h:659
BLOCK * block
Definition: blobbox.h:777
float xheight
Definition: blobbox.h:788
TO_ROW_LIST * get_rows()
Definition: blobbox.h:704
float line_size
Definition: blobbox.h:785
static const double kXHeightCapRatio
Definition: ccstruct.h:37
static const double kXHeightFraction
Definition: ccstruct.h:34
static const double kDescenderFraction
Definition: ccstruct.h:33
static const double kAscenderFraction
Definition: ccstruct.h:35
FCOORD classify_rotation() const
Definition: ocrblock.h:140
float y() const
Definition: points.h:210
Definition: statistc.h:31
void compute_row_xheight(TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
Definition: makerow.cpp:1366

◆ compute_row_xheight()

void tesseract::Textord::compute_row_xheight ( TO_ROW * row,
const FCOORD & rotation,
float  gradient,
int  block_line_size 
)

Definition at line 1366 of file makerow.cpp.

1369 {
1370 // Find blobs representing repeated characters in rows and mark them.
1371 // This information is used for computing row xheight and at a later
1372 // stage when words are formed by make_words.
1373 if (!row->rep_chars_marked()) {
1375 }
1376
1377 int min_height, max_height;
1378 get_min_max_xheight(block_line_size, &min_height, &max_height);
1379 STATS heights(min_height, max_height + 1);
1380 STATS floating_heights(min_height, max_height + 1);
1381 fill_heights(row, gradient, min_height, max_height,
1382 &heights, &floating_heights);
1383 row->ascrise = 0.0f;
1384 row->xheight = 0.0f;
1385 row->xheight_evidence =
1386 compute_xheight_from_modes(&heights, &floating_heights,
1388 rotation.y() == 0.0,
1389 min_height, max_height,
1390 &(row->xheight), &(row->ascrise));
1391 row->descdrop = 0.0f;
1392 if (row->xheight > 0.0) {
1393 row->descdrop = static_cast<float>(
1394 compute_row_descdrop(row, gradient, row->xheight_evidence, &heights));
1395 }
1396}
void mark_repeated_chars(TO_ROW *row)
Definition: makerow.cpp:2639
int32_t compute_row_descdrop(TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
Definition: makerow.cpp:1563
bool rep_chars_marked() const
Definition: blobbox.h:631

◆ filter_blobs()

void tesseract::Textord::filter_blobs ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks,
bool  testing_on 
)

Definition at line 250 of file tordmain.cpp.

252 { // for plotting
253 TO_BLOCK_IT block_it = blocks; // destination iterator
254 TO_BLOCK *block; // created block
255
256 #ifndef GRAPHICS_DISABLED
257 if (to_win != nullptr)
258 to_win->Clear();
259 #endif // GRAPHICS_DISABLED
260
261 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
262 block_it.forward()) {
263 block = block_it.data();
264 block->line_size = filter_noise_blobs(&block->blobs,
265 &block->noise_blobs,
266 &block->small_blobs,
267 &block->large_blobs);
268 if (block->line_size == 0) block->line_size = 1;
269 block->line_spacing = block->line_size *
276
277 #ifndef GRAPHICS_DISABLED
278 if (textord_show_blobs && testing_on) {
279 if (to_win == nullptr)
280 create_to_win(page_tr);
282 }
283 if (textord_show_boxes && testing_on) {
284 if (to_win == nullptr)
285 create_to_win(page_tr);
290 }
291 #endif // GRAPHICS_DISABLED
292 }
293}
ScrollView * to_win
Definition: drawtord.cpp:35
void plot_box_list(ScrollView *win, BLOBNBOX_LIST *list, ScrollView::Color body_colour)
Definition: drawtord.cpp:67
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:44
double textord_excess_blobsize
Definition: makerow.cpp:83
double textord_min_linesize
Definition: makerow.cpp:81
BLOBNBOX_LIST blobs
Definition: blobbox.h:772
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:774
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:776
float max_blob_size
Definition: blobbox.h:786
float line_spacing
Definition: blobbox.h:779
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:775
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1071
void Clear()
Definition: scrollview.cpp:589

◆ find_components()

void tesseract::Textord::find_components ( Pix *  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 219 of file tordmain.cpp.

220 {
221 int width = pixGetWidth(pix);
222 int height = pixGetHeight(pix);
223 if (width > INT16_MAX || height > INT16_MAX) {
224 tprintf("Input image too large! (%d, %d)\n", width, height);
225 return; // Can't handle it.
226 }
227
229
230 BLOCK_IT block_it(blocks); // iterator
231 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
232 block_it.forward()) {
233 BLOCK* block = block_it.data();
234 if (block->pdblk.poly_block() == nullptr || block->pdblk.poly_block()->IsText()) {
235 extract_edges(pix, block);
236 }
237 }
238
239 assign_blobs_to_blocks2(pix, blocks, to_blocks);
240 ICOORD page_tr(width, height);
241 filter_blobs(page_tr, to_blocks, !textord_test_landscape);
242}
#define LOC_EDGE_PROG
Definition: errcode.h:43
void set_global_loc_code(int loc_code)
Definition: globaloc.cpp:25
void extract_edges(Pix *pix, BLOCK *block)
Definition: edgblob.cpp:329
bool textord_test_landscape
Definition: makerow.cpp:48
void assign_blobs_to_blocks2(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: tordmain.cpp:168
Definition: ocrblock.h:31
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:190
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
integer coordinate
Definition: points.h:32
bool IsText() const
Definition: polyblk.h:49
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
Definition: tordmain.cpp:250

◆ make_blob_words()

ROW * tesseract::Textord::make_blob_words ( TO_ROW * row,
FCOORD  rotation 
)

Definition at line 1184 of file tospace.cpp.

1187 {
1188 bool bol; // start of line
1189 ROW *real_row; // output row
1190 C_OUTLINE_IT cout_it;
1191 C_BLOB_LIST cblobs;
1192 C_BLOB_IT cblob_it = &cblobs;
1193 WERD_LIST words;
1194 WERD *word; // new word
1195 BLOBNBOX *bblob; // current blob
1196 TBOX blob_box; // bounding box
1197 BLOBNBOX_IT box_it; // iterator
1198 int16_t word_count = 0;
1199
1200 cblob_it.set_to_list(&cblobs);
1201 box_it.set_to_list(row->blob_list());
1202 // new words
1203 WERD_IT word_it(&words);
1204 bol = true;
1205 if (!box_it.empty()) {
1206
1207 do {
1208 bblob = box_it.data();
1209 blob_box = bblob->bounding_box();
1210 if (bblob->joined_to_prev()) {
1211 if (bblob->cblob() != nullptr) {
1212 cout_it.set_to_list(cblob_it.data()->out_list());
1213 cout_it.move_to_last();
1214 cout_it.add_list_after(bblob->cblob()->out_list());
1215 delete bblob->cblob();
1216 }
1217 } else {
1218 if (bblob->cblob() != nullptr)
1219 cblob_it.add_after_then_move(bblob->cblob());
1220 }
1221 box_it.forward(); // next one
1222 bblob = box_it.data();
1223 blob_box = bblob->bounding_box();
1224
1225 if (!bblob->joined_to_prev() && !cblobs.empty()) {
1226 word = new WERD(&cblobs, 1, nullptr);
1227 word_count++;
1228 word_it.add_after_then_move(word);
1229 if (bol) {
1230 word->set_flag(W_BOL, true);
1231 bol = false;
1232 }
1233 if (box_it.at_first()) { // at end of line
1234 word->set_flag(W_EOL, true);
1235 }
1236 }
1237 }
1238 while (!box_it.at_first()); // until back at start
1239 /* Setup the row with created words. */
1240 real_row = new ROW(row, static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size));
1241 word_it.set_to_list(real_row->word_list());
1242 //put words in row
1243 word_it.add_list_after(&words);
1244 real_row->recalc_bounding_box();
1245 if (tosp_debug_level > 4) {
1246 tprintf ("Row:Made %d words in row ((%d,%d)(%d,%d))\n",
1247 word_count,
1248 real_row->bounding_box().left(),
1249 real_row->bounding_box().bottom(),
1250 real_row->bounding_box().right(),
1251 real_row->bounding_box().top());
1252 }
1253 return real_row;
1254 }
1255 return nullptr;
1256}
@ W_EOL
end of line
Definition: werd.h:33
@ W_BOL
start of line
Definition: werd.h:32
const TBOX & bounding_box() const
Definition: blobbox.h:230
C_BLOB * cblob() const
Definition: blobbox.h:268
bool joined_to_prev() const
Definition: blobbox.h:256
float kern_size
Definition: blobbox.h:666
float space_size
Definition: blobbox.h:667
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:600
Definition: ocrrow.h:37
WERD_LIST * word_list()
Definition: ocrrow.h:55
TBOX bounding_box() const
Definition: ocrrow.h:88
void recalc_bounding_box()
Definition: ocrrow.cpp:100
Definition: rect.h:34
int16_t top() const
Definition: rect.h:58
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
int16_t right() const
Definition: rect.h:79
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
Definition: werd.h:56
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118

◆ make_prop_words()

ROW * tesseract::Textord::make_prop_words ( TO_ROW *  row,
FCOORD  rotation 
)

Definition at line 891 of file tospace.cpp.

894 {
895 bool bol; // start of line
896 /* prev_ values are for start of word being built. non prev_ values are for
897 the gap between the word being built and the next one. */
898 bool prev_fuzzy_sp; // probably space
899 bool prev_fuzzy_non; // probably not
900 uint8_t prev_blanks; // in front of word
901 bool fuzzy_sp = false; // probably space
902 bool fuzzy_non = false; // probably not
903 uint8_t blanks = 0; // in front of word
904 bool prev_gap_was_a_space = false;
905 bool break_at_next_gap = false;
906 ROW *real_row; // output row
907 C_OUTLINE_IT cout_it;
908 C_BLOB_LIST cblobs;
909 C_BLOB_IT cblob_it = &cblobs;
910 WERD_LIST words;
911 WERD *word; // new word
912 int32_t next_rep_char_word_right = INT32_MAX;
913 float repetition_spacing; // gap between repetitions
914 int32_t xstarts[2]; // row ends
915 int32_t prev_x; // end of prev blob
916 BLOBNBOX *bblob; // current blob
917 TBOX blob_box; // bounding box
918 BLOBNBOX_IT box_it; // iterator
919 TBOX prev_blob_box;
920 TBOX next_blob_box;
921 int16_t prev_gap = INT16_MAX;
922 int16_t current_gap = INT16_MAX;
923 int16_t next_gap = INT16_MAX;
924 int16_t prev_within_xht_gap = INT16_MAX;
925 int16_t current_within_xht_gap = INT16_MAX;
926 int16_t next_within_xht_gap = INT16_MAX;
927 int16_t word_count = 0;
928
929 // repeated char words
930 WERD_IT rep_char_it(&(row->rep_words));
931 if (!rep_char_it.empty ()) {
932 next_rep_char_word_right =
933 rep_char_it.data ()->bounding_box ().right ();
934 }
935
936 prev_x = -INT16_MAX;
937 cblob_it.set_to_list (&cblobs);
938 box_it.set_to_list (row->blob_list ());
939 // new words
940 WERD_IT word_it(&words);
941 bol = true;
942 prev_blanks = 0;
943 prev_fuzzy_sp = false;
944 prev_fuzzy_non = false;
945 if (!box_it.empty ()) {
946 xstarts[0] = box_it.data ()->bounding_box ().left ();
947 if (xstarts[0] > next_rep_char_word_right) {
948 /* We need to insert a repeated char word at the start of the row */
949 word = rep_char_it.extract ();
950 word_it.add_after_then_move (word);
951 /* Set spaces before repeated char word */
952 word->set_flag (W_BOL, true);
953 bol = false;
954 word->set_blanks (0);
955 //NO uncertainty
956 word->set_flag (W_FUZZY_SP, false);
957 word->set_flag (W_FUZZY_NON, false);
958 xstarts[0] = word->bounding_box ().left ();
959 /* Set spaces after repeated char word (and leave current word set) */
960 repetition_spacing = find_mean_blob_spacing (word);
961 current_gap = box_it.data ()->bounding_box ().left () -
962 next_rep_char_word_right;
963 current_within_xht_gap = current_gap;
964 if (current_gap > tosp_rep_space * repetition_spacing) {
965 prev_blanks = static_cast<uint8_t>(floor (current_gap / row->space_size));
966 if (prev_blanks < 1)
967 prev_blanks = 1;
968 }
969 else
970 prev_blanks = 0;
971 if (tosp_debug_level > 5)
972 tprintf ("Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
973 box_it.data ()->bounding_box ().left (),
974 box_it.data ()->bounding_box ().bottom (),
975 repetition_spacing, current_gap);
976 prev_fuzzy_sp = false;
977 prev_fuzzy_non = false;
978 if (rep_char_it.empty ()) {
979 next_rep_char_word_right = INT32_MAX;
980 }
981 else {
982 rep_char_it.forward ();
983 next_rep_char_word_right =
984 rep_char_it.data ()->bounding_box ().right ();
985 }
986 }
987
988 peek_at_next_gap(row,
989 box_it,
990 next_blob_box,
991 next_gap,
992 next_within_xht_gap);
993 do {
994 bblob = box_it.data ();
995 blob_box = bblob->bounding_box ();
996 if (bblob->joined_to_prev ()) {
997 if (bblob->cblob () != nullptr) {
998 cout_it.set_to_list (cblob_it.data ()->out_list ());
999 cout_it.move_to_last ();
1000 cout_it.add_list_after (bblob->cblob ()->out_list ());
1001 delete bblob->cblob ();
1002 }
1003 } else {
1004 if (bblob->cblob() != nullptr)
1005 cblob_it.add_after_then_move (bblob->cblob ());
1006 prev_x = blob_box.right ();
1007 }
1008 box_it.forward (); //next one
1009 bblob = box_it.data ();
1010 blob_box = bblob->bounding_box ();
1011
1012 if (!bblob->joined_to_prev() && bblob->cblob() != nullptr) {
1013 /* Real Blob - not multiple outlines or pre-chopped */
1014 prev_gap = current_gap;
1015 prev_within_xht_gap = current_within_xht_gap;
1016 prev_blob_box = next_blob_box;
1017 current_gap = next_gap;
1018 current_within_xht_gap = next_within_xht_gap;
1019 peek_at_next_gap(row,
1020 box_it,
1021 next_blob_box,
1022 next_gap,
1023 next_within_xht_gap);
1024
1025 int16_t prev_gap_arg = prev_gap;
1026 int16_t next_gap_arg = next_gap;
1027 if (tosp_only_use_xht_gaps) {
1028 prev_gap_arg = prev_within_xht_gap;
1029 next_gap_arg = next_within_xht_gap;
1030 }
1031 // Decide if a word-break should be inserted
1032 if (blob_box.left () > next_rep_char_word_right ||
1033 make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box,
1034 current_gap, current_within_xht_gap,
1035 next_blob_box, next_gap_arg,
1036 blanks, fuzzy_sp, fuzzy_non,
1037 prev_gap_was_a_space,
1038 break_at_next_gap) ||
1039 box_it.at_first()) {
1040 /* Form a new word out of the blobs collected */
1041 word = new WERD (&cblobs, prev_blanks, nullptr);
1042 word_count++;
1043 word_it.add_after_then_move (word);
1044 if (bol) {
1045 word->set_flag (W_BOL, true);
1046 bol = false;
1047 }
1048 if (prev_fuzzy_sp)
1049 //probably space
1050 word->set_flag (W_FUZZY_SP, true);
1051 else if (prev_fuzzy_non)
1052 word->set_flag (W_FUZZY_NON, true);
1053 //probably not
1054
1055 if (blob_box.left () > next_rep_char_word_right) {
1056 /* We need to insert a repeated char word */
1057 word = rep_char_it.extract ();
1058 word_it.add_after_then_move (word);
1059
1060 /* Set spaces before repeated char word */
1061 repetition_spacing = find_mean_blob_spacing (word);
1062 current_gap = word->bounding_box ().left () - prev_x;
1063 current_within_xht_gap = current_gap;
1064 if (current_gap > tosp_rep_space * repetition_spacing) {
1065 blanks =
1066 static_cast<uint8_t>(floor (current_gap / row->space_size));
1067 if (blanks < 1)
1068 blanks = 1;
1069 }
1070 else
1071 blanks = 0;
1072 if (tosp_debug_level > 5)
1073 tprintf
1074 ("Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
1075 word->bounding_box ().left (),
1076 word->bounding_box ().bottom (),
1077 repetition_spacing, current_gap, blanks);
1078 word->set_blanks (blanks);
1079 //NO uncertainty
1080 word->set_flag (W_FUZZY_SP, false);
1081 word->set_flag (W_FUZZY_NON, false);
1082
1083 /* Set spaces after repeated char word (and leave current word set) */
1084 current_gap =
1085 blob_box.left () - next_rep_char_word_right;
1086 if (current_gap > tosp_rep_space * repetition_spacing) {
1087 blanks = static_cast<uint8_t>(current_gap / row->space_size);
1088 if (blanks < 1)
1089 blanks = 1;
1090 }
1091 else
1092 blanks = 0;
1093 if (tosp_debug_level > 5)
1094 tprintf (" Rgap:%d (%d blanks)\n",
1095 current_gap, blanks);
1096 fuzzy_sp = false;
1097 fuzzy_non = false;
1098
1099 if (rep_char_it.empty ()) {
1100 next_rep_char_word_right = INT32_MAX;
1101 }
1102 else {
1103 rep_char_it.forward ();
1104 next_rep_char_word_right =
1105 rep_char_it.data ()->bounding_box ().right ();
1106 }
1107 }
1108
1109 if (box_it.at_first () && rep_char_it.empty ()) {
1110 //at end of line
1111 word->set_flag (W_EOL, true);
1112 xstarts[1] = prev_x;
1113 }
1114 else {
1115 prev_blanks = blanks;
1116 prev_fuzzy_sp = fuzzy_sp;
1117 prev_fuzzy_non = fuzzy_non;
1118 }
1119 }
1120 }
1121 }
1122 while (!box_it.at_first ()); //until back at start
1123
1124 /* Insert any further repeated char words */
1125 while (!rep_char_it.empty ()) {
1126 word = rep_char_it.extract ();
1127 word_it.add_after_then_move (word);
1128
1129 /* Set spaces before repeated char word */
1130 repetition_spacing = find_mean_blob_spacing (word);
1131 current_gap = word->bounding_box ().left () - prev_x;
1132 if (current_gap > tosp_rep_space * repetition_spacing) {
1133 blanks = static_cast<uint8_t>(floor (current_gap / row->space_size));
1134 if (blanks < 1)
1135 blanks = 1;
1136 }
1137 else
1138 blanks = 0;
1139 if (tosp_debug_level > 5)
1140 tprintf(
1141 "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
1142 word->bounding_box().left(), word->bounding_box().bottom(),
1143 repetition_spacing, current_gap, blanks);
1144 word->set_blanks (blanks);
1145 //NO uncertainty
1146 word->set_flag (W_FUZZY_SP, false);
1147 word->set_flag (W_FUZZY_NON, false);
1148 prev_x = word->bounding_box ().right ();
1149 if (rep_char_it.empty ()) {
1150 //at end of line
1151 word->set_flag (W_EOL, true);
1152 xstarts[1] = prev_x;
1153 }
1154 else {
1155 rep_char_it.forward ();
1156 }
1157 }
1158 real_row = new ROW (row,
1159 static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size));
1160 word_it.set_to_list (real_row->word_list ());
1161 //put words in row
1162 word_it.add_list_after (&words);
1163 real_row->recalc_bounding_box ();
1164
1165 if (tosp_debug_level > 4) {
1166 tprintf ("Row: Made %d words in row ((%d,%d)(%d,%d))\n",
1167 word_count,
1168 real_row->bounding_box ().left (),
1169 real_row->bounding_box ().bottom (),
1170 real_row->bounding_box ().right (),
1171 real_row->bounding_box ().top ());
1172 }
1173 return real_row;
1174 }
1175 return nullptr;
1176}
@ W_FUZZY_SP
fuzzy space
Definition: werd.h:39
@ W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:40
WERD_LIST rep_words
Definition: blobbox.h:668
void set_blanks(uint8_t new_blanks)
Definition: werd.h:102
TBOX bounding_box() const
Definition: werd.cpp:148

◆ make_spline_rows()

void tesseract::Textord::make_spline_rows ( TO_BLOCK *  block,
float  gradient,
bool  testing_on 
)

Definition at line 2003 of file makerow.cpp.

2005 {
2006#ifndef GRAPHICS_DISABLED
2007 ScrollView::Color colour; //of row
2008#endif
2009 TO_ROW_IT row_it = block->get_rows ();
2010
2011 row_it.move_to_first ();
2012 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2013 if (row_it.data ()->blob_list ()->empty ())
2014 delete row_it.extract (); //nothing in it
2015 else
2016 make_baseline_spline (row_it.data (), block);
2017 }
2018 if (textord_old_baselines) {
2019#ifndef GRAPHICS_DISABLED
2020 if (testing_on) {
2021 colour = ScrollView::RED;
2022 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
2023 row_it.forward ()) {
2024 row_it.data ()->baseline.plot (to_win, colour);
2025 colour = static_cast<ScrollView::Color>(colour + 1);
2026 if (colour > ScrollView::MAGENTA)
2027 colour = ScrollView::RED;
2028 }
2029 }
2030#endif
2031 make_old_baselines(block, testing_on, gradient);
2032 }
2033#ifndef GRAPHICS_DISABLED
2034 if (testing_on) {
2035 colour = ScrollView::RED;
2036 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
2037 row_it.data ()->baseline.plot (to_win, colour);
2038 colour = static_cast<ScrollView::Color>(colour + 1);
2039 if (colour > ScrollView::MAGENTA)
2040 colour = ScrollView::RED;
2041 }
2042 }
2043#endif
2044}
bool textord_old_baselines
Definition: makerow.cpp:51
void make_baseline_spline(TO_ROW *row, TO_BLOCK *block)
Definition: makerow.cpp:2056

◆ set_use_cjk_fp_model()

void tesseract::Textord::set_use_cjk_fp_model ( bool  flag)
inline

Definition at line 95 of file textord.h.

95 {
96 use_cjk_fp_model_ = flag;
97 }

◆ TextordPage()

void tesseract::Textord::TextordPage ( PageSegMode  pageseg_mode,
const FCOORD &  reskew,
int  width,
int  height,
Pix *  binary_pix,
Pix *  thresholds_pix,
Pix *  grey_pix,
bool  use_box_bottoms,
BLOBNBOX_LIST *  diacritic_blobs,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

Definition at line 226 of file textord.cpp.

230 {
231 page_tr_.set_x(width);
232 page_tr_.set_y(height);
233 if (to_blocks->empty()) {
234 // AutoPageSeg was not used, so we need to find_components first.
235 find_components(binary_pix, blocks, to_blocks);
236 TO_BLOCK_IT it(to_blocks);
237 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
238 TO_BLOCK* to_block = it.data();
239 // Compute the edge offsets whether or not there is a grey_pix.
240 // We have by-passed auto page seg, so we have to run it here.
241 // By page segmentation mode there is no non-text to avoid running on.
242 to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
243 }
244 } else if (!PSM_SPARSE(pageseg_mode)) {
245 // AutoPageSeg does not need to find_components as it did that already.
246 // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
247 filter_blobs(page_tr_, to_blocks, true);
248 }
249
250 ASSERT_HOST(!to_blocks->empty());
251 if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
252 const FCOORD anticlockwise90(0.0f, 1.0f);
253 const FCOORD clockwise90(0.0f, -1.0f);
254 TO_BLOCK_IT it(to_blocks);
255 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
256 TO_BLOCK* to_block = it.data();
257 BLOCK* block = to_block->block;
258 // Create a fake poly_block in block from its bounding box.
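259 block->pdblk.set_poly_block(new POLY_BLOCK(block->pdblk.bounding_box(),
260 PT_VERTICAL_TEXT));
261 // Rotate the to_block along with its contained block and blobnbox lists.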
262 to_block->rotate(anticlockwise90);
263 // Set the block's rotation values to obey the convention followed in
264 // layout analysis for vertical text.
265 block->set_re_rotation(clockwise90);
266 block->set_classify_rotation(clockwise90);
267 }
268 }
269
270 TO_BLOCK_IT to_block_it(to_blocks);
271 TO_BLOCK* to_block = to_block_it.data();
272 // Make the rows in the block.
273 float gradient;
274 // Do it the old fashioned way.
275 if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
276 gradient = make_rows(page_tr_, to_blocks);
277 } else if (!PSM_SPARSE(pageseg_mode)) {
278 // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
279 gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE,
280 to_block, to_blocks);
281 } else {
282 gradient = 0.0f;
283 }
284 BaselineDetect baseline_detector(textord_baseline_debug,
285 reskew, to_blocks);
286 baseline_detector.ComputeStraightBaselines(use_box_bottoms);
287 baseline_detector.ComputeBaselineSplinesAndXheights(
288 page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr,
289 textord_show_final_rows, this);
290 // Now make the words in the lines.
291 if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
292 // SINGLE_LINE uses the old word maker on the single line.
293 make_words(this, page_tr_, gradient, blocks, to_blocks);
294 } else {
295 // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
296 // single word, and in SINGLE_CHAR mode, all the outlines
297 // go in a single blob.
298 TO_BLOCK* to_block = to_block_it.data();
299 make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
300 to_block->get_rows(), to_block->block->row_list());
301 }
302 // Remove empties.
303 cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
304 TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
305 // Compute the margins for each row in the block, to be used later for
306 // paragraph detection.
307 BLOCK_IT b_it(blocks);
308 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
309 b_it.data()->compute_row_margins();
310 }
311#ifndef GRAPHICS_DISABLED
312 close_to_win();
313#endif
314}
@ PT_VERTICAL_TEXT
Definition: capi.h:136
#define ASSERT_HOST(x)
Definition: errcode.h:88
void close_to_win()
Definition: drawtord.cpp:53
bool textord_show_final_rows
Definition: makerow.cpp:46
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:163
bool textord_heavy_nr
Definition: makerow.cpp:42
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:200
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: wordseg.cpp:96
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition: wordseg.cpp:52
@ PSM_SINGLE_BLOCK_VERT_TEXT
aligned text.
Definition: publictypes.h:170
@ PSM_SINGLE_CHAR
Treat the image as a single character.
Definition: publictypes.h:176
@ PSM_RAW_LINE
hacks that are Tesseract-specific.
Definition: publictypes.h:179
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:209
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey)
Definition: blobbox.cpp:1055
void rotate(const FCOORD &rotation)
Definition: blobbox.h:710
void set_re_rotation(const FCOORD &rotation)
Definition: ocrblock.h:137
void set_classify_rotation(const FCOORD &rotation)
Definition: ocrblock.h:143
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:116
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:57
void set_x(int16_t xin)
rewrite function
Definition: points.h:61
void set_y(int16_t yin)
rewrite function
Definition: points.h:65
Definition: points.h:189
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: tordmain.cpp:219

◆ to_spacing()

void tesseract::Textord::to_spacing ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks 
)

Definition at line 44 of file tospace.cpp.

47 {
48 TO_BLOCK_IT block_it; //iterator
49 TO_BLOCK *block; //current block;
50 TO_ROW *row; //current row
51 int block_index; //block number
52 int row_index; //row number
53 //estimated width of real spaces for whole block
54 int16_t block_space_gap_width;
55 //estimated width of non space gaps for whole block
56 int16_t block_non_space_gap_width;
57 bool old_text_ord_proportional;//old fixed/prop result
58
59 block_it.set_to_list (blocks);
60 block_index = 1;
61 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
62 block_it.forward ()) {
63 block = block_it.data ();
64 std::unique_ptr<GAPMAP> gapmap(new GAPMAP (block)); //map of big vert gaps in blk
65 block_spacing_stats(block,
66 gapmap.get(),
67 old_text_ord_proportional,
68 block_space_gap_width,
69 block_non_space_gap_width);
70 // Make sure relative values of block-level space and non-space gap
71 // widths are reasonable. The ratio of 1:3 is also used in
72 // block_spacing_stats, to correct the block_space_gap_width.
73 // Useful for arabic and hindi, when the non-space gap width is
74 // often over-estimated and should not be trusted. A similar ratio
75 // is found in block_spacing_stats.
76 if (tosp_old_to_method && tosp_old_to_constrain_sp_kn &&
77 static_cast<float>(block_space_gap_width) / block_non_space_gap_width < 3.0) {
78 block_non_space_gap_width = static_cast<int16_t>(floor (block_space_gap_width / 3.0));
79 }
80 // row iterator
81 TO_ROW_IT row_it(block->get_rows());
82 row_index = 1;
83 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
84 row = row_it.data ();
85 if ((row->pitch_decision == PITCH_DEF_PROP) ||
86 (row->pitch_decision == PITCH_CORR_PROP)) {
87 if ((tosp_debug_level > 0) && !old_text_ord_proportional)
88 tprintf ("Block %d Row %d: Now Proportional\n",
89 block_index, row_index);
90 row_spacing_stats(row,
91 gapmap.get(),
92 block_index,
93 row_index,
94 block_space_gap_width,
95 block_non_space_gap_width);
96 }
97 else {
98 if ((tosp_debug_level > 0) && old_text_ord_proportional)
99 tprintf
100 ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
101 block_index, row_index, row->pitch_decision,
102 row->fixed_pitch);
103 }
104#ifndef GRAPHICS_DISABLED
105 if (textord_show_initial_words)
106 plot_word_decisions (to_win, static_cast<int16_t>(row->fixed_pitch), row);
107#endif
108 row_index++;
109 }
110 block_index++;
111 }
112}
@ PITCH_DEF_PROP
Definition: blobbox.h:49
@ PITCH_CORR_PROP
Definition: blobbox.h:52
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
Definition: drawtord.cpp:247
bool textord_show_initial_words
Definition: tovars.cpp:23
float fixed_pitch
Definition: blobbox.h:651
PITCH_TYPE pitch_decision
Definition: blobbox.h:650
Definition: gap_map.h:17

◆ use_cjk_fp_model()

bool tesseract::Textord::use_cjk_fp_model ( ) const
inline

Definition at line 92 of file textord.h.

92 {
93 return use_cjk_fp_model_;
94 }

Member Data Documentation

◆ textord_baseline_debug

int tesseract::Textord::textord_baseline_debug = 0

"Baseline debug level"

Definition at line 377 of file textord.h.

◆ textord_blshift_maxshift

double tesseract::Textord::textord_blshift_maxshift = 0.00

"Max baseline shift"

Definition at line 396 of file textord.h.

◆ textord_blshift_xfraction

double tesseract::Textord::textord_blshift_xfraction = 9.99

"Min size of baseline shift"

Definition at line 397 of file textord.h.

◆ textord_initialasc_ile

double tesseract::Textord::textord_initialasc_ile = 0.90

"Ile of sizes for xheight guess"

Definition at line 381 of file textord.h.

◆ textord_initialx_ile

double tesseract::Textord::textord_initialx_ile = 0.75

"Ile of sizes for xheight guess"

Definition at line 380 of file textord.h.

◆ textord_max_noise_size

int tesseract::Textord::textord_max_noise_size = 7

"Pixel size of noise"

Definition at line 376 of file textord.h.

◆ textord_no_rejects

bool tesseract::Textord::textord_no_rejects = false

"Don't remove noise blobs"

Definition at line 373 of file textord.h.

◆ textord_noise_area_ratio

double tesseract::Textord::textord_noise_area_ratio = 0.7

"Fraction of bounding box for noise"

Definition at line 379 of file textord.h.

◆ textord_noise_debug

bool tesseract::Textord::textord_noise_debug = false

"Debug row garbage detector"

Definition at line 395 of file textord.h.

◆ textord_noise_hfract

double tesseract::Textord::textord_noise_hfract = 1.0/64

"Height fraction to discard outlines as speckle noise"

Definition at line 392 of file textord.h.

◆ textord_noise_normratio

double tesseract::Textord::textord_noise_normratio = 2.0

"Dot to norm ratio for deletion"

Definition at line 385 of file textord.h.

◆ textord_noise_rejrows

bool tesseract::Textord::textord_noise_rejrows = true

"Reject noise-like rows"

Definition at line 387 of file textord.h.

◆ textord_noise_rejwords

bool tesseract::Textord::textord_noise_rejwords = true

"Reject noise-like words"

Definition at line 386 of file textord.h.

◆ textord_noise_rowratio

double tesseract::Textord::textord_noise_rowratio = 6.0

"Dot to norm ratio for deletion"

Definition at line 394 of file textord.h.

◆ textord_noise_sizefraction

int tesseract::Textord::textord_noise_sizefraction = 10

"Fraction of size for maxima"

Definition at line 382 of file textord.h.

◆ textord_noise_sizelimit

double tesseract::Textord::textord_noise_sizelimit = 0.5

"Fraction of x for big t count"

Definition at line 383 of file textord.h.

◆ textord_noise_sncount

int tesseract::Textord::textord_noise_sncount = 1

"super norm blobs to save row"

Definition at line 393 of file textord.h.

◆ textord_noise_sxfract

double tesseract::Textord::textord_noise_sxfract = 0.4

"xh fract width error for norm blobs"

Definition at line 390 of file textord.h.

◆ textord_noise_syfract

double tesseract::Textord::textord_noise_syfract = 0.2

"xh fract error for norm blobs"

Definition at line 388 of file textord.h.

◆ textord_noise_translimit

int tesseract::Textord::textord_noise_translimit = 16

"Transitions for normal blob"

Definition at line 384 of file textord.h.

◆ textord_show_blobs

bool tesseract::Textord::textord_show_blobs = false

"Display unsorted blobs"

Definition at line 374 of file textord.h.

◆ textord_show_boxes

bool tesseract::Textord::textord_show_boxes = false

"Display boxes"

Definition at line 375 of file textord.h.

◆ textord_single_height_mode

bool tesseract::Textord::textord_single_height_mode = false

"Script has no xheight, so use a single mode for horizontal text"

Definition at line 261 of file textord.h.

◆ tosp_all_flips_fuzzy

bool tesseract::Textord::tosp_all_flips_fuzzy = false

"Pass ANY flip to context?"

Definition at line 287 of file textord.h.

◆ tosp_block_use_cert_spaces

bool tesseract::Textord::tosp_block_use_cert_spaces = true

"Only stat OBVIOUS spaces"

Definition at line 277 of file textord.h.

◆ tosp_debug_level

int tesseract::Textord::tosp_debug_level = 0

"Debug data"

Definition at line 302 of file textord.h.

◆ tosp_dont_fool_with_small_kerns

double tesseract::Textord::tosp_dont_fool_with_small_kerns = -1

"Limit use of xht gap with odd small kns"

Definition at line 365 of file textord.h.

◆ tosp_enough_small_gaps

double tesseract::Textord::tosp_enough_small_gaps = 0.65

"Fract of kerns reqd for isolated row stats"

Definition at line 343 of file textord.h.

◆ tosp_enough_space_samples_for_median

int tesseract::Textord::tosp_enough_space_samples_for_median = 3

"or should we use mean"

Definition at line 304 of file textord.h.

◆ tosp_few_samples

int tesseract::Textord::tosp_few_samples = 40

"No.gaps reqd with 1 large gap to treat as a table"

Definition at line 308 of file textord.h.

◆ tosp_flip_caution

double tesseract::Textord::tosp_flip_caution = 0.0

"Don't autoflip kn to sp when large separation"

Definition at line 361 of file textord.h.

◆ tosp_flip_fuzz_kn_to_sp

bool tesseract::Textord::tosp_flip_fuzz_kn_to_sp = true

"Default flip"

Definition at line 298 of file textord.h.

◆ tosp_flip_fuzz_sp_to_kn

bool tesseract::Textord::tosp_flip_fuzz_sp_to_kn = true

"Default flip"

Definition at line 299 of file textord.h.

◆ tosp_force_wordbreak_on_punct

bool tesseract::Textord::tosp_force_wordbreak_on_punct = false

"Force word breaks on punct to break long lines in non-space " "delimited langs"

Definition at line 271 of file textord.h.

◆ tosp_fuzzy_kn_fraction

double tesseract::Textord::tosp_fuzzy_kn_fraction = 0.5

"New fuzzy kn alg"

Definition at line 350 of file textord.h.

◆ tosp_fuzzy_limit_all

bool tesseract::Textord::tosp_fuzzy_limit_all = true

"Don't restrict kn->sp fuzzy limit to tables"

Definition at line 289 of file textord.h.

◆ tosp_fuzzy_sp_fraction

double tesseract::Textord::tosp_fuzzy_sp_fraction = 0.5

"New fuzzy sp alg"

Definition at line 351 of file textord.h.

◆ tosp_fuzzy_space_factor

double tesseract::Textord::tosp_fuzzy_space_factor = 0.6

"Fract of xheight for fuzz sp"

Definition at line 327 of file textord.h.

◆ tosp_fuzzy_space_factor1

double tesseract::Textord::tosp_fuzzy_space_factor1 = 0.5

"Fract of xheight for fuzz sp"

Definition at line 329 of file textord.h.

◆ tosp_fuzzy_space_factor2

double tesseract::Textord::tosp_fuzzy_space_factor2 = 0.72

"Fract of xheight for fuzz sp"

Definition at line 331 of file textord.h.

◆ tosp_gap_factor

double tesseract::Textord::tosp_gap_factor = 0.83

"gap ratio to flip sp->kern"

Definition at line 332 of file textord.h.

◆ tosp_ignore_big_gaps

double tesseract::Textord::tosp_ignore_big_gaps = -1

"xht multiplier"

Definition at line 339 of file textord.h.

◆ tosp_ignore_very_big_gaps

double tesseract::Textord::tosp_ignore_very_big_gaps = 3.5

"xht multiplier"

Definition at line 340 of file textord.h.

◆ tosp_improve_thresh

bool tesseract::Textord::tosp_improve_thresh = false

"Enable improvement heuristic"

Definition at line 301 of file textord.h.

◆ tosp_init_guess_kn_mult

double tesseract::Textord::tosp_init_guess_kn_mult = 2.2

"Thresh guess - mult kn by this"

Definition at line 355 of file textord.h.

◆ tosp_init_guess_xht_mult

double tesseract::Textord::tosp_init_guess_xht_mult = 0.28

"Thresh guess - mult xht by this"

Definition at line 357 of file textord.h.

◆ tosp_kern_gap_factor1

double tesseract::Textord::tosp_kern_gap_factor1 = 2.0

"gap ratio to flip kern->sp"

Definition at line 334 of file textord.h.

◆ tosp_kern_gap_factor2

double tesseract::Textord::tosp_kern_gap_factor2 = 1.3

"gap ratio to flip kern->sp"

Definition at line 336 of file textord.h.

◆ tosp_kern_gap_factor3

double tesseract::Textord::tosp_kern_gap_factor3 = 2.5

"gap ratio to flip kern->sp"

Definition at line 338 of file textord.h.

◆ tosp_large_kerning

double tesseract::Textord::tosp_large_kerning = 0.19

"Limit use of xht gap with large kns"

Definition at line 363 of file textord.h.

◆ tosp_max_sane_kn_thresh

double tesseract::Textord::tosp_max_sane_kn_thresh = 5.0

"Multiplier on kn to limit thresh"

Definition at line 359 of file textord.h.

◆ tosp_min_sane_kn_sp

double tesseract::Textord::tosp_min_sane_kn_sp = 1.5

"Don't trust spaces less than this time kn"

Definition at line 353 of file textord.h.

◆ tosp_narrow_aspect_ratio

double tesseract::Textord::tosp_narrow_aspect_ratio = 0.48

"narrow if w/h less than this"

Definition at line 322 of file textord.h.

◆ tosp_narrow_blobs_not_cert

bool tesseract::Textord::tosp_narrow_blobs_not_cert = true

"Only stat OBVIOUS spaces"

Definition at line 281 of file textord.h.

◆ tosp_narrow_fraction

double tesseract::Textord::tosp_narrow_fraction = 0.3

"Fract of xheight for narrow"

Definition at line 320 of file textord.h.

◆ tosp_near_lh_edge

double tesseract::Textord::tosp_near_lh_edge = 0

"Don't reduce box if the top left is non blank"

Definition at line 367 of file textord.h.

◆ tosp_old_sp_kn_th_factor

double tesseract::Textord::tosp_old_sp_kn_th_factor = 2.0

"Factor for defining space threshold in terms of space and " "kern sizes"

Definition at line 314 of file textord.h.

◆ tosp_old_to_bug_fix

bool tesseract::Textord::tosp_old_to_bug_fix = false

"Fix suspected bug in old code"

Definition at line 275 of file textord.h.

◆ tosp_old_to_constrain_sp_kn

bool tesseract::Textord::tosp_old_to_constrain_sp_kn = false

"Constrain relative values of inter and intra-word gaps for " "old_to_method."

Definition at line 266 of file textord.h.

◆ tosp_old_to_method

bool tesseract::Textord::tosp_old_to_method = false

"Space stats use prechopping?"

Definition at line 263 of file textord.h.

◆ tosp_only_small_gaps_for_kern

bool tesseract::Textord::tosp_only_small_gaps_for_kern = false

"Better guess"

Definition at line 286 of file textord.h.

◆ tosp_only_use_prop_rows

bool tesseract::Textord::tosp_only_use_prop_rows = true

"Block stats to use fixed pitch rows?"

Definition at line 268 of file textord.h.

◆ tosp_only_use_xht_gaps

bool tesseract::Textord::tosp_only_use_xht_gaps = false

"Only use within xht gap for wd breaks"

Definition at line 295 of file textord.h.

◆ tosp_pass_wide_fuzz_sp_to_context

double tesseract::Textord::tosp_pass_wide_fuzz_sp_to_context = 0.75

"How wide fuzzies need context"

Definition at line 371 of file textord.h.

◆ tosp_recovery_isolated_row_stats

bool tesseract::Textord::tosp_recovery_isolated_row_stats = true

"Use row alone when inadequate cert spaces"

Definition at line 285 of file textord.h.

◆ tosp_redo_kern_limit

int tesseract::Textord::tosp_redo_kern_limit = 10

"No.samples reqd to reestimate for row"

Definition at line 306 of file textord.h.

◆ tosp_rep_space

double tesseract::Textord::tosp_rep_space = 1.6

"rep gap multiplier for space"

Definition at line 341 of file textord.h.

◆ tosp_row_use_cert_spaces

bool tesseract::Textord::tosp_row_use_cert_spaces = true

"Only stat OBVIOUS spaces"

Definition at line 279 of file textord.h.

◆ tosp_row_use_cert_spaces1

bool tesseract::Textord::tosp_row_use_cert_spaces1 = true

"Only stat OBVIOUS spaces"

Definition at line 283 of file textord.h.

◆ tosp_rule_9_test_punct

bool tesseract::Textord::tosp_rule_9_test_punct = false

"Don't chng kn to space next to punct"

Definition at line 297 of file textord.h.

◆ tosp_sanity_method

int tesseract::Textord::tosp_sanity_method = 1

"How to avoid being silly"

Definition at line 311 of file textord.h.

◆ tosp_short_row

int tesseract::Textord::tosp_short_row = 20

"No.gaps reqd with few cert spaces to use certs"

Definition at line 310 of file textord.h.

◆ tosp_silly_kn_sp_gap

double tesseract::Textord::tosp_silly_kn_sp_gap = 0.2

"Don't let sp minus kn get too small"

Definition at line 369 of file textord.h.

◆ tosp_stats_use_xht_gaps

bool tesseract::Textord::tosp_stats_use_xht_gaps = true

"Use within xht gap for wd breaks"

Definition at line 291 of file textord.h.

◆ tosp_table_fuzzy_kn_sp_ratio

double tesseract::Textord::tosp_table_fuzzy_kn_sp_ratio = 3.0

"Fuzzy if less than this"

Definition at line 349 of file textord.h.

◆ tosp_table_kn_sp_ratio

double tesseract::Textord::tosp_table_kn_sp_ratio = 2.25

"Min difference of kn & sp in table"

Definition at line 345 of file textord.h.

◆ tosp_table_xht_sp_ratio

double tesseract::Textord::tosp_table_xht_sp_ratio = 0.33

"Expect spaces bigger than this"

Definition at line 347 of file textord.h.

◆ tosp_threshold_bias1

double tesseract::Textord::tosp_threshold_bias1 = 0

"how far between kern and space?"

Definition at line 316 of file textord.h.

◆ tosp_threshold_bias2

double tesseract::Textord::tosp_threshold_bias2 = 0

"how far between kern and space?"

Definition at line 318 of file textord.h.

◆ tosp_use_pre_chopping

bool tesseract::Textord::tosp_use_pre_chopping = false

"Space stats use prechopping?"

Definition at line 273 of file textord.h.

◆ tosp_use_xht_gaps

bool tesseract::Textord::tosp_use_xht_gaps = true

"Use within xht gap for wd breaks"

Definition at line 293 of file textord.h.

◆ tosp_wide_aspect_ratio

double tesseract::Textord::tosp_wide_aspect_ratio = 0.0

"wide if w/h less than this"

Definition at line 325 of file textord.h.

◆ tosp_wide_fraction

double tesseract::Textord::tosp_wide_fraction = 0.52

"Fract of xheight for wide"

Definition at line 323 of file textord.h.


The documentation for this class was generated from the following files: