tesseract 4.1.1
Loading...
Searching...
No Matches
wordseg.cpp File Reference
#include "blobbox.h"
#include "statistc.h"
#include "drawtord.h"
#include "makerow.h"
#include "pitsync1.h"
#include "tovars.h"
#include "topitch.h"
#include "cjkpitch.h"
#include "textord.h"
#include "fpchop.h"
#include "wordseg.h"

Go to the source code of this file.

Macros

#define BLOCK_STATS_CLUSTERS   10
 

Functions

make_single_word

For each row, arrange the blobs into one word. There is no fixed pitch detection.

void make_single_word (bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
 
void make_words (tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
 
set_row_spaces

Set the min_space and max_nonspace members of the row so that the blobs can be arranged into words.

void set_row_spaces (TO_BLOCK *block, FCOORD rotation, bool testing_on)
 
row_words

Compute the max nonspace and min space for the row.

int32_t row_words (TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
 
row_words2

Compute the max nonspace and min space for the row.

int32_t row_words2 (TO_BLOCK *block, TO_ROW *row, int32_t maxwidth, FCOORD rotation, bool testing_on)
 
make_real_words

Convert a TO_BLOCK to a BLOCK.

void make_real_words (tesseract::Textord *textord, TO_BLOCK *block, FCOORD rotation)
 
make_rep_words

Fabricate a real row from only the repeated blob words. Get the xheight from the block as it may be more meaningful.

ROW * make_rep_words (TO_ROW *row, TO_BLOCK *block)
 
make_real_word

Construct a WERD from a given number of adjacent entries in a list of BLOBNBOXs.

WERD * make_real_word (BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks)
 

Variables

bool textord_fp_chopping = true
 
bool textord_force_make_prop_words = false
 
bool textord_chopper_test = false
 

Macro Definition Documentation

◆ BLOCK_STATS_CLUSTERS

#define BLOCK_STATS_CLUSTERS   10

Definition at line 42 of file wordseg.cpp.

Function Documentation

◆ make_real_word()

WERD * make_real_word ( BLOBNBOX_IT *  box_it,
int32_t  blobcount,
bool  bol,
uint8_t  blanks 
)

Definition at line 579 of file wordseg.cpp.

583 {
584 C_OUTLINE_IT cout_it;
585 C_BLOB_LIST cblobs;
586 C_BLOB_IT cblob_it = &cblobs;
587 WERD *word; // new word
588 BLOBNBOX *bblob; // current blob
589 int32_t blobindex; // in row
590
591 for (blobindex = 0; blobindex < blobcount; blobindex++) {
592 bblob = box_it->extract();
593 if (bblob->joined_to_prev()) {
594 if (bblob->cblob() != nullptr) {
595 cout_it.set_to_list(cblob_it.data()->out_list());
596 cout_it.move_to_last();
597 cout_it.add_list_after(bblob->cblob()->out_list());
598 delete bblob->cblob();
599 }
600 }
601 else {
602 if (bblob->cblob() != nullptr)
603 cblob_it.add_after_then_move(bblob->cblob());
604 }
605 delete bblob;
606 box_it->forward(); // next one
607 }
608
609 if (blanks < 1)
610 blanks = 1;
611
612 word = new WERD(&cblobs, blanks, nullptr);
613
614 if (bol)
615 word->set_flag(W_BOL, true);
616 if (box_it->at_first())
617 word->set_flag(W_EOL, true); // at end of line
618
619 return word;
620}
@ W_EOL
end of line
Definition: werd.h:33
@ W_BOL
start of line
Definition: werd.h:32
C_BLOB * cblob() const
Definition: blobbox.h:268
bool joined_to_prev() const
Definition: blobbox.h:256
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:70
Definition: werd.h:56
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118

◆ make_real_words()

void make_real_words ( tesseract::Textord *  textord,
TO_BLOCK *  block,
FCOORD  rotation 
)

Definition at line 491 of file wordseg.cpp.

495 {
496 TO_ROW *row; //current row
497 TO_ROW_IT row_it = block->get_rows ();
498 ROW *real_row = nullptr; //output row
499 ROW_IT real_row_it = block->block->row_list ();
500
501 if (row_it.empty ())
502 return; //empty block
503 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
504 row = row_it.data ();
505 if (row->blob_list ()->empty () && !row->rep_words.empty ()) {
506 real_row = make_rep_words (row, block);
507 } else if (!row->blob_list()->empty()) {
508 // In a fixed pitch document, some lines may be detected as fixed pitch
509 // while others don't, and will go through different path.
510 // For non-space delimited language like CJK, fixed pitch chop always
511 // leave the entire line as one word. We can force consistent chopping
512 // with force_make_prop_words flag.
513 POLY_BLOCK* pb = block->block->pdblk.poly_block();
515 real_row = textord->make_blob_words (row, rotation);
517 (pb != nullptr && !pb->IsText()) ||
520 real_row = textord->make_prop_words (row, rotation);
521 } else if (row->pitch_decision == PITCH_DEF_FIXED ||
523 real_row = fixed_pitch_words (row, rotation);
524 } else {
525 ASSERT_HOST(false);
526 }
527 }
528 if (real_row != nullptr) {
529 //put row in block
530 real_row_it.add_after_then_move (real_row);
531 }
532 }
533 block->block->set_stats (block->fixed_pitch == 0, static_cast<int16_t>(block->kern_size),
534 static_cast<int16_t>(block->space_size),
535 static_cast<int16_t>(block->fixed_pitch));
536 block->block->check_pitch ();
537}
@ PITCH_DEF_FIXED
Definition: blobbox.h:47
@ PITCH_CORR_FIXED
Definition: blobbox.h:51
@ PITCH_DEF_PROP
Definition: blobbox.h:49
@ PITCH_CORR_PROP
Definition: blobbox.h:52
#define ASSERT_HOST(x)
Definition: errcode.h:88
ROW * fixed_pitch_words(TO_ROW *row, FCOORD rotation)
Definition: fpchop.cpp:44
bool textord_chopper_test
Definition: wordseg.cpp:40
bool textord_force_make_prop_words
Definition: wordseg.cpp:38
ROW * make_rep_words(TO_ROW *row, TO_BLOCK *block)
Definition: wordseg.cpp:547
PITCH_TYPE pitch_decision
Definition: blobbox.h:650
WERD_LIST rep_words
Definition: blobbox.h:668
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:600
BLOCK * block
Definition: blobbox.h:777
TO_ROW_LIST * get_rows()
Definition: blobbox.h:704
float space_size
Definition: blobbox.h:791
float kern_size
Definition: blobbox.h:790
float fixed_pitch
Definition: blobbox.h:789
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:116
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:190
void check_pitch()
check proportional
Definition: ocrblock.cpp:164
void set_stats(bool prop, int16_t kern, int16_t space, int16_t ch_pitch)
Definition: ocrblock.h:58
Definition: ocrrow.h:37
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
bool IsText() const
Definition: polyblk.h:49
ROW * make_prop_words(TO_ROW *row, FCOORD rotation)
Definition: tospace.cpp:891
ROW * make_blob_words(TO_ROW *row, FCOORD rotation)
Definition: tospace.cpp:1184

◆ make_rep_words()

ROW * make_rep_words ( TO_ROW *  row,
TO_BLOCK *  block 
)

Definition at line 547 of file wordseg.cpp.

550 {
551 ROW *real_row; //output row
552 TBOX word_box; //bounding box
553 //iterator
554 WERD_IT word_it = &row->rep_words;
555
556 if (word_it.empty ())
557 return nullptr;
558 word_box = word_it.data ()->bounding_box ();
559 for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ())
560 word_box += word_it.data ()->bounding_box ();
561 row->xheight = block->xheight;
562 real_row = new ROW(row,
563 static_cast<int16_t>(block->kern_size), static_cast<int16_t>(block->space_size));
564 word_it.set_to_list (real_row->word_list ());
565 //put words in row
566 word_it.add_list_after (&row->rep_words);
567 real_row->recalc_bounding_box ();
568 return real_row;
569}
float xheight
Definition: blobbox.h:657
float xheight
Definition: blobbox.h:788
WERD_LIST * word_list()
Definition: ocrrow.h:55
void recalc_bounding_box()
Definition: ocrrow.cpp:100
Definition: rect.h:34

◆ make_single_word()

void make_single_word ( bool  one_blob,
TO_ROW_LIST *  rows,
ROW_LIST *  real_rows 
)

Definition at line 52 of file wordseg.cpp.

52 {
53 TO_ROW_IT to_row_it(rows);
54 ROW_IT row_it(real_rows);
55 for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list();
56 to_row_it.forward()) {
57 TO_ROW* row = to_row_it.data();
58 // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready
59 // to create the word.
60 C_BLOB_LIST cblobs;
61 C_BLOB_IT cblob_it(&cblobs);
62 BLOBNBOX_IT box_it(row->blob_list());
63 for (;!box_it.empty(); box_it.forward()) {
64 BLOBNBOX* bblob= box_it.extract();
65 if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) {
66 if (bblob->cblob() != nullptr) {
67 C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
68 cout_it.move_to_last();
69 cout_it.add_list_after(bblob->cblob()->out_list());
70 delete bblob->cblob();
71 }
72 } else {
73 if (bblob->cblob() != nullptr)
74 cblob_it.add_after_then_move(bblob->cblob());
75 }
76 delete bblob;
77 }
78 // Convert the TO_ROW to a ROW.
79 ROW* real_row = new ROW(row, static_cast<int16_t>(row->kern_size),
80 static_cast<int16_t>(row->space_size));
81 WERD_IT word_it(real_row->word_list());
82 WERD* word = new WERD(&cblobs, 0, nullptr);
83 word->set_flag(W_BOL, true);
84 word->set_flag(W_EOL, true);
85 word->set_flag(W_DONT_CHOP, one_blob);
86 word_it.add_after_then_move(word);
87 row_it.add_after_then_move(real_row);
88 }
89}
@ W_DONT_CHOP
fixed pitch chopped
Definition: werd.h:37
float kern_size
Definition: blobbox.h:666
float space_size
Definition: blobbox.h:667

◆ make_words()

void make_words ( tesseract::Textord *  textord,
ICOORD  page_tr,
float  gradient,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  port_blocks 
)

make_words

Arrange the blobs into words.

Definition at line 96 of file wordseg.cpp.

100 { // output list
101 TO_BLOCK_IT block_it; // iterator
102 TO_BLOCK *block; // current block
103
104 if (textord->use_cjk_fp_model()) {
105 compute_fixed_pitch_cjk(page_tr, port_blocks);
106 } else {
107 compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
109 }
110 textord->to_spacing(page_tr, port_blocks);
111 block_it.set_to_list(port_blocks);
112 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
113 block = block_it.data();
114 make_real_words(textord, block, FCOORD(1.0f, 0.0f));
115 }
116}
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: cjkpitch.cpp:1040
bool textord_test_landscape
Definition: makerow.cpp:48
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
Definition: topitch.cpp:80
void make_real_words(tesseract::Textord *textord, TO_BLOCK *block, FCOORD rotation)
Definition: wordseg.cpp:491
Definition: points.h:189
bool use_cjk_fp_model() const
Definition: textord.h:92
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
Definition: tospace.cpp:44

◆ row_words()

int32_t row_words ( TO_BLOCK *  block,
TO_ROW *  row,
int32_t  maxwidth,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 170 of file wordseg.cpp.

176 {
177 bool testing_row; //contains testpt
178 bool prev_valid; //if decent size
179 int32_t prev_x; //end of prev blob
180 int32_t cluster_count; //no of clusters
181 int32_t gap_index; //which cluster
182 int32_t smooth_factor; //for smoothing stats
183 BLOBNBOX *blob; //current blob
184 float lower, upper; //clustering parameters
185 float gaps[3]; //gap clusers
186 ICOORD testpt;
187 TBOX blob_box; //bounding box
188 //iterator
189 BLOBNBOX_IT blob_it = row->blob_list ();
190 STATS gap_stats (0, maxwidth);
191 STATS cluster_stats[4]; //clusters
192
194 smooth_factor =
195 static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5);
196 // if (testing_on)
197 // tprintf("Row smooth factor=%d\n",smooth_factor);
198 prev_valid = false;
199 prev_x = -INT32_MAX;
200 testing_row = false;
201 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
202 blob = blob_it.data ();
203 blob_box = blob->bounding_box ();
204 if (blob_box.contains (testpt))
205 testing_row = true;
206 gap_stats.add (blob_box.width (), 1);
207 }
208 gap_stats.clear ();
209 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
210 blob = blob_it.data ();
211 if (!blob->joined_to_prev ()) {
212 blob_box = blob->bounding_box ();
213 if (prev_valid && blob_box.left () - prev_x < maxwidth) {
214 gap_stats.add (blob_box.left () - prev_x, 1);
215 }
216 prev_valid = true;
217 prev_x = blob_box.right ();
218 }
219 }
220 if (gap_stats.get_total () == 0) {
221 row->min_space = 0; //no evidence
222 row->max_nonspace = 0;
223 return 0;
224 }
225 gap_stats.smooth (smooth_factor);
228 cluster_count = gap_stats.cluster (lower, upper,
230 cluster_stats);
231 while (cluster_count < 2 && ceil (lower) < floor (upper)) {
232 //shrink gap
233 upper = (upper * 3 + lower) / 4;
234 lower = (lower * 3 + upper) / 4;
235 cluster_count = gap_stats.cluster (lower, upper,
237 cluster_stats);
238 }
239 if (cluster_count < 2) {
240 row->min_space = 0; //no evidence
241 row->max_nonspace = 0;
242 return 0;
243 }
244 for (gap_index = 0; gap_index < cluster_count; gap_index++)
245 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
246 //get medians
247 if (cluster_count > 2) {
248 if (testing_on && textord_show_initial_words) {
249 tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n",
250 row->intercept (),
251 cluster_stats[1].ile (0.5),
252 cluster_stats[2].ile (0.5), cluster_stats[3].ile (0.5));
253 }
254 lower = gaps[0];
255 if (gaps[1] > lower) {
256 upper = gaps[1]; //prefer most frequent
257 if (upper < block->xheight * textord_words_min_minspace
258 && gaps[2] > gaps[1]) {
259 upper = gaps[2];
260 }
261 }
262 else if (gaps[2] > lower
263 && gaps[2] >= block->xheight * textord_words_min_minspace)
264 upper = gaps[2];
265 else if (lower >= block->xheight * textord_words_min_minspace) {
266 upper = lower; //not nice
267 lower = gaps[1];
268 if (testing_on && textord_show_initial_words) {
269 tprintf ("Had to switch most common from lower to upper!!\n");
270 gap_stats.print();
271 }
272 }
273 else {
274 row->min_space = 0; //no evidence
275 row->max_nonspace = 0;
276 return 0;
277 }
278 }
279 else {
280 if (gaps[1] < gaps[0]) {
281 if (testing_on && textord_show_initial_words) {
282 tprintf ("Had to switch most common from lower to upper!!\n");
283 gap_stats.print();
284 }
285 lower = gaps[1];
286 upper = gaps[0];
287 }
288 else {
289 upper = gaps[1];
290 lower = gaps[0];
291 }
292 }
293 if (upper < block->xheight * textord_words_min_minspace) {
294 row->min_space = 0; //no evidence
295 row->max_nonspace = 0;
296 return 0;
297 }
298 if (upper * 3 < block->min_space * 2 + block->max_nonspace
299 || lower * 3 > block->min_space * 2 + block->max_nonspace) {
300 if (testing_on && textord_show_initial_words) {
301 tprintf ("Disagreement between block and row at %g!!\n",
302 row->intercept ());
303 tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper);
304 gap_stats.print();
305 }
306 }
307 row->min_space =
308 static_cast<int32_t>(ceil (upper - (upper - lower) * textord_words_definite_spread));
309 row->max_nonspace =
310 static_cast<int32_t>(floor (lower + (upper - lower) * textord_words_definite_spread));
311 row->space_threshold = (row->max_nonspace + row->min_space) / 2;
312 row->space_size = upper;
313 row->kern_size = lower;
314 if (testing_on && textord_show_initial_words) {
315 if (testing_row) {
316 tprintf ("GAP STATS\n");
317 gap_stats.print();
318 tprintf ("SPACE stats\n");
319 cluster_stats[2].print_summary();
320 tprintf ("NONSPACE stats\n");
321 cluster_stats[1].print_summary();
322 }
323 tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
324 row->intercept (), row->min_space, upper,
325 row->max_nonspace, lower);
326 }
327 return cluster_stats[2].get_total ();
328}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int textord_test_x
Definition: makerow.cpp:60
int textord_test_y
Definition: makerow.cpp:61
double textord_words_initial_lower
Definition: tovars.cpp:51
bool textord_show_initial_words
Definition: tovars.cpp:23
double textord_spacesize_ratioprop
Definition: tovars.cpp:78
double textord_wordstats_smooth_factor
Definition: tovars.cpp:37
double textord_words_definite_spread
Definition: tovars.cpp:74
double textord_words_min_minspace
Definition: tovars.cpp:47
double textord_words_initial_upper
Definition: tovars.cpp:53
const TBOX & bounding_box() const
Definition: blobbox.h:230
float intercept() const
Definition: blobbox.h:589
int32_t space_threshold
Definition: blobbox.h:665
int32_t max_nonspace
Definition: blobbox.h:664
int32_t min_space
Definition: blobbox.h:663
int32_t max_nonspace
Definition: blobbox.h:793
int32_t min_space
Definition: blobbox.h:792
integer coordinate
Definition: points.h:32
int16_t width() const
Definition: rect.h:115
int16_t left() const
Definition: rect.h:72
bool contains(const FCOORD pt) const
Definition: rect.h:333
int16_t right() const
Definition: rect.h:79
Definition: statistc.h:31
int32_t get_total() const
Definition: statistc.h:84
void print_summary() const
Definition: statistc.cpp:552

◆ row_words2()

int32_t row_words2 ( TO_BLOCK *  block,
TO_ROW *  row,
int32_t  maxwidth,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 337 of file wordseg.cpp.

343 {
344 bool prev_valid; //if decent size
345 bool this_valid; //current blob big enough
346 int32_t prev_x; //end of prev blob
347 int32_t min_width; //min interesting width
348 int32_t valid_count; //good gaps
349 int32_t total_count; //total gaps
350 int32_t cluster_count; //no of clusters
351 int32_t prev_count; //previous cluster_count
352 int32_t gap_index; //which cluster
353 int32_t smooth_factor; //for smoothing stats
354 BLOBNBOX *blob; //current blob
355 float lower, upper; //clustering parameters
356 ICOORD testpt;
357 TBOX blob_box; //bounding box
358 //iterator
359 BLOBNBOX_IT blob_it = row->blob_list ();
360 STATS gap_stats (0, maxwidth);
361 //gap sizes
362 float gaps[BLOCK_STATS_CLUSTERS];
363 STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
364 //clusters
365
367 smooth_factor =
368 static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5);
369 // if (testing_on)
370 // tprintf("Row smooth factor=%d\n",smooth_factor);
371 prev_valid = false;
372 prev_x = -INT16_MAX;
373 const bool testing_row = false;
374 //min blob size
375 min_width = static_cast<int32_t>(block->pr_space);
376 total_count = 0;
377 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
378 blob = blob_it.data ();
379 if (!blob->joined_to_prev ()) {
380 blob_box = blob->bounding_box ();
381 this_valid = blob_box.width () >= min_width;
382 if (this_valid && prev_valid
383 && blob_box.left () - prev_x < maxwidth) {
384 gap_stats.add (blob_box.left () - prev_x, 1);
385 }
386 total_count++; //count possibles
387 prev_x = blob_box.right ();
388 prev_valid = this_valid;
389 }
390 }
391 valid_count = gap_stats.get_total ();
392 if (valid_count < total_count * textord_words_minlarge) {
393 gap_stats.clear ();
394 prev_x = -INT16_MAX;
395 for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
396 blob_it.forward ()) {
397 blob = blob_it.data ();
398 if (!blob->joined_to_prev ()) {
399 blob_box = blob->bounding_box ();
400 if (blob_box.left () - prev_x < maxwidth) {
401 gap_stats.add (blob_box.left () - prev_x, 1);
402 }
403 prev_x = blob_box.right ();
404 }
405 }
406 }
407 if (gap_stats.get_total () == 0) {
408 row->min_space = 0; //no evidence
409 row->max_nonspace = 0;
410 return 0;
411 }
412
413 cluster_count = 0;
414 lower = block->xheight * words_initial_lower;
415 upper = block->xheight * words_initial_upper;
416 gap_stats.smooth (smooth_factor);
417 do {
418 prev_count = cluster_count;
419 cluster_count = gap_stats.cluster (lower, upper,
421 BLOCK_STATS_CLUSTERS, cluster_stats);
422 }
423 while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
424 if (cluster_count < 1) {
425 row->min_space = 0;
426 row->max_nonspace = 0;
427 return 0;
428 }
429 for (gap_index = 0; gap_index < cluster_count; gap_index++)
430 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
431 //get medians
432 if (testing_on) {
433 tprintf ("cluster_count=%d:", cluster_count);
434 for (gap_index = 0; gap_index < cluster_count; gap_index++)
435 tprintf (" %g(%d)", gaps[gap_index],
436 cluster_stats[gap_index + 1].get_total ());
437 tprintf ("\n");
438 }
439
440 //Try to find proportional non-space and space for row.
441 for (gap_index = 0; gap_index < cluster_count
442 && gaps[gap_index] > block->max_nonspace; gap_index++);
443 if (gap_index < cluster_count)
444 lower = gaps[gap_index]; //most frequent below
445 else {
446 if (testing_on)
447 tprintf ("No cluster below block threshold!, using default=%g\n",
448 block->pr_nonsp);
449 lower = block->pr_nonsp;
450 }
451 for (gap_index = 0; gap_index < cluster_count
452 && gaps[gap_index] <= block->max_nonspace; gap_index++);
453 if (gap_index < cluster_count)
454 upper = gaps[gap_index]; //most frequent above
455 else {
456 if (testing_on)
457 tprintf ("No cluster above block threshold!, using default=%g\n",
458 block->pr_space);
459 upper = block->pr_space;
460 }
461 row->min_space =
462 static_cast<int32_t>(ceil (upper - (upper - lower) * textord_words_definite_spread));
463 row->max_nonspace =
464 static_cast<int32_t>(floor (lower + (upper - lower) * textord_words_definite_spread));
465 row->space_threshold = (row->max_nonspace + row->min_space) / 2;
466 row->space_size = upper;
467 row->kern_size = lower;
468 if (testing_on) {
469 if (testing_row) {
470 tprintf ("GAP STATS\n");
471 gap_stats.print();
472 tprintf ("SPACE stats\n");
473 cluster_stats[2].print_summary();
474 tprintf ("NONSPACE stats\n");
475 cluster_stats[1].print_summary();
476 }
477 tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
478 row->intercept (), row->min_space, upper,
479 row->max_nonspace, lower);
480 }
481 return 1;
482}
double words_initial_upper
Definition: tovars.cpp:69
double words_initial_lower
Definition: tovars.cpp:68
double textord_words_minlarge
Definition: tovars.cpp:55
#define BLOCK_STATS_CLUSTERS
Definition: wordseg.cpp:42
float pr_nonsp
Definition: blobbox.h:797
float pr_space
Definition: blobbox.h:796

◆ set_row_spaces()

void set_row_spaces ( TO_BLOCK *  block,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 126 of file wordseg.cpp.

130 {
131 TO_ROW *row; //current row
132 TO_ROW_IT row_it = block->get_rows ();
133
134 if (row_it.empty ())
135 return; //empty block
136 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
137 row = row_it.data ();
138 if (row->fixed_pitch == 0) {
139 row->min_space =
140 static_cast<int32_t>(ceil (row->pr_space -
141 (row->pr_space -
143 row->max_nonspace =
144 static_cast<int32_t>(floor (row->pr_nonsp +
145 (row->pr_space -
147 if (testing_on && textord_show_initial_words) {
148 tprintf ("Assigning defaults %d non, %d space to row at %g\n",
149 row->max_nonspace, row->min_space, row->intercept ());
150 }
151 row->space_threshold = (row->max_nonspace + row->min_space) / 2;
152 row->space_size = row->pr_space;
153 row->kern_size = row->pr_nonsp;
154 }
155#ifndef GRAPHICS_DISABLED
156 if (textord_show_initial_words && testing_on) {
157 plot_word_decisions (to_win, static_cast<int16_t>(row->fixed_pitch), row);
158 }
159#endif
160 }
161}
ScrollView * to_win
Definition: drawtord.cpp:35
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
Definition: drawtord.cpp:247
float fixed_pitch
Definition: blobbox.h:651
float pr_space
Definition: blobbox.h:654
float pr_nonsp
Definition: blobbox.h:655

Variable Documentation

◆ textord_chopper_test

bool textord_chopper_test = false

"Chopper is being tested."

Definition at line 40 of file wordseg.cpp.

◆ textord_force_make_prop_words

bool textord_force_make_prop_words = false

"Force proportional word segmentation on all rows"

Definition at line 38 of file wordseg.cpp.

◆ textord_fp_chopping

bool textord_fp_chopping = true

"Do fixed pitch chopping"

Definition at line 36 of file wordseg.cpp.