tesseract 4.1.1
Loading...
Searching...
No Matches
topitch.h File Reference
#include "blobbox.h"

Go to the source code of this file.

Namespaces

namespace  tesseract
 

Functions

void compute_fixed_pitch (ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, bool testing_on)
 
void fix_row_pitch (TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, int32_t row_target, int32_t block_target)
 
void compute_block_pitch (TO_BLOCK *block, FCOORD rotation, int32_t block_index, bool testing_on)
 
bool compute_rows_pitch (TO_BLOCK *block, int32_t block_index, bool testing_on)
 
bool try_doc_fixed (ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
 
bool try_block_fixed (TO_BLOCK *block, int32_t block_index)
 
bool try_rows_fixed (TO_BLOCK *block, int32_t block_index, bool testing_on)
 
void print_block_counts (TO_BLOCK *block, int32_t block_index)
 
void count_block_votes (TO_BLOCK *block, int32_t &def_fixed, int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed, int32_t &corr_prop, int32_t &dunno)
 
bool row_pitch_stats (TO_ROW *row, int32_t maxwidth, bool testing_on)
 
bool find_row_pitch (TO_ROW *row, int32_t maxwidth, int32_t dm_gap, TO_BLOCK *block, int32_t block_index, int32_t row_index, bool testing_on)
 
bool fixed_pitch_row (TO_ROW *row, BLOCK *block, int32_t block_index)
 
bool count_pitch_stats (TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, bool ignore_outsize, bool split_outsize, int32_t dm_gap)
 
float tune_row_pitch (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
 
float tune_row_pitch2 (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
 
float compute_pitch_sd (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch, float &sp_sd, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start=0, int16_t end=0)
 
float compute_pitch_sd2 (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float initial_pitch, int16_t &occupation, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start=0, int16_t end=0)
 
void print_pitch_sd (TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch)
 
void find_repeated_chars (TO_BLOCK *block, bool testing_on)
 
void plot_fp_word (TO_BLOCK *block, float pitch, float nonspace)
 

Variables

bool textord_debug_pitch_test = false
 
bool textord_debug_pitch_metric = false
 
bool textord_show_row_cuts = false
 
bool textord_show_page_cuts = false
 
bool textord_pitch_cheat = false
 
bool textord_blockndoc_fixed = true
 
bool textord_fast_pitch_test = false
 
double textord_projection_scale = 0.125
 
double textord_balance_factor = 2.0
 

Function Documentation

◆ compute_block_pitch()

void compute_block_pitch ( TO_BLOCK * block,
FCOORD  rotation,
int32_t  block_index,
bool  testing_on 
)

Definition at line 314 of file topitch.cpp.

317 { // correct orientation
318 TBOX block_box; //bounding box
319
320 block_box = block->block->pdblk.bounding_box ();
321 if (testing_on && textord_debug_pitch_test) {
322 tprintf ("Block %d at (%d,%d)->(%d,%d)\n",
323 block_index,
324 block_box.left (), block_box.bottom (),
325 block_box.right (), block_box.top ());
326 }
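327 block->min_space = static_cast<int32_t>(floor (block->xheight
328 * textord_words_default_minspace));
329 block->max_nonspace = static_cast<int32_t>(ceil (block->xheight
330 * textord_words_default_nonspace));
331 block->fixed_pitch = 0.0f;
332 block->space_size = static_cast<float>(block->min_space);
333 block->kern_size = static_cast<float>(block->max_nonspace);
334 block->pr_nonsp = block->xheight * words_default_prop_nonspace;
335 block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop;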
336 if (!block->get_rows ()->empty ()) {
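337 ASSERT_HOST (block->xheight > 0);
338 find_repeated_chars(block, textord_show_initial_words && testing_on);
339#ifndef GRAPHICS_DISABLED
340 if (textord_show_initial_words && testing_on)
341 //overlap_picture_ops(true);
342 ScrollView::Update();
343#endif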
344 compute_rows_pitch(block,
345 block_index,
346 textord_debug_pitch_test && testing_on);
347 }
348}
#define ASSERT_HOST(x)
Definition: errcode.h:88
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
void find_repeated_chars(TO_BLOCK *block, bool testing_on)
Definition: topitch.cpp:1759
bool textord_debug_pitch_test
Definition: topitch.cpp:39
bool compute_rows_pitch(TO_BLOCK *block, int32_t block_index, bool testing_on)
Definition: topitch.cpp:357
bool textord_show_initial_words
Definition: tovars.cpp:23
double textord_spacesize_ratioprop
Definition: tovars.cpp:78
double textord_words_default_nonspace
Definition: tovars.cpp:49
double words_default_prop_nonspace
Definition: tovars.cpp:70
double textord_words_default_minspace
Definition: tovars.cpp:46
BLOCK * block
Definition: blobbox.h:777
float pr_nonsp
Definition: blobbox.h:797
int32_t max_nonspace
Definition: blobbox.h:793
float xheight
Definition: blobbox.h:788
TO_ROW_LIST * get_rows()
Definition: blobbox.h:704
float space_size
Definition: blobbox.h:791
int32_t min_space
Definition: blobbox.h:792
float kern_size
Definition: blobbox.h:790
float pr_space
Definition: blobbox.h:796
float fixed_pitch
Definition: blobbox.h:789
PDBLK pdblk
Page Description Block.
Definition: ocrblock.h:190
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
Definition: rect.h:34
int16_t top() const
Definition: rect.h:58
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
int16_t right() const
Definition: rect.h:79
static void Update()
Definition: scrollview.cpp:709

◆ compute_fixed_pitch()

void compute_fixed_pitch ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks,
float  gradient,
FCOORD  rotation,
bool  testing_on 
)

Definition at line 80 of file topitch.cpp.

84 { // correct orientation
85 TO_BLOCK_IT block_it; //iterator
86 TO_BLOCK *block; //current block;
87 TO_ROW *row; //current row
88 int block_index; //block number
89 int row_index; //row number
90
91#ifndef GRAPHICS_DISABLED
92 if (textord_show_initial_words && testing_on) {
93 if (to_win == nullptr)
94 create_to_win(page_tr);
95 }
96#endif
97
98 block_it.set_to_list (port_blocks);
99 block_index = 1;
100 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
101 block_it.forward ()) {
102 block = block_it.data ();
103 compute_block_pitch(block, rotation, block_index, testing_on);
104 block_index++;
105 }
106
107 if (!try_doc_fixed (page_tr, port_blocks, gradient)) {
108 block_index = 1;
109 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
110 block_it.forward ()) {
111 block = block_it.data ();
112 if (!try_block_fixed (block, block_index))
113 try_rows_fixed(block, block_index, testing_on);
114 block_index++;
115 }
116 }
117
118 block_index = 1;
119 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
120 block_it.forward()) {
121 block = block_it.data ();
122 POLY_BLOCK* pb = block->block->pdblk.poly_block();
123 if (pb != nullptr && !pb->IsText()) continue; // Non-text doesn't exist!
124 // row iterator
125 TO_ROW_IT row_it(block->get_rows());
126 row_index = 1;
127 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
128 row = row_it.data ();
129 fix_row_pitch(row, block, port_blocks, row_index, block_index);
130 row_index++;
131 }
132 block_index++;
133 }
134#ifndef GRAPHICS_DISABLED
135 if (textord_show_initial_words && testing_on) {
136 ScrollView::Update();
137 }
138#endif
139}
ScrollView * to_win
Definition: drawtord.cpp:35
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:44
void fix_row_pitch(TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, int32_t row_target, int32_t block_target)
Definition: topitch.cpp:149
void compute_block_pitch(TO_BLOCK *block, FCOORD rotation, int32_t block_index, bool testing_on)
Definition: topitch.cpp:314
bool try_block_fixed(TO_BLOCK *block, int32_t block_index)
Definition: topitch.cpp:541
bool try_doc_fixed(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
Definition: topitch.cpp:401
bool try_rows_fixed(TO_BLOCK *block, int32_t block_index, bool testing_on)
Definition: topitch.cpp:555
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
bool IsText() const
Definition: polyblk.h:49

◆ compute_pitch_sd()

float compute_pitch_sd ( TO_ROW * row,
STATS * projection,
int16_t  projection_left,
int16_t  projection_right,
float  space_size,
float  initial_pitch,
float &  sp_sd,
int16_t &  mid_cuts,
ICOORDELT_LIST *  row_cells,
bool  testing_on,
int16_t  start = 0,
int16_t  end = 0 
)

Definition at line 1376 of file topitch.cpp.

1389 {
1390 int16_t occupation; //no of cells in word.
1391 //blobs
1392 BLOBNBOX_IT blob_it = row->blob_list ();
1393 BLOBNBOX_IT start_it; //start of word
1394 BLOBNBOX_IT plot_it; //for plotting
1395 int16_t blob_count; //no of blobs
1396 TBOX blob_box; //bounding box
1397 TBOX prev_box; //of super blob
1398 int32_t prev_right; //of word sync
1399 int scale_factor; //on scores for big words
1400 int32_t sp_count; //spaces
1401 FPSEGPT_LIST seg_list; //char cells
1402 FPSEGPT_IT seg_it; //iterator
1403 int16_t segpos; //position of segment
1404 int16_t cellpos; //previous cell boundary
1405 //iterator
1406 ICOORDELT_IT cell_it = row_cells;
1407 ICOORDELT *cell; //new cell
1408 double sqsum; //sum of squares
1409 double spsum; //of spaces
1410 double sp_var; //space error
1411 double word_sync; //result for word
1412 int32_t total_count; //total blobs
1413
1414 if ((pitsync_linear_version & 3) > 1) {
1415 word_sync = compute_pitch_sd2 (row, projection, projection_left,
1416 projection_right, initial_pitch,
1417 occupation, mid_cuts, row_cells,
1418 testing_on, start, end);
1419 sp_sd = occupation;
1420 return word_sync;
1421 }
1422 mid_cuts = 0;
1423 cellpos = 0;
1424 total_count = 0;
1425 sqsum = 0;
1426 sp_count = 0;
1427 spsum = 0;
1428 prev_right = -1;
1429 if (blob_it.empty ())
1430 return space_size * 10;
1431#ifndef GRAPHICS_DISABLED
1432 if (testing_on && to_win != nullptr) {
1433 blob_box = blob_it.data ()->bounding_box ();
1434 projection->plot (to_win, projection_left,
1435 row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
1436 }
1437#endif
1438 start_it = blob_it;
1439 blob_count = 0;
1440 blob_box = box_next (&blob_it);//first blob
1441 blob_it.mark_cycle_pt ();
1442 do {
1443 for (; blob_count > 0; blob_count--)
1444 box_next(&start_it);
1445 do {
1446 prev_box = blob_box;
1447 blob_count++;
1448 blob_box = box_next (&blob_it);
1449 }
1450 while (!blob_it.cycled_list ()
1451 && blob_box.left () - prev_box.right () < space_size);
1452 plot_it = start_it;
1453 if (pitsync_linear_version & 3)
1454 word_sync =
1455 check_pitch_sync2 (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1456 projection, projection_left, projection_right,
1457 textord_projection_scale,
1458 occupation, &seg_list, start, end);
1459 else
1460 word_sync =
1461 check_pitch_sync (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1462 projection, &seg_list);
1463 if (testing_on) {
1464 tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ",
1465 prev_box.right (), prev_box.top (),
1466 seg_list.length () - 1, word_sync);
1467 seg_it.set_to_list (&seg_list);
1468 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
1469 seg_it.forward ()) {
1470 if (seg_it.data ()->faked)
1471 tprintf ("(F)");
1472 tprintf ("%d, ", seg_it.data ()->position ());
1473 // tprintf("C=%g, s=%g, sq=%g\n",
1474 // seg_it.data()->cost_function(),
1475 // seg_it.data()->sum(),
1476 // seg_it.data()->squares());
1477 }
1478 tprintf ("\n");
1479 }
1480#ifndef GRAPHICS_DISABLED
1481 if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr)
1482 plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1483#endif
1484 seg_it.set_to_list (&seg_list);
1485 if (prev_right >= 0) {
1486 sp_var = seg_it.data ()->position () - prev_right;
1487 sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1488 sp_var *= sp_var;
1489 spsum += sp_var;
1490 sp_count++;
1491 }
1492 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1493 segpos = seg_it.data ()->position ();
1494 if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
1495 //big gap
1496 while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
1497 cell = new ICOORDELT (cellpos + static_cast<int16_t>(initial_pitch), 0);
1498 cell_it.add_after_then_move (cell);
1499 cellpos += static_cast<int16_t>(initial_pitch);
1500 }
1501 //make new one
1502 cell = new ICOORDELT (segpos, 0);
1503 cell_it.add_after_then_move (cell);
1504 cellpos = segpos;
1505 }
1506 else if (segpos > cellpos - initial_pitch / 2) {
1507 cell = cell_it.data ();
1508 //average positions
1509 cell->set_x ((cellpos + segpos) / 2);
1510 cellpos = cell->x ();
1511 }
1512 }
1513 seg_it.move_to_last ();
1514 prev_right = seg_it.data ()->position ();
1515 if (textord_pitch_scalebigwords) {
1516 scale_factor = (seg_list.length () - 2) / 2;
1517 if (scale_factor < 1)
1518 scale_factor = 1;
1519 }
1520 else
1521 scale_factor = 1;
1522 sqsum += word_sync * scale_factor;
1523 total_count += (seg_list.length () - 1) * scale_factor;
1524 seg_list.clear ();
1525 }
1526 while (!blob_it.cycled_list ());
1527 sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1528 return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1529}
TBOX box_next(BLOBNBOX_IT *it)
Definition: blobbox.cpp:636
void plot_fp_cells2(ScrollView *win, ScrollView::Color colour, TO_ROW *row, FPSEGPT_LIST *seg_list)
Definition: drawtord.cpp:361
bool textord_show_fixed_cuts
Definition: drawtord.cpp:33
double check_pitch_sync2(BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
Definition: pithsync.cpp:291
double check_pitch_sync(BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, FPSEGPT_LIST *seg_list)
Definition: pitsync1.cpp:143
float compute_pitch_sd2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float initial_pitch, int16_t &occupation, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
Definition: topitch.cpp:1539
double textord_projection_scale
Definition: topitch.cpp:52
bool textord_pitch_scalebigwords
Definition: tovars.cpp:67
float intercept() const
Definition: blobbox.h:589
float xheight
Definition: blobbox.h:657
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:600
void set_x(int16_t xin)
rewrite function
Definition: points.h:61
int16_t x() const
access function
Definition: points.h:52
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:577

◆ compute_pitch_sd2()

float compute_pitch_sd2 ( TO_ROW * row,
STATS * projection,
int16_t  projection_left,
int16_t  projection_right,
float  initial_pitch,
int16_t &  occupation,
int16_t &  mid_cuts,
ICOORDELT_LIST *  row_cells,
bool  testing_on,
int16_t  start = 0,
int16_t  end = 0 
)

Definition at line 1539 of file topitch.cpp.

1551 {
1552 //blobs
1553 BLOBNBOX_IT blob_it = row->blob_list ();
1554 BLOBNBOX_IT plot_it;
1555 int16_t blob_count; //no of blobs
1556 TBOX blob_box; //bounding box
1557 FPSEGPT_LIST seg_list; //char cells
1558 FPSEGPT_IT seg_it; //iterator
1559 int16_t segpos; //position of segment
1560 //iterator
1561 ICOORDELT_IT cell_it = row_cells;
1562 ICOORDELT *cell; //new cell
1563 double word_sync; //result for word
1564
1565 mid_cuts = 0;
1566 if (blob_it.empty ()) {
1567 occupation = 0;
1568 return initial_pitch * 10;
1569 }
1570#ifndef GRAPHICS_DISABLED
1571 if (testing_on && to_win != nullptr) {
1572 projection->plot (to_win, projection_left,
1573 row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
1574 }
1575#endif
1576 blob_count = 0;
1577 blob_it.mark_cycle_pt ();
1578 do {
1579 //first blob
1580 blob_box = box_next (&blob_it);
1581 blob_count++;
1582 }
1583 while (!blob_it.cycled_list ());
1584 plot_it = blob_it;
1585 word_sync = check_pitch_sync2 (&blob_it, blob_count, static_cast<int16_t>(initial_pitch),
1586 2, projection, projection_left,
1587 projection_right,
1588 textord_projection_scale,
1589 occupation, &seg_list, start, end);
1590 if (testing_on) {
1591 tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ",
1592 blob_box.right (), blob_box.top (),
1593 seg_list.length () - 1, word_sync);
1594 seg_it.set_to_list (&seg_list);
1595 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1596 if (seg_it.data ()->faked)
1597 tprintf ("(F)");
1598 tprintf ("%d, ", seg_it.data ()->position ());
1599 // tprintf("C=%g, s=%g, sq=%g\n",
1600 // seg_it.data()->cost_function(),
1601 // seg_it.data()->sum(),
1602 // seg_it.data()->squares());
1603 }
1604 tprintf ("\n");
1605 }
1606#ifndef GRAPHICS_DISABLED
1607 if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr)
1608 plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
1609#endif
1610 seg_it.set_to_list (&seg_list);
1611 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1612 segpos = seg_it.data ()->position ();
1613 //make new one
1614 cell = new ICOORDELT (segpos, 0);
1615 cell_it.add_after_then_move (cell);
1616 if (seg_it.at_last ())
1617 mid_cuts = seg_it.data ()->cheap_cuts ();
1618 }
1619 seg_list.clear ();
1620 return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10;
1621}

◆ compute_rows_pitch()

bool compute_rows_pitch ( TO_BLOCK * block,
int32_t  block_index,
bool  testing_on 
)

Definition at line 357 of file topitch.cpp.

361 {
362 int32_t maxwidth; //of spaces
363 TO_ROW *row; //current row
364 int32_t row_index; //row number.
365 float lower, upper; //cluster thresholds
366 TO_ROW_IT row_it = block->get_rows ();
367
368 row_index = 1;
369 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
370 row = row_it.data ();
371 ASSERT_HOST (row->xheight > 0);
372 row->compute_vertical_projection();
373 maxwidth = static_cast<int32_t>(ceil (row->xheight * textord_words_maxspace));
374 if (row_pitch_stats (row, maxwidth, testing_on)
375 && find_row_pitch (row, maxwidth,
376 textord_dotmatrix_gap + 1, block, block_index,
377 row_index, testing_on)) {
378 if (row->fixed_pitch == 0) {
379 lower = row->pr_nonsp;
380 upper = row->pr_space;
381 row->space_size = upper;
382 row->kern_size = lower;
383 }
384 }
385 else {
386 row->fixed_pitch = 0.0f; //insufficient data
387 row->pitch_decision = PITCH_DUNNO;
388 }
389 row_index++;
390 }
391 return false;
392}
@ PITCH_DUNNO
Definition: blobbox.h:46
bool row_pitch_stats(TO_ROW *row, int32_t maxwidth, bool testing_on)
Definition: topitch.cpp:707
bool find_row_pitch(TO_ROW *row, int32_t maxwidth, int32_t dm_gap, TO_BLOCK *block, int32_t block_index, int32_t row_index, bool testing_on)
Definition: topitch.cpp:840
double textord_words_maxspace
Definition: tovars.cpp:42
int textord_dotmatrix_gap
Definition: tovars.cpp:33
float fixed_pitch
Definition: blobbox.h:651
float pr_space
Definition: blobbox.h:654
float pr_nonsp
Definition: blobbox.h:655
void compute_vertical_projection()
Definition: blobbox.cpp:796
PITCH_TYPE pitch_decision
Definition: blobbox.h:650
float kern_size
Definition: blobbox.h:666
float space_size
Definition: blobbox.h:667

◆ count_block_votes()

void count_block_votes ( TO_BLOCK * block,
int32_t &  def_fixed,
int32_t &  def_prop,
int32_t &  maybe_fixed,
int32_t &  maybe_prop,
int32_t &  corr_fixed,
int32_t &  corr_prop,
int32_t &  dunno 
)

Definition at line 660 of file topitch.cpp.

668 {
669 TO_ROW *row; //current row
670 TO_ROW_IT row_it = block->get_rows ();
671
672 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
673 row = row_it.data ();
674 switch (row->pitch_decision) {
675 case PITCH_DUNNO:
676 dunno++;
677 break;
678 case PITCH_DEF_PROP:
679 def_prop++;
680 break;
681 case PITCH_MAYBE_PROP:
682 maybe_prop++;
683 break;
684 case PITCH_DEF_FIXED:
685 def_fixed++;
686 break;
687 case PITCH_MAYBE_FIXED:
688 maybe_fixed++;
689 break;
690 case PITCH_CORR_PROP:
691 corr_prop++;
692 break;
693 case PITCH_CORR_FIXED:
694 corr_fixed++;
695 break;
696 }
697 }
698}
@ PITCH_DEF_FIXED
Definition: blobbox.h:47
@ PITCH_MAYBE_FIXED
Definition: blobbox.h:48
@ PITCH_CORR_FIXED
Definition: blobbox.h:51
@ PITCH_DEF_PROP
Definition: blobbox.h:49
@ PITCH_CORR_PROP
Definition: blobbox.h:52
@ PITCH_MAYBE_PROP
Definition: blobbox.h:50

◆ count_pitch_stats()

bool count_pitch_stats ( TO_ROW * row,
STATS * gap_stats,
STATS * pitch_stats,
float  initial_pitch,
float  min_space,
bool  ignore_outsize,
bool  split_outsize,
int32_t  dm_gap 
)

Definition at line 1064 of file topitch.cpp.

1073 {
1074 bool prev_valid; //not word broken
1075 BLOBNBOX *blob; //current blob
1076 //blobs
1077 BLOBNBOX_IT blob_it = row->blob_list ();
1078 int32_t prev_right; //end of prev blob
1079 int32_t prev_centre; //centre of previous blob
1080 int32_t x_centre; //centre of this blob
1081 int32_t blob_width; //width of blob
1082 int32_t width_units; //no of widths in blob
1083 float width; //blob width
1084 TBOX blob_box; //bounding box
1085 TBOX joined_box; //of super blob
1086
1087 gap_stats->clear ();
1088 pitch_stats->clear ();
1089 if (blob_it.empty ())
1090 return false;
1091 prev_valid = false;
1092 prev_centre = 0;
1093 prev_right = 0; // stop compiler warning
1094 joined_box = blob_it.data ()->bounding_box ();
1095 do {
1096 blob_it.forward ();
1097 blob = blob_it.data ();
1098 if (!blob->joined_to_prev ()) {
1099 blob_box = blob->bounding_box ();
1100 if ((blob_box.left () - joined_box.right () < dm_gap
1101 && !blob_it.at_first ())
1102 || blob->cblob() == nullptr)
1103 joined_box += blob_box; //merge blobs
1104 else {
1105 blob_width = joined_box.width ();
1106 if (split_outsize) {
1107 width_units =
1108 static_cast<int32_t>(floor (static_cast<float>(blob_width) / initial_pitch + 0.5));
1109 if (width_units < 1)
1110 width_units = 1;
1111 width_units--;
1112 }
1113 else if (ignore_outsize) {
1114 width = static_cast<float>(blob_width) / initial_pitch;
1115 width_units = width < 1 + words_default_fixed_limit
1116 && width > 1 - words_default_fixed_limit ? 0 : -1;
1117 }
1118 else
1119 width_units = 0; //everything in
1120 x_centre = static_cast<int32_t>(joined_box.left ()
1121 + (blob_width -
1122 width_units * initial_pitch) / 2);
1123 if (prev_valid && width_units >= 0) {
1124 // if (width_units>0)
1125 // {
1126 // tprintf("wu=%d, width=%d, xc=%d, adding %d\n",
1127 // width_units,blob_width,x_centre,x_centre-prev_centre);
1128 // }
1129 gap_stats->add (joined_box.left () - prev_right, 1);
1130 pitch_stats->add (x_centre - prev_centre, 1);
1131 }
1132 prev_centre = static_cast<int32_t>(x_centre + width_units * initial_pitch);
1133 prev_right = joined_box.right ();
1134 prev_valid = blob_box.left () - joined_box.right () < min_space;
1135 prev_valid = prev_valid && width_units >= 0;
1136 joined_box = blob_box;
1137 }
1138 }
1139 }
1140 while (!blob_it.at_first ());
1141 return gap_stats->get_total () >= 3;
1142}
double words_default_fixed_limit
Definition: tovars.cpp:72
const TBOX & bounding_box() const
Definition: blobbox.h:230
C_BLOB * cblob() const
Definition: blobbox.h:268
bool joined_to_prev() const
Definition: blobbox.h:256
int16_t width() const
Definition: rect.h:115
void clear()
Definition: statistc.cpp:75
void add(int32_t value, int32_t count)
Definition: statistc.cpp:93
int32_t get_total() const
Definition: statistc.h:84

◆ find_repeated_chars()

void find_repeated_chars ( TO_BLOCK * block,
bool  testing_on 
)

Definition at line 1759 of file topitch.cpp.

1760 { // Debug mode.
1761 POLY_BLOCK* pb = block->block->pdblk.poly_block();
1762 if (pb != nullptr && !pb->IsText())
1763 return; // Don't find repeated chars in non-text blocks.
1764
1765 TO_ROW *row;
1766 BLOBNBOX_IT box_it;
1767 BLOBNBOX_IT search_it; // forward search
1768 WERD *word; // new word
1769 TBOX word_box; // for plotting
1770 int blobcount, repeated_set;
1771
1772 TO_ROW_IT row_it = block->get_rows();
1773 if (row_it.empty()) return; // empty block
1774 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1775 row = row_it.data();
1776 box_it.set_to_list(row->blob_list());
1777 if (box_it.empty()) continue; // no blobs in this row
1778 if (!row->rep_chars_marked()) {
1779 mark_repeated_chars(row);
1780 }
1781 if (row->num_repeated_sets() == 0) continue; // nothing to do for this row
1782 // new words
1783 WERD_IT word_it(&row->rep_words);
1784 do {
1785 if (box_it.data()->repeated_set() != 0 &&
1786 !box_it.data()->joined_to_prev()) {
1787 blobcount = 1;
1788 repeated_set = box_it.data()->repeated_set();
1789 search_it = box_it;
1790 search_it.forward();
1791 while (!search_it.at_first() &&
1792 search_it.data()->repeated_set() == repeated_set) {
1793 blobcount++;
1794 search_it.forward();
1795 }
1796 // After the call to make_real_word() all the blobs from this
1797 // repeated set will be removed from the blob list. box_it will be
1798 // set to point to the blob after the end of the extracted sequence.
1799 word = make_real_word(&box_it, blobcount, box_it.at_first(), 1);
1800 if (!box_it.empty() && box_it.data()->joined_to_prev()) {
1801 tprintf("Bad box joined to prev at");
1802 box_it.data()->bounding_box().print();
1803 tprintf("After repeated word:");
1804 word->bounding_box().print();
1805 }
1806 ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev());
1807 word->set_flag(W_REP_CHAR, true);
1808 word->set_flag(W_DONT_CHOP, true);
1809 word_it.add_after_then_move(word);
1810 } else {
1811 box_it.forward();
1812 }
1813 } while (!box_it.at_first());
1814 }
1815}
@ W_REP_CHAR
repeated character
Definition: werd.h:38
@ W_DONT_CHOP
fixed pitch chopped
Definition: werd.h:37
void mark_repeated_chars(TO_ROW *row)
Definition: makerow.cpp:2639
WERD * make_real_word(BLOBNBOX_IT *box_it, int32_t blobcount, bool bol, uint8_t blanks)
Definition: wordseg.cpp:579
bool rep_chars_marked() const
Definition: blobbox.h:631
WERD_LIST rep_words
Definition: blobbox.h:668
int num_repeated_sets() const
Definition: blobbox.h:637
void print() const
Definition: rect.h:278
Definition: werd.h:56
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118
TBOX bounding_box() const
Definition: werd.cpp:148

◆ find_row_pitch()

bool find_row_pitch ( TO_ROW * row,
int32_t  maxwidth,
int32_t  dm_gap,
TO_BLOCK * block,
int32_t  block_index,
int32_t  row_index,
bool  testing_on 
)

Definition at line 840 of file topitch.cpp.

848 {
849 bool used_dm_model; //looks like dot matrix
850 float min_space; //estimate threshold
851 float non_space; //gap size
852 float gap_iqr; //interquartile range
853 float pitch_iqr;
854 float dm_gap_iqr; //interquartile range
855 float dm_pitch_iqr;
856 float dm_pitch; //pitch with dm on
857 float pitch; //revised estimate
858 float initial_pitch; //guess at pitch
859 STATS gap_stats (0, maxwidth);
860 //centre-centre
861 STATS pitch_stats (0, maxwidth);
862
863 row->fixed_pitch = 0.0f;
864 initial_pitch = row->fp_space;
865 if (initial_pitch > row->xheight * (1 + words_default_fixed_limit))
866 initial_pitch = row->xheight;//keep pitch decent
867 non_space = row->fp_nonsp;
868 if (non_space > initial_pitch)
869 non_space = initial_pitch;
870 min_space = (initial_pitch + non_space) / 2;
871
872 if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
873 initial_pitch, min_space, true, false, dm_gap)) {
874 dm_gap_iqr = 0.0001;
875 dm_pitch_iqr = maxwidth * 2.0f;
876 dm_pitch = initial_pitch;
877 }
878 else {
879 dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
880 dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
881 dm_pitch = pitch_stats.ile (0.5);
882 }
883 gap_stats.clear ();
884 pitch_stats.clear ();
885 if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
886 initial_pitch, min_space, true, false, 0)) {
887 gap_iqr = 0.0001;
888 pitch_iqr = maxwidth * 3.0f;
889 }
890 else {
891 gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
892 pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
893 if (testing_on)
894 tprintf
895 ("First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
896 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
897 initial_pitch = pitch_stats.ile (0.5);
898 if (min_space > initial_pitch
899 && count_pitch_stats (row, &gap_stats, &pitch_stats,
900 initial_pitch, initial_pitch, true, false, 0)) {
901 min_space = initial_pitch;
902 gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
903 pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
904 if (testing_on)
905 tprintf
906 ("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
907 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
908 initial_pitch = pitch_stats.ile (0.5);
909 }
910 }
911 if (textord_debug_pitch_metric)
912 tprintf("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
913 block_index, row_index, 'X',
914 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
915 pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D' :
916 (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M'));
917 if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
918 row->pitch_decision = PITCH_DUNNO;
919 if (textord_debug_pitch_metric)
920 tprintf ("\n");
921 return false; //insufficient data
922 }
923 if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
924 if (testing_on)
925 tprintf
926 ("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
927 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
928 gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
929 pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
930 pitch = pitch_stats.ile (0.5);
931 used_dm_model = false;
932 }
933 else {
934 if (testing_on)
935 tprintf
936 ("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
937 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
938 gap_iqr = dm_gap_iqr;
939 pitch_iqr = dm_pitch_iqr;
940 pitch = dm_pitch;
941 used_dm_model = true;
942 }
943 if (textord_debug_pitch_metric) {
944 tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
945 pitch_iqr, gap_iqr, pitch);
946 tprintf ("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
947 pitch_iqr / gap_iqr, pitch_iqr / block->xheight,
948 pitch_iqr < gap_iqr * textord_fpiqr_ratio
949 && pitch_iqr < block->xheight * textord_max_pitch_iqr
950 && pitch < block->xheight * textord_words_default_maxspace
951 ? 'F' : 'P');
952 }
953 if (pitch_iqr < gap_iqr * textord_fpiqr_ratio
954 && pitch_iqr < block->xheight * textord_max_pitch_iqr
955 && pitch < block->xheight * textord_words_default_maxspace)
956 row->pitch_decision = PITCH_MAYBE_FIXED;
957 else
958 row->pitch_decision = PITCH_MAYBE_PROP;
959 row->fixed_pitch = pitch;
960 row->kern_size = gap_stats.ile (0.5);
961 row->min_space = static_cast<int32_t>(row->fixed_pitch + non_space) / 2;
962 if (row->min_space > row->fixed_pitch)
963 row->min_space = static_cast<int32_t>(row->fixed_pitch);
964 row->max_nonspace = row->min_space;
965 row->space_size = row->fixed_pitch;
966 row->space_threshold = (row->max_nonspace + row->min_space) / 2;
967 row->used_dm_model = used_dm_model;
968 return true;
969}
bool textord_debug_pitch_metric
Definition: topitch.cpp:45
bool count_pitch_stats(TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, bool ignore_outsize, bool split_outsize, int32_t dm_gap)
Definition: topitch.cpp:1064
double textord_max_pitch_iqr
Definition: tovars.cpp:80
double textord_words_default_maxspace
Definition: tovars.cpp:44
double textord_fpiqr_ratio
Definition: tovars.cpp:79
bool used_dm_model
Definition: blobbox.h:647
int32_t space_threshold
Definition: blobbox.h:665
int32_t max_nonspace
Definition: blobbox.h:664
float fp_nonsp
Definition: blobbox.h:653
float fp_space
Definition: blobbox.h:652
int32_t min_space
Definition: blobbox.h:663
Definition: statistc.h:31

◆ fix_row_pitch()

void fix_row_pitch ( TO_ROW *  bad_row,
TO_BLOCK *  bad_block,
TO_BLOCK_LIST *  blocks,
int32_t  row_target,
int32_t  block_target 
)

Definition at line 149 of file topitch.cpp.

// Body listing for fix_row_pitch (the signature is in the table above).
// When bad_row's decision is neither PITCH_DEF_FIXED nor PITCH_DEF_PROP, the
// rows of every text block are polled. A row counts as "similar" when its
// xheight lies between bad_row->xheight * (1 -/+ textord_pitch_rowsimilarity);
// for an all_caps bad_row the comparison uses xheight + ascrise instead (its
// tolerance factor sits on listing lines that are missing here). Similar rows
// in block_target feed block_votes/block_stats, similar rows elsewhere feed
// like_votes/like_stats, and all other rows feed other_votes.
// Afterwards a PITCH_CORR_FIXED row has its spacing fields rebuilt from
// fixed_pitch, while a PITCH_CORR_PROP / PITCH_DEF_PROP row gets fixed_pitch
// zeroed and char_cells cleared.
// NOTE(review): the embedded listing numbers skip in places (e.g. 188->190,
// 201->203, 233->235, 250->252, 257->259); whatever was on those lines is not
// present in this extract, so several branches below appear to have no body.
153 { // number of block
154 int16_t mid_cuts;
155 int block_votes; //votes in block
156 int like_votes; //votes over page
157 int other_votes; //votes of unlike blocks
158 int block_index; //number of block
159 int row_index; //number of row
160 int maxwidth; //max pitch
161 TO_BLOCK_IT block_it = blocks; //block iterator
162 TO_BLOCK *block; //current block
163 TO_ROW *row; //current row
164 float sp_sd; //space deviation
165 STATS block_stats; //pitches in block
166 STATS like_stats; //pitches in page
167
168 block_votes = like_votes = other_votes = 0;
169 maxwidth = static_cast<int32_t>(ceil (bad_row->xheight * textord_words_maxspace));
170 if (bad_row->pitch_decision != PITCH_DEF_FIXED
171 && bad_row->pitch_decision != PITCH_DEF_PROP) {
172 block_stats.set_range (0, maxwidth);
173 like_stats.set_range (0, maxwidth);
174 block_index = 1;
175 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
176 block_it.forward()) {
177 block = block_it.data();
178 POLY_BLOCK* pb = block->block->pdblk.poly_block();
// This `continue` also bypasses the block_index++ at the bottom of the loop,
// so non-text blocks do not advance block_index.
// NOTE(review): confirm block_target is numbered the same way by the caller.
179 if (pb != nullptr && !pb->IsText()) continue; // Non text doesn't exist!
180 row_index = 1;
181 TO_ROW_IT row_it(block->get_rows());
182 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
183 row_it.forward ()) {
184 row = row_it.data ();
// Similarity test: all_caps rows compare xheight + ascrise, others xheight only.
185 if ((bad_row->all_caps
186 && row->xheight + row->ascrise
187 <
188 (bad_row->xheight + bad_row->ascrise) * (1 +
190 && row->xheight + row->ascrise >
191 (bad_row->xheight + bad_row->ascrise) * (1 -
193 || (!bad_row->all_caps
194 && row->xheight <
195 bad_row->xheight * (1 + textord_pitch_rowsimilarity)
196 && row->xheight >
197 bad_row->xheight * (1 - textord_pitch_rowsimilarity))) {
// Similar row inside the target block: definite decisions are weighted by
// textord_words_veto_power, tentative ones count as a single vote.
198 if (block_index == block_target) {
199 if (row->pitch_decision == PITCH_DEF_FIXED) {
200 block_votes += textord_words_veto_power;
201 block_stats.add (static_cast<int32_t>(row->fixed_pitch),
203 }
204 else if (row->pitch_decision == PITCH_MAYBE_FIXED
205 || row->pitch_decision == PITCH_CORR_FIXED) {
206 block_votes++;
207 block_stats.add (static_cast<int32_t>(row->fixed_pitch), 1);
208 }
209 else if (row->pitch_decision == PITCH_DEF_PROP)
210 block_votes -= textord_words_veto_power;
211 else if (row->pitch_decision == PITCH_MAYBE_PROP
213 block_votes--;
214 }
// Similar row in any other block: same weighting, tallied page-wide.
215 else {
216 if (row->pitch_decision == PITCH_DEF_FIXED) {
217 like_votes += textord_words_veto_power;
218 like_stats.add (static_cast<int32_t>(row->fixed_pitch),
220 }
221 else if (row->pitch_decision == PITCH_MAYBE_FIXED
222 || row->pitch_decision == PITCH_CORR_FIXED) {
223 like_votes++;
224 like_stats.add (static_cast<int32_t>(row->fixed_pitch), 1);
225 }
226 else if (row->pitch_decision == PITCH_DEF_PROP)
227 like_votes -= textord_words_veto_power;
228 else if (row->pitch_decision == PITCH_MAYBE_PROP
230 like_votes--;
231 }
232 }
// Dissimilar row: only the vote count is kept, no pitch statistics.
233 else {
235 other_votes += textord_words_veto_power;
236 else if (row->pitch_decision == PITCH_MAYBE_FIXED
238 other_votes++;
239 else if (row->pitch_decision == PITCH_DEF_PROP)
240 other_votes -= textord_words_veto_power;
241 else if (row->pitch_decision == PITCH_MAYBE_PROP
243 other_votes--;
244 }
245 row_index++;
246 }
247 block_index++;
248 }
// Outcome of the poll: a strong block majority takes the block's median pitch,
// otherwise a positive page-wide tally takes the page median.
// NOTE(review): listing lines 251, 255 and 258 are missing; presumably they
// assign bad_row->pitch_decision - confirm against the real source.
249 if (block_votes > textord_words_veto_power) {
250 bad_row->fixed_pitch = block_stats.ile (0.5);
252 }
253 else if (block_votes <= textord_words_veto_power && like_votes > 0) {
254 bad_row->fixed_pitch = like_stats.ile (0.5);
256 }
257 else {
259 if (block_votes == 0 && like_votes == 0 && other_votes > 0
261 tprintf
262 ("Warning:row %d of block %d set prop with no like rows against trend\n",
263 row_target, block_target);
264 }
265 }
267 tprintf(":b_votes=%d:l_votes=%d:o_votes=%d",
268 block_votes, like_votes, other_votes);
269 tprintf("x=%g:asc=%g\n", bad_row->xheight, bad_row->ascrise);
270 }
271 if (bad_row->pitch_decision == PITCH_CORR_FIXED) {
// A pitch below textord_min_xheight is replaced by the block median, then the
// page median, and as a last resort by the row's own xheight (with a warning).
272 if (bad_row->fixed_pitch < textord_min_xheight) {
273 if (block_votes > 0)
274 bad_row->fixed_pitch = block_stats.ile (0.5);
275 else if (block_votes == 0 && like_votes > 0)
276 bad_row->fixed_pitch = like_stats.ile (0.5);
277 else {
278 tprintf
279 ("Warning:guessing pitch as xheight on row %d, block %d\n",
280 row_target, block_target);
281 bad_row->fixed_pitch = bad_row->xheight;
282 }
283 }
// Final clamp, then derive every spacing field from the chosen pitch.
284 if (bad_row->fixed_pitch < textord_min_xheight)
285 bad_row->fixed_pitch = (float) textord_min_xheight;
286 bad_row->kern_size = bad_row->fixed_pitch / 4;
287 bad_row->min_space = static_cast<int32_t>(bad_row->fixed_pitch * 0.6);
288 bad_row->max_nonspace = static_cast<int32_t>(bad_row->fixed_pitch * 0.4);
289 bad_row->space_threshold =
290 (bad_row->min_space + bad_row->max_nonspace) / 2;
291 bad_row->space_size = bad_row->fixed_pitch;
// Only build character cells when none exist yet and the row has blobs.
// fixed_pitch is passed as tune_row_pitch's initial_pitch reference parameter.
292 if (bad_row->char_cells.empty() && !bad_row->blob_list()->empty()) {
293 tune_row_pitch (bad_row, &bad_row->projection,
294 bad_row->projection_left, bad_row->projection_right,
295 (bad_row->fixed_pitch +
296 bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,
297 sp_sd, mid_cuts, &bad_row->char_cells, false);
298 }
299 }
300 else if (bad_row->pitch_decision == PITCH_CORR_PROP
301 || bad_row->pitch_decision == PITCH_DEF_PROP) {
302 bad_row->fixed_pitch = 0.0f;
303 bad_row->char_cells.clear ();
304 }
305}
int textord_min_xheight
Definition: makerow.cpp:67
float tune_row_pitch(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
Definition: topitch.cpp:1152
int textord_words_veto_power
Definition: tovars.cpp:63
double textord_pitch_rowsimilarity
Definition: tovars.cpp:65
bool all_caps
Definition: blobbox.h:646
int16_t projection_left
Definition: blobbox.h:648
STATS projection
Definition: blobbox.h:671
int16_t projection_right
Definition: blobbox.h:649
ICOORDELT_LIST char_cells
Definition: blobbox.h:669
float ascrise
Definition: blobbox.h:659
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
Definition: statistc.cpp:56
double ile(double frac) const
Definition: statistc.cpp:166

◆ fixed_pitch_row()

bool fixed_pitch_row ( TO_ROW *  row,
BLOCK *  block,
int32_t  block_index 
)

Definition at line 980 of file topitch.cpp.

// Body listing for fixed_pitch_row (the signature is in the table above).
// Caps non_space at row->fixed_pitch, then either forces the proportional
// path (textord_all_prop set, or the block's poly_block is non-text) or calls
// tune_row_pitch and compares the returned pitch_sd / sp_sd / mid_cuts
// against the textord_words_* thresholds. Every path visible here ends in
// `return true`.
// NOTE(review): listing lines 997, 1013, 1015, 1022, 1024, 1027 and 1030 are
// missing from this extract. Those are where each branch's statement would
// sit (presumably assignments to row->pitch_decision, plus the guard for the
// debug print) - confirm against the real source.
983 {
984 const char *res_string; // pitch result
985 int16_t mid_cuts; // no of cheap cuts
986 float non_space; // gap size
987 float pitch_sd; // error on pitch
988 float sp_sd = 0.0f; // space sd
989
990 non_space = row->fp_nonsp;
991 if (non_space > row->fixed_pitch)
992 non_space = row->fixed_pitch;
// A null block is tolerated: pb is then nullptr and treated like a text block.
993 POLY_BLOCK* pb = block != nullptr ? block->pdblk.poly_block() : nullptr;
994 if (textord_all_prop || (pb != nullptr && !pb->IsText())) {
995 // Set the decision to definitely proportional.
996 pitch_sd = textord_words_def_prop * row->fixed_pitch;
998 } else {
// space_size passed in is (fixed_pitch + 3 * non_space) / 4; debug output is
// enabled only for the block selected by textord_debug_block.
999 pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left,
1000 row->projection_right,
1001 (row->fixed_pitch + non_space * 3) / 4,
1002 row->fixed_pitch, sp_sd, mid_cuts,
1003 &row->char_cells,
1004 block_index == textord_debug_block);
1005 if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
1006 && ((pitsync_linear_version & 3) < 3
1007 || ((pitsync_linear_version & 3) >= 3 && (row->used_dm_model
1008 || sp_sd > 20
1009 || (pitch_sd == 0 && sp_sd > 10))))) {
1010 if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
1011 && !row->all_caps
1012 && ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
1014 else
1016 }
1017 else if ((pitsync_linear_version & 3) < 3
1018 || sp_sd > 20
1019 || mid_cuts > 0
1020 || pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) {
1021 if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
1023 else
1025 }
1026 else
1028 }
1029
// Map the decision to a two-letter tag for the debug line printed below.
1031 res_string = "??";
1032 switch (row->pitch_decision) {
1033 case PITCH_DEF_PROP:
1034 res_string = "DP";
1035 break;
1036 case PITCH_MAYBE_PROP:
1037 res_string = "MP";
1038 break;
1039 case PITCH_DEF_FIXED:
1040 res_string = "DF";
1041 break;
1042 case PITCH_MAYBE_FIXED:
1043 res_string = "MF";
1044 break;
1045 default:
1046 res_string = "??";
1047 }
1048 tprintf (":sd/p=%g:occ=%g:init_res=%s\n",
1049 pitch_sd / row->fixed_pitch, sp_sd, res_string);
1050 }
1051 return true;
1052}
int textord_debug_block
Definition: tovars.cpp:34
double textord_words_pitchsd_threshold
Definition: tovars.cpp:57
double textord_words_def_prop
Definition: tovars.cpp:61

◆ plot_fp_word()

void plot_fp_word ( TO_BLOCK *  block,
float  pitch,
float  nonspace 
)

Definition at line 1825 of file topitch.cpp.

// Body listing for plot_fp_word (the signature is in the table above).
// For every row of the block: overwrites min_space, max_nonspace and
// space_threshold with (pitch + nonspace) / 2 truncated to int32_t, then
// calls plot_word_decisions on to_win with the pitch truncated to int16_t.
1829 {
1830 TO_ROW *row; //current row
1831 TO_ROW_IT row_it = block->get_rows ();
1832
1833 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1834 row = row_it.data ();
1835 row->min_space = static_cast<int32_t>((pitch + nonspace) / 2);
1836 row->max_nonspace = row->min_space;
1837 row->space_threshold = row->min_space;
1838 plot_word_decisions (to_win, static_cast<int16_t>(pitch), row);
1839 }
1840}
void plot_word_decisions(ScrollView *win, int16_t pitch, TO_ROW *row)
Definition: drawtord.cpp:247

◆ print_block_counts()

void print_block_counts ( TO_BLOCK *  block,
int32_t  block_index 
)

Definition at line 623 of file topitch.cpp.

// Body listing for print_block_counts (the signature is in the table above).
// Obtains the seven vote counters from count_block_votes and prints them with
// tprintf on one line: the fixed triple (def, maybe, corr), the prop triple,
// then the dunno count.
626 {
627 int32_t def_fixed = 0; //counters
628 int32_t def_prop = 0;
629 int32_t maybe_fixed = 0;
630 int32_t maybe_prop = 0;
631 int32_t dunno = 0;
632 int32_t corr_fixed = 0;
633 int32_t corr_prop = 0;
634
635 count_block_votes(block,
636 def_fixed,
637 def_prop,
638 maybe_fixed,
639 maybe_prop,
640 corr_fixed,
641 corr_prop,
642 dunno);
643 tprintf ("Block %d has (%d,%d,%d)",
644 block_index, def_fixed, maybe_fixed, corr_fixed);
// " (Wrongly)" is appended when a non-zero count contradicts the
// textord_blocksall_prop / textord_blocksall_fixed flag.
645 if (textord_blocksall_prop && (def_fixed || maybe_fixed || corr_fixed))
646 tprintf (" (Wrongly)");
647 tprintf (" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
648 if (textord_blocksall_fixed && (def_prop || maybe_prop || corr_prop))
649 tprintf (" (Wrongly)");
650 tprintf (" prop, %d dunno\n", dunno);
651}
void count_block_votes(TO_BLOCK *block, int32_t &def_fixed, int32_t &def_prop, int32_t &maybe_fixed, int32_t &maybe_prop, int32_t &corr_fixed, int32_t &corr_prop, int32_t &dunno)
Definition: topitch.cpp:660
bool textord_blocksall_prop
Definition: tovars.cpp:29
bool textord_blocksall_fixed
Definition: tovars.cpp:27

◆ print_pitch_sd()

void print_pitch_sd ( TO_ROW *  row,
STATS *  projection,
int16_t  projection_left,
int16_t  projection_right,
float  space_size,
float  initial_pitch 
)

Definition at line 1631 of file topitch.cpp.

// Body listing for print_pitch_sd (the signature is in the table above).
// Returns immediately when the row has no blobs. Otherwise it prints two
// diagnostic lines with tprintf:
//  1. a per-word pass: consecutive blobs whose gap is below space_size are
//     grouped, check_pitch_sync2 is run on each group, and the results are
//     accumulated into sqsum/total_count (word error) and spsum/sp_count
//     (squared offset of each inter-word gap from the nearest multiple of
//     initial_pitch);
//  2. a whole-row pass: check_pitch_sync2 over all blobs, divided by
//     occupation when occupation > 1, square-rooted and classified as
//     DF/MF/MP/DP against the textord_words_* thresholds.
// NOTE(review): listing lines 1689 and 1702 are missing from this extract
// (an argument of the first check_pitch_sync2 call, and the `if` that opens
// the scale_factor block whose `}` / `else` appear at 1706-1707).
1638 {
1639 const char *res2; //pitch result
1640 int16_t occupation; //used cells
1641 float sp_sd; //space sd
1642 //blobs
1643 BLOBNBOX_IT blob_it = row->blob_list ();
1644 BLOBNBOX_IT start_it; //start of word
1645 BLOBNBOX_IT row_start; //start of row
1646 int16_t blob_count; //no of blobs
1647 int16_t total_blob_count; //total blobs in line
1648 TBOX blob_box; //bounding box
1649 TBOX prev_box; //of super blob
1650 int32_t prev_right; //of word sync
1651 int scale_factor; //on scores for big words
1652 int32_t sp_count; //spaces
1653 FPSEGPT_LIST seg_list; //char cells
1654 FPSEGPT_IT seg_it; //iterator
1655 double sqsum; //sum of squares
1656 double spsum; //of spaces
1657 double sp_var; //space error
1658 double word_sync; //result for word
1659 double total_count; //total cuts
1660
1661 if (blob_it.empty ())
1662 return;
1663 row_start = blob_it;
1664 total_blob_count = 0;
1665
1666 total_count = 0;
1667 sqsum = 0;
1668 sp_count = 0;
1669 spsum = 0;
// prev_right < 0 marks "no previous word yet", so the first word adds no
// inter-word space sample.
1670 prev_right = -1;
1671 blob_it = row_start;
1672 start_it = blob_it;
1673 blob_count = 0;
1674 blob_box = box_next (&blob_it);//first blob
1675 blob_it.mark_cycle_pt ();
1676 do {
// Advance start_it past the blobs of the previous word (counts down
// blob_count to zero as it goes).
1677 for (; blob_count > 0; blob_count--)
1678 box_next(&start_it);
// Collect one word: keep taking blobs while the gap to the previous box is
// smaller than space_size.
1679 do {
1680 prev_box = blob_box;
1681 blob_count++;
1682 blob_box = box_next (&blob_it);
1683 }
1684 while (!blob_it.cycled_list ()
1685 && blob_box.left () - prev_box.right () < space_size);
1686 word_sync =
1687 check_pitch_sync2 (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
1688 projection, projection_left, projection_right,
1690 occupation, &seg_list, 0, 0);
1691 total_blob_count += blob_count;
1692 seg_it.set_to_list (&seg_list);
1693 if (prev_right >= 0) {
// Distance from the previous word's last cut to this word's first cut,
// reduced to its signed offset from the nearest multiple of initial_pitch,
// then squared.
1694 sp_var = seg_it.data ()->position () - prev_right;
1695 sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1696 sp_var *= sp_var;
1697 spsum += sp_var;
1698 sp_count++;
1699 }
1700 seg_it.move_to_last ();
1701 prev_right = seg_it.data ()->position ();
1703 scale_factor = (seg_list.length () - 2) / 2;
1704 if (scale_factor < 1)
1705 scale_factor = 1;
1706 }
1707 else
1708 scale_factor = 1;
1709 sqsum += word_sync * scale_factor;
1710 total_count += (seg_list.length () - 1) * scale_factor;
1711 seg_list.clear ();
1712 }
1713 while (!blob_it.cycled_list ());
// Guard both divisions; with no cuts at all the word error falls back to
// space_size * 10.
1714 sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1715 word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1716 tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
1717 word_sync, word_sync / initial_pitch, sp_sd,
1718 word_sync < textord_words_pitchsd_threshold * initial_pitch
1719 ? 'F' : 'P');
1720
// Second pass: the whole row treated as a single run of total_blob_count blobs.
1721 start_it = row_start;
1722 blob_it = row_start;
1723 word_sync =
1724 check_pitch_sync2 (&blob_it, total_blob_count, static_cast<int16_t>(initial_pitch), 2,
1725 projection, projection_left, projection_right,
1726 row->xheight * textord_projection_scale, occupation,
1727 &seg_list, 0, 0);
1728 if (occupation > 1)
1729 word_sync /= occupation;
1730 word_sync = sqrt (word_sync);
1731
1732#ifndef GRAPHICS_DISABLED
1733 if (textord_show_row_cuts && to_win != nullptr)
1734 plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list);
1735#endif
1736 seg_list.clear ();
1737 if (word_sync < textord_words_pitchsd_threshold * initial_pitch) {
1738 if (word_sync < textord_words_def_fixed * initial_pitch
1739 && !row->all_caps)
1740 res2 = "DF";
1741 else
1742 res2 = "MF";
1743 }
1744 else
1745 res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP";
1746 tprintf
1747 ("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
1748 word_sync, word_sync / initial_pitch,
1749 word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P',
1750 occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps);
1751}
bool textord_show_row_cuts
Definition: topitch.cpp:46
double textord_words_def_fixed
Definition: tovars.cpp:59

◆ row_pitch_stats()

bool row_pitch_stats ( TO_ROW *  row,
int32_t  maxwidth,
bool  testing_on 
)

Definition at line 707 of file topitch.cpp.

// Body listing for row_pitch_stats (the signature is in the table above).
// Builds a histogram of horizontal gaps between the row's blobs, smooths and
// clusters it, sorts the cluster medians, and from them fills in
// row->pr_nonsp / row->pr_space and row->fp_nonsp / row->fp_space.
// Returns false when no gap was recorded or clustering produced no cluster;
// returns true otherwise.
// NOTE(review): listing lines 754, 774 and 797 are missing from this extract
// (an argument of gap_stats.cluster, presumably the re-assignment of `lower`
// before the proportional search, and the statement for the "no clusters
// above nonspace threshold" case).
711 {
712 BLOBNBOX *blob; //current blob
713 int gap_index; //current gap
714 int32_t prev_x; //end of prev blob
715 int32_t cluster_count; //no of clusters
716 int32_t prev_count; //of clusters
717 int32_t smooth_factor; //for smoothing stats
718 TBOX blob_box; //bounding box
719 float lower, upper; //cluster thresholds
720 //gap sizes
721 float gaps[BLOCK_STATS_CLUSTERS];
722 //blobs
723 BLOBNBOX_IT blob_it = row->blob_list ();
724 STATS gap_stats (0, maxwidth);
725 STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
726 //clusters
727
728 smooth_factor =
729 static_cast<int32_t>(row->xheight * textord_wordstats_smooth_factor + 1.5);
// Walk the blob list once: start after the first blob and stop when the
// iterator is back at the first element. Blobs flagged joined_to_prev are
// skipped; gaps of maxwidth or more are not recorded.
730 if (!blob_it.empty ()) {
731 prev_x = blob_it.data ()->bounding_box ().right ();
732 blob_it.forward ();
733 while (!blob_it.at_first ()) {
734 blob = blob_it.data ();
735 if (!blob->joined_to_prev ()) {
736 blob_box = blob->bounding_box ();
737 if (blob_box.left () - prev_x < maxwidth)
738 gap_stats.add (blob_box.left () - prev_x, 1);
739 prev_x = blob_box.right ();
740 }
741 blob_it.forward ();
742 }
743 }
744 if (gap_stats.get_total () == 0) {
745 return false;
746 }
747 cluster_count = 0;
748 lower = row->xheight * words_initial_lower;
749 upper = row->xheight * words_initial_upper;
750 gap_stats.smooth (smooth_factor);
// Re-run clustering while it keeps producing more clusters, up to
// BLOCK_STATS_CLUSTERS.
751 do {
752 prev_count = cluster_count;
753 cluster_count = gap_stats.cluster (lower, upper,
755 BLOCK_STATS_CLUSTERS, cluster_stats);
756 }
757 while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
758 if (cluster_count < 1) {
759 return false;
760 }
// Cluster statistics are read from index 1 upwards (cluster_stats has
// BLOCK_STATS_CLUSTERS + 1 entries).
761 for (gap_index = 0; gap_index < cluster_count; gap_index++)
762 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
763 //get medians
764 if (testing_on) {
765 tprintf ("cluster_count=%d:", cluster_count);
766 for (gap_index = 0; gap_index < cluster_count; gap_index++)
767 tprintf (" %g(%d)", gaps[gap_index],
768 cluster_stats[gap_index + 1].get_total ());
769 tprintf ("\n");
770 }
771 qsort (gaps, cluster_count, sizeof (float), sort_floats);
772
773 //Try to find proportional non-space and space for row.
775 upper = row->xheight * textord_words_min_minspace;
// Empty-bodied loop: gap_index ends at the first sorted median >= lower.
776 for (gap_index = 0; gap_index < cluster_count
777 && gaps[gap_index] < lower; gap_index++);
778 if (gap_index == 0) {
779 if (testing_on)
780 tprintf ("No clusters below nonspace threshold!!\n");
781 if (cluster_count > 1) {
782 row->pr_nonsp = gaps[0];
783 row->pr_space = gaps[1];
784 }
785 else {
786 row->pr_nonsp = lower;
787 row->pr_space = gaps[0];
788 }
789 }
790 else {
791 row->pr_nonsp = gaps[gap_index - 1];
792 while (gap_index < cluster_count && gaps[gap_index] < upper)
793 gap_index++;
794 if (gap_index == cluster_count) {
795 if (testing_on)
796 tprintf ("No clusters above nonspace threshold!!\n");
798 }
799 else
800 row->pr_space = gaps[gap_index];
801 }
802
803 //Now try to find the fixed pitch space and non-space.
804 upper = row->xheight * words_default_fixed_space;
// Empty-bodied loop: gap_index ends at the first sorted median >= upper.
805 for (gap_index = 0; gap_index < cluster_count
806 && gaps[gap_index] < upper; gap_index++);
807 if (gap_index == 0) {
808 if (testing_on)
809 tprintf ("No clusters below space threshold!!\n");
810 row->fp_nonsp = upper;
811 row->fp_space = gaps[0];
812 }
813 else {
814 row->fp_nonsp = gaps[gap_index - 1];
815 if (gap_index == cluster_count) {
816 if (testing_on)
817 tprintf ("No clusters above space threshold!!\n");
818 row->fp_space = row->xheight;
819 }
820 else
821 row->fp_space = gaps[gap_index];
822 }
823 if (testing_on) {
824 tprintf
825 ("Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n",
826 row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space);
827 }
828 return true; //computed some stats
829}
#define BLOCK_STATS_CLUSTERS
Definition: topitch.cpp:56
double words_default_fixed_space
Definition: tovars.cpp:71
double words_initial_upper
Definition: tovars.cpp:69
double words_initial_lower
Definition: tovars.cpp:68
double textord_wordstats_smooth_factor
Definition: tovars.cpp:37
double textord_words_min_minspace
Definition: tovars.cpp:47

◆ try_block_fixed()

bool try_block_fixed ( TO_BLOCK *  block,
int32_t  block_index 
)

Definition at line 541 of file topitch.cpp.

// Body listing for try_block_fixed: a stub that ignores both arguments and
// always returns false.
544 {
545 return false;
546}

◆ try_doc_fixed()

bool try_doc_fixed ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks,
float  gradient 
)

Definition at line 401 of file topitch.cpp.

// Body listing for try_doc_fixed (the signature is in the table above).
// Every return visible in this listing yields false. The visible steps are:
//  1. scan all rows: count them, add each positive fixed_pitch to `pitches`,
//     and track the smallest/largest skew-shifted projection edge;
//  2. sum each row's projection into one page-wide `projection`, shifting
//     columns by shift_factor * (master_y - row_y);
//  3. take the median of `pitches` and run tune_row_pitch on the page
//     projection, storing cells in a single row's char_cells;
//  4. print a "try_doc" summary and, when enabled, plot the cells per row;
//  5. clear the char_cells of the row that `row` points at on exit.
// prop_blocks and fixed_blocks are only zeroed and printed in the lines
// shown here.
// NOTE(review): listing lines 435 and 506 are missing from this extract
// (the rest of the first `if` condition, and presumably the guard in front
// of the tprintf summary).
405 {
406 int16_t master_x; //uniform shifts
407 int16_t pitch; //median pitch.
408 int x; //profile coord
409 int prop_blocks; //correct counts
410 int fixed_blocks;
411 int total_row_count; //total in page
412 //iterator
413 TO_BLOCK_IT block_it = port_blocks;
414 TO_BLOCK *block; //current block;
415 TO_ROW *row; //current row
416 int16_t projection_left; //edges
417 int16_t projection_right;
418 int16_t row_left; //edges of row
419 int16_t row_right;
420 ICOORDELT_LIST *master_cells; //cells for page
421 float master_y; //uniform shifts
422 float shift_factor; //page skew correction
423 float row_shift; //shift for row
424 float final_pitch; //output pitch
425 float row_y; //baseline
426 STATS projection; //entire page
427 STATS pitches (0, MAX_ALLOWED_PITCH);
428 //for median
429 float sp_sd; //space sd
430 int16_t mid_cuts; //no of cheap cuts
431 float pitch_sd; //sync rating
432
433 if (block_it.empty ()
434 // || block_it.data()==block_it.data_relative(1)
436 return false;
437 shift_factor = gradient / (gradient * gradient + 1);
438 // row iterator
// The first row of the first block supplies the reference point
// (master_x, master_y) against which every other row is shifted.
439 TO_ROW_IT row_it(block_it.data ()->get_rows());
440 master_x = row_it.data ()->projection_left;
441 master_y = row_it.data ()->baseline.y (master_x);
442 projection_left = INT16_MAX;
443 projection_right = -INT16_MAX;
444 prop_blocks = 0;
445 fixed_blocks = 0;
446 total_row_count = 0;
447
448 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
449 block_it.forward ()) {
450 block = block_it.data ();
451 row_it.set_to_list (block->get_rows ());
452 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
453 row = row_it.data ();
454 total_row_count++;
455 if (row->fixed_pitch > 0)
456 pitches.add (static_cast<int32_t>(row->fixed_pitch), 1);
457 //find median
458 row_y = row->baseline.y (master_x);
459 row_left =
460 static_cast<int16_t>(row->projection_left -
461 shift_factor * (master_y - row_y));
462 row_right =
463 static_cast<int16_t>(row->projection_right -
464 shift_factor * (master_y - row_y));
465 if (row_left < projection_left)
466 projection_left = row_left;
467 if (row_right > projection_right)
468 projection_right = row_right;
469 }
470 }
// No row had a positive fixed_pitch: nothing to take a median of.
471 if (pitches.get_total () == 0)
472 return false;
473 projection.set_range (projection_left, projection_right);
474
475 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
476 block_it.forward ()) {
477 block = block_it.data ();
478 row_it.set_to_list (block->get_rows ());
479 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
480 row = row_it.data ();
481 row_y = row->baseline.y (master_x);
482 row_left =
483 static_cast<int16_t>(row->projection_left -
484 shift_factor * (master_y - row_y));
// x indexes the row's own projection; row_left is the matching shifted
// column in the page projection, advanced in step with x.
485 for (x = row->projection_left; x < row->projection_right;
486 x++, row_left++) {
487 projection.add (row_left, row->projection.pile_count (x));
488 }
489 }
490 }
491
492 row_it.set_to_list (block_it.data ()->get_rows ());
493 row = row_it.data ();
494#ifndef GRAPHICS_DISABLED
495 if (textord_show_page_cuts && to_win != nullptr)
496 projection.plot (to_win, projection_left,
497 row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
498#endif
499 final_pitch = pitches.ile (0.5);
500 pitch = static_cast<int16_t>(final_pitch);
// final_pitch is passed as tune_row_pitch's initial_pitch reference
// parameter; space_size is 0.75 * the truncated median pitch.
501 pitch_sd =
502 tune_row_pitch (row, &projection, projection_left, projection_right,
503 pitch * 0.75, final_pitch, sp_sd, mid_cuts,
504 &row->char_cells, false);
505
507 tprintf
508 ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
509 prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
510 pitch_sd / total_row_count, pitch_sd / pitch,
511 pitch_sd / total_row_count / pitch);
512
513#ifndef GRAPHICS_DISABLED
514 if (textord_show_page_cuts && to_win != nullptr) {
515 master_cells = &row->char_cells;
516 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
517 block_it.forward ()) {
518 block = block_it.data ();
519 row_it.set_to_list (block->get_rows ());
520 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
521 row_it.forward ()) {
522 row = row_it.data ();
523 row_y = row->baseline.y (master_x);
524 row_shift = shift_factor * (master_y - row_y);
525 plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells);
526 }
527 }
528 }
529#endif
// NOTE(review): when the plotting block above runs, `row` has been reassigned
// inside its loops, so this clears the last row visited there rather than the
// row whose char_cells master_cells points at - confirm that is intended.
530 row->char_cells.clear ();
531 return false;
532}
void plot_row_cells(ScrollView *win, ScrollView::Color colour, TO_ROW *row, float xshift, ICOORDELT_LIST *cells)
Definition: drawtord.cpp:394
#define MAX_ALLOWED_PITCH
Definition: topitch.cpp:57
bool textord_show_page_cuts
Definition: topitch.cpp:47
bool textord_blockndoc_fixed
Definition: topitch.cpp:51
QSPLINE baseline
Definition: blobbox.h:670
double y(double x) const
Definition: quspline.cpp:209
int32_t pile_count(int32_t value) const
Definition: statistc.h:76

◆ try_rows_fixed()

bool try_rows_fixed ( TO_BLOCK *  block,
int32_t  block_index,
bool  testing_on 
)

Definition at line 555 of file topitch.cpp.

// Body listing for try_rows_fixed (the signature is in the table above).
// Runs fixed_pitch_row on every row that has a positive fixed_pitch, then
// tallies the rows' decisions with count_block_votes and walks an if/else
// chain that compares the definite and tentative counts, each scaled by
// textord_words_veto_power. The only return visible here yields false.
// NOTE(review): listing lines 596-597, 602, 604, 606, 608, 610 and 612 are
// missing from this extract, so the rest of the `if (testing_on` condition
// and the statement of every branch of the final chain are not visible
// (presumably assignments to the block's pitch decision) - confirm against
// the real source.
559 {
560 TO_ROW *row; //current row
561 int32_t row_index; //row number.
562 int32_t def_fixed = 0; //counters
563 int32_t def_prop = 0;
564 int32_t maybe_fixed = 0;
565 int32_t maybe_prop = 0;
566 int32_t dunno = 0;
567 int32_t corr_fixed = 0;
568 int32_t corr_prop = 0;
569 float lower, upper; //cluster thresholds
570 TO_ROW_IT row_it = block->get_rows ();
571
572 row_index = 1;
573 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
574 row = row_it.data ();
575 ASSERT_HOST (row->xheight > 0);
576 if (row->fixed_pitch > 0 &&
577 fixed_pitch_row(row, block->block, block_index)) {
// If fixed_pitch has become 0 after the call, fall back to the proportional
// estimates for kern_size and space_size.
578 if (row->fixed_pitch == 0) {
579 lower = row->pr_nonsp;
580 upper = row->pr_space;
581 row->space_size = upper;
582 row->kern_size = lower;
583 }
584 }
585 row_index++;
586 }
587 count_block_votes(block,
588 def_fixed,
589 def_prop,
590 maybe_fixed,
591 maybe_prop,
592 corr_fixed,
593 corr_prop,
594 dunno);
595 if (testing_on
598 tprintf ("Initially:");
599 print_block_counts(block, block_index);
600 }
601 if (def_fixed > def_prop * textord_words_veto_power)
603 else if (def_prop > def_fixed * textord_words_veto_power)
605 else if (def_fixed > 0 || def_prop > 0)
607 else if (maybe_fixed > maybe_prop * textord_words_veto_power)
609 else if (maybe_prop > maybe_fixed * textord_words_veto_power)
611 else
613 return false;
614}
void print_block_counts(TO_BLOCK *block, int32_t block_index)
Definition: topitch.cpp:623
bool fixed_pitch_row(TO_ROW *row, BLOCK *block, int32_t block_index)
Definition: topitch.cpp:980
PITCH_TYPE pitch_decision
Definition: blobbox.h:778

◆ tune_row_pitch()

float tune_row_pitch ( TO_ROW *  row,
STATS *  projection,
int16_t  projection_left,
int16_t  projection_right,
float  space_size,
float &  initial_pitch,
float &  best_sp_sd,
int16_t &  best_mid_cuts,
ICOORDELT_LIST *  best_cells,
bool  testing_on 
)

Definition at line 1152 of file topitch.cpp.

// Body listing for tune_row_pitch (the signature is in the table above).
// Evaluates compute_pitch_sd at initial_pitch, then at initial_pitch + 1 ..
// + textord_pitch_range and at initial_pitch - 1 .. - textord_pitch_range,
// abandoning each direction as soon as a candidate scores worse than the
// starting value. The best candidate's cells, mid-cut count and space sd are
// left in best_cells / best_mid_cuts / best_sp_sd, initial_pitch is
// overwritten with the best pitch, and the best sd is returned.
// With textord_disable_pitch_test set, best_sp_sd and the return value are
// both simply initial_pitch.
// NOTE(review): listing lines 1174 and 1245 are missing from this extract.
// As shown, the `return tune_row_pitch2 (...)` looks unconditional; it is
// presumably guarded by the missing line (textord_fast_pitch_test appears in
// this function's cross-reference list). Likewise the print_pitch_sd call
// near the end presumably sits behind a debug flag. Confirm both.
1163 {
1164 int pitch_delta; //offset pitch
1165 int16_t mid_cuts; //cheap cuts
1166 float pitch_sd; //current sd
1167 float best_sd; //best result
1168 float best_pitch; //pitch for best result
1169 float initial_sd; //starting error
1170 float sp_sd; //space sd
1171 ICOORDELT_LIST test_cells; //row cells
1172 ICOORDELT_IT best_it; //start of best list
1173
1175 return tune_row_pitch2 (row, projection, projection_left,
1176 projection_right, space_size, initial_pitch,
1177 best_sp_sd,
1178 //space sd
1179 best_mid_cuts, best_cells, testing_on);
1180 if (textord_disable_pitch_test) {
1181 best_sp_sd = initial_pitch;
1182 return initial_pitch;
1183 }
// Baseline evaluation writes straight into the caller's outputs, so they
// already hold the starting result if no candidate beats it.
1184 initial_sd =
1185 compute_pitch_sd(row,
1186 projection,
1187 projection_left,
1188 projection_right,
1189 space_size,
1190 initial_pitch,
1191 best_sp_sd,
1192 best_mid_cuts,
1193 best_cells,
1194 testing_on);
1195 best_sd = initial_sd;
1196 best_pitch = initial_pitch;
1197 if (testing_on)
1198 tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
// Upward search: try larger pitches one unit at a time.
1199 for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1200 pitch_sd =
1201 compute_pitch_sd (row, projection, projection_left, projection_right,
1202 space_size, initial_pitch + pitch_delta, sp_sd,
1203 mid_cuts, &test_cells, testing_on);
1204 if (testing_on)
1205 tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
1206 pitch_sd);
1207 if (pitch_sd < best_sd) {
1208 best_sd = pitch_sd;
1209 best_mid_cuts = mid_cuts;
1210 best_sp_sd = sp_sd;
1211 best_pitch = initial_pitch + pitch_delta;
// Replace the caller's cell list with the candidate's cells.
1212 best_cells->clear ();
1213 best_it.set_to_list (best_cells);
1214 best_it.add_list_after (&test_cells);
1215 }
1216 else
1217 test_cells.clear ();
1218 if (pitch_sd > initial_sd)
1219 break; //getting worse
1220 }
// Downward search: same procedure with smaller pitches. initial_pitch itself
// is unchanged until both searches are done.
1221 for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
1222 pitch_sd =
1223 compute_pitch_sd (row, projection, projection_left, projection_right,
1224 space_size, initial_pitch - pitch_delta, sp_sd,
1225 mid_cuts, &test_cells, testing_on);
1226 if (testing_on)
1227 tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
1228 pitch_sd);
1229 if (pitch_sd < best_sd) {
1230 best_sd = pitch_sd;
1231 best_mid_cuts = mid_cuts;
1232 best_sp_sd = sp_sd;
1233 best_pitch = initial_pitch - pitch_delta;
1234 best_cells->clear ();
1235 best_it.set_to_list (best_cells);
1236 best_it.add_list_after (&test_cells);
1237 }
1238 else
1239 test_cells.clear ();
1240 if (pitch_sd > initial_sd)
1241 break;
1242 }
// Report the winning pitch back through the reference parameter.
1243 initial_pitch = best_pitch;
1244
1246 print_pitch_sd(row,
1247 projection,
1248 projection_left,
1249 projection_right,
1250 space_size,
1251 best_pitch);
1252
1253 return best_sd;
1254}
bool textord_fast_pitch_test
Definition: topitch.cpp:43
void print_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch)
Definition: topitch.cpp:1631
float tune_row_pitch2(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float &initial_pitch, float &best_sp_sd, int16_t &best_mid_cuts, ICOORDELT_LIST *best_cells, bool testing_on)
Definition: topitch.cpp:1264
float compute_pitch_sd(TO_ROW *row, STATS *projection, int16_t projection_left, int16_t projection_right, float space_size, float initial_pitch, float &sp_sd, int16_t &mid_cuts, ICOORDELT_LIST *row_cells, bool testing_on, int16_t start, int16_t end)
Definition: topitch.cpp:1376
int textord_pitch_range
Definition: tovars.cpp:35

◆ tune_row_pitch2()

float tune_row_pitch2 ( TO_ROW *  row,
STATS *  projection,
int16_t  projection_left,
int16_t  projection_right,
float  space_size,
float &  initial_pitch,
float &  best_sp_sd,
int16_t &  best_mid_cuts,
ICOORDELT_LIST *  best_cells,
bool  testing_on 
)

Definition at line 1264 of file topitch.cpp.

// Body listing for tune_row_pitch2 (the signature is in the table above).
// For each candidate pitch in [best_pitch - textord_pitch_range,
// best_pitch + textord_pitch_range] the projection is folded modulo that
// pitch into its own STATS. The (pitch, offset) pair with the smallest folded
// count wins; initial_pitch is overwritten with that pitch, a [start, end]
// offset window around the winning offset is widened while counts stay
// within 2 * (minimum + 1), and compute_pitch_sd is run once with that
// window. Its result is returned.
// Early exit: with textord_disable_pitch_test set, or when the truncated
// initial pitch is not larger than textord_pitch_range, the function returns
// initial_pitch after setting best_sp_sd to it.
// NOTE(review): listing line 1357 is missing from this extract; presumably it
// guards the print_pitch_sd call near the end with a debug flag - confirm.
1275 {
1276 int pitch_delta; //offset pitch
1277 int16_t pixel; //pixel coord
1278 int16_t best_pixel; //pixel coord
1279 int16_t best_delta; //best pitch
1280 int16_t best_pitch; //best pitch
1281 int16_t start; //of good range
1282 int16_t end; //of good range
1283 int32_t best_count; //lowest sum
1284 float best_sd; //best result
1285
1286 best_sp_sd = initial_pitch;
1287
1288 best_pitch = static_cast<int>(initial_pitch);
// The best_pitch <= textord_pitch_range test also keeps every candidate
// pitch (best_pitch + pitch_delta) strictly positive for the % below.
1289 if (textord_disable_pitch_test || best_pitch <= textord_pitch_range) {
1290 return initial_pitch;
1291 }
// One histogram per candidate pitch, indexed by textord_pitch_range + delta.
1292 std::unique_ptr<STATS[]> sum_proj(new STATS[textord_pitch_range * 2 + 1]); //summed projection
1293
1294 for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1295 pitch_delta++)
1296 sum_proj[textord_pitch_range + pitch_delta].set_range (0,
1297 best_pitch +
1298 pitch_delta + 1);
// Fold the projection: every pixel column is added at its offset modulo the
// candidate pitch.
1299 for (pixel = projection_left; pixel <= projection_right; pixel++) {
1300 for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1301 pitch_delta++) {
1302 sum_proj[textord_pitch_range + pitch_delta].add(
1303 (pixel - projection_left) % (best_pitch + pitch_delta),
1304 projection->pile_count(pixel));
1305 }
1306 }
// Start from delta 0 / offset 0 and look for any strictly smaller count.
1307 best_count = sum_proj[textord_pitch_range].pile_count (0);
1308 best_delta = 0;
1309 best_pixel = 0;
1310 for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
1311 pitch_delta++) {
1312 for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
1313 if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel)
1314 < best_count) {
1315 best_count =
1316 sum_proj[textord_pitch_range +
1317 pitch_delta].pile_count (pixel);
1318 best_delta = pitch_delta;
1319 best_pixel = pixel;
1320 }
1321 }
1322 }
1323 if (testing_on)
1324 tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
1325 initial_pitch, best_delta, best_count);
1326 best_pitch += best_delta;
1327 initial_pitch = best_pitch;
// The two statements below turn best_count into 2 * (minimum + 1), the
// threshold used when widening the window.
1328 best_count++;
1329 best_count += best_count;
// NOTE(review): start begins at best_pixel - 2 and decreases, so it can be
// negative; in C++ `start % best_pitch` is then negative too. Confirm that
// STATS::pile_count tolerates a below-range value.
1330 for (start = best_pixel - 2; start > best_pixel - best_pitch
1331 && sum_proj[textord_pitch_range +
1332 best_delta].pile_count (start % best_pitch) <= best_count;
1333 start--);
1334 for (end = best_pixel + 2;
1335 end < best_pixel + best_pitch
1336 && sum_proj[textord_pitch_range +
1337 best_delta].pile_count (end % best_pitch) <= best_count;
1338 end++);
1339
1340 best_sd =
1341 compute_pitch_sd(row,
1342 projection,
1343 projection_left,
1344 projection_right,
1345 space_size,
1346 initial_pitch,
1347 best_sp_sd,
1348 best_mid_cuts,
1349 best_cells,
1350 testing_on,
1351 start,
1352 end);
1353 if (testing_on)
1354 tprintf ("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch,
1355 best_sd);
1356
1358 print_pitch_sd(row,
1359 projection,
1360 projection_left,
1361 projection_right,
1362 space_size,
1363 initial_pitch);
1364
1365 return best_sd;
1366}

Variable Documentation

◆ textord_balance_factor

double textord_balance_factor = 2.0
extern

"Ding rate for unbalanced char cells"

Definition at line 54 of file topitch.cpp.

◆ textord_blockndoc_fixed

bool textord_blockndoc_fixed = true
extern

"Attempt whole doc/block fixed pitch"

Definition at line 51 of file topitch.cpp.

◆ textord_debug_pitch_metric

bool textord_debug_pitch_metric = false
extern

"Write full metric stuff"

Definition at line 45 of file topitch.cpp.

◆ textord_debug_pitch_test

bool textord_debug_pitch_test = false
extern

"Debug on fixed pitch test"

Definition at line 39 of file topitch.cpp.

◆ textord_fast_pitch_test

bool textord_fast_pitch_test = false
extern

"Do even faster pitch algorithm"

Definition at line 43 of file topitch.cpp.

◆ textord_pitch_cheat

bool textord_pitch_cheat = false
extern

"Use correct answer for fixed/prop"

Definition at line 49 of file topitch.cpp.

◆ textord_projection_scale

double textord_projection_scale = 0.125
extern

"Ding rate for mid-cuts"

Definition at line 52 of file topitch.cpp.

◆ textord_show_page_cuts

bool textord_show_page_cuts = false
extern

"Draw page-level cuts"

Definition at line 47 of file topitch.cpp.

◆ textord_show_row_cuts

bool textord_show_row_cuts = false
extern

"Draw row-level cuts"

Definition at line 46 of file topitch.cpp.