tesseract 4.1.1
Loading...
Searching...
No Matches
pithsync.cpp File Reference
#include <cmath>
#include <cfloat>
#include <vector>
#include "makerow.h"
#include "pitsync1.h"
#include "topitch.h"
#include "pithsync.h"
#include "tprintf.h"

Go to the source code of this file.

Functions

double check_pitch_sync2 (BLOBNBOX_IT *blob_it, int16_t blob_count, int16_t pitch, int16_t pitch_error, STATS *projection, int16_t projection_left, int16_t projection_right, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
 
double check_pitch_sync3 (int16_t projection_left, int16_t projection_right, int16_t zero_count, int16_t pitch, int16_t pitch_error, STATS *projection, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
 

Function Documentation

◆ check_pitch_sync2()

double check_pitch_sync2 ( BLOBNBOX_IT *  blob_it,
int16_t  blob_count,
int16_t  pitch,
int16_t  pitch_error,
STATS *  projection,
int16_t  projection_left,
int16_t  projection_right,
float  projection_scale,
int16_t &  occupation_count,
FPSEGPT_LIST *  seg_list,
int16_t  start,
int16_t  end 
)

Definition at line 291 of file pithsync.cpp.

// Body of check_pitch_sync2 (source lines 304-477; the signature is given in
// the parameter table above).
// Builds a table of FPCUTPT candidates, one per x from left_edge - pitch up
// to right_edge + pitch, picks the best terminal candidate, copies the chain
// reached through previous() into seg_list as FPSEGPTs, and returns
// squares() - sum()^2 / best_count read from the last segment point.
// occupation_count ends as (number of chain entries whose preceding window
// contains a non-empty projection pile) - 1.
// When pitsync_linear_version >= 4 the whole job is delegated to
// check_pitch_sync3; start and end are only used for that call.
304 {
305 bool faking; // cut at x fell through every legal case below
306 bool mid_cut; // cut at x is far from both edges of this_box
307 int16_t x; // current x coordinate
308 int16_t blob_index; // number of blobs consumed so far (starts at 1)
309 int16_t left_edge; // leftmost x with a non-empty projection pile
310 int16_t right_edge; // rightmost x with a non-empty projection pile
311 int16_t array_origin; // x coordinate stored at cutpts[0]
312 int16_t offset; // distance to a legal cut area, passed to setup()/assign()
313 int16_t zero_count; // pile height treated as empty; fixed at 0 here
314 int16_t best_left_x = 0; // first x of the run of equally good terminals
315 int16_t best_right_x = 0; // last x of that run
316 TBOX this_box; // bounding box of the current blob
317 TBOX next_box; // bounding box of the following blob
318 FPSEGPT *segpt; // newly allocated output segment point
319 double best_cost; // lowest cost_function() among the terminals
320 double mean_sum; // sum()^2 / best_count, used for the result
321 FPCUTPT *best_end; // terminal cut point chosen as the path end
322 int16_t best_fake; // fake_count of the best terminal
323 int16_t best_count; // index() of the best terminal
324 BLOBNBOX_IT this_it; // private copy of the caller's iterator
325 FPSEGPT_IT seg_it = seg_list; // iterator used to fill seg_list
326
327 // tprintf("Computing sync on word of %d blobs with pitch %d\n",
328 // blob_count, pitch);
329 // if (blob_count==8 && pitch==27)
330 // projection->print(stdout,true);
331 zero_count = 0;
332 if (pitch < 3)
333 pitch = 3; // clamp: nothing ludicrous
334 if ((pitch - 3) / 2 < pitch_error)
335 pitch_error = (pitch - 3) / 2; // cap the tolerance at (pitch - 3) / 2
336 this_it = *blob_it;
337 this_box = box_next (&this_it);// first box; fetched again at line 371
338 // left_edge=this_box.left(); //left of word
339 // right_edge=this_box.right();
340 // for (blob_index=1;blob_index<blob_count;blob_index++)
341 // {
342 // this_box=box_next(&this_it);
343 // if (this_box.right()>right_edge)
344 // right_edge=this_box.right();
345 // }
346 for (left_edge = projection_left; projection->pile_count (left_edge) == 0
347 && left_edge < projection_right; left_edge++); // skip empty piles on the left
348 for (right_edge = projection_right; projection->pile_count (right_edge) == 0
349 && right_edge > left_edge; right_edge--); // skip empty piles on the right
350 ASSERT_HOST (right_edge >= left_edge);
351 if (pitsync_linear_version >= 4) // delegate entirely to check_pitch_sync3
352 return check_pitch_sync3 (projection_left, projection_right, zero_count,
353 pitch, pitch_error, projection,
354 projection_scale, occupation_count, seg_list,
355 start, end);
356 array_origin = left_edge - pitch; // table starts one pitch left of left_edge
357 // one FPCUTPT per x, indexed by x - array_origin
358 std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
359 for (x = array_origin; x < left_edge; x++)
360 // free cuts: offset 0 for every x left of left_edge
361 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
362 zero_count, pitch, x, 0);
363 for (offset = 0; offset <= pitch_error; offset++, x++)
364 // not quite free: the next pitch_error + 1 points get offset 0..pitch_error
365 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
366 zero_count, pitch, x, offset);
367
368 this_it = *blob_it; // restart the blob walk from the caller's position
369 best_cost = FLT_MAX; // overwritten at line 412 before it is read
370 best_end = nullptr; // overwritten at line 443 before it is read
371 this_box = box_next (&this_it);//first box
372 next_box = box_next (&this_it);//second box
373 blob_index = 1;
374 while (x < right_edge - pitch_error) {
375 if (x > this_box.right () + pitch_error && blob_index < blob_count) {
376 this_box = next_box; // x has passed this blob plus tolerance: advance
377 next_box = box_next (&this_it);
378 blob_index++;
379 }
380 faking = false;
381 mid_cut = false;
382 if (x <= this_box.left ()) // at or left of the blob: offset 0
383 offset = 0;
384 else if (x <= this_box.left () + pitch_error) // within tolerance of the left edge
385 offset = x - this_box.left ();
386 else if (x >= this_box.right ()) // at or right of the blob: offset 0
387 offset = 0;
388 else if (x >= next_box.left () && blob_index < blob_count) { // inside the next blob's extent too
389 offset = x - next_box.left ();
390 if (this_box.right () - x < offset) // take the smaller of the two distances
391 offset = this_box.right () - x;
392 }
393 else if (x >= this_box.right () - pitch_error) // within tolerance of the right edge
394 offset = this_box.right () - x;
395 else if (x - this_box.left () > pitch * pitsync_joined_edge
396 && this_box.right () - x > pitch * pitsync_joined_edge) { // far from both edges
397 mid_cut = true;
398 offset = 0;
399 }
400 else {
401 faking = true; // no legal case matched
402 offset = projection->pile_count (x); // offset becomes the projection height at x
403 }
404 cutpts[x - array_origin].assign (&cutpts[0], array_origin, x,
405 faking, mid_cut, offset, projection,
406 projection_scale, zero_count, pitch,
407 pitch_error);
408 x++;
409 }
410
411 best_fake = INT16_MAX; // sentinel: no terminal chosen yet (checked at line 441)
412 best_cost = INT32_MAX;
413 best_count = INT16_MAX;
414 while (x < right_edge + pitch) { // terminal candidates up to right_edge + pitch - 1
415 offset = x < right_edge ? right_edge - x : 0; // distance short of right_edge, else 0
416 cutpts[x - array_origin].assign (&cutpts[0], array_origin, x,
417 false, false, offset, projection,
418 projection_scale, zero_count, pitch,
419 pitch_error);
420 cutpts[x - array_origin].terminal = true;
421 if (cutpts[x - array_origin].index () +
422 cutpts[x - array_origin].fake_count <= best_count + best_fake) {
423 if (cutpts[x - array_origin].fake_count < best_fake
424 || (cutpts[x - array_origin].fake_count == best_fake
425 && cutpts[x - array_origin].cost_function () < best_cost)) {
426 best_fake = cutpts[x - array_origin].fake_count; // new best: fewer fakes, or same fakes and lower cost
427 best_cost = cutpts[x - array_origin].cost_function ();
428 best_left_x = x;
429 best_right_x = x;
430 best_count = cutpts[x - array_origin].index ();
431 }
432 else if (cutpts[x - array_origin].fake_count == best_fake
433 && x == best_right_x + 1
434 && cutpts[x - array_origin].cost_function () == best_cost) {
435 // exactly equal and adjacent: extend the run of equally good terminals
436 best_right_x = x;
437 }
438 }
439 x++;
440 }
441 ASSERT_HOST (best_fake < INT16_MAX);
442
443 best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin]; // middle of the equal run
444 if (this_box.right () == textord_test_x
445 && this_box.top () == textord_test_y) { // debug dump for the box matching textord_test_x/y
446 for (x = left_edge - pitch; x < right_edge + pitch; x++) {
447 tprintf ("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
448 x, cutpts[x - array_origin].cost_function (),
449 cutpts[x - array_origin].sum (),
450 cutpts[x - array_origin].squares (),
451 cutpts[x - array_origin].previous ()->position ()); // NOTE(review): previous() is dereferenced unchecked, yet the loop at 455-466 relies on it returning nullptr eventually - confirm
452 }
453 }
454 occupation_count = -1; // so the final value is (occupied windows) - 1
455 do { // walk the chosen path backwards through previous()
456 for (x = best_end->position () - pitch + pitch_error;
457 x < best_end->position () - pitch_error
458 && projection->pile_count (x) == 0; x++); // stop at the first non-empty pile in the window
459 if (x < best_end->position () - pitch_error)
460 occupation_count++;
461 // copy the cut point into a new FPSEGPT and add it to the output list
462 segpt = new FPSEGPT (best_end);
463 seg_it.add_before_then_move (segpt);
464 best_end = best_end->previous ();
465 }
466 while (best_end != nullptr);
467 seg_it.move_to_last ();
468 mean_sum = seg_it.data ()->sum ();
469 mean_sum = mean_sum * mean_sum / best_count;
470 if (seg_it.data ()->squares () - mean_sum < 0) // a negative result is reported but still returned
471 tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
472 seg_it.data ()->squares (), seg_it.data ()->sum (), best_count);
473 // tprintf("blob_count=%d, pitch=%d, sync=%g, occ=%d\n",
474 // blob_count,pitch,seg_it.data()->squares()-mean_sum,
475 // occupation_count);
476 return seg_it.data ()->squares () - mean_sum;
477}
TBOX box_next(BLOBNBOX_IT *it)
Definition: blobbox.cpp:636
#define ASSERT_HOST(x)
Definition: errcode.h:88
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int textord_test_x
Definition: makerow.cpp:60
int textord_test_y
Definition: makerow.cpp:61
double check_pitch_sync3(int16_t projection_left, int16_t projection_right, int16_t zero_count, int16_t pitch, int16_t pitch_error, STATS *projection, float projection_scale, int16_t &occupation_count, FPSEGPT_LIST *seg_list, int16_t start, int16_t end)
Definition: pithsync.cpp:488
double pitsync_joined_edge
Definition: pitsync1.cpp:26
TBOX
Definition: rect.h:34
int16_t top() const
Definition: rect.h:58
int16_t left() const
Definition: rect.h:72
int16_t right() const
Definition: rect.h:79
int32_t pile_count(int32_t value) const
Definition: statistc.h:76
FPCUTPT * previous()
Definition: pithsync.h:79
double sum()
Definition: pithsync.h:76
int32_t position()
Definition: pithsync.h:67

◆ check_pitch_sync3()

double check_pitch_sync3 ( int16_t  projection_left,
int16_t  projection_right,
int16_t  zero_count,
int16_t  pitch,
int16_t  pitch_error,
STATS *  projection,
float  projection_scale,
int16_t &  occupation_count,
FPSEGPT_LIST *  seg_list,
int16_t  start,
int16_t  end 
)

Definition at line 488 of file pithsync.cpp.

// Body of check_pitch_sync3 (source lines 500-687; the signature is given in
// the parameter table above).
// Works from the projection alone (no blob boxes): builds a table of FPCUTPT
// candidates, one per x from left_edge - pitch up to right_edge + pitch,
// picks the best terminal candidate, copies the chain reached through
// previous() into seg_list as FPSEGPTs, and returns
// squares() - sum()^2 / best_count read from the last segment point.
// occupation_count ends as (number of chain entries whose preceding window
// contains a non-empty projection pile) - 1.
// start/end select which x positions get the full assign(); the others get
// assign_cheap() when textord_fast_pitch_test is on.
500 {
501 bool faking; // no empty pile or usable local minimum near x
502 bool mid_cut; // cut placed at a local minimum of the projection
503 int16_t left_edge; // leftmost x with a non-empty projection pile
504 int16_t right_edge; // rightmost x with a non-empty projection pile
505 int16_t x; // current x coordinate
506 int16_t array_origin; // x coordinate stored at cutpts[0]
507 int16_t offset; // distance to a legal cut area, passed to setup()/assign()
508 int16_t projection_offset; // pile_count(x) / projection_scale
509 int16_t prev_zero; // last x seen with pile_count <= zero_count
510 int16_t next_zero; // upcoming x with pile_count <= zero_count, or one past the scan window
511 int16_t zero_offset; // scan window: pitch * pitsync_joined_edge
512 int16_t best_left_x = 0; // first x of the run of equally good terminals
513 int16_t best_right_x = 0; // last x of that run
514 FPSEGPT *segpt; // newly allocated output segment point
515 int minindex; // next slot to write in the circular mins buffer
516 int test_index; // slot in mins being examined
517 double best_cost; // lowest cost_function() among the terminals
518 double mean_sum; // sum()^2 / best_count, used for the result
519 FPCUTPT *best_end; // terminal cut point chosen as the path end
520 int16_t best_fake; // fake_count of the best terminal
521 int16_t best_count; // index() of the best terminal
522 FPSEGPT_IT seg_it = seg_list; // iterator used to fill seg_list
523
524 end = (end - start) % pitch; // NOTE(review): runs before the clamp at line 525, so pitch == 0 would divide by zero - confirm callers guarantee pitch > 0
525 if (pitch < 3)
526 pitch = 3; // clamp: nothing ludicrous
527 if ((pitch - 3) / 2 < pitch_error)
528 pitch_error = (pitch - 3) / 2; // cap the tolerance at (pitch - 3) / 2
529 // minimum distance from an empty pile required before a local-min cut is considered
530 zero_offset = static_cast<int16_t>(pitch * pitsync_joined_edge);
531 for (left_edge = projection_left; projection->pile_count (left_edge) == 0
532 && left_edge < projection_right; left_edge++); // skip empty piles on the left
533 for (right_edge = projection_right; projection->pile_count (right_edge) == 0
534 && right_edge > left_edge; right_edge--); // skip empty piles on the right
535 array_origin = left_edge - pitch; // table starts one pitch left of left_edge
536 // one FPCUTPT per x, indexed by x - array_origin
537 std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
538 // circular buffer of local_min() results for x - pitch_error .. x + pitch_error
539 std::vector<bool> mins(pitch_error * 2 + 1);
540 for (x = array_origin; x < left_edge; x++)
541 // free cuts: offset 0 for every x left of left_edge
542 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
543 zero_count, pitch, x, 0);
544 prev_zero = left_edge - 1;
545 for (offset = 0; offset <= pitch_error; offset++, x++)
546 // not quite free: the next pitch_error + 1 points get offset 0..pitch_error
547 cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
548 zero_count, pitch, x, offset);
549
550 best_cost = FLT_MAX; // overwritten at line 627 before it is read
551 best_end = nullptr; // overwritten at line 658 before it is read
552 for (offset = -pitch_error, minindex = 0; offset < pitch_error;
553 offset++, minindex++) // prime the buffer with x - pitch_error .. x + pitch_error - 1
554 mins[minindex] = projection->local_min (x + offset);
555 next_zero = x + zero_offset + 1; // default: nothing empty inside the window
556 for (offset = next_zero - 1; offset >= x; offset--) { // offset is used as an x coordinate here
557 if (projection->pile_count (offset) <= zero_count) {
558 next_zero = offset; // rightmost empty pile in [x, x + zero_offset]
559 break;
560 }
561 }
562 while (x < right_edge - pitch_error) {
563 mins[minindex] = projection->local_min (x + pitch_error); // newest entry replaces the oldest
564 minindex++;
565 if (minindex > pitch_error * 2)
566 minindex = 0; // wrap; minindex now addresses the slot for x - pitch_error
567 faking = false;
568 mid_cut = false;
569 offset = 0;
570 if (projection->pile_count (x) <= zero_count) {
571 prev_zero = x; // x itself is empty: offset stays 0
572 }
573 else {
574 for (offset = 1; offset <= pitch_error; offset++) // distance to the nearest empty pile; ends > pitch_error if none
575 if (projection->pile_count (x + offset) <= zero_count
576 || projection->pile_count (x - offset) <= zero_count)
577 break;
578 }
579 if (offset > pitch_error) { // no empty pile within tolerance
580 if (x - prev_zero > zero_offset && next_zero - x > zero_offset) { // far from empties on both sides: try a local minimum
581 for (offset = 0; offset <= pitch_error; offset++) {
582 test_index = minindex + pitch_error + offset; // slot for x + offset
583 if (test_index > pitch_error * 2)
584 test_index -= pitch_error * 2 + 1;
585 if (mins[test_index])
586 break;
587 test_index = minindex + pitch_error - offset; // slot for x - offset
588 if (test_index > pitch_error * 2)
589 test_index -= pitch_error * 2 + 1;
590 if (mins[test_index])
591 break;
592 }
593 }
594 if (offset > pitch_error) { // still nothing usable: fake the cut
595 offset = projection->pile_count (x);
596 faking = true;
597 }
598 else { // local minimum within tolerance
599 projection_offset =
600 static_cast<int16_t>(projection->pile_count (x) / projection_scale);
601 if (projection_offset > offset)
602 offset = projection_offset; // offset is at least the scaled projection height
603 mid_cut = true;
604 }
605 }
606 if ((start == 0 && end == 0)
607 || !textord_fast_pitch_test
608 || (x - projection_left - start) % pitch <= end) // full evaluation: no window given, fast test off, or x inside the window
609 cutpts[x - array_origin].assign(&cutpts[0], array_origin, x,
610 faking, mid_cut, offset, projection,
611 projection_scale, zero_count, pitch,
612 pitch_error);
613 else
614 cutpts[x - array_origin].assign_cheap(&cutpts[0], array_origin, x,
615 faking, mid_cut, offset,
616 projection, projection_scale,
617 zero_count, pitch,
618 pitch_error);
619 x++;
620 if (next_zero < x || next_zero == x + zero_offset) // previous marker is behind x or was the old sentinel
621 next_zero = x + zero_offset + 1;
622 if (projection->pile_count (x + zero_offset) <= zero_count) // pile newly entering the window is empty
623 next_zero = x + zero_offset;
624 }
625
626 best_fake = INT16_MAX; // sentinel: no terminal chosen yet (checked at line 656)
627 best_cost = INT32_MAX;
628 best_count = INT16_MAX;
629 while (x < right_edge + pitch) { // terminal candidates up to right_edge + pitch - 1
630 offset = x < right_edge ? right_edge - x : 0; // distance short of right_edge, else 0
631 cutpts[x - array_origin].assign(&cutpts[0], array_origin, x,
632 false, false, offset, projection,
633 projection_scale, zero_count, pitch,
634 pitch_error);
635 cutpts[x - array_origin].terminal = true;
636 if (cutpts[x - array_origin].index () +
637 cutpts[x - array_origin].fake_count <= best_count + best_fake) {
638 if (cutpts[x - array_origin].fake_count < best_fake
639 || (cutpts[x - array_origin].fake_count == best_fake
640 && cutpts[x - array_origin].cost_function () < best_cost)) {
641 best_fake = cutpts[x - array_origin].fake_count; // new best: fewer fakes, or same fakes and lower cost
642 best_cost = cutpts[x - array_origin].cost_function ();
643 best_left_x = x;
644 best_right_x = x;
645 best_count = cutpts[x - array_origin].index ();
646 }
647 else if (cutpts[x - array_origin].fake_count == best_fake
648 && x == best_right_x + 1
649 && cutpts[x - array_origin].cost_function () == best_cost) {
650 // exactly equal and adjacent: extend the run of equally good terminals
651 best_right_x = x;
652 }
653 }
654 x++;
655 }
656 ASSERT_HOST (best_fake < INT16_MAX);
657
658 best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin]; // middle of the equal run
659 // for (x=left_edge-pitch;x<right_edge+pitch;x++)
660 // {
661 // tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
662 // x,cutpts[x-array_origin].cost_function(),
663 // cutpts[x-array_origin].sum(),
664 // cutpts[x-array_origin].squares(),
665 // cutpts[x-array_origin].previous()->position());
666 // }
667 occupation_count = -1; // so the final value is (occupied windows) - 1
668 do { // walk the chosen path backwards through previous()
669 for (x = best_end->position () - pitch + pitch_error;
670 x < best_end->position () - pitch_error
671 && projection->pile_count (x) == 0; x++); // stop at the first non-empty pile in the window
672 if (x < best_end->position () - pitch_error)
673 occupation_count++;
674 // copy the cut point into a new FPSEGPT and add it to the output list
675 segpt = new FPSEGPT (best_end);
676 seg_it.add_before_then_move (segpt);
677 best_end = best_end->previous ();
678 }
679 while (best_end != nullptr);
680 seg_it.move_to_last ();
681 mean_sum = seg_it.data ()->sum ();
682 mean_sum = mean_sum * mean_sum / best_count;
683 if (seg_it.data ()->squares () - mean_sum < 0) // a negative result is reported but still returned
684 tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
685 seg_it.data ()->squares (), seg_it.data ()->sum (), best_count);
686 return seg_it.data ()->squares () - mean_sum;
687}
bool textord_fast_pitch_test
Definition: topitch.cpp:43
bool local_min(int32_t x) const
Definition: statistc.cpp:254