tesseract 4.1.1
Loading...
Searching...
No Matches
fixspace.cpp File Reference
#include "fixspace.h"
#include <cstdint>
#include "blobs.h"
#include "boxword.h"
#include "errcode.h"
#include "normalis.h"
#include "ocrclass.h"
#include "pageres.h"
#include "params.h"
#include "ratngs.h"
#include "rect.h"
#include "stepblob.h"
#include "strngs.h"
#include "tesseractclass.h"
#include "tessvars.h"
#include "tprintf.h"
#include "unichar.h"
#include "unicharset.h"
#include "werd.h"

Go to the source code of this file.

Namespaces

namespace  tesseract
 

Macros

#define PERFECT_WERDS   999
 

Functions

void initialise_search (WERD_RES_LIST &src_list, WERD_RES_LIST &new_list)
 
transform_to_next_perm()

Examines the current word list to find the smallest word gap size. Then walks the word list, closing any gaps of this size by either inserting new combination words or extending existing ones.

The routine COULD be limited to stop it building words longer than N blobs.

If there are no more gaps then it DELETES the entire list and returns the empty list to cause termination.

void transform_to_next_perm (WERD_RES_LIST &words)
 
void fixspace_dbg (WERD_RES *word)
 

Macro Definition Documentation

◆ PERFECT_WERDS

#define PERFECT_WERDS   999

Definition at line 44 of file fixspace.cpp.

Function Documentation

◆ fixspace_dbg()

void fixspace_dbg ( WERD_RES * word)

Definition at line 822 of file fixspace.cpp.

822 {
    // Debug helper: writes a short summary of one word via tprintf().
    // `word`, word->word, best_choice, rebuild_word and box_word are all
    // dereferenced below without null checks, so the caller must supply a
    // WERD_RES in which every one of them is set.
823 TBOX box = word->word->bounding_box();
    // Hard-wired to false, so the per-character detail block further down
    // never executes as written.
824 const bool show_map_detail = false;
825 int16_t i;
826
    // Bounding box of the underlying WERD, followed by the best-choice text
    // in double quotes.
827 box.print();
828 tprintf(" \"%s\" ", word->best_choice->unichar_string().string());
    // Three blob counts: the WERD's cblob list, the rebuilt TWERD and the
    // BoxWord.
829 tprintf("Blob count: %d (word); %d/%d (rebuild word)\n",
830 word->word->cblob_list()->length(),
831 word->rebuild_word->NumBlobs(),
832 word->box_word->length());
    // NOTE(review): this listing jumps from source line 832 to 834 (and from
    // 838 to 840 below); the omitted lines are not visible here. Presumably
    // they are the reject_map print calls named in the cross-references --
    // confirm against fixspace.cpp.
834 tprintf("\n");
835 if (show_map_detail) {
    // Dead code while show_map_detail is false: repeats the best-choice
    // string, then prints it one char at a time up to the terminating '\0'.
836 tprintf("\"%s\"\n", word->best_choice->unichar_string().string());
837 for (i = 0; word->best_choice->unichar_string()[i] != '\0'; i++) {
838 tprintf("**** \"%c\" ****\n", word->best_choice->unichar_string()[i]);
840 }
841 }
842
    // Finally report the word's two status flags as TRUE/FALSE text.
843 tprintf("Tess Accepted: %s\n", word->tess_accepted ? "TRUE" : "FALSE");
844 tprintf("Done flag: %s\n\n", word->done ? "TRUE" : "FALSE");
845}
FILE * debug_fp
Definition: tessvars.cpp:24
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int NumBlobs() const
Definition: blobs.h:448
int length() const
Definition: boxword.h:83
TWERD * rebuild_word
Definition: pageres.h:266
bool done
Definition: pageres.h:305
tesseract::BoxWord * box_word
Definition: pageres.h:272
WERD_CHOICE * best_choice
Definition: pageres.h:241
bool tess_accepted
Definition: pageres.h:303
REJMAP reject_map
Definition: pageres.h:294
WERD * word
Definition: pageres.h:186
const STRING & unichar_string() const
Definition: ratngs.h:531
TBOX
Definition: rect.h:34
void print() const
Definition: rect.h:278
void print(FILE *fp)
Definition: rejctmap.cpp:321
void full_print(FILE *fp)
Definition: rejctmap.cpp:333
C_BLOB_LIST * cblob_list()
Definition: werd.h:95
TBOX bounding_box() const
Definition: werd.cpp:148
const char * string() const
Definition: strngs.cpp:194

◆ initialise_search()

void initialise_search ( WERD_RES_LIST &  src_list,
WERD_RES_LIST &  new_list 
)

Definition at line 204 of file fixspace.cpp.

204 {
    // Populates new_list with deep copies of every word in src_list whose
    // `combination` flag is not set. Combination words are skipped.
205 WERD_RES_IT src_it(&src_list);
206 WERD_RES_IT new_it(&new_list);
207 WERD_RES *src_wd;
208 WERD_RES *new_wd;
209
    // One full cycle over the source list.
210 for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
211 src_wd = src_it.data();
212 if (!src_wd->combination) {
    // The copy is made with WERD_RES::deep_copy; both combo-related flags
    // are then cleared on the copy so it starts out as a plain word.
213 new_wd = WERD_RES::deep_copy(src_wd);
214 new_wd->combination = false;
215 new_wd->part_of_combo = false;
    // Insert after new_it's current position and advance onto the new
    // element (per the call's name), so copies keep the source order.
216 new_it.add_after_then_move(new_wd);
217 }
218 }
219}
bool combination
Definition: pageres.h:339
bool part_of_combo
Definition: pageres.h:340
static WERD_RES * deep_copy(const WERD_RES *src)
Definition: pageres.h:649

◆ transform_to_next_perm()

void transform_to_next_perm ( WERD_RES_LIST &  words)

Definition at line 399 of file fixspace.cpp.

399 {
    // Two passes over `words`:
    //   1. find the smallest horizontal gap between consecutive words that
    //      are not already part of a combination;
    //   2. join every pair of such words whose gap is no larger than that
    //      minimum, creating or extending "combination" words.
    // If pass 1 finds no gap at all, the list is cleared instead, which the
    // caller uses as its termination signal.
400 WERD_RES_IT word_it(&words);
401 WERD_RES_IT prev_word_it(&words);
402 WERD_RES *word;
403 WERD_RES *prev_word;
404 WERD_RES *combo;
405 WERD *copy_word;
    // -INT16_MAX is the sentinel for "no previous word seen yet".
406 int16_t prev_right = -INT16_MAX;
407 TBOX box;
408 int16_t gap;
    // INT16_MAX is the sentinel for "no gap measured yet".
409 int16_t min_gap = INT16_MAX;
410
    // Pass 1: measure gaps. Words flagged part_of_combo are ignored.
411 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
412 word = word_it.data();
413 if (!word->part_of_combo) {
414 box = word->word->bounding_box();
415 if (prev_right > -INT16_MAX) {
    // Gap = this word's left edge minus the previous word's right edge.
416 gap = box.left() - prev_right;
417 if (gap < min_gap)
418 min_gap = gap;
419 }
420 prev_right = box.right();
421 }
422 }
    // Pass 2 only runs if at least one gap was measured.
423 if (min_gap < INT16_MAX) {
424 prev_right = -INT16_MAX; // back to start
425 word_it.set_to_list(&words);
426 // Note: we can't use cycle_pt due to inserted combos at start of list.
    // Keep going until word_it is back at the first element, but do not stop
    // before at least one word has been processed (prev_right still sentinel).
427 for (; (prev_right == -INT16_MAX) || !word_it.at_first();
428 word_it.forward()) {
429 word = word_it.data();
430 if (!word->part_of_combo) {
431 box = word->word->bounding_box();
432 if (prev_right > -INT16_MAX) {
433 gap = box.left() - prev_right;
434 if (gap <= min_gap) {
    // Close this gap: merge `word` into the word at prev_word_it.
435 prev_word = prev_word_it.data();
436 if (prev_word->combination) {
    // Previous word is already a combination: extend it.
437 combo = prev_word;
438 } else {
439 /* Make a new combination and insert before
440 * the first word being joined. */
441 copy_word = new WERD;
442 *copy_word = *(prev_word->word);
443 // deep copy
444 combo = new WERD_RES(copy_word);
445 combo->combination = true;
446 combo->x_height = prev_word->x_height;
    // The original word stays in the list, now marked as belonging to the
    // combo; prev_word_it ends up on the newly inserted combo.
447 prev_word->part_of_combo = true;
448 prev_word_it.add_before_then_move(combo);
449 }
    // The combo takes its end-of-line flag from the word being appended.
450 combo->word->set_flag(W_EOL, word->word->flag(W_EOL));
451 if (word->combination) {
452 combo->word->join_on(word->word);
453 // Move blobs to combo
454 // old combo no longer needed
455 delete word_it.extract();
456 } else {
457 // Copy current wd to combo
458 combo->copy_on(word);
459 word->part_of_combo = true;
460 }
    // The combo's contents changed, so mark it not done and clear its results.
461 combo->done = false;
462 combo->ClearResults();
463 } else {
    // Gap is larger than the minimum: nothing to join here.
464 prev_word_it = word_it; // catch up
465 }
466 }
467 prev_right = box.right();
468 }
469 }
470 } else {
471 words.clear(); // signal termination
472 }
473}
@ W_EOL
end of line
Definition: werd.h:33
void copy_on(WERD_RES *word_res)
Definition: pageres.h:660
float x_height
Definition: pageres.h:316
void ClearResults()
Definition: pageres.cpp:1104
int16_t left() const
Definition: rect.h:72
int16_t right() const
Definition: rect.h:79
WERD
Definition: werd.h:56
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118
void join_on(WERD *other)
Definition: werd.cpp:199
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117