48static const
double kStopperAmbiguityThresholdGain = 8.0;
51static const
double kStopperAmbiguityThresholdOffset = 1.5;
65static
double StopperAmbigThreshold(
double f1,
double f2) {
66 return (f2 - f1) * kStopperAmbiguityThresholdGain -
67 kStopperAmbiguityThresholdOffset;
76 bool merge_similar_words,
77 BLOCK_LIST *the_block_list,
80 BLOCK_IT block_it(the_block_list);
82 for (block_it.mark_cycle_pt();
83 !block_it.cycled_list(); block_it.forward()) {
84 block_res_it.add_to_end(
new BLOCK_RES(merge_similar_words,
97 ROW_IT row_it (the_block->
row_list ());
109 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
110 row_res_it.add_to_end(
new ROW_RES(merge_similar_words, row_it.data()));
131 bool add_next_word =
false;
135 for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
136 auto* word_res =
new WERD_RES(word_it.data());
137 word_res->x_height = the_row->
x_height();
141 word_res->part_of_combo =
true;
143 }
else if (merge_similar_words) {
144 union_box = word_res->word->bounding_box();
145 add_next_word = !word_res->word->flag(
W_REP_CHAR) &&
147 word_res->odd_size = !add_next_word;
149 WERD* next_word = word_it.data_relative(1);
150 if (merge_similar_words) {
158 int prev_right = union_box.
right();
159 union_box += next_box;
163 add_next_word =
false;
171 if (combo ==
nullptr) {
172 copy_word =
new WERD;
173 *copy_word = *(word_it.data());
177 word_res_it.add_to_end(combo);
179 word_res->part_of_combo =
true;
183 word_res_it.add_to_end(word_res);
216 WERD_CHOICE_IT wc_it(
const_cast<WERD_CHOICE_LIST*
>(&source.
best_choices));
218 for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
220 wc_dest_it.add_after_then_move(
new WERD_CHOICE(*choice));
222 if (!wc_dest_it.empty()) {
223 wc_dest_it.move_to_first();
305 const TBOX* norm_box,
308 bool allow_detailed_fx,
310 auto norm_mode_hint =
316 (pb !=
nullptr && !pb->
IsText())) {
326 float word_xheight = use_body_size && row !=
nullptr && row->
body_size() > 0.0f
330 norm_mode_hint, norm_box, &
denorm);
360 if (blob_count > 0) {
361 auto** fake_choices =
new BLOB_CHOICE*[blob_count];
366 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
367 TBOX box = b_it.data()->bounding_box();
372 delete [] fake_choices;
404 for (
int b = 0; b < num_blobs; ++b) {
408 if (b + 1 < num_blobs) {
429 for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
441 WERD_CHOICE_IT wc_it(
const_cast<WERD_CHOICE_LIST*
>(&
best_choices));
442 for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) {
461 tprintf(
"raw_choice has total of states = %d vs ratings dim of %d\n",
467 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
470 tprintf(
"Cooked #%d has total of states = %d vs ratings dim of %d\n",
482 (word_to_debug !=
nullptr && *word_to_debug !=
'\0' &&
best_choice !=
nullptr &&
489 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
500 tprintf(
"Best choice: accepted=%d, adaptable=%d, done=%d : ",
517 if (debug_level >= 2)
521 for (it.forward(); !it.at_first(); it.forward(), ++index) {
530 int i = 0, j = 0, chunk = 0;
536 while (i < choice->length() && j < best_choice->length()) {
539 if (debug_level >= 2) {
540 choice->
print(
"WorstCertaintyDiffWorseThan");
542 "i %d j %d Choice->Blob[i].Certainty %.4g"
543 " WorstOtherChoiceCertainty %g Threshold %g\n",
545 tprintf(
"Discarding bad choice #%d\n", index);
552 while (choice_chunk < chunk && ++i < choice->length())
553 choice_chunk += choice->
state(i);
555 while (best_chunk < chunk && ++j < best_choice->length())
571 float avg_rating = 0.0f;
572 int num_error_chunks = 0;
575 while (chunk < end_chunk) {
576 if (chunk >= end_raw_chunk) {
588 if (num_error_chunks > 0) {
589 avg_rating /= num_error_chunks;
590 *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin);
592 *thresholds = max_rating;
595 if (*thresholds > max_rating)
596 *thresholds = max_rating;
597 if (*thresholds < min_rating)
598 *thresholds = min_rating;
628 float max_certainty_delta =
631 if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
632 max_certainty_delta = -kStopperAmbiguityThresholdOffset;
634 max_certainty_delta) {
638 tprintf(
"Discarding choice \"%s\" with an overly low certainty"
639 " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
653 bool inserted =
false;
658 if (choice->
rating() > word_choice->
rating() && !inserted) {
660 it.add_before_stay_put(word_choice);
662 if (num_choices == 0)
673 tprintf(
"Discarding duplicate choice \"%s\", rating %g vs %g\n",
681 if (num_choices > max_num_choices)
685 }
while (!it.at_first());
687 if (!inserted && num_choices < max_num_choices) {
688 it.add_to_end(word_choice);
690 if (num_choices == 0)
698 word_choice->
print(
" Word Choice");
710template<
class T>
static void MovePointerData(T** dest, T**src) {
719 WERD_CHOICE_IT it(
const_cast<WERD_CHOICE_LIST*
>(&
best_choices));
720 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
721 if (!it.at_first()) alternates_str +=
"\", \"";
722 alternates_str += it.data()->unichar_string();
724 tprintf(
"Alternates for \"%s\": {\"%s\"}\n",
732 for (
int b = start_blob; b <= last_blob; ++b) {
773 word->seam_array.clear();
784 wc_it.add_list_after(&
word->best_choices);
786 if (
word->blamer_bundle !=
nullptr) {
842 for (
int i = 0; i < word_len; ++i) {
868 for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward())
869 wc_it.data()->SetAllScriptPositions(position);
884 for (
int c = 0; c < blob_count; ++c) {
885 auto* choice_list =
new BLOB_CHOICE_LIST;
886 BLOB_CHOICE_IT choice_it(choice_list);
887 choice_it.add_after_then_move(choices[c]);
901 word_choice->set_permuter(permuter);
902 for (
int b = 0; b < num_blobs; ++b) {
904 float rating = INT32_MAX;
905 float certainty = -INT32_MAX;
906 BLOB_CHOICE_LIST* choices =
ratings->
get(b, b);
907 if (choices !=
nullptr && !choices->empty()) {
908 BLOB_CHOICE_IT bc_it(choices);
911 rating = choice->
rating();
914 word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating,
942 bool modified =
false;
946 if (new_id != INVALID_UNICHAR_ID &&
962 BLOB_CHOICE_IT bc_it(blob_choices);
963 bc_it.add_before_then_move(blob_choice);
992static int is_simple_quote(
const char* signed_str,
int length) {
994 reinterpret_cast<const unsigned char*
>(signed_str);
996 return (length == 1 && (*str ==
'\'' || *str ==
'`')) ||
998 (length == 3 && ((*str == 0xe2 &&
999 *(str + 1) == 0x80 &&
1000 *(str + 2) == 0x98) ||
1002 *(str + 1) == 0x80 &&
1003 *(str + 2) == 0x99)));
1011 if (is_simple_quote(ch, strlen(ch)) &&
1012 is_simple_quote(next_ch, strlen(next_ch)))
1014 return INVALID_UNICHAR_ID;
1033 if (strlen(ch) == 1 && strlen(next_ch) == 1 &&
1034 (*ch ==
'-' || *ch ==
'~') && (*next_ch ==
'-' || *next_ch ==
'~'))
1036 return INVALID_UNICHAR_ID;
1063 return INVALID_UNICHAR_ID;
1080 for (
int index = start; index < start +
count - 1; ++index) {
1083 if (seam !=
nullptr && seam->
HasAnySplits())
return false;
1147 if (other.block_res ==
nullptr) {
1149 if (block_res ==
nullptr)
1153 if (block_res ==
nullptr) {
1156 if (block_res == other.block_res) {
1157 if (other.row_res ==
nullptr || row_res ==
nullptr) {
1161 if (row_res == other.row_res) {
1163 ASSERT_HOST(other.word_res !=
nullptr && word_res !=
nullptr);
1164 if (word_res == other.word_res) {
1170 for (word_res_it.mark_cycle_pt(); !word_res_it.cycled_list();
1171 word_res_it.forward()) {
1172 if (word_res_it.data() == word_res) {
1174 }
else if (word_res_it.data() == other.word_res) {
1178 ASSERT_HOST(
"Error: Incomparable PAGE_RES_ITs" ==
nullptr);
1183 for (row_res_it.mark_cycle_pt(); !row_res_it.cycled_list();
1184 row_res_it.forward()) {
1185 if (row_res_it.data() == row_res) {
1187 }
else if (row_res_it.data() == other.row_res) {
1191 ASSERT_HOST(
"Error: Incomparable PAGE_RES_ITs" ==
nullptr);
1196 for (block_res_it.mark_cycle_pt();
1197 !block_res_it.cycled_list(); block_res_it.forward()) {
1198 if (block_res_it.data() == block_res) {
1200 }
else if (block_res_it.data() == other.block_res) {
1205 ASSERT_HOST(
"Error: Incomparable PAGE_RES_ITs" ==
nullptr);
1216 auto* new_res =
new WERD_RES(new_word);
1217 new_res->CopySimpleFields(clone_res);
1218 new_res->combination =
true;
1220 WERD_RES_IT wr_it(&
row()->word_res_list);
1221 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1223 if (
word == word_res)
1227 wr_it.add_before_then_move(new_res);
1228 if (wr_it.at_first()) {
1239static void ComputeBlobEnds(
const WERD_RES& word,
const TBOX& clip_box,
1240 C_BLOB_LIST* next_word_blobs,
1246 TBOX blob_box = blob_it.data()->bounding_box();
1248 for (
int b = 1; b < length; ++b) {
1249 blob_box += blob_it.data()->bounding_box();
1254 int blob_end = INT32_MAX;
1255 if (!blob_it.at_first() || next_word_blobs !=
nullptr) {
1256 if (blob_it.at_first())
1257 blob_it.set_to_list(next_word_blobs);
1258 blob_end = (blob_box.
right() + blob_it.data()->bounding_box().left()) / 2;
1260 blob_end = ClipToRange<int>(blob_end, clip_box.
left(), clip_box.
right());
1269 int w_index,
TBOX prev_box, WERD_RES_IT w_it) {
1270 constexpr int kSignificantOverlapFraction = 4;
1272 TBOX current_box = words[w_index]->word->bounding_box();
1274 if (w_index + 1 < words.
size() && words[w_index + 1] !=
nullptr &&
1275 words[w_index + 1]->word !=
nullptr)
1276 next_box = words[w_index + 1]->word->bounding_box();
1277 for (w_it.forward(); !w_it.at_first() && w_it.data()->part_of_combo;
1279 if (w_it.data() ==
nullptr || w_it.data()->word ==
nullptr)
continue;
1280 TBOX w_box = w_it.data()->word->bounding_box();
1281 int height_limit = std::min<int>(w_box.
height(), w_box.
width() / 2);
1282 int width_limit = w_box.
width() / kSignificantOverlapFraction;
1283 int min_significant_overlap = std::max(height_limit, width_limit);
1287 if (overlap > min_significant_overlap) {
1288 if (prev_overlap > min_significant_overlap) {
1291 }
else if (next_overlap > min_significant_overlap) {
1295 clipped_box += w_box;
1299 if (clipped_box.
height() <= 0) {
1303 if (clipped_box.
width() <= 0) clipped_box = current_box;
1309static TBOX MoveAndClipBlob(C_BLOB_IT* src_it, C_BLOB_IT* dest_it,
1310 const TBOX& clip_box) {
1311 C_BLOB* src_blob = src_it->extract();
1315 ClipToRange<int>(box.
left(), clip_box.
left(), clip_box.
right() - 1);
1317 ClipToRange<int>(box.
right(), clip_box.
left() + 1, clip_box.
right());
1319 ClipToRange<int>(box.
top(), clip_box.
bottom() + 1, clip_box.
top());
1321 ClipToRange<int>(box.
bottom(), clip_box.
bottom(), clip_box.
top() - 1);
1322 box =
TBOX(left, bottom, right, top);
1326 dest_it->add_after_then_move(src_blob);
1335 if (words->
empty()) {
1342 (*words)[0]->word->set_flag(
W_BOL,
true);
1344 (*words)[0]->word->set_blanks(input_word->
word->
space());
1354 WERD_IT w_it(
row()->
row->word_list());
1356 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
1365 WERD_RES_IT wr_it(&
row()->word_res_list);
1366 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1368 if (
word == input_word)
1379 for (
int w = 0; w < words->
size(); ++w) {
1381 clip_box = ComputeWordBounds(*words, w, clip_box, wr_it_of_current_word);
1384 C_BLOB_LIST* next_word_blobs =
1385 w + 1 < words->
size() ? (*words)[w + 1]->word->cblob_list() :
nullptr;
1386 ComputeBlobEnds(*word_w, clip_box, next_word_blobs, &blob_ends);
1389 C_BLOB_LIST fake_blobs;
1390 C_BLOB_IT fake_b_it(&fake_blobs);
1392 fake_b_it.move_to_first();
1397 for (
int i = 0; i < blob_ends.
size(); ++i, fake_b_it.forward()) {
1398 int end_x = blob_ends[i];
1401 while (!src_b_it.empty() &&
1402 src_b_it.data()->bounding_box().x_middle() < end_x) {
1403 blob_box += MoveAndClipBlob(&src_b_it, &dest_it, clip_box);
1406 while (!rej_b_it.empty() &&
1407 rej_b_it.data()->bounding_box().x_middle() < end_x) {
1408 blob_box += MoveAndClipBlob(&rej_b_it, &dest_it, clip_box);
1413 blob_box = MoveAndClipBlob(&fake_b_it, &dest_it, clip_box);
1415 box_word->InsertBox(i, blob_box);
1422 w_it.add_before_stay_put(word_w->
word);
1425 (*words)[w] =
nullptr;
1426 wr_it.add_before_stay_put(word_w);
1434 delete w_it.extract();
1435 delete wr_it.extract();
1448 WERD_IT w_it(
row()->
row->word_list());
1449 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
1450 if (w_it.data() == word_res->
word) {
1455 delete w_it.extract();
1459 WERD_RES_IT wr_it(&
row()->word_res_list);
1460 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1461 if (wr_it.data() == word_res) {
1467 delete wr_it.extract();
1480 WERD_RES_IT wr_it(&
row()->word_res_list);
1481 for (wr_it.mark_cycle_pt();
1482 !wr_it.cycled_list() && wr_it.data() != word_res; wr_it.forward()) {
1486 real_word = wr_it.data()->word;
1502 block_res_it.mark_cycle_pt();
1503 prev_block_res =
nullptr;
1504 prev_row_res =
nullptr;
1505 prev_word_res =
nullptr;
1506 block_res =
nullptr;
1509 next_block_res =
nullptr;
1510 next_row_res =
nullptr;
1511 next_word_res =
nullptr;
1512 internal_forward(
true, empty_ok);
1513 return internal_forward(
false, empty_ok);
1524 if (row_res == next_row_res) {
1527 word_res_it.move_to_first();
1528 for (word_res_it.mark_cycle_pt();
1529 !word_res_it.cycled_list() && word_res_it.data() != next_word_res;
1530 word_res_it.forward()) {
1531 if (!word_res_it.data()->part_of_combo) {
1532 if (prev_row_res == row_res) prev_word_res = word_res;
1533 word_res = word_res_it.data();
1537 wr_it_of_next_word = word_res_it;
1538 word_res_it.forward();
1542 for (wr_it.mark_cycle_pt(); !wr_it.cycled_list(); wr_it.forward()) {
1543 if (!wr_it.data()->part_of_combo) {
1544 if (prev_row_res == row_res) prev_word_res = word_res;
1545 word_res = wr_it.data();
1566WERD_RES *PAGE_RES_IT::internal_forward(
bool new_block,
bool empty_ok) {
1567 bool new_row =
false;
1569 prev_block_res = block_res;
1570 prev_row_res = row_res;
1571 prev_word_res = word_res;
1572 block_res = next_block_res;
1573 row_res = next_row_res;
1574 word_res = next_word_res;
1575 wr_it_of_current_word = wr_it_of_next_word;
1576 next_block_res =
nullptr;
1577 next_row_res =
nullptr;
1578 next_word_res =
nullptr;
1580 while (!block_res_it.cycled_list()) {
1583 row_res_it.set_to_list(&block_res_it.data()->row_res_list);
1584 row_res_it.mark_cycle_pt();
1585 if (row_res_it.empty() && empty_ok) {
1586 next_block_res = block_res_it.data();
1591 while (!row_res_it.cycled_list()) {
1594 word_res_it.set_to_list(&row_res_it.data()->word_res_list);
1595 word_res_it.mark_cycle_pt();
1598 while (!word_res_it.cycled_list() && word_res_it.data()->part_of_combo)
1599 word_res_it.forward();
1600 if (!word_res_it.cycled_list()) {
1601 next_block_res = block_res_it.data();
1602 next_row_res = row_res_it.data();
1603 next_word_res = word_res_it.data();
1604 wr_it_of_next_word = word_res_it;
1605 word_res_it.forward();
1609 row_res_it.forward();
1613 block_res_it.forward();
1620 (new_block || prev_word_res ==
nullptr) ?
nullptr : prev_word_res->
best_choice;
1632 if (!row)
return nullptr;
1646 while (block_res == next_block_res &&
1647 (next_row_res !=
nullptr && next_row_res->
row !=
nullptr &&
1649 internal_forward(
false,
true);
1651 return internal_forward(
false,
true);
1661 while (block_res == next_block_res) {
1662 internal_forward(
false,
true);
1664 return internal_forward(
false,
true);
1668 int16_t chars_in_word;
1669 int16_t rejects_in_word = 0;
1679 block_res->
rej_count += rejects_in_word;
1681 if (chars_in_word == rejects_in_word)
const double kMaxLineSizeRatio
const double kMaxWordSizeRatio
const double kMaxWordGapRatio
const int kWordrecMaxNumJoinChunks
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
void start_seam_list(TWERD *word, GenericVector< SEAM * > *seam_array)
@ W_SCRIPT_HAS_XHEIGHT
x-height concept makes sense.
@ W_FUZZY_NON
fuzzy nonspace
@ W_SCRIPT_IS_LATIN
Special case latin for y. splitting.
@ W_REP_CHAR
repeated character
@ W_INVERSE
white on black
#define CLISTIZE(CLASSNAME)
#define ELISTIZE(CLASSNAME)
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
DLLSYM void tprintf(const char *format,...)
void init_to_size(int size, const T &t)
void insert(const T &t, int index)
void delete_data_pointers()
void move(GenericVector< T > *from)
void delete_matrix_pointers()
void put(ICOORD pos, const T &thing)
void CopyResults(const BlamerBundle &other)
void CopyTruth(const BlamerBundle &other)
void SetupNormTruthWord(const DENORM &denorm)
TBOX bounding_box() const
void MergeBlobs(int start, int end)
void BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, const TBOX *norm_box, DENORM *word_denorm)
GenericVector< TBLOB * > blobs
void ComputeBoundingBoxes()
static TWERD * PolygonalCopy(bool allow_detailed_fx, WERD *src)
void InsertBox(int index, const TBOX &box)
void MergeBoxes(int start, int end)
const TBOX & BlobBox(int index) const
static BoxWord * CopyFromNormalized(TWERD *tessword)
void ClipToOriginalWord(const BLOCK *block, WERD *original_word)
void IncreaseBandSize(int bandwidth)
MATRIX * ConsumeAndMakeBigger(int ind)
bool Valid(const MATRIX &m) const
const BLOCK * block() const
ROW_LIST * row_list()
get rows
PDBLK pdblk
Page Description Block.
WERD_CHOICE ** prev_word_best_choice
BLOCK_RES_LIST block_res_list
ROW_RES_LIST row_res_list
int32_t whole_word_rej_count
WERD_RES_LIST word_res_list
const UNICHARSET * uch_set
UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2)
int GetBlobsGap(int blob_index)
void CopySimpleFields(const WERD_RES &source)
void CloneChoppedToRebuild()
void FakeClassifyWord(int blob_count, BLOB_CHOICE **choices)
WERD_CHOICE_LIST best_choices
BlamerBundle * blamer_bundle
bool PiecesAllNatural(int start, int count) const
void DebugWordChoices(bool debug, const char *word_to_debug)
tesseract::Tesseract * tesseract
bool LogNewRawChoice(WERD_CHOICE *word_choice)
void SetupFake(const UNICHARSET &uch)
int8_t fontinfo_id2_count
const FontInfo * fontinfo
tesseract::BoxWord * box_word
GenericVector< int > blob_widths
void copy_on(WERD_RES *word_res)
GenericVector< SEAM * > seam_array
WERD_CHOICE * best_choice
void ReplaceBestChoice(WERD_CHOICE *choice)
const FontInfo * fontinfo2
void FakeWordFromRatings(PermuterType permuter)
void SetupBlobWidthsAndGaps()
void PrintBestChoices() const
void ConsumeWordResults(WERD_RES *word)
int GetBlobsWidth(int start_blob, int last_blob)
UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2)
void SetAllScriptPositions(tesseract::ScriptPos position)
void FilterWordChoices(int debug_level)
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
CRUNCH_MODE unlv_crunch_mode
void BestChoiceToCorrectText()
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const
void SetScriptPositions()
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX & > *box_cb)
void DebugTopChoice(const char *msg) const
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
void SetupWordScript(const UNICHARSET &unicharset_in)
GenericVector< int > best_state
void MergeAdjacentBlobs(int index)
WERD_RES & operator=(const WERD_RES &source)
void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in)
bool HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2)
UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2)
bool SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block)
void InsertSeam(int blob_number, SEAM *seam)
GenericVector< STRING > correct_text
GenericVector< int > blob_gaps
tesseract::BoxWord * bln_boxes
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
void InitForRetryRecognition(const WERD_RES &source)
BLOB_CHOICE * GetBlobChoice(int index) const
int cmp(const PAGE_RES_IT &other) const
WERD_RES * forward_block()
void MakeCurrentWordFuzzy()
void ReplaceCurrentWord(tesseract::PointerVector< WERD_RES > *words)
WERD_RES * forward_paragraph()
WERD_RES * start_page(bool empty_ok)
WERD_RES * restart_page()
WERD_RES * InsertSimpleCloneWord(const WERD_RES &clone_res, WERD *new_word)
POLY_BLOCK * poly_block() const
UNICHAR_ID unichar_id() const
void set_unichar_id(UNICHAR_ID newunichar_id)
MATRIX_COORD MatrixCoord(int index) const
void remove_unichar_id(int index)
int state(int index) const
float adjust_factor() const
void SetScriptPositions(bool small_caps, TWERD *word, int debug=0)
void UpdateStateForSplit(int blob_position)
const STRING & unichar_string() const
int TotalOfStates() const
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
bool dangerous_ambig_found() const
UNICHAR_ID unichar_id(int index) const
void set_unichar_id(UNICHAR_ID unichar_id, int index)
void SetAllScriptPositions(tesseract::ScriptPos position)
void set_permuter(uint8_t perm)
BLOB_CHOICE_LIST * blob_choices(int index, MATRIX *ratings) const
TBOX intersection(const TBOX &box) const
bool contains(const FCOORD pt) const
void initialise(int16_t length)
void remove_pos(int16_t pos)
static void BreakPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
bool HasAnySplits() const
bool PrepareToInsertSeam(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int insert_index, bool modify)
static void JoinPieces(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int first, int last)
static C_BLOB * FakeBlob(const TBOX &box)
TBOX bounding_box() const
static int SortByXMiddle(const void *v1, const void *v2)
void set_script_id(int id)
C_BLOB_LIST * rej_cblob_list()
C_BLOB_LIST * cblob_list()
void set_flag(WERD_FLAGS mask, bool value)
TBOX bounding_box() const
bool flag(WERD_FLAGS mask) const
void operator=(const ELIST_LINK &)
void add_str_int(const char *str, int number)
const char * string() const
bool contains_unichar(const char *const unichar_repr) const
bool get_enabled(UNICHAR_ID unichar_id) const
const char * id_to_unichar(UNICHAR_ID id) const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
bool script_has_xheight() const