21#include "config_auto.h"
24#ifdef DISABLED_LEGACY_ENGINE
67 if (word->
done && (pass == 1) && (!word_from_dict || word_is_ambig) &&
72 if (word->
done && ((!word_from_dict &&
142 for (i = 0, offset = 0;
160 tprintf(
"BAD tessedit_reject_mode\n");
161 ASSERT_HOST(
"Fatal error encountered!" ==
nullptr);
170 tprintf(
"Certainty: %f Rating: %f\n",
231 float bestgap = 0.0f;
234 int blob_count = word->
length();
237 for (
int i = 0; i < blob_count; ++i) {
241 gapstart = ratings[0] - 1;
242 if (blob_count >= 3) {
243 for (
int index = 0; index < blob_count - 1; index++) {
244 if (ratings[index + 1] - ratings[index] > bestgap) {
245 bestgap = ratings[index + 1] - ratings[index];
247 gapstart = ratings[index];
251 threshold = gapstart + bestgap / 2;
274 for (
int blobindex = 0; blobindex < blobcount; blobindex++) {
280 word->
reject_map[blobindex].setrej_edge_char();
297 int16_t first_alphanum_index_;
298 int16_t first_alphanum_offset_;
301 bool non_conflict_set_char;
302 bool conflict =
false;
311 word_len = strlen(lengths);
324 for (i = 0, offset = 0, non_conflict_set_char =
false;
325 (i < word_len) && !non_conflict_set_char; offset += lengths[i++])
326 non_conflict_set_char =
330 if (!non_conflict_set_char) {
348 dict_word_ok = (dict_word_type > 0) &&
353 (dict_perm_type && dict_word_ok)) {
356 if (lengths[first_alphanum_index_] == 1 &&
357 word[first_alphanum_offset_] ==
'I') {
363 setrej_1Il_conflict();
372 if (lengths[first_alphanum_index_] == 1 &&
373 word[first_alphanum_offset_] ==
'l') {
379 setrej_1Il_conflict();
403 if (lengths[first_alphanum_index_] == 1 &&
404 word[first_alphanum_offset_] ==
'l') {
411 else if (lengths[first_alphanum_index_] == 1 &&
412 word[first_alphanum_offset_] ==
'I') {
431 for (i = 0, offset = 0; word[offset] !=
'\0';
433 if ((!allow_1s || (word[offset] !=
'1')) &&
436 word_res->
reject_map[i].setrej_1Il_conflict ();
453 setrej_1Il_conflict ();
471 const char *word_lengths) {
475 for (i = 0, offset = 0; word[offset] !=
'\0'; offset += word_lengths[i++]) {
484 const char *word_lengths) {
488 for (i = 0, offset = 0; word[offset] !=
'\0'; offset += word_lengths[i++]) {
497 const char *word_lengths) {
502 for (i = 0, offset = 0; word[offset] !=
'\0'; offset += word_lengths[i++]) {
511 const char* word_lengths) {
515 for (i = 0, offset = 0; word[offset] !=
'\0'; offset += word_lengths[i++]) {
517 (word_lengths[i] != 1 || word[offset] !=
'1'))
533 bool accepted_1Il =
false;
535 for (i = 0, offset = 0; i < word_len;
550 for (i = 0, offset = 0; i < word_len;
584 int16_t char_quality;
585 int16_t accepted_char_quality;
602 (char_quality == accepted_char_quality))
620 int prev_right = -9999;
630 for (i = 0; i < best_choice->
length() && i < num_blobs; ++i) {
633 if (i + 1 == num_blobs)
639 (out_box.
left() > prev_right) && (out_box.
right() < next_left)) {
640 aspect_ratio = out_box.
width() /
static_cast<float>(out_box.
height());
648 word_res->
reject_map[i].setrej_hyphen_accept();
655 else if (best_choice->
unichar_id(i) == unichar_dash) {
658 word_res->
reject_map[i].setrej_hyphen_accept();
667 prev_right = out_box.
right();
683 for (i = 0; i < best_choice->
length() && i < num_blobs; ++i) {
695 if (unichar_0 == INVALID_UNICHAR_ID ||
697 unichar_O == INVALID_UNICHAR_ID ||
701 for (i = 1; i < best_choice->
length(); ++i) {
702 if (best_choice->
unichar_id(i) == unichar_0 ||
705 if ((i+1) < best_choice->
length() &&
712 (i+1) < best_choice->
length() &&
715 (i+2) < best_choice->
length() &&
724 (((i+1) < best_choice->
length() &&
728 (i == best_choice->
length() - 1))) {
733 (i+1) < best_choice->
length() &&
739 (i+2) < best_choice->
length() &&
751 (i+2) < best_choice->
length() &&
761 (i+1) < best_choice->
length() &&
771 if (best_choice->
unichar_id(i-2) == unichar_O) {
774 while (i < best_choice->length() &&
787 return ch_set.
get_isupper(unichar_id) && !ch_set.
eq(unichar_id,
"O");
791 return ch_set.
get_isdigit(unichar_id) && !ch_set.
eq(unichar_id,
"0");
@ AC_INITIAL_CAP
ALL but initial lc.
@ AC_UNACCEPTABLE
Unacceptable word.
@ AC_UPPER_CASE
ALL upper case.
@ AC_LOWER_CASE
ALL lower case.
void reject_blanks(WERD_RES *word)
float compute_reject_threshold(WERD_CHOICE *word)
void reject_poor_matches(WERD_RES *word)
const int kBlnBaselineOffset
#define CLISTIZEH(CLASSNAME)
#define CLISTIZE(CLASSNAME)
DLLSYM void tprintf(const char *format,...)
void resize_no_init(int size)
int16_t first_alphanum_index(const char *word, const char *word_lengths)
void reject_edge_blobs(WERD_RES *word)
int16_t first_alphanum_offset(const char *word, const char *word_lengths)
bool non_O_upper(const UNICHARSET &ch_set, UNICHAR_ID unichar_id)
int16_t alpha_count(const char *word, const char *word_lengths)
void dont_allow_1Il(WERD_RES *word)
int16_t count_alphanums(const WERD_CHOICE &word)
double tessedit_lower_flip_hyphen
bool rej_1Il_trust_permuter_type
int tessedit_image_border
bool rej_use_tess_accepted
ACCEPTABLE_WERD_TYPE acceptable_word_string(const UNICHARSET &char_set, const char *s, const char *lengths)
bool one_ell_conflict(WERD_RES *word_res, bool update_map)
int16_t safe_dict_word(const WERD_RES *werd_res)
void set_done(WERD_RES *word, int16_t pass)
bool non_0_digit(const UNICHARSET &ch_set, UNICHAR_ID unichar_id)
bool check_debug_pt(WERD_RES *word, int location)
double rej_whole_of_mostly_reject_word_fract
void word_char_quality(WERD_RES *word, ROW *row, int16_t *match_count, int16_t *accepted_match_count)
void flip_hyphens(WERD_RES *word)
bool rej_alphas_in_number_perm
bool rej_1Il_use_dict_word
bool tessedit_rejection_debug
void reject_I_1_L(WERD_RES *word)
void reject_mostly_rejects(WERD_RES *word)
char * ok_repeated_ch_non_alphanum_wds
bool word_contains_non_1_digit(const char *word, const char *word_lengths)
char * conflict_set_I_l_1
bool repeated_nonalphanum_wd(WERD_RES *word, ROW *row)
double tessedit_upper_flip_hyphen
void flip_0O(WERD_RES *word)
void make_reject_map(WERD_RES *word, ROW *row, int16_t pass)
TBOX bounding_box() const
GenericVector< TBLOB * > blobs
const TBOX & BlobBox(int index) const
const UNICHARSET * uch_set
tesseract::Tesseract * tesseract
tesseract::BoxWord * box_word
WERD_CHOICE * best_choice
const STRING & unichar_string() const
bool dangerous_ambig_found() const
UNICHAR_ID unichar_id(int index) const
void set_unichar_id(UNICHAR_ID unichar_id, int index)
const STRING & unichar_lengths() const
void rej_word_mostly_rej()
void rej_word_bad_permuter()
void rej_word_not_tess_accepted()
void initialise(int16_t length)
void rej_word_small_xht()
void rej_word_contains_blanks()
TBOX bounding_box() const
bool contains(char c) const
const char * string() const
bool get_isupper(UNICHAR_ID unichar_id) const
bool get_isalpha(UNICHAR_ID unichar_id) const
bool get_enabled(UNICHAR_ID unichar_id) const
bool get_isdigit(UNICHAR_ID unichar_id) const
bool contains_unichar_id(UNICHAR_ID unichar_id) const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
int dict_word(const WERD_CHOICE &word)