tesseract 4.1.1
Loading...
Searching...
No Matches
reject.h File Reference

Go to the source code of this file.

Functions

void reject_blanks (WERD_RES *word)
 
void reject_poor_matches (WERD_RES *word)
 
float compute_reject_threshold (WERD_CHOICE *word)
 
bool word_contains_non_1_digit (const char *word, const char *word_lengths)
 
void dont_allow_1Il (WERD_RES *word)
 
void flip_hyphens (WERD_RES *word)
 
void flip_0O (WERD_RES *word)
 
bool non_0_digit (const char *str, int length)
 

Function Documentation

◆ compute_reject_threshold()

float compute_reject_threshold ( WERD_CHOICE * word)

Definition at line 229 of file reject.cpp.

229 {
230 float threshold; // rejection threshold
231 float bestgap = 0.0f; // biggest gap
232 float gapstart; // bottom of gap
233
234 int blob_count = word->length();
235 GenericVector<float> ratings;
236 ratings.resize_no_init(blob_count);
237 for (int i = 0; i < blob_count; ++i) {
238 ratings[i] = word->certainty(i);
239 }
240 ratings.sort();
241 gapstart = ratings[0] - 1; // all reject if none better
242 if (blob_count >= 3) {
243 for (int index = 0; index < blob_count - 1; index++) {
244 if (ratings[index + 1] - ratings[index] > bestgap) {
245 bestgap = ratings[index + 1] - ratings[index];
246 // find biggest
247 gapstart = ratings[index];
248 }
249 }
250 }
251 threshold = gapstart + bestgap / 2;
252
253 return threshold;
254}
void resize_no_init(int size)
Definition: genericvector.h:66
float certainty() const
Definition: ratngs.h:320
int length() const
Definition: ratngs.h:293

◆ dont_allow_1Il()

void dont_allow_1Il ( WERD_RES * word)

◆ flip_0O()

void flip_0O ( WERD_RES * word)

◆ flip_hyphens()

void flip_hyphens ( WERD_RES * word)

◆ non_0_digit()

bool non_0_digit ( const char *  str,
int  length 
)

◆ reject_blanks()

void reject_blanks ( WERD_RES * word)

Definition at line 181 of file reject.cpp.

181 {
182 int16_t i;
183 int16_t offset;
184
185 for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0';
186 offset += word->best_choice->unichar_lengths()[i], i += 1) {
187 if (word->best_choice->unichar_string()[offset] == ' ')
188 //rej unrecognised blobs
189 word->reject_map[i].setrej_tess_failure ();
190 }
191}
WERD_CHOICE * best_choice
Definition: pageres.h:241
REJMAP reject_map
Definition: pageres.h:294
const STRING & unichar_string() const
Definition: ratngs.h:531
const STRING & unichar_lengths() const
Definition: ratngs.h:538

◆ reject_poor_matches()

void reject_poor_matches ( WERD_RES * word)

Definition at line 210 of file reject.cpp.

210 {
211 float threshold = compute_reject_threshold(word->best_choice);
212 for (int i = 0; i < word->best_choice->length(); ++i) {
213 if (word->best_choice->unichar_id(i) == UNICHAR_SPACE)
214 word->reject_map[i].setrej_tess_failure();
215 else if (word->best_choice->certainty(i) < threshold)
216 word->reject_map[i].setrej_poor_match();
217 }
218}
float compute_reject_threshold(WERD_CHOICE *word)
Definition: reject.cpp:229
@ UNICHAR_SPACE
Definition: unicharset.h:34
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305

◆ word_contains_non_1_digit()

bool word_contains_non_1_digit ( const char *  word,
const char *  word_lengths 
)