tesseract 4.1.1
Loading...
Searching...
No Matches
reject.cpp File Reference
#include "tessvars.h"
#include <cctype>
#include <cerrno>
#include <cstring>
#include "genericvector.h"
#include "reject.h"
#include "control.h"
#include "docqual.h"
#include "helpers.h"
#include "tesseractclass.h"

Go to the source code of this file.

Namespaces

namespace  tesseract
 

Functions

 CLISTIZEH (STRING) CLISTIZE(STRING) namespace tesseract
 
void reject_blanks (WERD_RES *word)
 
void reject_poor_matches (WERD_RES *word)
 
float compute_reject_threshold (WERD_CHOICE *word)
 

Function Documentation

◆ CLISTIZEH()

CLISTIZEH ( STRING  )

Definition at line 51 of file reject.cpp.

59 {
// Computes word->done for `word`: it starts as "tess accepted and no blank
// in the text" and is then cleared by the two checks below. `pass` is only
// compared against 1 (it gates the one_ell_conflict() check).
// NOTE(review): this listing jumps from source line 64 to 67, so the rest of
// the word_from_dict expression is not visible here.
60void Tesseract::set_done(WERD_RES *word, int16_t pass) {
// Initial verdict: tess_accepted is set and the best choice's string has no ' '.
61 word->done = word->tess_accepted &&
62 (strchr(word->best_choice->unichar_string().string(), ' ') == nullptr);
63 bool word_is_ambig = word->best_choice->dangerous_ambig_found();
// True when the permuter is SYSTEM_DAWG_PERM; further alternatives are elided.
64 bool word_from_dict = word->best_choice->permuter() == SYSTEM_DAWG_PERM ||
// Pass 1 only: a non-dictionary or ambiguous word for which
// one_ell_conflict(word, false) returns true is not done.
67 if (word->done && (pass == 1) && (!word_from_dict || word_is_ambig) &&
68 one_ell_conflict(word, false)) {
69 if (tessedit_rejection_debug) tprintf("one_ell_conflict detected\n");
70 word->done = false;
71 }
// Any pass: a word is not done if it is ambiguous, or if it is neither a
// dictionary word nor a NUMBER_PERM result.
72 if (word->done && ((!word_from_dict &&
73 word->best_choice->permuter() != NUMBER_PERM) || word_is_ambig)) {
74 if (tessedit_rejection_debug) tprintf("non-dict or ambig word detected\n");
75 word->done = false;
76 }
// Debug trace of the final flag and the chosen word.
77 if (tessedit_rejection_debug) {
78 tprintf("set_done(): done=%d\n", word->done);
79 word->best_choice->print("");
80 }
81}
82
83
84/*************************************************************************
85 * make_reject_map()
86 *
87 * Sets the done flag to indicate whether the result is acceptable.
88 *
89 * Sets a reject map for the word.
90 *************************************************************************/
// Builds the reject map for `word`. The work done depends on
// tessedit_reject_mode: 0 and 5 are handled, any other value is fatal.
// `pass` is forwarded to set_done(); `row` is not used in the visible lines.
// NOTE(review): this listing omits source lines 98, 105, 113, 123, 127, 138
// and 154, so the bodies of several branches below are not visible here.
91void Tesseract::make_reject_map(WERD_RES *word, ROW *row, int16_t pass) {
92 int i;
93 int offset;
94
95 flip_0O(word);
96 check_debug_pt(word, -1); // For trap only
97 set_done(word, pass); // Set acceptance
// Source line 98 is elided here.
99 reject_blanks(word);
100 /*
101 0: Ray's original heuristic - the baseline
102 */
103 if (tessedit_reject_mode == 0) {
// Action taken for words that are not done is on elided source line 105.
104 if (!word->done)
106 } else if (tessedit_reject_mode == 5) {
107 /*
108 5: Reject I/1/l from words where there is no strong contextual confirmation;
109 the whole of any unacceptable words (incl PERM rej of dubious 1/I/ls);
110 and the whole of any words which are very small
111 */
// kBlnXHeight / y_scale is presumably the word's x-height in image pixels
// (TODO confirm); the branch body for too-small words (line 113) is elided.
112 if (kBlnXHeight / word->denorm.y_scale() <= min_sane_x_ht_pixels) {
114 } else {
115 one_ell_conflict(word, true);
116 /*
117 Originally the code here just used the done flag. Now I have duplicated
118 and unpacked the conditions for setting the done flag so that each
119 mechanism can be turned on or off independently. This works WITHOUT
120 affecting the done flag setting.
121 */
// Body for "not tess accepted" (source line 123) is elided.
122 if (rej_use_tess_accepted && !word->tess_accepted)
124
// Body for "text contains a blank" (source line 127) is elided.
125 if (rej_use_tess_blanks &&
126 (strchr (word->best_choice->unichar_string().string (), ' ') != nullptr))
128
129 WERD_CHOICE* best_choice = word->best_choice;
130 if (rej_use_good_perm) {
// Dictionary permuters pass, optionally subject to acceptable_word_string();
// the right-hand side of the != comparison (source line 138) is elided.
131 if ((best_choice->permuter() == SYSTEM_DAWG_PERM ||
132 best_choice->permuter() == FREQ_DAWG_PERM ||
133 best_choice->permuter() == USER_DAWG_PERM) &&
134 (!rej_use_sensible_wd ||
135 acceptable_word_string(*word->uch_set,
136 best_choice->unichar_string().string(),
137 best_choice->unichar_lengths().string()) !=
139 // PASSED TEST
140 } else if (best_choice->permuter() == NUMBER_PERM) {
141 if (rej_alphas_in_number_perm) {
// Walk the unichars of the best choice: i indexes the unichar, offset its
// start in unichar_string(), advanced by unichar_lengths()[i] each step.
142 for (i = 0, offset = 0;
143 best_choice->unichar_string()[offset] != '\0';
144 offset += best_choice->unichar_lengths()[i++]) {
// A still-accepted character that get_isalpha() reports as alphabetic is
// rejected with setrej_bad_permuter().
145 if (word->reject_map[i].accepted() &&
146 word->uch_set->get_isalpha(
147 best_choice->unichar_string().string() + offset,
148 best_choice->unichar_lengths()[i]))
149 word->reject_map[i].setrej_bad_permuter();
150 // rej alpha
151 }
152 }
153 } else {
// Handling of every other permuter (source line 154) is elided.
155 }
156 }
157 /* Ambig word rejection was here once !!*/
158 }
159 } else {
160 tprintf("BAD tessedit_reject_mode\n");
// A string literal never compares equal to nullptr, so this assertion
// always fails: an unknown reject mode is treated as fatal.
161 ASSERT_HOST("Fatal error encountered!" == nullptr);
162 }
163
// Edge-blob rejection only runs when tessedit_image_border is above -1.
164 if (tessedit_image_border > -1)
165 reject_edge_blobs(word);
166
167 check_debug_pt (word, 10);
168 if (tessedit_rejection_debug) {
169 tprintf("Permuter Type = %d\n", word->best_choice->permuter ());
170 tprintf("Certainty: %f Rating: %f\n",
171 word->best_choice->certainty (), word->best_choice->rating ());
172 tprintf("Dict word: %d\n", dict_word(*(word->best_choice)));
173 }
174
175 flip_hyphens(word);
176 check_debug_pt(word, 20);
177}
178} // namespace tesseract
@ AC_UNACCEPTABLE
Unacceptable word.
Definition: control.h:30
void reject_blanks(WERD_RES *word)
Definition: reject.cpp:181
void reject_poor_matches(WERD_RES *word)
Definition: reject.cpp:210
void flip_hyphens(WERD_RES *word)
void flip_0O(WERD_RES *word)
const int kBlnXHeight
Definition: normalis.h:24
@ FREQ_DAWG_PERM
Definition: ratngs.h:244
@ USER_DAWG_PERM
Definition: ratngs.h:243
@ SYSTEM_DAWG_PERM
Definition: ratngs.h:241
@ NUMBER_PERM
Definition: ratngs.h:239
#define ASSERT_HOST(x)
Definition: errcode.h:88
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
float y_scale() const
Definition: normalis.h:270
Definition: ocrrow.h:37
const UNICHARSET * uch_set
Definition: pageres.h:203
DENORM denorm
Definition: pageres.h:201
bool done
Definition: pageres.h:305
WERD_CHOICE * best_choice
Definition: pageres.h:241
bool tess_accepted
Definition: pageres.h:303
REJMAP reject_map
Definition: pageres.h:294
const STRING & unichar_string() const
Definition: ratngs.h:531
bool dangerous_ambig_found() const
Definition: ratngs.h:353
uint8_t permuter() const
Definition: ratngs.h:336
float certainty() const
Definition: ratngs.h:320
void print() const
Definition: ratngs.h:570
const STRING & unichar_lengths() const
Definition: ratngs.h:538
float rating() const
Definition: ratngs.h:317
void rej_word_bad_permuter()
Definition: rejctmap.cpp:379
void rej_word_not_tess_accepted()
Definition: rejctmap.cpp:361
void initialise(int16_t length)
Definition: rejctmap.cpp:273
void rej_word_small_xht()
Definition: rejctmap.cpp:343
void rej_word_contains_blanks()
Definition: rejctmap.cpp:370
int32_t length() const
Definition: strngs.cpp:189
const char * string() const
Definition: strngs.cpp:194
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:491

◆ compute_reject_threshold()

float compute_reject_threshold ( WERD_CHOICE * word )

Definition at line 229 of file reject.cpp.

229 {
230 float threshold; // rejection threshold
231 float bestgap = 0.0f; // biggest gap
232 float gapstart; // bottom of gap
233
234 int blob_count = word->length();
235 GenericVector<float> ratings;
236 ratings.resize_no_init(blob_count);
237 for (int i = 0; i < blob_count; ++i) {
238 ratings[i] = word->certainty(i);
239 }
240 ratings.sort();
241 gapstart = ratings[0] - 1; // all reject if none better
242 if (blob_count >= 3) {
243 for (int index = 0; index < blob_count - 1; index++) {
244 if (ratings[index + 1] - ratings[index] > bestgap) {
245 bestgap = ratings[index + 1] - ratings[index];
246 // find biggest
247 gapstart = ratings[index];
248 }
249 }
250 }
251 threshold = gapstart + bestgap / 2;
252
253 return threshold;
254}
void resize_no_init(int size)
Definition: genericvector.h:66
int length() const
Definition: ratngs.h:293

◆ reject_blanks()

void reject_blanks ( WERD_RES * word )

Definition at line 181 of file reject.cpp.

181 {
182 int16_t i;
183 int16_t offset;
184
185 for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0';
186 offset += word->best_choice->unichar_lengths()[i], i += 1) {
187 if (word->best_choice->unichar_string()[offset] == ' ')
188 //rej unrecognised blobs
189 word->reject_map[i].setrej_tess_failure ();
190 }
191}

◆ reject_poor_matches()

void reject_poor_matches ( WERD_RES * word )

Definition at line 210 of file reject.cpp.

210 {
211 float threshold = compute_reject_threshold(word->best_choice);
212 for (int i = 0; i < word->best_choice->length(); ++i) {
213 if (word->best_choice->unichar_id(i) == UNICHAR_SPACE)
214 word->reject_map[i].setrej_tess_failure();
215 else if (word->best_choice->certainty(i) < threshold)
216 word->reject_map[i].setrej_poor_match();
217 }
218}
float compute_reject_threshold(WERD_CHOICE *word)
Definition: reject.cpp:229
@ UNICHAR_SPACE
Definition: unicharset.h:34
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305