tesseract 4.1.1
Loading...
Searching...
No Matches
stopper.cpp
Go to the documentation of this file.
1/******************************************************************************
2 ** Filename: stopper.cpp
3 ** Purpose: Stopping criteria for word classifier.
4 ** Author: Dan Johnson
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17
18#include <cstdio>
19#include <cstring>
20#include <cctype>
21#include <cmath>
22
23#include "stopper.h"
24#ifndef DISABLED_LEGACY_ENGINE
25#include "ambigs.h"
26#endif
27#include "ccutil.h"
28#include "dict.h"
29#include "helpers.h"
30#include "matchdefs.h"
31#include "pageres.h"
32#include "params.h"
33#include "ratngs.h"
34#include "unichar.h"
35
36/*----------------------------------------------------------------------------
37 Private Code
38----------------------------------------------------------------------------*/
39
40namespace tesseract {
41
42bool Dict::AcceptableChoice(const WERD_CHOICE& best_choice,
43 XHeightConsistencyEnum xheight_consistency) {
44 float CertaintyThreshold = stopper_nondict_certainty_base;
45 int WordSize;
46
47 if (stopper_no_acceptable_choices) return false;
48
49 if (best_choice.length() == 0) return false;
50
51 bool no_dang_ambigs = !best_choice.dangerous_ambig_found();
52 bool is_valid_word = valid_word_permuter(best_choice.permuter(), false);
53 bool is_case_ok = case_ok(best_choice);
54
55 if (stopper_debug_level >= 1) {
56 const char *xht = "UNKNOWN";
57 switch (xheight_consistency) {
58 case XH_GOOD: xht = "NORMAL"; break;
59 case XH_SUBNORMAL: xht = "SUBNORMAL"; break;
60 case XH_INCONSISTENT: xht = "INCONSISTENT"; break;
61 default: xht = "UNKNOWN";
62 }
63 tprintf("\nStopper: %s (word=%c, case=%c, xht_ok=%s=[%g,%g])\n",
64 best_choice.unichar_string().string(),
65 (is_valid_word ? 'y' : 'n'),
66 (is_case_ok ? 'y' : 'n'),
67 xht,
68 best_choice.min_x_height(),
69 best_choice.max_x_height());
70 }
71 // Do not accept invalid words in PASS1.
72 if (reject_offset_ <= 0.0f && !is_valid_word) return false;
73 if (is_valid_word && is_case_ok) {
74 WordSize = LengthOfShortestAlphaRun(best_choice);
75 WordSize -= stopper_smallword_size;
76 if (WordSize < 0)
77 WordSize = 0;
78 CertaintyThreshold += WordSize * stopper_certainty_per_char;
79 }
80
81 if (stopper_debug_level >= 1)
82 tprintf("Stopper: Rating = %4.1f, Certainty = %4.1f, Threshold = %4.1f\n",
83 best_choice.rating(), best_choice.certainty(), CertaintyThreshold);
84
85 if (no_dang_ambigs &&
86 best_choice.certainty() > CertaintyThreshold &&
87 xheight_consistency < XH_INCONSISTENT &&
88 UniformCertainties(best_choice)) {
89 return true;
90 } else {
91 if (stopper_debug_level >= 1) {
92 tprintf("AcceptableChoice() returned false"
93 " (no_dang_ambig:%d cert:%.4g thresh:%g uniform:%d)\n",
94 no_dang_ambigs, best_choice.certainty(),
95 CertaintyThreshold,
96 UniformCertainties(best_choice));
97 }
98 return false;
99 }
100}
101
103 if (word->best_choice == nullptr) return false;
104 float CertaintyThreshold = stopper_nondict_certainty_base - reject_offset_;
105 int WordSize;
106
107 if (stopper_debug_level >= 1) {
108 tprintf("\nRejecter: %s (word=%c, case=%c, unambig=%c, multiple=%c)\n",
110 (valid_word(*word->best_choice) ? 'y' : 'n'),
111 (case_ok(*word->best_choice) ? 'y' : 'n'),
112 word->best_choice->dangerous_ambig_found() ? 'n' : 'y',
113 word->best_choices.singleton() ? 'n' : 'y');
114 }
115
116 if (word->best_choice->length() == 0 || !word->best_choices.singleton())
117 return false;
118 if (valid_word(*word->best_choice) && case_ok(*word->best_choice)) {
119 WordSize = LengthOfShortestAlphaRun(*word->best_choice);
120 WordSize -= stopper_smallword_size;
121 if (WordSize < 0)
122 WordSize = 0;
123 CertaintyThreshold += WordSize * stopper_certainty_per_char;
124 }
125
126 if (stopper_debug_level >= 1)
127 tprintf("Rejecter: Certainty = %4.1f, Threshold = %4.1f ",
128 word->best_choice->certainty(), CertaintyThreshold);
129
130 if (word->best_choice->certainty() > CertaintyThreshold &&
132 if (stopper_debug_level >= 1)
133 tprintf("ACCEPTED\n");
134 return true;
135 } else {
136 if (stopper_debug_level >= 1)
137 tprintf("REJECTED\n");
138 return false;
139 }
140}
141
142#if !defined(DISABLED_LEGACY_ENGINE)
143
145 DANGERR *fixpt,
146 bool fix_replaceable,
147 MATRIX *ratings) {
148 if (stopper_debug_level > 2) {
149 tprintf("\nRunning NoDangerousAmbig() for %s\n",
150 best_choice->debug_string().string());
151 }
152
153 // Construct BLOB_CHOICE_LIST_VECTOR with ambiguities
154 // for each unichar id in BestChoice.
155 BLOB_CHOICE_LIST_VECTOR ambig_blob_choices;
156 int i;
157 bool ambigs_found = false;
158 // For each position in best_choice:
159 // -- choose AMBIG_SPEC_LIST that corresponds to unichar_id at best_choice[i]
160 // -- initialize wrong_ngram with a single unichar_id at best_choice[i]
161 // -- look for ambiguities corresponding to wrong_ngram in the list while
162 // adding the following unichar_ids from best_choice to wrong_ngram
163 //
164 // Repeat the above procedure twice: first time look through
165 // ambigs to be replaced and replace all the ambiguities found;
166 // second time look through dangerous ambiguities and construct
167 // ambig_blob_choices with a fake blob choice for each ambiguity
168 // and pass them to dawg_permute_and_select() to search for
169 // ambiguous words in the dictionaries.
170 //
171 // Note that during the execution of the for loop (on the first pass)
172 // if replacements are made the length of best_choice might change.
173 for (int pass = 0; pass < (fix_replaceable ? 2 : 1); ++pass) {
174 bool replace = (fix_replaceable && pass == 0);
175 const UnicharAmbigsVector &table = replace ?
177 if (!replace) {
178 // Initialize ambig_blob_choices with lists containing a single
179 // unichar id for the corresponding position in best_choice.
180 // best_choice consisting from only the original letters will
181 // have a rating of 0.0.
182 for (i = 0; i < best_choice->length(); ++i) {
183 auto *lst = new BLOB_CHOICE_LIST();
184 BLOB_CHOICE_IT lst_it(lst);
185 // TODO(rays/antonova) Put real xheights and y shifts here.
186 lst_it.add_to_end(new BLOB_CHOICE(best_choice->unichar_id(i),
187 0.0, 0.0, -1, 0, 1, 0, BCC_AMBIG));
188 ambig_blob_choices.push_back(lst);
189 }
190 }
191 UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE + 1];
192 int wrong_ngram_index;
193 int next_index;
194 int blob_index = 0;
195 for (i = 0; i < best_choice->length(); blob_index += best_choice->state(i),
196 ++i) {
197 UNICHAR_ID curr_unichar_id = best_choice->unichar_id(i);
198 if (stopper_debug_level > 2) {
199 tprintf("Looking for %s ngrams starting with %s:\n",
200 replace ? "replaceable" : "ambiguous",
201 getUnicharset().debug_str(curr_unichar_id).string());
202 }
203 int num_wrong_blobs = best_choice->state(i);
204 wrong_ngram_index = 0;
205 wrong_ngram[wrong_ngram_index] = curr_unichar_id;
206 if (curr_unichar_id == INVALID_UNICHAR_ID ||
207 curr_unichar_id >= table.size() ||
208 table[curr_unichar_id] == nullptr) {
209 continue; // there is no ambig spec for this unichar id
210 }
211 AmbigSpec_IT spec_it(table[curr_unichar_id]);
212 for (spec_it.mark_cycle_pt(); !spec_it.cycled_list();) {
213 const AmbigSpec *ambig_spec = spec_it.data();
214 wrong_ngram[wrong_ngram_index+1] = INVALID_UNICHAR_ID;
215 int compare = UnicharIdArrayUtils::compare(wrong_ngram,
216 ambig_spec->wrong_ngram);
217 if (stopper_debug_level > 2) {
218 tprintf("candidate ngram: ");
220 tprintf("current ngram from spec: ");
222 tprintf("comparison result: %d\n", compare);
223 }
224 if (compare == 0) {
225 // Record the place where we found an ambiguity.
226 if (fixpt != nullptr) {
227 UNICHAR_ID leftmost_id = ambig_spec->correct_fragments[0];
228 fixpt->push_back(DANGERR_INFO(
229 blob_index, blob_index + num_wrong_blobs, replace,
230 getUnicharset().get_isngram(ambig_spec->correct_ngram_id),
231 leftmost_id));
232 if (stopper_debug_level > 1) {
233 tprintf("fixpt+=(%d %d %d %d %s)\n", blob_index,
234 blob_index + num_wrong_blobs, false,
235 getUnicharset().get_isngram(
236 ambig_spec->correct_ngram_id),
237 getUnicharset().id_to_unichar(leftmost_id));
238 }
239 }
240
241 if (replace) {
242 if (stopper_debug_level > 2) {
243 tprintf("replace ambiguity with %s : ",
244 getUnicharset().id_to_unichar(
245 ambig_spec->correct_ngram_id));
247 ambig_spec->correct_fragments, getUnicharset());
248 }
249 ReplaceAmbig(i, ambig_spec->wrong_ngram_size,
250 ambig_spec->correct_ngram_id,
251 best_choice, ratings);
252 } else if (i > 0 || ambig_spec->type != CASE_AMBIG) {
253 // We found dang ambig - update ambig_blob_choices.
254 if (stopper_debug_level > 2) {
255 tprintf("found ambiguity: ");
257 ambig_spec->correct_fragments, getUnicharset());
258 }
259 ambigs_found = true;
260 for (int tmp_index = 0; tmp_index <= wrong_ngram_index;
261 ++tmp_index) {
262 // Add a blob choice for the corresponding fragment of the
263 // ambiguity. These fake blob choices are initialized with
264 // negative ratings (which are not possible for real blob
265 // choices), so that dawg_permute_and_select() considers any
266 // word not consisting of only the original letters a better
267 // choice and stops searching for alternatives once such a
268 // choice is found.
269 BLOB_CHOICE_IT bc_it(ambig_blob_choices[i+tmp_index]);
270 bc_it.add_to_end(new BLOB_CHOICE(
271 ambig_spec->correct_fragments[tmp_index], -1.0, 0.0,
272 -1, 0, 1, 0, BCC_AMBIG));
273 }
274 }
275 spec_it.forward();
276 } else if (compare == -1) {
277 if (wrong_ngram_index+1 < ambig_spec->wrong_ngram_size &&
278 ((next_index = wrong_ngram_index+1+i) < best_choice->length())) {
279 // Add the next unichar id to wrong_ngram and keep looking for
280 // more ambigs starting with curr_unichar_id in AMBIG_SPEC_LIST.
281 wrong_ngram[++wrong_ngram_index] =
282 best_choice->unichar_id(next_index);
283 num_wrong_blobs += best_choice->state(next_index);
284 } else {
285 break; // no more matching ambigs in this AMBIG_SPEC_LIST
286 }
287 } else {
288 spec_it.forward();
289 }
290 } // end searching AmbigSpec_LIST
291 } // end searching best_choice
292 } // end searching replace and dangerous ambigs
293
294 // If any ambiguities were found permute the constructed ambig_blob_choices
295 // to see if an alternative dictionary word can be found.
296 if (ambigs_found) {
297 if (stopper_debug_level > 2) {
298 tprintf("\nResulting ambig_blob_choices:\n");
299 for (i = 0; i < ambig_blob_choices.length(); ++i) {
300 print_ratings_list("", ambig_blob_choices.get(i), getUnicharset());
301 tprintf("\n");
302 }
303 }
304 WERD_CHOICE *alt_word = dawg_permute_and_select(ambig_blob_choices, 0.0);
305 ambigs_found = (alt_word->rating() < 0.0);
306 if (ambigs_found) {
307 if (stopper_debug_level >= 1) {
308 tprintf ("Stopper: Possible ambiguous word = %s\n",
309 alt_word->debug_string().string());
310 }
311 if (fixpt != nullptr) {
312 // Note: Currently character choices combined from fragments can only
313 // be generated by NoDangerousAmbig(). This code should be updated if
314 // the capability to produce classifications combined from character
315 // fragments is added to other functions.
316 int orig_i = 0;
317 for (i = 0; i < alt_word->length(); ++i) {
318 const UNICHARSET &uchset = getUnicharset();
319 bool replacement_is_ngram =
320 uchset.get_isngram(alt_word->unichar_id(i));
321 UNICHAR_ID leftmost_id = alt_word->unichar_id(i);
322 if (replacement_is_ngram) {
323 // we have to extract the leftmost unichar from the ngram.
324 const char *str = uchset.id_to_unichar(leftmost_id);
325 int step = uchset.step(str);
326 if (step) leftmost_id = uchset.unichar_to_id(str, step);
327 }
328 int end_i = orig_i + alt_word->state(i);
329 if (alt_word->state(i) > 1 ||
330 (orig_i + 1 == end_i && replacement_is_ngram)) {
331 // Compute proper blob indices.
332 int blob_start = 0;
333 for (int j = 0; j < orig_i; ++j)
334 blob_start += best_choice->state(j);
335 int blob_end = blob_start;
336 for (int j = orig_i; j < end_i; ++j)
337 blob_end += best_choice->state(j);
338 fixpt->push_back(DANGERR_INFO(blob_start, blob_end, true,
339 replacement_is_ngram, leftmost_id));
340 if (stopper_debug_level > 1) {
341 tprintf("fixpt->dangerous+=(%d %d %d %d %s)\n", orig_i, end_i,
342 true, replacement_is_ngram,
343 uchset.id_to_unichar(leftmost_id));
344 }
345 }
346 orig_i += alt_word->state(i);
347 }
348 }
349 }
350 delete alt_word;
351 }
352 if (output_ambig_words_file_ != nullptr) {
353 fprintf(output_ambig_words_file_, "\n");
354 }
355
356 ambig_blob_choices.delete_data_pointers();
357 return !ambigs_found;
358}
359
361
362#endif // !defined(DISABLED_LEGACY_ENGINE)
363
365 reject_offset_ = 0.0;
366}
367
370}
371
372void Dict::ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size,
373 UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice,
374 MATRIX *ratings) {
375 int num_blobs_to_replace = 0;
376 int begin_blob_index = 0;
377 int i;
378 // Rating and certainty for the new BLOB_CHOICE are derived from the
379 // replaced choices.
380 float new_rating = 0.0f;
381 float new_certainty = 0.0f;
382 BLOB_CHOICE* old_choice = nullptr;
383 for (i = 0; i < wrong_ngram_begin_index + wrong_ngram_size; ++i) {
384 if (i >= wrong_ngram_begin_index) {
385 int num_blobs = werd_choice->state(i);
386 int col = begin_blob_index + num_blobs_to_replace;
387 int row = col + num_blobs - 1;
388 BLOB_CHOICE_LIST* choices = ratings->get(col, row);
389 ASSERT_HOST(choices != nullptr);
390 old_choice = FindMatchingChoice(werd_choice->unichar_id(i), choices);
391 ASSERT_HOST(old_choice != nullptr);
392 new_rating += old_choice->rating();
393 new_certainty += old_choice->certainty();
394 num_blobs_to_replace += num_blobs;
395 } else {
396 begin_blob_index += werd_choice->state(i);
397 }
398 }
399 new_certainty /= wrong_ngram_size;
400 // If there is no entry in the ratings matrix, add it.
401 MATRIX_COORD coord(begin_blob_index,
402 begin_blob_index + num_blobs_to_replace - 1);
403 if (!coord.Valid(*ratings)) {
404 ratings->IncreaseBandSize(coord.row - coord.col + 1);
405 }
406 if (ratings->get(coord.col, coord.row) == nullptr)
407 ratings->put(coord.col, coord.row, new BLOB_CHOICE_LIST);
408 BLOB_CHOICE_LIST* new_choices = ratings->get(coord.col, coord.row);
409 BLOB_CHOICE* choice = FindMatchingChoice(correct_ngram_id, new_choices);
410 if (choice != nullptr) {
411 // Already there. Upgrade if new rating better.
412 if (new_rating < choice->rating())
413 choice->set_rating(new_rating);
414 if (new_certainty < choice->certainty())
415 choice->set_certainty(new_certainty);
416 // DO NOT SORT!! It will mess up the iterator in LanguageModel::UpdateState.
417 } else {
418 // Need a new choice with the correct_ngram_id.
419 choice = new BLOB_CHOICE(*old_choice);
420 choice->set_unichar_id(correct_ngram_id);
421 choice->set_rating(new_rating);
422 choice->set_certainty(new_certainty);
423 choice->set_classifier(BCC_AMBIG);
424 choice->set_matrix_cell(coord.col, coord.row);
425 BLOB_CHOICE_IT it (new_choices);
426 it.add_to_end(choice);
427 }
428 // Remove current unichar from werd_choice. On the last iteration
429 // set the correct replacement unichar instead of removing a unichar.
430 for (int replaced_count = 0; replaced_count < wrong_ngram_size;
431 ++replaced_count) {
432 if (replaced_count + 1 == wrong_ngram_size) {
433 werd_choice->set_blob_choice(wrong_ngram_begin_index,
434 num_blobs_to_replace, choice);
435 } else {
436 werd_choice->remove_unichar_id(wrong_ngram_begin_index + 1);
437 }
438 }
439 if (stopper_debug_level >= 1) {
440 werd_choice->print("ReplaceAmbig() ");
441 tprintf("Modified blob_choices: ");
442 print_ratings_list("\n", new_choices, getUnicharset());
443 }
444}
445
446int Dict::LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const {
447 int shortest = INT32_MAX;
448 int curr_len = 0;
449 for (int w = 0; w < WordChoice.length(); ++w) {
450 if (WordChoice.unicharset()->get_isalpha(WordChoice.unichar_id(w))) {
451 curr_len++;
452 } else if (curr_len > 0) {
453 if (curr_len < shortest) shortest = curr_len;
454 curr_len = 0;
455 }
456 }
457 if (curr_len > 0 && curr_len < shortest) {
458 shortest = curr_len;
459 } else if (shortest == INT32_MAX) {
460 shortest = 0;
461 }
462 return shortest;
463}
464
466 float Certainty;
467 float WorstCertainty = FLT_MAX;
468 float CertaintyThreshold;
469 double TotalCertainty;
470 double TotalCertaintySquared;
471 double Variance;
472 float Mean, StdDev;
473 int word_length = word.length();
474
475 if (word_length < 3)
476 return true;
477
478 TotalCertainty = TotalCertaintySquared = 0.0;
479 for (int i = 0; i < word_length; ++i) {
480 Certainty = word.certainty(i);
481 TotalCertainty += Certainty;
482 TotalCertaintySquared += static_cast<double>(Certainty) * Certainty;
483 if (Certainty < WorstCertainty)
484 WorstCertainty = Certainty;
485 }
486
487 // Subtract off worst certainty from statistics.
488 word_length--;
489 TotalCertainty -= WorstCertainty;
490 TotalCertaintySquared -= static_cast<double>(WorstCertainty) * WorstCertainty;
491
492 Mean = TotalCertainty / word_length;
493 Variance = ((word_length * TotalCertaintySquared -
494 TotalCertainty * TotalCertainty) /
495 (word_length * (word_length - 1)));
496 if (Variance < 0.0)
497 Variance = 0.0;
498 StdDev = sqrt(Variance);
499
500 CertaintyThreshold = Mean - stopper_allowable_character_badness * StdDev;
501 if (CertaintyThreshold > stopper_nondict_certainty_base)
502 CertaintyThreshold = stopper_nondict_certainty_base;
503
504 if (word.certainty() < CertaintyThreshold) {
505 if (stopper_debug_level >= 1)
506 tprintf("Stopper: Non-uniform certainty = %4.1f"
507 " (m=%4.1f, s=%4.1f, t=%4.1f)\n",
508 word.certainty(), Mean, StdDev, CertaintyThreshold);
509 return false;
510 } else {
511 return true;
512 }
513}
514
515} // namespace tesseract
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:837
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:184
@ BCC_AMBIG
Definition: ratngs.h:47
#define MAX_AMBIG_SIZE
Definition: ambigs.h:31
#define ASSERT_HOST(x)
Definition: errcode.h:88
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int UNICHAR_ID
Definition: unichar.h:34
float Mean(PROTOTYPE *Proto, uint16_t Dimension)
Definition: cluster.cpp:602
XHeightConsistencyEnum
Definition: dict.h:78
@ XH_GOOD
Definition: dict.h:78
@ XH_SUBNORMAL
Definition: dict.h:78
@ XH_INCONSISTENT
Definition: dict.h:78
@ CASE_AMBIG
Definition: ambigs.h:42
int push_back(T object)
int size() const
Definition: genericvector.h:72
int length() const
Definition: genericvector.h:86
void delete_data_pointers()
T & get(int index) const
T get(ICOORD pos) const
Definition: matrix.h:231
void put(ICOORD pos, const T &thing)
Definition: matrix.h:223
Definition: matrix.h:578
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:49
bool Valid(const MATRIX &m) const
Definition: matrix.h:618
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249
WERD_CHOICE * best_choice
Definition: pageres.h:241
void set_rating(float newrat)
Definition: ratngs.h:144
float certainty() const
Definition: ratngs.h:83
void set_classifier(BlobChoiceClassifier classifier)
Definition: ratngs.h:157
float rating() const
Definition: ratngs.h:80
void set_matrix_cell(int col, int row)
Definition: ratngs.h:153
void set_certainty(float newrat)
Definition: ratngs.h:147
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:141
const STRING debug_string() const
Definition: ratngs.h:495
void remove_unichar_id(int index)
Definition: ratngs.h:474
int state(int index) const
Definition: ratngs.h:309
const STRING & unichar_string() const
Definition: ratngs.h:531
bool dangerous_ambig_found() const
Definition: ratngs.h:353
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
uint8_t permuter() const
Definition: ratngs.h:336
const UNICHARSET * unicharset() const
Definition: ratngs.h:290
void set_blob_choice(int index, int blob_count, const BLOB_CHOICE *blob_choice)
Definition: ratngs.cpp:316
float min_x_height() const
Definition: ratngs.h:326
float certainty() const
Definition: ratngs.h:320
int length() const
Definition: ratngs.h:293
float max_x_height() const
Definition: ratngs.h:329
void print() const
Definition: ratngs.h:570
float rating() const
Definition: ratngs.h:317
static void print(const UNICHAR_ID array[], const UNICHARSET &unicharset)
Definition: ambigs.h:91
static int compare(const UNICHAR_ID *ptr1, const UNICHAR_ID *ptr2)
Definition: ambigs.h:55
UNICHAR_ID correct_ngram_id
Definition: ambigs.h:126
UNICHAR_ID wrong_ngram[MAX_AMBIG_SIZE+1]
Definition: ambigs.h:124
UNICHAR_ID correct_fragments[MAX_AMBIG_SIZE+1]
Definition: ambigs.h:125
AmbigType type
Definition: ambigs.h:127
const UnicharAmbigsVector & replace_ambigs() const
Definition: ambigs.h:146
const UnicharAmbigsVector & dang_ambigs() const
Definition: ambigs.h:145
const char * string() const
Definition: strngs.cpp:194
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:491
bool get_isngram(UNICHAR_ID unichar_id) const
Definition: unicharset.h:526
int step(const char *str) const
Definition: unicharset.cpp:233
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
double stopper_allowable_character_badness
Definition: dict.h:637
bool AcceptableResult(WERD_RES *word) const
Definition: stopper.cpp:102
int UniformCertainties(const WERD_CHOICE &word)
Definition: stopper.cpp:465
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:368
bool stopper_no_acceptable_choices
Definition: dict.h:641
int LengthOfShortestAlphaRun(const WERD_CHOICE &WordChoice) const
Returns the length of the shortest alpha run in WordChoice.
Definition: stopper.cpp:446
void ReplaceAmbig(int wrong_ngram_begin_index, int wrong_ngram_size, UNICHAR_ID correct_ngram_id, WERD_CHOICE *werd_choice, MATRIX *ratings)
Definition: stopper.cpp:372
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
Definition: dict.h:474
double stopper_phase2_certainty_rejection_offset
Definition: dict.h:631
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
Definition: dict.cpp:778
double stopper_nondict_certainty_base
Definition: dict.h:629
bool AcceptableChoice(const WERD_CHOICE &best_choice, XHeightConsistencyEnum xheight_consistency)
Returns true if the given best_choice is good enough to stop.
Definition: stopper.cpp:42
WERD_CHOICE * dawg_permute_and_select(const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_limit)
Definition: permdawg.cpp:168
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:108
double stopper_certainty_per_char
Definition: dict.h:635
int stopper_debug_level
Definition: dict.h:638
bool NoDangerousAmbig(WERD_CHOICE *BestChoice, DANGERR *fixpt, bool fix_replaceable, MATRIX *ratings)
Definition: stopper.cpp:144
void EndDangerousAmbigs()
Definition: stopper.cpp:360
int case_ok(const WERD_CHOICE &word) const
Check a string to see if it matches a set of lexical rules.
Definition: context.cpp:46
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:364
const UNICHARSET & getUnicharset() const
Definition: dict.h:101
int stopper_smallword_size
Definition: dict.h:633