tesseract 4.1.1
Loading...
Searching...
No Matches
chopper.cpp
Go to the documentation of this file.
1/* -*-C-*-
2 ********************************************************************************
3 *
4 * File: chopper.cpp (Formerly chopper.c)
5 * Author: Mark Seaman, OCR Technology
6 *
7 * (c) Copyright 1987, Hewlett-Packard Company.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **************************************************************************/
19
20/*----------------------------------------------------------------------
21 I n c l u d e s
22----------------------------------------------------------------------*/
23
24#include "blamer.h" // for BlamerBundle, IRR_CORRECT
25#include "blobs.h" // for TPOINT, TBLOB, EDGEPT, TESSLINE, divisible_blob
26#include "callcpp.h" // for Red
27#include "dict.h" // for Dict
28#include "lm_pain_points.h" // for LMPainPoints
29#include "lm_state.h" // for BestChoiceBundle
30#include "matrix.h" // for MATRIX
31#include "normalis.h" // for DENORM
32#include "pageres.h" // for WERD_RES
33#include "params.h" // for IntParam, BoolParam
34#include "ratngs.h" // for BLOB_CHOICE (ptr only), BLOB_CHOICE_LIST (ptr ...
35#include "rect.h" // for TBOX
36#include "render.h" // for display_blob
37#include "seam.h" // for SEAM
38#include "split.h" // for remove_edgept
39#include "stopper.h" // for DANGERR
40#include "tprintf.h" // for tprintf
41#include "wordrec.h" // for Wordrec, SegSearchPending (ptr only)
42
43template <typename T> class GenericVector;
44
45// Include automatically generated configuration file if running autoconf.
46#ifdef HAVE_CONFIG_H
47#include "config_auto.h"
48#endif
49
50// Even though the limit on the number of chunks may now be removed, keep
51// the same limit for repeatable behavior, and it may be a speed advantage.
52static const int kMaxNumChunks = 64;
53
54/*----------------------------------------------------------------------
55 F u n c t i o n s
56----------------------------------------------------------------------*/
57
63static int check_blob(TBLOB *blob) {
64 TESSLINE *outline;
65 EDGEPT *edgept;
66
67 for (outline = blob->outlines; outline != nullptr; outline = outline->next) {
68 edgept = outline->loop;
69 do {
70 if (edgept == nullptr)
71 break;
72 edgept = edgept->next;
73 }
74 while (edgept != outline->loop);
75 if (edgept == nullptr)
76 return 1;
77 }
78 return 0;
79}
80
86static int any_shared_split_points(const GenericVector<SEAM*>& seams, SEAM *seam) {
87 int length;
88 int index;
89
90 length = seams.size();
91 for (index = 0; index < length; index++)
92 if (seam->SharesPosition(*seams[index])) return true;
93 return false;
94}
95
101static void preserve_outline(EDGEPT *start) {
102 EDGEPT *srcpt;
103
104 if (start == nullptr)
105 return;
106 srcpt = start;
107 do {
108 srcpt->flags[1] = 1;
109 srcpt = srcpt->next;
110 }
111 while (srcpt != start);
112 srcpt->flags[1] = 2;
113}
114
115static void preserve_outline_tree(TESSLINE *srcline) {
116 TESSLINE *outline;
117
118 for (outline = srcline; outline != nullptr; outline = outline->next) {
119 preserve_outline (outline->loop);
120 }
121}
122
128static EDGEPT *restore_outline(EDGEPT *start) {
129 EDGEPT *srcpt;
130 EDGEPT *real_start;
131
132 if (start == nullptr)
133 return nullptr;
134 srcpt = start;
135 do {
136 if (srcpt->flags[1] == 2)
137 break;
138 srcpt = srcpt->next;
139 }
140 while (srcpt != start);
141 real_start = srcpt;
142 do {
143 srcpt = srcpt->next;
144 if (srcpt->prev->flags[1] == 0) {
145 remove_edgept(srcpt->prev);
146 }
147 }
148 while (srcpt != real_start);
149 return real_start;
150}
151
152static void restore_outline_tree(TESSLINE *srcline) {
153 TESSLINE *outline;
154
155 for (outline = srcline; outline != nullptr; outline = outline->next) {
156 outline->loop = restore_outline (outline->loop);
157 outline->start = outline->loop->pos;
158 }
159}
160
161/**********************************************************************
162 * total_containment
163 *
164 * Check to see if one of these outlines is totally contained within
165 * the bounding box of the other.
166 **********************************************************************/
167static int16_t total_containment(TBLOB *blob1, TBLOB *blob2) {
168 TBOX box1 = blob1->bounding_box();
169 TBOX box2 = blob2->bounding_box();
170 return box1.contains(box2) || box2.contains(box1);
171}
172
173// Helper runs all the checks on a seam to make sure it is valid.
174// Returns the seam if OK, otherwise deletes the seam and returns nullptr.
175static SEAM* CheckSeam(int debug_level, int32_t blob_number, TWERD* word,
176 TBLOB* blob, TBLOB* other_blob,
177 const GenericVector<SEAM*>& seams, SEAM* seam) {
178 if (seam == nullptr || blob->outlines == nullptr || other_blob->outlines == nullptr ||
179 total_containment(blob, other_blob) || check_blob(other_blob) ||
180 !seam->ContainedByBlob(*blob) || !seam->ContainedByBlob(*other_blob) ||
181 any_shared_split_points(seams, seam) ||
182 !seam->PrepareToInsertSeam(seams, word->blobs, blob_number, false)) {
183 word->blobs.remove(blob_number + 1);
184 if (seam) {
185 seam->UndoSeam(blob, other_blob);
186 delete seam;
187 seam = nullptr;
188#ifndef GRAPHICS_DISABLED
189 if (debug_level) {
190 if (debug_level >2)
191 display_blob(blob, Red);
192 tprintf("\n** seam being removed ** \n");
193 }
194#endif
195 } else {
196 delete other_blob;
197 }
198 return nullptr;
199 }
200 return seam;
201}
202
203namespace tesseract {
204
211SEAM *Wordrec::attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number,
212 bool italic_blob,
213 const GenericVector<SEAM*>& seams) {
215 preserve_outline_tree (blob->outlines);
216 TBLOB *other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
217 // Insert it into the word.
218 word->blobs.insert(other_blob, blob_number + 1);
219
220 SEAM *seam = nullptr;
222 TPOINT location;
223 if (divisible_blob(blob, italic_blob, &location)) {
224 seam = new SEAM(0.0f, location);
225 }
226 }
227 if (seam == nullptr)
228 seam = pick_good_seam(blob);
229 if (chop_debug) {
230 if (seam != nullptr)
231 seam->Print("Good seam picked=");
232 else
233 tprintf("\n** no seam picked *** \n");
234 }
235 if (seam) {
236 seam->ApplySeam(italic_blob, blob, other_blob);
237 }
238
239 seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
240 seams, seam);
241 if (seam == nullptr) {
243 restore_outline_tree(blob->outlines);
245 // If the blob can simply be divided into outlines, then do that.
246 TPOINT location;
247 if (divisible_blob(blob, italic_blob, &location)) {
248 other_blob = TBLOB::ShallowCopy(*blob); /* Make new blob */
249 word->blobs.insert(other_blob, blob_number + 1);
250 seam = new SEAM(0.0f, location);
251 seam->ApplySeam(italic_blob, blob, other_blob);
252 seam = CheckSeam(chop_debug, blob_number, word, blob, other_blob,
253 seams, seam);
254 }
255 }
256 }
257 if (seam != nullptr) {
258 // Make sure this seam doesn't get chopped again.
259 seam->Finalize();
260 }
261 return seam;
262}
263
264
265SEAM *Wordrec::chop_numbered_blob(TWERD *word, int32_t blob_number,
266 bool italic_blob,
267 const GenericVector<SEAM*>& seams) {
268 return attempt_blob_chop(word, word->blobs[blob_number], blob_number,
269 italic_blob, seams);
270}
271
272
274 bool italic_blob, WERD_RES *word_res,
275 int *blob_number) {
276 TWERD *word = word_res->chopped_word;
277 for (*blob_number = 0; *blob_number < word->NumBlobs(); ++*blob_number) {
278 TBLOB *blob = word->blobs[*blob_number];
279 TPOINT topleft, botright;
280 topleft.x = blob->bounding_box().left();
281 topleft.y = blob->bounding_box().top();
282 botright.x = blob->bounding_box().right();
283 botright.y = blob->bounding_box().bottom();
284
285 TPOINT original_topleft, original_botright;
286 word_res->denorm.DenormTransform(nullptr, topleft, &original_topleft);
287 word_res->denorm.DenormTransform(nullptr, botright, &original_botright);
288
289 TBOX original_box = TBOX(original_topleft.x, original_botright.y,
290 original_botright.x, original_topleft.y);
291
292 bool almost_equal_box = false;
293 int num_overlap = 0;
294 for (int i = 0; i < boxes.size(); i++) {
295 if (original_box.overlap_fraction(boxes[i]) > 0.125)
296 num_overlap++;
297 if (original_box.almost_equal(boxes[i], 3))
298 almost_equal_box = true;
299 }
300
301 TPOINT location;
302 if (divisible_blob(blob, italic_blob, &location) ||
303 (!almost_equal_box && num_overlap > 1)) {
304 SEAM *seam = attempt_blob_chop(word, blob, *blob_number,
305 italic_blob, word_res->seam_array);
306 if (seam != nullptr)
307 return seam;
308 }
309 }
310
311 *blob_number = -1;
312 return nullptr;
313}
314
328 DANGERR *fixpt,
329 bool split_next_to_fragment,
330 bool italic_blob,
331 WERD_RES* word,
332 int* blob_number) {
333 float rating_ceiling = FLT_MAX;
334 SEAM *seam = nullptr;
335 do {
336 *blob_number = select_blob_to_split_from_fixpt(fixpt);
337 if (chop_debug) tprintf("blob_number from fixpt = %d\n", *blob_number);
338 bool split_point_from_dict = (*blob_number != -1);
339 if (split_point_from_dict) {
340 fixpt->clear();
341 } else {
342 *blob_number = select_blob_to_split(blob_choices, rating_ceiling,
343 split_next_to_fragment);
344 }
345 if (chop_debug) tprintf("blob_number = %d\n", *blob_number);
346 if (*blob_number == -1)
347 return nullptr;
348
349 // TODO(rays) it may eventually help to allow italic_blob to be true,
350 seam = chop_numbered_blob(word->chopped_word, *blob_number, italic_blob,
351 word->seam_array);
352 if (seam != nullptr)
353 return seam; // Success!
354 if (blob_choices[*blob_number] == nullptr)
355 return nullptr;
356 if (!split_point_from_dict) {
357 // We chopped the worst rated blob, try something else next time.
358 rating_ceiling = blob_choices[*blob_number]->rating();
359 }
360 } while (true);
361 return seam;
362}
363
372 const GenericVector<BLOB_CHOICE*>& blob_choices,
373 WERD_RES* word_res,
374 int* blob_number) {
376 return chop_overlapping_blob(boxes, true, word_res, blob_number);
377 } else {
378 return improve_one_blob(blob_choices, nullptr, false, true, word_res,
379 blob_number);
380 }
381}
382
392 int num_blobs = word->chopped_word->NumBlobs();
393 if (word->ratings == nullptr) {
394 word->ratings = new MATRIX(num_blobs, wordrec_max_join_chunks);
395 }
396 if (word->ratings->get(0, 0) == nullptr) {
397 // Run initial classification.
398 for (int b = 0; b < num_blobs; ++b) {
399 BLOB_CHOICE_LIST* choices = classify_piece(word->seam_array, b, b,
400 "Initial:", word->chopped_word,
401 word->blamer_bundle);
402 word->ratings->put(b, b, choices);
403 }
404 } else {
405 // Blobs have been pre-classified. Set matrix cell for all blob choices
406 for (int col = 0; col < word->ratings->dimension(); ++col) {
407 for (int row = col; row < word->ratings->dimension() &&
408 row < col + word->ratings->bandwidth(); ++row) {
409 BLOB_CHOICE_LIST* choices = word->ratings->get(col, row);
410 if (choices != nullptr) {
411 BLOB_CHOICE_IT bc_it(choices);
412 for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
413 bc_it.data()->set_matrix_cell(col, row);
414 }
415 }
416 }
417 }
418 }
419
420 // Run Segmentation Search.
421 BestChoiceBundle best_choice_bundle(word->ratings->dimension());
422 SegSearch(word, &best_choice_bundle, word->blamer_bundle);
423
424 if (word->best_choice == nullptr) {
425 // SegSearch found no valid paths, so just use the leading diagonal.
427 }
428 word->RebuildBestState();
429 // If we finished without a hyphen at the end of the word, let the next word
430 // be found in the dictionary.
431 if (word->word->flag(W_EOL) &&
432 !getDict().has_hyphen_end(*word->best_choice)) {
434 }
435
436 if (word->blamer_bundle != nullptr && this->fill_lattice_ != nullptr) {
438 *word->uch_set, word->blamer_bundle);
439 }
440 if (wordrec_debug_level > 0) {
441 tprintf("Final Ratings Matrix:\n");
442 word->ratings->print(getDict().getUnicharset());
443 }
444 word->FilterWordChoices(getDict().stopper_debug_level);
445}
446
454void Wordrec::improve_by_chopping(float rating_cert_scale,
455 WERD_RES* word,
456 BestChoiceBundle* best_choice_bundle,
457 BlamerBundle* blamer_bundle,
458 LMPainPoints* pain_points,
460 int blob_number;
461 do { // improvement loop.
462 // Make a simple vector of BLOB_CHOICEs to make it easy to pick which
463 // one to chop.
464 GenericVector<BLOB_CHOICE*> blob_choices;
465 int num_blobs = word->ratings->dimension();
466 for (int i = 0; i < num_blobs; ++i) {
467 BLOB_CHOICE_LIST* choices = word->ratings->get(i, i);
468 if (choices == nullptr || choices->empty()) {
469 blob_choices.push_back(nullptr);
470 } else {
471 BLOB_CHOICE_IT bc_it(choices);
472 blob_choices.push_back(bc_it.data());
473 }
474 }
475 SEAM* seam = improve_one_blob(blob_choices, &best_choice_bundle->fixpt,
476 false, false, word, &blob_number);
477 if (seam == nullptr) break;
478 // A chop has been made. We have to correct all the data structures to
479 // take into account the extra bottom-level blob.
480 // Put the seam into the seam_array and correct everything else on the
481 // word: ratings matrix (including matrix location in the BLOB_CHOICES),
482 // states in WERD_CHOICEs, and blob widths.
483 word->InsertSeam(blob_number, seam);
484 // Insert a new entry in the beam array.
485 best_choice_bundle->beam.insert(new LanguageModelState, blob_number);
486 // Fixpts are outdated, but will get recalculated.
487 best_choice_bundle->fixpt.clear();
488 // Remap existing pain points.
489 pain_points->RemapForSplit(blob_number);
490 // Insert a new pending at the chop point.
491 pending->insert(SegSearchPending(), blob_number);
492
493 // Classify the two newly created blobs using ProcessSegSearchPainPoint,
494 // as that updates the pending correctly and adds new pain points.
495 MATRIX_COORD pain_point(blob_number, blob_number);
496 ProcessSegSearchPainPoint(0.0f, pain_point, "Chop1", pending, word,
497 pain_points, blamer_bundle);
498 pain_point.col = blob_number + 1;
499 pain_point.row = blob_number + 1;
500 ProcessSegSearchPainPoint(0.0f, pain_point, "Chop2", pending, word,
501 pain_points, blamer_bundle);
502 if (language_model_->language_model_ngram_on) {
503 // N-gram evaluation depends on the number of blobs in a chunk, so we
504 // have to re-evaluate everything in the word.
505 ResetNGramSearch(word, best_choice_bundle, pending);
506 blob_number = 0;
507 }
508 // Run language model incrementally. (Except with the n-gram model on.)
509 UpdateSegSearchNodes(rating_cert_scale, blob_number, pending,
510 word, pain_points, best_choice_bundle, blamer_bundle);
511 } while (!language_model_->AcceptableChoiceFound() &&
512 word->ratings->dimension() < kMaxNumChunks);
513
514 // If after running only the chopper best_choice is incorrect and no blame
515 // has been yet set, blame the classifier if best_choice is classifier's
516 // top choice and is a dictionary word (i.e. language model could not have
517 // helped). Otherwise blame the tradeoff between the classifier and
518 // the old language model (permuters).
519 if (word->blamer_bundle != nullptr &&
522 bool valid_permuter = word->best_choice != nullptr &&
525 getDict().getUnicharset(),
526 valid_permuter,
528 }
529}
530
531
532/**********************************************************************
533 * select_blob_to_split
534 *
535 * These are the results of the last classification. Find a likely
536 * place to apply splits. If none, return -1.
537 **********************************************************************/
539 const GenericVector<BLOB_CHOICE*>& blob_choices,
540 float rating_ceiling, bool split_next_to_fragment) {
541 BLOB_CHOICE *blob_choice;
542 int x;
543 float worst = -FLT_MAX;
544 int worst_index = -1;
545 float worst_near_fragment = -FLT_MAX;
546 int worst_index_near_fragment = -1;
547 const CHAR_FRAGMENT **fragments = nullptr;
548
549 if (chop_debug) {
550 if (rating_ceiling < FLT_MAX)
551 tprintf("rating_ceiling = %8.4f\n", rating_ceiling);
552 else
553 tprintf("rating_ceiling = No Limit\n");
554 }
555
556 if (split_next_to_fragment && blob_choices.size() > 0) {
557 fragments = new const CHAR_FRAGMENT *[blob_choices.length()];
558 if (blob_choices[0] != nullptr) {
559 fragments[0] = getDict().getUnicharset().get_fragment(
560 blob_choices[0]->unichar_id());
561 } else {
562 fragments[0] = nullptr;
563 }
564 }
565
566 for (x = 0; x < blob_choices.size(); ++x) {
567 if (blob_choices[x] == nullptr) {
568 delete[] fragments;
569 return x;
570 } else {
571 blob_choice = blob_choices[x];
572 // Populate fragments for the following position.
573 if (split_next_to_fragment && x+1 < blob_choices.size()) {
574 if (blob_choices[x + 1] != nullptr) {
575 fragments[x + 1] = getDict().getUnicharset().get_fragment(
576 blob_choices[x + 1]->unichar_id());
577 } else {
578 fragments[x + 1] = nullptr;
579 }
580 }
581 if (blob_choice->rating() < rating_ceiling &&
582 blob_choice->certainty() < tessedit_certainty_threshold) {
583 // Update worst and worst_index.
584 if (blob_choice->rating() > worst) {
585 worst_index = x;
586 worst = blob_choice->rating();
587 }
588 if (split_next_to_fragment) {
589 // Update worst_near_fragment and worst_index_near_fragment.
590 bool expand_following_fragment =
591 (x + 1 < blob_choices.size() &&
592 fragments[x+1] != nullptr && !fragments[x+1]->is_beginning());
593 bool expand_preceding_fragment =
594 (x > 0 && fragments[x-1] != nullptr && !fragments[x-1]->is_ending());
595 if ((expand_following_fragment || expand_preceding_fragment) &&
596 blob_choice->rating() > worst_near_fragment) {
597 worst_index_near_fragment = x;
598 worst_near_fragment = blob_choice->rating();
599 if (chop_debug) {
600 tprintf("worst_index_near_fragment=%d"
601 " expand_following_fragment=%d"
602 " expand_preceding_fragment=%d\n",
603 worst_index_near_fragment,
604 expand_following_fragment,
605 expand_preceding_fragment);
606 }
607 }
608 }
609 }
610 }
611 }
612 delete[] fragments;
613 // TODO(daria): maybe a threshold of badness for
614 // worst_near_fragment would be useful.
615 return worst_index_near_fragment != -1 ?
616 worst_index_near_fragment : worst_index;
617}
618
619/**********************************************************************
620 * select_blob_to_split_from_fixpt
621 *
622 * Given the fix point from a dictionary search, if there is a single
623 * dangerous blob that maps to multiple characters, return that blob
624 * index as a place we need to split. If none, return -1.
625 **********************************************************************/
627 if (!fixpt)
628 return -1;
629 for (int i = 0; i < fixpt->size(); i++) {
630 if ((*fixpt)[i].begin + 1 == (*fixpt)[i].end &&
631 (*fixpt)[i].dangerous &&
632 (*fixpt)[i].correct_is_ngram) {
633 return (*fixpt)[i].begin;
634 }
635 }
636 return -1;
637}
638
639} // namespace tesseract
@ IRR_CORRECT
Definition: blamer.h:53
bool divisible_blob(TBLOB *blob, bool italic_blob, TPOINT *location)
Definition: blobs.cpp:913
@ TOP_CHOICE_PERM
Definition: ratngs.h:235
void remove_edgept(EDGEPT *point)
Definition: split.cpp:200
@ W_EOL
end of line
Definition: werd.h:33
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
@ Red
Definition: callcpp.h:30
void display_blob(TBLOB *blob, C_COL color)
Definition: render.cpp:52
int push_back(T object)
int size() const
Definition: genericvector.h:72
void remove(int index)
void insert(const T &t, int index)
int length() const
Definition: genericvector.h:86
T get(ICOORD pos) const
Definition: matrix.h:231
void put(ICOORD pos, const T &thing)
Definition: matrix.h:223
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:119
IncorrectResultReason incorrect_result_reason() const
Definition: blamer.h:120
void BlameClassifierOrLangModel(const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
Definition: blamer.cpp:377
Definition: blobs.h:51
int16_t x
Definition: blobs.h:93
int16_t y
Definition: blobs.h:94
Definition: blobs.h:99
EDGEPT * next
Definition: blobs.h:192
char flags[EDGEPTFLAGS]
Definition: blobs.h:191
EDGEPT * prev
Definition: blobs.h:193
TPOINT pos
Definition: blobs.h:186
EDGEPT * loop
Definition: blobs.h:280
TESSLINE * next
Definition: blobs.h:281
TPOINT start
Definition: blobs.h:278
Definition: blobs.h:284
TESSLINE * outlines
Definition: blobs.h:400
TBOX bounding_box() const
Definition: blobs.cpp:468
static TBLOB * ShallowCopy(const TBLOB &src)
Definition: blobs.cpp:335
Definition: blobs.h:418
int NumBlobs() const
Definition: blobs.h:448
GenericVector< TBLOB * > blobs
Definition: blobs.h:459
int dimension() const
Definition: matrix.h:536
Definition: matrix.h:578
void print(const UNICHARSET &unicharset) const
Definition: matrix.cpp:112
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:390
const UNICHARSET * uch_set
Definition: pageres.h:203
DENORM denorm
Definition: pageres.h:201
WERD_CHOICE_LIST best_choices
Definition: pageres.h:249
BlamerBundle * blamer_bundle
Definition: pageres.h:252
GenericVector< SEAM * > seam_array
Definition: pageres.h:214
WERD_CHOICE * best_choice
Definition: pageres.h:241
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:898
void FilterWordChoices(int debug_level)
Definition: pageres.cpp:513
TWERD * chopped_word
Definition: pageres.h:212
void InsertSeam(int blob_number, SEAM *seam)
Definition: pageres.cpp:418
void RebuildBestState()
Definition: pageres.cpp:808
MATRIX * ratings
Definition: pageres.h:237
WERD * word
Definition: pageres.h:186
float certainty() const
Definition: ratngs.h:83
float rating() const
Definition: ratngs.h:80
uint8_t permuter() const
Definition: ratngs.h:336
Definition: rect.h:34
bool almost_equal(const TBOX &box, int tolerance) const
Definition: rect.cpp:258
double overlap_fraction(const TBOX &box) const
Definition: rect.h:388
int16_t top() const
Definition: rect.h:58
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
bool contains(const FCOORD pt) const
Definition: rect.h:333
int16_t right() const
Definition: rect.h:79
Definition: seam.h:38
bool SharesPosition(const SEAM &other) const
Definition: seam.h:89
void UndoSeam(TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:134
bool PrepareToInsertSeam(const GenericVector< SEAM * > &seams, const GenericVector< TBLOB * > &blobs, int insert_index, bool modify)
Definition: seam.cpp:76
void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:118
bool ContainedByBlob(const TBLOB &blob) const
Definition: seam.h:73
void Finalize()
Definition: seam.h:110
void Print(const char *label) const
Definition: seam.cpp:154
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117
bool is_beginning() const
Definition: unicharset.h:105
bool is_ending() const
Definition: unicharset.h:108
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:734
bool allow_blob_division
Definition: classify.h:423
bool prioritize_division
Definition: classify.h:428
virtual Dict & getDict()
Definition: classify.h:107
static bool valid_word_permuter(uint8_t perm, bool numbers_ok)
Check all the DAWGs to see if this word is in any of them.
Definition: dict.h:474
void reset_hyphen_vars(bool last_word_on_line)
Definition: hyphen.cpp:28
const UNICHARSET & getUnicharset() const
Definition: dict.h:101
void RemapForSplit(int index)
Struct to store information maintained by various language model components.
Definition: lm_state.h:200
Bundle together all the things pertaining to the best choice/state.
Definition: lm_state.h:222
DANGERR fixpt
Places to try to fix the word suggested by ambiguity checking.
Definition: lm_state.h:234
PointerVector< LanguageModelState > beam
Definition: lm_state.h:238
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending)
Definition: segsearch.cpp:311
SEAM * chop_one_blob(const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE * > &blob_choices, WERD_RES *word_res, int *blob_number)
Definition: chopper.cpp:371
int select_blob_to_split(const GenericVector< BLOB_CHOICE * > &blob_choices, float rating_ceiling, bool split_next_to_fragment)
Definition: chopper.cpp:538
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:180
int wordrec_debug_level
Definition: wordrec.h:226
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:259
SEAM * chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
Definition: chopper.cpp:265
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending)
Definition: chopper.cpp:454
SEAM * chop_overlapping_blob(const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number)
Definition: chopper.cpp:273
int repair_unchopped_blobs
Definition: wordrec.h:202
void chop_word_main(WERD_RES *word)
Definition: chopper.cpp:391
double tessedit_certainty_threshold
Definition: wordrec.h:203
SEAM * improve_one_blob(const GenericVector< BLOB_CHOICE * > &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number)
Definition: chopper.cpp:327
virtual BLOB_CHOICE_LIST * classify_piece(const GenericVector< SEAM * > &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
Definition: pieces.cpp:50
bool wordrec_debug_blamer
Definition: wordrec.h:231
int wordrec_max_join_chunks
Definition: wordrec.h:228
int select_blob_to_split_from_fixpt(DANGERR *fixpt)
Definition: chopper.cpp:626
SEAM * attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const GenericVector< SEAM * > &seams)
Definition: chopper.cpp:211
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:217
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:42
std::unique_ptr< LanguageModel > language_model_
Definition: wordrec.h:471
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:248