tesseract 4.1.1
Loading...
Searching...
No Matches
BlamerBundle Struct Reference

#include <blamer.h>

Public Member Functions

 BlamerBundle ()
 
 BlamerBundle (const BlamerBundle &other)
 
 ~BlamerBundle ()
 
STRING TruthString () const
 
IncorrectResultReason incorrect_result_reason () const
 
bool NoTruth () const
 
bool HasDebugInfo () const
 
const STRING & debug () const
 
const STRING & misadaption_debug () const
 
void UpdateBestRating (float rating)
 
int correct_segmentation_length () const
 
bool MatrixPositionCorrect (int index, const MATRIX_COORD &coord)
 
void set_best_choice_is_dict_and_top_choice (bool value)
 
const char * lattice_data () const
 
int lattice_size () const
 
void set_lattice_data (const char *data, int size)
 
const tesseract::ParamsTrainingBundle & params_training_bundle () const
 
void AddHypothesis (const tesseract::ParamsTrainingHypothesis &hypo)
 
void SetWordTruth (const UNICHARSET &unicharset, const char *truth_str, const TBOX &word_box)
 
void SetSymbolTruth (const UNICHARSET &unicharset, const char *char_str, const TBOX &char_box)
 
void SetRejectedTruth ()
 
bool ChoiceIsCorrect (const WERD_CHOICE *word_choice) const
 
void ClearResults ()
 
void CopyTruth (const BlamerBundle &other)
 
void CopyResults (const BlamerBundle &other)
 
const char * IncorrectReason () const
 
void FillDebugString (const STRING &msg, const WERD_CHOICE *choice, STRING *debug)
 
void SetupNormTruthWord (const DENORM &denorm)
 
void SplitBundle (int word1_right, int word2_left, bool debug, BlamerBundle *bundle1, BlamerBundle *bundle2) const
 
void JoinBlames (const BlamerBundle &bundle1, const BlamerBundle &bundle2, bool debug)
 
void BlameClassifier (const UNICHARSET &unicharset, const TBOX &blob_box, const BLOB_CHOICE_LIST &choices, bool debug)
 
void SetChopperBlame (const WERD_RES *word, bool debug)
 
void BlameClassifierOrLangModel (const WERD_RES *word, const UNICHARSET &unicharset, bool valid_permuter, bool debug)
 
void SetupCorrectSegmentation (const TWERD *word, bool debug)
 
bool GuidedSegsearchNeeded (const WERD_CHOICE *best_choice) const
 
void InitForSegSearch (const WERD_CHOICE *best_choice, MATRIX *ratings, UNICHAR_ID wildcard_id, bool debug, STRING *debug_str, TessResultCallback2< bool, int, int > *pp_cb)
 
bool GuidedSegsearchStillGoing () const
 
void FinishSegSearch (const WERD_CHOICE *best_choice, bool debug, STRING *debug_str)
 
void SetMisAdaptionDebug (const WERD_CHOICE *best_choice, bool debug)
 

Static Public Member Functions

static const char * IncorrectReasonName (IncorrectResultReason irr)
 
static void LastChanceBlame (bool debug, WERD_RES *word)
 

Detailed Description

Definition at line 102 of file blamer.h.

Constructor & Destructor Documentation

◆ BlamerBundle() [1/2]

BlamerBundle::BlamerBundle ( )
inline

Definition at line 104 of file blamer.h.

104 : truth_has_char_boxes_(false),
105 incorrect_result_reason_(IRR_CORRECT),
106 lattice_data_(nullptr) { ClearResults(); }
@ IRR_CORRECT
Definition: blamer.h:53
void ClearResults()
Definition: blamer.h:189

◆ BlamerBundle() [2/2]

BlamerBundle::BlamerBundle ( const BlamerBundle & other)
inline

Definition at line 107 of file blamer.h.

107 {
108 this->CopyTruth(other);
109 this->CopyResults(other);
110 }
void CopyResults(const BlamerBundle &other)
Definition: blamer.h:210
void CopyTruth(const BlamerBundle &other)
Definition: blamer.h:203

◆ ~BlamerBundle()

BlamerBundle::~BlamerBundle ( )
inline

Definition at line 111 of file blamer.h.

111{ delete[] lattice_data_; }

Member Function Documentation

◆ AddHypothesis()

void BlamerBundle::AddHypothesis ( const tesseract::ParamsTrainingHypothesis & hypo)
inline

Definition at line 169 of file blamer.h.

169 {
170 params_training_bundle_.AddHypothesis(hypo);
171 }
ParamsTrainingHypothesis & AddHypothesis(const ParamsTrainingHypothesis &other)

◆ BlameClassifier()

void BlamerBundle::BlameClassifier ( const UNICHARSET & unicharset,
const TBOX & blob_box,
const BLOB_CHOICE_LIST &  choices,
bool  debug 
)

Definition at line 265 of file blamer.cpp.

268 {
269 if (!truth_has_char_boxes_ ||
270 incorrect_result_reason_ != IRR_CORRECT)
271 return; // Nothing to do here.
272
273 for (int b = 0; b < norm_truth_word_.length(); ++b) {
274 const TBOX &truth_box = norm_truth_word_.BlobBox(b);
275 // Note that we are more strict on the bounding box boundaries here
276 // than in other places (chopper, segmentation search), since we do
277 // not have the ability to check the previous and next bounding box.
278 if (blob_box.x_almost_equal(truth_box, norm_box_tolerance_/2)) {
279 bool found = false;
280 bool incorrect_adapted = false;
281 UNICHAR_ID incorrect_adapted_id = INVALID_UNICHAR_ID;
282 const char *truth_str = truth_text_[b].string();
283 // We promise not to modify the list or its contents, using a
284 // const BLOB_CHOICE* below.
285 BLOB_CHOICE_IT choices_it(const_cast<BLOB_CHOICE_LIST*>(&choices));
286 for (choices_it.mark_cycle_pt(); !choices_it.cycled_list();
287 choices_it.forward()) {
288 const BLOB_CHOICE* choice = choices_it.data();
289 if (strcmp(truth_str, unicharset.get_normed_unichar(
290 choice->unichar_id())) == 0) {
291 found = true;
292 break;
293 } else if (choice->IsAdapted()) {
294 incorrect_adapted = true;
295 incorrect_adapted_id = choice->unichar_id();
296 }
297 } // end choices_it for loop
298 if (!found) {
299 STRING debug_str = "unichar ";
300 debug_str += truth_str;
301 debug_str += " not found in classification list";
302 SetBlame(IRR_CLASSIFIER, debug_str, nullptr, debug);
303 } else if (incorrect_adapted) {
304 STRING debug_str = "better rating for adapted ";
305 debug_str += unicharset.id_to_unichar(incorrect_adapted_id);
306 debug_str += " than for correct ";
307 debug_str += truth_str;
308 SetBlame(IRR_ADAPTION, debug_str, nullptr, debug);
309 }
310 break;
311 }
312 } // end iterating over blamer_bundle->norm_truth_word
313}
@ IRR_ADAPTION
Definition: blamer.h:88
@ IRR_CLASSIFIER
Definition: blamer.h:58
int UNICHAR_ID
Definition: unichar.h:34
const STRING & debug() const
Definition: blamer.h:130
int length() const
Definition: boxword.h:83
const TBOX & BlobBox(int index) const
Definition: boxword.h:84
bool IsAdapted() const
Definition: ratngs.h:132
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77
Definition: rect.h:34
bool x_almost_equal(const TBOX &box, int tolerance) const
Definition: rect.cpp:253
Definition: strngs.h:45
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:828
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291

◆ BlameClassifierOrLangModel()

void BlamerBundle::BlameClassifierOrLangModel ( const WERD_RES * word,
const UNICHARSET & unicharset,
bool  valid_permuter,
bool  debug 
)

Definition at line 377 of file blamer.cpp.

379 {
380 if (valid_permuter) {
381 // Find out whether best choice is a top choice.
382 best_choice_is_dict_and_top_choice_ = true;
383 for (int i = 0; i < word->best_choice->length(); ++i) {
384 BLOB_CHOICE_IT blob_choice_it(word->GetBlobChoices(i));
385 ASSERT_HOST(!blob_choice_it.empty());
386 BLOB_CHOICE *first_choice = nullptr;
387 for (blob_choice_it.mark_cycle_pt(); !blob_choice_it.cycled_list();
388 blob_choice_it.forward()) { // find first non-fragment choice
389 if (!(unicharset.get_fragment(blob_choice_it.data()->unichar_id()))) {
390 first_choice = blob_choice_it.data();
391 break;
392 }
393 }
394 ASSERT_HOST(first_choice != nullptr);
395 if (first_choice->unichar_id() != word->best_choice->unichar_id(i)) {
396 best_choice_is_dict_and_top_choice_ = false;
397 break;
398 }
399 }
400 }
401 STRING debug_str;
402 if (best_choice_is_dict_and_top_choice_) {
403 debug_str = "Best choice is: incorrect, top choice, dictionary word";
404 debug_str += " with permuter ";
405 debug_str += word->best_choice->permuter_name();
406 } else {
407 debug_str = "Classifier/Old LM tradeoff is to blame";
408 }
409 SetBlame(best_choice_is_dict_and_top_choice_ ? IRR_CLASSIFIER
410 : IRR_CLASS_OLD_LM_TRADEOFF,
411 debug_str, word->best_choice, debug);
412}
@ IRR_CLASS_OLD_LM_TRADEOFF
Definition: blamer.h:85
#define ASSERT_HOST(x)
Definition: errcode.h:88
WERD_CHOICE * best_choice
Definition: pageres.h:241
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:759
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
int length() const
Definition: ratngs.h:293
static const char * permuter_name(uint8_t permuter)
Definition: ratngs.cpp:198
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:734

◆ ChoiceIsCorrect()

bool BlamerBundle::ChoiceIsCorrect ( const WERD_CHOICE * word_choice) const

Definition at line 119 of file blamer.cpp.

119 {
120 if (word_choice == nullptr) return false;
121 const UNICHARSET* uni_set = word_choice->unicharset();
122 STRING normed_choice_str;
123 for (int i = 0; i < word_choice->length(); ++i) {
124 normed_choice_str +=
125 uni_set->get_normed_unichar(word_choice->unichar_id(i));
126 }
127 STRING truth_str = TruthString();
128 return truth_str == normed_choice_str;
129}
STRING TruthString() const
Definition: blamer.h:114
const UNICHARSET * unicharset() const
Definition: ratngs.h:290

◆ ClearResults()

void BlamerBundle::ClearResults ( )
inline

Definition at line 189 of file blamer.h.

189 {
190 norm_truth_word_.DeleteAllBoxes();
191 norm_box_tolerance_ = 0;
192 if (!NoTruth()) incorrect_result_reason_ = IRR_CORRECT;
193 debug_ = "";
194 segsearch_is_looking_for_blame_ = false;
195 best_correctly_segmented_rating_ = WERD_CHOICE::kBadRating;
196 correct_segmentation_cols_.clear();
197 correct_segmentation_rows_.clear();
198 best_choice_is_dict_and_top_choice_ = false;
199 delete[] lattice_data_;
200 lattice_data_ = nullptr;
201 lattice_size_ = 0;
202 }
bool NoTruth() const
Definition: blamer.h:123
void DeleteAllBoxes()
Definition: boxword.cpp:174
static const float kBadRating
Definition: ratngs.h:265

◆ CopyResults()

void BlamerBundle::CopyResults ( const BlamerBundle & other)
inline

Definition at line 210 of file blamer.h.

210 {
211 norm_truth_word_ = other.norm_truth_word_;
212 norm_box_tolerance_ = other.norm_box_tolerance_;
213 incorrect_result_reason_ = other.incorrect_result_reason_;
214 segsearch_is_looking_for_blame_ = other.segsearch_is_looking_for_blame_;
215 best_correctly_segmented_rating_ = other.best_correctly_segmented_rating_;
216 correct_segmentation_cols_ = other.correct_segmentation_cols_;
217 correct_segmentation_rows_ = other.correct_segmentation_rows_;
218 best_choice_is_dict_and_top_choice_ =
219 other.best_choice_is_dict_and_top_choice_;
220 if (other.lattice_data_ != nullptr) {
221 lattice_data_ = new char[other.lattice_size_];
222 memcpy(lattice_data_, other.lattice_data_, other.lattice_size_);
223 lattice_size_ = other.lattice_size_;
224 } else {
225 lattice_data_ = nullptr;
226 }
227 }

◆ CopyTruth()

void BlamerBundle::CopyTruth ( const BlamerBundle & other)
inline

Definition at line 203 of file blamer.h.

203 {
204 truth_has_char_boxes_ = other.truth_has_char_boxes_;
205 truth_word_ = other.truth_word_;
206 truth_text_ = other.truth_text_;
207 incorrect_result_reason_ =
208 (other.NoTruth() ? other.incorrect_result_reason_ : IRR_CORRECT);
209 }

◆ correct_segmentation_length()

int BlamerBundle::correct_segmentation_length ( ) const
inline

Definition at line 140 of file blamer.h.

140 {
141 return correct_segmentation_cols_.length();
142 }
int length() const
Definition: genericvector.h:86

◆ debug()

const STRING & BlamerBundle::debug ( ) const
inline

Definition at line 130 of file blamer.h.

130 {
131 return debug_;
132 }

◆ FillDebugString()

void BlamerBundle::FillDebugString ( const STRING & msg,
const WERD_CHOICE * choice,
STRING * debug
)

Definition at line 131 of file blamer.cpp.

133 {
134 (*debug) += "Truth ";
135 for (int i = 0; i < this->truth_text_.length(); ++i) {
136 (*debug) += this->truth_text_[i];
137 }
138 if (!this->truth_has_char_boxes_) (*debug) += " (no char boxes)";
139 if (choice != nullptr) {
140 (*debug) += " Choice ";
141 STRING choice_str;
142 choice->string_and_lengths(&choice_str, nullptr);
143 (*debug) += choice_str;
144 }
145 if (msg.length() > 0) {
146 (*debug) += "\n";
147 (*debug) += msg;
148 }
149 (*debug) += "\n";
150}
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:453
int32_t length() const
Definition: strngs.cpp:189

◆ FinishSegSearch()

void BlamerBundle::FinishSegSearch ( const WERD_CHOICE * best_choice,
bool  debug,
STRING * debug_str
)

Definition at line 519 of file blamer.cpp.

520 {
521 // If we are still looking for blame (i.e. best_choice is incorrect, but a
522 // path representing the correct segmentation could be constructed), we can
523 // blame segmentation search pain point prioritization if the rating of the
524 // path corresponding to the correct segmentation is better than that of
525 // best_choice (i.e. language model would have done the correct thing, but
526 // because of poor pain point prioritization the correct segmentation was
527 // never explored). Otherwise we blame the tradeoff between the language model
528 // and the classifier, since even after exploring the path corresponding to
529 // the correct segmentation incorrect best_choice would have been chosen.
530 // One special case when we blame the classifier instead is when best choice
531 // is incorrect, but it is a dictionary word and it is the classifier's top choice.
532 if (segsearch_is_looking_for_blame_) {
533 segsearch_is_looking_for_blame_ = false;
534 if (best_choice_is_dict_and_top_choice_) {
535 *debug_str = "Best choice is: incorrect, top choice, dictionary word";
536 *debug_str += " with permuter ";
537 *debug_str += best_choice->permuter_name();
538 SetBlame(IRR_CLASSIFIER, *debug_str, best_choice, debug);
539 } else if (best_correctly_segmented_rating_ <
540 best_choice->rating()) {
541 *debug_str += "Correct segmentation state was not explored";
542 SetBlame(IRR_SEGSEARCH_PP, *debug_str, best_choice, debug);
543 } else {
544 if (best_correctly_segmented_rating_ >=
545 WERD_CHOICE::kBadRating) {
546 *debug_str += "Correct segmentation paths were pruned by LM\n";
547 } else {
548 debug_str->add_str_double("Best correct segmentation rating ",
549 best_correctly_segmented_rating_);
550 debug_str->add_str_double(" vs. best choice rating ",
551 best_choice->rating());
552 }
553 SetBlame(IRR_CLASS_LM_TRADEOFF, *debug_str, best_choice, debug);
554 }
555 }
556}
@ IRR_CLASS_LM_TRADEOFF
Definition: blamer.h:68
@ IRR_SEGSEARCH_PP
Definition: blamer.h:81
float rating() const
Definition: ratngs.h:317
void add_str_double(const char *str, double number)
Definition: strngs.cpp:387

◆ GuidedSegsearchNeeded()

bool BlamerBundle::GuidedSegsearchNeeded ( const WERD_CHOICE * best_choice) const

Definition at line 471 of file blamer.cpp.

471 {
472 return incorrect_result_reason_ == IRR_CORRECT &&
473 !segsearch_is_looking_for_blame_ &&
474 truth_has_char_boxes_ &&
475 !ChoiceIsCorrect(best_choice);
476}
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:119

◆ GuidedSegsearchStillGoing()

bool BlamerBundle::GuidedSegsearchStillGoing ( ) const

Definition at line 514 of file blamer.cpp.

514 {
515 return segsearch_is_looking_for_blame_;
516}

◆ HasDebugInfo()

bool BlamerBundle::HasDebugInfo ( ) const
inline

Definition at line 127 of file blamer.h.

127 {
128 return debug_.length() > 0 || misadaption_debug_.length() > 0;
129 }

◆ incorrect_result_reason()

IncorrectResultReason BlamerBundle::incorrect_result_reason ( ) const
inline

Definition at line 120 of file blamer.h.

120 {
121 return incorrect_result_reason_;
122 }

◆ IncorrectReason()

const char * BlamerBundle::IncorrectReason ( ) const

Definition at line 68 of file blamer.cpp.

68 {
69 return kIncorrectResultReasonNames[incorrect_result_reason_];
70}
const char *const kIncorrectResultReasonNames[]
Definition: blamer.cpp:49

◆ IncorrectReasonName()

const char * BlamerBundle::IncorrectReasonName ( IncorrectResultReason  irr)
static

Definition at line 64 of file blamer.cpp.

64 {
65 return kIncorrectResultReasonNames[irr];
66}

◆ InitForSegSearch()

void BlamerBundle::InitForSegSearch ( const WERD_CHOICE * best_choice,
MATRIX * ratings,
UNICHAR_ID  wildcard_id,
bool  debug,
STRING * debug_str,
TessResultCallback2< bool, int, int > *  pp_cb 
)

Definition at line 484 of file blamer.cpp.

487 {
488 segsearch_is_looking_for_blame_ = true;
489 if (debug) {
490 tprintf("segsearch starting to look for blame\n");
491 }
492 // Fill pain points for any unclassified blob corresponding to the
493 // correct segmentation state.
494 *debug_str += "Correct segmentation:\n";
495 for (int idx = 0; idx < correct_segmentation_cols_.length(); ++idx) {
496 debug_str->add_str_int("col=", correct_segmentation_cols_[idx]);
497 debug_str->add_str_int(" row=", correct_segmentation_rows_[idx]);
498 *debug_str += "\n";
499 if (!ratings->Classified(correct_segmentation_cols_[idx],
500 correct_segmentation_rows_[idx],
501 wildcard_id) &&
502 !cb->Run(correct_segmentation_cols_[idx],
503 correct_segmentation_rows_[idx])) {
504 segsearch_is_looking_for_blame_ = false;
505 *debug_str += "\nFailed to insert pain point\n";
506 SetBlame(IRR_SEGSEARCH_HEUR, *debug_str, best_choice, debug);
507 break;
508 }
509 } // end for blamer_bundle->correct_segmentation_cols/rows
510}
@ IRR_SEGSEARCH_HEUR
Definition: blamer.h:75
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
bool Classified(int col, int row, int wildcard_id) const
Definition: matrix.cpp:36
void add_str_int(const char *str, int number)
Definition: strngs.cpp:377

◆ JoinBlames()

void BlamerBundle::JoinBlames ( const BlamerBundle & bundle1,
const BlamerBundle & bundle2,
bool  debug 
)

Definition at line 233 of file blamer.cpp.

234 {
235 STRING debug_str;
236 IncorrectResultReason irr = incorrect_result_reason_;
237 if (irr != IRR_NO_TRUTH_SPLIT) debug_str = "";
238 if (bundle1.incorrect_result_reason_ != IRR_CORRECT &&
239 bundle1.incorrect_result_reason_ != IRR_NO_TRUTH &&
240 bundle1.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
241 debug_str += "Blame from part 1: ";
242 debug_str += bundle1.debug_;
243 irr = bundle1.incorrect_result_reason_;
244 }
245 if (bundle2.incorrect_result_reason_ != IRR_CORRECT &&
246 bundle2.incorrect_result_reason_ != IRR_NO_TRUTH &&
247 bundle2.incorrect_result_reason_ != IRR_NO_TRUTH_SPLIT) {
248 debug_str += "Blame from part 2: ";
249 debug_str += bundle2.debug_;
250 if (irr == IRR_CORRECT) {
251 irr = bundle2.incorrect_result_reason_;
252 } else if (irr != bundle2.incorrect_result_reason_) {
253 irr = IRR_UNKNOWN;
254 }
255 }
256 incorrect_result_reason_ = irr;
257 if (irr != IRR_CORRECT && irr != IRR_NO_TRUTH) {
258 SetBlame(irr, debug_str, nullptr, debug);
259 }
260}
IncorrectResultReason
Definition: blamer.h:51
@ IRR_NO_TRUTH
Definition: blamer.h:93
@ IRR_UNKNOWN
Definition: blamer.h:96
@ IRR_NO_TRUTH_SPLIT
Definition: blamer.h:90

◆ LastChanceBlame()

void BlamerBundle::LastChanceBlame ( bool  debug,
WERD_RES * word
)
static

Definition at line 560 of file blamer.cpp.

560 {
561 if (word->blamer_bundle == nullptr) {
562 word->blamer_bundle = new BlamerBundle();
563 word->blamer_bundle->SetBlame(IRR_PAGE_LAYOUT, "LastChanceBlame",
564 word->best_choice, debug);
565 } else if (word->blamer_bundle->incorrect_result_reason_ == IRR_NO_TRUTH) {
566 word->blamer_bundle->SetBlame(IRR_NO_TRUTH, "Rejected truth",
567 word->best_choice, debug);
568 } else {
569 bool correct = word->blamer_bundle->ChoiceIsCorrect(word->best_choice);
570 IncorrectResultReason irr = word->blamer_bundle->incorrect_result_reason_;
571 if (irr == IRR_CORRECT && !correct) {
572 STRING debug_str = "Choice is incorrect after recognition";
573 word->blamer_bundle->SetBlame(IRR_UNKNOWN, debug_str, word->best_choice,
574 debug);
575 } else if (irr != IRR_CORRECT && correct) {
576 if (debug) {
577 tprintf("Corrected %s\n", word->blamer_bundle->debug_.string());
578 }
579 word->blamer_bundle->incorrect_result_reason_ = IRR_CORRECT;
580 word->blamer_bundle->debug_ = "";
581 }
582 }
583}
@ IRR_PAGE_LAYOUT
Definition: blamer.h:72
BlamerBundle()
Definition: blamer.h:104
BlamerBundle * blamer_bundle
Definition: pageres.h:252
const char * string() const
Definition: strngs.cpp:194

◆ lattice_data()

const char * BlamerBundle::lattice_data ( ) const
inline

Definition at line 152 of file blamer.h.

152 {
153 return lattice_data_;
154 }

◆ lattice_size()

int BlamerBundle::lattice_size ( ) const
inline

Definition at line 155 of file blamer.h.

155 {
156 return lattice_size_; // size of lattice_data in bytes
157 }

◆ MatrixPositionCorrect()

bool BlamerBundle::MatrixPositionCorrect ( int  index,
const MATRIX_COORD & coord
)
inline

Definition at line 145 of file blamer.h.

145 {
146 return correct_segmentation_cols_[index] == coord.col &&
147 correct_segmentation_rows_[index] == coord.row;
148 }

◆ misadaption_debug()

const STRING & BlamerBundle::misadaption_debug ( ) const
inline

Definition at line 133 of file blamer.h.

133 {
134 return misadaption_debug_;
135 }

◆ NoTruth()

bool BlamerBundle::NoTruth ( ) const
inline

Definition at line 123 of file blamer.h.

123 {
124 return incorrect_result_reason_ == IRR_NO_TRUTH ||
125 incorrect_result_reason_ == IRR_PAGE_LAYOUT;
126 }

◆ params_training_bundle()

const tesseract::ParamsTrainingBundle & BlamerBundle::params_training_bundle ( ) const
inline

Definition at line 165 of file blamer.h.

165 {
166 return params_training_bundle_;
167 }

◆ set_best_choice_is_dict_and_top_choice()

void BlamerBundle::set_best_choice_is_dict_and_top_choice ( bool  value)
inline

Definition at line 149 of file blamer.h.

149 {
150 best_choice_is_dict_and_top_choice_ = value;
151 }

◆ set_lattice_data()

void BlamerBundle::set_lattice_data ( const char *  data,
int  size 
)
inline

Definition at line 158 of file blamer.h.

158 {
159 lattice_size_ = size;
160 delete [] lattice_data_;
161 lattice_data_ = new char[lattice_size_];
162 memcpy(lattice_data_, data, lattice_size_);
163 }

◆ SetChopperBlame()

void BlamerBundle::SetChopperBlame ( const WERD_RES * word,
bool  debug 
)

Definition at line 318 of file blamer.cpp.

318 {
319 if (NoTruth() || !truth_has_char_boxes_ ||
320 word->chopped_word->blobs.empty()) {
321 return;
322 }
323 STRING debug_str;
324 bool missing_chop = false;
325 int num_blobs = word->chopped_word->blobs.size();
326 int box_index = 0;
327 int blob_index = 0;
328 int16_t truth_x = -1;
329 while (box_index < truth_word_.length() && blob_index < num_blobs) {
330 truth_x = norm_truth_word_.BlobBox(box_index).right();
331 TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
332 if (curr_blob->bounding_box().right() < truth_x - norm_box_tolerance_) {
333 ++blob_index;
334 continue; // encountered an extra chop, keep looking
335 } else if (curr_blob->bounding_box().right() >
336 truth_x + norm_box_tolerance_) {
337 missing_chop = true;
338 break;
339 } else {
340 ++blob_index;
341 }
342 }
343 if (missing_chop || box_index < norm_truth_word_.length()) {
344 STRING debug_str;
345 if (missing_chop) {
346 debug_str.add_str_int("Detected missing chop (tolerance=",
347 norm_box_tolerance_);
348 debug_str += ") at Bounding Box=";
349 TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
350 curr_blob->bounding_box().print_to_str(&debug_str);
351 debug_str.add_str_int("\nNo chop for truth at x=", truth_x);
352 } else {
353 debug_str.add_str_int("Missing chops for last ",
354 norm_truth_word_.length() - box_index);
355 debug_str += " truth box(es)";
356 }
357 debug_str += "\nMaximally chopped word boxes:\n";
358 for (blob_index = 0; blob_index < num_blobs; ++blob_index) {
359 TBLOB * curr_blob = word->chopped_word->blobs[blob_index];
360 curr_blob->bounding_box().print_to_str(&debug_str);
361 debug_str += '\n';
362 }
363 debug_str += "Truth bounding boxes:\n";
364 for (box_index = 0; box_index < norm_truth_word_.length(); ++box_index) {
365 norm_truth_word_.BlobBox(box_index).print_to_str(&debug_str);
366 debug_str += '\n';
367 }
368 SetBlame(IRR_CHOPPER, debug_str, word->best_choice, debug);
369 }
370}
@ IRR_CHOPPER
Definition: blamer.h:61
bool empty() const
Definition: genericvector.h:91
int size() const
Definition: genericvector.h:72
Definition: blobs.h:284
TBOX bounding_box() const
Definition: blobs.cpp:468
GenericVector< TBLOB * > blobs
Definition: blobs.h:459
TWERD * chopped_word
Definition: pageres.h:212
void print_to_str(STRING *str) const
Definition: rect.cpp:175
int16_t right() const
Definition: rect.h:79

◆ SetMisAdaptionDebug()

void BlamerBundle::SetMisAdaptionDebug ( const WERD_CHOICE * best_choice,
bool  debug 
)

Definition at line 587 of file blamer.cpp.

588 {
589 if (incorrect_result_reason_ != IRR_NO_TRUTH &&
590 !ChoiceIsCorrect(best_choice)) {
591 misadaption_debug_ ="misadapt to word (";
592 misadaption_debug_ += best_choice->permuter_name();
593 misadaption_debug_ += "): ";
594 FillDebugString("", best_choice, &misadaption_debug_);
595 if (debug) {
596 tprintf("%s\n", misadaption_debug_.string());
597 }
598 }
599}
void FillDebugString(const STRING &msg, const WERD_CHOICE *choice, STRING *debug)
Definition: blamer.cpp:131

◆ SetRejectedTruth()

void BlamerBundle::SetRejectedTruth ( )

Definition at line 113 of file blamer.cpp.

113 {
114 incorrect_result_reason_ = IRR_NO_TRUTH;
115 truth_has_char_boxes_ = false;
116}

◆ SetSymbolTruth()

void BlamerBundle::SetSymbolTruth ( const UNICHARSET & unicharset,
const char *  char_str,
const TBOX & char_box
)

Definition at line 94 of file blamer.cpp.

95 {
96 STRING symbol_str(char_str);
97 UNICHAR_ID id = unicharset.unichar_to_id(char_str);
98 if (id != INVALID_UNICHAR_ID) {
99 STRING normed_uch(unicharset.get_normed_unichar(id));
100 if (normed_uch.length() > 0) symbol_str = normed_uch;
101 }
102 int length = truth_word_.length();
103 truth_text_.push_back(symbol_str);
104 truth_word_.InsertBox(length, char_box);
105 if (length == 0)
106 truth_has_char_boxes_ = true;
107 else if (truth_word_.BlobBox(length - 1) == char_box)
108 truth_has_char_boxes_ = false;
109}
int push_back(T object)
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:148
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210

◆ SetupCorrectSegmentation()

void BlamerBundle::SetupCorrectSegmentation ( const TWERD * word,
bool  debug 
)

Definition at line 415 of file blamer.cpp.

415 {
416#ifndef DISABLED_LEGACY_ENGINE
417 params_training_bundle_.StartHypothesisList();
418#endif // ndef DISABLED_LEGACY_ENGINE
419 if (incorrect_result_reason_ != IRR_CORRECT || !truth_has_char_boxes_)
420 return; // Nothing to do here.
421
422 STRING debug_str;
423 debug_str += "Blamer computing correct_segmentation_cols\n";
424 int curr_box_col = 0;
425 int next_box_col = 0;
426 int num_blobs = word->NumBlobs();
427 if (num_blobs == 0) return; // No blobs to play with.
428 int blob_index = 0;
429 int16_t next_box_x = word->blobs[blob_index]->bounding_box().right();
430 for (int truth_idx = 0; blob_index < num_blobs &&
431 truth_idx < norm_truth_word_.length();
432 ++blob_index) {
433 ++next_box_col;
434 int16_t curr_box_x = next_box_x;
435 if (blob_index + 1 < num_blobs)
436 next_box_x = word->blobs[blob_index + 1]->bounding_box().right();
437 int16_t truth_x = norm_truth_word_.BlobBox(truth_idx).right();
438 debug_str.add_str_int("Box x coord vs. truth: ", curr_box_x);
439 debug_str.add_str_int(" ", truth_x);
440 debug_str += "\n";
441 if (curr_box_x > (truth_x + norm_box_tolerance_)) {
442 break; // failed to find a matching box
443 } else if (curr_box_x >= truth_x - norm_box_tolerance_ && // matched
444 (blob_index + 1 >= num_blobs || // next box can't be included
445 next_box_x > truth_x + norm_box_tolerance_)) {
446 correct_segmentation_cols_.push_back(curr_box_col);
447 correct_segmentation_rows_.push_back(next_box_col-1);
448 ++truth_idx;
449 debug_str.add_str_int("col=", curr_box_col);
450 debug_str.add_str_int(" row=", next_box_col-1);
451 debug_str += "\n";
452 curr_box_col = next_box_col;
453 }
454 }
455 if (blob_index < num_blobs || // trailing blobs
456 correct_segmentation_cols_.length() != norm_truth_word_.length()) {
457 debug_str.add_str_int("Blamer failed to find correct segmentation"
458 " (tolerance=", norm_box_tolerance_);
459 if (blob_index >= num_blobs) debug_str += " blob == nullptr";
460 debug_str += ")\n";
461 debug_str.add_str_int(" path length ", correct_segmentation_cols_.length());
462 debug_str.add_str_int(" vs. truth ", norm_truth_word_.length());
463 debug_str += "\n";
464 SetBlame(IRR_UNKNOWN, debug_str, nullptr, debug);
465 correct_segmentation_cols_.clear();
466 correct_segmentation_rows_.clear();
467 }
468}
int NumBlobs() const
Definition: blobs.h:448

◆ SetupNormTruthWord()

void BlamerBundle::SetupNormTruthWord ( const DENORM & denorm)

Definition at line 153 of file blamer.cpp.

153 {
154 // TODO(rays) Is this the last use of denorm in WERD_RES and can it go?
155 norm_box_tolerance_ = kBlamerBoxTolerance * denorm.x_scale();
156 TPOINT topleft;
157 TPOINT botright;
158 TPOINT norm_topleft;
159 TPOINT norm_botright;
160 for (int b = 0; b < truth_word_.length(); ++b) {
161 const TBOX &box = truth_word_.BlobBox(b);
162 topleft.x = box.left();
163 topleft.y = box.top();
164 botright.x = box.right();
165 botright.y = box.bottom();
166 denorm.NormTransform(nullptr, topleft, &norm_topleft);
167 denorm.NormTransform(nullptr, botright, &norm_botright);
168 TBOX norm_box(norm_topleft.x, norm_botright.y,
169 norm_botright.x, norm_topleft.y);
170 norm_truth_word_.InsertBox(b, norm_box);
171 }
172}
Definition: blobs.h:51
int16_t x
Definition: blobs.h:93
int16_t y
Definition: blobs.h:94
float x_scale() const
Definition: normalis.h:267
void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const
Definition: normalis.cpp:335
int16_t top() const
Definition: rect.h:58
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65

◆ SetWordTruth()

void BlamerBundle::SetWordTruth ( const UNICHARSET & unicharset,
const char *  truth_str,
const TBOX & word_box
)

Definition at line 74 of file blamer.cpp.

75 {
76 truth_word_.InsertBox(0, word_box);
77 truth_has_char_boxes_ = false;
78 // Encode the string as UNICHAR_IDs.
79 GenericVector<UNICHAR_ID> encoding;
80 GenericVector<char> lengths;
81 unicharset.encode_string(truth_str, false, &encoding, &lengths, nullptr);
82 int total_length = 0;
83 for (int i = 0; i < encoding.size(); total_length += lengths[i++]) {
84 STRING uch(truth_str + total_length);
85 uch.truncate_at(lengths[i] - total_length);
86 UNICHAR_ID id = encoding[i];
87 if (id != INVALID_UNICHAR_ID) uch = unicharset.get_normed_unichar(id);
88 truth_text_.push_back(uch);
89 }
90}
bool encode_string(const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const
Definition: unicharset.cpp:259

◆ SplitBundle()

void BlamerBundle::SplitBundle ( int  word1_right,
int  word2_left,
bool  debug,
BlamerBundle * bundle1,
BlamerBundle * bundle2
) const

Definition at line 177 of file blamer.cpp.

179 {
180 STRING debug_str;
181 // Find truth boxes that correspond to the split in the blobs.
182 int b;
183 int begin2_truth_index = -1;
184 if (incorrect_result_reason_ != IRR_NO_TRUTH &&
185 truth_has_char_boxes_) {
186 debug_str = "Looking for truth split at";
187 debug_str.add_str_int(" end1_x ", word1_right);
188 debug_str.add_str_int(" begin2_x ", word2_left);
189 debug_str += "\nnorm_truth_word boxes:\n";
190 if (norm_truth_word_.length() > 1) {
191 norm_truth_word_.BlobBox(0).print_to_str(&debug_str);
192 for (b = 1; b < norm_truth_word_.length(); ++b) {
193 norm_truth_word_.BlobBox(b).print_to_str(&debug_str);
194 if ((abs(word1_right - norm_truth_word_.BlobBox(b - 1).right()) <
195 norm_box_tolerance_) &&
196 (abs(word2_left - norm_truth_word_.BlobBox(b).left()) <
197 norm_box_tolerance_)) {
198 begin2_truth_index = b;
199 debug_str += "Split found";
200 break;
201 }
202 }
203 debug_str += '\n';
204 }
205 }
206 // Populate truth information in word and word2 with the first and second
207 // part of the original truth.
208 if (begin2_truth_index > 0) {
209 bundle1->truth_has_char_boxes_ = true;
210 bundle1->norm_box_tolerance_ = norm_box_tolerance_;
211 bundle2->truth_has_char_boxes_ = true;
212 bundle2->norm_box_tolerance_ = norm_box_tolerance_;
213 BlamerBundle *curr_bb = bundle1;
214 for (b = 0; b < norm_truth_word_.length(); ++b) {
215 if (b == begin2_truth_index) curr_bb = bundle2;
216 curr_bb->norm_truth_word_.InsertBox(b, norm_truth_word_.BlobBox(b));
217 curr_bb->truth_word_.InsertBox(b, truth_word_.BlobBox(b));
218 curr_bb->truth_text_.push_back(truth_text_[b]);
219 }
220 } else if (incorrect_result_reason_ == IRR_NO_TRUTH) {
221 bundle1->incorrect_result_reason_ = IRR_NO_TRUTH;
222 bundle2->incorrect_result_reason_ = IRR_NO_TRUTH;
223 } else {
224 debug_str += "Truth split not found";
225 debug_str += truth_has_char_boxes_ ?
226 "\n" : " (no truth char boxes)\n";
227 bundle1->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
228 bundle2->SetBlame(IRR_NO_TRUTH_SPLIT, debug_str, nullptr, debug);
229 }
230}

◆ TruthString()

STRING BlamerBundle::TruthString ( ) const
inline

Definition at line 114 of file blamer.h.

114 {
115 STRING truth_str;
116 for (int i = 0; i < truth_text_.length(); ++i)
117 truth_str += truth_text_[i];
118 return truth_str;
119 }

◆ UpdateBestRating()

void BlamerBundle::UpdateBestRating ( float  rating)
inline

Definition at line 136 of file blamer.h.

136 {
137 if (rating < best_correctly_segmented_rating_)
138 best_correctly_segmented_rating_ = rating;
139 }

The documentation for this struct was generated from the following files: