tesseract 4.1.1
Loading...
Searching...
No Matches
blamer.h File Reference
#include <cstdint>
#include <cstring>
#include "boxword.h"
#include "genericvector.h"
#include "params_training_featdef.h"
#include "ratngs.h"
#include "rect.h"
#include "strngs.h"
#include "tprintf.h"
#include "unichar.h"

Go to the source code of this file.

Classes

struct  BlamerBundle
 

Enumerations

enum  IncorrectResultReason {
  IRR_CORRECT , IRR_CLASSIFIER , IRR_CHOPPER , IRR_CLASS_LM_TRADEOFF ,
  IRR_PAGE_LAYOUT , IRR_SEGSEARCH_HEUR , IRR_SEGSEARCH_PP , IRR_CLASS_OLD_LM_TRADEOFF ,
  IRR_ADAPTION , IRR_NO_TRUTH_SPLIT , IRR_NO_TRUTH , IRR_UNKNOWN ,
  IRR_NUM_REASONS
}
 

Enumeration Type Documentation

◆ IncorrectResultReason

Enumerator
IRR_CORRECT 
IRR_CLASSIFIER 
IRR_CHOPPER 
IRR_CLASS_LM_TRADEOFF 
IRR_PAGE_LAYOUT 
IRR_SEGSEARCH_HEUR 
IRR_SEGSEARCH_PP 
IRR_CLASS_OLD_LM_TRADEOFF 
IRR_ADAPTION 
IRR_NO_TRUTH_SPLIT 
IRR_NO_TRUTH 
IRR_UNKNOWN 
IRR_NUM_REASONS 

Definition at line 51 of file blamer.h.

51 {
52 // The text recorded in best choice == truth text
54 // Either: Top choice is incorrect and is a dictionary word (language model
55 // is unlikely to help correct such errors, so blame the classifier).
56 // Or: the correct unichar was not included in shortlist produced by the
57 // classifier at all.
59 // Chopper has not found one or more splits that correspond to the correct
60 // character bounding boxes recorded in BlamerBundle::truth_word.
62 // Classifier did include correct unichars for each blob in the correct
63 // segmentation, however its rating could have been too bad to allow the
64 // language model to pull out the correct choice. On the other hand the
65 // strength of the language model might have been too weak to favor the
66 // correct answer, thus we call this case a classifier-language model
67 // tradeoff error.
69 // Page layout failed to produce the correct bounding box. Blame page layout
70 // if the truth was not found for the word, which implies that the bounding
71 // box of the word was incorrect (no truth word had a similar bounding box).
73 // SegSearch heuristic prevented one or more blobs from the correct
74 // segmentation state from being classified (e.g. the blob was too wide).
76 // The correct segmentation state was not explored because of poor SegSearch
77 // pain point prioritization. We blame SegSearch pain point prioritization
78 // if the best rating of a choice constructed from correct segmentation is
79 // better than that of the best choice (i.e. if we got to explore the correct
80 // segmentation state, language model would have picked the correct choice).
82 // Same as IRR_CLASS_LM_TRADEOFF, but used when we only run chopper on a word,
83 // and thus use the old language model (permuters).
84 // TODO(antonova): integrate the new language model with chopper
86 // If there is an incorrect adaptive template match with a better score than
87 // a correct one (either pre-trained or adapted), mark this as adaption error.
89 // split_and_recog_word() failed to find a suitable split in truth.
91 // Truth is not available for this word (e.g. when words in corrected content
92 // file are turned into ~~~~ because an appropriate alignment was not found).
94 // The text recorded in best choice != truth text, but none of the above
95 // reasons are set.
97
99};
@ IRR_PAGE_LAYOUT
Definition: blamer.h:72
@ IRR_NO_TRUTH
Definition: blamer.h:93
@ IRR_ADAPTION
Definition: blamer.h:88
@ IRR_CHOPPER
Definition: blamer.h:61
@ IRR_CORRECT
Definition: blamer.h:53
@ IRR_NUM_REASONS
Definition: blamer.h:98
@ IRR_CLASS_LM_TRADEOFF
Definition: blamer.h:68
@ IRR_SEGSEARCH_PP
Definition: blamer.h:81
@ IRR_CLASSIFIER
Definition: blamer.h:58
@ IRR_UNKNOWN
Definition: blamer.h:96
@ IRR_NO_TRUTH_SPLIT
Definition: blamer.h:90
@ IRR_SEGSEARCH_HEUR
Definition: blamer.h:75
@ IRR_CLASS_OLD_LM_TRADEOFF
Definition: blamer.h:85