tesseract 4.1.1
Loading...
Searching...
No Matches
classify.cpp
Go to the documentation of this file.
1
2// File: classify.cpp
3// Description: classify class.
4// Author: Samuel Charron
5//
6// (C) Copyright 2006, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#include "classify.h"
20
21#ifdef DISABLED_LEGACY_ENGINE
22
23#include <string.h>
24
25namespace tesseract {
26
28 :
29 INT_MEMBER(classify_debug_level, 0, "Classify debug level",
30 this->params()),
31
32 BOOL_MEMBER(classify_bln_numeric_mode, 0,
33"Assume the input is numbers [0-9].", this->params()),
34
35 double_MEMBER(classify_max_rating_ratio, 1.5,
36 "Veto ratio between classifier ratings", this->params()),
37
38 double_MEMBER(classify_max_certainty_margin, 5.5,
39 "Veto difference between classifier certainties",
40 this->params()),
41
42 dict_(this) {}
43
44Classify::~Classify() {}
45
46} // namespace tesseract
47
48#else // DISABLED_LEGACY_ENGINE not defined
49
50#include "fontinfo.h"
51#include "intproto.h"
52#include "mfoutline.h"
53#include "scrollview.h"
54#include "shapeclassifier.h"
55#include "shapetable.h"
56#include "unicity_table.h"
57#include <cstring>
58
59namespace tesseract {
61 : BOOL_MEMBER(allow_blob_division, true, "Use divisible blobs chopping",
62 this->params()),
63 BOOL_MEMBER(prioritize_division, false,
64 "Prioritize blob division over chopping", this->params()),
65 BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier",
66 this->params()),
67 INT_MEMBER(classify_debug_level, 0, "Classify debug level",
68 this->params()),
69 INT_MEMBER(classify_norm_method, character, "Normalization Method ...",
70 this->params()),
71 double_MEMBER(classify_char_norm_range, 0.2,
72 "Character Normalization Range ...", this->params()),
73 double_MEMBER(classify_max_rating_ratio, 1.5,
74 "Veto ratio between classifier ratings", this->params()),
75 double_MEMBER(classify_max_certainty_margin, 5.5,
76 "Veto difference between classifier certainties",
77 this->params()),
78 BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching",
79 this->params()),
80 BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching",
81 this->params()),
82 BOOL_MEMBER(classify_enable_adaptive_matcher, 1,
83 "Enable adaptive classifier", this->params()),
84 BOOL_MEMBER(classify_use_pre_adapted_templates, 0,
85 "Use pre-adapted classifier templates", this->params()),
86 BOOL_MEMBER(classify_save_adapted_templates, 0,
87 "Save adapted templates to a file", this->params()),
88 BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger",
89 this->params()),
90 BOOL_MEMBER(classify_nonlinear_norm, 0,
91 "Non-linear stroke-density normalization", this->params()),
92 INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()),
93 INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()),
94 INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ",
95 this->params()),
96 double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)",
97 this->params()),
98 double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)",
99 this->params()),
100 double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)",
101 this->params()),
102 double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)",
103 this->params()),
104 double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)",
105 this->params()),
106 double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. noise blob length",
107 this->params()),
108 INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes",
109 this->params()),
110 INT_MEMBER(matcher_min_examples_for_prototyping, 3,
111 "Reliable Config Threshold", this->params()),
112 INT_MEMBER(matcher_sufficient_examples_for_prototyping, 5,
113 "Enable adaption even if the ambiguities have not been seen",
114 this->params()),
115 double_MEMBER(matcher_clustering_max_angle_delta, 0.015,
116 "Maximum angle delta for prototype clustering",
117 this->params()),
118 double_MEMBER(classify_misfit_junk_penalty, 0.0,
119 "Penalty to apply when a non-alnum is vertically out of "
120 "its expected textline position",
121 this->params()),
122 double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()),
123 double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor",
124 this->params()),
125 double_MEMBER(tessedit_class_miss_scale, 0.00390625,
126 "Scale factor for features not used", this->params()),
128 classify_adapted_pruning_factor, 2.5,
129 "Prune poor adapted results this much worse than best result",
130 this->params()),
131 double_MEMBER(classify_adapted_pruning_threshold, -1.0,
132 "Threshold at which classify_adapted_pruning_factor starts",
133 this->params()),
134 INT_MEMBER(classify_adapt_proto_threshold, 230,
135 "Threshold for good protos during adaptive 0-255",
136 this->params()),
137 INT_MEMBER(classify_adapt_feature_threshold, 230,
138 "Threshold for good features during adaptive 0-255",
139 this->params()),
141 "Do not include character fragments in the"
142 " results of the classifier",
143 this->params()),
144 double_MEMBER(classify_character_fragments_garbage_certainty_threshold,
145 -3.0,
146 "Exclude fragments that do not look like whole"
147 " characters from training and adaption",
148 this->params()),
149 BOOL_MEMBER(classify_debug_character_fragments, false,
150 "Bring up graphical debugging windows for fragments training",
151 this->params()),
152 BOOL_MEMBER(matcher_debug_separate_windows, false,
153 "Use two different windows for debugging the matching: "
154 "One for the protos and one for the features.",
155 this->params()),
156 STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning",
157 this->params()),
158 INT_MEMBER(classify_class_pruner_threshold, 229,
159 "Class Pruner Threshold 0-255", this->params()),
160 INT_MEMBER(classify_class_pruner_multiplier, 15,
161 "Class Pruner Multiplier 0-255: ", this->params()),
162 INT_MEMBER(classify_cp_cutoff_strength, 7,
163 "Class Pruner CutoffStrength: ", this->params()),
165 "Integer Matcher Multiplier 0-255: ", this->params()),
166 BOOL_MEMBER(classify_bln_numeric_mode, 0,
167 "Assume the input is numbers [0-9].", this->params()),
168 double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size",
169 this->params()),
170 double_MEMBER(speckle_rating_penalty, 10.0,
171 "Penalty to add to worst rating for noise", this->params()),
172 im_(&classify_debug_level),
173 shape_table_(nullptr),
174 dict_(this),
175 static_classifier_(nullptr) {
176 fontinfo_table_.set_compare_callback(
178 fontinfo_table_.set_clear_callback(
180 fontset_table_.set_compare_callback(
182 fontset_table_.set_clear_callback(
184 AdaptedTemplates = nullptr;
185 BackupAdaptedTemplates = nullptr;
186 PreTrainedTemplates = nullptr;
187 AllProtosOn = nullptr;
188 AllConfigsOn = nullptr;
189 AllConfigsOff = nullptr;
190 TempProtoMask = nullptr;
191 NormProtos = nullptr;
192
193 NumAdaptationsFailed = 0;
194
195 learn_debug_win_ = nullptr;
196 learn_fragmented_word_debug_win_ = nullptr;
197 learn_fragments_debug_win_ = nullptr;
199}
200
203 delete learn_debug_win_;
204 delete learn_fragmented_word_debug_win_;
205 delete learn_fragments_debug_win_;
206}
207
208
209// Takes ownership of the given classifier, and uses it for future calls
210// to CharNormClassifier.
212 delete static_classifier_;
213 static_classifier_ = static_classifier;
214}
215
216// Moved from speckle.cpp
217// Adds a noise classification result that is a bit worse than the worst
218// current result, or the worst possible result if no current results.
219void Classify::AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices) {
220 BLOB_CHOICE_IT bc_it(choices);
221 // If there is no classifier result, we will use the worst possible certainty
222 // and corresponding rating.
223 float certainty = -getDict().certainty_scale;
224 float rating = rating_scale * blob_length;
225 if (!choices->empty() && blob_length > 0) {
226 bc_it.move_to_last();
227 BLOB_CHOICE* worst_choice = bc_it.data();
228 // Add speckle_rating_penalty to worst rating, matching old value.
229 rating = worst_choice->rating() + speckle_rating_penalty;
230 // Compute the rating to correspond to the certainty. (Used to be kept
231 // the same, but that messes up the language model search.)
232 certainty = -rating * getDict().certainty_scale /
233 (rating_scale * blob_length);
234 }
235 auto* blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty,
236 -1, 0.0f, FLT_MAX, 0,
238 bc_it.add_to_end(blob_choice);
239}
240
241// Returns true if the blob is small enough to be a large speckle.
242bool Classify::LargeSpeckle(const TBLOB &blob) {
243 double speckle_size = kBlnXHeight * speckle_large_max_size;
244 TBOX bbox = blob.bounding_box();
245 return bbox.width() < speckle_size && bbox.height() < speckle_size;
246}
247
248} // namespace tesseract
249
250#endif // def DISABLED_LEGACY_ENGINE
const int kBlnXHeight
Definition: normalis.h:24
@ BCC_SPECKLE_CLASSIFIER
Definition: ratngs.h:46
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:315
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:324
#define STRING_MEMBER(name, val, comment, vec)
Definition: params.h:321
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:318
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258
@ UNICHAR_SPACE
Definition: unicharset.h:34
void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs)
Definition: featdefs.cpp:112
bool disable_character_fragments
int classify_integer_matcher_multiplier
@ character
Definition: mfoutline.h:63
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
Definition: fontinfo.cpp:119
void FontInfoDeleteCallback(FontInfo f)
Definition: fontinfo.cpp:138
bool CompareFontSet(const FontSet &fs1, const FontSet &fs2)
Definition: fontinfo.cpp:127
void FontSetDeleteCallback(FontSet fs)
Definition: fontinfo.cpp:147
Definition: blobs.h:284
TBOX bounding_box() const
Definition: blobs.cpp:468
float rating() const
Definition: ratngs.h:80
Definition: rect.h:34
int16_t width() const
Definition: rect.h:115
int16_t height() const
Definition: rect.h:108
double speckle_rating_penalty
Definition: classify.h:511
BIT_VECTOR AllProtosOn
Definition: classify.h:522
BIT_VECTOR TempProtoMask
Definition: classify.h:525
~Classify() override
Definition: classify.cpp:201
double speckle_large_max_size
Definition: classify.h:509
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:515
bool LargeSpeckle(const TBLOB &blob)
Definition: classify.cpp:242
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:514
FEATURE_DEFS_STRUCT feature_defs_
Definition: classify.h:541
UnicityTable< FontSet > fontset_table_
Definition: classify.h:537
BIT_VECTOR AllConfigsOff
Definition: classify.h:524
NORM_PROTOS * NormProtos
Definition: classify.h:527
BIT_VECTOR AllConfigsOn
Definition: classify.h:523
void SetStaticClassifier(ShapeClassifier *static_classifier)
Definition: classify.cpp:211
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:459
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:529
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:519
virtual Dict & getDict()
Definition: classify.h:107
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
Definition: classify.cpp:219
double certainty_scale
Definition: dict.h:627