tesseract 4.1.1
Loading...
Searching...
No Matches
classify.h
Go to the documentation of this file.
1
2// File: classify.h
3// Description: classify class.
4// Author: Samuel Charron
5//
6// (C) Copyright 2006, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#ifndef TESSERACT_CLASSIFY_CLASSIFY_H_
20#define TESSERACT_CLASSIFY_CLASSIFY_H_
21
22// Include automatically generated configuration file if running autoconf.
23#ifdef HAVE_CONFIG_H
24#include "config_auto.h"
25#endif
26
27
28#ifdef DISABLED_LEGACY_ENGINE
29
30#include "ccstruct.h"
31#include "dict.h"
32
33namespace tesseract {
34
// Reduced Classify that is declared when DISABLED_LEGACY_ENGINE is defined.
// It derives from CCStruct and keeps only the Dict accessor plus a small set
// of parameters.
35class Classify : public CCStruct {
36 public:
37 Classify();
38 virtual ~Classify();
// Returns a mutable reference to the dict_ member of this object.
39 virtual Dict& getDict() {
40 return dict_;
41 }
42
43 // Member variables.
44
45 INT_VAR_H(classify_debug_level, 0, "Classify debug level");
46
// NOTE(review): listing line 47 (the opening of this parameter declaration)
// is absent here; only the description string survives. Presumably this is
// classify_bln_numeric_mode -- confirm against the original header.
48 "Assume the input is numbers [0-9].");
49
// NOTE(review): listing line 50 is absent; presumably the opening of the
// classify_max_rating_ratio declaration -- confirm.
51 "Veto ratio between classifier ratings");
52
// NOTE(review): listing line 53 is absent; presumably the opening of the
// classify_max_certainty_margin declaration -- confirm.
54 "Veto difference between classifier certainties");
55
56 private:
// Dictionary instance handed out by getDict().
57 Dict dict_;
58};
59
60} // namespace tesseract
61
62
63#else // DISABLED_LEGACY_ENGINE not defined
64
65#include "adaptive.h"
66#include "ccstruct.h"
67#include "dict.h"
68#include "featdefs.h"
69#include "fontinfo.h"
70#include "imagedata.h"
71#include "intfx.h"
72#include "intmatcher.h"
73#include "normalis.h"
74#include "ratngs.h"
75#include "ocrfeatures.h"
76#include "unicity_table.h"
77
78class ScrollView;
79class WERD_CHOICE;
80class WERD_RES;
81struct ADAPT_RESULTS;
82struct NORM_PROTOS;
83
84static const int kUnknownFontinfoId = -1;
85static const int kBlankFontinfoId = -2;
86
87namespace tesseract {
88
89class ShapeClassifier;
90struct ShapeRating;
91class ShapeTable;
92struct UnicharRating;
93
94// How segmented is a blob. In this enum, character refers to a classifiable
95// unit, but that is too long and character is usually easier to understand.
// NOTE(review): listing line 96 (the enum's opening line) is absent here; the
// cross-reference index names the type CharSegmentationType.
97 CST_FRAGMENT, // A partial character.
98 CST_WHOLE, // A correctly segmented character.
99 CST_IMPROPER, // More than one but less than 2 characters.
100 CST_NGRAM // Multiple characters.
// NOTE(review): listing line 101 (presumably the closing of the enum) is
// absent here as well.
102
103class Classify : public CCStruct {
104 public:
105 Classify();
106 ~Classify() override;
// Returns a mutable reference to the dict_ member; virtual, so subclasses
// may override it.
107 virtual Dict& getDict() {
108 return dict_;
109 }
110
// Read-only accessor for the shape_table_ pointer.
111 const ShapeTable* shape_table() const {
112 return shape_table_;
113 }
114
115 // Takes ownership of the given classifier, and uses it for future calls
116 // to CharNormClassifier.
117 void SetStaticClassifier(ShapeClassifier* static_classifier);
118
119 // Adds a noise classification result that is a bit worse than the worst
120 // current result, or the worst possible result if no current results.
121 void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices);
122
123 // Returns true if the blob is small enough to be a large speckle.
124 bool LargeSpeckle(const TBLOB &blob);
125
126 /* adaptive.cpp ************************************************************/
127 ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset);
128 int GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId);
129 // Runs the class pruner from int_templates on the given features, returning
130 // the number of classes output in results.
131 // int_templates Class pruner tables
132 // num_features Number of features in blob
133 // features Array of features
134 // normalization_factors (input) Array of int_templates->NumClasses fudge
135 // factors from blob normalization process.
136 // (Indexed by CLASS_INDEX)
137 // expected_num_features (input) Array of int_templates->NumClasses
138 // expected number of features for each class.
139 // (Indexed by CLASS_INDEX)
140 // results (output) Sorted Array of pruned classes.
141 // Array must be sized to take the maximum possible
142 // number of outputs : int_templates->NumClasses.
143 int PruneClasses(const INT_TEMPLATES_STRUCT* int_templates, int num_features,
144 int keep_this, const INT_FEATURE_STRUCT* features,
145 const uint8_t* normalization_factors,
146 const uint16_t* expected_num_features,
148 void ReadNewCutoffs(TFile* fp, uint16_t* Cutoffs);
149 void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
150 void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates);
152 /* normmatch.cpp ************************************************************/
153 float ComputeNormMatch(CLASS_ID ClassId,
154 const FEATURE_STRUCT& feature, bool DebugMatch);
155 void FreeNormProtos();
157 /* protos.cpp ***************************************************************/
158 void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class);
160 const UNICHARSET& target_unicharset);
161 /* adaptmatch.cpp ***********************************************************/
162
163 // Learns the given word using its chopped_word, seam_array, denorm,
164 // box_word, best_state, and correct_text to learn both correctly and
165 // incorrectly segmented blobs. If fontname is not nullptr, then LearnBlob
166 // is called and the data will be saved in an internal buffer.
167 // Otherwise AdaptToBlob is called for adaption within a document.
168 void LearnWord(const char* fontname, WERD_RES* word);
169
170 // Builds a blob of length fragments, from the word, starting at start,
171 // and then learns it, as having the given correct_text.
172 // If fontname is not nullptr, then LearnBlob is called and the data will be
173 // saved in an internal buffer for static training.
174 // Otherwise AdaptToBlob is called for adaption within a document.
175 // threshold is a magic number required by AdaptToChar and generated by
176 // ComputeAdaptionThresholds.
177 // Although it can be partly inferred from the string, segmentation is
178 // provided to explicitly clarify the character segmentation.
179 void LearnPieces(const char* fontname, int start, int length, float threshold,
180 CharSegmentationType segmentation, const char* correct_text,
181 WERD_RES* word);
183 void InitAdaptedClass(TBLOB *Blob,
184 CLASS_ID ClassId,
185 int FontinfoId,
186 ADAPT_CLASS Class,
187 ADAPT_TEMPLATES Templates);
188 void AmbigClassifier(const GenericVector<INT_FEATURE_STRUCT>& int_features,
189 const INT_FX_RESULT_STRUCT& fx_info,
190 const TBLOB *blob,
191 INT_TEMPLATES templates,
192 ADAPT_CLASS *classes,
193 UNICHAR_ID *ambiguities,
194 ADAPT_RESULTS *results);
195 void MasterMatcher(INT_TEMPLATES templates,
196 int16_t num_features,
197 const INT_FEATURE_STRUCT* features,
198 const uint8_t* norm_factors,
199 ADAPT_CLASS* classes,
200 int debug,
201 int matcher_multiplier,
202 const TBOX& blob_box,
203 const GenericVector<CP_RESULT_STRUCT>& results,
204 ADAPT_RESULTS* final_results);
205 // Converts configs to fonts, and if the result is not adapted, and a
206 // shape_table_ is present, the shape is expanded to include all
207 // unichar_ids represented, before applying a set of corrections to the
208 // distance rating in int_result, (see ComputeCorrectedRating.)
209 // The results are added to the final_results output.
211 bool debug,
212 int class_id,
213 int bottom, int top,
214 float cp_rating,
215 int blob_length,
216 int matcher_multiplier,
217 const uint8_t* cn_factors,
218 UnicharRating* int_result,
219 ADAPT_RESULTS* final_results);
220 // Applies a set of corrections to the distance im_rating,
221 // including the cn_correction, miss penalty and additional penalty
222 // for non-alnums being vertical misfits. Returns the corrected distance.
223 double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating,
224 double im_rating, int feature_misses,
225 int bottom, int top,
226 int blob_length, int matcher_multiplier,
227 const uint8_t* cn_factors);
228 void ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
229 ADAPT_RESULTS *Results,
230 BLOB_CHOICE_LIST *Choices);
231 void AddNewResult(const UnicharRating& new_result, ADAPT_RESULTS *results);
232 int GetAdaptiveFeatures(TBLOB *Blob,
233 INT_FEATURE_ARRAY IntFeatures,
234 FEATURE_SET *FloatFeatures);
235
236#ifndef GRAPHICS_DISABLED
238 ADAPT_RESULTS *Results);
239#endif
241 int NumBadFeat,
242 FEATURE_ID BadFeat[],
243 INT_CLASS IClass,
244 ADAPT_CLASS Class,
247 CLASS_ID ClassId,
248 int FontinfoId,
249 int NumFeatures,
250 INT_FEATURE_ARRAY Features,
251 FEATURE_SET FloatFeatures);
252 void MakePermanent(ADAPT_TEMPLATES Templates,
253 CLASS_ID ClassId,
254 int ConfigId,
255 TBLOB *Blob);
256 void PrintAdaptiveMatchResults(const ADAPT_RESULTS& results);
257 void RemoveExtraPuncs(ADAPT_RESULTS *Results);
258 void RemoveBadMatches(ADAPT_RESULTS *Results);
259 void SetAdaptiveThreshold(float Threshold);
260 void ShowBestMatchFor(int shape_id,
261 const INT_FEATURE_STRUCT* features,
262 int num_features);
263 // Returns a string for the classifier class_id: either the corresponding
264 // unicharset debug_str or the shape_table_ debug str.
266 int class_id, int config_id) const;
267 // Converts a classifier class_id index with a config ID to:
268 // shape_table_ present: a shape_table_ index OR
269 // No shape_table_: a font ID.
270 // Without shape training, each class_id, config pair represents a single
271 // unichar id/font combination, so this function looks up the corresponding
272 // font id.
273 // With shape training, each class_id, config pair represents a single
274 // shape table index, so the fontset_table stores the shape table index,
275 // and the shape_table_ must be consulted to obtain the actual unichar_id/
276 // font combinations that the shape represents.
277 int ClassAndConfigIDToFontOrShapeID(int class_id,
278 int int_result_config) const;
279 // Converts a shape_table_ index to a classifier class_id index (not a
280 // unichar-id!). Uses a search, so not fast.
281 int ShapeIDToClassID(int shape_id) const;
283 TBLOB *Blob, const GenericVector<INT_FEATURE_STRUCT>& int_features,
284 const INT_FX_RESULT_STRUCT& fx_info,
285 ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results);
286 int CharNormClassifier(TBLOB *blob,
287 const TrainingSample& sample,
288 ADAPT_RESULTS *adapt_results);
289
290 // As CharNormClassifier, but operates on a TrainingSample and outputs to
291 // a GenericVector of ShapeRating without conversion to classes.
292 int CharNormTrainingSample(bool pruner_only, int keep_this,
293 const TrainingSample& sample,
295 UNICHAR_ID *GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass);
296 void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results);
297 void AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId,
298 float Threshold, ADAPT_TEMPLATES adaptive_templates);
299 void DisplayAdaptedChar(TBLOB* blob, INT_CLASS_STRUCT* int_class);
300 bool AdaptableWord(WERD_RES* word);
302 void SettupPass1();
303 void SettupPass2();
304 void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices);
305 void ClassifyAsNoise(ADAPT_RESULTS *Results);
309
310 int GetCharNormFeature(const INT_FX_RESULT_STRUCT& fx_info,
311 INT_TEMPLATES templates,
312 uint8_t* pruner_norm_array,
313 uint8_t* char_norm_array);
314 // Computes the char_norm_array for the unicharset and, if not nullptr, the
315 // pruner_array as appropriate according to the existence of the shape_table.
316 // The norm_feature is deleted as it is almost certainly no longer needed.
317 void ComputeCharNormArrays(FEATURE_STRUCT* norm_feature,
318 INT_TEMPLATES_STRUCT* templates,
319 uint8_t* char_norm_array,
320 uint8_t* pruner_array);
321
322 bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config);
323 void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob);
324
// Returns true when NumAdaptationsFailed is greater than zero.
325 bool AdaptiveClassifierIsFull() const { return NumAdaptationsFailed > 0; }
// NOTE(review): listing line 326 is absent here; per the cross-reference
// index it is the signature of AdaptiveClassifierIsEmpty() const. The body
// below returns true when AdaptedTemplates->NumPermClasses is zero.
327 return AdaptedTemplates->NumPermClasses == 0;
328 }
329 bool LooksLikeGarbage(TBLOB *blob);
330 void RefreshDebugWindow(ScrollView **win, const char *msg,
331 int y_offset, const TBOX &wbox);
332 // intfx.cpp
333 // Computes the DENORMS for bl(baseline) and cn(character) normalization
334 // during feature extraction. The input denorm describes the current state
335 // of the blob, which is usually a baseline-normalized word.
336 // The transforms that are set up are as follows:
337 // Baseline Normalized (bl) Output:
338 // We center the grapheme by aligning the x-coordinate of its centroid with
339 // x=128 and leaving the already-baseline-normalized y as-is.
340 //
341 // Character Normalized (cn) Output:
342 // We align the grapheme's centroid at the origin and scale it
343 // asymmetrically in x and y so that the 2nd moments are a standard value
344 // (51.2), i.e. the result is vaguely square.
345 // If classify_nonlinear_norm is true:
346 // A non-linear normalization is set up that attempts to evenly distribute
347 // edges across x and y.
348 //
349 // Some of the fields of fx_info are also set up:
350 // Length: Total length of outline.
351 // Rx: Rounded y second moment. (Reversed by convention.)
352 // Ry: Rounded x second moment.
353 // Xmean: Rounded x center of mass of the blob.
354 // Ymean: Rounded y center of mass of the blob.
355 static void SetupBLCNDenorms(const TBLOB& blob, bool nonlinear_norm,
356 DENORM* bl_denorm, DENORM* cn_denorm,
357 INT_FX_RESULT_STRUCT* fx_info);
358
359 // Extracts sets of 3-D features of length kStandardFeatureLength (=12.8), as
360 // (x,y) position and angle as measured counterclockwise from the vector
361 // <-1, 0>, from blob using two normalizations defined by bl_denorm and
362 // cn_denorm. See SetupBLCNDenorms for definitions.
363 // If outline_cn_counts is not nullptr, on return it contains the cumulative
364 // number of cn features generated for each outline in the blob (in order).
365 // Thus after the first outline, there were (*outline_cn_counts)[0] features,
366 // after the second outline, there were (*outline_cn_counts)[1] features etc.
367 static void ExtractFeatures(const TBLOB& blob,
368 bool nonlinear_norm,
// NOTE(review): listing lines 369-370 are absent here; per the cross-reference
// index they declare the GenericVector<INT_FEATURE_STRUCT>* parameters
// bl_features and cn_features.
371 INT_FX_RESULT_STRUCT* results,
372 GenericVector<int>* outline_cn_counts);
373 /* float2int.cpp ************************************************************/
374 void ClearCharNormArray(uint8_t* char_norm_array);
375 void ComputeIntCharNormArray(const FEATURE_STRUCT& norm_feature,
376 uint8_t* char_norm_array);
377 void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures);
378 /* intproto.cpp *************************************************************/
380 void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates,
381 const UNICHARSET& target_unicharset);
382 CLASS_ID GetClassToDebug(const char *Prompt, bool* adaptive_on,
383 bool* pretrained_on, int* shape_id);
384 void ShowMatchDisplay();
385 /* font detection ***********************************************************/
387 return fontinfo_table_;
388 }
390 return fontinfo_table_;
391 }
393 return fontset_table_;
394 }
395 /* mfoutline.cpp ***********************************************************/
396 void NormalizeOutlines(LIST Outlines, float *XScale, float *YScale);
397 /* outfeat.cpp ***********************************************************/
399 /* picofeat.cpp ***********************************************************/
402 const INT_FX_RESULT_STRUCT& fx_info);
404 const INT_FX_RESULT_STRUCT& fx_info);
405 /* blobclass.cpp ***********************************************************/
406 // Extracts features from the given blob and saves them in the tr_file_data_
407 // member variable.
408 // fontname: Name of font that this blob was printed in.
409 // cn_denorm: Character normalization transformation to apply to the blob.
410 // fx_info: Character normalization parameters computed with cn_denorm.
411 // blob_text: Ground truth text for the blob.
412 void LearnBlob(const STRING& fontname, TBLOB* Blob, const DENORM& cn_denorm,
413 const INT_FX_RESULT_STRUCT& fx_info, const char* blob_text);
414 // Writes stored training data to a .tr file based on the given filename.
415 // Returns false on error.
416 bool WriteTRFile(const STRING& filename);
417
418 // Member variables.
419
420 // Parameters.
421 // Set during training (in lang.config) to indicate whether the divisible
422 // blobs chopper should be used (true for latin script.)
423 BOOL_VAR_H(allow_blob_division, true, "Use divisible blobs chopping");
424 // Set during training (in lang.config) to indicate whether the divisible
425 // blobs chopper should be used in preference to chopping. Set to true for
426 // southern Indic scripts.
428 "Prioritize blob division over chopping");
429 BOOL_VAR_H(classify_enable_learning, true, "Enable adaptive classifier");
430 INT_VAR_H(classify_debug_level, 0, "Classify debug level");
431
432 /* mfoutline.cpp ***********************************************************/
433 /* control knobs used to control normalization of outlines */
434 INT_VAR_H(classify_norm_method, character, "Normalization Method ...");
436 "Character Normalization Range ...");
438 "Veto ratio between classifier ratings");
440 "Veto difference between classifier certainties");
441
442 /* adaptmatch.cpp ***********************************************************/
443 BOOL_VAR_H(tess_cn_matching, 0, "Character Normalized Matching");
444 BOOL_VAR_H(tess_bn_matching, 0, "Baseline Normalized Matching");
445 BOOL_VAR_H(classify_enable_adaptive_matcher, 1, "Enable adaptive classifier");
447 "Use pre-adapted classifier templates");
449 "Save adapted templates to a file");
450 BOOL_VAR_H(classify_enable_adaptive_debugger, 0, "Enable match debugger");
452 "Non-linear stroke-density normalization");
453 INT_VAR_H(matcher_debug_level, 0, "Matcher Debug Level");
454 INT_VAR_H(matcher_debug_flags, 0, "Matcher Debug Flags");
455 INT_VAR_H(classify_learning_debug_level, 0, "Learning Debug Level: ");
456 double_VAR_H(matcher_good_threshold, 0.125, "Good Match (0-1)");
458 double_VAR_H(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)");
459 double_VAR_H(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)");
460 double_VAR_H(matcher_rating_margin, 0.1, "New template margin (0-1)");
461 double_VAR_H(matcher_avg_noise_size, 12.0, "Avg. noise blob length: ");
462 INT_VAR_H(matcher_permanent_classes_min, 1, "Min # of permanent classes");
464 "Reliable Config Threshold");
466 "Enable adaption even if the ambiguities have not been seen");
468 "Maximum angle delta for prototype clustering");
470 "Penalty to apply when a non-alnum is vertically out of "
471 "its expected textline position");
472 double_VAR_H(rating_scale, 1.5, "Rating scaling factor");
473 double_VAR_H(certainty_scale, 20.0, "Certainty scaling factor");
475 "Scale factor for features not used");
477 "Prune poor adapted results this much worse than best result");
479 "Threshold at which classify_adapted_pruning_factor starts");
481 "Threshold for good protos during adaptive 0-255");
483 "Threshold for good features during adaptive 0-255");
485 "Do not include character fragments in the"
486 " results of the classifier");
488 "Exclude fragments that do not match any whole character"
489 " with at least this certainty");
491 "Bring up graphical debugging windows for fragments training");
493 "Use two different windows for debugging the matching: "
494 "One for the protos and one for the features.");
495 STRING_VAR_H(classify_learn_debug_str, "", "Class str to debug learning");
496
497 /* intmatcher.cpp **********************************************************/
499 "Class Pruner Threshold 0-255");
501 "Class Pruner Multiplier 0-255: ");
503 "Class Pruner CutoffStrength: ");
505 "Integer Matcher Multiplier 0-255: ");
506
508 "Assume the input is numbers [0-9].");
509 double_VAR_H(speckle_large_max_size, 0.30, "Max large speckle size");
511 "Penalty to add to worst rating for noise");
512
513 // Use class variables to hold onto built-in templates and adapted templates.
516 // The backup adapted templates are created from the previous page (only)
517 // so they are always ready and reasonably well trained if the primary
518 // adapted templates become full.
520
521 // Create dummy proto and config masks for use with the built-in templates.
526 /* normmatch.cpp */
528 /* font detection ***********************************************************/
530 // Without shape training, each class_id, config pair represents a single
531 // unichar id/font combination, so each fontset_table_ entry holds font ids
532 // for each config in the class.
533 // With shape training, each class_id, config pair represents a single
534 // shape_table_ index, so the fontset_table_ stores the shape_table_ index,
535 // and the shape_table_ must be consulted to obtain the actual unichar_id/
536 // font combinations that the shape represents.
538
539 protected:
542 // If a shape_table_ is present, it is used to remap classifier output in
543 // ExpandShapesAndApplyCorrections. font_ids referenced by configs actually
544 // mean an index to the shape_table_ and the choices returned are *all* the
545 // shape_table_ entries at that index.
547
548 private:
549 // The currently active static classifier.
550 ShapeClassifier* static_classifier_;
551 ScrollView* learn_debug_win_;
552 ScrollView* learn_fragmented_word_debug_win_;
553 ScrollView* learn_fragments_debug_win_;
554
555 // Training data gathered here for all the images in a document.
556 STRING tr_file_data_;
557
558 Dict dict_;
559
560 GenericVector<uint16_t> shapetable_cutoffs_;
561
562 /* variables used to hold performance statistics */
563 int NumAdaptationsFailed;
564
565 // Expected number of features in the class pruner, used to penalize
566 // unknowns that have too few features (like a c being classified as e) so
567 // it doesn't recognize everything as '@' or '#'.
568 // CharNormCutoffs is for the static classifier (with no shapetable).
569 // BaselineCutoffs gets a copy of CharNormCutoffs as an estimate of the real
570 // value in the adaptive classifier. Both are indexed by unichar_id.
571 // shapetable_cutoffs_ provides a similar value for each shape in the
572 // shape_table_
573 uint16_t CharNormCutoffs[MAX_NUM_CLASSES];
574 uint16_t BaselineCutoffs[MAX_NUM_CLASSES];
575
576 public:
578};
579} // namespace tesseract
580
581#endif // DISABLED_LEGACY_ENGINE
582
583#endif // TESSERACT_CLASSIFY_CLASSIFY_H_
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:297
#define STRING_VAR_H(name, val, comment)
Definition: params.h:299
#define INT_VAR_H(name, val, comment)
Definition: params.h:295
#define double_VAR_H(name, val, comment)
Definition: params.h:301
int UNICHAR_ID
Definition: unichar.h:34
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:152
@ character
Definition: mfoutline.h:63
uint32_t * BIT_VECTOR
Definition: bitvec.h:28
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:34
int16_t PROTO_ID
Definition: matchdefs.h:40
uint8_t FEATURE_ID
Definition: matchdefs.h:46
#define MAX_NUM_CLASSES
Definition: matchdefs.h:30
CharSegmentationType
Definition: classify.h:96
@ CST_IMPROPER
Definition: classify.h:99
@ CST_NGRAM
Definition: classify.h:100
@ CST_WHOLE
Definition: classify.h:98
@ CST_FRAGMENT
Definition: classify.h:97
Definition: blobs.h:284
Definition: rect.h:34
Definition: strngs.h:45
uint8_t NumPermClasses
Definition: adaptive.h:69
double speckle_rating_penalty
Definition: classify.h:511
double classify_adapted_pruning_factor
Definition: classify.h:477
double classify_max_rating_ratio
Definition: classify.h:438
BIT_VECTOR AllProtosOn
Definition: classify.h:522
bool matcher_debug_separate_windows
Definition: classify.h:494
IntegerMatcher im_
Definition: classify.h:540
bool AdaptiveClassifierIsEmpty() const
Definition: classify.h:326
double tessedit_class_miss_scale
Definition: classify.h:475
bool classify_debug_character_fragments
Definition: classify.h:491
bool allow_blob_division
Definition: classify.h:423
bool AdaptableWord(WERD_RES *word)
Definition: adaptmatch.cpp:821
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
Definition: adaptmatch.cpp:946
void RemoveBadMatches(ADAPT_RESULTS *Results)
double matcher_bad_match_pad
Definition: classify.h:459
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:244
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:453
bool LooksLikeGarbage(TBLOB *blob)
STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
Definition: adaptmatch.cpp:786
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob)
bool prioritize_division
Definition: classify.h:428
bool classify_enable_adaptive_debugger
Definition: classify.h:450
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
BIT_VECTOR TempProtoMask
Definition: classify.h:525
bool classify_save_adapted_templates
Definition: classify.h:449
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:151
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
Definition: adaptmatch.cpp:994
double classify_adapted_pruning_threshold
Definition: classify.h:479
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
Definition: intfx.cpp:442
int classify_cp_cutoff_strength
Definition: classify.h:503
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:250
int matcher_min_examples_for_prototyping
Definition: classify.h:464
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:489
const ShapeTable * shape_table() const
Definition: classify.h:111
void ResetAdaptiveClassifierInternal()
Definition: adaptmatch.cpp:598
void StartBackupAdaptiveClassifier()
Definition: adaptmatch.cpp:629
int classify_adapt_proto_threshold
Definition: classify.h:481
double matcher_perfect_threshold
Definition: classify.h:458
bool classify_nonlinear_norm
Definition: classify.h:452
int classify_class_pruner_multiplier
Definition: classify.h:501
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:452
ShapeTable * shape_table_
Definition: classify.h:546
~Classify() override
Definition: classify.cpp:201
double speckle_large_max_size
Definition: classify.h:509
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:515
UnicityTable< FontSet > & get_fontset_table()
Definition: classify.h:392
void ShowBestMatchFor(int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
bool classify_use_pre_adapted_templates
Definition: classify.h:447
int GetCharNormFeature(const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array)
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
bool classify_bln_numeric_mode
Definition: classify.h:508
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
Definition: intfx.cpp:129
void ClearCharNormArray(uint8_t *char_norm_array)
Definition: float2int.cpp:44
bool LargeSpeckle(const TBLOB &blob)
Definition: classify.cpp:242
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:514
double classify_misfit_junk_penalty
Definition: classify.h:471
float ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch)
Definition: normmatch.cpp:94
int classify_class_pruner_threshold
Definition: classify.h:499
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:226
FEATURE_DEFS_STRUCT feature_defs_
Definition: classify.h:541
const UnicityTable< FontInfo > & get_fontinfo_table() const
Definition: classify.h:389
int matcher_permanent_classes_min
Definition: classify.h:462
UnicityTable< FontSet > fontset_table_
Definition: classify.h:537
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
bool classify_enable_learning
Definition: classify.h:429
double matcher_clustering_max_angle_delta
Definition: classify.h:468
CLASS_ID GetClassToDebug(const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
Definition: intproto.cpp:1255
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
double matcher_rating_margin
Definition: classify.h:460
double classify_char_norm_range
Definition: classify.h:436
int CharNormTrainingSample(bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results)
void MasterMatcher(INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:1017
bool WriteTRFile(const STRING &filename)
Definition: blobclass.cpp:98
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors)
double classify_max_certainty_margin
Definition: classify.h:440
void ClassifyAsNoise(ADAPT_RESULTS *Results)
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results)
int GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId)
Definition: adaptive.cpp:173
NORM_PROTOS * ReadNormProtos(TFile *fp)
Definition: normmatch.cpp:190
double matcher_avg_noise_size
Definition: classify.h:461
BIT_VECTOR AllConfigsOff
Definition: classify.h:524
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass)
void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
Definition: adaptmatch.cpp:693
void SwitchAdaptiveClassifier()
Definition: adaptmatch.cpp:613
double certainty_scale
Definition: classify.h:473
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:191
bool AdaptiveClassifierIsFull() const
Definition: classify.h:325
double matcher_reliable_adaptive_result
Definition: classify.h:457
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:217
bool disable_character_fragments
Definition: classify.h:486
void AmbigClassifier(const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
char * classify_learn_debug_str
Definition: classify.h:495
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array)
Definition: float2int.cpp:62
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array)
int classify_integer_matcher_multiplier
Definition: classify.h:505
INT_TEMPLATES ReadIntTemplates(TFile *fp)
Definition: intproto.cpp:718
void InitAdaptiveClassifier(TessdataManager *mgr)
Definition: adaptmatch.cpp:527
void LearnPieces(const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
Definition: adaptmatch.cpp:374
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:466
NORM_PROTOS * NormProtos
Definition: classify.h:527
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:63
BIT_VECTOR AllConfigsOn
Definition: classify.h:523
void SetAdaptiveThreshold(float Threshold)
bool classify_enable_adaptive_matcher
Definition: classify.h:445
void SetStaticClassifier(ShapeClassifier *static_classifier)
Definition: classify.cpp:211
ADAPT_TEMPLATES ReadAdaptedTemplates(TFile *File)
Definition: adaptive.cpp:332
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
double matcher_good_threshold
Definition: classify.h:456
void ReadNewCutoffs(TFile *fp, uint16_t *Cutoffs)
Definition: cutoffs.cpp:41
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
Definition: float2int.cpp:90
int ShapeIDToClassID(int shape_id) const
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:459
UnicityTable< FontInfo > & get_fontinfo_table()
Definition: classify.h:386
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:529
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:519
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates)
Definition: adaptmatch.cpp:853
void NormalizeOutlines(LIST Outlines, float *XScale, float *YScale)
Definition: mfoutline.cpp:276
virtual Dict & getDict()
Definition: classify.h:107
void LearnBlob(const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
Definition: blobclass.cpp:70
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:247
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
Definition: outfeat.cpp:41
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:488
INT_TEMPLATES CreateIntTemplates(CLASSES FloatProtos, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:527
void DebugAdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results)
int classify_adapt_feature_threshold
Definition: classify.h:483
int classify_learning_debug_level
Definition: classify.h:455
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
Definition: classify.cpp:219
void PrintAdaptiveMatchResults(const ADAPT_RESULTS &results)
Definition: cluster.h:32