tesseract 4.1.1
Loading...
Searching...
No Matches
intmatcher.h
Go to the documentation of this file.
1/******************************************************************************
2 ** Filename: intmatcher.h
3 ** Purpose: Interface to high level generic classifier routines.
4 ** Author: Robert Moss
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17#ifndef INTMATCHER_H
18#define INTMATCHER_H
19
20#include "params.h"
21
22// Character fragments could be present in the trained templates
23// but turned on/off on a language-by-language basis or depending
24// on particular properties of the corpus (e.g. when we expect the
25// images to have low exposure).
27 "Do not include character fragments in the"
28 " results of the classifier");
29
31 "Integer Matcher Multiplier 0-255: ");
32
33
37#include "intproto.h"
38
// Forward declaration only: lets this header name tesseract::UnicharRating
// (used through a pointer) without including its full definition.
39namespace tesseract {
40struct UnicharRating;
41}
42
// NOTE(review): this listing has dropped the line that opens the type
// (presumably `struct CP_RESULT_STRUCT {`) and the `CLASS_ID Class;` member
// that the constructor initializes -- confirm against the original header.
// The default constructor sets Rating to 0.0f and Class to 0.
44 CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}
45
46 float Rating;
48};
49
50
// Dimensions of the similarity-to-evidence lookup table, given both as a bit
// width and as an entry count. The values are presumably meant to stay in
// sync (512 == 1 << 9) -- TODO confirm nothing relies on them differing.
// SE_TABLE_SIZE is the array length of
// IntegerMatcher::similarity_evidence_table_.
55#define SE_TABLE_BITS 9
56#define SE_TABLE_SIZE 512
57
62
// NOTE(review): several lines of this type are missing from the listing: the
// line that opens the type (presumably ScratchEvidence), its three evidence
// arrays (feature_evidence_, sum_feature_evidence_, proto_evidence_), and the
// `void UpdateSumOfProtoEvidences(` line whose parameter list survives below.
// Confirm against the original header.
63 void Clear(const INT_CLASS class_template);
64 void ClearFeatureEvidence(const INT_CLASS class_template);
65 void NormalizeSums(INT_CLASS ClassTemplate, int16_t NumFeatures);
// Tail of the UpdateSumOfProtoEvidences declaration (its first line is
// missing from this listing).
67 INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask);
68};
69
70
// NOTE(review): the line that opens this class (IntegerMatcher, judging by
// the constructor name) is missing from this listing.
72 public:
73 // Integer Matcher Theta Fudge (0-255).
74 static const int kIntThetaFudge = 128;
75 // Bits in Similarity to Evidence Lookup (8-9).
// Same value as the SE_TABLE_BITS macro (9).
76 static const int kEvidenceTableBits = 9;
77 // Integer Evidence Truncation Bits (8-14).
78 static const int kIntEvidenceTruncBits = 14;
79 // Similarity to Evidence Table Exponential Multiplier.
// Declared without an in-class initializer, so the value comes from the
// out-of-class definition.
80 static const float kSEExponentialMultiplier;
81 // Center of Similarity Curve.
// Likewise only declared here; the value is defined elsewhere.
82 static const float kSimilarityCenter;
83
// Constructs a matcher from a pointer to an IntParam debug-level parameter.
// NOTE(review): presumably the pointer is retained in classify_debug_level_
// (same type) rather than copied -- confirm in intmatcher.cpp.
84 IntegerMatcher(tesseract::IntParam *classify_debug_level);
85
// Public matching entry point.
// NOTE(review): one parameter line is missing from this listing. The full
// signature also takes `tesseract::UnicharRating *Result` between Features
// and AdaptFeatureThreshold. The function returns void, so the outcome is
// presumably reported through Result -- confirm in intmatcher.cpp.
// Features is a read-only pointer; NumFeatures presumably gives the number
// of entries it points to -- confirm.
86 void Match(INT_CLASS ClassTemplate,
87 BIT_VECTOR ProtoMask,
88 BIT_VECTOR ConfigMask,
89 int16_t NumFeatures,
90 const INT_FEATURE_STRUCT* Features,
92 int AdaptFeatureThreshold,
93 int Debug,
94 bool SeparateDebugWindows);
95
96 // Applies the CN normalization factor to the given rating and returns
97 // the modified rating.
// All inputs are passed by value; the adjusted rating is delivered only
// through the float return value.
98 float ApplyCNCorrection(float rating, int blob_length,
99 int normalization_factor, int matcher_multiplier);
100
// INT_FEATURE_ARRAY is an array typedef of INT_FEATURE_STRUCT, so as a
// parameter Features is adjusted to a (non-const) pointer to
// INT_FEATURE_STRUCT. ProtoArray points to PROTO_ID (int16_t) values.
// NOTE(review): presumably ProtoArray is an output buffer and the int
// return is the number of entries written -- confirm in intmatcher.cpp.
101 int FindGoodProtos(INT_CLASS ClassTemplate,
102 BIT_VECTOR ProtoMask,
103 BIT_VECTOR ConfigMask,
104 int16_t NumFeatures,
105 INT_FEATURE_ARRAY Features,
106 PROTO_ID *ProtoArray,
107 int AdaptProtoThreshold,
108 int Debug);
109
// Same parameter layout as FindGoodProtos, except that the pointer argument
// is FeatureArray, pointing to FEATURE_ID (uint8_t) values, and the
// threshold is AdaptFeatureThreshold.
// NOTE(review): presumably FeatureArray is an output buffer and the int
// return is the number of entries written -- confirm in intmatcher.cpp.
110 int FindBadFeatures(INT_CLASS ClassTemplate,
111 BIT_VECTOR ProtoMask,
112 BIT_VECTOR ConfigMask,
113 int16_t NumFeatures,
114 INT_FEATURE_ARRAY Features,
115 FEATURE_ID *FeatureArray,
116 int AdaptFeatureThreshold,
117 int Debug);
118
119 private:
// Private helper operating on a single feature: Feature is a read-only
// pointer to one INT_FEATURE_STRUCT and FeatureNum is an int alongside it.
// evidence is a non-const ScratchEvidence pointer.
// NOTE(review): presumably evidence is updated in place and FeatureNum is
// the feature's index -- confirm in intmatcher.cpp.
120 int UpdateTablesForFeature(
121 INT_CLASS ClassTemplate,
122 BIT_VECTOR ProtoMask,
123 BIT_VECTOR ConfigMask,
124 int FeatureNum,
125 const INT_FEATURE_STRUCT* Feature,
126 ScratchEvidence *evidence,
127 int Debug);
128
// NOTE(review): the final line of this declaration is missing from the
// listing, so the remaining parameter(s) and the closing `);` are not shown
// here -- consult the original header for the full signature.
// tables is taken by const reference, so this declaration does not permit
// modifying the evidence through it.
129 int FindBestMatch(INT_CLASS ClassTemplate,
130 const ScratchEvidence &tables,
132
133#ifndef GRAPHICS_DISABLED
// Debug helper; declared only when GRAPHICS_DISABLED is not defined.
// Takes the evidence tables by const reference and returns nothing.
134 void DebugFeatureProtoError(
135 INT_CLASS ClassTemplate,
136 BIT_VECTOR ProtoMask,
137 BIT_VECTOR ConfigMask,
138 const ScratchEvidence &tables,
139 int16_t NumFeatures,
140 int Debug);
141
// Debug helper; declared only when GRAPHICS_DISABLED is not defined.
// Takes the evidence tables by const reference and returns nothing.
// NOTE(review): SeparateDebugWindows presumably selects whether separate
// debug windows are used -- confirm in intmatcher.cpp.
142 void DisplayProtoDebugInfo(
143 INT_CLASS ClassTemplate,
144 BIT_VECTOR ConfigMask,
145 const ScratchEvidence &tables,
146 bool SeparateDebugWindows);
147
// Debug helper; declared only when GRAPHICS_DISABLED is not defined.
// Its parameter list matches the visible parameters of Match (class
// template, proto/config masks, feature count, read-only feature pointer,
// threshold, debug flags); it takes no evidence tables and returns nothing.
148 void DisplayFeatureDebugInfo(
149 INT_CLASS ClassTemplate,
150 BIT_VECTOR ProtoMask,
151 BIT_VECTOR ConfigMask,
152 int16_t NumFeatures,
153 const INT_FEATURE_STRUCT* Features,
154 int AdaptFeatureThreshold,
155 int Debug,
156 bool SeparateDebugWindows);
157#endif
158
159 private:
// Pointer to the debug-level parameter; same type as the constructor's
// argument.
160 tesseract::IntParam *classify_debug_level_;
// Lookup table of SE_TABLE_SIZE (512) one-byte entries.
161 uint8_t similarity_evidence_table_[SE_TABLE_SIZE];
// NOTE(review): the masks and shift counts below are presumably derived from
// kEvidenceTableBits / kIntEvidenceTruncBits when the matcher is
// constructed -- confirm in intmatcher.cpp.
162 uint32_t evidence_table_mask_;
163 uint32_t mult_trunc_shift_bits_;
164 uint32_t table_trunc_shift_bits_;
165 uint32_t evidence_mult_mask_;
166};
167
168#endif
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:297
#define INT_VAR_H(name, val, comment)
Definition: params.h:295
#define SE_TABLE_SIZE
Definition: intmatcher.h:56
bool disable_character_fragments
int classify_integer_matcher_multiplier
#define MAX_NUM_PROTOS
Definition: intproto.h:48
#define MAX_PROTO_INDEX
Definition: intproto.h:44
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:152
#define MAX_NUM_CONFIGS
Definition: intproto.h:47
uint32_t * BIT_VECTOR
Definition: bitvec.h:28
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:34
int16_t PROTO_ID
Definition: matchdefs.h:40
uint8_t FEATURE_ID
Definition: matchdefs.h:46
CLASS_ID Class
Definition: intmatcher.h:47
void Clear(const INT_CLASS class_template)
Definition: intmatcher.cpp:739
void ClearFeatureEvidence(const INT_CLASS class_template)
Definition: intmatcher.cpp:746
int sum_feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:60
void UpdateSumOfProtoEvidences(INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask)
uint8_t feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:59
uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]
Definition: intmatcher.h:61
void NormalizeSums(INT_CLASS ClassTemplate, int16_t NumFeatures)
static const float kSEExponentialMultiplier
Definition: intmatcher.h:80
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:657
static const int kEvidenceTableBits
Definition: intmatcher.h:76
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:511
static const int kIntThetaFudge
Definition: intmatcher.h:74
static const int kIntEvidenceTruncBits
Definition: intmatcher.h:78
static const float kSimilarityCenter
Definition: intmatcher.h:82
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:589
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)