tesseract 4.1.1
Loading...
Searching...
No Matches
intmatcher.cpp
Go to the documentation of this file.
1/******************************************************************************
2 ** Filename: intmatcher.cpp
3 ** Purpose: Generic high level classification routines.
4 ** Author: Robert Moss
5 ** (c) Copyright Hewlett-Packard Company, 1988.
6 ** Licensed under the Apache License, Version 2.0 (the "License");
7 ** you may not use this file except in compliance with the License.
8 ** You may obtain a copy of the License at
9 ** http://www.apache.org/licenses/LICENSE-2.0
10 ** Unless required by applicable law or agreed to in writing, software
11 ** distributed under the License is distributed on an "AS IS" BASIS,
12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 ** See the License for the specific language governing permissions and
14 ** limitations under the License.
15 ******************************************************************************/
16
17// Include automatically generated configuration file if running autoconf.
18#ifdef HAVE_CONFIG_H
19#include "config_auto.h"
20#endif
21
22/*----------------------------------------------------------------------------
23 Include Files and Type Defines
24----------------------------------------------------------------------------*/
25#include "intmatcher.h"
26
27#include <cassert>
28#include <cmath>
29#include "fontinfo.h"
30#include "intproto.h"
31#include "callcpp.h"
32#include "scrollview.h"
33#include "float2int.h"
34#include "helpers.h"
35#include "classify.h"
36#include "shapetable.h"
37
40
41/*----------------------------------------------------------------------------
42 Global Data Definitions and Declarations
43----------------------------------------------------------------------------*/
44// Parameters of the sigmoid used to convert similarity to evidence in the
45// similarity_evidence_table_ that is used to convert distance metric to an
46// 8 bit evidence value in the secondary matcher. (See IntMatcher::Init).
48const float IntegerMatcher::kSimilarityCenter = 0.0075f;
49
// offset_table[b] is the index (0 = least significant) of the lowest set bit
// of the byte value b. Entry 0 holds 255 because a zero byte has no set bit.
// Used together with next_table to visit the set bits of a word one byte at
// a time.
50static const uint8_t offset_table[] = {
51 255, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
52 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
53 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3,
54 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5,
55 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
56 0, 1, 0, 2, 0, 1, 0, 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
57 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3,
58 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6,
59 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3,
60 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4,
61 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
62};
63
// next_table[b] is the byte value b with its lowest set bit cleared
// (i.e. b & (b - 1)). After offset_table[b] has reported the position of the
// lowest set bit, replacing b by next_table[b] advances to the next set bit;
// the walk ends when the value reaches 0.
64static const uint8_t next_table[] = {
65 0, 0, 0, 0x2, 0, 0x4, 0x4, 0x6, 0, 0x8, 0x8, 0x0a, 0x08, 0x0c, 0x0c, 0x0e,
66 0, 0x10, 0x10, 0x12, 0x10, 0x14, 0x14, 0x16, 0x10, 0x18, 0x18, 0x1a,
67 0x18, 0x1c, 0x1c, 0x1e, 0, 0x20, 0x20, 0x22, 0x20, 0x24, 0x24, 0x26,
68 0x20, 0x28, 0x28, 0x2a, 0x28, 0x2c, 0x2c, 0x2e, 0x20, 0x30, 0x30, 0x32,
69 0x30, 0x34, 0x34, 0x36, 0x30, 0x38, 0x38, 0x3a, 0x38, 0x3c, 0x3c, 0x3e,
70 0, 0x40, 0x40, 0x42, 0x40, 0x44, 0x44, 0x46, 0x40, 0x48, 0x48, 0x4a,
71 0x48, 0x4c, 0x4c, 0x4e, 0x40, 0x50, 0x50, 0x52, 0x50, 0x54, 0x54, 0x56,
72 0x50, 0x58, 0x58, 0x5a, 0x58, 0x5c, 0x5c, 0x5e, 0x40, 0x60, 0x60, 0x62,
73 0x60, 0x64, 0x64, 0x66, 0x60, 0x68, 0x68, 0x6a, 0x68, 0x6c, 0x6c, 0x6e,
74 0x60, 0x70, 0x70, 0x72, 0x70, 0x74, 0x74, 0x76, 0x70, 0x78, 0x78, 0x7a,
75 0x78, 0x7c, 0x7c, 0x7e, 0, 0x80, 0x80, 0x82, 0x80, 0x84, 0x84, 0x86,
76 0x80, 0x88, 0x88, 0x8a, 0x88, 0x8c, 0x8c, 0x8e, 0x80, 0x90, 0x90, 0x92,
77 0x90, 0x94, 0x94, 0x96, 0x90, 0x98, 0x98, 0x9a, 0x98, 0x9c, 0x9c, 0x9e,
78 0x80, 0xa0, 0xa0, 0xa2, 0xa0, 0xa4, 0xa4, 0xa6, 0xa0, 0xa8, 0xa8, 0xaa,
79 0xa8, 0xac, 0xac, 0xae, 0xa0, 0xb0, 0xb0, 0xb2, 0xb0, 0xb4, 0xb4, 0xb6,
80 0xb0, 0xb8, 0xb8, 0xba, 0xb8, 0xbc, 0xbc, 0xbe, 0x80, 0xc0, 0xc0, 0xc2,
81 0xc0, 0xc4, 0xc4, 0xc6, 0xc0, 0xc8, 0xc8, 0xca, 0xc8, 0xcc, 0xcc, 0xce,
82 0xc0, 0xd0, 0xd0, 0xd2, 0xd0, 0xd4, 0xd4, 0xd6, 0xd0, 0xd8, 0xd8, 0xda,
83 0xd8, 0xdc, 0xdc, 0xde, 0xc0, 0xe0, 0xe0, 0xe2, 0xe0, 0xe4, 0xe4, 0xe6,
84 0xe0, 0xe8, 0xe8, 0xea, 0xe8, 0xec, 0xec, 0xee, 0xe0, 0xf0, 0xf0, 0xf2,
85 0xf0, 0xf4, 0xf4, 0xf6, 0xf0, 0xf8, 0xf8, 0xfa, 0xf8, 0xfc, 0xfc, 0xfe
86};
87
88// See http://b/19318793 (#6) for a complete discussion.
89
90namespace tesseract {
91
// Sorts the keys ra[1..n] into ascending order with an in-place heapsort and
// applies the same permutation to the companion array rb[1..n].
//
// Both arrays are indexed from 1: element 0 is never read or written, so the
// caller must provide at least n + 1 entries in each.
//
// n   number of elements to sort; values below 2 leave the arrays untouched.
// ra  sort keys (1-based).
// rb  payload carried along with the keys (1-based).
static void HeapSort(int n, int ra[], int rb[]) {
  // With fewer than two elements there is nothing to sort. Without this
  // guard the extraction phase below would decrement ir past 1 and never hit
  // its termination test, reading and writing outside the arrays.
  if (n < 2) return;

  int l = (n >> 1) + 1;  // Heap-building cursor, counts down to 1.
  int ir = n;            // Last index of the still-unsorted heap region.
  for (;;) {
    int rra;
    int rrb;
    if (l > 1) {
      // Heap construction: pick the next parent to sift down.
      --l;
      rra = ra[l];
      rrb = rb[l];
    } else {
      // Extraction: move the current maximum to the end of the heap region
      // and sift the displaced element down from the root.
      rra = ra[ir];
      rrb = rb[ir];
      ra[ir] = ra[1];
      rb[ir] = rb[1];
      if (--ir == 1) {
        ra[1] = rra;
        rb[1] = rrb;
        return;
      }
    }
    // Sift (rra, rrb) down from position l to its place in the heap.
    int i = l;
    int j = l << 1;
    while (j <= ir) {
      // Choose the larger of the two children.
      if (j < ir && ra[j] < ra[j + 1]) ++j;
      if (!(rra < ra[j])) break;
      ra[i] = ra[j];
      rb[i] = rb[j];
      i = j;
      j <<= 1;
    }
    ra[i] = rra;
    rb[i] = rrb;
  }
}
140
141// Encapsulation of the intermediate data and computations made by the class
142// pruner. The class pruner implements a simple linear classifier on binary
143// features by heavily quantizing the feature space, and applying
144// NUM_BITS_PER_CLASS (2)-bit weights to the features. Lack of resolution in
145// weights is compensated by a non-constant bias that is dependent on the
146// number of features present.
148 public:
149 ClassPruner(int max_classes) {
150 // The unrolled loop in ComputeScores means that the array sizes need to
151 // be rounded up so that the array is big enough to accommodate the extra
152 // entries accessed by the unrolling. Each pruner word is of size
153 // BITS_PER_WERD and each entry is NUM_BITS_PER_CLASS, so there are
154 // BITS_PER_WERD / NUM_BITS_PER_CLASS entries.
155 // See ComputeScores.
156 max_classes_ = max_classes;
157 rounded_classes_ = RoundUp(
159 class_count_ = new int[rounded_classes_];
160 norm_count_ = new int[rounded_classes_];
161 sort_key_ = new int[rounded_classes_ + 1];
162 sort_index_ = new int[rounded_classes_ + 1];
163 for (int i = 0; i < rounded_classes_; i++) {
164 class_count_[i] = 0;
165 }
166 pruning_threshold_ = 0;
167 num_features_ = 0;
168 num_classes_ = 0;
169 }
170
172 delete []class_count_;
173 delete []norm_count_;
174 delete []sort_key_;
175 delete []sort_index_;
176 }
177
180 void ComputeScores(const INT_TEMPLATES_STRUCT* int_templates,
181 int num_features, const INT_FEATURE_STRUCT* features) {
182 num_features_ = num_features;
183 int num_pruners = int_templates->NumClassPruners;
184 for (int f = 0; f < num_features; ++f) {
185 const INT_FEATURE_STRUCT* feature = &features[f];
186 // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
187 int x = feature->X * NUM_CP_BUCKETS >> 8;
188 int y = feature->Y * NUM_CP_BUCKETS >> 8;
189 int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
190 int class_id = 0;
191 // Each CLASS_PRUNER_STRUCT only covers CLASSES_PER_CP(32) classes, so
192 // we need a collection of them, indexed by pruner_set.
193 for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
194 // Look up quantized feature in a 3-D array, an array of weights for
195 // each class.
196 const uint32_t* pruner_word_ptr =
197 int_templates->ClassPruners[pruner_set]->p[x][y][theta];
198 for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
199 uint32_t pruner_word = *pruner_word_ptr++;
200 // This inner loop is unrolled to speed up the ClassPruner.
201 // Currently gcc would not unroll it unless it is set to O3
202 // level of optimization or -funroll-loops is specified.
203 /*
204 uint32_t class_mask = (1 << NUM_BITS_PER_CLASS) - 1;
205 for (int bit = 0; bit < BITS_PER_WERD/NUM_BITS_PER_CLASS; bit++) {
206 class_count_[class_id++] += pruner_word & class_mask;
207 pruner_word >>= NUM_BITS_PER_CLASS;
208 }
209 */
210 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
211 pruner_word >>= NUM_BITS_PER_CLASS;
212 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
213 pruner_word >>= NUM_BITS_PER_CLASS;
214 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
215 pruner_word >>= NUM_BITS_PER_CLASS;
216 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
217 pruner_word >>= NUM_BITS_PER_CLASS;
218 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
219 pruner_word >>= NUM_BITS_PER_CLASS;
220 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
221 pruner_word >>= NUM_BITS_PER_CLASS;
222 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
223 pruner_word >>= NUM_BITS_PER_CLASS;
224 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
225 pruner_word >>= NUM_BITS_PER_CLASS;
226 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
227 pruner_word >>= NUM_BITS_PER_CLASS;
228 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
229 pruner_word >>= NUM_BITS_PER_CLASS;
230 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
231 pruner_word >>= NUM_BITS_PER_CLASS;
232 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
233 pruner_word >>= NUM_BITS_PER_CLASS;
234 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
235 pruner_word >>= NUM_BITS_PER_CLASS;
236 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
237 pruner_word >>= NUM_BITS_PER_CLASS;
238 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
239 pruner_word >>= NUM_BITS_PER_CLASS;
240 class_count_[class_id++] += pruner_word & CLASS_PRUNER_CLASS_MASK;
241 }
242 }
243 }
244 }
245
251 void AdjustForExpectedNumFeatures(const uint16_t* expected_num_features,
252 int cutoff_strength) {
253 for (int class_id = 0; class_id < max_classes_; ++class_id) {
254 if (num_features_ < expected_num_features[class_id]) {
255 int deficit = expected_num_features[class_id] - num_features_;
256 class_count_[class_id] -= class_count_[class_id] * deficit /
257 (num_features_ * cutoff_strength + deficit);
258 }
259 }
260 }
261
264 void DisableDisabledClasses(const UNICHARSET& unicharset) {
265 for (int class_id = 0; class_id < max_classes_; ++class_id) {
266 if (!unicharset.get_enabled(class_id))
267 class_count_[class_id] = 0; // This char is disabled!
268 }
269 }
270
272 void DisableFragments(const UNICHARSET& unicharset) {
273 for (int class_id = 0; class_id < max_classes_; ++class_id) {
274 // Do not include character fragments in the class pruner
275 // results if disable_character_fragments is true.
276 if (unicharset.get_fragment(class_id)) {
277 class_count_[class_id] = 0;
278 }
279 }
280 }
281
286 void NormalizeForXheight(int norm_multiplier,
287 const uint8_t* normalization_factors) {
288 for (int class_id = 0; class_id < max_classes_; class_id++) {
289 norm_count_[class_id] = class_count_[class_id] -
290 ((norm_multiplier * normalization_factors[class_id]) >> 8);
291 }
292 }
293
296 for (int class_id = 0; class_id < max_classes_; class_id++) {
297 norm_count_[class_id] = class_count_[class_id];
298 }
299 }
300
304 void PruneAndSort(int pruning_factor, int keep_this,
305 bool max_of_non_fragments, const UNICHARSET& unicharset) {
306 int max_count = 0;
307 for (int c = 0; c < max_classes_; ++c) {
308 if (norm_count_[c] > max_count &&
309 // This additional check is added in order to ensure that
310 // the classifier will return at least one non-fragmented
311 // character match.
312 // TODO(daria): verify that this helps accuracy and does not
313 // hurt performance.
314 (!max_of_non_fragments || !unicharset.get_fragment(c))) {
315 max_count = norm_count_[c];
316 }
317 }
318 // Prune Classes.
319 pruning_threshold_ = (max_count * pruning_factor) >> 8;
320 // Select Classes.
321 if (pruning_threshold_ < 1)
322 pruning_threshold_ = 1;
323 num_classes_ = 0;
324 for (int class_id = 0; class_id < max_classes_; class_id++) {
325 if (norm_count_[class_id] >= pruning_threshold_ ||
326 class_id == keep_this) {
327 ++num_classes_;
328 sort_index_[num_classes_] = class_id;
329 sort_key_[num_classes_] = norm_count_[class_id];
330 }
331 }
332
333 // Sort Classes using Heapsort Algorithm.
334 if (num_classes_ > 1)
335 HeapSort(num_classes_, sort_key_, sort_index_);
336 }
337
340 void DebugMatch(const Classify& classify,
341 const INT_TEMPLATES_STRUCT* int_templates,
342 const INT_FEATURE_STRUCT* features) const {
343 int num_pruners = int_templates->NumClassPruners;
344 int max_num_classes = int_templates->NumClasses;
345 for (int f = 0; f < num_features_; ++f) {
346 const INT_FEATURE_STRUCT* feature = &features[f];
347 tprintf("F=%3d(%d,%d,%d),", f, feature->X, feature->Y, feature->Theta);
348 // Quantize the feature to NUM_CP_BUCKETS*NUM_CP_BUCKETS*NUM_CP_BUCKETS.
349 int x = feature->X * NUM_CP_BUCKETS >> 8;
350 int y = feature->Y * NUM_CP_BUCKETS >> 8;
351 int theta = feature->Theta * NUM_CP_BUCKETS >> 8;
352 int class_id = 0;
353 for (int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
354 // Look up quantized feature in a 3-D array, an array of weights for
355 // each class.
356 const uint32_t* pruner_word_ptr =
357 int_templates->ClassPruners[pruner_set]->p[x][y][theta];
358 for (int word = 0; word < WERDS_PER_CP_VECTOR; ++word) {
359 uint32_t pruner_word = *pruner_word_ptr++;
360 for (int word_class = 0; word_class < 16 &&
361 class_id < max_num_classes; ++word_class, ++class_id) {
362 if (norm_count_[class_id] >= pruning_threshold_) {
363 tprintf(" %s=%d,",
364 classify.ClassIDToDebugStr(int_templates,
365 class_id, 0).string(),
366 pruner_word & CLASS_PRUNER_CLASS_MASK);
367 }
368 pruner_word >>= NUM_BITS_PER_CLASS;
369 }
370 }
371 tprintf("\n");
372 }
373 }
374 }
375
377 void SummarizeResult(const Classify& classify,
378 const INT_TEMPLATES_STRUCT* int_templates,
379 const uint16_t* expected_num_features,
380 int norm_multiplier,
381 const uint8_t* normalization_factors) const {
382 tprintf("CP:%d classes, %d features:\n", num_classes_, num_features_);
383 for (int i = 0; i < num_classes_; ++i) {
384 int class_id = sort_index_[num_classes_ - i];
385 STRING class_string = classify.ClassIDToDebugStr(int_templates,
386 class_id, 0);
387 tprintf("%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
388 class_string.string(),
389 class_count_[class_id],
390 expected_num_features[class_id],
391 (norm_multiplier * normalization_factors[class_id]) >> 8,
392 sort_key_[num_classes_ - i],
393 100.0 - 100.0 * sort_key_[num_classes_ - i] /
394 (CLASS_PRUNER_CLASS_MASK * num_features_));
395 }
396 }
397
401 CP_RESULT_STRUCT empty;
402 results->init_to_size(num_classes_, empty);
403 for (int c = 0; c < num_classes_; ++c) {
404 (*results)[c].Class = sort_index_[num_classes_ - c];
405 (*results)[c].Rating = 1.0f - sort_key_[num_classes_ - c] /
406 (static_cast<float>(CLASS_PRUNER_CLASS_MASK) * num_features_);
407 }
408 return num_classes_;
409 }
410
411 private:
413 int *class_count_;
417 int *norm_count_;
419 int *sort_key_;
421 int *sort_index_;
423 int max_classes_;
425 int rounded_classes_;
427 int pruning_threshold_;
429 int num_features_;
431 int num_classes_;
432};
433
434/*----------------------------------------------------------------------------
435 Public Code
436----------------------------------------------------------------------------*/
453 int num_features, int keep_this,
454 const INT_FEATURE_STRUCT* features,
455 const uint8_t* normalization_factors,
456 const uint16_t* expected_num_features,
458 ClassPruner pruner(int_templates->NumClasses);
459 // Compute initial match scores for all classes.
460 pruner.ComputeScores(int_templates, num_features, features);
461 // Adjust match scores for number of expected features.
462 pruner.AdjustForExpectedNumFeatures(expected_num_features,
464 // Apply disabled classes in unicharset - only works without a shape_table.
465 if (shape_table_ == nullptr)
467 // If fragments are disabled, remove them, also only without a shape table.
470
471 // If we have good x-heights, apply the given normalization factors.
472 if (normalization_factors != nullptr) {
474 normalization_factors);
475 } else {
476 pruner.NoNormalization();
477 }
478 // Do the actual pruning and sort the short-list.
480 shape_table_ == nullptr, unicharset);
481
482 if (classify_debug_level > 2) {
483 pruner.DebugMatch(*this, int_templates, features);
484 }
485 if (classify_debug_level > 1) {
486 pruner.SummarizeResult(*this, int_templates, expected_num_features,
488 normalization_factors);
489 }
490 // Convert to the expected output format.
491 return pruner.SetupResults(results);
492}
493
494} // namespace tesseract
495
512 BIT_VECTOR ProtoMask,
513 BIT_VECTOR ConfigMask,
514 int16_t NumFeatures,
515 const INT_FEATURE_STRUCT* Features,
516 UnicharRating* Result,
517 int AdaptFeatureThreshold,
518 int Debug,
519 bool SeparateDebugWindows) {
520 auto *tables = new ScratchEvidence();
521 int Feature;
522
523 if (MatchDebuggingOn (Debug))
524 cprintf ("Integer Matcher -------------------------------------------\n");
525
526 tables->Clear(ClassTemplate);
527 Result->feature_misses = 0;
528
529 for (Feature = 0; Feature < NumFeatures; Feature++) {
530 int csum = UpdateTablesForFeature(ClassTemplate, ProtoMask, ConfigMask,
531 Feature, &Features[Feature],
532 tables, Debug);
533 // Count features that were missed over all configs.
534 if (csum == 0)
535 ++Result->feature_misses;
536 }
537
538#ifndef GRAPHICS_DISABLED
539 if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug)) {
540 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
541 NumFeatures, Debug);
542 }
543
544 if (DisplayProtoMatchesOn(Debug)) {
545 DisplayProtoDebugInfo(ClassTemplate, ConfigMask,
546 *tables, SeparateDebugWindows);
547 }
548
549 if (DisplayFeatureMatchesOn(Debug)) {
550 DisplayFeatureDebugInfo(ClassTemplate, ProtoMask, ConfigMask, NumFeatures,
551 Features, AdaptFeatureThreshold, Debug,
552 SeparateDebugWindows);
553 }
554#endif
555
556 tables->UpdateSumOfProtoEvidences(ClassTemplate, ConfigMask);
557 tables->NormalizeSums(ClassTemplate, NumFeatures);
558
559 FindBestMatch(ClassTemplate, *tables, Result);
560
561#ifndef GRAPHICS_DISABLED
562 if (PrintMatchSummaryOn(Debug))
563 Result->Print();
564
565 if (MatchDebuggingOn(Debug))
566 cprintf("Match Complete --------------------------------------------\n");
567#endif
568
569 delete tables;
570}
571
590 INT_CLASS ClassTemplate,
591 BIT_VECTOR ProtoMask,
592 BIT_VECTOR ConfigMask,
593 int16_t NumFeatures,
594 INT_FEATURE_ARRAY Features,
595 PROTO_ID *ProtoArray,
596 int AdaptProtoThreshold,
597 int Debug) {
598 auto *tables = new ScratchEvidence();
599 int NumGoodProtos = 0;
600
601 /* DEBUG opening heading */
602 if (MatchDebuggingOn (Debug))
603 cprintf
604 ("Find Good Protos -------------------------------------------\n");
605
606 tables->Clear(ClassTemplate);
607
608 for (int Feature = 0; Feature < NumFeatures; Feature++)
609 UpdateTablesForFeature(
610 ClassTemplate, ProtoMask, ConfigMask, Feature, &(Features[Feature]),
611 tables, Debug);
612
613#ifndef GRAPHICS_DISABLED
614 if (PrintProtoMatchesOn (Debug) || PrintMatchSummaryOn (Debug))
615 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
616 NumFeatures, Debug);
617#endif
618
619 /* Average Proto Evidences & Find Good Protos */
620 for (int proto = 0; proto < ClassTemplate->NumProtos; proto++) {
621 /* Compute Average for Actual Proto */
622 int Temp = 0;
623 for (uint8_t i = 0;
624 i < MAX_PROTO_INDEX && i < ClassTemplate->ProtoLengths[proto]; i++)
625 Temp += tables->proto_evidence_[proto][i];
626
627 Temp /= ClassTemplate->ProtoLengths[proto];
628
629 /* Find Good Protos */
630 if (Temp >= AdaptProtoThreshold) {
631 *ProtoArray = proto;
632 ProtoArray++;
633 NumGoodProtos++;
634 }
635 }
636
637 if (MatchDebuggingOn (Debug))
638 cprintf ("Match Complete --------------------------------------------\n");
639 delete tables;
640
641 return NumGoodProtos;
642}
643
658 INT_CLASS ClassTemplate,
659 BIT_VECTOR ProtoMask,
660 BIT_VECTOR ConfigMask,
661 int16_t NumFeatures,
662 INT_FEATURE_ARRAY Features,
663 FEATURE_ID *FeatureArray,
664 int AdaptFeatureThreshold,
665 int Debug) {
666 auto *tables = new ScratchEvidence();
667 int NumBadFeatures = 0;
668
669 /* DEBUG opening heading */
670 if (MatchDebuggingOn(Debug))
671 cprintf("Find Bad Features -------------------------------------------\n");
672
673 tables->Clear(ClassTemplate);
674
675 for (int Feature = 0; Feature < NumFeatures; Feature++) {
676 UpdateTablesForFeature(
677 ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
678 tables, Debug);
679
680 /* Find Best Evidence for Current Feature */
681 int best = 0;
682 assert(ClassTemplate->NumConfigs < MAX_NUM_CONFIGS);
683 for (int i = 0; i < MAX_NUM_CONFIGS && i < ClassTemplate->NumConfigs; i++)
684 if (tables->feature_evidence_[i] > best)
685 best = tables->feature_evidence_[i];
686
687 /* Find Bad Features */
688 if (best < AdaptFeatureThreshold) {
689 *FeatureArray = Feature;
690 FeatureArray++;
691 NumBadFeatures++;
692 }
693 }
694
695#ifndef GRAPHICS_DISABLED
696 if (PrintProtoMatchesOn(Debug) || PrintMatchSummaryOn(Debug))
697 DebugFeatureProtoError(ClassTemplate, ProtoMask, ConfigMask, *tables,
698 NumFeatures, Debug);
699#endif
700
701 if (MatchDebuggingOn(Debug))
702 cprintf("Match Complete --------------------------------------------\n");
703
704 delete tables;
705 return NumBadFeatures;
706}
707
708
710 : classify_debug_level_(classify_debug_level)
711{
712 /* Initialize table for evidence to similarity lookup */
713 for (int i = 0; i < SE_TABLE_SIZE; i++) {
714 uint32_t IntSimilarity = i << (27 - SE_TABLE_BITS);
715 double Similarity = (static_cast<double>(IntSimilarity)) / 65536.0 / 65536.0;
716 double evidence = Similarity / kSimilarityCenter;
717 evidence = 255.0 / (evidence * evidence + 1.0);
718
719 if (kSEExponentialMultiplier > 0.0) {
720 double scale = 1.0 - exp(-kSEExponentialMultiplier) *
721 exp(kSEExponentialMultiplier * (static_cast<double>(i) / SE_TABLE_SIZE));
722 evidence *= ClipToRange(scale, 0.0, 1.0);
723 }
724
725 similarity_evidence_table_[i] = static_cast<uint8_t>(evidence + 0.5);
726 }
727
728 /* Initialize evidence computation variables */
729 evidence_table_mask_ =
730 ((1 << kEvidenceTableBits) - 1) << (9 - kEvidenceTableBits);
731 mult_trunc_shift_bits_ = (14 - kIntEvidenceTruncBits);
732 table_trunc_shift_bits_ = (27 - SE_TABLE_BITS - (mult_trunc_shift_bits_ << 1));
733 evidence_mult_mask_ = ((1 << kIntEvidenceTruncBits) - 1);
734}
735
736/*----------------------------------------------------------------------------
737 Private Code
738----------------------------------------------------------------------------*/
739void ScratchEvidence::Clear(const INT_CLASS class_template) {
740 memset(sum_feature_evidence_, 0,
741 class_template->NumConfigs * sizeof(sum_feature_evidence_[0]));
742 memset(proto_evidence_, 0,
743 class_template->NumProtos * sizeof(proto_evidence_[0]));
744}
745
747 memset(feature_evidence_, 0,
748 class_template->NumConfigs * sizeof(feature_evidence_[0]));
749}
750
754static void IMDebugConfiguration(int FeatureNum, uint16_t ActualProtoNum,
755 uint8_t Evidence, uint32_t ConfigWord) {
756 cprintf ("F = %3d, P = %3d, E = %3d, Configs = ",
757 FeatureNum, static_cast<int>(ActualProtoNum), static_cast<int>(Evidence));
758 while (ConfigWord) {
759 if (ConfigWord & 1)
760 cprintf ("1");
761 else
762 cprintf ("0");
763 ConfigWord >>= 1;
764 }
765 cprintf ("\n");
766}
767
771static void IMDebugConfigurationSum(int FeatureNum, uint8_t *FeatureEvidence,
772 int32_t ConfigCount) {
773 cprintf("F=%3d, C=", FeatureNum);
774 for (int ConfigNum = 0; ConfigNum < ConfigCount; ConfigNum++) {
775 cprintf("%4d", FeatureEvidence[ConfigNum]);
776 }
777 cprintf("\n");
778}
779
// Scores one feature against every proto of ClassTemplate that survives the
// proto pruner and ProtoMask, and folds the result into *tables:
//   - feature_evidence_[c] receives the largest evidence that any matching
//     proto belonging to config c produced for this feature (the array is
//     reset at the start of the call);
//   - proto_evidence_[p][...] has the evidence inserted into the list kept
//     for proto p, larger values first;
//   - sum_feature_evidence_[c] is incremented by feature_evidence_[c].
// Only the first config word (Configs[0] and *ConfigMask) is consulted.
//
// ClassTemplate  class whose protos are matched.
// ProtoMask      one 32-bit word per half proto set; a cleared bit disables
//                the corresponding proto.
// ConfigMask     bit mask of the configs that may receive evidence.
// FeatureNum     index of the feature, used only for debug output.
// Feature        the feature to score.
// tables         scratch evidence to update.
// Debug          debug flags; controls the optional printing.
//
// Returns the sum of feature_evidence_ over the NumConfigs configs.
791int IntegerMatcher::UpdateTablesForFeature(
792 INT_CLASS ClassTemplate,
793 BIT_VECTOR ProtoMask,
794 BIT_VECTOR ConfigMask,
795 int FeatureNum,
796 const INT_FEATURE_STRUCT* Feature,
797 ScratchEvidence *tables,
798 int Debug) {
799 uint32_t ConfigWord;
800 uint32_t ProtoWord;
801 uint32_t ProtoNum;
802 uint32_t ActualProtoNum;
803 uint8_t proto_byte;
804 int32_t proto_word_offset;
805 int32_t proto_offset;
806 PROTO_SET ProtoSet;
807 uint32_t *ProtoPrunerPtr;
808 INT_PROTO Proto;
809 int ProtoSetIndex;
810 uint8_t Evidence;
811 uint32_t XFeatureAddress;
812 uint32_t YFeatureAddress;
813 uint32_t ThetaFeatureAddress;
814
// Start this feature with no evidence for any config.
815 tables->ClearFeatureEvidence(ClassTemplate);
816
817 /* Precompute Feature Address offset for Proto Pruning */
// Each coordinate is reduced with >> 2 and then doubled, so consecutive
// buckets are two 32-bit words apart. The Y and Theta tables start
// NUM_PP_BUCKETS * 2 and NUM_PP_BUCKETS * 4 words after the X table.
// NOTE(review): the two words per bucket presumably hold the two halves of
// a proto set, matching the two-pass loop below - confirm against the
// ProtoPruner declaration.
818 XFeatureAddress = ((Feature->X >> 2) << 1);
819 YFeatureAddress = (NUM_PP_BUCKETS << 1) + ((Feature->Y >> 2) << 1);
820 ThetaFeatureAddress = (NUM_PP_BUCKETS << 2) + ((Feature->Theta >> 2) << 1);
821
822 for (ProtoSetIndex = 0, ActualProtoNum = 0;
823 ProtoSetIndex < ClassTemplate->NumProtoSets; ProtoSetIndex++) {
824 ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
825 ProtoPrunerPtr = reinterpret_cast<uint32_t *>((*ProtoSet).ProtoPruner);
// Each pass handles half of the proto set: ProtoNum and ActualProtoNum move
// on by PROTOS_PER_PROTO_SET / 2 while ProtoMask and ProtoPrunerPtr advance
// by one word.
826 for (ProtoNum = 0; ProtoNum < PROTOS_PER_PROTO_SET;
827 ProtoNum += (PROTOS_PER_PROTO_SET >> 1), ActualProtoNum +=
828 (PROTOS_PER_PROTO_SET >> 1), ProtoMask++, ProtoPrunerPtr++) {
829 /* Prune Protos of current Proto Set */
// A proto bit survives only if it is set in the pruner words for the
// feature's X, Y and Theta buckets and in the caller's proto mask.
830 ProtoWord = *(ProtoPrunerPtr + XFeatureAddress);
831 ProtoWord &= *(ProtoPrunerPtr + YFeatureAddress);
832 ProtoWord &= *(ProtoPrunerPtr + ThetaFeatureAddress);
833 ProtoWord &= *ProtoMask;
834
835 if (ProtoWord != 0) {
// Visit the set bits of ProtoWord one byte at a time: offset_table gives
// the position of the lowest set bit in the current byte and next_table
// clears it; proto_word_offset counts the bits of the bytes already used.
836 proto_byte = ProtoWord & 0xff;
837 ProtoWord >>= 8;
838 proto_word_offset = 0;
839 while (ProtoWord != 0 || proto_byte != 0) {
840 while (proto_byte == 0) {
841 proto_byte = ProtoWord & 0xff;
842 ProtoWord >>= 8;
843 proto_word_offset += 8;
844 }
845 proto_offset = offset_table[proto_byte] + proto_word_offset;
846 proto_byte = next_table[proto_byte];
847 Proto = &(ProtoSet->Protos[ProtoNum + proto_offset]);
848 ConfigWord = Proto->Configs[0];
// A3 combines the proto coefficients A, B, C with the feature position
// taken relative to 128; M3 is the angle difference, wrapped to a signed
// 8-bit value, times kIntThetaFudge. Both are doubled.
// NOTE(review): looks like a scaled distance from the proto line and a
// scaled angular error - confirm against the INT_PROTO documentation.
849 int32_t A3 = (((Proto->A * (Feature->X - 128)) * 2)
850 - (Proto->B * (Feature->Y - 128)) + (Proto->C * 512));
851 int32_t M3 = ((static_cast<int8_t>(Feature->Theta - Proto->Angle)) *
852 kIntThetaFudge) * 2;
853
// Negative values are made non-negative with a bitwise complement
// (-v - 1) rather than an exact negation.
854 if (A3 < 0)
855 A3 = ~A3;
856 if (M3 < 0)
857 M3 = ~M3;
// Drop the low bits and clamp both terms to evidence_mult_mask_.
858 A3 >>= mult_trunc_shift_bits_;
859 M3 >>= mult_trunc_shift_bits_;
860 if (static_cast<uint32_t>(A3) > evidence_mult_mask_)
861 A3 = evidence_mult_mask_;
862 if (static_cast<uint32_t>(M3) > evidence_mult_mask_)
863 M3 = evidence_mult_mask_;
864
// The sum of squares, shifted down, indexes the similarity-to-evidence
// table; anything above evidence_table_mask_ counts as zero evidence.
865 uint32_t A4 = (A3 * A3) + (M3 * M3);
866 A4 >>= table_trunc_shift_bits_;
867 if (A4 > evidence_table_mask_)
868 Evidence = 0;
869 else
870 Evidence = similarity_evidence_table_[A4];
871
872 if (PrintFeatureMatchesOn (Debug))
873 IMDebugConfiguration (FeatureNum,
874 ActualProtoNum + proto_offset,
875 Evidence, ConfigWord);
876
// Keep only the configs of this proto that the caller enabled.
877 ConfigWord &= *ConfigMask;
878
// Same byte-wise bit walk as above, over the config bits. Because
// feature_evidence_index is raised by 8 when a byte is loaded, 8 is
// subtracted again to obtain the bit position within the word.
879 uint8_t feature_evidence_index = 0;
880 uint8_t config_byte = 0;
881 while (ConfigWord != 0 || config_byte != 0) {
882 while (config_byte == 0) {
883 config_byte = ConfigWord & 0xff;
884 ConfigWord >>= 8;
885 feature_evidence_index += 8;
886 }
887 const uint8_t config_offset =
888 offset_table[config_byte] + feature_evidence_index - 8;
889 config_byte = next_table[config_byte];
// Each config keeps the best evidence seen for this feature.
890 if (Evidence > tables->feature_evidence_[config_offset])
891 tables->feature_evidence_[config_offset] = Evidence;
892 }
893
// Insert Evidence into this proto's list of ProtoLengths entries: whenever
// it beats the stored value the two are swapped and the displaced value is
// carried on to the next slot; a carried value of 0 ends the walk early.
894 uint8_t* UINT8Pointer =
895 &(tables->proto_evidence_[ActualProtoNum + proto_offset][0]);
896 for (uint8_t ProtoIndex =
897 ClassTemplate->ProtoLengths[ActualProtoNum + proto_offset];
898 ProtoIndex > 0; ProtoIndex--, UINT8Pointer++) {
899 if (Evidence > *UINT8Pointer) {
900 uint8_t Temp = *UINT8Pointer;
901 *UINT8Pointer = Evidence;
902 Evidence = Temp;
903 }
904 else if (Evidence == 0)
905 break;
906 }
907 }
908 }
909 }
910 }
911
912 if (PrintFeatureMatchesOn(Debug)) {
913 IMDebugConfigurationSum(FeatureNum, tables->feature_evidence_,
914 ClassTemplate->NumConfigs);
915 }
916
// Add this feature's per-config evidence to the running sums and total it
// for the return value.
917 int* IntPointer = tables->sum_feature_evidence_;
918 uint8_t* UINT8Pointer = tables->feature_evidence_;
919 int SumOverConfigs = 0;
920 for (int ConfigNum = ClassTemplate->NumConfigs; ConfigNum > 0; ConfigNum--) {
921 int evidence = *UINT8Pointer++;
922 SumOverConfigs += evidence;
923 *IntPointer++ += evidence;
924 }
925 return SumOverConfigs;
926}
927
931#ifndef GRAPHICS_DISABLED
932void IntegerMatcher::DebugFeatureProtoError(
933 INT_CLASS ClassTemplate,
934 BIT_VECTOR ProtoMask,
935 BIT_VECTOR ConfigMask,
936 const ScratchEvidence& tables,
937 int16_t NumFeatures,
938 int Debug) {
939 float ProtoConfigs[MAX_NUM_CONFIGS];
940 int ConfigNum;
941 uint32_t ConfigWord;
942 int ProtoSetIndex;
943 uint16_t ProtoNum;
944 uint8_t ProtoWordNum;
945 PROTO_SET ProtoSet;
946 uint16_t ActualProtoNum;
947
948 if (PrintMatchSummaryOn(Debug)) {
949 cprintf("Configuration Mask:\n");
950 for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
951 cprintf("%1d", (((*ConfigMask) >> ConfigNum) & 1));
952 cprintf("\n");
953
954 cprintf("Feature Error for Configurations:\n");
955 for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++) {
956 cprintf(
957 " %5.1f",
958 100.0 * (1.0 - static_cast<float>(tables.sum_feature_evidence_[ConfigNum])
959 / NumFeatures / 256.0));
960 }
961 cprintf("\n\n\n");
962 }
963
964 if (PrintMatchSummaryOn (Debug)) {
965 cprintf ("Proto Mask:\n");
966 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
967 ProtoSetIndex++) {
968 ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
969 for (ProtoWordNum = 0; ProtoWordNum < 2;
970 ProtoWordNum++, ProtoMask++) {
971 ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
972 for (ProtoNum = 0;
973 ((ProtoNum < (PROTOS_PER_PROTO_SET >> 1))
974 && (ActualProtoNum < ClassTemplate->NumProtos));
975 ProtoNum++, ActualProtoNum++)
976 cprintf ("%1d", (((*ProtoMask) >> ProtoNum) & 1));
977 cprintf ("\n");
978 }
979 }
980 cprintf ("\n");
981 }
982
983 for (int i = 0; i < ClassTemplate->NumConfigs; i++)
984 ProtoConfigs[i] = 0;
985
986 if (PrintProtoMatchesOn (Debug)) {
987 cprintf ("Proto Evidence:\n");
988 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
989 ProtoSetIndex++) {
990 ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
991 ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
992 for (ProtoNum = 0;
993 ((ProtoNum < PROTOS_PER_PROTO_SET) &&
994 (ActualProtoNum < ClassTemplate->NumProtos));
995 ProtoNum++, ActualProtoNum++) {
996 cprintf ("P %3d =", ActualProtoNum);
997 int temp = 0;
998 for (uint8_t j = 0; j < ClassTemplate->ProtoLengths[ActualProtoNum]; j++) {
999 uint8_t data = tables.proto_evidence_[ActualProtoNum][j];
1000 cprintf(" %d", data);
1001 temp += data;
1002 }
1003
1004 cprintf(" = %6.4f%%\n",
1005 temp / 256.0 / ClassTemplate->ProtoLengths[ActualProtoNum]);
1006
1007 ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
1008 ConfigNum = 0;
1009 while (ConfigWord) {
1010 cprintf ("%5d", ConfigWord & 1 ? temp : 0);
1011 if (ConfigWord & 1)
1012 ProtoConfigs[ConfigNum] += temp;
1013 ConfigNum++;
1014 ConfigWord >>= 1;
1015 }
1016 cprintf("\n");
1017 }
1018 }
1019 }
1020
1021 if (PrintMatchSummaryOn (Debug)) {
1022 cprintf ("Proto Error for Configurations:\n");
1023 for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1024 cprintf (" %5.1f",
1025 100.0 * (1.0 -
1026 ProtoConfigs[ConfigNum] /
1027 ClassTemplate->ConfigLengths[ConfigNum] / 256.0));
1028 cprintf ("\n\n");
1029 }
1030
1031 if (PrintProtoMatchesOn (Debug)) {
1032 cprintf ("Proto Sum for Configurations:\n");
1033 for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1034 cprintf (" %4.1f", ProtoConfigs[ConfigNum] / 256.0);
1035 cprintf ("\n\n");
1036
1037 cprintf ("Proto Length for Configurations:\n");
1038 for (ConfigNum = 0; ConfigNum < ClassTemplate->NumConfigs; ConfigNum++)
1039 cprintf (" %4.1f",
1040 static_cast<float>(ClassTemplate->ConfigLengths[ConfigNum]));
1041 cprintf ("\n\n");
1042 }
1043
1044}
1045
1046void IntegerMatcher::DisplayProtoDebugInfo(
1047 INT_CLASS ClassTemplate,
1048 BIT_VECTOR ConfigMask,
1049 const ScratchEvidence& tables,
1050 bool SeparateDebugWindows) {
1051 uint16_t ProtoNum;
1052 uint16_t ActualProtoNum;
1053 PROTO_SET ProtoSet;
1054 int ProtoSetIndex;
1055
1057 if (SeparateDebugWindows) {
1060 }
1061
1062 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
1063 ProtoSetIndex++) {
1064 ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
1065 ActualProtoNum = ProtoSetIndex * PROTOS_PER_PROTO_SET;
1066 for (ProtoNum = 0;
1067 ((ProtoNum < PROTOS_PER_PROTO_SET) &&
1068 (ActualProtoNum < ClassTemplate->NumProtos));
1069 ProtoNum++, ActualProtoNum++) {
1070 /* Compute Average for Actual Proto */
1071 int temp = 0;
1072 for (uint8_t i = 0; i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
1073 temp += tables.proto_evidence_[ActualProtoNum][i];
1074
1075 temp /= ClassTemplate->ProtoLengths[ActualProtoNum];
1076
1077 if ((ProtoSet->Protos[ProtoNum]).Configs[0] & (*ConfigMask)) {
1078 DisplayIntProto(ClassTemplate, ActualProtoNum, temp / 255.0);
1079 }
1080 }
1081 }
1082}
1083
1084
1085void IntegerMatcher::DisplayFeatureDebugInfo(
1086 INT_CLASS ClassTemplate,
1087 BIT_VECTOR ProtoMask,
1088 BIT_VECTOR ConfigMask,
1089 int16_t NumFeatures,
1090 const INT_FEATURE_STRUCT* Features,
1091 int AdaptFeatureThreshold,
1092 int Debug,
1093 bool SeparateDebugWindows) {
1094 auto *tables = new ScratchEvidence();
1095
1096 tables->Clear(ClassTemplate);
1097
1099 if (SeparateDebugWindows) {
1102 }
1103
1104 for (int Feature = 0; Feature < NumFeatures; Feature++) {
1105 UpdateTablesForFeature(
1106 ClassTemplate, ProtoMask, ConfigMask, Feature, &Features[Feature],
1107 tables, 0);
1108
1109 /* Find Best Evidence for Current Feature */
1110 int best = 0;
1111 assert(ClassTemplate->NumConfigs < MAX_NUM_CONFIGS);
1112 for (int i = 0; i < MAX_NUM_CONFIGS && i < ClassTemplate->NumConfigs; i++)
1113 if (tables->feature_evidence_[i] > best)
1114 best = tables->feature_evidence_[i];
1115
1116 /* Update display for current feature */
1117 if (ClipMatchEvidenceOn(Debug)) {
1118 if (best < AdaptFeatureThreshold)
1119 DisplayIntFeature(&Features[Feature], 0.0);
1120 else
1121 DisplayIntFeature(&Features[Feature], 1.0);
1122 } else {
1123 DisplayIntFeature(&Features[Feature], best / 255.0);
1124 }
1125 }
1126
1127 delete tables;
1128}
1129#endif
1130
1135 INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask) {
1136
1137 int *IntPointer;
1138 uint32_t ConfigWord;
1139 int ProtoSetIndex;
1140 uint16_t ProtoNum;
1141 PROTO_SET ProtoSet;
1142 int NumProtos;
1143 uint16_t ActualProtoNum;
1144
1145 NumProtos = ClassTemplate->NumProtos;
1146
1147 for (ProtoSetIndex = 0; ProtoSetIndex < ClassTemplate->NumProtoSets;
1148 ProtoSetIndex++) {
1149 ProtoSet = ClassTemplate->ProtoSets[ProtoSetIndex];
1150 ActualProtoNum = (ProtoSetIndex * PROTOS_PER_PROTO_SET);
1151 for (ProtoNum = 0;
1152 ((ProtoNum < PROTOS_PER_PROTO_SET) && (ActualProtoNum < NumProtos));
1153 ProtoNum++, ActualProtoNum++) {
1154 int temp = 0;
1155 assert(ClassTemplate->ProtoLengths[ActualProtoNum] < MAX_PROTO_INDEX);
1156 for (uint8_t i = 0; i < MAX_PROTO_INDEX &&
1157 i < ClassTemplate->ProtoLengths[ActualProtoNum]; i++)
1158 temp += proto_evidence_[ActualProtoNum] [i];
1159
1160 ConfigWord = ProtoSet->Protos[ProtoNum].Configs[0];
1161 ConfigWord &= *ConfigMask;
1162 IntPointer = sum_feature_evidence_;
1163 while (ConfigWord) {
1164 if (ConfigWord & 1)
1165 *IntPointer += temp;
1166 IntPointer++;
1167 ConfigWord >>= 1;
1168 }
1169 }
1170 }
1171}
1172
1178 INT_CLASS ClassTemplate, int16_t NumFeatures) {
1179
1180 assert(ClassTemplate->NumConfigs < MAX_NUM_CONFIGS);
1181 for (int i = 0; i < MAX_NUM_CONFIGS && i < ClassTemplate->NumConfigs; i++) {
1183 (NumFeatures + ClassTemplate->ConfigLengths[i]);
1184 }
1185}
1186
1192int IntegerMatcher::FindBestMatch(
1193 INT_CLASS class_template,
1194 const ScratchEvidence &tables,
1195 UnicharRating* result) {
1196 int best_match = 0;
1197 result->config = 0;
1198 result->fonts.truncate(0);
1199 result->fonts.reserve(class_template->NumConfigs);
1200
1201 /* Find best match */
1202 assert(class_template->NumConfigs < MAX_NUM_CONFIGS);
1203 for (int c = 0; c < MAX_NUM_CONFIGS && c < class_template->NumConfigs; ++c) {
1204 int rating = tables.sum_feature_evidence_[c];
1205 if (*classify_debug_level_ > 2)
1206 tprintf("Config %d, rating=%d\n", c, rating);
1207 if (rating > best_match) {
1208 result->config = c;
1209 best_match = rating;
1210 }
1211 result->fonts.push_back(ScoredFont(c, rating));
1212 }
1213
1214 // Compute confidence on a Probability scale.
1215 result->rating = best_match / 65536.0f;
1216
1217 return best_match;
1218}
1219
1224float IntegerMatcher::ApplyCNCorrection(float rating, int blob_length,
1225 int normalization_factor,
1226 int matcher_multiplier) {
1227 int divisor = blob_length + matcher_multiplier;
1228 return divisor == 0 ? 1.0f : (rating * blob_length +
1229 matcher_multiplier * normalization_factor / 256.0f) / divisor;
1230}
int RoundUp(int n, int block_size)
Definition: helpers.h:102
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:108
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
#define SE_TABLE_SIZE
Definition: intmatcher.h:56
#define SE_TABLE_BITS
Definition: intmatcher.h:55
void DisplayIntFeature(const INT_FEATURE_STRUCT *Feature, float Evidence)
Definition: intproto.cpp:590
void InitIntMatchWindowIfReqd()
Definition: intproto.cpp:1722
void InitFeatureDisplayWindowIfReqd()
Definition: intproto.cpp:1754
void DisplayIntProto(INT_CLASS Class, PROTO_ID ProtoId, float Evidence)
Definition: intproto.cpp:608
void InitProtoDisplayWindowIfReqd()
Definition: intproto.cpp:1743
#define MatchDebuggingOn(D)
Definition: intproto.h:195
#define PrintFeatureMatchesOn(D)
Definition: intproto.h:199
#define MAX_PROTO_INDEX
Definition: intproto.h:44
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:152
#define PrintMatchSummaryOn(D)
Definition: intproto.h:196
#define MAX_NUM_CONFIGS
Definition: intproto.h:47
#define BITS_PER_WERD
Definition: intproto.h:45
#define CLASS_PRUNER_CLASS_MASK
Definition: intproto.h:56
#define DisplayFeatureMatchesOn(D)
Definition: intproto.h:197
#define DisplayProtoMatchesOn(D)
Definition: intproto.h:198
#define NUM_CP_BUCKETS
Definition: intproto.h:53
#define WERDS_PER_CP_VECTOR
Definition: intproto.h:62
#define PROTOS_PER_PROTO_SET
Definition: intproto.h:49
#define ClipMatchEvidenceOn(D)
Definition: intproto.h:201
#define NUM_PP_BUCKETS
Definition: intproto.h:52
#define NUM_BITS_PER_CLASS
Definition: intproto.h:55
#define PrintProtoMatchesOn(D)
Definition: intproto.h:200
uint32_t * BIT_VECTOR
Definition: bitvec.h:28
void cprintf(const char *format,...)
Definition: callcpp.cpp:32
int16_t PROTO_ID
Definition: matchdefs.h:40
uint8_t FEATURE_ID
Definition: matchdefs.h:46
void init_to_size(int size, const T &t)
UNICHARSET unicharset
Definition: ccutil.h:73
Definition: strngs.h:45
const char * string() const
Definition: strngs.cpp:194
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:734
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:878
STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
int classify_cp_cutoff_strength
Definition: classify.h:503
int classify_class_pruner_multiplier
Definition: classify.h:501
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:452
ShapeTable * shape_table_
Definition: classify.h:546
int classify_class_pruner_threshold
Definition: classify.h:499
bool disable_character_fragments
Definition: classify.h:486
void SummarizeResult(const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const uint16_t *expected_num_features, int norm_multiplier, const uint8_t *normalization_factors) const
Definition: intmatcher.cpp:377
void AdjustForExpectedNumFeatures(const uint16_t *expected_num_features, int cutoff_strength)
Definition: intmatcher.cpp:251
void NormalizeForXheight(int norm_multiplier, const uint8_t *normalization_factors)
Definition: intmatcher.cpp:286
void DisableFragments(const UNICHARSET &unicharset)
Definition: intmatcher.cpp:272
void ComputeScores(const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features)
Definition: intmatcher.cpp:180
ClassPruner(int max_classes)
Definition: intmatcher.cpp:149
void PruneAndSort(int pruning_factor, int keep_this, bool max_of_non_fragments, const UNICHARSET &unicharset)
Definition: intmatcher.cpp:304
int SetupResults(GenericVector< CP_RESULT_STRUCT > *results) const
Definition: intmatcher.cpp:400
void DebugMatch(const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const INT_FEATURE_STRUCT *features) const
Definition: intmatcher.cpp:340
void DisableDisabledClasses(const UNICHARSET &unicharset)
Definition: intmatcher.cpp:264
void Clear(const INT_CLASS class_template)
Definition: intmatcher.cpp:739
void ClearFeatureEvidence(const INT_CLASS class_template)
Definition: intmatcher.cpp:746
int sum_feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:60
void UpdateSumOfProtoEvidences(INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask)
uint8_t feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:59
uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]
Definition: intmatcher.h:61
void NormalizeSums(INT_CLASS ClassTemplate, int16_t NumFeatures)
static const float kSEExponentialMultiplier
Definition: intmatcher.h:80
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:657
static const int kEvidenceTableBits
Definition: intmatcher.h:76
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:511
static const int kIntThetaFudge
Definition: intmatcher.h:74
static const int kIntEvidenceTruncBits
Definition: intmatcher.h:78
static const float kSimilarityCenter
Definition: intmatcher.h:82
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:589
IntegerMatcher(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:709
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
uint32_t p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
Definition: intproto.h:78
uint8_t Angle
Definition: intproto.h:85
uint32_t Configs[WERDS_PER_CONFIG_VEC]
Definition: intproto.h:86
INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]
Definition: intproto.h:97
uint8_t NumProtoSets
Definition: intproto.h:107
uint16_t NumProtos
Definition: intproto.h:106
uint8_t * ProtoLengths
Definition: intproto.h:110
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:109
uint16_t ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:111
uint8_t NumConfigs
Definition: intproto.h:108
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:122
GenericVector< ScoredFont > fonts
Definition: shapetable.h:87