tesseract 4.1.1
Loading...
Searching...
No Matches
intfeaturedist.cpp
Go to the documentation of this file.
1// Copyright 2011 Google Inc. All Rights Reserved.
2// Author: rays@google.com (Ray Smith)
4// File: intfeaturedist.cpp
5// Description: Fast set-difference-based feature distance calculator.
6// Created: Thu Sep 01 13:07:30 PDT 2011
7//
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20#include "intfeaturedist.h"
21#include "intfeaturemap.h"
22
23namespace tesseract {
24
26 : size_(0), total_feature_weight_(0.0),
27 feature_map_(nullptr), features_(nullptr),
28 features_delta_one_(nullptr), features_delta_two_(nullptr) {
29}
30
32 Clear();
33}
34
35// Initialize the table to the given size of feature space.
36void IntFeatureDist::Init(const IntFeatureMap* feature_map) {
37 size_ = feature_map->sparse_size();
38 Clear();
39 feature_map_ = feature_map;
40 features_ = new bool[size_];
41 features_delta_one_ = new bool[size_];
42 features_delta_two_ = new bool[size_];
43 memset(features_, false, size_ * sizeof(features_[0]));
44 memset(features_delta_one_, false, size_ * sizeof(features_delta_one_[0]));
45 memset(features_delta_two_, false, size_ * sizeof(features_delta_two_[0]));
46 total_feature_weight_ = 0.0;
47}
48
49// Setup the map for the given indexed_features that have been indexed by
50// feature_map.
51void IntFeatureDist::Set(const GenericVector<int>& indexed_features,
52 int canonical_count, bool value) {
53 total_feature_weight_ = canonical_count;
54 for (int i = 0; i < indexed_features.size(); ++i) {
55 const int f = indexed_features[i];
56 features_[f] = value;
57 for (int dir = -kNumOffsetMaps; dir <= kNumOffsetMaps; ++dir) {
58 if (dir == 0) continue;
59 const int mapped_f = feature_map_->OffsetFeature(f, dir);
60 if (mapped_f >= 0) {
61 features_delta_one_[mapped_f] = value;
62 for (int dir2 = -kNumOffsetMaps; dir2 <= kNumOffsetMaps; ++dir2) {
63 if (dir2 == 0) continue;
64 const int mapped_f2 = feature_map_->OffsetFeature(mapped_f, dir2);
65 if (mapped_f2 >= 0)
66 features_delta_two_[mapped_f2] = value;
67 }
68 }
69 }
70 }
71}
72
73// Compute the distance between the given feature vector and the last
74// Set feature vector.
76 const GenericVector<int>& features) const {
77 const int num_test_features = features.size();
78 const double denominator = total_feature_weight_ + num_test_features;
79 double misses = denominator;
80 for (int i = 0; i < num_test_features; ++i) {
81 const int index = features[i];
82 const double weight = 1.0;
83 if (features_[index]) {
84 // A perfect match.
85 misses -= 2.0 * weight;
86 } else if (features_delta_one_[index]) {
87 misses -= 1.5 * weight;
88 } else if (features_delta_two_[index]) {
89 // A near miss.
90 misses -= 1.0 * weight;
91 }
92 }
93 return misses / denominator;
94}
95
96// Compute the distance between the given feature vector and the last
97// Set feature vector.
99 const GenericVector<int>& features) const {
100 const int num_test_features = features.size();
101 const double denominator = total_feature_weight_ + num_test_features;
102 double misses = denominator;
103 for (int i = 0; i < num_test_features; ++i) {
104 const int index = features[i];
105 const double weight = 1.0;
106 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(features[i]);
107 tprintf("Testing feature weight %g:", weight);
108 f.print();
109 if (features_[index]) {
110 // A perfect match.
111 misses -= 2.0 * weight;
112 tprintf("Perfect hit\n");
113 } else if (features_delta_one_[index]) {
114 misses -= 1.5 * weight;
115 tprintf("-1 hit\n");
116 } else if (features_delta_two_[index]) {
117 // A near miss.
118 misses -= 1.0 * weight;
119 tprintf("-2 hit\n");
120 } else {
121 tprintf("Total miss\n");
122 }
123 }
124 tprintf("Features present:");
125 for (int i = 0; i < size_; ++i) {
126 if (features_[i]) {
127 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i);
128 f.print();
129 }
130 }
131 tprintf("\nMinus one features:");
132 for (int i = 0; i < size_; ++i) {
133 if (features_delta_one_[i]) {
134 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i);
135 f.print();
136 }
137 }
138 tprintf("\nMinus two features:");
139 for (int i = 0; i < size_; ++i) {
140 if (features_delta_two_[i]) {
141 INT_FEATURE_STRUCT f = feature_map_->InverseMapFeature(i);
142 f.print();
143 }
144 }
145 tprintf("\n");
146 return misses / denominator;
147}
148
149// Clear all data.
150void IntFeatureDist::Clear() {
151 delete [] features_;
152 features_ = nullptr;
153 delete [] features_delta_one_;
154 features_delta_one_ = nullptr;
155 delete [] features_delta_two_;
156 features_delta_two_ = nullptr;
157}
158
159} // namespace tesseract
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int size() const
Definition: genericvector.h:72
double FeatureDistance(const GenericVector< int > &features) const
double DebugFeatureDistance(const GenericVector< int > &features) const
void Init(const IntFeatureMap *feature_map)
void Set(const GenericVector< int > &indexed_features, int canonical_count, bool value)
INT_FEATURE_STRUCT InverseMapFeature(int map_feature) const
int OffsetFeature(int index_feature, int dir) const
void print() const
Definition: intproto.h:145