tesseract 4.1.1
Loading...
Searching...
No Matches
intfeaturemap.h
Go to the documentation of this file.
1// Copyright 2010 Google Inc. All Rights Reserved.
2// Author: rays@google.com (Ray Smith)
4// File: intfeaturemap.h
5// Description: Encapsulation of IntFeatureSpace with IndexMapBiDi
6// to provide a subspace mapping and fast feature lookup.
7// Created: Tue Oct 26 08:58:30 PDT 2010
8//
9// Licensed under the Apache License, Version 2.0 (the "License");
10// you may not use this file except in compliance with the License.
11// You may obtain a copy of the License at
12// http://www.apache.org/licenses/LICENSE-2.0
13// Unless required by applicable law or agreed to in writing, software
14// distributed under the License is distributed on an "AS IS" BASIS,
15// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16// See the License for the specific language governing permissions and
17// limitations under the License.
18//
20
21#ifndef TESSERACT_CLASSIFY_INTFEATUREMAP_H_
22#define TESSERACT_CLASSIFY_INTFEATUREMAP_H_
23
24#include "intfeaturespace.h"
25#include "indexmapbidi.h"
26#include "intproto.h"
27
28namespace tesseract {
29
30class SampleIterator;
31
32 // Number of positive and negative offset maps.
// Used as the extent of both the offset_plus_ and offset_minus_ arrays in
// IntFeatureMap, and as the bound on the dir argument of OffsetFeature, which
// is documented to lie in [-kNumOffsetMaps, kNumOffsetMaps].
33static const int kNumOffsetMaps = 2;
34
35// Class to map a feature space defined by INT_FEATURE_STRUCT to a compact
36// down-sampled subspace of actually used features.
37// The IntFeatureMap copes with 2 stages of transformation:
38// The first step is down-sampling (re-quantization) and converting to a
39// single index value from the 3-D input:
40// INT_FEATURE_STRUCT <-> index feature (via IntFeatureSpace) and
41// the second is a feature-space compaction to map only the feature indices
42// that are actually used. This saves space in classifiers that are built
43// using the mapped feature space.
44// index (sparse) feature <-> map (compact) feature via IndexMapBiDi.
45// Although the transformations are reversible, the inverses are lossy and do
46// not return the exact input INT_FEATURE_STRUCT, due to the many->one nature
47// of both transformations.
49 public:
52
53 // Accessors.
// Size of the index (sparse) feature space, as reported by
// feature_space_.Size().
54 int sparse_size() const {
55 return feature_space_.Size();
56 }
// Size of the compacted feature space: returns the stored compact_size_
// member without recomputing anything.
57 int compact_size() const {
58 return compact_size_;
59 }
61 return feature_space_;
62 }
// Read-only access to the IndexMapBiDi held in feature_map_, the mapping from
// the indexed feature space to the compacted space.
63 const IndexMapBiDi& feature_map() const {
64 return feature_map_;
65 }
66
67 // Pseudo-accessors.
// NOTE(review): only the declarations are visible in this header; the one-line
// summaries below are inferred from the names and from the class comment
// (INT_FEATURE_STRUCT <-> index feature <-> map feature) and should be
// confirmed against the definitions.
// Presumably converts an INT_FEATURE_STRUCT to an index (sparse) feature.
68 int IndexFeature(const INT_FEATURE_STRUCT& f) const;
// Presumably converts an INT_FEATURE_STRUCT to a map (compact) feature.
69 int MapFeature(const INT_FEATURE_STRUCT& f) const;
// Presumably converts an index (sparse) feature to a map (compact) feature.
70 int MapIndexFeature(int index_feature) const;
// Presumably the inverse of IndexFeature; the class comment warns that the
// inverse transformations are lossy.
71 INT_FEATURE_STRUCT InverseIndexFeature(int index_feature) const;
// Presumably the inverse of MapFeature; likewise lossy per the class comment.
72 INT_FEATURE_STRUCT InverseMapFeature(int map_feature) const;
// Presumably marks the given map (compact) feature as deleted. This is the
// only non-const member in this group.
73 void DeleteMapFeature(int map_feature);
// Presumably reports whether the given map (compact) feature has been deleted.
74 bool IsMapFeatureDeleted(int map_feature) const;
75
76 // Copies the given feature_space and uses it as the index feature map
77 // from INT_FEATURE_STRUCT.
79
80 // Helper to return an offset index feature. In this context an offset
81 // feature with a dir of +/-1 is a feature of a similar direction,
82 // but shifted perpendicular to the direction of the feature. An offset
83 // feature with a dir of +/-2 is a feature at the same position, but rotated
84 // by +/- one [compact] quantum. Returns the index of the generated offset
85 // feature, or -1 if it doesn't exist. Dir should be in
86 // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction.
87 // A dir of 0 is an identity transformation.
88 // Both input and output are from the index(sparse) feature space, not
89 // the mapped/compact feature space, but the offset feature is the minimum
90 // distance moved from the input to guarantee that it maps to the next
91 // available quantum in the mapped/compact space.
92 int OffsetFeature(int index_feature, int dir) const;
93
94 // Computes the features used by the subset of samples defined by
95 // the iterator and sets up the feature mapping.
96 // Returns the size of the compacted feature space.
98
99 // After deleting some features, finish setting up the mapping, and map
100 // all the samples. Returns the size of the compacted feature space.
102
103 // Indexes the given array of features to a vector of sorted indices.
105 int num_features,
106 GenericVector<int>* sorted_features) const {
107 feature_space_.IndexAndSortFeatures(features, num_features,
108 sorted_features);
109 }
110 // Maps the given array of index/sparse features to an array of map/compact
111 // features.
112 // Assumes the input is sorted. The output indices are sorted and uniqued.
113 // Returns the number of "missed" features, being features that
114 // don't map to the compact feature space.
// Thin wrapper: forwards both arguments to feature_map_.MapFeatures and
// returns its result unchanged.
115 int MapIndexedFeatures(const GenericVector<int>& index_features,
116 GenericVector<int>* map_features) const {
117 return feature_map_.MapFeatures(index_features, map_features);
118 }
119
120 // Prints the map features from the set in human-readable form.
121 void DebugMapFeatures(const GenericVector<int>& map_features) const;
122
123 private:
124 void Clear();
125
126 // Helper to compute an offset index feature. In this context an offset
127 // feature with a dir of +/-1 is a feature of a similar direction,
128 // but shifted perpendicular to the direction of the feature. An offset
129 // feature with a dir of +/-2 is a feature at the same position, but rotated
130 // by +/- one [compact] quantum. Returns the index of the generated offset
131 // feature, or -1 if it doesn't exist. Dir should be in
132 // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction.
133 // A dir of 0 is an identity transformation.
134 // Both input and output are from the index(sparse) feature space, not
135 // the mapped/compact feature space, but the offset feature is the minimum
136 // distance moved from the input to guarantee that it maps to the next
137 // available quantum in the mapped/compact space.
138 int ComputeOffsetFeature(int index_feature, int dir) const;
139
140 // True if the mapping has changed since it was last finalized.
141 bool mapping_changed_;
142 // Size of the compacted feature space, after unused features are removed.
143 int compact_size_;
144 // Feature space quantization definition and indexing from INT_FEATURE_STRUCT.
145 IntFeatureSpace feature_space_;
146 // Mapping from indexed feature space to the compacted space with unused
147 // features mapping to -1.
148 IndexMapBiDi feature_map_;
149 // Index tables to map a feature index to the corresponding feature after a
150 // shift perpendicular to the feature direction, or a rotation in place.
151 // An entry of -1 indicates that there is no corresponding feature.
152 // Array of arrays of size feature_space_.Size() owned by this class.
153 int* offset_plus_[kNumOffsetMaps];
154 int* offset_minus_[kNumOffsetMaps];
155
156 // Don't use default copy and assign!
158 void operator=(const IntFeatureMap&);
159};
160
161} // namespace tesseract.
162
163#endif // TESSERACT_CLASSIFY_INTFEATUREMAP_H_
int MapFeatures(const GenericVector< int > &sparse, GenericVector< int > *compact) const
void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features, GenericVector< int > *sorted_features) const
int MapFeature(const INT_FEATURE_STRUCT &f) const
int MapIndexedFeatures(const GenericVector< int > &index_features, GenericVector< int > *map_features) const
INT_FEATURE_STRUCT InverseIndexFeature(int index_feature) const
void DebugMapFeatures(const GenericVector< int > &map_features) const
bool IsMapFeatureDeleted(int map_feature) const
INT_FEATURE_STRUCT InverseMapFeature(int map_feature) const
int MapIndexFeature(int index_feature) const
int OffsetFeature(int index_feature, int dir) const
void DeleteMapFeature(int map_feature)
int FindNZFeatureMapping(SampleIterator *it)
void Init(const IntFeatureSpace &feature_space)
const IntFeatureSpace & feature_space() const
Definition: intfeaturemap.h:60
const IndexMapBiDi & feature_map() const
Definition: intfeaturemap.h:63
int IndexFeature(const INT_FEATURE_STRUCT &f) const
int FinalizeMapping(SampleIterator *it)
void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features, GenericVector< int > *sorted_features) const