tesseract 4.1.1
Loading...
Searching...
No Matches
indexmapbidi.cpp
Go to the documentation of this file.
1
2// File: indexmapbidi.cpp
3// Description: Bi-directional mapping between a sparse and compact space.
4// Author: rays@google.com (Ray Smith)
5// Created: Tue Apr 06 11:33:59 PDT 2010
6//
7// (C) Copyright 2010, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20#include "indexmapbidi.h"
21
22namespace tesseract {
23
24// Destructor.
25// It is defined here, so the compiler can create a single vtable
26// instead of weak vtables in every compilation unit.
27IndexMap::~IndexMap() = default;
28
29// SparseToCompact takes a sparse index to an index in the compact space.
30// Uses a binary search to find the result. For faster speed use
31// IndexMapBiDi, but that takes more memory.
32int IndexMap::SparseToCompact(int sparse_index) const {
33 int result = compact_map_.binary_search(sparse_index);
34 return compact_map_[result] == sparse_index ? result : -1;
35}
36
37// Copy from the input.
38void IndexMap::CopyFrom(const IndexMap& src) {
41}
45}
46
47// Writes to the given file. Returns false in case of error.
48bool IndexMap::Serialize(FILE* fp) const {
50}
51
52// Reads from the given file. Returns false in case of error.
53// If swap is true, assumes a big/little-endian swap is needed.
54bool IndexMap::DeSerialize(bool swap, FILE* fp) {
55 uint32_t sparse_size;
56 if (!tesseract::DeSerialize(fp, &sparse_size)) return false;
57 if (swap)
58 ReverseN(&sparse_size, sizeof(sparse_size));
59 // Arbitrarily limit the number of elements to protect against bad data.
60 if (sparse_size > UINT16_MAX) return false;
61 sparse_size_ = sparse_size;
62 return compact_map_.DeSerialize(swap, fp);
63}
64
65// Destructor.
66// It is defined here, so the compiler can create a single vtable
67// instead of weak vtables in every compilation unit.
69
70// Top-level init function in a single call to initialize a map to select
71// a single contiguous subrange [start, end) of the sparse space to be mapped
72// 1 to 1 to the compact space, with all other elements of the sparse space
73// left unmapped.
74// No need to call Setup after this.
75void IndexMapBiDi::InitAndSetupRange(int sparse_size, int start, int end) {
76 Init(sparse_size, false);
77 for (int i = start; i < end; ++i)
78 SetMap(i, true);
79 Setup();
80}
81
82// Initializes just the sparse_map_ to the given size with either all
83// forward indices mapped (all_mapped = true) or none (all_mapped = false).
84// Call Setup immediately after, or make calls to SetMap first to adjust the
85// mapping and then call Setup before using the map.
86void IndexMapBiDi::Init(int size, bool all_mapped) {
87 sparse_map_.init_to_size(size, -1);
88 if (all_mapped) {
89 for (int i = 0; i < size; ++i)
90 sparse_map_[i] = i;
91 }
92}
93
94// Sets a given index in the sparse_map_ to be mapped or not.
95void IndexMapBiDi::SetMap(int sparse_index, bool mapped) {
96 sparse_map_[sparse_index] = mapped ? 0 : -1;
97}
98
99// Sets up the sparse_map_ and compact_map_ properly after Init and
100// some calls to SetMap. Assumes an ordered 1-1 map from set indices
101// in the forward map to the compact space.
103 int compact_size = 0;
104 for (int i = 0; i < sparse_map_.size(); ++i) {
105 if (sparse_map_[i] >= 0) {
106 sparse_map_[i] = compact_size++;
107 }
108 }
109 compact_map_.init_to_size(compact_size, -1);
110 for (int i = 0; i < sparse_map_.size(); ++i) {
111 if (sparse_map_[i] >= 0) {
112 compact_map_[sparse_map_[i]] = i;
113 }
114 }
115 sparse_size_ = sparse_map_.size();
116}
117
118// Copy from the input.
120 sparse_map_ = src.sparse_map_;
122 sparse_size_ = sparse_map_.size();
123}
124
125// Merges the two compact space indices. May be called many times, but
126// the merges must be concluded by a call to CompleteMerges.
127// Returns true if a merge was actually performed.
128bool IndexMapBiDi::Merge(int compact_index1, int compact_index2) {
129 // Find the current master index for index1 and index2.
130 compact_index1 = MasterCompactIndex(compact_index1);
131 compact_index2 = MasterCompactIndex(compact_index2);
132 // Be sure that index1 < index2.
133 if (compact_index1 > compact_index2) {
134 int tmp = compact_index1;
135 compact_index1 = compact_index2;
136 compact_index2 = tmp;
137 } else if (compact_index1 == compact_index2) {
138 return false;
139 }
140 // To save iterating over all sparse_map_ entries, simply make the master
141 // entry for index2 point to index1.
142 // This leaves behind a potential chain of parents that needs to be chased,
143 // as above.
144 sparse_map_[compact_map_[compact_index2]] = compact_index1;
145 if (compact_index1 >= 0)
146 compact_map_[compact_index2] = compact_map_[compact_index1];
147 return true;
148}
149
150// Completes one or more Merge operations by further compacting the
151// compact space. Unused compact space indices are removed, and the used
152// ones above shuffled down to fill the gaps.
153// Example:
154// Input sparse_map_: (x indicates -1)
155// x x 0 x 2 x x 4 x 0 x 2 x
156// Output sparse_map_:
157// x x 0 x 1 x x 2 x 0 x 1 x
158// Output compact_map_:
159// 2 4 7.
161 // Ensure each sparse_map_entry contains a master compact_map_ index.
162 int compact_size = 0;
163 for (int i = 0; i < sparse_map_.size(); ++i) {
164 int compact_index = MasterCompactIndex(sparse_map_[i]);
165 sparse_map_[i] = compact_index;
166 if (compact_index >= compact_size)
167 compact_size = compact_index + 1;
168 }
169 // Re-generate the compact_map leaving holes for unused indices.
170 compact_map_.init_to_size(compact_size, -1);
171 for (int i = 0; i < sparse_map_.size(); ++i) {
172 if (sparse_map_[i] >= 0) {
173 if (compact_map_[sparse_map_[i]] == -1)
174 compact_map_[sparse_map_[i]] = i;
175 }
176 }
177 // Compact the compact_map, leaving tmp_compact_map saying where each
178 // index went to in the compacted map.
179 GenericVector<int32_t> tmp_compact_map;
180 tmp_compact_map.init_to_size(compact_size, -1);
181 compact_size = 0;
182 for (int i = 0; i < compact_map_.size(); ++i) {
183 if (compact_map_[i] >= 0) {
184 tmp_compact_map[i] = compact_size;
185 compact_map_[compact_size++] = compact_map_[i];
186 }
187 }
188 compact_map_.truncate(compact_size);
189 // Now modify the entries in the sparse map to point to the new locations.
190 for (int i = 0; i < sparse_map_.size(); ++i) {
191 if (sparse_map_[i] >= 0) {
192 sparse_map_[i] = tmp_compact_map[sparse_map_[i]];
193 }
194 }
195}
196
197// Writes to the given file. Returns false in case of error.
198bool IndexMapBiDi::Serialize(FILE* fp) const {
199 if (!IndexMap::Serialize(fp)) return false;
200 // Make a vector containing the rest of the map. If the map is many-to-one
201 // then each additional sparse entry needs to be stored.
202 // Normally we store only the compact map to save space.
203 GenericVector<int32_t> remaining_pairs;
204 for (int i = 0; i < sparse_map_.size(); ++i) {
205 if (sparse_map_[i] >= 0 && compact_map_[sparse_map_[i]] != i) {
206 remaining_pairs.push_back(i);
207 remaining_pairs.push_back(sparse_map_[i]);
208 }
209 }
210 if (!remaining_pairs.Serialize(fp)) return false;
211 return true;
212}
213
214// Reads from the given file. Returns false in case of error.
215// If swap is true, assumes a big/little-endian swap is needed.
216bool IndexMapBiDi::DeSerialize(bool swap, FILE* fp) {
217 if (!IndexMap::DeSerialize(swap, fp)) return false;
218 GenericVector<int32_t> remaining_pairs;
219 if (!remaining_pairs.DeSerialize(swap, fp)) return false;
220 sparse_map_.init_to_size(sparse_size_, -1);
221 for (int i = 0; i < compact_map_.size(); ++i) {
222 sparse_map_[compact_map_[i]] = i;
223 }
224 for (int i = 0; i < remaining_pairs.size(); ++i) {
225 int sparse_index = remaining_pairs[i++];
226 sparse_map_[sparse_index] = remaining_pairs[i];
227 }
228 return true;
229}
230
231// Bulk calls to SparseToCompact.
232// Maps the given array of sparse indices to an array of compact indices.
233// Assumes the input is sorted. The output indices are sorted and uniqued.
234// Return value is the number of "missed" features, being features that
235// don't map to the compact feature space.
237 GenericVector<int>* compact) const {
238 compact->truncate(0);
239 int num_features = sparse.size();
240 int missed_features = 0;
241 int prev_good_feature = -1;
242 for (int f = 0; f < num_features; ++f) {
243 int feature = sparse_map_[sparse[f]];
244 if (feature >= 0) {
245 if (feature != prev_good_feature) {
246 compact->push_back(feature);
247 prev_good_feature = feature;
248 }
249 } else {
250 ++missed_features;
251 }
252 }
253 return missed_features;
254}
255
256} // namespace tesseract.
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:185
bool DeSerialize(FILE *fp, char *data, size_t n)
Definition: serialis.cpp:28
bool Serialize(FILE *fp, const char *data, size_t n)
Definition: serialis.cpp:60
void init_to_size(int size, const T &t)
int push_back(T object)
bool Serialize(FILE *fp) const
int size() const
Definition: genericvector.h:72
void truncate(int size)
bool DeSerialize(bool swap, FILE *fp)
int binary_search(const T &target) const
bool Serialize(FILE *fp) const
bool DeSerialize(bool swap, FILE *fp)
void CopyFrom(const IndexMap &src)
virtual int SparseToCompact(int sparse_index) const
GenericVector< int32_t > compact_map_
Definition: indexmapbidi.h:80
void Init(int size, bool all_mapped)
bool Merge(int compact_index1, int compact_index2)
void CopyFrom(const IndexMapBiDi &src)
void SetMap(int sparse_index, bool mapped)
int MapFeatures(const GenericVector< int > &sparse, GenericVector< int > *compact) const
int SparseSize() const override
Definition: indexmapbidi.h:142
void InitAndSetupRange(int sparse_size, int start, int end)
bool Serialize(FILE *fp) const
bool DeSerialize(bool swap, FILE *fp)