tesseract 4.1.1
Loading...
Searching...
No Matches
indexmapbidi.h
Go to the documentation of this file.
1
2// File: indexmapbidi.h
3// Description: Bi-directional mapping between a sparse and compact space.
4// Author: rays@google.com (Ray Smith)
5// Created: Tue Apr 06 11:33:59 PDT 2010
6//
7// (C) Copyright 2010, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20#ifndef TESSERACT_CCUTIL_INDEXMAPBIDI_H_
21#define TESSERACT_CCUTIL_INDEXMAPBIDI_H_
22
23#include <cstdio>
24#include "genericvector.h"
25
26namespace tesseract {
27
28class IndexMapBiDi;
29
30// Bidirectional one-to-one mapping between a sparse and a compact discrete
31// space. Many entries in the sparse space are unmapped, but those that are
32// mapped have a 1-1 mapping to (and from) the compact space, where all
33// values are used. This is useful for forming subsets of larger collections,
34// such as subsets of character sets, or subsets of binary feature spaces.
35//
36// This base class provides basic functionality with binary search for the
37// SparseToCompact mapping to save memory.
38// For a faster inverse mapping, or to allow a many-to-one mapping, use
39// IndexMapBiDi below.
40// NOTE: there are currently no methods to setup an IndexMap on its own!
41// It must be initialized by copying from an IndexMapBiDi or by DeSerialize.
42class IndexMap {
43 public:
44 virtual ~IndexMap();
45
46 // SparseToCompact takes a sparse index to an index in the compact space.
47 // Uses a binary search to find the result. For faster speed use
48 // IndexMapBiDi, but that takes more memory.
49 virtual int SparseToCompact(int sparse_index) const;
50
51 // CompactToSparse takes a compact index to the corresponding index in the
52 // sparse space.
53 int CompactToSparse(int compact_index) const {
54 return compact_map_[compact_index];
55 }
56 // The size of the sparse space.
57 virtual int SparseSize() const {
58 return sparse_size_;
59 }
60 // The size of the compact space.
61 int CompactSize() const {
62 return compact_map_.size();
63 }
64
65 // Copy from the input.
66 void CopyFrom(const IndexMap& src);
67 void CopyFrom(const IndexMapBiDi& src);
68
69 // Writes to the given file. Returns false in case of error.
70 bool Serialize(FILE* fp) const;
71 // Reads from the given file. Returns false in case of error.
72 // If swap is true, assumes a big/little-endian swap is needed.
73 bool DeSerialize(bool swap, FILE* fp);
74
75 protected:
76 // The sparse space covers integers in the range [0, sparse_size_-1].
77 int32_t sparse_size_;
78 // The compact space covers integers in the range [0, compact_map_.size()-1].
79 // Each element contains the corresponding sparse index.
81};
82
83// Bidirectional many-to-one mapping between a sparse and a compact discrete
84// space. As with IndexMap, many entries may be unmapped, but unlike IndexMap,
85// of those that are, many may be mapped to the same compact index.
86// If the map is many-to-one, it is not possible to directly obtain all the
87// sparse indices that map to a single compact index.
88// This map is time- rather than space-efficient. It stores the entire sparse
89// space.
90// IndexMapBiDi may be initialized in one of 3 ways:
91// 1. Init(size, true);
92// Setup();
93// Sets a complete 1:1 mapping with no unmapped elements.
94// 2. Init(size, false);
95// for ... SetMap(index, true);
96// Setup();
97// Specifies precisely which sparse indices are mapped. The mapping is 1:1.
98// 3. Either of the above, followed by:
99// for ... Merge(index1, index2);
100// CompleteMerges();
101// Allows a many-to-one mapping by merging compact space indices.
102class IndexMapBiDi : public IndexMap {
103 public:
104 ~IndexMapBiDi() override;
105
106 // Top-level init function in a single call to initialize a map to select
107 // a single contiguous subrange [start, end) of the sparse space to be mapped
108 // 1 to 1 to the compact space, with all other elements of the sparse space
109 // left unmapped.
110 // No need to call Setup after this.
111 void InitAndSetupRange(int sparse_size, int start, int end);
112
113 // Initializes just the sparse_map_ to the given size with either all
114 // forward indices mapped (all_mapped = true) or none (all_mapped = false).
115 // Call Setup immediately after, or make calls to SetMap first to adjust the
116 // mapping and then call Setup before using the map.
117 void Init(int size, bool all_mapped);
118 // Sets a given index in the sparse_map_ to be mapped or not.
119 void SetMap(int sparse_index, bool mapped);
120 // Sets up the sparse_map_ and compact_map_ properly after Init and
121 // some calls to SetMap. Assumes an ordered 1-1 map from set indices
122 // in the sparse space to the compact space.
123 void Setup();
124
125 // Merges the two compact space indices. May be called many times, but
126 // the merges must be concluded by a call to CompleteMerges.
127 // Returns true if a merge was actually performed.
128 bool Merge(int compact_index1, int compact_index2);
129 // Returns true if the given compact index has been deleted.
130 bool IsCompactDeleted(int index) const {
131 return MasterCompactIndex(index) < 0;
132 }
133 // Completes one or more Merge operations by further compacting the
134 // compact space.
135 void CompleteMerges();
136
137 // SparseToCompact takes a sparse index to an index in the compact space.
138 int SparseToCompact(int sparse_index) const override {
139 return sparse_map_[sparse_index];
140 }
141 // The size of the sparse space.
142 int SparseSize() const override {
143 return sparse_map_.size();
144 }
145
146 // Copy from the input.
147 void CopyFrom(const IndexMapBiDi& src);
148
149 // Writes to the given file. Returns false in case of error.
150 bool Serialize(FILE* fp) const;
151 // Reads from the given file. Returns false in case of error.
152 // If swap is true, assumes a big/little-endian swap is needed.
153 bool DeSerialize(bool swap, FILE* fp);
154
155 // Bulk calls to SparseToCompact.
156 // Maps the given array of sparse indices to an array of compact indices.
157 // Assumes the input is sorted. The output indices are sorted and uniqued.
158 // Return value is the number of "missed" features, being features that
159 // don't map to the compact feature space.
160 int MapFeatures(const GenericVector<int>& sparse,
161 GenericVector<int>* compact) const;
162
163 private:
164 // Returns the master compact index for a given compact index.
165 // During a multiple merge operation, several compact indices may be
166 // combined, so we need to be able to find the master of all.
167 int MasterCompactIndex(int compact_index) const {
168 while (compact_index >= 0 &&
169 sparse_map_[compact_map_[compact_index]] != compact_index)
170 compact_index = sparse_map_[compact_map_[compact_index]];
171 return compact_index;
172 }
173
174 // Direct look-up of the compact index for each element in sparse space.
175 GenericVector<int32_t> sparse_map_;
176};
177
178} // namespace tesseract.
179
180#endif // TESSERACT_CCUTIL_INDEXMAPBIDI_H_
int size() const
Definition: genericvector.h:72
bool Serialize(FILE *fp) const
virtual int SparseSize() const
Definition: indexmapbidi.h:57
int CompactSize() const
Definition: indexmapbidi.h:61
bool DeSerialize(bool swap, FILE *fp)
void CopyFrom(const IndexMap &src)
virtual int SparseToCompact(int sparse_index) const
GenericVector< int32_t > compact_map_
Definition: indexmapbidi.h:80
int CompactToSparse(int compact_index) const
Definition: indexmapbidi.h:53
void Init(int size, bool all_mapped)
bool Merge(int compact_index1, int compact_index2)
void CopyFrom(const IndexMapBiDi &src)
void SetMap(int sparse_index, bool mapped)
int MapFeatures(const GenericVector< int > &sparse, GenericVector< int > *compact) const
int SparseSize() const override
Definition: indexmapbidi.h:142
void InitAndSetupRange(int sparse_size, int start, int end)
bool Serialize(FILE *fp) const
bool DeSerialize(bool swap, FILE *fp)
int SparseToCompact(int sparse_index) const override
Definition: indexmapbidi.h:138
bool IsCompactDeleted(int index) const
Definition: indexmapbidi.h:130