tesseract 4.1.1
Loading...
Searching...
No Matches
trainingsample.h
Go to the documentation of this file.
1// Copyright 2010 Google Inc. All Rights Reserved.
2// Author: rays@google.com (Ray Smith)
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7// http://www.apache.org/licenses/LICENSE-2.0
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13//
15
16#ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H_
17#define TESSERACT_TRAINING_TRAININGSAMPLE_H_
18
19#include "elst.h"
20#include "featdefs.h"
21#include "intfx.h"
22#include "intmatcher.h"
23#include "matrix.h"
24#include "mf.h"
25#include "picofeat.h"
26#include "shapetable.h"
27#include "unicharset.h"
28
29struct Pix;
30
31namespace tesseract {
32
33class IntFeatureMap;
34class IntFeatureSpace;
35class ShapeTable;
36
37// Number of elements of cn_feature_.
38static const int kNumCNParams = 4;
39// Number of ways to shift the features when randomizing.
40static const int kSampleYShiftSize = 5;
41// Number of ways to scale the features when randomizing.
42static const int kSampleScaleSize = 3;
43// Total number of different ways to manipulate the features when randomizing.
44// The first and last combinations are removed to avoid an excessive
45// top movement (first) and an identity transformation (last).
46// WARNING: To avoid patterned duplication of samples, be sure to keep
47// kSampleRandomSize prime!
48// E.g. with current values (kSampleYShiftSize = 5 and kSampleScaleSize = 3)
49// kSampleRandomSize is 13, which is prime.
50static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2;
51// ASSERT_IS_PRIME(kSampleRandomSize) !!
52
53class TrainingSample : public ELIST_LINK {
54 public:
56 : class_id_(INVALID_UNICHAR_ID), font_id_(0), page_num_(0),
57 num_features_(0), num_micro_features_(0), outline_length_(0),
58 features_(nullptr), micro_features_(nullptr), weight_(1.0),
59 max_dist_(0.0), sample_index_(0),
60 features_are_indexed_(false), features_are_mapped_(false),
61 is_error_(false) {
62 }
64
65 // Saves the given features into a TrainingSample. The features are copied,
66 // so may be deleted afterwards. Delete the return value after use.
68 const TBOX& bounding_box,
70 int num_features);
71 // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
73 // Constructs and returns a copy "randomized" by the method given by
74 // the randomizer index. If index is out of [0, kSampleRandomSize) then
75 // an exact copy is returned.
76 TrainingSample* RandomizedCopy(int index) const;
77 // Constructs and returns an exact copy.
78 TrainingSample* Copy() const;
79
80 // WARNING! Serialize/DeSerialize do not save/restore the "cache" data
81 // members, which are mostly the mapped features and the weight.
82 // It is assumed these can all be reconstructed from what is saved.
83 // Writes to the given file. Returns false in case of error.
84 bool Serialize(FILE* fp) const;
85 // Creates from the given file. Returns nullptr in case of error.
86 // If swap is true, assumes a big/little-endian swap is needed.
87 static TrainingSample* DeSerializeCreate(bool swap, FILE* fp);
88 // Reads from the given file. Returns false in case of error.
89 // If swap is true, assumes a big/little-endian swap is needed.
90 bool DeSerialize(bool swap, FILE* fp);
91
92 // Extracts the needed information from the CHAR_DESC_STRUCT.
93 void ExtractCharDesc(int feature_type, int micro_type,
94 int cn_type, int geo_type,
95 CHAR_DESC_STRUCT* char_desc);
96
97 // Sets the mapped_features_ to the indexed versions of the features_,
98 // computed using the provided feature_space.
99 void IndexFeatures(const IntFeatureSpace& feature_space);
100 // Sets the mapped_features_ from the features_ using the provided
101 // feature_map.
102 void MapFeatures(const IntFeatureMap& feature_map);
103
104 // Returns a pix representing the sample. (Int features only.)
105 Pix* RenderToPix(const UNICHARSET* unicharset) const;
106 // Displays the features in the given window with the given color.
107 void DisplayFeatures(ScrollView::Color color, ScrollView* window) const;
108
109 // Returns a pix of the original sample image. The pix is padded all round
110 // by padding wherever possible.
111 // The returned Pix must be pixDestroyed after use.
112 // If the input page_pix is nullptr, nullptr is returned.
113 Pix* GetSamplePix(int padding, Pix* page_pix) const;
114
115 // Accessors.
117 return class_id_;
118 }
119 void set_class_id(int id) {
120 class_id_ = id;
121 }
122 int font_id() const {
123 return font_id_;
124 }
125 void set_font_id(int id) {
126 font_id_ = id;
127 }
128 int page_num() const {
129 return page_num_;
130 }
131 void set_page_num(int page) {
132 page_num_ = page;
133 }
134 const TBOX& bounding_box() const {
135 return bounding_box_;
136 }
137 void set_bounding_box(const TBOX& box) {
138 bounding_box_ = box;
139 }
140 uint32_t num_features() const {
141 return num_features_;
142 }
144 return features_;
145 }
146 uint32_t num_micro_features() const {
147 return num_micro_features_;
148 }
150 return micro_features_;
151 }
152 int outline_length() const {
153 return outline_length_;
154 }
155 float cn_feature(int index) const {
156 return cn_feature_[index];
157 }
158 int geo_feature(int index) const {
159 return geo_feature_[index];
160 }
161 double weight() const {
162 return weight_;
163 }
164 void set_weight(double value) {
165 weight_ = value;
166 }
167 double max_dist() const {
168 return max_dist_;
169 }
170 void set_max_dist(double value) {
171 max_dist_ = value;
172 }
173 int sample_index() const {
174 return sample_index_;
175 }
176 void set_sample_index(int value) {
177 sample_index_ = value;
178 }
179 bool features_are_mapped() const {
180 return features_are_mapped_;
181 }
183 ASSERT_HOST(features_are_mapped_);
184 return mapped_features_;
185 }
187 ASSERT_HOST(features_are_indexed_);
188 return mapped_features_;
189 }
190 bool is_error() const {
191 return is_error_;
192 }
193 void set_is_error(bool value) {
194 is_error_ = value;
195 }
196
197 private:
198 // Unichar id that this sample represents. There obviously must be a
199 // reference UNICHARSET somewhere. Usually in TrainingSampleSet.
200 UNICHAR_ID class_id_;
201 // Font id in which this sample was printed. Refers to a fontinfo_table_ in
202 // MasterTrainer.
203 int font_id_;
204 // Number of page that the sample came from.
205 int page_num_;
206 // Bounding box of sample in original image.
207 TBOX bounding_box_;
208 // Number of INT_FEATURE_STRUCT in features_ array.
209 uint32_t num_features_;
210 // Number of MicroFeature in micro_features_ array.
211 uint32_t num_micro_features_;
212 // Total length of outline in the baseline normalized coordinate space.
213 // See comment in WERD_RES class definition for a discussion of coordinate
214 // spaces.
215 int outline_length_;
216 // Array of features.
217 INT_FEATURE_STRUCT* features_;
218 // Array of micro-features.
219 MicroFeature* micro_features_;
220 // The one and only CN feature. Indexed by NORM_PARAM_NAME enum.
221 float cn_feature_[kNumCNParams];
222 // The one and only geometric feature. (Aims at replacing cn_feature_).
223 // Indexed by GeoParams enum in picofeat.h
224 int geo_feature_[GeoCount];
225
226 // Non-serialized cache data.
227 // Weight used for boosting training.
228 double weight_;
229 // Maximum distance to other samples of same class/font used in computing
230 // the canonical sample.
231 double max_dist_;
232 // Global index of this sample.
233 int sample_index_;
234 // Indexed/mapped features, as indicated by the bools below.
235 GenericVector<int> mapped_features_;
236 bool features_are_indexed_;
237 bool features_are_mapped_;
238 // True if the last classification was an error by the current definition.
239 bool is_error_;
240
241 // Randomizing factors.
242 static const int kYShiftValues[kSampleYShiftSize];
243 static const double kScaleValues[kSampleScaleSize];
244};
245
246ELISTIZEH(TrainingSample)
247
248} // namespace tesseract
249
250#endif // TESSERACT_TRAINING_TRAININGSAMPLE_H_
#define ELISTIZEH(CLASSNAME)
Definition: elst.h:918
#define ASSERT_HOST(x)
Definition: errcode.h:88
int UNICHAR_ID
Definition: unichar.h:34
float MicroFeature[MFCount]
Definition: mf.h:33
@ GeoCount
Definition: picofeat.h:41
Definition: rect.h:34
bool DeSerialize(bool swap, FILE *fp)
static TrainingSample * CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info, const TBOX &bounding_box, const INT_FEATURE_STRUCT *features, int num_features)
const INT_FEATURE_STRUCT * features() const
const TBOX & bounding_box() const
void set_max_dist(double value)
UNICHAR_ID class_id() const
TrainingSample * RandomizedCopy(int index) const
void MapFeatures(const IntFeatureMap &feature_map)
void DisplayFeatures(ScrollView::Color color, ScrollView *window) const
uint32_t num_features() const
FEATURE_STRUCT * GetCNFeature() const
const GenericVector< int > & indexed_features() const
void IndexFeatures(const IntFeatureSpace &feature_space)
const GenericVector< int > & mapped_features() const
void set_weight(double value)
uint32_t num_micro_features() const
TrainingSample * Copy() const
int geo_feature(int index) const
void set_is_error(bool value)
Pix * RenderToPix(const UNICHARSET *unicharset) const
float cn_feature(int index) const
const MicroFeature * micro_features() const
bool features_are_mapped() const
void set_bounding_box(const TBOX &box)
static TrainingSample * DeSerializeCreate(bool swap, FILE *fp)
bool Serialize(FILE *fp) const
void ExtractCharDesc(int feature_type, int micro_type, int cn_type, int geo_type, CHAR_DESC_STRUCT *char_desc)
Pix * GetSamplePix(int padding, Pix *page_pix) const
void set_sample_index(int value)