tesseract 4.1.1
Loading...
Searching...
No Matches
trainingsample.cpp
Go to the documentation of this file.
1// Copyright 2010 Google Inc. All Rights Reserved.
2// Author: rays@google.com (Ray Smith)
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7// http://www.apache.org/licenses/LICENSE-2.0
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13//
15
16#define _USE_MATH_DEFINES // for M_PI
17// Include automatically generated configuration file if running autoconf.
18#ifdef HAVE_CONFIG_H
19#include "config_auto.h"
20#endif
21
22#include "trainingsample.h"
23
24#include <cmath> // for M_PI
25#include "allheaders.h"
26#include "helpers.h"
27#include "intfeaturemap.h"
28#include "normfeat.h"
29#include "shapetable.h"
30
31namespace tesseract {
32
33ELISTIZE(TrainingSample)
34
35// Center of randomizing operations.
// Pivot for the randomizing transform: the randomized-copy code subtracts
// this value from each feature X and Y before scaling and adds it back
// afterwards.
36const int kRandomizingCenter = 128;
37
38// Randomizing factors.
// Vertical offsets: the randomized-copy code picks one entry with
// index / kSampleScaleSize and adds it to each feature's Y after scaling.
39const int TrainingSample::kYShiftValues[kSampleYShiftSize] = {
40 6, 3, -3, -6, 0
41};
// Scale factors: the randomized-copy code picks one entry with
// index % kSampleScaleSize and multiplies each feature's offset from
// kRandomizingCenter by it.
42const double TrainingSample::kScaleValues[kSampleScaleSize] = {
43 1.0625, 0.9375, 1.0
44};
45
47 delete [] features_;
48 delete [] micro_features_;
49}
50
51// WARNING! Serialize/DeSerialize do not save/restore the "cache" data
52// members, which is mostly the mapped features, and the weight.
53// It is assumed these can all be reconstructed from what is saved.
54// Writes to the given file. Returns false in case of error.
55bool TrainingSample::Serialize(FILE* fp) const {
56 if (fwrite(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
57 if (fwrite(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
58 if (fwrite(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
59 if (!bounding_box_.Serialize(fp)) return false;
60 if (fwrite(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
61 if (fwrite(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
62 return false;
63 if (fwrite(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
64 return false;
65 if (fwrite(features_, sizeof(*features_), num_features_, fp) != num_features_)
66 return false;
67 if (fwrite(micro_features_, sizeof(*micro_features_), num_micro_features_,
68 fp) != num_micro_features_)
69 return false;
70 if (fwrite(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
71 kNumCNParams) return false;
72 if (fwrite(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
73 return false;
74 return true;
75}
76
77// Creates from the given file. Returns nullptr in case of error.
78// If swap is true, assumes a big/little-endian swap is needed.
80 auto* sample = new TrainingSample;
81 if (sample->DeSerialize(swap, fp)) return sample;
82 delete sample;
83 return nullptr;
84}
85
86// Reads from the given file. Returns false in case of error.
87// If swap is true, assumes a big/little-endian swap is needed.
88bool TrainingSample::DeSerialize(bool swap, FILE* fp) {
89 if (fread(&class_id_, sizeof(class_id_), 1, fp) != 1) return false;
90 if (fread(&font_id_, sizeof(font_id_), 1, fp) != 1) return false;
91 if (fread(&page_num_, sizeof(page_num_), 1, fp) != 1) return false;
92 if (!bounding_box_.DeSerialize(swap, fp)) return false;
93 if (fread(&num_features_, sizeof(num_features_), 1, fp) != 1) return false;
94 if (fread(&num_micro_features_, sizeof(num_micro_features_), 1, fp) != 1)
95 return false;
96 if (fread(&outline_length_, sizeof(outline_length_), 1, fp) != 1)
97 return false;
98 if (swap) {
99 ReverseN(&class_id_, sizeof(class_id_));
100 ReverseN(&num_features_, sizeof(num_features_));
101 ReverseN(&num_micro_features_, sizeof(num_micro_features_));
102 ReverseN(&outline_length_, sizeof(outline_length_));
103 }
104 // Arbitrarily limit the number of elements to protect against bad data.
105 if (num_features_ > UINT16_MAX) return false;
106 if (num_micro_features_ > UINT16_MAX) return false;
107 delete [] features_;
108 features_ = new INT_FEATURE_STRUCT[num_features_];
109 if (fread(features_, sizeof(*features_), num_features_, fp)
110 != num_features_)
111 return false;
112 delete [] micro_features_;
113 micro_features_ = new MicroFeature[num_micro_features_];
114 if (fread(micro_features_, sizeof(*micro_features_), num_micro_features_,
115 fp) != num_micro_features_)
116 return false;
117 if (fread(cn_feature_, sizeof(*cn_feature_), kNumCNParams, fp) !=
118 kNumCNParams) return false;
119 if (fread(geo_feature_, sizeof(*geo_feature_), GeoCount, fp) != GeoCount)
120 return false;
121 return true;
122}
123
124// Saves the given features into a TrainingSample.
126 const INT_FX_RESULT_STRUCT& fx_info,
127 const TBOX& bounding_box,
128 const INT_FEATURE_STRUCT* features,
129 int num_features) {
130 auto* sample = new TrainingSample;
131 sample->num_features_ = num_features;
132 sample->features_ = new INT_FEATURE_STRUCT[num_features];
133 sample->outline_length_ = fx_info.Length;
134 memcpy(sample->features_, features, num_features * sizeof(features[0]));
135 sample->geo_feature_[GeoBottom] = bounding_box.bottom();
136 sample->geo_feature_[GeoTop] = bounding_box.top();
137 sample->geo_feature_[GeoWidth] = bounding_box.width();
138
139 // Generate the cn_feature_ from the fx_info.
140 sample->cn_feature_[CharNormY] =
142 sample->cn_feature_[CharNormLength] =
144 sample->cn_feature_[CharNormRx] = MF_SCALE_FACTOR * fx_info.Rx;
145 sample->cn_feature_[CharNormRy] = MF_SCALE_FACTOR * fx_info.Ry;
146
147 sample->features_are_indexed_ = false;
148 sample->features_are_mapped_ = false;
149 return sample;
150}
151
152// Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
154 FEATURE feature = NewFeature(&CharNormDesc);
155 for (int i = 0; i < kNumCNParams; ++i)
156 feature->Params[i] = cn_feature_[i];
157 return feature;
158}
159
160// Constructs and returns a copy randomized by the method given by
161// the randomizer index. If index is out of [0, kSampleRandomSize) then
162// an exact copy is returned.
165 if (index >= 0 && index < kSampleRandomSize) {
166 ++index; // Remove the first combination.
167 const int yshift = kYShiftValues[index / kSampleScaleSize];
168 double scaling = kScaleValues[index % kSampleScaleSize];
169 for (uint32_t i = 0; i < num_features_; ++i) {
170 double result = (features_[i].X - kRandomizingCenter) * scaling;
171 result += kRandomizingCenter;
172 sample->features_[i].X = ClipToRange<int>(result + 0.5, 0, UINT8_MAX);
173 result = (features_[i].Y - kRandomizingCenter) * scaling;
174 result += kRandomizingCenter + yshift;
175 sample->features_[i].Y = ClipToRange<int>(result + 0.5, 0, UINT8_MAX);
176 }
177 }
178 return sample;
179}
180
181// Constructs and returns an exact copy.
183 auto* sample = new TrainingSample;
184 sample->class_id_ = class_id_;
185 sample->font_id_ = font_id_;
186 sample->weight_ = weight_;
187 sample->sample_index_ = sample_index_;
188 sample->num_features_ = num_features_;
189 if (num_features_ > 0) {
190 sample->features_ = new INT_FEATURE_STRUCT[num_features_];
191 memcpy(sample->features_, features_, num_features_ * sizeof(features_[0]));
192 }
193 sample->num_micro_features_ = num_micro_features_;
194 if (num_micro_features_ > 0) {
195 sample->micro_features_ = new MicroFeature[num_micro_features_];
196 memcpy(sample->micro_features_, micro_features_,
197 num_micro_features_ * sizeof(micro_features_[0]));
198 }
199 memcpy(sample->cn_feature_, cn_feature_, sizeof(*cn_feature_) * kNumCNParams);
200 memcpy(sample->geo_feature_, geo_feature_, sizeof(*geo_feature_) * GeoCount);
201 return sample;
202}
203
204// Extracts the needed information from the CHAR_DESC_STRUCT.
205void TrainingSample::ExtractCharDesc(int int_feature_type,
206 int micro_type,
207 int cn_type,
208 int geo_type,
209 CHAR_DESC_STRUCT* char_desc) {
210 // Extract the INT features.
211 delete[] features_;
212 FEATURE_SET_STRUCT* char_features = char_desc->FeatureSets[int_feature_type];
213 if (char_features == nullptr) {
214 tprintf("Error: no features to train on of type %s\n",
216 num_features_ = 0;
217 features_ = nullptr;
218 } else {
219 num_features_ = char_features->NumFeatures;
220 features_ = new INT_FEATURE_STRUCT[num_features_];
221 for (uint32_t f = 0; f < num_features_; ++f) {
222 features_[f].X =
223 static_cast<uint8_t>(char_features->Features[f]->Params[IntX]);
224 features_[f].Y =
225 static_cast<uint8_t>(char_features->Features[f]->Params[IntY]);
226 features_[f].Theta =
227 static_cast<uint8_t>(char_features->Features[f]->Params[IntDir]);
228 features_[f].CP_misses = 0;
229 }
230 }
231 // Extract the Micro features.
232 delete[] micro_features_;
233 char_features = char_desc->FeatureSets[micro_type];
234 if (char_features == nullptr) {
235 tprintf("Error: no features to train on of type %s\n",
237 num_micro_features_ = 0;
238 micro_features_ = nullptr;
239 } else {
240 num_micro_features_ = char_features->NumFeatures;
241 micro_features_ = new MicroFeature[num_micro_features_];
242 for (uint32_t f = 0; f < num_micro_features_; ++f) {
243 for (int d = 0; d < MFCount; ++d) {
244 micro_features_[f][d] = char_features->Features[f]->Params[d];
245 }
246 }
247 }
248 // Extract the CN feature.
249 char_features = char_desc->FeatureSets[cn_type];
250 if (char_features == nullptr) {
251 tprintf("Error: no CN feature to train on.\n");
252 } else {
253 ASSERT_HOST(char_features->NumFeatures == 1);
254 cn_feature_[CharNormY] = char_features->Features[0]->Params[CharNormY];
255 cn_feature_[CharNormLength] =
256 char_features->Features[0]->Params[CharNormLength];
257 cn_feature_[CharNormRx] = char_features->Features[0]->Params[CharNormRx];
258 cn_feature_[CharNormRy] = char_features->Features[0]->Params[CharNormRy];
259 }
260 // Extract the Geo feature.
261 char_features = char_desc->FeatureSets[geo_type];
262 if (char_features == nullptr) {
263 tprintf("Error: no Geo feature to train on.\n");
264 } else {
265 ASSERT_HOST(char_features->NumFeatures == 1);
266 geo_feature_[GeoBottom] = char_features->Features[0]->Params[GeoBottom];
267 geo_feature_[GeoTop] = char_features->Features[0]->Params[GeoTop];
268 geo_feature_[GeoWidth] = char_features->Features[0]->Params[GeoWidth];
269 }
270 features_are_indexed_ = false;
271 features_are_mapped_ = false;
272}
273
274// Sets the mapped_features_ from the features_ using the provided
275// feature_space to the indexed versions of the features.
278 feature_space.IndexAndSortFeatures(features_, num_features_,
279 &mapped_features_);
280 features_are_indexed_ = true;
281 features_are_mapped_ = false;
282}
283
284// Sets the mapped_features_ from the features using the provided
285// feature_map.
288 feature_map.feature_space().IndexAndSortFeatures(features_, num_features_,
290 feature_map.MapIndexedFeatures(indexed_features, &mapped_features_);
291 features_are_indexed_ = false;
292 features_are_mapped_ = true;
293}
294
295// Returns a pix representing the sample. (Int features only.)
296Pix* TrainingSample::RenderToPix(const UNICHARSET* unicharset) const {
297 Pix* pix = pixCreate(kIntFeatureExtent, kIntFeatureExtent, 1);
298 for (uint32_t f = 0; f < num_features_; ++f) {
299 int start_x = features_[f].X;
300 int start_y = kIntFeatureExtent - features_[f].Y;
301 double dx = cos((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI);
302 double dy = -sin((features_[f].Theta / 256.0) * 2.0 * M_PI - M_PI);
303 for (int i = 0; i <= 5; ++i) {
304 int x = static_cast<int>(start_x + dx * i);
305 int y = static_cast<int>(start_y + dy * i);
306 if (x >= 0 && x < 256 && y >= 0 && y < 256)
307 pixSetPixel(pix, x, y, 1);
308 }
309 }
310 if (unicharset != nullptr)
311 pixSetText(pix, unicharset->id_to_unichar(class_id_));
312 return pix;
313}
314
315// Displays the features in the given window with the given color.
317 ScrollView* window) const {
318 #ifndef GRAPHICS_DISABLED
319 for (uint32_t f = 0; f < num_features_; ++f) {
320 RenderIntFeature(window, &features_[f], color);
321 }
322 #endif // GRAPHICS_DISABLED
323}
324
325// Returns a pix of the original sample image. The pix is padded all round
326// by padding wherever possible.
327// The returned Pix must be pixDestroyed after use.
328// If the input page_pix is nullptr, nullptr is returned.
329Pix* TrainingSample::GetSamplePix(int padding, Pix* page_pix) const {
330 if (page_pix == nullptr)
331 return nullptr;
332 int page_width = pixGetWidth(page_pix);
333 int page_height = pixGetHeight(page_pix);
334 TBOX padded_box = bounding_box();
335 padded_box.pad(padding, padding);
336 // Clip the padded_box to the limits of the page
337 TBOX page_box(0, 0, page_width, page_height);
338 padded_box &= page_box;
339 Box* box = boxCreate(page_box.left(), page_height - page_box.top(),
340 page_box.width(), page_box.height());
341 Pix* sample_pix = pixClipRectangle(page_pix, box, nullptr);
342 boxDestroy(&box);
343 return sample_pix;
344}
345
346} // namespace tesseract
const int kBlnBaselineOffset
Definition: normalis.h:25
#define ELISTIZE(CLASSNAME)
Definition: elst.h:931
#define ASSERT_HOST(x)
Definition: errcode.h:88
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:185
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
const char *const kIntFeatureType
Definition: featdefs.cpp:34
const char *const kMicroFeatureType
Definition: featdefs.cpp:32
const FEATURE_DESC_STRUCT CharNormDesc
const int kIntFeatureExtent
void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT *Feature, ScrollView::Color color)
Definition: intproto.cpp:1602
@ MFCount
Definition: mf.h:30
float MicroFeature[MFCount]
Definition: mf.h:33
const float MF_SCALE_FACTOR
Definition: mfoutline.h:71
#define LENGTH_COMPRESSION
Definition: normfeat.h:27
@ CharNormRx
Definition: normfeat.h:30
@ CharNormY
Definition: normfeat.h:30
@ CharNormRy
Definition: normfeat.h:30
@ CharNormLength
Definition: normfeat.h:30
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:78
@ IntDir
Definition: picofeat.h:32
@ IntX
Definition: picofeat.h:30
@ IntY
Definition: picofeat.h:31
@ GeoWidth
Definition: picofeat.h:39
@ GeoBottom
Definition: picofeat.h:37
@ GeoTop
Definition: picofeat.h:38
@ GeoCount
Definition: picofeat.h:41
const int kRandomizingCenter
Definition: rect.h:34
bool Serialize(FILE *fp) const
Definition: rect.cpp:185
int16_t top() const
Definition: rect.h:58
int16_t width() const
Definition: rect.h:115
int16_t height() const
Definition: rect.h:108
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
void pad(int xpad, int ypad)
Definition: rect.h:131
bool DeSerialize(bool swap, FILE *fp)
Definition: rect.cpp:192
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
Definition: cluster.h:32
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
Definition: featdefs.h:41
int MapIndexedFeatures(const GenericVector< int > &index_features, GenericVector< int > *map_features) const
const IntFeatureSpace & feature_space() const
Definition: intfeaturemap.h:60
void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features, GenericVector< int > *sorted_features) const
int16_t Ymean
Definition: intfx.h:37
int32_t Length
Definition: intfx.h:36
float Params[1]
Definition: ocrfeatures.h:61
FEATURE Features[1]
Definition: ocrfeatures.h:68
uint16_t NumFeatures
Definition: ocrfeatures.h:66
bool DeSerialize(bool swap, FILE *fp)
static TrainingSample * CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info, const TBOX &bounding_box, const INT_FEATURE_STRUCT *features, int num_features)
const INT_FEATURE_STRUCT * features() const
const TBOX & bounding_box() const
TrainingSample * RandomizedCopy(int index) const
void MapFeatures(const IntFeatureMap &feature_map)
void DisplayFeatures(ScrollView::Color color, ScrollView *window) const
uint32_t num_features() const
FEATURE_STRUCT * GetCNFeature() const
const GenericVector< int > & indexed_features() const
void IndexFeatures(const IntFeatureSpace &feature_space)
TrainingSample * Copy() const
Pix * RenderToPix(const UNICHARSET *unicharset) const
static TrainingSample * DeSerializeCreate(bool swap, FILE *fp)
bool Serialize(FILE *fp) const
void ExtractCharDesc(int feature_type, int micro_type, int cn_type, int geo_type, CHAR_DESC_STRUCT *char_desc)
Pix * GetSamplePix(int padding, Pix *page_pix) const