tesseract 4.1.1
Loading...
Searching...
No Matches
networkio.h
Go to the documentation of this file.
1
2// File: networkio.h
3// Description: Network input/output data, allowing float/int implementations.
4// Author: Ray Smith
5// Created: Tue Jun 17 08:43:11 PST 2014
6//
7// (C) Copyright 2014, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
18
19#ifndef TESSERACT_LSTM_NETWORKIO_H_
20#define TESSERACT_LSTM_NETWORKIO_H_
21
22#include <cmath>
23#include <cstdio>
24#include <vector>
25
26#include "genericvector.h"
27#include "helpers.h"
28#include "static_shape.h"
29#include "stridemap.h"
30#include "weightmatrix.h"
31
32struct Pix;
33
34namespace tesseract {
35
36// Class to contain all the input/output of a network, allowing for fixed or
37// variable-strided 2d to 1d mapping, and float or int8_t values. Provides
38// enough calculating functions to hide the detail of the implementation.
39class NetworkIO {
40 public:
41 NetworkIO() : int_mode_(false) {}
42 // Resizes the array (and stride), avoiding realloc if possible, to the given
43 // size from various size specs:
44 // Same stride size, but given number of features.
45 void Resize(const NetworkIO& src, int num_features) {
46 ResizeToMap(src.int_mode(), src.stride_map(), num_features);
47 }
48 // Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim.
49 void Resize2d(bool int_mode, int width, int num_features);
50 // Resizes forcing a float representation with the stridemap of src and the
51 // given number of features.
52 void ResizeFloat(const NetworkIO& src, int num_features) {
53 ResizeToMap(false, src.stride_map(), num_features);
54 }
55 // Resizes to a specific stride_map.
56 void ResizeToMap(bool int_mode, const StrideMap& stride_map,
57 int num_features);
58 // Shrinks image size by x_scale,y_scale, and use given number of features.
59 void ResizeScaled(const NetworkIO& src, int x_scale, int y_scale,
60 int num_features);
61 // Resizes to just 1 x-coord, whatever the input.
62 void ResizeXTo1(const NetworkIO& src, int num_features);
63 // Initialize all the array to zero.
64 void Zero();
65 // Initializes to zero all elements of the array that do not correspond to
66 // valid image positions. (If a batch of different-sized images are packed
67 // together, then there will be padding pixels.)
69 // Sets up the array from the given image, using the currently set int_mode_.
70 // If the image width doesn't match the shape, the image is truncated or
71 // padded with noise to match.
72 void FromPix(const StaticShape& shape, const Pix* pix, TRand* randomizer);
73 // Sets up the array from the given set of images, using the currently set
74 // int_mode_. If the image width doesn't match the shape, the images are
75 // truncated or padded with noise to match.
76 void FromPixes(const StaticShape& shape, const std::vector<const Pix*>& pixes,
77 TRand* randomizer);
78 // Copies the given pix to *this at the given batch index, stretching and
79 // clipping the pixel values so that [black, black + 2*contrast] maps to the
80 // dynamic range of *this, ie [-1,1] for a float and (-127,127) for int.
81 // This is a 2-d operation in the sense that the output depth is the number
82 // of input channels, the height is the height of the image, and the width
83 // is the width of the image, or truncated/padded with noise if the width
84 // is a fixed size.
85 void Copy2DImage(int batch, Pix* pix, float black, float contrast,
86 TRand* randomizer);
87 // Copies the given pix to *this at the given batch index, as Copy2DImage
88 // above, except that the output depth is the height of the input image, the
89 // output height is 1, and the output width as for Copy2DImage.
90 // The image is thus treated as a 1-d set of vertical pixel strips.
91 void Copy1DGreyImage(int batch, Pix* pix, float black, float contrast,
92 TRand* randomizer);
93 // Helper stores the pixel value in i_ or f_ according to int_mode_.
94 // t: is the index from the StrideMap corresponding to the current
95 // [batch,y,x] position
96 // f: is the index into the depth/channel
97 // pixel: the value of the pixel from the image (in one channel)
98 // black: the pixel value to map to the lowest of the range of *this
99 // contrast: the range of pixel values to stretch to half the range of *this.
100 void SetPixel(int t, int f, int pixel, float black, float contrast);
101 // Converts the array to a Pix. Must be pixDestroyed after use.
102 Pix* ToPix() const;
103 // Prints the first and last num timesteps of the array for each feature.
104 void Print(int num) const;
105
106 // Returns the timestep width.
107 int Width() const {
108 return int_mode_ ? i_.dim1() : f_.dim1();
109 }
110 // Returns the number of features.
111 int NumFeatures() const {
112 return int_mode_ ? i_.dim2() : f_.dim2();
113 }
114 // Accessor to a timestep of the float matrix.
115 float* f(int t) {
116 ASSERT_HOST(!int_mode_);
117 return f_[t];
118 }
119 const float* f(int t) const {
120 ASSERT_HOST(!int_mode_);
121 return f_[t];
122 }
123 const int8_t* i(int t) const {
124 ASSERT_HOST(int_mode_);
125 return i_[t];
126 }
127 bool int_mode() const {
128 return int_mode_;
129 }
130 void set_int_mode(bool is_quantized) {
131 int_mode_ = is_quantized;
132 }
133 const StrideMap& stride_map() const {
134 return stride_map_;
135 }
136 void set_stride_map(const StrideMap& map) {
137 stride_map_ = map;
138 }
139 const GENERIC_2D_ARRAY<float>& float_array() const { return f_; }
141
142 // Copies a single time step from src.
143 void CopyTimeStepFrom(int dest_t, const NetworkIO& src, int src_t);
144 // Copies a part of single time step from src.
145 void CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features,
146 const NetworkIO& src, int src_t, int src_offset);
147 // Zeroes a single time step.
149 void ZeroTimeStepGeneral(int t, int offset, int num_features);
150 // Sets the given range to random values.
151 void Randomize(int t, int offset, int num_features, TRand* randomizer);
152
153 // Helper returns the label and score of the best choice over a range.
154 int BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch,
155 float* rating, float* certainty) const;
156 // Helper returns the rating and certainty of the choice over a range in t.
157 void ScoresOverRange(int t_start, int t_end, int choice, int null_ch,
158 float* rating, float* certainty) const;
159 // Returns the index (label) of the best value at the given timestep,
160 // and if not null, sets the score to the log of the corresponding value.
161 int BestLabel(int t, float* score) const {
162 return BestLabel(t, -1, -1, score);
163 }
164 // Returns the index (label) of the best value at the given timestep,
165 // excluding not_this and not_that, and if not null, sets the score to the
166 // log of the corresponding value.
167 int BestLabel(int t, int not_this, int not_that, float* score) const;
168 // Returns the best start position out of range (into which both start and end
169 // must fit) to obtain the highest cumulative score for the given labels.
170 int PositionOfBestMatch(const GenericVector<int>& labels, int start,
171 int end) const;
172 // Returns the cumulative score of the given labels starting at start, and
173 // using one label per time-step.
174 double ScoreOfLabels(const GenericVector<int>& labels, int start) const;
175 // Helper function sets all the outputs for a single timestep, such that
176 // label has value ok_score, and the other labels share 1 - ok_score.
177 // Assumes float mode.
178 void SetActivations(int t, int label, float ok_score);
179 // Modifies the values, only if needed, so that the given label is
180 // the winner at the given time step t.
181 // Assumes float mode.
182 void EnsureBestLabel(int t, int label);
183 // Helper function converts prob to certainty taking the minimum into account.
184 static float ProbToCertainty(float prob);
185 // Returns true if there is any bad value that is suspiciously like a GT
186 // error. Assuming that *this is the difference(gradient) between target
187 // and forward output, returns true if there is a large negative value
188 // (correcting a very confident output) for which there is no corresponding
189 // positive value in an adjacent timestep for the same feature index. This
190 // allows the box-truthed samples to make fine adjustments to position while
191 // stopping other disagreements of confident output with ground truth.
192 bool AnySuspiciousTruth(float confidence_thr) const;
193
194 // Reads a single timestep to floats in the range [-1, 1].
195 void ReadTimeStep(int t, double* output) const;
196 // Adds a single timestep to floats.
197 void AddTimeStep(int t, double* inout) const;
198 // Adds part of a single timestep to floats.
199 void AddTimeStepPart(int t, int offset, int num_features, float* inout) const;
200 // Writes a single timestep from floats in the range [-1, 1].
201 void WriteTimeStep(int t, const double* input);
202 // Writes a single timestep from floats in the range [-1, 1] writing only
203 // num_features elements of input to (*this)[t], starting at offset.
204 void WriteTimeStepPart(int t, int offset, int num_features,
205 const double* input);
206 // Maxpools a single time step from src.
207 void MaxpoolTimeStep(int dest_t, const NetworkIO& src, int src_t,
208 int* max_line);
209 // Runs maxpool backward, using maxes to index timesteps in *this.
210 void MaxpoolBackward(const NetworkIO& fwd,
211 const GENERIC_2D_ARRAY<int>& maxes);
212 // Returns the min over time of the maxes over features of the outputs.
213 float MinOfMaxes() const;
214 // Returns the min over time.
215 float Max() const { return int_mode_ ? i_.Max() : f_.Max(); }
216 // Computes combined results for a combiner that chooses between an existing
217 // input and itself, with an additional output to indicate the choice.
218 void CombineOutputs(const NetworkIO& base_output,
219 const NetworkIO& combiner_output);
220 // Computes deltas for a combiner that chooses between 2 sets of inputs.
221 void ComputeCombinerDeltas(const NetworkIO& fwd_deltas,
222 const NetworkIO& base_output);
223
224 // Copies the array checking that the types match.
225 void CopyAll(const NetworkIO& src);
226 // Adds the array to a float array, with scaling to [-1, 1] if the src is int.
227 void AddAllToFloat(const NetworkIO& src);
228 // Subtracts the array from a float array. src must also be float.
229 void SubtractAllFromFloat(const NetworkIO& src);
230
231 // Copies src to *this, with maxabs normalization to match scale.
232 void CopyWithNormalization(const NetworkIO& src, const NetworkIO& scale);
233 // Multiplies the float data by the given factor.
234 void ScaleFloatBy(float factor) { f_ *= factor; }
235 // Copies src to *this with independent reversal of the y dimension.
236 void CopyWithYReversal(const NetworkIO& src);
237 // Copies src to *this with independent reversal of the x dimension.
238 void CopyWithXReversal(const NetworkIO& src);
239 // Copies src to *this with independent transpose of the x and y dimensions.
240 void CopyWithXYTranspose(const NetworkIO& src);
241 // Copies src to *this, at the given feature_offset, returning the total
242 // feature offset after the copy. Multiple calls will stack outputs from
243 // multiple sources in feature space.
244 int CopyPacking(const NetworkIO& src, int feature_offset);
245 // Opposite of CopyPacking, fills *this with a part of src, starting at
246 // feature_offset, and picking num_features. Resizes *this to match.
247 void CopyUnpacking(const NetworkIO& src, int feature_offset,
248 int num_features);
249 // Transposes the float part of *this into dest.
250 void Transpose(TransposedArray* dest) const;
251
252 // Clips the content of a single time-step to +/-range.
253 void ClipVector(int t, float range);
254
255 // Applies Func to timestep t of *this (u) and multiplies the result by v
256 // component-wise, putting the product in *product.
257 // *this and v may be int or float, but must match. The outputs are double.
258 template <class Func>
259 void FuncMultiply(const NetworkIO& v_io, int t, double* product) {
260 Func f;
261 ASSERT_HOST(!int_mode_);
262 ASSERT_HOST(!v_io.int_mode_);
263 int dim = f_.dim2();
264 if (int_mode_) {
265 const int8_t* u = i_[t];
266 const int8_t* v = v_io.i_[t];
267 for (int i = 0; i < dim; ++i) {
268 product[i] = f(u[i] / static_cast<double>(INT8_MAX)) * v[i] /
269 static_cast<double>(INT8_MAX);
270 }
271 } else {
272 const float* u = f_[t];
273 const float* v = v_io.f_[t];
274 for (int i = 0; i < dim; ++i) {
275 product[i] = f(u[i]) * v[i];
276 }
277 }
278 }
279 // Applies Func to *this (u) at u_t, and multiplies the result by v[v_t] * w,
280 // component-wise, putting the product in *product.
281 // All NetworkIOs are assumed to be float.
282 template <class Func>
283 void FuncMultiply3(int u_t, const NetworkIO& v_io, int v_t, const double* w,
284 double* product) const {
285 ASSERT_HOST(!int_mode_);
286 ASSERT_HOST(!v_io.int_mode_);
287 Func f;
288 const float* u = f_[u_t];
289 const float* v = v_io.f_[v_t];
290 int dim = f_.dim2();
291 for (int i = 0; i < dim; ++i) {
292 product[i] = f(u[i]) * v[i] * w[i];
293 }
294 }
295 // Applies Func to *this (u) at u_t, and multiplies the result by v[v_t] * w,
296 // component-wise, adding the product to *product.
297 // All NetworkIOs are assumed to be float.
298 template <class Func>
299 void FuncMultiply3Add(const NetworkIO& v_io, int t, const double* w,
300 double* product) const {
301 ASSERT_HOST(!int_mode_);
302 ASSERT_HOST(!v_io.int_mode_);
303 Func f;
304 const float* u = f_[t];
305 const float* v = v_io.f_[t];
306 int dim = f_.dim2();
307 for (int i = 0; i < dim; ++i) {
308 product[i] += f(u[i]) * v[i] * w[i];
309 }
310 }
311 // Applies Func1 to *this (u), Func2 to v, and multiplies the result by w,
312 // component-wise, putting the product in product, all at timestep t, except
313 // w, which is a simple array. All NetworkIOs are assumed to be float.
314 template <class Func1, class Func2>
315 void Func2Multiply3(const NetworkIO& v_io, int t, const double* w,
316 double* product) const {
317 ASSERT_HOST(!int_mode_);
318 ASSERT_HOST(!v_io.int_mode_);
319 Func1 f;
320 Func2 g;
321 const float* u = f_[t];
322 const float* v = v_io.f_[t];
323 int dim = f_.dim2();
324 for (int i = 0; i < dim; ++i) {
325 product[i] = f(u[i]) * g(v[i]) * w[i];
326 }
327 }
328
329 private:
330 // Returns the padding required for the given number of features in order
331 // for the SIMD operations to be safe.
332 static int GetPadding(int num_features);
333
334 // Choice of float vs 8 bit int for data.
337 // Which of f_ and i_ are we actually using.
338 bool int_mode_;
339 // Stride for 2d input data.
340 StrideMap stride_map_;
341};
342
343} // namespace tesseract.
344
345#endif // TESSERACT_LSTM_NETWORKIO_H_
#define ASSERT_HOST(x)
Definition: errcode.h:88
int dim2() const
Definition: matrix.h:210
T Max() const
Definition: matrix.h:345
int dim1() const
Definition: matrix.h:209
void Resize(const NetworkIO &src, int num_features)
Definition: networkio.h:45
void ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const NetworkIO &base_output)
Definition: networkio.cpp:770
void ResizeXTo1(const NetworkIO &src, int num_features)
Definition: networkio.cpp:70
void ZeroInvalidElements()
Definition: networkio.cpp:88
float MinOfMaxes() const
Definition: networkio.cpp:712
void MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line)
Definition: networkio.cpp:668
void Copy1DGreyImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer)
Definition: networkio.cpp:246
bool int_mode() const
Definition: networkio.h:127
void ResizeFloat(const NetworkIO &src, int num_features)
Definition: networkio.h:52
void Copy2DImage(int batch, Pix *pix, float black, float contrast, TRand *randomizer)
Definition: networkio.cpp:208
float Max() const
Definition: networkio.h:215
void WriteTimeStepPart(int t, int offset, int num_features, const double *input)
Definition: networkio.cpp:651
void CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features, const NetworkIO &src, int src_t, int src_offset)
Definition: networkio.cpp:393
void ClipVector(int t, float range)
Definition: networkio.cpp:971
void AddTimeStepPart(int t, int offset, int num_features, float *inout) const
Definition: networkio.cpp:629
void CopyWithXReversal(const NetworkIO &src)
Definition: networkio.cpp:872
void FuncMultiply3(int u_t, const NetworkIO &v_io, int v_t, const double *w, double *product) const
Definition: networkio.h:283
void set_stride_map(const StrideMap &map)
Definition: networkio.h:136
void Print(int num) const
Definition: networkio.cpp:366
void ScaleFloatBy(float factor)
Definition: networkio.h:234
void FromPixes(const StaticShape &shape, const std::vector< const Pix * > &pixes, TRand *randomizer)
Definition: networkio.cpp:170
int PositionOfBestMatch(const GenericVector< int > &labels, int start, int end) const
Definition: networkio.cpp:507
void ScoresOverRange(int t_start, int t_end, int choice, int null_ch, float *rating, float *certainty) const
Definition: networkio.cpp:450
static float ProbToCertainty(float prob)
Definition: networkio.cpp:568
int CopyPacking(const NetworkIO &src, int feature_offset)
Definition: networkio.cpp:917
const float * f(int t) const
Definition: networkio.h:119
void CopyWithXYTranspose(const NetworkIO &src)
Definition: networkio.cpp:891
void WriteTimeStep(int t, const double *input)
Definition: networkio.cpp:645
float * f(int t)
Definition: networkio.h:115
int Width() const
Definition: networkio.h:107
void Resize2d(bool int_mode, int width, int num_features)
Definition: networkio.cpp:35
GENERIC_2D_ARRAY< float > * mutable_float_array()
Definition: networkio.h:140
void MaxpoolBackward(const NetworkIO &fwd, const GENERIC_2D_ARRAY< int > &maxes)
Definition: networkio.cpp:695
void FuncMultiply(const NetworkIO &v_io, int t, double *product)
Definition: networkio.h:259
void ZeroTimeStep(int t)
Definition: networkio.h:148
void Func2Multiply3(const NetworkIO &v_io, int t, const double *w, double *product) const
Definition: networkio.h:315
void SetActivations(int t, int label, float ok_score)
Definition: networkio.cpp:537
double ScoreOfLabels(const GenericVector< int > &labels, int start) const
Definition: networkio.cpp:525
void CombineOutputs(const NetworkIO &base_output, const NetworkIO &combiner_output)
Definition: networkio.cpp:736
void Transpose(TransposedArray *dest) const
Definition: networkio.cpp:964
void ResizeScaled(const NetworkIO &src, int x_scale, int y_scale, int num_features)
Definition: networkio.cpp:62
void ReadTimeStep(int t, double *output) const
Definition: networkio.cpp:598
void FromPix(const StaticShape &shape, const Pix *pix, TRand *randomizer)
Definition: networkio.cpp:161
bool AnySuspiciousTruth(float confidence_thr) const
Definition: networkio.cpp:579
Pix * ToPix() const
Definition: networkio.cpp:286
void set_int_mode(bool is_quantized)
Definition: networkio.h:130
void CopyWithYReversal(const NetworkIO &src)
Definition: networkio.cpp:853
void ZeroTimeStepGeneral(int t, int offset, int num_features)
Definition: networkio.cpp:407
void Randomize(int t, int offset, int num_features, TRand *randomizer)
Definition: networkio.cpp:416
void CopyUnpacking(const NetworkIO &src, int feature_offset, int num_features)
Definition: networkio.cpp:945
void EnsureBestLabel(int t, int label)
Definition: networkio.cpp:549
void AddTimeStep(int t, double *inout) const
Definition: networkio.cpp:613
void FuncMultiply3Add(const NetworkIO &v_io, int t, const double *w, double *product) const
Definition: networkio.h:299
void AddAllToFloat(const NetworkIO &src)
Definition: networkio.cpp:817
const StrideMap & stride_map() const
Definition: networkio.h:133
void SubtractAllFromFloat(const NetworkIO &src)
Definition: networkio.cpp:824
void ResizeToMap(bool int_mode, const StrideMap &stride_map, int num_features)
Definition: networkio.cpp:46
const int8_t * i(int t) const
Definition: networkio.h:123
const GENERIC_2D_ARRAY< float > & float_array() const
Definition: networkio.h:139
void CopyAll(const NetworkIO &src)
Definition: networkio.cpp:811
void CopyTimeStepFrom(int dest_t, const NetworkIO &src, int src_t)
Definition: networkio.cpp:383
int BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float *rating, float *certainty) const
Definition: networkio.cpp:431
void CopyWithNormalization(const NetworkIO &src, const NetworkIO &scale)
Definition: networkio.cpp:831
int NumFeatures() const
Definition: networkio.h:111
void SetPixel(int t, int f, int pixel, float black, float contrast)
Definition: networkio.cpp:275
int BestLabel(int t, float *score) const
Definition: networkio.h:161