tesseract 4.1.1
networkio.cpp
///////////////////////////////////////////////////////////////////////
// File: networkio.cpp
// Description: Network input/output data, allowing float/int implementations.
// Author: Ray Smith
// Created: Thu Jun 19 13:01:31 PST 2014
//
// (C) Copyright 2014, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////
#include "networkio.h"
#include <cfloat>  // for FLT_MAX

#include "allheaders.h"
#include "functions.h"
#include "statistc.h"
#include "tprintf.h"

namespace tesseract {

// Minimum value to output for certainty.
const float kMinCertainty = -20.0f;
// Probability corresponding to kMinCertainty.
const float kMinProb = exp(kMinCertainty);

// Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim.
void NetworkIO::Resize2d(bool int_mode, int width, int num_features) {
  stride_map_ = StrideMap();
  int_mode_ = int_mode;
  if (int_mode_) {
    i_.ResizeNoInit(width, num_features, GetPadding(num_features));
  } else {
    f_.ResizeNoInit(width, num_features);
  }
}

// Resizes to a specific stride_map.
void NetworkIO::ResizeToMap(bool int_mode, const StrideMap& stride_map,
                            int num_features) {
  // If this method crashes with this == nullptr,
  // it most likely got here through an uninitialized scratch element,
  // ie call NetworkScratch::IO::Resizexxx() not NetworkIO::Resizexxx()!!
  stride_map_ = stride_map;
  int_mode_ = int_mode;
  if (int_mode_) {
    i_.ResizeNoInit(stride_map.Width(), num_features, GetPadding(num_features));
  } else {
    f_.ResizeNoInit(stride_map.Width(), num_features);
  }
  ZeroInvalidElements();
}

// Shrinks image size by x_scale,y_scale, and uses the given number of features.
void NetworkIO::ResizeScaled(const NetworkIO& src,
                             int x_scale, int y_scale, int num_features) {
  StrideMap stride_map = src.stride_map_;
  stride_map.ScaleXY(x_scale, y_scale);
  ResizeToMap(src.int_mode_, stride_map, num_features);
}

// Resizes to just 1 x-coord, whatever the input.
void NetworkIO::ResizeXTo1(const NetworkIO& src, int num_features) {
  StrideMap stride_map = src.stride_map_;
  stride_map.ReduceWidthTo1();
  ResizeToMap(src.int_mode_, stride_map, num_features);
}

// Initializes the entire array to zero.
void NetworkIO::Zero() {
  int width = Width();
  // Zero out everything. Column-by-column in case it is aligned.
  for (int t = 0; t < width; ++t) {
    ZeroTimeStep(t);
  }
}

// Initializes to zero all elements of the array that do not correspond to
// valid image positions. (If a batch of different-sized images are packed
// together, then there will be padding pixels.)
void NetworkIO::ZeroInvalidElements() {
  int num_features = NumFeatures();
  int full_width = stride_map_.Size(FD_WIDTH);
  int full_height = stride_map_.Size(FD_HEIGHT);
  StrideMap::Index b_index(stride_map_);
  do {
    int end_x = b_index.MaxIndexOfDim(FD_WIDTH) + 1;
    if (end_x < full_width) {
      // The width is small, so fill for every valid y.
      StrideMap::Index y_index(b_index);
      int fill_size = num_features * (full_width - end_x);
      do {
        StrideMap::Index z_index(y_index);
        z_index.AddOffset(end_x, FD_WIDTH);
        if (int_mode_) {
          ZeroVector(fill_size, i_[z_index.t()]);
        } else {
          ZeroVector(fill_size, f_[z_index.t()]);
        }
      } while (y_index.AddOffset(1, FD_HEIGHT));
    }
    int end_y = b_index.MaxIndexOfDim(FD_HEIGHT) + 1;
    if (end_y < full_height) {
      // The height is small, so fill in the space in one go.
      StrideMap::Index y_index(b_index);
      y_index.AddOffset(end_y, FD_HEIGHT);
      int fill_size = num_features * full_width * (full_height - end_y);
      if (int_mode_) {
        ZeroVector(fill_size, i_[y_index.t()]);
      } else {
        ZeroVector(fill_size, f_[y_index.t()]);
      }
    }
  } while (b_index.AddOffset(1, FD_BATCH));
}

// Helper computes a black point and white point to contrast-enhance an image.
// The computation is based on the assumption that the image is of a single line
// of text, so a horizontal line through the middle of the image passes through
// at least some of it, so local minima and maxima are a good proxy for black
// and white pixel samples.
static void ComputeBlackWhite(Pix* pix, float* black, float* white) {
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
  STATS mins(0, 256), maxes(0, 256);
  if (width >= 3) {
    int y = height / 2;
    l_uint32* line = pixGetData(pix) + pixGetWpl(pix) * y;
    int prev = GET_DATA_BYTE(line, 0);
    int curr = GET_DATA_BYTE(line, 1);
    for (int x = 1; x + 1 < width; ++x) {
      int next = GET_DATA_BYTE(line, x + 1);
      if ((curr < prev && curr <= next) || (curr <= prev && curr < next)) {
        // Local minimum.
        mins.add(curr, 1);
      }
      if ((curr > prev && curr >= next) || (curr >= prev && curr > next)) {
        // Local maximum.
        maxes.add(curr, 1);
      }
      prev = curr;
      curr = next;
    }
  }
  if (mins.get_total() == 0) mins.add(0, 1);
  if (maxes.get_total() == 0) maxes.add(255, 1);
  *black = mins.ile(0.25);
  *white = maxes.ile(0.75);
}
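
// Illustrative note (added commentary, not part of the original source, with
// made-up pixel values): for a typical anti-aliased line of dark text on a
// light background, the local minima along the middle scanline cluster near
// the ink value (say ~40) and the local maxima near the paper value
// (say ~230), so ComputeBlackWhite() would return roughly black=40 and
// white=230, and FromPixes() below derives contrast = (white - black) / 2.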

// Sets up the array from the given image, using the currently set int_mode_.
// If the image width doesn't match the shape, the image is truncated or padded
// with noise to match.
void NetworkIO::FromPix(const StaticShape& shape, const Pix* pix,
                        TRand* randomizer) {
  std::vector<const Pix*> pixes(1, pix);
  FromPixes(shape, pixes, randomizer);
}

// Sets up the array from the given set of images, using the currently set
// int_mode_. If the image width doesn't match the shape, the images are
// truncated or padded with noise to match.
void NetworkIO::FromPixes(const StaticShape& shape,
                          const std::vector<const Pix*>& pixes,
                          TRand* randomizer) {
  int target_height = shape.height();
  int target_width = shape.width();
  std::vector<std::pair<int, int>> h_w_pairs;
  for (auto pix : pixes) {
    Pix* var_pix = const_cast<Pix*>(pix);
    int width = pixGetWidth(var_pix);
    if (target_width != 0) width = target_width;
    int height = pixGetHeight(var_pix);
    if (target_height != 0) height = target_height;
    h_w_pairs.emplace_back(height, width);
  }
  stride_map_.SetStride(h_w_pairs);
  ResizeToMap(int_mode(), stride_map_, shape.depth());
  // Iterate over the images again to copy the data.
  for (size_t b = 0; b < pixes.size(); ++b) {
    Pix* pix = const_cast<Pix*>(pixes[b]);
    float black = 0.0f, white = 255.0f;
    if (shape.depth() != 3) ComputeBlackWhite(pix, &black, &white);
    float contrast = (white - black) / 2.0f;
    if (contrast <= 0.0f) contrast = 1.0f;
    if (shape.height() == 1) {
      Copy1DGreyImage(b, pix, black, contrast, randomizer);
    } else {
      Copy2DImage(b, pix, black, contrast, randomizer);
    }
  }
}
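
// Usage sketch (added, illustrative only; `shape` and `pix` are assumed to be
// supplied by the caller, as they are in the LSTM recognizer's real call
// sites):
//
//   NetworkIO inputs;
//   TRand randomizer;  // noise source used to pad short lines
//   inputs.FromPix(shape, pix, &randomizer);
//
// FromPix wraps the single image in a vector and delegates to FromPixes.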

// Copies the given pix to *this at the given batch index, stretching and
// clipping the pixel values so that [black, black + 2*contrast] maps to the
// dynamic range of *this, ie [-1,1] for a float and (-127,127) for int.
// This is a 2-d operation in the sense that the output depth is the number
// of input channels, the height is the height of the image, and the width
// is the width of the image, or truncated/padded with noise if the width
// is a fixed size.
void NetworkIO::Copy2DImage(int batch, Pix* pix, float black, float contrast,
                            TRand* randomizer) {
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
  int wpl = pixGetWpl(pix);
  StrideMap::Index index(stride_map_);
  index.AddOffset(batch, FD_BATCH);
  int t = index.t();
  int target_height = stride_map_.Size(FD_HEIGHT);
  int target_width = stride_map_.Size(FD_WIDTH);
  int num_features = NumFeatures();
  bool color = num_features == 3;
  if (width > target_width) width = target_width;
  uint32_t* line = pixGetData(pix);
  for (int y = 0; y < target_height; ++y, line += wpl) {
    int x = 0;
    if (y < height) {
      for (x = 0; x < width; ++x, ++t) {
        if (color) {
          int f = 0;
          for (int c = COLOR_RED; c <= COLOR_BLUE; ++c) {
            int pixel = GET_DATA_BYTE(line + x, c);
            SetPixel(t, f++, pixel, black, contrast);
          }
        } else {
          int pixel = GET_DATA_BYTE(line, x);
          SetPixel(t, 0, pixel, black, contrast);
        }
      }
    }
    for (; x < target_width; ++x) Randomize(t++, 0, num_features, randomizer);
  }
}

// Copies the given pix to *this at the given batch index, as Copy2DImage
// above, except that the output depth is the height of the input image, the
// output height is 1, and the output width as for Copy2DImage.
// The image is thus treated as a 1-d set of vertical pixel strips.
void NetworkIO::Copy1DGreyImage(int batch, Pix* pix, float black,
                                float contrast, TRand* randomizer) {
  int width = pixGetWidth(pix);
  int height = pixGetHeight(pix);
  ASSERT_HOST(height == NumFeatures());
  int wpl = pixGetWpl(pix);
  StrideMap::Index index(stride_map_);
  index.AddOffset(batch, FD_BATCH);
  int t = index.t();
  int target_width = stride_map_.Size(FD_WIDTH);
  if (width > target_width) width = target_width;
  int x;
  for (x = 0; x < width; ++x, ++t) {
    for (int y = 0; y < height; ++y) {
      uint32_t* line = pixGetData(pix) + wpl * y;
      int pixel = GET_DATA_BYTE(line, x);
      SetPixel(t, y, pixel, black, contrast);
    }
  }
  for (; x < target_width; ++x) Randomize(t++, 0, height, randomizer);
}

// Helper stores the pixel value in i_ or f_ according to int_mode_.
// t: is the index from the StrideMap corresponding to the current
//    [batch,y,x] position
// f: is the index into the depth/channel
// pixel: the value of the pixel from the image (in one channel)
// black: the pixel value to map to the lowest of the range of *this
// contrast: the range of pixel values to stretch to half the range of *this.
void NetworkIO::SetPixel(int t, int f, int pixel, float black, float contrast) {
  float float_pixel = (pixel - black) / contrast - 1.0f;
  if (int_mode_) {
    i_[t][f] = ClipToRange<int>(IntCastRounded((INT8_MAX + 1) * float_pixel),
                                -INT8_MAX, INT8_MAX);
  } else {
    f_[t][f] = float_pixel;
  }
}
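
// Worked example (added commentary, with made-up numbers): with black=40 and
// contrast=95, a pixel equal to black maps to (40-40)/95 - 1 = -1.0, a pixel
// at black + contrast = 135 maps to 0.0, and a pixel at black + 2*contrast =
// 230 maps to +1.0. In int mode the same value is scaled by 128, rounded,
// and clipped to [-127, 127].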

// Converts the array to a Pix. Must be pixDestroyed after use.
Pix* NetworkIO::ToPix() const {
  // Count the width of the image, and find the max multiplication factor.
  int im_width = stride_map_.Size(FD_WIDTH);
  int im_height = stride_map_.Size(FD_HEIGHT);
  int num_features = NumFeatures();
  int feature_factor = 1;
  if (num_features == 3) {
    // Special hack for color.
    num_features = 1;
    feature_factor = 3;
  }
  Pix* pix = pixCreate(im_width, im_height * num_features, 32);
  StrideMap::Index index(stride_map_);
  do {
    int im_x = index.index(FD_WIDTH);
    int top_im_y = index.index(FD_HEIGHT);
    int im_y = top_im_y;
    int t = index.t();
    if (int_mode_) {
      const int8_t* features = i_[t];
      for (int y = 0; y < num_features; ++y, im_y += im_height) {
        int pixel = features[y * feature_factor];
        // 1 or 2 features use greyscale.
        int red = ClipToRange<int>(pixel + 128, 0, 255);
        int green = red, blue = red;
        if (feature_factor == 3) {
          // With 3 features assume RGB color.
          green = ClipToRange<int>(features[y * feature_factor + 1] + 128, 0, 255);
          blue = ClipToRange<int>(features[y * feature_factor + 2] + 128, 0, 255);
        } else if (num_features > 3) {
          // More than 3 features use false yellow/blue color, assuming a signed
          // input in the range [-1,1].
          red = abs(pixel) * 2;
          if (pixel >= 0) {
            green = red;
            blue = 0;
          } else {
            blue = red;
            green = red = 0;
          }
        }
        pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) |
                                     (green << L_GREEN_SHIFT) |
                                     (blue << L_BLUE_SHIFT));
      }
    } else {
      const float* features = f_[t];
      for (int y = 0; y < num_features; ++y, im_y += im_height) {
        float pixel = features[y * feature_factor];
        // 1 or 2 features use greyscale.
        int red = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
        int green = red, blue = red;
        if (feature_factor == 3) {
          // With 3 features assume RGB color.
          pixel = features[y * feature_factor + 1];
          green = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
          pixel = features[y * feature_factor + 2];
          blue = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);
        } else if (num_features > 3) {
          // More than 3 features use false yellow/blue color, assuming a signed
          // input in the range [-1,1].
          red = ClipToRange<int>(IntCastRounded(fabs(pixel) * 255), 0, 255);
          if (pixel >= 0) {
            green = red;
            blue = 0;
          } else {
            blue = red;
            green = red = 0;
          }
        }
        pixSetPixel(pix, im_x, im_y, (red << L_RED_SHIFT) |
                                     (green << L_GREEN_SHIFT) |
                                     (blue << L_BLUE_SHIFT));
      }
    }
  } while (index.Increment());
  return pix;
}
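
// Debugging sketch (added, illustrative; the variable name and output path
// are made up): the returned Pix is owned by the caller, so a typical dump
// looks like:
//
//   Pix* vis = outputs.ToPix();
//   pixWrite("/tmp/networkio_debug.png", vis, IFF_PNG);
//   pixDestroy(&vis);
//
// pixWrite and pixDestroy are the standard leptonica calls.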

// Prints the first and last num timesteps of the array for each feature.
void NetworkIO::Print(int num) const {
  int num_features = NumFeatures();
  for (int y = 0; y < num_features; ++y) {
    for (int t = 0; t < Width(); ++t) {
      if (num == 0 || t < num || t + num >= Width()) {
        if (int_mode_) {
          tprintf(" %g", static_cast<float>(i_[t][y]) / INT8_MAX);
        } else {
          tprintf(" %g", f_[t][y]);
        }
      }
    }
    tprintf("\n");
  }
}

// Copies a single time step from src.
void NetworkIO::CopyTimeStepFrom(int dest_t, const NetworkIO& src, int src_t) {
  ASSERT_HOST(int_mode_ == src.int_mode_);
  if (int_mode_) {
    memcpy(i_[dest_t], src.i_[src_t], i_.dim2() * sizeof(i_[0][0]));
  } else {
    memcpy(f_[dest_t], src.f_[src_t], f_.dim2() * sizeof(f_[0][0]));
  }
}

// Copies a part of single time step from src.
void NetworkIO::CopyTimeStepGeneral(int dest_t, int dest_offset,
                                    int num_features, const NetworkIO& src,
                                    int src_t, int src_offset) {
  ASSERT_HOST(int_mode_ == src.int_mode_);
  if (int_mode_) {
    memcpy(i_[dest_t] + dest_offset, src.i_[src_t] + src_offset,
           num_features * sizeof(i_[0][0]));
  } else {
    memcpy(f_[dest_t] + dest_offset, src.f_[src_t] + src_offset,
           num_features * sizeof(f_[0][0]));
  }
}

// Zeroes a single time step.
void NetworkIO::ZeroTimeStepGeneral(int t, int offset, int num_features) {
  if (int_mode_) {
    ZeroVector(num_features, i_[t] + offset);
  } else {
    ZeroVector(num_features, f_[t] + offset);
  }
}

// Sets the given range to random values.
void NetworkIO::Randomize(int t, int offset, int num_features,
                          TRand* randomizer) {
  if (int_mode_) {
    int8_t* line = i_[t] + offset;
    for (int i = 0; i < num_features; ++i)
      line[i] = IntCastRounded(randomizer->SignedRand(INT8_MAX));
  } else {
    // float mode.
    float* line = f_[t] + offset;
    for (int i = 0; i < num_features; ++i)
      line[i] = randomizer->SignedRand(1.0);
  }
}

// Helper returns the label and score of the best choice over a range.
int NetworkIO::BestChoiceOverRange(int t_start, int t_end, int not_this,
                                   int null_ch, float* rating,
                                   float* certainty) const {
  if (t_end <= t_start) return -1;
  int max_char = -1;
  float min_score = 0.0f;
  for (int c = 0; c < NumFeatures(); ++c) {
    if (c == not_this || c == null_ch) continue;
    ScoresOverRange(t_start, t_end, c, null_ch, rating, certainty);
    if (max_char < 0 || *rating < min_score) {
      min_score = *rating;
      max_char = c;
    }
  }
  ScoresOverRange(t_start, t_end, max_char, null_ch, rating, certainty);
  return max_char;
}

// Helper returns the rating and certainty of the choice over a range in output.
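//
// Reading of the algorithm below (added commentary, not part of the original
// source): ratings[] accumulates negative log-probability costs for three
// segmentations of the range: ratings[0] emits only null_ch, ratings[1] emits
// nulls followed by the choice up to the current t, and ratings[2] emits
// nulls, then the choice, then nulls again. certs[] tracks the worst (lowest)
// per-timestep certainty seen on the corresponding path. The returned rating
// is the cheaper of ratings[1] and ratings[2], plus the length of the range.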
void NetworkIO::ScoresOverRange(int t_start, int t_end, int choice, int null_ch,
                                float* rating, float* certainty) const {
  ASSERT_HOST(!int_mode_);
  *rating = 0.0f;
  *certainty = 0.0f;
  if (t_end <= t_start || t_end <= 0) return;
  float ratings[3] = {0.0f, 0.0f, 0.0f};
  float certs[3] = {0.0f, 0.0f, 0.0f};
  for (int t = t_start; t < t_end; ++t) {
    const float* line = f_[t];
    float score = ProbToCertainty(line[choice]);
    float zero = ProbToCertainty(line[null_ch]);
    if (t == t_start) {
      ratings[2] = FLT_MAX;
      ratings[1] = -score;
      certs[1] = score;
    } else {
      for (int i = 2; i >= 1; --i) {
        if (ratings[i] > ratings[i - 1]) {
          ratings[i] = ratings[i - 1];
          certs[i] = certs[i - 1];
        }
      }
      ratings[2] -= zero;
      if (zero < certs[2]) certs[2] = zero;
      ratings[1] -= score;
      if (score < certs[1]) certs[1] = score;
    }
    ratings[0] -= zero;
    if (zero < certs[0]) certs[0] = zero;
  }
  int best_i = ratings[2] < ratings[1] ? 2 : 1;
  *rating = ratings[best_i] + t_end - t_start;
  *certainty = certs[best_i];
}

// Returns the index (label) of the best value at the given timestep,
// excluding not_this and not_that, and if not null, sets the score to the
// log of the corresponding value.
int NetworkIO::BestLabel(int t, int not_this, int not_that,
                         float* score) const {
  ASSERT_HOST(!int_mode_);
  int best_index = -1;
  float best_score = -FLT_MAX;
  const float* line = f_[t];
  for (int i = 0; i < f_.dim2(); ++i) {
    if (line[i] > best_score && i != not_this && i != not_that) {
      best_score = line[i];
      best_index = i;
    }
  }
  if (score != nullptr) *score = ProbToCertainty(best_score);
  return best_index;
}

// Returns the best start position out of [start, end) (into which all labels
// must fit) to obtain the highest cumulative score for the given labels.
int NetworkIO::PositionOfBestMatch(const GenericVector<int>& labels, int start,
                                   int end) const {
  int length = labels.size();
  int last_start = end - length;
  int best_start = -1;
  double best_score = 0.0;
  for (int s = start; s <= last_start; ++s) {
    double score = ScoreOfLabels(labels, s);
    if (score > best_score || best_start < 0) {
      best_score = score;
      best_start = s;
    }
  }
  return best_start;
}

// Returns the cumulative score of the given labels starting at start, and
// using one label per time-step.
double NetworkIO::ScoreOfLabels(const GenericVector<int>& labels,
                                int start) const {
  int length = labels.size();
  double score = 0.0;
  for (int i = 0; i < length; ++i) {
    score += f_(start + i, labels[i]);
  }
  return score;
}

// Helper function sets all the outputs for a single timestep, such that
// label has value ok_score, and the other labels share 1 - ok_score.
void NetworkIO::SetActivations(int t, int label, float ok_score) {
  ASSERT_HOST(!int_mode_);
  int num_classes = NumFeatures();
  float bad_score = (1.0f - ok_score) / (num_classes - 1);
  float* targets = f_[t];
  for (int i = 0; i < num_classes; ++i)
    targets[i] = bad_score;
  targets[label] = ok_score;
}

// Modifies the values, only if needed, so that the given label is
// the winner at the given time step t.
void NetworkIO::EnsureBestLabel(int t, int label) {
  ASSERT_HOST(!int_mode_);
  if (BestLabel(t, nullptr) != label) {
    // Output value needs enhancing. Third all the other elements and add the
    // remainder to best_label.
    int num_classes = NumFeatures();
    float* targets = f_[t];
    for (int c = 0; c < num_classes; ++c) {
      if (c == label) {
        targets[c] += (1.0 - targets[c]) * (2 / 3.0);
      } else {
        targets[c] /= 3.0;
      }
    }
  }
}

// Helper function converts prob to certainty taking the minimum into account.
/* static */
float NetworkIO::ProbToCertainty(float prob) {
  return prob > kMinProb ? log(prob) : kMinCertainty;
}
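
// Worked example (added commentary): a probability of 0.5 maps to
// log(0.5) ~= -0.69, 0.01 maps to ~= -4.6, and anything at or below
// kMinProb = exp(-20) ~= 2.1e-9 is clamped to kMinCertainty = -20, so the
// certainty scale is a floored natural log of the softmax output.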

// Returns true if there is any bad value that is suspiciously like a GT
// error. Assuming that *this is the difference(gradient) between target
// and forward output, returns true if there is a large negative value
// (correcting a very confident output) for which there is no corresponding
// positive value in an adjacent timestep for the same feature index. This
// allows the box-truthed samples to make fine adjustments to position while
// stopping other disagreements of confident output with ground truth.
bool NetworkIO::AnySuspiciousTruth(float confidence_thr) const {
  int num_features = NumFeatures();
  for (int t = 0; t < Width(); ++t) {
    const float* features = f_[t];
    for (int y = 0; y < num_features; ++y) {
      float grad = features[y];
      if (grad < -confidence_thr) {
        // Correcting strong output. Check for movement.
        if ((t == 0 || f_[t - 1][y] < confidence_thr / 2) &&
            (t + 1 == Width() || f_[t + 1][y] < confidence_thr / 2)) {
          return true;  // No strong positive on either side.
        }
      }
    }
  }
  return false;
}

// Reads a single timestep to floats in the range [-1, 1].
void NetworkIO::ReadTimeStep(int t, double* output) const {
  if (int_mode_) {
    const int8_t* line = i_[t];
    for (int i = 0; i < i_.dim2(); ++i) {
      output[i] = static_cast<double>(line[i]) / INT8_MAX;
    }
  } else {
    const float* line = f_[t];
    for (int i = 0; i < f_.dim2(); ++i) {
      output[i] = static_cast<double>(line[i]);
    }
  }
}

// Adds a single timestep to floats.
void NetworkIO::AddTimeStep(int t, double* inout) const {
  int num_features = NumFeatures();
  if (int_mode_) {
    const int8_t* line = i_[t];
    for (int i = 0; i < num_features; ++i) {
      inout[i] += static_cast<double>(line[i]) / INT8_MAX;
    }
  } else {
    const float* line = f_[t];
    for (int i = 0; i < num_features; ++i) {
      inout[i] += line[i];
    }
  }
}

// Adds part of a single timestep to floats.
void NetworkIO::AddTimeStepPart(int t, int offset, int num_features,
                                float* inout) const {
  if (int_mode_) {
    const int8_t* line = i_[t] + offset;
    for (int i = 0; i < num_features; ++i) {
      inout[i] += static_cast<float>(line[i]) / INT8_MAX;
    }
  } else {
    const float* line = f_[t] + offset;
    for (int i = 0; i < num_features; ++i) {
      inout[i] += line[i];
    }
  }
}

// Writes a single timestep from floats in the range [-1, 1].
void NetworkIO::WriteTimeStep(int t, const double* input) {
  WriteTimeStepPart(t, 0, NumFeatures(), input);
}

// Writes a single timestep from floats in the range [-1, 1] writing only
// num_features elements of input to (*this)[t], starting at offset.
void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features,
                                  const double* input) {
  if (int_mode_) {
    int8_t* line = i_[t] + offset;
    for (int i = 0; i < num_features; ++i) {
      line[i] = ClipToRange<int>(IntCastRounded(input[i] * INT8_MAX),
                                 -INT8_MAX, INT8_MAX);
    }
  } else {
    float* line = f_[t] + offset;
    for (int i = 0; i < num_features; ++i) {
      line[i] = static_cast<float>(input[i]);
    }
  }
}

// Maxpools a single time step from src.
void NetworkIO::MaxpoolTimeStep(int dest_t, const NetworkIO& src, int src_t,
                                int* max_line) {
  ASSERT_HOST(int_mode_ == src.int_mode_);
  if (int_mode_) {
    int dim = i_.dim2();
    int8_t* dest_line = i_[dest_t];
    const int8_t* src_line = src.i_[src_t];
    for (int i = 0; i < dim; ++i) {
      if (dest_line[i] < src_line[i]) {
        dest_line[i] = src_line[i];
        max_line[i] = src_t;
      }
    }
  } else {
    int dim = f_.dim2();
    float* dest_line = f_[dest_t];
    const float* src_line = src.f_[src_t];
    for (int i = 0; i < dim; ++i) {
      if (dest_line[i] < src_line[i]) {
        dest_line[i] = src_line[i];
        max_line[i] = src_t;
      }
    }
  }
}

// Runs maxpool backward, using maxes to index timesteps in *this.
void NetworkIO::MaxpoolBackward(const NetworkIO& fwd,
                                const GENERIC_2D_ARRAY<int>& maxes) {
  ASSERT_HOST(!int_mode_);
  Zero();
  StrideMap::Index index(fwd.stride_map_);
  do {
    int t = index.t();
    const int* max_line = maxes[t];
    const float* fwd_line = fwd.f_[t];
    int num_features = fwd.f_.dim2();
    for (int i = 0; i < num_features; ++i) {
      f_[max_line[i]][i] = fwd_line[i];
    }
  } while (index.Increment());
}

// Returns the min over time of the maxes over features of the outputs.
float NetworkIO::MinOfMaxes() const {
  float min_max = 0.0f;
  int width = Width();
  int num_features = NumFeatures();
  for (int t = 0; t < width; ++t) {
    float max_value = -FLT_MAX;
    if (int_mode_) {
      const int8_t* column = i_[t];
      for (int i = 0; i < num_features; ++i) {
        if (column[i] > max_value) max_value = column[i];
      }
    } else {
      const float* column = f_[t];
      for (int i = 0; i < num_features; ++i) {
        if (column[i] > max_value) max_value = column[i];
      }
    }
    if (t == 0 || max_value < min_max) min_max = max_value;
  }
  return min_max;
}

// Computes combined results for a combiner that chooses between an existing
// input and itself, with an additional output to indicate the choice.
void NetworkIO::CombineOutputs(const NetworkIO& base_output,
                               const NetworkIO& combiner_output) {
  int no = base_output.NumFeatures();
  ASSERT_HOST(combiner_output.NumFeatures() == no + 1);
  Resize(base_output, no);
  int width = Width();
  if (int_mode_) {
    // Number of outputs from base and final result.
    for (int t = 0; t < width; ++t) {
      int8_t* out_line = i_[t];
      const int8_t* base_line = base_output.i_[t];
      const int8_t* comb_line = combiner_output.i_[t];
      float base_weight = static_cast<float>(comb_line[no]) / INT8_MAX;
      float boost_weight = 1.0f - base_weight;
      for (int i = 0; i < no; ++i) {
        out_line[i] = IntCastRounded(base_line[i] * base_weight +
                                     comb_line[i] * boost_weight);
      }
    }
  } else {
    for (int t = 0; t < width; ++t) {
      float* out_line = f_[t];
      const float* base_line = base_output.f_[t];
      const float* comb_line = combiner_output.f_[t];
      float base_weight = comb_line[no];
      float boost_weight = 1.0f - base_weight;
      for (int i = 0; i < no; ++i) {
        out_line[i] = base_line[i] * base_weight + comb_line[i] * boost_weight;
      }
    }
  }
}
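
// Added note (commentary, not original): the combiner produces no+1 features,
// and the extra feature comb_line[no] acts as a gate w in [0,1]. Each combined
// output is a convex blend: out[i] = w * base[i] + (1 - w) * comb[i]. For
// example, w = 0.8 keeps 80% of the base network's activation and mixes in
// 20% of the combiner's own prediction.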

// Computes deltas for a combiner that chooses between 2 sets of inputs.
void NetworkIO::ComputeCombinerDeltas(const NetworkIO& fwd_deltas,
                                      const NetworkIO& base_output) {
  ASSERT_HOST(!int_mode_);
  // Compute the deltas for the combiner.
  int width = Width();
  int no = NumFeatures() - 1;
  ASSERT_HOST(fwd_deltas.NumFeatures() == no);
  ASSERT_HOST(base_output.NumFeatures() == no);
  // Number of outputs from base and final result.
  for (int t = 0; t < width; ++t) {
    const float* delta_line = fwd_deltas.f_[t];
    const float* base_line = base_output.f_[t];
    float* comb_line = f_[t];
    float base_weight = comb_line[no];
    float boost_weight = 1.0f - base_weight;
    float max_base_delta = 0.0;
    for (int i = 0; i < no; ++i) {
      // What did the combiner actually produce?
      float output = base_line[i] * base_weight + comb_line[i] * boost_weight;
      // Reconstruct the target from the delta.
      float comb_target = delta_line[i] + output;
      comb_line[i] = comb_target - comb_line[i];
      float base_delta = fabs(comb_target - base_line[i]);
      if (base_delta > max_base_delta) max_base_delta = base_delta;
    }
    if (max_base_delta >= 0.5) {
      // The base network got it wrong. The combiner should output the right
      // answer and 0 for the base network.
      comb_line[no] = 0.0 - base_weight;
    } else {
      // The base network was right. The combiner should flag that.
      for (int i = 0; i < no; ++i) {
        // All other targets are 0.
        if (comb_line[i] > 0.0) comb_line[i] -= 1.0;
      }
      comb_line[no] = 1.0 - base_weight;
    }
  }
}

// Copies the array checking that the types match.
void NetworkIO::CopyAll(const NetworkIO& src) {
  ASSERT_HOST(src.int_mode_ == int_mode_);
  f_ = src.f_;
}

// Checks that both are floats and adds the src array to *this.
void NetworkIO::AddAllToFloat(const NetworkIO& src) {
  ASSERT_HOST(!int_mode_);
  ASSERT_HOST(!src.int_mode_);
  f_ += src.f_;
}

// Subtracts the array from a float array. src must also be float.
void NetworkIO::SubtractAllFromFloat(const NetworkIO& src) {
  ASSERT_HOST(!int_mode_);
  ASSERT_HOST(!src.int_mode_);
  f_ -= src.f_;
}

// Copies src to *this, with maxabs normalization to match scale.
void NetworkIO::CopyWithNormalization(const NetworkIO& src,
                                      const NetworkIO& scale) {
  ASSERT_HOST(!int_mode_);
  ASSERT_HOST(!src.int_mode_);
  ASSERT_HOST(!scale.int_mode_);
  float src_max = src.f_.MaxAbs();
  ASSERT_HOST(std::isfinite(src_max));
  float scale_max = scale.f_.MaxAbs();
  ASSERT_HOST(std::isfinite(scale_max));
  if (src_max > 0.0f) {
    float factor = scale_max / src_max;
    for (int t = 0; t < src.Width(); ++t) {
      const float* src_ptr = src.f_[t];
      float* dest_ptr = f_[t];
      for (int i = 0; i < src.f_.dim2(); ++i) dest_ptr[i] = src_ptr[i] * factor;
    }
  } else {
    f_.Clear();
  }
}

// Copies src to *this with independent reversal of the y dimension.
void NetworkIO::CopyWithYReversal(const NetworkIO& src) {
  int num_features = src.NumFeatures();
  Resize(src, num_features);
  StrideMap::Index b_index(src.stride_map_);
  do {
    int width = b_index.MaxIndexOfDim(FD_WIDTH) + 1;
    StrideMap::Index fwd_index(b_index);
    StrideMap::Index rev_index(b_index);
    rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_HEIGHT), FD_HEIGHT);
    do {
      int fwd_t = fwd_index.t();
      int rev_t = rev_index.t();
      for (int x = 0; x < width; ++x) CopyTimeStepFrom(rev_t++, src, fwd_t++);
    } while (fwd_index.AddOffset(1, FD_HEIGHT) &&
             rev_index.AddOffset(-1, FD_HEIGHT));
  } while (b_index.AddOffset(1, FD_BATCH));
}

// Copies src to *this with independent reversal of the x dimension.
void NetworkIO::CopyWithXReversal(const NetworkIO& src) {
  int num_features = src.NumFeatures();
  Resize(src, num_features);
  StrideMap::Index b_index(src.stride_map_);
  do {
    StrideMap::Index y_index(b_index);
    do {
      StrideMap::Index fwd_index(y_index);
      StrideMap::Index rev_index(y_index);
      rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_WIDTH), FD_WIDTH);
      do {
        CopyTimeStepFrom(rev_index.t(), src, fwd_index.t());
      } while (fwd_index.AddOffset(1, FD_WIDTH) &&
               rev_index.AddOffset(-1, FD_WIDTH));
    } while (y_index.AddOffset(1, FD_HEIGHT));
  } while (b_index.AddOffset(1, FD_BATCH));
}

// Copies src to *this with independent transpose of the x and y dimensions.
void NetworkIO::CopyWithXYTranspose(const NetworkIO& src) {
  int num_features = src.NumFeatures();
  stride_map_ = src.stride_map_;
  stride_map_.TransposeXY();
  ResizeToMap(src.int_mode(), stride_map_, num_features);
  StrideMap::Index src_b_index(src.stride_map_);
  StrideMap::Index dest_b_index(stride_map_);
  do {
    StrideMap::Index src_y_index(src_b_index);
    StrideMap::Index dest_x_index(dest_b_index);
    do {
      StrideMap::Index src_x_index(src_y_index);
      StrideMap::Index dest_y_index(dest_x_index);
      do {
        CopyTimeStepFrom(dest_y_index.t(), src, src_x_index.t());
      } while (src_x_index.AddOffset(1, FD_WIDTH) &&
               dest_y_index.AddOffset(1, FD_HEIGHT));
    } while (src_y_index.AddOffset(1, FD_HEIGHT) &&
             dest_x_index.AddOffset(1, FD_WIDTH));
  } while (src_b_index.AddOffset(1, FD_BATCH) &&
           dest_b_index.AddOffset(1, FD_BATCH));
}

// Copies src to *this, at the given feature_offset, returning the total
// feature offset after the copy. Multiple calls will stack outputs from
// multiple sources in feature space.
int NetworkIO::CopyPacking(const NetworkIO& src, int feature_offset) {
  ASSERT_HOST(int_mode_ == src.int_mode_);
  int width = src.Width();
  ASSERT_HOST(width <= Width());
  int num_features = src.NumFeatures();
  ASSERT_HOST(num_features + feature_offset <= NumFeatures());
  if (int_mode_) {
    for (int t = 0; t < width; ++t) {
      memcpy(i_[t] + feature_offset, src.i_[t],
             num_features * sizeof(i_[t][0]));
    }
    for (int t = width; t < i_.dim1(); ++t) {
      memset(i_[t], 0, num_features * sizeof(i_[t][0]));
    }
  } else {
    for (int t = 0; t < width; ++t) {
      memcpy(f_[t] + feature_offset, src.f_[t],
             num_features * sizeof(f_[t][0]));
    }
    for (int t = width; t < f_.dim1(); ++t) {
      memset(f_[t], 0, num_features * sizeof(f_[t][0]));
    }
  }
  return num_features + feature_offset;
}
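
// Packing sketch (added, illustrative; the variable names are invented and
// `packed` is assumed to have been resized with enough total features): two
// parallel outputs can be stacked in feature space and later split apart:
//
//   int offset = packed.CopyPacking(forward_out, 0);
//   offset = packed.CopyPacking(backward_out, offset);
//   ...
//   forward_part.CopyUnpacking(packed, 0, forward_out.NumFeatures());
//
// This mirrors how per-direction features can be concatenated and recovered.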

// Opposite of CopyPacking, fills *this with a part of src, starting at
// feature_offset, and picking num_features.
void NetworkIO::CopyUnpacking(const NetworkIO& src, int feature_offset,
                              int num_features) {
  Resize(src, num_features);
  int width = src.Width();
  ASSERT_HOST(num_features + feature_offset <= src.NumFeatures());
  if (int_mode_) {
    for (int t = 0; t < width; ++t) {
      memcpy(i_[t], src.i_[t] + feature_offset,
             num_features * sizeof(i_[t][0]));
    }
  } else {
    for (int t = 0; t < width; ++t) {
      memcpy(f_[t], src.f_[t] + feature_offset,
             num_features * sizeof(f_[t][0]));
    }
  }
}

// Transposes the float part of *this into dest.
void NetworkIO::Transpose(TransposedArray* dest) const {
  int width = Width();
  dest->ResizeNoInit(NumFeatures(), width);
  for (int t = 0; t < width; ++t) dest->WriteStrided(t, f_[t]);
}

// Clips the content of a single time-step to +/-range.
void NetworkIO::ClipVector(int t, float range) {
  ASSERT_HOST(!int_mode_);
  float* v = f_[t];
  int dim = f_.dim2();
  for (int i = 0; i < dim; ++i)
    v[i] = ClipToRange<float>(v[i], -range, range);
}

// Returns the padding required for the given number of features in order
// for the SIMD operations to be safe.
/* static */
int NetworkIO::GetPadding(int num_features) {
  int padding = 0;
  if (IntSimdMatrix::intSimdMatrix != nullptr) {
    padding =
        IntSimdMatrix::intSimdMatrix->RoundInputs(num_features) - num_features;
  }
  return padding;
}
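
// Example (added commentary; the rounding granularity is an assumption, since
// it depends on the SIMD implementation detected at runtime): if the active
// IntSimdMatrix rounds input counts up to a multiple of 8, GetPadding(20)
// returns 4, so each timestep row of the int buffer is allocated with 4 extra
// values and the SIMD kernels can load whole registers without overrunning it.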

}  // namespace tesseract.