tesseract 4.1.1
Loading...
Searching...
No Matches
lstmrecognizer.cpp
Go to the documentation of this file.
1
2// File: lstmrecognizer.cpp
3// Description: Top-level line recognizer class for LSTM-based networks.
4// Author: Ray Smith
5// Created: Thu May 02 10:59:06 PST 2013
6//
7// (C) Copyright 2013, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
18
19// Include automatically generated configuration file if running autoconf.
20#ifdef HAVE_CONFIG_H
21# include "config_auto.h"
22#endif
23
24#include "lstmrecognizer.h"
25
26#include "allheaders.h"
27#include "callcpp.h"
28#include "dict.h"
29#include "genericheap.h"
30#include "helpers.h"
31#include "imagedata.h"
32#include "input.h"
33#include "lstm.h"
34#include "normalis.h"
35#include "pageres.h"
36#include "ratngs.h"
37#include "recodebeam.h"
38#include "scrollview.h"
39#include "statistc.h"
40#include "tprintf.h"
41
42namespace tesseract {
43
44// Default ratio between dict and non-dict words.
// Passed as the dict_ratio argument of RecodeBeamSearch::Decode by the
// word-producing RecognizeLine overload.
45const double kDictRatio = 2.25;
46// Default certainty offset to give the dictionary a chance.
// Passed as the cert_offset argument of RecodeBeamSearch::Decode alongside
// kDictRatio.
47const double kCertOffset = -0.085;
48
49LSTMRecognizer::LSTMRecognizer(const STRING language_data_path_prefix)
51 ccutil_.language_data_path_prefix = language_data_path_prefix;
52}
53
55 : network_(nullptr),
56 training_flags_(0),
57 training_iteration_(0),
58 sample_iteration_(0),
59 null_char_(UNICHAR_BROKEN),
60 learning_rate_(0.0f),
61 momentum_(0.0f),
62 adam_beta_(0.0f),
63 dict_(nullptr),
64 search_(nullptr),
65 debug_win_(nullptr) {}
66
68 delete network_;
69 delete dict_;
70 delete search_;
71}
72
73// Loads a model from mgr, including the dictionary only if lang is not null.
74bool LSTMRecognizer::Load(const ParamsVectors* params, const char* lang,
75 TessdataManager* mgr) {
76 TFile fp;
77 if (!mgr->GetComponent(TESSDATA_LSTM, &fp)) return false;
78 if (!DeSerialize(mgr, &fp)) return false;
79 if (lang == nullptr) return true;
80 // Allow it to run without a dictionary.
81 LoadDictionary(params, lang, mgr);
82 return true;
83}
84
85// Writes to the given file. Returns false in case of error.
86bool LSTMRecognizer::Serialize(const TessdataManager* mgr, TFile* fp) const {
87 bool include_charsets = mgr == nullptr ||
90 if (!network_->Serialize(fp)) return false;
91 if (include_charsets && !GetUnicharset().save_to_file(fp)) return false;
92 if (!network_str_.Serialize(fp)) return false;
93 if (!fp->Serialize(&training_flags_)) return false;
94 if (!fp->Serialize(&training_iteration_)) return false;
95 if (!fp->Serialize(&sample_iteration_)) return false;
96 if (!fp->Serialize(&null_char_)) return false;
97 if (!fp->Serialize(&adam_beta_)) return false;
98 if (!fp->Serialize(&learning_rate_)) return false;
99 if (!fp->Serialize(&momentum_)) return false;
100 if (include_charsets && IsRecoding() && !recoder_.Serialize(fp)) return false;
101 return true;
102}
103
104// Reads from the given file. Returns false in case of error.
106 delete network_;
108 if (network_ == nullptr) return false;
109 bool include_charsets = mgr == nullptr ||
112 if (include_charsets && !ccutil_.unicharset.load_from_file(fp, false))
113 return false;
114 if (!network_str_.DeSerialize(fp)) return false;
115 if (!fp->DeSerialize(&training_flags_)) return false;
116 if (!fp->DeSerialize(&training_iteration_)) return false;
117 if (!fp->DeSerialize(&sample_iteration_)) return false;
118 if (!fp->DeSerialize(&null_char_)) return false;
119 if (!fp->DeSerialize(&adam_beta_)) return false;
120 if (!fp->DeSerialize(&learning_rate_)) return false;
121 if (!fp->DeSerialize(&momentum_)) return false;
122 if (include_charsets && !LoadRecoder(fp)) return false;
123 if (!include_charsets && !LoadCharsets(mgr)) return false;
126 return true;
127}
128
129// Loads the charsets from mgr.
131 TFile fp;
132 if (!mgr->GetComponent(TESSDATA_LSTM_UNICHARSET, &fp)) return false;
133 if (!ccutil_.unicharset.load_from_file(&fp, false)) return false;
134 if (!mgr->GetComponent(TESSDATA_LSTM_RECODER, &fp)) return false;
135 if (!LoadRecoder(&fp)) return false;
136 return true;
137}
138
139// Loads the Recoder.
141 if (IsRecoding()) {
142 if (!recoder_.DeSerialize(fp)) return false;
143 RecodedCharID code;
145 if (code(0) != UNICHAR_SPACE) {
146 tprintf("Space was garbled in recoding!!\n");
147 return false;
148 }
149 } else {
152 }
153 return true;
154}
155
156// Loads the dictionary if possible from the traineddata file.
157// Prints a warning message, and returns false but otherwise fails silently
158// and continues to work without it if loading fails.
159// Note that dictionary load is independent from DeSerialize, but dependent
160// on the unicharset matching. This enables training to deserialize a model
161// from checkpoint or restore without having to go back and reload the
162// dictionary.
163// Some parameters have to be passed in (from langdata/config/api via Tesseract)
165 const char* lang, TessdataManager* mgr) {
166 delete dict_;
167 dict_ = new Dict(&ccutil_);
168 dict_->user_words_file.ResetFrom(params);
169 dict_->user_words_suffix.ResetFrom(params);
170 dict_->user_patterns_file.ResetFrom(params);
171 dict_->user_patterns_suffix.ResetFrom(params);
173 dict_->LoadLSTM(lang, mgr);
174 if (dict_->FinishLoad()) return true; // Success.
175 tprintf("Failed to load any lstm-specific dictionaries for lang %s!!\n",
176 lang);
177 delete dict_;
178 dict_ = nullptr;
179 return false;
180}
181
182// Recognizes the line image, contained within image_data, returning the
183// ratings matrix and matching box_word for each WERD_RES in the output.
184void LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
185 bool debug, double worst_dict_cert,
186 const TBOX& line_box,
188 int lstm_choice_mode) {
189 NetworkIO outputs;
190 float scale_factor;
191 NetworkIO inputs;
192 if (!RecognizeLine(image_data, invert, debug, false, false, &scale_factor,
193 &inputs, &outputs))
194 return;
195 if (search_ == nullptr) {
196 search_ =
198 }
199 search_->Decode(outputs, kDictRatio, kCertOffset, worst_dict_cert,
200 &GetUnicharset(), lstm_choice_mode);
201 search_->ExtractBestPathAsWords(line_box, scale_factor, debug,
202 &GetUnicharset(), words, lstm_choice_mode);
203}
204
205// Helper computes min and mean best results in the output.
206void LSTMRecognizer::OutputStats(const NetworkIO& outputs, float* min_output,
207 float* mean_output, float* sd) {
208 const int kOutputScale = INT8_MAX;
209 STATS stats(0, kOutputScale + 1);
210 for (int t = 0; t < outputs.Width(); ++t) {
211 int best_label = outputs.BestLabel(t, nullptr);
212 if (best_label != null_char_) {
213 float best_output = outputs.f(t)[best_label];
214 stats.add(static_cast<int>(kOutputScale * best_output), 1);
215 }
216 }
217 // If the output is all nulls it could be that the photometric interpretation
218 // is wrong, so make it look bad, so the other way can win, even if not great.
219 if (stats.get_total() == 0) {
220 *min_output = 0.0f;
221 *mean_output = 0.0f;
222 *sd = 1.0f;
223 } else {
224 *min_output = static_cast<float>(stats.min_bucket()) / kOutputScale;
225 *mean_output = stats.mean() / kOutputScale;
226 *sd = stats.sd() / kOutputScale;
227 }
228}
229
230// Runs the network forward over the line image prepared from image_data,
231// leaving the network input in *inputs and the network output in *outputs.
// Returns false if no input image could be prepared, or if the network is
// training and the prepared image is wider than kMaxImageWidth; true
// otherwise.
//   invert       - if the weakest winning output is below 0.5, also try the
//                  inverted image and keep it when it scores better.
//   debug        - passed to Forward; also enables the display/trace output
//                  at the end and the "Inverting image" message.
//   re_invert    - when the inverted attempt is rejected, run Forward on
//                  *inputs again.
//   upside_down  - rotate the prepared image by 180 degrees before use.
//   scale_factor - receives the image scale from PrepareLSTMInputs and is
//                  then replaced by min_width divided by that scale.
// NOTE(review): this listing has gaps (its own line numbers 239, 257-258,
// 267, 269 and 286 are absent), so the statements below are incomplete.
232bool LSTMRecognizer::RecognizeLine(const ImageData& image_data, bool invert,
233 bool debug, bool re_invert, bool upside_down,
234 float* scale_factor, NetworkIO* inputs,
235 NetworkIO* outputs) {
236 // Maximum width of image to train on.
237 const int kMaxImageWidth = 2560;
238 // This ensures consistent recognition results.
 // NOTE(review): the statement this comment describes (listing line 239) is
 // missing - presumably a reseed of randomizer_; confirm against upstream.
240 int min_width = network_->XScaleFactor();
241 Pix* pix = Input::PrepareLSTMInputs(image_data, network_, min_width,
242 &randomizer_, scale_factor);
243 if (pix == nullptr) {
244 tprintf("Line cannot be recognized!!\n");
245 return false;
246 }
247 if (network_->IsTraining() && pixGetWidth(pix) > kMaxImageWidth) {
248 tprintf("Image too large to learn!! Size = %dx%d\n", pixGetWidth(pix),
249 pixGetHeight(pix));
 // pix is owned by this function, so release it on this early exit too.
250 pixDestroy(&pix);
251 return false;
252 }
253 if (upside_down) pixRotate180(pix, pix);
254 // Reduction factor from image to coords.
255 *scale_factor = min_width / *scale_factor;
256 inputs->set_int_mode(IsIntMode());
 // NOTE(review): listing lines 257-258 are missing, so nothing visible fills
 // *inputs from pix before Forward. The page's cross-reference residue names
 // Input::PreparePixInput and Network::InputShape - presumably that call
 // belongs here; confirm against upstream.
259 network_->Forward(debug, *inputs, nullptr, &scratch_space_, outputs);
260 // Check for auto inversion.
261 float pos_min, pos_mean, pos_sd;
262 OutputStats(*outputs, &pos_min, &pos_mean, &pos_sd);
263 if (invert && pos_min < 0.5) {
264 // Run again inverted and see if it is any better.
265 NetworkIO inv_inputs, inv_outputs;
266 inv_inputs.set_int_mode(IsIntMode());
 // NOTE(review): listing line 267 is missing here.
268 pixInvert(pix, pix);
 // NOTE(review): listing line 269 is missing; the next line is the tail of
 // a call that fills inv_inputs - presumably Input::PreparePixInput.
270 &inv_inputs);
271 network_->Forward(debug, inv_inputs, nullptr, &scratch_space_,
272 &inv_outputs);
273 float inv_min, inv_mean, inv_sd;
274 OutputStats(inv_outputs, &inv_min, &inv_mean, &inv_sd);
 // The inverted run only wins if it is better on all three statistics:
 // higher minimum, higher mean and lower standard deviation.
275 if (inv_min > pos_min && inv_mean > pos_mean && inv_sd < pos_sd) {
276 // Inverted did better. Use inverted data.
277 if (debug) {
278 tprintf("Inverting image: old min=%g, mean=%g, sd=%g, inv %g,%g,%g\n",
279 pos_min, pos_mean, pos_sd, inv_min, inv_mean, inv_sd);
280 }
281 *outputs = inv_outputs;
282 *inputs = inv_inputs;
283 } else if (re_invert) {
284 // Inverting was not an improvement, so undo and run again, so the
285 // outputs match the best forward result.
 // NOTE(review): listing line 286 is missing here.
287 network_->Forward(debug, *inputs, nullptr, &scratch_space_, outputs);
288 }
289 }
290 pixDestroy(&pix);
291 if (debug) {
 // Show the chosen input with its decoded labels and print the activation
 // trace for the chosen outputs.
292 GenericVector<int> labels, coords;
293 LabelsFromOutputs(*outputs, &labels, &coords);
294 DisplayForward(*inputs, labels, coords, "LSTMForward", &debug_win_);
295 DebugActivationPath(*outputs, labels, coords);
296 }
297 return true;
298}
299
300// Converts an array of labels to utf-8, whether or not the labels are
301// augmented with character boundaries.
303 STRING result;
304 int end = 1;
305 for (int start = 0; start < labels.size(); start = end) {
306 if (labels[start] == null_char_) {
307 end = start + 1;
308 } else {
309 result += DecodeLabel(labels, start, &end, nullptr);
310 }
311 }
312 return result;
313}
314
315// Displays the forward results in a window with the characters and
316// boundaries as determined by the labels and label_coords.
318 const GenericVector<int>& labels,
319 const GenericVector<int>& label_coords,
320 const char* window_name,
321 ScrollView** window) {
322#ifndef GRAPHICS_DISABLED // do nothing if there's no graphics
323 Pix* input_pix = inputs.ToPix();
324 Network::ClearWindow(false, window_name, pixGetWidth(input_pix),
325 pixGetHeight(input_pix), window);
326 int line_height = Network::DisplayImage(input_pix, *window);
327 DisplayLSTMOutput(labels, label_coords, line_height, *window);
328#endif // GRAPHICS_DISABLED
329}
330
331// Displays the labels and cuts at the corresponding xcoords.
332// Size of labels should match xcoords.
334 const GenericVector<int>& xcoords,
335 int height, ScrollView* window) {
336#ifndef GRAPHICS_DISABLED // do nothing if there's no graphics
337 int x_scale = network_->XScaleFactor();
338 window->TextAttributes("Arial", height / 4, false, false, false);
339 int end = 1;
340 for (int start = 0; start < labels.size(); start = end) {
341 int xpos = xcoords[start] * x_scale;
342 if (labels[start] == null_char_) {
343 end = start + 1;
344 window->Pen(ScrollView::RED);
345 } else {
346 window->Pen(ScrollView::GREEN);
347 const char* str = DecodeLabel(labels, start, &end, nullptr);
348 if (*str == '\\') str = "\\\\";
349 xpos = xcoords[(start + end) / 2] * x_scale;
350 window->Text(xpos, height, str);
351 }
352 window->Line(xpos, 0, xpos, height * 3 / 2);
353 }
354 window->Update();
355#endif // GRAPHICS_DISABLED
356}
357
358// Prints debug output detailing the activation path that is implied by the
359// label_coords.
361 const GenericVector<int>& labels,
362 const GenericVector<int>& xcoords) {
363 if (xcoords[0] > 0)
364 DebugActivationRange(outputs, "<null>", null_char_, 0, xcoords[0]);
365 int end = 1;
366 for (int start = 0; start < labels.size(); start = end) {
367 if (labels[start] == null_char_) {
368 end = start + 1;
369 DebugActivationRange(outputs, "<null>", null_char_, xcoords[start],
370 xcoords[end]);
371 continue;
372 } else {
373 int decoded;
374 const char* label = DecodeLabel(labels, start, &end, &decoded);
375 DebugActivationRange(outputs, label, labels[start], xcoords[start],
376 xcoords[start + 1]);
377 for (int i = start + 1; i < end; ++i) {
378 DebugActivationRange(outputs, DecodeSingleLabel(labels[i]), labels[i],
379 xcoords[i], xcoords[i + 1]);
380 }
381 }
382 }
383}
384
385// Prints debug output detailing activations and 2nd choice over a range
386// of positions.
388 const char* label, int best_choice,
389 int x_start, int x_end) {
390 tprintf("%s=%d On [%d, %d), scores=", label, best_choice, x_start, x_end);
391 double max_score = 0.0;
392 double mean_score = 0.0;
393 const int width = x_end - x_start;
394 for (int x = x_start; x < x_end; ++x) {
395 const float* line = outputs.f(x);
396 const double score = line[best_choice] * 100.0;
397 if (score > max_score) max_score = score;
398 mean_score += score / width;
399 int best_c = 0;
400 double best_score = 0.0;
401 for (int c = 0; c < outputs.NumFeatures(); ++c) {
402 if (c != best_choice && line[c] > best_score) {
403 best_c = c;
404 best_score = line[c];
405 }
406 }
407 tprintf(" %.3g(%s=%d=%.3g)", score, DecodeSingleLabel(best_c), best_c,
408 best_score * 100.0);
409 }
410 tprintf(", Mean=%g, max=%g\n", mean_score, max_score);
411}
412
// Helper that decides whether the null_char should be used at timestep t.
// True when the null score reaches null_thr, or when the best label
// reported by BestLabel(t, null_char, null_char, nullptr) is space and the
// null score is higher than the space score.
#if 0  // TODO: unused, remove if still unused after 2020.
static bool NullIsBest(const NetworkIO& output, float null_thr,
                       int null_char, int t) {
  if (output.f(t)[null_char] >= null_thr) return true;
  const int other_best = output.BestLabel(t, null_char, null_char, nullptr);
  return other_best == UNICHAR_SPACE &&
         output.f(t)[null_char] > output.f(t)[UNICHAR_SPACE];
}
#endif
425
426// Converts the network output to a sequence of labels. Outputs labels, scores
427// and start xcoords of each char, and each null_char_, with an additional
428// final xcoord for the end of the output.
429// The conversion method is determined by internal state.
431 GenericVector<int>* labels,
432 GenericVector<int>* xcoords) {
433 if (SimpleTextOutput()) {
434 LabelsViaSimpleText(outputs, labels, xcoords);
435 } else {
436 LabelsViaReEncode(outputs, labels, xcoords);
437 }
438}
439
440// As LabelsViaCTC except that this function constructs the best path that
441// contains only legal sequences of subcodes for CJK.
443 GenericVector<int>* labels,
444 GenericVector<int>* xcoords) {
445 if (search_ == nullptr) {
446 search_ =
448 }
449 search_->Decode(output, 1.0, 0.0, RecodeBeamSearch::kMinCertainty, nullptr);
450 search_->ExtractBestPathAsLabels(labels, xcoords);
451}
452
453// Converts the network output to a sequence of labels, with scores, using
454// the simple character model (each position is a char, and the null_char_ is
455// mainly intended for tail padding.)
457 GenericVector<int>* labels,
458 GenericVector<int>* xcoords) {
459 labels->truncate(0);
460 xcoords->truncate(0);
461 const int width = output.Width();
462 for (int t = 0; t < width; ++t) {
463 float score = 0.0f;
464 const int label = output.BestLabel(t, &score);
465 if (label != null_char_) {
466 labels->push_back(label);
467 xcoords->push_back(t);
468 }
469 }
470 xcoords->push_back(width);
471}
472
473// Returns a string corresponding to the label starting at start. Sets *end
474// to the next start and if non-null, *decoded to the unichar id.
476 int start, int* end, int* decoded) {
477 *end = start + 1;
478 if (IsRecoding()) {
479 // Decode labels via recoder_.
480 RecodedCharID code;
481 if (labels[start] == null_char_) {
482 if (decoded != nullptr) {
483 code.Set(0, null_char_);
484 *decoded = recoder_.DecodeUnichar(code);
485 }
486 return "<null>";
487 }
488 int index = start;
489 while (index < labels.size() &&
491 code.Set(code.length(), labels[index++]);
492 while (index < labels.size() && labels[index] == null_char_) ++index;
493 int uni_id = recoder_.DecodeUnichar(code);
494 // If the next label isn't a valid first code, then we need to continue
495 // extending even if we have a valid uni_id from this prefix.
496 if (uni_id != INVALID_UNICHAR_ID &&
497 (index == labels.size() ||
499 recoder_.IsValidFirstCode(labels[index]))) {
500 *end = index;
501 if (decoded != nullptr) *decoded = uni_id;
502 if (uni_id == UNICHAR_SPACE) return " ";
503 return GetUnicharset().get_normed_unichar(uni_id);
504 }
505 }
506 return "<Undecodable>";
507 } else {
508 if (decoded != nullptr) *decoded = labels[start];
509 if (labels[start] == null_char_) return "<null>";
510 if (labels[start] == UNICHAR_SPACE) return " ";
511 return GetUnicharset().get_normed_unichar(labels[start]);
512 }
513}
514
515// Returns a string corresponding to a given single label id, falling back to
516// a default of ".." for part of a multi-label unichar-id.
517const char* LSTMRecognizer::DecodeSingleLabel(int label) {
518 if (label == null_char_) return "<null>";
519 if (IsRecoding()) {
520 // Decode label via recoder_.
521 RecodedCharID code;
522 code.Set(0, label);
523 label = recoder_.DecodeUnichar(code);
524 if (label == INVALID_UNICHAR_ID) return ".."; // Part of a bigger code.
525 }
526 if (label == UNICHAR_SPACE) return " ";
527 return GetUnicharset().get_normed_unichar(label);
528}
529
530} // namespace tesseract.
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
@ UNICHAR_BROKEN
Definition: unicharset.h:36
@ UNICHAR_SPACE
Definition: unicharset.h:34
@ TF_COMPRESS_UNICHARSET
@ TESSDATA_LSTM_UNICHARSET
@ TESSDATA_LSTM_RECODER
const double kCertOffset
const double kDictRatio
int push_back(T object)
int size() const
Definition: genericvector.h:72
void truncate(int size)
Definition: rect.h:34
Definition: statistc.h:31
double mean() const
Definition: statistc.cpp:127
void add(int32_t value, int32_t count)
Definition: statistc.cpp:93
double sd() const
Definition: statistc.cpp:143
int32_t get_total() const
Definition: statistc.h:84
int32_t min_bucket() const
Definition: statistc.cpp:198
STRING language_data_path_prefix
Definition: ccutil.h:72
UNICHARSET unicharset
Definition: ccutil.h:73
bool Serialize(const char *data, size_t count=1)
Definition: serialis.cpp:148
bool DeSerialize(char *data, size_t count=1)
Definition: serialis.cpp:104
Definition: strngs.h:45
bool Serialize(FILE *fp) const
Definition: strngs.cpp:146
bool DeSerialize(bool swap, FILE *fp)
Definition: strngs.cpp:159
bool GetComponent(TessdataType type, TFile *fp)
bool IsComponentAvailable(TessdataType type) const
void Set(int index, int value)
static const int kMaxCodeLen
int EncodeUnichar(int unichar_id, RecodedCharID *code) const
bool IsValidFirstCode(int code) const
void SetupPassThrough(const UNICHARSET &unicharset)
int DecodeUnichar(const RecodedCharID &code) const
bool Serialize(TFile *fp) const
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:828
bool load_from_file(const char *const filename, bool skip_fragments)
Definition: unicharset.h:388
static TESS_API DawgCache * GlobalDawgCache()
Definition: dict.cpp:184
char * user_patterns_suffix
Definition: dict.h:584
void SetupForLoad(DawgCache *dawg_cache)
Definition: dict.cpp:192
bool FinishLoad()
Definition: dict.cpp:351
char * user_words_file
Definition: dict.h:578
char * user_words_suffix
Definition: dict.h:580
void LoadLSTM(const STRING &lang, TessdataManager *data_file)
Definition: dict.cpp:291
char * user_patterns_file
Definition: dict.h:582
static void PreparePixInput(const StaticShape &shape, const Pix *pix, TRand *randomizer, NetworkIO *input)
Definition: input.cpp:111
static Pix * PrepareLSTMInputs(const ImageData &image_data, const Network *network, int min_width, TRand *randomizer, float *image_scale)
Definition: input.cpp:83
bool Load(const ParamsVectors *params, const char *lang, TessdataManager *mgr)
NetworkScratch scratch_space_
const char * DecodeSingleLabel(int label)
void DisplayLSTMOutput(const GenericVector< int > &labels, const GenericVector< int > &xcoords, int height, ScrollView *window)
bool LoadCharsets(const TessdataManager *mgr)
const char * DecodeLabel(const GenericVector< int > &labels, int start, int *end, int *decoded)
void RecognizeLine(const ImageData &image_data, bool invert, bool debug, double worst_dict_cert, const TBOX &line_box, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
STRING DecodeLabels(const GenericVector< int > &labels)
void LabelsViaReEncode(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
void OutputStats(const NetworkIO &outputs, float *min_output, float *mean_output, float *sd)
void DebugActivationPath(const NetworkIO &outputs, const GenericVector< int > &labels, const GenericVector< int > &xcoords)
RecodeBeamSearch * search_
void LabelsFromOutputs(const NetworkIO &outputs, GenericVector< int > *labels, GenericVector< int > *xcoords)
bool LoadDictionary(const ParamsVectors *params, const char *lang, TessdataManager *mgr)
bool Serialize(const TessdataManager *mgr, TFile *fp) const
void DebugActivationRange(const NetworkIO &outputs, const char *label, int best_choice, int x_start, int x_end)
const UNICHARSET & GetUnicharset() const
bool DeSerialize(const TessdataManager *mgr, TFile *fp)
void DisplayForward(const NetworkIO &inputs, const GenericVector< int > &labels, const GenericVector< int > &label_coords, const char *window_name, ScrollView **window)
void LabelsViaSimpleText(const NetworkIO &output, GenericVector< int > *labels, GenericVector< int > *xcoords)
virtual int XScaleFactor() const
Definition: network.h:209
virtual void Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose, NetworkScratch *scratch, NetworkIO *output)=0
static void ClearWindow(bool tess_coords, const char *window_name, int width, int height, ScrollView **window)
Definition: network.cpp:312
static Network * CreateFromFile(TFile *fp)
Definition: network.cpp:187
bool IsTraining() const
Definition: network.h:115
static int DisplayImage(Pix *pix, ScrollView *window)
Definition: network.cpp:335
virtual bool Serialize(TFile *fp) const
Definition: network.cpp:151
virtual void CacheXScaleFactor(int factor)
Definition: network.h:215
virtual void SetRandomizer(TRand *randomizer)
Definition: network.cpp:138
virtual StaticShape InputShape() const
Definition: network.h:127
float * f(int t)
Definition: networkio.h:115
int Width() const
Definition: networkio.h:107
Pix * ToPix() const
Definition: networkio.cpp:286
void set_int_mode(bool is_quantized)
Definition: networkio.h:130
int NumFeatures() const
Definition: networkio.h:111
int BestLabel(int t, float *score) const
Definition: networkio.h:161
void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
Definition: recodebeam.cpp:76
void ExtractBestPathAsLabels(GenericVector< int > *labels, GenericVector< int > *xcoords) const
Definition: recodebeam.cpp:133
static constexpr float kMinCertainty
Definition: recodebeam.h:222
void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
Definition: recodebeam.cpp:171
static void Update()
Definition: scrollview.cpp:709
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:532
void TextAttributes(const char *font, int pixel_size, bool bold, bool italic, bool underlined)
Definition: scrollview.cpp:635
void Text(int x, int y, const char *mystring)
Definition: scrollview.cpp:652
void Pen(Color color)
Definition: scrollview.cpp:719