tesseract 4.1.1
Loading...
Searching...
No Matches
tesseract::LSTMTester Class Reference

#include <lstmtester.h>

Public Member Functions

 LSTMTester (int64_t max_memory)
 
bool LoadAllEvalData (const STRING &filenames_file)
 
bool LoadAllEvalData (const GenericVector< STRING > &filenames)
 
STRING RunEvalAsync (int iteration, const double *training_errors, const TessdataManager &model_mgr, int training_stage)
 
STRING RunEvalSync (int iteration, const double *training_errors, const TessdataManager &model_mgr, int training_stage, int verbosity)
 

Detailed Description

Definition at line 29 of file lstmtester.h.

Constructor & Destructor Documentation

◆ LSTMTester()

tesseract::LSTMTester::LSTMTester ( int64_t  max_memory)

Definition at line 26 of file lstmtester.cpp.

27 : test_data_(max_memory), total_pages_(0), async_running_(false) {}

Member Function Documentation

◆ LoadAllEvalData() [1/2]

bool tesseract::LSTMTester::LoadAllEvalData ( const GenericVector< STRING > &  filenames)

Definition at line 45 of file lstmtester.cpp.

45 {
46 test_data_.Clear();
47 bool result = test_data_.LoadDocuments(filenames, CS_SEQUENTIAL, nullptr);
48 total_pages_ = test_data_.TotalPages();
49 return result;
50}
@ CS_SEQUENTIAL
Definition: imagedata.h:49
bool LoadDocuments(const GenericVector< STRING > &filenames, CachingStrategy cache_strategy, FileReader reader)
Definition: imagedata.cpp:580

◆ LoadAllEvalData() [2/2]

bool tesseract::LSTMTester::LoadAllEvalData ( const STRING &  filenames_file)

Definition at line 32 of file lstmtester.cpp.

32 {
33 GenericVector<STRING> filenames;
34 if (!LoadFileLinesToStrings(filenames_file.c_str(), &filenames)) {
35 tprintf("Failed to load list of eval filenames from %s\n",
36 filenames_file.string());
37 return false;
38 }
39 return LoadAllEvalData(filenames);
40}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
bool LoadFileLinesToStrings(const char *filename, GenericVector< STRING > *lines)
Definition: fileio.h:31
const char * c_str() const
Definition: strngs.cpp:205
const char * string() const
Definition: strngs.cpp:194
bool LoadAllEvalData(const STRING &filenames_file)
Definition: lstmtester.cpp:32

◆ RunEvalAsync()

STRING tesseract::LSTMTester::RunEvalAsync ( int  iteration,
const double *  training_errors,
const TessdataManager &  model_mgr,
int  training_stage 
)

Definition at line 54 of file lstmtester.cpp.

56 {
57 STRING result;
58 if (total_pages_ == 0) {
59 result.add_str_int("No test data at iteration", iteration);
60 return result;
61 }
62 if (!LockIfNotRunning()) {
63 result.add_str_int("Previous test incomplete, skipping test at iteration",
64 iteration);
65 return result;
66 }
67 // Save the args.
68 STRING prev_result = test_result_;
69 test_result_ = "";
70 if (training_errors != nullptr) {
71 test_iteration_ = iteration;
72 test_training_errors_ = training_errors;
73 test_model_mgr_ = model_mgr;
74 test_training_stage_ = training_stage;
75 SVSync::StartThread(&LSTMTester::ThreadFunc, this);
76 } else {
77 UnlockRunning();
78 }
79 return prev_result;
80}
STRING
Definition: strngs.h:45
void add_str_int(const char *str, int number)
Definition: strngs.cpp:377
static void StartThread(void *(*func)(void *), void *arg)
Create new thread.
Definition: svutil.cpp:81

◆ RunEvalSync()

STRING tesseract::LSTMTester::RunEvalSync ( int  iteration,
const double *  training_errors,
const TessdataManager &  model_mgr,
int  training_stage,
int  verbosity 
)

Definition at line 84 of file lstmtester.cpp.

86 {
87 LSTMTrainer trainer;
88 trainer.InitCharSet(model_mgr);
89 TFile fp;
90 if (!model_mgr.GetComponent(TESSDATA_LSTM, &fp) ||
91 !trainer.DeSerialize(&model_mgr, &fp)) {
92 return "Deserialize failed";
93 }
94 int eval_iteration = 0;
95 double char_error = 0.0;
96 double word_error = 0.0;
97 int error_count = 0;
98 while (error_count < total_pages_) {
99 const ImageData* trainingdata = test_data_.GetPageBySerial(eval_iteration);
100 trainer.SetIteration(++eval_iteration);
101 NetworkIO fwd_outputs, targets;
102 Trainability result =
103 trainer.PrepareForBackward(trainingdata, &fwd_outputs, &targets);
104 if (result != UNENCODABLE) {
105 char_error += trainer.NewSingleError(tesseract::ET_CHAR_ERROR);
106 word_error += trainer.NewSingleError(tesseract::ET_WORD_RECERR);
107 ++error_count;
108 if (verbosity > 1 || (verbosity > 0 && result != PERFECT)) {
109 tprintf("Truth:%s\n", trainingdata->transcription().string());
110 GenericVector<int> ocr_labels;
111 GenericVector<int> xcoords;
112 trainer.LabelsFromOutputs(fwd_outputs, &ocr_labels, &xcoords);
113 STRING ocr_text = trainer.DecodeLabels(ocr_labels);
114 tprintf("OCR :%s\n", ocr_text.string());
115 }
116 }
117 }
118 char_error *= 100.0 / total_pages_;
119 word_error *= 100.0 / total_pages_;
120 STRING result;
121 result.add_str_int("At iteration ", iteration);
122 result.add_str_int(", stage ", training_stage);
123 result.add_str_double(", Eval Char error rate=", char_error);
124 result.add_str_double(", Word error rate=", word_error);
125 return result;
126}
@ ET_WORD_RECERR
Definition: lstmtrainer.h:40
@ ET_CHAR_ERROR
Definition: lstmtrainer.h:41
const ImageData * GetPageBySerial(int serial)
Definition: imagedata.h:344
void add_str_double(const char *str, double number)
Definition: strngs.cpp:387

The documentation for this class was generated from the following files: