tesseract 4.1.1
Loading...
Searching...
No Matches
errorcounter.cpp
Go to the documentation of this file.
1// Copyright 2011 Google Inc. All Rights Reserved.
2// Author: rays@google.com (Ray Smith)
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7// http://www.apache.org/licenses/LICENSE-2.0
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13//
15#include <algorithm>
16#include <ctime>
17
18#include "errorcounter.h"
19
20#include "fontinfo.h"
21#include "sampleiterator.h"
22#include "shapeclassifier.h"
23#include "shapetable.h"
24#include "trainingsample.h"
25#include "trainingsampleset.h"
26#include "unicity_table.h"
27
28namespace tesseract {
29
// Result ratings that lie within this distance of each other are regarded as
// an "equal" choice when ranking classifier answers.
constexpr double kRatingEpsilon = 1.0 / 32.0;
32
33// Tests a classifier, computing its error rate.
34// See errorcounter.h for description of arguments.
35// Iterates over the samples, calling the classifier in normal/silent mode.
36// If the classifier makes a CT_UNICHAR_TOPN_ERR error, and the appropriate
37// report_level is set (4 or greater), it will then call the classifier again
38// with a debug flag and a keep_this argument to find out what is going on.
40 int report_level, CountTypes boosting_mode,
41 const FontInfoTable& fontinfo_table,
42 const GenericVector<Pix*>& page_images, SampleIterator* it,
43 double* unichar_error, double* scaled_error, STRING* fonts_report) {
44 const int fontsize = it->sample_set()->NumFonts();
45 ErrorCounter counter(classifier->GetUnicharset(), fontsize);
47
48 clock_t start = clock();
49 unsigned total_samples = 0;
50 double unscaled_error = 0.0;
51 // Set a number of samples on which to run the classify debug mode.
52 int error_samples = report_level > 3 ? report_level * report_level : 0;
53 // Iterate over all the samples, accumulating errors.
54 for (it->Begin(); !it->AtEnd(); it->Next()) {
55 TrainingSample* mutable_sample = it->MutableSample();
56 int page_index = mutable_sample->page_num();
57 Pix* page_pix = 0 <= page_index && page_index < page_images.size()
58 ? page_images[page_index] : nullptr;
59 // No debug, no keep this.
60 classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
61 INVALID_UNICHAR_ID, &results);
62 bool debug_it = false;
63 int correct_id = mutable_sample->class_id();
64 if (counter.unicharset_.has_special_codes() &&
65 (correct_id == UNICHAR_SPACE || correct_id == UNICHAR_JOINED ||
66 correct_id == UNICHAR_BROKEN)) {
67 // This is junk so use the special counter.
68 debug_it = counter.AccumulateJunk(report_level > 3,
69 results,
70 mutable_sample);
71 } else {
72 debug_it = counter.AccumulateErrors(report_level > 3, boosting_mode,
73 fontinfo_table,
74 results, mutable_sample);
75 }
76 if (debug_it && error_samples > 0) {
77 // Running debug, keep the correct answer, and debug the classifier.
78 tprintf("Error on sample %d: %s Classifier debug output:\n",
80 it->sample_set()->SampleToString(*mutable_sample).string());
81 classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
82 --error_samples;
83 }
84 ++total_samples;
85 }
86 const double total_time = 1.0 * (clock() - start) / CLOCKS_PER_SEC;
87 // Create the appropriate error report.
88 unscaled_error = counter.ReportErrors(report_level, boosting_mode,
89 fontinfo_table,
90 *it, unichar_error, fonts_report);
91 if (scaled_error != nullptr) *scaled_error = counter.scaled_error_;
92 if (report_level > 1 && total_samples > 0) {
93 // It is useful to know the time in microseconds/char.
94 tprintf("Errors computed in %.2fs at %.1f μs/char\n",
95 total_time, 1000000.0 * total_time / total_samples);
96 }
97 return unscaled_error;
98}
99
100// Tests a pair of classifiers, debugging errors of the new against the old.
101// See errorcounter.h for description of arguments.
102// Iterates over the samples, calling the classifiers in normal/silent mode.
103// If the new_classifier makes a boosting_mode error that the old_classifier
104// does not, it will then call the new_classifier again with a debug flag
105// and a keep_this argument to find out what is going on.
107 ShapeClassifier* new_classifier, ShapeClassifier* old_classifier,
108 CountTypes boosting_mode,
109 const FontInfoTable& fontinfo_table,
110 const GenericVector<Pix*>& page_images, SampleIterator* it) {
111 int fontsize = it->sample_set()->NumFonts();
112 ErrorCounter old_counter(old_classifier->GetUnicharset(), fontsize);
113 ErrorCounter new_counter(new_classifier->GetUnicharset(), fontsize);
115
116 int total_samples = 0;
117 int error_samples = 25;
118 int total_new_errors = 0;
119 // Iterate over all the samples, accumulating errors.
120 for (it->Begin(); !it->AtEnd(); it->Next()) {
121 TrainingSample* mutable_sample = it->MutableSample();
122 int page_index = mutable_sample->page_num();
123 Pix* page_pix = 0 <= page_index && page_index < page_images.size()
124 ? page_images[page_index] : nullptr;
125 // No debug, no keep this.
126 old_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
127 INVALID_UNICHAR_ID, &results);
128 int correct_id = mutable_sample->class_id();
129 if (correct_id != 0 &&
130 !old_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
131 results, mutable_sample)) {
132 // old classifier was correct, check the new one.
133 new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 0,
134 INVALID_UNICHAR_ID, &results);
135 if (correct_id != 0 &&
136 new_counter.AccumulateErrors(true, boosting_mode, fontinfo_table,
137 results, mutable_sample)) {
138 tprintf("New Error on sample %d: Classifier debug output:\n",
139 it->GlobalSampleIndex());
140 ++total_new_errors;
141 new_classifier->UnicharClassifySample(*mutable_sample, page_pix, 1,
142 correct_id, &results);
143 if (results.size() > 0 && error_samples > 0) {
144 new_classifier->DebugDisplay(*mutable_sample, page_pix, correct_id);
145 --error_samples;
146 }
147 }
148 }
149 ++total_samples;
150 }
151 tprintf("Total new errors = %d\n", total_new_errors);
152}
153
154// Constructor is private. Only anticipated use of ErrorCounter is via
155// the static ComputeErrorRate.
156ErrorCounter::ErrorCounter(const UNICHARSET& unicharset, int fontsize)
157 : scaled_error_(0.0), rating_epsilon_(kRatingEpsilon),
158 unichar_counts_(unicharset.size(), unicharset.size(), 0),
159 ok_score_hist_(0, 101), bad_score_hist_(0, 101),
160 unicharset_(unicharset) {
161 Counts empty_counts;
162 font_counts_.init_to_size(fontsize, empty_counts);
163 multi_unichar_counts_.init_to_size(unicharset.size(), 0);
164}
165
166// Accumulates the errors from the classifier results on a single sample.
167// Returns true if debug is true and a CT_UNICHAR_TOPN_ERR error occurred.
168// boosting_mode selects the type of error to be used for boosting and the
169// is_error_ member of sample is set according to whether the required type
170// of error occurred. The font_table provides access to font properties
171// for error counting and shape_table is used to understand the relationship
172// between unichar_ids and shape_ids in the results
173bool ErrorCounter::AccumulateErrors(bool debug, CountTypes boosting_mode,
174 const FontInfoTable& font_table,
175 const GenericVector<UnicharRating>& results,
176 TrainingSample* sample) {
177 int num_results = results.size();
178 int answer_actual_rank = -1;
179 int font_id = sample->font_id();
180 int unichar_id = sample->class_id();
181 sample->set_is_error(false);
182 if (num_results == 0) {
183 // Reject. We count rejects as a separate category, but still mark the
184 // sample as an error in case any training module wants to use that to
185 // improve the classifier.
186 sample->set_is_error(true);
187 ++font_counts_[font_id].n[CT_REJECT];
188 } else {
189 // Find rank of correct unichar answer, using rating_epsilon_ to allow
190 // different answers to score as equal. (Ignoring the font.)
191 int epsilon_rank = 0;
192 int answer_epsilon_rank = -1;
193 int num_top_answers = 0;
194 double prev_rating = results[0].rating;
195 bool joined = false;
196 bool broken = false;
197 int res_index = 0;
198 while (res_index < num_results) {
199 if (results[res_index].rating < prev_rating - rating_epsilon_) {
200 ++epsilon_rank;
201 prev_rating = results[res_index].rating;
202 }
203 if (results[res_index].unichar_id == unichar_id &&
204 answer_epsilon_rank < 0) {
205 answer_epsilon_rank = epsilon_rank;
206 answer_actual_rank = res_index;
207 }
208 if (results[res_index].unichar_id == UNICHAR_JOINED &&
209 unicharset_.has_special_codes())
210 joined = true;
211 else if (results[res_index].unichar_id == UNICHAR_BROKEN &&
212 unicharset_.has_special_codes())
213 broken = true;
214 else if (epsilon_rank == 0)
215 ++num_top_answers;
216 ++res_index;
217 }
218 if (answer_actual_rank != 0) {
219 // Correct result is not absolute top.
220 ++font_counts_[font_id].n[CT_UNICHAR_TOPTOP_ERR];
221 if (boosting_mode == CT_UNICHAR_TOPTOP_ERR) sample->set_is_error(true);
222 }
223 if (answer_epsilon_rank == 0) {
224 ++font_counts_[font_id].n[CT_UNICHAR_TOP_OK];
225 // Unichar OK, but count if multiple unichars.
226 if (num_top_answers > 1) {
227 ++font_counts_[font_id].n[CT_OK_MULTI_UNICHAR];
228 ++multi_unichar_counts_[unichar_id];
229 }
230 // Check to see if any font in the top choice has attributes that match.
231 // TODO(rays) It is easy to add counters for individual font attributes
232 // here if we want them.
233 if (font_table.SetContainsFontProperties(
234 font_id, results[answer_actual_rank].fonts)) {
235 // Font attributes were matched.
236 // Check for multiple properties.
237 if (font_table.SetContainsMultipleFontProperties(
238 results[answer_actual_rank].fonts))
239 ++font_counts_[font_id].n[CT_OK_MULTI_FONT];
240 } else {
241 // Font attributes weren't matched.
242 ++font_counts_[font_id].n[CT_FONT_ATTR_ERR];
243 }
244 } else {
245 // This is a top unichar error.
246 ++font_counts_[font_id].n[CT_UNICHAR_TOP1_ERR];
247 if (boosting_mode == CT_UNICHAR_TOP1_ERR) sample->set_is_error(true);
248 // Count maps from unichar id to wrong unichar id.
249 ++unichar_counts_(unichar_id, results[0].unichar_id);
250 if (answer_epsilon_rank < 0 || answer_epsilon_rank >= 2) {
251 // It is also a 2nd choice unichar error.
252 ++font_counts_[font_id].n[CT_UNICHAR_TOP2_ERR];
253 if (boosting_mode == CT_UNICHAR_TOP2_ERR) sample->set_is_error(true);
254 }
255 if (answer_epsilon_rank < 0) {
256 // It is also a top-n choice unichar error.
257 ++font_counts_[font_id].n[CT_UNICHAR_TOPN_ERR];
258 if (boosting_mode == CT_UNICHAR_TOPN_ERR) sample->set_is_error(true);
259 answer_epsilon_rank = epsilon_rank;
260 }
261 }
262 // Compute mean number of return values and mean rank of correct answer.
263 font_counts_[font_id].n[CT_NUM_RESULTS] += num_results;
264 font_counts_[font_id].n[CT_RANK] += answer_epsilon_rank;
265 if (joined)
266 ++font_counts_[font_id].n[CT_OK_JOINED];
267 if (broken)
268 ++font_counts_[font_id].n[CT_OK_BROKEN];
269 }
270 // If it was an error for boosting then sum the weight.
271 if (sample->is_error()) {
272 scaled_error_ += sample->weight();
273 if (debug) {
274 tprintf("%d results for char %s font %d :",
275 num_results, unicharset_.id_to_unichar(unichar_id),
276 font_id);
277 for (int i = 0; i < num_results; ++i) {
278 tprintf(" %.3f : %s\n",
279 results[i].rating,
280 unicharset_.id_to_unichar(results[i].unichar_id));
281 }
282 return true;
283 }
284 int percent = 0;
285 if (num_results > 0)
286 percent = IntCastRounded(results[0].rating * 100);
287 bad_score_hist_.add(percent, 1);
288 } else {
289 int percent = 0;
290 if (answer_actual_rank >= 0)
291 percent = IntCastRounded(results[answer_actual_rank].rating * 100);
292 ok_score_hist_.add(percent, 1);
293 }
294 return false;
295}
296
297// Accumulates counts for junk. Counts only whether the junk was correctly
298// rejected or not.
299bool ErrorCounter::AccumulateJunk(bool debug,
300 const GenericVector<UnicharRating>& results,
301 TrainingSample* sample) {
302 // For junk we accept no answer, or an explicit shape answer matching the
303 // class id of the sample.
304 const int num_results = results.size();
305 const int font_id = sample->font_id();
306 const int unichar_id = sample->class_id();
307 int percent = 0;
308 if (num_results > 0)
309 percent = IntCastRounded(results[0].rating * 100);
310 if (num_results > 0 && results[0].unichar_id != unichar_id) {
311 // This is a junk error.
312 ++font_counts_[font_id].n[CT_ACCEPTED_JUNK];
313 sample->set_is_error(true);
314 // It counts as an error for boosting too so sum the weight.
315 scaled_error_ += sample->weight();
316 bad_score_hist_.add(percent, 1);
317 return debug;
318 } else {
319 // Correctly rejected.
320 ++font_counts_[font_id].n[CT_REJECTED_JUNK];
321 sample->set_is_error(false);
322 ok_score_hist_.add(percent, 1);
323 }
324 return false;
325}
326
327// Creates a report of the error rate. The report_level controls the detail
328// that is reported to stderr via tprintf:
329// 0 -> no output.
330// >=1 -> bottom-line error rate.
331// >=3 -> font-level error rate.
332// boosting_mode determines the return value. It selects which (un-weighted)
333// error rate to return.
334// The fontinfo_table from MasterTrainer provides the names of fonts.
335// The it determines the current subset of the training samples.
336// If not nullptr, the top-choice unichar error rate is saved in unichar_error.
337// If not nullptr, the report string is saved in fonts_report.
338// (Ignoring report_level).
339double ErrorCounter::ReportErrors(int report_level, CountTypes boosting_mode,
340 const FontInfoTable& fontinfo_table,
341 const SampleIterator& it,
342 double* unichar_error,
343 STRING* fonts_report) {
344 // Compute totals over all the fonts and report individual font results
345 // when required.
346 Counts totals;
347 int fontsize = font_counts_.size();
348 for (int f = 0; f < fontsize; ++f) {
349 // Accumulate counts over fonts.
350 totals += font_counts_[f];
351 STRING font_report;
352 if (ReportString(false, font_counts_[f], &font_report)) {
353 if (fonts_report != nullptr) {
354 *fonts_report += fontinfo_table.get(f).name;
355 *fonts_report += ": ";
356 *fonts_report += font_report;
357 *fonts_report += "\n";
358 }
359 if (report_level > 2) {
360 // Report individual font error rates.
361 tprintf("%s: %s\n", fontinfo_table.get(f).name, font_report.string());
362 }
363 }
364 }
365 // Report the totals.
366 STRING total_report;
367 bool any_results = ReportString(true, totals, &total_report);
368 if (fonts_report != nullptr && fonts_report->length() == 0) {
369 // Make sure we return something even if there were no samples.
370 *fonts_report = "NoSamplesFound: ";
371 *fonts_report += total_report;
372 *fonts_report += "\n";
373 }
374 if (report_level > 0) {
375 // Report the totals.
376 STRING total_report;
377 if (any_results) {
378 tprintf("TOTAL Scaled Err=%.4g%%, %s\n",
379 scaled_error_ * 100.0, total_report.string());
380 }
381 // Report the worst substitution error only for now.
382 if (totals.n[CT_UNICHAR_TOP1_ERR] > 0) {
383 int charsetsize = unicharset_.size();
384 int worst_uni_id = 0;
385 int worst_result_id = 0;
386 int worst_err = 0;
387 for (int u = 0; u < charsetsize; ++u) {
388 for (int v = 0; v < charsetsize; ++v) {
389 if (unichar_counts_(u, v) > worst_err) {
390 worst_err = unichar_counts_(u, v);
391 worst_uni_id = u;
392 worst_result_id = v;
393 }
394 }
395 }
396 if (worst_err > 0) {
397 tprintf("Worst error = %d:%s -> %s with %d/%d=%.2f%% errors\n",
398 worst_uni_id, unicharset_.id_to_unichar(worst_uni_id),
399 unicharset_.id_to_unichar(worst_result_id),
400 worst_err, totals.n[CT_UNICHAR_TOP1_ERR],
401 100.0 * worst_err / totals.n[CT_UNICHAR_TOP1_ERR]);
402 }
403 }
404 tprintf("Multi-unichar shape use:\n");
405 for (int u = 0; u < multi_unichar_counts_.size(); ++u) {
406 if (multi_unichar_counts_[u] > 0) {
407 tprintf("%d multiple answers for unichar: %s\n",
408 multi_unichar_counts_[u],
409 unicharset_.id_to_unichar(u));
410 }
411 }
412 tprintf("OK Score histogram:\n");
413 ok_score_hist_.print();
414 tprintf("ERROR Score histogram:\n");
415 bad_score_hist_.print();
416 }
417
418 double rates[CT_SIZE];
419 if (!ComputeRates(totals, rates))
420 return 0.0;
421 // Set output values if asked for.
422 if (unichar_error != nullptr)
423 *unichar_error = rates[CT_UNICHAR_TOP1_ERR];
424 return rates[boosting_mode];
425}
426
427// Sets the report string to a combined human and machine-readable report
428// string of the error rates.
429// Returns false if there is no data, leaving report unchanged, unless
430// even_if_empty is true.
431bool ErrorCounter::ReportString(bool even_if_empty, const Counts& counts,
432 STRING* report) {
433 // Compute the error rates.
434 double rates[CT_SIZE];
435 if (!ComputeRates(counts, rates) && !even_if_empty)
436 return false;
437 // Using %.4g%%, the length of the output string should exactly match the
438 // length of the format string, but in case of overflow, allow for +eddd
439 // on each number.
440 const int kMaxExtraLength = 5; // Length of +eddd.
441 // Keep this format string and the snprintf in sync with the CountTypes enum.
442 const char* format_str = "Unichar=%.4g%%[1], %.4g%%[2], %.4g%%[n], %.4g%%[T] "
443 "Mult=%.4g%%, Jn=%.4g%%, Brk=%.4g%%, Rej=%.4g%%, "
444 "FontAttr=%.4g%%, Multi=%.4g%%, "
445 "Answers=%.3g, Rank=%.3g, "
446 "OKjunk=%.4g%%, Badjunk=%.4g%%";
447 const size_t max_str_len = strlen(format_str) + kMaxExtraLength * (CT_SIZE - 1) + 1;
448 char* formatted_str = new char[max_str_len];
449 snprintf(formatted_str, max_str_len, format_str,
450 rates[CT_UNICHAR_TOP1_ERR] * 100.0,
451 rates[CT_UNICHAR_TOP2_ERR] * 100.0,
452 rates[CT_UNICHAR_TOPN_ERR] * 100.0,
453 rates[CT_UNICHAR_TOPTOP_ERR] * 100.0,
454 rates[CT_OK_MULTI_UNICHAR] * 100.0,
455 rates[CT_OK_JOINED] * 100.0,
456 rates[CT_OK_BROKEN] * 100.0,
457 rates[CT_REJECT] * 100.0,
458 rates[CT_FONT_ATTR_ERR] * 100.0,
459 rates[CT_OK_MULTI_FONT] * 100.0,
460 rates[CT_NUM_RESULTS],
461 rates[CT_RANK],
462 100.0 * rates[CT_REJECTED_JUNK],
463 100.0 * rates[CT_ACCEPTED_JUNK]);
464 *report = formatted_str;
465 delete [] formatted_str;
466 // Now append each field of counts with a tab in front so the result can
467 // be loaded into a spreadsheet.
468 for (int ct : counts.n)
469 report->add_str_int("\t", ct);
470 return true;
471}
472
473// Computes the error rates and returns in rates which is an array of size
474// CT_SIZE. Returns false if there is no data, leaving rates unchanged.
475bool ErrorCounter::ComputeRates(const Counts& counts, double rates[CT_SIZE]) {
476 const int ok_samples = counts.n[CT_UNICHAR_TOP_OK] + counts.n[CT_UNICHAR_TOP1_ERR] +
477 counts.n[CT_REJECT];
478 const int junk_samples = counts.n[CT_REJECTED_JUNK] + counts.n[CT_ACCEPTED_JUNK];
479 // Compute rates for normal chars.
480 double denominator = static_cast<double>(std::max(ok_samples, 1));
481 for (int ct = 0; ct <= CT_RANK; ++ct)
482 rates[ct] = counts.n[ct] / denominator;
483 // Compute rates for junk.
484 denominator = static_cast<double>(std::max(junk_samples, 1));
485 for (int ct = CT_REJECTED_JUNK; ct <= CT_ACCEPTED_JUNK; ++ct)
486 rates[ct] = counts.n[ct] / denominator;
487 return ok_samples != 0 || junk_samples != 0;
488}
489
490ErrorCounter::Counts::Counts() {
491 memset(n, 0, sizeof(n[0]) * CT_SIZE);
492}
493// Adds other into this for computing totals.
494void ErrorCounter::Counts::operator+=(const Counts& other) {
495 for (int ct = 0; ct < CT_SIZE; ++ct)
496 n[ct] += other.n[ct];
497}
498
499
500} // namespace tesseract.
int IntCastRounded(double x)
Definition: helpers.h:175
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
@ UNICHAR_BROKEN
Definition: unicharset.h:36
@ UNICHAR_JOINED
Definition: unicharset.h:35
@ UNICHAR_SPACE
Definition: unicharset.h:34
const double kRatingEpsilon
@ CT_UNICHAR_TOPN_ERR
Definition: errorcounter.h:76
@ CT_UNICHAR_TOP_OK
Definition: errorcounter.h:70
@ CT_UNICHAR_TOP1_ERR
Definition: errorcounter.h:74
@ CT_UNICHAR_TOP2_ERR
Definition: errorcounter.h:75
@ CT_OK_MULTI_FONT
Definition: errorcounter.h:83
@ CT_REJECTED_JUNK
Definition: errorcounter.h:86
@ CT_UNICHAR_TOPTOP_ERR
Definition: errorcounter.h:77
@ CT_FONT_ATTR_ERR
Definition: errorcounter.h:82
@ CT_ACCEPTED_JUNK
Definition: errorcounter.h:87
@ CT_OK_MULTI_UNICHAR
Definition: errorcounter.h:78
@ CT_NUM_RESULTS
Definition: errorcounter.h:84
void init_to_size(int size, const T &t)
int size() const
Definition: genericvector.h:72
void add(int32_t value, int32_t count)
Definition: statistc.cpp:93
void print() const
Definition: statistc.cpp:526
Definition: strngs.h:45
void add_str_int(const char *str, int number)
Definition: strngs.cpp:377
int32_t length() const
Definition: strngs.cpp:189
const char * string() const
Definition: strngs.cpp:194
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
int size() const
Definition: unicharset.h:341
bool has_special_codes() const
Definition: unicharset.h:722
Definition: cluster.h:32
const TrainingSampleSet * sample_set() const
TrainingSample * MutableSample() const
virtual const UNICHARSET & GetUnicharset() const
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
virtual void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id)
UNICHAR_ID class_id() const
STRING SampleToString(const TrainingSample &sample) const
static void DebugNewErrors(ShapeClassifier *new_classifier, ShapeClassifier *old_classifier, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const GenericVector< Pix * > &page_images, SampleIterator *it)
static double ComputeErrorRate(ShapeClassifier *classifier, int report_level, CountTypes boosting_mode, const FontInfoTable &fontinfo_table, const GenericVector< Pix * > &page_images, SampleIterator *it, double *unichar_error, double *scaled_error, STRING *fonts_report)