tesseract 4.1.1
Loading...
Searching...
No Matches
commontraining.h
Go to the documentation of this file.
1// Copyright 2008 Google Inc. All Rights Reserved.
2// Author: scharron@google.com (Samuel Charron)
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7// http://www.apache.org/licenses/LICENSE-2.0
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14#ifndef TESSERACT_TRAINING_COMMONTRAINING_H_
15#define TESSERACT_TRAINING_COMMONTRAINING_H_
16
17#ifdef HAVE_CONFIG_H
18#include "config_auto.h"
19#endif
20
21#include "baseapi.h"
22
23#ifdef DISABLED_LEGACY_ENGINE
24
25#include "tprintf.h"
26#include "commandlineflags.h"
27
28
29void ParseArguments(int* argc, char*** argv);
30
31
32namespace tesseract {
33
// Checks that the tesseract shared library in use reports the same version
// string as TESSERACT_VERSION_STR, the version this program was compiled
// against.
// This function must be inline because otherwise it would be part of
// the shared library, so it could not compare the versions.
// The comparison is only compiled in when HAVE_CONFIG_H is defined; without
// it the function does nothing. On a mismatch an error is printed through
// tprintf and the process terminates with exit status 1.
static inline void CheckSharedLibraryVersion()
{
#ifdef HAVE_CONFIG_H
  // strcmp returns non-zero when the two version strings differ.
  if (strcmp(TESSERACT_VERSION_STR, TessBaseAPI::Version()) != 0) {
    // "was" is the version reported by the library, "expected" is the
    // version from the headers used at compile time.
    tprintf("ERROR: shared library version mismatch (was %s, expected %s)\n"
            "Did you use a wrong shared tesseract library?\n",
            TessBaseAPI::Version(), TESSERACT_VERSION_STR);
    exit(1);
  }
#endif
}
48
49} // namespace tesseract
50
51
52#else
53
54#include "cluster.h"
55#include "commandlineflags.h"
56#include "featdefs.h"
57#include "intproto.h"
58#include "oldlist.h"
59
60namespace tesseract {
61class Classify;
62class MasterTrainer;
63class ShapeTable;
64}
65
67// Globals ///////////////////////////////////////////////////////////////////
69
71
72// Must be defined in the file that "implements" commonTraining facilities.
74
76// Structs ///////////////////////////////////////////////////////////////////
78typedef struct
79{
80 char *Label;
84}
86
87typedef struct
88{
89 char* Label;
90 int NumMerged[MAX_NUM_PROTOS];
94
95
97// Functions /////////////////////////////////////////////////////////////////
99void ParseArguments(int* argc, char*** argv);
100
101namespace tesseract {
102
// Checks that the tesseract shared library in use reports the same version
// string as TESSERACT_VERSION_STR, the version this program was compiled
// against.
// This function must be inline because otherwise it would be part of
// the shared library, so it could not compare the versions.
// The comparison is only compiled in when HAVE_CONFIG_H is defined; without
// it the function does nothing. On a mismatch an error is printed through
// tprintf and the process terminates with exit status 1.
static inline void CheckSharedLibraryVersion()
{
#ifdef HAVE_CONFIG_H
  // strcmp returns non-zero when the two version strings differ.
  if (strcmp(TESSERACT_VERSION_STR, TessBaseAPI::Version()) != 0) {
    // "was" is the version reported by the library, "expected" is the
    // version from the headers used at compile time.
    tprintf("ERROR: shared library version mismatch (was %s, expected %s)\n"
            "Did you use a wrong shared tesseract library?\n",
            TessBaseAPI::Version(), TESSERACT_VERSION_STR);
    exit(1);
  }
#endif
}
117
118// Helper loads shape table from the given file.
119ShapeTable* LoadShapeTable(const STRING& file_prefix);
120// Helper to write the shape_table.
121void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table);
122
123// Creates a MasterTrainer and loads the training data into it:
124// Initializes feature_defs and IntegerFX.
125// Loads the shape_table if shape_table != nullptr.
126// Loads initial unicharset from -U command-line option.
127// If FLAGS_input_trainer is set, loads the majority of data from there, else:
128// Loads font info from -F option.
129// Loads xheights from -X option.
130// Loads samples from .tr files in remaining command-line args.
131// Deletes outliers and computes canonical samples.
132// If FLAGS_output_trainer is set, saves the trainer for future use.
133// Computes canonical and cloud features.
134// If shape_table is not nullptr, but failed to load, make a fake flat one,
135// as shape clustering was not run.
136MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
137 bool replication,
138 ShapeTable** shape_table,
139 STRING* file_prefix);
140} // namespace tesseract.
141
142const char *GetNextFilename(int argc, const char* const * argv);
143
145 LIST List,
146 char *Label);
147
149 const char *Label);
150
152 const char *feature_name, int max_samples,
153 UNICHARSET* unicharset,
154 FILE* file, LIST* training_samples);
155
157 const FEATURE_DEFS_STRUCT &FeatureDefs,
158 char *Directory,
159 LIST CharList,
160 const char *program_feature_type);
161
163 LIST CharList);
164
165void FreeLabeledList(
166 LABELEDLIST LabeledList);
167
169 LIST ClassListList);
170
172 const FEATURE_DEFS_STRUCT &FeatureDefs,
173 LABELEDLIST CharSample,
174 const char *program_feature_type);
175
177 LIST ProtoList,
178 bool KeepSigProtos,
179 bool KeepInsigProtos,
180 int N);
181
183 LIST ProtoList);
184
186 LIST ProtoList,
187 const char *label,
188 CLUSTERER *Clusterer,
190
192 LIST List,
193 const char *Label);
194
196 const char *Label);
197
199 LIST CharList);
200
201CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset,
202 LIST LabeledClassList);
203
204void Normalize(
205 float *Values);
206
208 LIST CharList);
209
211 LIST* NormProtoList,
212 LIST ProtoList,
213 char *CharName);
214
216 LIST ProtoList,
217 bool CountSigProtos,
218 bool CountInsigProtos);
219
220
222
223#endif // def DISABLED_LEGACY_ENGINE
224
225#endif // TESSERACT_TRAINING_COMMONTRAINING_H_
#define TESSERACT_VERSION_STR
Definition: tess_version.h:28
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
#define MAX_NUM_PROTOS
Definition: intproto.h:48
void WriteTrainingSamples(const FEATURE_DEFS_STRUCT &FeatureDefs, char *Directory, LIST CharList, const char *program_feature_type)
CLUSTERCONFIG Config
void AddToNormProtosList(LIST *NormProtoList, LIST ProtoList, char *CharName)
int NumberOfProtos(LIST ProtoList, bool CountSigProtos, bool CountInsigProtos)
void FreeTrainingSamples(LIST CharList)
CLASS_STRUCT * SetUpForFloat2Int(const UNICHARSET &unicharset, LIST LabeledClassList)
const char * GetNextFilename(int argc, const char *const *argv)
CLUSTERER * SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST CharSample, const char *program_feature_type)
void FreeLabeledList(LABELEDLIST LabeledList)
void ParseArguments(int *argc, char ***argv)
void CleanUpUnusedData(LIST ProtoList)
void allocNormProtos()
void FreeNormProtoList(LIST CharList)
LABELEDLIST FindList(LIST List, char *Label)
FEATURE_DEFS_STRUCT feature_defs
LABELEDLIST NewLabeledList(const char *Label)
void ReadTrainingSamples(const FEATURE_DEFS_STRUCT &feature_defs, const char *feature_name, int max_samples, UNICHARSET *unicharset, FILE *file, LIST *training_samples)
void Normalize(float *Values)
LIST RemoveInsignificantProtos(LIST ProtoList, bool KeepSigProtos, bool KeepInsigProtos, int N)
void MergeInsignificantProtos(LIST ProtoList, const char *label, CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
MERGE_CLASS FindClass(LIST List, const char *Label)
MERGE_CLASS NewLabeledClass(const char *Label)
void FreeLabeledClassList(LIST ClassListList)
struct LABELEDLISTNODE * LABELEDLIST
ShapeTable * LoadShapeTable(const STRING &file_prefix)
MasterTrainer * LoadTrainingData(int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)
void WriteShapeTable(const STRING &file_prefix, const ShapeTable &shape_table)
Definition: strngs.h:45
CLASS_TYPE Class