19#ifndef TESSERACT_CCUTIL_TESSDATAMANAGER_H_
20#define TESSERACT_CCUTIL_TESSDATAMANAGER_H_
// Suffix string "traineddata".
// NOTE(review): presumably the extension of the combined trained-data file;
// no use of this constant is visible in this section -- confirm at call sites.
static const char kTrainedDataSuffix[] = "traineddata";
// Suffix strings for the individual (non-LSTM) components. Each constant
// holds exactly the literal shown; most of them are listed in the
// kTessdataFileSuffixes table.
// NOTE(review): the suffix-to-component meaning is inferred from the names
// only -- confirm against the code that consumes these constants.
static const char kLangConfigFileSuffix[] = "config";
static const char kUnicharsetFileSuffix[] = "unicharset";
static const char kAmbigsFileSuffix[] = "unicharambigs";
static const char kBuiltInTemplatesFileSuffix[] = "inttemp";
static const char kBuiltInCutoffsFileSuffix[] = "pffmtable";
static const char kNormProtoFileSuffix[] = "normproto";
static const char kPuncDawgFileSuffix[] = "punc-dawg";
// The literal is "word-dawg" even though the constant is named "System".
static const char kSystemDawgFileSuffix[] = "word-dawg";
static const char kNumberDawgFileSuffix[] = "number-dawg";
static const char kFreqDawgFileSuffix[] = "freq-dawg";
static const char kFixedLengthDawgsFileSuffix[] = "fixed-length-dawgs";
static const char kCubeUnicharsetFileSuffix[] = "cube-unicharset";
// As above: named "System", literal is "cube-word-dawg".
static const char kCubeSystemDawgFileSuffix[] = "cube-word-dawg";
static const char kShapeTableFileSuffix[] = "shapetable";
static const char kBigramDawgFileSuffix[] = "bigram-dawg";
static const char kUnambigDawgFileSuffix[] = "unambig-dawg";
static const char kParamsModelFileSuffix[] = "params-model";
// Suffix strings for the LSTM-related components. Each constant holds exactly
// the literal shown; all six are listed in the kTessdataFileSuffixes table.
// NOTE(review): what each component contains is inferred from the names
// only -- confirm against the code that consumes these constants.
static const char kLSTMModelFileSuffix[] = "lstm";
static const char kLSTMPuncDawgFileSuffix[] = "lstm-punc-dawg";
// Named "System", but the literal is "lstm-word-dawg".
static const char kLSTMSystemDawgFileSuffix[] = "lstm-word-dawg";
static const char kLSTMNumberDawgFileSuffix[] = "lstm-number-dawg";
static const char kLSTMUnicharsetFileSuffix[] = "lstm-unicharset";
static const char kLSTMRecoderFileSuffix[] = "lstm-recoder";
// Suffix string "version".
// NOTE(review): presumably names the component that stores the version
// string handled by VersionString()/SetVersionString() -- confirm; that link
// is not visible in this section.
static const char kVersionFileSuffix[] = "version";
// Table of constant pointers to the suffix-string constants defined above.
// NOTE(review): as seen here the initializer list is incomplete -- there is
// no closing "};", and kAmbigsFileSuffix, kNormProtoFileSuffix,
// kPuncDawgFileSuffix, kFreqDawgFileSuffix and kVersionFileSuffix are defined
// above but do not appear in the list. The leading numerals on each line look
// like line-number residue from a rendered listing rather than code. Restore
// this table from the authoritative header before relying on its order or
// length; element order presumably has to match the TessdataType enum --
// confirm.
89static const char *
const kTessdataFileSuffixes[] = {
90 kLangConfigFileSuffix,
91 kUnicharsetFileSuffix,
93 kBuiltInTemplatesFileSuffix,
94 kBuiltInCutoffsFileSuffix,
97 kSystemDawgFileSuffix,
98 kNumberDawgFileSuffix,
100 kFixedLengthDawgsFileSuffix,
101 kCubeUnicharsetFileSuffix,
102 kCubeSystemDawgFileSuffix,
103 kShapeTableFileSuffix,
104 kBigramDawgFileSuffix,
105 kUnambigDawgFileSuffix,
106 kParamsModelFileSuffix,
107 kLSTMModelFileSuffix,
108 kLSTMPuncDawgFileSuffix,
109 kLSTMSystemDawgFileSuffix,
110 kLSTMNumberDawgFileSuffix,
111 kLSTMUnicharsetFileSuffix,
112 kLSTMRecoderFileSuffix,
// Integer constant 1000.
// NOTE(review): the name suggests an upper bound on the number of entries
// accepted in a tessdata file; the check that uses it is not visible here.
static const int kMaxNumTessdataEntries = 1000;
133 bool swap()
const {
return swap_; }
// Declaration only; the definition is not in this section.
// data_file_name: C string naming the data file to use.
// NOTE(review): the bool result presumably signals success (true) or failure
// (false) -- confirm against the definition.
bool Init(const char *data_file_name);
// Declaration only; the definition is not in this section.
// name: C string label for the buffer.
// data: pointer to the buffer contents.
// size: integer size accompanying data.
// NOTE(review): size is presumably a byte count and the bool result
// presumably signals success -- confirm against the definition.
bool LoadMemBuffer(const char *name, const char *data, int size);
162 return !entries_[type].
empty();
194 const char *output_filename);
202 char **component_filenames,
203 int num_new_components);
// Declaration only; the definition is not in this section.
// filename: C string naming the archive file.
// NOTE(review): the bool result presumably signals success (true) or failure
// (false) -- confirm against the definition.
bool LoadArchiveFile(const char *filename);
228 static bool TessdataTypeFromFileSuffix(
const char *suffix,
235 static bool TessdataTypeFromFileName(
const char *filename,
bool(*)(const STRING &, GenericVector< char > *) FileReader
@ TESSDATA_LSTM_SYSTEM_DAWG
@ TESSDATA_LSTM_UNICHARSET
@ TESSDATA_CUBE_SYSTEM_DAWG
@ TESSDATA_CUBE_UNICHARSET
@ TESSDATA_LSTM_PUNC_DAWG
@ TESSDATA_LSTM_NUMBER_DAWG
@ TESSDATA_FIXED_LENGTH_DAWGS
bool(*)(const GenericVector< char > &, const STRING &) FileWriter
void OverwriteEntry(TessdataType type, const char *data, int size)
std::string VersionString() const
bool IsLSTMAvailable() const
bool CombineDataFiles(const char *language_data_path_prefix, const char *output_filename)
void SetVersionString(const std::string &v_str)
bool GetComponent(TessdataType type, TFile *fp)
const STRING & GetDataFileName() const
bool OverwriteComponents(const char *new_traineddata_filename, char **component_filenames, int num_new_components)
void Serialize(GenericVector< char > *data) const
bool ExtractToFile(const char *filename)
void LoadFileLater(const char *data_file_name)
~TessdataManager()=default
bool IsBaseAvailable() const
bool SaveFile(const STRING &filename, FileWriter writer) const
bool IsComponentAvailable(TessdataType type) const
bool LoadMemBuffer(const char *name, const char *data, int size)
bool Init(const char *data_file_name)