19#define _USE_MATH_DEFINES
23#include "config_auto.h"
55#include "allheaders.h"
56#ifndef DISABLED_LEGACY_ENGINE
60#include "config_auto.h"
71#ifndef DISABLED_LEGACY_ENGINE
77#if defined(USE_OPENCL)
98static BOOL_VAR(stream_filelist,
false,
"Stream a filelist from stdin");
99static STRING_VAR(document_title,
"",
"Title of output document (used for hOCR and PDF output)");
115static const char* kInputFile =
"noname.tif";
119static const char* kOldVarsFile =
"failed_vars.txt";
125static void addAvailableLanguages(
const STRING &datadir,
const STRING &base,
128 const STRING base2 = (base.
string()[0] ==
'\0') ? base : base +
"/";
129 const size_t extlen =
sizeof(kTrainedDataSuffix);
131 WIN32_FIND_DATA data;
132 HANDLE handle = FindFirstFile((datadir + base2 +
"*").
string(), &data);
133 if (handle != INVALID_HANDLE_VALUE) {
136 char *name = data.cFileName;
138 if (name[0] !=
'.') {
139 if ((data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ==
140 FILE_ATTRIBUTE_DIRECTORY) {
141 addAvailableLanguages(datadir, base2 + name, langs);
143 size_t len = strlen(name);
144 if (len > extlen && name[len - extlen] ==
'.' &&
145 strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
146 name[len - extlen] =
'\0';
151 result = FindNextFile(handle, &data);
156 DIR* dir = opendir((datadir + base).
string());
157 if (dir !=
nullptr) {
159 while ((de = readdir(dir))) {
160 char *name = de->d_name;
162 if (name[0] !=
'.') {
164 if (stat((datadir + base2 + name).
string(), &st) == 0 &&
165 (st.st_mode & S_IFDIR) == S_IFDIR) {
166 addAvailableLanguages(datadir, base2 + name, langs);
168 size_t len = strlen(name);
169 if (len > extlen && name[len - extlen] ==
'.' &&
170 strcmp(&name[len - extlen + 1], kTrainedDataSuffix) == 0) {
171 name[len - extlen] =
'\0';
183static int CompareSTRING(
const void* p1,
const void* p2) {
184 const auto* s1 =
static_cast<const STRING*
>(p1);
185 const auto* s2 =
static_cast<const STRING*
>(p2);
186 return strcmp(s1->c_str(), s2->c_str());
189TessBaseAPI::TessBaseAPI()
190 : tesseract_(nullptr),
191 osd_tesseract_(nullptr),
192 equ_detect_(nullptr),
197 thresholder_(nullptr),
198 paragraph_models_(nullptr),
199 block_list_(nullptr),
201 input_file_(nullptr),
202 output_file_(nullptr),
206 recognition_done_(false),
222 std::locale::global(std::locale(
""));
226TessBaseAPI::~TessBaseAPI() {
233const char* TessBaseAPI::Version() {
234 return PACKAGE_VERSION;
244size_t TessBaseAPI::getOpenCLDevice(
void **data) {
246 ds_device device = OpenclDevice::getDeviceSelection();
247 if (device.type == DS_DEVICE_OPENCL_DEVICE) {
248 *data =
new cl_device_id;
249 memcpy(*data, &device.oclDeviceID,
sizeof(cl_device_id));
250 return sizeof(cl_device_id);
262void TessBaseAPI::CatchSignals() {
264 tprintf(
"Deprecated method CatchSignals has only a dummy implementation!\n");
271void TessBaseAPI::SetInputName(
const char* name) {
279void TessBaseAPI::SetOutputName(
const char* name) {
286bool TessBaseAPI::SetVariable(
const char* name,
const char* value) {
292bool TessBaseAPI::SetDebugVariable(
const char* name,
const char* value) {
298bool TessBaseAPI::GetIntVariable(
const char *name,
int *value)
const {
299 auto *p = ParamUtils::FindParam<IntParam>(
301 if (p ==
nullptr)
return false;
302 *value = (int32_t)(*p);
306bool TessBaseAPI::GetBoolVariable(
const char *name,
bool *value)
const {
307 auto *p = ParamUtils::FindParam<BoolParam>(
309 if (p ==
nullptr)
return false;
314const char *TessBaseAPI::GetStringVariable(
const char *name)
const {
315 auto *p = ParamUtils::FindParam<StringParam>(
317 return (p !=
nullptr) ? p->string() :
nullptr;
320bool TessBaseAPI::GetDoubleVariable(
const char *name,
double *value)
const {
321 auto *p = ParamUtils::FindParam<DoubleParam>(
323 if (p ==
nullptr)
return false;
324 *value = (double)(*p);
329bool TessBaseAPI::GetVariableAsString(
const char *name,
STRING *val) {
334void TessBaseAPI::PrintVariables(FILE *fp)
const {
346int TessBaseAPI::Init(
const char* datapath,
const char* language,
350 bool set_only_non_debug_params) {
351 return Init(datapath, 0, language,
oem, configs, configs_size, vars_vec,
352 vars_values, set_only_non_debug_params,
nullptr);
358int TessBaseAPI::Init(
const char* data,
int data_size,
const char* language,
362 bool set_only_non_debug_params,
FileReader reader) {
364 if (language ==
nullptr) language =
"eng";
365 STRING datapath = data_size == 0 ? data : language;
382 bool reset_classifier =
true;
384 reset_classifier =
false;
386 if (reader !=
nullptr)
reader_ = reader;
388 if (data_size != 0) {
394 language,
oem, configs, configs_size, vars_vec, vars_values,
395 set_only_non_debug_params, &mgr) != 0) {
415#ifndef DISABLED_LEGACY_ENGINE
417 if (reset_classifier) {
432const char* TessBaseAPI::GetInitLanguagesAsString()
const {
442void TessBaseAPI::GetLoadedLanguagesAsVector(
448 for (
int i = 0; i < num_subs; ++i)
456void TessBaseAPI::GetAvailableLanguagesAsVector(
461 langs->
sort(CompareSTRING);
466#ifndef DISABLED_LEGACY_ENGINE
473int TessBaseAPI::InitLangMod(
const char* datapath,
const char* language) {
487void TessBaseAPI::InitForAnalysePage() {
490 #ifndef DISABLED_LEGACY_ENGINE
501void TessBaseAPI::ReadConfigFile(
const char* filename) {
506void TessBaseAPI::ReadDebugConfigFile(
const char* filename) {
542char* TessBaseAPI::TesseractRect(
const unsigned char* imagedata,
546 int width,
int height) {
552 int bits_per_pixel = bytes_per_pixel == 0 ? 1 : bytes_per_pixel * 8;
553 SetImage(imagedata, bytes_per_line * 8 / bits_per_pixel, height + top,
554 bytes_per_pixel, bytes_per_line);
560#ifndef DISABLED_LEGACY_ENGINE
565void TessBaseAPI::ClearAdaptiveClassifier() {
580void TessBaseAPI::SetImage(
const unsigned char* imagedata,
581 int width,
int height,
582 int bytes_per_pixel,
int bytes_per_line) {
585 bytes_per_pixel, bytes_per_line);
590void TessBaseAPI::SetSourceResolution(
int ppi) {
594 tprintf(
"Please call SetImage before SetSourceResolution.\n");
605void TessBaseAPI::SetImage(Pix* pix) {
607 if (pixGetSpp(pix) == 4 && pixGetInputFormat(pix) == IFF_PNG) {
609 Pix* p1 = pixRemoveAlpha(pix);
611 (void)pixCopy(pix, p1);
624void TessBaseAPI::SetRectangle(
int left,
int top,
int width,
int height) {
635Pix* TessBaseAPI::GetThresholdedImage() {
649Boxa* TessBaseAPI::GetRegions(Pixa** pixa) {
661Boxa* TessBaseAPI::GetTextlines(
const bool raw_image,
const int raw_padding,
662 Pixa** pixa,
int** blockids,
int** paraids) {
664 pixa, blockids, paraids);
675Boxa* TessBaseAPI::GetStrips(Pixa** pixa,
int** blockids) {
684Boxa* TessBaseAPI::GetWords(Pixa** pixa) {
694Boxa* TessBaseAPI::GetConnectedComponents(Pixa** pixa) {
707 bool text_only,
bool raw_image,
708 const int raw_padding,
709 Pixa** pixa,
int** blockids,
712 if (page_it ==
nullptr)
714 if (page_it ==
nullptr)
718 int component_count = 0;
719 int left, top, right, bottom;
726 &left, &top, &right, &bottom);
732 level, &left, &top, &right, &bottom);
735 if (get_bbox->
Run() &&
738 }
while (page_it->
Next(level));
740 Boxa* boxa = boxaCreate(component_count);
742 *pixa = pixaCreate(component_count);
743 if (blockids !=
nullptr)
744 *blockids =
new int[component_count];
745 if (paraids !=
nullptr)
746 *paraids =
new int[component_count];
750 int component_index = 0;
753 if (get_bbox->
Run() &&
755 Box* lbox = boxCreate(left, top, right - left, bottom - top);
756 boxaAddBox(boxa, lbox, L_INSERT);
757 if (pixa !=
nullptr) {
765 pixaAddPix(*pixa, pix, L_INSERT);
766 pixaAddBox(*pixa, lbox, L_CLONE);
768 if (paraids !=
nullptr) {
769 (*paraids)[component_index] = paraid;
773 if (blockids !=
nullptr) {
774 (*blockids)[component_index] = blockid;
782 }
while (page_it->
Next(level));
788int TessBaseAPI::GetThresholdedImageScaleFactor()
const {
844#ifndef DISABLED_LEGACY_ENGINE
867#ifndef DISABLED_LEGACY_ENGINE
887 #ifndef GRAPHICS_DISABLED
895 #ifndef DISABLED_LEGACY_ENGINE
905 fclose(training_output_file);
909 bool wait_for_text =
true;
921#ifndef DISABLED_LEGACY_ENGINE
927 tprintf(
"Please call SetImage before attempting recognition.\n");
944 while (page_res_it.
word() !=
nullptr) {
948 page_res_it.
row()->
row, word_res);
960const char * TessBaseAPI::GetInputName() {
966const char * TessBaseAPI::GetDatapath() {
970int TessBaseAPI::GetSourceYResolution() {
978bool TessBaseAPI::ProcessPagesFileList(FILE *flist,
980 const char* retry_config,
981 int timeout_millisec,
983 int tessedit_page_number) {
984 if (!flist && !buf)
return false;
985 int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
990 buf->
split(
'\n', &lines);
991 if (lines.
empty())
return false;
995 for (
int i = 0; i < page; i++) {
997 if (fgets(pagename,
sizeof(pagename), flist) ==
nullptr)
break;
1002 if (renderer && !renderer->
BeginDocument(document_title.c_str())) {
1009 if (fgets(pagename,
sizeof(pagename), flist) ==
nullptr)
break;
1011 if (page >= lines.
size())
break;
1012 snprintf(pagename,
sizeof(pagename),
"%s", lines[page].c_str());
1015 Pix *pix = pixRead(pagename);
1016 if (pix ==
nullptr) {
1017 tprintf(
"Image file %s cannot be read!\n", pagename);
1020 tprintf(
"Page %d : %s\n", page, pagename);
1021 bool r =
ProcessPage(pix, page, pagename, retry_config,
1022 timeout_millisec, renderer);
1024 if (!r)
return false;
1025 if (tessedit_page_number >= 0)
break;
1036bool TessBaseAPI::ProcessPagesMultipageTiff(
const l_uint8 *data,
1038 const char* filename,
1039 const char* retry_config,
1040 int timeout_millisec,
1042 int tessedit_page_number) {
1043#ifndef ANDROID_BUILD
1045 int page = (tessedit_page_number >= 0) ? tessedit_page_number : 0;
1048 if (tessedit_page_number >= 0) {
1049 page = tessedit_page_number;
1050 pix = (data) ? pixReadMemTiff(data, size, page)
1051 : pixReadTiff(filename, page);
1053 pix = (data) ? pixReadMemFromMultipageTiff(data, size, &offset)
1054 : pixReadFromMultipageTiff(filename, &offset);
1056 if (pix ==
nullptr)
break;
1057 tprintf(
"Page %d\n", page + 1);
1061 bool r =
ProcessPage(pix, page, filename, retry_config,
1062 timeout_millisec, renderer);
1064 if (!r)
return false;
1065 if (tessedit_page_number >= 0)
break;
1076bool TessBaseAPI::ProcessPages(
const char* filename,
const char* retry_config,
1077 int timeout_millisec,
1081 #ifndef DISABLED_LEGACY_ENGINE
1094WriteMemoryCallback(
void *contents,
size_t size,
size_t nmemb,
void *userp)
1096 size = size * nmemb;
1097 std::string* buf =
reinterpret_cast<std::string*
>(userp);
1098 buf->append(
reinterpret_cast<const char*
>(contents), size);
1113bool TessBaseAPI::ProcessPagesInternal(
const char* filename,
1114 const char* retry_config,
1115 int timeout_millisec,
1117 bool stdInput = !strcmp(filename,
"stdin") || !strcmp(filename,
"-");
1120 if (_setmode(_fileno(stdin), _O_BINARY) == -1)
1121 tprintf(
"ERROR: cin to binary: %s", strerror(errno));
1125 if (stream_filelist) {
1126 return ProcessPagesFileList(stdin,
nullptr, retry_config,
1127 timeout_millisec, renderer,
1135 const l_uint8 *data =
nullptr;
1137 buf.assign((std::istreambuf_iterator<char>(std::cin)),
1138 (std::istreambuf_iterator<char>()));
1139 data =
reinterpret_cast<const l_uint8 *
>(buf.data());
1140 }
else if (strncmp(filename,
"http:", 5) == 0 ||
1141 strncmp(filename,
"https:", 6) == 0 ) {
1144 CURL* curl = curl_easy_init();
1145 if (curl ==
nullptr) {
1146 fprintf(stderr,
"Error, curl_easy_init failed\n");
1150 curlcode = curl_easy_setopt(curl, CURLOPT_URL, filename);
1152 curlcode = curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
1154 curlcode = curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buf);
1156 curlcode = curl_easy_perform(curl);
1158 curl_easy_cleanup(curl);
1159 data =
reinterpret_cast<const l_uint8 *
>(buf.data());
1162 fprintf(stderr,
"Error, this tesseract has no URL support\n");
1167 if (FILE* file = fopen(filename,
"rb")) {
1170 fprintf(stderr,
"Error, cannot read input file %s: %s\n",
1171 filename, strerror(errno));
1178 int r = (data !=
nullptr) ?
1179 findFileFormatBuffer(data, &format) :
1180 findFileFormat(filename, &format);
1183 if (r != 0 || format == IFF_UNKNOWN) {
1185 if (data !=
nullptr) {
1188 std::ifstream t(filename);
1189 std::string u((std::istreambuf_iterator<char>(t)),
1190 std::istreambuf_iterator<char>());
1193 return ProcessPagesFileList(
nullptr, &s, retry_config,
1194 timeout_millisec, renderer,
1199 bool tiff = (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
1200 format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
1201 format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
1202#if LIBLEPT_MAJOR_VERSION > 1 || LIBLEPT_MINOR_VERSION > 76
1203 format == IFF_TIFF_JPEG ||
1205 format == IFF_TIFF_ZIP);
1210 pix = (data !=
nullptr) ? pixReadMem(data, buf.size()) : pixRead(filename);
1211 if (pix ==
nullptr) {
1217 if (renderer && !renderer->
BeginDocument(document_title.c_str())) {
1224 ProcessPagesMultipageTiff(data, buf.size(), filename, retry_config,
1225 timeout_millisec, renderer,
1228 timeout_millisec, renderer);
1234 if (!r || (renderer && !renderer->
EndDocument())) {
1240bool TessBaseAPI::ProcessPage(Pix* pix,
int page_index,
const char* filename,
1241 const char* retry_config,
int timeout_millisec,
1245 bool failed =
false;
1251 if (it ==
nullptr) {
1258 }
else if (timeout_millisec > 0) {
1261 monitor.
cancel =
nullptr;
1273#ifndef ANDROID_BUILD
1275 pixWrite(
"tessinput.tif", page_pix, IFF_TIFF_G4);
1279 if (failed && retry_config !=
nullptr && retry_config[0] !=
'\0') {
1281 FILE* fp = fopen(kOldVarsFile,
"wb");
1282 if (fp ==
nullptr) {
1283 tprintf(
"Error, failed to open file \"%s\"\n", kOldVarsFile);
1296 if (renderer && !failed) {
1297 failed = !renderer->
AddImage(
this);
1351char* TessBaseAPI::GetUTF8Text() {
1360 text += para_text.get();
1362 char* result =
new char[text.
length() + 1];
1370 int left, top, right, bottom;
1371 it->
BoundingBox(level, &left, &top, &right, &bottom);
1383char* TessBaseAPI::GetTSVText(
int page_number) {
1387 int lcnt = 1, bcnt = 1, pcnt = 1, wcnt = 1;
1388 int page_id = page_number + 1;
1392 int page_num = page_id;
1407 tsv_str +=
"\t-1\t\n";
1427 AddBoxToTSV(res_it,
RIL_BLOCK, &tsv_str);
1428 tsv_str +=
"\t-1\t\n";
1439 AddBoxToTSV(res_it,
RIL_PARA, &tsv_str);
1440 tsv_str +=
"\t-1\t\n";
1451 tsv_str +=
"\t-1\t\n";
1455 int left, top, right, bottom;
1484 char* ret =
new char[tsv_str.
length() + 1];
1485 strcpy(ret, tsv_str.
string());
1520char* TessBaseAPI::GetBoxText(
int page_number) {
1528 char* result =
new char[total_length];
1530 int output_length = 0;
1533 int left, top, right, bottom;
1535 const std::unique_ptr<
char[]> text(
1539 for (
int i = 0; text[i] !=
'\0'; ++i) {
1543 snprintf(result + output_length, total_length - output_length,
1544 "%s %d %d %d %d %d\n", text.get(), left,
image_height_ - bottom,
1546 output_length += strlen(result + output_length);
1562 0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
1566 0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
1574char* TessBaseAPI::GetUNLVText() {
1578 bool tilde_crunch_written =
false;
1579 bool last_char_was_newline =
true;
1580 bool last_char_was_tilde =
false;
1584 char* result =
new char[total_length];
1592 (!tilde_crunch_written ||
1603 last_char_was_tilde =
false;
1605 if (!last_char_was_tilde) {
1607 last_char_was_tilde =
true;
1609 tilde_crunch_written =
true;
1610 last_char_was_newline =
false;
1615 tilde_crunch_written =
false;
1619 int length = lengths.
length();
1623 if (last_char_was_tilde &&
1624 word->
word->
space() == 0 && wordstr[offset] ==
' ') {
1628 offset = lengths[i++];
1630 if (i < length && wordstr[offset] != 0) {
1631 if (!last_char_was_newline)
1634 last_char_was_newline =
false;
1635 for (; i < length; offset += lengths[i++]) {
1636 if (wordstr[offset] ==
' ' ||
1639 last_char_was_tilde =
true;
1643 UNICHAR ch(wordstr + offset, lengths[i]);
1645 for (
int j = 0;
kUniChs[j] != 0; ++j) {
1651 if (uni_ch <= 0xff) {
1652 *ptr++ =
static_cast<char>(uni_ch);
1653 last_char_was_tilde =
false;
1656 last_char_was_tilde =
true;
1665 tilde_crunch_written =
false;
1666 last_char_was_newline =
true;
1667 last_char_was_tilde =
false;
1675#ifndef DISABLED_LEGACY_ENGINE
1686bool TessBaseAPI::DetectOrientationScript(
int* orient_deg,
float* orient_conf,
1687 const char** script_name,
1688 float* script_conf) {
1699 if (orient_deg) *orient_deg = orient_id * 90;
1704 *script_name = script;
1717char* TessBaseAPI::GetOsdText(
int page_number) {
1720 const char* script_name;
1730 std::stringstream stream;
1732 stream.imbue(std::locale::classic());
1734 stream.precision(2);
1737 <<
"Page number: " << page_number <<
"\n"
1738 <<
"Orientation in degrees: " << orient_deg <<
"\n"
1739 <<
"Rotate: " << rotate <<
"\n"
1740 <<
"Orientation confidence: " << orient_conf <<
"\n"
1741 <<
"Script: " << script_name <<
"\n"
1742 <<
"Script confidence: " << script_conf <<
"\n";
1743 const std::string& text = stream.str();
1744 char* result =
new char[text.length() + 1];
1745 strcpy(result, text.c_str());
1752int TessBaseAPI::MeanTextConf() {
1754 if (!conf)
return 0;
1757 while (*pt >= 0) sum += *pt++;
1758 if (pt != conf) sum /= pt - conf;
1764int* TessBaseAPI::AllWordConfidences() {
1773 int* conf =
new int[n_word+1];
1778 int w_conf =
static_cast<int>(100 + 5 * choice->
certainty());
1780 if (w_conf < 0) w_conf = 0;
1781 if (w_conf > 100) w_conf = 100;
1782 conf[n_word++] = w_conf;
1788#ifndef DISABLED_LEGACY_ENGINE
1799bool TessBaseAPI::AdaptToWordStr(
PageSegMode mode,
const char* wordstr) {
1802 bool success =
true;
1806 const std::unique_ptr<const char[]> text(
GetUTF8Text());
1808 tprintf(
"Trying to adapt \"%s\" to \"%s\"\n", text.get(), wordstr);
1810 if (text !=
nullptr) {
1813 if (word_res !=
nullptr) {
1818 for (t = 0; text[t] !=
'\0'; ++t) {
1819 if (text[t] ==
'\n' || text[t] ==
' ')
1821 while (wordstr[w] ==
' ') ++w;
1822 if (text[t] != wordstr[w])
1826 if (text[t] !=
'\0' || wordstr[w] !=
'\0') {
1834 if (pr_it.
word() ==
nullptr)
1837 word_res = pr_it.
word();
1862void TessBaseAPI::Clear() {
1875void TessBaseAPI::End() {
1910void TessBaseAPI::ClearPersistentCache() {
1918int TessBaseAPI::IsValidWord(
const char *word) {
1922bool TessBaseAPI::IsValidCharacter(
const char *utf8_character) {
1929bool TessBaseAPI::GetTextDirection(
int* out_offset,
float* out_slope) {
1931 if (it ==
nullptr) {
1937 if (x2 <= x1) x2 = x1 + 1;
1939 *out_slope =
static_cast<float>(y2 - y1) / (x2 - x1);
1940 *out_offset =
static_cast<int>(y1 - *out_slope * x1);
1943 int left, top, right, bottom;
1953 *out_offset += bottom - std::max(left_y, right_y);
1956 *out_slope = -*out_slope;
1983 for (
int i = 0; i < num_subs; ++i) {
1989#ifndef DISABLED_LEGACY_ENGINE
1997bool TessBaseAPI::InternalSetImage() {
1999 tprintf(
"Please call Init before attempting to set an image.\n");
2014bool TessBaseAPI::Threshold(Pix** pix) {
2016 if (*pix !=
nullptr)
2024 tprintf(
"Warning: User defined image dpi is outside of expected range "
2033 tprintf(
"Warning: Invalid resolution %d dpi. Using %d instead.\n",
2059 tprintf(
"Estimated internal resolution %d out of range! "
2060 "Corrected to %d.\n",
2068int TessBaseAPI::FindLines() {
2070 tprintf(
"Please call SetImage before attempting recognition.\n");
2080 #ifndef DISABLED_LEGACY_ENGINE
2091#ifndef DISABLED_LEGACY_ENGINE
2097 tprintf(
"Warning: Could not set equation detector\n");
2107 osd_tess ==
nullptr) {
2114 tprintf(
"Warning: Auto orientation and script detection requested,"
2115 " but data path is undefined\n");
2120 nullptr, 0,
nullptr,
nullptr,
2121 false, &mgr) == 0) {
2126 tprintf(
"Warning: Auto orientation and script detection requested,"
2127 " but osd language failed to load\n");
2144void TessBaseAPI::ClearResults() {
2169int TessBaseAPI::TextLength(
int* blob_count) {
2174 int total_length = 2;
2175 int total_blobs = 0;
2181 if (choice !=
nullptr) {
2182 total_blobs += choice->
length() + 2;
2190 if (blob_count !=
nullptr)
2191 *blob_count = total_blobs;
2192 return total_length;
2195#ifndef DISABLED_LEGACY_ENGINE
2215void TessBaseAPI::set_min_orientation_margin(
double margin) {
2233void TessBaseAPI::GetBlockTextOrientations(
int** block_orientation,
2234 bool** vertical_writing) {
2235 delete[] *block_orientation;
2236 *block_orientation =
nullptr;
2237 delete[] *vertical_writing;
2238 *vertical_writing =
nullptr;
2241 block_it.move_to_first();
2243 for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
2244 if (!block_it.data()->pdblk.poly_block()->IsText()) {
2250 tprintf(
"WARNING: Found no blocks\n");
2253 *block_orientation =
new int[num_blocks];
2254 *vertical_writing =
new bool[num_blocks];
2255 block_it.move_to_first();
2257 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
2258 block_it.forward()) {
2259 if (!block_it.data()->pdblk.poly_block()->IsText()) {
2262 FCOORD re_rotation = block_it.data()->re_rotation();
2263 float re_theta = re_rotation.
angle();
2264 FCOORD classify_rotation = block_it.data()->classify_rotation();
2265 float classify_theta = classify_rotation.
angle();
2266 double rot_theta = - (re_theta - classify_theta) * 2.0 / M_PI;
2267 if (rot_theta < 0) rot_theta += 4;
2268 int num_rotations =
static_cast<int>(rot_theta + 0.5);
2269 (*block_orientation)[i] = num_rotations;
2272 (*vertical_writing)[i] = classify_rotation.
y() != 0.0f;
2278void TessBaseAPI::DetectParagraphs(
bool after_text_recognition) {
2279 int debug_level = 0;
2287 result_it, &models);
2294const char* TessBaseAPI::GetUnichar(
int unichar_id) {
2299const Dawg *TessBaseAPI::GetDawg(
int i)
const {
2305int TessBaseAPI::NumDawgs()
const {
2313 for (ptr = text; *ptr; ptr++) {
2315 case '<': ret +=
"<";
break;
2316 case '>': ret +=
">";
break;
2317 case '&': ret +=
"&";
break;
2318 case '"': ret +=
""";
break;
2319 case '\'': ret +=
"'";
break;
2320 default: ret += *ptr;
2327#ifndef DISABLED_LEGACY_ENGINE
2334BLOCK_LIST* TessBaseAPI::FindLinesCreateBlockList() {
2346void TessBaseAPI::DeleteBlockList(BLOCK_LIST *block_list) {
2355 int32_t xstarts[] = {-32000};
2356 double quad_coeffs[] = {0, 0,
baseline};
2368TBLOB *TessBaseAPI::MakeTBLOB(Pix *pix) {
2369 int width = pixGetWidth(pix);
2370 int height = pixGetHeight(pix);
2371 BLOCK block(
"a character",
true, 0, 0, 0, 0, width, height);
2378 C_BLOB_IT c_blob_it(list);
2379 if (c_blob_it.empty())
2382 C_OUTLINE_IT ol_it(c_blob_it.data()->out_list());
2383 for (c_blob_it.forward();
2384 !c_blob_it.at_first();
2385 c_blob_it.forward()) {
2386 C_BLOB *c_blob = c_blob_it.data();
2387 ol_it.add_list_after(c_blob->
out_list());
2398void TessBaseAPI::NormalizeTBLOB(
TBLOB *tblob,
ROW *row,
bool numeric_mode) {
2400 float x_center = (box.
left() + box.
right()) / 2.0f;
2411static TBLOB *make_tesseract_blob(
float baseline,
float xheight,
2412 float descender,
float ascender,
2413 bool numeric_mode, Pix* pix) {
2414 TBLOB *tblob = TessBaseAPI::MakeTBLOB(pix);
2418 TessBaseAPI::MakeTessOCRRow(
baseline, xheight, descender, ascender);
2419 TessBaseAPI::NormalizeTBLOB(tblob, row, numeric_mode);
2429void TessBaseAPI::AdaptToCharacter(
const char *unichar_repr,
2436 TBLOB *blob = make_tesseract_blob(
baseline, xheight, descender, ascender,
2440 float best_rating = -100;
2444 BLOB_CHOICE_LIST choices;
2446 BLOB_CHOICE_IT choice_it;
2447 choice_it.set_to_list(&choices);
2448 for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
2449 choice_it.forward()) {
2450 if (choice_it.data()->rating() > best_rating) {
2451 best_rating = choice_it.data()->rating();
2464PAGE_RES* TessBaseAPI::RecognitionPass1(BLOCK_LIST* block_list) {
2465 auto *page_res =
new PAGE_RES(
false, block_list,
2471PAGE_RES* TessBaseAPI::RecognitionPass2(BLOCK_LIST* block_list,
2474 pass1_result =
new PAGE_RES(
false, block_list,
2477 return pass1_result;
2487 length = (len == -1 ? strlen(repr) : len);
2506static void add_space(TESS_CHAR_IT* it) {
2507 auto *t =
new TESS_CHAR(0,
" ");
2508 it->add_after_then_move(t);
2512static float rating_to_cost(
float rating) {
2513 rating = 100 + rating;
2517 if (rating < 0) rating = 0;
2525static void extract_result(TESS_CHAR_IT* out,
2529 while (page_res_it.word() !=
nullptr) {
2537 int n = strlen(len);
2538 for (
int i = 0; i < n; i++) {
2542 out->add_after_then_move(tc);
2546 page_res_it.forward();
2555int TessBaseAPI::TesseractExtractResult(
char** text,
2563 TESS_CHAR_LIST tess_chars;
2564 TESS_CHAR_IT tess_chars_it(&tess_chars);
2565 extract_result(&tess_chars_it, page_res);
2566 tess_chars_it.move_to_first();
2567 int n = tess_chars.length();
2569 *lengths =
new int[n];
2570 *costs =
new float[n];
2576 for (tess_chars_it.mark_cycle_pt();
2577 !tess_chars_it.cycled_list();
2578 tess_chars_it.forward(), i++) {
2580 text_len += (*lengths)[i] = tc->
length;
2581 (*costs)[i] = tc->
cost;
2585 (*y1)[i] = tc->
box.
top();
2587 char *p = *text =
new char[text_len];
2589 tess_chars_it.move_to_first();
2590 for (tess_chars_it.mark_cycle_pt();
2591 !tess_chars_it.cycled_list();
2592 tess_chars_it.forward()) {
2607void TessBaseAPI::GetFeaturesForBlob(
TBLOB* blob,
2610 int* feature_outline_index) {
2616 &cn_features, &fx_info, &outline_counts);
2621 *num_features = cn_features.
size();
2622 memcpy(int_features, &cn_features[0], *num_features *
sizeof(cn_features[0]));
2624 if (feature_outline_index !=
nullptr) {
2626 for (
int i = 0; i < outline_counts.
size(); ++i) {
2627 while (f < outline_counts[i])
2628 feature_outline_index[f++] = i;
2635ROW* TessBaseAPI::FindRowForBox(BLOCK_LIST* blocks,
2636 int left,
int top,
int right,
int bottom) {
2637 TBOX box(left, bottom, right, top);
2638 BLOCK_IT b_it(blocks);
2639 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
2640 BLOCK* block = b_it.data();
2644 for (r_it.mark_cycle_pt(); !r_it.cycled_list(); r_it.forward()) {
2645 ROW* row = r_it.data();
2649 for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
2650 WERD* word = w_it.data();
2660void TessBaseAPI::RunAdaptiveClassifier(
TBLOB* blob,
2661 int num_max_matches,
2664 int* num_matches_returned) {
2665 auto* choices =
new BLOB_CHOICE_LIST;
2667 BLOB_CHOICE_IT choices_it(choices);
2668 int& index = *num_matches_returned;
2670 for (choices_it.mark_cycle_pt();
2671 !choices_it.cycled_list() && index < num_max_matches;
2672 choices_it.forward()) {
2675 ratings[index] = choice->
rating();
2678 *num_matches_returned = index;
struct TessResultRenderer TessResultRenderer
int OrientationIdToValue(const int &id)
int orientation_and_script_detection(STRING &filename, OSResults *osr, tesseract::Tesseract *tess)
const int kBlnBaselineOffset
bool PTIsTextType(PolyBlockType type)
constexpr int kMinCredibleResolution
constexpr int kMaxCredibleResolution
@ W_FUZZY_NON
fuzzy nonspace
#define ELISTIZEH(CLASSNAME)
#define ELISTIZE(CLASSNAME)
void chomp_string(char *str)
int IntCastRounded(double x)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
tesseract::ParamsVectors * GlobalParams()
#define BOOL_VAR(name, val, comment)
#define STRING_VAR(name, val, comment)
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
DLLSYM void tprintf(const char *format,...)
#define MAX_NUM_INT_FEATURES
void extract_edges(Pix *pix, BLOCK *block)
@ SET_PARAM_CONSTRAINT_NON_INIT_ONLY
@ SET_PARAM_CONSTRAINT_DEBUG_ONLY
const char kTesseractReject
bool(*)(const STRING &, GenericVector< char > *) FileReader
const int kBytesPerBoxFileLine
bool PSM_OSD_ENABLED(int pageseg_mode)
@ PSM_OSD_ONLY
Orientation and script detection only.
@ PSM_AUTO_ONLY
Automatic page segmentation, but no OSD, or OCR.
@ PSM_SINGLE_BLOCK
Assume a single uniform block of text. (Default.)
void(Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *) FillLatticeFunc
int(Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const DictFunc
const int kBytesPer64BitNumber
double(Dict::*)(const char *, const char *, int, const char *, int) ProbabilityInContextFunc
const int kMaxBytesPerLine
STRING HOcrEscape(const char *text)
const int kBytesPerNumber
void ExtractFontName(const STRING &filename, STRING *fontname)
const int kNumbersPerBlob
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
TESS_CHAR(float _cost, const char *repr, int len=-1)
void delete_data_pointers()
EquationDetect * equ_detect_
The equation detector.
GenericVector< ParagraphModel * > * paragraph_models_
bool ProcessPagesInternal(const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
virtual TESS_LOCAL bool Threshold(Pix **pix)
bool ProcessPage(Pix *pix, int page_index, const char *filename, const char *retry_config, int timeout_millisec, TessResultRenderer *renderer)
int Recognize(ETEXT_DESC *monitor)
PAGE_RES * page_res_
The page-level data.
void SetPageSegMode(PageSegMode mode)
TESS_LOCAL bool InternalSetImage()
STRING * output_file_
Name used by debug code.
Tesseract * tesseract_
The underlying data object.
bool GetIntVariable(const char *name, int *value) const
void SetRectangle(int left, int top, int width, int height)
MutableIterator * GetMutableIterator()
bool SetVariable(const char *name, const char *value)
TESS_LOCAL void DetectParagraphs(bool after_text_recognition)
STRING * input_file_
Name used by training code.
int * AllWordConfidences()
ResultIterator * GetIterator()
bool DetectOS(OSResults *)
PageSegMode GetPageSegMode() const
STRING * language_
Last initialized language.
bool recognition_done_
page_res_ contains recognition data.
FileReader reader_
Reads files from any filesystem.
TESS_LOCAL int FindLines()
void SetInputName(const char *name)
int Init(const char *datapath, const char *language, OcrEngineMode mode, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_non_debug_params)
OcrEngineMode oem() const
void PrintVariables(FILE *fp) const
ImageThresholder * thresholder_
Image thresholding module.
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Boxa * GetComponentImages(PageIteratorLevel level, bool text_only, bool raw_image, int raw_padding, Pixa **pixa, int **blockids, int **paraids)
PageIterator * AnalyseLayout()
void ReadConfigFile(const char *filename)
BLOCK_LIST * block_list_
The page layout.
TruthCallback * truth_cb_
bool DetectOrientationScript(int *orient_deg, float *orient_conf, const char **script_name, float *script_conf)
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Tesseract * osd_tesseract_
For orientation & script detection.
STRING * datapath_
Current location of tessdata.
bool GetBoolVariable(const char *name, bool *value) const
TESS_LOCAL int TextLength(int *blob_count)
Pix * GetThresholdedImage()
void SetInputImage(Pix *pix)
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
bool AddImage(TessBaseAPI *api)
bool BeginDocument(const char *title)
char * GetUTF8Text(PageIteratorLevel level) const
float Confidence(PageIteratorLevel level) const
TESS_API int get_best_script(int orientation_id) const
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
virtual bool Next(PageIteratorLevel level)
PolyBlockType BlockType() const
Pix * GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const
bool Empty(PageIteratorLevel level) const
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
Pix * GetBinaryImage(PageIteratorLevel level) const
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const override
virtual char * GetUTF8Text(PageIteratorLevel level) const
bool IsAtBeginningOf(PageIteratorLevel level) const override
bool Next(PageIteratorLevel level) override
static ResultIterator * StartOfParagraph(const LTRResultIterator &resit)
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)
int tessedit_pageseg_mode
double min_orientation_margin
void SetEquationDetect(EquationDetect *detector)
int init_tesseract(const char *arg0, const char *textbase, const char *language, OcrEngineMode oem, char **configs, int configs_size, const GenericVector< STRING > *vars_vec, const GenericVector< STRING > *vars_values, bool set_only_init_params, TessdataManager *mgr)
void set_unlv_suspects(WERD_RES *word)
void set_pix_grey(Pix *grey_pix)
int SegmentPage(const STRING *input_file, BLOCK_LIST *blocks, Tesseract *osd_tess, OSResults *osr)
void SetBlackAndWhitelist()
int num_sub_langs() const
void TidyUp(PAGE_RES *page_res)
void read_config_file(const char *filename, SetParamConstraint constraint)
FILE * init_recog_training(const STRING &fname)
bool interactive_display_mode
void ReSegmentByClassification(PAGE_RES *page_res)
bool tessedit_resegment_from_boxes
Dict & getDict() override
void ApplyBoxTraining(const STRING &fontname, PAGE_RES *page_res)
bool tessedit_train_line_recognizer
Tesseract * get_sub_lang(int index) const
bool tessedit_resegment_from_line_boxes
void PrepareForTessOCR(BLOCK_LIST *block_list, Tesseract *osd_tess, OSResults *osr)
PAGE_RES * SetupApplyBoxes(const GenericVector< TBOX > &boxes, BLOCK_LIST *block_list)
Pix * pix_original() const
void set_pix_thresholds(Pix *thresholds)
bool tessedit_train_from_boxes
void ResetAdaptiveClassifier()
int init_tesseract_lm(const char *arg0, const char *textbase, const char *language, TessdataManager *mgr)
void set_source_resolution(int ppi)
void CorrectClassifyWords(PAGE_RES *page_res)
void MaximallyChopWord(const GenericVector< TBOX > &boxes, BLOCK *block, ROW *row, WERD_RES *word_res)
bool tessedit_make_boxes_from_boxes
bool tessedit_write_images
void pgeditor_main(int width, int height, PAGE_RES *page_res)
bool TrainLineRecognizer(const STRING &input_imagename, const STRING &output_basename, BLOCK_LIST *block_list)
Pix ** mutable_pix_binary()
void ResetDocumentDictionary()
PAGE_RES * ApplyBoxes(const STRING &fname, bool find_segmentation, BLOCK_LIST *block_list)
void set_pix_original(Pix *original_pix)
bool textord_equation_detect
bool tessedit_ambigs_training
bool recog_all_words(PAGE_RES *page_res, ETEXT_DESC *monitor, const TBOX *target_word_box, const char *word_config, int dopasses)
int GetScaledEstimatedResolution() const
virtual Pix * GetPixRectGrey()
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix **pix)
Returns false on error.
int GetSourceYResolution() const
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
bool IsEmpty() const
Return true if no image has been set.
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
int GetScaledYResolution() const
virtual Pix * GetPixRectThresholds()
void SetRectangle(int left, int top, int width, int height)
int GetScaleFactor() const
bool IsBinary() const
Returns true if the source image is binary.
void SetSourceYResolution(int ppi)
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
TBOX bounding_box() const
void Normalize(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift, bool inverse, Pix *pix)
static TBLOB * PolygonalCopy(bool allow_detailed_fx, C_BLOB *src)
const TBOX & BlobBox(int index) const
ROW_LIST * row_list()
get rows
C_BLOB_LIST * blob_list()
get blobs
PDBLK pdblk
Page Description Block.
float base_line(float xpos) const
TBOX bounding_box() const
tesseract::BoxWord * box_word
WERD_CHOICE * best_choice
CRUNCH_MODE unlv_crunch_mode
void BestChoiceToCorrectText()
BLOCK_RES * block() const
WERD_RES * restart_page()
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
float angle() const
find angle
UNICHAR_ID unichar_id() const
const STRING & unichar_string() const
const STRING & unichar_lengths() const
bool major_overlap(const TBOX &box) const
TBOX intersection(const TBOX &box) const
C_OUTLINE_LIST * out_list()
void set_text(const char *new_text)
TBOX bounding_box() const
bool flag(WERD_FLAGS mask) const
void set_deadline_msecs(int32_t deadline_msecs)
void * cancel_this
monitor-aware progress callback
CANCEL_FUNC cancel
for errcode use
GenericVector< IntParam * > int_params
GenericVector< DoubleParam * > double_params
GenericVector< BoolParam * > bool_params
GenericVector< StringParam * > string_params
static bool GetParamAsString(const char *name, const ParamsVectors *member_params, STRING *value)
static void ResetToDefaults(ParamsVectors *member_params)
static void PrintParams(FILE *fp, const ParamsVectors *member_params)
static bool SetParam(const char *name, const char *value, SetParamConstraint constraint, ParamsVectors *member_params)
const char * c_str() const
void add_str_int(const char *str, int number)
const char * string() const
void split(char c, GenericVector< STRING > *splited)
virtual void Run(A1, A2, A3, A4)=0
bool LoadMemBuffer(const char *name, const char *data, int size)
bool contains_unichar(const char *const unichar_repr) const
const char * get_script_from_script_id(int id) const
const char * id_to_unichar(UNICHAR_ID id) const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
static void ExtractFeatures(const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
void LearnWord(const char *fontname, WERD_RES *word)
ADAPT_TEMPLATES AdaptedTemplates
bool classify_bln_numeric_mode
bool WriteTRFile(const STRING &filename)
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
void InitAdaptiveClassifier(TessdataManager *mgr)
double matcher_good_threshold
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates)
static TESS_API DawgCache * GlobalDawgCache()
int(Dict::* letter_is_okay_)(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
int valid_word(const WERD_CHOICE &word, bool numbers_ok) const
int NumDawgs() const
Return the number of dawgs in the dawgs_ vector.
const Dawg * GetDawg(int index) const
Return i-th dawg pointer recorded in the dawgs_ vector.
double(Dict::* probability_in_context_)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Probability in context function used by the ngram permuter.
const UNICHARSET & getUnicharset() const
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
WERD_CHOICE * prev_word_best_choice_