tesseract 4.1.1
Loading...
Searching...
No Matches
baseapi.h
Go to the documentation of this file.
1
2// File: baseapi.h
3// Description: Simple API for calling tesseract.
4// Author: Ray Smith
5//
6// (C) Copyright 2006, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#ifndef TESSERACT_API_BASEAPI_H_
20#define TESSERACT_API_BASEAPI_H_
21
22#include <cstdio>
23// To avoid collisions with other type names, keep the includes here to the
24// ABSOLUTE MINIMUM. Use forward declarations wherever possible
25// and hide includes of complex types in baseapi.cpp.
26#include "apitypes.h"
27#include "pageiterator.h"
28#include "platform.h"
29#include "publictypes.h"
30#include "resultiterator.h"
31#include "serialis.h"
32#include "tess_version.h"
33#include "tesscallback.h"
34#include "thresholder.h"
35#include "unichar.h"
36
37template <typename T> class GenericVector;
38class PAGE_RES;
39class PAGE_RES_IT;
40class ParagraphModel;
41struct BlamerBundle;
42class BLOCK_LIST;
43class DENORM;
44class MATRIX;
45class ROW;
46class STRING;
47class WERD;
48struct Pix;
49struct Box;
50struct Pixa;
51struct Boxa;
52class ETEXT_DESC;
53struct OSResults;
54class TBOX;
55class UNICHARSET;
56class WERD_CHOICE_LIST;
57
60struct TBLOB;
61
62namespace tesseract {
63
64class Dawg;
65class Dict;
66class EquationDetect;
67class PageIterator;
68class LTRResultIterator;
69class ResultIterator;
70class MutableIterator;
72class Tesseract;
73class Trie;
74class Wordrec;
75
// Pointer-to-member-function aliases used as pluggable hooks.
// DictFunc: a const member function of Dict taking
// (void*, const UNICHARSET&, UNICHAR_ID, bool) and returning int.
// Accepted by SetDictFunc().
76using DictFunc = int (Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const;
// ProbabilityInContextFunc: a non-const member function of Dict taking
// (const char*, const char*, int, const char*, int) and returning double.
// Accepted by SetProbabilityInContextFunc().
77using ProbabilityInContextFunc = double (Dict::*)(const char *, const char *, int, const char *, int);
// ParamsModelClassifyFunc: a non-const member function of Dict taking
// (const char*, void*) and returning float. No setter for it is visible in
// this listing.
78using ParamsModelClassifyFunc = float (Dict::*)(const char *, void *);
// FillLatticeFunc: a non-const member function of Wordrec taking
// (const MATRIX&, const WERD_CHOICE_LIST&, const UNICHARSET&, BlamerBundle*)
// and returning void. Accepted by SetFillLatticeFunc().
79using FillLatticeFunc = void (Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *);
82
92 public:
94 virtual ~TessBaseAPI();
95
99 static const char* Version();
100
108 static size_t getOpenCLDevice(void **device);
109
114 static void CatchSignals();
115
120 void SetInputName(const char* name);
128 const char* GetInputName();
129 // Takes ownership of the input pix.
130 void SetInputImage(Pix *pix);
131 Pix* GetInputImage();
132 int GetSourceYResolution();
133 const char* GetDatapath();
134
136 void SetOutputName(const char* name);
137
151 bool SetVariable(const char* name, const char* value);
152 bool SetDebugVariable(const char* name, const char* value);
153
158 bool GetIntVariable(const char *name, int *value) const;
159 bool GetBoolVariable(const char *name, bool *value) const;
160 bool GetDoubleVariable(const char *name, double *value) const;
161
166 const char *GetStringVariable(const char *name) const;
167
171 void PrintVariables(FILE *fp) const;
172
176 bool GetVariableAsString(const char *name, STRING *val);
177
215 int Init(const char* datapath, const char* language, OcrEngineMode mode,
216 char **configs, int configs_size,
217 const GenericVector<STRING> *vars_vec,
218 const GenericVector<STRING> *vars_values,
219 bool set_only_non_debug_params);
// Convenience overload: forwards to the eight-argument Init() with the given
// datapath, language and engine mode, passing no config files (nullptr, 0),
// no variable name/value vectors (nullptr, nullptr) and
// set_only_non_debug_params = false. Returns that call's result unchanged.
220 int Init(const char* datapath, const char* language, OcrEngineMode oem) {
221 return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);
222 }
// Convenience overload: same as the three-argument form but with the engine
// mode fixed to OEM_DEFAULT. Forwards to the eight-argument Init() with no
// config files (nullptr, 0), no variable name/value vectors
// (nullptr, nullptr) and set_only_non_debug_params = false, and returns that
// call's result unchanged.
223 int Init(const char* datapath, const char* language) {
224 return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);
225 }
226 // In-memory version reads the traineddata file directly from the given
227 // data[data_size] array, and/or reads data via a FileReader.
228 int Init(const char* data, int data_size, const char* language,
229 OcrEngineMode mode, char** configs, int configs_size,
230 const GenericVector<STRING>* vars_vec,
231 const GenericVector<STRING>* vars_values,
232 bool set_only_non_debug_params, FileReader reader);
233
242 const char* GetInitLanguagesAsString() const;
243
249 void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
250
254 void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
255
262 int InitLangMod(const char* datapath, const char* language);
263
268 void InitForAnalysePage();
269
276 void ReadConfigFile(const char* filename);
278 void ReadDebugConfigFile(const char* filename);
279
285 void SetPageSegMode(PageSegMode mode);
286
288 PageSegMode GetPageSegMode() const;
289
307 char* TesseractRect(const unsigned char* imagedata,
308 int bytes_per_pixel, int bytes_per_line,
309 int left, int top, int width, int height);
310
315 void ClearAdaptiveClassifier();
316
323 /* @{ */
324
332 void SetImage(const unsigned char* imagedata, int width, int height,
333 int bytes_per_pixel, int bytes_per_line);
334
343 void SetImage(Pix* pix);
344
349 void SetSourceResolution(int ppi);
350
356 void SetRectangle(int left, int top, int width, int height);
357
// Installs a new thresholder: deletes the object currently held in
// thresholder_, stores the given pointer in its place, then calls
// ClearResults(). Because the stored pointer is deleted on the next call,
// a thresholder handed to this method must not be deleted by the caller
// as well.
// NOTE(review): if the caller passes the pointer that is already installed,
// it is deleted and then stored again, leaving thresholder_ dangling --
// confirm no caller does this, or guard against it.
365 void SetThresholder(ImageThresholder* thresholder) {
366 delete thresholder_;
367 thresholder_ = thresholder;
368 ClearResults();
369 }
370
376 Pix* GetThresholdedImage();
377
383 Boxa* GetRegions(Pixa** pixa);
384
396 Boxa* GetTextlines(bool raw_image, int raw_padding,
397 Pixa** pixa, int** blockids, int** paraids);
398 /*
399 Helper method to extract from the thresholded image. (most common usage)
400 */
// Two-argument shorthand: forwards to the five-argument GetTextlines() with
// raw_image = false, raw_padding = 0 and paraids = nullptr, and returns its
// result unchanged.
401 Boxa* GetTextlines(Pixa** pixa, int** blockids) {
402 return GetTextlines(false, 0, pixa, blockids, nullptr);
403 }
404
413 Boxa* GetStrips(Pixa** pixa, int** blockids);
414
420 Boxa* GetWords(Pixa** pixa);
421
430 Boxa* GetConnectedComponents(Pixa** cc);
431
445 Boxa* GetComponentImages(PageIteratorLevel level,
446 bool text_only, bool raw_image,
447 int raw_padding,
448 Pixa** pixa, int** blockids, int** paraids);
449 // Helper function to get binary images with no padding (most common usage).
451 const bool text_only,
452 Pixa** pixa, int** blockids) {
453 return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);
454 }
455
462 int GetThresholdedImageScaleFactor() const;
463
479 PageIterator* AnalyseLayout();
480 PageIterator* AnalyseLayout(bool merge_similar_words);
481
488 int Recognize(ETEXT_DESC* monitor);
489
495 #ifndef DISABLED_LEGACY_ENGINE
497 int RecognizeForChopTest(ETEXT_DESC* monitor);
498 #endif
499
522 bool ProcessPages(const char* filename, const char* retry_config,
523 int timeout_millisec, TessResultRenderer* renderer);
524 // Does the real work of ProcessPages.
525 bool ProcessPagesInternal(const char* filename, const char* retry_config,
526 int timeout_millisec, TessResultRenderer* renderer);
527
537 bool ProcessPage(Pix* pix, int page_index, const char* filename,
538 const char* retry_config, int timeout_millisec,
539 TessResultRenderer* renderer);
540
549 ResultIterator* GetIterator();
550
559 MutableIterator* GetMutableIterator();
560
565 char* GetUTF8Text();
566
576 char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
577
584 char* GetHOCRText(int page_number);
585
590 char* GetAltoText(ETEXT_DESC* monitor, int page_number);
591
592
597 char* GetAltoText(int page_number);
598
604 char* GetTSVText(int page_number);
605
612 char* GetLSTMBoxText(int page_number);
613
621 char* GetBoxText(int page_number);
622
629 char* GetWordStrBoxText(int page_number);
630
636 char* GetUNLVText();
637
647 bool DetectOrientationScript(int* orient_deg, float* orient_conf,
648 const char** script_name, float* script_conf);
649
655 char* GetOsdText(int page_number);
656
658 int MeanTextConf();
665 int* AllWordConfidences();
666
667#ifndef DISABLED_LEGACY_ENGINE
678 bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
679#endif // ndef DISABLED_LEGACY_ENGINE
680
687 void Clear();
688
695 void End();
696
704 static void ClearPersistentCache();
705
712 int IsValidWord(const char *word);
713 // Returns true if utf8_character is defined in the UniCharset.
714 bool IsValidCharacter(const char *utf8_character);
715
716
717 bool GetTextDirection(int* out_offset, float* out_slope);
718
720 void SetDictFunc(DictFunc f);
721
725 void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
726
731 bool DetectOS(OSResults*);
732
737 void GetBlockTextOrientations(int** block_orientation,
738 bool** vertical_writing);
739
740
741 #ifndef DISABLED_LEGACY_ENGINE
742
744 void SetFillLatticeFunc(FillLatticeFunc f);
745
747 BLOCK_LIST* FindLinesCreateBlockList();
748
754 static void DeleteBlockList(BLOCK_LIST* block_list);
755
757 static ROW *MakeTessOCRRow(float baseline, float xheight,
758 float descender, float ascender);
759
761 static TBLOB *MakeTBLOB(Pix *pix);
762
768 static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
769
771 void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
772 int* num_features, int* feature_outline_index);
773
778 static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
779 int right, int bottom);
780
785 void RunAdaptiveClassifier(TBLOB* blob,
786 int num_max_matches,
787 int* unichar_ids,
788 float* ratings,
789 int* num_matches_returned);
790#endif // ndef DISABLED_LEGACY_ENGINE
791
793 const char* GetUnichar(int unichar_id);
794
796 const Dawg *GetDawg(int i) const;
797
799 int NumDawgs() const;
800
// Accessor: returns the stored tesseract_ pointer as-is (no null check and
// no copy is made here).
801 Tesseract* tesseract() const { return tesseract_; }
802
// Accessor: returns the value of last_oem_requested_.
803 OcrEngineMode oem() const { return last_oem_requested_; }
804
// Stores the given callback pointer in truth_cb_; nothing else happens here.
// NOTE(review): ownership of cb is not visible from this header -- confirm
// who is responsible for deleting it.
805 void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
806
807 void set_min_orientation_margin(double margin);
808 /* @} */
809
810 protected:
811
813 TESS_LOCAL bool InternalSetImage();
814
819 TESS_LOCAL virtual bool Threshold(Pix** pix);
820
825 TESS_LOCAL int FindLines();
826
828 void ClearResults();
829
835 TESS_LOCAL LTRResultIterator* GetLTRIterator();
836
843 TESS_LOCAL int TextLength(int* blob_count);
844
846 TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
847
848 #ifndef DISABLED_LEGACY_ENGINE
849
851 /* @{ */
852
857 TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
858 int length,
859 float baseline,
860 float xheight,
861 float descender,
862 float ascender);
863
865 TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
866
867 TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
868 PAGE_RES* pass1_result);
869
874 TESS_LOCAL static int TesseractExtractResult(char** text,
875 int** lengths,
876 float** costs,
877 int** x0,
878 int** y0,
879 int** x1,
880 int** y1,
881 PAGE_RES* page_res);
882
// Read-only accessor: returns page_res_ as a pointer-to-const. Only declared
// when DISABLED_LEGACY_ENGINE is not defined (see the surrounding #ifndef).
883 TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }
884 /* @} */
885#endif // ndef DISABLED_LEGACY_ENGINE
886
887 protected:
894 BLOCK_LIST* block_list_;
903
908 /* @{ */
915 /* @} */
916
917 private:
918 // A list of image filenames gets special consideration
919 bool ProcessPagesFileList(FILE *fp,
920 STRING *buf,
921 const char* retry_config, int timeout_millisec,
922 TessResultRenderer* renderer,
923 int tessedit_page_number);
924 // TIFF supports multipage so gets special consideration.
925 bool ProcessPagesMultipageTiff(const unsigned char *data,
926 size_t size,
927 const char* filename,
928 const char* retry_config,
929 int timeout_millisec,
930 TessResultRenderer* renderer,
931 int tessedit_page_number);
932 // There's currently no way to pass a document title from the
933 // Tesseract command line, and we have multiple places that choose
934 // to set the title to an empty string. Using a single named
935 // variable will hopefully reduce confusion if the situation changes
936 // in the future.
937 const char *unknown_title_ = "";
938}; // class TessBaseAPI.
939
941STRING HOcrEscape(const char* text);
942} // namespace tesseract.
943
944#endif // TESSERACT_API_BASEAPI_H_
struct TessBaseAPI TessBaseAPI
Definition: capi.h:93
struct TessResultRenderer TessResultRenderer
Definition: capi.h:87
@ OEM_DEFAULT
Definition: capi.h:102
#define TESS_API
Definition: platform.h:54
#define TESS_LOCAL
Definition: platform.h:55
int UNICHAR_ID
Definition: unichar.h:34
@ baseline
Definition: mfoutline.h:63
bool(*)(const STRING &, GenericVector< char > *) FileReader
Definition: serialis.h:49
TessCallback4< const UNICHARSET &, int, PageIterator *, Pix * > TruthCallback
Definition: baseapi.h:81
void(Wordrec::*)(const MATRIX &, const WERD_CHOICE_LIST &, const UNICHARSET &, BlamerBundle *) FillLatticeFunc
Definition: baseapi.h:79
int(Dict::*)(void *, const UNICHARSET &, UNICHAR_ID, bool) const DictFunc
Definition: baseapi.h:76
double(Dict::*)(const char *, const char *, int, const char *, int) ProbabilityInContextFunc
Definition: baseapi.h:77
float(Dict::*)(const char *, void *) ParamsModelClassifyFunc
Definition: baseapi.h:78
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2310
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA * > *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel * > *models)
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:890
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:893
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:897
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
Boxa * GetTextlines(Pixa **pixa, int **blockids)
Definition: baseapi.h:401
void InitTruthCallback(TruthCallback *cb)
Definition: baseapi.h:805
STRING * input_file_
Name used by training code.
Definition: baseapi.h:896
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
Definition: baseapi.h:450
int Init(const char *datapath, const char *language, OcrEngineMode oem)
Definition: baseapi.h:220
void SetThresholder(ImageThresholder *thresholder)
Definition: baseapi.h:365
STRING * language_
Last initialized language.
Definition: baseapi.h:899
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:901
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:891
TESS_LOCAL const PAGE_RES * GetPageRes() const
Definition: baseapi.h:883
int Init(const char *datapath, const char *language)
Definition: baseapi.h:223
OcrEngineMode oem() const
Definition: baseapi.h:803
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:892
Tesseract * tesseract() const
Definition: baseapi.h:801
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:894
TruthCallback * truth_cb_
Definition: baseapi.h:902
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:889
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:898
OcrEngineMode last_oem_requested_
Last OCR engine mode requested.
Definition: baseapi.h:900
Definition: blobs.h:284
Definition: matrix.h:578
Definition: ocrrow.h:37
Definition: rect.h:34
Definition: werd.h:56
Definition: strngs.h:45