tesseract 4.1.1
Loading...
Searching...
No Matches
tesseract::LTRResultIterator Class Reference

#include <ltrresultiterator.h>

Inheritance diagram for tesseract::LTRResultIterator:
tesseract::PageIterator tesseract::ResultIterator tesseract::MutableIterator

Public Member Functions

 LTRResultIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
 
 ~LTRResultIterator () override
 
char * GetUTF8Text (PageIteratorLevel level) const
 
void SetLineSeparator (const char *new_line)
 
void SetParagraphSeparator (const char *new_para)
 
float Confidence (PageIteratorLevel level) const
 
void RowAttributes (float *row_height, float *descenders, float *ascenders) const
 
const char * WordFontAttributes (bool *is_bold, bool *is_italic, bool *is_underlined, bool *is_monospace, bool *is_serif, bool *is_smallcaps, int *pointsize, int *font_id) const
 
const char * WordRecognitionLanguage () const
 
StrongScriptDirection WordDirection () const
 
bool WordIsFromDictionary () const
 
int BlanksBeforeWord () const
 
bool WordIsNumeric () const
 
bool HasBlamerInfo () const
 
const void * GetParamsTrainingBundle () const
 
const char * GetBlamerDebug () const
 
const char * GetBlamerMisadaptionDebug () const
 
bool HasTruthString () const
 
bool EquivalentToTruth (const char *str) const
 
char * WordTruthUTF8Text () const
 
char * WordNormedUTF8Text () const
 
const char * WordLattice (int *lattice_size) const
 
bool SymbolIsSuperscript () const
 
bool SymbolIsSubscript () const
 
bool SymbolIsDropcap () const
 
- Public Member Functions inherited from tesseract::PageIterator
 PageIterator (PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
 
virtual ~PageIterator ()
 
 PageIterator (const PageIterator &src)
 
const PageIterator & operator= (const PageIterator &src)
 
bool PositionedAtSameWord (const PAGE_RES_IT *other) const
 
virtual void Begin ()
 
virtual void RestartParagraph ()
 
bool IsWithinFirstTextlineOfParagraph () const
 
virtual void RestartRow ()
 
virtual bool Next (PageIteratorLevel level)
 
virtual bool IsAtBeginningOf (PageIteratorLevel level) const
 
virtual bool IsAtFinalElement (PageIteratorLevel level, PageIteratorLevel element) const
 
int Cmp (const PageIterator &other) const
 
void SetBoundingBoxComponents (bool include_upper_dots, bool include_lower_dots)
 
bool BoundingBox (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
 
bool BoundingBox (PageIteratorLevel level, int padding, int *left, int *top, int *right, int *bottom) const
 
bool BoundingBoxInternal (PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
 
bool Empty (PageIteratorLevel level) const
 
PolyBlockType BlockType () const
 
Pta * BlockPolygon () const
 
Pix * GetBinaryImage (PageIteratorLevel level) const
 
Pix * GetImage (PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const
 
bool Baseline (PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
 
void Orientation (tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
 
void ParagraphInfo (tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const
 
bool SetWordBlamerBundle (BlamerBundle *blamer_bundle)
 

Protected Attributes

const char * line_separator_
 
const char * paragraph_separator_
 
- Protected Attributes inherited from tesseract::PageIterator
PAGE_RES * page_res_
 
Tesseract * tesseract_
 
PAGE_RES_IT * it_
 
WERD * word_
 
int word_length_
 
int blob_index_
 
C_BLOB_IT * cblob_it_
 
bool include_upper_dots_
 
bool include_lower_dots_
 
int scale_
 
int scaled_yres_
 
int rect_left_
 
int rect_top_
 
int rect_width_
 
int rect_height_
 

Friends

class ChoiceIterator
 

Additional Inherited Members

- Protected Member Functions inherited from tesseract::PageIterator
TESS_LOCAL void BeginWord (int offset)
 

Detailed Description

Definition at line 48 of file ltrresultiterator.h.

Constructor & Destructor Documentation

◆ LTRResultIterator()

tesseract::LTRResultIterator::LTRResultIterator ( PAGE_RES * page_res,
Tesseract * tesseract,
int  scale,
int  scaled_yres,
int  rect_left,
int  rect_top,
int  rect_width,
int  rect_height 
)

Definition at line 29 of file ltrresultiterator.cpp.

33 : PageIterator(page_res, tesseract, scale, scaled_yres, rect_left, rect_top,
34 rect_width, rect_height),
35 line_separator_("\n"),
36 paragraph_separator_("\n") {}
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)

◆ ~LTRResultIterator()

tesseract::LTRResultIterator::~LTRResultIterator ( )
override default

Member Function Documentation

◆ BlanksBeforeWord()

int tesseract::LTRResultIterator::BlanksBeforeWord ( ) const

Definition at line 239 of file ltrresultiterator.cpp.

239 {
240 if (it_->word() == nullptr) return 1;
241 return it_->word()->word->space();
242}
WERD * word
Definition: pageres.h:186
WERD_RES * word() const
Definition: pageres.h:754
uint8_t space()
Definition: werd.h:99

◆ Confidence()

float tesseract::LTRResultIterator::Confidence ( PageIteratorLevel  level) const

Definition at line 94 of file ltrresultiterator.cpp.

94 {
95 if (it_->word() == nullptr) return 0.0f; // Already at the end!
96 float mean_certainty = 0.0f;
97 int certainty_count = 0;
98 PAGE_RES_IT res_it(*it_);
99 WERD_CHOICE* best_choice = res_it.word()->best_choice;
100 ASSERT_HOST(best_choice != nullptr);
101 switch (level) {
102 case RIL_BLOCK:
103 do {
104 best_choice = res_it.word()->best_choice;
105 ASSERT_HOST(best_choice != nullptr);
106 mean_certainty += best_choice->certainty();
107 ++certainty_count;
108 res_it.forward();
109 } while (res_it.block() == res_it.prev_block());
110 break;
111 case RIL_PARA:
112 do {
113 best_choice = res_it.word()->best_choice;
114 ASSERT_HOST(best_choice != nullptr);
115 mean_certainty += best_choice->certainty();
116 ++certainty_count;
117 res_it.forward();
118 } while (res_it.block() == res_it.prev_block() &&
119 res_it.row()->row->para() == res_it.prev_row()->row->para());
120 break;
121 case RIL_TEXTLINE:
122 do {
123 best_choice = res_it.word()->best_choice;
124 ASSERT_HOST(best_choice != nullptr);
125 mean_certainty += best_choice->certainty();
126 ++certainty_count;
127 res_it.forward();
128 } while (res_it.row() == res_it.prev_row());
129 break;
130 case RIL_WORD:
131 mean_certainty += best_choice->certainty();
132 ++certainty_count;
133 break;
134 case RIL_SYMBOL:
135 mean_certainty += best_choice->certainty(blob_index_);
136 ++certainty_count;
137 }
138 if (certainty_count > 0) {
139 mean_certainty /= certainty_count;
140 float confidence = 100 + 5 * mean_certainty;
141 if (confidence < 0.0f) confidence = 0.0f;
142 if (confidence > 100.0f) confidence = 100.0f;
143 return confidence;
144 }
145 return 0.0f;
146}
#define ASSERT_HOST(x)
Definition: errcode.h:88
float certainty() const
Definition: ratngs.h:320

◆ EquivalentToTruth()

bool tesseract::LTRResultIterator::EquivalentToTruth ( const char *  str) const

Definition at line 291 of file ltrresultiterator.cpp.

291 {
292 if (!HasTruthString()) return false;
293 ASSERT_HOST(it_->word()->uch_set != nullptr);
294 WERD_CHOICE str_wd(str, *(it_->word()->uch_set));
295 return it_->word()->blamer_bundle->ChoiceIsCorrect(&str_wd);
296}
bool ChoiceIsCorrect(const WERD_CHOICE *word_choice) const
Definition: blamer.cpp:119
const UNICHARSET * uch_set
Definition: pageres.h:203
BlamerBundle * blamer_bundle
Definition: pageres.h:252

◆ GetBlamerDebug()

const char * tesseract::LTRResultIterator::GetBlamerDebug ( ) const

Definition at line 269 of file ltrresultiterator.cpp.

269 {
270 return it_->word()->blamer_bundle->debug().string();
271}
const STRING & debug() const
Definition: blamer.h:130
const char * string() const
Definition: strngs.cpp:194

◆ GetBlamerMisadaptionDebug()

const char * tesseract::LTRResultIterator::GetBlamerMisadaptionDebug ( ) const

Definition at line 275 of file ltrresultiterator.cpp.

275 {
276 return it_->word()->blamer_bundle->misadaption_debug().string();
277}
const STRING & misadaption_debug() const
Definition: blamer.h:133

◆ GetParamsTrainingBundle()

const void * tesseract::LTRResultIterator::GetParamsTrainingBundle ( ) const

Definition at line 260 of file ltrresultiterator.cpp.

260 {
261 return (it_->word() != nullptr && it_->word()->blamer_bundle != nullptr)
262 ? &(it_->word()->blamer_bundle->params_training_bundle())
263 : nullptr;
264}
const tesseract::ParamsTrainingBundle & params_training_bundle() const
Definition: blamer.h:165

◆ GetUTF8Text()

char * tesseract::LTRResultIterator::GetUTF8Text ( PageIteratorLevel  level) const

Definition at line 45 of file ltrresultiterator.cpp.

45 {
46 if (it_->word() == nullptr) return nullptr; // Already at the end!
47 STRING text;
48 PAGE_RES_IT res_it(*it_);
49 WERD_CHOICE* best_choice = res_it.word()->best_choice;
50 ASSERT_HOST(best_choice != nullptr);
51 if (level == RIL_SYMBOL) {
52 text = res_it.word()->BestUTF8(blob_index_, false);
53 } else if (level == RIL_WORD) {
54 text = best_choice->unichar_string();
55 } else {
56 bool eol = false; // end of line?
57 bool eop = false; // end of paragraph?
58 do { // for each paragraph in a block
59 do { // for each text line in a paragraph
60 do { // for each word in a text line
61 best_choice = res_it.word()->best_choice;
62 ASSERT_HOST(best_choice != nullptr);
63 text += best_choice->unichar_string();
64 text += " ";
65 res_it.forward();
66 eol = res_it.row() != res_it.prev_row();
67 } while (!eol);
68 text.truncate_at(text.length() - 1);
69 text += line_separator_;
70 eop = res_it.block() != res_it.prev_block() ||
71 res_it.row()->row->para() != res_it.prev_row()->row->para();
72 } while (level != RIL_TEXTLINE && !eop);
73 if (eop) text += paragraph_separator_;
74 } while (level == RIL_BLOCK && res_it.block() == res_it.prev_block());
75 }
76 int length = text.length() + 1;
77 char* result = new char[length];
78 strncpy(result, text.string(), length);
79 return result;
80}
const STRING & unichar_string() const
Definition: ratngs.h:531
STRING
Definition: strngs.h:45
void truncate_at(int32_t index)
Definition: strngs.cpp:265
int32_t length() const
Definition: strngs.cpp:189

◆ HasBlamerInfo()

bool tesseract::LTRResultIterator::HasBlamerInfo ( ) const

Definition at line 252 of file ltrresultiterator.cpp.

252 {
253 return it_->word() != nullptr && it_->word()->blamer_bundle != nullptr &&
254 it_->word()->blamer_bundle->HasDebugInfo();
255}
bool HasDebugInfo() const
Definition: blamer.h:127

◆ HasTruthString()

bool tesseract::LTRResultIterator::HasTruthString ( ) const

Definition at line 280 of file ltrresultiterator.cpp.

280 {
281 if (it_->word() == nullptr) return false; // Already at the end!
282 if (it_->word()->blamer_bundle == nullptr ||
283 it_->word()->blamer_bundle->NoTruth()) {
284 return false; // no truth information for this word
285 }
286 return true;
287}
bool NoTruth() const
Definition: blamer.h:123

◆ RowAttributes()

void tesseract::LTRResultIterator::RowAttributes ( float *  row_height,
float *  descenders,
float *  ascenders 
) const

Definition at line 148 of file ltrresultiterator.cpp.

149 {
150 *row_height = it_->row()->row->x_height() + it_->row()->row->ascenders() -
151 it_->row()->row->descenders();
152 *descenders = it_->row()->row->descenders();
153 *ascenders = it_->row()->row->ascenders();
154}
float descenders() const
Definition: ocrrow.h:85
float ascenders() const
Definition: ocrrow.h:82
float x_height() const
Definition: ocrrow.h:64
ROW * row
Definition: pageres.h:140
ROW_RES * row() const
Definition: pageres.h:757

◆ SetLineSeparator()

void tesseract::LTRResultIterator::SetLineSeparator ( const char *  new_line)

Definition at line 83 of file ltrresultiterator.cpp.

83 {
84 line_separator_ = new_line;
85}

◆ SetParagraphSeparator()

void tesseract::LTRResultIterator::SetParagraphSeparator ( const char *  new_para)

Definition at line 88 of file ltrresultiterator.cpp.

88 {
89 paragraph_separator_ = new_para;
90}

◆ SymbolIsDropcap()

bool tesseract::LTRResultIterator::SymbolIsDropcap ( ) const

Definition at line 357 of file ltrresultiterator.cpp.

357 {
358 if (cblob_it_ == nullptr && it_->word() != nullptr)
359 return it_->word()->best_choice->BlobPosition(blob_index_) == SP_DROPCAP;
360 return false;
361}
@ SP_DROPCAP
Definition: ratngs.h:256
WERD_CHOICE * best_choice
Definition: pageres.h:241
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:312

◆ SymbolIsSubscript()

bool tesseract::LTRResultIterator::SymbolIsSubscript ( ) const

Definition at line 348 of file ltrresultiterator.cpp.

348 {
349 if (cblob_it_ == nullptr && it_->word() != nullptr)
350 return it_->word()->best_choice->BlobPosition(blob_index_) == SP_SUBSCRIPT;
351 return false;
352}
@ SP_SUBSCRIPT
Definition: ratngs.h:254

◆ SymbolIsSuperscript()

bool tesseract::LTRResultIterator::SymbolIsSuperscript ( ) const

Definition at line 338 of file ltrresultiterator.cpp.

338 {
339 if (cblob_it_ == nullptr && it_->word() != nullptr)
340 return it_->word()->best_choice->BlobPosition(blob_index_) ==
341 SP_SUPERSCRIPT;
342 return false;
343}
@ SP_SUPERSCRIPT
Definition: ratngs.h:255

◆ WordDirection()

StrongScriptDirection tesseract::LTRResultIterator::WordDirection ( ) const

Definition at line 220 of file ltrresultiterator.cpp.

220 {
221 if (it_->word() == nullptr) return DIR_NEUTRAL;
222 bool has_rtl = it_->word()->AnyRtlCharsInWord();
223 bool has_ltr = it_->word()->AnyLtrCharsInWord();
224 if (has_rtl && !has_ltr) return DIR_RIGHT_TO_LEFT;
225 if (has_ltr && !has_rtl) return DIR_LEFT_TO_RIGHT;
226 if (!has_ltr && !has_rtl) return DIR_NEUTRAL;
227 return DIR_MIX;
228}
@ DIR_MIX
Definition: unichar.h:45
@ DIR_RIGHT_TO_LEFT
Definition: unichar.h:44
@ DIR_LEFT_TO_RIGHT
Definition: unichar.h:43
@ DIR_NEUTRAL
Definition: unichar.h:42
bool AnyRtlCharsInWord() const
Definition: pageres.h:393
bool AnyLtrCharsInWord() const
Definition: pageres.h:409

◆ WordFontAttributes()

const char * tesseract::LTRResultIterator::WordFontAttributes ( bool *  is_bold,
bool *  is_italic,
bool *  is_underlined,
bool *  is_monospace,
bool *  is_serif,
bool *  is_smallcaps,
int *  pointsize,
int *  font_id 
) const

Definition at line 164 of file ltrresultiterator.cpp.

166 {
167 const char* result = nullptr;
168
169 if (it_->word() == nullptr) {
170 // Already at the end!
171 *pointsize = 0;
172 } else {
173 float row_height = it_->row()->row->x_height() +
174 it_->row()->row->ascenders() -
175 it_->row()->row->descenders();
176 // Convert from pixels to printers points.
177 *pointsize =
178 scaled_yres_ > 0
179 ? static_cast<int>(row_height * kPointsPerInch / scaled_yres_ + 0.5)
180 : 0;
181
182 #ifndef DISABLED_LEGACY_ENGINE
183 const FontInfo* font_info = it_->word()->fontinfo;
184 if (font_info) {
185 // Font information available.
186 *font_id = font_info->universal_id;
187 *is_bold = font_info->is_bold();
188 *is_italic = font_info->is_italic();
189 *is_underlined = false; // TODO(rays) fix this!
190 *is_monospace = font_info->is_fixed_pitch();
191 *is_serif = font_info->is_serif();
192 result = font_info->name;
193 }
194 #endif // ndef DISABLED_LEGACY_ENGINE
195
196 *is_smallcaps = it_->word()->small_caps;
197 }
198
199 if (!result) {
200 *is_bold = false;
201 *is_italic = false;
202 *is_underlined = false;
203 *is_monospace = false;
204 *is_serif = false;
205 *is_smallcaps = false;
206 *font_id = -1;
207 }
208
209 return result;
210}
constexpr int kPointsPerInch
Definition: publictypes.h:33
int32_t universal_id
Definition: fontinfo.h:123
const FontInfo * fontinfo
Definition: pageres.h:309
bool small_caps
Definition: pageres.h:306

◆ WordIsFromDictionary()

bool tesseract::LTRResultIterator::WordIsFromDictionary ( ) const

Definition at line 231 of file ltrresultiterator.cpp.

231 {
232 if (it_->word() == nullptr) return false; // Already at the end!
233 int permuter = it_->word()->best_choice->permuter();
234 return permuter == SYSTEM_DAWG_PERM || permuter == FREQ_DAWG_PERM ||
235 permuter == USER_DAWG_PERM;
236}
@ FREQ_DAWG_PERM
Definition: ratngs.h:244
@ USER_DAWG_PERM
Definition: ratngs.h:243
@ SYSTEM_DAWG_PERM
Definition: ratngs.h:241
uint8_t permuter() const
Definition: ratngs.h:336

◆ WordIsNumeric()

bool tesseract::LTRResultIterator::WordIsNumeric ( ) const

Definition at line 245 of file ltrresultiterator.cpp.

245 {
246 if (it_->word() == nullptr) return false; // Already at the end!
247 int permuter = it_->word()->best_choice->permuter();
248 return permuter == NUMBER_PERM;
249}
@ NUMBER_PERM
Definition: ratngs.h:239

◆ WordLattice()

const char * tesseract::LTRResultIterator::WordLattice ( int *  lattice_size) const

Definition at line 328 of file ltrresultiterator.cpp.

328 {
329 if (it_->word() == nullptr) return nullptr; // Already at the end!
330 if (it_->word()->blamer_bundle == nullptr) return nullptr;
331 *lattice_size = it_->word()->blamer_bundle->lattice_size();
332 return it_->word()->blamer_bundle->lattice_data();
333}
const char * lattice_data() const
Definition: blamer.h:152
int lattice_size() const
Definition: blamer.h:155

◆ WordNormedUTF8Text()

char * tesseract::LTRResultIterator::WordNormedUTF8Text ( ) const

Definition at line 311 of file ltrresultiterator.cpp.

311 {
312 if (it_->word() == nullptr) return nullptr; // Already at the end!
313 STRING ocr_text;
314 WERD_CHOICE* best_choice = it_->word()->best_choice;
315 const UNICHARSET* unicharset = it_->word()->uch_set;
316 ASSERT_HOST(best_choice != nullptr);
317 for (int i = 0; i < best_choice->length(); ++i) {
318 ocr_text += unicharset->get_normed_unichar(best_choice->unichar_id(i));
319 }
320 int length = ocr_text.length() + 1;
321 char* result = new char[length];
322 strncpy(result, ocr_text.string(), length);
323 return result;
324}
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
int length() const
Definition: ratngs.h:293
const char * get_normed_unichar(UNICHAR_ID unichar_id) const
Definition: unicharset.h:828

◆ WordRecognitionLanguage()

const char * tesseract::LTRResultIterator::WordRecognitionLanguage ( ) const

Definition at line 213 of file ltrresultiterator.cpp.

213 {
214 if (it_->word() == nullptr || it_->word()->tesseract == nullptr)
215 return nullptr;
216 return it_->word()->tesseract->lang.string();
217}
tesseract::Tesseract * tesseract
Definition: pageres.h:280
STRING lang
Definition: ccutil.h:71

◆ WordTruthUTF8Text()

char * tesseract::LTRResultIterator::WordTruthUTF8Text ( ) const

Definition at line 300 of file ltrresultiterator.cpp.

300 {
301 if (!HasTruthString()) return nullptr;
302 STRING truth_text = it_->word()->blamer_bundle->TruthString();
303 int length = truth_text.length() + 1;
304 char* result = new char[length];
305 strncpy(result, truth_text.string(), length);
306 return result;
307}
STRING TruthString() const
Definition: blamer.h:114

Friends And Related Function Documentation

◆ ChoiceIterator

friend class ChoiceIterator
friend

Definition at line 49 of file ltrresultiterator.h.

Member Data Documentation

◆ line_separator_

const char* tesseract::LTRResultIterator::line_separator_
protected

Definition at line 186 of file ltrresultiterator.h.

◆ paragraph_separator_

const char* tesseract::LTRResultIterator::paragraph_separator_
protected

Definition at line 187 of file ltrresultiterator.h.


The documentation for this class was generated from the following files: