tesseract 4.1.1
Loading...
Searching...
No Matches
resultiterator.h
Go to the documentation of this file.
1
2// File: resultiterator.h
3// Description: Iterator for tesseract results that is capable of
4// iterating in proper reading order over Bi Directional
5// (e.g. mixed Hebrew and English) text.
6// Author: David Eger
7// Created: Fri May 27 13:58:06 PST 2011
8//
9// (C) Copyright 2011, Google Inc.
10// Licensed under the Apache License, Version 2.0 (the "License");
11// you may not use this file except in compliance with the License.
12// You may obtain a copy of the License at
13// http://www.apache.org/licenses/LICENSE-2.0
14// Unless required by applicable law or agreed to in writing, software
15// distributed under the License is distributed on an "AS IS" BASIS,
16// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17// See the License for the specific language governing permissions and
18// limitations under the License.
19//
21
22#ifndef TESSERACT_CCMAIN_RESULT_ITERATOR_H_
23#define TESSERACT_CCMAIN_RESULT_ITERATOR_H_
24
25#include <set> // for std::pair (NOTE: std::pair's own header is <utility>)
26#include <vector> // for std::vector
27#include "ltrresultiterator.h" // for LTRResultIterator
28#include "platform.h" // for TESS_API, TESS_LOCAL
29#include "publictypes.h" // for PageIteratorLevel
30#include "unichar.h" // for StrongScriptDirection
31
32template <typename T> class GenericVector;
33template <typename T> class GenericVectorEqEq;
34
35class STRING;
36
37namespace tesseract {
38
39class Tesseract;
40
// Iterator over tesseract results that, per this file's header comment, is
// capable of iterating in proper reading order over bidirectional text.
// NOTE(review): the class head on the next line was missing from this listing
// (the embedded numbering jumped from 40 to 42), which left `public:` and the
// closing `};` without an opening declaration. It is reconstructed from the
// destructor name, the `override` specifiers, and the include annotations for
// LTRResultIterator and TESS_API -- confirm the base class and its access
// specifier against the original header.
41class TESS_API ResultIterator : public LTRResultIterator {
42 public:
 // Static factory: takes an existing LTRResultIterator and returns a pointer
 // to a ResultIterator. Judging by the name, the result is positioned at the
 // start of the paragraph `resit` is in -- TODO confirm. Ownership of the
 // returned pointer is not stated in this header.
43 static ResultIterator *StartOfParagraph(const LTRResultIterator &resit);
44
 // Defaulted destructor; `override` means the base destructor is virtual.
49 ~ResultIterator() override = default;
50
51 // ============= Moving around within the page ============.
 // Overrides the base-class Begin(). Presumably moves the iterator back to
 // the start of the page -- TODO confirm.
56 void Begin() override;
57
 // Overrides the base-class Next(). Presumably advances to the next element
 // at `level`; the meaning of the bool result is not shown here -- TODO
 // confirm.
70 bool Next(PageIteratorLevel level) override;
71
 // Overrides the base-class query. By its name, reports whether the iterator
 // is at the beginning of an element at `level` -- TODO confirm.
78 bool IsAtBeginningOf(PageIteratorLevel level) const override;
79
 // Overrides the base-class query. By its name, reports whether the current
 // `element` is the final one within the enclosing `level` -- TODO confirm.
85 bool IsAtFinalElement(PageIteratorLevel level,
86 PageIteratorLevel element) const override;
87
88 // ============= Functions that refer to words only ============.
89 // Returns the number of blanks before the current word.
90 int BlanksBeforeWord() const;
91
92 // ============= Accessing data ==============.
93
 // Returns text for the current element at `level` as a raw char*.
 // NOTE(review): who owns/frees the returned buffer is not stated in this
 // header -- confirm before relying on it.
98 virtual char* GetUTF8Text(PageIteratorLevel level) const;
99
 // Returns a pointer to a vector of vectors of (const char*, float) pairs.
 // Presumably candidate symbol strings with a score from the LSTM recognizer
 // -- TODO confirm the meaning of each nesting level, the float, and the
 // ownership of the returned pointer.
103 virtual std::vector<std::vector<std::pair<const char*, float>>>*
104 GetBestLSTMSymbolChoices() const;
105
 // By its name, reports whether the current paragraph is left-to-right --
 // TODO confirm.
110 bool ParagraphIsLtr() const;
111
112 // ============= Exposed only for testing =============.
113
 // Static overload that writes its result through `reading_order`.
 // NOTE(review): the embedded numbering skips 138 between the two parameters
 // below, so a parameter line may have been lost when this listing was
 // extracted -- verify the full signature against the original header.
136 static void CalculateTextlineOrder(
137 bool paragraph_is_ltr,
139 GenericVectorEqEq<int> *reading_order);
140
 // Integer constants declared here and defined out of line. Presumably
 // marker values related to the reading-order computation -- TODO confirm.
141 static const int kMinorRunStart;
142 static const int kMinorRunEnd;
143 static const int kComplexWord;
144
145 protected:
 // Constructs from an existing LTRResultIterator. `explicit` rules out
 // implicit conversion; being protected, it is reachable only from this class
 // and classes derived from it. TESS_LOCAL comes from platform.h; its effect
 // is not shown in this file.
152 TESS_LOCAL explicit ResultIterator(const LTRResultIterator &resit);
153
154 private:
 // By its name, determines whether the current paragraph is left-to-right.
 // How it relates to the current_paragraph_is_ltr_ member is not shown here.
159 bool CurrentParagraphIsLtr() const;
160
 // Member overload taking the paragraph direction and an LTRResultIterator,
 // writing its result through `indices`. Presumably the reading order of the
 // words on the text line `resit` points at -- TODO confirm.
172 void CalculateTextlineOrder(bool paragraph_is_ltr,
173 const LTRResultIterator &resit,
174 GenericVectorEqEq<int> *indices) const;
 // Overload of the declaration above with an additional `ssd` parameter.
 // NOTE(review): in the listing this declaration was token-for-token identical
 // to the one above (embedded line 178 was missing), which is an ill-formed
 // redeclaration of a class member. The `ssd` line is restored on the basis
 // of the numbering gap and the include annotation for StrongScriptDirection
 // -- verify its exact type and name against the original header. Presumably
 // `ssd` receives the script direction of each word -- TODO confirm.
176 void CalculateTextlineOrder(bool paragraph_is_ltr,
177 const LTRResultIterator &resit,
178 GenericVector<StrongScriptDirection> *ssd,
179 GenericVectorEqEq<int> *indices) const;
180
 // By its name, the index of the current word in left-to-right order --
 // TODO confirm what it is relative to.
185 int LTRWordIndex() const;
186
 // Writes its result through `blob_indices`. Presumably the order in which
 // the blobs of the current word should be read -- TODO confirm.
191 void CalculateBlobOrder(GenericVector<int> *blob_indices) const;
192
 // Non-const: by its name, repositions the iterator at the logical (reading
 // order) start of the current text line -- TODO confirm.
194 void MoveToLogicalStartOfTextline();
195
 // Non-const: by its name, repositions the iterator at the logical (reading
 // order) start of the current word -- TODO confirm.
200 void MoveToLogicalStartOfWord();
201
 // By its name, true when the iterator is on the last symbol of the word.
203 bool IsAtFinalSymbolOfWord() const;
204
 // By its name, true when the iterator is on the first symbol of the word.
206 bool IsAtFirstSymbolOfWord() const;
207
 // Takes a STRING to modify. By its name, appends marks that belong after the
 // current position -- TODO confirm which marks and under what conditions.
212 void AppendSuffixMarks(STRING *text) const;
213
 // By its name, appends the current word's UTF-8 text to `text`.
215 void AppendUTF8WordText(STRING *text) const;
216
 // Non-const, unlike the other Append* helpers: by its name, it advances the
 // iterator while appending the text line's UTF-8 text to `text` -- TODO
 // confirm where the iterator is left afterwards.
224 void IterateAndAppendUTF8TextlineText(STRING *text);
225
 // By its name, appends the current paragraph's UTF-8 text to `text`.
232 void AppendUTF8ParagraphText(STRING *text) const;
233
 // Takes a minimum level and returns a bool. Presumably tells callers whether
 // bidi debug output is enabled at `min_level` -- TODO confirm.
235 bool BidiDebug(int min_level) const;
236
 // Presumably the cached direction of the current paragraph -- TODO confirm
 // where it is set.
237 bool current_paragraph_is_ltr_;
238
 // Presumably set while the iterator is at the first element of a run that
 // goes against the paragraph direction -- TODO confirm.
243 bool at_beginning_of_minor_run_;
244
 // Presumably set while the iterator is inside such a minor-direction run --
 // TODO confirm.
246 bool in_minor_direction_;
247
 // Presumably mirrors a configuration option of the same name controlling
 // how spaces between words are emitted -- TODO confirm.
252 bool preserve_interword_spaces_;
253};
254
255} // namespace tesseract.
256
257#endif // TESSERACT_CCMAIN_RESULT_ITERATOR_H_
#define TESS_API
Definition: platform.h:54
#define TESS_LOCAL
Definition: platform.h:55
static const int kMinorRunEnd
static const int kMinorRunStart
~ResultIterator() override=default
static const int kComplexWord
Definition: strngs.h:45