tesseract 4.1.1
Loading...
Searching...
No Matches
pageiterator.h
Go to the documentation of this file.
1
2// File: pageiterator.h
3// Description: Iterator for tesseract page structure that avoids using
4// tesseract internal data structures.
5// Author: Ray Smith
6// Created: Fri Feb 26 11:01:06 PST 2010
7//
8// (C) Copyright 2010, Google Inc.
9// Licensed under the Apache License, Version 2.0 (the "License");
10// you may not use this file except in compliance with the License.
11// You may obtain a copy of the License at
12// http://www.apache.org/licenses/LICENSE-2.0
13// Unless required by applicable law or agreed to in writing, software
14// distributed under the License is distributed on an "AS IS" BASIS,
15// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16// See the License for the specific language governing permissions and
17// limitations under the License.
18//
20
21#ifndef TESSERACT_CCMAIN_PAGEITERATOR_H_
22#define TESSERACT_CCMAIN_PAGEITERATOR_H_
23
24#include "platform.h"
25#include "publictypes.h"
26
// Forward declarations only; no definitions of these types are provided here.
// The declarations visible in this listing refer to them through pointers
// (e.g. Pix*, Pta*, C_BLOB_IT*, BlamerBundle*, const PAGE_RES_IT*).
27struct BlamerBundle;
28class C_BLOB_IT;
29class PAGE_RES;
30class PAGE_RES_IT;
31class WERD;
32struct Pix;
33struct Pta;
34
35namespace tesseract {
36
// Forward declaration of tesseract::Tesseract; its definition is not part of
// this header.
37class Tesseract;
38
53 public:
69 int scale, int scaled_yres,
70 int rect_left, int rect_top,
71 int rect_width, int rect_height);
// Destructor, declared virtual. The class also declares other virtual members
// (Begin, Next, ...), which suggests it is meant to be subclassed.
72 virtual ~PageIterator();
73
// Copy construction and copy assignment from another PageIterator.
// What exactly is copied (deep vs. shared state) is not visible in this
// header.
// NOTE(review): operator= returns a const reference rather than the
// conventional non-const PageIterator&; the definition lives elsewhere, so
// confirm before changing the signature.
80 PageIterator(const PageIterator& src);
81 const PageIterator& operator=(const PageIterator& src);
82
// Const query against a PAGE_RES_IT (a different iterator type, only
// forward-declared in this header). Judging by the name, reports whether this
// iterator and `other` are on the same word - TODO confirm against the
// definition.
84 bool PositionedAtSameWord(const PAGE_RES_IT* other) const;
85
86 // ============= Moving around within the page ============.
87
// Virtual and non-const, so it may change the iterator's position and may be
// overridden. Presumably moves the iterator back to the start of the page -
// TODO confirm.
92 virtual void Begin();
93
// Virtual and non-const. Presumably moves the iterator to the start of the
// paragraph it is currently in - TODO confirm.
99 virtual void RestartParagraph();
100
// Const, non-virtual query. By its name, reports whether the current position
// lies in the first text line of its paragraph - TODO confirm.
105 bool IsWithinFirstTextlineOfParagraph() const;
106
// Virtual and non-const. Presumably moves the iterator to the start of the
// current row (text line) - TODO confirm.
112 virtual void RestartRow();
113
// Virtual and non-const. Takes the PageIteratorLevel at which to step and
// returns a bool; presumably false means there is no further element at that
// level - TODO confirm.
125 virtual bool Next(PageIteratorLevel level);
126
// Virtual const query. By its name, reports whether the current position is
// the first one within the enclosing element of the given level - TODO
// confirm.
140 virtual bool IsAtBeginningOf(PageIteratorLevel level) const;
141
// Virtual const query taking two levels. Presumably reports whether the
// current `element`-level item is the last one inside its enclosing
// `level`-level item - TODO confirm the role of each argument.
158 virtual bool IsAtFinalElement(PageIteratorLevel level,
159 PageIteratorLevel element) const;
160
// Compares this iterator with `other` and returns an int. Presumably a
// negative/zero/positive ordering of positions - TODO confirm the sign
// convention and any precondition on the two iterators.
167 int Cmp(const PageIterator &other) const;
168
169 // ============= Accessing data ==============.
170 // Coordinate system:
171 // Integer coordinates are at the cracks between the pixels.
172 // The top-left corner of the top-left pixel in the image is at (0,0).
173 // The bottom-right corner of the bottom-right pixel in the image is at
174 // (width, height).
175 // Every bounding box goes from the top-left of the top-left contained
176 // pixel to the bottom-right of the bottom-right contained pixel, so
177 // the bounding box of the single top-left pixel in the image is:
178 // (0,0)->(1,1).
179 // If an image rectangle has been set in the API, then returned coordinates
180 // relate to the original (full) image, rather than the rectangle.
181
// Stores the two flags in the members include_upper_dots_ and
// include_lower_dots_. Nothing else happens here; how the flags influence
// later bounding-box results is decided by code outside this header.
191 void SetBoundingBoxComponents(bool include_upper_dots,
192 bool include_lower_dots) {
193 include_upper_dots_ = include_upper_dots;
194 include_lower_dots_ = include_lower_dots;
195 }
196
// Two overloads that write a box for the element at `level` through the four
// int out-pointers and return a bool. The second overload additionally takes
// an int `padding`.
// Presumably false means no box was available - TODO confirm, along with the
// unit of `padding`.
206 bool BoundingBox(PageIteratorLevel level,
207 int* left, int* top, int* right, int* bottom) const;
208 bool BoundingBox(PageIteratorLevel level, int padding,
209 int* left, int* top, int* right, int* bottom) const;
// Same parameter list and return type as the unpadded BoundingBox overload.
// NOTE(review): how its result differs (e.g. which coordinate space it uses)
// is not visible in this header - confirm against the definition.
215 bool BoundingBoxInternal(PageIteratorLevel level,
216 int* left, int* top, int* right, int* bottom) const;
217
// Const query. Presumably true when there is nothing at the current position
// for the given level - TODO confirm.
219 bool Empty(PageIteratorLevel level) const;
220
// Const query returning a PolyBlockType, presumably for the block at the
// current position - TODO confirm the value returned when there is no block.
225 PolyBlockType BlockType() const;
226
// Const query returning a raw Pta pointer, presumably the outline of the
// current block.
// NOTE(review): ownership of the returned pointer and whether it can be null
// are not visible here - confirm before freeing or dereferencing.
234 Pta* BlockPolygon() const;
235
// Const query returning a raw Pix pointer for the element at `level`.
// NOTE(review): ownership of the returned Pix and whether it can be null are
// not visible here - confirm before freeing or dereferencing.
242 Pix* GetBinaryImage(PageIteratorLevel level) const;
243
// Const query returning a raw Pix pointer for the element at `level`. It
// takes an int `padding` and a source image `original_img`, and has two int
// out-pointers `left` and `top`.
// Presumably `left`/`top` receive the position of the returned image within
// `original_img` - TODO confirm, along with ownership of the returned Pix.
255 Pix* GetImage(PageIteratorLevel level, int padding, Pix* original_img,
256 int* left, int* top) const;
257
// Const query that writes two points, (x1, y1) and (x2, y2), through the int
// out-pointers and returns a bool.
// Presumably the points are the ends of the baseline of the element at
// `level`, and false means no baseline was available - TODO confirm.
264 bool Baseline(PageIteratorLevel level,
265 int* x1, int* y1, int* x2, int* y2) const;
266
// Const query with no return value; all four results are delivered through
// the out-pointers.
// NOTE(review): whether null pointers are accepted, and the unit of
// `deskew_angle`, are not visible here - confirm against the definition.
275 void Orientation(tesseract::Orientation *orientation,
276 tesseract::WritingDirection *writing_direction,
277 tesseract::TextlineOrder *textline_order,
278 float *deskew_angle) const;
279
// Const query with no return value; the four paragraph properties are
// delivered through the out-pointers.
// NOTE(review): the unit of `first_line_indent` and the meaning of `is_crown`
// are not visible here - confirm against the definition.
308 void ParagraphInfo(tesseract::ParagraphJustification *justification,
309 bool *is_list_item,
310 bool *is_crown,
311 int *first_line_indent) const;
312
313 // If the current WERD_RES (it_->word()) is not nullptr, sets the BlamerBundle
314 // of the current word to the given pointer (takes ownership of the pointer)
315 // and returns true.
316 // Can only be used when iterating on the word level.
// NOTE(review): the return value when the current word is nullptr, and who
// owns `blamer_bundle` in that case, are implied but not stated above -
// confirm against the definition.
317 bool SetWordBlamerBundle(BlamerBundle *blamer_bundle);
318
319 protected:
// Protected, non-const helper marked with the TESS_LOCAL macro.
// NOTE(review): the meaning of `offset` is not visible here - confirm against
// the definition.
324 TESS_LOCAL void BeginWord(int offset);
325
// Raw pointer member to a C_BLOB_IT (a type that is only forward-declared in
// this header).
// NOTE(review): ownership and lifetime are not visible here - confirm in the
// constructor and destructor definitions.
349 C_BLOB_IT* cblob_it_;
360};
361
362} // namespace tesseract.
363
364#endif // TESSERACT_CCMAIN_PAGEITERATOR_H_
PolyBlockType
Definition: publictypes.h:53
#define TESS_API
Definition: platform.h:54
#define TESS_LOCAL
Definition: platform.h:55
ParagraphJustification
Definition: publictypes.h:251
void SetBoundingBoxComponents(bool include_upper_dots, bool include_lower_dots)
Definition: pageiterator.h:191
WERD
Definition: werd.h:56