tesseract 4.1.1
Loading...
Searching...
No Matches
boxword.cpp
Go to the documentation of this file.
1
2// File: boxword.cpp
3// Description: Class to represent the bounding boxes of the output.
4// Author: Ray Smith
5//
6// (C) Copyright 2010, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#include "blobs.h"
20#include "boxword.h"
21#include "host.h" // for NearlyEqual
22#include "normalis.h"
23#include "ocrblock.h"
24#include "pageres.h"
25
26namespace tesseract {
27
// Clip output boxes to input blob boxes for bounds that are within this
// tolerance. Otherwise, the blob may be chopped and we have to just use
// the word bounding box.
const int kBoxClipTolerance = 2;
32
33BoxWord::BoxWord() : length_(0) {
34}
35
37 CopyFrom(src);
38}
39
41 CopyFrom(src);
42 return *this;
43}
44
45void BoxWord::CopyFrom(const BoxWord& src) {
46 bbox_ = src.bbox_;
47 length_ = src.length_;
48 boxes_.clear();
49 boxes_.reserve(length_);
50 for (int i = 0; i < length_; ++i)
51 boxes_.push_back(src.boxes_[i]);
52}
53
54// Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
55// switch back to original image coordinates.
57 auto* boxword = new BoxWord();
58 // Count the blobs.
59 boxword->length_ = tessword->NumBlobs();
60 // Allocate memory.
61 boxword->boxes_.reserve(boxword->length_);
62
63 for (int b = 0; b < boxword->length_; ++b) {
64 TBLOB* tblob = tessword->blobs[b];
65 TBOX blob_box;
66 for (TESSLINE* outline = tblob->outlines; outline != nullptr;
67 outline = outline->next) {
68 EDGEPT* edgept = outline->loop;
69 // Iterate over the edges.
70 do {
71 if (!edgept->IsHidden() || !edgept->prev->IsHidden()) {
72 ICOORD pos(edgept->pos.x, edgept->pos.y);
73 TPOINT denormed;
74 tblob->denorm().DenormTransform(nullptr, edgept->pos, &denormed);
75 pos.set_x(denormed.x);
76 pos.set_y(denormed.y);
77 TBOX pt_box(pos, pos);
78 blob_box += pt_box;
79 }
80 edgept = edgept->next;
81 } while (edgept != outline->loop);
82 }
83 boxword->boxes_.push_back(blob_box);
84 }
85 boxword->ComputeBoundingBox();
86 return boxword;
87}
88
89// Clean up the bounding boxes from the polygonal approximation by
90// expanding slightly, then clipping to the blobs from the original_word
91// that overlap. If not null, the block provides the inverse rotation.
92void BoxWord::ClipToOriginalWord(const BLOCK* block, WERD* original_word) {
93 for (int i = 0; i < length_; ++i) {
94 TBOX box = boxes_[i];
95 // Expand by a single pixel, as the poly approximation error is 1 pixel.
96 box = TBOX(box.left() - 1, box.bottom() - 1,
97 box.right() + 1, box.top() + 1);
98 // Now find the original box that matches.
99 TBOX original_box;
100 C_BLOB_IT b_it(original_word->cblob_list());
101 for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
102 TBOX blob_box = b_it.data()->bounding_box();
103 if (block != nullptr)
104 blob_box.rotate(block->re_rotation());
105 if (blob_box.major_overlap(box)) {
106 original_box += blob_box;
107 }
108 }
109 if (!original_box.null_box()) {
110 if (NearlyEqual<int>(original_box.left(), box.left(), kBoxClipTolerance))
111 box.set_left(original_box.left());
112 if (NearlyEqual<int>(original_box.right(), box.right(),
114 box.set_right(original_box.right());
115 if (NearlyEqual<int>(original_box.top(), box.top(), kBoxClipTolerance))
116 box.set_top(original_box.top());
117 if (NearlyEqual<int>(original_box.bottom(), box.bottom(),
119 box.set_bottom(original_box.bottom());
120 }
121 original_box = original_word->bounding_box();
122 if (block != nullptr)
123 original_box.rotate(block->re_rotation());
124 boxes_[i] = box.intersection(original_box);
125 }
126 ComputeBoundingBox();
127}
128
129// Merges the boxes from start to end, not including end, and deletes
130// the boxes between start and end.
131void BoxWord::MergeBoxes(int start, int end) {
132 start = ClipToRange(start, 0, length_);
133 end = ClipToRange(end, 0, length_);
134 if (end <= start + 1)
135 return;
136 for (int i = start + 1; i < end; ++i) {
137 boxes_[start] += boxes_[i];
138 }
139 int shrinkage = end - 1 - start;
140 length_ -= shrinkage;
141 for (int i = start + 1; i < length_; ++i)
142 boxes_[i] = boxes_[i + shrinkage];
143 boxes_.truncate(length_);
144}
145
146// Inserts a new box before the given index.
147// Recomputes the bounding box.
148void BoxWord::InsertBox(int index, const TBOX& box) {
149 if (index < length_)
150 boxes_.insert(box, index);
151 else
152 boxes_.push_back(box);
153 length_ = boxes_.size();
154 ComputeBoundingBox();
155}
156
157// Changes the box at the given index to the new box.
158// Recomputes the bounding box.
159void BoxWord::ChangeBox(int index, const TBOX& box) {
160 boxes_[index] = box;
161 ComputeBoundingBox();
162}
163
164// Deletes the box with the given index, and shuffles up the rest.
165// Recomputes the bounding box.
166void BoxWord::DeleteBox(int index) {
167 ASSERT_HOST(0 <= index && index < length_);
168 boxes_.remove(index);
169 --length_;
170 ComputeBoundingBox();
171}
172
173// Deletes all the boxes stored in BoxWord.
175 length_ = 0;
176 boxes_.clear();
177 bbox_ = TBOX();
178}
179
180// Computes the bounding box of the word.
181void BoxWord::ComputeBoundingBox() {
182 bbox_ = TBOX();
183 for (int i = 0; i < length_; ++i)
184 bbox_ += boxes_[i];
185}
186
187// This and other putatively are the same, so call the (permanent) callback
188// for each blob index where the bounding boxes match.
189// The callback is deleted on completion.
191 TessCallback1<int>* cb) const {
192 for (int i = 0; i < length_ && i < other.NumBlobs(); ++i) {
193 TBOX blob_box = other.blobs[i]->bounding_box();
194 if (blob_box == boxes_[i])
195 cb->Run(i);
196 }
197 delete cb;
198}
199
200} // namespace tesseract.
#define ASSERT_HOST(x)
Definition: errcode.h:88
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:108
const int kBoxClipTolerance
Definition: boxword.cpp:31
int push_back(T object)
int size() const
Definition: genericvector.h:72
void remove(int index)
void insert(const T &t, int index)
void truncate(int size)
void reserve(int size)
Definition: blobs.h:51
int16_t x
Definition: blobs.h:93
int16_t y
Definition: blobs.h:94
Definition: blobs.h:99
EDGEPT * next
Definition: blobs.h:192
bool IsHidden() const
Definition: blobs.h:176
EDGEPT * prev
Definition: blobs.h:193
TPOINT pos
Definition: blobs.h:186
TESSLINE * next
Definition: blobs.h:281
Definition: blobs.h:284
TESSLINE * outlines
Definition: blobs.h:400
const DENORM & denorm() const
Definition: blobs.h:363
Definition: blobs.h:418
int NumBlobs() const
Definition: blobs.h:448
GenericVector< TBLOB * > blobs
Definition: blobs.h:459
virtual void Run(A1)=0
void ProcessMatchedBlobs(const TWERD &other, TessCallback1< int > *cb) const
Definition: boxword.cpp:190
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:148
void MergeBoxes(int start, int end)
Definition: boxword.cpp:131
BoxWord & operator=(const BoxWord &src)
Definition: boxword.cpp:40
void DeleteBox(int index)
Definition: boxword.cpp:166
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:56
void ChangeBox(int index, const TBOX &box)
Definition: boxword.cpp:159
void DeleteAllBoxes()
Definition: boxword.cpp:174
void ClipToOriginalWord(const BLOCK *block, WERD *original_word)
Definition: boxword.cpp:92
void CopyFrom(const BoxWord &src)
Definition: boxword.cpp:45
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:390
Definition: ocrblock.h:31
FCOORD re_rotation() const
Definition: ocrblock.h:134
integer coordinate
Definition: points.h:32
void set_x(int16_t xin)
rewrite function
Definition: points.h:61
void set_y(int16_t yin)
rewrite function
Definition: points.h:65
Definition: rect.h:34
void set_right(int x)
Definition: rect.h:82
void rotate(const FCOORD &vec)
Definition: rect.h:197
int16_t top() const
Definition: rect.h:58
void set_bottom(int y)
Definition: rect.h:68
bool major_overlap(const TBOX &box) const
Definition: rect.h:368
void set_top(int y)
Definition: rect.h:61
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
TBOX intersection(const TBOX &box) const
Definition: rect.cpp:87
bool null_box() const
Definition: rect.h:50
void set_left(int x)
Definition: rect.h:75
int16_t right() const
Definition: rect.h:79
Definition: werd.h:56
C_BLOB_LIST * cblob_list()
Definition: werd.h:95
TBOX bounding_box() const
Definition: werd.cpp:148