tesseract 4.1.1
Loading...
Searching...
No Matches
normalis.h
Go to the documentation of this file.
1/**********************************************************************
2 * File: normalis.h (Formerly denorm.h)
3 * Description: Code for the DENORM class.
4 * Author: Ray Smith
5 * Created: Thu Apr 23 09:22:43 BST 1992
6 *
7 * (C) Copyright 1992, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19
20#ifndef NORMALIS_H
21#define NORMALIS_H
22
// Dimensions of the baseline-normalized (BLN) coordinate space. The usage
// examples documented on DENORM::SetupNormalization scale a word by
// kBlnXHeight / x_height and pass kBlnBaselineOffset as final_yshift.
// constexpr guarantees these are usable as compile-time constants wherever
// this header is included.
constexpr int kBlnCellHeight = 256;     // Full-height for baseline normalization.
constexpr int kBlnXHeight = 128;        // x-height for baseline normalization.
constexpr int kBlnBaselineOffset = 64;  // offset for baseline normalization.
26
27class BLOCK;
28class FCOORD;
29class TBOX;
30class UNICHARSET;
31
32struct Pix;
33struct TBLOB;
34struct TPOINT;
35
36template <typename T> class GenericVector;
37
namespace tesseract {

// Possible normalization methods. Use NEGATIVE values as these also
// double up as markers for the last sub-classifier.
// Kept as a plain (unscoped) enum so the enumerators stay visible directly
// in namespace tesseract and convert implicitly to int.
enum NormalizationMode {
  NM_BASELINE = -3,         // The original BL normalization mode.
  NM_CHAR_ISOTROPIC = -2,   // Character normalization but isotropic.
  NM_CHAR_ANISOTROPIC = -1  // The original CN normalization mode.
};

}  // namespace tesseract.
49
50class DENORM { // One normalization step plus its inverse; may chain to a predecessor DENORM.
51 public:
52 DENORM();
53
54 // Copying a DENORM is allowed.
55 DENORM(const DENORM &);
56 DENORM& operator=(const DENORM&);
57 ~DENORM();
58
59 // Setup the normalization transformation parameters.
60 // The normalizations applied to a blob are as follows:
61 // 1. An optional block layout rotation that was applied during layout
62 // analysis to make the textlines horizontal.
63 // 2. A normalization transformation (LocalNormTransform):
64 // Subtract the "origin"
65 // Apply an x,y scaling.
66 // Apply an optional rotation.
67 // Add back a final translation.
68 // The origin is in the block-rotated space, and is usually something like
69 // the x-middle of the word at the baseline.
70 // 3. Zero or more further normalization transformations that are applied
71 // in sequence, with a similar pattern to the first normalization transform.
72 //
73 // A DENORM holds the parameters of a single normalization, and can execute
74 // both the LocalNormTransform (a forwards normalization), and the
75 // LocalDenormTransform which is an inverse transform or de-normalization.
76 // A DENORM may point to a predecessor DENORM, which is actually the earlier
77 // normalization, so the full normalization sequence involves executing all
78 // predecessors first and then the transform in "this".
79 // Let x be image co-ordinates, and suppose we have normalization classes A, B, C
80 // where we first apply A then B then C to get normalized x':
81 // x' = CBAx
82 // Then the backwards (to original coordinates) would be:
83 // x = A^-1 B^-1 C^-1 x'
84 // and A = B->predecessor_ and B = C->predecessor_
85 // NormTransform executes all predecessors recursively, and then this.
86 // NormTransform would be used to transform an image-based feature to
87 // normalized space for use in a classifier
88 // DenormTransform inverts this and then all predecessors. It can be
89 // used to get back to the original image coordinates from normalized space.
90 // The LocalNormTransform member executes just the transformation
91 // in "this" without the layout rotation or any predecessors. It would be
92 // used to run each successive normalization, eg the word normalization,
93 // and later the character normalization.
94
95 // Arguments:
96 // block: if not nullptr, then this is the first transformation, and
97 // block->re_rotation() needs to be used after the Denorm
98 // transformation to get back to the image coords.
99 // rotation: if not nullptr, apply this rotation after translation to the
100 // origin and scaling. (Usually a classify rotation.)
101 // predecessor: if not nullptr, then predecessor has been applied to the
102 // input space and needs to be undone to complete the inverse.
103 // The above pointers are not owned by this DENORM and are assumed to live
104 // longer than this denorm, except rotation, which is deep copied on input.
105 //
106 // x_origin: The x origin which will be mapped to final_xshift in the result.
107 // y_origin: The y origin which will be mapped to final_yshift in the result.
108 // NOTE(review): "added to row->baseline(x)" looks stale; there is no row argument - confirm.
109 //
110 // x_scale: scale factor for the x-coordinate.
111 // y_scale: scale factor for the y-coordinate. NOTE(review): "ignored if segs is given" looks stale; no segs argument.
112 // Note that these scale factors apply to the same x and y system as the
113 // x-origin and y-origin apply, ie after any block rotation, but before
114 // the rotation argument is applied.
115 //
116 // final_xshift: The x component of the final translation.
117 // final_yshift: The y component of the final translation.
118 //
119 // In theory, any of the commonly used normalizations can be setup here:
120 // * Traditional baseline normalization on a word:
121 // SetupNormalization(block, nullptr, nullptr,
122 // box.x_middle(), baseline,
123 // kBlnXHeight / x_height, kBlnXHeight / x_height,
124 // 0, kBlnBaselineOffset);
125 // * "Numeric mode" baseline normalization on a word, in which the blobs
126 // are positioned with the bottom as the baseline is achieved by making
127 // a separate DENORM for each blob.
128 // SetupNormalization(block, nullptr, nullptr,
129 // box.x_middle(), box.bottom(),
130 // kBlnXHeight / x_height, kBlnXHeight / x_height,
131 // 0, kBlnBaselineOffset);
132 // * Anisotropic character normalization used by IntFx.
133 // SetupNormalization(nullptr, nullptr, denorm,
134 // centroid_x, centroid_y,
135 // 51.2 / ry, 51.2 / rx, 128, 128);
136 // * Normalize blob height to x-height (current OSD):
137 // SetupNormalization(nullptr, &rotation, nullptr,
138 // box.rotational_x_middle(rotation),
139 // box.rotational_y_middle(rotation),
140 // kBlnXHeight / box.rotational_height(rotation),
141 // kBlnXHeight / box.rotational_height(rotation),
142 // 0, kBlnBaselineOffset);
143 // * Secondary normalization for classification rotation (current):
144 // FCOORD rotation = block->classify_rotation();
145 // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio;
146 // SetupNormalization(nullptr, &rotation, denorm,
147 // box.rotational_x_middle(rotation),
148 // box.rotational_y_middle(rotation),
149 // target_height / box.rotational_height(rotation),
150 // target_height / box.rotational_height(rotation),
151 // 0, kBlnBaselineOffset);
152 // * Proposed new normalizations for CJK: Between them there is then
153 // no need for further normalization at all, and the character fills the cell.
154 // ** Replacement for baseline normalization on a word:
155 // Scales height and width independently so that modal height and pitch
156 // fill the cell respectively.
157 // float cap_height = x_height / CCStruct::kXHeightCapRatio;
158 // SetupNormalization(block, nullptr, nullptr,
159 // box.x_middle(), cap_height / 2.0f,
160 // kBlnCellHeight / fixed_pitch,
161 // kBlnCellHeight / cap_height,
162 // 0, 0);
163 // ** Secondary normalization for classification (with rotation) (proposed):
164 // Requires a simple translation to the center of the appropriate character
165 // cell, no further scaling and a simple rotation (or nothing) about the
166 // cell center.
167 // FCOORD rotation = block->classify_rotation();
168 // SetupNormalization(nullptr, &rotation, denorm,
169 // fixed_pitch_cell_center,
170 // 0.0f,
171 // 1.0f,
172 // 1.0f,
173 // 0, 0);
174 void SetupNormalization(const BLOCK* block,
175 const FCOORD* rotation,
176 const DENORM* predecessor,
177 float x_origin, float y_origin,
178 float x_scale, float y_scale,
179 float final_xshift, float final_yshift);
180
181 // Sets up the DENORM to execute a non-linear transformation based on
182 // preserving an even distribution of stroke edges. The transformation
183 // operates only within the given box, scaling input coords within the box
184 // non-linearly to a box of target_width by target_height, with all other
185 // coords being clipped to the box edge. As with SetupNormalization above,
186 // final_xshift and final_yshift are applied after scaling, and the bottom-
187 // left of box is used as a pre-scaling origin.
188 // x_coords is a collection of the x-coords of vertical edges for each
189 // y-coord starting at box.bottom().
190 // y_coords is a collection of the y-coords of horizontal edges for each
191 // x-coord starting at box.left().
192 // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
193 // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
194 // The second-level vectors must all be sorted in ascending order.
195 void SetupNonLinear(const DENORM* predecessor, const TBOX& box,
196 float target_width, float target_height,
197 float final_xshift, float final_yshift,
198 const GenericVector<GenericVector<int> >& x_coords,
199 const GenericVector<GenericVector<int> >& y_coords);
200
201 // Transforms the given coords one step forward to normalized space, without
202 // using any block rotation or predecessor.
203 void LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const;
204 void LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const;
205 // Transforms the given coords forward to normalized space using the
206 // full transformation sequence defined by the block rotation, the
207 // predecessors, deepest first, and finally this. If first_norm is not nullptr,
208 // then the first and deepest transformation used is first_norm, ending
209 // with this, and the block rotation will not be applied.
210 void NormTransform(const DENORM* first_norm, const TPOINT& pt,
211 TPOINT* transformed) const;
212 void NormTransform(const DENORM* first_norm, const FCOORD& pt,
213 FCOORD* transformed) const;
214 // Transforms the given coords one step back to source space, without
215 // using any block rotation or predecessor.
216 void LocalDenormTransform(const TPOINT& pt, TPOINT* original) const;
217 void LocalDenormTransform(const FCOORD& pt, FCOORD* original) const;
218 // Transforms the given coords all the way back to source image space using
219 // the full transformation sequence defined by this and its predecessors
220 // recursively, shallowest first, and finally any block re_rotation.
221 // If last_denorm is not nullptr, then the last transformation used will
222 // be last_denorm, and the block re_rotation will never be executed.
223 void DenormTransform(const DENORM* last_denorm, const TPOINT& pt,
224 TPOINT* original) const;
225 void DenormTransform(const DENORM* last_denorm, const FCOORD& pt,
226 FCOORD* original) const;
227
228 // Normalize a blob using blob transformations. Less accurate, but
229 // more accurately copies the old way.
230 void LocalNormBlob(TBLOB* blob) const;
231
232 // Fills in the x-height range accepted by the given unichar_id in blob
233 // coordinates, given its bounding box in the usual baseline-normalized
234 // coordinates, with some initial crude x-height estimate (such as word
235 // size) and this denoting the transformation that was used.
236 // Also returns the amount the character must have shifted up or down.
237 void XHeightRange(int unichar_id, const UNICHARSET& unicharset,
238 const TBOX& bbox,
239 float* min_xht,
240 float* max_xht,
241 float* yshift) const;
242
243 // Prints the content of the DENORM for debug purposes.
244 void Print() const;
245
246 Pix* pix() const {
247 return pix_;
248 }
249 void set_pix(Pix* pix) {
250 pix_ = pix;
251 }
252 bool inverse() const {
253 return inverse_;
254 }
255 void set_inverse(bool value) {
256 inverse_ = value;
257 }
258 const DENORM* RootDenorm() const { // Follows predecessor_ links back to the first DENORM in the chain.
259 if (predecessor_ != nullptr)
260 return predecessor_->RootDenorm();
261 return this;
262 }
263 const DENORM* predecessor() const {
264 return predecessor_;
265 }
266 // Accessors - perhaps should not be needed.
267 float x_scale() const {
268 return x_scale_;
269 }
270 float y_scale() const {
271 return y_scale_;
272 }
273 const BLOCK* block() const {
274 return block_;
275 }
276 void set_block(const BLOCK* block) {
277 block_ = block;
278 }
279
280 private:
281 // Free allocated memory and clear pointers.
282 void Clear();
283 // Setup default values.
284 void Init();
285
286 // Best available image.
287 Pix* pix_;
288 // True if the source image is white-on-black.
289 bool inverse_;
290 // Block the word came from. If not null, block->re_rotation() takes the
291 // "untransformed" coordinates even further back to the original image.
292 // Used only on the first DENORM in a chain.
293 const BLOCK* block_;
294 // Rotation to apply after translation to the origin and scaling.
295 const FCOORD* rotation_;
296 // Previous transformation in a chain.
297 const DENORM* predecessor_;
298 // Non-linear transformation maps directly from each integer offset from the
299 // origin to the corresponding x-coord. Owned by the DENORM.
300 GenericVector<float>* x_map_;
301 // Non-linear transformation maps directly from each integer offset from the
302 // origin to the corresponding y-coord. Owned by the DENORM.
303 GenericVector<float>* y_map_;
304 // x-coordinate to be mapped to final_xshift_ in the result.
305 float x_origin_;
306 // y-coordinate to be mapped to final_yshift_ in the result.
307 float y_origin_;
308 // Scale factors for x and y coords. Applied to pre-rotation system.
309 float x_scale_;
310 float y_scale_;
311 // Destination coords of the x_origin_ and y_origin_.
312 float final_xshift_;
313 float final_yshift_;
314};
315
316#endif
const int kBlnBaselineOffset
Definition: normalis.h:25
const int kBlnCellHeight
Definition: normalis.h:23
const int kBlnXHeight
Definition: normalis.h:24
NormalizationMode
Definition: normalis.h:42
@ NM_BASELINE
Definition: normalis.h:43
@ NM_CHAR_ANISOTROPIC
Definition: normalis.h:45
@ NM_CHAR_ISOTROPIC
Definition: normalis.h:44
Definition: blobs.h:51
Definition: blobs.h:284
void Print() const
Definition: normalis.cpp:505
void LocalDenormTransform(const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:360
void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width, float target_height, float final_xshift, float final_yshift, const GenericVector< GenericVector< int > > &x_coords, const GenericVector< GenericVector< int > > &y_coords)
Definition: normalis.cpp:268
void set_pix(Pix *pix)
Definition: normalis.h:249
void LocalNormBlob(TBLOB *blob) const
Definition: normalis.cpp:412
void set_inverse(bool value)
Definition: normalis.h:255
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:390
bool inverse() const
Definition: normalis.h:252
DENORM()
Definition: normalis.cpp:38
float x_scale() const
Definition: normalis.h:267
DENORM & operator=(const DENORM &)
Definition: normalis.cpp:48
~DENORM()
Definition: normalis.cpp:67
Pix * pix() const
Definition: normalis.h:246
const DENORM * RootDenorm() const
Definition: normalis.h:258
void NormTransform(const DENORM *first_norm, const TPOINT &pt, TPOINT *transformed) const
Definition: normalis.cpp:335
void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht, float *max_xht, float *yshift) const
Definition: normalis.cpp:428
void set_block(const BLOCK *block)
Definition: normalis.h:276
float y_scale() const
Definition: normalis.h:270
const DENORM * predecessor() const
Definition: normalis.h:263
void LocalNormTransform(const TPOINT &pt, TPOINT *transformed) const
Definition: normalis.cpp:306
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
Definition: normalis.cpp:96
const BLOCK * block() const
Definition: normalis.h:273
Definition: ocrblock.h:31
Definition: points.h:189
Definition: rect.h:34