tesseract 4.1.1
Loading...
Searching...
No Matches
degradeimage.cpp
Go to the documentation of this file.
1/**********************************************************************
2 * File: degradeimage.cpp
3 * Description: Function to degrade an image (usually of text) as if it
4 * has been printed and then scanned.
5 * Authors: Ray Smith
6 * Created: Tue Nov 19 2013
7 *
8 * (C) Copyright 2013, Google Inc.
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 **********************************************************************/
20
21#include "degradeimage.h"
22
23#include <cstdlib>
24#include "allheaders.h" // from leptonica
25#include "genericvector.h"
26#include "helpers.h" // For TRand.
27#include "rect.h"
28
29namespace tesseract {
30
31// A randomized perspective distortion can be applied to synthetic input.
32// The perspective distortion comes from leptonica, which uses 2 sets of 4
33// corners to determine the distortion. There are random values for each of
34// the x numbers x0..x3 and y0..y3, except for x2 and x3 which are instead
35// defined in terms of a single shear value. This reduces the degrees of
36// freedom enough to make the distortion more realistic than it would otherwise
37// be if all 8 coordinates could move independently.
38// One additional factor is used for the color of the pixels that don't exist
39// in the source image.
40// Name for each of the randomizing factors.
50 // x2 = x1 - shear
51 // x3 = x0 + shear
53};
54
// Bound on the rotation angle: rotations lie within +/- kRotationRange radians.
constexpr float kRotationRange = 0.02f;
// Grey-level shift applied per unit of exposure.
constexpr int kExposureFactor = 16;
// Amplitude of the salt and pepper noise: each pixel moves by +/- kSaltnPepper.
constexpr int kSaltnPepper = 5;
// The illumination ramp is only applied when width + height exceeds this.
constexpr int kMinRampSize = 1000;
63
64// Degrade the pix as if by a print/copy/scan cycle with exposure > 0
65// corresponding to darkening on the copier and <0 lighter and 0 not copied.
66// Exposures in [-2,2] are most useful, with -3 and 3 being extreme.
67// If rotation is nullptr, rotation is skipped. If *rotation is non-zero, the
68// pix is rotated by *rotation else it is randomly rotated and *rotation is
69// modified.
70//
71// HOW IT WORKS:
72// Most of the process is really dictated by the fact that the minimum
73// available convolution is 3X3, which is too big really to simulate a
74// good quality print/scan process. (2X2 would be better.)
75// 1 pixel wide inputs are heavily smeared by the 3X3 convolution, making the
76// images generally biased to being too light, so most of the work is to make
77// them darker. 3 levels of thickening/darkening are achieved with 2 dilations,
78// (using a greyscale erosion) one heavy (by being before convolution) and one
79// light (after convolution).
80// With no dilation, after convolution, the images are so light that a heavy
81// constant offset is required to make the 0 image look reasonable. A simple
82// constant offset multiple of exposure to undo this value is enough to achieve
83// all the required lightening. This gives the advantage that exposure level 1
84// with a single dilation gives a good impression of the broken-yet-too-dark
85// problem that is often seen in scans.
86// A small random rotation gives some varying greyscale values on the edges,
87// and some random salt and pepper noise on top helps to realistically jaggy-up
88// the edges.
89// Finally a greyscale ramp provides a continuum of effects between exposure
90// levels.
91Pix* DegradeImage(Pix* input, int exposure, TRand* randomizer,
92 float* rotation) {
93 Pix* pix = pixConvertTo8(input, false);
94 pixDestroy(&input);
95 input = pix;
96 int width = pixGetWidth(input);
97 int height = pixGetHeight(input);
98
99 if (exposure >= 2) {
100 // An erosion simulates the spreading darkening of a dark copy.
101 // This is backwards to binary morphology,
102 // see http://www.leptonica.com/grayscale-morphology.html
103 pix = input;
104 input = pixErodeGray(pix, 3, 3);
105 pixDestroy(&pix);
106 }
107 // A convolution is essential to any mode as no scanner produces an
108 // image as sharp as the electronic image.
109 pix = pixBlockconv(input, 1, 1);
110 pixDestroy(&input);
111 // A small random rotation helps to make the edges jaggy in a realistic way.
112 if (rotation != nullptr) {
113 float radians_clockwise = 0.0f;
114 if (*rotation) {
115 radians_clockwise = *rotation;
116 } else if (randomizer != nullptr) {
117 radians_clockwise = randomizer->SignedRand(kRotationRange);
118 }
119
120 input = pixRotate(pix, radians_clockwise,
121 L_ROTATE_AREA_MAP, L_BRING_IN_WHITE,
122 0, 0);
123 // Rotate the boxes to match.
124 *rotation = radians_clockwise;
125 pixDestroy(&pix);
126 } else {
127 input = pix;
128 }
129
130 if (exposure >= 3 || exposure == 1) {
131 // Erosion after the convolution is not as heavy as before, so it is
132 // good for level 1 and in addition as a level 3.
133 // This is backwards to binary morphology,
134 // see http://www.leptonica.com/grayscale-morphology.html
135 pix = input;
136 input = pixErodeGray(pix, 3, 3);
137 pixDestroy(&pix);
138 }
139 // The convolution really needed to be 2x2 to be realistic enough, but
140 // we only have 3x3, so we have to bias the image darker or lose thin
141 // strokes.
142 int erosion_offset = 0;
143 // For light and 0 exposure, there is no dilation, so compensate for the
144 // convolution with a big darkening bias which is undone for lighter
145 // exposures.
146 if (exposure <= 0)
147 erosion_offset = -3 * kExposureFactor;
148 // Add in a general offset of the greyscales for the exposure level so
149 // a threshold of 128 gives a reasonable binary result.
150 erosion_offset -= exposure * kExposureFactor;
151 // Add a gradual fade over the page and a small amount of salt and pepper
152 // noise to simulate noise in the sensor/paper fibres and varying
153 // illumination.
154 l_uint32* data = pixGetData(input);
155 for (int y = 0; y < height; ++y) {
156 for (int x = 0; x < width; ++x) {
157 int pixel = GET_DATA_BYTE(data, x);
158 if (randomizer != nullptr)
159 pixel += randomizer->IntRand() % (kSaltnPepper*2 + 1) - kSaltnPepper;
160 if (height + width > kMinRampSize)
161 pixel -= (2*x + y) * 32 / (height + width);
162 pixel += erosion_offset;
163 if (pixel < 0)
164 pixel = 0;
165 if (pixel > 255)
166 pixel = 255;
167 SET_DATA_BYTE(data, x, pixel);
168 }
169 data += input->wpl;
170 }
171 return input;
172}
173
174// Creates and returns a Pix distorted by various means according to the bool
175// flags. If boxes is not nullptr, the boxes are resized/positioned according to
176// any spatial distortion and also by the integer reduction factor box_scale
177// so they will match what the network will output.
178// Returns nullptr on error. The returned Pix must be pixDestroyed.
179Pix* PrepareDistortedPix(const Pix* pix, bool perspective, bool invert,
180 bool white_noise, bool smooth_noise, bool blur,
181 int box_reduction, TRand* randomizer,
182 GenericVector<TBOX>* boxes) {
183 Pix* distorted = pixCopy(nullptr, const_cast<Pix*>(pix));
184 // Things to do to synthetic training data.
185 if ((white_noise || smooth_noise) && randomizer->SignedRand(1.0) > 0.0) {
186 // TODO(rays) Cook noise in a more thread-safe manner than rand().
187 // Attempt to make the sequences reproducible.
188 srand(randomizer->IntRand());
189 Pix* pixn = pixAddGaussianNoise(distorted, 8.0);
190 pixDestroy(&distorted);
191 if (smooth_noise) {
192 distorted = pixBlockconv(pixn, 1, 1);
193 pixDestroy(&pixn);
194 } else {
195 distorted = pixn;
196 }
197 }
198 if (blur && randomizer->SignedRand(1.0) > 0.0) {
199 Pix* blurred = pixBlockconv(distorted, 1, 1);
200 pixDestroy(&distorted);
201 distorted = blurred;
202 }
203 if (perspective)
204 GeneratePerspectiveDistortion(0, 0, randomizer, &distorted, boxes);
205 if (boxes != nullptr) {
206 for (int b = 0; b < boxes->size(); ++b) {
207 (*boxes)[b].scale(1.0f / box_reduction);
208 if ((*boxes)[b].width() <= 0)
209 (*boxes)[b].set_right((*boxes)[b].left() + 1);
210 }
211 }
212 if (invert && randomizer->SignedRand(1.0) < -0)
213 pixInvert(distorted, distorted);
214 return distorted;
215}
216
217// Distorts anything that has a non-null pointer with the same pseudo-random
218// perspective distortion. Width and height only need to be set if there
219// is no pix. If there is a pix, then they will be taken from there.
220void GeneratePerspectiveDistortion(int width, int height, TRand* randomizer,
221 Pix** pix, GenericVector<TBOX>* boxes) {
222 if (pix != nullptr && *pix != nullptr) {
223 width = pixGetWidth(*pix);
224 height = pixGetHeight(*pix);
225 }
226 float* im_coeffs = nullptr;
227 float* box_coeffs = nullptr;
228 l_int32 incolor =
229 ProjectiveCoeffs(width, height, randomizer, &im_coeffs, &box_coeffs);
230 if (pix != nullptr && *pix != nullptr) {
231 // Transform the image.
232 Pix* transformed = pixProjective(*pix, im_coeffs, incolor);
233 if (transformed == nullptr) {
234 tprintf("Projective transformation failed!!\n");
235 return;
236 }
237 pixDestroy(pix);
238 *pix = transformed;
239 }
240 if (boxes != nullptr) {
241 // Transform the boxes.
242 for (int b = 0; b < boxes->size(); ++b) {
243 int x1, y1, x2, y2;
244 const TBOX& box = (*boxes)[b];
245 projectiveXformSampledPt(box_coeffs, box.left(), height - box.top(), &x1,
246 &y1);
247 projectiveXformSampledPt(box_coeffs, box.right(), height - box.bottom(),
248 &x2, &y2);
249 TBOX new_box1(x1, height - y2, x2, height - y1);
250 projectiveXformSampledPt(box_coeffs, box.left(), height - box.bottom(),
251 &x1, &y1);
252 projectiveXformSampledPt(box_coeffs, box.right(), height - box.top(), &x2,
253 &y2);
254 TBOX new_box2(x1, height - y1, x2, height - y2);
255 (*boxes)[b] = new_box1.bounding_union(new_box2);
256 }
257 }
258 free(im_coeffs);
259 free(box_coeffs);
260}
261
262// Computes the coefficients of a randomized projective transformation.
263// The image transform requires backward transformation coefficient, and the
264// box transform the forward coefficients.
265// Returns the incolor arg to pixProjective.
266int ProjectiveCoeffs(int width, int height, TRand* randomizer,
267 float** im_coeffs, float** box_coeffs) {
268 // Setup "from" points.
269 Pta* src_pts = ptaCreate(4);
270 ptaAddPt(src_pts, 0.0f, 0.0f);
271 ptaAddPt(src_pts, width, 0.0f);
272 ptaAddPt(src_pts, width, height);
273 ptaAddPt(src_pts, 0.0f, height);
274 // Extract factors from pseudo-random sequence.
275 float factors[FN_NUM_FACTORS];
276 float shear = 0.0f; // Shear is signed.
277 for (int i = 0; i < FN_NUM_FACTORS; ++i) {
278 // Everything is squared to make wild values rarer.
279 if (i == FN_SHEAR) {
280 // Shear is signed.
281 shear = randomizer->SignedRand(0.5 / 3.0);
282 shear = shear >= 0.0 ? shear * shear : -shear * shear;
283 // Keep the sheared points within the original rectangle.
284 if (shear < -factors[FN_X0]) shear = -factors[FN_X0];
285 if (shear > factors[FN_X1]) shear = factors[FN_X1];
286 factors[i] = shear;
287 } else if (i != FN_INCOLOR) {
288 factors[i] = fabs(randomizer->SignedRand(1.0));
289 if (i <= FN_Y3)
290 factors[i] *= 5.0 / 8.0;
291 else
292 factors[i] *= 0.5;
293 factors[i] *= factors[i];
294 }
295 }
296 // Setup "to" points.
297 Pta* dest_pts = ptaCreate(4);
298 ptaAddPt(dest_pts, factors[FN_X0] * width, factors[FN_Y0] * height);
299 ptaAddPt(dest_pts, (1.0f - factors[FN_X1]) * width, factors[FN_Y1] * height);
300 ptaAddPt(dest_pts, (1.0f - factors[FN_X1] + shear) * width,
301 (1 - factors[FN_Y2]) * height);
302 ptaAddPt(dest_pts, (factors[FN_X0] + shear) * width,
303 (1 - factors[FN_Y3]) * height);
304 getProjectiveXformCoeffs(dest_pts, src_pts, im_coeffs);
305 getProjectiveXformCoeffs(src_pts, dest_pts, box_coeffs);
306 ptaDestroy(&src_pts);
307 ptaDestroy(&dest_pts);
308 return factors[FN_INCOLOR] > 0.5f ? L_BRING_IN_WHITE : L_BRING_IN_BLACK;
309}
310
311} // namespace tesseract
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
const float kRotationRange
void GeneratePerspectiveDistortion(int width, int height, TRand *randomizer, Pix **pix, GenericVector< TBOX > *boxes)
const int kExposureFactor
const int kSaltnPepper
Pix * DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
Pix * PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, GenericVector< TBOX > *boxes)
const int kMinRampSize
int ProjectiveCoeffs(int width, int height, TRand *randomizer, float **im_coeffs, float **box_coeffs)
int size() const
Definition: genericvector.h:72
Definition: rect.h:34
int16_t top() const
Definition: rect.h:58
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:129
int16_t right() const
Definition: rect.h:79
double SignedRand(double range)
Definition: helpers.h:55
int32_t IntRand()
Definition: helpers.h:50