tesseract 4.1.1
Loading...
Searching...
No Matches
thresholder.cpp
Go to the documentation of this file.
1
2// File: thresholder.cpp
3// Description: Base API for thresholding images in tesseract.
4// Author: Ray Smith
5//
6// (C) Copyright 2008, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#include "allheaders.h"
20
21#include "thresholder.h"
22
23#include <cstdint> // for uint32_t
24#include <cstring>
25
26#include "otsuthr.h"
27#include "tprintf.h" // for tprintf
28
29#if defined(USE_OPENCL)
30#include "openclwrapper.h" // for OpenclDevice
31#endif
32
33namespace tesseract {
34
36 : pix_(nullptr),
37 image_width_(0), image_height_(0),
38 pix_channels_(0), pix_wpl_(0),
39 scale_(1), yres_(300), estimated_res_(300) {
40 SetRectangle(0, 0, 0, 0);
41}
42
44 Clear();
45}
46
47// Destroy the Pix if there is one, freeing memory.
49 pixDestroy(&pix_);
50}
51
52// Return true if no image has been set.
54 return pix_ == nullptr;
55}
56
57// SetImage makes a copy of all the image data, so it may be deleted
58// immediately after this call.
59// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
60// Palette color images will not work properly and must be converted to
61// 24 bit.
62// Binary images of 1 bit per pixel may also be given but they must be
63// byte packed with the MSB of the first byte being the first pixel, and a
64// one pixel is WHITE. For binary images set bytes_per_pixel=0.
65void ImageThresholder::SetImage(const unsigned char* imagedata,
66 int width, int height,
67 int bytes_per_pixel, int bytes_per_line) {
68 int bpp = bytes_per_pixel * 8;
69 if (bpp == 0) bpp = 1;
70 Pix* pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
71 l_uint32* data = pixGetData(pix);
72 int wpl = pixGetWpl(pix);
73 switch (bpp) {
74 case 1:
75 for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
76 for (int x = 0; x < width; ++x) {
77 if (imagedata[x / 8] & (0x80 >> (x % 8)))
78 CLEAR_DATA_BIT(data, x);
79 else
80 SET_DATA_BIT(data, x);
81 }
82 }
83 break;
84
85 case 8:
86 // Greyscale just copies the bytes in the right order.
87 for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
88 for (int x = 0; x < width; ++x)
89 SET_DATA_BYTE(data, x, imagedata[x]);
90 }
91 break;
92
93 case 24:
94 // Put the colors in the correct places in the line buffer.
95 for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
96 for (int x = 0; x < width; ++x, ++data) {
97 SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
98 SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
99 SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
100 }
101 }
102 break;
103
104 case 32:
105 // Maintain byte order consistency across different endianness.
106 for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
107 for (int x = 0; x < width; ++x) {
108 data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
109 (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
110 }
111 }
112 break;
113
114 default:
115 tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
116 }
117 SetImage(pix);
118 pixDestroy(&pix);
119}
120
121// Store the coordinates of the rectangle to process for later use.
122// Doesn't actually do any thresholding.
123void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
124 rect_left_ = left;
125 rect_top_ = top;
126 rect_width_ = width;
127 rect_height_ = height;
128}
129
130// Get enough parameters to be able to rebuild bounding boxes in the
131// original image (not just within the rectangle).
132// Left and top are enough with top-down coordinates, but
133// the height of the rectangle and the image are needed for bottom-up.
134void ImageThresholder::GetImageSizes(int* left, int* top,
135 int* width, int* height,
136 int* imagewidth, int* imageheight) {
137 *left = rect_left_;
138 *top = rect_top_;
139 *width = rect_width_;
140 *height = rect_height_;
141 *imagewidth = image_width_;
142 *imageheight = image_height_;
143}
144
145// Pix vs raw, which to use? Pix is the preferred input for efficiency,
146// since raw buffers are copied.
147// SetImage for Pix clones its input, so the source pix may be pixDestroyed
148// immediately after, but may not go away until after the Thresholder has
149// finished with it.
150void ImageThresholder::SetImage(const Pix* pix) {
151 if (pix_ != nullptr)
152 pixDestroy(&pix_);
153 Pix* src = const_cast<Pix*>(pix);
154 int depth;
155 pixGetDimensions(src, &image_width_, &image_height_, &depth);
156 // Convert the image as necessary so it is one of binary, plain RGB, or
157 // 8 bit with no colormap. Guarantee that we always end up with our own copy,
158 // not just a clone of the input.
159 if (pixGetColormap(src)) {
160 Pix* tmp = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
161 depth = pixGetDepth(tmp);
162 if (depth > 1 && depth < 8) {
163 pix_ = pixConvertTo8(tmp, false);
164 pixDestroy(&tmp);
165 } else {
166 pix_ = tmp;
167 }
168 } else if (depth > 1 && depth < 8) {
169 pix_ = pixConvertTo8(src, false);
170 } else {
171 pix_ = pixCopy(nullptr, src);
172 }
173 depth = pixGetDepth(pix_);
174 pix_channels_ = depth / 8;
175 pix_wpl_ = pixGetWpl(pix_);
176 scale_ = 1;
177 estimated_res_ = yres_ = pixGetYRes(pix_);
178 Init();
179}
180
181// Threshold the source image as efficiently as possible to the output Pix.
182// Creates a Pix and sets pix to point to the resulting pointer.
183// Caller must use pixDestroy to free the created Pix.
185bool ImageThresholder::ThresholdToPix(PageSegMode pageseg_mode, Pix** pix) {
186 if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
187 tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
188 return false;
189 }
190 if (pix_channels_ == 0) {
191 // We have a binary image, but it still has to be copied, as this API
192 // allows the caller to modify the output.
193 Pix* original = GetPixRect();
194 *pix = pixCopy(nullptr, original);
195 pixDestroy(&original);
196 } else {
198 }
199 return true;
200}
201
202// Gets a pix that contains an 8 bit threshold value at each pixel. The
203// returned pix may be an integer reduction of the binary image such that
204// the scale factor may be inferred from the ratio of the sizes, even down
205// to the extreme of a 1x1 pixel thresholds image.
206// Ideally the 8 bit threshold should be the exact threshold used to generate
207// the binary image in ThresholdToPix, but this is not a hard constraint.
208// Returns nullptr if the input is binary. PixDestroy after use.
210 if (IsBinary()) return nullptr;
211 Pix* pix_grey = GetPixRectGrey();
212 int width = pixGetWidth(pix_grey);
213 int height = pixGetHeight(pix_grey);
214 int* thresholds;
215 int* hi_values;
216 OtsuThreshold(pix_grey, 0, 0, width, height, &thresholds, &hi_values);
217 pixDestroy(&pix_grey);
218 Pix* pix_thresholds = pixCreate(width, height, 8);
219 int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
220 pixSetAllArbitrary(pix_thresholds, threshold);
221 delete [] thresholds;
222 delete [] hi_values;
223 return pix_thresholds;
224}
225
226// Common initialization shared between SetImage methods.
229}
230
231// Get a clone/copy of the source image rectangle.
232// The returned Pix must be pixDestroyed.
233// This function will be used in the future by the page layout analysis, and
234// the layout analysis that uses it will only be available with Leptonica,
235// so there is no raw equivalent.
237 if (IsFullImage()) {
238 // Just clone the whole thing.
239 return pixClone(pix_);
240 } else {
241 // Crop to the given rectangle.
242 Box* box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
243 Pix* cropped = pixClipRectangle(pix_, box, nullptr);
244 boxDestroy(&box);
245 return cropped;
246 }
247}
248
249// Get a clone/copy of the source image rectangle, reduced to greyscale,
250// and at the same resolution as the output binary.
251// The returned Pix must be pixDestroyed.
252// Provided to the classifier to extract features from the greyscale image.
254 Pix* pix = GetPixRect(); // May have to be reduced to grey.
255 int depth = pixGetDepth(pix);
256 if (depth != 8) {
257 Pix* result = depth < 8 ? pixConvertTo8(pix, false)
258 : pixConvertRGBToLuminance(pix);
259 pixDestroy(&pix);
260 return result;
261 }
262 return pix;
263}
264
265// Otsu thresholds the rectangle, taking the rectangle from *this.
267 Pix** out_pix) const {
268 int* thresholds;
269 int* hi_values;
270
271 int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_,
272 rect_height_, &thresholds, &hi_values);
273 // only use opencl if compiled w/ OpenCL and selected device is opencl
274#ifdef USE_OPENCL
275 OpenclDevice od;
276 if (num_channels == 4 &&
277 od.selectedDeviceIsOpenCL() && rect_top_ == 0 && rect_left_ == 0) {
278 od.ThresholdRectToPixOCL((unsigned char*)pixGetData(src_pix), num_channels,
279 pixGetWpl(src_pix) * 4, thresholds, hi_values,
280 out_pix /*pix_OCL*/, rect_height_, rect_width_,
282 } else {
283#endif
284 ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
285#ifdef USE_OPENCL
286 }
287#endif
288 delete [] thresholds;
289 delete [] hi_values;
290}
291
295// arrays and also the bytes per pixel in src_pix.
297 int num_channels,
298 const int* thresholds,
299 const int* hi_values,
300 Pix** pix) const {
301 *pix = pixCreate(rect_width_, rect_height_, 1);
302 uint32_t* pixdata = pixGetData(*pix);
303 int wpl = pixGetWpl(*pix);
304 int src_wpl = pixGetWpl(src_pix);
305 uint32_t* srcdata = pixGetData(src_pix);
306 pixSetXRes(*pix, pixGetXRes(src_pix));
307 pixSetYRes(*pix, pixGetYRes(src_pix));
308 for (int y = 0; y < rect_height_; ++y) {
309 const uint32_t* linedata = srcdata + (y + rect_top_) * src_wpl;
310 uint32_t* pixline = pixdata + y * wpl;
311 for (int x = 0; x < rect_width_; ++x) {
312 bool white_result = true;
313 for (int ch = 0; ch < num_channels; ++ch) {
314 int pixel =
315 GET_DATA_BYTE(linedata, (x + rect_left_) * num_channels + ch);
316 if (hi_values[ch] >= 0 &&
317 (pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
318 white_result = false;
319 break;
320 }
321 }
322 if (white_result)
323 CLEAR_DATA_BIT(pixline, x);
324 else
325 SET_DATA_BIT(pixline, x);
326 }
327 }
328}
329
330} // namespace tesseract.
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int OtsuThreshold(Pix *src_pix, int left, int top, int width, int height, int **thresholds, int **hi_values)
Definition: otsuthr.cpp:40
virtual Pix * GetPixRectGrey()
int pix_wpl_
Words per line of pix_.
Definition: thresholder.h:176
void ThresholdRectToPix(Pix *src_pix, int num_channels, const int *thresholds, const int *hi_values, Pix **pix) const
virtual bool ThresholdToPix(PageSegMode pageseg_mode, Pix **pix)
Returns false on error.
virtual void GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth, int *imageheight)
bool IsFullImage() const
Return true if we are processing the full image.
Definition: thresholder.h:152
bool IsEmpty() const
Return true if no image has been set.
Definition: thresholder.cpp:53
void SetImage(const unsigned char *imagedata, int width, int height, int bytes_per_pixel, int bytes_per_line)
Definition: thresholder.cpp:65
int estimated_res_
Resolution estimate from text size.
Definition: thresholder.h:180
void OtsuThresholdRectToPix(Pix *src_pix, Pix **out_pix) const
virtual Pix * GetPixRectThresholds()
void SetRectangle(int left, int top, int width, int height)
virtual void Init()
Common initialization shared between SetImage methods.
int scale_
Scale factor from original image.
Definition: thresholder.h:178
int pix_channels_
Number of 8-bit channels in pix_.
Definition: thresholder.h:175
int yres_
y pixels/inch in source image.
Definition: thresholder.h:179
int image_width_
Width of source pix_.
Definition: thresholder.h:173
bool IsBinary() const
Returns true if the source image is binary.
Definition: thresholder.h:74
int image_height_
Height of source pix_.
Definition: thresholder.h:174
virtual void Clear()
Destroy the Pix if there is one, freeing memory.
Definition: thresholder.cpp:48