tesseract 4.1.1
Loading...
Searching...
No Matches
renderer.h
Go to the documentation of this file.
1
2// File: renderer.h
3// Description: Rendering interface to inject into TessBaseAPI
4//
5// (C) Copyright 2011, Google Inc.
6// Licensed under the Apache License, Version 2.0 (the "License");
7// you may not use this file except in compliance with the License.
8// You may obtain a copy of the License at
9// http://www.apache.org/licenses/LICENSE-2.0
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
17
18#ifndef TESSERACT_API_RENDERER_H_
19#define TESSERACT_API_RENDERER_H_
20
21// To avoid collision with other typenames include the ABSOLUTE MINIMUM
22// complexity of includes here. Use forward declarations wherever possible
23// and hide includes of complex types in baseapi.cpp.
24#include <string> // for std::string
25#include "genericvector.h"
26#include "platform.h"
27#include "strngs.h" // for STRING
28
29struct Pix;
30
31namespace tesseract {
32
33class TessBaseAPI;
34
49 public:
50 virtual ~TessResultRenderer();
51
52 // Takes ownership of the pointer, so it must be a new'd instance.
53 // Renderers aren't ordered; this joins the sequence headed by the next
54 // parameter with the existing next() sequence. Renderers must be unique across both lists.
55 void insert(TessResultRenderer* next);
56
57 // Returns the next renderer or nullptr.
59 return next_;
60 }
61
67 bool BeginDocument(const char* title);
68
77 bool AddImage(TessBaseAPI* api);
78
83 bool EndDocument();
84
85 const char* file_extension() const {  // Accessor for file_extension_ (standard extension for generated output).
86 return file_extension_;
87 }
88 const char* title() const {  // Document title as a C string; the pointer comes from title_.c_str().
89 return title_.c_str();
90 }
91
92 // Is everything fine? Returns happy_; false means something went wrong.
93 bool happy() const {  // const: read-only query, consistent with the other accessors
94 return happy_;
95 }
96
106 int imagenum() const {  // Accessor for imagenum_ (index of last image added).
107 return imagenum_;
108 }
109
110 protected:
121 TessResultRenderer(const char* outputbase, const char* extension);
122
123 // Hook for specialized handling in BeginDocument()
124 virtual bool BeginDocumentHandler();
125
126 // This must be overridden to render the OCR'd results
127 virtual bool AddImageHandler(TessBaseAPI* api) = 0;
128
129 // Hook for specialized handling in EndDocument()
130 virtual bool EndDocumentHandler();
131
132 // Renderers can call this to append '\0' terminated strings into
133 // the output string returned by GetOutput.
134 // This method will grow the output buffer if needed.
135 void AppendString(const char* s);
136
137 // Renderers can call this to append binary byte sequences into
138 // the output string returned by GetOutput. Note that s is not necessarily
139 // '\0' terminated (and can contain '\0' within it).
140 // This method will grow the output buffer if needed.
141 void AppendData(const char* s, int len);
142
143 private:
144 const char* file_extension_; // standard extension for generated output
145 STRING title_; // title of document being rendered
146 int imagenum_; // index of last image added
147
148 FILE* fout_; // output file pointer
149 TessResultRenderer* next_; // Can link multiple renderers together
150 bool happy_; // I get grumpy when the disk fills up, etc.
151};
152
157 public:
158 explicit TessTextRenderer(const char* outputbase);
159
160 protected:
161 bool AddImageHandler(TessBaseAPI* api) override;
162};
163
168 public:
169 explicit TessHOcrRenderer(const char* outputbase, bool font_info);
170 explicit TessHOcrRenderer(const char* outputbase);
171
172 protected:
173 bool BeginDocumentHandler() override;
174 bool AddImageHandler(TessBaseAPI* api) override;
175 bool EndDocumentHandler() override;
176
177 private:
178 bool font_info_; // whether to print font information
179};
180
185 public:
186 explicit TessAltoRenderer(const char* outputbase);
187
188 protected:
189 bool BeginDocumentHandler() override;
190 bool AddImageHandler(TessBaseAPI* api) override;
191 bool EndDocumentHandler() override;
192};
193
198 public:
199 explicit TessTsvRenderer(const char* outputbase, bool font_info);
200 explicit TessTsvRenderer(const char* outputbase);
201
202 protected:
203 bool BeginDocumentHandler() override;
204 bool AddImageHandler(TessBaseAPI* api) override;
205 bool EndDocumentHandler() override;
206
207 private:
208 bool font_info_; // whether to print font information
209};
210
215 public:
216 // datadir is the location of the TESSDATA. We need it because
217 // we load a custom PDF font from this location.
218 TessPDFRenderer(const char* outputbase, const char* datadir,
219 bool textonly = false);
220
221 protected:
222 bool BeginDocumentHandler() override;
223 bool AddImageHandler(TessBaseAPI* api) override;
224 bool EndDocumentHandler() override;
225
226 private:
227 // We don't want to have every image in memory at once,
228 // so we store some metadata as we go along producing
229 // PDFs one page at a time. At the end, that metadata is
230 // used to make everything that isn't easily handled in a
231 // streaming fashion.
232 long int obj_; // counter for PDF objects
233 GenericVector<long int> offsets_; // offset of every PDF object in bytes
234 GenericVector<long int> pages_; // object number for every /Page object
235 std::string datadir_; // where to find the custom font
236 bool textonly_; // skip images if set
237 // Bookkeeping only. DIY = Do It Yourself.
238 void AppendPDFObjectDIY(size_t objectsize);
239 // Bookkeeping + emit data.
240 void AppendPDFObject(const char* data);
241 // Create the /Contents object for an entire page.
242 char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
243 // Turn an image into a PDF object. Only transcode if we have to.
244 static bool imageToPDFObj(Pix* pix, const char* filename, long int objnum,
245 char** pdf_object, long int* pdf_object_size,
246 int jpg_quality);
247};
248
253 public:
254 explicit TessUnlvRenderer(const char* outputbase);
255
256 protected:
257 bool AddImageHandler(TessBaseAPI* api) override;
258};
259
264 public:
265 explicit TessLSTMBoxRenderer(const char* outputbase);
266
267 protected:
268 bool AddImageHandler(TessBaseAPI* api) override;
269};
270
275 public:
276 explicit TessBoxTextRenderer(const char* outputbase);
277
278 protected:
279 bool AddImageHandler(TessBaseAPI* api) override;
280};
281
286 public:
287 explicit TessWordStrBoxRenderer(const char* outputbase);
288
289 protected:
290 bool AddImageHandler(TessBaseAPI* api) override;
291};
292
293#ifndef DISABLED_LEGACY_ENGINE
294
299 public:
300 explicit TessOsdRenderer(const char* outputbase);
301
302 protected:
303 bool AddImageHandler(TessBaseAPI* api) override;
304};
305
306#endif // ndef DISABLED_LEGACY_ENGINE
307
308} // namespace tesseract.
309
310#endif // TESSERACT_API_RENDERER_H_
struct TessPDFRenderer TessPDFRenderer
Definition: capi.h:90
struct TessBaseAPI TessBaseAPI
Definition: capi.h:93
struct TessResultRenderer TessResultRenderer
Definition: capi.h:87
struct TessTextRenderer TessTextRenderer
Definition: capi.h:88
struct TessBoxTextRenderer TessBoxTextRenderer
Definition: capi.h:92
struct TessUnlvRenderer TessUnlvRenderer
Definition: capi.h:91
struct TessHOcrRenderer TessHOcrRenderer
Definition: capi.h:89
#define TESS_API
Definition: platform.h:54
void insert(LIST list, void *node)
Definition: oldlist.cpp:172
virtual bool AddImageHandler(TessBaseAPI *api)=0
const char * file_extension() const
Definition: renderer.h:85
const char * title() const
Definition: renderer.h:88
TessResultRenderer * next()
Definition: renderer.h:58
Definition: strngs.h:45