tesseract 4.1.1
Loading...
Searching...
No Matches
text2image.cpp
Go to the documentation of this file.
1/**********************************************************************
2 * File: text2image.cpp
3 * Description: Program to generate OCR training pages. Given a text file it
4 * outputs an image with a given font and degradation.
5 *
6 * Note that since the results depend on the fonts available on
7 * your system, running the code on a different machine, or
8 * different OS, or even at a different time on the same machine,
9 * may produce different fonts even if --font is given explicitly.
10 * To see names of available fonts, use --list_available_fonts with
11 * the appropriate --fonts_dir path.
12 * Specifying --use_only_legacy_fonts will restrict the available
13 * fonts to those listed in legacy_fonts.h
14 * Authors: Ranjith Unnikrishnan, Ray Smith
15 *
16 * (C) Copyright 2013, Google Inc.
17 * Licensed under the Apache License, Version 2.0 (the "License");
18 * you may not use this file except in compliance with the License.
19 * You may obtain a copy of the License at
20 * http://www.apache.org/licenses/LICENSE-2.0
21 * Unless required by applicable law or agreed to in writing, software
22 * distributed under the License is distributed on an "AS IS" BASIS,
23 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24 * See the License for the specific language governing permissions and
25 * limitations under the License.
26 *
27 **********************************************************************/
28
29#include <cstdlib>
30#include <cstring>
31#include <algorithm>
32#include <iostream>
33#include <map>
34#include <random>
35#include <string>
36#include <utility>
37#include <vector>
38
39#include "allheaders.h" // from leptonica
40#include "boxchar.h"
41#include "commandlineflags.h"
42#include "commontraining.h" // CheckSharedLibraryVersion
43#include "degradeimage.h"
44#include "errcode.h"
45#include "fileio.h"
46#include "helpers.h"
47#include "normstrngs.h"
48#include "stringrenderer.h"
49#include "tlog.h"
50#include "unicharset.h"
51#include "util.h"
52#ifdef _MSC_VER
53# define putenv(s) _putenv(s)
54#endif
55
56// A number with which to initialize the random number generator.
57const int kRandomSeed = 0x18273645;
58
59// The text input file.
60static STRING_PARAM_FLAG(text, "", "File name of text input to process");
61
62// The text output file.
63static STRING_PARAM_FLAG(outputbase, "", "Basename for output image/box file");
64
65// Degrade the rendered image to mimic scanner quality.
66static BOOL_PARAM_FLAG(degrade_image, true,
67 "Degrade rendered image with speckle noise, dilation/erosion "
68 "and rotation");
69
70// Rotate the rendered image to have more realistic glyph borders
71static BOOL_PARAM_FLAG(rotate_image, true, "Rotate the image in a random way.");
72
73// Degradation to apply to the image.
74static INT_PARAM_FLAG(exposure, 0, "Exposure level in photocopier");
75
76// Distort the rendered image by various means according to the bool flags.
77static BOOL_PARAM_FLAG(distort_image, false,
78 "Degrade rendered image with noise, blur, invert.");
79
80// Distortion to apply to the image.
81static BOOL_PARAM_FLAG(invert, true, "Invert the image");
82
83// Distortion to apply to the image.
84static BOOL_PARAM_FLAG(white_noise, true, "Add Gaussian Noise");
85
86// Distortion to apply to the image.
87static BOOL_PARAM_FLAG(smooth_noise, true, "Smoothen Noise");
88
89// Distortion to apply to the image.
90static BOOL_PARAM_FLAG(blur, true, "Blur the image");
91
92#if 0
93
94// Distortion to apply to the image.
95static BOOL_PARAM_FLAG(perspective, false, "Generate Perspective Distortion");
96
97// Distortion to apply to the image.
98static INT_PARAM_FLAG(box_reduction, 0, "Integer reduction factor box_scale");
99
100#endif
101
102// Output image resolution.
103static INT_PARAM_FLAG(resolution, 300, "Pixels per inch");
104
105// Width of output image (in pixels).
106static INT_PARAM_FLAG(xsize, 3600, "Width of output image");
107
108// Max height of output image (in pixels).
109static INT_PARAM_FLAG(ysize, 4800, "Height of output image");
110
111// Max number of pages to produce.
112static INT_PARAM_FLAG(max_pages, 0, "Maximum number of pages to output (0=unlimited)");
113
114// Margin around text (in pixels).
115static INT_PARAM_FLAG(margin, 100, "Margin round edges of image");
116
117// Size of text (in points).
118static INT_PARAM_FLAG(ptsize, 12, "Size of printed text");
119
120// Inter-character space (in ems).
121static DOUBLE_PARAM_FLAG(char_spacing, 0, "Inter-character space in ems");
122
123// Sets the probability (value in [0, 1]) of starting to render a word with an
124// underline. Words are assumed to be space-delimited.
125static DOUBLE_PARAM_FLAG(underline_start_prob, 0,
126 "Fraction of words to underline (value in [0,1])");
127// Set the probability (value in [0, 1]) of continuing a started underline to
128// the next word.
129static DOUBLE_PARAM_FLAG(underline_continuation_prob, 0,
130 "Fraction of words to underline (value in [0,1])");
131
132// Inter-line space (in pixels).
133static INT_PARAM_FLAG(leading, 12, "Inter-line space (in pixels)");
134
135// Layout and glyph orientation on rendering.
136static STRING_PARAM_FLAG(writing_mode, "horizontal",
137 "Specify one of the following writing"
138 " modes.\n"
139 "'horizontal' : Render regular horizontal text. (default)\n"
140 "'vertical' : Render vertical text. Glyph orientation is"
141 " selected by Pango.\n"
142 "'vertical-upright' : Render vertical text. Glyph "
143 " orientation is set to be upright.");
144
145static INT_PARAM_FLAG(box_padding, 0, "Padding around produced bounding boxes");
146
147static BOOL_PARAM_FLAG(strip_unrenderable_words, true,
148 "Remove unrenderable words from source text");
149
150// Font name.
151static STRING_PARAM_FLAG(font, "Arial", "Font description name to use");
152
153static BOOL_PARAM_FLAG(ligatures, false,
154 "Rebuild and render ligatures");
155
156static BOOL_PARAM_FLAG(find_fonts, false,
157 "Search for all fonts that can render the text");
158static BOOL_PARAM_FLAG(render_per_font, true,
159 "If find_fonts==true, render each font to its own image. "
160 "Image filenames are of the form output_name.font_name.tif");
161static DOUBLE_PARAM_FLAG(min_coverage, 1.0,
162 "If find_fonts==true, the minimum coverage the font has of "
163 "the characters in the text file to include it, between "
164 "0 and 1.");
165
166static BOOL_PARAM_FLAG(list_available_fonts, false, "List available fonts and quit.");
167
168static BOOL_PARAM_FLAG(render_ngrams, false, "Put each space-separated entity from the"
169 " input file into one bounding box. The ngrams in the input"
170 " file will be randomly permuted before rendering (so that"
171 " there is sufficient variety of characters on each line).");
172
173static BOOL_PARAM_FLAG(output_word_boxes, false,
174 "Output word bounding boxes instead of character boxes. "
175 "This is used for Cube training, and implied by "
176 "--render_ngrams.");
177
178static STRING_PARAM_FLAG(unicharset_file, "",
179 "File with characters in the unicharset. If --render_ngrams"
180 " is true and --unicharset_file is specified, ngrams with"
181 " characters that are not in unicharset will be omitted");
182
183static BOOL_PARAM_FLAG(bidirectional_rotation, false,
184 "Rotate the generated characters both ways.");
185
186static BOOL_PARAM_FLAG(only_extract_font_properties, false,
187 "Assumes that the input file contains a list of ngrams. Renders"
188 " each ngram, extracts spacing properties and records them in"
189 " output_base/[font_name].fontinfo file.");
190
191// Use these flags to output zero-padded, square individual character images
192static BOOL_PARAM_FLAG(output_individual_glyph_images, false,
193 "If true also outputs individual character images");
194static INT_PARAM_FLAG(glyph_resized_size, 0,
195 "Each glyph is square with this side length in pixels");
196static INT_PARAM_FLAG(glyph_num_border_pixels_to_pad, 0,
197 "Final_size=glyph_resized_size+2*glyph_num_border_pixels_to_pad");
198
199namespace tesseract {
200
204 // These values are obtained from FT_Glyph_Metrics struct
205 // used by the FreeType font engine.
206 int x_gap_before; // horizontal x bearing
207 int x_gap_after; // horizontal advance - x_gap_before - width
208 std::map<std::string, int> kerned_x_gaps;
209};
210
211static bool IsWhitespaceBox(const BoxChar* boxchar) {
212 return (boxchar->box() == nullptr ||
213 SpanUTF8Whitespace(boxchar->ch().c_str()));
214}
215
// Returns a copy of |in| in which every non-overlapping occurrence of
// |oldsub|, found scanning left to right, is replaced by |newsub|.
// An empty |oldsub| matches nothing, so |in| is returned unchanged.
static std::string StringReplace(const std::string& in,
                                 const std::string& oldsub,
                                 const std::string& newsub) {
  if (oldsub.empty()) {
    // find("") succeeds at every position without advancing the scan, so the
    // replacement loop below would never terminate.
    return in;
  }
  std::string out;
  out.reserve(in.length());
  size_t start_pos = 0;
  size_t pos;
  while ((pos = in.find(oldsub, start_pos)) != std::string::npos) {
    // Copy the untouched text before the match, then the replacement.
    out.append(in, start_pos, pos - start_pos);
    out.append(newsub);
    start_pos = pos + oldsub.length();
  }
  // Copy whatever follows the last match (or all of |in| if none matched).
  out.append(in, start_pos, std::string::npos);
  return out;
}
228
229// Assumes that each word (whitespace-separated entity) in text is a bigram.
230// Renders the bigrams and calls FontInfo::GetSpacingProperties() to
231// obtain spacing information. Produces the output .fontinfo file with a line
232// per unichar of the form:
233// unichar space_before space_after kerned1 kerned_space1 kerned2 ...
234// Fox example, if unichar "A" has spacing of 0 pixels before and -1 pixels
235// after, is kerned with "V" resulting in spacing of "AV" to be -7 and kerned
236// with "T", such that "AT" has spacing of -5, the entry/line for unichar "A"
237// in .fontinfo file will be:
238// A 0 -1 T -5 V -7
239static void ExtractFontProperties(const std::string &utf8_text,
240 StringRenderer *render,
241 const std::string &output_base) {
242 std::map<std::string, SpacingProperties> spacing_map;
243 std::map<std::string, SpacingProperties>::iterator spacing_map_it0;
244 std::map<std::string, SpacingProperties>::iterator spacing_map_it1;
245 int x_bearing, x_advance;
246 int len = utf8_text.length();
247 int offset = 0;
248 const char* text = utf8_text.c_str();
249 while (offset < len) {
250 offset +=
251 render->RenderToImage(text + offset, strlen(text + offset), nullptr);
252 const std::vector<BoxChar*> &boxes = render->GetBoxes();
253
254 // If the page break split a bigram, correct the offset so we try the bigram
255 // on the next iteration.
256 if (boxes.size() > 2 && !IsWhitespaceBox(boxes[boxes.size() - 1]) &&
257 IsWhitespaceBox(boxes[boxes.size() - 2])) {
258 if (boxes.size() > 3) {
259 tprintf("WARNING: Adjusting to bad page break after '%s%s'\n",
260 boxes[boxes.size() - 4]->ch().c_str(),
261 boxes[boxes.size() - 3]->ch().c_str());
262 }
263 offset -= boxes[boxes.size() - 1]->ch().size();
264 }
265
266 for (size_t b = 0; b < boxes.size(); b += 2) {
267 while (b < boxes.size() && IsWhitespaceBox(boxes[b])) ++b;
268 if (b + 1 >= boxes.size()) break;
269 const std::string &ch0 = boxes[b]->ch();
270 // We encountered a ligature. This happens in at least two scenarios:
271 // One is when the rendered bigram forms a grapheme cluster (eg. the
272 // second character in the bigram is a combining vowel), in which case we
273 // correctly output only one bounding box.
274 // A second far less frequent case is when caused some fonts like 'DejaVu
275 // Sans Ultra-Light' force Pango to render a ligatured character even if
276 // the input consists of the separated characters. NOTE(ranjith): As per
277 // behdad@ this is not currently controllable at the level of the Pango
278 // API.
279 // The most frequent of all is a single character "word" made by the CJK
280 // segmenter.
281 // Safeguard against these cases here by just skipping the bigram.
282 if (IsWhitespaceBox(boxes[b+1])) {
283 continue;
284 }
285 int xgap = (boxes[b+1]->box()->x -
286 (boxes[b]->box()->x + boxes[b]->box()->w));
287 spacing_map_it0 = spacing_map.find(ch0);
288 int ok_count = 0;
289 if (spacing_map_it0 == spacing_map.end() &&
290 render->font().GetSpacingProperties(ch0, &x_bearing, &x_advance)) {
291 spacing_map[ch0] = SpacingProperties(
292 x_bearing, x_advance - x_bearing - boxes[b]->box()->w);
293 spacing_map_it0 = spacing_map.find(ch0);
294 ++ok_count;
295 }
296 const std::string &ch1 = boxes[b+1]->ch();
297 tlog(3, "%s%s\n", ch0.c_str(), ch1.c_str());
298 spacing_map_it1 = spacing_map.find(ch1);
299 if (spacing_map_it1 == spacing_map.end() &&
300 render->font().GetSpacingProperties(ch1, &x_bearing, &x_advance)) {
301 spacing_map[ch1] = SpacingProperties(
302 x_bearing, x_advance - x_bearing - boxes[b+1]->box()->w);
303 spacing_map_it1 = spacing_map.find(ch1);
304 ++ok_count;
305 }
306 if (ok_count == 2 && xgap != (spacing_map_it0->second.x_gap_after +
307 spacing_map_it1->second.x_gap_before)) {
308 spacing_map_it0->second.kerned_x_gaps[ch1] = xgap;
309 }
310 }
311 render->ClearBoxes();
312 }
313 std::string output_string;
314 const int kBufSize = 1024;
315 char buf[kBufSize];
316 snprintf(buf, kBufSize, "%d\n", static_cast<int>(spacing_map.size()));
317 output_string.append(buf);
318 std::map<std::string, SpacingProperties>::const_iterator spacing_map_it;
319 for (spacing_map_it = spacing_map.begin();
320 spacing_map_it != spacing_map.end(); ++spacing_map_it) {
321 snprintf(buf, kBufSize,
322 "%s %d %d %d", spacing_map_it->first.c_str(),
323 spacing_map_it->second.x_gap_before,
324 spacing_map_it->second.x_gap_after,
325 static_cast<int>(spacing_map_it->second.kerned_x_gaps.size()));
326 output_string.append(buf);
327 std::map<std::string, int>::const_iterator kern_it;
328 for (kern_it = spacing_map_it->second.kerned_x_gaps.begin();
329 kern_it != spacing_map_it->second.kerned_x_gaps.end(); ++kern_it) {
330 snprintf(buf, kBufSize,
331 " %s %d", kern_it->first.c_str(), kern_it->second);
332 output_string.append(buf);
333 }
334 output_string.append("\n");
335 }
336 File::WriteStringToFileOrDie(output_string, output_base + ".fontinfo");
337}
338
339static bool MakeIndividualGlyphs(Pix* pix, const std::vector<BoxChar*>& vbox,
340 const int input_tiff_page) {
341 // If checks fail, return false without exiting text2image
342 if (!pix) {
343 tprintf("ERROR: MakeIndividualGlyphs(): Input Pix* is nullptr\n");
344 return false;
345 } else if (FLAGS_glyph_resized_size <= 0) {
346 tprintf("ERROR: --glyph_resized_size must be positive\n");
347 return false;
348 } else if (FLAGS_glyph_num_border_pixels_to_pad < 0) {
349 tprintf("ERROR: --glyph_num_border_pixels_to_pad must be 0 or positive\n");
350 return false;
351 }
352
353 const int n_boxes = vbox.size();
354 int n_boxes_saved = 0;
355 int current_tiff_page = 0;
356 int y_previous = 0;
357 static int glyph_count = 0;
358 for (int i = 0; i < n_boxes; i++) {
359 // Get one bounding box
360 Box* b = vbox[i]->mutable_box();
361 if (!b) continue;
362 const int x = b->x;
363 const int y = b->y;
364 const int w = b->w;
365 const int h = b->h;
366 // Check present tiff page (for multipage tiff)
367 if (y < y_previous-pixGetHeight(pix)/10) {
368 tprintf("ERROR: Wrap-around encountered, at i=%d\n", i);
369 current_tiff_page++;
370 }
371 if (current_tiff_page < input_tiff_page) continue;
372 else if (current_tiff_page > input_tiff_page) break;
373 // Check box validity
374 if (x < 0 || y < 0 ||
375 (x+w-1) >= pixGetWidth(pix) ||
376 (y+h-1) >= pixGetHeight(pix)) {
377 tprintf("ERROR: MakeIndividualGlyphs(): Index out of range, at i=%d"
378 " (x=%d, y=%d, w=%d, h=%d\n)", i, x, y, w, h);
379 continue;
380 } else if (w < FLAGS_glyph_num_border_pixels_to_pad &&
381 h < FLAGS_glyph_num_border_pixels_to_pad) {
382 tprintf("ERROR: Input image too small to be a character, at i=%d\n", i);
383 continue;
384 }
385 // Crop the boxed character
386 Pix* pix_glyph = pixClipRectangle(pix, b, nullptr);
387 if (!pix_glyph) {
388 tprintf("ERROR: MakeIndividualGlyphs(): Failed to clip, at i=%d\n", i);
389 continue;
390 }
391 // Resize to square
392 Pix* pix_glyph_sq = pixScaleToSize(pix_glyph,
393 FLAGS_glyph_resized_size,
394 FLAGS_glyph_resized_size);
395 if (!pix_glyph_sq) {
396 tprintf("ERROR: MakeIndividualGlyphs(): Failed to resize, at i=%d\n", i);
397 continue;
398 }
399 // Zero-pad
400 Pix* pix_glyph_sq_pad = pixAddBorder(pix_glyph_sq,
401 FLAGS_glyph_num_border_pixels_to_pad,
402 0);
403 if (!pix_glyph_sq_pad) {
404 tprintf("ERROR: MakeIndividualGlyphs(): Failed to zero-pad, at i=%d\n",
405 i);
406 continue;
407 }
408 // Write out
409 Pix* pix_glyph_sq_pad_8 = pixConvertTo8(pix_glyph_sq_pad, false);
410 char filename[1024];
411 snprintf(filename, 1024, "%s_%d.jpg", FLAGS_outputbase.c_str(),
412 glyph_count++);
413 if (pixWriteJpeg(filename, pix_glyph_sq_pad_8, 100, 0)) {
414 tprintf("ERROR: MakeIndividualGlyphs(): Failed to write JPEG to %s,"
415 " at i=%d\n", filename, i);
416 continue;
417 }
418
419 pixDestroy(&pix_glyph);
420 pixDestroy(&pix_glyph_sq);
421 pixDestroy(&pix_glyph_sq_pad);
422 pixDestroy(&pix_glyph_sq_pad_8);
423 n_boxes_saved++;
424 y_previous = y;
425 }
426 if (n_boxes_saved == 0) {
427 return false;
428 } else {
429 tprintf("Total number of characters saved = %d\n", n_boxes_saved);
430 return true;
431 }
432}
433} // namespace tesseract
434
436using tesseract::ExtractFontProperties;
437using tesseract::File;
442
443static int Main() {
444 if (FLAGS_list_available_fonts) {
445 const std::vector<std::string>& all_fonts = FontUtils::ListAvailableFonts();
446 for (unsigned int i = 0; i < all_fonts.size(); ++i) {
447 // Remove trailing comma: pango-font-description-to-string adds a comma
448 // to some fonts.
449 // See https://github.com/tesseract-ocr/tesseract/issues/408
450 std::string font_name(all_fonts[i].c_str());
451 if (font_name.back() == ',')
452 font_name.pop_back();
453 printf("%3u: %s\n", i, font_name.c_str());
454 ASSERT_HOST_MSG(FontUtils::IsAvailableFont(all_fonts[i].c_str()),
455 "Font %s is unrecognized.\n", all_fonts[i].c_str());
456 }
457 return EXIT_SUCCESS;
458 }
459
460 // Check validity of input flags.
461 if (FLAGS_text.empty()) {
462 tprintf("'--text' option is missing!\n");
463 exit(1);
464 }
465 if (FLAGS_outputbase.empty()) {
466 tprintf("'--outputbase' option is missing!\n");
467 exit(1);
468 }
469 if (!FLAGS_unicharset_file.empty() && FLAGS_render_ngrams) {
470 tprintf("Use '--unicharset_file' only if '--render_ngrams' is set.\n");
471 exit(1);
472 }
473
474 std::string font_name = FLAGS_font.c_str();
475 if (!FLAGS_find_fonts && !FontUtils::IsAvailableFont(font_name.c_str())) {
476 font_name += ',';
477 std::string pango_name;
478 if (!FontUtils::IsAvailableFont(font_name.c_str(), &pango_name)) {
479 tprintf("Could not find font named '%s'.\n", FLAGS_font.c_str());
480 if (!pango_name.empty()) {
481 tprintf("Pango suggested font '%s'.\n", pango_name.c_str());
482 }
483 tprintf("Please correct --font arg.\n");
484 exit(1);
485 }
486 }
487
488 if (FLAGS_render_ngrams)
489 FLAGS_output_word_boxes = true;
490
491 char font_desc_name[1024];
492 snprintf(font_desc_name, 1024, "%s %d", font_name.c_str(),
493 static_cast<int>(FLAGS_ptsize));
494
495 StringRenderer render(font_desc_name, FLAGS_xsize, FLAGS_ysize);
496 render.set_add_ligatures(FLAGS_ligatures);
497 render.set_leading(FLAGS_leading);
498 render.set_resolution(FLAGS_resolution);
499 render.set_char_spacing(FLAGS_char_spacing * FLAGS_ptsize);
500 render.set_h_margin(FLAGS_margin);
501 render.set_v_margin(FLAGS_margin);
502 render.set_output_word_boxes(FLAGS_output_word_boxes);
503 render.set_box_padding(FLAGS_box_padding);
504 render.set_strip_unrenderable_words(FLAGS_strip_unrenderable_words);
505 render.set_underline_start_prob(FLAGS_underline_start_prob);
506 render.set_underline_continuation_prob(FLAGS_underline_continuation_prob);
507
508 // Set text rendering orientation and their forms.
509 if (FLAGS_writing_mode == "horizontal") {
510 // Render regular horizontal text (default).
511 render.set_vertical_text(false);
512 render.set_gravity_hint_strong(false);
513 render.set_render_fullwidth_latin(false);
514 } else if (FLAGS_writing_mode == "vertical") {
515 // Render vertical text. Glyph orientation is selected by Pango.
516 render.set_vertical_text(true);
517 render.set_gravity_hint_strong(false);
518 render.set_render_fullwidth_latin(false);
519 } else if (FLAGS_writing_mode == "vertical-upright") {
520 // Render vertical text. Glyph orientation is set to be upright.
521 // Also Basic Latin characters are converted to their fullwidth forms
522 // on rendering, since fullwidth Latin characters are well designed to fit
523 // vertical text lines, while .box files store halfwidth Basic Latin
524 // unichars.
525 render.set_vertical_text(true);
526 render.set_gravity_hint_strong(true);
527 render.set_render_fullwidth_latin(true);
528 } else {
529 tprintf("Invalid writing mode: %s\n", FLAGS_writing_mode.c_str());
530 exit(1);
531 }
532
533 std::string src_utf8;
534 // This c_str is NOT redundant!
535 if (!File::ReadFileToString(FLAGS_text.c_str(), &src_utf8)) {
536 tprintf("Failed to read file: %s\n", FLAGS_text.c_str());
537 exit(1);
538 }
539
540 // Remove the unicode mark if present.
541 if (strncmp(src_utf8.c_str(), "\xef\xbb\xbf", 3) == 0) {
542 src_utf8.erase(0, 3);
543 }
544 tlog(1, "Render string of size %zu\n", src_utf8.length());
545
546 if (FLAGS_render_ngrams || FLAGS_only_extract_font_properties) {
547 // Try to preserve behavior of old text2image by expanding inter-word
548 // spaces by a factor of 4.
549 const std::string kSeparator = FLAGS_render_ngrams ? " " : " ";
550 // Also restrict the number of characters per line to try and avoid
551 // line-breaking in the middle of words like "-A", "R$" etc. which are
552 // otherwise allowed by the standard unicode line-breaking rules.
553 const unsigned int kCharsPerLine = (FLAGS_ptsize > 20) ? 50 : 100;
554 std::string rand_utf8;
555 UNICHARSET unicharset;
556 if (FLAGS_render_ngrams && !FLAGS_unicharset_file.empty() &&
557 !unicharset.load_from_file(FLAGS_unicharset_file.c_str())) {
558 tprintf("Failed to load unicharset from file %s\n",
559 FLAGS_unicharset_file.c_str());
560 exit(1);
561 }
562
563 // If we are rendering ngrams that will be OCRed later, shuffle them so that
564 // tesseract does not have difficulties finding correct baseline, word
565 // spaces, etc.
566 const char *str8 = src_utf8.c_str();
567 int len = src_utf8.length();
568 int step;
569 std::vector<std::pair<int, int> > offsets;
570 int offset = SpanUTF8Whitespace(str8);
571 while (offset < len) {
572 step = SpanUTF8NotWhitespace(str8 + offset);
573 offsets.push_back(std::make_pair(offset, step));
574 offset += step;
575 offset += SpanUTF8Whitespace(str8 + offset);
576 }
577 if (FLAGS_render_ngrams) {
578 std::seed_seq seed{kRandomSeed};
579 std::mt19937 random_gen(seed);
580 std::shuffle(offsets.begin(), offsets.end(), random_gen);
581 }
582
583 for (size_t i = 0, line = 1; i < offsets.size(); ++i) {
584 const char *curr_pos = str8 + offsets[i].first;
585 int ngram_len = offsets[i].second;
586 // Skip words that contain characters not in found in unicharset.
587 std::string cleaned = UNICHARSET::CleanupString(curr_pos, ngram_len);
588 if (!FLAGS_unicharset_file.empty() &&
589 !unicharset.encodable_string(cleaned.c_str(), nullptr)) {
590 continue;
591 }
592 rand_utf8.append(curr_pos, ngram_len);
593 if (rand_utf8.length() > line * kCharsPerLine) {
594 rand_utf8.append(" \n");
595 ++line;
596 if (line & 0x1) rand_utf8.append(kSeparator);
597 } else {
598 rand_utf8.append(kSeparator);
599 }
600 }
601 tlog(1, "Rendered ngram string of size %zu\n", rand_utf8.length());
602 src_utf8.swap(rand_utf8);
603 }
604 if (FLAGS_only_extract_font_properties) {
605 tprintf("Extracting font properties only\n");
606 ExtractFontProperties(src_utf8, &render, FLAGS_outputbase.c_str());
607 tprintf("Done!\n");
608 return 0;
609 }
610
611 int im = 0;
612 std::vector<float> page_rotation;
613 const char* to_render_utf8 = src_utf8.c_str();
614
615 tesseract::TRand randomizer;
616 randomizer.set_seed(kRandomSeed);
617 std::vector<std::string> font_names;
618 // We use a two pass mechanism to rotate images in both direction.
619 // The first pass(0) will rotate the images in random directions and
620 // the second pass(1) will mirror those rotations.
621 int num_pass = FLAGS_bidirectional_rotation ? 2 : 1;
622 for (int pass = 0; pass < num_pass; ++pass) {
623 int page_num = 0;
624 std::string font_used;
625 for (size_t offset = 0;
626 offset < strlen(to_render_utf8) &&
627 (FLAGS_max_pages == 0 || page_num < FLAGS_max_pages);
628 ++im, ++page_num) {
629 tlog(1, "Starting page %d\n", im);
630 Pix* pix = nullptr;
631 if (FLAGS_find_fonts) {
632 offset += render.RenderAllFontsToImage(FLAGS_min_coverage,
633 to_render_utf8 + offset,
634 strlen(to_render_utf8 + offset),
635 &font_used, &pix);
636 } else {
637 offset += render.RenderToImage(to_render_utf8 + offset,
638 strlen(to_render_utf8 + offset), &pix);
639 }
640 if (pix != nullptr) {
641 float rotation = 0;
642 if (pass == 1) {
643 // Pass 2, do mirror rotation.
644 rotation = -1 * page_rotation[page_num];
645 }
646 if (FLAGS_degrade_image) {
647 pix = DegradeImage(pix, FLAGS_exposure, &randomizer,
648 FLAGS_rotate_image ? &rotation : nullptr);
649 }
650 if (FLAGS_distort_image) {
651 //TODO: perspective is set to false and box_reduction to 1.
652 pix = PrepareDistortedPix(pix, false, FLAGS_invert,
653 FLAGS_white_noise, FLAGS_smooth_noise, FLAGS_blur,
654 1, &randomizer, nullptr);
655 }
656 render.RotatePageBoxes(rotation);
657
658 if (pass == 0) {
659 // Pass 1, rotate randomly and store the rotation..
660 page_rotation.push_back(rotation);
661 }
662
663 Pix* gray_pix = pixConvertTo8(pix, false);
664 pixDestroy(&pix);
665 Pix* binary = pixThresholdToBinary(gray_pix, 128);
666 pixDestroy(&gray_pix);
667 char tiff_name[1024];
668 if (FLAGS_find_fonts) {
669 if (FLAGS_render_per_font) {
670 std::string fontname_for_file = tesseract::StringReplace(
671 font_used, " ", "_");
672 snprintf(tiff_name, 1024, "%s.%s.tif", FLAGS_outputbase.c_str(),
673 fontname_for_file.c_str());
674 pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, "w");
675 tprintf("Rendered page %d to file %s\n", im, tiff_name);
676 } else {
677 font_names.push_back(font_used);
678 }
679 } else {
680 snprintf(tiff_name, 1024, "%s.tif", FLAGS_outputbase.c_str());
681 pixWriteTiff(tiff_name, binary, IFF_TIFF_G4, im == 0 ? "w" : "a");
682 tprintf("Rendered page %d to file %s\n", im, tiff_name);
683 }
684 // Make individual glyphs
685 if (FLAGS_output_individual_glyph_images) {
686 if (!MakeIndividualGlyphs(binary, render.GetBoxes(), im)) {
687 tprintf("ERROR: Individual glyphs not saved\n");
688 }
689 }
690 pixDestroy(&binary);
691 }
692 if (FLAGS_find_fonts && offset != 0) {
693 // We just want a list of names, or some sample images so we don't need
694 // to render more than the first page of the text.
695 break;
696 }
697 }
698 }
699 if (!FLAGS_find_fonts) {
700 std::string box_name = FLAGS_outputbase.c_str();
701 box_name += ".box";
702 render.WriteAllBoxes(box_name);
703 } else if (!FLAGS_render_per_font && !font_names.empty()) {
704 std::string filename = FLAGS_outputbase.c_str();
705 filename += ".fontlist.txt";
706 FILE* fp = fopen(filename.c_str(), "wb");
707 if (fp == nullptr) {
708 tprintf("Failed to create output font list %s\n", filename.c_str());
709 } else {
710 for (size_t i = 0; i < font_names.size(); ++i) {
711 fprintf(fp, "%s\n", font_names[i].c_str());
712 }
713 fclose(fp);
714 }
715 }
716
717 return 0;
718}
719
720int main(int argc, char** argv) {
721 // Respect environment variable. could be:
722 // fc (fontconfig), win32, and coretext
723 // If not set force fontconfig for Mac OS.
724 // See https://github.com/tesseract-ocr/tesseract/issues/736
725 char* backend;
726 backend = getenv("PANGOCAIRO_BACKEND");
727 if (backend == nullptr) {
728 static char envstring[] = "PANGOCAIRO_BACKEND=fc";
729 putenv(envstring);
730 } else {
731 printf("Using '%s' as pango cairo backend based on environment "
732 "variable.\n", backend);
733 }
734 tesseract::CheckSharedLibraryVersion();
735 if (argc > 1) {
736 if ((strcmp(argv[1], "-v") == 0) ||
737 (strcmp(argv[1], "--version") == 0)) {
739 printf("Pango version: %s\n", pango_version_string());
740 }
741 }
742 tesseract::ParseCommandLineFlags(argv[0], &argc, &argv, true);
743 return Main();
744}
#define ASSERT_HOST_MSG(x,...)
Definition: errcode.h:92
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
#define DOUBLE_PARAM_FLAG(name, val, comment)
#define BOOL_PARAM_FLAG(name, val, comment)
#define INT_PARAM_FLAG(name, val, comment)
#define STRING_PARAM_FLAG(name, val, comment)
int main(int argc, char **argv)
Definition: text2image.cpp:720
const int kRandomSeed
Definition: text2image.cpp:57
#define tlog(level,...)
Definition: tlog.h:33
void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const bool remove_flags)
unsigned int SpanUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:233
Pix * DegradeImage(Pix *input, int exposure, TRand *randomizer, float *rotation)
Pix * PrepareDistortedPix(const Pix *pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, GenericVector< TBOX > *boxes)
unsigned int SpanUTF8NotWhitespace(const char *text)
Definition: normstrngs.cpp:243
void set_seed(uint64_t seed)
Definition: helpers.h:40
static std::string CleanupString(const char *utf8_str)
Definition: unicharset.h:246
bool load_from_file(const char *const filename, bool skip_fragments)
Definition: unicharset.h:388
bool encodable_string(const char *str, int *first_bad_position) const
Definition: unicharset.cpp:244
const Box * box() const
Definition: boxchar.h:44
const std::string & ch() const
Definition: boxchar.h:43
static bool ReadFileToString(const std::string &filename, std::string *out)
Definition: fileio.cpp:77
static void WriteStringToFileOrDie(const std::string &str, const std::string &filename)
Definition: fileio.cpp:56
bool GetSpacingProperties(const std::string &utf8_char, int *x_bearing, int *x_advance) const
static bool IsAvailableFont(const char *font_desc)
static void PangoFontTypeInfo()
static const std::vector< std::string > & ListAvailableFonts()
void set_underline_start_prob(const double frac)
void set_vertical_text(bool vertical_text)
void set_render_fullwidth_latin(bool render_fullwidth_latin)
const std::vector< BoxChar * > & GetBoxes() const
void set_gravity_hint_strong(bool gravity_hint_strong)
void set_resolution(const int resolution)
const PangoFontInfo & font() const
int RenderToImage(const char *text, int text_length, Pix **pix)
void set_underline_continuation_prob(const double frac)
void set_strip_unrenderable_words(bool val)
void set_add_ligatures(bool add_ligatures)
void set_h_margin(const int h_margin)
int RenderAllFontsToImage(double min_coverage, const char *text, int text_length, std::string *font_used, Pix **pix)
void set_char_spacing(int char_spacing)
void set_output_word_boxes(bool val)
void set_v_margin(const int v_margin)
void WriteAllBoxes(const std::string &filename)
void RotatePageBoxes(float rotation)
void set_leading(int leading)
std::map< std::string, int > kerned_x_gaps
Definition: text2image.cpp:208