tesseract 4.1.1
Loading...
Searching...
No Matches
pango_font_info.cpp
Go to the documentation of this file.
1/**********************************************************************
2 * File: pango_font_info.cpp
3 * Description: Font-related objects and helper functions
4 * Author: Ranjith Unnikrishnan
5 *
6 * (C) Copyright 2013, Google Inc.
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 **********************************************************************/
18
19// Include automatically generated configuration file if running autoconf.
20#ifdef HAVE_CONFIG_H
21#include "config_auto.h"
22#endif
23
24#if (defined __MINGW32__) || (defined __CYGWIN__)
25// workaround for stdlib.h and putenv
26#undef __STRICT_ANSI__
27#endif
28
29#include <cstdlib>
30#include <cstdio>
31#include <cstring>
32#ifndef _MSC_VER
33#include <sys/param.h>
34#endif
35#include <algorithm>
36
37#include "pango_font_info.h"
38#include "commandlineflags.h"
39#include "fileio.h"
40#include "normstrngs.h"
41#include "tlog.h"
42#include "unichar.h"
43#include "util.h"
44#include "pango/pango.h"
45#include "pango/pangocairo.h"
46#include "pango/pangofc-font.h"
47
48STRING_PARAM_FLAG(fontconfig_tmpdir, "/tmp",
49 "Overrides fontconfig default temporary dir");
50
51#ifdef GOOGLE_TESSERACT
52#include "ocr/trainingdata/typesetting/legacy_fonts.h"
53BOOL_PARAM_FLAG(use_only_legacy_fonts, false,
54 "Overrides --fonts_dir and sets the known universe of fonts to"
55 "the list in legacy_fonts.h");
56
57STRING_PARAM_FLAG(fonts_dir, "/auto/ocr-data/tesstraining/fonts",
58 "Overrides system default font location");
59#else
60using std::pair;
61STRING_PARAM_FLAG(fonts_dir, "",
62 "If empty it use system default. Otherwise it overrides"
63 " system default font location");
64#endif
65
66namespace tesseract {
67
68// Default assumed output resolution. Required only for providing font metrics
69// in pixels.
70const int kDefaultResolution = 300;
71
72std::string PangoFontInfo::fonts_dir_;
73std::string PangoFontInfo::cache_dir_;
74
76 : desc_(nullptr), resolution_(kDefaultResolution) {
77 Clear();
78}
79
80PangoFontInfo::PangoFontInfo(const std::string& desc)
81 : desc_(nullptr), resolution_(kDefaultResolution) {
82 if (!ParseFontDescriptionName(desc)) {
83 tprintf("ERROR: Could not parse %s\n", desc.c_str());
84 Clear();
85 }
86}
87
88void PangoFontInfo::Clear() {
89 font_size_ = 0;
90 family_name_.clear();
91 font_type_ = UNKNOWN;
92 if (desc_) {
93 pango_font_description_free(desc_);
94 desc_ = nullptr;
95 }
96}
97
98PangoFontInfo::~PangoFontInfo() { pango_font_description_free(desc_); }
99
101 if (!desc_) return "";
102 char* desc_str = pango_font_description_to_string(desc_);
103 std::string desc_name(desc_str);
104 g_free(desc_str);
105 return desc_name;
106}
107
108// If not already initialized, initializes FontConfig by setting its
109// environment variable and creating a fonts.conf file that points to the
110// FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir.
111/* static */
113 if (fonts_dir_.empty()) {
114 HardInitFontConfig(FLAGS_fonts_dir.c_str(),
115 FLAGS_fontconfig_tmpdir.c_str());
116 }
117}
118
119// Re-initializes font config, whether or not already initialized.
120// If already initialized, any existing cache is deleted, just to be sure.
121/* static */
122void PangoFontInfo::HardInitFontConfig(const std::string& fonts_dir,
123 const std::string& cache_dir) {
124 if (!cache_dir_.empty()) {
126 File::JoinPath(cache_dir_.c_str(), "*cache-?").c_str());
127 }
128 const int MAX_FONTCONF_FILESIZE = 1024;
129 char fonts_conf_template[MAX_FONTCONF_FILESIZE];
130 cache_dir_ = cache_dir;
131 fonts_dir_ = fonts_dir;
132 snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
133 "<?xml version=\"1.0\"?>\n"
134 "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
135 "<fontconfig>\n"
136 "<dir>%s</dir>\n"
137 "<cachedir>%s</cachedir>\n"
138 "<config></config>\n"
139 "</fontconfig>",
140 fonts_dir.c_str(), cache_dir_.c_str());
141 std::string fonts_conf_file = File::JoinPath(cache_dir_.c_str(), "fonts.conf");
142 File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file);
143#ifdef _WIN32
144 std::string env("FONTCONFIG_PATH=");
145 env.append(cache_dir_.c_str());
146 _putenv(env.c_str());
147 _putenv("LANG=en_US.utf8");
148#else
149 setenv("FONTCONFIG_PATH", cache_dir_.c_str(), true);
150 // Fix the locale so that the reported font names are consistent.
151 setenv("LANG", "en_US.utf8", true);
152#endif // _WIN32
153
154 if (FcInitReinitialize() != FcTrue) {
155 tprintf("FcInitiReinitialize failed!!\n");
156 }
158 // Clear Pango's font cache too.
159 pango_cairo_font_map_set_default(nullptr);
160}
161
162static void ListFontFamilies(PangoFontFamily*** families,
163 int* n_families) {
165 PangoFontMap* font_map = pango_cairo_font_map_get_default();
167 pango_font_map_list_families(font_map, families, n_families);
168}
169
170bool PangoFontInfo::ParseFontDescription(const PangoFontDescription *desc) {
171 Clear();
172 const char* family = pango_font_description_get_family(desc);
173 if (!family) {
174 char* desc_str = pango_font_description_to_string(desc);
175 tprintf("WARNING: Could not parse family name from description: '%s'\n",
176 desc_str);
177 g_free(desc_str);
178 return false;
179 }
180 family_name_ = std::string(family);
181 desc_ = pango_font_description_copy(desc);
182
183 // Set font size in points
184 font_size_ = pango_font_description_get_size(desc);
185 if (!pango_font_description_get_size_is_absolute(desc)) {
186 font_size_ /= PANGO_SCALE;
187 }
188
189 return true;
190}
191
192bool PangoFontInfo::ParseFontDescriptionName(const std::string& name) {
193 PangoFontDescription *desc = pango_font_description_from_string(name.c_str());
194 bool success = ParseFontDescription(desc);
195 pango_font_description_free(desc);
196 return success;
197}
198
199// Returns the PangoFont structure corresponding to the closest available font
200// in the font map. Note that if the font is wholly missing, this could
201// correspond to a completely different font family and face.
202PangoFont* PangoFontInfo::ToPangoFont() const {
204 PangoFontMap* font_map = pango_cairo_font_map_get_default();
205 PangoContext* context = pango_context_new();
206 pango_cairo_context_set_resolution(context, resolution_);
207 pango_context_set_font_map(context, font_map);
208 PangoFont* font = nullptr;
209 {
211 font = pango_font_map_load_font(font_map, context, desc_);
212 }
213 g_object_unref(context);
214 return font;
215}
216
217bool PangoFontInfo::CoversUTF8Text(const char* utf8_text, int byte_length) const {
218 PangoFont* font = ToPangoFont();
219 if (font == nullptr) {
220 // Font not found.
221 return false;
222 }
223 PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
224 for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length);
225 it != UNICHAR::end(utf8_text, byte_length);
226 ++it) {
227 if (IsWhitespace(*it) || pango_is_zero_width(*it))
228 continue;
229 if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
230 char tmp[5];
231 int len = it.get_utf8(tmp);
232 tmp[len] = '\0';
233 tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
234 pango_coverage_unref(coverage);
235 g_object_unref(font);
236 return false;
237 }
238 }
239 pango_coverage_unref(coverage);
240 g_object_unref(font);
241 return true;
242}
243
// This variant of strncpy permits src and dest to overlap. Bytes are copied
// in ascending order (first byte first), so the overlap is safe when dest
// starts at or before src.
//
// Copies bytes from src to dest until n bytes have been written or a nul is
// reached in src, then fills whatever remains of the n bytes with nuls.
// Exactly n bytes of dest are written; nothing is written when n is 0, and
// src is never read past its terminating nul. Returns dest.
static char* my_strnmove(char* dest, const char* src, size_t n) {
  char* ret = dest;

  // Copy characters until n reaches zero or the src byte is a nul. Testing
  // before each copy keeps n from wrapping when it starts at zero.
  while (n > 0 && *src != '\0') {
    *dest = *src;
    --n;
    ++dest;
    ++src;
  }

  // If we reached a nul byte and there are more 'n' left, zero them out.
  while (n > 0) {
    *dest = '\0';
    --n;
    ++dest;
  }
  return ret;
}
265
266int PangoFontInfo::DropUncoveredChars(std::string* utf8_text) const {
267 int num_dropped_chars = 0;
268 PangoFont* font = ToPangoFont();
269 if (font == nullptr) {
270 // Font not found, drop all characters.
271 num_dropped_chars = utf8_text->length();
272 utf8_text->resize(0);
273 return num_dropped_chars;
274 }
275 PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
276 // Maintain two iterators that point into the string. For space efficiency, we
277 // will repeatedly copy one covered UTF8 character from one to the other, and
278 // at the end resize the string to the right length.
279 char* out = const_cast<char*>(utf8_text->c_str());
280 const UNICHAR::const_iterator it_begin =
281 UNICHAR::begin(utf8_text->c_str(), utf8_text->length());
282 const UNICHAR::const_iterator it_end =
283 UNICHAR::end(utf8_text->c_str(), utf8_text->length());
284 for (UNICHAR::const_iterator it = it_begin; it != it_end;) {
285 // Skip bad utf-8.
286 if (!it.is_legal()) {
287 ++it; // One suitable error message will still be issued.
288 continue;
289 }
290 int unicode = *it;
291 int utf8_len = it.utf8_len();
292 const char* utf8_char = it.utf8_data();
293 // Move it forward before the data gets modified.
294 ++it;
295 if (!IsWhitespace(unicode) && !pango_is_zero_width(unicode) &&
296 pango_coverage_get(coverage, unicode) != PANGO_COVERAGE_EXACT) {
297 if (TLOG_IS_ON(2)) {
298 UNICHAR unichar(unicode);
299 char* str = unichar.utf8_str();
300 tlog(2, "'%s' (U+%x) not covered by font\n", str, unicode);
301 delete[] str;
302 }
303 ++num_dropped_chars;
304 continue;
305 }
306 my_strnmove(out, utf8_char, utf8_len);
307 out += utf8_len;
308 }
309 pango_coverage_unref(coverage);
310 g_object_unref(font);
311 utf8_text->resize(out - utf8_text->c_str());
312 return num_dropped_chars;
313}
314
315bool PangoFontInfo::GetSpacingProperties(const std::string& utf8_char,
316 int* x_bearing, int* x_advance) const {
317 // Convert to equivalent PangoFont structure
318 PangoFont* font = ToPangoFont();
319 // Find the glyph index in the font for the supplied utf8 character.
320 int total_advance = 0;
321 int min_bearing = 0;
322 // Handle multi-unicode strings by reporting the left-most position of the
323 // x-bearing, and right-most position of the x-advance if the string were to
324 // be rendered.
325 const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_char.c_str(),
326 utf8_char.length());
327 const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_char.c_str(),
328 utf8_char.length());
329 for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
330 PangoGlyph glyph_index = pango_fc_font_get_glyph(
331 reinterpret_cast<PangoFcFont*>(font), *it);
332 if (!glyph_index) {
333 // Glyph for given unicode character doesn't exist in font.
334 g_object_unref(font);
335 return false;
336 }
337 // Find the ink glyph extents for the glyph
338 PangoRectangle ink_rect, logical_rect;
339 pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect);
340 pango_extents_to_pixels(&ink_rect, nullptr);
341 pango_extents_to_pixels(&logical_rect, nullptr);
342
343 int bearing = total_advance + PANGO_LBEARING(ink_rect);
344 if (it == it_begin || bearing < min_bearing) {
345 min_bearing = bearing;
346 }
347 total_advance += PANGO_RBEARING(logical_rect);
348 }
349 *x_bearing = min_bearing;
350 *x_advance = total_advance;
351 g_object_unref(font);
352 return true;
353}
354
355bool PangoFontInfo::CanRenderString(const char* utf8_word, int len) const {
356 std::vector<std::string> graphemes;
357 return CanRenderString(utf8_word, len, &graphemes);
358}
359
360bool PangoFontInfo::CanRenderString(const char* utf8_word, int len,
361 std::vector<std::string>* graphemes) const {
362 if (graphemes) graphemes->clear();
363 // We check for font coverage of the text first, as otherwise Pango could
364 // (undesirably) fall back to another font that does have the required
365 // coverage.
366 if (!CoversUTF8Text(utf8_word, len)) {
367 return false;
368 }
369 // U+25CC dotted circle character that often (but not always) gets rendered
370 // when there is an illegal grapheme sequence.
371 const char32 kDottedCircleGlyph = 9676;
372 bool bad_glyph = false;
373 PangoFontMap* font_map = pango_cairo_font_map_get_default();
374 PangoContext* context = pango_context_new();
375 pango_context_set_font_map(context, font_map);
376 PangoLayout* layout;
377 {
378 // Pango is not releasing the cached layout.
380 layout = pango_layout_new(context);
381 }
382 if (desc_) {
383 pango_layout_set_font_description(layout, desc_);
384 } else {
385 PangoFontDescription *desc = pango_font_description_from_string(
386 DescriptionName().c_str());
387 pango_layout_set_font_description(layout, desc);
388 pango_font_description_free(desc);
389 }
390 pango_layout_set_text(layout, utf8_word, len);
391 PangoLayoutIter* run_iter = nullptr;
392 { // Fontconfig caches some information here that is not freed before exit.
394 run_iter = pango_layout_get_iter(layout);
395 }
396 do {
397 PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
398 if (!run) {
399 tlog(2, "Found end of line nullptr run marker\n");
400 continue;
401 }
402 PangoGlyph dotted_circle_glyph;
403 PangoFont* font = run->item->analysis.font;
404
405#ifdef _WIN32
406 PangoGlyphString* glyphs = pango_glyph_string_new();
407 const char s[] = "\xc2\xa7";
408 pango_shape(s, strlen(s), &(run->item->analysis), glyphs);
409 dotted_circle_glyph = glyphs->glyphs[0].glyph;
410#else // TODO: Do we need separate solution for non win build?
411 dotted_circle_glyph = pango_fc_font_get_glyph(
412 reinterpret_cast<PangoFcFont*>(font), kDottedCircleGlyph);
413#endif
414
415 if (TLOG_IS_ON(2)) {
416 PangoFontDescription* desc = pango_font_describe(font);
417 char* desc_str = pango_font_description_to_string(desc);
418 tlog(2, "Desc of font in run: %s\n", desc_str);
419 g_free(desc_str);
420 pango_font_description_free(desc);
421 }
422
423 PangoGlyphItemIter cluster_iter;
424 gboolean have_cluster;
425 for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
426 run, utf8_word);
427 have_cluster && !bad_glyph;
428 have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
429 const int start_byte_index = cluster_iter.start_index;
430 const int end_byte_index = cluster_iter.end_index;
431 int start_glyph_index = cluster_iter.start_glyph;
432 int end_glyph_index = cluster_iter.end_glyph;
433 std::string cluster_text = std::string(utf8_word + start_byte_index,
434 end_byte_index - start_byte_index);
435 if (graphemes) graphemes->push_back(cluster_text);
436 if (IsUTF8Whitespace(cluster_text.c_str())) {
437 tlog(2, "Skipping whitespace\n");
438 continue;
439 }
440 if (TLOG_IS_ON(2)) {
441 printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ",
442 start_byte_index, end_byte_index,
443 start_glyph_index, end_glyph_index);
444 }
445 for (int i = start_glyph_index,
446 step = (end_glyph_index > start_glyph_index) ? 1 : -1;
447 !bad_glyph && i != end_glyph_index; i+= step) {
448 const bool unknown_glyph =
449 (cluster_iter.glyph_item->glyphs->glyphs[i].glyph &
450 PANGO_GLYPH_UNKNOWN_FLAG);
451 const bool illegal_glyph =
452 (cluster_iter.glyph_item->glyphs->glyphs[i].glyph ==
453 dotted_circle_glyph);
454 bad_glyph = unknown_glyph || illegal_glyph;
455 if (TLOG_IS_ON(2)) {
456 printf("(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph,
457 bad_glyph ? 1 : 0);
458 }
459 }
460 if (TLOG_IS_ON(2)) {
461 printf(" '%s'\n", cluster_text.c_str());
462 }
463 if (bad_glyph)
464 tlog(1, "Found illegal glyph!\n");
465 }
466#ifdef _WIN32
467 pango_glyph_string_free(glyphs);
468#endif
469 } while (!bad_glyph && pango_layout_iter_next_run(run_iter));
470
471 pango_layout_iter_free(run_iter);
472 g_object_unref(context);
473 g_object_unref(layout);
474 if (bad_glyph && graphemes) graphemes->clear();
475 return !bad_glyph;
476}
477
478
479// ------------------------ FontUtils ------------------------------------
480std::vector<std::string> FontUtils::available_fonts_; // cache list
481
482// Returns whether the specified font description is available in the fonts
483// directory.
484//
485// The generated list of font families and faces includes "synthesized" font
486// faces that are not truly loadable. Pango versions >=1.18 have a
487// pango_font_face_is_synthesized method that can be used to prune the list.
488// Until then, we are restricted to using a hack where we try to load the font
489// from the font_map, and then check what we loaded to see if it has the
490// description we expected. If it is not, then the font is deemed unavailable.
491//
492// TODO: This function reports also some not synthesized fonts as not available
493// e.g. 'Bitstream Charter Medium Italic', 'LMRoman17', so we need this hack
494// until other solution is found.
495/* static */
496bool FontUtils::IsAvailableFont(const char* input_query_desc,
497 std::string* best_match) {
498 std::string query_desc(input_query_desc);
499 PangoFontDescription *desc = pango_font_description_from_string(
500 query_desc.c_str());
501 PangoFont* selected_font = nullptr;
502 {
504 PangoFontMap* font_map = pango_cairo_font_map_get_default();
505 PangoContext* context = pango_context_new();
506 pango_context_set_font_map(context, font_map);
507 {
509 selected_font = pango_font_map_load_font(font_map, context, desc);
510 }
511 g_object_unref(context);
512 }
513 if (selected_font == nullptr) {
514 pango_font_description_free(desc);
515 tlog(4, "** Font '%s' failed to load from font map!\n", input_query_desc);
516 return false;
517 }
518 PangoFontDescription* selected_desc = pango_font_describe(selected_font);
519
520 bool equal = pango_font_description_equal(desc, selected_desc);
521 tlog(3, "query weight = %d \t selected weight =%d\n",
522 pango_font_description_get_weight(desc),
523 pango_font_description_get_weight(selected_desc));
524
525 char* selected_desc_str = pango_font_description_to_string(selected_desc);
526 tlog(2, "query_desc: '%s' Selected: '%s'\n", query_desc.c_str(),
527 selected_desc_str);
528 if (!equal && best_match != nullptr) {
529 *best_match = selected_desc_str;
530 // Clip the ending ' 0' if there is one. It seems that, if there is no
531 // point size on the end of the fontname, then Pango always appends ' 0'.
532 int len = best_match->size();
533 if (len > 2 && best_match->at(len - 1) == '0' &&
534 best_match->at(len - 2) == ' ') {
535 *best_match = best_match->substr(0, len - 2);
536 }
537 }
538 g_free(selected_desc_str);
539 pango_font_description_free(selected_desc);
540 g_object_unref(selected_font);
541 pango_font_description_free(desc);
542 if (!equal)
543 tlog(4, "** Font '%s' failed pango_font_description_equal!\n",
544 input_query_desc);
545 return equal;
546}
547
// Returns true when `query` is exactly one of the family names that
// ListAvailableFonts() skips: "Sans", "Serif" or "Monospace". The comparison
// is case-sensitive. A null `query` is treated as "not ignored".
static bool ShouldIgnoreFontFamilyName(const char* query) {
  static const char* const kIgnoredFamilyNames[] = {"Sans", "Serif",
                                                    "Monospace"};
  if (query == nullptr) {
    return false;
  }
  for (const char* ignored : kIgnoredFamilyNames) {
    if (std::strcmp(ignored, query) == 0) {
      return true;
    }
  }
  return false;
}
558
559// Outputs description names of available fonts.
560/* static */
561const std::vector<std::string>& FontUtils::ListAvailableFonts() {
562 if (!available_fonts_.empty()) {
563 return available_fonts_;
564 }
565#ifdef GOOGLE_TESSERACT
566 if (FLAGS_use_only_legacy_fonts) {
567 // Restrict view to list of fonts in legacy_fonts.h
568 tprintf("Using list of legacy fonts only\n");
569 const int kNumFontLists = 4;
570 for (int i = 0; i < kNumFontLists; ++i) {
571 for (int j = 0; kFontlists[i][j] != nullptr; ++j) {
572 available_fonts_.push_back(kFontlists[i][j]);
573 }
574 }
575 return available_fonts_;
576 }
577#endif
578
579 PangoFontFamily** families = nullptr;
580 int n_families = 0;
581 ListFontFamilies(&families, &n_families);
582 for (int i = 0; i < n_families; ++i) {
583 const char* family_name = pango_font_family_get_name(families[i]);
584 tlog(2, "Listing family %s\n", family_name);
585 if (ShouldIgnoreFontFamilyName(family_name)) {
586 continue;
587 }
588
589 int n_faces;
590 PangoFontFace** faces = nullptr;
591 pango_font_family_list_faces(families[i], &faces, &n_faces);
592 for (int j = 0; j < n_faces; ++j) {
593 PangoFontDescription* desc = pango_font_face_describe(faces[j]);
594 char* desc_str = pango_font_description_to_string(desc);
595 // "synthesized" font faces that are not truly loadable, so we skip it
596 if (!pango_font_face_is_synthesized(faces[j])
597 && IsAvailableFont(desc_str)) {
598 available_fonts_.push_back(desc_str);
599 }
600 pango_font_description_free(desc);
601 g_free(desc_str);
602 }
603 g_free(faces);
604 }
605 g_free(families);
606 std::sort(available_fonts_.begin(), available_fonts_.end());
607 return available_fonts_;
608}
609
610
611static void CharCoverageMapToBitmap(PangoCoverage* coverage,
612 std::vector<bool>* unichar_bitmap) {
613 const int kMinUnicodeValue = 33;
614 const int kMaxUnicodeValue = 0x10FFFF;
615 unichar_bitmap->resize(kMaxUnicodeValue + 1, false);
616 // Mark off characters that the font can render.
617 for (int i = kMinUnicodeValue; i <= kMaxUnicodeValue; ++i) {
618 if (IsInterchangeValid(i)) {
619 (*unichar_bitmap)[i]
620 = (pango_coverage_get(coverage, i) == PANGO_COVERAGE_EXACT);
621 }
622 }
623}
624
625/* static */
626void FontUtils::GetAllRenderableCharacters(std::vector<bool>* unichar_bitmap) {
627 const std::vector<std::string>& all_fonts = ListAvailableFonts();
628 return GetAllRenderableCharacters(all_fonts, unichar_bitmap);
629}
630
631/* static */
632void FontUtils::GetAllRenderableCharacters(const std::string& font_name,
633 std::vector<bool>* unichar_bitmap) {
634 PangoFontInfo font_info(font_name);
635 PangoFont* font = font_info.ToPangoFont();
636 if (font != nullptr) {
637 // Font found.
638 PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
639 CharCoverageMapToBitmap(coverage, unichar_bitmap);
640 pango_coverage_unref(coverage);
641 g_object_unref(font);
642 }
643}
644
645/* static */
646void FontUtils::GetAllRenderableCharacters(const std::vector<std::string>& fonts,
647 std::vector<bool>* unichar_bitmap) {
648 // Form the union of coverage maps from the fonts
649 PangoCoverage* all_coverage = pango_coverage_new();
650 tlog(1, "Processing %u fonts\n", static_cast<unsigned>(fonts.size()));
651 for (unsigned i = 0; i < fonts.size(); ++i) {
652 PangoFontInfo font_info(fonts[i]);
653 PangoFont* font = font_info.ToPangoFont();
654 if (font != nullptr) {
655 // Font found.
656 PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
657 // Mark off characters that any font can render.
658 pango_coverage_max(all_coverage, coverage);
659 pango_coverage_unref(coverage);
660 g_object_unref(font);
661 }
662 }
663 CharCoverageMapToBitmap(all_coverage, unichar_bitmap);
664 pango_coverage_unref(all_coverage);
665}
666
667
668// Utilities written to be backward compatible with StringRender
669
670/* static */
671int FontUtils::FontScore(const std::unordered_map<char32, int64_t>& ch_map,
672 const std::string& fontname, int* raw_score,
673 std::vector<bool>* ch_flags) {
674 PangoFontInfo font_info;
675 if (!font_info.ParseFontDescriptionName(fontname)) {
676 tprintf("ERROR: Could not parse %s\n", fontname.c_str());
677 }
678 PangoFont* font = font_info.ToPangoFont();
679 PangoCoverage* coverage = nullptr;
680 if (font != nullptr) coverage = pango_font_get_coverage(font, nullptr);
681 if (ch_flags) {
682 ch_flags->clear();
683 ch_flags->reserve(ch_map.size());
684 }
685 *raw_score = 0;
686 int ok_chars = 0;
687 for (std::unordered_map<char32, int64_t>::const_iterator it = ch_map.begin();
688 it != ch_map.end(); ++it) {
689 bool covered = (coverage != nullptr) && (IsWhitespace(it->first) ||
690 (pango_coverage_get(coverage, it->first)
691 == PANGO_COVERAGE_EXACT));
692 if (covered) {
693 ++(*raw_score);
694 ok_chars += it->second;
695 }
696 if (ch_flags) {
697 ch_flags->push_back(covered);
698 }
699 }
700 pango_coverage_unref(coverage);
701 g_object_unref(font);
702 return ok_chars;
703}
704
705
706/* static */
708 const std::unordered_map<char32, int64_t>& ch_map,
709 std::vector<std::pair<const char*, std::vector<bool> > >* fonts) {
710 const double kMinOKFraction = 0.99;
711 // Weighted fraction of characters that must be renderable in a font to make
712 // it OK even if the raw count is not good.
713 const double kMinWeightedFraction = 0.99995;
714
715 fonts->clear();
716 std::vector<std::vector<bool> > font_flags;
717 std::vector<int> font_scores;
718 std::vector<int> raw_scores;
719 int most_ok_chars = 0;
720 int best_raw_score = 0;
721 const std::vector<std::string>& font_names = FontUtils::ListAvailableFonts();
722 for (unsigned i = 0; i < font_names.size(); ++i) {
723 std::vector<bool> ch_flags;
724 int raw_score = 0;
725 int ok_chars = FontScore(ch_map, font_names[i], &raw_score, &ch_flags);
726 most_ok_chars = std::max(ok_chars, most_ok_chars);
727 best_raw_score = std::max(raw_score, best_raw_score);
728
729 font_flags.push_back(ch_flags);
730 font_scores.push_back(ok_chars);
731 raw_scores.push_back(raw_score);
732 }
733
734 // Now select the fonts with a score above a threshold fraction
735 // of both the raw and weighted best scores. To prevent bogus fonts being
736 // selected for CJK, we require a high fraction (kMinOKFraction = 0.99) of
737 // BOTH weighted and raw scores.
738 // In low character-count scripts, the issue is more getting enough fonts,
739 // when only 1 or 2 might have all those rare dingbats etc in them, so we
740 // allow a font with a very high weighted (coverage) score
741 // (kMinWeightedFraction = 0.99995) to be used even if its raw score is poor.
742 int least_good_enough = static_cast<int>(most_ok_chars * kMinOKFraction);
743 int least_raw_enough = static_cast<int>(best_raw_score * kMinOKFraction);
744 int override_enough = static_cast<int>(most_ok_chars * kMinWeightedFraction);
745
746 std::string font_list;
747 for (unsigned i = 0; i < font_names.size(); ++i) {
748 int score = font_scores[i];
749 int raw_score = raw_scores[i];
750 if ((score >= least_good_enough && raw_score >= least_raw_enough) ||
751 score >= override_enough) {
752 fonts->push_back(std::make_pair(font_names[i].c_str(), font_flags[i]));
753 tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n",
754 font_names[i].c_str(),
755 100.0 * score / most_ok_chars,
756 raw_score, 100.0 * raw_score / best_raw_score);
757 font_list += font_names[i];
758 font_list += "\n";
759 } else if (score >= least_good_enough || raw_score >= least_raw_enough) {
760 tlog(1, "Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n",
761 font_names[i].c_str(),
762 100.0 * score / most_ok_chars,
763 raw_score, 100.0 * raw_score / best_raw_score);
764 }
765 }
766 return font_list;
767}
768
769/* static */
770bool FontUtils::SelectFont(const char* utf8_word, const int utf8_len,
771 std::string* font_name, std::vector<std::string>* graphemes) {
772 return SelectFont(utf8_word, utf8_len, ListAvailableFonts(), font_name,
773 graphemes);
774}
775
776/* static */
777bool FontUtils::SelectFont(const char* utf8_word, const int utf8_len,
778 const std::vector<std::string>& all_fonts,
779 std::string* font_name, std::vector<std::string>* graphemes) {
780 if (font_name) font_name->clear();
781 if (graphemes) graphemes->clear();
782 for (unsigned i = 0; i < all_fonts.size(); ++i) {
783 PangoFontInfo font;
784 std::vector<std::string> found_graphemes;
785 ASSERT_HOST_MSG(font.ParseFontDescriptionName(all_fonts[i]),
786 "Could not parse font desc name %s\n",
787 all_fonts[i].c_str());
788 if (font.CanRenderString(utf8_word, utf8_len, &found_graphemes)) {
789 if (graphemes) graphemes->swap(found_graphemes);
790 if (font_name) *font_name = all_fonts[i];
791 return true;
792 }
793 }
794 return false;
795}
796
797// PangoFontInfo is reinitialized, so clear the static list of fonts.
798/* static */
799void FontUtils::ReInit() { available_fonts_.clear(); }
800
801// Print info about used font backend
802/* static */
804 PangoFontMap* font_map = pango_cairo_font_map_get_default();
805 if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap*>(
806 font_map)) == CAIRO_FONT_TYPE_TOY) {
807 printf("Using CAIRO_FONT_TYPE_TOY.\n");
808 } else if (pango_cairo_font_map_get_font_type(
809 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
810 CAIRO_FONT_TYPE_FT) {
811 printf("Using CAIRO_FONT_TYPE_FT.\n");
812 } else if (pango_cairo_font_map_get_font_type(
813 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
814 CAIRO_FONT_TYPE_WIN32) {
815 printf("Using CAIRO_FONT_TYPE_WIN32.\n");
816 } else if (pango_cairo_font_map_get_font_type(
817 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
818 CAIRO_FONT_TYPE_QUARTZ) {
819 printf("Using CAIRO_FONT_TYPE_QUARTZ.\n");
820 } else if (pango_cairo_font_map_get_font_type(
821 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
822 CAIRO_FONT_TYPE_USER) {
823 printf("Using CAIRO_FONT_TYPE_USER.\n");
824 } else if (!font_map) {
825 printf("Can not create pango cairo font map!\n");
826 }
827}
828
829} // namespace tesseract
#define ASSERT_HOST_MSG(x,...)
Definition: errcode.h:92
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
#define BOOL_PARAM_FLAG(name, val, comment)
#define STRING_PARAM_FLAG(name, val, comment)
#define TLOG_IS_ON(level)
Definition: tlog.h:39
#define tlog(level,...)
Definition: tlog.h:33
#define DISABLE_HEAP_LEAK_CHECK
Definition: util.h:61
bool IsUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:229
const int kDefaultResolution
signed int char32
Definition: unichar.h:51
bool IsInterchangeValid(const char32 ch)
Definition: normstrngs.cpp:253
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:223
char * utf8_str() const
Definition: unichar.cpp:129
static const_iterator begin(const char *utf8_str, int byte_length)
Definition: unichar.cpp:204
static const_iterator end(const char *utf8_str, int byte_length)
Definition: unichar.cpp:208
static bool DeleteMatchingFiles(const char *pattern)
Definition: fileio.cpp:121
static std::string JoinPath(const std::string &prefix, const std::string &suffix)
Definition: fileio.cpp:86
static void WriteStringToFileOrDie(const std::string &str, const std::string &filename)
Definition: fileio.cpp:56
int DropUncoveredChars(std::string *utf8_text) const
bool CoversUTF8Text(const char *utf8_text, int byte_length) const
bool ParseFontDescriptionName(const std::string &name)
std::string DescriptionName() const
bool CanRenderString(const char *utf8_word, int len, std::vector< std::string > *graphemes) const
bool GetSpacingProperties(const std::string &utf8_char, int *x_bearing, int *x_advance) const
static void HardInitFontConfig(const std::string &fonts_dir, const std::string &cache_dir)
static int FontScore(const std::unordered_map< char32, int64_t > &ch_map, const std::string &fontname, int *raw_score, std::vector< bool > *ch_flags)
static std::string BestFonts(const std::unordered_map< char32, int64_t > &ch_map, std::vector< std::pair< const char *, std::vector< bool > > > *font_flag)
static bool SelectFont(const char *utf8_word, const int utf8_len, std::string *font_name, std::vector< std::string > *graphemes)
static void GetAllRenderableCharacters(std::vector< bool > *unichar_bitmap)
static bool IsAvailableFont(const char *font_desc)
static void PangoFontTypeInfo()
static const std::vector< std::string > & ListAvailableFonts()