tesseract 4.1.1
Loading...
Searching...
No Matches
tesseract::PangoFontInfo Class Reference

#include <pango_font_info.h>

Public Types

enum  FontTypeEnum { UNKNOWN , SERIF , SANS_SERIF , DECORATIVE }
 

Public Member Functions

 PangoFontInfo ()
 
 ~PangoFontInfo ()
 
 PangoFontInfo (const std::string &name)
 
bool ParseFontDescriptionName (const std::string &name)
 
bool CoversUTF8Text (const char *utf8_text, int byte_length) const
 
int DropUncoveredChars (std::string *utf8_text) const
 
bool CanRenderString (const char *utf8_word, int len, std::vector< std::string > *graphemes) const
 
bool CanRenderString (const char *utf8_word, int len) const
 
bool GetSpacingProperties (const std::string &utf8_char, int *x_bearing, int *x_advance) const
 
std::string DescriptionName () const
 
const std::string & family_name () const
 
int font_size () const
 
FontTypeEnum font_type () const
 
int resolution () const
 
void set_resolution (const int resolution)
 

Static Public Member Functions

static void SoftInitFontConfig ()
 
static void HardInitFontConfig (const std::string &fonts_dir, const std::string &cache_dir)
 

Friends

class FontUtils
 

Detailed Description

Definition at line 40 of file pango_font_info.h.

Member Enumeration Documentation

◆ FontTypeEnum

Enumerator
UNKNOWN 
SERIF 
SANS_SERIF 
DECORATIVE 

Definition at line 42 of file pango_font_info.h.

Constructor & Destructor Documentation

◆ PangoFontInfo() [1/2]

tesseract::PangoFontInfo::PangoFontInfo ( )

Definition at line 75 of file pango_font_info.cpp.

76 : desc_(nullptr), resolution_(kDefaultResolution) {
77 Clear();
78}
const int kDefaultResolution

◆ ~PangoFontInfo()

tesseract::PangoFontInfo::~PangoFontInfo ( )

Definition at line 98 of file pango_font_info.cpp.

98{ pango_font_description_free(desc_); }

◆ PangoFontInfo() [2/2]

tesseract::PangoFontInfo::PangoFontInfo ( const std::string &  name)
explicit

Definition at line 80 of file pango_font_info.cpp.

81 : desc_(nullptr), resolution_(kDefaultResolution) {
82 if (!ParseFontDescriptionName(desc)) {
83 tprintf("ERROR: Could not parse %s\n", desc.c_str());
84 Clear();
85 }
86}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
bool ParseFontDescriptionName(const std::string &name)

Member Function Documentation

◆ CanRenderString() [1/2]

bool tesseract::PangoFontInfo::CanRenderString ( const char *  utf8_word,
int  len 
) const

Definition at line 355 of file pango_font_info.cpp.

355 {
356 std::vector<std::string> graphemes;
357 return CanRenderString(utf8_word, len, &graphemes);
358}
bool CanRenderString(const char *utf8_word, int len, std::vector< std::string > *graphemes) const

◆ CanRenderString() [2/2]

bool tesseract::PangoFontInfo::CanRenderString ( const char *  utf8_word,
int  len,
std::vector< std::string > *  graphemes 
) const

Definition at line 360 of file pango_font_info.cpp.

361 {
362 if (graphemes) graphemes->clear();
363 // We check for font coverage of the text first, as otherwise Pango could
364 // (undesirably) fall back to another font that does have the required
365 // coverage.
366 if (!CoversUTF8Text(utf8_word, len)) {
367 return false;
368 }
369 // U+25CC dotted circle character that often (but not always) gets rendered
370 // when there is an illegal grapheme sequence.
371 const char32 kDottedCircleGlyph = 9676;
372 bool bad_glyph = false;
373 PangoFontMap* font_map = pango_cairo_font_map_get_default();
374 PangoContext* context = pango_context_new();
375 pango_context_set_font_map(context, font_map);
376 PangoLayout* layout;
377 {
378 // Pango is not releasing the cached layout.
379 DISABLE_HEAP_LEAK_CHECK;
380 layout = pango_layout_new(context);
381 }
382 if (desc_) {
383 pango_layout_set_font_description(layout, desc_);
384 } else {
385 PangoFontDescription *desc = pango_font_description_from_string(
386 DescriptionName().c_str());
387 pango_layout_set_font_description(layout, desc);
388 pango_font_description_free(desc);
389 }
390 pango_layout_set_text(layout, utf8_word, len);
391 PangoLayoutIter* run_iter = nullptr;
392 { // Fontconfig caches some information here that is not freed before exit.
393 DISABLE_HEAP_LEAK_CHECK;
394 run_iter = pango_layout_get_iter(layout);
395 }
396 do {
397 PangoLayoutRun* run = pango_layout_iter_get_run_readonly(run_iter);
398 if (!run) {
399 tlog(2, "Found end of line nullptr run marker\n");
400 continue;
401 }
402 PangoGlyph dotted_circle_glyph;
403 PangoFont* font = run->item->analysis.font;
404
405#ifdef _WIN32
406 PangoGlyphString* glyphs = pango_glyph_string_new();
407 const char s[] = "\xc2\xa7";
408 pango_shape(s, strlen(s), &(run->item->analysis), glyphs);
409 dotted_circle_glyph = glyphs->glyphs[0].glyph;
410#else // TODO: Do we need separate solution for non win build?
411 dotted_circle_glyph = pango_fc_font_get_glyph(
412 reinterpret_cast<PangoFcFont*>(font), kDottedCircleGlyph);
413#endif
414
415 if (TLOG_IS_ON(2)) {
416 PangoFontDescription* desc = pango_font_describe(font);
417 char* desc_str = pango_font_description_to_string(desc);
418 tlog(2, "Desc of font in run: %s\n", desc_str);
419 g_free(desc_str);
420 pango_font_description_free(desc);
421 }
422
423 PangoGlyphItemIter cluster_iter;
424 gboolean have_cluster;
425 for (have_cluster = pango_glyph_item_iter_init_start(&cluster_iter,
426 run, utf8_word);
427 have_cluster && !bad_glyph;
428 have_cluster = pango_glyph_item_iter_next_cluster(&cluster_iter)) {
429 const int start_byte_index = cluster_iter.start_index;
430 const int end_byte_index = cluster_iter.end_index;
431 int start_glyph_index = cluster_iter.start_glyph;
432 int end_glyph_index = cluster_iter.end_glyph;
433 std::string cluster_text = std::string(utf8_word + start_byte_index,
434 end_byte_index - start_byte_index);
435 if (graphemes) graphemes->push_back(cluster_text);
436 if (IsUTF8Whitespace(cluster_text.c_str())) {
437 tlog(2, "Skipping whitespace\n");
438 continue;
439 }
440 if (TLOG_IS_ON(2)) {
441 printf("start_byte=%d end_byte=%d start_glyph=%d end_glyph=%d ",
442 start_byte_index, end_byte_index,
443 start_glyph_index, end_glyph_index);
444 }
445 for (int i = start_glyph_index,
446 step = (end_glyph_index > start_glyph_index) ? 1 : -1;
447 !bad_glyph && i != end_glyph_index; i+= step) {
448 const bool unknown_glyph =
449 (cluster_iter.glyph_item->glyphs->glyphs[i].glyph &
450 PANGO_GLYPH_UNKNOWN_FLAG);
451 const bool illegal_glyph =
452 (cluster_iter.glyph_item->glyphs->glyphs[i].glyph ==
453 dotted_circle_glyph);
454 bad_glyph = unknown_glyph || illegal_glyph;
455 if (TLOG_IS_ON(2)) {
456 printf("(%d=%d)", cluster_iter.glyph_item->glyphs->glyphs[i].glyph,
457 bad_glyph ? 1 : 0);
458 }
459 }
460 if (TLOG_IS_ON(2)) {
461 printf(" '%s'\n", cluster_text.c_str());
462 }
463 if (bad_glyph)
464 tlog(1, "Found illegal glyph!\n");
465 }
466#ifdef _WIN32
467 pango_glyph_string_free(glyphs);
468#endif
469 } while (!bad_glyph && pango_layout_iter_next_run(run_iter));
470
471 pango_layout_iter_free(run_iter);
472 g_object_unref(context);
473 g_object_unref(layout);
474 if (bad_glyph && graphemes) graphemes->clear();
475 return !bad_glyph;
476}
signed int char32
#define TLOG_IS_ON(level)
Definition: tlog.h:39
#define tlog(level,...)
Definition: tlog.h:33
#define DISABLE_HEAP_LEAK_CHECK
Definition: util.h:61
bool IsUTF8Whitespace(const char *text)
Definition: normstrngs.cpp:229
bool CoversUTF8Text(const char *utf8_text, int byte_length) const
std::string DescriptionName() const

◆ CoversUTF8Text()

bool tesseract::PangoFontInfo::CoversUTF8Text ( const char *  utf8_text,
int  byte_length 
) const

Definition at line 217 of file pango_font_info.cpp.

217 {
218 PangoFont* font = ToPangoFont();
219 if (font == nullptr) {
220 // Font not found.
221 return false;
222 }
223 PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
224 for (UNICHAR::const_iterator it = UNICHAR::begin(utf8_text, byte_length);
225 it != UNICHAR::end(utf8_text, byte_length);
226 ++it) {
227 if (IsWhitespace(*it) || pango_is_zero_width(*it))
228 continue;
229 if (pango_coverage_get(coverage, *it) != PANGO_COVERAGE_EXACT) {
230 char tmp[5];
231 int len = it.get_utf8(tmp);
232 tmp[len] = '\0';
233 tlog(2, "'%s' (U+%x) not covered by font\n", tmp, *it);
234 pango_coverage_unref(coverage);
235 g_object_unref(font);
236 return false;
237 }
238 }
239 pango_coverage_unref(coverage);
240 g_object_unref(font);
241 return true;
242}
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:223
static const_iterator begin(const char *utf8_str, int byte_length)
Definition: unichar.cpp:204
static const_iterator end(const char *utf8_str, int byte_length)
Definition: unichar.cpp:208

◆ DescriptionName()

std::string tesseract::PangoFontInfo::DescriptionName ( ) const

Definition at line 100 of file pango_font_info.cpp.

100 {
101 if (!desc_) return "";
102 char* desc_str = pango_font_description_to_string(desc_);
103 std::string desc_name(desc_str);
104 g_free(desc_str);
105 return desc_name;
106}

◆ DropUncoveredChars()

int tesseract::PangoFontInfo::DropUncoveredChars ( std::string *  utf8_text) const

Definition at line 266 of file pango_font_info.cpp.

266 {
267 int num_dropped_chars = 0;
268 PangoFont* font = ToPangoFont();
269 if (font == nullptr) {
270 // Font not found, drop all characters.
271 num_dropped_chars = utf8_text->length();
272 utf8_text->resize(0);
273 return num_dropped_chars;
274 }
275 PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
276 // Maintain two iterators that point into the string. For space efficiency, we
277 // will repeatedly copy one covered UTF8 character from one to the other, and
278 // at the end resize the string to the right length.
279 char* out = const_cast<char*>(utf8_text->c_str());
280 const UNICHAR::const_iterator it_begin =
281 UNICHAR::begin(utf8_text->c_str(), utf8_text->length());
282 const UNICHAR::const_iterator it_end =
283 UNICHAR::end(utf8_text->c_str(), utf8_text->length());
284 for (UNICHAR::const_iterator it = it_begin; it != it_end;) {
285 // Skip bad utf-8.
286 if (!it.is_legal()) {
287 ++it; // One suitable error message will still be issued.
288 continue;
289 }
290 int unicode = *it;
291 int utf8_len = it.utf8_len();
292 const char* utf8_char = it.utf8_data();
293 // Move it forward before the data gets modified.
294 ++it;
295 if (!IsWhitespace(unicode) && !pango_is_zero_width(unicode) &&
296 pango_coverage_get(coverage, unicode) != PANGO_COVERAGE_EXACT) {
297 if (TLOG_IS_ON(2)) {
298 UNICHAR unichar(unicode);
299 char* str = unichar.utf8_str();
300 tlog(2, "'%s' (U+%x) not covered by font\n", str, unicode);
301 delete[] str;
302 }
303 ++num_dropped_chars;
304 continue;
305 }
306 my_strnmove(out, utf8_char, utf8_len);
307 out += utf8_len;
308 }
309 pango_coverage_unref(coverage);
310 g_object_unref(font);
311 utf8_text->resize(out - utf8_text->c_str());
312 return num_dropped_chars;
313}

◆ family_name()

const std::string & tesseract::PangoFontInfo::family_name ( ) const
inline

Definition at line 101 of file pango_font_info.h.

101{ return family_name_; }

◆ font_size()

int tesseract::PangoFontInfo::font_size ( ) const
inline

Definition at line 103 of file pango_font_info.h.

103{ return font_size_; }

◆ font_type()

FontTypeEnum tesseract::PangoFontInfo::font_type ( ) const
inline

Definition at line 104 of file pango_font_info.h.

104{ return font_type_; }

◆ GetSpacingProperties()

bool tesseract::PangoFontInfo::GetSpacingProperties ( const std::string &  utf8_char,
int *  x_bearing,
int *  x_advance 
) const

Definition at line 315 of file pango_font_info.cpp.

316 {
317 // Convert to equivalent PangoFont structure
318 PangoFont* font = ToPangoFont();
319 // Find the glyph index in the font for the supplied utf8 character.
320 int total_advance = 0;
321 int min_bearing = 0;
322 // Handle multi-unicode strings by reporting the left-most position of the
323 // x-bearing, and right-most position of the x-advance if the string were to
324 // be rendered.
325 const UNICHAR::const_iterator it_begin = UNICHAR::begin(utf8_char.c_str(),
326 utf8_char.length());
327 const UNICHAR::const_iterator it_end = UNICHAR::end(utf8_char.c_str(),
328 utf8_char.length());
329 for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
330 PangoGlyph glyph_index = pango_fc_font_get_glyph(
331 reinterpret_cast<PangoFcFont*>(font), *it);
332 if (!glyph_index) {
333 // Glyph for given unicode character doesn't exist in font.
334 g_object_unref(font);
335 return false;
336 }
337 // Find the ink glyph extents for the glyph
338 PangoRectangle ink_rect, logical_rect;
339 pango_font_get_glyph_extents(font, glyph_index, &ink_rect, &logical_rect);
340 pango_extents_to_pixels(&ink_rect, nullptr);
341 pango_extents_to_pixels(&logical_rect, nullptr);
342
343 int bearing = total_advance + PANGO_LBEARING(ink_rect);
344 if (it == it_begin || bearing < min_bearing) {
345 min_bearing = bearing;
346 }
347 total_advance += PANGO_RBEARING(logical_rect);
348 }
349 *x_bearing = min_bearing;
350 *x_advance = total_advance;
351 g_object_unref(font);
352 return true;
353}

◆ HardInitFontConfig()

void tesseract::PangoFontInfo::HardInitFontConfig ( const std::string &  fonts_dir,
const std::string &  cache_dir 
)
static

Definition at line 122 of file pango_font_info.cpp.

123 {
124 if (!cache_dir_.empty()) {
125 File::DeleteMatchingFiles(
126 File::JoinPath(cache_dir_.c_str(), "*cache-?").c_str());
127 }
128 const int MAX_FONTCONF_FILESIZE = 1024;
129 char fonts_conf_template[MAX_FONTCONF_FILESIZE];
130 cache_dir_ = cache_dir;
131 fonts_dir_ = fonts_dir;
132 snprintf(fonts_conf_template, MAX_FONTCONF_FILESIZE,
133 "<?xml version=\"1.0\"?>\n"
134 "<!DOCTYPE fontconfig SYSTEM \"fonts.dtd\">\n"
135 "<fontconfig>\n"
136 "<dir>%s</dir>\n"
137 "<cachedir>%s</cachedir>\n"
138 "<config></config>\n"
139 "</fontconfig>",
140 fonts_dir.c_str(), cache_dir_.c_str());
141 std::string fonts_conf_file = File::JoinPath(cache_dir_.c_str(), "fonts.conf");
142 File::WriteStringToFileOrDie(fonts_conf_template, fonts_conf_file);
143#ifdef _WIN32
144 std::string env("FONTCONFIG_PATH=");
145 env.append(cache_dir_.c_str());
146 _putenv(env.c_str());
147 _putenv("LANG=en_US.utf8");
148#else
149 setenv("FONTCONFIG_PATH", cache_dir_.c_str(), true);
150 // Fix the locale so that the reported font names are consistent.
151 setenv("LANG", "en_US.utf8", true);
152#endif // _WIN32
153
154 if (FcInitReinitialize() != FcTrue) {
155 tprintf("FcInitiReinitialize failed!!\n");
156 }
157 FontUtils::ReInit();
158 // Clear Pango's font cache too.
159 pango_cairo_font_map_set_default(nullptr);
160}
static bool DeleteMatchingFiles(const char *pattern)
Definition: fileio.cpp:121
static std::string JoinPath(const std::string &prefix, const std::string &suffix)
Definition: fileio.cpp:86
static void WriteStringToFileOrDie(const std::string &str, const std::string &filename)
Definition: fileio.cpp:56

◆ ParseFontDescriptionName()

bool tesseract::PangoFontInfo::ParseFontDescriptionName ( const std::string &  name)

Definition at line 192 of file pango_font_info.cpp.

192 {
193 PangoFontDescription *desc = pango_font_description_from_string(name.c_str());
194 bool success = ParseFontDescription(desc);
195 pango_font_description_free(desc);
196 return success;
197}

◆ resolution()

int tesseract::PangoFontInfo::resolution ( ) const
inline

Definition at line 106 of file pango_font_info.h.

106{ return resolution_; }

◆ set_resolution()

void tesseract::PangoFontInfo::set_resolution ( const int  resolution)
inline

Definition at line 107 of file pango_font_info.h.

107 {
108 resolution_ = resolution;
109 }

◆ SoftInitFontConfig()

void tesseract::PangoFontInfo::SoftInitFontConfig ( )
static

Definition at line 112 of file pango_font_info.cpp.

112 {
113 if (fonts_dir_.empty()) {
114 HardInitFontConfig(FLAGS_fonts_dir.c_str(),
115 FLAGS_fontconfig_tmpdir.c_str());
116 }
117}
static void HardInitFontConfig(const std::string &fonts_dir, const std::string &cache_dir)

Friends And Related Function Documentation

◆ FontUtils

friend class FontUtils
friend

Definition at line 112 of file pango_font_info.h.


The documentation for this class was generated from the following files: