tesseract 4.1.1
Loading...
Searching...
No Matches
tesseract::FontUtils Class Reference

#include <pango_font_info.h>

Static Public Member Functions

static bool IsAvailableFont (const char *font_desc)
 
static bool IsAvailableFont (const char *font_desc, std::string *best_match)
 
static const std::vector< std::string > & ListAvailableFonts ()
 
static bool SelectFont (const char *utf8_word, const int utf8_len, std::string *font_name, std::vector< std::string > *graphemes)
 
static bool SelectFont (const char *utf8_word, const int utf8_len, const std::vector< std::string > &all_fonts, std::string *font_name, std::vector< std::string > *graphemes)
 
static void GetAllRenderableCharacters (std::vector< bool > *unichar_bitmap)
 
static void GetAllRenderableCharacters (const std::vector< std::string > &font_names, std::vector< bool > *unichar_bitmap)
 
static void GetAllRenderableCharacters (const std::string &font_name, std::vector< bool > *unichar_bitmap)
 
static std::string BestFonts (const std::unordered_map< char32, int64_t > &ch_map, std::vector< std::pair< const char *, std::vector< bool > > > *font_flag)
 
static int FontScore (const std::unordered_map< char32, int64_t > &ch_map, const std::string &fontname, int *raw_score, std::vector< bool > *ch_flags)
 
static void ReInit ()
 
static void PangoFontTypeInfo ()
 

Detailed Description

Definition at line 146 of file pango_font_info.h.

Member Function Documentation

◆ BestFonts()

std::string tesseract::FontUtils::BestFonts ( const std::unordered_map< char32, int64_t > &  ch_map,
std::vector< std::pair< const char *, std::vector< bool > > > *  font_flag 
)
static

Definition at line 707 of file pango_font_info.cpp.

709 {
710 const double kMinOKFraction = 0.99;
711 // Weighted fraction of characters that must be renderable in a font to make
712 // it OK even if the raw count is not good.
713 const double kMinWeightedFraction = 0.99995;
714
715 fonts->clear();
716 std::vector<std::vector<bool> > font_flags;
717 std::vector<int> font_scores;
718 std::vector<int> raw_scores;
719 int most_ok_chars = 0;
720 int best_raw_score = 0;
721 const std::vector<std::string>& font_names = FontUtils::ListAvailableFonts();
722 for (unsigned i = 0; i < font_names.size(); ++i) {
723 std::vector<bool> ch_flags;
724 int raw_score = 0;
725 int ok_chars = FontScore(ch_map, font_names[i], &raw_score, &ch_flags);
726 most_ok_chars = std::max(ok_chars, most_ok_chars);
727 best_raw_score = std::max(raw_score, best_raw_score);
728
729 font_flags.push_back(ch_flags);
730 font_scores.push_back(ok_chars);
731 raw_scores.push_back(raw_score);
732 }
733
734 // Now select the fonts with a score above a threshold fraction
735 // of both the raw and weighted best scores. To prevent bogus fonts being
736 // selected for CJK, we require a high fraction (kMinOKFraction = 0.99) of
737 // BOTH weighted and raw scores.
738 // In low character-count scripts, the issue is more getting enough fonts,
739 // when only 1 or 2 might have all those rare dingbats etc in them, so we
740 // allow a font with a very high weighted (coverage) score
741 // (kMinWeightedFraction = 0.99995) to be used even if its raw score is poor.
742 int least_good_enough = static_cast<int>(most_ok_chars * kMinOKFraction);
743 int least_raw_enough = static_cast<int>(best_raw_score * kMinOKFraction);
744 int override_enough = static_cast<int>(most_ok_chars * kMinWeightedFraction);
745
746 std::string font_list;
747 for (unsigned i = 0; i < font_names.size(); ++i) {
748 int score = font_scores[i];
749 int raw_score = raw_scores[i];
750 if ((score >= least_good_enough && raw_score >= least_raw_enough) ||
751 score >= override_enough) {
752 fonts->push_back(std::make_pair(font_names[i].c_str(), font_flags[i]));
753 tlog(1, "OK font %s = %.4f%%, raw = %d = %.2f%%\n",
754 font_names[i].c_str(),
755 100.0 * score / most_ok_chars,
756 raw_score, 100.0 * raw_score / best_raw_score);
757 font_list += font_names[i];
758 font_list += "\n";
759 } else if (score >= least_good_enough || raw_score >= least_raw_enough) {
760 tlog(1, "Runner-up font %s = %.4f%%, raw = %d = %.2f%%\n",
761 font_names[i].c_str(),
762 100.0 * score / most_ok_chars,
763 raw_score, 100.0 * raw_score / best_raw_score);
764 }
765 }
766 return font_list;
767}
#define tlog(level,...)
Definition: tlog.h:33
static int FontScore(const std::unordered_map< char32, int64_t > &ch_map, const std::string &fontname, int *raw_score, std::vector< bool > *ch_flags)
static const std::vector< std::string > & ListAvailableFonts()

◆ FontScore()

int tesseract::FontUtils::FontScore ( const std::unordered_map< char32, int64_t > &  ch_map,
const std::string &  fontname,
int *  raw_score,
std::vector< bool > *  ch_flags 
)
static

Definition at line 671 of file pango_font_info.cpp.

673 {
674 PangoFontInfo font_info;
675 if (!font_info.ParseFontDescriptionName(fontname)) {
676 tprintf("ERROR: Could not parse %s\n", fontname.c_str());
677 }
678 PangoFont* font = font_info.ToPangoFont();
679 PangoCoverage* coverage = nullptr;
680 if (font != nullptr) coverage = pango_font_get_coverage(font, nullptr);
681 if (ch_flags) {
682 ch_flags->clear();
683 ch_flags->reserve(ch_map.size());
684 }
685 *raw_score = 0;
686 int ok_chars = 0;
687 for (std::unordered_map<char32, int64_t>::const_iterator it = ch_map.begin();
688 it != ch_map.end(); ++it) {
689 bool covered = (coverage != nullptr) && (IsWhitespace(it->first) ||
690 (pango_coverage_get(coverage, it->first)
691 == PANGO_COVERAGE_EXACT));
692 if (covered) {
693 ++(*raw_score);
694 ok_chars += it->second;
695 }
696 if (ch_flags) {
697 ch_flags->push_back(covered);
698 }
699 }
700 pango_coverage_unref(coverage);
701 g_object_unref(font);
702 return ok_chars;
703}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
bool IsWhitespace(const char32 ch)
Definition: normstrngs.cpp:223

◆ GetAllRenderableCharacters() [1/3]

void tesseract::FontUtils::GetAllRenderableCharacters ( const std::string &  font_name,
std::vector< bool > *  unichar_bitmap 
)
static

Definition at line 632 of file pango_font_info.cpp.

633 {
634 PangoFontInfo font_info(font_name);
635 PangoFont* font = font_info.ToPangoFont();
636 if (font != nullptr) {
637 // Font found.
638 PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
639 CharCoverageMapToBitmap(coverage, unichar_bitmap);
640 pango_coverage_unref(coverage);
641 g_object_unref(font);
642 }
643}

◆ GetAllRenderableCharacters() [2/3]

void tesseract::FontUtils::GetAllRenderableCharacters ( const std::vector< std::string > &  font_names,
std::vector< bool > *  unichar_bitmap 
)
static

Definition at line 646 of file pango_font_info.cpp.

647 {
648 // Form the union of coverage maps from the fonts
649 PangoCoverage* all_coverage = pango_coverage_new();
650 tlog(1, "Processing %u fonts\n", static_cast<unsigned>(fonts.size()));
651 for (unsigned i = 0; i < fonts.size(); ++i) {
652 PangoFontInfo font_info(fonts[i]);
653 PangoFont* font = font_info.ToPangoFont();
654 if (font != nullptr) {
655 // Font found.
656 PangoCoverage* coverage = pango_font_get_coverage(font, nullptr);
657 // Mark off characters that any font can render.
658 pango_coverage_max(all_coverage, coverage);
659 pango_coverage_unref(coverage);
660 g_object_unref(font);
661 }
662 }
663 CharCoverageMapToBitmap(all_coverage, unichar_bitmap);
664 pango_coverage_unref(all_coverage);
665}

◆ GetAllRenderableCharacters() [3/3]

void tesseract::FontUtils::GetAllRenderableCharacters ( std::vector< bool > *  unichar_bitmap)
static

Definition at line 626 of file pango_font_info.cpp.

626 {
627 const std::vector<std::string>& all_fonts = ListAvailableFonts();
628 return GetAllRenderableCharacters(all_fonts, unichar_bitmap);
629}
static void GetAllRenderableCharacters(std::vector< bool > *unichar_bitmap)

◆ IsAvailableFont() [1/2]

static bool tesseract::FontUtils::IsAvailableFont ( const char *  font_desc)
inline static

Definition at line 150 of file pango_font_info.h.

150 {
151 return IsAvailableFont(font_desc, nullptr);
152 }
static bool IsAvailableFont(const char *font_desc)

◆ IsAvailableFont() [2/2]

bool tesseract::FontUtils::IsAvailableFont ( const char *  font_desc,
std::string *  best_match 
)
static

Definition at line 496 of file pango_font_info.cpp.

497 {
498 std::string query_desc(input_query_desc);
499 PangoFontDescription *desc = pango_font_description_from_string(
500 query_desc.c_str());
501 PangoFont* selected_font = nullptr;
502 {
504 PangoFontMap* font_map = pango_cairo_font_map_get_default();
505 PangoContext* context = pango_context_new();
506 pango_context_set_font_map(context, font_map);
507 {
509 selected_font = pango_font_map_load_font(font_map, context, desc);
510 }
511 g_object_unref(context);
512 }
513 if (selected_font == nullptr) {
514 pango_font_description_free(desc);
515 tlog(4, "** Font '%s' failed to load from font map!\n", input_query_desc);
516 return false;
517 }
518 PangoFontDescription* selected_desc = pango_font_describe(selected_font);
519
520 bool equal = pango_font_description_equal(desc, selected_desc);
521 tlog(3, "query weight = %d \t selected weight =%d\n",
522 pango_font_description_get_weight(desc),
523 pango_font_description_get_weight(selected_desc));
524
525 char* selected_desc_str = pango_font_description_to_string(selected_desc);
526 tlog(2, "query_desc: '%s' Selected: '%s'\n", query_desc.c_str(),
527 selected_desc_str);
528 if (!equal && best_match != nullptr) {
529 *best_match = selected_desc_str;
530 // Clip the ending ' 0' if there is one. It seems that, if there is no
531 // point size on the end of the fontname, then Pango always appends ' 0'.
532 int len = best_match->size();
533 if (len > 2 && best_match->at(len - 1) == '0' &&
534 best_match->at(len - 2) == ' ') {
535 *best_match = best_match->substr(0, len - 2);
536 }
537 }
538 g_free(selected_desc_str);
539 pango_font_description_free(selected_desc);
540 g_object_unref(selected_font);
541 pango_font_description_free(desc);
542 if (!equal)
543 tlog(4, "** Font '%s' failed pango_font_description_equal!\n",
544 input_query_desc);
545 return equal;
546}
#define DISABLE_HEAP_LEAK_CHECK
Definition: util.h:61

◆ ListAvailableFonts()

const std::vector< std::string > & tesseract::FontUtils::ListAvailableFonts ( )
static

Definition at line 561 of file pango_font_info.cpp.

561 {
562 if (!available_fonts_.empty()) {
563 return available_fonts_;
564 }
565#ifdef GOOGLE_TESSERACT
566 if (FLAGS_use_only_legacy_fonts) {
567 // Restrict view to list of fonts in legacy_fonts.h
568 tprintf("Using list of legacy fonts only\n");
569 const int kNumFontLists = 4;
570 for (int i = 0; i < kNumFontLists; ++i) {
571 for (int j = 0; kFontlists[i][j] != nullptr; ++j) {
572 available_fonts_.push_back(kFontlists[i][j]);
573 }
574 }
575 return available_fonts_;
576 }
577#endif
578
579 PangoFontFamily** families = nullptr;
580 int n_families = 0;
581 ListFontFamilies(&families, &n_families);
582 for (int i = 0; i < n_families; ++i) {
583 const char* family_name = pango_font_family_get_name(families[i]);
584 tlog(2, "Listing family %s\n", family_name);
585 if (ShouldIgnoreFontFamilyName(family_name)) {
586 continue;
587 }
588
589 int n_faces;
590 PangoFontFace** faces = nullptr;
591 pango_font_family_list_faces(families[i], &faces, &n_faces);
592 for (int j = 0; j < n_faces; ++j) {
593 PangoFontDescription* desc = pango_font_face_describe(faces[j]);
594 char* desc_str = pango_font_description_to_string(desc);
595 // Skip "synthesized" font faces, since they are not truly loadable.
596 if (!pango_font_face_is_synthesized(faces[j])
597 && IsAvailableFont(desc_str)) {
598 available_fonts_.push_back(desc_str);
599 }
600 pango_font_description_free(desc);
601 g_free(desc_str);
602 }
603 g_free(faces);
604 }
605 g_free(families);
606 std::sort(available_fonts_.begin(), available_fonts_.end());
607 return available_fonts_;
608}

◆ PangoFontTypeInfo()

void tesseract::FontUtils::PangoFontTypeInfo ( )
static

Definition at line 803 of file pango_font_info.cpp.

803 {
804 PangoFontMap* font_map = pango_cairo_font_map_get_default();
805 if (pango_cairo_font_map_get_font_type(reinterpret_cast<PangoCairoFontMap*>(
806 font_map)) == CAIRO_FONT_TYPE_TOY) {
807 printf("Using CAIRO_FONT_TYPE_TOY.\n");
808 } else if (pango_cairo_font_map_get_font_type(
809 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
810 CAIRO_FONT_TYPE_FT) {
811 printf("Using CAIRO_FONT_TYPE_FT.\n");
812 } else if (pango_cairo_font_map_get_font_type(
813 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
814 CAIRO_FONT_TYPE_WIN32) {
815 printf("Using CAIRO_FONT_TYPE_WIN32.\n");
816 } else if (pango_cairo_font_map_get_font_type(
817 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
818 CAIRO_FONT_TYPE_QUARTZ) {
819 printf("Using CAIRO_FONT_TYPE_QUARTZ.\n");
820 } else if (pango_cairo_font_map_get_font_type(
821 reinterpret_cast<PangoCairoFontMap*>(font_map)) ==
822 CAIRO_FONT_TYPE_USER) {
823 printf("Using CAIRO_FONT_TYPE_USER.\n");
824 } else if (!font_map) {
825 printf("Can not create pango cairo font map!\n");
826 }
827}

◆ ReInit()

void tesseract::FontUtils::ReInit ( )
static

Definition at line 799 of file pango_font_info.cpp.

799{ available_fonts_.clear(); }

◆ SelectFont() [1/2]

bool tesseract::FontUtils::SelectFont ( const char *  utf8_word,
const int  utf8_len,
const std::vector< std::string > &  all_fonts,
std::string *  font_name,
std::vector< std::string > *  graphemes 
)
static

Definition at line 777 of file pango_font_info.cpp.

779 {
780 if (font_name) font_name->clear();
781 if (graphemes) graphemes->clear();
782 for (unsigned i = 0; i < all_fonts.size(); ++i) {
783 PangoFontInfo font;
784 std::vector<std::string> found_graphemes;
785 ASSERT_HOST_MSG(font.ParseFontDescriptionName(all_fonts[i]),
786 "Could not parse font desc name %s\n",
787 all_fonts[i].c_str());
788 if (font.CanRenderString(utf8_word, utf8_len, &found_graphemes)) {
789 if (graphemes) graphemes->swap(found_graphemes);
790 if (font_name) *font_name = all_fonts[i];
791 return true;
792 }
793 }
794 return false;
795}
#define ASSERT_HOST_MSG(x,...)
Definition: errcode.h:92

◆ SelectFont() [2/2]

bool tesseract::FontUtils::SelectFont ( const char *  utf8_word,
const int  utf8_len,
std::string *  font_name,
std::vector< std::string > *  graphemes 
)
static

Definition at line 770 of file pango_font_info.cpp.

771 {
772 return SelectFont(utf8_word, utf8_len, ListAvailableFonts(), font_name,
773 graphemes);
774}
static bool SelectFont(const char *utf8_word, const int utf8_len, std::string *font_name, std::vector< std::string > *graphemes)

The documentation for this class was generated from the following files: