tesseract 4.1.1
Loading...
Searching...
No Matches
pango_font_info.h
Go to the documentation of this file.
1/**********************************************************************
2 * File: pango_font_info.h
3 * Description: Font-related objects and helper functions
4 * Author: Ranjith Unnikrishnan
5 * Created: Mon Nov 18 2013
6 *
7 * (C) Copyright 2013, Google Inc.
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 **********************************************************************/
19
20#ifndef TESSERACT_TRAINING_PANGO_FONT_INFO_H_
21#define TESSERACT_TRAINING_PANGO_FONT_INFO_H_
22
23#include <string>
24#include <unordered_map>
25#include <utility>
26#include <vector>
27
28#include "commandlineflags.h"
29#include "pango/pango-font.h"
30#include "pango/pango.h"
31#include "pango/pangocairo.h"
32#include "util.h"
33
34using char32 = signed int;
35
36namespace tesseract {
37
38// Data holder class for a font, intended to avoid having to work with Pango or
39// FontConfig-specific objects directly.
41 public:
47 };
50 // Initializes by parsing a font description name, defined as a string of the
51 // format:
52 // "FamilyName [FaceName] [PointSize]"
53 // where a missing FaceName implies the default regular face.
54 // e.g. "Arial Italic 12", "Verdana"
55 //
56 // FaceName is a combination of:
57 // [StyleName] [Variant] [Weight] [Stretch]
58 // with (all optional) Pango-defined values of:
59 // StyleName: Oblique, Italic
60 // Variant : Small-Caps
61 // Weight : Ultra-Light, Light, Medium, Semi-Bold, Bold, Ultra-Bold, Heavy
62 // Stretch : Ultra-Condensed, Extra-Condensed, Condensed, Semi-Condensed,
63 // Semi-Expanded, Expanded, Extra-Expanded, Ultra-Expanded.
64 explicit PangoFontInfo(const std::string& name);
65 bool ParseFontDescriptionName(const std::string& name);
66
67 // Returns true if the font has codepoint coverage for the specified text.
68 bool CoversUTF8Text(const char* utf8_text, int byte_length) const;
69 // Removes from *utf8_text the Unicode code points that are not covered by
70 // the font. Returns the number of characters dropped.
71 int DropUncoveredChars(std::string* utf8_text) const;
72
73 // Returns true if the entire string can be rendered by the font with full
74 // character coverage and without unknown-glyph or dotted-circle-glyph
75 // substitutions caused by a badly formed Unicode sequence.
76 // If true, the individual graphemes are returned in *graphemes, including
77 // any whitespace characters present in the original string.
78 bool CanRenderString(const char* utf8_word, int len,
79 std::vector<std::string>* graphemes) const;
80 bool CanRenderString(const char* utf8_word, int len) const;
81
82 // Retrieves the x_bearing and x_advance of the given UTF-8 character in this
83 // font (pixel values, per the comment on resolution_). Returns false if the
84 // glyph for the character could not be found in the font.
85 // Ref: http://freetype.sourceforge.net/freetype2/docs/glyphs/glyphs-3.html
86 bool GetSpacingProperties(const std::string& utf8_char,
87 int* x_bearing, int* x_advance) const;
88
89 // If not already initialized, initializes FontConfig by setting its
90 // environment variable and creating a fonts.conf file that points to the
91 // FLAGS_fonts_dir and the cache to FLAGS_fontconfig_tmpdir.
92 static void SoftInitFontConfig();
93 // Re-initializes font config, whether or not already initialized.
94 // If already initialized, any existing cache is deleted, just to be sure.
95 static void HardInitFontConfig(const std::string& fonts_dir,
96 const std::string& cache_dir);
97
98 // Accessors
99 std::string DescriptionName() const;
100 // Font family name, e.g. "Arial".
101 const std::string& family_name() const { return family_name_; }
102 // Size in points (1/72 inch), rounded to the nearest integer.
103 int font_size() const { return font_size_; }
104 FontTypeEnum font_type() const { return font_type_; }  // Font type set from parsing the font description name.
105
106 int resolution() const { return resolution_; }  // Output resolution assumed by methods that return pixel values.
107 void set_resolution(const int resolution) {  // Sets the value reported by resolution().
108 resolution_ = resolution;
109 }
110
111 private:
112 friend class FontUtils;
113 void Clear();
114 bool ParseFontDescription(const PangoFontDescription* desc);
115 // Returns the PangoFont structure corresponding to the closest available font
116 // in the font map.
117 PangoFont* ToPangoFont() const;
118
119 // Font properties set automatically from parsing the font description name.
120 std::string family_name_;
121 int font_size_;
122 FontTypeEnum font_type_;
123 // The Pango description that was used to initialize the instance.
124 PangoFontDescription* desc_;
125 // Default output resolution to assume for GetSpacingProperties() and any
126 // other methods that return pixel values.
127 int resolution_;
128 // Fontconfig operates through an environment variable, so it intrinsically
129 // cannot be thread-friendly, but you can serialize multiple independent
130 // font configurations by calling HardInitFontConfig(fonts_dir, cache_dir).
131 // These hold the last initialized values set by HardInitFontConfig or
132 // the first call to SoftInitFontConfig.
133 // Directory to be scanned for font files.
134 static std::string fonts_dir_;
135 // Directory to store the cache of font information. (Can be the same as
136 // fonts_dir_)
137 static std::string cache_dir_;
138
139 private:
141 void operator=(const PangoFontInfo&);  // Private assignment operator; presumably left undefined to disallow assignment - confirm in the .cpp.
142};
143
144// Static utility methods for querying font availability and font-selection
145// based on codepoint coverage.
147 public:
148 // Returns true if the font of the given description name is available in the
149 // target directory specified by --fonts_dir. No best match is reported.
150 static bool IsAvailableFont(const char* font_desc) {
151 return IsAvailableFont(font_desc, nullptr);
152 }
153 // Returns true if the font of the given description name is available in the
154 // target directory specified by --fonts_dir. If false is returned, and
155 // best_match is not nullptr, the closest matching font is returned there.
156 static bool IsAvailableFont(const char* font_desc, std::string* best_match);
157 // Returns a reference to the description names of the available fonts.
158 static const std::vector<std::string>& ListAvailableFonts();
159
160 // Picks font among available fonts that covers and can render the given word,
161 // and returns the font description name and the decomposition of the word to
162 // graphemes. Returns false if no suitable font was found.
163 static bool SelectFont(const char* utf8_word, const int utf8_len,
164 std::string* font_name, std::vector<std::string>* graphemes);
165
166 // Picks font among all_fonts that covers and can render the given word,
167 // and returns the font description name and the decomposition of the word to
168 // graphemes. Returns false if no suitable font was found.
169 static bool SelectFont(const char* utf8_word, const int utf8_len,
170 const std::vector<std::string>& all_fonts,
171 std::string* font_name, std::vector<std::string>* graphemes);
172
173 // Fills *unichar_bitmap so that a value of true at index 'n' implies that
174 // unicode value 'n' is renderable by at least one available font.
175 static void GetAllRenderableCharacters(std::vector<bool>* unichar_bitmap);
176 // Variants of the above function that inspect only the provided font name(s).
177 static void GetAllRenderableCharacters(const std::vector<std::string>& font_names,
178 std::vector<bool>* unichar_bitmap);
179 static void GetAllRenderableCharacters(const std::string& font_name,
180 std::vector<bool>* unichar_bitmap);
181
182 // NOTE: The following utilities were written to be backward compatible with
183 // StringRender.
184
185 // BestFonts reports, in *font_flag, a font name and a bit vector of the
186 // characters it can render for each font that scores within some fraction
187 // of the best font on the characters in the given hash map.
188 // In the flags vector, each flag is set according to whether the
189 // corresponding character (in order of iterating ch_map) can be rendered.
190 // The return string is a list of the acceptable fonts that were used.
191 static std::string BestFonts(
192 const std::unordered_map<char32, int64_t>& ch_map,
193 std::vector<std::pair<const char*, std::vector<bool> > >* font_flag);
194
195 // FontScore returns the weighted renderability score of the given
196 // hash map character table in the given font. The unweighted score
197 // is also returned in raw_score.
198 // The values in the bool vector ch_flags correspond to whether the
199 // corresponding character (in order of iterating ch_map) can be rendered.
200 static int FontScore(const std::unordered_map<char32, int64_t>& ch_map,
201 const std::string& fontname, int* raw_score,
202 std::vector<bool>* ch_flags);
203
204 // PangoFontInfo is reinitialized, so clear the static list of fonts.
205 static void ReInit();
206 static void PangoFontTypeInfo();
207
208 private:
209 static std::vector<std::string> available_fonts_; // cache list
210};
211} // namespace tesseract
212
213#endif // TESSERACT_TRAINING_PANGO_FONT_INFO_H_
signed int char32
void set_resolution(const int resolution)
int DropUncoveredChars(std::string *utf8_text) const
FontTypeEnum font_type() const
bool CoversUTF8Text(const char *utf8_text, int byte_length) const
bool ParseFontDescriptionName(const std::string &name)
std::string DescriptionName() const
bool CanRenderString(const char *utf8_word, int len, std::vector< std::string > *graphemes) const
bool GetSpacingProperties(const std::string &utf8_char, int *x_bearing, int *x_advance) const
const std::string & family_name() const
static void HardInitFontConfig(const std::string &fonts_dir, const std::string &cache_dir)
static int FontScore(const std::unordered_map< char32, int64_t > &ch_map, const std::string &fontname, int *raw_score, std::vector< bool > *ch_flags)
static std::string BestFonts(const std::unordered_map< char32, int64_t > &ch_map, std::vector< std::pair< const char *, std::vector< bool > > > *font_flag)
static bool SelectFont(const char *utf8_word, const int utf8_len, std::string *font_name, std::vector< std::string > *graphemes)
static void GetAllRenderableCharacters(std::vector< bool > *unichar_bitmap)
static bool IsAvailableFont(const char *font_desc)
static void PangoFontTypeInfo()
static const std::vector< std::string > & ListAvailableFonts()