tesseract 4.1.1
Loading...
Searching...
No Matches
fontinfo.h
Go to the documentation of this file.
1
2// File: fontinfo.h
3// Description: Font information classes abstracted from intproto.h/cpp.
4// Author: rays@google.com (Ray Smith)
5// Created: Tue May 17 17:08:01 PDT 2011
6//
7// (C) Copyright 2011, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20
21#ifndef TESSERACT_CCSTRUCT_FONTINFO_H_
22#define TESSERACT_CCSTRUCT_FONTINFO_H_
23
24#include <cstdint> // for uint16_t, uint32_t
25#include <cstdio> // for FILE
26#include "errcode.h"
27#include "genericvector.h"
28#include "unichar.h"
29
30template <typename T> class UnicityTable;
31
32namespace tesseract {
33
34// Simple struct to hold a font and a score. The scores come from the low-level
35// integer matcher, so they are in the uint16_t range. Fonts are an index to
36// fontinfo_table.
37// These get copied around a lot, so best to keep them small.
38struct ScoredFont {
40 ScoredFont(int font_id, uint16_t classifier_score)
41 : fontinfo_id(font_id), score(classifier_score) {}
42
43 // Index into fontinfo table, but inside the classifier, may be a shapetable
44 // index.
45 int32_t fontinfo_id;
46 // Raw score from the low-level classifier.
47 uint16_t score;
48};
49
50// Struct for information about spacing between characters in a particular font.
52 int16_t x_gap_before;
53 int16_t x_gap_after;
56};
57
58/*
59 * FontInfo::properties is a bit mask: italic (1), bold (2), fixed pitch (4),
60 * serif (8), fraktur (16).
61 */
62struct FontInfo {
63 FontInfo() : name(nullptr), properties(0), universal_id(0), spacing_vec(nullptr) {}
64 ~FontInfo() = default;
65
66 // Writes to the given file. Returns false in case of error.
67 bool Serialize(FILE* fp) const;
68 // Reads from the given file. Returns false in case of error.
69 // There is no swap argument: endian swapping, if needed, is presumably done by fp.
70 bool DeSerialize(TFile* fp);
71
72 // Reserves unicharset_size spots in spacing_vec.
73 void init_spacing(int unicharset_size) {
75 spacing_vec->init_to_size(unicharset_size, nullptr);
76 }
77 // Adds the given pointer to FontSpacingInfo to spacing_vec member
78 // (FontInfo class takes ownership of the pointer).
79 // Note: init_spacing should be called before calling this function.
80 void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) {
81 ASSERT_HOST(spacing_vec != nullptr && spacing_vec->size() > uch_id);
82 (*spacing_vec)[uch_id] = spacing_info;
83 }
84
85 // Returns the pointer to FontSpacingInfo for the given UNICHAR_ID.
86 const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const {
87 return (spacing_vec == nullptr || spacing_vec->size() <= uch_id) ?
88 nullptr : (*spacing_vec)[uch_id];
89 }
90
91 // Fills spacing with the value of the x gap expected between the two given
92 // UNICHAR_IDs. Returns true on success.
93 bool get_spacing(UNICHAR_ID prev_uch_id,
94 UNICHAR_ID uch_id,
95 int *spacing) const {
96 const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id);
97 const FontSpacingInfo *fsi = this->get_spacing(uch_id);
98 if (prev_fsi == nullptr || fsi == nullptr) return false;
99 int i = 0;
100 for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) {
101 if (prev_fsi->kerned_unichar_ids[i] == uch_id) break;
102 }
103 if (i < prev_fsi->kerned_unichar_ids.size()) {
104 *spacing = prev_fsi->kerned_x_gaps[i];
105 } else {
106 *spacing = prev_fsi->x_gap_after + fsi->x_gap_before;
107 }
108 return true;
109 }
110
111 bool is_italic() const { return properties & 1; }
112 bool is_bold() const { return (properties & 2) != 0; }
113 bool is_fixed_pitch() const { return (properties & 4) != 0; }
114 bool is_serif() const { return (properties & 8) != 0; }
115 bool is_fraktur() const { return (properties & 16) != 0; }
116
117 char* name;
118 uint32_t properties;
119 // The universal_id is a field reserved for the initialization process
120 // to assign a unique id number to all fonts loaded for the current
121 // combination of languages. This id will then be returned by
122 // ResultIterator::WordFontAttributes.
124 // Horizontal spacing between characters (indexed by UNICHAR_ID).
126};
127
128// Every class (character) owns a FontSet that represents all the fonts that can
129// render this character.
130// Since almost all the characters from the same script share the same set of
131// fonts, the sets are shared over multiple classes (see
132// Classify::fontset_table_). Thus, a class only stores the id of a set.
133// Because some fonts cannot render just one character of a set, there are
134// many FontSets that differ by only one font. Rather than storing the
135// FontInfo directly in the FontSet structure, it's better to share FontInfos
136// among FontSets (Classify::fontinfo_table_).
137struct FontSet {
138 int size;
139 int* configs; // FontInfo ids
140};
141
142// Class that adds a bit of functionality on top of GenericVector to
143// implement a table of FontInfo that replaces UnicityTable<FontInfo>.
144// TODO(rays) change all references once all existing traineddata files
145// are replaced.
146class FontInfoTable : public GenericVector<FontInfo> {
147 public:
150
151 // Writes to the given file. Returns false in case of error.
152 bool Serialize(FILE* fp) const;
153 // Reads from the given file. Returns false in case of error.
154 // There is no swap argument: endian swapping, if needed, is presumably done by fp.
155 bool DeSerialize(TFile* fp);
156
157 // Returns true if the given set of fonts includes one with the same
158 // properties as font_id.
160 int font_id, const GenericVector<ScoredFont>& font_set) const;
161 // Returns true if the given set of fonts includes multiple properties.
163 const GenericVector<ScoredFont>& font_set) const;
164
165 // Moves any non-empty FontSpacingInfo entries from other to this.
167 // Moves this to the target unicity table.
168 void MoveTo(UnicityTable<FontInfo>* target);
169};
170
171// Compare FontInfo structures.
172bool CompareFontInfo(const FontInfo& fi1, const FontInfo& fi2);
173// Compare FontSet structures.
174bool CompareFontSet(const FontSet& fs1, const FontSet& fs2);
175// Deletion callbacks for GenericVector.
178
179// Callbacks used by UnicityTable to read/write FontInfo/FontSet structures.
180bool read_info(TFile* f, FontInfo* fi);
181bool write_info(FILE* f, const FontInfo& fi);
182bool read_spacing_info(TFile* f, FontInfo* fi);
183bool write_spacing_info(FILE* f, const FontInfo& fi);
184bool read_set(TFile* f, FontSet* fs);
185bool write_set(FILE* f, const FontSet& fs);
186
187} // namespace tesseract.
188
189#endif /* TESSERACT_CCSTRUCT_FONTINFO_H_ */
#define ASSERT_HOST(x)
Definition: errcode.h:88
int UNICHAR_ID
Definition: unichar.h:34
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
Definition: fontinfo.cpp:119
void FontInfoDeleteCallback(FontInfo f)
Definition: fontinfo.cpp:138
bool CompareFontSet(const FontSet &fs1, const FontSet &fs2)
Definition: fontinfo.cpp:127
bool write_set(FILE *f, const FontSet &fs)
Definition: fontinfo.cpp:232
bool write_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:163
bool write_spacing_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:198
bool read_set(TFile *f, FontSet *fs)
Definition: fontinfo.cpp:226
bool read_info(TFile *f, FontInfo *fi)
Definition: fontinfo.cpp:153
void FontSetDeleteCallback(FontSet fs)
Definition: fontinfo.cpp:147
bool read_spacing_info(TFile *f, FontInfo *fi)
Definition: fontinfo.cpp:170
int size() const
Definition: genericvector.h:72
ScoredFont(int font_id, uint16_t classifier_score)
Definition: fontinfo.h:40
GenericVector< UNICHAR_ID > kerned_unichar_ids
Definition: fontinfo.h:54
GenericVector< int16_t > kerned_x_gaps
Definition: fontinfo.h:55
const FontSpacingInfo * get_spacing(UNICHAR_ID uch_id) const
Definition: fontinfo.h:86
int32_t universal_id
Definition: fontinfo.h:123
bool get_spacing(UNICHAR_ID prev_uch_id, UNICHAR_ID uch_id, int *spacing) const
Definition: fontinfo.h:93
bool is_italic() const
Definition: fontinfo.h:111
bool DeSerialize(TFile *fp)
Definition: fontinfo.cpp:33
bool is_fixed_pitch() const
Definition: fontinfo.h:113
bool Serialize(FILE *fp) const
Definition: fontinfo.cpp:26
bool is_bold() const
Definition: fontinfo.h:112
uint32_t properties
Definition: fontinfo.h:118
void init_spacing(int unicharset_size)
Definition: fontinfo.h:73
bool is_fraktur() const
Definition: fontinfo.h:115
GenericVector< FontSpacingInfo * > * spacing_vec
Definition: fontinfo.h:125
void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info)
Definition: fontinfo.h:80
bool is_serif() const
Definition: fontinfo.h:114
bool DeSerialize(TFile *fp)
Definition: fontinfo.cpp:53
bool SetContainsMultipleFontProperties(const GenericVector< ScoredFont > &font_set) const
Definition: fontinfo.cpp:71
void MoveSpacingInfoFrom(FontInfoTable *other)
Definition: fontinfo.cpp:84
bool SetContainsFontProperties(int font_id, const GenericVector< ScoredFont > &font_set) const
Definition: fontinfo.cpp:60
bool Serialize(FILE *fp) const
Definition: fontinfo.cpp:48
void MoveTo(UnicityTable< FontInfo > *target)
Definition: fontinfo.cpp:105