#include <ligature_table.h>
Definition at line 38 of file ligature_table.h.
◆ LigatureTable()
tesseract::LigatureTable::LigatureTable() [protected]
◆ AddLigatures()
std::string tesseract::LigatureTable::AddLigatures(const std::string &str, const PangoFontInfo *font) const
Definition at line 156 of file ligature_table.cpp.
157 {
158 std::string result;
159 int len = str.size();
160 int step = 0;
161 int i = 0;
163 step = 0;
165 if (i + liglen <= len) {
166 std::string lig_cand = str.substr(i, liglen);
169 tlog(3,
"Considering %s -> %s\n", lig_cand.c_str(),
170 it->second.c_str());
171 if (font) {
172
173 if (!font->CanRenderString(it->second.data(), it->second.length()))
174 continue;
175 }
176
177 step = liglen;
178 result += it->second;
179 tlog(2,
"Substituted %s -> %s\n", lig_cand.c_str(),
180 it->second.c_str());
181 break;
182 }
183 }
184 }
185 if (step == 0) {
186 result += str[i];
187 step = 1;
188 }
189 }
190 result += str.substr(i, len - i);
191 return result;
192}
LigHash norm_to_lig_table_
◆ Get()
Definition at line 52 of file ligature_table.cpp.
52 {
56 }
58}
static std::unique_ptr< LigatureTable > instance_
◆ Init()
void tesseract::LigatureTable::Init() [protected]
Definition at line 63 of file ligature_table.cpp.
63 {
66
67
68 std::string lig8 = EncodeAsUTF8(lig);
69 icu::UnicodeString unicode_lig8(static_cast<UChar32>(lig));
70 icu::UnicodeString normed8_result;
71 icu::ErrorCode status;
72 icu::Normalizer::normalize(unicode_lig8, UNORM_NFKC, 0, normed8_result,
73 status);
74 std::string normed8;
75 normed8_result.toUTF8String(normed8);
76
77
78 if (lig8 == "\uFB05")
79 normed8 = "ſt";
80 int lig_length = lig8.length();
81 int norm_length = normed8.size();
82 if (normed8 != lig8 && lig_length > 1 && norm_length > 1) {
93 }
94 }
95
104
107 }
108 }
109}
static TESS_API const char * kCustomLigatures[][2]
LigHash lig_to_norm_table_
◆ lig_to_norm_table()
const LigHash & tesseract::LigatureTable::lig_to_norm_table() const [inline]
◆ norm_to_lig_table()
const LigHash & tesseract::LigatureTable::norm_to_lig_table() const [inline]
◆ RemoveCustomLigatures()
std::string tesseract::LigatureTable::RemoveCustomLigatures(const std::string &str) const
Definition at line 130 of file ligature_table.cpp.
130 {
131 std::string result;
132 UNICHAR::const_iterator it_begin =
UNICHAR::begin(str.c_str(), str.length());
133 UNICHAR::const_iterator it_end =
UNICHAR::end(str.c_str(), str.length());
134 char tmp[5];
135 int len;
136 int norm_ind;
137 for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
138 len = it.get_utf8(tmp);
139 tmp[len] = '\0';
140 norm_ind = -1;
141 for (int i = 0;
144 norm_ind = i;
145 }
146 }
147 if (norm_ind >= 0) {
149 } else {
150 result += tmp;
151 }
152 }
153 return result;
154}
static const_iterator begin(const char *utf8_str, int byte_length)
static const_iterator end(const char *utf8_str, int byte_length)
◆ RemoveLigatures()
std::string tesseract::LigatureTable::RemoveLigatures(const std::string &str) const
Definition at line 111 of file ligature_table.cpp.
111 {
112 std::string result;
113 UNICHAR::const_iterator it_begin =
UNICHAR::begin(str.c_str(), str.length());
114 UNICHAR::const_iterator it_end =
UNICHAR::end(str.c_str(), str.length());
115 char tmp[5];
116 int len;
117 for (UNICHAR::const_iterator it = it_begin; it != it_end; ++it) {
118 len = it.get_utf8(tmp);
119 tmp[len] = '\0';
122 result += lig_it->second;
123 } else {
124 result += tmp;
125 }
126 }
127 return result;
128}
◆ instance_
std::unique_ptr< LigatureTable > tesseract::LigatureTable::instance_ [static, protected]
◆ lig_to_norm_table_
LigHash tesseract::LigatureTable::lig_to_norm_table_ [protected]
◆ max_lig_length_
int tesseract::LigatureTable::max_lig_length_ [protected]
◆ max_norm_length_
int tesseract::LigatureTable::max_norm_length_ [protected]
◆ min_lig_length_
int tesseract::LigatureTable::min_lig_length_ [protected]
◆ min_norm_length_
int tesseract::LigatureTable::min_norm_length_ [protected]
◆ norm_to_lig_table_
LigHash tesseract::LigatureTable::norm_to_lig_table_ [protected]
The documentation for this class was generated from the following files: