102 {
104 if (radical_stroke_table != nullptr &&
105 !DecodeRadicalTable(radical_stroke_table, &radical_map))
106 return false;
107 encoder_.clear();
109
111
113
114 if (null_id >= 0) {
116 }
118
119
120 int hangul_offset = unicharset.
size();
121
123
124
125
126 int han_offset = hangul_offset + kTotalJamos;
127 for (
int u = 0; u <= unicharset.
size(); ++u) {
128
129
130 if (u == unicharset.
size() && u != null_id)
break;
131 RecodedCharID code;
132
133 std::vector<char32> unicodes;
134 std::string cleaned;
135 if (u < unicharset.
size())
137 if (u < unicharset.
size() &&
139
140 int unicode = unicodes[0];
141 int leading, vowel, trailing;
142 auto it = radical_map.find(unicode);
143 if (it != radical_map.end()) {
144
145 int num_radicals = it->second->size();
146 for (int c = 0; c < num_radicals; ++c) {
147 code.Set(c, han_offset + (*it->second)[c]);
148 }
149 int pre_hash = RadicalPreHash(*it->second);
150 int num_samples = radical_counts[pre_hash]++;
151 if (num_samples > 0)
152 code.Set(num_radicals, han_offset + num_samples +
kRadicalRadix);
154
155
156 code.Set3(leading + hangul_offset, vowel +
kLCount + hangul_offset,
158 }
159 }
160
161 if (code.length() == 0) {
162
164 code.Set(0, 0);
168 } else {
169
170
171 for (int uni : unicodes) {
172 int position = code.length();
174 tprintf(
"Unichar %d=%s is too long to encode!!\n", u,
176 return false;
177 }
178 UNICHAR unichar(uni);
179 char* utf8 = unichar.utf8_str();
183 delete[] utf8;
184 if (direct_set.
size() >
186
187 tprintf(
"Code space expanded from original unicharset!!\n");
188 return false;
189 }
190 }
191 }
192 }
193 encoder_.push_back(code);
194 }
195
196
197 int code_offset = 0;
199 int max_offset = 0;
200 for (
int u = 0; u < unicharset.
size(); ++u) {
201 RecodedCharID* code = &encoder_[u];
202 if (code->length() <= i) continue;
203 max_offset = std::max(max_offset, (*code)(i)-han_offset);
204 code->Set(i, (*code)(i) + code_offset);
205 }
206 if (max_offset == 0) break;
207 code_offset += max_offset + 1;
208 }
209 DefragmentCodeValues(null_id >= 0 ? 1 : -1);
210 SetupDecoder();
211 return true;
212}
DLLSYM void tprintf(const char *format,...)
@ SPECIAL_UNICHAR_CODES_COUNT
std::unordered_map< int, std::unique_ptr< std::vector< int > > > RSMap
std::unordered_map< int, int > RSCounts
static std::vector< char32 > UTF8ToUTF32(const char *utf8_str)
static const int kMaxCodeLen
static bool DecomposeHangul(int unicode, int *leading, int *vowel, int *trailing)
void unichar_insert(const char *const unichar_repr, OldUncleanUnichars old_style)
bool contains_unichar(const char *const unichar_repr) const
const char * id_to_unichar(UNICHAR_ID id) const
static std::string CleanupString(const char *utf8_str)
bool has_special_codes() const
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const