175 {
183 std::deque<std::tuple<int, int>> best_choices;
184 ExtractBestPaths(&best_nodes, &second_nodes);
185 if (debug) {
186 DebugPath(unicharset, best_nodes);
187 ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
188 &xcoords);
189 tprintf(
"\nSecond choice path:\n");
190 DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
191 xcoords);
192 }
193 int timestepEnd= 0;
194
195
196 if (lstm_choice_mode == 2) {
197 ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
198 &xcoords, &best_choices);
199 if (best_choices.size() > 0) {
200 timestepEnd = std::get<1>(best_choices.front());
201 best_choices.pop_front();
202 }
203 } else {
204 ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
205 &xcoords);
206 }
207 int num_ids = unichar_ids.
size();
208 if (debug) {
209 DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
210 xcoords);
211 }
212
213 int word_end = 0;
214 float prev_space_cert = 0.0f;
215 for (int word_start = 0; word_start < num_ids; word_start = word_end) {
216 for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
217
218
219
221 int index = xcoords[word_end];
222 if (best_nodes[index]->start_of_word) break;
226 break;
227 }
228 float space_cert = 0.0f;
229 if (word_end < num_ids && unichar_ids[word_end] ==
UNICHAR_SPACE)
230 space_cert = certs[word_end];
231 bool leading_space =
232 word_start > 0 && unichar_ids[word_start - 1] ==
UNICHAR_SPACE;
233
234 WERD_RES* word_res = InitializeWord(
235 leading_space, line_box, word_start, word_end,
236 std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
237 if (lstm_choice_mode == 1) {
238 for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
240 }
241 timestepEnd = xcoords[word_end];
242 } else if (lstm_choice_mode == 2){
243
244 float sum = 0;
245 std::vector<std::pair<const char*, float>> choice_pairs;
246 for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
247 for (std::pair<const char*, float> choice :
timesteps[i]) {
248 if (std::strcmp(choice.first, "")) {
249 sum += choice.second;
250 choice_pairs.push_back(choice);
251 }
252 }
253 if ((best_choices.size() > 0 && i == std::get<1>(best_choices.front()) - 1)
254 || i == xcoords[word_end]-1) {
255 std::map<const char*, float> summed_propabilities;
256 for (auto & choice_pair : choice_pairs) {
257 summed_propabilities[choice_pair.first] += choice_pair.second;
258 }
259 std::vector<std::pair<const char*, float>> accumulated_timestep;
260 for (auto& summed_propability : summed_propabilities) {
261 if(sum == 0) break;
262 summed_propability.second/=sum;
263 size_t pos = 0;
264 while (accumulated_timestep.size() > pos
265 && accumulated_timestep[pos].second > summed_propability.second) {
266 pos++;
267 }
268 accumulated_timestep.insert(accumulated_timestep.begin() + pos,
269 std::pair<const char*,float>(summed_propability.first,
270 summed_propability.second));
271 }
272 if (best_choices.size() > 0) {
273 best_choices.pop_front();
274 }
275 choice_pairs.clear();
276 word_res->
timesteps.push_back(accumulated_timestep);
277 sum = 0;
278 }
279 }
280 timestepEnd = xcoords[word_end];
281 }
282 for (int i = word_start; i < word_end; ++i) {
283 auto* choices = new BLOB_CHOICE_LIST;
284 BLOB_CHOICE_IT bc_it(choices);
286 unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
288 int col = i - word_start;
289 choice->set_matrix_cell(col, col);
290 bc_it.add_after_then_move(choice);
292 }
293 int index = xcoords[word_end - 1];
296 prev_space_cert = space_cert;
297 if (word_end < num_ids && unichar_ids[word_end] ==
UNICHAR_SPACE)
298 ++word_end;
299 }
300}
void put(ICOORD pos, const T &thing)
std::vector< std::vector< std::pair< const char *, float > > > timesteps
void FakeWordFromRatings(PermuterType permuter)
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const