tesseract 4.1.1
Loading...
Searching...
No Matches
recodebeam.cpp
Go to the documentation of this file.
1
2// File: recodebeam.cpp
3// Description: Beam search to decode from the re-encoded CJK as a sequence of
4// smaller numbers in place of a single large code.
5// Author: Ray Smith
6//
7// (C) Copyright 2015, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20#include "recodebeam.h"
21#include "networkio.h"
22#include "pageres.h"
23#include "unicharcompress.h"
24#include <deque>
25#include <map>
26#include <set>
27#include <tuple>
28#include <vector>
29
30#include <algorithm>
31
32namespace tesseract {
33
34// The beam width at each code position.
35const int RecodeBeamSearch::kBeamWidths[RecodedCharID::kMaxCodeLen + 1] = {
36 5, 10, 16, 16, 16, 16, 16, 16, 16, 16,
37};
38
39static const char* kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"};
40
41// Prints debug details of the node.
42void RecodeNode::Print(int null_char, const UNICHARSET& unicharset,
43 int depth) const {
44 if (code == null_char) {
45 tprintf("null_char");
46 } else {
47 tprintf("label=%d, uid=%d=%s", code, unichar_id,
48 unicharset.debug_str(unichar_id).string());
49 }
50 tprintf(" score=%g, c=%g,%s%s%s perm=%d, hash=%" PRIx64, score, certainty,
51 start_of_dawg ? " DawgStart" : "", start_of_word ? " Start" : "",
52 end_of_word ? " End" : "", permuter, code_hash);
53 if (depth > 0 && prev != nullptr) {
54 tprintf(" prev:");
55 prev->Print(null_char, unicharset, depth - 1);
56 } else {
57 tprintf("\n");
58 }
59}
60
61// Borrows the pointer, which is expected to survive until *this is deleted.
63 int null_char, bool simple_text, Dict* dict)
64 : recoder_(recoder),
65 beam_size_(0),
66 top_code_(-1),
67 second_code_(-1),
68 dict_(dict),
69 space_delimited_(true),
70 is_simple_text_(simple_text),
71 null_char_(null_char) {
72 if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) space_delimited_ = false;
73}
74
75// Decodes the set of network outputs, storing the lattice internally.
76void RecodeBeamSearch::Decode(const NetworkIO& output, double dict_ratio,
77 double cert_offset, double worst_dict_cert,
78 const UNICHARSET* charset, int lstm_choice_mode) {
79 beam_size_ = 0;
80 int width = output.Width();
81 if (lstm_choice_mode)
82 timesteps.clear();
83 for (int t = 0; t < width; ++t) {
84 ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
85 DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
86 charset);
87 if (lstm_choice_mode) {
88 SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
89 }
90 }
91}
93 double dict_ratio, double cert_offset,
94 double worst_dict_cert,
95 const UNICHARSET* charset) {
96 beam_size_ = 0;
97 int width = output.dim1();
98 for (int t = 0; t < width; ++t) {
99 ComputeTopN(output[t], output.dim2(), kBeamWidths[0]);
100 DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
101 }
102}
103
104void RecodeBeamSearch::SaveMostCertainChoices(const float* outputs,
105 int num_outputs,
106 const UNICHARSET* charset,
107 int xCoord) {
108 std::vector<std::pair<const char*, float>> choices;
109 for (int i = 0; i < num_outputs; ++i) {
110 if (outputs[i] >= 0.01f) {
111 const char* character;
112 if (i + 2 >= num_outputs) {
113 character = "";
114 } else if (i > 0) {
115 character = charset->id_to_unichar_ext(i + 2);
116 } else {
117 character = charset->id_to_unichar_ext(i);
118 }
119 size_t pos = 0;
120 //order the possible choices within one timestep
121 //beginning with the most likely
122 while (choices.size() > pos && choices[pos].second > outputs[i]) {
123 pos++;
124 }
125 choices.insert(choices.begin() + pos,
126 std::pair<const char*, float>(character, outputs[i]));
127 }
128 }
129 timesteps.push_back(choices);
130}
131
132// Returns the best path as labels/scores/xcoords similar to simple CTC.
134 GenericVector<int>* labels, GenericVector<int>* xcoords) const {
135 labels->truncate(0);
136 xcoords->truncate(0);
138 ExtractBestPaths(&best_nodes, nullptr);
139 // Now just run CTC on the best nodes.
140 int t = 0;
141 int width = best_nodes.size();
142 while (t < width) {
143 int label = best_nodes[t]->code;
144 if (label != null_char_) {
145 labels->push_back(label);
146 xcoords->push_back(t);
147 }
148 while (++t < width && !is_simple_text_ && best_nodes[t]->code == label) {
149 }
150 }
151 xcoords->push_back(width);
152}
153
154// Returns the best path as unichar-ids/certs/ratings/xcoords skipping
155// duplicates, nulls and intermediate parts.
157 bool debug, const UNICHARSET* unicharset, GenericVector<int>* unichar_ids,
159 GenericVector<int>* xcoords) const {
161 ExtractBestPaths(&best_nodes, nullptr);
162 ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
163 if (debug) {
164 DebugPath(unicharset, best_nodes);
165 DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings,
166 *xcoords);
167 }
168}
169
170// Returns the best path as a set of WERD_RES.
172 float scale_factor, bool debug,
173 const UNICHARSET* unicharset,
175 int lstm_choice_mode) {
176 words->truncate(0);
177 GenericVector<int> unichar_ids;
179 GenericVector<float> ratings;
180 GenericVector<int> xcoords;
183 std::deque<std::tuple<int, int>> best_choices;
184 ExtractBestPaths(&best_nodes, &second_nodes);
185 if (debug) {
186 DebugPath(unicharset, best_nodes);
187 ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
188 &xcoords);
189 tprintf("\nSecond choice path:\n");
190 DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
191 xcoords);
192 }
193 int timestepEnd= 0;
194 //if lstm choice mode is required in granularity level 2 it stores the x
195 //Coordinates of every chosen character to match the alternative choices to it
196 if (lstm_choice_mode == 2) {
197 ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
198 &xcoords, &best_choices);
199 if (best_choices.size() > 0) {
200 timestepEnd = std::get<1>(best_choices.front());
201 best_choices.pop_front();
202 }
203 } else {
204 ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
205 &xcoords);
206 }
207 int num_ids = unichar_ids.size();
208 if (debug) {
209 DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
210 xcoords);
211 }
212 // Convert labels to unichar-ids.
213 int word_end = 0;
214 float prev_space_cert = 0.0f;
215 for (int word_start = 0; word_start < num_ids; word_start = word_end) {
216 for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
217 // A word is terminated when a space character or start_of_word flag is
218 // hit. We also want to force a separate word for every non
219 // space-delimited character when not in a dictionary context.
220 if (unichar_ids[word_end] == UNICHAR_SPACE) break;
221 int index = xcoords[word_end];
222 if (best_nodes[index]->start_of_word) break;
223 if (best_nodes[index]->permuter == TOP_CHOICE_PERM &&
224 (!unicharset->IsSpaceDelimited(unichar_ids[word_end]) ||
225 !unicharset->IsSpaceDelimited(unichar_ids[word_end - 1])))
226 break;
227 }
228 float space_cert = 0.0f;
229 if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
230 space_cert = certs[word_end];
231 bool leading_space =
232 word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
233 // Create a WERD_RES for the output word.
234 WERD_RES* word_res = InitializeWord(
235 leading_space, line_box, word_start, word_end,
236 std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
237 if (lstm_choice_mode == 1) {
238 for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
239 word_res->timesteps.push_back(timesteps[i]);
240 }
241 timestepEnd = xcoords[word_end];
242 } else if (lstm_choice_mode == 2){
243 // Accumulated Timesteps (choice mode 2 processing)
244 float sum = 0;
245 std::vector<std::pair<const char*, float>> choice_pairs;
246 for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
247 for (std::pair<const char*, float> choice : timesteps[i]) {
248 if (std::strcmp(choice.first, "")) {
249 sum += choice.second;
250 choice_pairs.push_back(choice);
251 }
252 }
253 if ((best_choices.size() > 0 && i == std::get<1>(best_choices.front()) - 1)
254 || i == xcoords[word_end]-1) {
255 std::map<const char*, float> summed_propabilities;
256 for (auto & choice_pair : choice_pairs) {
257 summed_propabilities[choice_pair.first] += choice_pair.second;
258 }
259 std::vector<std::pair<const char*, float>> accumulated_timestep;
260 for (auto& summed_propability : summed_propabilities) {
261 if(sum == 0) break;
262 summed_propability.second/=sum;
263 size_t pos = 0;
264 while (accumulated_timestep.size() > pos
265 && accumulated_timestep[pos].second > summed_propability.second) {
266 pos++;
267 }
268 accumulated_timestep.insert(accumulated_timestep.begin() + pos,
269 std::pair<const char*,float>(summed_propability.first,
270 summed_propability.second));
271 }
272 if (best_choices.size() > 0) {
273 best_choices.pop_front();
274 }
275 choice_pairs.clear();
276 word_res->timesteps.push_back(accumulated_timestep);
277 sum = 0;
278 }
279 }
280 timestepEnd = xcoords[word_end];
281 }
282 for (int i = word_start; i < word_end; ++i) {
283 auto* choices = new BLOB_CHOICE_LIST;
284 BLOB_CHOICE_IT bc_it(choices);
285 auto* choice = new BLOB_CHOICE(
286 unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
287 static_cast<float>(INT16_MAX), 0.0f, BCC_STATIC_CLASSIFIER);
288 int col = i - word_start;
289 choice->set_matrix_cell(col, col);
290 bc_it.add_after_then_move(choice);
291 word_res->ratings->put(col, col, choices);
292 }
293 int index = xcoords[word_end - 1];
294 word_res->FakeWordFromRatings(best_nodes[index]->permuter);
295 words->push_back(word_res);
296 prev_space_cert = space_cert;
297 if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
298 ++word_end;
299 }
300}
301
302// Generates debug output of the content of the beams after a Decode.
303void RecodeBeamSearch::DebugBeams(const UNICHARSET& unicharset) const {
304 for (int p = 0; p < beam_size_; ++p) {
305 for (int d = 0; d < 2; ++d) {
306 for (int c = 0; c < NC_COUNT; ++c) {
307 auto cont = static_cast<NodeContinuation>(c);
308 int index = BeamIndex(d, cont, 0);
309 if (beam_[p]->beams_[index].empty()) continue;
310 // Print all the best scoring nodes for each unichar found.
311 tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict",
312 kNodeContNames[c]);
313 DebugBeamPos(unicharset, beam_[p]->beams_[index]);
314 }
315 }
316 }
317}
318
319// Generates debug output of the content of a single beam position.
320void RecodeBeamSearch::DebugBeamPos(const UNICHARSET& unicharset,
321 const RecodeHeap& heap) const {
323 unichar_bests.init_to_size(unicharset.size(), nullptr);
324 const RecodeNode* null_best = nullptr;
325 int heap_size = heap.size();
326 for (int i = 0; i < heap_size; ++i) {
327 const RecodeNode* node = &heap.get(i).data;
328 if (node->unichar_id == INVALID_UNICHAR_ID) {
329 if (null_best == nullptr || null_best->score < node->score) null_best = node;
330 } else {
331 if (unichar_bests[node->unichar_id] == nullptr ||
332 unichar_bests[node->unichar_id]->score < node->score) {
333 unichar_bests[node->unichar_id] = node;
334 }
335 }
336 }
337 for (int u = 0; u < unichar_bests.size(); ++u) {
338 if (unichar_bests[u] != nullptr) {
339 const RecodeNode& node = *unichar_bests[u];
340 node.Print(null_char_, unicharset, 1);
341 }
342 }
343 if (null_best != nullptr) {
344 null_best->Print(null_char_, unicharset, 1);
345 }
346}
347
348// Returns the given best_nodes as unichar-ids/certs/ratings/xcoords skipping
349// duplicates, nulls and intermediate parts.
350/* static */
351void RecodeBeamSearch::ExtractPathAsUnicharIds(
352 const GenericVector<const RecodeNode*>& best_nodes,
353 GenericVector<int>* unichar_ids, GenericVector<float>* certs,
354 GenericVector<float>* ratings, GenericVector<int>* xcoords,
355 std::deque<std::tuple<int, int>>* best_choices) {
356 unichar_ids->truncate(0);
357 certs->truncate(0);
358 ratings->truncate(0);
359 xcoords->truncate(0);
360 // Backtrack extracting only valid, non-duplicate unichar-ids.
361 int t = 0;
362 int width = best_nodes.size();
363 while (t < width) {
364 int id;
365 int tposition;
366 double certainty = 0.0;
367 double rating = 0.0;
368 while (t < width && best_nodes[t]->unichar_id == INVALID_UNICHAR_ID) {
369 double cert = best_nodes[t++]->certainty;
370 if (cert < certainty) certainty = cert;
371 rating -= cert;
372 }
373 if (t < width) {
374 int unichar_id = best_nodes[t]->unichar_id;
375 if (unichar_id == UNICHAR_SPACE && !certs->empty() &&
376 best_nodes[t]->permuter != NO_PERM) {
377 // All the rating and certainty go on the previous character except
378 // for the space itself.
379 if (certainty < certs->back()) certs->back() = certainty;
380 ratings->back() += rating;
381 certainty = 0.0;
382 rating = 0.0;
383 }
384 unichar_ids->push_back(unichar_id);
385 xcoords->push_back(t);
386 if (best_choices != nullptr) {
387 tposition = t;
388 id = unichar_id;
389 }
390 do {
391 double cert = best_nodes[t++]->certainty;
392 // Special-case NO-PERM space to forget the certainty of the previous
393 // nulls. See long comment in ContinueContext.
394 if (cert < certainty || (unichar_id == UNICHAR_SPACE &&
395 best_nodes[t - 1]->permuter == NO_PERM)) {
396 certainty = cert;
397 }
398 rating -= cert;
399 } while (t < width && best_nodes[t]->duplicate);
400 certs->push_back(certainty);
401 ratings->push_back(rating);
402 } else if (!certs->empty()) {
403 if (certainty < certs->back()) certs->back() = certainty;
404 ratings->back() += rating;
405 }
406 if (best_choices != nullptr) {
407 best_choices->push_back(
408 std::tuple<int, int>(id, tposition));
409 }
410 }
411 xcoords->push_back(width);
412}
413
414// Sets up a word with the ratings matrix and fake blobs with boxes in the
415// right places.
416WERD_RES* RecodeBeamSearch::InitializeWord(bool leading_space,
417 const TBOX& line_box, int word_start,
418 int word_end, float space_certainty,
419 const UNICHARSET* unicharset,
420 const GenericVector<int>& xcoords,
421 float scale_factor) {
422 // Make a fake blob for each non-zero label.
423 C_BLOB_LIST blobs;
424 C_BLOB_IT b_it(&blobs);
425 for (int i = word_start; i < word_end; ++i) {
426 int min_half_width = xcoords[i + 1] - xcoords[i];
427 if (i > 0 && xcoords[i] - xcoords[i - 1] < min_half_width)
428 min_half_width = xcoords[i] - xcoords[i - 1];
429 if (min_half_width < 1) min_half_width = 1;
430 // Make a fake blob.
431 TBOX box(xcoords[i] - min_half_width, 0, xcoords[i] + min_half_width,
432 line_box.height());
433 box.scale(scale_factor);
434 box.move(ICOORD(line_box.left(), line_box.bottom()));
435 box.set_top(line_box.top());
436 b_it.add_after_then_move(C_BLOB::FakeBlob(box));
437 }
438 // Make a fake word from the blobs.
439 WERD* word = new WERD(&blobs, leading_space, nullptr);
440 // Make a WERD_RES from the word.
441 auto* word_res = new WERD_RES(word);
442 word_res->uch_set = unicharset;
443 word_res->combination = true; // Give it ownership of the word.
444 word_res->space_certainty = space_certainty;
445 word_res->ratings = new MATRIX(word_end - word_start, 1);
446 return word_res;
447}
448
449// Fills top_n_flags_ with bools that are true iff the corresponding output
450// is one of the top_n.
451void RecodeBeamSearch::ComputeTopN(const float* outputs, int num_outputs,
452 int top_n) {
453 top_n_flags_.init_to_size(num_outputs, TN_ALSO_RAN);
454 top_code_ = -1;
455 second_code_ = -1;
456 top_heap_.clear();
457 for (int i = 0; i < num_outputs; ++i) {
458 if (top_heap_.size() < top_n || outputs[i] > top_heap_.PeekTop().key) {
459 TopPair entry(outputs[i], i);
460 top_heap_.Push(&entry);
461 if (top_heap_.size() > top_n) top_heap_.Pop(&entry);
462 }
463 }
464 while (!top_heap_.empty()) {
465 TopPair entry;
466 top_heap_.Pop(&entry);
467 if (top_heap_.size() > 1) {
468 top_n_flags_[entry.data] = TN_TOPN;
469 } else {
470 top_n_flags_[entry.data] = TN_TOP2;
471 if (top_heap_.empty())
472 top_code_ = entry.data;
473 else
474 second_code_ = entry.data;
475 }
476 }
477 top_n_flags_[null_char_] = TN_TOP2;
478}
479
480// Adds the computation for the current time-step to the beam. Call at each
481// time-step in sequence from left to right. outputs is the activation vector
482// for the current timestep.
483void RecodeBeamSearch::DecodeStep(const float* outputs, int t,
484 double dict_ratio, double cert_offset,
485 double worst_dict_cert,
486 const UNICHARSET* charset, bool debug) {
487 if (t == beam_.size()) beam_.push_back(new RecodeBeam);
488 RecodeBeam* step = beam_[t];
489 beam_size_ = t + 1;
490 step->Clear();
491 if (t == 0) {
492 // The first step can only use singles and initials.
493 ContinueContext(nullptr, BeamIndex(false, NC_ANYTHING, 0), outputs, TN_TOP2,
494 charset, dict_ratio, cert_offset, worst_dict_cert, step);
495 if (dict_ != nullptr) {
496 ContinueContext(nullptr, BeamIndex(true, NC_ANYTHING, 0), outputs, TN_TOP2,
497 charset, dict_ratio, cert_offset, worst_dict_cert, step);
498 }
499 } else {
500 RecodeBeam* prev = beam_[t - 1];
501 if (debug) {
502 int beam_index = BeamIndex(true, NC_ANYTHING, 0);
503 for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
505 ExtractPath(&prev->beams_[beam_index].get(i).data, &path);
506 tprintf("Step %d: Dawg beam %d:\n", t, i);
507 DebugPath(charset, path);
508 }
509 beam_index = BeamIndex(false, NC_ANYTHING, 0);
510 for (int i = prev->beams_[beam_index].size() - 1; i >= 0; --i) {
512 ExtractPath(&prev->beams_[beam_index].get(i).data, &path);
513 tprintf("Step %d: Non-Dawg beam %d:\n", t, i);
514 DebugPath(charset, path);
515 }
516 }
517 int total_beam = 0;
518 // Work through the scores by group (top-2, top-n, the rest) while the beam
519 // is empty. This enables extending the context using only the top-n results
520 // first, which may have an empty intersection with the valid codes, so we
521 // fall back to the rest if the beam is empty.
522 for (int tn = 0; tn < TN_COUNT && total_beam == 0; ++tn) {
523 auto top_n = static_cast<TopNState>(tn);
524 for (int index = 0; index < kNumBeams; ++index) {
525 // Working backwards through the heaps doesn't guarantee that we see the
526 // best first, but it comes before a lot of the worst, so it is slightly
527 // more efficient than going forwards.
528 for (int i = prev->beams_[index].size() - 1; i >= 0; --i) {
529 ContinueContext(&prev->beams_[index].get(i).data, index, outputs, top_n,
530 charset, dict_ratio, cert_offset, worst_dict_cert, step);
531 }
532 }
533 for (int index = 0; index < kNumBeams; ++index) {
535 total_beam += step->beams_[index].size();
536 }
537 }
538 // Special case for the best initial dawg. Push it on the heap if good
539 // enough, but there is only one, so it doesn't blow up the beam.
540 for (int c = 0; c < NC_COUNT; ++c) {
541 if (step->best_initial_dawgs_[c].code >= 0) {
542 int index = BeamIndex(true, static_cast<NodeContinuation>(c), 0);
543 RecodeHeap* dawg_heap = &step->beams_[index];
544 PushHeapIfBetter(kBeamWidths[0], &step->best_initial_dawgs_[c],
545 dawg_heap);
546 }
547 }
548 }
549}
550
551// Adds to the appropriate beams the legal (according to recoder)
552// continuations of context prev, which is of the given length, using the
553// given network outputs to provide scores to the choices. Uses only those
554// choices for which top_n_flags[index] == top_n_flag.
555void RecodeBeamSearch::ContinueContext(const RecodeNode* prev, int index,
556 const float* outputs,
557 TopNState top_n_flag,
558 const UNICHARSET* charset,
559 double dict_ratio,
560 double cert_offset,
561 double worst_dict_cert,
562 RecodeBeam* step) {
563 RecodedCharID prefix;
564 RecodedCharID full_code;
565 const RecodeNode* previous = prev;
566 int length = LengthFromBeamsIndex(index);
567 bool use_dawgs = IsDawgFromBeamsIndex(index);
569 for (int p = length - 1; p >= 0; --p, previous = previous->prev) {
570 while (previous != nullptr &&
571 (previous->duplicate || previous->code == null_char_)) {
572 previous = previous->prev;
573 }
574 if (previous != nullptr) {
575 prefix.Set(p, previous->code);
576 full_code.Set(p, previous->code);
577 }
578 }
579 if (prev != nullptr && !is_simple_text_) {
580 if (top_n_flags_[prev->code] == top_n_flag) {
581 if (prev_cont != NC_NO_DUP) {
582 float cert =
583 NetworkIO::ProbToCertainty(outputs[prev->code]) + cert_offset;
584 PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id,
585 cert, worst_dict_cert, dict_ratio, use_dawgs,
586 NC_ANYTHING, prev, step);
587 }
588 if (prev_cont == NC_ANYTHING && top_n_flag == TN_TOP2 &&
589 prev->code != null_char_) {
590 float cert = NetworkIO::ProbToCertainty(outputs[prev->code] +
591 outputs[null_char_]) +
592 cert_offset;
593 PushDupOrNoDawgIfBetter(length, true, prev->code, prev->unichar_id,
594 cert, worst_dict_cert, dict_ratio, use_dawgs,
595 NC_NO_DUP, prev, step);
596 }
597 }
598 if (prev_cont == NC_ONLY_DUP) return;
599 if (prev->code != null_char_ && length > 0 &&
600 top_n_flags_[null_char_] == top_n_flag) {
601 // Allow nulls within multi code sequences, as the nulls within are not
602 // explicitly included in the code sequence.
603 float cert =
604 NetworkIO::ProbToCertainty(outputs[null_char_]) + cert_offset;
605 PushDupOrNoDawgIfBetter(length, false, null_char_, INVALID_UNICHAR_ID,
606 cert, worst_dict_cert, dict_ratio, use_dawgs,
607 NC_ANYTHING, prev, step);
608 }
609 }
610 const GenericVector<int>* final_codes = recoder_.GetFinalCodes(prefix);
611 if (final_codes != nullptr) {
612 for (int i = 0; i < final_codes->size(); ++i) {
613 int code = (*final_codes)[i];
614 if (top_n_flags_[code] != top_n_flag) continue;
615 if (prev != nullptr && prev->code == code && !is_simple_text_) continue;
616 float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset;
617 if (cert < kMinCertainty && code != null_char_) continue;
618 full_code.Set(length, code);
619 int unichar_id = recoder_.DecodeUnichar(full_code);
620 // Map the null char to INVALID.
621 if (length == 0 && code == null_char_) unichar_id = INVALID_UNICHAR_ID;
622 if (unichar_id != INVALID_UNICHAR_ID &&
623 charset != nullptr &&
624 !charset->get_enabled(unichar_id))
625 continue; // disabled by whitelist/blacklist
626 ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
627 use_dawgs, NC_ANYTHING, prev, step);
628 if (top_n_flag == TN_TOP2 && code != null_char_) {
629 float prob = outputs[code] + outputs[null_char_];
630 if (prev != nullptr && prev_cont == NC_ANYTHING &&
631 prev->code != null_char_ &&
632 ((prev->code == top_code_ && code == second_code_) ||
633 (code == top_code_ && prev->code == second_code_))) {
634 prob += outputs[prev->code];
635 }
636 float cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
637 ContinueUnichar(code, unichar_id, cert, worst_dict_cert, dict_ratio,
638 use_dawgs, NC_ONLY_DUP, prev, step);
639 }
640 }
641 }
642 const GenericVector<int>* next_codes = recoder_.GetNextCodes(prefix);
643 if (next_codes != nullptr) {
644 for (int i = 0; i < next_codes->size(); ++i) {
645 int code = (*next_codes)[i];
646 if (top_n_flags_[code] != top_n_flag) continue;
647 if (prev != nullptr && prev->code == code && !is_simple_text_) continue;
648 float cert = NetworkIO::ProbToCertainty(outputs[code]) + cert_offset;
649 PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID, cert,
650 worst_dict_cert, dict_ratio, use_dawgs,
651 NC_ANYTHING, prev, step);
652 if (top_n_flag == TN_TOP2 && code != null_char_) {
653 float prob = outputs[code] + outputs[null_char_];
654 if (prev != nullptr && prev_cont == NC_ANYTHING &&
655 prev->code != null_char_ &&
656 ((prev->code == top_code_ && code == second_code_) ||
657 (code == top_code_ && prev->code == second_code_))) {
658 prob += outputs[prev->code];
659 }
660 float cert = NetworkIO::ProbToCertainty(prob) + cert_offset;
661 PushDupOrNoDawgIfBetter(length + 1, false, code, INVALID_UNICHAR_ID,
662 cert, worst_dict_cert, dict_ratio, use_dawgs,
663 NC_ONLY_DUP, prev, step);
664 }
665 }
666 }
667}
668
669// Continues for a new unichar, using dawg or non-dawg as per flag.
670void RecodeBeamSearch::ContinueUnichar(int code, int unichar_id, float cert,
671 float worst_dict_cert, float dict_ratio,
672 bool use_dawgs, NodeContinuation cont,
673 const RecodeNode* prev,
674 RecodeBeam* step) {
675 if (use_dawgs) {
676 if (cert > worst_dict_cert) {
677 ContinueDawg(code, unichar_id, cert, cont, prev, step);
678 }
679 } else {
680 RecodeHeap* nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
681 PushHeapIfBetter(kBeamWidths[0], code, unichar_id, TOP_CHOICE_PERM, false,
682 false, false, false, cert * dict_ratio, prev, nullptr,
683 nodawg_heap);
684 if (dict_ != nullptr &&
685 ((unichar_id == UNICHAR_SPACE && cert > worst_dict_cert) ||
686 !dict_->getUnicharset().IsSpaceDelimited(unichar_id))) {
687 // Any top choice position that can start a new word, ie a space or
688 // any non-space-delimited character, should also be considered
689 // by the dawg search, so push initial dawg to the dawg heap.
690 float dawg_cert = cert;
691 PermuterType permuter = TOP_CHOICE_PERM;
692 // Since we use the space either side of a dictionary word in the
693 // certainty of the word, (to properly handle weak spaces) and the
694 // space is coming from a non-dict word, we need special conditions
695 // to avoid degrading the certainty of the dict word that follows.
696 // With a space we don't multiply the certainty by dict_ratio, and we
697 // flag the space with NO_PERM to indicate that we should not use the
698 // predecessor nulls to generate the confidence for the space, as they
699 // have already been multiplied by dict_ratio, and we can't go back to
700 // insert more entries in any previous heaps.
701 if (unichar_id == UNICHAR_SPACE)
702 permuter = NO_PERM;
703 else
704 dawg_cert *= dict_ratio;
705 PushInitialDawgIfBetter(code, unichar_id, permuter, false, false,
706 dawg_cert, cont, prev, step);
707 }
708 }
709}
710
711// Adds a RecodeNode composed of the tuple (code, unichar_id, cert, prev,
712// appropriate-dawg-args, cert) to the given heap (dawg_beam_) if unichar_id
713// is a valid continuation of whatever is in prev.
714void RecodeBeamSearch::ContinueDawg(int code, int unichar_id, float cert,
715 NodeContinuation cont,
716 const RecodeNode* prev, RecodeBeam* step) {
717 RecodeHeap* dawg_heap = &step->beams_[BeamIndex(true, cont, 0)];
718 RecodeHeap* nodawg_heap = &step->beams_[BeamIndex(false, cont, 0)];
719 if (unichar_id == INVALID_UNICHAR_ID) {
720 PushHeapIfBetter(kBeamWidths[0], code, unichar_id, NO_PERM, false, false,
721 false, false, cert, prev, nullptr, dawg_heap);
722 return;
723 }
724 // Avoid dictionary probe if score a total loss.
725 float score = cert;
726 if (prev != nullptr) score += prev->score;
727 if (dawg_heap->size() >= kBeamWidths[0] &&
728 score <= dawg_heap->PeekTop().data.score &&
729 nodawg_heap->size() >= kBeamWidths[0] &&
730 score <= nodawg_heap->PeekTop().data.score) {
731 return;
732 }
733 const RecodeNode* uni_prev = prev;
734 // Prev may be a partial code, null_char, or duplicate, so scan back to the
735 // last valid unichar_id.
736 while (uni_prev != nullptr &&
737 (uni_prev->unichar_id == INVALID_UNICHAR_ID || uni_prev->duplicate))
738 uni_prev = uni_prev->prev;
739 if (unichar_id == UNICHAR_SPACE) {
740 if (uni_prev != nullptr && uni_prev->end_of_word) {
741 // Space is good. Push initial state, to the dawg beam and a regular
742 // space to the top choice beam.
743 PushInitialDawgIfBetter(code, unichar_id, uni_prev->permuter, false,
744 false, cert, cont, prev, step);
745 PushHeapIfBetter(kBeamWidths[0], code, unichar_id, uni_prev->permuter,
746 false, false, false, false, cert, prev, nullptr,
747 nodawg_heap);
748 }
749 return;
750 } else if (uni_prev != nullptr && uni_prev->start_of_dawg &&
751 uni_prev->unichar_id != UNICHAR_SPACE &&
752 dict_->getUnicharset().IsSpaceDelimited(uni_prev->unichar_id) &&
753 dict_->getUnicharset().IsSpaceDelimited(unichar_id)) {
754 return; // Can't break words between space delimited chars.
755 }
756 DawgPositionVector initial_dawgs;
757 auto* updated_dawgs = new DawgPositionVector;
758 DawgArgs dawg_args(&initial_dawgs, updated_dawgs, NO_PERM);
759 bool word_start = false;
760 if (uni_prev == nullptr) {
761 // Starting from beginning of line.
762 dict_->default_dawgs(&initial_dawgs, false);
763 word_start = true;
764 } else if (uni_prev->dawgs != nullptr) {
765 // Continuing a previous dict word.
766 dawg_args.active_dawgs = uni_prev->dawgs;
767 word_start = uni_prev->start_of_dawg;
768 } else {
769 return; // Can't continue if not a dict word.
770 }
771 auto permuter = static_cast<PermuterType>(
772 dict_->def_letter_is_okay(&dawg_args,
773 dict_->getUnicharset(), unichar_id, false));
774 if (permuter != NO_PERM) {
775 PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false,
776 word_start, dawg_args.valid_end, false, cert, prev,
777 dawg_args.updated_dawgs, dawg_heap);
778 if (dawg_args.valid_end && !space_delimited_) {
779 // We can start another word right away, so push initial state as well,
780 // to the dawg beam, and the regular character to the top choice beam,
781 // since non-dict words can start here too.
782 PushInitialDawgIfBetter(code, unichar_id, permuter, word_start, true,
783 cert, cont, prev, step);
784 PushHeapIfBetter(kBeamWidths[0], code, unichar_id, permuter, false,
785 word_start, true, false, cert, prev, nullptr, nodawg_heap);
786 }
787 } else {
788 delete updated_dawgs;
789 }
790}
791
792// Adds a RecodeNode composed of the tuple (code, unichar_id,
793// initial-dawg-state, prev, cert) to the given heap if/ there is room or if
794// better than the current worst element if already full.
795void RecodeBeamSearch::PushInitialDawgIfBetter(int code, int unichar_id,
796 PermuterType permuter,
797 bool start, bool end, float cert,
798 NodeContinuation cont,
799 const RecodeNode* prev,
800 RecodeBeam* step) {
801 RecodeNode* best_initial_dawg = &step->best_initial_dawgs_[cont];
802 float score = cert;
803 if (prev != nullptr) score += prev->score;
804 if (best_initial_dawg->code < 0 || score > best_initial_dawg->score) {
805 auto* initial_dawgs = new DawgPositionVector;
806 dict_->default_dawgs(initial_dawgs, false);
807 RecodeNode node(code, unichar_id, permuter, true, start, end, false, cert,
808 score, prev, initial_dawgs,
809 ComputeCodeHash(code, false, prev));
810 *best_initial_dawg = node;
811 }
812}
813
814// Adds a RecodeNode composed of the tuple (code, unichar_id, permuter,
815// false, false, false, false, cert, prev, nullptr) to heap if there is room
816// or if better than the current worst element if already full.
817/* static */
818void RecodeBeamSearch::PushDupOrNoDawgIfBetter(
819 int length, bool dup, int code, int unichar_id, float cert,
820 float worst_dict_cert, float dict_ratio, bool use_dawgs,
821 NodeContinuation cont, const RecodeNode* prev, RecodeBeam* step) {
822 int index = BeamIndex(use_dawgs, cont, length);
823 if (use_dawgs) {
824 if (cert > worst_dict_cert) {
825 PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
826 prev ? prev->permuter : NO_PERM, false, false, false,
827 dup, cert, prev, nullptr, &step->beams_[index]);
828 }
829 } else {
830 cert *= dict_ratio;
831 if (cert >= kMinCertainty || code == null_char_) {
832 PushHeapIfBetter(kBeamWidths[length], code, unichar_id,
833 prev ? prev->permuter : TOP_CHOICE_PERM, false, false,
834 false, dup, cert, prev, nullptr, &step->beams_[index]);
835 }
836 }
837}
838
839// Adds a RecodeNode composed of the tuple (code, unichar_id, permuter,
840// dawg_start, word_start, end, dup, cert, prev, d) to heap if there is room
841// or if better than the current worst element if already full.
842void RecodeBeamSearch::PushHeapIfBetter(int max_size, int code, int unichar_id,
843 PermuterType permuter, bool dawg_start,
844 bool word_start, bool end, bool dup,
845 float cert, const RecodeNode* prev,
846 DawgPositionVector* d,
847 RecodeHeap* heap) {
848 float score = cert;
849 if (prev != nullptr) score += prev->score;
850 if (heap->size() < max_size || score > heap->PeekTop().data.score) {
851 uint64_t hash = ComputeCodeHash(code, dup, prev);
852 RecodeNode node(code, unichar_id, permuter, dawg_start, word_start, end,
853 dup, cert, score, prev, d, hash);
854 if (UpdateHeapIfMatched(&node, heap)) return;
855 RecodePair entry(score, node);
856 heap->Push(&entry);
857 ASSERT_HOST(entry.data.dawgs == nullptr);
858 if (heap->size() > max_size) heap->Pop(&entry);
859 } else {
860 delete d;
861 }
862}
863
864// Adds a RecodeNode to heap if there is room
865// or if better than the current worst element if already full.
866void RecodeBeamSearch::PushHeapIfBetter(int max_size, RecodeNode* node,
867 RecodeHeap* heap) {
868 if (heap->size() < max_size || node->score > heap->PeekTop().data.score) {
869 if (UpdateHeapIfMatched(node, heap)) {
870 return;
871 }
872 RecodePair entry(node->score, *node);
873 heap->Push(&entry);
874 ASSERT_HOST(entry.data.dawgs == nullptr);
875 if (heap->size() > max_size) heap->Pop(&entry);
876 }
877}
878
879// Searches the heap for a matching entry, and updates the score with
880// reshuffle if needed. Returns true if there was a match.
881bool RecodeBeamSearch::UpdateHeapIfMatched(RecodeNode* new_node,
882 RecodeHeap* heap) {
883 // TODO(rays) consider hash map instead of linear search.
884 // It might not be faster because the hash map would have to be updated
885 // every time a heap reshuffle happens, and that would be a lot of overhead.
886 GenericVector<RecodePair>* nodes = heap->heap();
887 for (int i = 0; i < nodes->size(); ++i) {
888 RecodeNode& node = (*nodes)[i].data;
889 if (node.code == new_node->code && node.code_hash == new_node->code_hash &&
890 node.permuter == new_node->permuter &&
891 node.start_of_dawg == new_node->start_of_dawg) {
892 if (new_node->score > node.score) {
893 // The new one is better. Update the entire node in the heap and
894 // reshuffle.
895 node = *new_node;
896 (*nodes)[i].key = node.score;
897 heap->Reshuffle(&(*nodes)[i]);
898 }
899 return true;
900 }
901 }
902 return false;
903}
904
905// Computes and returns the code-hash for the given code and prev.
906uint64_t RecodeBeamSearch::ComputeCodeHash(int code, bool dup,
907 const RecodeNode* prev) const {
908 uint64_t hash = prev == nullptr ? 0 : prev->code_hash;
909 if (!dup && code != null_char_) {
910 int num_classes = recoder_.code_range();
911 uint64_t carry = (((hash >> 32) * num_classes) >> 32);
912 hash *= num_classes;
913 hash += carry;
914 hash += code;
915 }
916 return hash;
917}
918
919// Backtracks to extract the best path through the lattice that was built
920// during Decode. On return the best_nodes vector essentially contains the set
921// of code, score pairs that make the optimal path with the constraint that
922// the recoder can decode the code sequence back to a sequence of unichar-ids.
923void RecodeBeamSearch::ExtractBestPaths(
925 GenericVector<const RecodeNode*>* second_nodes) const {
926 // Scan both beams to extract the best and second best paths.
927 const RecodeNode* best_node = nullptr;
928 const RecodeNode* second_best_node = nullptr;
929 const RecodeBeam* last_beam = beam_[beam_size_ - 1];
930 for (int c = 0; c < NC_COUNT; ++c) {
931 if (c == NC_ONLY_DUP) continue;
932 auto cont = static_cast<NodeContinuation>(c);
933 for (int is_dawg = 0; is_dawg < 2; ++is_dawg) {
934 int beam_index = BeamIndex(is_dawg, cont, 0);
935 int heap_size = last_beam->beams_[beam_index].size();
936 for (int h = 0; h < heap_size; ++h) {
937 const RecodeNode* node = &last_beam->beams_[beam_index].get(h).data;
938 if (is_dawg) {
939 // dawg_node may be a null_char, or duplicate, so scan back to the
940 // last valid unichar_id.
941 const RecodeNode* dawg_node = node;
942 while (dawg_node != nullptr &&
943 (dawg_node->unichar_id == INVALID_UNICHAR_ID ||
944 dawg_node->duplicate))
945 dawg_node = dawg_node->prev;
946 if (dawg_node == nullptr || (!dawg_node->end_of_word &&
947 dawg_node->unichar_id != UNICHAR_SPACE)) {
948 // Dawg node is not valid.
949 continue;
950 }
951 }
952 if (best_node == nullptr || node->score > best_node->score) {
953 second_best_node = best_node;
954 best_node = node;
955 } else if (second_best_node == nullptr ||
956 node->score > second_best_node->score) {
957 second_best_node = node;
958 }
959 }
960 }
961 }
962 if (second_nodes != nullptr) ExtractPath(second_best_node, second_nodes);
963 ExtractPath(best_node, best_nodes);
964}
965
966// Helper backtracks through the lattice from the given node, storing the
967// path and reversing it.
968void RecodeBeamSearch::ExtractPath(
969 const RecodeNode* node, GenericVector<const RecodeNode*>* path) const {
970 path->truncate(0);
971 while (node != nullptr) {
972 path->push_back(node);
973 node = node->prev;
974 }
975 path->reverse();
976}
977
978// Helper prints debug information on the given lattice path.
979void RecodeBeamSearch::DebugPath(
980 const UNICHARSET* unicharset,
981 const GenericVector<const RecodeNode*>& path) const {
982 for (int c = 0; c < path.size(); ++c) {
983 const RecodeNode& node = *path[c];
984 tprintf("%d ", c);
985 node.Print(null_char_, *unicharset, 1);
986 }
987}
988
989// Helper prints debug information on the given unichar path.
990void RecodeBeamSearch::DebugUnicharPath(
991 const UNICHARSET* unicharset, const GenericVector<const RecodeNode*>& path,
992 const GenericVector<int>& unichar_ids, const GenericVector<float>& certs,
993 const GenericVector<float>& ratings,
994 const GenericVector<int>& xcoords) const {
995 int num_ids = unichar_ids.size();
996 double total_rating = 0.0;
997 for (int c = 0; c < num_ids; ++c) {
998 int coord = xcoords[c];
999 tprintf("%d %d=%s r=%g, c=%g, s=%d, e=%d, perm=%d\n", coord, unichar_ids[c],
1000 unicharset->debug_str(unichar_ids[c]).string(), ratings[c],
1001 certs[c], path[coord]->start_of_word, path[coord]->end_of_word,
1002 path[coord]->permuter);
1003 total_rating += ratings[c];
1004 }
1005 tprintf("Path total rating = %g\n", total_rating);
1006}
1007
1008} // namespace tesseract.
PermuterType
Definition: ratngs.h:232
@ TOP_CHOICE_PERM
Definition: ratngs.h:235
@ NO_PERM
Definition: ratngs.h:233
@ BCC_STATIC_CLASSIFIER
Definition: ratngs.h:44
#define ASSERT_HOST(x)
Definition: errcode.h:88
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
@ UNICHAR_SPACE
Definition: unicharset.h:34
@ character
Definition: mfoutline.h:63
KDPairInc< double, RecodeNode > RecodePair
Definition: recodebeam.h:175
@ TN_ALSO_RAN
Definition: recodebeam.h:87
GenericHeap< RecodePair > RecodeHeap
Definition: recodebeam.h:176
NodeContinuation
Definition: recodebeam.h:72
@ NC_ANYTHING
Definition: recodebeam.h:73
@ NC_ONLY_DUP
Definition: recodebeam.h:74
void init_to_size(int size, const T &t)
int push_back(T object)
bool empty() const
Definition: genericvector.h:91
int size() const
Definition: genericvector.h:72
T & back() const
void truncate(int size)
int dim2() const
Definition: matrix.h:210
void put(ICOORD pos, const T &thing)
Definition: matrix.h:223
int dim1() const
Definition: matrix.h:209
Definition: matrix.h:578
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: pageres.h:221
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:898
MATRIX * ratings
Definition: pageres.h:237
integer coordinate
Definition: points.h:32
Definition: rect.h:34
int16_t top() const
Definition: rect.h:58
int16_t height() const
Definition: rect.h:108
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:241
Definition: werd.h:56
const Pair & get(int index) const
Definition: genericheap.h:87
const char * string() const
Definition: strngs.cpp:194
static const int kMaxCodeLen
const GenericVector< int > * GetFinalCodes(const RecodedCharID &code) const
const GenericVector< int > * GetNextCodes(const RecodedCharID &code) const
int DecodeUnichar(const RecodedCharID &code) const
const char * id_to_unichar_ext(UNICHAR_ID id) const
Definition: unicharset.cpp:299
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const
Definition: unicharset.h:652
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:878
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:343
int size() const
Definition: unicharset.h:341
bool IsSpaceDelimitedLang() const
Returns true if the language is space-delimited (not CJ, or T).
Definition: dict.cpp:883
void default_dawgs(DawgPositionVector *anylength_dawgs, bool suppress_patterns) const
Definition: dict.cpp:617
int def_letter_is_okay(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
Definition: dict.cpp:395
const UNICHARSET & getUnicharset() const
Definition: dict.h:101
static float ProbToCertainty(float prob)
Definition: networkio.cpp:568
float * f(int t)
Definition: networkio.h:115
int Width() const
Definition: networkio.h:107
int NumFeatures() const
Definition: networkio.h:111
const RecodeNode * prev
Definition: recodebeam.h:167
void Print(int null_char, const UNICHARSET &unicharset, int depth) const
Definition: recodebeam.cpp:42
PermuterType permuter
Definition: recodebeam.h:147
void ExtractBestPathAsUnicharIds(bool debug, const UNICHARSET *unicharset, GenericVector< int > *unichar_ids, GenericVector< float > *certs, GenericVector< float > *ratings, GenericVector< int > *xcoords) const
Definition: recodebeam.cpp:156
void Decode(const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
Definition: recodebeam.cpp:76
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: recodebeam.h:216
static bool IsDawgFromBeamsIndex(int index)
Definition: recodebeam.h:233
void ExtractBestPathAsLabels(GenericVector< int > *labels, GenericVector< int > *xcoords) const
Definition: recodebeam.cpp:133
static int LengthFromBeamsIndex(int index)
Definition: recodebeam.h:229
static NodeContinuation ContinuationFromBeamsIndex(int index)
Definition: recodebeam.h:230
void DebugBeams(const UNICHARSET &unicharset) const
Definition: recodebeam.cpp:303
RecodeBeamSearch(const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict)
Definition: recodebeam.cpp:62
static const int kNumBeams
Definition: recodebeam.h:227
static constexpr float kMinCertainty
Definition: recodebeam.h:222
static int BeamIndex(bool is_dawg, NodeContinuation cont, int length)
Definition: recodebeam.h:237
void ExtractBestPathAsWords(const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
Definition: recodebeam.cpp:171