tesseract 4.1.1
Loading...
Searching...
No Matches
tesseract::RecodeBeamSearch Class Reference

#include <recodebeam.h>

Public Member Functions

 RecodeBeamSearch (const UnicharCompress &recoder, int null_char, bool simple_text, Dict *dict)
 
void Decode (const NetworkIO &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset, int lstm_choice_mode=0)
 
void Decode (const GENERIC_2D_ARRAY< float > &output, double dict_ratio, double cert_offset, double worst_dict_cert, const UNICHARSET *charset)
 
void ExtractBestPathAsLabels (GenericVector< int > *labels, GenericVector< int > *xcoords) const
 
void ExtractBestPathAsUnicharIds (bool debug, const UNICHARSET *unicharset, GenericVector< int > *unichar_ids, GenericVector< float > *certs, GenericVector< float > *ratings, GenericVector< int > *xcoords) const
 
void ExtractBestPathAsWords (const TBOX &line_box, float scale_factor, bool debug, const UNICHARSET *unicharset, PointerVector< WERD_RES > *words, int lstm_choice_mode=0)
 
void DebugBeams (const UNICHARSET &unicharset) const
 

Static Public Member Functions

static int LengthFromBeamsIndex (int index)
 
static NodeContinuation ContinuationFromBeamsIndex (int index)
 
static bool IsDawgFromBeamsIndex (int index)
 
static int BeamIndex (bool is_dawg, NodeContinuation cont, int length)
 

Public Attributes

std::vector< std::vector< std::pair< const char *, float > > > timesteps
 

Static Public Attributes

static constexpr float kMinCertainty = -20.0f
 
static const int kNumLengths = RecodedCharID::kMaxCodeLen + 1
 
static const int kNumBeams = 2 * NC_COUNT * kNumLengths
 

Detailed Description

Definition at line 179 of file recodebeam.h.

Constructor & Destructor Documentation

◆ RecodeBeamSearch()

tesseract::RecodeBeamSearch::RecodeBeamSearch ( const UnicharCompress &  recoder,
int  null_char,
bool  simple_text,
Dict *  dict 
)

Definition at line 62 of file recodebeam.cpp.

64 : recoder_(recoder),
65 beam_size_(0),
66 top_code_(-1),
67 second_code_(-1),
68 dict_(dict),
69 space_delimited_(true),
70 is_simple_text_(simple_text),
71 null_char_(null_char) {
72 if (dict_ != nullptr && !dict_->IsSpaceDelimitedLang()) space_delimited_ = false;
73}
bool IsSpaceDelimitedLang() const
Returns true if the language is space-delimited (i.e. not Chinese, Japanese, or Thai).
Definition: dict.cpp:883

Member Function Documentation

◆ BeamIndex()

static int tesseract::RecodeBeamSearch::BeamIndex ( bool  is_dawg,
NodeContinuation  cont,
int  length 
)
inline static

Definition at line 237 of file recodebeam.h.

237 {
238 return (is_dawg * NC_COUNT + cont) * kNumLengths + length;
239 }
static const int kNumLengths
Definition: recodebeam.h:224

◆ ContinuationFromBeamsIndex()

static NodeContinuation tesseract::RecodeBeamSearch::ContinuationFromBeamsIndex ( int  index)
inline static

Definition at line 230 of file recodebeam.h.

230 {
231 return static_cast<NodeContinuation>((index / kNumLengths) % NC_COUNT);
232 }
NodeContinuation
Definition: recodebeam.h:72

◆ DebugBeams()

void tesseract::RecodeBeamSearch::DebugBeams ( const UNICHARSET &  unicharset) const

Definition at line 303 of file recodebeam.cpp.

303 {
304 for (int p = 0; p < beam_size_; ++p) {
305 for (int d = 0; d < 2; ++d) {
306 for (int c = 0; c < NC_COUNT; ++c) {
307 auto cont = static_cast<NodeContinuation>(c);
308 int index = BeamIndex(d, cont, 0);
309 if (beam_[p]->beams_[index].empty()) continue;
310 // Print all the best scoring nodes for each unichar found.
311 tprintf("Position %d: %s+%s beam\n", p, d ? "Dict" : "Non-Dict",
312 kNodeContNames[c]);
313 DebugBeamPos(unicharset, beam_[p]->beams_[index]);
314 }
315 }
316 }
317}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
static int BeamIndex(bool is_dawg, NodeContinuation cont, int length)
Definition: recodebeam.h:237

◆ Decode() [1/2]

void tesseract::RecodeBeamSearch::Decode ( const GENERIC_2D_ARRAY< float > &  output,
double  dict_ratio,
double  cert_offset,
double  worst_dict_cert,
const UNICHARSET *  charset 
)

Definition at line 92 of file recodebeam.cpp.

95 {
96 beam_size_ = 0;
97 int width = output.dim1();
98 for (int t = 0; t < width; ++t) {
99 ComputeTopN(output[t], output.dim2(), kBeamWidths[0]);
100 DecodeStep(output[t], t, dict_ratio, cert_offset, worst_dict_cert, charset);
101 }
102}
int dim2() const
Definition: matrix.h:210
int dim1() const
Definition: matrix.h:209

◆ Decode() [2/2]

void tesseract::RecodeBeamSearch::Decode ( const NetworkIO &  output,
double  dict_ratio,
double  cert_offset,
double  worst_dict_cert,
const UNICHARSET *  charset,
int  lstm_choice_mode = 0 
)

Definition at line 76 of file recodebeam.cpp.

78 {
79 beam_size_ = 0;
80 int width = output.Width();
81 if (lstm_choice_mode)
82 timesteps.clear();
83 for (int t = 0; t < width; ++t) {
84 ComputeTopN(output.f(t), output.NumFeatures(), kBeamWidths[0]);
85 DecodeStep(output.f(t), t, dict_ratio, cert_offset, worst_dict_cert,
86 charset);
87 if (lstm_choice_mode) {
88 SaveMostCertainChoices(output.f(t), output.NumFeatures(), charset, t);
89 }
90 }
91}
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: recodebeam.h:216

◆ ExtractBestPathAsLabels()

void tesseract::RecodeBeamSearch::ExtractBestPathAsLabels ( GenericVector< int > *  labels,
GenericVector< int > *  xcoords 
) const

Definition at line 133 of file recodebeam.cpp.

134 {
135 labels->truncate(0);
136 xcoords->truncate(0);
138 ExtractBestPaths(&best_nodes, nullptr);
139 // Now just run CTC on the best nodes.
140 int t = 0;
141 int width = best_nodes.size();
142 while (t < width) {
143 int label = best_nodes[t]->code;
144 if (label != null_char_) {
145 labels->push_back(label);
146 xcoords->push_back(t);
147 }
148 while (++t < width && !is_simple_text_ && best_nodes[t]->code == label) {
149 }
150 }
151 xcoords->push_back(width);
152}
int push_back(T object)
int size() const
Definition: genericvector.h:72
void truncate(int size)

◆ ExtractBestPathAsUnicharIds()

void tesseract::RecodeBeamSearch::ExtractBestPathAsUnicharIds ( bool  debug,
const UNICHARSET *  unicharset,
GenericVector< int > *  unichar_ids,
GenericVector< float > *  certs,
GenericVector< float > *  ratings,
GenericVector< int > *  xcoords 
) const

Definition at line 156 of file recodebeam.cpp.

159 {
161 ExtractBestPaths(&best_nodes, nullptr);
162 ExtractPathAsUnicharIds(best_nodes, unichar_ids, certs, ratings, xcoords);
163 if (debug) {
164 DebugPath(unicharset, best_nodes);
165 DebugUnicharPath(unicharset, best_nodes, *unichar_ids, *certs, *ratings,
166 *xcoords);
167 }
168}

◆ ExtractBestPathAsWords()

void tesseract::RecodeBeamSearch::ExtractBestPathAsWords ( const TBOX &  line_box,
float  scale_factor,
bool  debug,
const UNICHARSET *  unicharset,
PointerVector< WERD_RES > *  words,
int  lstm_choice_mode = 0 
)

Definition at line 171 of file recodebeam.cpp.

175 {
176 words->truncate(0);
177 GenericVector<int> unichar_ids;
179 GenericVector<float> ratings;
180 GenericVector<int> xcoords;
183 std::deque<std::tuple<int, int>> best_choices;
184 ExtractBestPaths(&best_nodes, &second_nodes);
185 if (debug) {
186 DebugPath(unicharset, best_nodes);
187 ExtractPathAsUnicharIds(second_nodes, &unichar_ids, &certs, &ratings,
188 &xcoords);
189 tprintf("\nSecond choice path:\n");
190 DebugUnicharPath(unicharset, second_nodes, unichar_ids, certs, ratings,
191 xcoords);
192 }
193 int timestepEnd= 0;
194 //if lstm choice mode is required in granularity level 2 it stores the x
195 //Coordinates of every chosen character to match the alternative choices to it
196 if (lstm_choice_mode == 2) {
197 ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
198 &xcoords, &best_choices);
199 if (best_choices.size() > 0) {
200 timestepEnd = std::get<1>(best_choices.front());
201 best_choices.pop_front();
202 }
203 } else {
204 ExtractPathAsUnicharIds(best_nodes, &unichar_ids, &certs, &ratings,
205 &xcoords);
206 }
207 int num_ids = unichar_ids.size();
208 if (debug) {
209 DebugUnicharPath(unicharset, best_nodes, unichar_ids, certs, ratings,
210 xcoords);
211 }
212 // Convert labels to unichar-ids.
213 int word_end = 0;
214 float prev_space_cert = 0.0f;
215 for (int word_start = 0; word_start < num_ids; word_start = word_end) {
216 for (word_end = word_start + 1; word_end < num_ids; ++word_end) {
217 // A word is terminated when a space character or start_of_word flag is
218 // hit. We also want to force a separate word for every non
219 // space-delimited character when not in a dictionary context.
220 if (unichar_ids[word_end] == UNICHAR_SPACE) break;
221 int index = xcoords[word_end];
222 if (best_nodes[index]->start_of_word) break;
223 if (best_nodes[index]->permuter == TOP_CHOICE_PERM &&
224 (!unicharset->IsSpaceDelimited(unichar_ids[word_end]) ||
225 !unicharset->IsSpaceDelimited(unichar_ids[word_end - 1])))
226 break;
227 }
228 float space_cert = 0.0f;
229 if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
230 space_cert = certs[word_end];
231 bool leading_space =
232 word_start > 0 && unichar_ids[word_start - 1] == UNICHAR_SPACE;
233 // Create a WERD_RES for the output word.
234 WERD_RES* word_res = InitializeWord(
235 leading_space, line_box, word_start, word_end,
236 std::min(space_cert, prev_space_cert), unicharset, xcoords, scale_factor);
237 if (lstm_choice_mode == 1) {
238 for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
239 word_res->timesteps.push_back(timesteps[i]);
240 }
241 timestepEnd = xcoords[word_end];
242 } else if (lstm_choice_mode == 2){
243 // Accumulated Timesteps (choice mode 2 processing)
244 float sum = 0;
245 std::vector<std::pair<const char*, float>> choice_pairs;
246 for (size_t i = timestepEnd; i < xcoords[word_end]; i++) {
247 for (std::pair<const char*, float> choice : timesteps[i]) {
248 if (std::strcmp(choice.first, "")) {
249 sum += choice.second;
250 choice_pairs.push_back(choice);
251 }
252 }
253 if ((best_choices.size() > 0 && i == std::get<1>(best_choices.front()) - 1)
254 || i == xcoords[word_end]-1) {
255 std::map<const char*, float> summed_propabilities;
256 for (auto & choice_pair : choice_pairs) {
257 summed_propabilities[choice_pair.first] += choice_pair.second;
258 }
259 std::vector<std::pair<const char*, float>> accumulated_timestep;
260 for (auto& summed_propability : summed_propabilities) {
261 if(sum == 0) break;
262 summed_propability.second/=sum;
263 size_t pos = 0;
264 while (accumulated_timestep.size() > pos
265 && accumulated_timestep[pos].second > summed_propability.second) {
266 pos++;
267 }
268 accumulated_timestep.insert(accumulated_timestep.begin() + pos,
269 std::pair<const char*,float>(summed_propability.first,
270 summed_propability.second));
271 }
272 if (best_choices.size() > 0) {
273 best_choices.pop_front();
274 }
275 choice_pairs.clear();
276 word_res->timesteps.push_back(accumulated_timestep);
277 sum = 0;
278 }
279 }
280 timestepEnd = xcoords[word_end];
281 }
282 for (int i = word_start; i < word_end; ++i) {
283 auto* choices = new BLOB_CHOICE_LIST;
284 BLOB_CHOICE_IT bc_it(choices);
285 auto* choice = new BLOB_CHOICE(
286 unichar_ids[i], ratings[i], certs[i], -1, 1.0f,
287 static_cast<float>(INT16_MAX), 0.0f, BCC_STATIC_CLASSIFIER);
288 int col = i - word_start;
289 choice->set_matrix_cell(col, col);
290 bc_it.add_after_then_move(choice);
291 word_res->ratings->put(col, col, choices);
292 }
293 int index = xcoords[word_end - 1];
294 word_res->FakeWordFromRatings(best_nodes[index]->permuter);
295 words->push_back(word_res);
296 prev_space_cert = space_cert;
297 if (word_end < num_ids && unichar_ids[word_end] == UNICHAR_SPACE)
298 ++word_end;
299 }
300}
@ TOP_CHOICE_PERM
Definition: ratngs.h:235
@ BCC_STATIC_CLASSIFIER
Definition: ratngs.h:44
@ UNICHAR_SPACE
Definition: unicharset.h:34
void put(ICOORD pos, const T &thing)
Definition: matrix.h:223
std::vector< std::vector< std::pair< const char *, float > > > timesteps
Definition: pageres.h:221
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:898
MATRIX * ratings
Definition: pageres.h:237
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const
Definition: unicharset.h:652

◆ IsDawgFromBeamsIndex()

static bool tesseract::RecodeBeamSearch::IsDawgFromBeamsIndex ( int  index)
inline static

Definition at line 233 of file recodebeam.h.

233 {
234 return index / (kNumLengths * NC_COUNT) > 0;
235 }

◆ LengthFromBeamsIndex()

static int tesseract::RecodeBeamSearch::LengthFromBeamsIndex ( int  index)
inline static

Definition at line 229 of file recodebeam.h.

229{ return index % kNumLengths; }

Member Data Documentation

◆ kMinCertainty

constexpr float tesseract::RecodeBeamSearch::kMinCertainty = -20.0f
static constexpr

Definition at line 222 of file recodebeam.h.

◆ kNumBeams

const int tesseract::RecodeBeamSearch::kNumBeams = 2 * NC_COUNT * kNumLengths
static

Definition at line 227 of file recodebeam.h.

◆ kNumLengths

const int tesseract::RecodeBeamSearch::kNumLengths = RecodedCharID::kMaxCodeLen + 1
static

Definition at line 224 of file recodebeam.h.

◆ timesteps

std::vector< std::vector<std::pair<const char*, float> > > tesseract::RecodeBeamSearch::timesteps

Definition at line 216 of file recodebeam.h.


The documentation for this class was generated from the following files: