tesseract 4.1.1
Loading...
Searching...
No Matches
ratngs.h File Reference
#include <cassert>
#include <cfloat>
#include "clst.h"
#include "elst.h"
#include "fontinfo.h"
#include "genericvector.h"
#include "matrix.h"
#include "unichar.h"
#include "unicharset.h"
#include "werd.h"

Go to the source code of this file.

Classes

class  BLOB_CHOICE
 
class  WERD_CHOICE
 

Namespaces

namespace  tesseract
 

Typedefs

using BLOB_CHOICE_LIST_VECTOR = GenericVector< BLOB_CHOICE_LIST * >
 

Enumerations

enum  BlobChoiceClassifier {
  BCC_STATIC_CLASSIFIER , BCC_ADAPTED_CLASSIFIER , BCC_SPECKLE_CLASSIFIER , BCC_AMBIG ,
  BCC_FAKE
}
 
enum  PermuterType {
  NO_PERM , PUNC_PERM , TOP_CHOICE_PERM , LOWER_CASE_PERM ,
  UPPER_CASE_PERM , NGRAM_PERM , NUMBER_PERM , USER_PATTERN_PERM ,
  SYSTEM_DAWG_PERM , DOC_DAWG_PERM , USER_DAWG_PERM , FREQ_DAWG_PERM ,
  COMPOUND_PERM , NUM_PERMUTER_TYPES
}
 
enum  tesseract::ScriptPos { tesseract::SP_NORMAL , tesseract::SP_SUBSCRIPT , tesseract::SP_SUPERSCRIPT , tesseract::SP_DROPCAP }
 

Functions

BLOB_CHOICE * FindMatchingChoice (UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
 
const char * tesseract::ScriptPosToString (enum ScriptPos script_pos)
 
bool EqualIgnoringCaseAndTerminalPunct (const WERD_CHOICE &word1, const WERD_CHOICE &word2)
 
void print_ratings_list (const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
 

Typedef Documentation

◆ BLOB_CHOICE_LIST_VECTOR

using BLOB_CHOICE_LIST_VECTOR = GenericVector<BLOB_CHOICE_LIST *>

Definition at line 634 of file ratngs.h.

Enumeration Type Documentation

◆ BlobChoiceClassifier

Enumerator
BCC_STATIC_CLASSIFIER 
BCC_ADAPTED_CLASSIFIER 
BCC_SPECKLE_CLASSIFIER 
BCC_AMBIG 
BCC_FAKE 

Definition at line 43 of file ratngs.h.

43 {
44 BCC_STATIC_CLASSIFIER, // From the char_norm classifier.
45 BCC_ADAPTED_CLASSIFIER, // From the adaptive classifier.
46 BCC_SPECKLE_CLASSIFIER, // Backup for failed classification.
47 BCC_AMBIG, // Generated by ambiguity detection.
48 BCC_FAKE, // From some other process.
49};
@ BCC_ADAPTED_CLASSIFIER
Definition: ratngs.h:45
@ BCC_FAKE
Definition: ratngs.h:48
@ BCC_SPECKLE_CLASSIFIER
Definition: ratngs.h:46
@ BCC_AMBIG
Definition: ratngs.h:47
@ BCC_STATIC_CLASSIFIER
Definition: ratngs.h:44

◆ PermuterType

Enumerator
NO_PERM 
PUNC_PERM 
TOP_CHOICE_PERM 
LOWER_CASE_PERM 
UPPER_CASE_PERM 
NGRAM_PERM 
NUMBER_PERM 
USER_PATTERN_PERM 
SYSTEM_DAWG_PERM 
DOC_DAWG_PERM 
USER_DAWG_PERM 
FREQ_DAWG_PERM 
COMPOUND_PERM 
NUM_PERMUTER_TYPES 

Definition at line 232 of file ratngs.h.

232 {
233 NO_PERM, // 0
234 PUNC_PERM, // 1
235 TOP_CHOICE_PERM, // 2
236 LOWER_CASE_PERM, // 3
237 UPPER_CASE_PERM, // 4
238 NGRAM_PERM, // 5
239 NUMBER_PERM, // 6
240 USER_PATTERN_PERM, // 7
241 SYSTEM_DAWG_PERM, // 8
242 DOC_DAWG_PERM, // 9
243 USER_DAWG_PERM, // 10
244 FREQ_DAWG_PERM, // 11
245 COMPOUND_PERM, // 12
246
247 NUM_PERMUTER_TYPES
248};
@ TOP_CHOICE_PERM
Definition: ratngs.h:235
@ USER_PATTERN_PERM
Definition: ratngs.h:240
@ DOC_DAWG_PERM
Definition: ratngs.h:242
@ FREQ_DAWG_PERM
Definition: ratngs.h:244
@ USER_DAWG_PERM
Definition: ratngs.h:243
@ SYSTEM_DAWG_PERM
Definition: ratngs.h:241
@ NGRAM_PERM
Definition: ratngs.h:238
@ NUMBER_PERM
Definition: ratngs.h:239
@ PUNC_PERM
Definition: ratngs.h:234
@ LOWER_CASE_PERM
Definition: ratngs.h:236
@ UPPER_CASE_PERM
Definition: ratngs.h:237
@ NUM_PERMUTER_TYPES
Definition: ratngs.h:247
@ COMPOUND_PERM
Definition: ratngs.h:245
@ NO_PERM
Definition: ratngs.h:233

Function Documentation

◆ EqualIgnoringCaseAndTerminalPunct()

bool EqualIgnoringCaseAndTerminalPunct ( const WERD_CHOICE &  word1,
const WERD_CHOICE &  word2 
)

Definition at line 809 of file ratngs.cpp.

810 {
811 const UNICHARSET *uchset = word1.unicharset();
812 if (word2.unicharset() != uchset) return false;
813 int w1start, w1end;
814 word1.punct_stripped(&w1start, &w1end);
815 int w2start, w2end;
816 word2.punct_stripped(&w2start, &w2end);
817 if (w1end - w1start != w2end - w2start) return false;
818 for (int i = 0; i < w1end - w1start; i++) {
819 if (uchset->to_lower(word1.unichar_id(w1start + i)) !=
820 uchset->to_lower(word2.unichar_id(w2start + i))) {
821 return false;
822 }
823 }
824 return true;
825}
void punct_stripped(int *start_core, int *end_core) const
Definition: ratngs.cpp:387
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
const UNICHARSET * unicharset() const
Definition: ratngs.h:290
UNICHAR_ID to_lower(UNICHAR_ID unichar_id) const
Definition: unicharset.h:704

◆ FindMatchingChoice()

BLOB_CHOICE * FindMatchingChoice ( UNICHAR_ID  char_id,
BLOB_CHOICE_LIST *  bc_list 
)

Definition at line 184 of file ratngs.cpp.

185 {
186 // Find the corresponding best BLOB_CHOICE.
187 BLOB_CHOICE_IT choice_it(bc_list);
188 for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
189 choice_it.forward()) {
190 BLOB_CHOICE* choice = choice_it.data();
191 if (choice->unichar_id() == char_id) {
192 return choice;
193 }
194 }
195 return nullptr;
196}
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77

◆ print_ratings_list()

void print_ratings_list ( const char *  msg,
BLOB_CHOICE_LIST *  ratings,
const UNICHARSET &  current_unicharset 
)

print_ratings_list

Send all the ratings out to the logfile.

Parameters
msg: intro message
ratings: list of ratings
current_unicharset: unicharset that can be used for id-to-unichar conversion

Definition at line 837 of file ratngs.cpp.

839 {
840 if (ratings->length() == 0) {
841 tprintf("%s:<none>\n", msg);
842 return;
843 }
844 if (*msg != '\0') {
845 tprintf("%s\n", msg);
846 }
847 BLOB_CHOICE_IT c_it;
848 c_it.set_to_list(ratings);
849 for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
850 c_it.data()->print(&current_unicharset);
851 if (!c_it.at_last()) tprintf("\n");
852 }
853 tprintf("\n");
854 fflush(stdout);
855}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35