tesseract 4.1.1
Loading...
Searching...
No Matches
WERD_CHOICE Class Reference

#include <ratngs.h>

Inheritance diagram for WERD_CHOICE:
ELIST_LINK

Public Member Functions

 WERD_CHOICE (const UNICHARSET *unicharset)
 
 WERD_CHOICE (const UNICHARSET *unicharset, int reserved)
 
 WERD_CHOICE (const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uint8_t src_permuter, const UNICHARSET &unicharset)
 
 WERD_CHOICE (const char *src_string, const UNICHARSET &unicharset)
 
 WERD_CHOICE (const WERD_CHOICE &word)
 
 ~WERD_CHOICE ()
 
const UNICHARSET * unicharset () const
 
int length () const
 
float adjust_factor () const
 
void set_adjust_factor (float factor)
 
const UNICHAR_ID * unichar_ids () const
 
UNICHAR_ID unichar_id (int index) const
 
int state (int index) const
 
tesseract::ScriptPos BlobPosition (int index) const
 
float rating () const
 
float certainty () const
 
float certainty (int index) const
 
float min_x_height () const
 
float max_x_height () const
 
void set_x_heights (float min_height, float max_height)
 
uint8_t permuter () const
 
const char * permuter_name () const
 
BLOB_CHOICE_LIST * blob_choices (int index, MATRIX *ratings) const
 
MATRIX_COORD MatrixCoord (int index) const
 
void set_unichar_id (UNICHAR_ID unichar_id, int index)
 
bool dangerous_ambig_found () const
 
void set_dangerous_ambig_found_ (bool value)
 
void set_rating (float new_val)
 
void set_certainty (float new_val)
 
void set_permuter (uint8_t perm)
 
void set_length (int len)
 
void double_the_size ()
 Make more space in the unichar_ids_, script_pos_, state_ and certainties_ arrays. More...
 
void init (int reserved)
 
void init (const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uint8_t src_permuter)
 
void make_bad ()
 Set the fields in this choice to be default (bad) values. More...
 
void append_unichar_id_space_allocated (UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
 
void append_unichar_id (UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
 
void set_unichar_id (UNICHAR_ID unichar_id, int blob_count, float rating, float certainty, int index)
 
void set_blob_choice (int index, int blob_count, const BLOB_CHOICE *blob_choice)
 
bool contains_unichar_id (UNICHAR_ID unichar_id) const
 
void remove_unichar_ids (int index, int num)
 
void remove_last_unichar_id ()
 
void remove_unichar_id (int index)
 
bool has_rtl_unichar_id () const
 
void reverse_and_mirror_unichar_ids ()
 
void punct_stripped (int *start_core, int *end_core) const
 
void GetNonSuperscriptSpan (int *start, int *end) const
 
WERD_CHOICE shallow_copy (int start, int end) const
 
void string_and_lengths (STRING *word_str, STRING *word_lengths_str) const
 
const STRING debug_string () const
 
bool ContainsAnyNonSpaceDelimited () const
 
bool IsAllSpaces () const
 
bool set_unichars_in_script_order (bool in_script_order)
 
bool unichars_in_script_order () const
 
const STRING & unichar_string () const
 
const STRING & unichar_lengths () const
 
void SetScriptPositions (bool small_caps, TWERD *word, int debug=0)
 
void SetScriptPositions (const tesseract::ScriptPos *positions, int length)
 
void SetAllScriptPositions (tesseract::ScriptPos position)
 
int GetTopScriptID () const
 
void UpdateStateForSplit (int blob_position)
 
int TotalOfStates () const
 
void print () const
 
void print (const char *msg) const
 
void print_state (const char *msg) const
 
void DisplaySegmentation (TWERD *word)
 
WERD_CHOICE & operator+= (const WERD_CHOICE &second)
 
WERD_CHOICE & operator= (const WERD_CHOICE &source)
 
- Public Member Functions inherited from ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Static Public Member Functions

static const char * permuter_name (uint8_t permuter)
 
static tesseract::ScriptPos ScriptPositionOf (bool print_debug, const UNICHARSET &unicharset, const TBOX &blob_box, UNICHAR_ID unichar_id)
 

Static Public Attributes

static const float kBadRating = 100000.0
 

Detailed Description

Definition at line 263 of file ratngs.h.

Constructor & Destructor Documentation

◆ WERD_CHOICE() [1/5]

WERD_CHOICE::WERD_CHOICE ( const UNICHARSET * unicharset)
inline

Definition at line 268 of file ratngs.h.

269 : unicharset_(unicharset) { this->init(8); }
void init(int reserved)
Definition: ratngs.h:399
const UNICHARSET * unicharset() const
Definition: ratngs.h:290

◆ WERD_CHOICE() [2/5]

WERD_CHOICE::WERD_CHOICE ( const UNICHARSET * unicharset,
int  reserved 
)
inline

Definition at line 270 of file ratngs.h.

271 : unicharset_(unicharset) { this->init(reserved); }

◆ WERD_CHOICE() [3/5]

WERD_CHOICE::WERD_CHOICE ( const char *  src_string,
const char *  src_lengths,
float  src_rating,
float  src_certainty,
uint8_t  src_permuter,
const UNICHARSET & unicharset 
)
inline

Definition at line 272 of file ratngs.h.

278 : unicharset_(&unicharset) {
279 this->init(src_string, src_lengths, src_rating,
280 src_certainty, src_permuter);
281 }

◆ WERD_CHOICE() [4/5]

WERD_CHOICE::WERD_CHOICE ( const char *  src_string,
const UNICHARSET & unicharset 
)

WERD_CHOICE::WERD_CHOICE

Constructor to build a WERD_CHOICE from the given string. The function assumes that src_string is not nullptr.

Definition at line 222 of file ratngs.cpp.

224 : unicharset_(&unicharset){
225 GenericVector<UNICHAR_ID> encoding;
226 GenericVector<char> lengths;
227 std::string cleaned = unicharset.CleanupString(src_string);
228 if (unicharset.encode_string(cleaned.c_str(), true, &encoding, &lengths,
229 nullptr)) {
230 lengths.push_back('\0');
231 STRING src_lengths = &lengths[0];
232 this->init(cleaned.c_str(), src_lengths.string(), 0.0, 0.0, NO_PERM);
233 } else { // There must have been an invalid unichar in the string.
234 this->init(8);
235 this->make_bad();
236 }
237}
@ NO_PERM
Definition: ratngs.h:233
int push_back(T object)
void make_bad()
Set the fields in this choice to be default (bad) values.
Definition: ratngs.h:433
Definition: strngs.h:45
const char * string() const
Definition: strngs.cpp:194
bool encode_string(const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const
Definition: unicharset.cpp:259
static std::string CleanupString(const char *utf8_str)
Definition: unicharset.h:246

◆ WERD_CHOICE() [5/5]

WERD_CHOICE::WERD_CHOICE ( const WERD_CHOICE & word)
inline

Definition at line 283 of file ratngs.h.

284 : ELIST_LINK(word), unicharset_(word.unicharset_) {
285 this->init(word.length());
286 this->operator=(word);
287 }
WERD_CHOICE & operator=(const WERD_CHOICE &source)
Definition: ratngs.cpp:525
int length() const
Definition: ratngs.h:293
ELIST_LINK()
Definition: elst.h:85

◆ ~WERD_CHOICE()

WERD_CHOICE::~WERD_CHOICE ( )

WERD_CHOICE::~WERD_CHOICE

Definition at line 280 of file ratngs.cpp.

280 {
281 delete[] unichar_ids_;
282 delete[] script_pos_;
283 delete[] state_;
284 delete[] certainties_;
285}

Member Function Documentation

◆ adjust_factor()

float WERD_CHOICE::adjust_factor ( ) const
inline

Definition at line 296 of file ratngs.h.

296 {
297 return adjust_factor_;
298 }

◆ append_unichar_id()

void WERD_CHOICE::append_unichar_id ( UNICHAR_ID  unichar_id,
int  blob_count,
float  rating,
float  certainty 
)

append_unichar_id

Make sure there is enough space in the word for the new unichar id and call append_unichar_id_space_allocated().

Definition at line 472 of file ratngs.cpp.

474 {
475 if (length_ == reserved_) {
476 this->double_the_size();
477 }
478 this->append_unichar_id_space_allocated(unichar_id, blob_count,
479 rating, certainty);
480}
void double_the_size()
Make more space in unichar_id_ and fragment_lengths_ arrays.
Definition: ratngs.h:377
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:305
float certainty() const
Definition: ratngs.h:320
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
Definition: ratngs.h:442
float rating() const
Definition: ratngs.h:317

◆ append_unichar_id_space_allocated()

void WERD_CHOICE::append_unichar_id_space_allocated ( UNICHAR_ID  unichar_id,
int  blob_count,
float  rating,
float  certainty 
)
inline

This function assumes that there is enough space reserved in the WERD_CHOICE for adding another unichar. This is an efficient alternative to append_unichar_id().

Definition at line 442 of file ratngs.h.

444 {
445 assert(reserved_ > length_);
446 length_++;
447 this->set_unichar_id(unichar_id, blob_count,
448 rating, certainty, length_-1);
449 }
void set_unichar_id(UNICHAR_ID unichar_id, int index)
Definition: ratngs.h:349

◆ blob_choices()

BLOB_CHOICE_LIST * WERD_CHOICE::blob_choices ( int  index,
MATRIX * ratings 
) const

Definition at line 294 of file ratngs.cpp.

294 {
295 MATRIX_COORD coord = MatrixCoord(index);
296 BLOB_CHOICE_LIST* result = ratings->get(coord.col, coord.row);
297 if (result == nullptr) {
298 result = new BLOB_CHOICE_LIST;
299 ratings->put(coord.col, coord.row, result);
300 }
301 return result;
302}
T get(ICOORD pos) const
Definition: matrix.h:231
void put(ICOORD pos, const T &thing)
Definition: matrix.h:223
MATRIX_COORD MatrixCoord(int index) const
Definition: ratngs.cpp:306

◆ BlobPosition()

tesseract::ScriptPos WERD_CHOICE::BlobPosition ( int  index) const
inline

Definition at line 312 of file ratngs.h.

312 {
313 if (index < 0 || index >= length_)
314 return tesseract::SP_NORMAL;
315 return script_pos_[index];
316 }
@ SP_NORMAL
Definition: ratngs.h:253

◆ certainty() [1/2]

float WERD_CHOICE::certainty ( ) const
inline

Definition at line 320 of file ratngs.h.

320 {
321 return certainty_;
322 }

◆ certainty() [2/2]

float WERD_CHOICE::certainty ( int  index) const
inline

Definition at line 323 of file ratngs.h.

323 {
324 return certainties_[index];
325 }

◆ contains_unichar_id()

bool WERD_CHOICE::contains_unichar_id ( UNICHAR_ID  unichar_id) const

contains_unichar_id

Returns true if unichar_ids_ contain the given unichar_id, false otherwise.

Definition at line 330 of file ratngs.cpp.

330 {
331 for (int i = 0; i < length_; ++i) {
332 if (unichar_ids_[i] == unichar_id) {
333 return true;
334 }
335 }
336 return false;
337}

◆ ContainsAnyNonSpaceDelimited()

bool WERD_CHOICE::ContainsAnyNonSpaceDelimited ( ) const
inline

Definition at line 504 of file ratngs.h.

504 {
505 for (int i = 0; i < length_; ++i) {
506 if (!unicharset_->IsSpaceDelimited(unichar_ids_[i])) return true;
507 }
508 return false;
509 }
bool IsSpaceDelimited(UNICHAR_ID unichar_id) const
Definition: unicharset.h:652

◆ dangerous_ambig_found()

bool WERD_CHOICE::dangerous_ambig_found ( ) const
inline

Definition at line 353 of file ratngs.h.

353 {
354 return dangerous_ambig_found_;
355 }

◆ debug_string()

const STRING WERD_CHOICE::debug_string ( ) const
inline

Definition at line 495 of file ratngs.h.

495 {
496 STRING word_str;
497 for (int i = 0; i < length_; ++i) {
498 word_str += unicharset_->debug_str(unichar_ids_[i]);
499 word_str += " ";
500 }
501 return word_str;
502 }
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:343

◆ DisplaySegmentation()

void WERD_CHOICE::DisplaySegmentation ( TWERD * word)

Definition at line 765 of file ratngs.cpp.

765 {
766#ifndef GRAPHICS_DISABLED
767 // Number of different colors to draw with.
768 const int kNumColors = 6;
769 static ScrollView *segm_window = nullptr;
770 // Check the state against the static prev_drawn_state.
771 static GenericVector<int> prev_drawn_state;
772 bool already_done = prev_drawn_state.size() == length_;
773 if (!already_done) prev_drawn_state.init_to_size(length_, 0);
774 for (int i = 0; i < length_; ++i) {
775 if (prev_drawn_state[i] != state_[i]) {
776 already_done = false;
777 }
778 prev_drawn_state[i] = state_[i];
779 }
780 if (already_done || word->blobs.empty()) return;
781
782 // Create the window if needed.
783 if (segm_window == nullptr) {
784 segm_window = new ScrollView("Segmentation", 5, 10, 500, 256,
785 2000.0, 256.0, true);
786 } else {
787 segm_window->Clear();
788 }
789
790 TBOX bbox;
791 int blob_index = 0;
792 for (int c = 0; c < length_; ++c) {
793 auto color =
794 static_cast<ScrollView::Color>(c % kNumColors + 3);
795 for (int i = 0; i < state_[c]; ++i, ++blob_index) {
796 TBLOB* blob = word->blobs[blob_index];
797 bbox += blob->bounding_box();
798 blob->plot(segm_window, color, color);
799 }
800 }
801 segm_window->ZoomToRectangle(bbox.left(), bbox.top(),
802 bbox.right(), bbox.bottom());
803 segm_window->Update();
804 window_wait(segm_window);
805#endif
806}
char window_wait(ScrollView *win)
Definition: callcpp.cpp:103
void init_to_size(int size, const T &t)
bool empty() const
Definition: genericvector.h:91
int size() const
Definition: genericvector.h:72
Definition: blobs.h:284
TBOX bounding_box() const
Definition: blobs.cpp:468
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
Definition: blobs.cpp:510
GenericVector< TBLOB * > blobs
Definition: blobs.h:459
Definition: rect.h:34
int16_t top() const
Definition: rect.h:58
int16_t left() const
Definition: rect.h:72
int16_t bottom() const
Definition: rect.h:65
int16_t right() const
Definition: rect.h:79
static void Update()
Definition: scrollview.cpp:709
void Clear()
Definition: scrollview.cpp:589
void ZoomToRectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:757

◆ double_the_size()

void WERD_CHOICE::double_the_size ( )
inline

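Make more space in the unichar_ids_, script_pos_, state_ and certainties_ arrays: the reserved capacity is doubled when it is positive; otherwise a single slot is allocated in each array.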

Definition at line 377 of file ratngs.h.

377 {
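378 if (reserved_ > 0) {
379 unichar_ids_ = GenericVector<UNICHAR_ID>::double_the_size_memcpy(
380 reserved_, unichar_ids_);
381 script_pos_ = GenericVector<tesseract::ScriptPos>::double_the_size_memcpy(
382 reserved_, script_pos_);
383 state_ = GenericVector<int>::double_the_size_memcpy(
384 reserved_, state_);
385 certainties_ = GenericVector<float>::double_the_size_memcpy(
386 reserved_, certainties_);
387 reserved_ *= 2;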
388 } else {
389 unichar_ids_ = new UNICHAR_ID[1];
390 script_pos_ = new tesseract::ScriptPos[1];
391 state_ = new int[1];
392 certainties_ = new float[1];
393 reserved_ = 1;
394 }
395 }
int UNICHAR_ID
Definition: unichar.h:34
static T * double_the_size_memcpy(int current_size, T *data)

◆ GetNonSuperscriptSpan()

void WERD_CHOICE::GetNonSuperscriptSpan ( int *  start,
int *  end 
) const

Definition at line 401 of file ratngs.cpp.

401 {
402 int end = length();
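403 while (end > 0 &&
404 unicharset_->get_isdigit(unichar_ids_[end - 1]) &&
405 BlobPosition(end - 1) == tesseract::SP_SUPERSCRIPT) {
406 end--;
407 }
408 int start = 0;
409 while (start < end &&
410 unicharset_->get_isdigit(unichar_ids_[start]) &&
411 BlobPosition(start) == tesseract::SP_SUPERSCRIPT) {
412 start++;
413 }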
414 *pstart = start;
415 *pend = end;
416}
@ SP_SUPERSCRIPT
Definition: ratngs.h:255
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:312
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:512

◆ GetTopScriptID()

int WERD_CHOICE::GetTopScriptID ( ) const

Definition at line 671 of file ratngs.cpp.

671 {
672 int max_script = unicharset_->get_script_table_size();
673 int *sid = new int[max_script];
674 int x;
675 for (x = 0; x < max_script; x++) sid[x] = 0;
676 for (x = 0; x < length_; ++x) {
677 int script_id = unicharset_->get_script(unichar_id(x));
678 sid[script_id]++;
679 }
680 if (unicharset_->han_sid() != unicharset_->null_sid()) {
681 // Add the Hiragana & Katakana counts to Han and zero them out.
682 if (unicharset_->hiragana_sid() != unicharset_->null_sid()) {
683 sid[unicharset_->han_sid()] += sid[unicharset_->hiragana_sid()];
684 sid[unicharset_->hiragana_sid()] = 0;
685 }
686 if (unicharset_->katakana_sid() != unicharset_->null_sid()) {
687 sid[unicharset_->han_sid()] += sid[unicharset_->katakana_sid()];
688 sid[unicharset_->katakana_sid()] = 0;
689 }
690 }
691 // Note that high script ID overrides lower one on a tie, thus biasing
692 // towards non-Common script (if sorted that way in unicharset file).
693 int max_sid = 0;
694 for (x = 1; x < max_script; x++)
695 if (sid[x] >= sid[max_sid]) max_sid = x;
696 if (sid[max_sid] < length_ / 2)
697 max_sid = unicharset_->null_sid();
698 delete[] sid;
699 return max_sid;
700}
int hiragana_sid() const
Definition: unicharset.h:890
int katakana_sid() const
Definition: unicharset.h:891
int han_sid() const
Definition: unicharset.h:889
int get_script(UNICHAR_ID unichar_id) const
Definition: unicharset.h:663
int null_sid() const
Definition: unicharset.h:884
int get_script_table_size() const
Definition: unicharset.h:849

◆ has_rtl_unichar_id()

bool WERD_CHOICE::has_rtl_unichar_id ( ) const

has_rtl_unichar_id

Returns true if unichar_ids contain at least one "strongly" RTL unichar.

Definition at line 435 of file ratngs.cpp.

435 {
436 int i;
437 for (i = 0; i < length_; ++i) {
438 UNICHARSET::Direction dir = unicharset_->get_direction(unichar_ids_[i]);
439 if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
440 dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC) {
441 return true;
442 }
443 }
444 return false;
445}
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:690
@ U_RIGHT_TO_LEFT_ARABIC
Definition: unicharset.h:170
@ U_RIGHT_TO_LEFT
Definition: unicharset.h:158

◆ init() [1/2]

void WERD_CHOICE::init ( const char *  src_string,
const char *  src_lengths,
float  src_rating,
float  src_certainty,
uint8_t  src_permuter 
)

Helper function to build a WERD_CHOICE from the given string, fragment lengths, rating, certainty and permuter. The function assumes that src_string is not nullptr. src_lengths argument could be nullptr, in which case the unichars in src_string are assumed to all be of length 1.

WERD_CHOICE::init

Helper function to build a WERD_CHOICE from the given string, fragment lengths, rating, certainty and permuter.

The function assumes that src_string is not nullptr. src_lengths argument could be nullptr, in which case the unichars in src_string are assumed to all be of length 1.

Definition at line 249 of file ratngs.cpp.

253 {
254 int src_string_len = strlen(src_string);
255 if (src_string_len == 0) {
256 this->init(8);
257 } else {
258 this->init(src_lengths ? strlen(src_lengths): src_string_len);
259 length_ = reserved_;
260 int offset = 0;
261 for (int i = 0; i < length_; ++i) {
262 int unichar_length = src_lengths ? src_lengths[i] : 1;
263 unichar_ids_[i] =
264 unicharset_->unichar_to_id(src_string+offset, unichar_length);
265 state_[i] = 1;
266 certainties_[i] = src_certainty;
267 offset += unichar_length;
268 }
269 }
270 adjust_factor_ = 1.0f;
271 rating_ = src_rating;
272 certainty_ = src_certainty;
273 permuter_ = src_permuter;
274 dangerous_ambig_found_ = false;
275}
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210

◆ init() [2/2]

void WERD_CHOICE::init ( int  reserved)
inline

Initializes WERD_CHOICE: reserves `reserved` slots in the unichar_ids_, script_pos_, state_ and certainties_ arrays (the arrays are left as nullptr when reserved is not positive) and sets the other fields to default (blank) values.

Definition at line 399 of file ratngs.h.

399 {
400 reserved_ = reserved;
401 if (reserved > 0) {
402 unichar_ids_ = new UNICHAR_ID[reserved];
403 script_pos_ = new tesseract::ScriptPos[reserved];
404 state_ = new int[reserved];
405 certainties_ = new float[reserved];
406 } else {
407 unichar_ids_ = nullptr;
408 script_pos_ = nullptr;
409 state_ = nullptr;
410 certainties_ = nullptr;
411 }
412 length_ = 0;
413 adjust_factor_ = 1.0f;
414 rating_ = 0.0;
415 certainty_ = FLT_MAX;
416 min_x_height_ = 0.0f;
417 max_x_height_ = FLT_MAX;
418 permuter_ = NO_PERM;
419 unichars_in_script_order_ = false; // Tesseract is strict left-to-right.
420 dangerous_ambig_found_ = false;
421 }

◆ IsAllSpaces()

bool WERD_CHOICE::IsAllSpaces ( ) const
inline

Definition at line 511 of file ratngs.h.

511 {
512 for (int i = 0; i < length_; ++i) {
513 if (unichar_ids_[i] != UNICHAR_SPACE) return false;
514 }
515 return true;
516 }
@ UNICHAR_SPACE
Definition: unicharset.h:34

◆ length()

int WERD_CHOICE::length ( ) const
inline

Definition at line 293 of file ratngs.h.

293 {
294 return length_;
295 }

◆ make_bad()

void WERD_CHOICE::make_bad ( )
inline

Set the fields in this choice to be default (bad) values.

Definition at line 433 of file ratngs.h.

433 {
434 length_ = 0;
435 rating_ = kBadRating;
436 certainty_ = -FLT_MAX;
437 }
static const float kBadRating
Definition: ratngs.h:265

◆ MatrixCoord()

MATRIX_COORD WERD_CHOICE::MatrixCoord ( int  index) const

Definition at line 306 of file ratngs.cpp.

306 {
307 int col = 0;
308 for (int i = 0; i < index; ++i)
309 col += state_[i];
310 int row = col + state_[index] - 1;
311 return MATRIX_COORD(col, row);
312}

◆ max_x_height()

float WERD_CHOICE::max_x_height ( ) const
inline

Definition at line 329 of file ratngs.h.

329 {
330 return max_x_height_;
331 }

◆ min_x_height()

float WERD_CHOICE::min_x_height ( ) const
inline

Definition at line 326 of file ratngs.h.

326 {
327 return min_x_height_;
328 }

◆ operator+=()

WERD_CHOICE & WERD_CHOICE::operator+= ( const WERD_CHOICE & second)

WERD_CHOICE::operator+=

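Concatenates a second word choice onto the end of this one. The ratings are added and the certainty is the minimum of the two. If this word's permuter is NO_PERM, the second word's permuter is adopted; otherwise, if the second word's permuter is not NO_PERM and differs from this one's, the permuter is set to COMPOUND_PERM.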

Definition at line 489 of file ratngs.cpp.

489 {
490 ASSERT_HOST(unicharset_ == second.unicharset_);
491 while (reserved_ < length_ + second.length()) {
492 this->double_the_size();
493 }
494 const UNICHAR_ID *other_unichar_ids = second.unichar_ids();
495 for (int i = 0; i < second.length(); ++i) {
496 unichar_ids_[length_ + i] = other_unichar_ids[i];
497 state_[length_ + i] = second.state_[i];
498 certainties_[length_ + i] = second.certainties_[i];
499 script_pos_[length_ + i] = second.BlobPosition(i);
500 }
501 length_ += second.length();
502 if (second.adjust_factor_ > adjust_factor_)
503 adjust_factor_ = second.adjust_factor_;
504 rating_ += second.rating(); // add ratings
505 if (second.certainty() < certainty_) // take min
506 certainty_ = second.certainty();
507 if (second.dangerous_ambig_found_)
508 dangerous_ambig_found_ = true;
509 if (permuter_ == NO_PERM) {
510 permuter_ = second.permuter();
511 } else if (second.permuter() != NO_PERM &&
512 second.permuter() != permuter_) {
513 permuter_ = COMPOUND_PERM;
514 }
515 return *this;
516}
@ COMPOUND_PERM
Definition: ratngs.h:245
#define ASSERT_HOST(x)
Definition: errcode.h:88
uint8_t permuter() const
Definition: ratngs.h:336
const UNICHAR_ID * unichar_ids() const
Definition: ratngs.h:302

◆ operator=()

WERD_CHOICE & WERD_CHOICE::operator= ( const WERD_CHOICE & source)

WERD_CHOICE::operator=

Allocate enough memory to hold a copy of source and copy over all the information from source to this WERD_CHOICE.

Definition at line 525 of file ratngs.cpp.

525 {
526 while (reserved_ < source.length()) {
527 this->double_the_size();
528 }
529
530 unicharset_ = source.unicharset_;
531 const UNICHAR_ID *other_unichar_ids = source.unichar_ids();
532 for (int i = 0; i < source.length(); ++i) {
533 unichar_ids_[i] = other_unichar_ids[i];
534 state_[i] = source.state_[i];
535 certainties_[i] = source.certainties_[i];
536 script_pos_[i] = source.BlobPosition(i);
537 }
538 length_ = source.length();
539 adjust_factor_ = source.adjust_factor_;
540 rating_ = source.rating();
541 certainty_ = source.certainty();
542 min_x_height_ = source.min_x_height();
543 max_x_height_ = source.max_x_height();
544 permuter_ = source.permuter();
545 dangerous_ambig_found_ = source.dangerous_ambig_found_;
546 return *this;
547}
float min_x_height() const
Definition: ratngs.h:326
float max_x_height() const
Definition: ratngs.h:329

◆ permuter()

uint8_t WERD_CHOICE::permuter ( ) const
inline

Definition at line 336 of file ratngs.h.

336 {
337 return permuter_;
338 }

◆ permuter_name() [1/2]

const char * WERD_CHOICE::permuter_name ( ) const

Definition at line 287 of file ratngs.cpp.

287 {
288 return kPermuterTypeNames[permuter_];
289}

◆ permuter_name() [2/2]

const char * WERD_CHOICE::permuter_name ( uint8_t  permuter)
static

Definition at line 198 of file ratngs.cpp.

198 {
199 return kPermuterTypeNames[permuter];
200}

◆ print() [1/2]

void WERD_CHOICE::print ( ) const
inline

Definition at line 570 of file ratngs.h.

570{ this->print(""); }
void print() const
Definition: ratngs.h:570

◆ print() [2/2]

void WERD_CHOICE::print ( const char *  msg) const

WERD_CHOICE::print

Print WERD_CHOICE to stdout.

Definition at line 728 of file ratngs.cpp.

728 {
729 tprintf("%s : ", msg);
730 for (int i = 0; i < length_; ++i) {
731 tprintf("%s", unicharset_->id_to_unichar(unichar_ids_[i]));
732 }
733 tprintf(" : R=%g, C=%g, F=%g, Perm=%d, xht=[%g,%g], ambig=%d\n",
734 rating_, certainty_, adjust_factor_, permuter_,
735 min_x_height_, max_x_height_, dangerous_ambig_found_);
736 tprintf("pos");
737 for (int i = 0; i < length_; ++i) {
738 tprintf("\t%s", ScriptPosToString(script_pos_[i]));
739 }
740 tprintf("\nstr");
741 for (int i = 0; i < length_; ++i) {
742 tprintf("\t%s", unicharset_->id_to_unichar(unichar_ids_[i]));
743 }
744 tprintf("\nstate:");
745 for (int i = 0; i < length_; ++i) {
746 tprintf("\t%d ", state_[i]);
747 }
748 tprintf("\nC");
749 for (int i = 0; i < length_; ++i) {
750 tprintf("\t%.3f", certainties_[i]);
751 }
752 tprintf("\n");
753}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
const char * ScriptPosToString(enum ScriptPos script_pos)
Definition: ratngs.cpp:204
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291

◆ print_state()

void WERD_CHOICE::print_state ( const char *  msg) const

Definition at line 756 of file ratngs.cpp.

756 {
757 tprintf("%s", msg);
758 for (int i = 0; i < length_; ++i)
759 tprintf(" %d", state_[i]);
760 tprintf("\n");
761}

◆ punct_stripped()

void WERD_CHOICE::punct_stripped ( int *  start,
int *  end 
) const

punct_stripped

Returns the half-open interval of unichar_id indices [start, end) which enclose the core portion of this word – the part after stripping punctuation from the left and right.

Definition at line 387 of file ratngs.cpp.

387 {
388 *start = 0;
389 *end = length() - 1;
390 while (*start < length() &&
391 unicharset()->get_ispunctuation(unichar_id(*start))) {
392 (*start)++;
393 }
394 while (*end > -1 &&
395 unicharset()->get_ispunctuation(unichar_id(*end))) {
396 (*end)--;
397 }
398 (*end)++;
399}

◆ rating()

float WERD_CHOICE::rating ( ) const
inline

Definition at line 317 of file ratngs.h.

317 {
318 return rating_;
319 }

◆ remove_last_unichar_id()

void WERD_CHOICE::remove_last_unichar_id ( )
inline

Definition at line 473 of file ratngs.h.

473{ --length_; }

◆ remove_unichar_id()

void WERD_CHOICE::remove_unichar_id ( int  index)
inline

Definition at line 474 of file ratngs.h.

474 {
475 this->remove_unichar_ids(index, 1);
476 }
void remove_unichar_ids(int index, int num)
Definition: ratngs.cpp:346

◆ remove_unichar_ids()

void WERD_CHOICE::remove_unichar_ids ( int  start,
int  num 
)

remove_unichar_ids

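Removes num unichar ids starting from index start from unichar_ids_ and updates length_ and the parallel script_pos_, state_ and certainties_ arrays to reflect this change; the states of the removed entries are first added to a neighbouring entry to account for the merged blobs. Note: this function does not modify rating_ and certainty_.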

Definition at line 346 of file ratngs.cpp.

346 {
347 ASSERT_HOST(start >= 0 && start + num <= length_);
348 // Accumulate the states to account for the merged blobs.
349 for (int i = 0; i < num; ++i) {
350 if (start > 0)
351 state_[start - 1] += state_[start + i];
352 else if (start + num < length_)
353 state_[start + num] += state_[start + i];
354 }
355 for (int i = start; i + num < length_; ++i) {
356 unichar_ids_[i] = unichar_ids_[i + num];
357 script_pos_[i] = script_pos_[i + num];
358 state_[i] = state_[i + num];
359 certainties_[i] = certainties_[i + num];
360 }
361 length_ -= num;
362}

◆ reverse_and_mirror_unichar_ids()

void WERD_CHOICE::reverse_and_mirror_unichar_ids ( )

reverse_and_mirror_unichar_ids

Reverses and mirrors unichars in unichar_ids.

Definition at line 369 of file ratngs.cpp.

369 {
370 for (int i = 0; i < length_ / 2; ++i) {
371 UNICHAR_ID tmp_id = unichar_ids_[i];
372 unichar_ids_[i] = unicharset_->get_mirror(unichar_ids_[length_-1-i]);
373 unichar_ids_[length_-1-i] = unicharset_->get_mirror(tmp_id);
374 }
375 if (length_ % 2 != 0) {
376 unichar_ids_[length_/2] = unicharset_->get_mirror(unichar_ids_[length_/2]);
377 }
378}
UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const
Definition: unicharset.h:697

◆ ScriptPositionOf()

ScriptPos WERD_CHOICE::ScriptPositionOf ( bool  print_debug,
const UNICHARSET & unicharset,
const TBOX & blob_box,
UNICHAR_ID  unichar_id 
)
static

Definition at line 633 of file ratngs.cpp.

636 {
637 ScriptPos retval = tesseract::SP_NORMAL;
638 int top = blob_box.top();
639 int bottom = blob_box.bottom();
640 int min_bottom, max_bottom, min_top, max_top;
641 unicharset.get_top_bottom(unichar_id,
642 &min_bottom, &max_bottom,
643 &min_top, &max_top);
644
645 int sub_thresh_top = min_top - kMinSubscriptOffset;
646 int sub_thresh_bot = kBlnBaselineOffset - kMinSubscriptOffset;
647 int sup_thresh_bot = max_bottom + kMinSuperscriptOffset;
648 if (bottom <= kMaxDropCapBottom) {
649 retval = tesseract::SP_DROPCAP;
650 } else if (top < sub_thresh_top && bottom < sub_thresh_bot) {
651 retval = tesseract::SP_SUBSCRIPT;
652 } else if (bottom > sup_thresh_bot) {
653 retval = tesseract::SP_SUPERSCRIPT;
654 }
655
656 if (print_debug) {
657 const char *pos = ScriptPosToString(retval);
658 tprintf("%s Character %s[bot:%d top: %d] "
659 "bot_range[%d,%d] top_range[%d, %d] "
660 "sub_thresh[bot:%d top:%d] sup_thresh_bot %d\n",
661 pos, unicharset.id_to_unichar(unichar_id),
662 bottom, top,
663 min_bottom, max_bottom, min_top, max_top,
664 sub_thresh_bot, sub_thresh_top,
665 sup_thresh_bot);
666 }
667 return retval;
668}
const int kBlnBaselineOffset
Definition: normalis.h:25
const int kMinSubscriptOffset
Definition: ratngs.cpp:43
const int kMinSuperscriptOffset
Definition: ratngs.cpp:45
const int kMaxDropCapBottom
Definition: ratngs.cpp:47
@ SP_SUBSCRIPT
Definition: ratngs.h:254
@ SP_DROPCAP
Definition: ratngs.h:256
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
Definition: unicharset.h:568

◆ set_adjust_factor()

void WERD_CHOICE::set_adjust_factor ( float  factor)
inline

Definition at line 299 of file ratngs.h.

299 {
300 adjust_factor_ = factor;
301 }

◆ set_blob_choice()

void WERD_CHOICE::set_blob_choice ( int  index,
int  blob_count,
const BLOB_CHOICE * blob_choice 
)

Definition at line 316 of file ratngs.cpp.

317 {
318 unichar_ids_[index] = blob_choice->unichar_id();
319 script_pos_[index] = tesseract::SP_NORMAL;
320 state_[index] = blob_count;
321 certainties_[index] = blob_choice->certainty();
322}
float certainty() const
Definition: ratngs.h:83
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77

◆ set_certainty()

void WERD_CHOICE::set_certainty ( float  new_val)
inline

Definition at line 362 of file ratngs.h.

362 {
363 certainty_ = new_val;
364 }

◆ set_dangerous_ambig_found_()

void WERD_CHOICE::set_dangerous_ambig_found_ ( bool  value)
inline

Definition at line 356 of file ratngs.h.

356 {
357 dangerous_ambig_found_ = value;
358 }

◆ set_length()

void WERD_CHOICE::set_length ( int  len)
inline

Definition at line 371 of file ratngs.h.

371 {
372 ASSERT_HOST(reserved_ >= len);
373 length_ = len;
374 }

◆ set_permuter()

void WERD_CHOICE::set_permuter ( uint8_t  perm)
inline

Definition at line 365 of file ratngs.h.

365 {
366 permuter_ = perm;
367 }

◆ set_rating()

void WERD_CHOICE::set_rating ( float  new_val)
inline

Definition at line 359 of file ratngs.h.

359 {
360 rating_ = new_val;
361 }

◆ set_unichar_id() [1/2]

void WERD_CHOICE::set_unichar_id ( UNICHAR_ID  unichar_id,
int  blob_count,
float  rating,
float  certainty,
int  index 
)
inline

Definition at line 454 of file ratngs.h.

455 {
456 assert(index < length_);
457 unichar_ids_[index] = unichar_id;
458 state_[index] = blob_count;
459 certainties_[index] = certainty;
460 script_pos_[index] = tesseract::SP_NORMAL;
461 rating_ += rating;
462 if (certainty < certainty_) {
463 certainty_ = certainty;
464 }
465 }

◆ set_unichar_id() [2/2]

void WERD_CHOICE::set_unichar_id ( UNICHAR_ID  unichar_id,
int  index 
)
inline

Definition at line 349 of file ratngs.h.

349 {
350 assert(index < length_);
351 unichar_ids_[index] = unichar_id;
352 }

◆ set_unichars_in_script_order()

bool WERD_CHOICE::set_unichars_in_script_order ( bool  in_script_order)
inline

Definition at line 521 of file ratngs.h.

521 {
522 return unichars_in_script_order_ = in_script_order;
523 }

◆ set_x_heights()

void WERD_CHOICE::set_x_heights ( float  min_height,
float  max_height 
)
inline

Definition at line 332 of file ratngs.h.

332 {
333 min_x_height_ = min_height;
334 max_x_height_ = max_height;
335 }

◆ SetAllScriptPositions()

void WERD_CHOICE::SetAllScriptPositions ( tesseract::ScriptPos  position)

Definition at line 627 of file ratngs.cpp.

627 {
628 for (int i = 0; i < length_; ++i)
629 script_pos_[i] = position;
630}

◆ SetScriptPositions() [1/2]

void WERD_CHOICE::SetScriptPositions ( bool  small_caps,
TWERD word,
int  debug = 0 
)

Definition at line 554 of file ratngs.cpp.

554 {
555 // Initialize to normal.
556 for (int i = 0; i < length_; ++i)
557 script_pos_[i] = tesseract::SP_NORMAL;
558 if (word->blobs.empty() || word->NumBlobs() != TotalOfStates()) {
559 return;
560 }
561
562 int position_counts[4] = { 0, 0, 0, 0 };
563
564 int chunk_index = 0;
565 for (int blob_index = 0; blob_index < length_; ++blob_index, ++chunk_index) {
566 TBLOB* tblob = word->blobs[chunk_index];
567 int uni_id = unichar_id(blob_index);
568 TBOX blob_box = tblob->bounding_box();
569 if (state_ != nullptr) {
570 for (int i = 1; i < state_[blob_index]; ++i) {
571 ++chunk_index;
572 tblob = word->blobs[chunk_index];
573 blob_box += tblob->bounding_box();
574 }
575 }
576 script_pos_[blob_index] = ScriptPositionOf(false, *unicharset_, blob_box,
577 uni_id);
578 if (small_caps && script_pos_[blob_index] != tesseract::SP_DROPCAP) {
579 script_pos_[blob_index] = tesseract::SP_NORMAL;
580 }
581 position_counts[script_pos_[blob_index]]++;
582 }
583 // If almost everything looks like a superscript or subscript,
584 // we most likely just got the baseline wrong.
585 if (position_counts[tesseract::SP_SUBSCRIPT] > 0.75 * length_ ||
586 position_counts[tesseract::SP_SUPERSCRIPT] > 0.75 * length_) {
587 if (debug >= 2) {
588 tprintf("Most characters of %s are subscript or superscript.\n"
589 "That seems wrong, so I'll assume we got the baseline wrong\n",
590 unichar_string().string());
591 }
592 for (int i = 0; i < length_; i++) {
593 ScriptPos sp = script_pos_[i];
594 if (sp == tesseract::SP_SUBSCRIPT || sp == tesseract::SP_SUPERSCRIPT) {
595 position_counts[sp]--;
596 position_counts[tesseract::SP_NORMAL]++;
597 script_pos_[i] = tesseract::SP_NORMAL;
598 }
599 }
600 }
601
602 if ((debug >= 1 && position_counts[tesseract::SP_NORMAL] < length_) ||
603 debug >= 2) {
604 tprintf("SetScriptPosition on %s\n", unichar_string().string());
605 int chunk_index = 0;
606 for (int blob_index = 0; blob_index < length_; ++blob_index) {
607 if (debug >= 2 || script_pos_[blob_index] != tesseract::SP_NORMAL) {
608 TBLOB* tblob = word->blobs[chunk_index];
609 ScriptPositionOf(true, *unicharset_, tblob->bounding_box(),
610 unichar_id(blob_index));
611 }
612 chunk_index += state_ != nullptr ? state_[blob_index] : 1;
613 }
614 }
615}
int NumBlobs() const
Definition: blobs.h:448
const STRING & unichar_string() const
Definition: ratngs.h:531
int TotalOfStates() const
Definition: ratngs.cpp:715
static tesseract::ScriptPos ScriptPositionOf(bool print_debug, const UNICHARSET &unicharset, const TBOX &blob_box, UNICHAR_ID unichar_id)
Definition: ratngs.cpp:633

◆ SetScriptPositions() [2/2]

void WERD_CHOICE::SetScriptPositions ( const tesseract::ScriptPos positions,
int  length 
)

Definition at line 617 of file ratngs.cpp.

618 {
619 ASSERT_HOST(length == length_);
620 if (positions != script_pos_) {
621 delete [] script_pos_;
622 script_pos_ = new ScriptPos[length];
623 memcpy(script_pos_, positions, sizeof(positions[0]) * length);
624 }
625}

◆ shallow_copy()

WERD_CHOICE WERD_CHOICE::shallow_copy ( int  start,
int  end 
) const

Definition at line 418 of file ratngs.cpp.

418 {
419 ASSERT_HOST(start >= 0 && start <= length_);
420 ASSERT_HOST(end >= 0 && end <= length_);
421 if (end < start) { end = start; }
422 WERD_CHOICE retval(unicharset_, end - start);
423 for (int i = start; i < end; i++) {
424 retval.append_unichar_id_space_allocated(
425 unichar_ids_[i], state_[i], 0.0f, certainties_[i]);
426 }
427 return retval;
428}

◆ state()

int WERD_CHOICE::state ( int  index) const
inline

Definition at line 309 of file ratngs.h.

309 {
310 return state_[index];
311 }

◆ string_and_lengths()

void WERD_CHOICE::string_and_lengths ( STRING word_str,
STRING word_lengths_str 
) const

string_and_lengths

Populates the given word_str with the unichars from unichar_ids and, if it is not null, word_lengths_str with the corresponding unichar lengths.

Definition at line 453 of file ratngs.cpp.

454 {
455 *word_str = "";
456 if (word_lengths_str != nullptr) *word_lengths_str = "";
457 for (int i = 0; i < length_; ++i) {
458 const char *ch = unicharset_->id_to_unichar_ext(unichar_ids_[i]);
459 *word_str += ch;
460 if (word_lengths_str != nullptr) {
461 *word_lengths_str += strlen(ch);
462 }
463 }
464}
const char * id_to_unichar_ext(UNICHAR_ID id) const
Definition: unicharset.cpp:299

◆ TotalOfStates()

int WERD_CHOICE::TotalOfStates ( ) const

Definition at line 715 of file ratngs.cpp.

715 {
716 int total_chunks = 0;
717 for (int i = 0; i < length_; ++i) {
718 total_chunks += state_[i];
719 }
720 return total_chunks;
721}

◆ unichar_id()

UNICHAR_ID WERD_CHOICE::unichar_id ( int  index) const
inline

Definition at line 305 of file ratngs.h.

305 {
306 assert(index < length_);
307 return unichar_ids_[index];
308 }

◆ unichar_ids()

const UNICHAR_ID * WERD_CHOICE::unichar_ids ( ) const
inline

Definition at line 302 of file ratngs.h.

302 {
303 return unichar_ids_;
304 }

◆ unichar_lengths()

const STRING & WERD_CHOICE::unichar_lengths ( ) const
inline

Definition at line 538 of file ratngs.h.

538 {
539 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
540 return unichar_lengths_;
541 }
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:453

◆ unichar_string()

const STRING & WERD_CHOICE::unichar_string ( ) const
inline

Definition at line 531 of file ratngs.h.

531 {
532 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
533 return unichar_string_;
534 }

◆ unichars_in_script_order()

bool WERD_CHOICE::unichars_in_script_order ( ) const
inline

Definition at line 525 of file ratngs.h.

525 {
526 return unichars_in_script_order_;
527 }

◆ unicharset()

const UNICHARSET * WERD_CHOICE::unicharset ( ) const
inline

Definition at line 290 of file ratngs.h.

290 {
291 return unicharset_;
292 }

◆ UpdateStateForSplit()

void WERD_CHOICE::UpdateStateForSplit ( int  blob_position)

Definition at line 703 of file ratngs.cpp.

703 {
704 int total_chunks = 0;
705 for (int i = 0; i < length_; ++i) {
706 total_chunks += state_[i];
707 if (total_chunks > blob_position) {
708 ++state_[i];
709 return;
710 }
711 }
712}

Member Data Documentation

◆ kBadRating

const float WERD_CHOICE::kBadRating = 100000.0
static

Definition at line 265 of file ratngs.h.


The documentation for this class was generated from the following files: