tesseract 4.1.1
Loading...
Searching...
No Matches
hyphen.cpp
Go to the documentation of this file.
1/* -*-C-*-
2 ********************************************************************************
3 * File: hyphen.cpp (Formerly hyphen.c)
4 * Description: Functions for maintaining information about hyphenated words.
5 * Author: Mark Seaman, OCR Technology
6 * Status: Reusable Software Component
7 *
8 * (c) Copyright 1987, Hewlett-Packard Company.
9 ** Licensed under the Apache License, Version 2.0 (the "License");
10 ** you may not use this file except in compliance with the License.
11 ** You may obtain a copy of the License at
12 ** http://www.apache.org/licenses/LICENSE-2.0
13 ** Unless required by applicable law or agreed to in writing, software
14 ** distributed under the License is distributed on an "AS IS" BASIS,
15 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 ** See the License for the specific language governing permissions and
17 ** limitations under the License.
18 *
19 *********************************************************************************/
20
21#include "dict.h"
22
23namespace tesseract {
24
25// Unless the previous word was the last one on the line, and the current
26// one is not (thus it is the first one on the line), erase hyphen_word_,
27// clear hyphen_active_dawgs_, hyphen_constraints_ update last_word_on_line_.
28void Dict::reset_hyphen_vars(bool last_word_on_line) {
29 if (!(last_word_on_line_ == true && last_word_on_line == false)) {
30 if (hyphen_word_ != nullptr) {
31 delete hyphen_word_;
32 hyphen_word_ = nullptr;
33 hyphen_active_dawgs_.clear();
34 }
35 }
37 tprintf("reset_hyphen_vars: last_word_on_line %d -> %d\n",
38 last_word_on_line_, last_word_on_line);
39 }
40 last_word_on_line_ = last_word_on_line;
41}
42
43// Update hyphen_word_, and copy the given DawgPositionVectors into
44// hyphen_active_dawgs_.
46 const DawgPositionVector &active_dawgs) {
47 if (hyphen_word_ == nullptr) {
48 hyphen_word_ = new WERD_CHOICE(word.unicharset());
49 hyphen_word_->make_bad();
50 }
51 if (hyphen_word_->rating() > word.rating()) {
52 *hyphen_word_ = word;
53 // Remove the last unichar id as it is a hyphen, and remove
54 // any unichar_string/lengths that are present.
55 hyphen_word_->remove_last_unichar_id();
56 hyphen_active_dawgs_ = active_dawgs;
57 }
59 hyphen_word_->print("set_hyphen_word: ");
60 }
61}
62} // namespace tesseract
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
void remove_last_unichar_id()
Definition: ratngs.h:473
const UNICHARSET * unicharset() const
Definition: ratngs.h:290
void make_bad()
Set the fields in this choice to be default (bad) values.
Definition: ratngs.h:433
void print() const
Definition: ratngs.h:570
float rating() const
Definition: ratngs.h:317
void reset_hyphen_vars(bool last_word_on_line)
Definition: hyphen.cpp:28
int hyphen_debug_level
Definition: dict.h:623
void set_hyphen_word(const WERD_CHOICE &word, const DawgPositionVector &active_dawgs)
Definition: hyphen.cpp:45