tesseract 4.1.1
Loading...
Searching...
No Matches
werd.h
Go to the documentation of this file.
1/**********************************************************************
2 * File: werd.h
3 * Description: Code for the WERD class.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1991, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19#ifndef WERD_H
20#define WERD_H
21
22#include "bits16.h"
23#include "elst2.h"
24#include "params.h"
25#include "stepblob.h"
26#include "strngs.h"
27
42};
43
45 /* Display flags bit number allocations */
52};
53
54class ROW; // forward decl
55
56class WERD : public ELIST2_LINK {
57 public:
58 WERD() = default;
59 // WERD constructed with:
60 // blob_list - blobs of the word (we take this list's contents)
61 // blanks - number of blanks before the word
62 // text - correct text (outlives WERD)
63 WERD(C_BLOB_LIST* blob_list, uint8_t blanks, const char* text);
64
65 // WERD constructed from:
66 // blob_list - blobs in the word
67 // clone - werd to clone flags, etc from.
68 WERD(C_BLOB_LIST* blob_list, WERD* clone);
69
70 // Construct a WERD from a single_blob and clone the flags from this.
71 // W_BOL and W_EOL flags are set according to the given values.
72 WERD* ConstructFromSingleBlob(bool bol, bool eol, C_BLOB* blob);
73
74 ~WERD() = default;
75
76 // Assignment operator (defined out of line).
77 WERD& operator=(const WERD& source);
78
79 // This method returns a new werd constructed using the blobs in the input
80 // all_blobs list, which correspond to the blobs in this werd object. The
81 // blobs used to construct the new word are consumed and removed from the
82 // input all_blobs list.
83 // Returns nullptr if the word couldn't be constructed.
84 // Returns original blobs for which no matches were found in the output list
85 // orphan_blobs (appends).
86 WERD* ConstructWerdWithNewBlobs(C_BLOB_LIST* all_blobs,
87 C_BLOB_LIST* orphan_blobs);
88
89 // Accessors for reject / DUFF blobs in various formats
90 C_BLOB_LIST* rej_cblob_list() { // pointer to the rej_cblobs member list
91 return &rej_cblobs;
92 }
93
94 // Accessors for good blobs in various formats.
95 C_BLOB_LIST* cblob_list() { // pointer to the cblobs member list
96 return &cblobs;
97 }
98
99 uint8_t space() { // returns the stored blanks count
100 return blanks;
101 }
102 void set_blanks(uint8_t new_blanks) { blanks = new_blanks; }
103 int script_id() const { return script_id_; }
104 void set_script_id(int id) { script_id_ = id; }
105
106 // Returns the (default) bounding box including all the dots.
107 TBOX bounding_box() const; // compute bounding box
108 // Returns the bounding box including the desired combination of upper and
109 // lower noise/diacritic elements.
110 TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const;
111 // Returns the bounding box of only the good blobs.
112 TBOX true_bounding_box() const;
113
114 const char* text() const { return correct.string(); }
115 void set_text(const char* new_text) { correct = new_text; }
116
117 bool flag(WERD_FLAGS mask) const { return flags.bit(mask); }
118 void set_flag(WERD_FLAGS mask, bool value) { flags.set_bit(mask, value); }
119
120 bool display_flag(uint8_t flag) const { return disp_flags.bit(flag); }
121 void set_display_flag(uint8_t flag, bool value) {
122 disp_flags.set_bit(flag, value);
123 }
124
125 WERD* shallow_copy(); // shallow copy word
126
127 // Reposition the word by the given vector.
128 void move(const ICOORD vec);
129
130 // join other's blobs onto this werd, emptying out other.
131 void join_on(WERD* other);
132
133 // copy other's blobs onto this word, leaving other intact.
134 void copy_on(WERD* other);
135
136 // tprintf word metadata (but not blob innards)
137 void print();
138
139#ifndef GRAPHICS_DISABLED
140 // plot word on window in a uniform colour
141 void plot(ScrollView* window, ScrollView::Color colour);
142
143 // Get the next color in the (looping) rainbow.
145
146 // plot word on window in a rainbow of colours
147 void plot(ScrollView* window);
148
149 // plot rejected blobs in a rainbow of colours
150 void plot_rej_blobs(ScrollView* window);
151#endif // GRAPHICS_DISABLED
152
153 // Removes noise from the word by moving small outlines to the rej_cblobs
154 // list, based on the size_threshold.
155 void CleanNoise(float size_threshold);
156
157 // Extracts all the noise outlines and stuffs the pointers into the given
158 // vector of outlines. Afterwards, the outlines vector owns the pointers.
160 // Adds the selected outlines to the indicated real blobs, and puts the rest
161 // back in rej_cblobs where they came from. Where the target_blobs entry is
162 // nullptr, a run of wanted outlines is put into a single new blob.
163 // Ownership of the outlines is transferred back to the word. (Hence
164 // GenericVector and not PointerVector.)
165 // Returns true if any new blob was added to the start of the word, which
166 // suggests that it might need joining to the word before it, and likewise
167 // sets make_next_word_fuzzy true if any new blob was added to the end.
168 bool AddSelectedOutlines(const GenericVector<bool>& wanted,
169 const GenericVector<C_BLOB*>& target_blobs,
170 const GenericVector<C_OUTLINE*>& outlines,
171 bool* make_next_word_fuzzy);
172
173 private:
174 uint8_t blanks = 0; // number of blanks before the word
175 BITS16 flags; // flags about word
176 BITS16 disp_flags; // display flags
177 int16_t script_id_ = 0; // From unicharset.
178 STRING correct; // correct text
179 C_BLOB_LIST cblobs; // compacted (good) blobs
180 C_BLOB_LIST rej_cblobs; // rejected / DUFF blobs
181};
182
184#include "ocrrow.h" // placed here due to
185// compare words by increasing order of left edge, suitable for qsort(3)
186int word_comparator(const void* word1p, const void* word2p);
187#endif
DISPLAY_FLAGS
Definition: werd.h:44
@ DF_BN_POLYGONAL
BL normalised polygonal approx.
Definition: werd.h:50
@ DF_EDGE_STEP
Edge steps.
Definition: werd.h:49
@ DF_TEXT
Correct ASCII.
Definition: werd.h:47
@ DF_BOX
Bounding box.
Definition: werd.h:46
@ DF_BLAMER
Blamer information.
Definition: werd.h:51
@ DF_POLYGONAL
Polyg approx.
Definition: werd.h:48
int word_comparator(const void *word1p, const void *word2p)
Definition: werd.cpp:370
WERD_FLAGS
Definition: werd.h:28
@ W_BOLD
bold text
Definition: werd.h:31
@ W_FUZZY_SP
fuzzy space
Definition: werd.h:39
@ W_SCRIPT_HAS_XHEIGHT
x-height concept makes sense.
Definition: werd.h:35
@ W_EOL
end of line
Definition: werd.h:33
@ W_FUZZY_NON
fuzzy nonspace
Definition: werd.h:40
@ W_NORMALIZED
flags
Definition: werd.h:34
@ W_SEGMENTED
correctly segmented
Definition: werd.h:29
@ W_SCRIPT_IS_LATIN
Special case Latin for y. splitting.
Definition: werd.h:36
@ W_REP_CHAR
repeated character
Definition: werd.h:38
@ W_DONT_CHOP
fixed pitch chopped
Definition: werd.h:37
@ W_INVERSE
white on black
Definition: werd.h:41
@ W_ITALIC
italic text
Definition: werd.h:30
@ W_BOL
start of line
Definition: werd.h:32
#define ELIST2IZEH(CLASSNAME)
Definition: elst2.h:927
Definition: ocrrow.h:37
integer coordinate
Definition: points.h:32
Definition: rect.h:34
Definition: werd.h:56
WERD * ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs)
Definition: werd.cpp:388
void set_script_id(int id)
Definition: werd.h:104
static ScrollView::Color NextColor(ScrollView::Color colour)
Definition: werd.cpp:292
C_BLOB_LIST * rej_cblob_list()
Definition: werd.h:90
~WERD()=default
C_BLOB_LIST * cblob_list()
Definition: werd.h:95
void set_display_flag(uint8_t flag, bool value)
Definition: werd.h:121
WERD * ConstructFromSingleBlob(bool bol, bool eol, C_BLOB *blob)
Definition: werd.cpp:125
WERD * shallow_copy()
Definition: werd.cpp:334
uint8_t space()
Definition: werd.h:99
bool AddSelectedOutlines(const GenericVector< bool > &wanted, const GenericVector< C_BLOB * > &target_blobs, const GenericVector< C_OUTLINE * > &outlines, bool *make_next_word_fuzzy)
Definition: werd.cpp:524
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
Definition: werd.cpp:152
void print()
Definition: werd.cpp:253
void set_text(const char *new_text)
Definition: werd.h:115
WERD()=default
int script_id() const
Definition: werd.h:103
void set_blanks(uint8_t new_blanks)
Definition: werd.h:102
WERD & operator=(const WERD &source)
Definition: werd.cpp:349
bool display_flag(uint8_t flag) const
Definition: werd.h:120
void CleanNoise(float size_threshold)
Definition: werd.cpp:482
void set_flag(WERD_FLAGS mask, bool value)
Definition: werd.h:118
void copy_on(WERD *other)
Definition: werd.cpp:221
const char * text() const
Definition: werd.h:114
TBOX bounding_box() const
Definition: werd.cpp:148
void join_on(WERD *other)
Definition: werd.cpp:199
TBOX true_bounding_box() const
Definition: werd.cpp:169
void plot_rej_blobs(ScrollView *window)
Definition: werd.cpp:320
void plot(ScrollView *window, ScrollView::Color colour)
Definition: werd.cpp:283
bool flag(WERD_FLAGS mask) const
Definition: werd.h:117
void move(const ICOORD vec)
Definition: werd.cpp:186
void GetNoiseOutlines(GenericVector< C_OUTLINE * > *outlines)
Definition: werd.cpp:506
Definition: bits16.h:25
void set_bit(uint8_t bit_num, bool value)
Definition: bits16.h:42
bool bit(uint8_t bit_num) const
Definition: bits16.h:51
Definition: strngs.h:45
const char * string() const
Definition: strngs.cpp:194