tesseract 4.1.1
Loading...
Searching...
No Matches
rejctmap.h
Go to the documentation of this file.
1/**********************************************************************
2 * File: rejctmap.h (Formerly rejmap.h)
3 * Description: REJ and REJMAP class functions.
4 * Author: Phil Cheatle
5 * Created: Thu Jun 9 13:46:38 BST 1994
6 *
7 * (C) Copyright 1994, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18
19This module may look unnecessarily verbose, but here's the philosophy...
20
21ALL processing of the reject map is done in this module. There are lots of
22separate calls to set reject/accept flags. These have DELIBERATELY been kept
23distinct so that this module can decide what to do.
24
25Basically, there is a flag for each sort of rejection or acceptance. This
26provides a history of what has happened to EACH character.
27
28Determining whether a character is CURRENTLY rejected depends on implicit
29understanding of the SEQUENCE of possible calls. The flags are defined and
30grouped in the REJ_FLAGS enum. These groupings are used in determining a
31character's CURRENT rejection status. Basically, a character is ACCEPTED if
32
33 none of the permanent rej flags are set
34 AND ( the character has never been rejected
35 OR an accept flag is set which is LATER than the latest reject flag )
36
37IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE
38OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
39**********************************************************************/
40
41#ifndef REJCTMAP_H
42#define REJCTMAP_H
43
44#include <memory>
45#include "bits16.h"
46#include "errcode.h"
47#include "params.h"
48
/**
 * One flag per kind of rejection or acceptance event that can happen to a
 * character.
 *
 * The enumerators are grouped by the processing stage that raises them, and
 * the file header explains that a character's CURRENT status is derived from
 * this implied temporal ordering, so do not reorder them casually.
 *
 * The numeric value of each enumerator is used as a bit index by
 * REJ::set_flag() / REJ::flag(): values below 16 are stored in the first
 * BITS16 word and the remaining ones in the second word at (value - 16).
 * There are currently 27 enumerators (values 0..26).
 */
enum REJ_FLAGS {
  /* Reject modes which are NEVER overridden */
  R_TESS_FAILURE,       // PERM Tess didn't classify
  R_SMALL_XHT,          // PERM Xht too small
  R_EDGE_CHAR,          // PERM Too close to edge of image
  R_1IL_CONFLICT,       // PERM 1Il confusion
  R_POSTNN_1IL,         // PERM 1Il unrejected by NN
  R_REJ_CBLOB,          // PERM Odd blob
  R_MM_REJECT,          // PERM Matrix match rejection (m's)
  // NOTE(review): listed in the "NEVER overridden" group but labelled TEMP;
  // confirm against REJ::perm_rejected() which of the two is intended.
  R_BAD_REPETITION,     // TEMP Repeated char which doesn't match trend

  /* Initial reject modes (pre NN_ACCEPT) */
  R_POOR_MATCH,         // TEMP Ray's original heuristic (Not used)
  R_NOT_TESS_ACCEPTED,  // TEMP Tess didn't accept WERD
  R_CONTAINS_BLANKS,    // TEMP Tess failed on other chs in WERD
  R_BAD_PERMUTER,       // POTENTIAL Bad permuter for WERD

  /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */
  R_HYPHEN,             // TEMP Post NN dodgy hyphen or full stop
  R_DUBIOUS,            // TEMP Post NN dodgy chars
  R_NO_ALPHANUMS,       // TEMP No alphanumerics in word after NN
  R_MOSTLY_REJ,         // TEMP Most of word rejected so rej the rest
  R_XHT_FIXUP,          // TEMP Xht tests unsure

  /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
  R_BAD_QUALITY,        // TEMP Quality metrics bad for WERD

  /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ_ACCEPT */
  R_DOC_REJ,            // TEMP Document rejection
  R_BLOCK_REJ,          // TEMP Block rejection
  R_ROW_REJ,            // TEMP Row rejection
  R_UNLV_REJ,           // TEMP ~ turned to - or ^ turned to space

  /* Accept modes which occur between the above rejection groups */
  R_NN_ACCEPT,          // NN acceptance
  R_HYPHEN_ACCEPT,      // Hyphen acceptance
  R_MM_ACCEPT,          // Matrix match acceptance
  R_QUALITY_ACCEPT,     // Accept word in good quality doc
  R_MINIMAL_REJ_ACCEPT  // Accept EVERYTHING except tess failures
};
89
90/* REJECT MAP VALUES */
91
// Single-character codes used to render one reject-map entry
// (they are the values REJ::display_char() returns).
#define MAP_ACCEPT '1'            // character is accepted
#define MAP_REJECT_PERM '0'       // permanent reject
#define MAP_REJECT_TEMP '2'       // temporary reject
#define MAP_REJECT_POTENTIAL '3'  // potential reject
96
97class REJ
98{
99 BITS16 flags1;
100 BITS16 flags2;
101
102 void set_flag(REJ_FLAGS rej_flag) {
103 if (rej_flag < 16)
104 flags1.turn_on_bit (rej_flag);
105 else
106 flags2.turn_on_bit (rej_flag - 16);
107 }
108
109 bool rej_before_nn_accept();
110 bool rej_between_nn_and_mm();
111 bool rej_between_mm_and_quality_accept();
112 bool rej_between_quality_and_minimal_rej_accept();
113 bool rej_before_mm_accept();
114 bool rej_before_quality_accept();
115
116 public:
117 REJ() = default;
118
119 REJ( //classwise copy
120 const REJ &source) {
121 flags1 = source.flags1;
122 flags2 = source.flags2;
123 }
124
125 REJ & operator= ( //assign REJ
126 const REJ & source) { //from this
127 flags1 = source.flags1;
128 flags2 = source.flags2;
129 return *this;
130 }
131
132 bool flag(REJ_FLAGS rej_flag) {
133 if (rej_flag < 16)
134 return flags1.bit (rej_flag);
135 else
136 return flags2.bit (rej_flag - 16);
137 }
138
140 if (perm_rejected ())
141 return MAP_REJECT_PERM;
142 else if (accept_if_good_quality ())
144 else if (rejected ())
145 return MAP_REJECT_TEMP;
146 else
147 return MAP_ACCEPT;
148 }
149
150 bool perm_rejected(); //Is char perm reject?
151
152 bool rejected(); //Is char rejected?
153
154 bool accepted() { //Is char accepted?
155 return !rejected ();
156 }
157
158 //potential rej?
160
161 bool recoverable() {
162 return (rejected () && !perm_rejected ());
163 }
164
165 void setrej_tess_failure(); //Tess generated blank
166 void setrej_small_xht(); //Small xht char/wd
167 void setrej_edge_char(); //Close to image edge
168 void setrej_1Il_conflict(); //Initial reject map
169 void setrej_postNN_1Il(); //1Il after NN
170 void setrej_rej_cblob(); //Insert duff blob
171 void setrej_mm_reject(); //Matrix matcher
172 //Odd repeated char
174 void setrej_poor_match(); //Failed Rays heuristic
175 //TEMP reject_word
177 //TEMP reject_word
179 void setrej_bad_permuter(); //POTENTIAL reject_word
180 void setrej_hyphen(); //PostNN dubious hyph or .
181 void setrej_dubious(); //PostNN dubious limit
182 void setrej_no_alphanums(); //TEMP reject_word
183 void setrej_mostly_rej(); //TEMP reject_word
184 void setrej_xht_fixup(); //xht fixup
185 void setrej_bad_quality(); //TEMP reject_word
186 void setrej_doc_rej(); //TEMP reject_word
187 void setrej_block_rej(); //TEMP reject_word
188 void setrej_row_rej(); //TEMP reject_word
189 void setrej_unlv_rej(); //TEMP reject_word
190 void setrej_nn_accept(); //NN Flipped a char
191 void setrej_hyphen_accept(); //Good aspect ratio
192 void setrej_mm_accept(); //Matrix matcher
193 //Quality flip a char
195 //Accept all except blank
197
198 void full_print(FILE *fp);
199};
200
202{
203 std::unique_ptr<REJ[]> ptr; // ptr to the chars
204 int16_t len; //Number of chars
205
206 public:
207 REJMAP() : len(0) {}
208
209 REJMAP(const REJMAP &rejmap) { *this = rejmap; }
210
211 REJMAP &operator=(const REJMAP &source);
212
213 // Sets up the ptr array to length, whatever it was before.
214 void initialise(int16_t length);
215
216 REJ &operator[]( // access function
217 int16_t index) const // map index
218 {
219 ASSERT_HOST(index < len);
220 return ptr[index]; // no bounds checks
221 }
222
223 int32_t length() const { //map length
224 return len;
225 }
226
227 int16_t accept_count(); //How many accepted?
228
229 int16_t reject_count() { //How many rejects?
230 return len - accept_count ();
231 }
232
233 void remove_pos( //Cut out an element
234 int16_t pos); //element to remove
235
236 void print(FILE *fp);
237
238 void full_print(FILE *fp);
239
240 bool recoverable_rejects(); //Any non perm rejs?
241
243 //Any potential rejs?
244
245 void rej_word_small_xht(); //Reject whole word
246 //Reject whole word
249 //Reject whole word
250 //Reject whole word
252 //Reject whole word
254 void rej_word_xht_fixup(); //Reject whole word
255 //Reject whole word
257 void rej_word_mostly_rej(); //Reject whole word
258 void rej_word_bad_quality(); //Reject whole word
259 void rej_word_doc_rej(); //Reject whole word
260 void rej_word_block_rej(); //Reject whole word
261 void rej_word_row_rej(); //Reject whole word
262};
263#endif
#define MAP_ACCEPT
Definition: rejctmap.h:92
REJ_FLAGS
Definition: rejctmap.h:49
@ R_MOSTLY_REJ
Definition: rejctmap.h:70
@ R_MM_REJECT
Definition: rejctmap.h:57
@ R_XHT_FIXUP
Definition: rejctmap.h:71
@ R_NOT_TESS_ACCEPTED
Definition: rejctmap.h:62
@ R_MINIMAL_REJ_ACCEPT
Definition: rejctmap.h:87
@ R_DUBIOUS
Definition: rejctmap.h:68
@ R_ROW_REJ
Definition: rejctmap.h:79
@ R_BLOCK_REJ
Definition: rejctmap.h:78
@ R_BAD_REPETITION
Definition: rejctmap.h:58
@ R_MM_ACCEPT
Definition: rejctmap.h:85
@ R_REJ_CBLOB
Definition: rejctmap.h:56
@ R_NO_ALPHANUMS
Definition: rejctmap.h:69
@ R_BAD_PERMUTER
Definition: rejctmap.h:64
@ R_1IL_CONFLICT
Definition: rejctmap.h:54
@ R_BAD_QUALITY
Definition: rejctmap.h:74
@ R_QUALITY_ACCEPT
Definition: rejctmap.h:86
@ R_TESS_FAILURE
Definition: rejctmap.h:51
@ R_POSTNN_1IL
Definition: rejctmap.h:55
@ R_HYPHEN_ACCEPT
Definition: rejctmap.h:84
@ R_CONTAINS_BLANKS
Definition: rejctmap.h:63
@ R_HYPHEN
Definition: rejctmap.h:67
@ R_DOC_REJ
Definition: rejctmap.h:77
@ R_POOR_MATCH
Definition: rejctmap.h:61
@ R_EDGE_CHAR
Definition: rejctmap.h:53
@ R_UNLV_REJ
Definition: rejctmap.h:80
@ R_SMALL_XHT
Definition: rejctmap.h:52
@ R_NN_ACCEPT
Definition: rejctmap.h:83
#define MAP_REJECT_POTENTIAL
Definition: rejctmap.h:95
#define MAP_REJECT_PERM
Definition: rejctmap.h:93
#define MAP_REJECT_TEMP
Definition: rejctmap.h:94
#define ASSERT_HOST(x)
Definition: errcode.h:88
REJ
Definition: rejctmap.h:98
void setrej_bad_permuter()
Definition: rejctmap.cpp:151
void setrej_no_alphanums()
Definition: rejctmap.cpp:166
void setrej_mm_reject()
Definition: rejctmap.cpp:124
void setrej_edge_char()
Definition: rejctmap.cpp:104
void setrej_small_xht()
Definition: rejctmap.cpp:99
void setrej_bad_quality()
Definition: rejctmap.cpp:181
void setrej_postNN_1Il()
Definition: rejctmap.cpp:114
void setrej_mostly_rej()
Definition: rejctmap.cpp:171
void setrej_mm_accept()
Definition: rejctmap.cpp:216
void setrej_dubious()
Definition: rejctmap.cpp:161
void setrej_contains_blanks()
Definition: rejctmap.cpp:145
void setrej_poor_match()
Definition: rejctmap.cpp:134
void setrej_quality_accept()
Definition: rejctmap.cpp:221
void setrej_unlv_rej()
Definition: rejctmap.cpp:201
bool recoverable()
Definition: rejctmap.h:161
void setrej_doc_rej()
Definition: rejctmap.cpp:186
void setrej_tess_failure()
Definition: rejctmap.cpp:94
bool rejected()
Definition: rejctmap.cpp:71
bool flag(REJ_FLAGS rej_flag)
Definition: rejctmap.h:132
void setrej_1Il_conflict()
Definition: rejctmap.cpp:109
REJ()=default
void setrej_xht_fixup()
Definition: rejctmap.cpp:176
void setrej_hyphen_accept()
Definition: rejctmap.cpp:206
REJ(const REJ &source)
Definition: rejctmap.h:119
bool perm_rejected()
Definition: rejctmap.cpp:22
void setrej_row_rej()
Definition: rejctmap.cpp:196
void setrej_block_rej()
Definition: rejctmap.cpp:191
void setrej_not_tess_accepted()
Definition: rejctmap.cpp:139
REJ & operator=(const REJ &source)
Definition: rejctmap.h:125
void setrej_hyphen()
Definition: rejctmap.cpp:156
bool accepted()
Definition: rejctmap.h:154
bool accept_if_good_quality()
Definition: rejctmap.cpp:81
void setrej_minimal_rej_accept()
Definition: rejctmap.cpp:226
void full_print(FILE *fp)
Definition: rejctmap.cpp:232
void setrej_rej_cblob()
Definition: rejctmap.cpp:119
void setrej_bad_repetition()
Definition: rejctmap.cpp:129
char display_char()
Definition: rejctmap.h:139
void setrej_nn_accept()
Definition: rejctmap.cpp:211
void rej_word_row_rej()
Definition: rejctmap.cpp:442
bool recoverable_rejects()
Definition: rejctmap.cpp:291
void rej_word_mostly_rej()
Definition: rejctmap.cpp:406
void print(FILE *fp)
Definition: rejctmap.cpp:321
int16_t reject_count()
Definition: rejctmap.h:229
void rej_word_bad_permuter()
Definition: rejctmap.cpp:379
void full_print(FILE *fp)
Definition: rejctmap.cpp:333
REJMAP & operator=(const REJMAP &source)
Definition: rejctmap.cpp:265
int16_t accept_count()
Definition: rejctmap.cpp:279
REJMAP(const REJMAP &rejmap)
Definition: rejctmap.h:209
void rej_word_xht_fixup()
Definition: rejctmap.cpp:388
void rej_word_not_tess_accepted()
Definition: rejctmap.cpp:361
void initialise(int16_t length)
Definition: rejctmap.cpp:273
void rej_word_small_xht()
Definition: rejctmap.cpp:343
void rej_word_no_alphanums()
Definition: rejctmap.cpp:397
void rej_word_doc_rej()
Definition: rejctmap.cpp:424
void rej_word_bad_quality()
Definition: rejctmap.cpp:415
void rej_word_block_rej()
Definition: rejctmap.cpp:433
void rej_word_tess_failure()
Definition: rejctmap.cpp:352
void remove_pos(int16_t pos)
Definition: rejctmap.cpp:309
REJ & operator[](int16_t index) const
Definition: rejctmap.h:216
void rej_word_contains_blanks()
Definition: rejctmap.cpp:370
bool quality_recoverable_rejects()
Definition: rejctmap.cpp:300
int32_t length() const
Definition: rejctmap.h:223
REJMAP()
Definition: rejctmap.h:207
BITS16
Definition: bits16.h:25
void turn_on_bit(uint8_t bit_num)
Definition: bits16.h:32
bool bit(uint8_t bit_num) const
Definition: bits16.h:51