tesseract 4.1.1
Loading...
Searching...
No Matches
rejctmap.cpp
Go to the documentation of this file.
1/**********************************************************************
2 * File: rejctmap.cpp (Formerly rejmap.c)
3 * Description: REJ and REJMAP class functions.
4 * Author: Phil Cheatle
5 *
6 * (C) Copyright 1994, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19#include "rejctmap.h"
20#include "params.h"
21
// Permanent-reject test. The operands visible here OR together the
// R_TESS_FAILURE, R_SMALL_XHT, R_EDGE_CHAR and R_REJ_CBLOB flags.
// NOTE(review): the embedded listing numbers jump 25 -> 28 -> 30, so further
// operands and the end of the return expression are not shown in this copy;
// recover them from the upstream file before editing this function.
22bool REJ::perm_rejected() { //Is char perm reject?
23 return (flag (R_TESS_FAILURE) ||
24 flag (R_SMALL_XHT) ||
25 flag (R_EDGE_CHAR) ||
28 flag (R_REJ_CBLOB) ||
30}
31
32
// Reject test whose only visible operand is the R_POOR_MATCH flag.
// NOTE(review): listing numbers 35-36 are absent from this copy, so the rest
// of the OR expression is not shown; recover it from the upstream file
// before editing this function.
33bool REJ::rej_before_nn_accept() {
34 return flag (R_POOR_MATCH) ||
37}
38
39
// Reject test whose visible operands are the R_HYPHEN and R_DUBIOUS flags.
// NOTE(review): listing number 43 is absent from this copy, so the tail of
// the OR expression is not shown; recover it from the upstream file before
// editing this function.
40bool REJ::rej_between_nn_and_mm() {
41 return flag (R_HYPHEN) ||
42 flag (R_DUBIOUS) ||
44}
45
46
47bool REJ::rej_between_mm_and_quality_accept() {
48 return flag (R_BAD_QUALITY);
49}
50
51
// Reject test whose only visible operand is the R_DOC_REJ flag.
// NOTE(review): listing number 54 is absent from this copy, so the rest of
// the OR expression is not shown; recover it from the upstream file before
// editing this function.
52bool REJ::rej_between_quality_and_minimal_rej_accept() {
53 return flag (R_DOC_REJ) ||
55}
56
57
// True when rej_between_nn_and_mm() holds, or when rej_before_nn_accept()
// holds together with further conditions.
// NOTE(review): listing number 61 is absent from this copy, so the operands
// that follow "rej_before_nn_accept () &&" are not shown; recover them from
// the upstream file before editing this function.
58bool REJ::rej_before_mm_accept() {
59 return rej_between_nn_and_mm () ||
60 (rej_before_nn_accept () &&
62}
63
64
65bool REJ::rej_before_quality_accept() {
66 return rej_between_mm_and_quality_accept () ||
67 (!flag (R_MM_ACCEPT) && rej_before_mm_accept ());
68}
69
70
// Overall rejection test. The visible "else" branch reports a rejection when
// perm_rejected() or rej_between_quality_and_minimal_rej_accept() holds, or
// when rej_before_quality_accept() holds and R_QUALITY_ACCEPT is not set.
// NOTE(review): listing number 72 is absent from this copy, so the condition
// guarding the early "return false" is not shown; recover it from the
// upstream file before editing this function.
71bool REJ::rejected() { //Is char rejected?
73 return false;
74 else
75 return (perm_rejected () ||
76 rej_between_quality_and_minimal_rej_accept () ||
77 (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ()));
78}
79
80
// Potential-reject test. The visible operands require rejected() to hold
// while perm_rejected(), R_POOR_MATCH, rej_between_nn_and_mm(),
// rej_between_mm_and_quality_accept() and
// rej_between_quality_and_minimal_rej_accept() are all false.
// NOTE(review): listing numbers 84 and 86-87 are absent from this copy, so
// additional operands of the conjunction are not shown; recover them from
// the upstream file before editing this function.
81bool REJ::accept_if_good_quality() { //potential rej?
82 return (rejected () &&
83 !perm_rejected () &&
85 !flag (R_POOR_MATCH) &&
88 (!rej_between_nn_and_mm () &&
89 !rej_between_mm_and_quality_accept () &&
90 !rej_between_quality_and_minimal_rej_accept ()));
91}
92
93
94void REJ::setrej_tess_failure() { //Tess generated blank
95 set_flag(R_TESS_FAILURE);
96}
97
98
99void REJ::setrej_small_xht() { //Small xht char/wd
100 set_flag(R_SMALL_XHT);
101}
102
103
104void REJ::setrej_edge_char() { //Close to image edge
105 set_flag(R_EDGE_CHAR);
106}
107
108
109void REJ::setrej_1Il_conflict() { //Initial reject map
110 set_flag(R_1IL_CONFLICT);
111}
112
113
114void REJ::setrej_postNN_1Il() { //1Il after NN
115 set_flag(R_POSTNN_1IL);
116}
117
118
119void REJ::setrej_rej_cblob() { //Insert duff blob
120 set_flag(R_REJ_CBLOB);
121}
122
123
124void REJ::setrej_mm_reject() { //Matrix matcher
125 set_flag(R_MM_REJECT);
126}
127
128
129void REJ::setrej_bad_repetition() { //Odd repeated char
130 set_flag(R_BAD_REPETITION);
131}
132
133
134void REJ::setrej_poor_match() { //Failed Rays heuristic
135 set_flag(R_POOR_MATCH);
136}
137
138
140 //TEMP reject_word
141 set_flag(R_NOT_TESS_ACCEPTED);
142}
143
144
146 //TEMP reject_word
147 set_flag(R_CONTAINS_BLANKS);
148}
149
150
151void REJ::setrej_bad_permuter() { //POTENTIAL reject_word
152 set_flag(R_BAD_PERMUTER);
153}
154
155
156void REJ::setrej_hyphen() { //PostNN dubious hyphen or .
157 set_flag(R_HYPHEN);
158}
159
160
161void REJ::setrej_dubious() { //PostNN dubious limit
162 set_flag(R_DUBIOUS);
163}
164
165
166void REJ::setrej_no_alphanums() { //TEMP reject_word
167 set_flag(R_NO_ALPHANUMS);
168}
169
170
171void REJ::setrej_mostly_rej() { //TEMP reject_word
172 set_flag(R_MOSTLY_REJ);
173}
174
175
176void REJ::setrej_xht_fixup() { //xht fixup
177 set_flag(R_XHT_FIXUP);
178}
179
180
181void REJ::setrej_bad_quality() { //TEMP reject_word
182 set_flag(R_BAD_QUALITY);
183}
184
185
186void REJ::setrej_doc_rej() { //TEMP reject_word
187 set_flag(R_DOC_REJ);
188}
189
190
191void REJ::setrej_block_rej() { //TEMP reject_word
192 set_flag(R_BLOCK_REJ);
193}
194
195
196void REJ::setrej_row_rej() { //TEMP reject_word
197 set_flag(R_ROW_REJ);
198}
199
200
201void REJ::setrej_unlv_rej() { //TEMP reject_word
202 set_flag(R_UNLV_REJ);
203}
204
205
206void REJ::setrej_hyphen_accept() { //NN Flipped a char
207 set_flag(R_HYPHEN_ACCEPT);
208}
209
210
211void REJ::setrej_nn_accept() { //NN Flipped a char
212 set_flag(R_NN_ACCEPT);
213}
214
215
216void REJ::setrej_mm_accept() { //Matrix matcher
217 set_flag(R_MM_ACCEPT);
218}
219
220
221void REJ::setrej_quality_accept() { //Quality flip a char
222 set_flag(R_QUALITY_ACCEPT);
223}
224
225
227 //Accept all except blank
228 set_flag(R_MINIMAL_REJ_ACCEPT);
229}
230
231
232void REJ::full_print(FILE *fp) {
233 fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F");
234 fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F");
235 fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F");
236 fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F");
237 fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F");
238 fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F");
239 fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F");
240 fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F");
241 fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? "T" : "F");
242 fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n",
243 flag (R_NOT_TESS_ACCEPTED) ? "T" : "F");
244 fprintf (fp, "R_CONTAINS_BLANKS: %s\n",
245 flag (R_CONTAINS_BLANKS) ? "T" : "F");
246 fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F");
247 fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F");
248 fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F");
249 fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F");
250 fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F");
251 fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F");
252 fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F");
253 fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F");
254 fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F");
255 fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F");
256 fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F");
257 fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F");
258 fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F");
259 fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F");
260 fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F");
261 fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n",
262 flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F");
263}
264
266 initialise(source.len);
267 for (int i = 0; i < len; i++) {
268 ptr[i] = source.ptr[i];
269 }
270 return *this;
271}
272
273void REJMAP::initialise(int16_t length) {
274 ptr.reset(new REJ[length]);
275 len = length;
276}
277
278
279int16_t REJMAP::accept_count() { //How many accepted?
280 int i;
281 int16_t count = 0;
282
283 for (i = 0; i < len; i++) {
284 if (ptr[i].accepted ())
285 count++;
286 }
287 return count;
288}
289
290
291bool REJMAP::recoverable_rejects() { //Any non perm rejs?
292 for (int i = 0; i < len; i++) {
293 if (ptr[i].recoverable ())
294 return true;
295 }
296 return false;
297}
298
299
300bool REJMAP::quality_recoverable_rejects() { //Any potential rejs?
301 for (int i = 0; i < len; i++) {
302 if (ptr[i].accept_if_good_quality ())
303 return true;
304 }
305 return false;
306}
307
308
309void REJMAP::remove_pos( //Cut out an element
310 int16_t pos //element to remove
311 ) {
312 ASSERT_HOST (pos >= 0);
313 ASSERT_HOST (pos < len);
314 ASSERT_HOST (len > 0);
315
316 len--;
317 for (; pos < len; pos++) ptr[pos] = ptr[pos + 1];
318}
319
320
321void REJMAP::print(FILE *fp) {
322 int i;
323 char buff[512];
324
325 for (i = 0; i < len; i++) {
326 buff[i] = ptr[i].display_char ();
327 }
328 buff[i] = '\0';
329 fprintf (fp, "\"%s\"", buff);
330}
331
332
333void REJMAP::full_print(FILE *fp) {
334 int i;
335
336 for (i = 0; i < len; i++) {
337 ptr[i].full_print (fp);
338 fprintf (fp, "\n");
339 }
340}
341
342
343void REJMAP::rej_word_small_xht() { //Reject whole word
344 int i;
345
346 for (i = 0; i < len; i++) {
347 ptr[i].setrej_small_xht ();
348 }
349}
350
351
352void REJMAP::rej_word_tess_failure() { //Reject whole word
353 int i;
354
355 for (i = 0; i < len; i++) {
356 ptr[i].setrej_tess_failure ();
357 }
358}
359
360
361void REJMAP::rej_word_not_tess_accepted() { //Reject whole word
362 int i;
363
364 for (i = 0; i < len; i++) {
365 if (ptr[i].accepted()) ptr[i].setrej_not_tess_accepted();
366 }
367}
368
369
370void REJMAP::rej_word_contains_blanks() { //Reject whole word
371 int i;
372
373 for (i = 0; i < len; i++) {
374 if (ptr[i].accepted()) ptr[i].setrej_contains_blanks();
375 }
376}
377
378
379void REJMAP::rej_word_bad_permuter() { //Reject whole word
380 int i;
381
382 for (i = 0; i < len; i++) {
383 if (ptr[i].accepted()) ptr[i].setrej_bad_permuter ();
384 }
385}
386
387
388void REJMAP::rej_word_xht_fixup() { //Reject whole word
389 int i;
390
391 for (i = 0; i < len; i++) {
392 if (ptr[i].accepted()) ptr[i].setrej_xht_fixup();
393 }
394}
395
396
397void REJMAP::rej_word_no_alphanums() { //Reject whole word
398 int i;
399
400 for (i = 0; i < len; i++) {
401 if (ptr[i].accepted()) ptr[i].setrej_no_alphanums();
402 }
403}
404
405
406void REJMAP::rej_word_mostly_rej() { //Reject whole word
407 int i;
408
409 for (i = 0; i < len; i++) {
410 if (ptr[i].accepted()) ptr[i].setrej_mostly_rej();
411 }
412}
413
414
415void REJMAP::rej_word_bad_quality() { //Reject whole word
416 int i;
417
418 for (i = 0; i < len; i++) {
419 if (ptr[i].accepted()) ptr[i].setrej_bad_quality();
420 }
421}
422
423
424void REJMAP::rej_word_doc_rej() { //Reject whole word
425 int i;
426
427 for (i = 0; i < len; i++) {
428 if (ptr[i].accepted()) ptr[i].setrej_doc_rej();
429 }
430}
431
432
433void REJMAP::rej_word_block_rej() { //Reject whole word
434 int i;
435
436 for (i = 0; i < len; i++) {
437 if (ptr[i].accepted()) ptr[i].setrej_block_rej();
438 }
439}
440
441
442void REJMAP::rej_word_row_rej() { //Reject whole word
443 int i;
444
445 for (i = 0; i < len; i++) {
446 if (ptr[i].accepted()) ptr[i].setrej_row_rej();
447 }
448}
@ R_MOSTLY_REJ
Definition: rejctmap.h:70
@ R_MM_REJECT
Definition: rejctmap.h:57
@ R_XHT_FIXUP
Definition: rejctmap.h:71
@ R_NOT_TESS_ACCEPTED
Definition: rejctmap.h:62
@ R_MINIMAL_REJ_ACCEPT
Definition: rejctmap.h:87
@ R_DUBIOUS
Definition: rejctmap.h:68
@ R_ROW_REJ
Definition: rejctmap.h:79
@ R_BLOCK_REJ
Definition: rejctmap.h:78
@ R_BAD_REPETITION
Definition: rejctmap.h:58
@ R_MM_ACCEPT
Definition: rejctmap.h:85
@ R_REJ_CBLOB
Definition: rejctmap.h:56
@ R_NO_ALPHANUMS
Definition: rejctmap.h:69
@ R_BAD_PERMUTER
Definition: rejctmap.h:64
@ R_1IL_CONFLICT
Definition: rejctmap.h:54
@ R_BAD_QUALITY
Definition: rejctmap.h:74
@ R_QUALITY_ACCEPT
Definition: rejctmap.h:86
@ R_TESS_FAILURE
Definition: rejctmap.h:51
@ R_POSTNN_1IL
Definition: rejctmap.h:55
@ R_HYPHEN_ACCEPT
Definition: rejctmap.h:84
@ R_CONTAINS_BLANKS
Definition: rejctmap.h:63
@ R_HYPHEN
Definition: rejctmap.h:67
@ R_DOC_REJ
Definition: rejctmap.h:77
@ R_POOR_MATCH
Definition: rejctmap.h:61
@ R_EDGE_CHAR
Definition: rejctmap.h:53
@ R_UNLV_REJ
Definition: rejctmap.h:80
@ R_SMALL_XHT
Definition: rejctmap.h:52
@ R_NN_ACCEPT
Definition: rejctmap.h:83
#define ASSERT_HOST(x)
Definition: errcode.h:88
int count(LIST var_list)
Definition: oldlist.cpp:95
Definition: rejctmap.h:98
void setrej_bad_permuter()
Definition: rejctmap.cpp:151
void setrej_no_alphanums()
Definition: rejctmap.cpp:166
void setrej_mm_reject()
Definition: rejctmap.cpp:124
void setrej_edge_char()
Definition: rejctmap.cpp:104
void setrej_small_xht()
Definition: rejctmap.cpp:99
void setrej_bad_quality()
Definition: rejctmap.cpp:181
void setrej_postNN_1Il()
Definition: rejctmap.cpp:114
void setrej_mostly_rej()
Definition: rejctmap.cpp:171
void setrej_mm_accept()
Definition: rejctmap.cpp:216
void setrej_dubious()
Definition: rejctmap.cpp:161
void setrej_contains_blanks()
Definition: rejctmap.cpp:145
void setrej_poor_match()
Definition: rejctmap.cpp:134
void setrej_quality_accept()
Definition: rejctmap.cpp:221
void setrej_unlv_rej()
Definition: rejctmap.cpp:201
void setrej_doc_rej()
Definition: rejctmap.cpp:186
void setrej_tess_failure()
Definition: rejctmap.cpp:94
bool rejected()
Definition: rejctmap.cpp:71
bool flag(REJ_FLAGS rej_flag)
Definition: rejctmap.h:132
void setrej_1Il_conflict()
Definition: rejctmap.cpp:109
void setrej_xht_fixup()
Definition: rejctmap.cpp:176
void setrej_hyphen_accept()
Definition: rejctmap.cpp:206
bool perm_rejected()
Definition: rejctmap.cpp:22
void setrej_row_rej()
Definition: rejctmap.cpp:196
void setrej_block_rej()
Definition: rejctmap.cpp:191
void setrej_not_tess_accepted()
Definition: rejctmap.cpp:139
void setrej_hyphen()
Definition: rejctmap.cpp:156
bool accept_if_good_quality()
Definition: rejctmap.cpp:81
void setrej_minimal_rej_accept()
Definition: rejctmap.cpp:226
void full_print(FILE *fp)
Definition: rejctmap.cpp:232
void setrej_rej_cblob()
Definition: rejctmap.cpp:119
void setrej_bad_repetition()
Definition: rejctmap.cpp:129
void setrej_nn_accept()
Definition: rejctmap.cpp:211
void rej_word_row_rej()
Definition: rejctmap.cpp:442
bool recoverable_rejects()
Definition: rejctmap.cpp:291
void rej_word_mostly_rej()
Definition: rejctmap.cpp:406
void print(FILE *fp)
Definition: rejctmap.cpp:321
void rej_word_bad_permuter()
Definition: rejctmap.cpp:379
void full_print(FILE *fp)
Definition: rejctmap.cpp:333
REJMAP & operator=(const REJMAP &source)
Definition: rejctmap.cpp:265
int16_t accept_count()
Definition: rejctmap.cpp:279
void rej_word_xht_fixup()
Definition: rejctmap.cpp:388
void rej_word_not_tess_accepted()
Definition: rejctmap.cpp:361
void initialise(int16_t length)
Definition: rejctmap.cpp:273
void rej_word_small_xht()
Definition: rejctmap.cpp:343
void rej_word_no_alphanums()
Definition: rejctmap.cpp:397
void rej_word_doc_rej()
Definition: rejctmap.cpp:424
void rej_word_bad_quality()
Definition: rejctmap.cpp:415
void rej_word_block_rej()
Definition: rejctmap.cpp:433
void rej_word_tess_failure()
Definition: rejctmap.cpp:352
void remove_pos(int16_t pos)
Definition: rejctmap.cpp:309
void rej_word_contains_blanks()
Definition: rejctmap.cpp:370
bool quality_recoverable_rejects()
Definition: rejctmap.cpp:300
int32_t length() const
Definition: rejctmap.h:223