tesseract 4.1.1
Loading...
Searching...
No Matches
adaptive.cpp
Go to the documentation of this file.
1/******************************************************************************
2 ** Filename: adaptive.cpp
3 ** Purpose: Adaptive matcher.
4 ** Author: Dan Johnson
5 ** History: Fri Mar 8 10:00:21 1991, DSJ, Created.
6 **
7 ** (c) Copyright Hewlett-Packard Company, 1988.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 ******************************************************************************/
18
19/*----------------------------------------------------------------------------
20 Include Files and Type Defines
21----------------------------------------------------------------------------*/
22#include "adaptive.h"
23#include "emalloc.h"
24#include "classify.h"
25
26#include <cassert>
27#include <cstdio>
28
30
31/*----------------------------------------------------------------------------
32 Public Code
33----------------------------------------------------------------------------*/
34/*---------------------------------------------------------------------------*/
46 ADAPT_CLASS Class,
47 CLASS_ID ClassId) {
48 INT_CLASS IntClass;
49
50 assert (Templates != nullptr);
51 assert (Class != nullptr);
52 assert (LegalClassId (ClassId));
53 assert (UnusedClassIdIn (Templates->Templates, ClassId));
54 assert (Class->NumPermConfigs == 0);
55
56 IntClass = NewIntClass (1, 1);
57 AddIntClass (Templates->Templates, ClassId, IntClass);
58
59 assert (Templates->Class[ClassId] == nullptr);
60 Templates->Class[ClassId] = Class;
61
62} /* AddAdaptedClass */
63
64
65/*---------------------------------------------------------------------------*/
75 assert (Config != nullptr);
76 FreeBitVector (Config->Protos);
77 free(Config);
78} /* FreeTempConfig */
79
80/*---------------------------------------------------------------------------*/
81void FreeTempProto(void *arg) {
82 auto proto = static_cast<PROTO>(arg);
83
84 free(proto);
85}
86
87static void FreePermConfig(PERM_CONFIG Config) {
88 assert(Config != nullptr);
89 delete [] Config->Ambigs;
90 free(Config);
91}
92
93/*---------------------------------------------------------------------------*/
103 ADAPT_CLASS Class;
104
105 Class = static_cast<ADAPT_CLASS>(Emalloc (sizeof (ADAPT_CLASS_STRUCT)));
106 Class->NumPermConfigs = 0;
107 Class->MaxNumTimesSeen = 0;
108 Class->TempProtos = NIL_LIST;
109
110 Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
111 Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
112 zero_all_bits (Class->PermProtos, WordsInVectorOfSize (MAX_NUM_PROTOS));
113 zero_all_bits (Class->PermConfigs, WordsInVectorOfSize (MAX_NUM_CONFIGS));
114
115 for (int i = 0; i < MAX_NUM_CONFIGS; i++)
116 TempConfigFor (Class, i) = nullptr;
117
118 return (Class);
119
120} /* NewAdaptedClass */
121
122
123/*-------------------------------------------------------------------------*/
125 for (int i = 0; i < MAX_NUM_CONFIGS; i++) {
126 if (ConfigIsPermanent (adapt_class, i)
127 && PermConfigFor (adapt_class, i) != nullptr)
128 FreePermConfig (PermConfigFor (adapt_class, i));
129 else if (!ConfigIsPermanent (adapt_class, i)
130 && TempConfigFor (adapt_class, i) != nullptr)
131 FreeTempConfig (TempConfigFor (adapt_class, i));
132 }
133 FreeBitVector (adapt_class->PermProtos);
134 FreeBitVector (adapt_class->PermConfigs);
135 destroy_nodes (adapt_class->TempProtos, FreeTempProto);
136 Efree(adapt_class);
137}
138
139
140/*---------------------------------------------------------------------------*/
141namespace tesseract {
152 ADAPT_TEMPLATES Templates;
153
154 Templates = static_cast<ADAPT_TEMPLATES>(Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT)));
155
156 Templates->Templates = NewIntTemplates ();
157 Templates->NumPermClasses = 0;
158 Templates->NumNonEmptyClasses = 0;
159
160 /* Insert an empty class for each unichar id in unicharset */
161 for (int i = 0; i < MAX_NUM_CLASSES; i++) {
162 Templates->Class[i] = nullptr;
163 if (InitFromUnicharset && i < unicharset.size()) {
164 AddAdaptedClass(Templates, NewAdaptedClass(), i);
165 }
166 }
167
168 return (Templates);
169
170} /* NewAdaptedTemplates */
171
172// Returns FontinfoId of the given config of the given adapted class.
173int Classify::GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId) {
174 return (ConfigIsPermanent(Class, ConfigId) ?
175 PermConfigFor(Class, ConfigId)->FontinfoId :
176 TempConfigFor(Class, ConfigId)->FontinfoId);
177}
178
179} // namespace tesseract
180
181/*----------------------------------------------------------------------------*/
183
184 if (templates != nullptr) {
185 for (int i = 0; i < (templates->Templates)->NumClasses; i++)
186 free_adapted_class (templates->Class[i]);
187 free_int_templates (templates->Templates);
188 Efree(templates);
189 }
190}
191
192
193/*---------------------------------------------------------------------------*/
203TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) {
204 int NumProtos = MaxProtoId + 1;
205
206 auto Config = static_cast<TEMP_CONFIG>(malloc(sizeof(TEMP_CONFIG_STRUCT)));
207 Config->Protos = NewBitVector (NumProtos);
208
209 Config->NumTimesSeen = 1;
210 Config->MaxProtoId = MaxProtoId;
211 Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos);
212 zero_all_bits (Config->Protos, Config->ProtoVectorSize);
213 Config->FontinfoId = FontinfoId;
214
215 return (Config);
216
217} /* NewTempConfig */
218
219
220/*---------------------------------------------------------------------------*/
229 return static_cast<TEMP_PROTO>(malloc(sizeof(TEMP_PROTO_STRUCT)));
230} /* NewTempProto */
231
232
233/*---------------------------------------------------------------------------*/
234namespace tesseract {
245 INT_CLASS IClass;
246 ADAPT_CLASS AClass;
247
248 fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
249 fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
250 Templates->NumNonEmptyClasses, Templates->NumPermClasses);
251 fprintf (File, " Id NC NPC NP NPP\n");
252 fprintf (File, "------------------------\n");
253
254 for (int i = 0; i < (Templates->Templates)->NumClasses; i++) {
255 IClass = Templates->Templates->Class[i];
256 AClass = Templates->Class[i];
257 if (!IsEmptyAdaptedClass (AClass)) {
258 fprintf (File, "%5d %s %3d %3d %3d %3d\n",
260 IClass->NumConfigs, AClass->NumPermConfigs,
261 IClass->NumProtos,
262 IClass->NumProtos - count (AClass->TempProtos));
263 }
264 }
265 fprintf (File, "\n");
266
267} /* PrintAdaptedTemplates */
268} // namespace tesseract
269
270
271/*---------------------------------------------------------------------------*/
282 int NumTempProtos;
283 int NumConfigs;
284 int i;
285 ADAPT_CLASS Class;
286
287 /* first read high level adapted class structure */
288 Class = static_cast<ADAPT_CLASS>(Emalloc (sizeof (ADAPT_CLASS_STRUCT)));
289 fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1);
290
291 /* then read in the definitions of the permanent protos and configs */
292 Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
293 Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
294 fp->FRead(Class->PermProtos, sizeof(uint32_t),
295 WordsInVectorOfSize(MAX_NUM_PROTOS));
296 fp->FRead(Class->PermConfigs, sizeof(uint32_t),
297 WordsInVectorOfSize(MAX_NUM_CONFIGS));
298
299 /* then read in the list of temporary protos */
300 fp->FRead(&NumTempProtos, sizeof(int), 1);
301 Class->TempProtos = NIL_LIST;
302 for (i = 0; i < NumTempProtos; i++) {
303 auto TempProto = static_cast<TEMP_PROTO>(malloc(sizeof(TEMP_PROTO_STRUCT)));
304 fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
305 Class->TempProtos = push_last (Class->TempProtos, TempProto);
306 }
307
308 /* then read in the adapted configs */
309 fp->FRead(&NumConfigs, sizeof(int), 1);
310 for (i = 0; i < NumConfigs; i++)
311 if (test_bit (Class->PermConfigs, i))
312 Class->Config[i].Perm = ReadPermConfig(fp);
313 else
314 Class->Config[i].Temp = ReadTempConfig(fp);
315
316 return (Class);
317
318} /* ReadAdaptedClass */
319
320
321/*---------------------------------------------------------------------------*/
322namespace tesseract {
333 ADAPT_TEMPLATES Templates;
334
335 /* first read the high level adaptive template struct */
336 Templates = static_cast<ADAPT_TEMPLATES>(Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT)));
337 fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
338
339 /* then read in the basic integer templates */
340 Templates->Templates = ReadIntTemplates(fp);
341
342 /* then read in the adaptive info for each class */
343 for (int i = 0; i < (Templates->Templates)->NumClasses; i++) {
344 Templates->Class[i] = ReadAdaptedClass(fp);
345 }
346 return (Templates);
347
348} /* ReadAdaptedTemplates */
349} // namespace tesseract
350
351
352/*---------------------------------------------------------------------------*/
363 auto Config = static_cast<PERM_CONFIG>(malloc(sizeof(PERM_CONFIG_STRUCT)));
364 uint8_t NumAmbigs;
365 fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1);
366 Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
367 fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs);
368 Config->Ambigs[NumAmbigs] = -1;
369 fp->FRead(&(Config->FontinfoId), sizeof(int), 1);
370
371 return (Config);
372
373} /* ReadPermConfig */
374
375
376/*---------------------------------------------------------------------------*/
387 auto Config = static_cast<TEMP_CONFIG>(malloc(sizeof(TEMP_CONFIG_STRUCT)));
388 fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1);
389
390 Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG);
391 fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize);
392
393 return (Config);
394
395} /* ReadTempConfig */
396
397
398/*---------------------------------------------------------------------------*/
409void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) {
410 int NumTempProtos;
411 LIST TempProtos;
412 int i;
413
414 /* first write high level adapted class structure */
415 fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File);
416
417 /* then write out the definitions of the permanent protos and configs */
418 fwrite(Class->PermProtos, sizeof(uint32_t),
419 WordsInVectorOfSize(MAX_NUM_PROTOS), File);
420 fwrite(Class->PermConfigs, sizeof(uint32_t),
421 WordsInVectorOfSize(MAX_NUM_CONFIGS), File);
422
423 /* then write out the list of temporary protos */
424 NumTempProtos = count (Class->TempProtos);
425 fwrite(&NumTempProtos, sizeof(int), 1, File);
426 TempProtos = Class->TempProtos;
427 iterate (TempProtos) {
428 void* proto = first_node(TempProtos);
429 fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File);
430 }
431
432 /* then write out the adapted configs */
433 fwrite(&NumConfigs, sizeof(int), 1, File);
434 for (i = 0; i < NumConfigs; i++)
435 if (test_bit (Class->PermConfigs, i))
436 WritePermConfig (File, Class->Config[i].Perm);
437 else
438 WriteTempConfig (File, Class->Config[i].Temp);
439
440} /* WriteAdaptedClass */
441
442
443/*---------------------------------------------------------------------------*/
444namespace tesseract {
454 int i;
455
456 /* first write the high level adaptive template struct */
457 fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File);
458
459 /* then write out the basic integer templates */
461
462 /* then write out the adaptive info for each class */
463 for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
464 WriteAdaptedClass (File, Templates->Class[i],
465 Templates->Templates->Class[i]->NumConfigs);
466 }
467} /* WriteAdaptedTemplates */
468} // namespace tesseract
469
470
471/*---------------------------------------------------------------------------*/
482 uint8_t NumAmbigs = 0;
483
484 assert (Config != nullptr);
485 while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs;
486
487 fwrite(&NumAmbigs, sizeof(uint8_t), 1, File);
488 fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
489 fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
490} /* WritePermConfig */
491
492
493/*---------------------------------------------------------------------------*/
504 assert (Config != nullptr);
505
506 fwrite(Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
507 fwrite(Config->Protos, sizeof (uint32_t), Config->ProtoVectorSize, File);
508
509} /* WriteTempConfig */
int UNICHAR_ID
Definition: unichar.h:34
void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId)
Definition: adaptive.cpp:45
PERM_CONFIG ReadPermConfig(TFile *fp)
Definition: adaptive.cpp:362
void free_adapted_class(ADAPT_CLASS adapt_class)
Definition: adaptive.cpp:124
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:228
void FreeTempConfig(TEMP_CONFIG Config)
Definition: adaptive.cpp:74
void FreeTempProto(void *arg)
Definition: adaptive.cpp:81
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs)
Definition: adaptive.cpp:409
void WritePermConfig(FILE *File, PERM_CONFIG Config)
Definition: adaptive.cpp:481
TEMP_CONFIG ReadTempConfig(TFile *fp)
Definition: adaptive.cpp:386
ADAPT_CLASS ReadAdaptedClass(TFile *fp)
Definition: adaptive.cpp:281
ADAPT_CLASS NewAdaptedClass()
Definition: adaptive.cpp:102
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:182
void WriteTempConfig(FILE *File, TEMP_CONFIG Config)
Definition: adaptive.cpp:503
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:203
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:79
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:82
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:93
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:91
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:682
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class)
Definition: intproto.cpp:231
void free_int_templates(INT_TEMPLATES templates)
Definition: intproto.cpp:698
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:626
#define UnusedClassIdIn(T, c)
Definition: intproto.h:177
#define MAX_NUM_PROTOS
Definition: intproto.h:48
#define MAX_NUM_CONFIGS
Definition: intproto.h:47
#define LegalClassId(c)
Definition: intproto.h:176
#define test_bit(array, bit)
Definition: bitvec.h:59
const size_t BITSINLONG
Definition: bitvec.h:31
void * Emalloc(int Size)
Definition: emalloc.cpp:31
void Efree(void *ptr)
Definition: emalloc.cpp:45
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:227
void destroy_nodes(LIST list, void_dest destructor)
Definition: oldlist.cpp:157
int count(LIST var_list)
Definition: oldlist.cpp:95
#define iterate(l)
Definition: oldlist.h:101
#define first_node(l)
Definition: oldlist.h:92
#define NIL_LIST
Definition: oldlist.h:76
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:34
#define MAX_NUM_CLASSES
Definition: matchdefs.h:30
CLUSTERCONFIG Config
UNICHARSET unicharset
Definition: ccutil.h:73
int FRead(void *buffer, size_t size, int count)
Definition: serialis.cpp:271
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
int size() const
Definition: unicharset.h:341
PERM_CONFIG Perm
Definition: adaptive.h:52
TEMP_CONFIG Temp
Definition: adaptive.h:51
BIT_VECTOR PermProtos
Definition: adaptive.h:59
ADAPTED_CONFIG Config[MAX_NUM_CONFIGS]
Definition: adaptive.h:62
uint8_t MaxNumTimesSeen
Definition: adaptive.h:57
uint8_t NumPermConfigs
Definition: adaptive.h:56
BIT_VECTOR PermConfigs
Definition: adaptive.h:60
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:70
INT_TEMPLATES Templates
Definition: adaptive.h:67
uint8_t NumPermClasses
Definition: adaptive.h:69
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:244
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:453
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:151
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:1017
int GetFontinfoId(ADAPT_CLASS Class, uint8_t ConfigId)
Definition: adaptive.cpp:173
INT_TEMPLATES ReadIntTemplates(TFile *fp)
Definition: intproto.cpp:718
ADAPT_TEMPLATES ReadAdaptedTemplates(TFile *File)
Definition: adaptive.cpp:332
uint16_t NumProtos
Definition: intproto.h:106
uint8_t NumConfigs
Definition: intproto.h:108
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:121