tesseract 4.1.1
Loading...
Searching...
No Matches
clusttool.cpp File Reference
#include "clusttool.h"
#include <cmath>
#include <locale>
#include <sstream>
#include "emalloc.h"

Go to the source code of this file.

Macros

#define _USE_MATH_DEFINES
 
#define TOKENSIZE   80
 max size of tokens read from an input file More...
 
#define QUOTED_TOKENSIZE   "79"
 
#define MAXSAMPLESIZE   65535
 max num of dimensions in feature space More...
 

Functions

uint16_t ReadSampleSize (TFile *fp)
 
PARAM_DESC * ReadParamDesc (TFile *fp, uint16_t N)
 
PROTOTYPE * ReadPrototype (TFile *fp, uint16_t N)
 
void WriteParamDesc (FILE *File, uint16_t N, const PARAM_DESC ParamDesc[])
 
void WritePrototype (FILE *File, uint16_t N, PROTOTYPE *Proto)
 

Macro Definition Documentation

◆ _USE_MATH_DEFINES

#define _USE_MATH_DEFINES

Definition at line 19 of file clusttool.cpp.

◆ MAXSAMPLESIZE

#define MAXSAMPLESIZE   65535

max num of dimensions in feature space

Definition at line 31 of file clusttool.cpp.

◆ QUOTED_TOKENSIZE

#define QUOTED_TOKENSIZE   "79"

Definition at line 30 of file clusttool.cpp.

◆ TOKENSIZE

#define TOKENSIZE   80

max size of tokens read from an input file

Definition at line 29 of file clusttool.cpp.

Function Documentation

◆ ReadParamDesc()

PARAM_DESC * ReadParamDesc ( TFile * fp,
uint16_t  N 
)

This routine reads textual descriptions of sets of parameters which describe the characteristics of feature dimensions.

Parameters
fp: open text file to read N parameter descriptions from
N: number of parameter descriptions to read
Returns
Pointer to an array of parameter descriptors.
Note
Globals: None

Definition at line 140 of file clusttool.cpp.

140 {
141 PARAM_DESC *ParamDesc;
142
143 ParamDesc = static_cast<PARAM_DESC *>(Emalloc (N * sizeof (PARAM_DESC)));
144 for (int i = 0; i < N; i++) {
145 const int kMaxLineSize = TOKENSIZE * 4;
146 char line[kMaxLineSize];
147 ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
148 std::istringstream stream(line);
149 // Use "C" locale (needed for float values Min, Max).
150 stream.imbue(std::locale::classic());
151 std::string linear_token;
152 stream >> linear_token;
153 std::string essential_token;
154 stream >> essential_token;
155 stream >> ParamDesc[i].Min;
156 stream >> ParamDesc[i].Max;
157 ASSERT_HOST(!stream.fail());
158 ParamDesc[i].Circular = (linear_token[0] == 'c');
159 ParamDesc[i].NonEssential = (essential_token[0] != 'e');
160 ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
161 ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
162 ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
163 }
164 return (ParamDesc);
165}
#define ASSERT_HOST(x)
Definition: errcode.h:88
#define TOKENSIZE
max size of tokens read from an input file
Definition: clusttool.cpp:29
void * Emalloc(int Size)
Definition: emalloc.cpp:31
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:249
float HalfRange
Definition: ocrfeatures.h:48
float Range
Definition: ocrfeatures.h:47
bool Circular
Definition: ocrfeatures.h:43
float Max
Definition: ocrfeatures.h:46
float MidRange
Definition: ocrfeatures.h:49
bool NonEssential
Definition: ocrfeatures.h:44
float Min
Definition: ocrfeatures.h:45

◆ ReadPrototype()

PROTOTYPE * ReadPrototype ( TFile * fp,
uint16_t  N 
)

This routine reads a textual description of a prototype from the specified file.

Parameters
fp: open text file to read prototype from
N: number of dimensions used in prototype
Returns
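Pointer to the newly read prototype, or nullptr if the prototype header line is invalid or the prototype style is not supported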
Note
Globals: None

Definition at line 176 of file clusttool.cpp.

176 {
177 char sig_token[TOKENSIZE], shape_token[TOKENSIZE];
178 PROTOTYPE *Proto;
179 int SampleCount;
180 int i;
181
182 const int kMaxLineSize = TOKENSIZE * 4;
183 char line[kMaxLineSize];
184 if (fp->FGets(line, kMaxLineSize) == nullptr ||
185 sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d",
186 sig_token, shape_token, &SampleCount) != 3) {
187 tprintf("Invalid prototype: %s\n", line);
188 return nullptr;
189 }
190 Proto = static_cast<PROTOTYPE *>(Emalloc(sizeof(PROTOTYPE)));
191 Proto->Cluster = nullptr;
192 Proto->Significant = (sig_token[0] == 's');
193
194 switch (shape_token[0]) {
195 case 's':
196 Proto->Style = spherical;
197 break;
198 case 'e':
199 Proto->Style = elliptical;
200 break;
201 case 'a':
202 Proto->Style = automatic;
203 break;
204 default:
205 tprintf("Invalid prototype style specification:%s\n", shape_token);
206 Proto->Style = elliptical;
207 }
208
209 ASSERT_HOST(SampleCount >= 0);
210 Proto->NumSamples = SampleCount;
211
212 Proto->Mean = ReadNFloats(fp, N, nullptr);
213 ASSERT_HOST(Proto->Mean != nullptr);
214
215 switch (Proto->Style) {
216 case spherical:
217 ASSERT_HOST(ReadNFloats(fp, 1, &(Proto->Variance.Spherical)) != nullptr);
218 Proto->Magnitude.Spherical =
219 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical);
220 Proto->TotalMagnitude = pow(Proto->Magnitude.Spherical, static_cast<float>(N));
221 Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
222 Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
223 Proto->Distrib = nullptr;
224 break;
225 case elliptical:
226 Proto->Variance.Elliptical = ReadNFloats(fp, N, nullptr);
227 ASSERT_HOST(Proto->Variance.Elliptical != nullptr);
228 Proto->Magnitude.Elliptical = static_cast<float *>(Emalloc(N * sizeof(float)));
229 Proto->Weight.Elliptical = static_cast<float *>(Emalloc(N * sizeof(float)));
230 Proto->TotalMagnitude = 1.0;
231 for (i = 0; i < N; i++) {
232 Proto->Magnitude.Elliptical[i] =
233 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Elliptical[i]);
234 Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i];
235 Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
236 }
237 Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
238 Proto->Distrib = nullptr;
239 break;
240 default:
241 Efree(Proto);
242 tprintf("Invalid prototype style\n");
243 return nullptr;
244 }
245 return Proto;
246}
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
@ elliptical
Definition: cluster.h:44
@ spherical
Definition: cluster.h:44
@ automatic
Definition: cluster.h:44
#define QUOTED_TOKENSIZE
Definition: clusttool.cpp:30
void Efree(void *ptr)
Definition: emalloc.cpp:45
float Spherical
Definition: cluster.h:59
float * Elliptical
Definition: cluster.h:60
FLOATUNION Magnitude
Definition: cluster.h:78
unsigned NumSamples
Definition: cluster.h:71
FLOATUNION Variance
Definition: cluster.h:77
unsigned Style
Definition: cluster.h:70
float * Mean
Definition: cluster.h:74
float LogMagnitude
Definition: cluster.h:76
bool Significant
Definition: cluster.h:64
float TotalMagnitude
Definition: cluster.h:75
DISTRIBUTION * Distrib
Definition: cluster.h:73
FLOATUNION Weight
Definition: cluster.h:79
CLUSTER * Cluster
Definition: cluster.h:72

◆ ReadSampleSize()

uint16_t ReadSampleSize ( TFile * fp)

This routine reads a single integer from the specified file and checks to ensure that it is between 0 and MAXSAMPLESIZE.

Parameters
fp: open text file to read sample size from
Returns
Sample size
Note
Globals: None

Definition at line 120 of file clusttool.cpp.

120 {
121 int SampleSize = 0;
122
123 const int kMaxLineSize = 100;
124 char line[kMaxLineSize];
125 ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
126 ASSERT_HOST(sscanf(line, "%d", &SampleSize) == 1);
127 ASSERT_HOST(SampleSize >= 0 && SampleSize <= MAXSAMPLESIZE);
128 return SampleSize;
129}
#define MAXSAMPLESIZE
max num of dimensions in feature space
Definition: clusttool.cpp:31

◆ WriteParamDesc()

void WriteParamDesc ( FILE *  File,
uint16_t  N,
const PARAM_DESC  ParamDesc[] 
)

This routine writes an array of dimension descriptors to the specified text file.

Parameters
File: open text file to write param descriptors to
N: number of param descriptors to write
ParamDesc: array of param descriptors to write

Definition at line 255 of file clusttool.cpp.

255 {
256 int i;
257
258 for (i = 0; i < N; i++) {
259 if (ParamDesc[i].Circular)
260 fprintf (File, "circular ");
261 else
262 fprintf (File, "linear ");
263
264 if (ParamDesc[i].NonEssential)
265 fprintf (File, "non-essential ");
266 else
267 fprintf (File, "essential ");
268
269 fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
270 }
271}

◆ WritePrototype()

void WritePrototype ( FILE *  File,
uint16_t  N,
PROTOTYPE *  Proto 
)

This routine writes a textual description of a prototype to the specified text file.

Parameters
File: open text file to write prototype to
N: number of dimensions in feature space
Proto: prototype to write out

Definition at line 280 of file clusttool.cpp.

280 {
281 int i;
282
283 if (Proto->Significant)
284 fprintf (File, "significant ");
285 else
286 fprintf (File, "insignificant ");
287 WriteProtoStyle (File, static_cast<PROTOSTYLE>(Proto->Style));
288 fprintf (File, "%6d\n\t", Proto->NumSamples);
289 WriteNFloats (File, N, Proto->Mean);
290 fprintf (File, "\t");
291
292 switch (Proto->Style) {
293 case spherical:
294 WriteNFloats (File, 1, &(Proto->Variance.Spherical));
295 break;
296 case elliptical:
297 WriteNFloats (File, N, Proto->Variance.Elliptical);
298 break;
299 case mixed:
300 for (i = 0; i < N; i++)
301 switch (Proto->Distrib[i]) {
302 case normal:
303 fprintf (File, " %9s", "normal");
304 break;
305 case uniform:
306 fprintf (File, " %9s", "uniform");
307 break;
308 case D_random:
309 fprintf (File, " %9s", "random");
310 break;
311 case DISTRIBUTION_COUNT:
312 ASSERT_HOST(!"Distribution count not allowed!");
313 }
314 fprintf (File, "\n\t");
315 WriteNFloats (File, N, Proto->Variance.Elliptical);
316 }
317}
PROTOSTYLE
Definition: cluster.h:44
@ mixed
Definition: cluster.h:44
@ DISTRIBUTION_COUNT
Definition: cluster.h:56
@ D_random
Definition: cluster.h:56
@ uniform
Definition: cluster.h:56
@ normal
Definition: cluster.h:56