tesseract 4.1.1
Loading...
Searching...
No Matches
clusttool.cpp
Go to the documentation of this file.
1/******************************************************************************
2 ** Filename: clusttool.cpp
3 ** Purpose: Misc. tools for use with the clustering routines
4 ** Author: Dan Johnson
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *****************************************************************************/
17
18//--------------------------Include Files----------------------------------
19#define _USE_MATH_DEFINES // for M_PI
20#include "clusttool.h"
21#include <cmath> // for M_PI, std::isnan
22#include <locale> // for std::locale::classic
23#include <sstream> // for std::stringstream
24#include "emalloc.h"
25
27
28//---------------Global Data Definitions and Declarations--------------------
// Max size of tokens read from an input file; sizes the token buffers used
// by ReadPrototype and (times 4) its line buffers.
29#define TOKENSIZE 80
// TOKENSIZE - 1 spelled as a string literal so it can be spliced into the
// sscanf format in ReadPrototype as the "%s" field width.
30#define QUOTED_TOKENSIZE "79"
// Max num of dimensions in feature space; upper bound enforced by
// ReadSampleSize.
31#define MAXSAMPLESIZE 65535
32
45static float *ReadNFloats(TFile *fp, uint16_t N, float Buffer[]) {
46 const int kMaxLineSize = 1024;
47 char line[kMaxLineSize];
48 if (fp->FGets(line, kMaxLineSize) == nullptr) {
49 tprintf("Hit EOF in ReadNFloats!\n");
50 return nullptr;
51 }
52 bool needs_free = false;
53
54 if (Buffer == nullptr) {
55 Buffer = static_cast<float *>(Emalloc(N * sizeof(float)));
56 needs_free = true;
57 }
58
59 std::stringstream stream(line);
60 // Use "C" locale (needed for float values Buffer[i]).
61 stream.imbue(std::locale::classic());
62 for (uint16_t i = 0; i < N; i++) {
63 float f = NAN;
64 stream >> f;
65 if (std::isnan(f)) {
66 tprintf("Read of %u floats failed!\n", N);
67 if (needs_free) Efree(Buffer);
68 return nullptr;
69 }
70 Buffer[i] = f;
71 }
72 return Buffer;
73}
74
/**
 * Writes N floats from Array to File on one line: each value is emitted as a
 * space followed by the value formatted with "%9.6f", and the line is ended
 * with a newline.
 *
 * @param File   open text file to write to
 * @param N      number of floats to write
 * @param Array  the floats to write
 */
static void WriteNFloats(FILE * File, uint16_t N, float Array[]) {
  const float *const last = Array + N;
  for (const float *value = Array; value != last; ++value) {
    fprintf(File, " %9.6f", *value);
  }
  fprintf(File, "\n");
}
87
95static void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) {
96 switch (ProtoStyle) {
97 case spherical:
98 fprintf (File, "spherical");
99 break;
100 case elliptical:
101 fprintf (File, "elliptical");
102 break;
103 case mixed:
104 fprintf (File, "mixed");
105 break;
106 case automatic:
107 fprintf (File, "automatic");
108 break;
109 }
110}
111
120uint16_t ReadSampleSize(TFile *fp) {
121 int SampleSize = 0;
122
123 const int kMaxLineSize = 100;
124 char line[kMaxLineSize];
125 ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
126 ASSERT_HOST(sscanf(line, "%d", &SampleSize) == 1);
127 ASSERT_HOST(SampleSize >= 0 && SampleSize <= MAXSAMPLESIZE);
128 return SampleSize;
129}
130
140PARAM_DESC *ReadParamDesc(TFile *fp, uint16_t N) {
141 PARAM_DESC *ParamDesc;
142
143 ParamDesc = static_cast<PARAM_DESC *>(Emalloc (N * sizeof (PARAM_DESC)));
144 for (int i = 0; i < N; i++) {
145 const int kMaxLineSize = TOKENSIZE * 4;
146 char line[kMaxLineSize];
147 ASSERT_HOST(fp->FGets(line, kMaxLineSize) != nullptr);
148 std::istringstream stream(line);
149 // Use "C" locale (needed for float values Min, Max).
150 stream.imbue(std::locale::classic());
151 std::string linear_token;
152 stream >> linear_token;
153 std::string essential_token;
154 stream >> essential_token;
155 stream >> ParamDesc[i].Min;
156 stream >> ParamDesc[i].Max;
157 ASSERT_HOST(!stream.fail());
158 ParamDesc[i].Circular = (linear_token[0] == 'c');
159 ParamDesc[i].NonEssential = (essential_token[0] != 'e');
160 ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
161 ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
162 ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
163 }
164 return (ParamDesc);
165}
166
176PROTOTYPE *ReadPrototype(TFile *fp, uint16_t N) {
177 char sig_token[TOKENSIZE], shape_token[TOKENSIZE];
178 PROTOTYPE *Proto;
179 int SampleCount;
180 int i;
181
182 const int kMaxLineSize = TOKENSIZE * 4;
183 char line[kMaxLineSize];
184 if (fp->FGets(line, kMaxLineSize) == nullptr ||
185 sscanf(line, "%" QUOTED_TOKENSIZE "s %" QUOTED_TOKENSIZE "s %d",
186 sig_token, shape_token, &SampleCount) != 3) {
187 tprintf("Invalid prototype: %s\n", line);
188 return nullptr;
189 }
190 Proto = static_cast<PROTOTYPE *>(Emalloc(sizeof(PROTOTYPE)));
191 Proto->Cluster = nullptr;
192 Proto->Significant = (sig_token[0] == 's');
193
194 switch (shape_token[0]) {
195 case 's':
196 Proto->Style = spherical;
197 break;
198 case 'e':
199 Proto->Style = elliptical;
200 break;
201 case 'a':
202 Proto->Style = automatic;
203 break;
204 default:
205 tprintf("Invalid prototype style specification:%s\n", shape_token);
206 Proto->Style = elliptical;
207 }
208
209 ASSERT_HOST(SampleCount >= 0);
210 Proto->NumSamples = SampleCount;
211
212 Proto->Mean = ReadNFloats(fp, N, nullptr);
213 ASSERT_HOST(Proto->Mean != nullptr);
214
215 switch (Proto->Style) {
216 case spherical:
217 ASSERT_HOST(ReadNFloats(fp, 1, &(Proto->Variance.Spherical)) != nullptr);
218 Proto->Magnitude.Spherical =
219 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Spherical);
220 Proto->TotalMagnitude = pow(Proto->Magnitude.Spherical, static_cast<float>(N));
221 Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
222 Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
223 Proto->Distrib = nullptr;
224 break;
225 case elliptical:
226 Proto->Variance.Elliptical = ReadNFloats(fp, N, nullptr);
227 ASSERT_HOST(Proto->Variance.Elliptical != nullptr);
228 Proto->Magnitude.Elliptical = static_cast<float *>(Emalloc(N * sizeof(float)));
229 Proto->Weight.Elliptical = static_cast<float *>(Emalloc(N * sizeof(float)));
230 Proto->TotalMagnitude = 1.0;
231 for (i = 0; i < N; i++) {
232 Proto->Magnitude.Elliptical[i] =
233 1.0 / sqrt(2.0 * M_PI * Proto->Variance.Elliptical[i]);
234 Proto->Weight.Elliptical[i] = 1.0 / Proto->Variance.Elliptical[i];
235 Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
236 }
237 Proto->LogMagnitude = log(static_cast<double>(Proto->TotalMagnitude));
238 Proto->Distrib = nullptr;
239 break;
240 default:
241 Efree(Proto);
242 tprintf("Invalid prototype style\n");
243 return nullptr;
244 }
245 return Proto;
246}
247
255void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[]) {
256 int i;
257
258 for (i = 0; i < N; i++) {
259 if (ParamDesc[i].Circular)
260 fprintf (File, "circular ");
261 else
262 fprintf (File, "linear ");
263
264 if (ParamDesc[i].NonEssential)
265 fprintf (File, "non-essential ");
266 else
267 fprintf (File, "essential ");
268
269 fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
270 }
271}
272
280void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto) {
281 int i;
282
283 if (Proto->Significant)
284 fprintf (File, "significant ");
285 else
286 fprintf (File, "insignificant ");
287 WriteProtoStyle (File, static_cast<PROTOSTYLE>(Proto->Style));
288 fprintf (File, "%6d\n\t", Proto->NumSamples);
289 WriteNFloats (File, N, Proto->Mean);
290 fprintf (File, "\t");
291
292 switch (Proto->Style) {
293 case spherical:
294 WriteNFloats (File, 1, &(Proto->Variance.Spherical));
295 break;
296 case elliptical:
297 WriteNFloats (File, N, Proto->Variance.Elliptical);
298 break;
299 case mixed:
300 for (i = 0; i < N; i++)
301 switch (Proto->Distrib[i]) {
302 case normal:
303 fprintf (File, " %9s", "normal");
304 break;
305 case uniform:
306 fprintf (File, " %9s", "uniform");
307 break;
308 case D_random:
309 fprintf (File, " %9s", "random");
310 break;
312 ASSERT_HOST(!"Distribution count not allowed!");
313 }
314 fprintf (File, "\n\t");
315 WriteNFloats (File, N, Proto->Variance.Elliptical);
316 }
317}
#define ASSERT_HOST(x)
Definition: errcode.h:88
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
PROTOSTYLE
Definition: cluster.h:44
@ elliptical
Definition: cluster.h:44
@ spherical
Definition: cluster.h:44
@ automatic
Definition: cluster.h:44
@ mixed
Definition: cluster.h:44
@ DISTRIBUTION_COUNT
Definition: cluster.h:56
@ D_random
Definition: cluster.h:56
@ uniform
Definition: cluster.h:56
@ normal
Definition: cluster.h:56
uint16_t ReadSampleSize(TFile *fp)
Definition: clusttool.cpp:120
void WritePrototype(FILE *File, uint16_t N, PROTOTYPE *Proto)
Definition: clusttool.cpp:280
#define QUOTED_TOKENSIZE
Definition: clusttool.cpp:30
PARAM_DESC * ReadParamDesc(TFile *fp, uint16_t N)
Definition: clusttool.cpp:140
#define MAXSAMPLESIZE
max num of dimensions in feature space
Definition: clusttool.cpp:31
PROTOTYPE * ReadPrototype(TFile *fp, uint16_t N)
Definition: clusttool.cpp:176
void WriteParamDesc(FILE *File, uint16_t N, const PARAM_DESC ParamDesc[])
Definition: clusttool.cpp:255
#define TOKENSIZE
max size of tokens read from an input file
Definition: clusttool.cpp:29
void * Emalloc(int Size)
Definition: emalloc.cpp:31
void Efree(void *ptr)
Definition: emalloc.cpp:45
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:249
float Spherical
Definition: cluster.h:59
float * Elliptical
Definition: cluster.h:60
FLOATUNION Magnitude
Definition: cluster.h:78
unsigned NumSamples
Definition: cluster.h:71
FLOATUNION Variance
Definition: cluster.h:77
unsigned Style
Definition: cluster.h:70
float * Mean
Definition: cluster.h:74
float LogMagnitude
Definition: cluster.h:76
bool Significant
Definition: cluster.h:64
float TotalMagnitude
Definition: cluster.h:75
DISTRIBUTION * Distrib
Definition: cluster.h:73
FLOATUNION Weight
Definition: cluster.h:79
CLUSTER * Cluster
Definition: cluster.h:72
float HalfRange
Definition: ocrfeatures.h:48
float Range
Definition: ocrfeatures.h:47
bool Circular
Definition: ocrfeatures.h:43
float Max
Definition: ocrfeatures.h:46
float MidRange
Definition: ocrfeatures.h:49
bool NonEssential
Definition: ocrfeatures.h:44
float Min
Definition: ocrfeatures.h:45