tesseract 4.1.1
Loading...
Searching...
No Matches
statistc.cpp
Go to the documentation of this file.
1/**********************************************************************
2 * File: statistc.cpp (Formerly stats.c)
3 * Description: Simple statistical package for integer values.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1991, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19// Include automatically generated configuration file if running autoconf.
20#ifdef HAVE_CONFIG_H
21#include "config_auto.h"
22#endif
23
24#include "statistc.h"
25#include <cstring>
26#include <cmath>
27#include <cstdlib>
28#include "errcode.h"
29#include "helpers.h"
30#include "scrollview.h"
31#include "tprintf.h"
32
34
35/**********************************************************************
36 * STATS::STATS
37 *
38 * Construct a new stats element by allocating and zeroing the memory.
39 **********************************************************************/
40STATS::STATS(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
41 if (max_bucket_value_plus_1 <= min_bucket_value) {
42 min_bucket_value = 0;
43 max_bucket_value_plus_1 = 1;
44 }
45 rangemin_ = min_bucket_value; // setup
46 rangemax_ = max_bucket_value_plus_1;
47 buckets_ = new int32_t[rangemax_ - rangemin_];
48 clear();
49}
50
51/**********************************************************************
52 * STATS::set_range
53 *
54 * Alter the range on an existing stats element.
55 **********************************************************************/
56bool STATS::set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1) {
57 if (max_bucket_value_plus_1 <= min_bucket_value) {
58 return false;
59 }
60 if (rangemax_ - rangemin_ != max_bucket_value_plus_1 - min_bucket_value) {
61 delete [] buckets_;
62 buckets_ = new int32_t[max_bucket_value_plus_1 - min_bucket_value];
63 }
64 rangemin_ = min_bucket_value; // setup
65 rangemax_ = max_bucket_value_plus_1;
66 clear(); // zero it
67 return true;
68}
69
70/**********************************************************************
71 * STATS::clear
72 *
73 * Clear out the STATS class by zeroing all the buckets.
74 **********************************************************************/
75void STATS::clear() { // clear out buckets
76 total_count_ = 0;
77 if (buckets_ != nullptr)
78 memset(buckets_, 0, (rangemax_ - rangemin_) * sizeof(buckets_[0]));
79}
80
81/**********************************************************************
82 * STATS::~STATS
83 *
84 * Destructor for a stats class.
85 **********************************************************************/
86STATS::~STATS() { delete[] buckets_; }
87
88/**********************************************************************
89 * STATS::add
90 *
91 * Add a set of samples to (or delete from) a pile.
92 **********************************************************************/
93void STATS::add(int32_t value, int32_t count) {
94 if (buckets_ == nullptr) {
95 return;
96 }
97 value = ClipToRange(value, rangemin_, rangemax_ - 1);
98 buckets_[value - rangemin_] += count;
99 total_count_ += count; // keep count of total
100}
101
102/**********************************************************************
103 * STATS::mode
104 *
105 * Find the mode of a stats class.
106 **********************************************************************/
107int32_t STATS::mode() const { // get mode of samples
108 if (buckets_ == nullptr) {
109 return rangemin_;
110 }
111 int32_t max = buckets_[0]; // max cell count
112 int32_t maxindex = 0; // index of max
113 for (int index = rangemax_ - rangemin_ - 1; index > 0; --index) {
114 if (buckets_[index] > max) {
115 max = buckets_[index]; // find biggest
116 maxindex = index;
117 }
118 }
119 return maxindex + rangemin_; // index of biggest
120}
121
122/**********************************************************************
123 * STATS::mean
124 *
125 * Find the mean of a stats class.
126 **********************************************************************/
127double STATS::mean() const { //get mean of samples
128 if (buckets_ == nullptr || total_count_ <= 0) {
129 return static_cast<double>(rangemin_);
130 }
131 int64_t sum = 0;
132 for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
133 sum += static_cast<int64_t>(index) * buckets_[index];
134 }
135 return static_cast<double>(sum) / total_count_ + rangemin_;
136}
137
138/**********************************************************************
139 * STATS::sd
140 *
141 * Find the standard deviation of a stats class.
142 **********************************************************************/
143double STATS::sd() const { //standard deviation
144 if (buckets_ == nullptr || total_count_ <= 0) {
145 return 0.0;
146 }
147 int64_t sum = 0;
148 double sqsum = 0.0;
149 for (int index = rangemax_ - rangemin_ - 1; index >= 0; --index) {
150 sum += static_cast<int64_t>(index) * buckets_[index];
151 sqsum += static_cast<double>(index) * index * buckets_[index];
152 }
153 double variance = static_cast<double>(sum) / total_count_;
154 variance = sqsum / total_count_ - variance * variance;
155 if (variance > 0.0)
156 return sqrt(variance);
157 return 0.0;
158}
159
160/**********************************************************************
161 * STATS::ile
162 *
163 * Returns the fractile value such that frac fraction (in [0,1]) of samples
164 * has a value less than the return value.
165 **********************************************************************/
166double STATS::ile(double frac) const {
167 if (buckets_ == nullptr || total_count_ == 0) {
168 return static_cast<double>(rangemin_);
169 }
170#if 0
171 // TODO(rays) The existing code doesn't seem to be doing the right thing
172 // with target a double but this substitute crashes the code that uses it.
173 // Investigate and fix properly.
174 int target = IntCastRounded(frac * total_count_);
175 target = ClipToRange(target, 1, total_count_);
176#else
177 double target = frac * total_count_;
178 target = ClipToRange(target, 1.0, static_cast<double>(total_count_));
179#endif
180 int sum = 0;
181 int index = 0;
182 for (index = 0; index < rangemax_ - rangemin_ && sum < target;
183 sum += buckets_[index++]);
184 if (index > 0) {
185 ASSERT_HOST(buckets_[index - 1] > 0);
186 return rangemin_ + index -
187 static_cast<double>(sum - target) / buckets_[index - 1];
188 } else {
189 return static_cast<double>(rangemin_);
190 }
191}
192
193/**********************************************************************
194 * STATS::min_bucket
195 *
196 * Find REAL minimum bucket - ile(0.0) isn't necessarily correct
197 **********************************************************************/
198int32_t STATS::min_bucket() const { // Find min
199 if (buckets_ == nullptr || total_count_ == 0) {
200 return rangemin_;
201 }
202 int32_t min = 0;
203 for (min = 0; (min < rangemax_ - rangemin_) && (buckets_[min] == 0); min++);
204 return rangemin_ + min;
205}
206
207/**********************************************************************
208 * STATS::max_bucket
209 *
210 * Find REAL maximum bucket - ile(1.0) isn't necessarily correct
211 **********************************************************************/
212
213int32_t STATS::max_bucket() const { // Find max
214 if (buckets_ == nullptr || total_count_ == 0) {
215 return rangemin_;
216 }
217 int32_t max;
218 for (max = rangemax_ - rangemin_ - 1; max > 0 && buckets_[max] == 0; max--);
219 return rangemin_ + max;
220}
221
222/**********************************************************************
223 * STATS::median
224 *
225 * Finds a more useful estimate of median than ile(0.5).
226 *
227 * Overcomes a problem with ile() - if the samples are, for example,
228 * 6,6,13,14 ile(0.5) return 7.0 - when a more useful value would be midway
229 * between 6 and 13 = 9.5
230 **********************************************************************/
231double STATS::median() const { //get median
232 if (buckets_ == nullptr) {
233 return static_cast<double>(rangemin_);
234 }
235 double median = ile(0.5);
236 int median_pile = static_cast<int>(floor(median));
237 if ((total_count_ > 1) && (pile_count(median_pile) == 0)) {
238 int32_t min_pile;
239 int32_t max_pile;
240 /* Find preceding non zero pile */
241 for (min_pile = median_pile; pile_count(min_pile) == 0; min_pile--);
242 /* Find following non zero pile */
243 for (max_pile = median_pile; pile_count(max_pile) == 0; max_pile++);
244 median = (min_pile + max_pile) / 2.0;
245 }
246 return median;
247}
248
249/**********************************************************************
250 * STATS::local_min
251 *
252 * Return true if this point is a local min.
253 **********************************************************************/
254bool STATS::local_min(int32_t x) const {
255 if (buckets_ == nullptr) {
256 return false;
257 }
258 x = ClipToRange(x, rangemin_, rangemax_ - 1) - rangemin_;
259 if (buckets_[x] == 0)
260 return true;
261 int32_t index; // table index
262 for (index = x - 1; index >= 0 && buckets_[index] == buckets_[x]; --index);
263 if (index >= 0 && buckets_[index] < buckets_[x])
264 return false;
265 for (index = x + 1; index < rangemax_ - rangemin_ &&
266 buckets_[index] == buckets_[x]; ++index);
267 if (index < rangemax_ - rangemin_ && buckets_[index] < buckets_[x])
268 return false;
269 else
270 return true;
271}
272
273/**********************************************************************
274 * STATS::smooth
275 *
276 * Apply a triangular smoothing filter to the stats.
277 * This makes the modes a bit more useful.
278 * The factor gives the height of the triangle, i.e. the weight of the
279 * centre.
280 **********************************************************************/
281void STATS::smooth(int32_t factor) {
282 if (buckets_ == nullptr || factor < 2) {
283 return;
284 }
285 STATS result(rangemin_, rangemax_);
286 int entrycount = rangemax_ - rangemin_;
287 for (int entry = 0; entry < entrycount; entry++) {
288 //centre weight
289 int count = buckets_[entry] * factor;
290 for (int offset = 1; offset < factor; offset++) {
291 if (entry - offset >= 0)
292 count += buckets_[entry - offset] * (factor - offset);
293 if (entry + offset < entrycount)
294 count += buckets_[entry + offset] * (factor - offset);
295 }
296 result.add(entry + rangemin_, count);
297 }
298 total_count_ = result.total_count_;
299 memcpy(buckets_, result.buckets_, entrycount * sizeof(buckets_[0]));
300}
301
302/**********************************************************************
303 * STATS::cluster
304 *
305 * Cluster the samples into max_cluster clusters.
306 * Each call runs one iteration. The array of clusters must be
307 * max_clusters+1 in size as cluster 0 is used to indicate which samples
308 * have been used.
309 * The return value is the current number of clusters.
310 **********************************************************************/
311
312int32_t STATS::cluster(float lower, // thresholds
313 float upper,
314 float multiple, // distance threshold
315 int32_t max_clusters, // max no to make
316 STATS *clusters) { // array of clusters
317 bool new_cluster; // added one
318 float *centres; // cluster centres
319 int32_t entry; // bucket index
320 int32_t cluster; // cluster index
321 int32_t best_cluster; // one to assign to
322 int32_t new_centre = 0; // residual mode
323 int32_t new_mode; // pile count of new_centre
324 int32_t count; // pile to place
325 float dist; // from cluster
326 float min_dist; // from best_cluster
327 int32_t cluster_count; // no of clusters
328
329 if (buckets_ == nullptr || max_clusters < 1)
330 return 0;
331 centres = new float[max_clusters + 1];
332 for (cluster_count = 1; cluster_count <= max_clusters
333 && clusters[cluster_count].buckets_ != nullptr
334 && clusters[cluster_count].total_count_ > 0;
335 cluster_count++) {
336 centres[cluster_count] =
337 static_cast<float>(clusters[cluster_count].ile(0.5));
338 new_centre = clusters[cluster_count].mode();
339 for (entry = new_centre - 1; centres[cluster_count] - entry < lower
340 && entry >= rangemin_
341 && pile_count(entry) <= pile_count(entry + 1);
342 entry--) {
343 count = pile_count(entry) - clusters[0].pile_count(entry);
344 if (count > 0) {
345 clusters[cluster_count].add(entry, count);
346 clusters[0].add (entry, count);
347 }
348 }
349 for (entry = new_centre + 1; entry - centres[cluster_count] < lower
350 && entry < rangemax_
351 && pile_count(entry) <= pile_count(entry - 1);
352 entry++) {
353 count = pile_count(entry) - clusters[0].pile_count(entry);
354 if (count > 0) {
355 clusters[cluster_count].add(entry, count);
356 clusters[0].add(entry, count);
357 }
358 }
359 }
360 cluster_count--;
361
362 if (cluster_count == 0) {
363 clusters[0].set_range(rangemin_, rangemax_);
364 }
365 do {
366 new_cluster = false;
367 new_mode = 0;
368 for (entry = 0; entry < rangemax_ - rangemin_; entry++) {
369 count = buckets_[entry] - clusters[0].buckets_[entry];
370 //remaining pile
371 if (count > 0) { //any to handle
372 min_dist = static_cast<float>(INT32_MAX);
373 best_cluster = 0;
374 for (cluster = 1; cluster <= cluster_count; cluster++) {
375 dist = entry + rangemin_ - centres[cluster];
376 //find distance
377 if (dist < 0)
378 dist = -dist;
379 if (dist < min_dist) {
380 min_dist = dist; //find least
381 best_cluster = cluster;
382 }
383 }
384 if (min_dist > upper //far enough for new
385 && (best_cluster == 0
386 || entry + rangemin_ > centres[best_cluster] * multiple
387 || entry + rangemin_ < centres[best_cluster] / multiple)) {
388 if (count > new_mode) {
389 new_mode = count;
390 new_centre = entry + rangemin_;
391 }
392 }
393 }
394 }
395 // need new and room
396 if (new_mode > 0 && cluster_count < max_clusters) {
397 cluster_count++;
398 new_cluster = true;
399 if (!clusters[cluster_count].set_range(rangemin_, rangemax_)) {
400 delete [] centres;
401 return 0;
402 }
403 centres[cluster_count] = static_cast<float>(new_centre);
404 clusters[cluster_count].add(new_centre, new_mode);
405 clusters[0].add(new_centre, new_mode);
406 for (entry = new_centre - 1; centres[cluster_count] - entry < lower
407 && entry >= rangemin_
408 && pile_count (entry) <= pile_count(entry + 1); entry--) {
409 count = pile_count(entry) - clusters[0].pile_count(entry);
410 if (count > 0) {
411 clusters[cluster_count].add(entry, count);
412 clusters[0].add(entry, count);
413 }
414 }
415 for (entry = new_centre + 1; entry - centres[cluster_count] < lower
416 && entry < rangemax_
417 && pile_count (entry) <= pile_count(entry - 1); entry++) {
418 count = pile_count(entry) - clusters[0].pile_count(entry);
419 if (count > 0) {
420 clusters[cluster_count].add(entry, count);
421 clusters[0].add (entry, count);
422 }
423 }
424 centres[cluster_count] =
425 static_cast<float>(clusters[cluster_count].ile(0.5));
426 }
427 } while (new_cluster && cluster_count < max_clusters);
428 delete [] centres;
429 return cluster_count;
430}
431
432// Helper tests that the current index is still part of the peak and gathers
433// the data into the peak, returning false when the peak is ended.
434// src_buckets[index] - used_buckets[index] is the unused part of the histogram.
435// prev_count is the histogram count of the previous index on entry and is
436// updated to the current index on return.
437// total_count and total_value are accumulating the mean of the peak.
438static bool GatherPeak(int index, const int* src_buckets, int* used_buckets,
439 int* prev_count, int* total_count, double* total_value) {
440 int pile_count = src_buckets[index] - used_buckets[index];
441 if (pile_count <= *prev_count && pile_count > 0) {
442 // Accumulate count and index.count product.
443 *total_count += pile_count;
444 *total_value += index * pile_count;
445 // Mark this index as used
446 used_buckets[index] = src_buckets[index];
447 *prev_count = pile_count;
448 return true;
449 } else {
450 return false;
451 }
452}
453
454// Finds (at most) the top max_modes modes, well actually the whole peak around
455// each mode, returning them in the given modes vector as a <mean of peak,
456// total count of peak> pair in order of decreasing total count.
457// Since the mean is the key and the count the data in the pair, a single call
458// to sort on the output will re-sort by increasing mean of peak if that is
459// more useful than decreasing total count.
460// Returns the actual number of modes found.
461int STATS::top_n_modes(int max_modes,
462 GenericVector<KDPairInc<float, int> >* modes) const {
463 if (max_modes <= 0) return 0;
464 int src_count = rangemax_ - rangemin_;
465 // Used copies the counts in buckets_ as they get used.
466 STATS used(rangemin_, rangemax_);
467 modes->truncate(0);
468 // Total count of the smallest peak found so far.
469 int least_count = 1;
470 // Mode that is used as a seed for each peak
471 int max_count = 0;
472 do {
473 // Find an unused mode.
474 max_count = 0;
475 int max_index = 0;
476 for (int src_index = 0; src_index < src_count; src_index++) {
477 int pile_count = buckets_[src_index] - used.buckets_[src_index];
478 if (pile_count > max_count) {
479 max_count = pile_count;
480 max_index = src_index;
481 }
482 }
483 if (max_count > 0) {
484 // Copy the bucket count to used so it doesn't get found again.
485 used.buckets_[max_index] = max_count;
486 // Get the entire peak.
487 double total_value = max_index * max_count;
488 int total_count = max_count;
489 int prev_pile = max_count;
490 for (int offset = 1; max_index + offset < src_count; ++offset) {
491 if (!GatherPeak(max_index + offset, buckets_, used.buckets_,
492 &prev_pile, &total_count, &total_value))
493 break;
494 }
495 prev_pile = buckets_[max_index];
496 for (int offset = 1; max_index - offset >= 0; ++offset) {
497 if (!GatherPeak(max_index - offset, buckets_, used.buckets_,
498 &prev_pile, &total_count, &total_value))
499 break;
500 }
501 if (total_count > least_count || modes->size() < max_modes) {
502 // We definitely want this mode, so if we have enough discard the least.
503 if (modes->size() == max_modes)
504 modes->truncate(max_modes - 1);
505 int target_index = 0;
506 // Linear search for the target insertion point.
507 while (target_index < modes->size() &&
508 (*modes)[target_index].data >= total_count)
509 ++target_index;
510 auto peak_mean =
511 static_cast<float>(total_value / total_count + rangemin_);
512 modes->insert(KDPairInc<float, int>(peak_mean, total_count),
513 target_index);
514 least_count = modes->back().data;
515 }
516 }
517 } while (max_count > 0);
518 return modes->size();
519}
520
521/**********************************************************************
522 * STATS::print
523 *
524 * Prints a summary and table of the histogram.
525 **********************************************************************/
526void STATS::print() const {
527 if (buckets_ == nullptr) {
528 return;
529 }
530 int32_t min = min_bucket() - rangemin_;
531 int32_t max = max_bucket() - rangemin_;
532
533 int num_printed = 0;
534 for (int index = min; index <= max; index++) {
535 if (buckets_[index] != 0) {
536 tprintf("%4d:%-3d ", rangemin_ + index, buckets_[index]);
537 if (++num_printed % 8 == 0)
538 tprintf ("\n");
539 }
540 }
541 tprintf ("\n");
543}
544
545
546
547/**********************************************************************
548 * STATS::print_summary
549 *
550 * Print a summary of the stats.
551 **********************************************************************/
553 if (buckets_ == nullptr) {
554 return;
555 }
556 int32_t min = min_bucket();
557 int32_t max = max_bucket();
558 tprintf("Total count=%d\n", total_count_);
559 tprintf("Min=%.2f Really=%d\n", ile(0.0), min);
560 tprintf("Lower quartile=%.2f\n", ile(0.25));
561 tprintf("Median=%.2f, ile(0.5)=%.2f\n", median(), ile(0.5));
562 tprintf("Upper quartile=%.2f\n", ile(0.75));
563 tprintf("Max=%.2f Really=%d\n", ile(1.0), max);
564 tprintf("Range=%d\n", max + 1 - min);
565 tprintf("Mean= %.2f\n", mean());
566 tprintf("SD= %.2f\n", sd());
567}
568
569
570/**********************************************************************
571 * STATS::plot
572 *
573 * Draw a histogram of the stats table.
574 **********************************************************************/
575
576#ifndef GRAPHICS_DISABLED
577void STATS::plot(ScrollView* window, // to draw in
578 float xorigin, // bottom left
579 float yorigin,
580 float xscale, // one x unit
581 float yscale, // one y unit
582 ScrollView::Color colour) const { // colour to draw in
583 if (buckets_ == nullptr) {
584 return;
585 }
586 window->Pen(colour);
587
588 for (int index = 0; index < rangemax_ - rangemin_; index++) {
589 window->Rectangle(xorigin + xscale * index, yorigin,
590 xorigin + xscale * (index + 1),
591 yorigin + yscale * buckets_[index]);
592 }
593}
594#endif
595
596
597/**********************************************************************
598 * STATS::plotline
599 *
600 * Draw a histogram of the stats table. (Line only)
601 **********************************************************************/
602
603#ifndef GRAPHICS_DISABLED
604void STATS::plotline(ScrollView* window, // to draw in
605 float xorigin, // bottom left
606 float yorigin,
607 float xscale, // one x unit
608 float yscale, // one y unit
609 ScrollView::Color colour) const { // colour to draw in
610 if (buckets_ == nullptr) {
611 return;
612 }
613 window->Pen(colour);
614 window->SetCursor(xorigin, yorigin + yscale * buckets_[0]);
615 for (int index = 0; index < rangemax_ - rangemin_; index++) {
616 window->DrawTo(xorigin + xscale * index,
617 yorigin + yscale * buckets_[index]);
618 }
619}
620#endif
621
622
623/**********************************************************************
624 * choose_nth_item
625 *
626 * Returns the index of what would b the nth item in the array
627 * if the members were sorted, without actually sorting.
628 **********************************************************************/
629
630int32_t choose_nth_item(int32_t index, float *array, int32_t count) {
631 int32_t next_sample; // next one to do
632 int32_t next_lesser; // space for new
633 int32_t prev_greater; // last one saved
634 int32_t equal_count; // no of equal ones
635 float pivot; // proposed median
636 float sample; // current sample
637
638 if (count <= 1)
639 return 0;
640 if (count == 2) {
641 if (array[0] < array[1]) {
642 return index >= 1 ? 1 : 0;
643 }
644 else {
645 return index >= 1 ? 0 : 1;
646 }
647 }
648 else {
649 if (index < 0)
650 index = 0; // ensure legal
651 else if (index >= count)
652 index = count - 1;
653 equal_count = static_cast<int32_t>(rand() % count);
654 pivot = array[equal_count];
655 // fill gap
656 array[equal_count] = array[0];
657 next_lesser = 0;
658 prev_greater = count;
659 equal_count = 1;
660 for (next_sample = 1; next_sample < prev_greater;) {
661 sample = array[next_sample];
662 if (sample < pivot) {
663 // shuffle
664 array[next_lesser++] = sample;
665 next_sample++;
666 }
667 else if (sample > pivot) {
668 prev_greater--;
669 // juggle
670 array[next_sample] = array[prev_greater];
671 array[prev_greater] = sample;
672 }
673 else {
674 equal_count++;
675 next_sample++;
676 }
677 }
678 for (next_sample = next_lesser; next_sample < prev_greater;)
679 array[next_sample++] = pivot;
680 if (index < next_lesser)
681 return choose_nth_item (index, array, next_lesser);
682 else if (index < prev_greater)
683 return next_lesser; // in equal bracket
684 else
685 return choose_nth_item (index - prev_greater,
686 array + prev_greater,
687 count - prev_greater) + prev_greater;
688 }
689}
690
691/**********************************************************************
692 * choose_nth_item
693 *
694 * Returns the index of what would be the nth item in the array
695 * if the members were sorted, without actually sorting.
696 **********************************************************************/
697int32_t choose_nth_item(int32_t index, void *array, int32_t count, size_t size,
698 int (*compar)(const void*, const void*)) {
699 int result; // of compar
700 int32_t next_sample; // next one to do
701 int32_t next_lesser; // space for new
702 int32_t prev_greater; // last one saved
703 int32_t equal_count; // no of equal ones
704 int32_t pivot; // proposed median
705
706 if (count <= 1)
707 return 0;
708 if (count == 2) {
709 if (compar (array, static_cast<char *>(array) + size) < 0) {
710 return index >= 1 ? 1 : 0;
711 }
712 else {
713 return index >= 1 ? 0 : 1;
714 }
715 }
716 if (index < 0)
717 index = 0; // ensure legal
718 else if (index >= count)
719 index = count - 1;
720 pivot = static_cast<int32_t>(rand () % count);
721 swap_entries (array, size, pivot, 0);
722 next_lesser = 0;
723 prev_greater = count;
724 equal_count = 1;
725 for (next_sample = 1; next_sample < prev_greater;) {
726 result =
727 compar (static_cast<char *>(array) + size * next_sample,
728 static_cast<char *>(array) + size * next_lesser);
729 if (result < 0) {
730 swap_entries (array, size, next_lesser++, next_sample++);
731 // shuffle
732 }
733 else if (result > 0) {
734 prev_greater--;
735 swap_entries(array, size, prev_greater, next_sample);
736 }
737 else {
738 equal_count++;
739 next_sample++;
740 }
741 }
742 if (index < next_lesser)
743 return choose_nth_item (index, array, next_lesser, size, compar);
744 else if (index < prev_greater)
745 return next_lesser; // in equal bracket
746 else
747 return choose_nth_item (index - prev_greater,
748 static_cast<char *>(array) + size * prev_greater,
749 count - prev_greater, size,
750 compar) + prev_greater;
751}
752
753/**********************************************************************
754 * swap_entries
755 *
756 * Swap 2 entries of arbitrary size in-place in a table.
757 **********************************************************************/
758void swap_entries(void *array, // array of entries
759 size_t size, // size of entry
760 int32_t index1, // entries to swap
761 int32_t index2) {
762 char tmp;
763 char *ptr1; // to entries
764 char *ptr2;
765 size_t count; // of bytes
766
767 ptr1 = static_cast<char *>(array) + index1 * size;
768 ptr2 = static_cast<char *>(array) + index2 * size;
769 for (count = 0; count < size; count++) {
770 tmp = *ptr1;
771 *ptr1++ = *ptr2;
772 *ptr2++ = tmp; // tedious!
773 }
774}
void swap_entries(void *array, size_t size, int32_t index1, int32_t index2)
Definition: statistc.cpp:758
int32_t choose_nth_item(int32_t index, float *array, int32_t count)
Definition: statistc.cpp:630
#define ASSERT_HOST(x)
Definition: errcode.h:88
int IntCastRounded(double x)
Definition: helpers.h:175
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:108
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int count(LIST var_list)
Definition: oldlist.cpp:95
Definition: statistc.h:31
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:577
int32_t max_bucket() const
Definition: statistc.cpp:213
int32_t cluster(float lower, float upper, float multiple, int32_t max_clusters, STATS *clusters)
Definition: statistc.cpp:312
void smooth(int32_t factor)
Definition: statistc.cpp:281
void clear()
Definition: statistc.cpp:75
int32_t pile_count(int32_t value) const
Definition: statistc.h:76
double mean() const
Definition: statistc.cpp:127
void add(int32_t value, int32_t count)
Definition: statistc.cpp:93
double sd() const
Definition: statistc.cpp:143
bool set_range(int32_t min_bucket_value, int32_t max_bucket_value_plus_1)
Definition: statistc.cpp:56
double median() const
Definition: statistc.cpp:231
int top_n_modes(int max_modes, GenericVector< tesseract::KDPairInc< float, int > > *modes) const
Definition: statistc.cpp:461
void print() const
Definition: statistc.cpp:526
STATS()=default
int32_t min_bucket() const
Definition: statistc.cpp:198
void plotline(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:604
double ile(double frac) const
Definition: statistc.cpp:166
~STATS()
Definition: statistc.cpp:86
bool local_min(int32_t x) const
Definition: statistc.cpp:254
void print_summary() const
Definition: statistc.cpp:552
int32_t mode() const
Definition: statistc.cpp:107
Definition: cluster.h:32
void DrawTo(int x, int y)
Definition: scrollview.cpp:525
void SetCursor(int x, int y)
Definition: scrollview.cpp:519
void Pen(Color color)
Definition: scrollview.cpp:719
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:600