tesseract 4.1.1
Loading...
Searching...
No Matches
sorthelper.h
Go to the documentation of this file.
1
2// File: sorthelper.h
3// Description: Generic sort and maxfinding class.
4// Author: Ray Smith
5// Created: Thu May 20 17:48:21 PDT 2010
6//
7// (C) Copyright 2010, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20#ifndef TESSERACT_CCUTIL_SORTHELPER_H_
21#define TESSERACT_CCUTIL_SORTHELPER_H_
22
23#include <cstdlib>
24#include "genericvector.h"
25
26// Generic class to provide functions based on a <value,count> pair.
27// T is the value type.
28// The class keeps a count of each value and can return the most frequent
29// value or a sorted array of the values with counts.
30// Note that this class uses linear search for adding. It is better
31// to use the STATS class to get the mode of a large number of values
32// in a small space. SortHelper is better to get the mode of a small number
33// of values from a large space.
34// T must have a copy constructor.
35template <typename T>
37 public:
38 // Simple pair class to hold the values and counts.
// Plain aggregate pairing a value with the number of times it was counted.
// Kept free of constructors so it can be brace-initialized as {value, count}.
template <typename PairT>
struct SortPair {
  PairT value;  // The value being counted.
  int count;    // Accumulated count for that value.
};
43 // qsort function to sort by decreasing count.
44 static int SortPairsByCount(const void* v1, const void* v2) {
45 const auto* p1 = static_cast<const SortPair<T>*>(v1);
46 const auto* p2 = static_cast<const SortPair<T>*>(v2);
47 return p2->count - p1->count;
48 }
49 // qsort function to sort by decreasing value.
50 static int SortPairsByValue(const void* v1, const void* v2) {
51 const auto* p1 = static_cast<const SortPair<T>*>(v1);
52 const auto* p2 = static_cast<const SortPair<T>*>(v2);
53 if (p2->value - p1->value < 0) return -1;
54 if (p2->value - p1->value > 0) return 1;
55 return 0;
56 }
57
58 // Constructor takes a hint of the array size, but it need not be accurate.
59 explicit SortHelper(int sizehint) {
60 counts_.reserve(sizehint);
61 }
62
63 // Add a value that may be a duplicate of an existing value.
64 // Uses a linear search.
65 void Add(T value, int count) {
66 // Linear search for value.
67 for (int i = 0; i < counts_.size(); ++i) {
68 if (counts_[i].value == value) {
69 counts_[i].count += count;
70 return;
71 }
72 }
73 SortPair<T> new_pair = {value, count};
74 counts_.push_back(SortPair<T>(new_pair));
75 }
76
77 // Returns the frequency of the most frequent value.
78 // If max_value is not nullptr, the most frequent value is stored in *max_value.
79 // If the array is empty, returns -INT32_MAX and max_value is unchanged.
80 int MaxCount(T* max_value) const {
81 int best_count = -INT32_MAX;
82 for (int i = 0; i < counts_.size(); ++i) {
83 if (counts_[i].count > best_count) {
84 best_count = counts_[i].count;
85 if (max_value != nullptr)
86 *max_value = counts_[i].value;
87 }
88 }
89 return best_count;
90 }
91
92 // Returns the data array sorted by decreasing frequency.
94 counts_.sort(&SortPairsByCount);
95 return counts_;
96 }
97 // Returns the data array sorted by decreasing value.
99 counts_.sort(&SortPairsByValue);
100 return counts_;
101 }
102
103 private:
105};
106
107
108#endif // TESSERACT_CCUTIL_SORTHELPER_H_.
int count(LIST var_list)
Definition: oldlist.cpp:95
void Add(T value, int count)
Definition: sorthelper.h:65
int MaxCount(T *max_value) const
Definition: sorthelper.h:80
const GenericVector< SortPair< T > > & SortByValue()
Definition: sorthelper.h:98
static int SortPairsByValue(const void *v1, const void *v2)
Definition: sorthelper.h:50
const GenericVector< SortPair< T > > & SortByCount()
Definition: sorthelper.h:93
SortHelper(int sizehint)
Definition: sorthelper.h:59
static int SortPairsByCount(const void *v1, const void *v2)
Definition: sorthelper.h:44