tesseract 4.1.1
Loading...
Searching...
No Matches
genericheap.h
Go to the documentation of this file.
1// Copyright 2012 Google Inc. All Rights Reserved.
2// Author: rays@google.com (Ray Smith)
4// File: genericheap.h
5// Description: Template heap class.
6// Author: Ray Smith, based on Dan Johnson's original code.
7// Created: Wed Mar 14 08:13:00 PDT 2012
8//
9// (C) Copyright 2012, Google Inc.
10// Licensed under the Apache License, Version 2.0 (the "License");
11// you may not use this file except in compliance with the License.
12// You may obtain a copy of the License at
13// http://www.apache.org/licenses/LICENSE-2.0
14// Unless required by applicable law or agreed to in writing, software
15// distributed under the License is distributed on an "AS IS" BASIS,
16// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17// See the License for the specific language governing permissions and
18// limitations under the License.
19//
21
22#ifndef TESSERACT_CCUTIL_GENERICHEAP_H_
23#define TESSERACT_CCUTIL_GENERICHEAP_H_
24
25#include "errcode.h"
26#include "genericvector.h"
27
28namespace tesseract {
29
30// GenericHeap requires 1 template argument:
31// Pair will normally be either KDPairInc<Key, Data> or KDPairDec<Key, Data>
32// for some arbitrary Key and scalar, smart pointer, or non-ownership pointer
33// Data type, according to whether a MIN heap or a MAX heap is desired,
34// respectively. Using KDPtrPairInc<Key, Data> or KDPtrPairDec<Key, Data>,
35// GenericHeap can also handle simple Data pointers and own them.
36// If no additional data is required, Pair can also be a scalar, since
37// GenericHeap doesn't look inside it except for operator<.
38//
39// The heap is stored as a packed binary tree in an array hosted by a
40// GenericVector<Pair>, with the invariant that the children of each node are
41// both NOT Pair::operator< the parent node. KDPairInc defines Pair::operator<
42// to use Key::operator< to generate a MIN heap and KDPairDec defines
43// Pair::operator< to use Key::operator> to generate a MAX heap by reversing
44// all the comparisons.
45// See http://en.wikipedia.org/wiki/Heap_(data_structure) for more detail on
46// the basic heap implementation.
47//
48// Insertion and removal are both O(log n) and, unlike the STL heap, an
49// explicit Reshuffle function allows a node to be repositioned in time O(log n)
50// after changing its value.
51//
52// Accessing the element for revaluation is a more complex matter, since the
53// index and pointer can be changed arbitrarily by heap operations.
54// Revaluation can be done by making the Data type in the Pair derived from or
55// contain a DoublePtr as its first data element, making it possible to convert
56// the pointer to a Pair using KDPairInc::RecastDataPointer.
57template <typename Pair>
59 public:
60 GenericHeap() = default;
61 // The initial size is only a GenericVector::reserve. It is not enforced as
62 // the size limit of the heap. Caller must implement their own enforcement.
63 explicit GenericHeap(int initial_size) {
64 heap_.reserve(initial_size);
65 }
66
67 // Simple accessors.
68 bool empty() const {
69 return heap_.empty();
70 }
71 int size() const {
72 return heap_.size();
73 }
74 int size_reserved() const {
75 return heap_.size_reserved();
76 }
77 void clear() {
78 // Clear truncates to 0 to keep the number reserved in tact.
79 heap_.truncate(0);
80 }
81 // Provides access to the underlying vector.
82 // Caution! any changes that modify the keys will invalidate the heap!
84 return &heap_;
85 }
86 // Provides read-only access to an element of the underlying vector.
87 const Pair& get(int index) const {
88 return heap_[index];
89 }
90
91 // Add entry to the heap, keeping the smallest item at the top, by operator<.
92 // Note that *entry is used as the source of operator=, but it is non-const
93 // to allow for a smart pointer to be contained within.
94 // Time = O(log n).
95 void Push(Pair* entry) {
96 int hole_index = heap_.size();
97 // Make a hole in the end of heap_ and sift it up to be the correct
98 // location for the new *entry. To avoid needing a default constructor
99 // for primitive types, and to allow for use of DoublePtr in the Pair
100 // somewhere, we have to incur a double copy here.
101 heap_.push_back(*entry);
102 *entry = heap_.back();
103 hole_index = SiftUp(hole_index, *entry);
104 heap_[hole_index] = *entry;
105 }
106
107 // Get the value of the top (smallest, defined by operator< ) element.
108 const Pair& PeekTop() const {
109 return heap_[0];
110 }
111 // Get the value of the worst (largest, defined by operator< ) element.
112 const Pair& PeekWorst() const { return heap_[IndexOfWorst()]; }
113
114 // Removes the top element of the heap. If entry is not nullptr, the element
115 // is copied into *entry, otherwise it is discarded.
116 // Returns false if the heap was already empty.
117 // Time = O(log n).
118 bool Pop(Pair* entry) {
119 int new_size = heap_.size() - 1;
120 if (new_size < 0)
121 return false; // Already empty.
122 if (entry != nullptr)
123 *entry = heap_[0];
124 if (new_size > 0) {
125 // Sift the hole at the start of the heap_ downwards to match the last
126 // element.
127 Pair hole_pair = heap_[new_size];
128 heap_.truncate(new_size);
129 int hole_index = SiftDown(0, hole_pair);
130 heap_[hole_index] = hole_pair;
131 } else {
132 heap_.truncate(new_size);
133 }
134 return true;
135 }
136
137 // Removes the MAXIMUM element of the heap. (MIN from a MAX heap.) If entry is
138 // not nullptr, the element is copied into *entry, otherwise it is discarded.
139 // Time = O(n). Returns false if the heap was already empty.
140 bool PopWorst(Pair* entry) {
141 int worst_index = IndexOfWorst();
142 if (worst_index < 0) return false; // It cannot be empty!
143 // Extract the worst element from the heap, leaving a hole at worst_index.
144 if (entry != nullptr)
145 *entry = heap_[worst_index];
146 int heap_size = heap_.size() - 1;
147 if (heap_size > 0) {
148 // Sift the hole upwards to match the last element of the heap_
149 Pair hole_pair = heap_[heap_size];
150 int hole_index = SiftUp(worst_index, hole_pair);
151 heap_[hole_index] = hole_pair;
152 }
153 heap_.truncate(heap_size);
154 return true;
155 }
156
157 // Returns the index of the worst element. Time = O(n/2).
158 int IndexOfWorst() const {
159 int heap_size = heap_.size();
160 if (heap_size == 0) return -1; // It cannot be empty!
161
162 // Find the maximum element. Its index is guaranteed to be greater than
163 // the index of the parent of the last element, since by the heap invariant
164 // the parent must be less than or equal to the children.
165 int worst_index = heap_size - 1;
166 int end_parent = ParentNode(worst_index);
167 for (int i = worst_index - 1; i > end_parent; --i) {
168 if (heap_[worst_index] < heap_[i]) worst_index = i;
169 }
170 return worst_index;
171 }
172
173 // The pointed-to Pair has changed its key value, so the location of pair
174 // is reshuffled to maintain the heap invariant.
175 // Must be a valid pointer to an element of the heap_!
176 // Caution! Since GenericHeap is based on GenericVector, reallocs may occur
177 // whenever the vector is extended and elements may get shuffled by any
178 // Push or Pop operation. Therefore use this function only if Data in Pair is
179 // of type DoublePtr, derived (first) from DoublePtr, or has a DoublePtr as
180 // its first element. Reshuffles the heap to maintain the invariant.
181 // Time = O(log n).
182 void Reshuffle(Pair* pair) {
183 int index = pair - &heap_[0];
184 Pair hole_pair = heap_[index];
185 index = SiftDown(index, hole_pair);
186 index = SiftUp(index, hole_pair);
187 heap_[index] = hole_pair;
188 }
189
190 private:
191 // A hole in the heap exists at hole_index, and we want to fill it with the
192 // given pair. SiftUp sifts the hole upward to the correct position and
193 // returns the destination index without actually putting pair there.
194 int SiftUp(int hole_index, const Pair& pair) {
195 int parent;
196 while (hole_index > 0 && pair < heap_[parent = ParentNode(hole_index)]) {
197 heap_[hole_index] = heap_[parent];
198 hole_index = parent;
199 }
200 return hole_index;
201 }
202
203 // A hole in the heap exists at hole_index, and we want to fill it with the
204 // given pair. SiftDown sifts the hole downward to the correct position and
205 // returns the destination index without actually putting pair there.
206 int SiftDown(int hole_index, const Pair& pair) {
207 int heap_size = heap_.size();
208 int child;
209 while ((child = LeftChild(hole_index)) < heap_size) {
210 if (child + 1 < heap_size && heap_[child + 1] < heap_[child])
211 ++child;
212 if (heap_[child] < pair) {
213 heap_[hole_index] = heap_[child];
214 hole_index = child;
215 } else {
216 break;
217 }
218 }
219 return hole_index;
220 }
221
222 // Functions to navigate the tree. Unlike the original implementation, we
223 // store the root at index 0.
224 int ParentNode(int index) const {
225 return (index + 1) / 2 - 1;
226 }
227 int LeftChild(int index) const {
228 return index * 2 + 1;
229 }
230
231 private:
233};
234
235} // namespace tesseract
236
237#endif // TESSERACT_CCUTIL_GENERICHEAP_H_
int push_back(T object)
bool empty() const
Definition: genericvector.h:91
int size_reserved() const
Definition: genericvector.h:82
int size() const
Definition: genericvector.h:72
T & back() const
void truncate(int size)
void reserve(int size)
bool empty() const
Definition: genericheap.h:68
bool PopWorst(Pair *entry)
Definition: genericheap.h:140
GenericVector< Pair > * heap()
Definition: genericheap.h:83
const Pair & PeekTop() const
Definition: genericheap.h:108
const Pair & PeekWorst() const
Definition: genericheap.h:112
bool Pop(Pair *entry)
Definition: genericheap.h:118
int IndexOfWorst() const
Definition: genericheap.h:158
void Reshuffle(Pair *pair)
Definition: genericheap.h:182
int size_reserved() const
Definition: genericheap.h:74
const Pair & get(int index) const
Definition: genericheap.h:87
GenericHeap(int initial_size)
Definition: genericheap.h:63
void Push(Pair *entry)
Definition: genericheap.h:95