tesseract 4.1.1
Loading...
Searching...
No Matches
genericvector.h
Go to the documentation of this file.
1
2// File: genericvector.h
3// Description: Generic vector class
4// Author: Daria Antonova
5//
6// (C) Copyright 2007, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18//
19#ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_
20#define TESSERACT_CCUTIL_GENERICVECTOR_H_
21
22#include <algorithm>
23#include <cassert>
24#include <climits> // for LONG_MAX
25#include <cstdint> // for uint32_t
26#include <cstdio>
27#include <cstdlib>
28
29#include "helpers.h"
30#include "serialis.h"
31#include "strngs.h"
32#include "tesscallback.h"
33
34// Use PointerVector<T> below in preference to GenericVector<T*>, as that
35// provides automatic deletion of pointers, [De]Serialize that works, and
36// sort that works.
37template <typename T>
39 public:
42 }
43 GenericVector(int size, const T& init_val) {
44 init(size);
45 init_to_size(size, init_val);
46 }
47
48 // Copy
50 this->init(other.size());
51 this->operator+=(other);
52 }
55
57
58 // Reserve some memory.
59 void reserve(int size);
60 // Double the size of the internal array.
62
63 // Resizes to size and sets all values to t.
64 void init_to_size(int size, const T& t);
65 // Resizes to size without any initialization.
66 void resize_no_init(int size) {
69 }
70
71 // Return the size used.
72 int size() const {
73 return size_used_;
74 }
75 // Workaround to avoid g++ -Wsign-compare warnings.
76 size_t unsigned_size() const {
77 static_assert(sizeof(size_used_) <= sizeof(size_t),
78 "Wow! sizeof(size_t) < sizeof(int32_t)!!");
79 assert(0 <= size_used_);
80 return static_cast<size_t>(size_used_);
81 }
82 int size_reserved() const {
83 return size_reserved_;
84 }
85
86 int length() const {
87 return size_used_;
88 }
89
90 // Return true if empty.
91 bool empty() const {
92 return size_used_ == 0;
93 }
94
95 // Return the object from an index.
96 T& get(int index) const;
97 T& back() const;
98 T& operator[](int index) const;
99 // Returns the last object and removes it.
101
102 // Return the index of the T object.
103 // This method NEEDS a compare_callback to be passed to
104 // set_compare_callback.
105 int get_index(const T& object) const;
106
107 // Return true if T is in the array
108 bool contains(const T& object) const;
109
110 // Return true if the index is valid
111 T contains_index(int index) const;
112
113 // Push an element in the end of the array
114 int push_back(T object);
115 void operator+=(const T& t);
116
117 // Push an element in the end of the array if the same
118 // element is not already contained in the array.
119 int push_back_new(const T& object);
120
121 // Push an element in the front of the array
122 // Note: This function is O(n)
123 int push_front(const T& object);
124
125 // Set the value at the given index
126 void set(const T& t, int index);
127
128 // Insert t at the given index, push other elements to the right.
129 void insert(const T& t, int index);
130
131 // Removes an element at the given index and
132 // shifts the remaining elements to the left.
133 void remove(int index);
134
135 // Truncates the array to the given size by removing the end.
136 // If the current size is less, the array is not expanded.
137 void truncate(int size) {
138 if (size < size_used_) {
140 }
141 }
142
143 // Add a callback to be called to delete the elements when the array took
144 // their ownership.
146 clear_cb_ = cb;
147 }
148
149 // Add a callback to be called to compare the elements when needed (contains,
150 // get_id, ...)
152 compare_cb_ = cb;
153 }
154
155 // Clear the array, calling the clear callback function if any.
156 // All the owned callbacks are also deleted.
157 // If you don't want the callbacks to be deleted, before calling clear, set
158 // the callback to nullptr.
159 void clear();
160
161 // Delete objects pointed to by data_[i]
163
164 // This method clears the current object, then, does a shallow copy of
165 // its argument, and finally invalidates its argument.
166 // Callbacks are moved to the current object;
168
169 // Read/Write the array to a file. This does _NOT_ read/write the callbacks.
170 // The callback given must be permanent since they will be called more than
171 // once. The given callback will be deleted at the end.
172 // If the callbacks are nullptr, then the data is simply read/written using
173 // fread (and swapping)/fwrite.
174 // Returns false on error or if the callback returns false.
175 // DEPRECATED. Use [De]Serialize[Classes] instead.
179 // Writes a vector of simple types to the given file. Assumes that bitwise
180 // read/write of T will work. Returns false in case of error.
181 // TODO(rays) Change all callers to use TFile and remove deprecated methods.
182 bool Serialize(FILE* fp) const;
184 // Reads a vector of simple types from the given file. Assumes that bitwise
185 // read/write will work with ReverseN according to sizeof(T).
186 // Returns false in case of error.
187 // If swap is true, assumes a big/little-endian swap is needed.
188 // TFile is assumed to know about swapping.
189 bool DeSerialize(bool swap, FILE* fp);
191 // Skips the deserialization of the vector.
193 // Writes a vector of classes to the given file. Assumes the existence of
194 // bool T::Serialize(FILE* fp) const that returns false in case of error.
195 // Returns false in case of error.
196 bool SerializeClasses(FILE* fp) const;
198 // Reads a vector of classes from the given file. Assumes the existence of
199 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
200 // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
201 // this function. Returns false in case of error.
202 // If swap is true, assumes a big/little-endian swap is needed.
203 bool DeSerializeClasses(bool swap, FILE* fp);
205 // Calls SkipDeSerialize on the elements of the vector.
207
208 // Allocates a new array of double the current_size, copies over the
209 // information from data to the new location, deletes data and returns
210 // the pointed to the new larger array.
211 // This function uses memcpy to copy the data, instead of invoking
212 // operator=() for each element like double_the_size() does.
213 static T* double_the_size_memcpy(int current_size, T* data) {
214 T* data_new = new T[current_size * 2];
215 memcpy(data_new, data, sizeof(T) * current_size);
216 delete[] data;
217 return data_new;
218 }
219
220 // Reverses the elements of the vector.
221 void reverse() {
222 for (int i = 0; i < size_used_ / 2; ++i) {
223 Swap(&data_[i], &data_[size_used_ - 1 - i]);
224 }
225 }
226
227 // Sorts the members of this vector using the less than comparator (cmp_lt),
228 // which compares the values. Useful for GenericVectors to primitive types.
229 // Will not work so great for pointers (unless you just want to sort some
230 // pointers). You need to provide a specialization to sort_cmp to use
231 // your type.
232 void sort();
233
234 // Sort the array into the order defined by the qsort function comparator.
235 // The comparator function is as defined by qsort, ie. it receives pointers
236 // to two Ts and returns negative if the first element is to appear earlier
237 // in the result and positive if it is to appear later, with 0 for equal.
238 void sort(int (*comparator)(const void*, const void*)) {
239 qsort(data_, size_used_, sizeof(*data_), comparator);
240 }
241
242 // Searches the array (assuming sorted in ascending order, using sort()) for
243 // an element equal to target and returns true if it is present.
244 // Use binary_search to get the index of target, or its nearest candidate.
245 bool bool_binary_search(const T& target) const {
246 int index = binary_search(target);
247 if (index >= size_used_) {
248 return false;
249 }
250 return data_[index] == target;
251 }
252 // Searches the array (assuming sorted in ascending order, using sort()) for
253 // an element equal to target and returns the index of the best candidate.
254 // The return value is conceptually the largest index i such that
255 // data_[i] <= target or 0 if target < the whole vector.
256 // NOTE that this function uses operator> so really the return value is
257 // the largest index i such that data_[i] > target is false.
258 int binary_search(const T& target) const {
259 int bottom = 0;
260 int top = size_used_;
261 while (top - bottom > 1) {
262 int middle = (bottom + top) / 2;
263 if (data_[middle] > target) {
264 top = middle;
265 } else {
266 bottom = middle;
267 }
268 }
269 return bottom;
270 }
271
272 // Compact the vector by deleting elements using operator!= on basic types.
273 // The vector must be sorted.
275 if (size_used_ == 0) {
276 return;
277 }
278
279 // First element is in no matter what, hence the i = 1.
280 int last_write = 0;
281 for (int i = 1; i < size_used_; ++i) {
282 // Finds next unique item and writes it.
283 if (data_[last_write] != data_[i]) {
284 data_[++last_write] = data_[i];
285 }
286 }
287 // last_write is the index of a valid data cell, so add 1.
288 size_used_ = last_write + 1;
289 }
290
291 // Compact the vector by deleting elements for which delete_cb returns
292 // true. delete_cb is a permanent callback and will be deleted.
294 int new_size = 0;
295 int old_index = 0;
296 // Until the callback returns true, the elements stay the same.
297 while (old_index < size_used_ && !delete_cb->Run(old_index++)) {
298 ++new_size;
299 }
300 // Now just copy anything else that gets false from delete_cb.
301 for (; old_index < size_used_; ++old_index) {
302 if (!delete_cb->Run(old_index)) {
303 data_[new_size++] = data_[old_index];
304 }
305 }
306 size_used_ = new_size;
307 delete delete_cb;
308 }
309
310 T dot_product(const GenericVector<T>& other) const {
311 T result = static_cast<T>(0);
312 for (int i = std::min(size_used_, other.size_used_) - 1; i >= 0; --i) {
313 result += data_[i] * other.data_[i];
314 }
315 return result;
316 }
317
318 // Returns the index of what would be the target_index_th item in the array
319 // if the members were sorted, without actually sorting. Members are
320 // shuffled around, but it takes O(n) time.
321 // NOTE: uses operator< and operator== on the members.
322 int choose_nth_item(int target_index) {
323 // Make sure target_index is legal.
324 if (target_index < 0) {
325 target_index = 0; // ensure legal
326 } else if (target_index >= size_used_) {
327 target_index = size_used_ - 1;
328 }
329 unsigned int seed = 1;
330 return choose_nth_item(target_index, 0, size_used_, &seed);
331 }
332
333 // Swaps the elements with the given indices.
334 void swap(int index1, int index2) {
335 if (index1 != index2) {
336 T tmp = data_[index1];
337 data_[index1] = data_[index2];
338 data_[index2] = tmp;
339 }
340 }
341 // Returns true if all elements of *this are within the given range.
342 // Only uses operator<
343 bool WithinBounds(const T& rangemin, const T& rangemax) const {
344 for (int i = 0; i < size_used_; ++i) {
345 if (data_[i] < rangemin || rangemax < data_[i]) {
346 return false;
347 }
348 }
349 return true;
350 }
351
352 protected:
353 // Internal recursive version of choose_nth_item.
354 int choose_nth_item(int target_index, int start, int end, unsigned int* seed);
355
356 // Init the object, allocating size memory.
357 void init(int size);
358
359 // We are assuming that the object generally placed in the
360 // vector are small enough that for efficiency it makes sense
361 // to start with a larger initial size.
362 static const int kDefaultVectorSize = 4;
363 int32_t size_used_{};
364 int32_t size_reserved_{};
367 // Mutable because Run method is not const
369};
370
371namespace tesseract {
372
373// The default FileReader loads the whole file into the vector of char,
374// returning false on error.
375inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) {
376 bool result = false;
377 FILE* fp = fopen(filename, "rb");
378 if (fp != nullptr) {
379 fseek(fp, 0, SEEK_END);
380 auto size = std::ftell(fp);
381 fseek(fp, 0, SEEK_SET);
382 // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
383 if (size > 0 && size < LONG_MAX) {
384 // reserve an extra byte in case caller wants to append a '\0' character
385 data->reserve(size + 1);
386 data->resize_no_init(size);
387 result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
388 }
389 fclose(fp);
390 }
391 return result;
392}
393
394inline bool LoadDataFromFile(const STRING& filename,
395 GenericVector<char>* data) {
396 return LoadDataFromFile(filename.string(), data);
397}
398
399// The default FileWriter writes the vector of char to the filename file,
400// returning false on error.
401inline bool SaveDataToFile(const GenericVector<char>& data,
402 const STRING& filename) {
403 FILE* fp = fopen(filename.string(), "wb");
404 if (fp == nullptr) {
405 return false;
406 }
407 bool result =
408 static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
409 fclose(fp);
410 return result;
411}
412
413template <typename T>
414bool cmp_eq(T const& t1, T const& t2) {
415 return t1 == t2;
416}
417
418// Used by sort()
419// return < 0 if t1 < t2
420// return 0 if t1 == t2
421// return > 0 if t1 > t2
422template <typename T>
423int sort_cmp(const void* t1, const void* t2) {
424 const T* a = static_cast<const T*>(t1);
425 const T* b = static_cast<const T*>(t2);
426 if (*a < *b) {
427 return -1;
428 }
429 if (*b < *a) {
430 return 1;
431 }
432 return 0;
433}
434
435// Used by PointerVector::sort()
436// return < 0 if t1 < t2
437// return 0 if t1 == t2
438// return > 0 if t1 > t2
439template <typename T>
440int sort_ptr_cmp(const void* t1, const void* t2) {
441 const T* a = *static_cast<T* const*>(t1);
442 const T* b = *static_cast<T* const*>(t2);
443 if (*a < *b) {
444 return -1;
445 }
446 if (*b < *a) {
447 return 1;
448 }
449 return 0;
450}
451
452// Subclass for a vector of pointers. Use in preference to GenericVector<T*>
453// as it provides automatic deletion and correct serialization, with the
454// corollary that all copy operations are deep copies of the pointed-to objects.
455template <typename T>
456class PointerVector : public GenericVector<T*> {
457 public:
459 explicit PointerVector(int size) : GenericVector<T*>(size) {}
461 // Clear must be called here, even though it is called again by the base,
462 // as the base will call the wrong clear.
463 clear();
464 }
465 // Copy must be deep, as the pointers will be automatically deleted on
466 // destruction.
467 PointerVector(const PointerVector& other) : GenericVector<T*>(other) {
468 this->init(other.size());
469 this->operator+=(other);
470 }
472 this->reserve(this->size_used_ + other.size_used_);
473 for (int i = 0; i < other.size(); ++i) {
474 this->push_back(new T(*other.data_[i]));
475 }
476 return *this;
477 }
478
480 if (&other != this) {
481 this->truncate(0);
482 this->operator+=(other);
483 }
484 return *this;
485 }
486
487 // Removes an element at the given index and
488 // shifts the remaining elements to the left.
489 void remove(int index) {
490 delete GenericVector<T*>::data_[index];
492 }
493
494 // Truncates the array to the given size by removing the end.
495 // If the current size is less, the array is not expanded.
496 void truncate(int size) {
497 for (int i = size; i < GenericVector<T*>::size_used_; ++i) {
498 delete GenericVector<T*>::data_[i];
499 }
501 }
502
503 // Compact the vector by deleting elements for which delete_cb returns
504 // true. delete_cb is a permanent callback and will be deleted.
506 int new_size = 0;
507 int old_index = 0;
508 // Until the callback returns true, the elements stay the same.
509 while (old_index < GenericVector<T*>::size_used_ &&
510 !delete_cb->Run(GenericVector<T*>::data_[old_index++])) {
511 ++new_size;
512 }
513 // Now just copy anything else that gets false from delete_cb.
514 for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
515 if (!delete_cb->Run(GenericVector<T*>::data_[old_index])) {
516 GenericVector<T*>::data_[new_size++] =
517 GenericVector<T*>::data_[old_index];
518 } else {
519 delete GenericVector<T*>::data_[old_index];
520 }
521 }
523 delete delete_cb;
524 }
525
526 // Clear the array, calling the clear callback function if any.
527 // All the owned callbacks are also deleted.
528 // If you don't want the callbacks to be deleted, before calling clear, set
529 // the callback to nullptr.
530 void clear() {
533 }
534
535 // Writes a vector of (pointers to) classes to the given file. Assumes the
536 // existence of bool T::Serialize(FILE*) const that returns false in case of
537 // error. There is no Serialize for simple types, as you would have a
538 // normal GenericVector of those.
539 // Returns false in case of error.
540 bool Serialize(FILE* fp) const {
541 int32_t used = GenericVector<T*>::size_used_;
542 if (fwrite(&used, sizeof(used), 1, fp) != 1) {
543 return false;
544 }
545 for (int i = 0; i < used; ++i) {
546 int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
547 if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) {
548 return false;
549 }
550 if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
551 return false;
552 }
553 }
554 return true;
555 }
556 bool Serialize(TFile* fp) const {
557 int32_t used = GenericVector<T*>::size_used_;
558 if (fp->FWrite(&used, sizeof(used), 1) != 1) {
559 return false;
560 }
561 for (int i = 0; i < used; ++i) {
562 int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
563 if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) {
564 return false;
565 }
566 if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) {
567 return false;
568 }
569 }
570 return true;
571 }
572 // Reads a vector of (pointers to) classes to the given file. Assumes the
573 // existence of bool T::DeSerialize(bool, Tfile*) const that returns false in
574 // case of error. There is no Serialize for simple types, as you would have a
575 // normal GenericVector of those.
576 // If swap is true, assumes a big/little-endian swap is needed.
577 // Also needs T::T(), as new T is used in this function.
578 // Returns false in case of error.
579 bool DeSerialize(bool swap, FILE* fp) {
580 uint32_t reserved;
581 if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
582 return false;
583 }
584 if (swap) {
585 Reverse32(&reserved);
586 }
587 // Arbitrarily limit the number of elements to protect against bad data.
588 assert(reserved <= UINT16_MAX);
589 if (reserved > UINT16_MAX) {
590 return false;
591 }
593 truncate(0);
594 for (uint32_t i = 0; i < reserved; ++i) {
595 int8_t non_null;
596 if (fread(&non_null, sizeof(non_null), 1, fp) != 1) {
597 return false;
598 }
599 T* item = nullptr;
600 if (non_null != 0) {
601 item = new T;
602 if (!item->DeSerialize(swap, fp)) {
603 delete item;
604 return false;
605 }
606 this->push_back(item);
607 } else {
608 // Null elements should keep their place in the vector.
609 this->push_back(nullptr);
610 }
611 }
612 return true;
613 }
614 bool DeSerialize(TFile* fp) {
615 int32_t reserved;
616 if (!DeSerializeSize(fp, &reserved)) {
617 return false;
618 }
620 truncate(0);
621 for (int i = 0; i < reserved; ++i) {
622 if (!DeSerializeElement(fp)) {
623 return false;
624 }
625 }
626 return true;
627 }
628 // Enables deserialization of a selection of elements. Note that in order to
629 // retain the integrity of the stream, the caller must call some combination
630 // of DeSerializeElement and DeSerializeSkip of the exact number returned in
631 // *size, assuming a true return.
632 static bool DeSerializeSize(TFile* fp, int32_t* size) {
633 return fp->FReadEndian(size, sizeof(*size), 1) == 1;
634 }
635 // Reads and appends to the vector the next element of the serialization.
637 int8_t non_null;
638 if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) {
639 return false;
640 }
641 T* item = nullptr;
642 if (non_null != 0) {
643 item = new T;
644 if (!item->DeSerialize(fp)) {
645 delete item;
646 return false;
647 }
648 this->push_back(item);
649 } else {
650 // Null elements should keep their place in the vector.
651 this->push_back(nullptr);
652 }
653 return true;
654 }
655 // Skips the next element of the serialization.
656 static bool DeSerializeSkip(TFile* fp) {
657 int8_t non_null;
658 if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) {
659 return false;
660 }
661 if (non_null != 0) {
662 if (!T::SkipDeSerialize(fp)) {
663 return false;
664 }
665 }
666 return true;
667 }
668
669 // Sorts the items pointed to by the members of this vector using
670 // t::operator<().
671 void sort() {
672 this->GenericVector<T*>::sort(&sort_ptr_cmp<T>);
673 }
674};
675
676} // namespace tesseract
677
678// A useful vector that uses operator== to do comparisons.
679template <typename T>
681 public:
684 NewPermanentTessCallback(tesseract::cmp_eq<T>));
685 }
688 NewPermanentTessCallback(tesseract::cmp_eq<T>));
689 }
690};
691
692template <typename T>
694 size_used_ = 0;
695 if (size <= 0) {
696 data_ = nullptr;
697 size_reserved_ = 0;
698 } else {
699 if (size < kDefaultVectorSize) {
700 size = kDefaultVectorSize;
701 }
702 data_ = new T[size];
703 size_reserved_ = size;
704 }
705 clear_cb_ = nullptr;
706 compare_cb_ = nullptr;
707}
708
709template <typename T>
711 clear();
712}
713
714// Reserve some memory. If the internal array contains elements, they are
715// copied.
716template <typename T>
718 if (size_reserved_ >= size || size <= 0) {
719 return;
720 }
721 if (size < kDefaultVectorSize) {
722 size = kDefaultVectorSize;
723 }
724 T* new_array = new T[size];
725 for (int i = 0; i < size_used_; ++i) {
726 new_array[i] = data_[i];
727 }
728 delete[] data_;
729 data_ = new_array;
730 size_reserved_ = size;
731}
732
733template <typename T>
735 if (size_reserved_ == 0) {
736 reserve(kDefaultVectorSize);
737 } else {
738 reserve(2 * size_reserved_);
739 }
740}
741
742// Resizes to size and sets all values to t.
743template <typename T>
744void GenericVector<T>::init_to_size(int size, const T& t) {
745 reserve(size);
746 size_used_ = size;
747 for (int i = 0; i < size; ++i) {
748 data_[i] = t;
749 }
750}
751
752// Return the object from an index.
753template <typename T>
754T& GenericVector<T>::get(int index) const {
755 assert(index >= 0 && index < size_used_);
756 return data_[index];
757}
758
759template <typename T>
760T& GenericVector<T>::operator[](int index) const {
761 assert(index >= 0 && index < size_used_);
762 return data_[index];
763}
764
765template <typename T>
767 assert(size_used_ > 0);
768 return data_[size_used_ - 1];
769}
770// Returns the last object and removes it.
771template <typename T>
773 assert(size_used_ > 0);
774 return data_[--size_used_];
775}
776
777// Return the object from an index.
778template <typename T>
779void GenericVector<T>::set(const T& t, int index) {
780 assert(index >= 0 && index < size_used_);
781 data_[index] = t;
782}
783
784// Shifts the rest of the elements to the right to make
785// space for the new elements and inserts the given element
786// at the specified index.
787template <typename T>
788void GenericVector<T>::insert(const T& t, int index) {
789 assert(index >= 0 && index <= size_used_);
790 if (size_reserved_ == size_used_) {
791 double_the_size();
792 }
793 for (int i = size_used_; i > index; --i) {
794 data_[i] = data_[i - 1];
795 }
796 data_[index] = t;
797 size_used_++;
798}
799
800// Removes an element at the given index and
801// shifts the remaining elements to the left.
802template <typename T>
804 assert(index >= 0 && index < size_used_);
805 for (int i = index; i < size_used_ - 1; ++i) {
806 data_[i] = data_[i + 1];
807 }
808 size_used_--;
809}
810
811// Return true if the index is valindex
812template <typename T>
814 return index >= 0 && index < size_used_;
815}
816
817// Return the index of the T object.
818template <typename T>
819int GenericVector<T>::get_index(const T& object) const {
820 for (int i = 0; i < size_used_; ++i) {
821 assert(compare_cb_ != nullptr);
822 if (compare_cb_->Run(object, data_[i])) {
823 return i;
824 }
825 }
826 return -1;
827}
828
829// Return true if T is in the array
830template <typename T>
831bool GenericVector<T>::contains(const T& object) const {
832 return get_index(object) != -1;
833}
834
835// Add an element in the array
836template <typename T>
838 int index = 0;
839 if (size_used_ == size_reserved_) {
840 double_the_size();
841 }
842 index = size_used_++;
843 data_[index] = object;
844 return index;
845}
846
847template <typename T>
849 int index = get_index(object);
850 if (index >= 0) {
851 return index;
852 }
853 return push_back(object);
854}
855
856// Add an element in the array (front)
857template <typename T>
858int GenericVector<T>::push_front(const T& object) {
859 if (size_used_ == size_reserved_) {
860 double_the_size();
861 }
862 for (int i = size_used_; i > 0; --i) {
863 data_[i] = data_[i - 1];
864 }
865 data_[0] = object;
866 ++size_used_;
867 return 0;
868}
869
870template <typename T>
872 push_back(t);
873}
874
875template <typename T>
877 this->reserve(size_used_ + other.size_used_);
878 for (int i = 0; i < other.size(); ++i) {
879 this->operator+=(other.data_[i]);
880 }
881 return *this;
882}
883
884template <typename T>
886 if (&other != this) {
887 this->truncate(0);
888 this->operator+=(other);
889 }
890 return *this;
891}
892
893// Clear the array, calling the callback function if any.
894template <typename T>
896 if (size_reserved_ > 0 && clear_cb_ != nullptr) {
897 for (int i = 0; i < size_used_; ++i) {
898 clear_cb_->Run(data_[i]);
899 }
900 }
901 delete[] data_;
902 data_ = nullptr;
903 size_used_ = 0;
904 size_reserved_ = 0;
905 delete clear_cb_;
906 clear_cb_ = nullptr;
907 delete compare_cb_;
908 compare_cb_ = nullptr;
909}
910
911template <typename T>
913 for (int i = 0; i < size_used_; ++i) {
914 delete data_[i];
915 }
916}
917
918template <typename T>
921 if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) {
922 return false;
923 }
924 if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) {
925 return false;
926 }
927 if (cb != nullptr) {
928 for (int i = 0; i < size_used_; ++i) {
929 if (!cb->Run(f, data_[i])) {
930 delete cb;
931 return false;
932 }
933 }
934 delete cb;
935 } else {
936 if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size()) {
937 return false;
938 }
939 }
940 return true;
941}
942
943template <typename T>
946 int32_t reserved;
947 if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
948 return false;
949 }
950 reserve(reserved);
951 if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) {
952 return false;
953 }
954 if (cb != nullptr) {
955 for (int i = 0; i < size_used_; ++i) {
956 if (!cb->Run(f, data_ + i)) {
957 delete cb;
958 return false;
959 }
960 }
961 delete cb;
962 } else {
963 if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_) {
964 return false;
965 }
966 }
967 return true;
968}
969
970// Writes a vector of simple types to the given file. Assumes that bitwise
971// read/write of T will work. Returns false in case of error.
972template <typename T>
973bool GenericVector<T>::Serialize(FILE* fp) const {
974 if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
975 return false;
976 }
977 if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size()) {
978 return false;
979 }
980 return true;
981}
982template <typename T>
984 if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
985 return false;
986 }
987 if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) {
988 return false;
989 }
990 return true;
991}
992
993// Reads a vector of simple types from the given file. Assumes that bitwise
994// read/write will work with ReverseN according to sizeof(T).
995// Returns false in case of error.
996// If swap is true, assumes a big/little-endian swap is needed.
997template <typename T>
998bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
999 uint32_t reserved;
1000 if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
1001 return false;
1002 }
1003 if (swap) {
1004 Reverse32(&reserved);
1005 }
1006 // Arbitrarily limit the number of elements to protect against bad data.
1007 assert(reserved <= UINT16_MAX);
1008 if (reserved > UINT16_MAX) {
1009 return false;
1010 }
1011 reserve(reserved);
1012 size_used_ = reserved;
1013 if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) {
1014 return false;
1015 }
1016 if (swap) {
1017 for (int i = 0; i < size_used_; ++i) {
1018 ReverseN(&data_[i], sizeof(data_[i]));
1019 }
1020 }
1021 return true;
1022}
1023template <typename T>
1025 uint32_t reserved;
1026 if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
1027 return false;
1028 }
1029 // Arbitrarily limit the number of elements to protect against bad data.
1030 const uint32_t limit = 50000000;
1031 assert(reserved <= limit);
1032 if (reserved > limit) {
1033 return false;
1034 }
1035 reserve(reserved);
1036 size_used_ = reserved;
1037 return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
1038}
1039template <typename T>
1041 uint32_t reserved;
1042 if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
1043 return false;
1044 }
1045 return (uint32_t)fp->FRead(nullptr, sizeof(T), reserved) == reserved;
1046}
1047
1048// Writes a vector of classes to the given file. Assumes the existence of
1049// bool T::Serialize(FILE* fp) const that returns false in case of error.
1050// Returns false in case of error.
1051template <typename T>
1053 if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) {
1054 return false;
1055 }
1056 for (int i = 0; i < size_used_; ++i) {
1057 if (!data_[i].Serialize(fp)) {
1058 return false;
1059 }
1060 }
1061 return true;
1062}
1063template <typename T>
1065 if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) {
1066 return false;
1067 }
1068 for (int i = 0; i < size_used_; ++i) {
1069 if (!data_[i].Serialize(fp)) {
1070 return false;
1071 }
1072 }
1073 return true;
1074}
1075
1076// Reads a vector of classes from the given file. Assumes the existence of
1077// bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
1078// error. Also needs T::T() and T::T(constT&), as init_to_size is used in
1079// this function. Returns false in case of error.
1080// If swap is true, assumes a big/little-endian swap is needed.
1081template <typename T>
1082bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
1083 int32_t reserved;
1084 if (fread(&reserved, sizeof(reserved), 1, fp) != 1) {
1085 return false;
1086 }
1087 if (swap) {
1088 Reverse32(&reserved);
1089 }
1090 T empty;
1091 init_to_size(reserved, empty);
1092 for (int i = 0; i < reserved; ++i) {
1093 if (!data_[i].DeSerialize(swap, fp)) {
1094 return false;
1095 }
1096 }
1097 return true;
1098}
1099template <typename T>
1101 int32_t reserved;
1102 if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
1103 return false;
1104 }
1105 T empty;
1106 init_to_size(reserved, empty);
1107 for (int i = 0; i < reserved; ++i) {
1108 if (!data_[i].DeSerialize(fp)) {
1109 return false;
1110 }
1111 }
1112 return true;
1113}
1114template <typename T>
1116 int32_t reserved;
1117 if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) {
1118 return false;
1119 }
1120 for (int i = 0; i < reserved; ++i) {
1121 if (!T::SkipDeSerialize(fp)) {
1122 return false;
1123 }
1124 }
1125 return true;
1126}
1127
1128// This method clear the current object, then, does a shallow copy of
1129// its argument, and finally invalidates its argument.
1130template <typename T>
1132 this->clear();
1133 this->data_ = from->data_;
1134 this->size_reserved_ = from->size_reserved_;
1135 this->size_used_ = from->size_used_;
1136 this->compare_cb_ = from->compare_cb_;
1137 this->clear_cb_ = from->clear_cb_;
1138 from->data_ = nullptr;
1139 from->clear_cb_ = nullptr;
1140 from->compare_cb_ = nullptr;
1141 from->size_used_ = 0;
1142 from->size_reserved_ = 0;
1143}
1144
1145template <typename T>
1147 sort(&tesseract::sort_cmp<T>);
1148}
1149
1150// Internal recursive version of choose_nth_item.
1151// The algorithm used comes from "Algorithms" by Sedgewick:
1152// http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC
1153// The principle is to choose a random pivot, and move everything less than
1154// the pivot to its left, and everything greater than the pivot to the end
1155// of the array, then recurse on the part that contains the desired index, or
1156// just return the answer if it is in the equal section in the middle.
1157// The random pivot guarantees average linear time for the same reason that
1158// n times vector::push_back takes linear time on average.
1159// target_index, start and and end are all indices into the full array.
1160// Seed is a seed for rand_r for thread safety purposes. Its value is
1161// unimportant as the random numbers do not affect the result except
1162// between equal answers.
1163template <typename T>
1164int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
1165 unsigned int* seed) {
1166 // Number of elements to process.
1167 int num_elements = end - start;
1168 // Trivial cases.
1169 if (num_elements <= 1) {
1170 return start;
1171 }
1172 if (num_elements == 2) {
1173 if (data_[start] < data_[start + 1]) {
1174 return target_index > start ? start + 1 : start;
1175 }
1176 return target_index > start ? start : start + 1;
1177 }
1178// Place the pivot at start.
1179#ifndef rand_r // _MSC_VER, ANDROID
1180 srand(*seed);
1181# define rand_r(seed) rand()
1182#endif // _MSC_VER
1183 int pivot = rand_r(seed) % num_elements + start;
1184 swap(pivot, start);
1185 // The invariant condition here is that items [start, next_lesser) are less
1186 // than the pivot (which is at index next_lesser) and items
1187 // [prev_greater, end) are greater than the pivot, with items
1188 // [next_lesser, prev_greater) being equal to the pivot.
1189 int next_lesser = start;
1190 int prev_greater = end;
1191 for (int next_sample = start + 1; next_sample < prev_greater;) {
1192 if (data_[next_sample] < data_[next_lesser]) {
1193 swap(next_lesser++, next_sample++);
1194 } else if (data_[next_sample] == data_[next_lesser]) {
1195 ++next_sample;
1196 } else {
1197 swap(--prev_greater, next_sample);
1198 }
1199 }
1200 // Now the invariant is set up, we recurse on just the section that contains
1201 // the desired index.
1202 if (target_index < next_lesser) {
1203 return choose_nth_item(target_index, start, next_lesser, seed);
1204 }
1205 if (target_index < prev_greater) {
1206 return next_lesser; // In equal bracket.
1207 }
1208 return choose_nth_item(target_index, prev_greater, end, seed);
1209}
1210
1211#endif // TESSERACT_CCUTIL_GENERICVECTOR_H_
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
Definition: points.h:381
int32_t choose_nth_item(int32_t index, float *array, int32_t count)
Definition: statistc.cpp:630
#define rand_r(seed)
void Reverse32(void *ptr)
Definition: helpers.h:202
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:185
void Swap(T *p1, T *p2)
Definition: helpers.h:95
_ConstTessMemberResultCallback_5_0< false, R, T1, P1, P2, P3, P4, P5 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)(P1, P2, P3, P4, P5) const, typename Identity< P1 >::type p1, typename Identity< P2 >::type p2, typename Identity< P3 >::type p3, typename Identity< P4 >::type p4, typename Identity< P5 >::type p5)
Definition: tesscallback.h:258
bool cmp_eq(T const &t1, T const &t2)
int sort_cmp(const void *t1, const void *t2)
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
int sort_ptr_cmp(const void *t1, const void *t2)
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
void init_to_size(int size, const T &t)
void compact(TessResultCallback1< bool, int > *delete_cb)
void resize_no_init(int size)
Definition: genericvector.h:66
int push_back(T object)
static bool SkipDeSerializeClasses(tesseract::TFile *fp)
bool empty() const
Definition: genericvector.h:91
static const int kDefaultVectorSize
int size_reserved() const
Definition: genericvector.h:82
bool Serialize(FILE *fp) const
int size() const
Definition: genericvector.h:72
void set(const T &t, int index)
GenericVector(const GenericVector &other)
Definition: genericvector.h:49
bool DeSerialize(tesseract::TFile *fp)
bool WithinBounds(const T &rangemin, const T &rangemax) const
bool Serialize(tesseract::TFile *fp) const
void remove(int index)
void sort(int(*comparator)(const void *, const void *))
size_t unsigned_size() const
Definition: genericvector.h:76
T & back() const
int32_t size_reserved_
int get_index(const T &object) const
bool contains(const T &object) const
bool read(tesseract::TFile *f, TessResultCallback2< bool, tesseract::TFile *, T * > *cb)
TessCallback1< T > * clear_cb_
void insert(const T &t, int index)
void init(int size)
int choose_nth_item(int target_index)
void set_compare_callback(TessResultCallback2< bool, T const &, T const & > *cb)
bool DeSerializeClasses(bool swap, FILE *fp)
int length() const
Definition: genericvector.h:86
static T * double_the_size_memcpy(int current_size, T *data)
T dot_product(const GenericVector< T > &other) const
bool SerializeClasses(tesseract::TFile *fp) const
bool write(FILE *f, TessResultCallback2< bool, FILE *, T const & > *cb) const
bool bool_binary_search(const T &target) const
void compact_sorted()
static bool SkipDeSerialize(tesseract::TFile *fp)
void truncate(int size)
void delete_data_pointers()
int32_t size_used_
void reserve(int size)
int push_back_new(const T &object)
bool DeSerialize(bool swap, FILE *fp)
void move(GenericVector< T > *from)
T & get(int index) const
void swap(int index1, int index2)
T contains_index(int index) const
bool SerializeClasses(FILE *fp) const
int binary_search(const T &target) const
int push_front(const T &object)
bool DeSerializeClasses(tesseract::TFile *fp)
int choose_nth_item(int target_index, int start, int end, unsigned int *seed)
void operator+=(const T &t)
TessResultCallback2< bool, T const &, T const & > * compare_cb_
GenericVector(int size, const T &init_val)
Definition: genericvector.h:43
void double_the_size()
void set_clear_callback(TessCallback1< T > *cb)
GenericVector< T > & operator=(const GenericVector &other)
GenericVector< T > & operator+=(const GenericVector &other)
T & operator[](int index) const
GenericVectorEqEq(int size)
virtual R Run(A1, A2)=0
static bool DeSerializeSkip(TFile *fp)
bool Serialize(FILE *fp) const
void compact(TessResultCallback1< bool, const T * > *delete_cb)
bool DeSerializeElement(TFile *fp)
bool DeSerialize(bool swap, FILE *fp)
PointerVector(const PointerVector &other)
bool Serialize(TFile *fp) const
bool DeSerialize(TFile *fp)
void remove(int index)
PointerVector< T > & operator+=(const PointerVector &other)
PointerVector< T > & operator=(const PointerVector &other)
static bool DeSerializeSize(TFile *fp, int32_t *size)
int FWrite(const void *buffer, size_t size, int count)
Definition: serialis.cpp:319
int FReadEndian(void *buffer, size_t size, int count)
Definition: serialis.cpp:260
int FRead(void *buffer, size_t size, int count)
Definition: serialis.cpp:271
Definition: strngs.h:45
const char * string() const
Definition: strngs.cpp:194
virtual R Run(A1)=0