tesseract 4.1.1
Loading...
Searching...
No Matches
bitvector.h
Go to the documentation of this file.
1
2// File: bitvector.h
3// Description: Class replacement for BITVECTOR.
4// Author: Ray Smith
5//
6// (C) Copyright 2011, Google Inc.
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License at
10// http://www.apache.org/licenses/LICENSE-2.0
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16//
18
19#ifndef TESSERACT_CCUTIL_BITVECTOR_H_
20#define TESSERACT_CCUTIL_BITVECTOR_H_
21
22#include <cassert>
23#include <cstdint> // for uint8_t
24#include <cstdio>
25
26namespace tesseract {
27
28// Trivial class to encapsulate a fixed-length array of bits, with
29// Serialize/DeSerialize. Replaces the old macros.
30class BitVector {
31 public:
32 // Fast lookup table to get the first least significant set bit in a byte.
33 // For zero, the table has 255, but since it is a special case, most code
34 // that uses this table will check for zero before looking up lsb_index_.
35 static const uint8_t lsb_index_[256];
36 // Fast lookup table to get the residual bits after zeroing the least
37 // significant set bit in a byte.
38 static const uint8_t lsb_eroded_[256];
39 // Fast lookup table to give the number of set bits in a byte.
40 static const int hamming_table_[256];
41
42 BitVector();
43 // Initializes the array to length * false.
44 explicit BitVector(int length);
45 BitVector(const BitVector& src);
46 BitVector& operator=(const BitVector& src);
47 ~BitVector();
48
49 // Initializes the array to length * false.
50 void Init(int length);
51
52 // Returns the number of bits that are accessible in the vector.
53 int size() const {
54 return bit_size_;
55 }
56
57 // Writes to the given file. Returns false in case of error.
58 bool Serialize(FILE* fp) const;
59 // Reads from the given file. Returns false in case of error.
60 // If swap is true, assumes a big/little-endian swap is needed.
61 bool DeSerialize(bool swap, FILE* fp);
62
63 void SetAllFalse();
64 void SetAllTrue();
65
66 // Accessors to set/reset/get bits.
67 // The range of index is [0, size()-1].
68 // There is debug-only bounds checking.
69 void SetBit(int index) {
70 array_[WordIndex(index)] |= BitMask(index);
71 }
72 void ResetBit(int index) {
73 array_[WordIndex(index)] &= ~BitMask(index);
74 }
75 void SetValue(int index, bool value) {
76 if (value)
77 SetBit(index);
78 else
79 ResetBit(index);
80 }
81 bool At(int index) const {
82 return (array_[WordIndex(index)] & BitMask(index)) != 0;
83 }
84 bool operator[](int index) const {
85 return (array_[WordIndex(index)] & BitMask(index)) != 0;
86 }
87
88 // Returns the index of the next set bit after the given index.
89 // Useful for quickly iterating through the set bits in a sparse vector.
90 int NextSetBit(int prev_bit) const;
91
92 // Returns the number of set bits in the vector.
93 int NumSetBits() const;
94
95 // Logical in-place operations on whole bit vectors. Tries to do something
96 // sensible if they aren't the same size, but they should be really.
97 void operator|=(const BitVector& other);
98 void operator&=(const BitVector& other);
99 void operator^=(const BitVector& other);
100 // Set subtraction *this = v1 - v2.
101 void SetSubtract(const BitVector& v1, const BitVector& v2);
102
103 private:
104 // Allocates memory for a vector of the given length.
105 void Alloc(int length);
106
107 // Computes the index to array_ for the given index, with debug range
108 // checking.
109 int WordIndex(int index) const {
110 assert(0 <= index && index < bit_size_);
111 return index / kBitFactor;
112 }
113 // Returns a mask to select the appropriate bit for the given index.
114 uint32_t BitMask(int index) const {
115 return 1 << (index & (kBitFactor - 1));
116 }
117 // Returns the number of array elements needed to represent the current
118 // bit_size_.
119 int WordLength() const {
120 return (bit_size_ + kBitFactor - 1) / kBitFactor;
121 }
122 // Returns the number of bytes consumed by the array_.
123 int ByteLength() const {
124 return WordLength() * sizeof(*array_);
125 }
126
127 // Number of bits in this BitVector.
128 int32_t bit_size_;
129 // Array of words used to pack the bits.
130 // Bits are stored little-endian by uint32_t word, ie by word first and then
131 // starting with the least significant bit in each word.
132 uint32_t* array_;
133 // Number of bits in an array_ element.
134 static const int kBitFactor = sizeof(uint32_t) * 8;
135};
136
137} // namespace tesseract.
138
139#endif // TESSERACT_CCUTIL_BITVECTOR_H_
void Init(int length)
Definition: bitvector.cpp:139
void SetValue(int index, bool value)
Definition: bitvector.h:75
static const uint8_t lsb_index_[256]
Definition: bitvector.h:35
void operator^=(const BitVector &other)
Definition: bitvector.cpp:243
static const int hamming_table_[256]
Definition: bitvector.h:40
bool At(int index) const
Definition: bitvector.h:81
BitVector & operator=(const BitVector &src)
Definition: bitvector.cpp:126
void SetSubtract(const BitVector &v1, const BitVector &v2)
Definition: bitvector.cpp:249
bool operator[](int index) const
Definition: bitvector.h:84
void SetBit(int index)
Definition: bitvector.h:69
void operator&=(const BitVector &other)
Definition: bitvector.cpp:236
int NumSetBits() const
Definition: bitvector.cpp:216
bool DeSerialize(bool swap, FILE *fp)
Definition: bitvector.cpp:153
int NextSetBit(int prev_bit) const
Definition: bitvector.cpp:178
void operator|=(const BitVector &other)
Definition: bitvector.cpp:231
int size() const
Definition: bitvector.h:53
void ResetBit(int index)
Definition: bitvector.h:72
bool Serialize(FILE *fp) const
Definition: bitvector.cpp:145
static const uint8_t lsb_eroded_[256]
Definition: bitvector.h:38