tesseract 4.1.1
Loading...
Searching...
No Matches
strngs.h
Go to the documentation of this file.
1/**********************************************************************
2 * File: strngs.h (Formerly strings.h)
3 * Description: STRING class definition.
4 * Author: Ray Smith
5 *
6 * (C) Copyright 1991, Hewlett-Packard Ltd.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19#ifndef STRNGS_H
20#define STRNGS_H
21
22#include <cassert> // for assert
23#include <cstdint> // for uint32_t
24#include <cstdio> // for FILE
25#include <cstring> // for strncpy
26#include "platform.h" // for TESS_API
27
28namespace tesseract {
29class TFile;
30} // namespace tesseract.
31
32// STRING_IS_PROTECTED means that string[index] = X is invalid
33// because you have to go through strings interface to modify it.
34// This allows the string to ensure internal integrity and maintain
35// its own string length. Unfortunately this is not possible because
36// STRINGS are used as direct-manipulation data buffers for things
37// like length arrays and many places cast away the const on string()
38// to mutate the string. Turning this off means that internally we
39// cannot assume we know the strlen.
40#define STRING_IS_PROTECTED 0
41
42template <typename T>
43class GenericVector;
44
46 public:
47 STRING();
48 STRING(const STRING& string);
49 STRING(const char* string);
50 STRING(const char* data, int length);
51 ~STRING();
52
53 // Writes to the given file. Returns false in case of error.
54 bool Serialize(FILE* fp) const;
55 // Reads from the given file. Returns false in case of error.
56 // If swap is true, assumes a big/little-endian swap is needed.
57 bool DeSerialize(bool swap, FILE* fp);
58 // Writes to the given file. Returns false in case of error.
59 bool Serialize(tesseract::TFile* fp) const;
60 // Reads from the given file. Returns false in case of error.
61 // If swap is true, assumes a big/little-endian swap is needed.
62 bool DeSerialize(tesseract::TFile* fp);
63 // As DeSerialize, but only seeks past the data - hence a static method.
64 static bool SkipDeSerialize(tesseract::TFile* fp);
65
66 bool contains(char c) const;
67 int32_t length() const;
68 int32_t size() const {  // Same value as length(); this simply forwards to it.
69 return length();
70 }
71 // Workaround to avoid g++ -Wsign-compare warnings: returns length() as an unsigned value.
72 uint32_t unsigned_size() const {
73 const int32_t len = length();
74 assert(0 <= len);  // a negative length would wrap to a huge value in the cast below
75 return static_cast<uint32_t>(len);
76 }
77 const char* string() const;
78 const char* c_str() const;
79
80 inline char* strdup() const {  // Returns a copy in a fresh new char[] buffer; release it with delete[], not free().
81 int32_t len = length() + 1;  // one byte more than length(), presumably for the '\0' terminator
82 return strncpy(new char[len], GetCStr(), len);  // strncpy returns its destination, i.e. the new buffer
83 }
84
85#if STRING_IS_PROTECTED
86 const char& operator[](int32_t index) const;
87 // len is number of chars in s to insert starting at index in this string
88 void insert_range(int32_t index, const char* s, int len);
89 void erase_range(int32_t index, int len);
90#else
91 char& operator[](int32_t index) const;
92#endif
93 void split(char c, GenericVector<STRING>* splited);
94 void truncate_at(int32_t index);
95
96 bool operator==(const STRING& string) const;
97 bool operator!=(const STRING& string) const;
98 bool operator!=(const char* string) const;
99
100 STRING& operator=(const char* string);
101 STRING& operator=(const STRING& string);
102
103 STRING operator+(const STRING& string) const;
104 STRING operator+(char ch) const;
105
106 STRING& operator+=(const char* string);
107 STRING& operator+=(const STRING& string);
108 STRING& operator+=(char ch);
109
110 // Assignment for strings which are not null-terminated.
111 void assign(const char* cstr, int len);
112
113 // Appends the given string and int (as a %d) to this.
114 // += cannot be used for ints as there is a char += operator that would
115 // be ambiguous, and ints usually need a string before or between them
116 // anyway.
117 void add_str_int(const char* str, int number);
118 // Appends the given string and double (as a %.8g) to this.
119 void add_str_double(const char* str, double number);
120
121 // Ensures capacity via ensure_cstr() but keeps the pointer encapsulated: the returned char* is discarded.
122 inline void ensure(int32_t min_capacity) {
123 ensure_cstr(min_capacity);
124 }
125
126 private:
127 typedef struct STRING_HEADER {
128 // How much space was allocated in the string buffer for char data.
129 int capacity_;
130
131 // used_ is how much of the capacity is currently being used,
132 // including a '\0' terminator.
133 //
134 // If used_ == 0 then the string is nullptr (not even the '\0' is stored),
135 // else if used_ > 0 then it is strlen() + 1 (because it includes the '\0'),
136 // else (used_ < 0) the string is not nullptr but its strlen must be recomputed.
137 // The negative state is set when encapsulation is violated because
138 // an API returned a mutable string.
139 //
140 // capacity_ - used_ = excess capacity that the string can grow into
141 // without reallocating.
142 mutable int used_;  // mutable so that it can be updated through a const header (see FixHeader() const)
143 } STRING_HEADER;
144
145 // To preserve the behavior of the old serialization, we only have space
146 // for one pointer in this structure. So we are embedding a data structure
147 // at the start of the storage that will hold additional state variables,
148 // then storing the actual string contents immediately after.
149 STRING_HEADER* data_;
150
151 // Returns the header part of the storage, which is data_ itself (the header sits at the start of the storage).
152 inline STRING_HEADER* GetHeader() {
153 return data_;
154 }
155 inline const STRING_HEADER* GetHeader() const {  // const overload: read-only view of the same header
156 return data_;
157 }
158
159 // Returns the string data part of the storage: the bytes immediately after the STRING_HEADER.
160 inline char* GetCStr() {
161 return (reinterpret_cast<char*>(data_)) + sizeof(STRING_HEADER);  // cast to char* first so the offset is in bytes
162 }
163
164 inline const char* GetCStr() const {  // const overload: same address, read-only
165 return (reinterpret_cast<const char*>(data_)) + sizeof(STRING_HEADER);
166 }
167 inline bool InvariantOk() const {  // Checks that used_ agrees with the stored string; always true unless STRING_IS_PROTECTED.
168#if STRING_IS_PROTECTED
169 return (GetHeader()->used_ == 0)
170 ? (string() == nullptr)  // used_ == 0 must mean no string at all
171 : (GetHeader()->used_ == (strlen(string()) + 1));  // otherwise used_ must be strlen plus the '\0'
172#else
173 return true;  // nothing to verify: without protection the strlen is not assumed to be known
174#endif
175 }
176
177 // Ensure string has requested capacity as optimization
178 // to avoid unnecessary reallocations.
179 // The return value is a cstr buffer with at least requested capacity
180 char* ensure_cstr(int32_t min_capacity);
181
182 void FixHeader() const; // make used_ non-negative, even if const
183
184 char* AllocData(int used, int capacity);
185 void DiscardData();
186};
187
188#endif
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
Definition: points.h:381
ICOORD operator+(const ICOORD &op1, const ICOORD &op2)
Definition: points.h:363
#define TESS_API
Definition: platform.h:54
Definition: strngs.h:45
int32_t size() const
Definition: strngs.h:68
void ensure(int32_t min_capacity)
Definition: strngs.h:122
char * strdup() const
Definition: strngs.h:80
uint32_t unsigned_size() const
Definition: strngs.h:72