tesseract 4.1.1
Loading...
Searching...
No Matches
ocrclass.h
Go to the documentation of this file.
1/**********************************************************************
2 * File: ocrclass.h
3 * Description: Class definitions and constants for the OCR API.
4 * Author: Hewlett-Packard Co
5 *
6 * (C) Copyright 1996, Hewlett-Packard Co.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 *
17 **********************************************************************/
18
19/**********************************************************************
20 * This file contains typedefs for all the structures used by
21 * the HP OCR interface.
22 * The structures are designed to allow them to be used with any
23 * structure alignment up to 8.
24 **********************************************************************/
25
26#ifndef CCUTIL_OCRCLASS_H_
27#define CCUTIL_OCRCLASS_H_
28
#include <chrono>
#include <cstdint>  // for int8_t, int16_t, int32_t, uint8_t, uint16_t
#include <ctime>
#ifdef _WIN32
#include <winsock2.h>  // for timeval
#else
#include <sys/time.h>  // for timeval
#endif
34
35/**********************************************************************
36 * EANYCODE_CHAR
37 * Description of a single character. The character code is defined by
38 * the character set of the current font.
39 * Output text is sent as an array of these structures.
40 * Spaces and line endings in the output are represented in the
41 * structures of the surrounding characters. They are not directly
42 * represented as characters.
43 * The first character in a word has a positive value of blanks.
44 * Missing information should be set to the defaults in the comments.
45 * If word bounds are known, but not character bounds, then the top and
46 * bottom of each character should be those of the word. The left of the
47 * first and right of the last char in each word should be set. All other
48 * lefts and rights should be set to -1.
49 * If set, the values of right and bottom are left+width and top+height.
50 * Most of the members come directly from the parameters to ocr_append_char.
51 * The formatting member uses the enhancement parameter and combines the
52 * line direction stuff into the top 3 bits.
53 * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
54 * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
55 * the coding is, only that it is backwards compatible with the previous
56 * version.
57 **********************************************************************/
58
/// Description of a single output character.
///
/// For version 2.0 and beyond, char_code is UTF-8 coded: an ASCII character
/// comes out as one structure, but any other character is returned in two or
/// more instances of this structure, each holding a single byte of the UTF-8
/// code and each carrying the same bounding box. Programs which want to
/// handle languages with different character sets need to handle extended
/// characters appropriately, but *all* code needs to be prepared to receive
/// UTF-8 coded characters for characters such as bullet and fancy quotes.
///
/// The value in parentheses after each member is the default to use when
/// the information is missing.
struct EANYCODE_CHAR {
  uint16_t char_code;  ///< character itself
  int16_t left;        ///< left of char (-1)
  int16_t right;       ///< right of char (-1)
  int16_t top;         ///< top of char (-1)
  int16_t bottom;      ///< bottom of char (-1)
  int16_t font_index;  ///< what font (0)
  uint8_t confidence;  ///< 0=perfect, 100=reject (0/100)
  uint8_t point_size;  ///< of char, 72=1 inch, (10)
  int8_t blanks;       ///< no of spaces before this char (1)
  uint8_t formatting;  ///< char formatting (0)
};
79
80/**********************************************************************
81 * ETEXT_DESC
82 * Description of the output of the OCR engine.
83 * This structure is used as both a progress monitor and the final
84 * output header, since it needs to be a valid progress monitor while
85 * the OCR engine is storing its output to shared memory.
86 * During progress, all the buffer info is -1.
87 * Progress starts at 0 and increases to 100 during OCR. No other constraint.
88 * Additionally the progress callback contains the bounding box of the word that
89 * is currently being processed.
90 * Every progress callback, the OCR engine must set ocr_alive to 1.
91 * The HP side will set ocr_alive to 0. Repeated failure to reset
92 * to 1 indicates that the OCR engine is dead.
93 * If the cancel function is not null then it is called with the number of
94 * user words found. If it returns true then operation is cancelled.
95 **********************************************************************/
96class ETEXT_DESC;
97
98using CANCEL_FUNC = bool (*)(void*, int);
99using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
100using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC*, int, int, int, int);
101
102class ETEXT_DESC { // output header
103 public:
104 int16_t count{0};
105 int16_t progress{0};
109 int8_t more_to_come{0};
110 volatile int8_t ocr_alive{0};
111 int8_t err_code{0};
114 nullptr};
116 void* cancel_this{nullptr};
117 struct timeval end_time;
121
122 ETEXT_DESC() : progress_callback2(&default_progress_func) {
123 auto chrono_end_time = std::chrono::time_point<std::chrono::steady_clock,
124 std::chrono::milliseconds>();
125 timePointToTimeval(chrono_end_time, &end_time);
126 }
127
128 // Sets the end time to be deadline_msecs milliseconds from now.
129 void set_deadline_msecs(int32_t deadline_msecs) {
130 if (deadline_msecs > 0) {
131 auto chrono_end_time = std::chrono::steady_clock::now() +
132 std::chrono::milliseconds(deadline_msecs);
133 timePointToTimeval(chrono_end_time, &end_time);
134 }
135 }
136
137 // Returns false if we've not passed the end_time, or have not set a deadline.
138 bool deadline_exceeded() const {
139 if (end_time.tv_sec == 0 && end_time.tv_usec == 0)
140 return false;
141 auto chrono_now = std::chrono::steady_clock::now();
142 struct timeval now;
143 timePointToTimeval(chrono_now, &now);
144 return (now.tv_sec > end_time.tv_sec ||
145 (now.tv_sec == end_time.tv_sec && now.tv_usec > end_time.tv_usec));
146 }
147
148 private:
149 static void timePointToTimeval(
150 std::chrono::steady_clock::time_point chrono_point, struct timeval* tv) {
151 auto millisecs = std::chrono::duration_cast<std::chrono::milliseconds>(
152 chrono_point.time_since_epoch());
153 tv->tv_sec = millisecs.count() / 1000;
154 tv->tv_usec = (millisecs.count() % 1000) * 1000;
155 }
156
157 static bool default_progress_func(ETEXT_DESC* ths, int left, int right,
158 int top, int bottom) {
159 if (ths->progress_callback != nullptr) {
160 return (*(ths->progress_callback))(ths->progress, left, right, top,
161 bottom);
162 }
163 return true;
164 }
165};
166
167#endif // CCUTIL_OCRCLASS_H_
bool(*)(ETEXT_DESC *, int, int, int, int) PROGRESS_FUNC2
Definition: ocrclass.h:100
bool(*)(int, int, int, int, int) PROGRESS_FUNC
Definition: ocrclass.h:99
bool(*)(void *, int) CANCEL_FUNC
Definition: ocrclass.h:98
uint8_t formatting
Definition: ocrclass.h:77
int16_t font_index
Definition: ocrclass.h:73
int16_t right
Definition: ocrclass.h:70
uint8_t point_size
Definition: ocrclass.h:75
uint16_t char_code
Definition: ocrclass.h:68
int8_t blanks
Definition: ocrclass.h:76
int16_t left
Definition: ocrclass.h:69
int16_t top
Definition: ocrclass.h:71
uint8_t confidence
Definition: ocrclass.h:74
int16_t bottom
Definition: ocrclass.h:72
ETEXT_DESC()
Definition: ocrclass.h:122
struct timeval end_time
Definition: ocrclass.h:117
void set_deadline_msecs(int32_t deadline_msecs)
Definition: ocrclass.h:129
volatile int8_t ocr_alive
ocr sets to 1, HP 0
Definition: ocrclass.h:110
int16_t progress
percent complete increasing (0-100)
Definition: ocrclass.h:105
void * cancel_this
this or other data for cancel
Definition: ocrclass.h:116
PROGRESS_FUNC progress_callback
called whenever progress increases
Definition: ocrclass.h:113
PROGRESS_FUNC2 progress_callback2
monitor-aware progress callback
Definition: ocrclass.h:115
bool deadline_exceeded() const
Definition: ocrclass.h:138
int8_t err_code
for errcode use
Definition: ocrclass.h:111
int16_t count
chars in this buffer(0)
Definition: ocrclass.h:104
int8_t more_to_come
true if not last
Definition: ocrclass.h:109
EANYCODE_CHAR text[1]
character data
Definition: ocrclass.h:120
CANCEL_FUNC cancel
returns true to cancel
Definition: ocrclass.h:112