tesseract 4.1.1
Loading...
Searching...
No Matches
wordstrboxrenderer.cpp
Go to the documentation of this file.
/**********************************************************************
 * File: wordstrboxrenderer.cpp
 * Description: Renderer for creating box file with WordStr strings.
 *              Based on the tsv renderer.
 *
 * (C) Copyright 2019, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/
18
19#include "baseapi.h" // for TessBaseAPI
20#include "renderer.h"
21#include "tesseractclass.h" // for Tesseract
22
23namespace tesseract {
24
31char* TessBaseAPI::GetWordStrBoxText(int page_number=0) {
32 if (tesseract_ == nullptr || (page_res_ == nullptr && Recognize(nullptr) < 0))
33 return nullptr;
34
35 STRING wordstr_box_str("");
36 int left = 0, top = 0, right = 0, bottom = 0;
37
38 bool first_line = true;
39
41 while (!res_it->Empty(RIL_BLOCK)) {
42 if (res_it->Empty(RIL_WORD)) {
43 res_it->Next(RIL_WORD);
44 continue;
45 }
46
47 if (res_it->IsAtBeginningOf(RIL_TEXTLINE)) {
48 if (!first_line) {
49 wordstr_box_str.add_str_int("\n\t ", right + 1);
50 wordstr_box_str.add_str_int(" ", image_height_ - bottom);
51 wordstr_box_str.add_str_int(" ", right + 5);
52 wordstr_box_str.add_str_int(" ", image_height_ - top);
53 wordstr_box_str.add_str_int(" ", page_number); // row for tab for EOL
54 wordstr_box_str += "\n";
55 } else {
56 first_line = false;
57 }
58 // Use bounding box for whole line for WordStr
59 res_it->BoundingBox(RIL_TEXTLINE, &left, &top, &right, &bottom);
60 wordstr_box_str.add_str_int("WordStr ", left);
61 wordstr_box_str.add_str_int(" ", image_height_ - bottom);
62 wordstr_box_str.add_str_int(" ", right);
63 wordstr_box_str.add_str_int(" ", image_height_ - top);
64 wordstr_box_str.add_str_int(" ", page_number); // word
65 wordstr_box_str += " #";
66 }
67 do {
68 wordstr_box_str +=
69 std::unique_ptr<const char[]>(res_it->GetUTF8Text(RIL_WORD)).get();
70 wordstr_box_str += " ";
71 res_it->Next(RIL_WORD);
72 } while (!res_it->Empty(RIL_BLOCK) && !res_it->IsAtBeginningOf(RIL_WORD));
73 }
74
75 if (left != 0 && top != 0 && right != 0 && bottom != 0) {
76 wordstr_box_str.add_str_int("\n\t ", right + 1);
77 wordstr_box_str.add_str_int(" ", image_height_ - bottom);
78 wordstr_box_str.add_str_int(" ", right + 5);
79 wordstr_box_str.add_str_int(" ", image_height_ - top);
80 wordstr_box_str.add_str_int(" ", page_number); // row for tab for EOL
81 wordstr_box_str += "\n";
82 }
83 char* ret = new char[wordstr_box_str.length() + 1];
84 strcpy(ret, wordstr_box_str.string());
85 delete res_it;
86 return ret;
87}
88
/**********************************************************************
 * WordStrBox Renderer interface implementation
 **********************************************************************/
93 : TessResultRenderer(outputbase, "box") {}
94
96 const std::unique_ptr<const char[]> wordstrbox(
98 if (wordstrbox == nullptr) return false;
99
100 AppendString(wordstrbox.get());
101
102 return true;
103}
104
105} // namespace tesseract.
int Recognize(ETEXT_DESC *monitor)
Definition: baseapi.cpp:830
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:895
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:888
char * GetWordStrBoxText(int page_number)
TESS_LOCAL LTRResultIterator * GetLTRIterator()
Definition: baseapi.cpp:1307
void AppendString(const char *s)
Definition: renderer.cpp:102
bool AddImageHandler(TessBaseAPI *api) override
TessWordStrBoxRenderer(const char *outputbase)
char * GetUTF8Text(PageIteratorLevel level) const
virtual bool Next(PageIteratorLevel level)
virtual bool IsAtBeginningOf(PageIteratorLevel level) const
bool Empty(PageIteratorLevel level) const
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
Definition: strngs.h:45
void add_str_int(const char *str, int number)
Definition: strngs.cpp:377
int32_t length() const
Definition: strngs.cpp:189
const char * string() const
Definition: strngs.cpp:194