tesseract 4.1.1
Loading...
Searching...
No Matches
devanagari_processing.h
Go to the documentation of this file.
1// Copyright 2008 Google Inc. All Rights Reserved.
2// Author: shobhitsaxena@google.com (Shobhit Saxena)
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6// http://www.apache.org/licenses/LICENSE-2.0
7// Unless required by applicable law or agreed to in writing, software
8// distributed under the License is distributed on an "AS IS" BASIS,
9// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10// See the License for the specific language governing permissions and
11// limitations under the License.
12
13#ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
14#define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
15
16#include "allheaders.h"
17#include "ocrblock.h"
18#include "params.h"
19
20struct Pix;
21struct Box;
22struct Boxa;
23
24extern
26 "Debug level for split shiro-rekha process.");
27
28extern
30 "Whether to create a debug image for split shiro-rekha process.");
31
32class TBOX;
33
34namespace tesseract {
35
37 public:
39 hist_ = nullptr;
40 length_ = 0;
41 }
42
44 Clear();
45 }
46
47 void Clear() {
48 delete[] hist_;
49 length_ = 0;
50 }
51
52 int* hist() const { return hist_; }
53
54 int length() const {
55 return length_;
56 }
57
58 // Methods to construct histograms from images. These clear any existing data.
59 void ConstructVerticalCountHist(Pix* pix);
60 void ConstructHorizontalCountHist(Pix* pix);
61
62 // This method returns the global maximum of the histogram. The frequency of
63 // the global maximum is returned in count, if specified.
64 int GetHistogramMaximum(int* count) const;
65
66 private:
67 int* hist_;
68 int length_;
69};
70
72 public:
74 NO_SPLIT = 0, // No splitting is performed for the phase.
75 MINIMAL_SPLIT, // Blobs are split minimally.
76 MAXIMAL_SPLIT // Blobs are split maximally.
77 };
78
80 virtual ~ShiroRekhaSplitter();
81
82 // Top-level method to perform splitting based on current settings.
83 // Returns true if a split was actually performed.
84 // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
85 // splitting. If false, the ocr_split_strategy_ is used.
86 bool Split(bool split_for_pageseg, DebugPixa* pixa_debug);
87
88 // Clears the memory held by this object.
89 void Clear();
90
91 // Refreshes the words in the segmentation block list by using blobs in the
92 // input blob list.
93 // The segmentation block list must be set.
94 void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs);
95
96 // Returns true if the split strategies for pageseg and ocr are different.
98 return pageseg_split_strategy_ != ocr_split_strategy_;
99 }
100
101 // This only keeps a copy of the block list pointer. At split call, the list
102 // object should still be alive. This block list is used as a golden
103 // segmentation when performing splitting.
104 void set_segmentation_block_list(BLOCK_LIST* block_list) {
105 segmentation_block_list_ = block_list;
106 }
107
108 static const int kUnspecifiedXheight = -1;
109
110 void set_global_xheight(int xheight) {
111 global_xheight_ = xheight;
112 }
113
114 void set_perform_close(bool perform) {
115 perform_close_ = perform;
116 }
117
118 // Returns the image obtained from shiro-rekha splitting. The returned object
119 // is owned by this class. Callers may want to clone the returned pix to keep
120 // it alive beyond the life of ShiroRekhaSplitter object.
122 return splitted_image_;
123 }
124
125 // On setting the input image, a clone of it is owned by this class.
126 void set_orig_pix(Pix* pix);
127
128 // Returns the input image provided to the object. This object is owned by
129 // this class. Callers may want to clone the returned pix to work with it.
130 Pix* orig_pix() {
131 return orig_pix_;
132 }
133
135 return ocr_split_strategy_;
136 }
137
139 ocr_split_strategy_ = strategy;
140 }
141
143 return pageseg_split_strategy_;
144 }
145
147 pageseg_split_strategy_ = strategy;
148 }
149
151 return segmentation_block_list_;
152 }
153
154 // This method returns the computed mode-height of blobs in the pix.
155 // It also prunes very small blobs from calculation. Could be used to provide
156 // a global xheight estimate for images which have the same point-size text.
157 static int GetModeHeight(Pix* pix);
158
159 private:
160 // Method to perform a close operation on the input image. The xheight
161 // estimate decides the size of sel used.
162 static void PerformClose(Pix* pix, int xheight_estimate);
163
164 // This method resolves the cc bbox to a particular row and returns the row's
165 // xheight. This uses segmentation_block_list_ if available, else just returns the
166 // global_xheight_ estimate currently set in the object.
167 int GetXheightForCC(Box* cc_bbox);
168
169 // Returns a list of regions (boxes) which should be cleared in the original
170 // image so as to perform shiro-rekha splitting. Pix is assumed to carry one
171 // (or less) word only. Xheight measure could be the global estimate, the row
172 // estimate, or unspecified. If unspecified, over splitting may occur, since a
173 // conservative estimate of stroke width along with an associated multiplier
174 // is used in its place. It is advisable to have a specified xheight when
175 // splitting for classification/training.
176 void SplitWordShiroRekha(SplitStrategy split_strategy,
177 Pix* pix,
178 int xheight,
179 int word_left,
180 int word_top,
181 Boxa* regions_to_clear);
182
183 // Returns a new box object for the corresponding TBOX, based on the original
184 // image's coordinate system.
185 Box* GetBoxForTBOX(const TBOX& tbox) const;
186
187 // This method returns y-extents of the shiro-rekha computed from the input
188 // word image.
189 static void GetShiroRekhaYExtents(Pix* word_pix,
190 int* shirorekha_top,
191 int* shirorekha_bottom,
192 int* shirorekha_ylevel);
193
194 Pix* orig_pix_; // Just a clone of the input image passed.
195 Pix* splitted_image_; // Image produced after the last splitting round. The
196 // object is owned by this class.
197 SplitStrategy pageseg_split_strategy_;
198 SplitStrategy ocr_split_strategy_;
199 Pix* debug_image_;
200 // This block list is used as a golden segmentation when performing splitting.
201 BLOCK_LIST* segmentation_block_list_;
202 int global_xheight_;
203 bool perform_close_; // Whether a morphological close operation should be
204 // performed before CCs are run through splitting.
205};
206
207} // namespace tesseract.
208
209#endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:297
#define INT_VAR_H(name, val, comment)
Definition: params.h:295
int count(LIST var_list)
Definition: oldlist.cpp:95
bool devanagari_split_debugimage
int devanagari_split_debuglevel
Definition: rect.h:34
int GetHistogramMaximum(int *count) const
void set_pageseg_split_strategy(SplitStrategy strategy)
void RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs)
bool Split(bool split_for_pageseg, DebugPixa *pixa_debug)
void set_segmentation_block_list(BLOCK_LIST *block_list)
SplitStrategy pageseg_split_strategy() const
SplitStrategy ocr_split_strategy() const
void set_ocr_split_strategy(SplitStrategy strategy)