tesseract 4.1.1
Loading...
Searching...
No Matches
shapeclassifier.cpp
Go to the documentation of this file.
1// Copyright 2011 Google Inc. All Rights Reserved.
2// Author: rays@google.com (Ray Smith)
4// File: shapeclassifier.cpp
5// Description: Base interface class for classifiers that return a
6// shape index.
7// Author: Ray Smith
8// Created: Thu Dec 15 15:24:27 PST 2011
9//
10// (C) Copyright 2011, Google Inc.
11// Licensed under the Apache License, Version 2.0 (the "License");
12// you may not use this file except in compliance with the License.
13// You may obtain a copy of the License at
14// http://www.apache.org/licenses/LICENSE-2.0
15// Unless required by applicable law or agreed to in writing, software
16// distributed under the License is distributed on an "AS IS" BASIS,
17// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18// See the License for the specific language governing permissions and
19// limitations under the License.
20//
22
23#ifdef HAVE_CONFIG_H
24#include "config_auto.h"
25#endif
26
27#include "shapeclassifier.h"
28#include "genericvector.h"
29#include "scrollview.h"
30#include "shapetable.h"
31#include "svmnode.h"
32#include "trainingsample.h"
33#include "tprintf.h"
34
35namespace tesseract {
36
37// Classifies the given [training] sample, writing to results.
38// See shapeclassifier.h for a full description.
39// Default implementation calls the ShapeRating version.
// NOTE(review): embedded listing line 40 (the first line of this definition's
// signature) is absent from this extract. The declaration index at the end of
// the page lists a matching virtual method named UnicharClassifySample that
// returns int.
//
// Empties *results, runs the ShapeRating overload of ClassifySample into a
// local vector, then hands every returned shape rating to
// ShapeTable::AddShapeToResults together with *results.
// Returns results->size() once all shape ratings have been processed.
41 const TrainingSample& sample, Pix* page_pix, int debug,
42 UNICHAR_ID keep_this, GenericVector<UnicharRating>* results) {
43 results->truncate(0);
44 GenericVector<ShapeRating> shape_results;
45 int num_shape_results = ClassifySample(sample, page_pix, debug, keep_this,
46 &shape_results);
// The shape table pointer is dereferenced below without a null check.
47 const ShapeTable* shapes = GetShapeTable();
// One slot per entry in the shape table's unicharset, each initialised to -1.
// NOTE(review): presumably maps unichar_id -> index in *results so repeated
// unichars are merged; confirm against ShapeTable::AddShapeToResults.
48 GenericVector<int> unichar_map;
49 unichar_map.init_to_size(shapes->unicharset().size(), -1);
// Only the first num_shape_results entries (the count returned by
// ClassifySample) are converted.
50 for (int r = 0; r < num_shape_results; ++r) {
51 shapes->AddShapeToResults(shape_results[r], &unichar_map, results);
52 }
53 return results->size();
54}
55
56// Classifies the given [training] sample, writing to results.
57// See shapeclassifier.h for a full description.
58// Default implementation aborts.
// NOTE(review): embedded listing lines 59 and 61 (the start of the signature
// and the end of the parameter list) are absent from this extract. The
// declaration index at the end of the page lists a matching virtual method
// named ClassifySample taking a GenericVector<ShapeRating>* results argument.
60 int debug, int keep_this,
// A string literal never compares equal to nullptr, so the asserted condition
// is always false; the literal is there to carry the message text.
// NOTE(review): the return below is only reached if ASSERT_HOST does not
// terminate the program - confirm against errcode.h.
62 ASSERT_HOST("Must implement ClassifySample!" == nullptr);
63 return 0;
64}
65
66// Returns the shape that contains unichar_id that has the best result.
67// If result is not nullptr, it is set with the shape_id and rating.
68// Does not need to be overridden if ClassifySample respects the keep_this
69// rule.
// NOTE(review): embedded listing lines 70 (start of the signature) and 73 are
// absent from this extract. Line 73 presumably declares the local `results`
// vector of ShapeRating used below - confirm against the real file.
//
// Classifies the sample with debug off (0) and unichar_id passed as the
// keep_this argument, then scans the results in order.
// Returns the shape_id of the first result whose shape contains unichar_id,
// or -1 if no result does. When a match is found and result is non-null, the
// matching ShapeRating is copied to *result.
71 Pix* page_pix, UNICHAR_ID unichar_id,
72 ShapeRating* result) {
74 const ShapeTable* shapes = GetShapeTable();
75 int num_results = ClassifySample(sample, page_pix, 0, unichar_id, &results);
// The first hit wins.
// NOTE(review): "best" in the header comment presumably relies on
// ClassifySample returning results best-first; confirm.
76 for (int r = 0; r < num_results; ++r) {
77 if (shapes->GetShape(results[r].shape_id).ContainsUnichar(unichar_id)) {
78 if (result != nullptr)
79 *result = results[r];
80 return results[r].shape_id;
81 }
82 }
// No returned shape contains unichar_id.
83 return -1;
84}
85
86// Provides access to the UNICHARSET that this classifier works with.
87// Only needs to be overridden if GetShapeTable() can return nullptr.
// NOTE(review): embedded listing line 88 (the signature) is absent from this
// extract. The declaration index at the end of the page lists a matching
// const virtual method named GetUnicharset returning const UNICHARSET&.
//
// Dereferences GetShapeTable() with no null check and returns that table's
// unicharset.
89 return GetShapeTable()->unicharset();
90}
91
92// Visual debugger classifies the given sample, displays the results and
93// solicits user input to display other classifications. Returns when
94// the user has finished with debugging the sample.
95// Probably doesn't need to be overridden if the subclass provides
96// DisplayClassifyAs.
// NOTE(review): embedded listing lines 97 (start of the signature), 117 and
// 123 are absent from this extract. The `results` and `windows` variables used
// below have no visible declaration; presumably those two lines declare them
// (a UnicharRating vector and a PointerVector<ScrollView>) - confirm against
// the real file.
//
// Interactive debug loop: draws the sample's features in a "ClassifierDebug"
// window, classifies the sample for the current unichar_id, then blocks on
// window events. A popup-menu event naming a character in the unicharset
// switches unichar_id and re-runs the classification; a click or window
// destroy event ends the loop. The whole body is compiled out when
// GRAPHICS_DISABLED is defined.
98 Pix* page_pix,
99 UNICHAR_ID unichar_id) {
100#ifndef GRAPHICS_DISABLED
// Created on the first call only and never deleted in this function.
// NOTE(review): the purpose of this extra "XIT" window is not visible here.
101 static ScrollView* terminator = nullptr;
102 if (terminator == nullptr) {
103 terminator = new ScrollView("XIT", 0, 0, 50, 50, 50, 50, true);
104 }
105 ScrollView* debug_win = CreateFeatureSpaceWindow("ClassifierDebug", 0, 0);
106 // Provide a right-click menu to choose the class.
// NOTE(review): popup_menu is allocated with new and not deleted in this
// function; confirm whether BuildMenu takes ownership.
107 auto* popup_menu = new SVMenuNode();
108 popup_menu->AddChild("Choose class to debug", 0, "x", "Class to debug");
109 popup_menu->BuildMenu(debug_win, false);
110 // Display the features in green.
111 const INT_FEATURE_STRUCT* features = sample.features();
112 uint32_t num_features = sample.num_features();
113 for (uint32_t f = 0; f < num_features; ++f) {
114 RenderIntFeature(debug_win, &features[f], ScrollView::GREEN);
115 }
116 debug_win->Update();
118 // Debug classification until the user quits.
119 const UNICHARSET& unicharset = GetUnicharset();
120 SVEvent* ev;
121 SVEventType ev_type;
// Outer loop: one classification pass per selected unichar_id.
122 do {
// A non-negative id is classified with debug level 1 and also passed to
// DisplayClassifyAs with index 1; a negative id is reported as invalid and
// classified with keep_this = -1, without calling DisplayClassifyAs.
124 if (unichar_id >= 0) {
125 tprintf("Debugging class %d = %s\n",
126 unichar_id, unicharset.id_to_unichar(unichar_id));
127 UnicharClassifySample(sample, page_pix, 1, unichar_id, &results);
128 DisplayClassifyAs(sample, page_pix, unichar_id, 1, &windows);
129 } else {
130 tprintf("Invalid unichar_id: %d\n", unichar_id);
131 UnicharClassifySample(sample, page_pix, 1, -1, &results);
132 }
// NOTE(review): this repeats the unichar_id >= 0 test from just above; the
// message could be emitted at the end of that branch instead.
133 if (unichar_id >= 0) {
134 tprintf("Debugged class %d = %s\n",
135 unichar_id, unicharset.id_to_unichar(unichar_id));
136 }
137 tprintf("Right-click in ClassifierDebug window to choose debug class,");
138 tprintf(" Left-click or close window to quit...\n");
139 UNICHAR_ID old_unichar_id;
// Inner loop: keep waiting for events until unichar_id changes or the user
// clicks / closes the window.
140 do {
141 old_unichar_id = unichar_id;
142 ev = debug_win->AwaitEvent(SVET_ANY);
// Copy the type out now: ev is deleted before the loop conditions read it.
143 ev_type = ev->type;
144 if (ev_type == SVET_POPUP) {
// The event parameter is looked up as a unichar string; if it is not in the
// unicharset, unichar_id stays unchanged and the wait continues.
145 if (unicharset.contains_unichar(ev->parameter)) {
146 unichar_id = unicharset.unichar_to_id(ev->parameter);
147 } else {
// NOTE(review): unlike the other messages in this function, this format
// string has no trailing "\n".
148 tprintf("Char class '%s' not found in unicharset", ev->parameter);
149 }
150 }
151 delete ev;
152 } while (unichar_id == old_unichar_id &&
153 ev_type != SVET_CLICK && ev_type != SVET_DESTROY);
154 } while (ev_type != SVET_CLICK && ev_type != SVET_DESTROY);
155 delete debug_win;
156#endif // GRAPHICS_DISABLED
157}
158
159// Displays classification as the given unichar_id. Creates as many windows
160// as it feels fit, using index as a guide for placement. Adds any created
161// windows to the windows output and returns a new index that may be used
162// by any subsequent classifiers. Caller waits for the user to view and
163// then destroys the windows by clearing the vector.
// NOTE(review): embedded listing line 164 (the start of the signature) is
// absent from this extract. The declaration index at the end of the page
// lists a matching virtual method named DisplayClassifyAs returning int.
//
// Default implementation: creates no windows, leaves *windows untouched,
// ignores sample, page_pix and unichar_id, and returns index unchanged.
165 const TrainingSample& sample, Pix* page_pix,
166 UNICHAR_ID unichar_id, int index,
167 PointerVector<ScrollView>* windows) {
168 // Does nothing in the default implementation.
169 return index;
170}
171
172// Prints debug information on the results.
// NOTE(review): embedded listing line 173 (the start of the signature) is
// absent from this extract. The declaration index at the end of the page
// lists a matching const virtual method named UnicharPrintResults.
//
// Prints context on its own line, then one line per result: the rating, the
// unichar_id, and the unichar string looked up through GetUnicharset(). If the
// result has any fonts, their fontinfo_id values follow on the same line.
174 const char* context, const GenericVector<UnicharRating>& results) const {
175 tprintf("%s\n", context);
176 for (int i = 0; i < results.size(); ++i) {
177 tprintf("%g: c_id=%d=%s", results[i].rating, results[i].unichar_id,
178 GetUnicharset().id_to_unichar(results[i].unichar_id));
// The font list is optional; nothing extra is printed when it is empty.
179 if (!results[i].fonts.empty()) {
180 tprintf(" Font Vector:");
181 for (int f = 0; f < results[i].fonts.size(); ++f) {
182 tprintf(" %d", results[i].fonts[f].fontinfo_id);
183 }
184 }
// Terminates the line started by the first tprintf of this iteration.
185 tprintf("\n");
186 }
187}
// NOTE(review): embedded listing line 188 (the start of the signature) is
// absent from this extract. The declaration index at the end of the page
// lists a matching const virtual method named PrintResults.
//
// Prints context on its own line, then one line per shape result: the rating,
// "[J]" if the result's joined flag is set, "[B]" if its broken flag is set,
// and the shape table's DebugStr() for the result's shape_id.
189 const char* context, const GenericVector<ShapeRating>& results) const {
190 tprintf("%s\n", context);
191 for (int i = 0; i < results.size(); ++i) {
192 tprintf("%g:", results[i].rating);
193 if (results[i].joined)
194 tprintf("[J]");
195 if (results[i].broken)
196 tprintf("[B]");
// GetShapeTable() is dereferenced without a null check.
197 tprintf(" %s\n", GetShapeTable()->DebugStr(results[i].shape_id).string());
198 }
199}
200
201// Removes any result that has all its unichars covered by a better choice,
202// regardless of font.
// NOTE(review): embedded listing line 203 (the start of the signature) is
// absent from this extract. The declaration index at the end of the page
// lists a matching const method named FilterDuplicateUnichars returning void.
//
// Rebuilds *results in place, keeping a result only if its shape has at least
// one unichar that appears in none of the shapes of the results before it in
// the input order. The first result is always kept. Earlier results are
// compared even if they were themselves filtered out, because the scan reads
// *results rather than filtered_results.
204 GenericVector<ShapeRating>* results) const {
205 GenericVector<ShapeRating> filtered_results;
206 // Copy results to filtered results and knock out duplicate unichars.
207 const ShapeTable* shapes = GetShapeTable();
208 for (int r = 0; r < results->size(); ++r) {
209 if (r > 0) {
210 const Shape& shape_r = shapes->GetShape((*results)[r].shape_id);
// c and s are declared outside their loops so their final values can be
// inspected afterwards: reaching the loop bound means "ran to completion".
211 int c;
212 for (c = 0; c < shape_r.size(); ++c) {
213 int unichar_id = shape_r[c].unichar_id;
214 int s;
215 for (s = 0; s < r; ++s) {
216 const Shape& shape_s = shapes->GetShape((*results)[s].shape_id);
217 if (shape_s.ContainsUnichar(unichar_id))
218 break; // We found unichar_id.
219 }
// s == r: no earlier shape contains this unichar, so shape_r adds something
// new and the scan of its unichars can stop early.
220 if (s == r)
221 break; // We didn't find unichar_id.
222 }
// c == shape_r.size(): every unichar was covered by an earlier shape, so this
// result is dropped. A shape with size() == 0 also satisfies this test.
223 if (c == shape_r.size())
224 continue; // We found all the unichar ids in previous answers.
225 }
226 filtered_results.push_back((*results)[r]);
227 }
228 *results = filtered_results;
229}
230
231} // namespace tesseract.
#define ASSERT_HOST(x)
Definition: errcode.h:88
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
int UNICHAR_ID
Definition: unichar.h:34
ScrollView * CreateFeatureSpaceWindow(const char *name, int xpos, int ypos)
Definition: intproto.cpp:1763
void RenderIntFeature(ScrollView *window, const INT_FEATURE_STRUCT *Feature, ScrollView::Color color)
Definition: intproto.cpp:1602
SVEventType
Definition: scrollview.h:45
@ SVET_CLICK
Definition: scrollview.h:48
@ SVET_POPUP
Definition: scrollview.h:54
@ SVET_ANY
Definition: scrollview.h:56
@ SVET_DESTROY
Definition: scrollview.h:46
void init_to_size(int size, const T &t)
int push_back(T object)
bool empty() const
Definition: genericvector.h:91
int size() const
Definition: genericvector.h:72
void truncate(int size)
bool contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:671
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
int size() const
Definition: unicharset.h:341
UNICHAR_ID unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:210
Definition: cluster.h:32
virtual int DisplayClassifyAs(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id, int index, PointerVector< ScrollView > *windows)
virtual int ClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< ShapeRating > *results)
virtual const ShapeTable * GetShapeTable() const =0
void FilterDuplicateUnichars(GenericVector< ShapeRating > *results) const
virtual void PrintResults(const char *context, const GenericVector< ShapeRating > &results) const
virtual int BestShapeForUnichar(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id, ShapeRating *result)
virtual const UNICHARSET & GetUnicharset() const
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
virtual void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id)
virtual void UnicharPrintResults(const char *context, const GenericVector< UnicharRating > &results) const
int size() const
Definition: shapetable.h:199
bool ContainsUnichar(int unichar_id) const
Definition: shapetable.cpp:147
void AddShapeToResults(const ShapeRating &shape_rating, GenericVector< int > *unichar_map, GenericVector< UnicharRating > *results) const
Definition: shapetable.cpp:687
const UNICHARSET & unicharset() const
Definition: shapetable.h:277
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:319
SVEventType type
Definition: scrollview.h:64
char * parameter
Definition: scrollview.h:66
static void Update()
Definition: scrollview.cpp:709
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:443