tesseract 4.1.1
Loading...
Searching...
No Matches
shapetable.cpp
Go to the documentation of this file.
1// Copyright 2010 Google Inc. All Rights Reserved.
2// Author: rays@google.com (Ray Smith)
4// File: shapetable.cpp
5// Description: Class to map a classifier shape index to unicharset
6// indices and font indices.
7// Author: Ray Smith
8// Created: Tue Nov 02 15:31:32 PDT 2010
9//
10// (C) Copyright 2010, Google Inc.
11// Licensed under the Apache License, Version 2.0 (the "License");
12// you may not use this file except in compliance with the License.
13// You may obtain a copy of the License at
14// http://www.apache.org/licenses/LICENSE-2.0
15// Unless required by applicable law or agreed to in writing, software
16// distributed under the License is distributed on an "AS IS" BASIS,
17// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18// See the License for the specific language governing permissions and
19// limitations under the License.
20//
22
23#include "shapetable.h"
24
25#include "bitvector.h"
26#include "fontinfo.h"
27#include "intfeaturespace.h"
28#include "strngs.h"
29#include "unicharset.h"
30#include "unicity_table.h"
31
32#include <algorithm>
33
34namespace tesseract {
35
36// Helper function to get the index of the first result with the required
37// unichar_id. If the results are sorted by rating, this will also be the
38// best result with the required unichar_id.
39// Returns -1 if the unichar_id is not found
41 const GenericVector<ShapeRating>& results,
42 const ShapeTable& shape_table,
43 UNICHAR_ID unichar_id) {
44 for (int r = 0; r < results.size(); ++r) {
45 const int shape_id = results[r].shape_id;
46 const Shape& shape = shape_table.GetShape(shape_id);
47 if (shape.ContainsUnichar(unichar_id)) {
48 return r;
49 }
50 }
51 return -1;
52}
53
54// Helper function to get the index of the first result with the required
55// unichar_id. If the results are sorted by rating, this will also be the
56// best result with the required unichar_id.
57// Returns -1 if the unichar_id is not found
59 const GenericVector<UnicharRating>& results,
60 UNICHAR_ID unichar_id) {
61 for (int r = 0; r < results.size(); ++r) {
62 if (results[r].unichar_id == unichar_id)
63 return r;
64 }
65 return -1;
66}
67
68// Writes to the given file. Returns false in case of error.
69bool UnicharAndFonts::Serialize(FILE* fp) const {
71}
72// Reads from the given file. Returns false in case of error.
73
75 return fp->DeSerialize(&unichar_id) && font_ids.DeSerialize(fp);
76}
77
78// Sort function to sort a pair of UnicharAndFonts by unichar_id.
79int UnicharAndFonts::SortByUnicharId(const void* v1, const void* v2) {
80 const auto* p1 = static_cast<const UnicharAndFonts*>(v1);
81 const auto* p2 = static_cast<const UnicharAndFonts*>(v2);
82 return p1->unichar_id - p2->unichar_id;
83}
84
85// Writes to the given file. Returns false in case of error.
86bool Shape::Serialize(FILE* fp) const {
87 uint8_t sorted = unichars_sorted_;
88 return tesseract::Serialize(fp, &sorted) && unichars_.SerializeClasses(fp);
89}
90// Reads from the given file. Returns false in case of error.
91
93 uint8_t sorted;
94 if (!fp->DeSerialize(&sorted)) return false;
95 unichars_sorted_ = sorted != 0;
96 return unichars_.DeSerializeClasses(fp);
97}
98
99// Adds a font_id for the given unichar_id. If the unichar_id is not
100// in the shape, it is added.
101void Shape::AddToShape(int unichar_id, int font_id) {
102 for (int c = 0; c < unichars_.size(); ++c) {
103 if (unichars_[c].unichar_id == unichar_id) {
104 // Found the unichar in the shape table.
105 GenericVector<int>& font_list = unichars_[c].font_ids;
106 for (int f = 0; f < font_list.size(); ++f) {
107 if (font_list[f] == font_id)
108 return; // Font is already there.
109 }
110 font_list.push_back(font_id);
111 return;
112 }
113 }
114 // Unichar_id is not in shape, so add it to shape.
115 unichars_.push_back(UnicharAndFonts(unichar_id, font_id));
116 unichars_sorted_ = unichars_.size() <= 1;
117}
118
119// Adds everything in other to this.
120void Shape::AddShape(const Shape& other) {
121 for (int c = 0; c < other.unichars_.size(); ++c) {
122 for (int f = 0; f < other.unichars_[c].font_ids.size(); ++f) {
123 AddToShape(other.unichars_[c].unichar_id,
124 other.unichars_[c].font_ids[f]);
125 }
126 }
127 unichars_sorted_ = unichars_.size() <= 1;
128}
129
130// Returns true if the shape contains the given unichar_id, font_id pair.
131bool Shape::ContainsUnicharAndFont(int unichar_id, int font_id) const {
132 for (int c = 0; c < unichars_.size(); ++c) {
133 if (unichars_[c].unichar_id == unichar_id) {
134 // Found the unichar, so look for the font.
135 GenericVector<int>& font_list = unichars_[c].font_ids;
136 for (int f = 0; f < font_list.size(); ++f) {
137 if (font_list[f] == font_id)
138 return true;
139 }
140 return false;
141 }
142 }
143 return false;
144}
145
146// Returns true if the shape contains the given unichar_id, ignoring font.
147bool Shape::ContainsUnichar(int unichar_id) const {
148 for (int c = 0; c < unichars_.size(); ++c) {
149 if (unichars_[c].unichar_id == unichar_id) {
150 return true;
151 }
152 }
153 return false;
154}
155
156// Returns true if the shape contains the given font, ignoring unichar_id.
157bool Shape::ContainsFont(int font_id) const {
158 for (int c = 0; c < unichars_.size(); ++c) {
159 GenericVector<int>& font_list = unichars_[c].font_ids;
160 for (int f = 0; f < font_list.size(); ++f) {
161 if (font_list[f] == font_id)
162 return true;
163 }
164 }
165 return false;
166}
167// Returns true if the shape contains the given font properties, ignoring
168// unichar_id.
170 uint32_t properties) const {
171 for (int c = 0; c < unichars_.size(); ++c) {
172 GenericVector<int>& font_list = unichars_[c].font_ids;
173 for (int f = 0; f < font_list.size(); ++f) {
174 if (font_table.get(font_list[f]).properties == properties)
175 return true;
176 }
177 }
178 return false;
179}
180// Returns true if the shape contains multiple different font properties,
181// ignoring unichar_id.
183 const FontInfoTable& font_table) const {
184 uint32_t properties = font_table.get(unichars_[0].font_ids[0]).properties;
185 for (int c = 0; c < unichars_.size(); ++c) {
186 GenericVector<int>& font_list = unichars_[c].font_ids;
187 for (int f = 0; f < font_list.size(); ++f) {
188 if (font_table.get(font_list[f]).properties != properties)
189 return true;
190 }
191 }
192 return false;
193}
194
195// Returns true if this shape is equal to other (ignoring order of unichars
196// and fonts).
197bool Shape::operator==(const Shape& other) const {
198 return IsSubsetOf(other) && other.IsSubsetOf(*this);
199}
200
201// Returns true if this is a subset (including equal) of other.
202bool Shape::IsSubsetOf(const Shape& other) const {
203 for (int c = 0; c < unichars_.size(); ++c) {
204 int unichar_id = unichars_[c].unichar_id;
205 const GenericVector<int>& font_list = unichars_[c].font_ids;
206 for (int f = 0; f < font_list.size(); ++f) {
207 if (!other.ContainsUnicharAndFont(unichar_id, font_list[f]))
208 return false;
209 }
210 }
211 return true;
212}
213
214// Returns true if the lists of unichar ids are the same in this and other,
215// ignoring fonts.
216// NOT const, as it will sort the unichars on demand.
218 if (unichars_.size() != other->unichars_.size()) return false;
219 if (!unichars_sorted_) SortUnichars();
220 if (!other->unichars_sorted_) other->SortUnichars();
221 for (int c = 0; c < unichars_.size(); ++c) {
222 if (unichars_[c].unichar_id != other->unichars_[c].unichar_id)
223 return false;
224 }
225 return true;
226}
227
228// Sorts the unichars_ vector by unichar.
229void Shape::SortUnichars() {
230 unichars_.sort(UnicharAndFonts::SortByUnicharId);
231 unichars_sorted_ = true;
232}
233
234ShapeTable::ShapeTable() : unicharset_(nullptr), num_fonts_(0) {
235}
237 : unicharset_(&unicharset), num_fonts_(0) {
238}
239
240// Writes to the given file. Returns false in case of error.
241bool ShapeTable::Serialize(FILE* fp) const {
242 return shape_table_.Serialize(fp);
243}
244// Reads from the given file. Returns false in case of error.
245
247 if (!shape_table_.DeSerialize(fp)) return false;
248 num_fonts_ = 0;
249 return true;
250}
251
252// Returns the number of fonts used in this ShapeTable, computing it if
253// necessary.
255 if (num_fonts_ <= 0) {
256 for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) {
257 const Shape& shape = *shape_table_[shape_id];
258 for (int c = 0; c < shape.size(); ++c) {
259 for (int f = 0; f < shape[c].font_ids.size(); ++f) {
260 if (shape[c].font_ids[f] >= num_fonts_)
261 num_fonts_ = shape[c].font_ids[f] + 1;
262 }
263 }
264 }
265 }
266 return num_fonts_;
267}
268
269// Re-indexes the class_ids in the shapetable according to the given map.
270// Useful in conjunction with set_unicharset.
272 for (int shape_id = 0; shape_id < shape_table_.size(); ++shape_id) {
273 Shape* shape = shape_table_[shape_id];
274 for (int c = 0; c < shape->size(); ++c) {
275 shape->SetUnicharId(c, unicharset_map[(*shape)[c].unichar_id]);
276 }
277 }
278}
279
280// Returns a string listing the classes/fonts in a shape.
281STRING ShapeTable::DebugStr(int shape_id) const {
282 if (shape_id < 0 || shape_id >= shape_table_.size())
283 return STRING("INVALID_UNICHAR_ID");
284 const Shape& shape = GetShape(shape_id);
285 STRING result;
286 result.add_str_int("Shape", shape_id);
287 if (shape.size() > 100) {
288 result.add_str_int(" Num unichars=", shape.size());
289 return result;
290 }
291 for (int c = 0; c < shape.size(); ++c) {
292 result.add_str_int(" c_id=", shape[c].unichar_id);
293 result += "=";
294 result += unicharset_->id_to_unichar(shape[c].unichar_id);
295 if (shape.size() < 10) {
296 result.add_str_int(", ", shape[c].font_ids.size());
297 result += " fonts =";
298 int num_fonts = shape[c].font_ids.size();
299 if (num_fonts > 10) {
300 result.add_str_int(" ", shape[c].font_ids[0]);
301 result.add_str_int(" ... ", shape[c].font_ids[num_fonts - 1]);
302 } else {
303 for (int f = 0; f < num_fonts; ++f) {
304 result.add_str_int(" ", shape[c].font_ids[f]);
305 }
306 }
307 }
308 }
309 return result;
310}
311
312// Returns a debug string summarizing the table.
314 int max_unichars = 0;
315 int num_multi_shapes = 0;
316 int num_master_shapes = 0;
317 for (int s = 0; s < shape_table_.size(); ++s) {
318 if (MasterDestinationIndex(s) != s) continue;
319 ++num_master_shapes;
320 int shape_size = GetShape(s).size();
321 if (shape_size > 1)
322 ++num_multi_shapes;
323 if (shape_size > max_unichars)
324 max_unichars = shape_size;
325 }
326 STRING result;
327 result.add_str_int("Number of shapes = ", num_master_shapes);
328 result.add_str_int(" max unichars = ", max_unichars);
329 result.add_str_int(" number with multiple unichars = ", num_multi_shapes);
330 return result;
331}
332
333
334// Adds a new shape starting with the given unichar_id and font_id.
335// Returns the assigned index.
336int ShapeTable::AddShape(int unichar_id, int font_id) {
337 int index = shape_table_.size();
338 auto* shape = new Shape;
339 shape->AddToShape(unichar_id, font_id);
340 shape_table_.push_back(shape);
341 num_fonts_ = std::max(num_fonts_, font_id + 1);
342 return index;
343}
344
345// Adds a copy of the given shape unless it is already present.
346// Returns the assigned index or index of existing shape if already present.
347int ShapeTable::AddShape(const Shape& other) {
348 int index;
349 for (index = 0; index < shape_table_.size() &&
350 !(other == *shape_table_[index]); ++index)
351 continue;
352 if (index == shape_table_.size()) {
353 auto* shape = new Shape(other);
354 shape_table_.push_back(shape);
355 }
356 num_fonts_ = 0;
357 return index;
358}
359
360// Removes the shape given by the shape index.
361void ShapeTable::DeleteShape(int shape_id) {
362 delete shape_table_[shape_id];
363 shape_table_[shape_id] = nullptr;
364 shape_table_.remove(shape_id);
365}
366
367// Adds a font_id to the given existing shape index for the given
368// unichar_id. If the unichar_id is not in the shape, it is added.
369void ShapeTable::AddToShape(int shape_id, int unichar_id, int font_id) {
370 Shape& shape = *shape_table_[shape_id];
371 shape.AddToShape(unichar_id, font_id);
372 num_fonts_ = std::max(num_fonts_, font_id + 1);
373}
374
375// Adds the given shape to the existing shape with the given index.
376void ShapeTable::AddShapeToShape(int shape_id, const Shape& other) {
377 Shape& shape = *shape_table_[shape_id];
378 shape.AddShape(other);
379 num_fonts_ = 0;
380}
381
382// Returns the id of the shape that contains the given unichar and font.
383// If not found, returns -1.
384// If font_id < 0, the font_id is ignored and the first shape that matches
385// the unichar_id is returned.
386int ShapeTable::FindShape(int unichar_id, int font_id) const {
387 for (int s = 0; s < shape_table_.size(); ++s) {
388 const Shape& shape = GetShape(s);
389 for (int c = 0; c < shape.size(); ++c) {
390 if (shape[c].unichar_id == unichar_id) {
391 if (font_id < 0)
392 return s; // We don't care about the font.
393 for (int f = 0; f < shape[c].font_ids.size(); ++f) {
394 if (shape[c].font_ids[f] == font_id)
395 return s;
396 }
397 }
398 }
399 }
400 return -1;
401}
402
403// Returns the first unichar_id and font_id in the given shape.
405 int* unichar_id, int* font_id) const {
406 const UnicharAndFonts& unichar_and_fonts = (*shape_table_[shape_id])[0];
407 *unichar_id = unichar_and_fonts.unichar_id;
408 *font_id = unichar_and_fonts.font_ids[0];
409}
410
411// Expands all the classes/fonts in the shape individually to build
412// a ShapeTable.
414 const ShapeTable& master_shapes) {
415 BitVector shape_map(master_shapes.NumShapes());
416 for (int u_ind = 0; u_ind < shape.size(); ++u_ind) {
417 for (int f_ind = 0; f_ind < shape[u_ind].font_ids.size(); ++f_ind) {
418 int c = shape[u_ind].unichar_id;
419 int f = shape[u_ind].font_ids[f_ind];
420 int master_id = master_shapes.FindShape(c, f);
421 if (master_id >= 0) {
422 shape_map.SetBit(master_id);
423 } else if (FindShape(c, f) < 0) {
424 AddShape(c, f);
425 }
426 }
427 }
428 int num_masters = 0;
429 for (int s = 0; s < master_shapes.NumShapes(); ++s) {
430 if (shape_map[s]) {
431 AddShape(master_shapes.GetShape(s));
432 ++num_masters;
433 }
434 }
435 return num_masters;
436}
437
438// Returns true if the shapes are already merged.
439bool ShapeTable::AlreadyMerged(int shape_id1, int shape_id2) const {
440 return MasterDestinationIndex(shape_id1) == MasterDestinationIndex(shape_id2);
441}
442
443// Returns true if any shape contains multiple unichars.
445 int num_shapes = NumShapes();
446 for (int s1 = 0; s1 < num_shapes; ++s1) {
447 if (MasterDestinationIndex(s1) != s1) continue;
448 if (GetShape(s1).size() > 1)
449 return true;
450 }
451 return false;
452}
453
454// Returns the maximum number of unichars over all shapes.
456 int max_num_unichars = 0;
457 int num_shapes = NumShapes();
458 for (int s = 0; s < num_shapes; ++s) {
459 if (GetShape(s).size() > max_num_unichars)
460 max_num_unichars = GetShape(s).size();
461 }
462 return max_num_unichars;
463}
464
465
466// Merges shapes with a common unichar over the [start, end) interval.
467// Assumes single unichar per shape.
468void ShapeTable::ForceFontMerges(int start, int end) {
469 for (int s1 = start; s1 < end; ++s1) {
470 if (MasterDestinationIndex(s1) == s1 && GetShape(s1).size() == 1) {
471 int unichar_id = GetShape(s1)[0].unichar_id;
472 for (int s2 = s1 + 1; s2 < end; ++s2) {
473 if (MasterDestinationIndex(s2) == s2 && GetShape(s2).size() == 1 &&
474 unichar_id == GetShape(s2)[0].unichar_id) {
475 MergeShapes(s1, s2);
476 }
477 }
478 }
479 }
480 ShapeTable compacted(*unicharset_);
481 compacted.AppendMasterShapes(*this, nullptr);
482 *this = compacted;
483}
484
485// Returns the number of unichars in the master shape.
486int ShapeTable::MasterUnicharCount(int shape_id) const {
487 int master_id = MasterDestinationIndex(shape_id);
488 return GetShape(master_id).size();
489}
490
491// Returns the sum of the font counts in the master shape.
492int ShapeTable::MasterFontCount(int shape_id) const {
493 int master_id = MasterDestinationIndex(shape_id);
494 const Shape& shape = GetShape(master_id);
495 int font_count = 0;
496 for (int c = 0; c < shape.size(); ++c) {
497 font_count += shape[c].font_ids.size();
498 }
499 return font_count;
500}
501
502// Returns the number of unichars that would result from merging the shapes.
503int ShapeTable::MergedUnicharCount(int shape_id1, int shape_id2) const {
504 // Do it the easy way for now.
505 int master_id1 = MasterDestinationIndex(shape_id1);
506 int master_id2 = MasterDestinationIndex(shape_id2);
507 Shape combined_shape(*shape_table_[master_id1]);
508 combined_shape.AddShape(*shape_table_[master_id2]);
509 return combined_shape.size();
510}
511
512// Merges two shape_ids, leaving shape_id2 marked as merged.
513void ShapeTable::MergeShapes(int shape_id1, int shape_id2) {
514 int master_id1 = MasterDestinationIndex(shape_id1);
515 int master_id2 = MasterDestinationIndex(shape_id2);
516 // Point master_id2 (and all merged shapes) to master_id1.
517 shape_table_[master_id2]->set_destination_index(master_id1);
518 // Add all the shapes of master_id2 to master_id1.
519 shape_table_[master_id1]->AddShape(*shape_table_[master_id2]);
520}
521
522// Swaps two shape_ids.
523void ShapeTable::SwapShapes(int shape_id1, int shape_id2) {
524 Shape* tmp = shape_table_[shape_id1];
525 shape_table_[shape_id1] = shape_table_[shape_id2];
526 shape_table_[shape_id2] = tmp;
527}
528
529// Returns the destination of this shape, (if merged), taking into account
530// the fact that the destination may itself have been merged.
531int ShapeTable::MasterDestinationIndex(int shape_id) const {
532 int dest_id = shape_table_[shape_id]->destination_index();
533 if (dest_id == shape_id || dest_id < 0)
534 return shape_id; // Is master already.
535 int master_id = shape_table_[dest_id]->destination_index();
536 if (master_id == dest_id || master_id < 0)
537 return dest_id; // Dest is the master and shape_id points to it.
538 master_id = MasterDestinationIndex(master_id);
539 return master_id;
540}
541
542// Returns false if the unichars in neither shape is a subset of the other.
543bool ShapeTable::SubsetUnichar(int shape_id1, int shape_id2) const {
544 const Shape& shape1 = GetShape(shape_id1);
545 const Shape& shape2 = GetShape(shape_id2);
546 int c1, c2;
547 for (c1 = 0; c1 < shape1.size(); ++c1) {
548 int unichar_id1 = shape1[c1].unichar_id;
549 if (!shape2.ContainsUnichar(unichar_id1))
550 break;
551 }
552 for (c2 = 0; c2 < shape2.size(); ++c2) {
553 int unichar_id2 = shape2[c2].unichar_id;
554 if (!shape1.ContainsUnichar(unichar_id2))
555 break;
556 }
557 return c1 == shape1.size() || c2 == shape2.size();
558}
559
560// Returns false if the unichars in neither shape is a subset of the other.
561bool ShapeTable::MergeSubsetUnichar(int merge_id1, int merge_id2,
562 int shape_id) const {
563 const Shape& merge1 = GetShape(merge_id1);
564 const Shape& merge2 = GetShape(merge_id2);
565 const Shape& shape = GetShape(shape_id);
566 int cm1, cm2, cs;
567 for (cs = 0; cs < shape.size(); ++cs) {
568 int unichar_id = shape[cs].unichar_id;
569 if (!merge1.ContainsUnichar(unichar_id) &&
570 !merge2.ContainsUnichar(unichar_id))
571 break; // Shape is not a subset of the merge.
572 }
573 for (cm1 = 0; cm1 < merge1.size(); ++cm1) {
574 int unichar_id1 = merge1[cm1].unichar_id;
575 if (!shape.ContainsUnichar(unichar_id1))
576 break; // Merge is not a subset of shape
577 }
578 for (cm2 = 0; cm2 < merge2.size(); ++cm2) {
579 int unichar_id2 = merge2[cm2].unichar_id;
580 if (!shape.ContainsUnichar(unichar_id2))
581 break; // Merge is not a subset of shape
582 }
583 return cs == shape.size() || (cm1 == merge1.size() && cm2 == merge2.size());
584}
585
586// Returns true if the unichar sets are equal between the shapes.
587bool ShapeTable::EqualUnichars(int shape_id1, int shape_id2) const {
588 const Shape& shape1 = GetShape(shape_id1);
589 const Shape& shape2 = GetShape(shape_id2);
590 for (int c1 = 0; c1 < shape1.size(); ++c1) {
591 int unichar_id1 = shape1[c1].unichar_id;
592 if (!shape2.ContainsUnichar(unichar_id1))
593 return false;
594 }
595 for (int c2 = 0; c2 < shape2.size(); ++c2) {
596 int unichar_id2 = shape2[c2].unichar_id;
597 if (!shape1.ContainsUnichar(unichar_id2))
598 return false;
599 }
600 return true;
601}
602
603// Returns true if the unichar sets are equal between the shapes.
604bool ShapeTable::MergeEqualUnichars(int merge_id1, int merge_id2,
605 int shape_id) const {
606 const Shape& merge1 = GetShape(merge_id1);
607 const Shape& merge2 = GetShape(merge_id2);
608 const Shape& shape = GetShape(shape_id);
609 for (int cs = 0; cs < shape.size(); ++cs) {
610 int unichar_id = shape[cs].unichar_id;
611 if (!merge1.ContainsUnichar(unichar_id) &&
612 !merge2.ContainsUnichar(unichar_id))
613 return false; // Shape has a unichar that appears in neither merge.
614 }
615 for (int cm1 = 0; cm1 < merge1.size(); ++cm1) {
616 int unichar_id1 = merge1[cm1].unichar_id;
617 if (!shape.ContainsUnichar(unichar_id1))
618 return false; // Merge has a unichar that is not in shape.
619 }
620 for (int cm2 = 0; cm2 < merge2.size(); ++cm2) {
621 int unichar_id2 = merge2[cm2].unichar_id;
622 if (!shape.ContainsUnichar(unichar_id2))
623 return false; // Merge has a unichar that is not in shape.
624 }
625 return true;
626}
627
628// Returns true if there is a common unichar between the shapes.
629bool ShapeTable::CommonUnichars(int shape_id1, int shape_id2) const {
630 const Shape& shape1 = GetShape(shape_id1);
631 const Shape& shape2 = GetShape(shape_id2);
632 for (int c1 = 0; c1 < shape1.size(); ++c1) {
633 int unichar_id1 = shape1[c1].unichar_id;
634 if (shape2.ContainsUnichar(unichar_id1))
635 return true;
636 }
637 return false;
638}
639
640// Returns true if there is a common font id between the shapes.
641bool ShapeTable::CommonFont(int shape_id1, int shape_id2) const {
642 const Shape& shape1 = GetShape(shape_id1);
643 const Shape& shape2 = GetShape(shape_id2);
644 for (int c1 = 0; c1 < shape1.size(); ++c1) {
645 const GenericVector<int>& font_list1 = shape1[c1].font_ids;
646 for (int f = 0; f < font_list1.size(); ++f) {
647 if (shape2.ContainsFont(font_list1[f]))
648 return true;
649 }
650 }
651 return false;
652}
653
654// Appends the master shapes from other to this.
655// If not nullptr, shape_map is set to map other shape_ids to this's shape_ids.
657 GenericVector<int>* shape_map) {
658 if (shape_map != nullptr)
659 shape_map->init_to_size(other.NumShapes(), -1);
660 for (int s = 0; s < other.shape_table_.size(); ++s) {
661 if (other.shape_table_[s]->destination_index() < 0) {
662 int index = AddShape(*other.shape_table_[s]);
663 if (shape_map != nullptr)
664 (*shape_map)[s] = index;
665 }
666 }
667}
668
669// Returns the number of master shapes remaining after merging.
671 int num_shapes = 0;
672 for (int s = 0; s < shape_table_.size(); ++s) {
673 if (shape_table_[s]->destination_index() < 0)
674 ++num_shapes;
675 }
676 return num_shapes;
677}
678
679
680// Adds the unichars of the given shape_id to the vector of results. Any
681// unichar_id that is already present just has the fonts added to the
682// font set for that result without adding a new entry in the vector.
683// NOTE: it is assumed that the results are given to this function in order
684// of decreasing rating.
685// The unichar_map vector indicates the index of the results entry containing
686// each unichar, or -1 if the unichar is not yet included in results.
688 GenericVector<int>* unichar_map,
689 GenericVector<UnicharRating>* results)const {
690 if (shape_rating.joined) {
691 AddUnicharToResults(UNICHAR_JOINED, shape_rating.rating, unichar_map,
692 results);
693 }
694 if (shape_rating.broken) {
695 AddUnicharToResults(UNICHAR_BROKEN, shape_rating.rating, unichar_map,
696 results);
697 }
698 const Shape& shape = GetShape(shape_rating.shape_id);
699 for (int u = 0; u < shape.size(); ++u) {
700 int result_index = AddUnicharToResults(shape[u].unichar_id,
701 shape_rating.rating,
702 unichar_map, results);
703 for (int f = 0; f < shape[u].font_ids.size(); ++f) {
704 (*results)[result_index].fonts.push_back(
705 ScoredFont(shape[u].font_ids[f],
706 IntCastRounded(shape_rating.rating * INT16_MAX)));
707 }
708 }
709}
710
711// Adds the given unichar_id to the results if needed, updating unichar_map
712// and returning the index of unichar in results.
713int ShapeTable::AddUnicharToResults(
714 int unichar_id, float rating, GenericVector<int>* unichar_map,
715 GenericVector<UnicharRating>* results) const {
716 int result_index = unichar_map->get(unichar_id);
717 if (result_index < 0) {
718 UnicharRating result(unichar_id, rating);
719 result_index = results->push_back(result);
720 (*unichar_map)[unichar_id] = result_index;
721 }
722 return result_index;
723}
724
725
726} // namespace tesseract
int IntCastRounded(double x)
Definition: helpers.h:175
int UNICHAR_ID
Definition: unichar.h:34
@ UNICHAR_BROKEN
Definition: unicharset.h:36
@ UNICHAR_JOINED
Definition: unicharset.h:35
bool Serialize(FILE *fp, const char *data, size_t n)
Definition: serialis.cpp:60
void init_to_size(int size, const T &t)
int push_back(T object)
bool Serialize(FILE *fp) const
int size() const
Definition: genericvector.h:72
bool DeSerialize(bool swap, FILE *fp)
T & get(int index) const
void SetBit(int index)
Definition: bitvector.h:69
bool DeSerialize(char *data, size_t count=1)
Definition: serialis.cpp:104
Definition: strngs.h:45
void add_str_int(const char *str, int number)
Definition: strngs.cpp:377
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:291
static int FirstResultWithUnichar(const GenericVector< UnicharRating > &results, UNICHAR_ID unichar_id)
Definition: shapetable.cpp:58
static int FirstResultWithUnichar(const GenericVector< ShapeRating > &results, const ShapeTable &shape_table, UNICHAR_ID unichar_id)
Definition: shapetable.cpp:40
static int SortByUnicharId(const void *v1, const void *v2)
Definition: shapetable.cpp:79
GenericVector< int32_t > font_ids
Definition: shapetable.h:174
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:74
bool Serialize(FILE *fp) const
Definition: shapetable.cpp:69
bool IsSubsetOf(const Shape &other) const
Definition: shapetable.cpp:202
bool ContainsMultipleFontProperties(const FontInfoTable &font_table) const
Definition: shapetable.cpp:182
bool ContainsUnicharAndFont(int unichar_id, int font_id) const
Definition: shapetable.cpp:131
void SetUnicharId(int index, int unichar_id)
Definition: shapetable.h:208
bool ContainsFont(int font_id) const
Definition: shapetable.cpp:157
void AddToShape(int unichar_id, int font_id)
Definition: shapetable.cpp:101
int size() const
Definition: shapetable.h:199
bool Serialize(FILE *fp) const
Definition: shapetable.cpp:86
void AddShape(const Shape &other)
Definition: shapetable.cpp:120
bool IsEqualUnichars(Shape *other)
Definition: shapetable.cpp:217
bool ContainsFontProperties(const FontInfoTable &font_table, uint32_t properties) const
Definition: shapetable.cpp:169
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:92
bool ContainsUnichar(int unichar_id) const
Definition: shapetable.cpp:147
bool operator==(const Shape &other) const
Definition: shapetable.cpp:197
void SwapShapes(int shape_id1, int shape_id2)
Definition: shapetable.cpp:523
bool AnyMultipleUnichars() const
Definition: shapetable.cpp:444
int NumShapes() const
Definition: shapetable.h:274
bool DeSerialize(TFile *fp)
Definition: shapetable.cpp:246
void GetFirstUnicharAndFont(int shape_id, int *unichar_id, int *font_id) const
Definition: shapetable.cpp:404
bool CommonUnichars(int shape_id1, int shape_id2) const
Definition: shapetable.cpp:629
STRING SummaryStr() const
Definition: shapetable.cpp:313
int AddShape(int unichar_id, int font_id)
Definition: shapetable.cpp:336
int MergedUnicharCount(int shape_id1, int shape_id2) const
Definition: shapetable.cpp:503
bool SubsetUnichar(int shape_id1, int shape_id2) const
Definition: shapetable.cpp:543
int MasterFontCount(int shape_id) const
Definition: shapetable.cpp:492
bool AlreadyMerged(int shape_id1, int shape_id2) const
Definition: shapetable.cpp:439
int NumMasterShapes() const
Definition: shapetable.cpp:670
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:281
void AddShapeToResults(const ShapeRating &shape_rating, GenericVector< int > *unichar_map, GenericVector< UnicharRating > *results) const
Definition: shapetable.cpp:687
void MergeShapes(int shape_id1, int shape_id2)
Definition: shapetable.cpp:513
bool Serialize(FILE *fp) const
Definition: shapetable.cpp:241
int MasterUnicharCount(int shape_id) const
Definition: shapetable.cpp:486
void DeleteShape(int shape_id)
Definition: shapetable.cpp:361
int MaxNumUnichars() const
Definition: shapetable.cpp:455
void AddShapeToShape(int shape_id, const Shape &other)
Definition: shapetable.cpp:376
bool CommonFont(int shape_id1, int shape_id2) const
Definition: shapetable.cpp:641
bool MergeEqualUnichars(int merge_id1, int merge_id2, int shape_id) const
Definition: shapetable.cpp:604
int BuildFromShape(const Shape &shape, const ShapeTable &master_shapes)
Definition: shapetable.cpp:413
int MasterDestinationIndex(int shape_id) const
Definition: shapetable.cpp:531
void AddToShape(int shape_id, int unichar_id, int font_id)
Definition: shapetable.cpp:369
bool EqualUnichars(int shape_id1, int shape_id2) const
Definition: shapetable.cpp:587
bool MergeSubsetUnichar(int merge_id1, int merge_id2, int shape_id) const
Definition: shapetable.cpp:561
void ReMapClassIds(const GenericVector< int > &unicharset_map)
Definition: shapetable.cpp:271
void ForceFontMerges(int start, int end)
Definition: shapetable.cpp:468
int FindShape(int unichar_id, int font_id) const
Definition: shapetable.cpp:386
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:319
void AppendMasterShapes(const ShapeTable &other, GenericVector< int > *shape_map)
Definition: shapetable.cpp:656