tesseract 4.1.1
Loading...
Searching...
No Matches
associate.h
Go to the documentation of this file.
1
2// File: associate.h
3// Description: Structs, classes, typedefs useful for the segmentation
4// search. Functions for scoring segmentation paths according
5// to their character widths, gap widths and seam cuts.
6// Author: Daria Antonova
7// Created: Mon Mar 8 11:26:43 PDT 2010
8//
9// (C) Copyright 2010, Google Inc.
10// Licensed under the Apache License, Version 2.0 (the "License");
11// you may not use this file except in compliance with the License.
12// You may obtain a copy of the License at
13// http://www.apache.org/licenses/LICENSE-2.0
14// Unless required by applicable law or agreed to in writing, software
15// distributed under the License is distributed on an "AS IS" BASIS,
16// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17// See the License for the specific language governing permissions and
18// limitations under the License.
19//
21
22#ifndef ASSOCIATE_H
23#define ASSOCIATE_H
24
25#include "blobs.h"
26#include "elst.h"
27#include "ratngs.h"
28#include "seam.h"
29#include "split.h"
30
31class WERD_RES;
32
33namespace tesseract {
34
35// Statistics about character widths, gaps and seams.
38
39 void Clear() {
40 shape_cost = 0.0f;
41 bad_shape = false;
42 full_wh_ratio = 0.0f;
44 full_wh_ratio_var = 0.0f;
47 gap_sum = 0;
48 }
49
50 void Print() { tprintf("AssociateStats: s(%g %d)\n", shape_cost, bad_shape); }
51
52 float shape_cost; // cost of blob shape
53 bool bad_shape; // true if the shape of the blob is unacceptable
54 float full_wh_ratio; // width-to-height ratio + gap on the right
55 float full_wh_ratio_total; // sum of width-to-height ratios
56 // on the path terminating at this blob
57 float full_wh_ratio_var; // variance of full_wh_ratios on the path
58 bool bad_fixed_pitch_right_gap; // true if there is no gap before
59 // the blob on the right
60 bool bad_fixed_pitch_wh_ratio; // true if the blob has width-to-height
61 // ratio > kMaxFixedPitchCharAspectRatio
62 int gap_sum; // sum of gaps within the blob
63};
64
65// Utility functions for scoring segmentation paths according to their
66// character widths, gap widths, seam characteristics.
68 public:
70 static const float kMinGap;
71
72 // Returns outline length of the given blob is computed as:
73 // rating_cert_scale * rating / certainty
74 // Since from Wordrec::SegSearch() in segsearch.cpp
75 // rating_cert_scale = -1.0 * getDict().certainty_scale / rating_scale
76 // And from Classify::ConvertMatchesToChoices() in adaptmatch.cpp
77 // Rating = Certainty = next.rating
78 // Rating *= rating_scale * Results->BlobLength
79 // Certainty *= -(getDict().certainty_scale)
80 static inline float ComputeOutlineLength(float rating_cert_scale,
81 const BLOB_CHOICE &b) {
82 return rating_cert_scale * b.rating() / b.certainty();
83 }
// Returns width * cert / rating_cert_scale, with width converted to float
// before the multiplication.
static inline float ComputeRating(float rating_cert_scale,
                                  float cert, int width) {
  const float width_times_cert = static_cast<float>(width) * cert;
  return width_times_cert / rating_cert_scale;
}
88
89 // Computes character widths, gaps and seams stats given the
90 // AssociateStats of the path so far, col, row of the blob that
91 // is being added to the path, and WERD_RES containing information
92 // about character widths, gaps and seams.
93 // The result for adding a unichar from (col, row) to the path is
94 // written to stats (note that since
95 // this function could be used to compute the prioritization for
96 // pain points, (col, row) entry might not be classified yet; thus
97 // information in the (col, row) entry of the ratings matrix is not used).
98 // NOTE(review): an earlier wording named an associate_cost output that this
99 // signature does not have; stats is presumably the only output -- confirm.
100 // Note: the function assumes that word_res and stats are not nullptr.
101 static void ComputeStats(int col, int row,
102 const AssociateStats *parent_stats,
103 int parent_path_length,
104 bool fixed_pitch,
105 float max_char_wh_ratio,
106 WERD_RES *word_res,
107 bool debug,
108 AssociateStats *stats);
109
110 // Returns the width cost for fixed-pitch text.
// Declaration only; the definition is in associate.cpp.
// NOTE(review): the meaning and units of norm_width, right_gap and
// max_char_wh_ratio are not documented here -- confirm against the definition.
111 static float FixedPitchWidthCost(float norm_width, float right_gap,
112 bool end_pos, float max_char_wh_ratio);
113
// Gap cost for fixed-pitch text (penalizes vertically overlapping
// components).
// Yields 5.0f when norm_gap is below 0.05 and the blob is not at an end
// position; yields 0.0f in every other case.
static inline float FixedPitchGapCost(float norm_gap, bool end_pos) {
  if (end_pos) {
    return 0.0f;
  }
  // The threshold is a double literal, so norm_gap is promoted before
  // the comparison.
  return (norm_gap < 0.05) ? 5.0f : 0.0f;
}
119};
120
121} // namespace tesseract
122
123#endif
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
float certainty() const
Definition: ratngs.h:83
float rating() const
Definition: ratngs.h:80
static float ComputeRating(float rating_cert_scale, float cert, int width)
Definition: associate.h:84
static const float kMaxFixedPitchCharAspectRatio
Definition: associate.h:69
static float ComputeOutlineLength(float rating_cert_scale, const BLOB_CHOICE &b)
Definition: associate.h:80
static float FixedPitchGapCost(float norm_gap, bool end_pos)
Definition: associate.h:116
static const float kMinGap
Definition: associate.h:70
static float FixedPitchWidthCost(float norm_width, float right_gap, bool end_pos, float max_char_wh_ratio)
Definition: associate.cpp:156
static void ComputeStats(int col, int row, const AssociateStats *parent_stats, int parent_path_length, bool fixed_pitch, float max_char_wh_ratio, WERD_RES *word_res, bool debug, AssociateStats *stats)
Definition: associate.cpp:34