tesseract 4.1.1
Loading...
Searching...
No Matches
associate.cpp
Go to the documentation of this file.
1
2// File: associate.cpp
3// Description: Functions for scoring segmentation paths according to
4// their character widths, gap widths and seam cuts.
5// Author: Daria Antonova
6// Created: Mon Mar 8 11:26:43 PDT 2010
7//
8// (C) Copyright 2010, Google Inc.
9// Licensed under the Apache License, Version 2.0 (the "License");
10// you may not use this file except in compliance with the License.
11// You may obtain a copy of the License at
12// http://www.apache.org/licenses/LICENSE-2.0
13// Unless required by applicable law or agreed to in writing, software
14// distributed under the License is distributed on an "AS IS" BASIS,
15// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16// See the License for the specific language governing permissions and
17// limitations under the License.
18//
20
21
22#include <cmath>
23#include <cstdio>
24
25#include "associate.h"
26#include "normalis.h"
27#include "pageres.h"
28
29namespace tesseract {
30
// Smallest gap, expressed as a fraction of the normalizing height, that
// ComputeStats() accepts beside a blob in fixed pitch mode; a smaller
// normalized gap marks the shape as bad.
32const float AssociateUtils::kMinGap = 0.03f;
33
34void AssociateUtils::ComputeStats(int col, int row,
35 const AssociateStats *parent_stats,
36 int parent_path_length,
37 bool fixed_pitch,
38 float max_char_wh_ratio,
39 WERD_RES *word_res,
40 bool debug,
41 AssociateStats *stats) {
42 stats->Clear();
43
44 ASSERT_HOST(word_res != nullptr);
45 if (word_res->blob_widths.empty()) {
46 return;
47 }
48 if (debug) {
49 tprintf("AssociateUtils::ComputeStats() for col=%d, row=%d%s\n",
50 col, row, fixed_pitch ? " (fixed pitch)" : "");
51 }
52 float normalizing_height = kBlnXHeight;
53 ROW* blob_row = word_res->blob_row;
54 // TODO(rays/daria) Can unicharset.script_has_xheight be useful here?
55 if (fixed_pitch && blob_row != nullptr) {
56 // For fixed pitch language like CJK, we use the full text height
57 // as the normalizing factor so we are not dependent on xheight
58 // calculation.
59 if (blob_row->body_size() > 0.0f) {
60 normalizing_height = word_res->denorm.y_scale() * blob_row->body_size();
61 } else {
62 normalizing_height = word_res->denorm.y_scale() *
63 (blob_row->x_height() + blob_row->ascenders());
64 }
65 if (debug) {
66 tprintf("normalizing height = %g (scale %g xheight %g ascenders %g)\n",
67 normalizing_height, word_res->denorm.y_scale(),
68 blob_row->x_height(), blob_row->ascenders());
69 }
70 }
71 float wh_ratio = word_res->GetBlobsWidth(col, row) / normalizing_height;
72 if (wh_ratio > max_char_wh_ratio) stats->bad_shape = true;
73 // Compute the gap sum for this shape. If there are only negative or only
74 // positive gaps, record their sum in stats->gap_sum. However, if there is
75 // a mixture, record only the sum of the positive gaps.
76 // TODO(antonova): explain fragment.
77 int negative_gap_sum = 0;
78 for (int c = col; c < row; ++c) {
79 int gap = word_res->GetBlobsGap(c);
80 (gap > 0) ? stats->gap_sum += gap : negative_gap_sum += gap;
81 }
82 if (stats->gap_sum == 0) stats->gap_sum = negative_gap_sum;
83 if (debug) {
84 tprintf("wh_ratio=%g (max_char_wh_ratio=%g) gap_sum=%d %s\n",
85 wh_ratio, max_char_wh_ratio, stats->gap_sum,
86 stats->bad_shape ? "bad_shape" : "");
87 }
88 // Compute shape_cost (for fixed pitch mode).
89 if (fixed_pitch) {
90 bool end_row = (row == (word_res->ratings->dimension() - 1));
91
92 // Ensure that the blob has gaps on the left and the right sides
93 // (except for beginning and ending punctuation) and that there is
94 // no cutting through ink at the blob boundaries.
95 if (col > 0) {
96 float left_gap = word_res->GetBlobsGap(col - 1) / normalizing_height;
97 SEAM *left_seam = word_res->seam_array[col - 1];
98 if ((!end_row && left_gap < kMinGap) || left_seam->priority() > 0.0f) {
99 stats->bad_shape = true;
100 }
101 if (debug) {
102 tprintf("left_gap %g, left_seam %g %s\n", left_gap,
103 left_seam->priority(), stats->bad_shape ? "bad_shape" : "");
104 }
105 }
106 float right_gap = 0.0f;
107 if (!end_row) {
108 right_gap = word_res->GetBlobsGap(row) / normalizing_height;
109 SEAM *right_seam = word_res->seam_array[row];
110 if (right_gap < kMinGap || right_seam->priority() > 0.0f) {
111 stats->bad_shape = true;
112 if (right_gap < kMinGap) stats->bad_fixed_pitch_right_gap = true;
113 }
114 if (debug) {
115 tprintf("right_gap %g right_seam %g %s\n", right_gap,
116 right_seam->priority(), stats->bad_shape ? "bad_shape" : "");
117 }
118 }
119
120 // Impose additional segmentation penalties if blob widths or gaps
121 // distribution don't fit a fixed-pitch model.
122 // Since we only know the widths and gaps of the path explored so far,
123 // the means and variances are computed for the path so far (not
124 // considering characters to the right of the last character on the path).
125 stats->full_wh_ratio = wh_ratio + right_gap;
126 if (parent_stats != nullptr) {
127 stats->full_wh_ratio_total =
128 (parent_stats->full_wh_ratio_total + stats->full_wh_ratio);
129 float mean =
130 stats->full_wh_ratio_total / static_cast<float>(parent_path_length+1);
131 stats->full_wh_ratio_var =
132 parent_stats->full_wh_ratio_var + pow(mean-stats->full_wh_ratio, 2);
133 } else {
134 stats->full_wh_ratio_total = stats->full_wh_ratio;
135 }
136 if (debug) {
137 tprintf("full_wh_ratio %g full_wh_ratio_total %g full_wh_ratio_var %g\n",
138 stats->full_wh_ratio, stats->full_wh_ratio_total,
139 stats->full_wh_ratio_var);
140 }
141
142 stats->shape_cost =
143 FixedPitchWidthCost(wh_ratio, right_gap, end_row, max_char_wh_ratio);
144
145 // For some reason Tesseract prefers to treat the whole CJ words
146 // as one blob when the initial segmentation is particularly bad.
147 // This hack is to avoid favoring such states.
148 if (col == 0 && end_row && wh_ratio > max_char_wh_ratio) {
149 stats->shape_cost += 10;
150 }
151 stats->shape_cost += stats->full_wh_ratio_var;
152 if (debug) tprintf("shape_cost %g\n", stats->shape_cost);
153 }
154}
155
157 float right_gap,
158 bool end_pos,
159 float max_char_wh_ratio) {
160 float cost = 0.0f;
161 if (norm_width > max_char_wh_ratio) cost += norm_width;
162 if (norm_width > kMaxFixedPitchCharAspectRatio)
163 cost += norm_width * norm_width; // extra penalty for merging CJK chars
164 // Penalize skinny blobs, except for punctuation in the last position.
165 if (norm_width+right_gap < 0.5f && !end_pos) {
166 cost += 1.0f - (norm_width + right_gap);
167 }
168 return cost;
169}
170
171} // namespace tesseract
const int kBlnXHeight
Definition: normalis.h:24
#define ASSERT_HOST(x)
Definition: errcode.h:88
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
bool empty() const
Definition: genericvector.h:91
int dimension() const
Definition: matrix.h:536
float y_scale() const
Definition: normalis.h:270
ROW
Definition: ocrrow.h:37
float body_size() const
Definition: ocrrow.h:73
float ascenders() const
Definition: ocrrow.h:82
float x_height() const
Definition: ocrrow.h:64
int GetBlobsGap(int blob_index)
Definition: pageres.cpp:740
DENORM denorm
Definition: pageres.h:201
GenericVector< int > blob_widths
Definition: pageres.h:216
GenericVector< SEAM * > seam_array
Definition: pageres.h:214
int GetBlobsWidth(int start_blob, int last_blob)
Definition: pageres.cpp:730
ROW * blob_row
Definition: pageres.h:197
MATRIX * ratings
Definition: pageres.h:237
SEAM
Definition: seam.h:38
float priority() const
Definition: seam.h:59
static const float kMaxFixedPitchCharAspectRatio
Definition: associate.h:69
static const float kMinGap
Definition: associate.h:70
static float FixedPitchWidthCost(float norm_width, float right_gap, bool end_pos, float max_char_wh_ratio)
Definition: associate.cpp:156
static void ComputeStats(int col, int row, const AssociateStats *parent_stats, int parent_path_length, bool fixed_pitch, float max_char_wh_ratio, WERD_RES *word_res, bool debug, AssociateStats *stats)
Definition: associate.cpp:34