tesseract 4.1.1
Loading...
Searching...
No Matches
detlinefit.cpp
Go to the documentation of this file.
1
2// File: detlinefit.cpp
3// Description: Deterministic least median squares line fitting.
4// Author: Ray Smith
5// Created: Thu Feb 28 14:45:01 PDT 2008
6//
7// (C) Copyright 2008, Google Inc.
8// Licensed under the Apache License, Version 2.0 (the "License");
9// you may not use this file except in compliance with the License.
10// You may obtain a copy of the License at
11// http://www.apache.org/licenses/LICENSE-2.0
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17//
19
20#include "detlinefit.h"
21#include "statistc.h"
22#include "tprintf.h"
23
24#include <algorithm>
25#include <cfloat> // for FLT_MAX
26
27namespace tesseract {
28
29// The number of points to consider at each end.
30const int kNumEndPoints = 3;
31// The minimum number of points at which to switch to number of points
32// for badly fitted lines.
33// To ensure a sensible error metric, kMinPointsForErrorCount should be at
34// least kMaxRealDistance / (1 - %ile) where %ile is the fractile used in
35// ComputeUpperQuartileError.
37// The maximum real distance to use before switching to number of
38// mis-fitted points, which will get square-rooted for true distance.
39const int kMaxRealDistance = 2.0;
40
41DetLineFit::DetLineFit() : square_length_(0.0) {
42}
43
44// Delete all Added points.
46 pts_.clear();
47 distances_.clear();
48}
49
50// Add a new point. Takes a copy - the pt doesn't need to stay in scope.
51void DetLineFit::Add(const ICOORD& pt) {
52 pts_.push_back(PointWidth(pt, 0));
53}
54// Associates a half-width with the given point if a point overlaps the
55// previous point by more than half the width, and its distance is further
56// than the previous point, then the more distant point is ignored in the
57// distance calculation. Useful for ignoring i dots and other diacritics.
58void DetLineFit::Add(const ICOORD& pt, int halfwidth) {
59 pts_.push_back(PointWidth(pt, halfwidth));
60}
61
62// Fits a line to the points, ignoring the skip_first initial points and the
63// skip_last final points, returning the fitted line as a pair of points,
64// and the upper quartile error.
65double DetLineFit::Fit(int skip_first, int skip_last,
66 ICOORD* pt1, ICOORD* pt2) {
67 // Do something sensible with no points.
68 if (pts_.empty()) {
69 pt1->set_x(0);
70 pt1->set_y(0);
71 *pt2 = *pt1;
72 return 0.0;
73 }
74 // Count the points and find the first and last kNumEndPoints.
75 int pt_count = pts_.size();
76 ICOORD* starts[kNumEndPoints];
77 if (skip_first >= pt_count) skip_first = pt_count - 1;
78 int start_count = 0;
79 int end_i = std::min(skip_first + kNumEndPoints, pt_count);
80 for (int i = skip_first; i < end_i; ++i) {
81 starts[start_count++] = &pts_[i].pt;
82 }
83 ICOORD* ends[kNumEndPoints];
84 if (skip_last >= pt_count) skip_last = pt_count - 1;
85 int end_count = 0;
86 end_i = std::max(0, pt_count - kNumEndPoints - skip_last);
87 for (int i = pt_count - 1 - skip_last; i >= end_i; --i) {
88 ends[end_count++] = &pts_[i].pt;
89 }
90 // 1 or 2 points need special treatment.
91 if (pt_count <= 2) {
92 *pt1 = *starts[0];
93 if (pt_count > 1)
94 *pt2 = *ends[0];
95 else
96 *pt2 = *pt1;
97 return 0.0;
98 }
99 // Although with between 2 and 2*kNumEndPoints-1 points, there will be
100 // overlap in the starts, ends sets, this is OK and taken care of by the
101 // if (*start != *end) test below, which also tests for equal input points.
102 double best_uq = -1.0;
103 // Iterate each pair of points and find the best fitting line.
104 for (int i = 0; i < start_count; ++i) {
105 ICOORD* start = starts[i];
106 for (int j = 0; j < end_count; ++j) {
107 ICOORD* end = ends[j];
108 if (*start != *end) {
109 ComputeDistances(*start, *end);
110 // Compute the upper quartile error from the line.
111 double dist = EvaluateLineFit();
112 if (dist < best_uq || best_uq < 0.0) {
113 best_uq = dist;
114 *pt1 = *start;
115 *pt2 = *end;
116 }
117 }
118 }
119 }
120 // Finally compute the square root to return the true distance.
121 return best_uq > 0.0 ? sqrt(best_uq) : best_uq;
122}
123
124// Constrained fit with a supplied direction vector. Finds the best line_pt,
125// that is one of the supplied points having the median cross product with
126// direction, ignoring points that have a cross product outside of the range
127// [min_dist, max_dist]. Returns the resulting error metric using the same
128// reduced set of points.
129// *Makes use of floating point arithmetic*
130double DetLineFit::ConstrainedFit(const FCOORD& direction,
131 double min_dist, double max_dist,
132 bool debug, ICOORD* line_pt) {
133 ComputeConstrainedDistances(direction, min_dist, max_dist);
134 // Do something sensible with no points or computed distances.
135 if (pts_.empty() || distances_.empty()) {
136 line_pt->set_x(0);
137 line_pt->set_y(0);
138 return 0.0;
139 }
140 int median_index = distances_.choose_nth_item(distances_.size() / 2);
141 *line_pt = distances_[median_index].data;
142 if (debug) {
143 tprintf("Constrained fit to dir %g, %g = %d, %d :%d distances:\n",
144 direction.x(), direction.y(),
145 line_pt->x(), line_pt->y(), distances_.size());
146 for (int i = 0; i < distances_.size(); ++i) {
147 tprintf("%d: %d, %d -> %g\n", i, distances_[i].data.x(),
148 distances_[i].data.y(), distances_[i].key);
149 }
150 tprintf("Result = %d\n", median_index);
151 }
152 // Center distances on the fitted point.
153 double dist_origin = direction * *line_pt;
154 for (int i = 0; i < distances_.size(); ++i) {
155 distances_[i].key -= dist_origin;
156 }
157 return sqrt(EvaluateLineFit());
158}
159
160// Returns true if there were enough points at the last call to Fit or
161// ConstrainedFit for the fitted points to be used on a badly fitted line.
163 return distances_.size() >= kMinPointsForErrorCount;
164}
165
166// Backwards compatible fit returning a gradient and constant.
167// Deprecated. Prefer Fit(ICOORD*, ICOORD*) where possible, but use this
168// function in preference to the LMS class.
169double DetLineFit::Fit(float* m, float* c) {
170 ICOORD start, end;
171 double error = Fit(&start, &end);
172 if (end.x() != start.x()) {
173 *m = static_cast<float>(end.y() - start.y()) / (end.x() - start.x());
174 *c = start.y() - *m * start.x();
175 } else {
176 *m = 0.0f;
177 *c = 0.0f;
178 }
179 return error;
180}
181
182// Backwards compatible constrained fit with a supplied gradient.
183// Deprecated. Use ConstrainedFit(const FCOORD& direction) where possible
184// to avoid potential difficulties with infinite gradients.
185double DetLineFit::ConstrainedFit(double m, float* c) {
186 // Do something sensible with no points.
187 if (pts_.empty()) {
188 *c = 0.0f;
189 return 0.0;
190 }
191 double cos = 1.0 / sqrt(1.0 + m * m);
192 FCOORD direction(cos, m * cos);
193 ICOORD line_pt;
194 double error = ConstrainedFit(direction, -FLT_MAX, FLT_MAX, false, &line_pt);
195 *c = line_pt.y() - line_pt.x() * m;
196 return error;
197}
198
199// Computes and returns the squared evaluation metric for a line fit.
200double DetLineFit::EvaluateLineFit() {
201 // Compute the upper quartile error from the line.
202 double dist = ComputeUpperQuartileError();
203 if (distances_.size() >= kMinPointsForErrorCount &&
205 // Use the number of mis-fitted points as the error metric, as this
206 // gives a better measure of fit for badly fitted lines where more
207 // than a quarter are badly fitted.
208 double threshold = kMaxRealDistance * sqrt(square_length_);
209 dist = NumberOfMisfittedPoints(threshold);
210 }
211 return dist;
212}
213
214// Computes the absolute error distances of the points from the line,
215// and returns the squared upper-quartile error distance.
216double DetLineFit::ComputeUpperQuartileError() {
217 int num_errors = distances_.size();
218 if (num_errors == 0) return 0.0;
219 // Get the absolute values of the errors.
220 for (int i = 0; i < num_errors; ++i) {
221 if (distances_[i].key < 0) distances_[i].key = -distances_[i].key;
222 }
223 // Now get the upper quartile distance.
224 int index = distances_.choose_nth_item(3 * num_errors / 4);
225 double dist = distances_[index].key;
226 // The true distance is the square root of the dist squared / square_length.
227 // Don't bother with the square root. Just return the square distance.
228 return square_length_ > 0.0 ? dist * dist / square_length_ : 0.0;
229}
230
231// Returns the number of sample points that have an error more than threshold.
232int DetLineFit::NumberOfMisfittedPoints(double threshold) const {
233 int num_misfits = 0;
234 int num_dists = distances_.size();
235 // Get the absolute values of the errors.
236 for (int i = 0; i < num_dists; ++i) {
237 if (distances_[i].key > threshold)
238 ++num_misfits;
239 }
240 return num_misfits;
241}
242
243// Computes all the cross product distances of the points from the line,
244// storing the actual (signed) cross products in distances.
245// Ignores distances of points that are further away than the previous point,
246// and overlaps the previous point by at least half.
247void DetLineFit::ComputeDistances(const ICOORD& start, const ICOORD& end) {
248 distances_.truncate(0);
249 ICOORD line_vector = end;
250 line_vector -= start;
251 square_length_ = line_vector.sqlength();
252 int line_length = IntCastRounded(sqrt(square_length_));
253 // Compute the distance of each point from the line.
254 int prev_abs_dist = 0;
255 int prev_dot = 0;
256 for (int i = 0; i < pts_.size(); ++i) {
257 ICOORD pt_vector = pts_[i].pt;
258 pt_vector -= start;
259 int dot = line_vector % pt_vector;
260 // Compute |line_vector||pt_vector|sin(angle between)
261 int dist = line_vector * pt_vector;
262 int abs_dist = dist < 0 ? -dist : dist;
263 if (abs_dist > prev_abs_dist && i > 0) {
264 // Ignore this point if it overlaps the previous one.
265 int separation = abs(dot - prev_dot);
266 if (separation < line_length * pts_[i].halfwidth ||
267 separation < line_length * pts_[i - 1].halfwidth)
268 continue;
269 }
270 distances_.push_back(DistPointPair(dist, pts_[i].pt));
271 prev_abs_dist = abs_dist;
272 prev_dot = dot;
273 }
274}
275
276// Computes all the cross product distances of the points perpendicular to
277// the given direction, ignoring distances outside of the give distance range,
278// storing the actual (signed) cross products in distances_.
279void DetLineFit::ComputeConstrainedDistances(const FCOORD& direction,
280 double min_dist, double max_dist) {
281 distances_.truncate(0);
282 square_length_ = direction.sqlength();
283 // Compute the distance of each point from the line.
284 for (int i = 0; i < pts_.size(); ++i) {
285 FCOORD pt_vector = pts_[i].pt;
286 // Compute |line_vector||pt_vector|sin(angle between)
287 double dist = direction * pt_vector;
288 if (min_dist <= dist && dist <= max_dist)
289 distances_.push_back(DistPointPair(dist, pts_[i].pt));
290 }
291}
292
293} // namespace tesseract.
int IntCastRounded(double x)
Definition: helpers.h:175
DLLSYM void tprintf(const char *format,...)
Definition: tprintf.cpp:35
const int kMaxRealDistance
Definition: detlinefit.cpp:39
const int kMinPointsForErrorCount
Definition: detlinefit.cpp:36
const int kNumEndPoints
Definition: detlinefit.cpp:30
int push_back(T object)
bool empty() const
Definition: genericvector.h:91
int size() const
Definition: genericvector.h:72
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:51
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
Definition: detlinefit.cpp:130
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
bool SufficientPointsForIndependentFit() const
Definition: detlinefit.cpp:162
integer coordinate
Definition: points.h:32
void set_x(int16_t xin)
rewrite function
Definition: points.h:61
int16_t y() const
access_function
Definition: points.h:56
void set_y(int16_t yin)
rewrite function
Definition: points.h:65
float sqlength() const
find sq length
Definition: points.h:73
int16_t x() const
access function
Definition: points.h:52
Definition: points.h:189
float y() const
Definition: points.h:210
float sqlength() const
find sq length
Definition: points.h:223
float x() const
Definition: points.h:207