tesseract 4.1.1
Loading...
Searching...
No Matches
linlsq.h
Go to the documentation of this file.
1/**********************************************************************
2 * File: linlsq.h (Formerly llsq.h)
3 * Description: Linear Least squares fitting code.
4 * Author: Ray Smith
5 * Created: Thu Sep 12 08:44:51 BST 1991
6 *
7 * (C) Copyright 1991, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19
20#ifndef TESSERACT_CCSTRUCT_LINLSQ_H_
21#define TESSERACT_CCSTRUCT_LINLSQ_H_
22
23#include <cstdint> // for int32_t
24#include "points.h" // for FCOORD
25
26template <typename T> class GenericVector;
27
28class LLSQ {
29 public:
30 LLSQ() { // constructor
31 clear(); // set to zeros
32 }
33 void clear(); // initialize
34
35 // Adds an element with a weight of 1.
36 void add(double x, double y);
37 // Adds an element with a specified weight.
38 void add(double x, double y, double weight);
39 // Adds a whole LLSQ.
40 void add(const LLSQ& other);
41 // Deletes an element with a weight of 1.
42 void remove(double x, double y);
43 int32_t count() const { // no of elements
44 return static_cast<int>(total_weight + 0.5);
45 }
46
47 double m() const; // get gradient
48 double c(double m) const; // get constant
49 double rms(double m, double c) const; // get error
50 double pearson() const; // get correlation coefficient.
51
52 // Returns the x,y means as an FCOORD.
53 FCOORD mean_point() const;
54
55 // Returns the average sum of squared perpendicular error from a line
56 // through mean_point() in the direction dir.
57 double rms_orth(const FCOORD &dir) const;
58
59 // Returns the direction of the fitted line as a unit vector, using the
60 // least mean squared perpendicular distance. The line runs through the
61 // mean_point, i.e. a point p on the line is given by:
62 // p = mean_point() + lambda * vector_fit() for some real number lambda.
63 // Note that the result (0<=x<=1, -1<=y<=-1) is directionally ambiguous
64 // and may be negated without changing its meaning, since a line is only
65 // unique to a range of pi radians.
66 // Modernists prefer to think of this as an Eigenvalue problem, but
67 // Pearson had the simple solution in 1901.
68 //
69 // Note that this is equivalent to returning the Principal Component in PCA,
70 // or the eigenvector corresponding to the largest eigenvalue in the
71 // covariance matrix.
72 FCOORD vector_fit() const;
73
74 // Returns the covariance.
75 double covariance() const {
76 if (total_weight > 0.0)
77 return (sigxy - sigx * sigy / total_weight) / total_weight;
78 else
79 return 0.0;
80 }
81 double x_variance() const {
82 if (total_weight > 0.0)
83 return (sigxx - sigx * sigx / total_weight) / total_weight;
84 else
85 return 0.0;
86 }
87 double y_variance() const {
88 if (total_weight > 0.0)
89 return (sigyy - sigy * sigy / total_weight) / total_weight;
90 else
91 return 0.0;
92 }
93
94 private:
95 double total_weight; // no of elements or sum of weights.
96 double sigx; // sum of x
97 double sigy; // sum of y
98 double sigxx; // sum x squared
99 double sigxy; // sum of xy
100 double sigyy; // sum y squared
101};
102
103
104// Returns the median value of the vector, given that the values are
105// circular, with the given modulus. Values may be signed or unsigned,
106// eg range from -pi to pi (modulus 2pi) or from 0 to 2pi (modulus 2pi).
107// NOTE that the array is shuffled, but the time taken is linear.
108// An assumption is made that most of the values are spread over no more than
109// half the range, but wrap-around is accounted for if the median is near
110// the wrap-around point.
111// Cannot be a member of GenericVector, as it makes heavy used of LLSQ.
112// T must be an integer or float/double type.
113template<typename T> T MedianOfCircularValues(T modulus, GenericVector<T>* v) {
114 LLSQ stats;
115 T halfrange = static_cast<T>(modulus / 2);
116 int num_elements = v->size();
117 for (int i = 0; i < num_elements; ++i) {
118 stats.add((*v)[i], (*v)[i] + halfrange);
119 }
120 bool offset_needed = stats.y_variance() < stats.x_variance();
121 if (offset_needed) {
122 for (int i = 0; i < num_elements; ++i) {
123 (*v)[i] += halfrange;
124 }
125 }
126 int median_index = v->choose_nth_item(num_elements / 2);
127 if (offset_needed) {
128 for (int i = 0; i < num_elements; ++i) {
129 (*v)[i] -= halfrange;
130 }
131 }
132 return (*v)[median_index];
133}
134
135
136#endif // TESSERACT_CCSTRUCT_LINLSQ_H_
T MedianOfCircularValues(T modulus, GenericVector< T > *v)
Definition: linlsq.h:113
int size() const
Definition: genericvector.h:72
int choose_nth_item(int target_index)
Definition: linlsq.h:28
double y_variance() const
Definition: linlsq.h:87
double m() const
Definition: linlsq.cpp:100
double pearson() const
Definition: linlsq.cpp:153
void remove(double x, double y)
Definition: linlsq.cpp:82
double c(double m) const
Definition: linlsq.cpp:116
double x_variance() const
Definition: linlsq.h:81
double rms(double m, double c) const
Definition: linlsq.cpp:130
double covariance() const
Definition: linlsq.h:75
void clear()
Definition: linlsq.cpp:32
void add(double x, double y)
Definition: linlsq.cpp:48
double rms_orth(const FCOORD &dir) const
Definition: linlsq.cpp:195
int32_t count() const
Definition: linlsq.h:43
FCOORD mean_point() const
Definition: linlsq.cpp:166
FCOORD vector_fit() const
Definition: linlsq.cpp:251
LLSQ()
Definition: linlsq.h:30
Definition: points.h:189