diff --git a/src/Numerics.Tests/FitTests.cs b/src/Numerics.Tests/FitTests.cs index d85ce792c..af6e2f8cb 100644 --- a/src/Numerics.Tests/FitTests.cs +++ b/src/Numerics.Tests/FitTests.cs @@ -359,5 +359,201 @@ public void FitsCurveToBestLine() Assert.AreEqual(7.01013 - 2.08551 * z, resf(z), 1e-4); } } + + #region Fit.Deming - Deming/Orthogonal regression + + [Test] + public void FitDemingToExactLineWhenPointsAreOnLine() + { + var x = new[] { 30.0, 40.0, 50.0, 12.0, -3.4, 100.5 }; + var y = x.Select(z => 4.0 - 1.5 * z).ToArray(); + + var (a, b, c) = Fit.Deming(x, y); + Assert.AreEqual(1.0, a); + Assert.AreEqual(2.0 / 3.0, b, 1e-10); + Assert.AreEqual(-8.0 / 3.0, c, 1e-10); + var (ay, by) = StandardLineToYxLine(a, b, c); + Assert.AreEqual(-1.5, ay, 1e-10); + Assert.AreEqual(4.0, by, 1e-10); + + (a, b, c) = Fit.Deming(y, x); + Assert.AreEqual(2.0 / 3.0, a, 1e-10); + Assert.AreEqual(1, b); + Assert.AreEqual(-8.0 / 3.0, c, 1e-10); + (ay, by) = StandardLineToYxLine(a, b, c); + Assert.AreEqual(-2.0 / 3.0, ay, 1e-10); + Assert.AreEqual(8.0 / 3.0, by, 1e-10); + } + + [Test] + public void FitDemingToBestLine() + { + // Same data as for FitToBestLine + + var x = Enumerable.Range(1, 6).Select(Convert.ToDouble).ToArray(); + var y = new[] { 4.986, 2.347, 2.061, -2.995, -2.352, -5.782 }; + + var (a, b, c) = Fit.Deming(x, y); + Assert.AreEqual(1.0, a); + Assert.AreEqual(0.45061, b, 1e-4); + Assert.AreEqual(-3.36970, c, 1e-4); + var (ay, by) = StandardLineToYxLine(a, b, c); + Assert.AreEqual(-2.21922, ay, 1e-4); + Assert.AreEqual(7.47810, by, 1e-4); + + (a, b, c) = Fit.Deming(y, x); + Assert.AreEqual(0.45061, a, 1e-4); + Assert.AreEqual(1.0, b, 1e-4); + Assert.AreEqual(-3.36970, c, 1e-4); + (ay, by) = StandardLineToYxLine(a, b, c); + Assert.AreEqual(-0.45061, ay, 1e-4); + Assert.AreEqual(3.36970, by, 1e-4); + } + + [Test] + public void FitDemingToBestLineRS() + { + // Example data from: + // https://real-statistics.com/regression/deming-regression/deming-regression-basic-concepts/ + // -> y = 1.018*x - 0.1708 + var x = new[] { 5.1, 5.6, 6.8, 5.9, 4.0, 5.6, 6.6, 6.7, 5.2, 4.5 }; + var y = new[] { 5.4, 5.6, 6.3, 6.1, 4.7, 5.1, 6.6, 6.8, 4.6, 4.1 }; + double varx = 0.05; + double vary = 0.02; + + var (a, b, c) = Fit.Deming(x, y, vary/varx); + Assert.AreEqual(1.0, a); + Assert.AreEqual(-0.98232, b, 1e-4); + Assert.AreEqual(-0.16778, c, 1e-4); + var (ay, by) = StandardLineToYxLine(a, b, c); + Assert.AreEqual(1.01800, ay, 1e-4); + Assert.AreEqual(-0.17080, by, 1e-4); + + (a, b, c) = Fit.Deming(y, x, varx/vary); + Assert.AreEqual(-0.98232, a, 1e-4); + Assert.AreEqual(1.0, b); + Assert.AreEqual(-0.16778, c, 1e-4); + } + + [Test] + public void FitDemingToBestLineR() + { + // R : + // library(SimplyAgree) + // dat = data.frame( + // x = c(7, 8.3, 10.5, 9, 5.1, 8.2, 10.2, 10.3, 7.1, 5.9), + // y = c(7.9, 8.2, 9.6, 9, 6.5, 7.3, 10.2, 10.6, 6.3, 5.2) + // dem1 = dem_reg(x = "x", y = "y", data = dat, error.ratio = 4, weighted = FALSE) + // -> 1.00119 * x - 0.08974 + // Note that in R: error.ratio = var_x/var_y, while delta here is defined as var_y/var_x + // https://cran.r-project.org/web/packages/SimplyAgree/vignettes/Deming.html + + var x = new[] { 7.0, 8.3, 10.5, 9.0, 5.1, 8.2, 10.2, 10.3, 7.1, 5.9 }; + var y = new[] { 7.9, 8.2, 9.6, 9.0, 6.5, 7.3, 10.2, 10.6, 6.3, 5.2 }; + + var (a, b, c) = Fit.Deming(x, y, 1.0/4.0); + Assert.AreEqual(1.0, a); + Assert.AreEqual(-0.99881, b, 1e-4); + Assert.AreEqual(-0.08964, c, 1e-4); + var (ay, by) = StandardLineToYxLine(a, b, c); + Assert.AreEqual(1.00119, ay, 1e-4); + Assert.AreEqual(-0.08974, by, 1e-4); + + (a, b, c) = Fit.Deming(y, x, 4.0); + Assert.AreEqual(-0.99881, a, 1e-4); + Assert.AreEqual(1.0, b); + Assert.AreEqual(-0.08964, c, 1e-4); + } + + [Test] + public void FitDemingToExactHorizontalVerticalLineWhenPointsAreOnLine() + { + // Special case, testing when sxy and either sxx or syy goes zero + var x = Enumerable.Range(1, 6).Select(Convert.ToDouble).ToArray(); + var y = new[] { 5.0, 5.0, 5.0, 5.0, 5.0, 5.0 }; + + var (a, b, c) = Fit.Deming(x, y); + Assert.AreEqual(0.0, a); + Assert.AreEqual(1.0, b); + Assert.AreEqual(-5.0, c); + var (ay, by) = StandardLineToYxLine(a, b, c); + Assert.AreEqual(0, ay); + Assert.AreEqual(5.0, by); + + (a, b, c) = Fit.Deming(y, x); + Assert.AreEqual(1.0, a); + Assert.AreEqual(0.0, b); + Assert.AreEqual(-5.0, c); + (ay, by) = StandardLineToYxLine(a, b, c); + Assert.AreEqual(double.PositiveInfinity, ay); + Assert.AreEqual(5.0, by); + } + + [Test] + public void FitDemingToHorizontalVerticalLine() + { + // Special case, testing when sxy goes zero + var x = new[] { 1.0, 1.0, 5.0, 5.0, 7.0, 7.0 }; + var y = new[] { 3.0, 5.0, 3.0, 5.0, 3.0, 5.0 }; + + var (a, b, c) = Fit.Deming(x, y); + Assert.AreEqual(0.0, a); + Assert.AreEqual(1.0, b); + Assert.AreEqual(-4.0, c); + + (a, b, c) = Fit.Deming(y, x); + Assert.AreEqual(1.0, a); + Assert.AreEqual(0.0, b); + Assert.AreEqual(-4.0, c); + + } + + [Test] + public void FitDemingSymmetricData() + { + // Special case, two equally good solutions + var x = new[] { 3.0, 4.0, 3.0, 4.0 }; + var y = new[] { 1.0, 1.0, 2.0, 2.0 }; + var (a, b, c) = Fit.Deming(x, y); + Assert.AreEqual(1, a); + Assert.AreEqual(0, b); + Assert.AreEqual(-3.5, c); + } + + [Test] + public void FitDemingDegenerateLine() + { + // Special case, no solution + var x = new[] { 3.0, 3.0, 3.0 }; + var y = new[] { 1.0, 1.0, 1.0 }; + var (a, b, c) = Fit.Deming(x, y); + Assert.IsNaN(a); + Assert.IsNaN(b); + Assert.IsNaN(c); + } + + /// + /// Convert line coefficients on the form + /// + /// a*x + b*y + c = 0 + /// + /// to coefficients on the form + /// + /// y = ay*x + by + /// + /// If is zero, the ay will return . + /// + public static (double ay, double by) StandardLineToYxLine(double a, double b, double c) + { + if (Math.Abs(b) > (Math.Abs(a) + Math.Abs(c)) * 1e-10) + { + double ay = -a / b; + double by = -c / b; + return (ay, by); + } + return (double.PositiveInfinity, -c); + } + + #endregion } } diff --git a/src/Numerics/Fit.cs b/src/Numerics/Fit.cs index 84bfa3e48..427f65f12 100644 --- a/src/Numerics/Fit.cs +++ b/src/Numerics/Fit.cs @@ -80,6 +80,34 @@ public static Func LineThroughOriginFunc(double[] x, double[] y) return z => slope * z; } + /// + /// Deming/Orthogonal regression, least-Squares fitting the points in + /// the 2D dataset (x,y) to a line + /// + /// a*x + b*y + c = 0 + /// + /// For equal 1 (the default value), this is + /// performing orthogonal regression, minimizing the sum of squared + /// perpendicular distances from the data points to the regression line. + /// + /// Orthogonal regression is a special case of Deming regression, + /// and is assuming equal error variances on the x and y data, + /// and applied by the argument default value of 1.0. + /// + /// + /// The parameters (a,b,c) are scaled such that a and b + /// in absolute values are always less than one. + /// + /// + /// X data + /// Y data + /// Ratio of variances of x and y data, var(y)/var(x). Default value is 1.0. + /// returning its best fitting parameters as (a, b, c) tuple. + public static (double A, double B, double C) Deming(double[] x, double[] y, double delta = 1.0) + { + return DemingRegression.Fit(x, y, delta); + } + /// /// Least-Squares fitting the points (x,y) to an exponential y : x -> a*exp(r*x), /// returning its best fitting parameters as (a, r) tuple. diff --git a/src/Numerics/LinearRegression/DemingRegression.cs b/src/Numerics/LinearRegression/DemingRegression.cs new file mode 100644 index 000000000..92597ece9 --- /dev/null +++ b/src/Numerics/LinearRegression/DemingRegression.cs @@ -0,0 +1,205 @@ +// +// Math.NET Numerics, part of the Math.NET Project +// http://numerics.mathdotnet.com +// http://github.com/mathnet/mathnet-numerics +// +// Copyright (c) 2009-2023 Math.NET +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. +// + +using System; +using System.Collections.Generic; + +namespace MathNet.Numerics.LinearRegression +{ + public static class DemingRegression + { + /// + /// Deming/Orthogonal regression, least-squares fitting the points in + /// the 2D dataset (x,y) to a line + /// + /// a*x + b*y + c = 0 + /// + /// For equal 1 (the default value), this is + /// performing orthogonal regression, minimizing the sum of squared + /// perpendicular distances from the data points to the regression line. + /// + /// Orthogonal regression is a special case of Deming regression, + /// and is assuming equal error variances on the x and y data, + /// and applied by the argument default value of 1.0. + /// + /// + /// The parameters (a,b,c) are scaled such that a and b + /// in absolute values are always less than one. + /// + /// + /// X data + /// Y data + /// Ratio of variances of x and y data, var(y)/var(x). Default value is 1 + /// returning its best fitting parameters as (a, b, c) tuple. + /// + /// Solution algorithm as described from: + /// "Deming regression MethComp package", May 2007, + /// Anders Christian Jensen, Steno Diabetes Center, Gentofte, Denmark + /// https://en.wikipedia.org/wiki/Deming_regression + /// https://docplayer.gr/85660531-Deming-regression-methcomp-package-may.html + /// Computations are offset to be centered around (mean(x), mean(y)), + /// for improved numerical stability. + /// + public static (double A, double B, double C) Fit(double[] x, double[] y, double delta = 1.0) + { + if (x.Length != y.Length) + { + throw new ArgumentException($"All sample vectors must have the same length. However, vectors with disagreeing length {x.Length} and {y.Length} have been provided. A sample with index i is given by the value at index i of each provided vector."); + } + + if (x.Length <= 1) + { + throw new ArgumentException($"A regression of the requested order requires at least 2 samples. Only {x.Length} samples have been provided."); + } + + // First Pass: Mean and abs-max values + double mx = 0.0; + double my = 0.0; + double maxax = 0; + double maxay = 0; + for (int i = 0; i < x.Length; i++) + { + mx += x[i]; + my += y[i]; + + double ax = Math.Abs(x[i]); + double ay = Math.Abs(y[i]); + if (ax > maxax) + maxax = ax; + if (ay > maxay) + maxay = ay; + } + + // Mean values + mx /= x.Length; + my /= y.Length; + + // Second Pass: Second-degree central sample moments + double sxx = 0.0, sxy = 0.0, syy = 0; + for (int i = 0; i < x.Length; i++) + { + double xdiff = x[i] - mx; + double ydiff = y[i] - my; + sxx += xdiff * xdiff; + sxy += xdiff * ydiff; + syy += ydiff * ydiff; + } + + sxx /= x.Length; + sxy /= x.Length; + syy /= x.Length; + + // Need to either solve for a for y = -a*x: + // −sxy * a^2 + (syy-delta*sxx) * a + delta*sxy = 0 + // or solve for b for x = -b*y: + // −sxy * b^2 + (sxx-syy/delta) * b + sxy/delta = 0 + // Which to choose: Depends on whether the slope is larger than one + + // If sxy is zero, either: + // - data is all symmetric around the mid-point + // - x or y is constant + // Then either a or b, or both, are zero + if (Math.Abs(sxy) <= (delta * sxx + syy) * 1e-10) + { + // When sxx has a value, it is a vertical line + if (delta * sxx > syy && sxx > maxax * 1e-10) + return (0, 1, -my); + // when syy has a value, it is a horizontal line + else if (syy > maxay * 1e-10) + return (1, 0, -mx); + // No line could be calculated + else + return (double.NaN, double.NaN, double.NaN); + } + + // Pick a or b solution such that both a and b are always <= 1 + if (syy <= delta*sxx) + { + double sxydiff = syy - delta*sxx; + double a = -(sxydiff + Math.Sqrt(sxydiff * sxydiff + 4 * delta * sxy * sxy)) / (2 * sxy); + double b = 1; + double c = -my - a * mx; + return (a, b, c); + } + else + { + double syxdiff = sxx - syy/delta; + double a = 1; + double b = -(syxdiff + Math.Sqrt(syxdiff * syxdiff + 4 / delta * sxy * sxy)) / (2 * sxy); + double c = -b * my - mx; + return (a, b, c); + } + + } + + /// + /// Deming/Orthogonal regression, least-Squares fitting the points in + /// the 2D dataset (x,y) to a line + /// + /// a*x + b*y + c = 0 + /// + /// For equal 1 (the default value), this is + /// performing orthogonal regression, minimizing the sum of squared + /// perpendicular distances from the data points to the regression line. + /// + /// See for more details. + /// + /// + /// (x,y) data as tuples + /// Ratio of variances of x and y data, var(y)/var(x). Default value is 1 + /// returning its best fitting parameters as (a, b, c) tuple. + public static (double A, double B, double C) Fit(IEnumerable> samples, double delta = 1.0) + { + var (u, v) = samples.UnpackSinglePass(); + return Fit(u, v); + } + + /// + /// Deming/Orthogonal regression, least-Squares fitting the points in + /// the 2D dataset (x,y) to a line + /// + /// a*x + b*y + c = 0 + /// + /// For equal 1 (the default value), this is + /// performing orthogonal regression, minimizing the sum of squared + /// perpendicular distances from the data points to the regression line. + /// + /// See for more details. + /// + /// + /// (x,y) data as tuples + /// Ratio of variances of x and y data, var(y)/var(x). Default value is 1 + /// returning its best fitting parameters as (a, b, c) tuple. + public static (double A, double B, double C) Fit(IEnumerable<(double, double)> samples, double delta = 1.0) + { + var (u, v) = samples.UnpackSinglePass(); + return Fit(u, v); + } + } +} diff --git a/src/Numerics/LinearRegression/SimpleRegression.cs b/src/Numerics/LinearRegression/SimpleRegression.cs index 5a3897fe8..81ff5df67 100644 --- a/src/Numerics/LinearRegression/SimpleRegression.cs +++ b/src/Numerics/LinearRegression/SimpleRegression.cs @@ -50,7 +50,7 @@ public static (double A, double B) Fit(double[] x, double[] y) if (x.Length <= 1) { - throw new ArgumentException($"A regression of the requested order requires at least {2} samples. Only {x.Length} samples have been provided."); + throw new ArgumentException($"A regression of the requested order requires at least 2 samples. Only {x.Length} samples have been provided."); } // First Pass: Mean (Less robust but faster than ArrayStatistics.Mean)