|
| 1 | +// <copyright file="ParameterStatisticsTests.cs" company="Math.NET"> |
| 2 | +// Math.NET Numerics, part of the Math.NET Project |
| 3 | +// https://numerics.mathdotnet.com |
| 4 | +// https://github.com/mathnet/mathnet-numerics |
| 5 | +// |
| 6 | +// Copyright (c) 2009-$CURRENT_YEAR$ Math.NET |
| 7 | +// |
| 8 | +// Permission is hereby granted, free of charge, to any person |
| 9 | +// obtaining a copy of this software and associated documentation |
| 10 | +// files (the "Software"), to deal in the Software without |
| 11 | +// restriction, including without limitation the rights to use, |
| 12 | +// copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 13 | +// copies of the Software, and to permit persons to whom the |
| 14 | +// Software is furnished to do so, subject to the following |
| 15 | +// conditions: |
| 16 | +// |
| 17 | +// The above copyright notice and this permission notice shall be |
| 18 | +// included in all copies or substantial portions of the Software. |
| 19 | +// |
| 20 | +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 21 | +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES |
| 22 | +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 23 | +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT |
| 24 | +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, |
| 25 | +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 26 | +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| 27 | +// OTHER DEALINGS IN THE SOFTWARE. |
| 28 | +// </copyright> |
| 29 | + |
| 30 | +using MathNet.Numerics.LinearAlgebra; |
| 31 | +using MathNet.Numerics.Statistics; |
| 32 | +using NUnit.Framework; |
| 33 | +using System; |
| 34 | +using System.Linq; |
| 35 | + |
namespace MathNet.Numerics.Tests.StatisticsTests
{
    /// <summary>
    /// Tests for the <c>ParameterStatistics</c> helpers: covariance, standard errors,
    /// t-statistics, p-values, confidence-interval half-widths, correlation and dependencies.
    /// </summary>
    [TestFixture]
    public class ParameterStatisticsTests
    {
        #region Polynomial Regression Tests

        /// <summary>
        /// Fits a second-order polynomial to five points with the normal equations and
        /// checks the parameter statistics and fit statistics against the reference table below.
        /// </summary>
        [Test]
        public void PolynomialRegressionTest()
        {
            // https://github.com/mathnet/mathnet-numerics/discussions/801

            // Reference results the assertions below are checked against.
            //
            // Y = B0 + B1*X + B2*X^2
            // Parameter  Value     Error     t-value    Pr(>|t|)  LCL        UCL       CI half_width
            // --------------------------------------------------------------------------------------------
            // B0         -0.24     3.07019   -0.07817   0.94481   -13.44995  12.96995  13.20995
            // B1         3.46286   2.33969   1.48005    0.27700   -6.60401   13.52972  10.06686
            // B2         2.64286   0.38258   6.90799    0.02032   0.99675    4.28897   1.64611
            // --------------------------------------------------------------------------------------------
            //
            // Fit statistics
            // -----------------------------------------
            // Degree of freedom          2
            // Reduced Chi-Sqr            2.04914
            // Residual Sum of Squares    4.09829
            // R Value                    0.99947
            // R-Square(COD)              0.99893
            // Adj. R-Square              0.99786
            // Root-MSE(SD)               1.43148
            // -----------------------------------------

            const double tolerance = 0.001;

            double[] x = { 1, 2, 3, 4, 5 };
            double[] y = { 6.2, 16.9, 33, 57.5, 82.5 };
            const int order = 2;

            var sampleCount = x.Length;
            var parameterCount = order + 1;          // one coefficient per power 0..order
            var dof = sampleCount - parameterCount;  // degrees of freedom

            // [sampleCount x parameterCount] design matrix: row i holds x_i^0, x_i^1, x_i^2,
            // which turns the polynomial fit into a linear least-squares problem.
            //   [ 1  x1  x1^2 ]
            //   [ 1  x2  x2^2 ]
            //   [     ...     ]
            //   [ 1  xN  xN^2 ]
            var design = Matrix<double>.Build.Dense(sampleCount, parameterCount, (i, j) => Math.Pow(x[i], j));
            var observed = Vector<double>.Build.DenseOfArray(y);

            // Best-fit parameters from the normal equations: (X'X)^-1 X'y
            var normalMatrixInverse = design.TransposeThisAndMultiply(design).Inverse();
            var parameters = normalMatrixInverse.Multiply(design.TransposeThisAndMultiply(observed));

            // Residual sum of squares and residual variance (RSS / dof)
            var residuals = design.Multiply(parameters) - observed;
            var rss = residuals.DotProduct(residuals);
            var residualVariance = rss / dof;

            var covariance = ParameterStatistics.CovarianceMatrixForLinearRegression(design, residualVariance);
            var standardErrors = ParameterStatistics.StandardErrors(covariance);
            var tStatistics = ParameterStatistics.TStatistics(parameters, standardErrors);
            var pValues = ParameterStatistics.PValues(tStatistics, dof);
            var confIntervals = ParameterStatistics.ConfidenceIntervalHalfWidths(standardErrors, dof, 0.95);

            // Total sum of squares about the mean, needed for R-squared
            var yMean = y.Average();
            var tss = y.Sum(value => (value - yMean) * (value - yMean));
            var rSquared = 1.0 - rss / tss;
            var adjustedRSquared = 1 - (1 - rSquared) * (sampleCount - 1) / dof;
            var rootMSE = Math.Sqrt(residualVariance);

            // Per-parameter statistics (order: B0, B1, B2)
            AssertVectorEqual(parameters, new[] { -0.24, 3.46286, 2.64286 }, tolerance, "parameters");
            AssertVectorEqual(standardErrors, new[] { 3.07019, 2.33969, 0.38258 }, tolerance, "standardErrors");
            AssertVectorEqual(tStatistics, new[] { -0.07817, 1.48005, 6.90799 }, tolerance, "tStatistics");
            AssertVectorEqual(pValues, new[] { 0.94481, 0.27700, 0.02032 }, tolerance, "pValues");
            AssertVectorEqual(confIntervals, new[] { 13.20995, 10.06686, 1.64611 }, tolerance, "confIntervals");

            // Fit statistics
            Assert.That(dof, Is.EqualTo(2));
            Assert.That(residualVariance, Is.EqualTo(2.04914).Within(tolerance));
            Assert.That(rss, Is.EqualTo(4.09829).Within(tolerance));
            Assert.That(Math.Sqrt(rSquared), Is.EqualTo(0.99947).Within(tolerance)); // R value
            Assert.That(rSquared, Is.EqualTo(0.99893).Within(tolerance));
            Assert.That(adjustedRSquared, Is.EqualTo(0.99786).Within(tolerance));
            Assert.That(rootMSE, Is.EqualTo(1.43148).Within(tolerance));
        }

        #endregion

        #region Matrix Utility Tests

        /// <summary>
        /// Converts a 3x3 covariance matrix to a correlation matrix and checks its shape,
        /// unit diagonal, bounded off-diagonal entries and three hand-calculated values.
        /// </summary>
        [Test]
        public void CorrelationFromCovarianceTest()
        {
            // Variances 4, 9, 16 give standard deviations 2, 3, 4.
            var covariance = Matrix<double>.Build.DenseOfArray(new double[,]
            {
                { 4.0, 1.2, -0.8 },
                { 1.2, 9.0, 0.6 },
                { -0.8, 0.6, 16.0 }
            });

            var correlation = ParameterStatistics.CorrelationFromCovariance(covariance);

            Assert.That(correlation.RowCount, Is.EqualTo(3));
            Assert.That(correlation.ColumnCount, Is.EqualTo(3));

            for (var row = 0; row < correlation.RowCount; row++)
            {
                for (var column = 0; column < correlation.ColumnCount; column++)
                {
                    if (row == column)
                    {
                        // Diagonal elements should be 1
                        Assert.That(correlation[row, column], Is.EqualTo(1.0).Within(1e-10));
                    }
                    else
                    {
                        // Off-diagonal elements should be between -1 and 1
                        Assert.That(correlation[row, column], Is.GreaterThanOrEqualTo(-1.0).And.LessThanOrEqualTo(1.0));
                    }
                }
            }

            // Manually calculated as cov[i, j] / (sd[i] * sd[j])
            Assert.That(correlation[0, 1], Is.EqualTo(0.2).Within(1e-10));   //  1.2 / (2 * 3)
            Assert.That(correlation[0, 2], Is.EqualTo(-0.1).Within(1e-10));  // -0.8 / (2 * 4)
            Assert.That(correlation[1, 2], Is.EqualTo(0.05).Within(1e-10));  //  0.6 / (3 * 4)
        }

        #endregion

        #region Special Cases Tests

        /// <summary>
        /// Checks that two strongly correlated parameters report a high dependency value
        /// while a weakly correlated third parameter reports a low one.
        /// </summary>
        [Test]
        public void DependenciesTest()
        {
            // Correlation matrix with high multicollinearity between parameters 0 and 1 (r = 0.95)
            var correlation = Matrix<double>.Build.DenseOfArray(new double[,]
            {
                { 1.0, 0.95, 0.3 },
                { 0.95, 1.0, 0.2 },
                { 0.3, 0.2, 1.0 }
            });

            var dependencies = ParameterStatistics.DependenciesFromCorrelation(correlation);

            Assert.That(dependencies.Count, Is.EqualTo(3));

            // First two parameters should have high dependency values
            Assert.That(dependencies[0], Is.GreaterThan(0.8));
            Assert.That(dependencies[1], Is.GreaterThan(0.8));

            // Third parameter should have lower dependency
            Assert.That(dependencies[2], Is.LessThan(0.3));
        }

        /// <summary>
        /// Checks that each confidence-interval half-width equals the standard error times the
        /// two-tailed Student t critical value for 10 degrees of freedom at 95% confidence.
        /// </summary>
        [Test]
        public void ConfidenceIntervalsTest()
        {
            var standardErrors = Vector<double>.Build.Dense(new double[] { 0.1, 0.2, 0.5 });
            const int df = 10;                    // Degrees of freedom
            const double confidenceLevel = 0.95;  // 95% confidence

            var halfWidths = ParameterStatistics.ConfidenceIntervalHalfWidths(standardErrors, df, confidenceLevel);

            Assert.That(halfWidths.Count, Is.EqualTo(3));

            // Two-tailed t-critical value for df=10 at 95% confidence, i.e. t(0.975, 10).
            const double tCritical = 2.228139;

            // The tolerance must stay well below the smallest expected half-width (~0.22);
            // otherwise a wrong critical value (e.g. the one-tailed 1.812) would go unnoticed.
            const double tolerance = 1e-4;

            for (var i = 0; i < halfWidths.Count; i++)
            {
                Assert.That(halfWidths[i], Is.EqualTo(standardErrors[i] * tCritical).Within(tolerance));
            }
        }

        #endregion

        #region Helpers

        /// <summary>
        /// Asserts that <paramref name="actual"/> has the same length as <paramref name="expected"/>
        /// and that every element matches within <paramref name="tolerance"/> (absolute).
        /// </summary>
        /// <param name="actual">Vector produced by the code under test.</param>
        /// <param name="expected">Reference values, in the same order.</param>
        /// <param name="tolerance">Maximum allowed absolute difference per element.</param>
        /// <param name="label">Name used in the failure message to identify the vector.</param>
        private static void AssertVectorEqual(Vector<double> actual, double[] expected, double tolerance, string label)
        {
            Assert.That(actual.Count, Is.EqualTo(expected.Length), label + ".Count");

            for (var i = 0; i < expected.Length; i++)
            {
                Assert.That(actual[i], Is.EqualTo(expected[i]).Within(tolerance), label + "[" + i + "]");
            }
        }

        #endregion
    }
}
0 commit comments