001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.stat.correlation; 018 019 import org.apache.commons.math.MathException; 020 import org.apache.commons.math.MathRuntimeException; 021 import org.apache.commons.math.distribution.TDistribution; 022 import org.apache.commons.math.distribution.TDistributionImpl; 023 import org.apache.commons.math.linear.RealMatrix; 024 import org.apache.commons.math.linear.BlockRealMatrix; 025 import org.apache.commons.math.stat.regression.SimpleRegression; 026 027 /** 028 * Computes Pearson's product-moment correlation coefficients for pairs of arrays 029 * or columns of a matrix. 030 * 031 * <p>The constructors that take <code>RealMatrix</code> or 032 * <code>double[][]</code> arguments generate correlation matrices. The 033 * columns of the input matrices are assumed to represent variable values. 034 * Correlations are given by the formula</p> 035 * <code>cor(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / [(n - 1)s(X)s(Y)]</code> 036 * where <code>E(X)</code> is the mean of <code>X</code>, <code>E(Y)</code> 037 * is the mean of the <code>Y</code> values and s(X), s(Y) are standard deviations. 038 * 039 * @version $Revision: 811685 $ $Date: 2009-09-05 13:36:48 -0400 (Sat, 05 Sep 2009) $ 040 * @since 2.0 041 */ 042 public class PearsonsCorrelation { 043 044 /** correlation matrix */ 045 private final RealMatrix correlationMatrix; 046 047 /** number of observations */ 048 private final int nObs; 049 050 /** 051 * Create a PearsonsCorrelation instance without data 052 */ 053 public PearsonsCorrelation() { 054 super(); 055 correlationMatrix = null; 056 nObs = 0; 057 } 058 059 /** 060 * Create a PearsonsCorrelation from a rectangular array 061 * whose columns represent values of variables to be correlated. 062 * 063 * @param data rectangular array with columns representing variables 064 * @throws IllegalArgumentException if the input data array is not 065 * rectangular with at least two rows and two columns. 066 */ 067 public PearsonsCorrelation(double[][] data) { 068 this(new BlockRealMatrix(data)); 069 } 070 071 /** 072 * Create a PearsonsCorrelation from a RealMatrix whose columns 073 * represent variables to be correlated. 074 * 075 * @param matrix matrix with columns representing variables to correlate 076 */ 077 public PearsonsCorrelation(RealMatrix matrix) { 078 checkSufficientData(matrix); 079 nObs = matrix.getRowDimension(); 080 correlationMatrix = computeCorrelationMatrix(matrix); 081 } 082 083 /** 084 * Create a PearsonsCorrelation from a {@link Covariance}. The correlation 085 * matrix is computed by scaling the Covariance's covariance matrix. 086 * The Covariance instance must have been created from a data matrix with 087 * columns representing variable values. 088 * 089 * @param covariance Covariance instance 090 */ 091 public PearsonsCorrelation(Covariance covariance) { 092 RealMatrix covarianceMatrix = covariance.getCovarianceMatrix(); 093 if (covarianceMatrix == null) { 094 throw MathRuntimeException.createIllegalArgumentException("covariance matrix is null"); 095 } 096 nObs = covariance.getN(); 097 correlationMatrix = covarianceToCorrelation(covarianceMatrix); 098 } 099 100 /** 101 * Create a PearsonsCorrelation from a covariance matrix. The correlation 102 * matrix is computed by scaling the covariance matrix. 103 * 104 * @param covarianceMatrix covariance matrix 105 * @param numberOfObservations the number of observations in the dataset used to compute 106 * the covariance matrix 107 */ 108 public PearsonsCorrelation(RealMatrix covarianceMatrix, int numberOfObservations) { 109 nObs = numberOfObservations; 110 correlationMatrix = covarianceToCorrelation(covarianceMatrix); 111 112 } 113 114 /** 115 * Returns the correlation matrix 116 * 117 * @return correlation matrix 118 */ 119 public RealMatrix getCorrelationMatrix() { 120 return correlationMatrix; 121 } 122 123 /** 124 * Returns a matrix of standard errors associated with the estimates 125 * in the correlation matrix.<br/> 126 * <code>getCorrelationStandardErrors().getEntry(i,j)</code> is the standard 127 * error associated with <code>getCorrelationMatrix.getEntry(i,j)</code> 128 * <p>The formula used to compute the standard error is <br/> 129 * <code>SE<sub>r</sub> = ((1 - r<sup>2</sup>) / (n - 2))<sup>1/2</sup></code> 130 * where <code>r</code> is the estimated correlation coefficient and 131 * <code>n</code> is the number of observations in the source dataset.</p> 132 * 133 * @return matrix of correlation standard errors 134 */ 135 public RealMatrix getCorrelationStandardErrors() { 136 int nVars = correlationMatrix.getColumnDimension(); 137 double[][] out = new double[nVars][nVars]; 138 for (int i = 0; i < nVars; i++) { 139 for (int j = 0; j < nVars; j++) { 140 double r = correlationMatrix.getEntry(i, j); 141 out[i][j] = Math.sqrt((1 - r * r) /(nObs - 2)); 142 } 143 } 144 return new BlockRealMatrix(out); 145 } 146 147 /** 148 * Returns a matrix of p-values associated with the (two-sided) null 149 * hypothesis that the corresponding correlation coefficient is zero. 150 * <p><code>getCorrelationPValues().getEntry(i,j)</code> is the probability 151 * that a random variable distributed as <code>t<sub>n-2</sub></code> takes 152 * a value with absolute value greater than or equal to <br> 153 * <code>|r|((n - 2) / (1 - r<sup>2</sup>))<sup>1/2</sup></code></p> 154 * <p>The values in the matrix are sometimes referred to as the 155 * <i>significance</i> of the corresponding correlation coefficients.</p> 156 * 157 * @return matrix of p-values 158 * @throws MathException if an error occurs estimating probabilities 159 */ 160 public RealMatrix getCorrelationPValues() throws MathException { 161 TDistribution tDistribution = new TDistributionImpl(nObs - 2); 162 int nVars = correlationMatrix.getColumnDimension(); 163 double[][] out = new double[nVars][nVars]; 164 for (int i = 0; i < nVars; i++) { 165 for (int j = 0; j < nVars; j++) { 166 if (i == j) { 167 out[i][j] = 0d; 168 } else { 169 double r = correlationMatrix.getEntry(i, j); 170 double t = Math.abs(r * Math.sqrt((nObs - 2)/(1 - r * r))); 171 out[i][j] = 2 * (1 - tDistribution.cumulativeProbability(t)); 172 } 173 } 174 } 175 return new BlockRealMatrix(out); 176 } 177 178 179 /** 180 * Computes the correlation matrix for the columns of the 181 * input matrix. 182 * 183 * @param matrix matrix with columns representing variables to correlate 184 * @return correlation matrix 185 */ 186 public RealMatrix computeCorrelationMatrix(RealMatrix matrix) { 187 int nVars = matrix.getColumnDimension(); 188 RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars); 189 for (int i = 0; i < nVars; i++) { 190 for (int j = 0; j < i; j++) { 191 double corr = correlation(matrix.getColumn(i), matrix.getColumn(j)); 192 outMatrix.setEntry(i, j, corr); 193 outMatrix.setEntry(j, i, corr); 194 } 195 outMatrix.setEntry(i, i, 1d); 196 } 197 return outMatrix; 198 } 199 200 /** 201 * Computes the correlation matrix for the columns of the 202 * input rectangular array. The colums of the array represent values 203 * of variables to be correlated. 204 * 205 * @param data matrix with columns representing variables to correlate 206 * @return correlation matrix 207 */ 208 public RealMatrix computeCorrelationMatrix(double[][] data) { 209 return computeCorrelationMatrix(new BlockRealMatrix(data)); 210 } 211 212 /** 213 * Computes the Pearson's product-moment correlation coefficient between the two arrays. 214 * 215 * </p>Throws IllegalArgumentException if the arrays do not have the same length 216 * or their common length is less than 2</p> 217 * 218 * @param xArray first data array 219 * @param yArray second data array 220 * @return Returns Pearson's correlation coefficient for the two arrays 221 * @throws IllegalArgumentException if the arrays lengths do not match or 222 * there is insufficient data 223 */ 224 public double correlation(final double[] xArray, final double[] yArray) throws IllegalArgumentException { 225 SimpleRegression regression = new SimpleRegression(); 226 if(xArray.length == yArray.length && xArray.length > 1) { 227 for(int i=0; i<xArray.length; i++) { 228 regression.addData(xArray[i], yArray[i]); 229 } 230 return regression.getR(); 231 } 232 else { 233 throw MathRuntimeException.createIllegalArgumentException( 234 "invalid array dimensions. xArray has size {0}; yArray has {1} elements", 235 xArray.length, yArray.length); 236 } 237 } 238 239 /** 240 * Derives a correlation matrix from a covariance matrix. 241 * 242 * <p>Uses the formula <br/> 243 * <code>r(X,Y) = cov(X,Y)/s(X)s(Y)</code> where 244 * <code>r(·,·)</code> is the correlation coefficient and 245 * <code>s(·)</code> means standard deviation.</p> 246 * 247 * @param covarianceMatrix the covariance matrix 248 * @return correlation matrix 249 */ 250 public RealMatrix covarianceToCorrelation(RealMatrix covarianceMatrix) { 251 int nVars = covarianceMatrix.getColumnDimension(); 252 RealMatrix outMatrix = new BlockRealMatrix(nVars, nVars); 253 for (int i = 0; i < nVars; i++) { 254 double sigma = Math.sqrt(covarianceMatrix.getEntry(i, i)); 255 outMatrix.setEntry(i, i, 1d); 256 for (int j = 0; j < i; j++) { 257 double entry = covarianceMatrix.getEntry(i, j) / 258 (sigma * Math.sqrt(covarianceMatrix.getEntry(j, j))); 259 outMatrix.setEntry(i, j, entry); 260 outMatrix.setEntry(j, i, entry); 261 } 262 } 263 return outMatrix; 264 } 265 266 /** 267 * Throws IllegalArgumentException of the matrix does not have at least 268 * two columns and two rows 269 * 270 * @param matrix matrix to check for sufficiency 271 */ 272 private void checkSufficientData(final RealMatrix matrix) { 273 int nRows = matrix.getRowDimension(); 274 int nCols = matrix.getColumnDimension(); 275 if (nRows < 2 || nCols < 2) { 276 throw MathRuntimeException.createIllegalArgumentException( 277 "insufficient data: only {0} rows and {1} columns.", 278 nRows, nCols); 279 } 280 } 281 }