001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.correlation;
018    
019    import org.apache.commons.math.MathRuntimeException;
020    import org.apache.commons.math.linear.RealMatrix;
021    import org.apache.commons.math.linear.BlockRealMatrix;
022    import org.apache.commons.math.stat.descriptive.moment.Mean;
023    import org.apache.commons.math.stat.descriptive.moment.Variance;
024    
025    /**
026     * Computes covariances for pairs of arrays or columns of a matrix.
027     *
028     * <p>The constructors that take <code>RealMatrix</code> or
029     * <code>double[][]</code> arguments generate covariance matrices.  The
030     * columns of the input matrices are assumed to represent variable values.</p>
031     *
032     * <p>The constructor argument <code>biasCorrected</code> determines whether or
033     * not computed covariances are bias-corrected.</p>
034     *
035     * <p>Unbiased covariances are given by the formula</p>
036     * <code>cov(X, Y) = &Sigma;[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
037     * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
038     * is the mean of the <code>Y</code> values.
039     *
040     * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
041     *
042     * @version $Revision: 811685 $ $Date: 2009-09-05 13:36:48 -0400 (Sat, 05 Sep 2009) $
043     * @since 2.0
044     */
045    public class Covariance {
046    
047        /** covariance matrix */
048        private final RealMatrix covarianceMatrix;
049    
050        /**
051         * Create an empty covariance matrix.
052         */
053        /** Number of observations (length of covariate vectors) */
054        private final int n;
055    
056        /**
057         * Create a Covariance with no data
058         */
059        public Covariance() {
060            super();
061            covarianceMatrix = null;
062            n = 0;
063        }
064    
065        /**
066         * Create a Covariance matrix from a rectangular array
067         * whose columns represent covariates.
068         *
069         * <p>The <code>biasCorrected</code> parameter determines whether or not
070         * covariance estimates are bias-corrected.</p>
071         *
072         * <p>The input array must be rectangular with at least two columns
073         * and two rows.</p>
074         *
075         * @param data rectangular array with columns representing covariates
076         * @param biasCorrected true means covariances are bias-corrected
077         * @throws IllegalArgumentException if the input data array is not
078         * rectangular with at least two rows and two columns.
079         */
080        public Covariance(double[][] data, boolean biasCorrected) {
081            this(new BlockRealMatrix(data), biasCorrected);
082        }
083    
084        /**
085         * Create a Covariance matrix from a rectangular array
086         * whose columns represent covariates.
087         *
088         * <p>The input array must be rectangular with at least two columns
089         * and two rows</p>
090         *
091         * @param data rectangular array with columns representing covariates
092         * @throws IllegalArgumentException if the input data array is not
093         * rectangular with at least two rows and two columns.
094         */
095        public Covariance(double[][] data) {
096            this(data, true);
097        }
098    
099        /**
100         * Create a covariance matrix from a matrix whose columns
101         * represent covariates.
102         *
103         * <p>The <code>biasCorrected</code> parameter determines whether or not
104         * covariance estimates are bias-corrected.</p>
105         *
106         * <p>The matrix must have at least two columns and two rows</p>
107         *
108         * @param matrix matrix with columns representing covariates
109         * @param biasCorrected true means covariances are bias-corrected
110         * @throws IllegalArgumentException if the input matrix does not have
111         * at least two rows and two columns
112         */
113        public Covariance(RealMatrix matrix, boolean biasCorrected) {
114           checkSufficientData(matrix);
115           n = matrix.getRowDimension();
116           covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
117        }
118    
119        /**
120         * Create a covariance matrix from a matrix whose columns
121         * represent covariates.
122         *
123         * <p>The matrix must have at least two columns and two rows</p>
124         *
125         * @param matrix matrix with columns representing covariates
126         * @throws IllegalArgumentException if the input matrix does not have
127         * at least two rows and two columns
128         */
129        public Covariance(RealMatrix matrix) {
130            this(matrix, true);
131        }
132    
133        /**
134         * Returns the covariance matrix
135         *
136         * @return covariance matrix
137         */
138        public RealMatrix getCovarianceMatrix() {
139            return covarianceMatrix;
140        }
141    
142        /**
143         * Returns the number of observations (length of covariate vectors)
144         *
145         * @return number of observations
146         */
147    
148        public int getN() {
149            return n;
150        }
151    
152        /**
153         * Compute a covariance matrix from a matrix whose columns represent
154         * covariates.
155         * @param matrix input matrix (must have at least two columns and two rows)
156         * @param biasCorrected determines whether or not covariance estimates are bias-corrected
157         * @return covariance matrix
158         */
159        protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) {
160            int dimension = matrix.getColumnDimension();
161            Variance variance = new Variance(biasCorrected);
162            RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
163            for (int i = 0; i < dimension; i++) {
164                for (int j = 0; j < i; j++) {
165                  double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
166                  outMatrix.setEntry(i, j, cov);
167                  outMatrix.setEntry(j, i, cov);
168                }
169                outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
170            }
171            return outMatrix;
172        }
173    
174        /**
175         * Create a covariance matrix from a matrix whose columns represent
176         * covariates. Covariances are computed using the bias-corrected formula.
177         * @param matrix input matrix (must have at least two columns and two rows)
178         * @return covariance matrix
179         * @see #Covariance
180         */
181        protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) {
182            return computeCovarianceMatrix(matrix, true);
183        }
184    
185        /**
186         * Compute a covariance matrix from a rectangular array whose columns represent
187         * covariates.
188         * @param data input array (must have at least two columns and two rows)
189         * @param biasCorrected determines whether or not covariance estimates are bias-corrected
190         * @return covariance matrix
191         */
192        protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) {
193            return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
194        }
195    
196        /**
197         * Create a covariance matrix from a rectangual array whose columns represent
198         * covariates. Covariances are computed using the bias-corrected formula.
199         * @param data input array (must have at least two columns and two rows)
200         * @return covariance matrix
201         * @see #Covariance
202         */
203        protected RealMatrix computeCovarianceMatrix(double[][] data) {
204            return computeCovarianceMatrix(data, true);
205        }
206    
207        /**
208         * Computes the covariance between the two arrays.
209         *
210         * <p>Array lengths must match and the common length must be at least 2.</p>
211         *
212         * @param xArray first data array
213         * @param yArray second data array
214         * @param biasCorrected if true, returned value will be bias-corrected
215         * @return returns the covariance for the two arrays
216         * @throws  IllegalArgumentException if the arrays lengths do not match or
217         * there is insufficient data
218         */
219        public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
220            throws IllegalArgumentException {
221            Mean mean = new Mean();
222            double result = 0d;
223            int length = xArray.length;
224            if(length == yArray.length && length > 1) {
225                double xMean = mean.evaluate(xArray);
226                double yMean = mean.evaluate(yArray);
227                for (int i = 0; i < length; i++) {
228                    double xDev = xArray[i] - xMean;
229                    double yDev = yArray[i] - yMean;
230                    result += (xDev * yDev - result) / (i + 1);
231                }
232            }
233            else {
234                throw MathRuntimeException.createIllegalArgumentException(
235                   "arrays must have the same length and both must have at " +
236                   "least two elements. xArray has size {0}, yArray has {1} elements",
237                        length, yArray.length);
238            }
239            return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
240        }
241    
242        /**
243         * Computes the covariance between the two arrays, using the bias-corrected
244         * formula.
245         *
246         * <p>Array lengths must match and the common length must be at least 2.</p>
247         *
248         * @param xArray first data array
249         * @param yArray second data array
250         * @return returns the covariance for the two arrays
251         * @throws  IllegalArgumentException if the arrays lengths do not match or
252         * there is insufficient data
253         */
254        public double covariance(final double[] xArray, final double[] yArray)
255            throws IllegalArgumentException {
256            return covariance(xArray, yArray, true);
257        }
258    
259        /**
260         * Throws IllegalArgumentException of the matrix does not have at least
261         * two columns and two rows
262         * @param matrix matrix to check
263         */
264        private void checkSufficientData(final RealMatrix matrix) {
265            int nRows = matrix.getRowDimension();
266            int nCols = matrix.getColumnDimension();
267            if (nRows < 2 || nCols < 2) {
268                throw MathRuntimeException.createIllegalArgumentException(
269                        "insufficient data: only {0} rows and {1} columns.",
270                        nRows, nCols);
271            }
272        }
273    }