001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.inference;
018    
019    import org.apache.commons.math.MathException;
020    import org.apache.commons.math.MathRuntimeException;
021    import org.apache.commons.math.distribution.ChiSquaredDistribution;
022    import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
023    
024    /**
025     * Implements Chi-Square test statistics defined in the
026     * {@link UnknownDistributionChiSquareTest} interface.
027     *
028     * @version $Revision: 811833 $ $Date: 2009-09-06 12:27:50 -0400 (Sun, 06 Sep 2009) $
029     */
030    public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
031    
032        /** Distribution used to compute inference statistics. */
033        private ChiSquaredDistribution distribution;
034    
035        /**
036         * Construct a ChiSquareTestImpl
037         */
038        public ChiSquareTestImpl() {
039            this(new ChiSquaredDistributionImpl(1.0));
040        }
041    
042        /**
043         * Create a test instance using the given distribution for computing
044         * inference statistics.
045         * @param x distribution used to compute inference statistics.
046         * @since 1.2
047         */
048        public ChiSquareTestImpl(ChiSquaredDistribution x) {
049            super();
050            setDistribution(x);
051        }
052         /**
053         * {@inheritDoc}
054         * <p><strong>Note: </strong>This implementation rescales the
055         * <code>expected</code> array if necessary to ensure that the sum of the
056         * expected and observed counts are equal.</p>
057         *
058         * @param observed array of observed frequency counts
059         * @param expected array of expected frequency counts
060         * @return chi-square test statistic
061         * @throws IllegalArgumentException if preconditions are not met
062         * or length is less than 2
063         */
064        public double chiSquare(double[] expected, long[] observed)
065            throws IllegalArgumentException {
066            if (expected.length < 2) {
067                throw MathRuntimeException.createIllegalArgumentException(
068                      "expected array length = {0}, must be at least 2",
069                      expected.length);
070            }
071            if (expected.length != observed.length) {
072                throw MathRuntimeException.createIllegalArgumentException(
073                      "dimension mismatch {0} != {1}", expected.length, observed.length);
074            }
075            checkPositive(expected);
076            checkNonNegative(observed);
077            double sumExpected = 0d;
078            double sumObserved = 0d;
079            for (int i = 0; i < observed.length; i++) {
080                sumExpected += expected[i];
081                sumObserved += observed[i];
082            }
083            double ratio = 1.0d;
084            boolean rescale = false;
085            if (Math.abs(sumExpected - sumObserved) > 10E-6) {
086                ratio = sumObserved / sumExpected;
087                rescale = true;
088            }
089            double sumSq = 0.0d;
090            for (int i = 0; i < observed.length; i++) {
091                if (rescale) {
092                    final double dev = observed[i] - ratio * expected[i];
093                    sumSq += dev * dev / (ratio * expected[i]);
094                } else {
095                    final double dev = observed[i] - expected[i];
096                    sumSq += dev * dev / expected[i];
097                }
098            }
099            return sumSq;
100        }
101    
102        /**
103         * {@inheritDoc}
104         * <p><strong>Note: </strong>This implementation rescales the
105         * <code>expected</code> array if necessary to ensure that the sum of the
106         * expected and observed counts are equal.</p>
107         *
108         * @param observed array of observed frequency counts
109         * @param expected array of expected frequency counts
110         * @return p-value
111         * @throws IllegalArgumentException if preconditions are not met
112         * @throws MathException if an error occurs computing the p-value
113         */
114        public double chiSquareTest(double[] expected, long[] observed)
115            throws IllegalArgumentException, MathException {
116            distribution.setDegreesOfFreedom(expected.length - 1.0);
117            return 1.0 - distribution.cumulativeProbability(
118                chiSquare(expected, observed));
119        }
120    
121        /**
122         * {@inheritDoc}
123         * <p><strong>Note: </strong>This implementation rescales the
124         * <code>expected</code> array if necessary to ensure that the sum of the
125         * expected and observed counts are equal.</p>
126         *
127         * @param observed array of observed frequency counts
128         * @param expected array of expected frequency counts
129         * @param alpha significance level of the test
130         * @return true iff null hypothesis can be rejected with confidence
131         * 1 - alpha
132         * @throws IllegalArgumentException if preconditions are not met
133         * @throws MathException if an error occurs performing the test
134         */
135        public boolean chiSquareTest(double[] expected, long[] observed,
136                double alpha) throws IllegalArgumentException, MathException {
137            if ((alpha <= 0) || (alpha > 0.5)) {
138                throw MathRuntimeException.createIllegalArgumentException(
139                      "out of bounds significance level {0}, must be between {1} and {2}",
140                      alpha, 0, 0.5);
141            }
142            return chiSquareTest(expected, observed) < alpha;
143        }
144    
145        /**
146         * @param counts array representation of 2-way table
147         * @return chi-square test statistic
148         * @throws IllegalArgumentException if preconditions are not met
149         */
150        public double chiSquare(long[][] counts) throws IllegalArgumentException {
151    
152            checkArray(counts);
153            int nRows = counts.length;
154            int nCols = counts[0].length;
155    
156            // compute row, column and total sums
157            double[] rowSum = new double[nRows];
158            double[] colSum = new double[nCols];
159            double total = 0.0d;
160            for (int row = 0; row < nRows; row++) {
161                for (int col = 0; col < nCols; col++) {
162                    rowSum[row] += counts[row][col];
163                    colSum[col] += counts[row][col];
164                    total += counts[row][col];
165                }
166            }
167    
168            // compute expected counts and chi-square
169            double sumSq = 0.0d;
170            double expected = 0.0d;
171            for (int row = 0; row < nRows; row++) {
172                for (int col = 0; col < nCols; col++) {
173                    expected = (rowSum[row] * colSum[col]) / total;
174                    sumSq += ((counts[row][col] - expected) *
175                            (counts[row][col] - expected)) / expected;
176                }
177            }
178            return sumSq;
179        }
180    
181        /**
182         * @param counts array representation of 2-way table
183         * @return p-value
184         * @throws IllegalArgumentException if preconditions are not met
185         * @throws MathException if an error occurs computing the p-value
186         */
187        public double chiSquareTest(long[][] counts)
188        throws IllegalArgumentException, MathException {
189            checkArray(counts);
190            double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
191            distribution.setDegreesOfFreedom(df);
192            return 1 - distribution.cumulativeProbability(chiSquare(counts));
193        }
194    
195        /**
196         * @param counts array representation of 2-way table
197         * @param alpha significance level of the test
198         * @return true iff null hypothesis can be rejected with confidence
199         * 1 - alpha
200         * @throws IllegalArgumentException if preconditions are not met
201         * @throws MathException if an error occurs performing the test
202         */
203        public boolean chiSquareTest(long[][] counts, double alpha)
204        throws IllegalArgumentException, MathException {
205            if ((alpha <= 0) || (alpha > 0.5)) {
206                throw MathRuntimeException.createIllegalArgumentException(
207                      "out of bounds significance level {0}, must be between {1} and {2}",
208                      alpha, 0.0, 0.5);
209            }
210            return chiSquareTest(counts) < alpha;
211        }
212    
213        /**
214         * @param observed1 array of observed frequency counts of the first data set
215         * @param observed2 array of observed frequency counts of the second data set
216         * @return chi-square test statistic
217         * @throws IllegalArgumentException if preconditions are not met
218         * @since 1.2
219         */
220        public double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
221            throws IllegalArgumentException {
222    
223            // Make sure lengths are same
224            if (observed1.length < 2) {
225                throw MathRuntimeException.createIllegalArgumentException(
226                      "observed array length = {0}, must be at least 2",
227                      observed1.length);
228            }
229            if (observed1.length != observed2.length) {
230                throw MathRuntimeException.createIllegalArgumentException(
231                      "dimension mismatch {0} != {1}",
232                      observed1.length, observed2.length);
233            }
234    
235            // Ensure non-negative counts
236            checkNonNegative(observed1);
237            checkNonNegative(observed2);
238    
239            // Compute and compare count sums
240            long countSum1 = 0;
241            long countSum2 = 0;
242            boolean unequalCounts = false;
243            double weight = 0.0;
244            for (int i = 0; i < observed1.length; i++) {
245                countSum1 += observed1[i];
246                countSum2 += observed2[i];
247            }
248            // Ensure neither sample is uniformly 0
249            if (countSum1 == 0) {
250                throw MathRuntimeException.createIllegalArgumentException(
251                      "observed counts are all 0 in first observed array");
252            }
253            if (countSum2 == 0) {
254                throw MathRuntimeException.createIllegalArgumentException(
255                      "observed counts are all 0 in second observed array");
256            }
257            // Compare and compute weight only if different
258            unequalCounts = countSum1 != countSum2;
259            if (unequalCounts) {
260                weight = Math.sqrt((double) countSum1 / (double) countSum2);
261            }
262            // Compute ChiSquare statistic
263            double sumSq = 0.0d;
264            double dev = 0.0d;
265            double obs1 = 0.0d;
266            double obs2 = 0.0d;
267            for (int i = 0; i < observed1.length; i++) {
268                if (observed1[i] == 0 && observed2[i] == 0) {
269                    throw MathRuntimeException.createIllegalArgumentException(
270                          "observed counts are both zero for entry {0}", i);
271                } else {
272                    obs1 = observed1[i];
273                    obs2 = observed2[i];
274                    if (unequalCounts) { // apply weights
275                        dev = obs1/weight - obs2 * weight;
276                    } else {
277                        dev = obs1 - obs2;
278                    }
279                    sumSq += (dev * dev) / (obs1 + obs2);
280                }
281            }
282            return sumSq;
283        }
284    
285        /**
286         * @param observed1 array of observed frequency counts of the first data set
287         * @param observed2 array of observed frequency counts of the second data set
288         * @return p-value
289         * @throws IllegalArgumentException if preconditions are not met
290         * @throws MathException if an error occurs computing the p-value
291         * @since 1.2
292         */
293        public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
294            throws IllegalArgumentException, MathException {
295            distribution.setDegreesOfFreedom((double) observed1.length - 1);
296            return 1 - distribution.cumulativeProbability(
297                    chiSquareDataSetsComparison(observed1, observed2));
298        }
299    
300        /**
301         * @param observed1 array of observed frequency counts of the first data set
302         * @param observed2 array of observed frequency counts of the second data set
303         * @param alpha significance level of the test
304         * @return true iff null hypothesis can be rejected with confidence
305         * 1 - alpha
306         * @throws IllegalArgumentException if preconditions are not met
307         * @throws MathException if an error occurs performing the test
308         * @since 1.2
309         */
310        public boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
311                double alpha) throws IllegalArgumentException, MathException {
312            if ((alpha <= 0) || (alpha > 0.5)) {
313                throw MathRuntimeException.createIllegalArgumentException(
314                      "out of bounds significance level {0}, must be between {1} and {2}",
315                      alpha, 0.0, 0.5);
316            }
317            return chiSquareTestDataSetsComparison(observed1, observed2) < alpha;
318        }
319    
320        /**
321         * Checks to make sure that the input long[][] array is rectangular,
322         * has at least 2 rows and 2 columns, and has all non-negative entries,
323         * throwing IllegalArgumentException if any of these checks fail.
324         *
325         * @param in input 2-way table to check
326         * @throws IllegalArgumentException if the array is not valid
327         */
328        private void checkArray(long[][] in) throws IllegalArgumentException {
329    
330            if (in.length < 2) {
331                throw MathRuntimeException.createIllegalArgumentException(
332                      "invalid row dimension: {0} (must be at least 2)",
333                      in.length);
334            }
335    
336            if (in[0].length < 2) {
337                throw MathRuntimeException.createIllegalArgumentException(
338                      "invalid column dimension: {0} (must be at least 2)",
339                      in[0].length);
340            }
341    
342            checkRectangular(in);
343            checkNonNegative(in);
344    
345        }
346    
347        //---------------------  Private array methods -- should find a utility home for these
348    
349        /**
350         * Throws IllegalArgumentException if the input array is not rectangular.
351         *
352         * @param in array to be tested
353         * @throws NullPointerException if input array is null
354         * @throws IllegalArgumentException if input array is not rectangular
355         */
356        private void checkRectangular(long[][] in) {
357            for (int i = 1; i < in.length; i++) {
358                if (in[i].length != in[0].length) {
359                    throw MathRuntimeException.createIllegalArgumentException(
360                          "some rows have length {0} while others have length {1}",
361                          in[i].length, in[0].length);
362                }
363            }
364        }
365    
366        /**
367         * Check all entries of the input array are > 0.
368         *
369         * @param in array to be tested
370         * @exception IllegalArgumentException if one entry is not positive
371         */
372        private void checkPositive(double[] in) throws IllegalArgumentException {
373            for (int i = 0; i < in.length; i++) {
374                if (in[i] <= 0) {
375                    throw MathRuntimeException.createIllegalArgumentException(
376                          "element {0} is not positive: {1}",
377                          i, in[i]);
378                }
379            }
380        }
381    
382        /**
383         * Check all entries of the input array are >= 0.
384         *
385         * @param in array to be tested
386         * @exception IllegalArgumentException if one entry is negative
387         */
388        private void checkNonNegative(long[] in) throws IllegalArgumentException {
389            for (int i = 0; i < in.length; i++) {
390                if (in[i] < 0) {
391                    throw MathRuntimeException.createIllegalArgumentException(
392                          "element {0} is negative: {1}",
393                          i, in[i]);
394                }
395            }
396        }
397    
398        /**
399         * Check all entries of the input array are >= 0.
400         *
401         * @param in array to be tested
402         * @exception IllegalArgumentException if one entry is negative
403         */
404        private void checkNonNegative(long[][] in) throws IllegalArgumentException {
405            for (int i = 0; i < in.length; i ++) {
406                for (int j = 0; j < in[i].length; j++) {
407                    if (in[i][j] < 0) {
408                        throw MathRuntimeException.createIllegalArgumentException(
409                              "element ({0}, {1}) is negative: {2}",
410                              i, j, in[i][j]);
411                    }
412                }
413            }
414        }
415    
416        /**
417         * Modify the distribution used to compute inference statistics.
418         *
419         * @param value
420         *            the new distribution
421         * @since 1.2
422         */
423        public void setDistribution(ChiSquaredDistribution value) {
424            distribution = value;
425        }
426    }