001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat;
018    
019    import org.apache.commons.math.MathRuntimeException;
020    import org.apache.commons.math.stat.descriptive.UnivariateStatistic;
021    import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
022    import org.apache.commons.math.stat.descriptive.moment.Mean;
023    import org.apache.commons.math.stat.descriptive.moment.Variance;
024    import org.apache.commons.math.stat.descriptive.rank.Max;
025    import org.apache.commons.math.stat.descriptive.rank.Min;
026    import org.apache.commons.math.stat.descriptive.rank.Percentile;
027    import org.apache.commons.math.stat.descriptive.summary.Product;
028    import org.apache.commons.math.stat.descriptive.summary.Sum;
029    import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
030    import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
031    
032    /**
033     * StatUtils provides static methods for computing statistics based on data
034     * stored in double[] arrays.
035     *
036     * @version $Revision: 811685 $ $Date: 2009-09-05 13:36:48 -0400 (Sat, 05 Sep 2009) $
037     */
038    public final class StatUtils {
039    
040        /** sum */
041        private static final UnivariateStatistic SUM = new Sum();
042    
043        /** sumSq */
044        private static final UnivariateStatistic SUM_OF_SQUARES = new SumOfSquares();
045    
046        /** prod */
047        private static final UnivariateStatistic PRODUCT = new Product();
048    
049        /** sumLog */
050        private static final UnivariateStatistic SUM_OF_LOGS = new SumOfLogs();
051    
052        /** min */
053        private static final UnivariateStatistic MIN = new Min();
054    
055        /** max */
056        private static final UnivariateStatistic MAX = new Max();
057    
058        /** mean */
059        private static final UnivariateStatistic MEAN = new Mean();
060    
061        /** variance */
062        private static final Variance VARIANCE = new Variance();
063    
064        /** percentile */
065        private static final Percentile PERCENTILE = new Percentile();
066    
067        /** geometric mean */
068        private static final GeometricMean GEOMETRIC_MEAN = new GeometricMean();
069    
070        /**
071         * Private Constructor
072         */
073        private StatUtils() {
074        }
075    
076        /**
077         * Returns the sum of the values in the input array, or
078         * <code>Double.NaN</code> if the array is empty.
079         * <p>
080         * Throws <code>IllegalArgumentException</code> if the input array
081         * is null.</p>
082         *
083         * @param values  array of values to sum
084         * @return the sum of the values or <code>Double.NaN</code> if the array
085         * is empty
086         * @throws IllegalArgumentException if the array is null
087         */
088        public static double sum(final double[] values) {
089            return SUM.evaluate(values);
090        }
091    
092        /**
093         * Returns the sum of the entries in the specified portion of
094         * the input array, or <code>Double.NaN</code> if the designated subarray
095         * is empty.
096         * <p>
097         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
098         *
099         * @param values the input array
100         * @param begin index of the first array element to include
101         * @param length the number of elements to include
102         * @return the sum of the values or Double.NaN if length = 0
103         * @throws IllegalArgumentException if the array is null or the array index
104         *  parameters are not valid
105         */
106        public static double sum(final double[] values, final int begin,
107                final int length) {
108            return SUM.evaluate(values, begin, length);
109        }
110    
111        /**
112         * Returns the sum of the squares of the entries in the input array, or
113         * <code>Double.NaN</code> if the array is empty.
114         * <p>
115         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
116         *
117         * @param values  input array
118         * @return the sum of the squared values or <code>Double.NaN</code> if the
119         * array is empty
120         * @throws IllegalArgumentException if the array is null
121         */
122        public static double sumSq(final double[] values) {
123            return SUM_OF_SQUARES.evaluate(values);
124        }
125    
126        /**
127         * Returns the sum of the squares of the entries in the specified portion of
128         * the input array, or <code>Double.NaN</code> if the designated subarray
129         * is empty.
130         * <p>
131         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
132         *
133         * @param values the input array
134         * @param begin index of the first array element to include
135         * @param length the number of elements to include
136         * @return the sum of the squares of the values or Double.NaN if length = 0
137         * @throws IllegalArgumentException if the array is null or the array index
138         * parameters are not valid
139         */
140        public static double sumSq(final double[] values, final int begin,
141                final int length) {
142            return SUM_OF_SQUARES.evaluate(values, begin, length);
143        }
144    
145        /**
146         * Returns the product of the entries in the input array, or
147         * <code>Double.NaN</code> if the array is empty.
148         * <p>
149         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
150         *
151         * @param values the input array
152         * @return the product of the values or Double.NaN if the array is empty
153         * @throws IllegalArgumentException if the array is null
154         */
155        public static double product(final double[] values) {
156            return PRODUCT.evaluate(values);
157        }
158    
159        /**
160         * Returns the product of the entries in the specified portion of
161         * the input array, or <code>Double.NaN</code> if the designated subarray
162         * is empty.
163         * <p>
164         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
165         *
166         * @param values the input array
167         * @param begin index of the first array element to include
168         * @param length the number of elements to include
169         * @return the product of the values or Double.NaN if length = 0
170         * @throws IllegalArgumentException if the array is null or the array index
171         * parameters are not valid
172         */
173        public static double product(final double[] values, final int begin,
174                final int length) {
175            return PRODUCT.evaluate(values, begin, length);
176        }
177    
178        /**
179         * Returns the sum of the natural logs of the entries in the input array, or
180         * <code>Double.NaN</code> if the array is empty.
181         * <p>
182         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
183         * <p>
184         * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
185         * </p>
186         *
187         * @param values the input array
188         * @return the sum of the natural logs of the values or Double.NaN if
189         * the array is empty
190         * @throws IllegalArgumentException if the array is null
191         */
192        public static double sumLog(final double[] values) {
193            return SUM_OF_LOGS.evaluate(values);
194        }
195    
196        /**
197         * Returns the sum of the natural logs of the entries in the specified portion of
198         * the input array, or <code>Double.NaN</code> if the designated subarray
199         * is empty.
200         * <p>
201         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
202         * <p>
203         * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
204         * </p>
205         *
206         * @param values the input array
207         * @param begin index of the first array element to include
208         * @param length the number of elements to include
209         * @return the sum of the natural logs of the values or Double.NaN if
210         * length = 0
211         * @throws IllegalArgumentException if the array is null or the array index
212         * parameters are not valid
213         */
214        public static double sumLog(final double[] values, final int begin,
215                final int length) {
216            return SUM_OF_LOGS.evaluate(values, begin, length);
217        }
218    
219        /**
220         * Returns the arithmetic mean of the entries in the input array, or
221         * <code>Double.NaN</code> if the array is empty.
222         * <p>
223         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
224         * <p>
225         * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
226         * details on the computing algorithm.</p>
227         *
228         * @param values the input array
229         * @return the mean of the values or Double.NaN if the array is empty
230         * @throws IllegalArgumentException if the array is null
231         */
232        public static double mean(final double[] values) {
233            return MEAN.evaluate(values);
234        }
235    
236        /**
237         * Returns the arithmetic mean of the entries in the specified portion of
238         * the input array, or <code>Double.NaN</code> if the designated subarray
239         * is empty.
240         * <p>
241         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
242         * <p>
243         * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
244         * details on the computing algorithm.</p>
245         *
246         * @param values the input array
247         * @param begin index of the first array element to include
248         * @param length the number of elements to include
249         * @return the mean of the values or Double.NaN if length = 0
250         * @throws IllegalArgumentException if the array is null or the array index
251         * parameters are not valid
252         */
253        public static double mean(final double[] values, final int begin,
254                final int length) {
255            return MEAN.evaluate(values, begin, length);
256        }
257    
258        /**
259         * Returns the geometric mean of the entries in the input array, or
260         * <code>Double.NaN</code> if the array is empty.
261         * <p>
262         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
263         * <p>
264         * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
265         * for details on the computing algorithm.</p>
266         *
267         * @param values the input array
268         * @return the geometric mean of the values or Double.NaN if the array is empty
269         * @throws IllegalArgumentException if the array is null
270         */
271        public static double geometricMean(final double[] values) {
272            return GEOMETRIC_MEAN.evaluate(values);
273        }
274    
275        /**
276         * Returns the geometric mean of the entries in the specified portion of
277         * the input array, or <code>Double.NaN</code> if the designated subarray
278         * is empty.
279         * <p>
280         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
281         * <p>
282         * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
283         * for details on the computing algorithm.</p>
284         *
285         * @param values the input array
286         * @param begin index of the first array element to include
287         * @param length the number of elements to include
288         * @return the geometric mean of the values or Double.NaN if length = 0
289         * @throws IllegalArgumentException if the array is null or the array index
290         * parameters are not valid
291         */
292        public static double geometricMean(final double[] values, final int begin,
293                final int length) {
294            return GEOMETRIC_MEAN.evaluate(values, begin, length);
295        }
296    
297    
298        /**
299         * Returns the variance of the entries in the input array, or
300         * <code>Double.NaN</code> if the array is empty.
301         * <p>
302         * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
303         * details on the computing algorithm.</p>
304         * <p>
305         * Returns 0 for a single-value (i.e. length = 1) sample.</p>
306         * <p>
307         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
308         *
309         * @param values the input array
310         * @return the variance of the values or Double.NaN if the array is empty
311         * @throws IllegalArgumentException if the array is null
312         */
313        public static double variance(final double[] values) {
314            return VARIANCE.evaluate(values);
315        }
316    
317        /**
318         * Returns the variance of the entries in the specified portion of
319         * the input array, or <code>Double.NaN</code> if the designated subarray
320         * is empty.
321         * <p>
322         * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
323         * details on the computing algorithm.</p>
324         * <p>
325         * Returns 0 for a single-value (i.e. length = 1) sample.</p>
326         * <p>
327         * Throws <code>IllegalArgumentException</code> if the array is null or the
328         * array index parameters are not valid.</p>
329         *
330         * @param values the input array
331         * @param begin index of the first array element to include
332         * @param length the number of elements to include
333         * @return the variance of the values or Double.NaN if length = 0
334         * @throws IllegalArgumentException if the array is null or the array index
335         *  parameters are not valid
336         */
337        public static double variance(final double[] values, final int begin,
338                final int length) {
339            return VARIANCE.evaluate(values, begin, length);
340        }
341    
342        /**
343         * Returns the variance of the entries in the specified portion of
344         * the input array, using the precomputed mean value.  Returns
345         * <code>Double.NaN</code> if the designated subarray is empty.
346         * <p>
347         * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
348         * details on the computing algorithm.</p>
349         * <p>
350         * The formula used assumes that the supplied mean value is the arithmetic
351         * mean of the sample data, not a known population parameter.  This method
352         * is supplied only to save computation when the mean has already been
353         * computed.</p>
354         * <p>
355         * Returns 0 for a single-value (i.e. length = 1) sample.</p>
356         * <p>
357         * Throws <code>IllegalArgumentException</code> if the array is null or the
358         * array index parameters are not valid.</p>
359         *
360         * @param values the input array
361         * @param mean the precomputed mean value
362         * @param begin index of the first array element to include
363         * @param length the number of elements to include
364         * @return the variance of the values or Double.NaN if length = 0
365         * @throws IllegalArgumentException if the array is null or the array index
366         *  parameters are not valid
367         */
368        public static double variance(final double[] values, final double mean,
369                final int begin, final int length) {
370            return VARIANCE.evaluate(values, mean, begin, length);
371        }
372    
373        /**
374         * Returns the variance of the entries in the input array, using the
375         * precomputed mean value.  Returns <code>Double.NaN</code> if the array
376         * is empty.
377         * <p>
378         * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
379         * details on the computing algorithm.</p>
380         * <p>
381         * The formula used assumes that the supplied mean value is the arithmetic
382         * mean of the sample data, not a known population parameter.  This method
383         * is supplied only to save computation when the mean has already been
384         * computed.</p>
385         * <p>
386         * Returns 0 for a single-value (i.e. length = 1) sample.</p>
387         * <p>
388         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
389         *
390         * @param values the input array
391         * @param mean the precomputed mean value
392         * @return the variance of the values or Double.NaN if the array is empty
393         * @throws IllegalArgumentException if the array is null
394         */
395        public static double variance(final double[] values, final double mean) {
396            return VARIANCE.evaluate(values, mean);
397        }
398    
399        /**
400         * Returns the maximum of the entries in the input array, or
401         * <code>Double.NaN</code> if the array is empty.
402         * <p>
403         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
404         * <p>
405         * <ul>
406         * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
407         * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
408         * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
409         * the result is <code>Double.POSITIVE_INFINITY.</code></li>
410         * </ul></p>
411         *
412         * @param values the input array
413         * @return the maximum of the values or Double.NaN if the array is empty
414         * @throws IllegalArgumentException if the array is null
415         */
416        public static double max(final double[] values) {
417            return MAX.evaluate(values);
418        }
419    
420        /**
421         * Returns the maximum of the entries in the specified portion of
422         * the input array, or <code>Double.NaN</code> if the designated subarray
423         * is empty.
424         * <p>
425         * Throws <code>IllegalArgumentException</code> if the array is null or
426         * the array index parameters are not valid.</p>
427         * <p>
428         * <ul>
429         * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
430         * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
431         * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
432         * the result is <code>Double.POSITIVE_INFINITY.</code></li>
433         * </ul></p>
434         *
435         * @param values the input array
436         * @param begin index of the first array element to include
437         * @param length the number of elements to include
438         * @return the maximum of the values or Double.NaN if length = 0
439         * @throws IllegalArgumentException if the array is null or the array index
440         * parameters are not valid
441         */
442        public static double max(final double[] values, final int begin,
443                final int length) {
444            return MAX.evaluate(values, begin, length);
445        }
446    
447         /**
448         * Returns the minimum of the entries in the input array, or
449         * <code>Double.NaN</code> if the array is empty.
450         * <p>
451         * Throws <code>IllegalArgumentException</code> if the array is null.</p>
452         * <p>
453         * <ul>
454         * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
455         * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
456         * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
457         * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
458         * </ul> </p>
459         *
460         * @param values the input array
461         * @return the minimum of the values or Double.NaN if the array is empty
462         * @throws IllegalArgumentException if the array is null
463         */
464        public static double min(final double[] values) {
465            return MIN.evaluate(values);
466        }
467    
468         /**
469         * Returns the minimum of the entries in the specified portion of
470         * the input array, or <code>Double.NaN</code> if the designated subarray
471         * is empty.
472         * <p>
473         * Throws <code>IllegalArgumentException</code> if the array is null or
474         * the array index parameters are not valid.</p>
475         * <p>
476         * <ul>
477         * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
478         * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
479         * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
480         * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
481         * </ul></p>
482         *
483         * @param values the input array
484         * @param begin index of the first array element to include
485         * @param length the number of elements to include
486         * @return the minimum of the values or Double.NaN if length = 0
487         * @throws IllegalArgumentException if the array is null or the array index
488         * parameters are not valid
489         */
490        public static double min(final double[] values, final int begin,
491                final int length) {
492            return MIN.evaluate(values, begin, length);
493        }
494    
495        /**
496         * Returns an estimate of the <code>p</code>th percentile of the values
497         * in the <code>values</code> array.
498         * <p>
499         * <ul>
500         * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
501         * <code>0</code></li></p>
502         * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
503         *  if <code>values</code> has length <code>1</code></li>
504         * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
505         * is null  or p is not a valid quantile value (p must be greater than 0
506         * and less than or equal to 100)</li>
507         * </ul></p>
508         * <p>
509         * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
510         * a description of the percentile estimation algorithm used.</p>
511         *
512         * @param values input array of values
513         * @param p the percentile value to compute
514         * @return the percentile value or Double.NaN if the array is empty
515         * @throws IllegalArgumentException if <code>values</code> is null
516         * or p is invalid
517         */
518        public static double percentile(final double[] values, final double p) {
519                return PERCENTILE.evaluate(values,p);
520        }
521    
522         /**
523         * Returns an estimate of the <code>p</code>th percentile of the values
524         * in the <code>values</code> array, starting with the element in (0-based)
525         * position <code>begin</code> in the array and including <code>length</code>
526         * values.
527         * <p>
528         * <ul>
529         * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
530         * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
531         *  if <code>length = 1 </code></li>
532         * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
533         *  is null , <code>begin</code> or <code>length</code> is invalid, or
534         * <code>p</code> is not a valid quantile value (p must be greater than 0
535         * and less than or equal to 100)</li>
536         * </ul></p>
537         * <p>
538          * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
539          * a description of the percentile estimation algorithm used.</p>
540         *
541         * @param values array of input values
542         * @param p  the percentile to compute
543         * @param begin  the first (0-based) element to include in the computation
544         * @param length  the number of array elements to include
545         * @return  the percentile value
546         * @throws IllegalArgumentException if the parameters are not valid or the
547         * input array is null
548         */
549        public static double percentile(final double[] values, final int begin,
550                final int length, final double p) {
551            return PERCENTILE.evaluate(values, begin, length, p);
552        }
553    
554        /**
555         * Returns the sum of the (signed) differences between corresponding elements of the
556         * input arrays -- i.e., sum(sample1[i] - sample2[i]).
557         *
558         * @param sample1  the first array
559         * @param sample2  the second array
560         * @return sum of paired differences
561         * @throws IllegalArgumentException if the arrays do not have the same
562         * (positive) length
563         */
564        public static double sumDifference(final double[] sample1, final double[] sample2)
565            throws IllegalArgumentException {
566            int n = sample1.length;
567            if ((n  != sample2.length) || (n < 1)) {
568                throw MathRuntimeException.createIllegalArgumentException(
569                      "input arrays must have the same positive length ({0} and {1})",
570                      n, sample2.length);
571            }
572            double result = 0;
573            for (int i = 0; i < n; i++) {
574                result += sample1[i] - sample2[i];
575            }
576            return result;
577        }
578    
579        /**
580         * Returns the mean of the (signed) differences between corresponding elements of the
581         * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length.
582         *
583         * @param sample1  the first array
584         * @param sample2  the second array
585         * @return mean of paired differences
586         * @throws IllegalArgumentException if the arrays do not have the same
587         * (positive) length
588         */
589        public static double meanDifference(final double[] sample1, final double[] sample2)
590        throws IllegalArgumentException {
591            return sumDifference(sample1, sample2) / sample1.length;
592        }
593    
594        /**
595         * Returns the variance of the (signed) differences between corresponding elements of the
596         * input arrays -- i.e., var(sample1[i] - sample2[i]).
597         *
598         * @param sample1  the first array
599         * @param sample2  the second array
600         * @param meanDifference   the mean difference between corresponding entries
601         * @see #meanDifference(double[],double[])
602         * @return variance of paired differences
603         * @throws IllegalArgumentException if the arrays do not have the same
604         * length or their common length is less than 2.
605         */
606        public static double varianceDifference(final double[] sample1, final double[] sample2,
607                double meanDifference)  throws IllegalArgumentException {
608            double sum1 = 0d;
609            double sum2 = 0d;
610            double diff = 0d;
611            int n = sample1.length;
612            if (n < 2 || n != sample2.length) {
613                throw MathRuntimeException.createIllegalArgumentException(
614                      "input arrays must have the same length and at least two elements ({0} and {1})",
615                      n, sample2.length);
616            }
617            for (int i = 0; i < n; i++) {
618                diff = sample1[i] - sample2[i];
619                sum1 += (diff - meanDifference) *(diff - meanDifference);
620                sum2 += diff - meanDifference;
621            }
622            return (sum1 - (sum2 * sum2 / n)) / (n - 1);
623        }
624    
625    }