001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat.descriptive;
018    
019    import java.io.Serializable;
020    
021    import org.apache.commons.math.MathRuntimeException;
022    import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
023    import org.apache.commons.math.stat.descriptive.moment.Mean;
024    import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
025    import org.apache.commons.math.stat.descriptive.moment.Variance;
026    import org.apache.commons.math.stat.descriptive.rank.Max;
027    import org.apache.commons.math.stat.descriptive.rank.Min;
028    import org.apache.commons.math.stat.descriptive.summary.Sum;
029    import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
030    import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
031    import org.apache.commons.math.util.MathUtils;
032    
033    /**
034     * <p>
035     * Computes summary statistics for a stream of data values added using the
036     * {@link #addValue(double) addValue} method. The data values are not stored in
037     * memory, so this class can be used to compute statistics for very large data
038     * streams.
039     * </p>
040     * <p>
041     * The {@link StorelessUnivariateStatistic} instances used to maintain summary
042     * state and compute statistics are configurable via setters. For example, the
043     * default implementation for the variance can be overridden by calling
044     * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
045     * these methods must implement the {@link StorelessUnivariateStatistic}
046     * interface and configuration must be completed before <code>addValue</code>
047     * is called. No configuration is necessary to use the default, commons-math
048     * provided implementations.
049     * </p>
050     * <p>
051     * Note: This class is not thread-safe. Use
052     * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
053     * threads is required.
054     * </p>
055     * @version $Revision: 811833 $ $Date: 2009-09-06 12:27:50 -0400 (Sun, 06 Sep 2009) $
056     */
057    public class SummaryStatistics implements StatisticalSummary, Serializable {
058    
059        /** Serialization UID */
060        private static final long serialVersionUID = -2021321786743555871L;
061    
062        /** count of values that have been added */
063        protected long n = 0;
064    
065        /** SecondMoment is used to compute the mean and variance */
066        protected SecondMoment secondMoment = new SecondMoment();
067    
068        /** sum of values that have been added */
069        protected Sum sum = new Sum();
070    
071        /** sum of the square of each value that has been added */
072        protected SumOfSquares sumsq = new SumOfSquares();
073    
074        /** min of values that have been added */
075        protected Min min = new Min();
076    
077        /** max of values that have been added */
078        protected Max max = new Max();
079    
080        /** sumLog of values that have been added */
081        protected SumOfLogs sumLog = new SumOfLogs();
082    
083        /** geoMean of values that have been added */
084        protected GeometricMean geoMean = new GeometricMean(sumLog);
085    
086        /** mean of values that have been added */
087        protected Mean mean = new Mean();
088    
089        /** variance of values that have been added */
090        protected Variance variance = new Variance();
091    
092        /** Sum statistic implementation - can be reset by setter. */
093        private StorelessUnivariateStatistic sumImpl = sum;
094    
095        /** Sum of squares statistic implementation - can be reset by setter. */
096        private StorelessUnivariateStatistic sumsqImpl = sumsq;
097    
098        /** Minimum statistic implementation - can be reset by setter. */
099        private StorelessUnivariateStatistic minImpl = min;
100    
101        /** Maximum statistic implementation - can be reset by setter. */
102        private StorelessUnivariateStatistic maxImpl = max;
103    
104        /** Sum of log statistic implementation - can be reset by setter. */
105        private StorelessUnivariateStatistic sumLogImpl = sumLog;
106    
107        /** Geometric mean statistic implementation - can be reset by setter. */
108        private StorelessUnivariateStatistic geoMeanImpl = geoMean;
109    
110        /** Mean statistic implementation - can be reset by setter. */
111        private StorelessUnivariateStatistic meanImpl = mean;
112    
113        /** Variance statistic implementation - can be reset by setter. */
114        private StorelessUnivariateStatistic varianceImpl = variance;
115    
116        /**
117         * Construct a SummaryStatistics instance
118         */
119        public SummaryStatistics() {
120        }
121    
122        /**
123         * A copy constructor. Creates a deep-copy of the {@code original}.
124         *
125         * @param original the {@code SummaryStatistics} instance to copy
126         */
127        public SummaryStatistics(SummaryStatistics original) {
128            copy(original, this);
129        }
130    
131        /**
132         * Return a {@link StatisticalSummaryValues} instance reporting current
133         * statistics.
134         * @return Current values of statistics
135         */
136        public StatisticalSummary getSummary() {
137            return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
138                    getMax(), getMin(), getSum());
139        }
140    
141        /**
142         * Add a value to the data
143         * @param value the value to add
144         */
145        public void addValue(double value) {
146            sumImpl.increment(value);
147            sumsqImpl.increment(value);
148            minImpl.increment(value);
149            maxImpl.increment(value);
150            sumLogImpl.increment(value);
151            secondMoment.increment(value);
152            // If mean, variance or geomean have been overridden,
153            // need to increment these
154            if (!(meanImpl instanceof Mean)) {
155                meanImpl.increment(value);
156            }
157            if (!(varianceImpl instanceof Variance)) {
158                varianceImpl.increment(value);
159            }
160            if (!(geoMeanImpl instanceof GeometricMean)) {
161                geoMeanImpl.increment(value);
162            }
163            n++;
164        }
165    
166        /**
167         * Returns the number of available values
168         * @return The number of available values
169         */
170        public long getN() {
171            return n;
172        }
173    
174        /**
175         * Returns the sum of the values that have been added
176         * @return The sum or <code>Double.NaN</code> if no values have been added
177         */
178        public double getSum() {
179            return sumImpl.getResult();
180        }
181    
182        /**
183         * Returns the sum of the squares of the values that have been added.
184         * <p>
185         * Double.NaN is returned if no values have been added.
186         * </p>
187         * @return The sum of squares
188         */
189        public double getSumsq() {
190            return sumsqImpl.getResult();
191        }
192    
193        /**
194         * Returns the mean of the values that have been added.
195         * <p>
196         * Double.NaN is returned if no values have been added.
197         * </p>
198         * @return the mean
199         */
200        public double getMean() {
201            if (mean == meanImpl) {
202                return new Mean(secondMoment).getResult();
203            } else {
204                return meanImpl.getResult();
205            }
206        }
207    
208        /**
209         * Returns the standard deviation of the values that have been added.
210         * <p>
211         * Double.NaN is returned if no values have been added.
212         * </p>
213         * @return the standard deviation
214         */
215        public double getStandardDeviation() {
216            double stdDev = Double.NaN;
217            if (getN() > 0) {
218                if (getN() > 1) {
219                    stdDev = Math.sqrt(getVariance());
220                } else {
221                    stdDev = 0.0;
222                }
223            }
224            return stdDev;
225        }
226    
227        /**
228         * Returns the variance of the values that have been added.
229         * <p>
230         * Double.NaN is returned if no values have been added.
231         * </p>
232         * @return the variance
233         */
234        public double getVariance() {
235            if (varianceImpl == variance) {
236                return new Variance(secondMoment).getResult();
237            } else {
238                return varianceImpl.getResult();
239            }
240        }
241    
242        /**
243         * Returns the maximum of the values that have been added.
244         * <p>
245         * Double.NaN is returned if no values have been added.
246         * </p>
247         * @return the maximum
248         */
249        public double getMax() {
250            return maxImpl.getResult();
251        }
252    
253        /**
254         * Returns the minimum of the values that have been added.
255         * <p>
256         * Double.NaN is returned if no values have been added.
257         * </p>
258         * @return the minimum
259         */
260        public double getMin() {
261            return minImpl.getResult();
262        }
263    
264        /**
265         * Returns the geometric mean of the values that have been added.
266         * <p>
267         * Double.NaN is returned if no values have been added.
268         * </p>
269         * @return the geometric mean
270         */
271        public double getGeometricMean() {
272            return geoMeanImpl.getResult();
273        }
274    
275        /**
276         * Returns the sum of the logs of the values that have been added.
277         * <p>
278         * Double.NaN is returned if no values have been added.
279         * </p>
280         * @return the sum of logs
281         * @since 1.2
282         */
283        public double getSumOfLogs() {
284            return sumLogImpl.getResult();
285        }
286    
287        /**
288         * Returns a statistic related to the Second Central Moment.  Specifically,
289         * what is returned is the sum of squared deviations from the sample mean
290         * among the values that have been added.
291         * <p>
292         * Returns <code>Double.NaN</code> if no data values have been added and
293         * returns <code>0</code> if there is just one value in the data set.</p>
294         * <p>
295         * @return second central moment statistic
296         * @since 2.0
297         */
298        public double getSecondMoment() {
299            return secondMoment.getResult();
300        }
301    
302        /**
303         * Generates a text report displaying summary statistics from values that
304         * have been added.
305         * @return String with line feeds displaying statistics
306         * @since 1.2
307         */
308        @Override
309        public String toString() {
310            StringBuffer outBuffer = new StringBuffer();
311            String endl = "\n";
312            outBuffer.append("SummaryStatistics:").append(endl);
313            outBuffer.append("n: ").append(getN()).append(endl);
314            outBuffer.append("min: ").append(getMin()).append(endl);
315            outBuffer.append("max: ").append(getMax()).append(endl);
316            outBuffer.append("mean: ").append(getMean()).append(endl);
317            outBuffer.append("geometric mean: ").append(getGeometricMean())
318                .append(endl);
319            outBuffer.append("variance: ").append(getVariance()).append(endl);
320            outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
321            outBuffer.append("standard deviation: ").append(getStandardDeviation())
322                .append(endl);
323            outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
324            return outBuffer.toString();
325        }
326    
327        /**
328         * Resets all statistics and storage
329         */
330        public void clear() {
331            this.n = 0;
332            minImpl.clear();
333            maxImpl.clear();
334            sumImpl.clear();
335            sumLogImpl.clear();
336            sumsqImpl.clear();
337            geoMeanImpl.clear();
338            secondMoment.clear();
339            if (meanImpl != mean) {
340                meanImpl.clear();
341            }
342            if (varianceImpl != variance) {
343                varianceImpl.clear();
344            }
345        }
346    
347        /**
348         * Returns true iff <code>object</code> is a
349         * <code>SummaryStatistics</code> instance and all statistics have the
350         * same values as this.
351         * @param object the object to test equality against.
352         * @return true if object equals this
353         */
354        @Override
355        public boolean equals(Object object) {
356            if (object == this) {
357                return true;
358            }
359            if (object instanceof SummaryStatistics == false) {
360                return false;
361            }
362            SummaryStatistics stat = (SummaryStatistics)object;
363            return MathUtils.equals(stat.getGeometricMean(), getGeometricMean()) &&
364                   MathUtils.equals(stat.getMax(),           getMax())           &&
365                   MathUtils.equals(stat.getMean(),          getMean())          &&
366                   MathUtils.equals(stat.getMin(),           getMin())           &&
367                   MathUtils.equals(stat.getN(),             getN())             &&
368                   MathUtils.equals(stat.getSum(),           getSum())           &&
369                   MathUtils.equals(stat.getSumsq(),         getSumsq())         &&
370                   MathUtils.equals(stat.getVariance(),      getVariance());
371        }
372    
373        /**
374         * Returns hash code based on values of statistics
375         * @return hash code
376         */
377        @Override
378        public int hashCode() {
379            int result = 31 + MathUtils.hash(getGeometricMean());
380            result = result * 31 + MathUtils.hash(getGeometricMean());
381            result = result * 31 + MathUtils.hash(getMax());
382            result = result * 31 + MathUtils.hash(getMean());
383            result = result * 31 + MathUtils.hash(getMin());
384            result = result * 31 + MathUtils.hash(getN());
385            result = result * 31 + MathUtils.hash(getSum());
386            result = result * 31 + MathUtils.hash(getSumsq());
387            result = result * 31 + MathUtils.hash(getVariance());
388            return result;
389        }
390    
391        // Getters and setters for statistics implementations
392        /**
393         * Returns the currently configured Sum implementation
394         * @return the StorelessUnivariateStatistic implementing the sum
395         * @since 1.2
396         */
397        public StorelessUnivariateStatistic getSumImpl() {
398            return sumImpl;
399        }
400    
401        /**
402         * <p>
403         * Sets the implementation for the Sum.
404         * </p>
405         * <p>
406         * This method must be activated before any data has been added - i.e.,
407         * before {@link #addValue(double) addValue} has been used to add data;
408         * otherwise an IllegalStateException will be thrown.
409         * </p>
410         * @param sumImpl the StorelessUnivariateStatistic instance to use for
411         *        computing the Sum
412         * @throws IllegalStateException if data has already been added (i.e if n >
413         *         0)
414         * @since 1.2
415         */
416        public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
417            checkEmpty();
418            this.sumImpl = sumImpl;
419        }
420    
421        /**
422         * Returns the currently configured sum of squares implementation
423         * @return the StorelessUnivariateStatistic implementing the sum of squares
424         * @since 1.2
425         */
426        public StorelessUnivariateStatistic getSumsqImpl() {
427            return sumsqImpl;
428        }
429    
430        /**
431         * <p>
432         * Sets the implementation for the sum of squares.
433         * </p>
434         * <p>
435         * This method must be activated before any data has been added - i.e.,
436         * before {@link #addValue(double) addValue} has been used to add data;
437         * otherwise an IllegalStateException will be thrown.
438         * </p>
439         * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
440         *        computing the sum of squares
441         * @throws IllegalStateException if data has already been added (i.e if n >
442         *         0)
443         * @since 1.2
444         */
445        public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
446            checkEmpty();
447            this.sumsqImpl = sumsqImpl;
448        }
449    
450        /**
451         * Returns the currently configured minimum implementation
452         * @return the StorelessUnivariateStatistic implementing the minimum
453         * @since 1.2
454         */
455        public StorelessUnivariateStatistic getMinImpl() {
456            return minImpl;
457        }
458    
459        /**
460         * <p>
461         * Sets the implementation for the minimum.
462         * </p>
463         * <p>
464         * This method must be activated before any data has been added - i.e.,
465         * before {@link #addValue(double) addValue} has been used to add data;
466         * otherwise an IllegalStateException will be thrown.
467         * </p>
468         * @param minImpl the StorelessUnivariateStatistic instance to use for
469         *        computing the minimum
470         * @throws IllegalStateException if data has already been added (i.e if n >
471         *         0)
472         * @since 1.2
473         */
474        public void setMinImpl(StorelessUnivariateStatistic minImpl) {
475            checkEmpty();
476            this.minImpl = minImpl;
477        }
478    
479        /**
480         * Returns the currently configured maximum implementation
481         * @return the StorelessUnivariateStatistic implementing the maximum
482         * @since 1.2
483         */
484        public StorelessUnivariateStatistic getMaxImpl() {
485            return maxImpl;
486        }
487    
488        /**
489         * <p>
490         * Sets the implementation for the maximum.
491         * </p>
492         * <p>
493         * This method must be activated before any data has been added - i.e.,
494         * before {@link #addValue(double) addValue} has been used to add data;
495         * otherwise an IllegalStateException will be thrown.
496         * </p>
497         * @param maxImpl the StorelessUnivariateStatistic instance to use for
498         *        computing the maximum
499         * @throws IllegalStateException if data has already been added (i.e if n >
500         *         0)
501         * @since 1.2
502         */
503        public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
504            checkEmpty();
505            this.maxImpl = maxImpl;
506        }
507    
508        /**
509         * Returns the currently configured sum of logs implementation
510         * @return the StorelessUnivariateStatistic implementing the log sum
511         * @since 1.2
512         */
513        public StorelessUnivariateStatistic getSumLogImpl() {
514            return sumLogImpl;
515        }
516    
517        /**
518         * <p>
519         * Sets the implementation for the sum of logs.
520         * </p>
521         * <p>
522         * This method must be activated before any data has been added - i.e.,
523         * before {@link #addValue(double) addValue} has been used to add data;
524         * otherwise an IllegalStateException will be thrown.
525         * </p>
526         * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
527         *        computing the log sum
528         * @throws IllegalStateException if data has already been added (i.e if n >
529         *         0)
530         * @since 1.2
531         */
532        public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
533            checkEmpty();
534            this.sumLogImpl = sumLogImpl;
535            geoMean.setSumLogImpl(sumLogImpl);
536        }
537    
538        /**
539         * Returns the currently configured geometric mean implementation
540         * @return the StorelessUnivariateStatistic implementing the geometric mean
541         * @since 1.2
542         */
543        public StorelessUnivariateStatistic getGeoMeanImpl() {
544            return geoMeanImpl;
545        }
546    
547        /**
548         * <p>
549         * Sets the implementation for the geometric mean.
550         * </p>
551         * <p>
552         * This method must be activated before any data has been added - i.e.,
553         * before {@link #addValue(double) addValue} has been used to add data;
554         * otherwise an IllegalStateException will be thrown.
555         * </p>
556         * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
557         *        computing the geometric mean
558         * @throws IllegalStateException if data has already been added (i.e if n >
559         *         0)
560         * @since 1.2
561         */
562        public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
563            checkEmpty();
564            this.geoMeanImpl = geoMeanImpl;
565        }
566    
567        /**
568         * Returns the currently configured mean implementation
569         * @return the StorelessUnivariateStatistic implementing the mean
570         * @since 1.2
571         */
572        public StorelessUnivariateStatistic getMeanImpl() {
573            return meanImpl;
574        }
575    
576        /**
577         * <p>
578         * Sets the implementation for the mean.
579         * </p>
580         * <p>
581         * This method must be activated before any data has been added - i.e.,
582         * before {@link #addValue(double) addValue} has been used to add data;
583         * otherwise an IllegalStateException will be thrown.
584         * </p>
585         * @param meanImpl the StorelessUnivariateStatistic instance to use for
586         *        computing the mean
587         * @throws IllegalStateException if data has already been added (i.e if n >
588         *         0)
589         * @since 1.2
590         */
591        public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
592            checkEmpty();
593            this.meanImpl = meanImpl;
594        }
595    
596        /**
597         * Returns the currently configured variance implementation
598         * @return the StorelessUnivariateStatistic implementing the variance
599         * @since 1.2
600         */
601        public StorelessUnivariateStatistic getVarianceImpl() {
602            return varianceImpl;
603        }
604    
605        /**
606         * <p>
607         * Sets the implementation for the variance.
608         * </p>
609         * <p>
610         * This method must be activated before any data has been added - i.e.,
611         * before {@link #addValue(double) addValue} has been used to add data;
612         * otherwise an IllegalStateException will be thrown.
613         * </p>
614         * @param varianceImpl the StorelessUnivariateStatistic instance to use for
615         *        computing the variance
616         * @throws IllegalStateException if data has already been added (i.e if n >
617         *         0)
618         * @since 1.2
619         */
620        public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
621            checkEmpty();
622            this.varianceImpl = varianceImpl;
623        }
624    
625        /**
626         * Throws IllegalStateException if n > 0.
627         */
628        private void checkEmpty() {
629            if (n > 0) {
630                throw MathRuntimeException.createIllegalStateException(
631                        "{0} values have been added before statistic is configured",
632                        n);
633            }
634        }
635    
636        /**
637         * Returns a copy of this SummaryStatistics instance with the same internal state.
638         *
639         * @return a copy of this
640         */
641        public SummaryStatistics copy() {
642            SummaryStatistics result = new SummaryStatistics();
643            copy(this, result);
644            return result;
645        }
646    
647        /**
648         * Copies source to dest.
649         * <p>Neither source nor dest can be null.</p>
650         *
651         * @param source SummaryStatistics to copy
652         * @param dest SummaryStatistics to copy to
653         * @throws NullPointerException if either source or dest is null
654         */
655        public static void copy(SummaryStatistics source, SummaryStatistics dest) {
656            dest.maxImpl = source.maxImpl.copy();
657            dest.meanImpl = source.meanImpl.copy();
658            dest.minImpl = source.minImpl.copy();
659            dest.sumImpl = source.sumImpl.copy();
660            dest.varianceImpl = source.varianceImpl.copy();
661            dest.sumLogImpl = source.sumLogImpl.copy();
662            dest.sumsqImpl = source.sumsqImpl.copy();
663            if (source.getGeoMeanImpl() instanceof GeometricMean) {
664                // Keep geoMeanImpl, sumLogImpl in synch
665                dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl);
666            } else {
667                dest.geoMeanImpl = source.geoMeanImpl.copy();
668            }
669            SecondMoment.copy(source.secondMoment, dest.secondMoment);
670            dest.n = source.n;
671    
672            // Make sure that if stat == statImpl in source, same
673            // holds in dest; otherwise copy stat
674            if (source.geoMean == source.geoMeanImpl) {
675                dest.geoMean = (GeometricMean) dest.geoMeanImpl;
676            } else {
677                GeometricMean.copy(source.geoMean, dest.geoMean);
678            }
679            if (source.max == source.maxImpl) {
680                dest.max = (Max) dest.maxImpl;
681            } else {
682                Max.copy(source.max, dest.max);
683            }
684            if (source.mean == source.meanImpl) {
685                dest.mean = (Mean) dest.meanImpl;
686            } else {
687                Mean.copy(source.mean, dest.mean);
688            }
689            if (source.min == source.minImpl) {
690                dest.min = (Min) dest.minImpl;
691            } else {
692                Min.copy(source.min, dest.min);
693            }
694            if (source.sum == source.sumImpl) {
695                dest.sum = (Sum) dest.sumImpl;
696            } else {
697                Sum.copy(source.sum, dest.sum);
698            }
699            if (source.variance == source.varianceImpl) {
700                dest.variance = (Variance) dest.varianceImpl;
701            } else {
702                Variance.copy(source.variance, dest.variance);
703            }
704            if (source.sumLog == source.sumLogImpl) {
705                dest.sumLog = (SumOfLogs) dest.sumLogImpl;
706            } else {
707                SumOfLogs.copy(source.sumLog, dest.sumLog);
708            }
709            if (source.sumsq == source.sumsqImpl) {
710                dest.sumsq = (SumOfSquares) dest.sumsqImpl;
711            } else {
712                SumOfSquares.copy(source.sumsq, dest.sumsq);
713            }
714        }
715    }