001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.stat.descriptive; 018 019 import java.io.Serializable; 020 021 import org.apache.commons.math.MathRuntimeException; 022 import org.apache.commons.math.stat.descriptive.moment.GeometricMean; 023 import org.apache.commons.math.stat.descriptive.moment.Mean; 024 import org.apache.commons.math.stat.descriptive.moment.SecondMoment; 025 import org.apache.commons.math.stat.descriptive.moment.Variance; 026 import org.apache.commons.math.stat.descriptive.rank.Max; 027 import org.apache.commons.math.stat.descriptive.rank.Min; 028 import org.apache.commons.math.stat.descriptive.summary.Sum; 029 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs; 030 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares; 031 import org.apache.commons.math.util.MathUtils; 032 033 /** 034 * <p> 035 * Computes summary statistics for a stream of data values added using the 036 * {@link #addValue(double) addValue} method. The data values are not stored in 037 * memory, so this class can be used to compute statistics for very large data 038 * streams. 039 * </p> 040 * <p> 041 * The {@link StorelessUnivariateStatistic} instances used to maintain summary 042 * state and compute statistics are configurable via setters. For example, the 043 * default implementation for the variance can be overridden by calling 044 * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to 045 * these methods must implement the {@link StorelessUnivariateStatistic} 046 * interface and configuration must be completed before <code>addValue</code> 047 * is called. No configuration is necessary to use the default, commons-math 048 * provided implementations. 049 * </p> 050 * <p> 051 * Note: This class is not thread-safe. Use 052 * {@link SynchronizedSummaryStatistics} if concurrent access from multiple 053 * threads is required. 054 * </p> 055 * @version $Revision: 811833 $ $Date: 2009-09-06 12:27:50 -0400 (Sun, 06 Sep 2009) $ 056 */ 057 public class SummaryStatistics implements StatisticalSummary, Serializable { 058 059 /** Serialization UID */ 060 private static final long serialVersionUID = -2021321786743555871L; 061 062 /** count of values that have been added */ 063 protected long n = 0; 064 065 /** SecondMoment is used to compute the mean and variance */ 066 protected SecondMoment secondMoment = new SecondMoment(); 067 068 /** sum of values that have been added */ 069 protected Sum sum = new Sum(); 070 071 /** sum of the square of each value that has been added */ 072 protected SumOfSquares sumsq = new SumOfSquares(); 073 074 /** min of values that have been added */ 075 protected Min min = new Min(); 076 077 /** max of values that have been added */ 078 protected Max max = new Max(); 079 080 /** sumLog of values that have been added */ 081 protected SumOfLogs sumLog = new SumOfLogs(); 082 083 /** geoMean of values that have been added */ 084 protected GeometricMean geoMean = new GeometricMean(sumLog); 085 086 /** mean of values that have been added */ 087 protected Mean mean = new Mean(); 088 089 /** variance of values that have been added */ 090 protected Variance variance = new Variance(); 091 092 /** Sum statistic implementation - can be reset by setter. */ 093 private StorelessUnivariateStatistic sumImpl = sum; 094 095 /** Sum of squares statistic implementation - can be reset by setter. */ 096 private StorelessUnivariateStatistic sumsqImpl = sumsq; 097 098 /** Minimum statistic implementation - can be reset by setter. */ 099 private StorelessUnivariateStatistic minImpl = min; 100 101 /** Maximum statistic implementation - can be reset by setter. */ 102 private StorelessUnivariateStatistic maxImpl = max; 103 104 /** Sum of log statistic implementation - can be reset by setter. */ 105 private StorelessUnivariateStatistic sumLogImpl = sumLog; 106 107 /** Geometric mean statistic implementation - can be reset by setter. */ 108 private StorelessUnivariateStatistic geoMeanImpl = geoMean; 109 110 /** Mean statistic implementation - can be reset by setter. */ 111 private StorelessUnivariateStatistic meanImpl = mean; 112 113 /** Variance statistic implementation - can be reset by setter. */ 114 private StorelessUnivariateStatistic varianceImpl = variance; 115 116 /** 117 * Construct a SummaryStatistics instance 118 */ 119 public SummaryStatistics() { 120 } 121 122 /** 123 * A copy constructor. Creates a deep-copy of the {@code original}. 124 * 125 * @param original the {@code SummaryStatistics} instance to copy 126 */ 127 public SummaryStatistics(SummaryStatistics original) { 128 copy(original, this); 129 } 130 131 /** 132 * Return a {@link StatisticalSummaryValues} instance reporting current 133 * statistics. 134 * @return Current values of statistics 135 */ 136 public StatisticalSummary getSummary() { 137 return new StatisticalSummaryValues(getMean(), getVariance(), getN(), 138 getMax(), getMin(), getSum()); 139 } 140 141 /** 142 * Add a value to the data 143 * @param value the value to add 144 */ 145 public void addValue(double value) { 146 sumImpl.increment(value); 147 sumsqImpl.increment(value); 148 minImpl.increment(value); 149 maxImpl.increment(value); 150 sumLogImpl.increment(value); 151 secondMoment.increment(value); 152 // If mean, variance or geomean have been overridden, 153 // need to increment these 154 if (!(meanImpl instanceof Mean)) { 155 meanImpl.increment(value); 156 } 157 if (!(varianceImpl instanceof Variance)) { 158 varianceImpl.increment(value); 159 } 160 if (!(geoMeanImpl instanceof GeometricMean)) { 161 geoMeanImpl.increment(value); 162 } 163 n++; 164 } 165 166 /** 167 * Returns the number of available values 168 * @return The number of available values 169 */ 170 public long getN() { 171 return n; 172 } 173 174 /** 175 * Returns the sum of the values that have been added 176 * @return The sum or <code>Double.NaN</code> if no values have been added 177 */ 178 public double getSum() { 179 return sumImpl.getResult(); 180 } 181 182 /** 183 * Returns the sum of the squares of the values that have been added. 184 * <p> 185 * Double.NaN is returned if no values have been added. 186 * </p> 187 * @return The sum of squares 188 */ 189 public double getSumsq() { 190 return sumsqImpl.getResult(); 191 } 192 193 /** 194 * Returns the mean of the values that have been added. 195 * <p> 196 * Double.NaN is returned if no values have been added. 197 * </p> 198 * @return the mean 199 */ 200 public double getMean() { 201 if (mean == meanImpl) { 202 return new Mean(secondMoment).getResult(); 203 } else { 204 return meanImpl.getResult(); 205 } 206 } 207 208 /** 209 * Returns the standard deviation of the values that have been added. 210 * <p> 211 * Double.NaN is returned if no values have been added. 212 * </p> 213 * @return the standard deviation 214 */ 215 public double getStandardDeviation() { 216 double stdDev = Double.NaN; 217 if (getN() > 0) { 218 if (getN() > 1) { 219 stdDev = Math.sqrt(getVariance()); 220 } else { 221 stdDev = 0.0; 222 } 223 } 224 return stdDev; 225 } 226 227 /** 228 * Returns the variance of the values that have been added. 229 * <p> 230 * Double.NaN is returned if no values have been added. 231 * </p> 232 * @return the variance 233 */ 234 public double getVariance() { 235 if (varianceImpl == variance) { 236 return new Variance(secondMoment).getResult(); 237 } else { 238 return varianceImpl.getResult(); 239 } 240 } 241 242 /** 243 * Returns the maximum of the values that have been added. 244 * <p> 245 * Double.NaN is returned if no values have been added. 246 * </p> 247 * @return the maximum 248 */ 249 public double getMax() { 250 return maxImpl.getResult(); 251 } 252 253 /** 254 * Returns the minimum of the values that have been added. 255 * <p> 256 * Double.NaN is returned if no values have been added. 257 * </p> 258 * @return the minimum 259 */ 260 public double getMin() { 261 return minImpl.getResult(); 262 } 263 264 /** 265 * Returns the geometric mean of the values that have been added. 266 * <p> 267 * Double.NaN is returned if no values have been added. 268 * </p> 269 * @return the geometric mean 270 */ 271 public double getGeometricMean() { 272 return geoMeanImpl.getResult(); 273 } 274 275 /** 276 * Returns the sum of the logs of the values that have been added. 277 * <p> 278 * Double.NaN is returned if no values have been added. 279 * </p> 280 * @return the sum of logs 281 * @since 1.2 282 */ 283 public double getSumOfLogs() { 284 return sumLogImpl.getResult(); 285 } 286 287 /** 288 * Returns a statistic related to the Second Central Moment. Specifically, 289 * what is returned is the sum of squared deviations from the sample mean 290 * among the values that have been added. 291 * <p> 292 * Returns <code>Double.NaN</code> if no data values have been added and 293 * returns <code>0</code> if there is just one value in the data set.</p> 294 * <p> 295 * @return second central moment statistic 296 * @since 2.0 297 */ 298 public double getSecondMoment() { 299 return secondMoment.getResult(); 300 } 301 302 /** 303 * Generates a text report displaying summary statistics from values that 304 * have been added. 305 * @return String with line feeds displaying statistics 306 * @since 1.2 307 */ 308 @Override 309 public String toString() { 310 StringBuffer outBuffer = new StringBuffer(); 311 String endl = "\n"; 312 outBuffer.append("SummaryStatistics:").append(endl); 313 outBuffer.append("n: ").append(getN()).append(endl); 314 outBuffer.append("min: ").append(getMin()).append(endl); 315 outBuffer.append("max: ").append(getMax()).append(endl); 316 outBuffer.append("mean: ").append(getMean()).append(endl); 317 outBuffer.append("geometric mean: ").append(getGeometricMean()) 318 .append(endl); 319 outBuffer.append("variance: ").append(getVariance()).append(endl); 320 outBuffer.append("sum of squares: ").append(getSumsq()).append(endl); 321 outBuffer.append("standard deviation: ").append(getStandardDeviation()) 322 .append(endl); 323 outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl); 324 return outBuffer.toString(); 325 } 326 327 /** 328 * Resets all statistics and storage 329 */ 330 public void clear() { 331 this.n = 0; 332 minImpl.clear(); 333 maxImpl.clear(); 334 sumImpl.clear(); 335 sumLogImpl.clear(); 336 sumsqImpl.clear(); 337 geoMeanImpl.clear(); 338 secondMoment.clear(); 339 if (meanImpl != mean) { 340 meanImpl.clear(); 341 } 342 if (varianceImpl != variance) { 343 varianceImpl.clear(); 344 } 345 } 346 347 /** 348 * Returns true iff <code>object</code> is a 349 * <code>SummaryStatistics</code> instance and all statistics have the 350 * same values as this. 351 * @param object the object to test equality against. 352 * @return true if object equals this 353 */ 354 @Override 355 public boolean equals(Object object) { 356 if (object == this) { 357 return true; 358 } 359 if (object instanceof SummaryStatistics == false) { 360 return false; 361 } 362 SummaryStatistics stat = (SummaryStatistics)object; 363 return MathUtils.equals(stat.getGeometricMean(), getGeometricMean()) && 364 MathUtils.equals(stat.getMax(), getMax()) && 365 MathUtils.equals(stat.getMean(), getMean()) && 366 MathUtils.equals(stat.getMin(), getMin()) && 367 MathUtils.equals(stat.getN(), getN()) && 368 MathUtils.equals(stat.getSum(), getSum()) && 369 MathUtils.equals(stat.getSumsq(), getSumsq()) && 370 MathUtils.equals(stat.getVariance(), getVariance()); 371 } 372 373 /** 374 * Returns hash code based on values of statistics 375 * @return hash code 376 */ 377 @Override 378 public int hashCode() { 379 int result = 31 + MathUtils.hash(getGeometricMean()); 380 result = result * 31 + MathUtils.hash(getGeometricMean()); 381 result = result * 31 + MathUtils.hash(getMax()); 382 result = result * 31 + MathUtils.hash(getMean()); 383 result = result * 31 + MathUtils.hash(getMin()); 384 result = result * 31 + MathUtils.hash(getN()); 385 result = result * 31 + MathUtils.hash(getSum()); 386 result = result * 31 + MathUtils.hash(getSumsq()); 387 result = result * 31 + MathUtils.hash(getVariance()); 388 return result; 389 } 390 391 // Getters and setters for statistics implementations 392 /** 393 * Returns the currently configured Sum implementation 394 * @return the StorelessUnivariateStatistic implementing the sum 395 * @since 1.2 396 */ 397 public StorelessUnivariateStatistic getSumImpl() { 398 return sumImpl; 399 } 400 401 /** 402 * <p> 403 * Sets the implementation for the Sum. 404 * </p> 405 * <p> 406 * This method must be activated before any data has been added - i.e., 407 * before {@link #addValue(double) addValue} has been used to add data; 408 * otherwise an IllegalStateException will be thrown. 409 * </p> 410 * @param sumImpl the StorelessUnivariateStatistic instance to use for 411 * computing the Sum 412 * @throws IllegalStateException if data has already been added (i.e if n > 413 * 0) 414 * @since 1.2 415 */ 416 public void setSumImpl(StorelessUnivariateStatistic sumImpl) { 417 checkEmpty(); 418 this.sumImpl = sumImpl; 419 } 420 421 /** 422 * Returns the currently configured sum of squares implementation 423 * @return the StorelessUnivariateStatistic implementing the sum of squares 424 * @since 1.2 425 */ 426 public StorelessUnivariateStatistic getSumsqImpl() { 427 return sumsqImpl; 428 } 429 430 /** 431 * <p> 432 * Sets the implementation for the sum of squares. 433 * </p> 434 * <p> 435 * This method must be activated before any data has been added - i.e., 436 * before {@link #addValue(double) addValue} has been used to add data; 437 * otherwise an IllegalStateException will be thrown. 438 * </p> 439 * @param sumsqImpl the StorelessUnivariateStatistic instance to use for 440 * computing the sum of squares 441 * @throws IllegalStateException if data has already been added (i.e if n > 442 * 0) 443 * @since 1.2 444 */ 445 public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) { 446 checkEmpty(); 447 this.sumsqImpl = sumsqImpl; 448 } 449 450 /** 451 * Returns the currently configured minimum implementation 452 * @return the StorelessUnivariateStatistic implementing the minimum 453 * @since 1.2 454 */ 455 public StorelessUnivariateStatistic getMinImpl() { 456 return minImpl; 457 } 458 459 /** 460 * <p> 461 * Sets the implementation for the minimum. 462 * </p> 463 * <p> 464 * This method must be activated before any data has been added - i.e., 465 * before {@link #addValue(double) addValue} has been used to add data; 466 * otherwise an IllegalStateException will be thrown. 467 * </p> 468 * @param minImpl the StorelessUnivariateStatistic instance to use for 469 * computing the minimum 470 * @throws IllegalStateException if data has already been added (i.e if n > 471 * 0) 472 * @since 1.2 473 */ 474 public void setMinImpl(StorelessUnivariateStatistic minImpl) { 475 checkEmpty(); 476 this.minImpl = minImpl; 477 } 478 479 /** 480 * Returns the currently configured maximum implementation 481 * @return the StorelessUnivariateStatistic implementing the maximum 482 * @since 1.2 483 */ 484 public StorelessUnivariateStatistic getMaxImpl() { 485 return maxImpl; 486 } 487 488 /** 489 * <p> 490 * Sets the implementation for the maximum. 491 * </p> 492 * <p> 493 * This method must be activated before any data has been added - i.e., 494 * before {@link #addValue(double) addValue} has been used to add data; 495 * otherwise an IllegalStateException will be thrown. 496 * </p> 497 * @param maxImpl the StorelessUnivariateStatistic instance to use for 498 * computing the maximum 499 * @throws IllegalStateException if data has already been added (i.e if n > 500 * 0) 501 * @since 1.2 502 */ 503 public void setMaxImpl(StorelessUnivariateStatistic maxImpl) { 504 checkEmpty(); 505 this.maxImpl = maxImpl; 506 } 507 508 /** 509 * Returns the currently configured sum of logs implementation 510 * @return the StorelessUnivariateStatistic implementing the log sum 511 * @since 1.2 512 */ 513 public StorelessUnivariateStatistic getSumLogImpl() { 514 return sumLogImpl; 515 } 516 517 /** 518 * <p> 519 * Sets the implementation for the sum of logs. 520 * </p> 521 * <p> 522 * This method must be activated before any data has been added - i.e., 523 * before {@link #addValue(double) addValue} has been used to add data; 524 * otherwise an IllegalStateException will be thrown. 525 * </p> 526 * @param sumLogImpl the StorelessUnivariateStatistic instance to use for 527 * computing the log sum 528 * @throws IllegalStateException if data has already been added (i.e if n > 529 * 0) 530 * @since 1.2 531 */ 532 public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) { 533 checkEmpty(); 534 this.sumLogImpl = sumLogImpl; 535 geoMean.setSumLogImpl(sumLogImpl); 536 } 537 538 /** 539 * Returns the currently configured geometric mean implementation 540 * @return the StorelessUnivariateStatistic implementing the geometric mean 541 * @since 1.2 542 */ 543 public StorelessUnivariateStatistic getGeoMeanImpl() { 544 return geoMeanImpl; 545 } 546 547 /** 548 * <p> 549 * Sets the implementation for the geometric mean. 550 * </p> 551 * <p> 552 * This method must be activated before any data has been added - i.e., 553 * before {@link #addValue(double) addValue} has been used to add data; 554 * otherwise an IllegalStateException will be thrown. 555 * </p> 556 * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for 557 * computing the geometric mean 558 * @throws IllegalStateException if data has already been added (i.e if n > 559 * 0) 560 * @since 1.2 561 */ 562 public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) { 563 checkEmpty(); 564 this.geoMeanImpl = geoMeanImpl; 565 } 566 567 /** 568 * Returns the currently configured mean implementation 569 * @return the StorelessUnivariateStatistic implementing the mean 570 * @since 1.2 571 */ 572 public StorelessUnivariateStatistic getMeanImpl() { 573 return meanImpl; 574 } 575 576 /** 577 * <p> 578 * Sets the implementation for the mean. 579 * </p> 580 * <p> 581 * This method must be activated before any data has been added - i.e., 582 * before {@link #addValue(double) addValue} has been used to add data; 583 * otherwise an IllegalStateException will be thrown. 584 * </p> 585 * @param meanImpl the StorelessUnivariateStatistic instance to use for 586 * computing the mean 587 * @throws IllegalStateException if data has already been added (i.e if n > 588 * 0) 589 * @since 1.2 590 */ 591 public void setMeanImpl(StorelessUnivariateStatistic meanImpl) { 592 checkEmpty(); 593 this.meanImpl = meanImpl; 594 } 595 596 /** 597 * Returns the currently configured variance implementation 598 * @return the StorelessUnivariateStatistic implementing the variance 599 * @since 1.2 600 */ 601 public StorelessUnivariateStatistic getVarianceImpl() { 602 return varianceImpl; 603 } 604 605 /** 606 * <p> 607 * Sets the implementation for the variance. 608 * </p> 609 * <p> 610 * This method must be activated before any data has been added - i.e., 611 * before {@link #addValue(double) addValue} has been used to add data; 612 * otherwise an IllegalStateException will be thrown. 613 * </p> 614 * @param varianceImpl the StorelessUnivariateStatistic instance to use for 615 * computing the variance 616 * @throws IllegalStateException if data has already been added (i.e if n > 617 * 0) 618 * @since 1.2 619 */ 620 public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) { 621 checkEmpty(); 622 this.varianceImpl = varianceImpl; 623 } 624 625 /** 626 * Throws IllegalStateException if n > 0. 627 */ 628 private void checkEmpty() { 629 if (n > 0) { 630 throw MathRuntimeException.createIllegalStateException( 631 "{0} values have been added before statistic is configured", 632 n); 633 } 634 } 635 636 /** 637 * Returns a copy of this SummaryStatistics instance with the same internal state. 638 * 639 * @return a copy of this 640 */ 641 public SummaryStatistics copy() { 642 SummaryStatistics result = new SummaryStatistics(); 643 copy(this, result); 644 return result; 645 } 646 647 /** 648 * Copies source to dest. 649 * <p>Neither source nor dest can be null.</p> 650 * 651 * @param source SummaryStatistics to copy 652 * @param dest SummaryStatistics to copy to 653 * @throws NullPointerException if either source or dest is null 654 */ 655 public static void copy(SummaryStatistics source, SummaryStatistics dest) { 656 dest.maxImpl = source.maxImpl.copy(); 657 dest.meanImpl = source.meanImpl.copy(); 658 dest.minImpl = source.minImpl.copy(); 659 dest.sumImpl = source.sumImpl.copy(); 660 dest.varianceImpl = source.varianceImpl.copy(); 661 dest.sumLogImpl = source.sumLogImpl.copy(); 662 dest.sumsqImpl = source.sumsqImpl.copy(); 663 if (source.getGeoMeanImpl() instanceof GeometricMean) { 664 // Keep geoMeanImpl, sumLogImpl in synch 665 dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl); 666 } else { 667 dest.geoMeanImpl = source.geoMeanImpl.copy(); 668 } 669 SecondMoment.copy(source.secondMoment, dest.secondMoment); 670 dest.n = source.n; 671 672 // Make sure that if stat == statImpl in source, same 673 // holds in dest; otherwise copy stat 674 if (source.geoMean == source.geoMeanImpl) { 675 dest.geoMean = (GeometricMean) dest.geoMeanImpl; 676 } else { 677 GeometricMean.copy(source.geoMean, dest.geoMean); 678 } 679 if (source.max == source.maxImpl) { 680 dest.max = (Max) dest.maxImpl; 681 } else { 682 Max.copy(source.max, dest.max); 683 } 684 if (source.mean == source.meanImpl) { 685 dest.mean = (Mean) dest.meanImpl; 686 } else { 687 Mean.copy(source.mean, dest.mean); 688 } 689 if (source.min == source.minImpl) { 690 dest.min = (Min) dest.minImpl; 691 } else { 692 Min.copy(source.min, dest.min); 693 } 694 if (source.sum == source.sumImpl) { 695 dest.sum = (Sum) dest.sumImpl; 696 } else { 697 Sum.copy(source.sum, dest.sum); 698 } 699 if (source.variance == source.varianceImpl) { 700 dest.variance = (Variance) dest.varianceImpl; 701 } else { 702 Variance.copy(source.variance, dest.variance); 703 } 704 if (source.sumLog == source.sumLogImpl) { 705 dest.sumLog = (SumOfLogs) dest.sumLogImpl; 706 } else { 707 SumOfLogs.copy(source.sumLog, dest.sumLog); 708 } 709 if (source.sumsq == source.sumsqImpl) { 710 dest.sumsq = (SumOfSquares) dest.sumsqImpl; 711 } else { 712 SumOfSquares.copy(source.sumsq, dest.sumsq); 713 } 714 } 715 }