001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.stat; 018 019 import java.io.Serializable; 020 import java.text.NumberFormat; 021 import java.util.Iterator; 022 import java.util.Comparator; 023 import java.util.TreeMap; 024 025 import org.apache.commons.math.MathRuntimeException; 026 027 /** 028 * Maintains a frequency distribution. 029 * <p> 030 * Accepts int, long, char or Comparable values. New values added must be 031 * comparable to those that have been added, otherwise the add method will 032 * throw an IllegalArgumentException.</p> 033 * <p> 034 * Integer values (int, long, Integer, Long) are not distinguished by type -- 035 * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have 036 * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p> 037 * <p> 038 * char values are converted by <code>addValue</code> to Character instances. 039 * As such, these values are not comparable to integral values, so attempts 040 * to combine integral types with chars in a frequency distribution will fail. 041 * </p> 042 * <p> 043 * The values are ordered using the default (natural order), unless a 044 * <code>Comparator</code> is supplied in the constructor.</p> 045 * 046 * @version $Revision: 922722 $ $Date: 2010-03-13 21:15:01 -0500 (Sat, 13 Mar 2010) $ 047 */ 048 public class Frequency implements Serializable { 049 050 /** Serializable version identifier */ 051 private static final long serialVersionUID = -3845586908418844111L; 052 053 /** underlying collection */ 054 private final TreeMap<Comparable<?>, Long> freqTable; 055 056 /** 057 * Default constructor. 058 */ 059 public Frequency() { 060 freqTable = new TreeMap<Comparable<?>, Long>(); 061 } 062 063 /** 064 * Constructor allowing values Comparator to be specified. 065 * 066 * @param comparator Comparator used to order values 067 */ 068 @SuppressWarnings("unchecked") // TODO is the cast OK? 069 public Frequency(Comparator<?> comparator) { 070 freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator); 071 } 072 073 /** 074 * Return a string representation of this frequency 075 * distribution. 076 * 077 * @return a string representation. 078 */ 079 @Override 080 public String toString() { 081 NumberFormat nf = NumberFormat.getPercentInstance(); 082 StringBuffer outBuffer = new StringBuffer(); 083 outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n"); 084 Iterator<Comparable<?>> iter = freqTable.keySet().iterator(); 085 while (iter.hasNext()) { 086 Comparable<?> value = iter.next(); 087 outBuffer.append(value); 088 outBuffer.append('\t'); 089 outBuffer.append(getCount(value)); 090 outBuffer.append('\t'); 091 outBuffer.append(nf.format(getPct(value))); 092 outBuffer.append('\t'); 093 outBuffer.append(nf.format(getCumPct(value))); 094 outBuffer.append('\n'); 095 } 096 return outBuffer.toString(); 097 } 098 099 /** 100 * Adds 1 to the frequency count for v. 101 * <p> 102 * If other objects have already been added to this Frequency, v must 103 * be comparable to those that have already been added. 104 * </p> 105 * 106 * @param v the value to add. 107 * @throws IllegalArgumentException if <code>v</code> is not Comparable, 108 * or is not comparable with previous entries 109 * @deprecated use {@link #addValue(Comparable)} instead 110 */ 111 @Deprecated 112 public void addValue(Object v) { 113 if (v instanceof Comparable<?>){ 114 addValue((Comparable<?>) v); 115 } else { 116 throw MathRuntimeException.createIllegalArgumentException( 117 "class ({0}) does not implement Comparable", 118 v.getClass().getName()); 119 } 120 } 121 122 /** 123 * Adds 1 to the frequency count for v. 124 * <p> 125 * If other objects have already been added to this Frequency, v must 126 * be comparable to those that have already been added. 127 * </p> 128 * 129 * @param v the value to add. 130 * @throws IllegalArgumentException if <code>v</code> is not comparable with previous entries 131 */ 132 public void addValue(Comparable<?> v){ 133 Comparable<?> obj = v; 134 if (v instanceof Integer) { 135 obj = Long.valueOf(((Integer) v).longValue()); 136 } 137 try { 138 Long count = freqTable.get(obj); 139 if (count == null) { 140 freqTable.put(obj, Long.valueOf(1)); 141 } else { 142 freqTable.put(obj, Long.valueOf(count.longValue() + 1)); 143 } 144 } catch (ClassCastException ex) { 145 //TreeMap will throw ClassCastException if v is not comparable 146 throw MathRuntimeException.createIllegalArgumentException( 147 "instance of class {0} not comparable to existing values", 148 v.getClass().getName()); 149 } 150 } 151 152 /** 153 * Adds 1 to the frequency count for v. 154 * 155 * @param v the value to add. 156 */ 157 public void addValue(int v) { 158 addValue(Long.valueOf(v)); 159 } 160 161 /** 162 * Adds 1 to the frequency count for v. 163 * 164 * @param v the value to add. 165 * @deprecated to be removed in math 3.0 166 */ 167 @Deprecated 168 public void addValue(Integer v) { 169 addValue(Long.valueOf(v.longValue())); 170 } 171 172 /** 173 * Adds 1 to the frequency count for v. 174 * 175 * @param v the value to add. 176 */ 177 public void addValue(long v) { 178 addValue(Long.valueOf(v)); 179 } 180 181 /** 182 * Adds 1 to the frequency count for v. 183 * 184 * @param v the value to add. 185 */ 186 public void addValue(char v) { 187 addValue(Character.valueOf(v)); 188 } 189 190 /** Clears the frequency table */ 191 public void clear() { 192 freqTable.clear(); 193 } 194 195 /** 196 * Returns an Iterator over the set of values that have been added. 197 * <p> 198 * If added values are integral (i.e., integers, longs, Integers, or Longs), 199 * they are converted to Longs when they are added, so the objects returned 200 * by the Iterator will in this case be Longs.</p> 201 * 202 * @return values Iterator 203 */ 204 public Iterator<Comparable<?>> valuesIterator() { 205 return freqTable.keySet().iterator(); 206 } 207 208 //------------------------------------------------------------------------- 209 210 /** 211 * Returns the sum of all frequencies. 212 * 213 * @return the total frequency count. 214 */ 215 public long getSumFreq() { 216 long result = 0; 217 Iterator<Long> iterator = freqTable.values().iterator(); 218 while (iterator.hasNext()) { 219 result += iterator.next().longValue(); 220 } 221 return result; 222 } 223 224 /** 225 * Returns the number of values = v. 226 * Returns 0 if the value is not comparable. 227 * 228 * @param v the value to lookup. 229 * @return the frequency of v. 230 * @deprecated replaced by {@link #getCount(Comparable)} as of 2.0 231 */ 232 @Deprecated 233 public long getCount(Object v) { 234 return getCount((Comparable<?>) v); 235 } 236 237 /** 238 * Returns the number of values = v. 239 * Returns 0 if the value is not comparable. 240 * 241 * @param v the value to lookup. 242 * @return the frequency of v. 243 */ 244 public long getCount(Comparable<?> v) { 245 if (v instanceof Integer) { 246 return getCount(((Integer) v).longValue()); 247 } 248 long result = 0; 249 try { 250 Long count = freqTable.get(v); 251 if (count != null) { 252 result = count.longValue(); 253 } 254 } catch (ClassCastException ex) { 255 // ignore and return 0 -- ClassCastException will be thrown if value is not comparable 256 } 257 return result; 258 } 259 260 /** 261 * Returns the number of values = v. 262 * 263 * @param v the value to lookup. 264 * @return the frequency of v. 265 */ 266 public long getCount(int v) { 267 return getCount(Long.valueOf(v)); 268 } 269 270 /** 271 * Returns the number of values = v. 272 * 273 * @param v the value to lookup. 274 * @return the frequency of v. 275 */ 276 public long getCount(long v) { 277 return getCount(Long.valueOf(v)); 278 } 279 280 /** 281 * Returns the number of values = v. 282 * 283 * @param v the value to lookup. 284 * @return the frequency of v. 285 */ 286 public long getCount(char v) { 287 return getCount(Character.valueOf(v)); 288 } 289 290 //------------------------------------------------------------- 291 292 /** 293 * Returns the percentage of values that are equal to v 294 * (as a proportion between 0 and 1). 295 * <p> 296 * Returns <code>Double.NaN</code> if no values have been added.</p> 297 * 298 * @param v the value to lookup 299 * @return the proportion of values equal to v 300 * @deprecated replaced by {@link #getPct(Comparable)} as of 2.0 301 */ 302 @Deprecated 303 public double getPct(Object v) { 304 return getPct((Comparable<?>) v); 305 } 306 307 /** 308 * Returns the percentage of values that are equal to v 309 * (as a proportion between 0 and 1). 310 * <p> 311 * Returns <code>Double.NaN</code> if no values have been added.</p> 312 * 313 * @param v the value to lookup 314 * @return the proportion of values equal to v 315 */ 316 public double getPct(Comparable<?> v) { 317 final long sumFreq = getSumFreq(); 318 if (sumFreq == 0) { 319 return Double.NaN; 320 } 321 return (double) getCount(v) / (double) sumFreq; 322 } 323 324 /** 325 * Returns the percentage of values that are equal to v 326 * (as a proportion between 0 and 1). 327 * 328 * @param v the value to lookup 329 * @return the proportion of values equal to v 330 */ 331 public double getPct(int v) { 332 return getPct(Long.valueOf(v)); 333 } 334 335 /** 336 * Returns the percentage of values that are equal to v 337 * (as a proportion between 0 and 1). 338 * 339 * @param v the value to lookup 340 * @return the proportion of values equal to v 341 */ 342 public double getPct(long v) { 343 return getPct(Long.valueOf(v)); 344 } 345 346 /** 347 * Returns the percentage of values that are equal to v 348 * (as a proportion between 0 and 1). 349 * 350 * @param v the value to lookup 351 * @return the proportion of values equal to v 352 */ 353 public double getPct(char v) { 354 return getPct(Character.valueOf(v)); 355 } 356 357 //----------------------------------------------------------------------------------------- 358 359 /** 360 * Returns the cumulative frequency of values less than or equal to v. 361 * <p> 362 * Returns 0 if v is not comparable to the values set.</p> 363 * 364 * @param v the value to lookup. 365 * @return the proportion of values equal to v 366 * @deprecated replaced by {@link #getCumFreq(Comparable)} as of 2.0 367 */ 368 @Deprecated 369 public long getCumFreq(Object v) { 370 return getCumFreq((Comparable<?>) v); 371 } 372 373 /** 374 * Returns the cumulative frequency of values less than or equal to v. 375 * <p> 376 * Returns 0 if v is not comparable to the values set.</p> 377 * 378 * @param v the value to lookup. 379 * @return the proportion of values equal to v 380 */ 381 public long getCumFreq(Comparable<?> v) { 382 if (getSumFreq() == 0) { 383 return 0; 384 } 385 if (v instanceof Integer) { 386 return getCumFreq(((Integer) v).longValue()); 387 } 388 @SuppressWarnings("unchecked") // OK, freqTable is Comparable<?> 389 Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator(); 390 if (c == null) { 391 c = new NaturalComparator(); 392 } 393 long result = 0; 394 395 try { 396 Long value = freqTable.get(v); 397 if (value != null) { 398 result = value.longValue(); 399 } 400 } catch (ClassCastException ex) { 401 return result; // v is not comparable 402 } 403 404 if (c.compare(v, freqTable.firstKey()) < 0) { 405 return 0; // v is comparable, but less than first value 406 } 407 408 if (c.compare(v, freqTable.lastKey()) >= 0) { 409 return getSumFreq(); // v is comparable, but greater than the last value 410 } 411 412 Iterator<Comparable<?>> values = valuesIterator(); 413 while (values.hasNext()) { 414 Comparable<?> nextValue = values.next(); 415 if (c.compare(v, nextValue) > 0) { 416 result += getCount(nextValue); 417 } else { 418 return result; 419 } 420 } 421 return result; 422 } 423 424 /** 425 * Returns the cumulative frequency of values less than or equal to v. 426 * <p> 427 * Returns 0 if v is not comparable to the values set.</p> 428 * 429 * @param v the value to lookup 430 * @return the proportion of values equal to v 431 */ 432 public long getCumFreq(int v) { 433 return getCumFreq(Long.valueOf(v)); 434 } 435 436 /** 437 * Returns the cumulative frequency of values less than or equal to v. 438 * <p> 439 * Returns 0 if v is not comparable to the values set.</p> 440 * 441 * @param v the value to lookup 442 * @return the proportion of values equal to v 443 */ 444 public long getCumFreq(long v) { 445 return getCumFreq(Long.valueOf(v)); 446 } 447 448 /** 449 * Returns the cumulative frequency of values less than or equal to v. 450 * <p> 451 * Returns 0 if v is not comparable to the values set.</p> 452 * 453 * @param v the value to lookup 454 * @return the proportion of values equal to v 455 */ 456 public long getCumFreq(char v) { 457 return getCumFreq(Character.valueOf(v)); 458 } 459 460 //---------------------------------------------------------------------------------------------- 461 462 /** 463 * Returns the cumulative percentage of values less than or equal to v 464 * (as a proportion between 0 and 1). 465 * <p> 466 * Returns <code>Double.NaN</code> if no values have been added. 467 * Returns 0 if at least one value has been added, but v is not comparable 468 * to the values set.</p> 469 * 470 * @param v the value to lookup 471 * @return the proportion of values less than or equal to v 472 * @deprecated replaced by {@link #getCumPct(Comparable)} as of 2.0 473 */ 474 @Deprecated 475 public double getCumPct(Object v) { 476 return getCumPct((Comparable<?>) v); 477 478 } 479 480 /** 481 * Returns the cumulative percentage of values less than or equal to v 482 * (as a proportion between 0 and 1). 483 * <p> 484 * Returns <code>Double.NaN</code> if no values have been added. 485 * Returns 0 if at least one value has been added, but v is not comparable 486 * to the values set.</p> 487 * 488 * @param v the value to lookup 489 * @return the proportion of values less than or equal to v 490 */ 491 public double getCumPct(Comparable<?> v) { 492 final long sumFreq = getSumFreq(); 493 if (sumFreq == 0) { 494 return Double.NaN; 495 } 496 return (double) getCumFreq(v) / (double) sumFreq; 497 } 498 499 /** 500 * Returns the cumulative percentage of values less than or equal to v 501 * (as a proportion between 0 and 1). 502 * <p> 503 * Returns 0 if v is not comparable to the values set.</p> 504 * 505 * @param v the value to lookup 506 * @return the proportion of values less than or equal to v 507 */ 508 public double getCumPct(int v) { 509 return getCumPct(Long.valueOf(v)); 510 } 511 512 /** 513 * Returns the cumulative percentage of values less than or equal to v 514 * (as a proportion between 0 and 1). 515 * <p> 516 * Returns 0 if v is not comparable to the values set.</p> 517 * 518 * @param v the value to lookup 519 * @return the proportion of values less than or equal to v 520 */ 521 public double getCumPct(long v) { 522 return getCumPct(Long.valueOf(v)); 523 } 524 525 /** 526 * Returns the cumulative percentage of values less than or equal to v 527 * (as a proportion between 0 and 1). 528 * <p> 529 * Returns 0 if v is not comparable to the values set.</p> 530 * 531 * @param v the value to lookup 532 * @return the proportion of values less than or equal to v 533 */ 534 public double getCumPct(char v) { 535 return getCumPct(Character.valueOf(v)); 536 } 537 538 /** 539 * A Comparator that compares comparable objects using the 540 * natural order. Copied from Commons Collections ComparableComparator. 541 */ 542 private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable { 543 544 /** Serializable version identifier */ 545 private static final long serialVersionUID = -3852193713161395148L; 546 547 /** 548 * Compare the two {@link Comparable Comparable} arguments. 549 * This method is equivalent to: 550 * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre> 551 * 552 * @param o1 the first object 553 * @param o2 the second object 554 * @return result of comparison 555 * @throws NullPointerException when <i>o1</i> is <code>null</code>, 556 * or when <code>((Comparable)o1).compareTo(o2)</code> does 557 * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable}, 558 * or when <code>((Comparable)o1).compareTo(o2)</code> does 559 */ 560 @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc 561 public int compare(Comparable<T> o1, Comparable<T> o2) { 562 return o1.compareTo((T) o2); 563 } 564 } 565 566 /** {@inheritDoc} */ 567 @Override 568 public int hashCode() { 569 final int prime = 31; 570 int result = 1; 571 result = prime * result + 572 ((freqTable == null) ? 0 : freqTable.hashCode()); 573 return result; 574 } 575 576 /** {@inheritDoc} */ 577 @Override 578 public boolean equals(Object obj) { 579 if (this == obj) 580 return true; 581 if (!(obj instanceof Frequency)) 582 return false; 583 Frequency other = (Frequency) obj; 584 if (freqTable == null) { 585 if (other.freqTable != null) 586 return false; 587 } else if (!freqTable.equals(other.freqTable)) 588 return false; 589 return true; 590 } 591 592 }