001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat;
018    
019    import java.io.Serializable;
020    import java.text.NumberFormat;
021    import java.util.Iterator;
022    import java.util.Comparator;
023    import java.util.TreeMap;
024    
025    import org.apache.commons.math.MathRuntimeException;
026    
027    /**
028     * Maintains a frequency distribution.
029     * <p>
030     * Accepts int, long, char or Comparable values.  New values added must be
031     * comparable to those that have been added, otherwise the add method will
032     * throw an IllegalArgumentException.</p>
033     * <p>
034     * Integer values (int, long, Integer, Long) are not distinguished by type --
035     * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
036     * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
037     * <p>
038     * char values are converted by <code>addValue</code> to Character instances.
039     * As such, these values are not comparable to integral values, so attempts
040     * to combine integral types with chars in a frequency distribution will fail.
041     * </p>
042     * <p>
043     * The values are ordered using the default (natural order), unless a
044     * <code>Comparator</code> is supplied in the constructor.</p>
045     *
046     * @version $Revision: 922722 $ $Date: 2010-03-13 21:15:01 -0500 (Sat, 13 Mar 2010) $
047     */
048    public class Frequency implements Serializable {
049    
050        /** Serializable version identifier */
051        private static final long serialVersionUID = -3845586908418844111L;
052    
053        /** underlying collection */
054        private final TreeMap<Comparable<?>, Long> freqTable;
055    
056        /**
057         * Default constructor.
058         */
059        public Frequency() {
060            freqTable = new TreeMap<Comparable<?>, Long>();
061        }
062    
063        /**
064         * Constructor allowing values Comparator to be specified.
065         *
066         * @param comparator Comparator used to order values
067         */
068        @SuppressWarnings("unchecked") // TODO is the cast OK?
069        public Frequency(Comparator<?> comparator) {
070            freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
071        }
072    
073        /**
074         * Return a string representation of this frequency
075         * distribution.
076         *
077         * @return a string representation.
078         */
079        @Override
080        public String toString() {
081            NumberFormat nf = NumberFormat.getPercentInstance();
082            StringBuffer outBuffer = new StringBuffer();
083            outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
084            Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
085            while (iter.hasNext()) {
086                Comparable<?> value = iter.next();
087                outBuffer.append(value);
088                outBuffer.append('\t');
089                outBuffer.append(getCount(value));
090                outBuffer.append('\t');
091                outBuffer.append(nf.format(getPct(value)));
092                outBuffer.append('\t');
093                outBuffer.append(nf.format(getCumPct(value)));
094                outBuffer.append('\n');
095            }
096            return outBuffer.toString();
097        }
098    
099        /**
100         * Adds 1 to the frequency count for v.
101         * <p>
102         * If other objects have already been added to this Frequency, v must
103         * be comparable to those that have already been added.
104         * </p>
105         *
106         * @param v the value to add.
107         * @throws IllegalArgumentException if <code>v</code> is not Comparable,
108         *         or is not comparable with previous entries
109         * @deprecated use {@link #addValue(Comparable)} instead
110         */
111        @Deprecated
112        public void addValue(Object v) {
113            if (v instanceof Comparable<?>){
114                addValue((Comparable<?>) v);
115            } else {
116                throw MathRuntimeException.createIllegalArgumentException(
117                      "class ({0}) does not implement Comparable",
118                      v.getClass().getName());
119            }
120        }
121    
122        /**
123         * Adds 1 to the frequency count for v.
124         * <p>
125         * If other objects have already been added to this Frequency, v must
126         * be comparable to those that have already been added.
127         * </p>
128         *
129         * @param v the value to add.
130         * @throws IllegalArgumentException if <code>v</code> is not comparable with previous entries
131         */
132        public void addValue(Comparable<?> v){
133            Comparable<?> obj = v;
134            if (v instanceof Integer) {
135               obj = Long.valueOf(((Integer) v).longValue());
136            }
137            try {
138                Long count = freqTable.get(obj);
139                if (count == null) {
140                    freqTable.put(obj, Long.valueOf(1));
141                } else {
142                    freqTable.put(obj, Long.valueOf(count.longValue() + 1));
143                }
144            } catch (ClassCastException ex) {
145                //TreeMap will throw ClassCastException if v is not comparable
146                throw MathRuntimeException.createIllegalArgumentException(
147                      "instance of class {0} not comparable to existing values",
148                      v.getClass().getName());
149            }
150        }
151    
152        /**
153         * Adds 1 to the frequency count for v.
154         *
155         * @param v the value to add.
156         */
157        public void addValue(int v) {
158            addValue(Long.valueOf(v));
159        }
160    
161        /**
162         * Adds 1 to the frequency count for v.
163         *
164         * @param v the value to add.
165         * @deprecated to be removed in math 3.0
166         */
167        @Deprecated
168        public void addValue(Integer v) {
169            addValue(Long.valueOf(v.longValue()));
170        }
171    
172        /**
173         * Adds 1 to the frequency count for v.
174         *
175         * @param v the value to add.
176         */
177        public void addValue(long v) {
178            addValue(Long.valueOf(v));
179        }
180    
181        /**
182         * Adds 1 to the frequency count for v.
183         *
184         * @param v the value to add.
185         */
186        public void addValue(char v) {
187            addValue(Character.valueOf(v));
188        }
189    
190        /** Clears the frequency table */
191        public void clear() {
192            freqTable.clear();
193        }
194    
195        /**
196         * Returns an Iterator over the set of values that have been added.
197         * <p>
198         * If added values are integral (i.e., integers, longs, Integers, or Longs),
199         * they are converted to Longs when they are added, so the objects returned
200         * by the Iterator will in this case be Longs.</p>
201         *
202         * @return values Iterator
203         */
204        public Iterator<Comparable<?>> valuesIterator() {
205            return freqTable.keySet().iterator();
206        }
207    
208        //-------------------------------------------------------------------------
209    
210        /**
211         * Returns the sum of all frequencies.
212         *
213         * @return the total frequency count.
214         */
215        public long getSumFreq() {
216            long result = 0;
217            Iterator<Long> iterator = freqTable.values().iterator();
218            while (iterator.hasNext())  {
219                result += iterator.next().longValue();
220            }
221            return result;
222        }
223    
224        /**
225         * Returns the number of values = v.
226         * Returns 0 if the value is not comparable.
227         *
228         * @param v the value to lookup.
229         * @return the frequency of v.
230         * @deprecated replaced by {@link #getCount(Comparable)} as of 2.0
231         */
232        @Deprecated
233        public long getCount(Object v) {
234            return getCount((Comparable<?>) v);
235        }
236    
237        /**
238         * Returns the number of values = v.
239         * Returns 0 if the value is not comparable.
240         *
241         * @param v the value to lookup.
242         * @return the frequency of v.
243         */
244        public long getCount(Comparable<?> v) {
245            if (v instanceof Integer) {
246                return getCount(((Integer) v).longValue());
247            }
248            long result = 0;
249            try {
250                Long count =  freqTable.get(v);
251                if (count != null) {
252                    result = count.longValue();
253                }
254            } catch (ClassCastException ex) {
255                // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
256            }
257            return result;
258        }
259    
260        /**
261         * Returns the number of values = v.
262         *
263         * @param v the value to lookup.
264         * @return the frequency of v.
265         */
266        public long getCount(int v) {
267            return getCount(Long.valueOf(v));
268        }
269    
270        /**
271         * Returns the number of values = v.
272         *
273         * @param v the value to lookup.
274         * @return the frequency of v.
275         */
276        public long getCount(long v) {
277            return getCount(Long.valueOf(v));
278        }
279    
280        /**
281         * Returns the number of values = v.
282         *
283         * @param v the value to lookup.
284         * @return the frequency of v.
285         */
286        public long getCount(char v) {
287            return getCount(Character.valueOf(v));
288        }
289    
290        //-------------------------------------------------------------
291    
292        /**
293          * Returns the percentage of values that are equal to v
294         * (as a proportion between 0 and 1).
295         * <p>
296         * Returns <code>Double.NaN</code> if no values have been added.</p>
297         *
298         * @param v the value to lookup
299         * @return the proportion of values equal to v
300         * @deprecated replaced by {@link #getPct(Comparable)} as of 2.0
301         */
302        @Deprecated
303        public double getPct(Object v) {
304            return getPct((Comparable<?>) v);
305        }
306    
307        /**
308         * Returns the percentage of values that are equal to v
309         * (as a proportion between 0 and 1).
310         * <p>
311         * Returns <code>Double.NaN</code> if no values have been added.</p>
312         *
313         * @param v the value to lookup
314         * @return the proportion of values equal to v
315         */
316        public double getPct(Comparable<?> v) {
317            final long sumFreq = getSumFreq();
318            if (sumFreq == 0) {
319                return Double.NaN;
320            }
321            return (double) getCount(v) / (double) sumFreq;
322        }
323    
324        /**
325         * Returns the percentage of values that are equal to v
326         * (as a proportion between 0 and 1).
327         *
328         * @param v the value to lookup
329         * @return the proportion of values equal to v
330         */
331        public double getPct(int v) {
332            return getPct(Long.valueOf(v));
333        }
334    
335        /**
336         * Returns the percentage of values that are equal to v
337         * (as a proportion between 0 and 1).
338         *
339         * @param v the value to lookup
340         * @return the proportion of values equal to v
341         */
342        public double getPct(long v) {
343            return getPct(Long.valueOf(v));
344        }
345    
346        /**
347         * Returns the percentage of values that are equal to v
348         * (as a proportion between 0 and 1).
349         *
350         * @param v the value to lookup
351         * @return the proportion of values equal to v
352         */
353        public double getPct(char v) {
354            return getPct(Character.valueOf(v));
355        }
356    
357        //-----------------------------------------------------------------------------------------
358    
359        /**
360         * Returns the cumulative frequency of values less than or equal to v.
361         * <p>
362         * Returns 0 if v is not comparable to the values set.</p>
363         *
364         * @param v the value to lookup.
365         * @return the proportion of values equal to v
366         * @deprecated replaced by {@link #getCumFreq(Comparable)} as of 2.0
367         */
368        @Deprecated
369        public long getCumFreq(Object v) {
370            return getCumFreq((Comparable<?>) v);
371        }
372    
373        /**
374         * Returns the cumulative frequency of values less than or equal to v.
375         * <p>
376         * Returns 0 if v is not comparable to the values set.</p>
377         *
378         * @param v the value to lookup.
379         * @return the proportion of values equal to v
380         */
381        public long getCumFreq(Comparable<?> v) {
382            if (getSumFreq() == 0) {
383                return 0;
384            }
385            if (v instanceof Integer) {
386                return getCumFreq(((Integer) v).longValue());
387            }
388            @SuppressWarnings("unchecked") // OK, freqTable is Comparable<?>
389            Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
390            if (c == null) {
391                c = new NaturalComparator();
392            }
393            long result = 0;
394    
395            try {
396                Long value = freqTable.get(v);
397                if (value != null) {
398                    result = value.longValue();
399                }
400            } catch (ClassCastException ex) {
401                return result;   // v is not comparable
402            }
403    
404            if (c.compare(v, freqTable.firstKey()) < 0) {
405                return 0;  // v is comparable, but less than first value
406            }
407    
408            if (c.compare(v, freqTable.lastKey()) >= 0) {
409                return getSumFreq();    // v is comparable, but greater than the last value
410            }
411    
412            Iterator<Comparable<?>> values = valuesIterator();
413            while (values.hasNext()) {
414                Comparable<?> nextValue = values.next();
415                if (c.compare(v, nextValue) > 0) {
416                    result += getCount(nextValue);
417                } else {
418                    return result;
419                }
420            }
421            return result;
422        }
423    
424         /**
425         * Returns the cumulative frequency of values less than or equal to v.
426         * <p>
427         * Returns 0 if v is not comparable to the values set.</p>
428         *
429         * @param v the value to lookup
430         * @return the proportion of values equal to v
431         */
432        public long getCumFreq(int v) {
433            return getCumFreq(Long.valueOf(v));
434        }
435    
436         /**
437         * Returns the cumulative frequency of values less than or equal to v.
438         * <p>
439         * Returns 0 if v is not comparable to the values set.</p>
440         *
441         * @param v the value to lookup
442         * @return the proportion of values equal to v
443         */
444        public long getCumFreq(long v) {
445            return getCumFreq(Long.valueOf(v));
446        }
447    
448        /**
449         * Returns the cumulative frequency of values less than or equal to v.
450         * <p>
451         * Returns 0 if v is not comparable to the values set.</p>
452         *
453         * @param v the value to lookup
454         * @return the proportion of values equal to v
455         */
456        public long getCumFreq(char v) {
457            return getCumFreq(Character.valueOf(v));
458        }
459    
460        //----------------------------------------------------------------------------------------------
461    
462        /**
463         * Returns the cumulative percentage of values less than or equal to v
464         * (as a proportion between 0 and 1).
465         * <p>
466         * Returns <code>Double.NaN</code> if no values have been added.
467         * Returns 0 if at least one value has been added, but v is not comparable
468         * to the values set.</p>
469         *
470         * @param v the value to lookup
471         * @return the proportion of values less than or equal to v
472         * @deprecated replaced by {@link #getCumPct(Comparable)} as of 2.0
473         */
474        @Deprecated
475        public double getCumPct(Object v) {
476            return getCumPct((Comparable<?>) v);
477    
478        }
479    
480        /**
481         * Returns the cumulative percentage of values less than or equal to v
482         * (as a proportion between 0 and 1).
483         * <p>
484         * Returns <code>Double.NaN</code> if no values have been added.
485         * Returns 0 if at least one value has been added, but v is not comparable
486         * to the values set.</p>
487         *
488         * @param v the value to lookup
489         * @return the proportion of values less than or equal to v
490         */
491        public double getCumPct(Comparable<?> v) {
492            final long sumFreq = getSumFreq();
493            if (sumFreq == 0) {
494                return Double.NaN;
495            }
496            return (double) getCumFreq(v) / (double) sumFreq;
497        }
498    
499        /**
500         * Returns the cumulative percentage of values less than or equal to v
501         * (as a proportion between 0 and 1).
502         * <p>
503         * Returns 0 if v is not comparable to the values set.</p>
504         *
505         * @param v the value to lookup
506         * @return the proportion of values less than or equal to v
507         */
508        public double getCumPct(int v) {
509            return getCumPct(Long.valueOf(v));
510        }
511    
512        /**
513         * Returns the cumulative percentage of values less than or equal to v
514         * (as a proportion between 0 and 1).
515         * <p>
516         * Returns 0 if v is not comparable to the values set.</p>
517         *
518         * @param v the value to lookup
519         * @return the proportion of values less than or equal to v
520         */
521        public double getCumPct(long v) {
522            return getCumPct(Long.valueOf(v));
523        }
524    
525        /**
526         * Returns the cumulative percentage of values less than or equal to v
527         * (as a proportion between 0 and 1).
528         * <p>
529         * Returns 0 if v is not comparable to the values set.</p>
530         *
531         * @param v the value to lookup
532         * @return the proportion of values less than or equal to v
533         */
534        public double getCumPct(char v) {
535            return getCumPct(Character.valueOf(v));
536        }
537    
538        /**
539         * A Comparator that compares comparable objects using the
540         * natural order.  Copied from Commons Collections ComparableComparator.
541         */
542        private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
543    
544            /** Serializable version identifier */
545            private static final long serialVersionUID = -3852193713161395148L;
546    
547            /**
548             * Compare the two {@link Comparable Comparable} arguments.
549             * This method is equivalent to:
550             * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
551             *
552             * @param  o1 the first object
553             * @param  o2 the second object
554             * @return  result of comparison
555             * @throws NullPointerException when <i>o1</i> is <code>null</code>,
556             *         or when <code>((Comparable)o1).compareTo(o2)</code> does
557             * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
558             *         or when <code>((Comparable)o1).compareTo(o2)</code> does
559             */
560            @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
561            public int compare(Comparable<T> o1, Comparable<T> o2) {
562                return o1.compareTo((T) o2);
563            }
564        }
565    
566        /** {@inheritDoc} */
567        @Override
568        public int hashCode() {
569            final int prime = 31;
570            int result = 1;
571            result = prime * result +
572                     ((freqTable == null) ? 0 : freqTable.hashCode());
573            return result;
574        }
575    
576        /** {@inheritDoc} */
577        @Override
578        public boolean equals(Object obj) {
579            if (this == obj)
580                return true;
581            if (!(obj instanceof Frequency))
582                return false;
583            Frequency other = (Frequency) obj;
584            if (freqTable == null) {
585                if (other.freqTable != null)
586                    return false;
587            } else if (!freqTable.equals(other.freqTable))
588                return false;
589            return true;
590        }
591    
592    }