001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.stat.ranking;
019    
020    import java.util.ArrayList;
021    import java.util.Arrays;
022    import java.util.Iterator;
023    import java.util.List;
024    
025    import org.apache.commons.math.MathRuntimeException;
026    import org.apache.commons.math.random.RandomData;
027    import org.apache.commons.math.random.RandomDataImpl;
028    import org.apache.commons.math.random.RandomGenerator;
029    
030    
031    /**
032     * <p> Ranking based on the natural ordering on doubles.</p>
033     * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
034     * are handled using the selected {@link TiesStrategy}.
035     * Configuration settings are supplied in optional constructor arguments.
036     * Defaults are {@link NaNStrategy#MAXIMAL} and {@link TiesStrategy#AVERAGE},
037     * respectively. When using {@link TiesStrategy#RANDOM}, a
038     * {@link RandomGenerator} may be supplied as a constructor argument.</p>
039     * <p>Examples:
040     * <table border="1" cellpadding="3">
041     * <tr><th colspan="3">
042     * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
043     * </th></tr>
044     * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
045     * <th><code>rank(data)</code></th>
046     * <tr>
047     * <td>default (NaNs maximal)</td>
048     * <td>default (ties averaged)</td>
049     * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
050     * <tr>
051     * <td>default (NaNs maximal)</td>
052     * <td>MINIMUM</td>
053     * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
054     * <tr>
055     * <td>MINIMAL</td>
056     * <td>default (ties averaged)</td>
057     * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
058     * <tr>
059     * <td>REMOVED</td>
060     * <td>SEQUENTIAL</td>
061     * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
062     * <tr>
063     * <td>MINIMAL</td>
064     * <td>MAXIMUM</td>
065     * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
066     *
067     * @since 2.0
068     * @version $Revision: 811827 $ $Date: 2009-09-06 11:32:50 -0400 (Sun, 06 Sep 2009) $
069     */
070    public class NaturalRanking implements RankingAlgorithm {
071    
072        /** default NaN strategy */
073        public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.MAXIMAL;
074    
075        /** default ties strategy */
076        public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
077    
078        /** NaN strategy - defaults to NaNs maximal */
079        private final NaNStrategy nanStrategy;
080    
081        /** Ties strategy - defaults to ties averaged */
082        private final TiesStrategy tiesStrategy;
083    
084        /** Source of random data - used only when ties strategy is RANDOM */
085        private final RandomData randomData;
086    
087        /**
088         * Create a NaturalRanking with default strategies for handling ties and NaNs.
089         */
090        public NaturalRanking() {
091            super();
092            tiesStrategy = DEFAULT_TIES_STRATEGY;
093            nanStrategy = DEFAULT_NAN_STRATEGY;
094            randomData = null;
095        }
096    
097        /**
098         * Create a NaturalRanking with the given TiesStrategy.
099         *
100         * @param tiesStrategy the TiesStrategy to use
101         */
102        public NaturalRanking(TiesStrategy tiesStrategy) {
103            super();
104            this.tiesStrategy = tiesStrategy;
105            nanStrategy = DEFAULT_NAN_STRATEGY;
106            randomData = new RandomDataImpl();
107        }
108    
109        /**
110         * Create a NaturalRanking with the given NaNStrategy.
111         *
112         * @param nanStrategy the NaNStrategy to use
113         */
114        public NaturalRanking(NaNStrategy nanStrategy) {
115            super();
116            this.nanStrategy = nanStrategy;
117            tiesStrategy = DEFAULT_TIES_STRATEGY;
118            randomData = null;
119        }
120    
121        /**
122         * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
123         *
124         * @param nanStrategy NaNStrategy to use
125         * @param tiesStrategy TiesStrategy to use
126         */
127        public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
128            super();
129            this.nanStrategy = nanStrategy;
130            this.tiesStrategy = tiesStrategy;
131            randomData = new RandomDataImpl();
132        }
133    
134        /**
135         * Create a NaturalRanking with TiesStrategy.RANDOM and the given
136         * RandomGenerator as the source of random data.
137         *
138         * @param randomGenerator source of random data
139         */
140        public NaturalRanking(RandomGenerator randomGenerator) {
141            super();
142            this.tiesStrategy = TiesStrategy.RANDOM;
143            nanStrategy = DEFAULT_NAN_STRATEGY;
144            randomData = new RandomDataImpl(randomGenerator);
145        }
146    
147    
148        /**
149         * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
150         * and the given source of random data.
151         *
152         * @param nanStrategy NaNStrategy to use
153         * @param randomGenerator source of random data
154         */
155        public NaturalRanking(NaNStrategy nanStrategy,
156                RandomGenerator randomGenerator) {
157            super();
158            this.nanStrategy = nanStrategy;
159            this.tiesStrategy = TiesStrategy.RANDOM;
160            randomData = new RandomDataImpl(randomGenerator);
161        }
162    
163        /**
164         * Return the NaNStrategy
165         *
166         * @return returns the NaNStrategy
167         */
168        public NaNStrategy getNanStrategy() {
169            return nanStrategy;
170        }
171    
172        /**
173         * Return the TiesStrategy
174         *
175         * @return the TiesStrategy
176         */
177        public TiesStrategy getTiesStrategy() {
178            return tiesStrategy;
179        }
180    
181        /**
182         * Rank <code>data</code> using the natural ordering on Doubles, with
183         * NaN values handled according to <code>nanStrategy</code> and ties
184         * resolved using <code>tiesStrategy.</code>
185         *
186         * @param data array to be ranked
187         * @return array of ranks
188         */
189        public double[] rank(double[] data) {
190    
191            // Array recording initial positions of data to be ranked
192            IntDoublePair[] ranks = new IntDoublePair[data.length];
193            for (int i = 0; i < data.length; i++) {
194                ranks[i] = new IntDoublePair(data[i], i);
195            }
196    
197            // Recode, remove or record positions of NaNs
198            List<Integer> nanPositions = null;
199            switch (nanStrategy) {
200                case MAXIMAL: // Replace NaNs with +INFs
201                    recodeNaNs(ranks, Double.POSITIVE_INFINITY);
202                    break;
203                case MINIMAL: // Replace NaNs with -INFs
204                    recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
205                    break;
206                case REMOVED: // Drop NaNs from data
207                    ranks = removeNaNs(ranks);
208                    break;
209                case FIXED:   // Record positions of NaNs
210                    nanPositions = getNanPositions(ranks);
211                    break;
212                default: // this should not happen unless NaNStrategy enum is changed
213                    throw MathRuntimeException.createInternalError(null);
214            }
215    
216            // Sort the IntDoublePairs
217            Arrays.sort(ranks);
218    
219            // Walk the sorted array, filling output array using sorted positions,
220            // resolving ties as we go
221            double[] out = new double[ranks.length];
222            int pos = 1;  // position in sorted array
223            out[ranks[0].getPosition()] = pos;
224            List<Integer> tiesTrace = new ArrayList<Integer>();
225            tiesTrace.add(ranks[0].getPosition());
226            for (int i = 1; i < ranks.length; i++) {
227                if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
228                    // tie sequence has ended (or had length 1)
229                    pos = i + 1;
230                    if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
231                        resolveTie(out, tiesTrace);
232                    }
233                    tiesTrace = new ArrayList<Integer>();
234                    tiesTrace.add(ranks[i].getPosition());
235                } else {
236                    // tie sequence continues
237                    tiesTrace.add(ranks[i].getPosition());
238                }
239                out[ranks[i].getPosition()] = pos;
240            }
241            if (tiesTrace.size() > 1) {  // handle tie sequence at end
242                resolveTie(out, tiesTrace);
243            }
244            if (nanStrategy == NaNStrategy.FIXED) {
245                restoreNaNs(out, nanPositions);
246            }
247            return out;
248        }
249    
250        /**
251         * Returns an array that is a copy of the input array with IntDoublePairs
252         * having NaN values removed.
253         *
254         * @param ranks input array
255         * @return array with NaN-valued entries removed
256         */
257        private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
258            if (!containsNaNs(ranks)) {
259                return ranks;
260            }
261            IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
262            int j = 0;
263            for (int i = 0; i < ranks.length; i++) {
264                if (Double.isNaN(ranks[i].getValue())) {
265                    // drop, but adjust original ranks of later elements
266                    for (int k = i + 1; k < ranks.length; k++) {
267                        ranks[k] = new IntDoublePair(
268                                ranks[k].getValue(), ranks[k].getPosition() - 1);
269                    }
270                } else {
271                    outRanks[j] = new IntDoublePair(
272                            ranks[i].getValue(), ranks[i].getPosition());
273                    j++;
274                }
275            }
276            IntDoublePair[] returnRanks = new IntDoublePair[j];
277            System.arraycopy(outRanks, 0, returnRanks, 0, j);
278            return returnRanks;
279        }
280    
281        /**
282         * Recodes NaN values to the given value.
283         *
284         * @param ranks array to recode
285         * @param value the value to replace NaNs with
286         */
287        private void recodeNaNs(IntDoublePair[] ranks, double value) {
288            for (int i = 0; i < ranks.length; i++) {
289                if (Double.isNaN(ranks[i].getValue())) {
290                    ranks[i] = new IntDoublePair(
291                            value, ranks[i].getPosition());
292                }
293            }
294        }
295    
296        /**
297         * Checks for presence of NaNs in <code>ranks.</code>
298         *
299         * @param ranks array to be searched for NaNs
300         * @return true iff ranks contains one or more NaNs
301         */
302        private boolean containsNaNs(IntDoublePair[] ranks) {
303            for (int i = 0; i < ranks.length; i++) {
304                if (Double.isNaN(ranks[i].getValue())) {
305                    return true;
306                }
307            }
308            return false;
309        }
310    
311        /**
312         * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
313         * The input <code>ranks</code> array is expected to take the same value
314         * for all indices in <code>tiesTrace</code>.  The common value is recoded
315         * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
316         * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
317         * The same array and trace with tiesStrategy AVERAGE will come out
318         * <5,8,3,6,3,7,1,3>.
319         *
320         * @param ranks array of ranks
321         * @param tiesTrace list of indices where <code>ranks</code> is constant
322         * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
323         * </code>
324         */
325        private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
326    
327            // constant value of ranks over tiesTrace
328            final double c = ranks[tiesTrace.get(0)];
329    
330            // length of sequence of tied ranks
331            final int length = tiesTrace.size();
332    
333            switch (tiesStrategy) {
334                case  AVERAGE:  // Replace ranks with average
335                    fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
336                    break;
337                case MAXIMUM:   // Replace ranks with maximum values
338                    fill(ranks, tiesTrace, c + length - 1);
339                    break;
340                case MINIMUM:   // Replace ties with minimum
341                    fill(ranks, tiesTrace, c);
342                    break;
343                case RANDOM:    // Fill with random integral values in [c, c + length - 1]
344                    Iterator<Integer> iterator = tiesTrace.iterator();
345                    long f = Math.round(c);
346                    while (iterator.hasNext()) {
347                        ranks[iterator.next()] =
348                            randomData.nextLong(f, f + length - 1);
349                    }
350                    break;
351                case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
352                    // walk and fill
353                    iterator = tiesTrace.iterator();
354                    f = Math.round(c);
355                    int i = 0;
356                    while (iterator.hasNext()) {
357                        ranks[iterator.next()] = f + i++;
358                    }
359                    break;
360                default: // this should not happen unless TiesStrategy enum is changed
361                    throw MathRuntimeException.createInternalError(null);
362            }
363        }
364    
365        /**
366         * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
367         *
368         * @param data array to modify
369         * @param tiesTrace list of index values to set
370         * @param value value to set
371         */
372        private void fill(double[] data, List<Integer> tiesTrace, double value) {
373            Iterator<Integer> iterator = tiesTrace.iterator();
374            while (iterator.hasNext()) {
375                data[iterator.next()] = value;
376            }
377        }
378    
379        /**
380         * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
381         *
382         * @param ranks array to modify
383         * @param nanPositions list of index values to set to <code>Double.NaN</code>
384         */
385        private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
386            if (nanPositions.size() == 0) {
387                return;
388            }
389            Iterator<Integer> iterator = nanPositions.iterator();
390            while (iterator.hasNext()) {
391                ranks[iterator.next().intValue()] = Double.NaN;
392            }
393    
394        }
395    
396        /**
397         * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
398         *
399         * @param ranks array to search for <code>NaNs</code>
400         * @return list of indexes i such that <code>ranks[i] = NaN</code>
401         */
402        private List<Integer> getNanPositions(IntDoublePair[] ranks) {
403            ArrayList<Integer> out = new ArrayList<Integer>();
404            for (int i = 0; i < ranks.length; i++) {
405                if (Double.isNaN(ranks[i].getValue())) {
406                    out.add(Integer.valueOf(i));
407                }
408            }
409            return out;
410        }
411    
412        /**
413         * Represents the position of a double value in an ordering.
414         * Comparable interface is implemented so Arrays.sort can be used
415         * to sort an array of IntDoublePairs by value.  Note that the
416         * implicitly defined natural ordering is NOT consistent with equals.
417         */
418        private static class IntDoublePair implements Comparable<IntDoublePair>  {
419    
420            /** Value of the pair */
421            private final double value;
422    
423            /** Original position of the pair */
424            private final int position;
425    
426            /**
427             * Construct an IntDoublePair with the given value and position.
428             * @param value the value of the pair
429             * @param position the original position
430             */
431            public IntDoublePair(double value, int position) {
432                this.value = value;
433                this.position = position;
434            }
435    
436            /**
437             * Compare this IntDoublePair to another pair.
438             * Only the <strong>values</strong> are compared.
439             *
440             * @param other the other pair to compare this to
441             * @return result of <code>Double.compare(value, other.value)</code>
442             */
443            public int compareTo(IntDoublePair other) {
444                return Double.compare(value, other.value);
445            }
446    
447            /**
448             * Returns the value of the pair.
449             * @return value
450             */
451            public double getValue() {
452                return value;
453            }
454    
455            /**
456             * Returns the original position of the pair.
457             * @return position
458             */
459            public int getPosition() {
460                return position;
461            }
462        }
463    }