001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.math.stat.ranking; 019 020 import java.util.ArrayList; 021 import java.util.Arrays; 022 import java.util.Iterator; 023 import java.util.List; 024 025 import org.apache.commons.math.MathRuntimeException; 026 import org.apache.commons.math.random.RandomData; 027 import org.apache.commons.math.random.RandomDataImpl; 028 import org.apache.commons.math.random.RandomGenerator; 029 030 031 /** 032 * <p> Ranking based on the natural ordering on doubles.</p> 033 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties 034 * are handled using the selected {@link TiesStrategy}. 035 * Configuration settings are supplied in optional constructor arguments. 036 * Defaults are {@link NaNStrategy#MAXIMAL} and {@link TiesStrategy#AVERAGE}, 037 * respectively. When using {@link TiesStrategy#RANDOM}, a 038 * {@link RandomGenerator} may be supplied as a constructor argument.</p> 039 * <p>Examples: 040 * <table border="1" cellpadding="3"> 041 * <tr><th colspan="3"> 042 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17) 043 * </th></tr> 044 * <tr><th>NaNStrategy</th><th>TiesStrategy</th> 045 * <th><code>rank(data)</code></th> 046 * <tr> 047 * <td>default (NaNs maximal)</td> 048 * <td>default (ties averaged)</td> 049 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr> 050 * <tr> 051 * <td>default (NaNs maximal)</td> 052 * <td>MINIMUM</td> 053 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr> 054 * <tr> 055 * <td>MINIMAL</td> 056 * <td>default (ties averaged)</td> 057 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr> 058 * <tr> 059 * <td>REMOVED</td> 060 * <td>SEQUENTIAL</td> 061 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr> 062 * <tr> 063 * <td>MINIMAL</td> 064 * <td>MAXIMUM</td> 065 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p> 066 * 067 * @since 2.0 068 * @version $Revision: 811827 $ $Date: 2009-09-06 11:32:50 -0400 (Sun, 06 Sep 2009) $ 069 */ 070 public class NaturalRanking implements RankingAlgorithm { 071 072 /** default NaN strategy */ 073 public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.MAXIMAL; 074 075 /** default ties strategy */ 076 public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE; 077 078 /** NaN strategy - defaults to NaNs maximal */ 079 private final NaNStrategy nanStrategy; 080 081 /** Ties strategy - defaults to ties averaged */ 082 private final TiesStrategy tiesStrategy; 083 084 /** Source of random data - used only when ties strategy is RANDOM */ 085 private final RandomData randomData; 086 087 /** 088 * Create a NaturalRanking with default strategies for handling ties and NaNs. 089 */ 090 public NaturalRanking() { 091 super(); 092 tiesStrategy = DEFAULT_TIES_STRATEGY; 093 nanStrategy = DEFAULT_NAN_STRATEGY; 094 randomData = null; 095 } 096 097 /** 098 * Create a NaturalRanking with the given TiesStrategy. 099 * 100 * @param tiesStrategy the TiesStrategy to use 101 */ 102 public NaturalRanking(TiesStrategy tiesStrategy) { 103 super(); 104 this.tiesStrategy = tiesStrategy; 105 nanStrategy = DEFAULT_NAN_STRATEGY; 106 randomData = new RandomDataImpl(); 107 } 108 109 /** 110 * Create a NaturalRanking with the given NaNStrategy. 111 * 112 * @param nanStrategy the NaNStrategy to use 113 */ 114 public NaturalRanking(NaNStrategy nanStrategy) { 115 super(); 116 this.nanStrategy = nanStrategy; 117 tiesStrategy = DEFAULT_TIES_STRATEGY; 118 randomData = null; 119 } 120 121 /** 122 * Create a NaturalRanking with the given NaNStrategy and TiesStrategy. 123 * 124 * @param nanStrategy NaNStrategy to use 125 * @param tiesStrategy TiesStrategy to use 126 */ 127 public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) { 128 super(); 129 this.nanStrategy = nanStrategy; 130 this.tiesStrategy = tiesStrategy; 131 randomData = new RandomDataImpl(); 132 } 133 134 /** 135 * Create a NaturalRanking with TiesStrategy.RANDOM and the given 136 * RandomGenerator as the source of random data. 137 * 138 * @param randomGenerator source of random data 139 */ 140 public NaturalRanking(RandomGenerator randomGenerator) { 141 super(); 142 this.tiesStrategy = TiesStrategy.RANDOM; 143 nanStrategy = DEFAULT_NAN_STRATEGY; 144 randomData = new RandomDataImpl(randomGenerator); 145 } 146 147 148 /** 149 * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM 150 * and the given source of random data. 151 * 152 * @param nanStrategy NaNStrategy to use 153 * @param randomGenerator source of random data 154 */ 155 public NaturalRanking(NaNStrategy nanStrategy, 156 RandomGenerator randomGenerator) { 157 super(); 158 this.nanStrategy = nanStrategy; 159 this.tiesStrategy = TiesStrategy.RANDOM; 160 randomData = new RandomDataImpl(randomGenerator); 161 } 162 163 /** 164 * Return the NaNStrategy 165 * 166 * @return returns the NaNStrategy 167 */ 168 public NaNStrategy getNanStrategy() { 169 return nanStrategy; 170 } 171 172 /** 173 * Return the TiesStrategy 174 * 175 * @return the TiesStrategy 176 */ 177 public TiesStrategy getTiesStrategy() { 178 return tiesStrategy; 179 } 180 181 /** 182 * Rank <code>data</code> using the natural ordering on Doubles, with 183 * NaN values handled according to <code>nanStrategy</code> and ties 184 * resolved using <code>tiesStrategy.</code> 185 * 186 * @param data array to be ranked 187 * @return array of ranks 188 */ 189 public double[] rank(double[] data) { 190 191 // Array recording initial positions of data to be ranked 192 IntDoublePair[] ranks = new IntDoublePair[data.length]; 193 for (int i = 0; i < data.length; i++) { 194 ranks[i] = new IntDoublePair(data[i], i); 195 } 196 197 // Recode, remove or record positions of NaNs 198 List<Integer> nanPositions = null; 199 switch (nanStrategy) { 200 case MAXIMAL: // Replace NaNs with +INFs 201 recodeNaNs(ranks, Double.POSITIVE_INFINITY); 202 break; 203 case MINIMAL: // Replace NaNs with -INFs 204 recodeNaNs(ranks, Double.NEGATIVE_INFINITY); 205 break; 206 case REMOVED: // Drop NaNs from data 207 ranks = removeNaNs(ranks); 208 break; 209 case FIXED: // Record positions of NaNs 210 nanPositions = getNanPositions(ranks); 211 break; 212 default: // this should not happen unless NaNStrategy enum is changed 213 throw MathRuntimeException.createInternalError(null); 214 } 215 216 // Sort the IntDoublePairs 217 Arrays.sort(ranks); 218 219 // Walk the sorted array, filling output array using sorted positions, 220 // resolving ties as we go 221 double[] out = new double[ranks.length]; 222 int pos = 1; // position in sorted array 223 out[ranks[0].getPosition()] = pos; 224 List<Integer> tiesTrace = new ArrayList<Integer>(); 225 tiesTrace.add(ranks[0].getPosition()); 226 for (int i = 1; i < ranks.length; i++) { 227 if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) { 228 // tie sequence has ended (or had length 1) 229 pos = i + 1; 230 if (tiesTrace.size() > 1) { // if seq is nontrivial, resolve 231 resolveTie(out, tiesTrace); 232 } 233 tiesTrace = new ArrayList<Integer>(); 234 tiesTrace.add(ranks[i].getPosition()); 235 } else { 236 // tie sequence continues 237 tiesTrace.add(ranks[i].getPosition()); 238 } 239 out[ranks[i].getPosition()] = pos; 240 } 241 if (tiesTrace.size() > 1) { // handle tie sequence at end 242 resolveTie(out, tiesTrace); 243 } 244 if (nanStrategy == NaNStrategy.FIXED) { 245 restoreNaNs(out, nanPositions); 246 } 247 return out; 248 } 249 250 /** 251 * Returns an array that is a copy of the input array with IntDoublePairs 252 * having NaN values removed. 253 * 254 * @param ranks input array 255 * @return array with NaN-valued entries removed 256 */ 257 private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) { 258 if (!containsNaNs(ranks)) { 259 return ranks; 260 } 261 IntDoublePair[] outRanks = new IntDoublePair[ranks.length]; 262 int j = 0; 263 for (int i = 0; i < ranks.length; i++) { 264 if (Double.isNaN(ranks[i].getValue())) { 265 // drop, but adjust original ranks of later elements 266 for (int k = i + 1; k < ranks.length; k++) { 267 ranks[k] = new IntDoublePair( 268 ranks[k].getValue(), ranks[k].getPosition() - 1); 269 } 270 } else { 271 outRanks[j] = new IntDoublePair( 272 ranks[i].getValue(), ranks[i].getPosition()); 273 j++; 274 } 275 } 276 IntDoublePair[] returnRanks = new IntDoublePair[j]; 277 System.arraycopy(outRanks, 0, returnRanks, 0, j); 278 return returnRanks; 279 } 280 281 /** 282 * Recodes NaN values to the given value. 283 * 284 * @param ranks array to recode 285 * @param value the value to replace NaNs with 286 */ 287 private void recodeNaNs(IntDoublePair[] ranks, double value) { 288 for (int i = 0; i < ranks.length; i++) { 289 if (Double.isNaN(ranks[i].getValue())) { 290 ranks[i] = new IntDoublePair( 291 value, ranks[i].getPosition()); 292 } 293 } 294 } 295 296 /** 297 * Checks for presence of NaNs in <code>ranks.</code> 298 * 299 * @param ranks array to be searched for NaNs 300 * @return true iff ranks contains one or more NaNs 301 */ 302 private boolean containsNaNs(IntDoublePair[] ranks) { 303 for (int i = 0; i < ranks.length; i++) { 304 if (Double.isNaN(ranks[i].getValue())) { 305 return true; 306 } 307 } 308 return false; 309 } 310 311 /** 312 * Resolve a sequence of ties, using the configured {@link TiesStrategy}. 313 * The input <code>ranks</code> array is expected to take the same value 314 * for all indices in <code>tiesTrace</code>. The common value is recoded 315 * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>, 316 * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged. 317 * The same array and trace with tiesStrategy AVERAGE will come out 318 * <5,8,3,6,3,7,1,3>. 319 * 320 * @param ranks array of ranks 321 * @param tiesTrace list of indices where <code>ranks</code> is constant 322 * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j] 323 * </code> 324 */ 325 private void resolveTie(double[] ranks, List<Integer> tiesTrace) { 326 327 // constant value of ranks over tiesTrace 328 final double c = ranks[tiesTrace.get(0)]; 329 330 // length of sequence of tied ranks 331 final int length = tiesTrace.size(); 332 333 switch (tiesStrategy) { 334 case AVERAGE: // Replace ranks with average 335 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d); 336 break; 337 case MAXIMUM: // Replace ranks with maximum values 338 fill(ranks, tiesTrace, c + length - 1); 339 break; 340 case MINIMUM: // Replace ties with minimum 341 fill(ranks, tiesTrace, c); 342 break; 343 case RANDOM: // Fill with random integral values in [c, c + length - 1] 344 Iterator<Integer> iterator = tiesTrace.iterator(); 345 long f = Math.round(c); 346 while (iterator.hasNext()) { 347 ranks[iterator.next()] = 348 randomData.nextLong(f, f + length - 1); 349 } 350 break; 351 case SEQUENTIAL: // Fill sequentially from c to c + length - 1 352 // walk and fill 353 iterator = tiesTrace.iterator(); 354 f = Math.round(c); 355 int i = 0; 356 while (iterator.hasNext()) { 357 ranks[iterator.next()] = f + i++; 358 } 359 break; 360 default: // this should not happen unless TiesStrategy enum is changed 361 throw MathRuntimeException.createInternalError(null); 362 } 363 } 364 365 /** 366 * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code> 367 * 368 * @param data array to modify 369 * @param tiesTrace list of index values to set 370 * @param value value to set 371 */ 372 private void fill(double[] data, List<Integer> tiesTrace, double value) { 373 Iterator<Integer> iterator = tiesTrace.iterator(); 374 while (iterator.hasNext()) { 375 data[iterator.next()] = value; 376 } 377 } 378 379 /** 380 * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code> 381 * 382 * @param ranks array to modify 383 * @param nanPositions list of index values to set to <code>Double.NaN</code> 384 */ 385 private void restoreNaNs(double[] ranks, List<Integer> nanPositions) { 386 if (nanPositions.size() == 0) { 387 return; 388 } 389 Iterator<Integer> iterator = nanPositions.iterator(); 390 while (iterator.hasNext()) { 391 ranks[iterator.next().intValue()] = Double.NaN; 392 } 393 394 } 395 396 /** 397 * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code> 398 * 399 * @param ranks array to search for <code>NaNs</code> 400 * @return list of indexes i such that <code>ranks[i] = NaN</code> 401 */ 402 private List<Integer> getNanPositions(IntDoublePair[] ranks) { 403 ArrayList<Integer> out = new ArrayList<Integer>(); 404 for (int i = 0; i < ranks.length; i++) { 405 if (Double.isNaN(ranks[i].getValue())) { 406 out.add(Integer.valueOf(i)); 407 } 408 } 409 return out; 410 } 411 412 /** 413 * Represents the position of a double value in an ordering. 414 * Comparable interface is implemented so Arrays.sort can be used 415 * to sort an array of IntDoublePairs by value. Note that the 416 * implicitly defined natural ordering is NOT consistent with equals. 417 */ 418 private static class IntDoublePair implements Comparable<IntDoublePair> { 419 420 /** Value of the pair */ 421 private final double value; 422 423 /** Original position of the pair */ 424 private final int position; 425 426 /** 427 * Construct an IntDoublePair with the given value and position. 428 * @param value the value of the pair 429 * @param position the original position 430 */ 431 public IntDoublePair(double value, int position) { 432 this.value = value; 433 this.position = position; 434 } 435 436 /** 437 * Compare this IntDoublePair to another pair. 438 * Only the <strong>values</strong> are compared. 439 * 440 * @param other the other pair to compare this to 441 * @return result of <code>Double.compare(value, other.value)</code> 442 */ 443 public int compareTo(IntDoublePair other) { 444 return Double.compare(value, other.value); 445 } 446 447 /** 448 * Returns the value of the pair. 449 * @return value 450 */ 451 public double getValue() { 452 return value; 453 } 454 455 /** 456 * Returns the original position of the pair. 457 * @return position 458 */ 459 public int getPosition() { 460 return position; 461 } 462 } 463 }