001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.math.distribution; 019 020 import java.io.Serializable; 021 022 import org.apache.commons.math.MathRuntimeException; 023 import org.apache.commons.math.util.MathUtils; 024 025 /** 026 * The default implementation of {@link HypergeometricDistribution}. 027 * 028 * @version $Revision: 920852 $ $Date: 2010-03-09 07:53:44 -0500 (Tue, 09 Mar 2010) $ 029 */ 030 public class HypergeometricDistributionImpl extends AbstractIntegerDistribution 031 implements HypergeometricDistribution, Serializable { 032 033 /** Serializable version identifier */ 034 private static final long serialVersionUID = -436928820673516179L; 035 036 /** The number of successes in the population. */ 037 private int numberOfSuccesses; 038 039 /** The population size. */ 040 private int populationSize; 041 042 /** The sample size. */ 043 private int sampleSize; 044 045 /** 046 * Construct a new hypergeometric distribution with the given the population 047 * size, the number of successes in the population, and the sample size. 048 * 049 * @param populationSize the population size. 050 * @param numberOfSuccesses number of successes in the population. 051 * @param sampleSize the sample size. 052 */ 053 public HypergeometricDistributionImpl(int populationSize, 054 int numberOfSuccesses, int sampleSize) { 055 super(); 056 if (numberOfSuccesses > populationSize) { 057 throw MathRuntimeException 058 .createIllegalArgumentException( 059 "number of successes ({0}) must be less than or equal to population size ({1})", 060 numberOfSuccesses, populationSize); 061 } 062 if (sampleSize > populationSize) { 063 throw MathRuntimeException 064 .createIllegalArgumentException( 065 "sample size ({0}) must be less than or equal to population size ({1})", 066 sampleSize, populationSize); 067 } 068 069 setPopulationSizeInternal(populationSize); 070 setSampleSizeInternal(sampleSize); 071 setNumberOfSuccessesInternal(numberOfSuccesses); 072 } 073 074 /** 075 * For this distribution, X, this method returns P(X ≤ x). 076 * 077 * @param x the value at which the PDF is evaluated. 078 * @return PDF for this distribution. 079 */ 080 @Override 081 public double cumulativeProbability(int x) { 082 double ret; 083 084 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); 085 if (x < domain[0]) { 086 ret = 0.0; 087 } else if (x >= domain[1]) { 088 ret = 1.0; 089 } else { 090 ret = innerCumulativeProbability(domain[0], x, 1, populationSize, 091 numberOfSuccesses, sampleSize); 092 } 093 094 return ret; 095 } 096 097 /** 098 * Return the domain for the given hypergeometric distribution parameters. 099 * 100 * @param n the population size. 101 * @param m number of successes in the population. 102 * @param k the sample size. 103 * @return a two element array containing the lower and upper bounds of the 104 * hypergeometric distribution. 105 */ 106 private int[] getDomain(int n, int m, int k) { 107 return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) }; 108 } 109 110 /** 111 * Access the domain value lower bound, based on <code>p</code>, used to 112 * bracket a PDF root. 113 * 114 * @param p the desired probability for the critical value 115 * @return domain value lower bound, i.e. P(X < <i>lower bound</i>) < 116 * <code>p</code> 117 */ 118 @Override 119 protected int getDomainLowerBound(double p) { 120 return getLowerDomain(populationSize, numberOfSuccesses, sampleSize); 121 } 122 123 /** 124 * Access the domain value upper bound, based on <code>p</code>, used to 125 * bracket a PDF root. 126 * 127 * @param p the desired probability for the critical value 128 * @return domain value upper bound, i.e. P(X < <i>upper bound</i>) > 129 * <code>p</code> 130 */ 131 @Override 132 protected int getDomainUpperBound(double p) { 133 return getUpperDomain(sampleSize, numberOfSuccesses); 134 } 135 136 /** 137 * Return the lowest domain value for the given hypergeometric distribution 138 * parameters. 139 * 140 * @param n the population size. 141 * @param m number of successes in the population. 142 * @param k the sample size. 143 * @return the lowest domain value of the hypergeometric distribution. 144 */ 145 private int getLowerDomain(int n, int m, int k) { 146 return Math.max(0, m - (n - k)); 147 } 148 149 /** 150 * Access the number of successes. 151 * 152 * @return the number of successes. 153 */ 154 public int getNumberOfSuccesses() { 155 return numberOfSuccesses; 156 } 157 158 /** 159 * Access the population size. 160 * 161 * @return the population size. 162 */ 163 public int getPopulationSize() { 164 return populationSize; 165 } 166 167 /** 168 * Access the sample size. 169 * 170 * @return the sample size. 171 */ 172 public int getSampleSize() { 173 return sampleSize; 174 } 175 176 /** 177 * Return the highest domain value for the given hypergeometric distribution 178 * parameters. 179 * 180 * @param m number of successes in the population. 181 * @param k the sample size. 182 * @return the highest domain value of the hypergeometric distribution. 183 */ 184 private int getUpperDomain(int m, int k) { 185 return Math.min(k, m); 186 } 187 188 /** 189 * For this distribution, X, this method returns P(X = x). 190 * 191 * @param x the value at which the PMF is evaluated. 192 * @return PMF for this distribution. 193 */ 194 public double probability(int x) { 195 double ret; 196 197 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); 198 if (x < domain[0] || x > domain[1]) { 199 ret = 0.0; 200 } else { 201 double p = (double) sampleSize / (double) populationSize; 202 double q = (double) (populationSize - sampleSize) / (double) populationSize; 203 double p1 = SaddlePointExpansion.logBinomialProbability(x, 204 numberOfSuccesses, p, q); 205 double p2 = 206 SaddlePointExpansion.logBinomialProbability(sampleSize - x, 207 populationSize - numberOfSuccesses, p, q); 208 double p3 = 209 SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q); 210 ret = Math.exp(p1 + p2 - p3); 211 } 212 213 return ret; 214 } 215 216 /** 217 * For the distribution, X, defined by the given hypergeometric distribution 218 * parameters, this method returns P(X = x). 219 * 220 * @param n the population size. 221 * @param m number of successes in the population. 222 * @param k the sample size. 223 * @param x the value at which the PMF is evaluated. 224 * @return PMF for the distribution. 225 */ 226 private double probability(int n, int m, int k, int x) { 227 return Math.exp(MathUtils.binomialCoefficientLog(m, x) + 228 MathUtils.binomialCoefficientLog(n - m, k - x) - 229 MathUtils.binomialCoefficientLog(n, k)); 230 } 231 232 /** 233 * Modify the number of successes. 234 * 235 * @param num the new number of successes. 236 * @throws IllegalArgumentException if <code>num</code> is negative. 237 * @deprecated as of 2.1 (class will become immutable in 3.0) 238 */ 239 @Deprecated 240 public void setNumberOfSuccesses(int num) { 241 setNumberOfSuccessesInternal(num); 242 } 243 /** 244 * Modify the number of successes. 245 * 246 * @param num the new number of successes. 247 * @throws IllegalArgumentException if <code>num</code> is negative. 248 */ 249 private void setNumberOfSuccessesInternal(int num) { 250 if (num < 0) { 251 throw MathRuntimeException.createIllegalArgumentException( 252 "number of successes must be non-negative ({0})", num); 253 } 254 numberOfSuccesses = num; 255 } 256 257 /** 258 * Modify the population size. 259 * 260 * @param size the new population size. 261 * @throws IllegalArgumentException if <code>size</code> is not positive. 262 * @deprecated as of 2.1 (class will become immutable in 3.0) 263 */ 264 @Deprecated 265 public void setPopulationSize(int size) { 266 setPopulationSizeInternal(size); 267 } 268 /** 269 * Modify the population size. 270 * 271 * @param size the new population size. 272 * @throws IllegalArgumentException if <code>size</code> is not positive. 273 */ 274 private void setPopulationSizeInternal(int size) { 275 if (size <= 0) { 276 throw MathRuntimeException.createIllegalArgumentException( 277 "population size must be positive ({0})", size); 278 } 279 populationSize = size; 280 } 281 282 /** 283 * Modify the sample size. 284 * 285 * @param size the new sample size. 286 * @throws IllegalArgumentException if <code>size</code> is negative. 287 * @deprecated as of 2.1 (class will become immutable in 3.0) 288 */ 289 @Deprecated 290 public void setSampleSize(int size) { 291 setSampleSizeInternal(size); 292 } 293 /** 294 * Modify the sample size. 295 * 296 * @param size the new sample size. 297 * @throws IllegalArgumentException if <code>size</code> is negative. 298 */ 299 private void setSampleSizeInternal(int size) { 300 if (size < 0) { 301 throw MathRuntimeException.createIllegalArgumentException( 302 "sample size must be positive ({0})", size); 303 } 304 sampleSize = size; 305 } 306 307 /** 308 * For this distribution, X, this method returns P(X ≥ x). 309 * 310 * @param x the value at which the CDF is evaluated. 311 * @return upper tail CDF for this distribution. 312 * @since 1.1 313 */ 314 public double upperCumulativeProbability(int x) { 315 double ret; 316 317 final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); 318 if (x < domain[0]) { 319 ret = 1.0; 320 } else if (x > domain[1]) { 321 ret = 0.0; 322 } else { 323 ret = innerCumulativeProbability(domain[1], x, -1, populationSize, numberOfSuccesses, sampleSize); 324 } 325 326 return ret; 327 } 328 329 /** 330 * For this distribution, X, this method returns P(x0 ≤ X ≤ x1). This 331 * probability is computed by summing the point probabilities for the values 332 * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx. 333 * 334 * @param x0 the inclusive, lower bound 335 * @param x1 the inclusive, upper bound 336 * @param dx the direction of summation. 1 indicates summing from x0 to x1. 337 * 0 indicates summing from x1 to x0. 338 * @param n the population size. 339 * @param m number of successes in the population. 340 * @param k the sample size. 341 * @return P(x0 ≤ X ≤ x1). 342 */ 343 private double innerCumulativeProbability(int x0, int x1, int dx, int n, 344 int m, int k) { 345 double ret = probability(n, m, k, x0); 346 while (x0 != x1) { 347 x0 += dx; 348 ret += probability(n, m, k, x0); 349 } 350 return ret; 351 } 352 }