001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.distribution;
019    
020    import java.io.Serializable;
021    
022    import org.apache.commons.math.MathRuntimeException;
023    import org.apache.commons.math.util.MathUtils;
024    
025    /**
026     * The default implementation of {@link HypergeometricDistribution}.
027     *
028     * @version $Revision: 920852 $ $Date: 2010-03-09 07:53:44 -0500 (Tue, 09 Mar 2010) $
029     */
030    public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
031            implements HypergeometricDistribution, Serializable {
032    
033        /** Serializable version identifier */
034        private static final long serialVersionUID = -436928820673516179L;
035    
036        /** The number of successes in the population. */
037        private int numberOfSuccesses;
038    
039        /** The population size. */
040        private int populationSize;
041    
042        /** The sample size. */
043        private int sampleSize;
044    
045        /**
046         * Construct a new hypergeometric distribution with the given the population
047         * size, the number of successes in the population, and the sample size.
048         *
049         * @param populationSize the population size.
050         * @param numberOfSuccesses number of successes in the population.
051         * @param sampleSize the sample size.
052         */
053        public HypergeometricDistributionImpl(int populationSize,
054                int numberOfSuccesses, int sampleSize) {
055            super();
056            if (numberOfSuccesses > populationSize) {
057                throw MathRuntimeException
058                        .createIllegalArgumentException(
059                                "number of successes ({0}) must be less than or equal to population size ({1})",
060                                numberOfSuccesses, populationSize);
061            }
062            if (sampleSize > populationSize) {
063                throw MathRuntimeException
064                        .createIllegalArgumentException(
065                                "sample size ({0}) must be less than or equal to population size ({1})",
066                                sampleSize, populationSize);
067            }
068    
069            setPopulationSizeInternal(populationSize);
070            setSampleSizeInternal(sampleSize);
071            setNumberOfSuccessesInternal(numberOfSuccesses);
072        }
073    
074        /**
075         * For this distribution, X, this method returns P(X ≤ x).
076         *
077         * @param x the value at which the PDF is evaluated.
078         * @return PDF for this distribution.
079         */
080        @Override
081        public double cumulativeProbability(int x) {
082            double ret;
083    
084            int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
085            if (x < domain[0]) {
086                ret = 0.0;
087            } else if (x >= domain[1]) {
088                ret = 1.0;
089            } else {
090                ret = innerCumulativeProbability(domain[0], x, 1, populationSize,
091                                                 numberOfSuccesses, sampleSize);
092            }
093    
094            return ret;
095        }
096    
097        /**
098         * Return the domain for the given hypergeometric distribution parameters.
099         *
100         * @param n the population size.
101         * @param m number of successes in the population.
102         * @param k the sample size.
103         * @return a two element array containing the lower and upper bounds of the
104         *         hypergeometric distribution.
105         */
106        private int[] getDomain(int n, int m, int k) {
107            return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) };
108        }
109    
110        /**
111         * Access the domain value lower bound, based on <code>p</code>, used to
112         * bracket a PDF root.
113         *
114         * @param p the desired probability for the critical value
115         * @return domain value lower bound, i.e. P(X &lt; <i>lower bound</i>) &lt;
116         *         <code>p</code>
117         */
118        @Override
119        protected int getDomainLowerBound(double p) {
120            return getLowerDomain(populationSize, numberOfSuccesses, sampleSize);
121        }
122    
123        /**
124         * Access the domain value upper bound, based on <code>p</code>, used to
125         * bracket a PDF root.
126         *
127         * @param p the desired probability for the critical value
128         * @return domain value upper bound, i.e. P(X &lt; <i>upper bound</i>) &gt;
129         *         <code>p</code>
130         */
131        @Override
132        protected int getDomainUpperBound(double p) {
133            return getUpperDomain(sampleSize, numberOfSuccesses);
134        }
135    
136        /**
137         * Return the lowest domain value for the given hypergeometric distribution
138         * parameters.
139         *
140         * @param n the population size.
141         * @param m number of successes in the population.
142         * @param k the sample size.
143         * @return the lowest domain value of the hypergeometric distribution.
144         */
145        private int getLowerDomain(int n, int m, int k) {
146            return Math.max(0, m - (n - k));
147        }
148    
149        /**
150         * Access the number of successes.
151         *
152         * @return the number of successes.
153         */
154        public int getNumberOfSuccesses() {
155            return numberOfSuccesses;
156        }
157    
158        /**
159         * Access the population size.
160         *
161         * @return the population size.
162         */
163        public int getPopulationSize() {
164            return populationSize;
165        }
166    
167        /**
168         * Access the sample size.
169         *
170         * @return the sample size.
171         */
172        public int getSampleSize() {
173            return sampleSize;
174        }
175    
176        /**
177         * Return the highest domain value for the given hypergeometric distribution
178         * parameters.
179         *
180         * @param m number of successes in the population.
181         * @param k the sample size.
182         * @return the highest domain value of the hypergeometric distribution.
183         */
184        private int getUpperDomain(int m, int k) {
185            return Math.min(k, m);
186        }
187    
188        /**
189         * For this distribution, X, this method returns P(X = x).
190         *
191         * @param x the value at which the PMF is evaluated.
192         * @return PMF for this distribution.
193         */
194        public double probability(int x) {
195            double ret;
196    
197            int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
198            if (x < domain[0] || x > domain[1]) {
199                ret = 0.0;
200            } else {
201                double p = (double) sampleSize / (double) populationSize;
202                double q = (double) (populationSize - sampleSize) / (double) populationSize;
203                double p1 = SaddlePointExpansion.logBinomialProbability(x,
204                        numberOfSuccesses, p, q);
205                double p2 =
206                    SaddlePointExpansion.logBinomialProbability(sampleSize - x,
207                        populationSize - numberOfSuccesses, p, q);
208                double p3 =
209                    SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q);
210                ret = Math.exp(p1 + p2 - p3);
211            }
212    
213            return ret;
214        }
215    
216        /**
217         * For the distribution, X, defined by the given hypergeometric distribution
218         * parameters, this method returns P(X = x).
219         *
220         * @param n the population size.
221         * @param m number of successes in the population.
222         * @param k the sample size.
223         * @param x the value at which the PMF is evaluated.
224         * @return PMF for the distribution.
225         */
226        private double probability(int n, int m, int k, int x) {
227            return Math.exp(MathUtils.binomialCoefficientLog(m, x) +
228                   MathUtils.binomialCoefficientLog(n - m, k - x) -
229                   MathUtils.binomialCoefficientLog(n, k));
230        }
231    
232        /**
233         * Modify the number of successes.
234         *
235         * @param num the new number of successes.
236         * @throws IllegalArgumentException if <code>num</code> is negative.
237         * @deprecated as of 2.1 (class will become immutable in 3.0)
238         */
239        @Deprecated
240        public void setNumberOfSuccesses(int num) {
241            setNumberOfSuccessesInternal(num);
242        }
243        /**
244         * Modify the number of successes.
245         *
246         * @param num the new number of successes.
247         * @throws IllegalArgumentException if <code>num</code> is negative.
248         */
249        private void setNumberOfSuccessesInternal(int num) {
250            if (num < 0) {
251                throw MathRuntimeException.createIllegalArgumentException(
252                        "number of successes must be non-negative ({0})", num);
253            }
254            numberOfSuccesses = num;
255        }
256    
257        /**
258         * Modify the population size.
259         *
260         * @param size the new population size.
261         * @throws IllegalArgumentException if <code>size</code> is not positive.
262         * @deprecated as of 2.1 (class will become immutable in 3.0)
263         */
264        @Deprecated
265        public void setPopulationSize(int size) {
266            setPopulationSizeInternal(size);
267        }
268        /**
269         * Modify the population size.
270         *
271         * @param size the new population size.
272         * @throws IllegalArgumentException if <code>size</code> is not positive.
273         */
274        private void setPopulationSizeInternal(int size) {
275            if (size <= 0) {
276                throw MathRuntimeException.createIllegalArgumentException(
277                        "population size must be positive ({0})", size);
278            }
279            populationSize = size;
280        }
281    
282        /**
283         * Modify the sample size.
284         *
285         * @param size the new sample size.
286         * @throws IllegalArgumentException if <code>size</code> is negative.
287         * @deprecated as of 2.1 (class will become immutable in 3.0)
288         */
289        @Deprecated
290        public void setSampleSize(int size) {
291            setSampleSizeInternal(size);
292        }
293        /**
294         * Modify the sample size.
295         *
296         * @param size the new sample size.
297         * @throws IllegalArgumentException if <code>size</code> is negative.
298         */
299        private void setSampleSizeInternal(int size) {
300            if (size < 0) {
301                throw MathRuntimeException.createIllegalArgumentException(
302                        "sample size must be positive ({0})", size);
303            }
304            sampleSize = size;
305        }
306    
307        /**
308         * For this distribution, X, this method returns P(X &ge; x).
309         *
310         * @param x the value at which the CDF is evaluated.
311         * @return upper tail CDF for this distribution.
312         * @since 1.1
313         */
314        public double upperCumulativeProbability(int x) {
315            double ret;
316    
317            final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
318            if (x < domain[0]) {
319                ret = 1.0;
320            } else if (x > domain[1]) {
321                ret = 0.0;
322            } else {
323                ret = innerCumulativeProbability(domain[1], x, -1, populationSize, numberOfSuccesses, sampleSize);
324            }
325    
326            return ret;
327        }
328    
329        /**
330         * For this distribution, X, this method returns P(x0 &le; X &le; x1). This
331         * probability is computed by summing the point probabilities for the values
332         * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx.
333         *
334         * @param x0 the inclusive, lower bound
335         * @param x1 the inclusive, upper bound
336         * @param dx the direction of summation. 1 indicates summing from x0 to x1.
337         *            0 indicates summing from x1 to x0.
338         * @param n the population size.
339         * @param m number of successes in the population.
340         * @param k the sample size.
341         * @return P(x0 &le; X &le; x1).
342         */
343        private double innerCumulativeProbability(int x0, int x1, int dx, int n,
344                int m, int k) {
345            double ret = probability(n, m, k, x0);
346            while (x0 != x1) {
347                x0 += dx;
348                ret += probability(n, m, k, x0);
349            }
350            return ret;
351        }
352    }