1
#include "numpy/random/distributions.h"
2
#include <stdint.h>
3
#include <stddef.h>
4
#include <stdbool.h>
5
#include <math.h>
6

7
#include "logfactorial.h"
8

9

10
/*
11
 *  random_multivariate_hypergeometric_marginals
12
 *
13
 *  Draw samples from the multivariate hypergeometric distribution--
14
 *  the "marginals" algorithm.
15
 *
16
 *  This version generates the sample by iteratively calling
17
 *  hypergeometric() (the univariate hypergeometric distribution).
18
 *
19
 *  Parameters
20
 *  ----------
21
 *  bitgen_t *bitgen_state
22
 *      Pointer to a `bitgen_t` instance.
23
 *  int64_t total
24
 *      The sum of the values in the array `colors`.  (This is redundant
25
 *      information, but we know the caller has already computed it, so
26
 *      we might as well use it.)
27
 *  size_t num_colors
28
 *      The length of the `colors` array.  The functions assumes
29
 *      num_colors > 0.
30
 *  int64_t *colors
31
 *      The array of colors (i.e. the number of each type in the collection
32
 *      from which the random variate is drawn).
33
 *  int64_t nsample
34
 *      The number of objects drawn without replacement for each variate.
35
 *      `nsample` must not exceed sum(colors).  This condition is not checked;
36
 *      it is assumed that the caller has already validated the value.
37
 *  size_t num_variates
38
 *      The number of variates to be produced and put in the array
39
 *      pointed to by `variates`.  One variate is a vector of length
40
 *      `num_colors`, so the array pointed to by `variates` must have length
41
 *      `num_variates * num_colors`.
42
 *  int64_t *variates
43
 *      The array that will hold the result.  It must have length
44
 *      `num_variates * num_colors`.
45
 *      The array is not initialized in the function; it is expected that the
46
 *      array has been initialized with zeros when the function is called.
47
 *
48
 *  Notes
49
 *  -----
50
 *  Here's an example that demonstrates the idea of this algorithm.
51
 *
52
 *  Suppose the urn contains red, green, blue and yellow marbles.
53
 *  Let nred be the number of red marbles, and define the quantities for
54
 *  the other colors similarly.  The total number of marbles is
55
 *
56
 *      total = nred + ngreen + nblue + nyellow.
57
 *
58
 *  To generate a sample using rk_hypergeometric:
59
 *
60
 *     red_sample = hypergeometric(ngood=nred, nbad=total - nred,
61
 *                                 nsample=nsample)
62
 *
63
 *  This gives us the number of red marbles in the sample.  The number of
64
 *  marbles in the sample that are *not* red is nsample - red_sample.
65
 *  To figure out the distribution of those marbles, we again use
66
 *  rk_hypergeometric:
67
 *
68
 *      green_sample = hypergeometric(ngood=ngreen,
69
 *                                    nbad=total - nred - ngreen,
70
 *                                    nsample=nsample - red_sample)
71
 *
72
 *  Similarly,
73
 *
74
 *      blue_sample = hypergeometric(
75
 *                        ngood=nblue,
76
 *                        nbad=total - nred - ngreen - nblue,
77
 *                        nsample=nsample - red_sample - green_sample)
78
 *
79
 *  Finally,
80
 *
81
 *      yellow_sample = total - (red_sample + green_sample + blue_sample).
82
 *
83
 *  The above sequence of steps is implemented as a loop for an arbitrary
84
 *  number of colors in the innermost loop in the code below.  `remaining`
85
 *  is the value passed to `nbad`; it is `total - colors[0]` in the first
86
 *  call to random_hypergeometric(), and then decreases by `colors[j]` in
87
 *  each iteration.  `num_to_sample` is the `nsample` argument.  It
88
 *  starts at this function's `nsample` input, and is decreased by the
89
 *  result of the call to random_hypergeometric() in each iteration.
90
 *
91
 *  Assumptions on the arguments (not checked in the function):
92
 *    *  colors[k] >= 0  for k in range(num_colors)
93
 *    *  total = sum(colors)
94
 *    *  0 <= nsample <= total
95
 *    *  the product num_variates * num_colors does not overflow
96
 */
97

98 1
void random_multivariate_hypergeometric_marginals(bitgen_t *bitgen_state,
99
                           int64_t total,
100
                           size_t num_colors, int64_t *colors,
101
                           int64_t nsample,
102
                           size_t num_variates, int64_t *variates)
103
{
104
    bool more_than_half;
105

106 1
    if ((total == 0) || (nsample == 0) || (num_variates == 0)) {
107
        // Nothing to do.
108
        return;
109
    }
110

111 1
    more_than_half = nsample > (total / 2);
112 1
    if (more_than_half) {
113 1
        nsample = total - nsample;
114
    }
115

116 1
    for (size_t i = 0; i < num_variates * num_colors; i += num_colors) {
117
        int64_t num_to_sample = nsample;
118
        int64_t remaining = total;
119 1
        for (size_t j = 0; (num_to_sample > 0) && (j + 1 < num_colors); ++j) {
120
            int64_t r;
121 1
            remaining -= colors[j];
122 1
            r = random_hypergeometric(bitgen_state,
123
                                      colors[j], remaining, num_to_sample);
124 1
            variates[i + j] = r;
125 1
            num_to_sample -= r;
126
        }
127

128 1
        if (num_to_sample > 0) {
129 1
            variates[i + num_colors - 1] = num_to_sample;
130
        }
131

132 1
        if (more_than_half) {
133 1
            for (size_t k = 0; k < num_colors; ++k) {
134 1
                variates[i + k] = colors[k] - variates[i + k];
135
            }
136
        }
137
    }
138
}

Read our documentation on viewing source code .

Loading