1
|
|
#include "numpy/random/distributions.h"
|
2
|
|
#include <stdint.h>
|
3
|
|
#include <stddef.h>
|
4
|
|
#include <stdbool.h>
|
5
|
|
#include <math.h>
|
6
|
|
|
7
|
|
#include "logfactorial.h"
|
8
|
|
|
9
|
|
|
10
|
|
/*
|
11
|
|
* random_multivariate_hypergeometric_marginals
|
12
|
|
*
|
13
|
|
* Draw samples from the multivariate hypergeometric distribution--
|
14
|
|
* the "marginals" algorithm.
|
15
|
|
*
|
16
|
|
* This version generates the sample by iteratively calling
|
17
|
|
* hypergeometric() (the univariate hypergeometric distribution).
|
18
|
|
*
|
19
|
|
* Parameters
|
20
|
|
* ----------
|
21
|
|
* bitgen_t *bitgen_state
|
22
|
|
* Pointer to a `bitgen_t` instance.
|
23
|
|
* int64_t total
|
24
|
|
* The sum of the values in the array `colors`. (This is redundant
|
25
|
|
* information, but we know the caller has already computed it, so
|
26
|
|
* we might as well use it.)
|
27
|
|
* size_t num_colors
|
28
|
|
* The length of the `colors` array. The functions assumes
|
29
|
|
* num_colors > 0.
|
30
|
|
* int64_t *colors
|
31
|
|
* The array of colors (i.e. the number of each type in the collection
|
32
|
|
* from which the random variate is drawn).
|
33
|
|
* int64_t nsample
|
34
|
|
* The number of objects drawn without replacement for each variate.
|
35
|
|
* `nsample` must not exceed sum(colors). This condition is not checked;
|
36
|
|
* it is assumed that the caller has already validated the value.
|
37
|
|
* size_t num_variates
|
38
|
|
* The number of variates to be produced and put in the array
|
39
|
|
* pointed to by `variates`. One variate is a vector of length
|
40
|
|
* `num_colors`, so the array pointed to by `variates` must have length
|
41
|
|
* `num_variates * num_colors`.
|
42
|
|
* int64_t *variates
|
43
|
|
* The array that will hold the result. It must have length
|
44
|
|
* `num_variates * num_colors`.
|
45
|
|
* The array is not initialized in the function; it is expected that the
|
46
|
|
* array has been initialized with zeros when the function is called.
|
47
|
|
*
|
48
|
|
* Notes
|
49
|
|
* -----
|
50
|
|
* Here's an example that demonstrates the idea of this algorithm.
|
51
|
|
*
|
52
|
|
* Suppose the urn contains red, green, blue and yellow marbles.
|
53
|
|
* Let nred be the number of red marbles, and define the quantities for
|
54
|
|
* the other colors similarly. The total number of marbles is
|
55
|
|
*
|
56
|
|
* total = nred + ngreen + nblue + nyellow.
|
57
|
|
*
|
58
|
|
* To generate a sample using rk_hypergeometric:
|
59
|
|
*
|
60
|
|
* red_sample = hypergeometric(ngood=nred, nbad=total - nred,
|
61
|
|
* nsample=nsample)
|
62
|
|
*
|
63
|
|
* This gives us the number of red marbles in the sample. The number of
|
64
|
|
* marbles in the sample that are *not* red is nsample - red_sample.
|
65
|
|
* To figure out the distribution of those marbles, we again use
|
66
|
|
* rk_hypergeometric:
|
67
|
|
*
|
68
|
|
* green_sample = hypergeometric(ngood=ngreen,
|
69
|
|
* nbad=total - nred - ngreen,
|
70
|
|
* nsample=nsample - red_sample)
|
71
|
|
*
|
72
|
|
* Similarly,
|
73
|
|
*
|
74
|
|
* blue_sample = hypergeometric(
|
75
|
|
* ngood=nblue,
|
76
|
|
* nbad=total - nred - ngreen - nblue,
|
77
|
|
* nsample=nsample - red_sample - green_sample)
|
78
|
|
*
|
79
|
|
* Finally,
|
80
|
|
*
|
81
|
|
* yellow_sample = total - (red_sample + green_sample + blue_sample).
|
82
|
|
*
|
83
|
|
* The above sequence of steps is implemented as a loop for an arbitrary
|
84
|
|
* number of colors in the innermost loop in the code below. `remaining`
|
85
|
|
* is the value passed to `nbad`; it is `total - colors[0]` in the first
|
86
|
|
* call to random_hypergeometric(), and then decreases by `colors[j]` in
|
87
|
|
* each iteration. `num_to_sample` is the `nsample` argument. It
|
88
|
|
* starts at this function's `nsample` input, and is decreased by the
|
89
|
|
* result of the call to random_hypergeometric() in each iteration.
|
90
|
|
*
|
91
|
|
* Assumptions on the arguments (not checked in the function):
|
92
|
|
* * colors[k] >= 0 for k in range(num_colors)
|
93
|
|
* * total = sum(colors)
|
94
|
|
* * 0 <= nsample <= total
|
95
|
|
* * the product num_variates * num_colors does not overflow
|
96
|
|
*/
|
97
|
|
|
98
|
1
|
void random_multivariate_hypergeometric_marginals(bitgen_t *bitgen_state,
|
99
|
|
int64_t total,
|
100
|
|
size_t num_colors, int64_t *colors,
|
101
|
|
int64_t nsample,
|
102
|
|
size_t num_variates, int64_t *variates)
|
103
|
|
{
|
104
|
|
bool more_than_half;
|
105
|
|
|
106
|
1
|
if ((total == 0) || (nsample == 0) || (num_variates == 0)) {
|
107
|
|
// Nothing to do.
|
108
|
|
return;
|
109
|
|
}
|
110
|
|
|
111
|
1
|
more_than_half = nsample > (total / 2);
|
112
|
1
|
if (more_than_half) {
|
113
|
1
|
nsample = total - nsample;
|
114
|
|
}
|
115
|
|
|
116
|
1
|
for (size_t i = 0; i < num_variates * num_colors; i += num_colors) {
|
117
|
|
int64_t num_to_sample = nsample;
|
118
|
|
int64_t remaining = total;
|
119
|
1
|
for (size_t j = 0; (num_to_sample > 0) && (j + 1 < num_colors); ++j) {
|
120
|
|
int64_t r;
|
121
|
1
|
remaining -= colors[j];
|
122
|
1
|
r = random_hypergeometric(bitgen_state,
|
123
|
|
colors[j], remaining, num_to_sample);
|
124
|
1
|
variates[i + j] = r;
|
125
|
1
|
num_to_sample -= r;
|
126
|
|
}
|
127
|
|
|
128
|
1
|
if (num_to_sample > 0) {
|
129
|
1
|
variates[i + num_colors - 1] = num_to_sample;
|
130
|
|
}
|
131
|
|
|
132
|
1
|
if (more_than_half) {
|
133
|
1
|
for (size_t k = 0; k < num_colors; ++k) {
|
134
|
1
|
variates[i + k] = colors[k] - variates[i + k];
|
135
|
|
}
|
136
|
|
}
|
137
|
|
}
|
138
|
|
}
|