1


#include "numpy/random/distributions.h"

2


#include <stdint.h>

3


#include <stddef.h>

4


#include <stdbool.h>

5


#include <math.h>

6



7


#include "logfactorial.h"

8



9



10


/*

11


* random_multivariate_hypergeometric_marginals

12


*

13


* Draw samples from the multivariate hypergeometric distribution

14


* the "marginals" algorithm.

15


*

16


* This version generates the sample by iteratively calling

17


* hypergeometric() (the univariate hypergeometric distribution).

18


*

19


* Parameters

20


* 

21


* bitgen_t *bitgen_state

22


* Pointer to a `bitgen_t` instance.

23


* int64_t total

24


* The sum of the values in the array `colors`. (This is redundant

25


* information, but we know the caller has already computed it, so

26


* we might as well use it.)

27


* size_t num_colors

28


* The length of the `colors` array. The functions assumes

29


* num_colors > 0.

30


* int64_t *colors

31


* The array of colors (i.e. the number of each type in the collection

32


* from which the random variate is drawn).

33


* int64_t nsample

34


* The number of objects drawn without replacement for each variate.

35


* `nsample` must not exceed sum(colors). This condition is not checked;

36


* it is assumed that the caller has already validated the value.

37


* size_t num_variates

38


* The number of variates to be produced and put in the array

39


* pointed to by `variates`. One variate is a vector of length

40


* `num_colors`, so the array pointed to by `variates` must have length

41


* `num_variates * num_colors`.

42


* int64_t *variates

43


* The array that will hold the result. It must have length

44


* `num_variates * num_colors`.

45


* The array is not initialized in the function; it is expected that the

46


* array has been initialized with zeros when the function is called.

47


*

48


* Notes

49


* 

50


* Here's an example that demonstrates the idea of this algorithm.

51


*

52


* Suppose the urn contains red, green, blue and yellow marbles.

53


* Let nred be the number of red marbles, and define the quantities for

54


* the other colors similarly. The total number of marbles is

55


*

56


* total = nred + ngreen + nblue + nyellow.

57


*

58


* To generate a sample using rk_hypergeometric:

59


*

60


* red_sample = hypergeometric(ngood=nred, nbad=total  nred,

61


* nsample=nsample)

62


*

63


* This gives us the number of red marbles in the sample. The number of

64


* marbles in the sample that are *not* red is nsample  red_sample.

65


* To figure out the distribution of those marbles, we again use

66


* rk_hypergeometric:

67


*

68


* green_sample = hypergeometric(ngood=ngreen,

69


* nbad=total  nred  ngreen,

70


* nsample=nsample  red_sample)

71


*

72


* Similarly,

73


*

74


* blue_sample = hypergeometric(

75


* ngood=nblue,

76


* nbad=total  nred  ngreen  nblue,

77


* nsample=nsample  red_sample  green_sample)

78


*

79


* Finally,

80


*

81


* yellow_sample = total  (red_sample + green_sample + blue_sample).

82


*

83


* The above sequence of steps is implemented as a loop for an arbitrary

84


* number of colors in the innermost loop in the code below. `remaining`

85


* is the value passed to `nbad`; it is `total  colors[0]` in the first

86


* call to random_hypergeometric(), and then decreases by `colors[j]` in

87


* each iteration. `num_to_sample` is the `nsample` argument. It

88


* starts at this function's `nsample` input, and is decreased by the

89


* result of the call to random_hypergeometric() in each iteration.

90


*

91


* Assumptions on the arguments (not checked in the function):

92


* * colors[k] >= 0 for k in range(num_colors)

93


* * total = sum(colors)

94


* * 0 <= nsample <= total

95


* * the product num_variates * num_colors does not overflow

96


*/

97



98

1

void random_multivariate_hypergeometric_marginals(bitgen_t *bitgen_state,

99


int64_t total,

100


size_t num_colors, int64_t *colors,

101


int64_t nsample,

102


size_t num_variates, int64_t *variates)

103


{

104


bool more_than_half;

105



106

1

if ((total == 0)  (nsample == 0)  (num_variates == 0)) {

107


// Nothing to do.

108


return;

109


}

110



111

1

more_than_half = nsample > (total / 2);

112

1

if (more_than_half) {

113

1

nsample = total  nsample;

114


}

115



116

1

for (size_t i = 0; i < num_variates * num_colors; i += num_colors) {

117


int64_t num_to_sample = nsample;

118


int64_t remaining = total;

119

1

for (size_t j = 0; (num_to_sample > 0) && (j + 1 < num_colors); ++j) {

120


int64_t r;

121

1

remaining = colors[j];

122

1

r = random_hypergeometric(bitgen_state,

123


colors[j], remaining, num_to_sample);

124

1

variates[i + j] = r;

125

1

num_to_sample = r;

126


}

127



128

1

if (num_to_sample > 0) {

129

1

variates[i + num_colors  1] = num_to_sample;

130


}

131



132

1

if (more_than_half) {

133

1

for (size_t k = 0; k < num_colors; ++k) {

134

1

variates[i + k] = colors[k]  variates[i + k];

135


}

136


}

137


}

138


}
