1
#ifndef _RANDOMDGEN__PHILOX_H_
2
#define _RANDOMDGEN__PHILOX_H_
3

4
#include "numpy/npy_common.h"
5
#include <inttypes.h>
6

7
#define PHILOX_BUFFER_SIZE 4L
8

9
struct r123array2x64 {
10
  uint64_t v[2];
11
};
12
struct r123array4x64 {
13
  uint64_t v[4];
14
};
15

16
enum r123_enum_philox4x64 { philox4x64_rounds = 10 };
17
typedef struct r123array4x64 philox4x64_ctr_t;
18
typedef struct r123array2x64 philox4x64_key_t;
19
typedef struct r123array2x64 philox4x64_ukey_t;
20

21
static NPY_INLINE struct r123array2x64
22
_philox4x64bumpkey(struct r123array2x64 key) {
23 1
  key.v[0] += (0x9E3779B97F4A7C15ULL);
24 1
  key.v[1] += (0xBB67AE8584CAA73BULL);
25
  return key;
26
}
27

28
/* Prefer uint128 if available: GCC, clang, ICC */
29
#ifdef __SIZEOF_INT128__
30
static NPY_INLINE uint64_t mulhilo64(uint64_t a, uint64_t b, uint64_t *hip) {
31 1
  __uint128_t product = ((__uint128_t)a) * ((__uint128_t)b);
32 1
  *hip = product >> 64;
33 1
  return (uint64_t)product;
34
}
35
#else
36
#ifdef _WIN32
37
#include <intrin.h>
38
#if defined(_WIN64) && defined(_M_AMD64)
39
#pragma intrinsic(_umul128)
40
#else
41
#pragma intrinsic(__emulu)
42
static NPY_INLINE uint64_t _umul128(uint64_t a, uint64_t b, uint64_t *high) {
43

44
  uint64_t a_lo, a_hi, b_lo, b_hi, a_x_b_hi, a_x_b_mid, a_x_b_lo, b_x_a_mid,
45
      carry_bit;
46
  a_lo = (uint32_t)a;
47
  a_hi = a >> 32;
48
  b_lo = (uint32_t)b;
49
  b_hi = b >> 32;
50

51
  a_x_b_hi = __emulu(a_hi, b_hi);
52
  a_x_b_mid = __emulu(a_hi, b_lo);
53
  b_x_a_mid = __emulu(b_hi, a_lo);
54
  a_x_b_lo = __emulu(a_lo, b_lo);
55

56
  carry_bit = ((uint64_t)(uint32_t)a_x_b_mid + (uint64_t)(uint32_t)b_x_a_mid +
57
               (a_x_b_lo >> 32)) >>
58
              32;
59

60
  *high = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit;
61

62
  return a_x_b_lo + ((a_x_b_mid + b_x_a_mid) << 32);
63
}
64
#endif
65
static NPY_INLINE uint64_t mulhilo64(uint64_t a, uint64_t b, uint64_t *hip) {
66
  return _umul128(a, b, hip);
67
}
68
#else
69
static NPY_INLINE uint64_t _umul128(uint64_t a, uint64_t b, uint64_t *high) {
70

71
  uint64_t a_lo, a_hi, b_lo, b_hi, a_x_b_hi, a_x_b_mid, a_x_b_lo, b_x_a_mid,
72
      carry_bit;
73
  a_lo = (uint32_t)a;
74
  a_hi = a >> 32;
75
  b_lo = (uint32_t)b;
76
  b_hi = b >> 32;
77

78
  a_x_b_hi = a_hi * b_hi;
79
  a_x_b_mid = a_hi * b_lo;
80
  b_x_a_mid = b_hi * a_lo;
81
  a_x_b_lo = a_lo * b_lo;
82

83
  carry_bit = ((uint64_t)(uint32_t)a_x_b_mid + (uint64_t)(uint32_t)b_x_a_mid +
84
               (a_x_b_lo >> 32)) >>
85
              32;
86

87
  *high = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit;
88

89
  return a_x_b_lo + ((a_x_b_mid + b_x_a_mid) << 32);
90
}
91
static NPY_INLINE uint64_t mulhilo64(uint64_t a, uint64_t b, uint64_t *hip) {
92
  return _umul128(a, b, hip);
93
}
94
#endif
95
#endif
96

97
static NPY_INLINE struct r123array4x64 _philox4x64round(struct r123array4x64 ctr,
98
                                                    struct r123array2x64 key);
99

100
static NPY_INLINE struct r123array4x64 _philox4x64round(struct r123array4x64 ctr,
101
                                                    struct r123array2x64 key) {
102
  uint64_t hi0;
103
  uint64_t hi1;
104 1
  uint64_t lo0 = mulhilo64((0xD2E7470EE14C6C93ULL), ctr.v[0], &hi0);
105 1
  uint64_t lo1 = mulhilo64((0xCA5A826395121157ULL), ctr.v[2], &hi1);
106 1
  struct r123array4x64 out = {
107 1
      {hi1 ^ ctr.v[1] ^ key.v[0], lo1, hi0 ^ ctr.v[3] ^ key.v[1], lo0}};
108 1
  return out;
109
}
110

111
static NPY_INLINE philox4x64_key_t philox4x64keyinit(philox4x64_ukey_t uk) {
112
  return uk;
113
}
114
static NPY_INLINE philox4x64_ctr_t philox4x64_R(unsigned int R,
115
                                            philox4x64_ctr_t ctr,
116
                                            philox4x64_key_t key);
117

118 1
static NPY_INLINE philox4x64_ctr_t philox4x64_R(unsigned int R,
119
                                            philox4x64_ctr_t ctr,
120
                                            philox4x64_key_t key) {
121 1
  if (R > 0) {
122
    ctr = _philox4x64round(ctr, key);
123
  }
124 1
  if (R > 1) {
125 1
    key = _philox4x64bumpkey(key);
126 1
    ctr = _philox4x64round(ctr, key);
127
  }
128 1
  if (R > 2) {
129 1
    key = _philox4x64bumpkey(key);
130 1
    ctr = _philox4x64round(ctr, key);
131
  }
132 1
  if (R > 3) {
133 1
    key = _philox4x64bumpkey(key);
134 1
    ctr = _philox4x64round(ctr, key);
135
  }
136 1
  if (R > 4) {
137 1
    key = _philox4x64bumpkey(key);
138 1
    ctr = _philox4x64round(ctr, key);
139
  }
140 1
  if (R > 5) {
141 1
    key = _philox4x64bumpkey(key);
142 1
    ctr = _philox4x64round(ctr, key);
143
  }
144 1
  if (R > 6) {
145 1
    key = _philox4x64bumpkey(key);
146 1
    ctr = _philox4x64round(ctr, key);
147
  }
148 1
  if (R > 7) {
149 1
    key = _philox4x64bumpkey(key);
150 1
    ctr = _philox4x64round(ctr, key);
151
  }
152 1
  if (R > 8) {
153 1
    key = _philox4x64bumpkey(key);
154 1
    ctr = _philox4x64round(ctr, key);
155
  }
156 1
  if (R > 9) {
157 1
    key = _philox4x64bumpkey(key);
158 1
    ctr = _philox4x64round(ctr, key);
159
  }
160 1
  if (R > 10) {
161 0
    key = _philox4x64bumpkey(key);
162 0
    ctr = _philox4x64round(ctr, key);
163
  }
164 1
  if (R > 11) {
165 0
    key = _philox4x64bumpkey(key);
166 0
    ctr = _philox4x64round(ctr, key);
167
  }
168 1
  if (R > 12) {
169 0
    key = _philox4x64bumpkey(key);
170 0
    ctr = _philox4x64round(ctr, key);
171
  }
172 1
  if (R > 13) {
173 0
    key = _philox4x64bumpkey(key);
174 0
    ctr = _philox4x64round(ctr, key);
175
  }
176 1
  if (R > 14) {
177 0
    key = _philox4x64bumpkey(key);
178 0
    ctr = _philox4x64round(ctr, key);
179
  }
180 1
  if (R > 15) {
181 0
    key = _philox4x64bumpkey(key);
182 0
    ctr = _philox4x64round(ctr, key);
183
  }
184 1
  return ctr;
185
}
186

187
typedef struct s_philox_state {
188
  philox4x64_ctr_t *ctr;
189
  philox4x64_key_t *key;
190
  int buffer_pos;
191
  uint64_t buffer[PHILOX_BUFFER_SIZE];
192
  int has_uint32;
193
  uint32_t uinteger;
194
} philox_state;
195

196 1
static NPY_INLINE uint64_t philox_next(philox_state *state) {
197
  uint64_t out;
198
  int i;
199
  philox4x64_ctr_t ct;
200

201 1
  if (state->buffer_pos < PHILOX_BUFFER_SIZE) {
202 1
    out = state->buffer[state->buffer_pos];
203 1
    state->buffer_pos++;
204 1
    return out;
205
  }
206
  /* generate 4 new uint64_t */
207 1
  state->ctr->v[0]++;
208
  /* Handle carry */
209 1
  if (state->ctr->v[0] == 0) {
210 0
    state->ctr->v[1]++;
211 0
    if (state->ctr->v[1] == 0) {
212 0
      state->ctr->v[2]++;
213 0
      if (state->ctr->v[2] == 0) {
214 0
        state->ctr->v[3]++;
215
      }
216
    }
217
  }
218 1
  ct = philox4x64_R(philox4x64_rounds, *state->ctr, *state->key);
219 1
  for (i = 0; i < 4; i++) {
220 1
    state->buffer[i] = ct.v[i];
221
  }
222 1
  state->buffer_pos = 1;
223 1
  return state->buffer[0];
224
}
225

226
static NPY_INLINE uint64_t philox_next64(philox_state *state) {
227 1
  return philox_next(state);
228
}
229

230
static NPY_INLINE uint32_t philox_next32(philox_state *state) {
231
  uint64_t next;
232

233 1
  if (state->has_uint32) {
234 1
    state->has_uint32 = 0;
235 1
    return state->uinteger;
236
  }
237 1
  next = philox_next(state);
238

239 1
  state->has_uint32 = 1;
240 1
  state->uinteger = (uint32_t)(next >> 32);
241 1
  return (uint32_t)(next & 0xffffffff);
242
}
243

244
extern void philox_jump(philox_state *state);
245

246
extern void philox_advance(uint64_t *step, philox_state *state);
247

248
#endif

Read our documentation on viewing source code .

Loading