1
|
|
#ifndef NPY_EXTINT128_H_
|
2
|
|
#define NPY_EXTINT128_H_
|
3
|
|
|
4
|
|
|
5
|
|
typedef struct {
|
6
|
|
signed char sign;
|
7
|
|
npy_uint64 lo, hi;
|
8
|
|
} npy_extint128_t;
|
9
|
|
|
10
|
|
|
11
|
|
/* Integer addition with overflow checking */
|
12
|
|
static NPY_INLINE npy_int64
|
13
|
|
safe_add(npy_int64 a, npy_int64 b, char *overflow_flag)
|
14
|
|
{
|
15
|
1
|
if (a > 0 && b > NPY_MAX_INT64 - a) {
|
16
|
1
|
*overflow_flag = 1;
|
17
|
|
}
|
18
|
1
|
else if (a < 0 && b < NPY_MIN_INT64 - a) {
|
19
|
1
|
*overflow_flag = 1;
|
20
|
|
}
|
21
|
1
|
return a + b;
|
22
|
|
}
|
23
|
|
|
24
|
|
|
25
|
|
/* Integer subtraction with overflow checking */
|
26
|
|
static NPY_INLINE npy_int64
|
27
|
|
safe_sub(npy_int64 a, npy_int64 b, char *overflow_flag)
|
28
|
|
{
|
29
|
1
|
if (a >= 0 && b < a - NPY_MAX_INT64) {
|
30
|
1
|
*overflow_flag = 1;
|
31
|
|
}
|
32
|
1
|
else if (a < 0 && b > a - NPY_MIN_INT64) {
|
33
|
1
|
*overflow_flag = 1;
|
34
|
|
}
|
35
|
1
|
return a - b;
|
36
|
|
}
|
37
|
|
|
38
|
|
|
39
|
|
/* Integer multiplication with overflow checking */
|
40
|
|
static NPY_INLINE npy_int64
|
41
|
1
|
safe_mul(npy_int64 a, npy_int64 b, char *overflow_flag)
|
42
|
|
{
|
43
|
1
|
if (a > 0) {
|
44
|
1
|
if (b > NPY_MAX_INT64 / a || b < NPY_MIN_INT64 / a) {
|
45
|
1
|
*overflow_flag = 1;
|
46
|
|
}
|
47
|
|
}
|
48
|
1
|
else if (a < 0) {
|
49
|
1
|
if (b > 0 && a < NPY_MIN_INT64 / b) {
|
50
|
1
|
*overflow_flag = 1;
|
51
|
|
}
|
52
|
1
|
else if (b < 0 && a < NPY_MAX_INT64 / b) {
|
53
|
1
|
*overflow_flag = 1;
|
54
|
|
}
|
55
|
|
}
|
56
|
1
|
return a * b;
|
57
|
|
}
|
58
|
|
|
59
|
|
|
60
|
|
/* Long integer init */
|
61
|
|
static NPY_INLINE npy_extint128_t
|
62
|
|
to_128(npy_int64 x)
|
63
|
|
{
|
64
|
|
npy_extint128_t result;
|
65
|
1
|
result.sign = (x >= 0 ? 1 : -1);
|
66
|
1
|
if (x >= 0) {
|
67
|
1
|
result.lo = x;
|
68
|
|
}
|
69
|
|
else {
|
70
|
1
|
result.lo = (npy_uint64)(-(x + 1)) + 1;
|
71
|
|
}
|
72
|
1
|
result.hi = 0;
|
73
|
1
|
return result;
|
74
|
|
}
|
75
|
|
|
76
|
|
|
77
|
|
static NPY_INLINE npy_int64
|
78
|
|
to_64(npy_extint128_t x, char *overflow)
|
79
|
|
{
|
80
|
1
|
if (x.hi != 0 ||
|
81
|
1
|
(x.sign > 0 && x.lo > NPY_MAX_INT64) ||
|
82
|
1
|
(x.sign < 0 && x.lo != 0 && x.lo - 1 > -(NPY_MIN_INT64 + 1))) {
|
83
|
1
|
*overflow = 1;
|
84
|
|
}
|
85
|
1
|
return x.lo * x.sign;
|
86
|
|
}
|
87
|
|
|
88
|
|
|
89
|
|
/* Long integer multiply */
|
90
|
|
static NPY_INLINE npy_extint128_t
|
91
|
1
|
mul_64_64(npy_int64 a, npy_int64 b)
|
92
|
|
{
|
93
|
|
npy_extint128_t x, y, z;
|
94
|
|
npy_uint64 x1, x2, y1, y2, r1, r2, prev;
|
95
|
|
|
96
|
1
|
x = to_128(a);
|
97
|
1
|
y = to_128(b);
|
98
|
|
|
99
|
1
|
x1 = x.lo & 0xffffffff;
|
100
|
1
|
x2 = x.lo >> 32;
|
101
|
|
|
102
|
1
|
y1 = y.lo & 0xffffffff;
|
103
|
1
|
y2 = y.lo >> 32;
|
104
|
|
|
105
|
1
|
r1 = x1*y2;
|
106
|
1
|
r2 = x2*y1;
|
107
|
|
|
108
|
1
|
z.sign = x.sign * y.sign;
|
109
|
1
|
z.hi = x2*y2 + (r1 >> 32) + (r2 >> 32);
|
110
|
1
|
z.lo = x1*y1;
|
111
|
|
|
112
|
|
/* Add with carry */
|
113
|
1
|
prev = z.lo;
|
114
|
1
|
z.lo += (r1 << 32);
|
115
|
1
|
if (z.lo < prev) {
|
116
|
1
|
++z.hi;
|
117
|
|
}
|
118
|
|
|
119
|
1
|
prev = z.lo;
|
120
|
1
|
z.lo += (r2 << 32);
|
121
|
1
|
if (z.lo < prev) {
|
122
|
1
|
++z.hi;
|
123
|
|
}
|
124
|
|
|
125
|
1
|
return z;
|
126
|
|
}
|
127
|
|
|
128
|
|
|
129
|
|
/* Long integer add */
|
130
|
|
static NPY_INLINE npy_extint128_t
|
131
|
1
|
add_128(npy_extint128_t x, npy_extint128_t y, char *overflow)
|
132
|
|
{
|
133
|
|
npy_extint128_t z;
|
134
|
|
|
135
|
1
|
if (x.sign == y.sign) {
|
136
|
1
|
z.sign = x.sign;
|
137
|
1
|
z.hi = x.hi + y.hi;
|
138
|
1
|
if (z.hi < x.hi) {
|
139
|
1
|
*overflow = 1;
|
140
|
|
}
|
141
|
1
|
z.lo = x.lo + y.lo;
|
142
|
1
|
if (z.lo < x.lo) {
|
143
|
1
|
if (z.hi == NPY_MAX_UINT64) {
|
144
|
1
|
*overflow = 1;
|
145
|
|
}
|
146
|
1
|
++z.hi;
|
147
|
|
}
|
148
|
|
}
|
149
|
1
|
else if (x.hi > y.hi || (x.hi == y.hi && x.lo >= y.lo)) {
|
150
|
1
|
z.sign = x.sign;
|
151
|
1
|
z.hi = x.hi - y.hi;
|
152
|
1
|
z.lo = x.lo;
|
153
|
1
|
z.lo -= y.lo;
|
154
|
1
|
if (z.lo > x.lo) {
|
155
|
1
|
--z.hi;
|
156
|
|
}
|
157
|
|
}
|
158
|
|
else {
|
159
|
1
|
z.sign = y.sign;
|
160
|
1
|
z.hi = y.hi - x.hi;
|
161
|
1
|
z.lo = y.lo;
|
162
|
1
|
z.lo -= x.lo;
|
163
|
1
|
if (z.lo > y.lo) {
|
164
|
1
|
--z.hi;
|
165
|
|
}
|
166
|
|
}
|
167
|
|
|
168
|
1
|
return z;
|
169
|
|
}
|
170
|
|
|
171
|
|
|
172
|
|
/* Long integer negation */
|
173
|
|
static NPY_INLINE npy_extint128_t
|
174
|
|
neg_128(npy_extint128_t x)
|
175
|
|
{
|
176
|
1
|
npy_extint128_t z = x;
|
177
|
1
|
z.sign *= -1;
|
178
|
1
|
return z;
|
179
|
|
}
|
180
|
|
|
181
|
|
|
182
|
|
static NPY_INLINE npy_extint128_t
|
183
|
|
sub_128(npy_extint128_t x, npy_extint128_t y, char *overflow)
|
184
|
|
{
|
185
|
1
|
return add_128(x, neg_128(y), overflow);
|
186
|
|
}
|
187
|
|
|
188
|
|
|
189
|
|
static NPY_INLINE npy_extint128_t
|
190
|
|
shl_128(npy_extint128_t v)
|
191
|
|
{
|
192
|
|
npy_extint128_t z;
|
193
|
1
|
z = v;
|
194
|
1
|
z.hi <<= 1;
|
195
|
1
|
z.hi |= (z.lo & (((npy_uint64)1) << 63)) >> 63;
|
196
|
1
|
z.lo <<= 1;
|
197
|
1
|
return z;
|
198
|
|
}
|
199
|
|
|
200
|
|
|
201
|
|
static NPY_INLINE npy_extint128_t
|
202
|
|
shr_128(npy_extint128_t v)
|
203
|
|
{
|
204
|
|
npy_extint128_t z;
|
205
|
1
|
z = v;
|
206
|
1
|
z.lo >>= 1;
|
207
|
1
|
z.lo |= (z.hi & 0x1) << 63;
|
208
|
1
|
z.hi >>= 1;
|
209
|
1
|
return z;
|
210
|
|
}
|
211
|
|
|
212
|
|
static NPY_INLINE int
|
213
|
1
|
gt_128(npy_extint128_t a, npy_extint128_t b)
|
214
|
|
{
|
215
|
1
|
if (a.sign > 0 && b.sign > 0) {
|
216
|
1
|
return (a.hi > b.hi) || (a.hi == b.hi && a.lo > b.lo);
|
217
|
|
}
|
218
|
1
|
else if (a.sign < 0 && b.sign < 0) {
|
219
|
1
|
return (a.hi < b.hi) || (a.hi == b.hi && a.lo < b.lo);
|
220
|
|
}
|
221
|
1
|
else if (a.sign > 0 && b.sign < 0) {
|
222
|
1
|
return a.hi != 0 || a.lo != 0 || b.hi != 0 || b.lo != 0;
|
223
|
|
}
|
224
|
|
else {
|
225
|
|
return 0;
|
226
|
|
}
|
227
|
|
}
|
228
|
|
|
229
|
|
|
230
|
|
/* Long integer divide */
|
231
|
|
static NPY_INLINE npy_extint128_t
|
232
|
1
|
divmod_128_64(npy_extint128_t x, npy_int64 b, npy_int64 *mod)
|
233
|
|
{
|
234
|
|
npy_extint128_t remainder, pointer, result, divisor;
|
235
|
1
|
char overflow = 0;
|
236
|
|
|
237
|
|
assert(b > 0);
|
238
|
|
|
239
|
1
|
if (b <= 1 || x.hi == 0) {
|
240
|
1
|
result.sign = x.sign;
|
241
|
1
|
result.lo = x.lo / b;
|
242
|
1
|
result.hi = x.hi / b;
|
243
|
1
|
*mod = x.sign * (x.lo % b);
|
244
|
1
|
return result;
|
245
|
|
}
|
246
|
|
|
247
|
|
/* Long division, not the most efficient choice */
|
248
|
1
|
remainder = x;
|
249
|
1
|
remainder.sign = 1;
|
250
|
|
|
251
|
1
|
divisor.sign = 1;
|
252
|
1
|
divisor.hi = 0;
|
253
|
1
|
divisor.lo = b;
|
254
|
|
|
255
|
1
|
result.sign = 1;
|
256
|
1
|
result.lo = 0;
|
257
|
1
|
result.hi = 0;
|
258
|
|
|
259
|
1
|
pointer.sign = 1;
|
260
|
1
|
pointer.lo = 1;
|
261
|
1
|
pointer.hi = 0;
|
262
|
|
|
263
|
1
|
while ((divisor.hi & (((npy_uint64)1) << 63)) == 0 &&
|
264
|
1
|
gt_128(remainder, divisor)) {
|
265
|
1
|
divisor = shl_128(divisor);
|
266
|
1
|
pointer = shl_128(pointer);
|
267
|
|
}
|
268
|
|
|
269
|
1
|
while (pointer.lo || pointer.hi) {
|
270
|
1
|
if (!gt_128(divisor, remainder)) {
|
271
|
1
|
remainder = sub_128(remainder, divisor, &overflow);
|
272
|
1
|
result = add_128(result, pointer, &overflow);
|
273
|
|
}
|
274
|
1
|
divisor = shr_128(divisor);
|
275
|
1
|
pointer = shr_128(pointer);
|
276
|
|
}
|
277
|
|
|
278
|
|
/* Fix signs and return; cannot overflow */
|
279
|
1
|
result.sign = x.sign;
|
280
|
1
|
*mod = x.sign * remainder.lo;
|
281
|
|
|
282
|
1
|
return result;
|
283
|
|
}
|
284
|
|
|
285
|
|
|
286
|
|
/* Divide and round down (positive divisor; no overflows) */
|
287
|
|
static NPY_INLINE npy_extint128_t
|
288
|
1
|
floordiv_128_64(npy_extint128_t a, npy_int64 b)
|
289
|
|
{
|
290
|
|
npy_extint128_t result;
|
291
|
|
npy_int64 remainder;
|
292
|
1
|
char overflow = 0;
|
293
|
|
assert(b > 0);
|
294
|
1
|
result = divmod_128_64(a, b, &remainder);
|
295
|
1
|
if (a.sign < 0 && remainder != 0) {
|
296
|
1
|
result = sub_128(result, to_128(1), &overflow);
|
297
|
|
}
|
298
|
1
|
return result;
|
299
|
|
}
|
300
|
|
|
301
|
|
|
302
|
|
/* Divide and round up (positive divisor; no overflows) */
|
303
|
|
static NPY_INLINE npy_extint128_t
|
304
|
1
|
ceildiv_128_64(npy_extint128_t a, npy_int64 b)
|
305
|
|
{
|
306
|
|
npy_extint128_t result;
|
307
|
|
npy_int64 remainder;
|
308
|
1
|
char overflow = 0;
|
309
|
|
assert(b > 0);
|
310
|
1
|
result = divmod_128_64(a, b, &remainder);
|
311
|
1
|
if (a.sign > 0 && remainder != 0) {
|
312
|
1
|
result = add_128(result, to_128(1), &overflow);
|
313
|
|
}
|
314
|
1
|
return result;
|
315
|
|
}
|
316
|
|
|
317
|
|
#endif
|