1
/* -*- c -*- */
2
/*
3
 * vim:syntax=c
4
 *
5
 * Low-level routines related to IEEE-754 format
6
 */
7
#include "npy_math_common.h"
8
#include "npy_math_private.h"
9
#include "numpy/utils.h"
10

11
#ifndef HAVE_COPYSIGN
12
double npy_copysign(double x, double y)
13
{
14
    npy_uint32 hx, hy;
15
    GET_HIGH_WORD(hx, x);
16
    GET_HIGH_WORD(hy, y);
17
    SET_HIGH_WORD(x, (hx & 0x7fffffff) | (hy & 0x80000000));
18
    return x;
19
}
20
#endif
21

22
/*
23
 The below code is provided for compilers which do not yet provide C11 compatibility (gcc 4.5 and older)
24
 */
25
#ifndef LDBL_TRUE_MIN
26
#define LDBL_TRUE_MIN __LDBL_DENORM_MIN__
27
#endif
28

29
#if !defined(HAVE_DECL_SIGNBIT)
30
#include "_signbit.c"
31

32
int _npy_signbit_f(float x)
33
{
34
    return _npy_signbit_d((double) x);
35
}
36

37
int _npy_signbit_ld(long double x)
38
{
39
    return _npy_signbit_d((double) x);
40
}
41
#endif
42

43
/*
44
 * FIXME: There is a lot of redundancy between _next* and npy_nextafter*.
45
 * refactor this at some point
46
 *
47
 * p >= 0, returnx x + nulp
48
 * p < 0, returnx x - nulp
49
 */
50 1
static double _next(double x, int p)
51
{
52
    volatile double t;
53
    npy_int32 hx, hy, ix;
54
    npy_uint32 lx;
55

56 1
    EXTRACT_WORDS(hx, lx, x);
57 1
    ix = hx & 0x7fffffff;       /* |x| */
58

59 1
    if (((ix >= 0x7ff00000) && ((ix - 0x7ff00000) | lx) != 0))        /* x is nan */
60
        return x;
61 1
    if ((ix | lx) == 0) {       /* x == 0 */
62 1
        if (p >= 0) {
63 1
            INSERT_WORDS(x, 0x0, 1);    /* return +minsubnormal */
64
        } else {
65 0
            INSERT_WORDS(x, 0x80000000, 1);    /* return -minsubnormal */
66
        }
67 1
        t = x * x;
68 1
        if (t == x)
69 0
            return t;
70
        else
71
            return x;           /* raise underflow flag */
72
    }
73 1
    if (p < 0) {     /* x -= ulp */
74 0
        if (lx == 0)
75 0
            hx -= 1;
76 0
        lx -= 1;
77
    } else {         /* x += ulp */
78 1
        lx += 1;
79 1
        if (lx == 0)
80 0
            hx += 1;
81
    }
82 1
    hy = hx & 0x7ff00000;
83 1
    if (hy >= 0x7ff00000)
84 0
        return x + x;           /* overflow  */
85 1
    if (hy < 0x00100000) {      /* underflow */
86 0
        t = x * x;
87 0
        if (t != x) {           /* raise underflow flag */
88 0
            INSERT_WORDS(x, hx, lx);
89 0
            return x;
90
        }
91
    }
92 1
    INSERT_WORDS(x, hx, lx);
93 1
    return x;
94
}
95

96 1
static float _nextf(float x, int p)
97
{
98
    volatile float t;
99
    npy_int32 hx, hy, ix;
100

101 1
    GET_FLOAT_WORD(hx, x);
102 1
    ix = hx & 0x7fffffff;       /* |x| */
103

104 1
    if ((ix > 0x7f800000))      /* x is nan */
105
        return x;
106 1
    if (ix == 0) {              /* x == 0 */
107 0
        if (p >= 0) {
108
            SET_FLOAT_WORD(x, 0x0 | 1); /* return +minsubnormal */
109
        } else {
110 0
            SET_FLOAT_WORD(x, 0x80000000 | 1); /* return -minsubnormal */
111
        }
112 0
        t = x * x;
113 0
        if (t == x)
114 0
            return t;
115
        else
116
            return x;           /* raise underflow flag */
117
    }
118 1
    if (p < 0) {            /* x -= ulp */
119 0
        hx -= 1;
120
    } else {                /* x += ulp */
121 1
        hx += 1;
122
    }
123 1
    hy = hx & 0x7f800000;
124 1
    if (hy >= 0x7f800000)
125 0
        return x + x;           /* overflow  */
126 1
    if (hy < 0x00800000) {      /* underflow */
127 0
        t = x * x;
128 0
        if (t != x) {           /* raise underflow flag */
129 0
            SET_FLOAT_WORD(x, hx);
130 0
            return x;
131
        }
132
    }
133 1
    SET_FLOAT_WORD(x, hx);
134 1
    return x;
135
}
136

137
#if defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) || \
138
    defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE)
139

140
/*
141
 * FIXME: this is ugly and untested. The asm part only works with gcc, and we
142
 * should consolidate the GET_LDOUBLE* / SET_LDOUBLE macros
143
 */
144
#define math_opt_barrier(x) \
145
        ({ __typeof (x) __x = x; __asm ("" : "+m" (__x)); __x; })
146
#define math_force_eval(x) __asm __volatile ("" : : "m" (x))
147

148
/* only works for big endian */
149
typedef union
150
{
151
    npy_longdouble value;
152
    struct
153
    {
154
        npy_uint64 msw;
155
        npy_uint64 lsw;
156
    } parts64;
157
    struct
158
    {
159
        npy_uint32 w0, w1, w2, w3;
160
    } parts32;
161
} ieee854_long_double_shape_type;
162

163
/* Get two 64 bit ints from a long double.  */
164

165
#define GET_LDOUBLE_WORDS64(ix0,ix1,d) \
166
do {                                   \
167
  ieee854_long_double_shape_type qw_u; \
168
  qw_u.value = (d);                    \
169
  (ix0) = qw_u.parts64.msw;            \
170
  (ix1) = qw_u.parts64.lsw;            \
171
} while (0)
172

173
/* Set a long double from two 64 bit ints.  */
174

175
#define SET_LDOUBLE_WORDS64(d,ix0,ix1) \
176
do {                                   \
177
  ieee854_long_double_shape_type qw_u; \
178
  qw_u.parts64.msw = (ix0);            \
179
  qw_u.parts64.lsw = (ix1);            \
180
  (d) = qw_u.value;                    \
181
} while (0)
182

183
static npy_longdouble _nextl(npy_longdouble x, int p)
184
{
185
    npy_int64 hx,ihx,ilx;
186
    npy_uint64 lx;
187
    npy_longdouble u;
188

189
    GET_LDOUBLE_WORDS64(hx, lx, x);
190
    ihx = hx & 0x7fffffffffffffffLL;      /* |hx| */
191
    ilx = lx & 0x7fffffffffffffffLL;      /* |lx| */
192

193
    if(((ihx & 0x7ff0000000000000LL)==0x7ff0000000000000LL)&&
194
       ((ihx & 0x000fffffffffffffLL)!=0)) {
195
        return x; /* signal the nan */
196
    }
197
    if(ihx == 0 && ilx == 0) {          /* x == 0 */
198
        SET_LDOUBLE_WORDS64(x, p, 0ULL);/* return +-minsubnormal */
199
        u = x * x;
200
        if (u == x) {
201
            return u;
202
        } else {
203
            return x;           /* raise underflow flag */
204
        }
205
    }
206

207
    if(p < 0) { /* p < 0, x -= ulp */
208
        if((hx==0xffefffffffffffffLL)&&(lx==0xfc8ffffffffffffeLL))
209
            return x+x; /* overflow, return -inf */
210
        if (hx >= 0x7ff0000000000000LL) {
211
            SET_LDOUBLE_WORDS64(u,0x7fefffffffffffffLL,0x7c8ffffffffffffeLL);
212
            return u;
213
        }
214
        if(ihx <= 0x0360000000000000LL) {  /* x <= LDBL_MIN */
215
            u = math_opt_barrier (x);
216
            x -= LDBL_TRUE_MIN;
217
            if (ihx < 0x0360000000000000LL
218
                    || (hx > 0 && (npy_int64) lx <= 0)
219
                    || (hx < 0 && (npy_int64) lx > 1)) {
220
                u = u * u;
221
                math_force_eval (u);        /* raise underflow flag */
222
            }
223
            return x;
224
        }
225
        if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */
226
            SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL);
227
            u *= 0x1.0000000000000p-105L;
228
        } else
229
            SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL);
230
        return x - u;
231
    } else {                /* p >= 0, x += ulp */
232
        if((hx==0x7fefffffffffffffLL)&&(lx==0x7c8ffffffffffffeLL))
233
            return x+x; /* overflow, return +inf */
234
        if ((npy_uint64) hx >= 0xfff0000000000000ULL) {
235
            SET_LDOUBLE_WORDS64(u,0xffefffffffffffffLL,0xfc8ffffffffffffeLL);
236
            return u;
237
        }
238
        if(ihx <= 0x0360000000000000LL) {  /* x <= LDBL_MIN */
239
            u = math_opt_barrier (x);
240
            x += LDBL_TRUE_MIN;
241
            if (ihx < 0x0360000000000000LL
242
                    || (hx > 0 && (npy_int64) lx < 0 && lx != 0x8000000000000001LL)
243
                    || (hx < 0 && (npy_int64) lx >= 0)) {
244
                u = u * u;
245
                math_force_eval (u);        /* raise underflow flag */
246
            }
247
            if (x == 0.0L)  /* handle negative LDBL_TRUE_MIN case */
248
                x = -0.0L;
249
            return x;
250
        }
251
        if (ihx < 0x06a0000000000000LL) { /* ulp will denormal */
252
            SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL),0ULL);
253
            u *= 0x1.0000000000000p-105L;
254
        } else
255
            SET_LDOUBLE_WORDS64(u,(hx&0x7ff0000000000000LL)-0x0690000000000000LL,0ULL);
256
        return x + u;
257
    }
258
}
259
#else
260 1
static npy_longdouble _nextl(npy_longdouble x, int p)
261
{
262
    volatile npy_longdouble t;
263
    union IEEEl2bitsrep ux;
264

265 1
    ux.e = x;
266

267 1
    if ((GET_LDOUBLE_EXP(ux) == 0x7fff &&
268 1
         ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) | GET_LDOUBLE_MANL(ux)) != 0)) {
269
        return ux.e;        /* x is nan */
270
    }
271 1
    if (ux.e == 0.0) {
272 0
        SET_LDOUBLE_MANH(ux, 0);              /* return +-minsubnormal */
273 0
        SET_LDOUBLE_MANL(ux, 1);
274 0
        if (p >= 0) {
275 0
            SET_LDOUBLE_SIGN(ux, 0);
276
        } else {
277 0
            SET_LDOUBLE_SIGN(ux, 1);
278
        }
279 0
        t = ux.e * ux.e;
280 0
        if (t == ux.e) {
281 0
            return t;
282
        } else {
283
            return ux.e;           /* raise underflow flag */
284
        }
285
    }
286 1
    if (p < 0) {      /* x -= ulp */
287 0
        if (GET_LDOUBLE_MANL(ux) == 0) {
288 0
            if ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) == 0) {
289 0
                SET_LDOUBLE_EXP(ux, GET_LDOUBLE_EXP(ux) - 1);
290
            }
291 0
            SET_LDOUBLE_MANH(ux,
292
                             (GET_LDOUBLE_MANH(ux) - 1) |
293
                             (GET_LDOUBLE_MANH(ux) & LDBL_NBIT));
294
        }
295 0
        SET_LDOUBLE_MANL(ux, GET_LDOUBLE_MANL(ux) - 1);
296
    } else {                    /* x += ulp */
297 1
        SET_LDOUBLE_MANL(ux, GET_LDOUBLE_MANL(ux) + 1);
298 1
        if (GET_LDOUBLE_MANL(ux) == 0) {
299 0
            SET_LDOUBLE_MANH(ux,
300
                             (GET_LDOUBLE_MANH(ux) + 1) |
301
                             (GET_LDOUBLE_MANH(ux) & LDBL_NBIT));
302 0
            if ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) == 0) {
303 0
                SET_LDOUBLE_EXP(ux, GET_LDOUBLE_EXP(ux) + 1);
304
            }
305
        }
306
    }
307 1
    if (GET_LDOUBLE_EXP(ux) == 0x7fff) {
308 0
        return ux.e + ux.e;           /* overflow  */
309
    }
310 1
    if (GET_LDOUBLE_EXP(ux) == 0) {            /* underflow */
311
        if (LDBL_NBIT) {
312
            SET_LDOUBLE_MANH(ux, GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT);
313
        }
314 0
        t = ux.e * ux.e;
315 0
        if (t != ux.e) {           /* raise underflow flag */
316
            return ux.e;
317
        }
318
    }
319

320 1
    return ux.e;
321
}
322
#endif
323

324
/*
325
 * nextafter code taken from BSD math lib, the code contains the following
326
 * notice:
327
 *
328
 * ====================================================
329
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
330
 *
331
 * Developed at SunPro, a Sun Microsystems, Inc. business.
332
 * Permission to use, copy, modify, and distribute this
333
 * software is freely granted, provided that this notice
334
 * is preserved.
335
 * ====================================================
336
 */
337

338
#ifndef HAVE_NEXTAFTER
339
double npy_nextafter(double x, double y)
340
{
341
    volatile double t;
342
    npy_int32 hx, hy, ix, iy;
343
    npy_uint32 lx, ly;
344

345
    EXTRACT_WORDS(hx, lx, x);
346
    EXTRACT_WORDS(hy, ly, y);
347
    ix = hx & 0x7fffffff;       /* |x| */
348
    iy = hy & 0x7fffffff;       /* |y| */
349

350
    if (((ix >= 0x7ff00000) && ((ix - 0x7ff00000) | lx) != 0) ||        /* x is nan */
351
        ((iy >= 0x7ff00000) && ((iy - 0x7ff00000) | ly) != 0))  /* y is nan */
352
        return x + y;
353
    if (x == y)
354
        return y;               /* x=y, return y */
355
    if ((ix | lx) == 0) {       /* x == 0 */
356
        INSERT_WORDS(x, hy & 0x80000000, 1);    /* return +-minsubnormal */
357
        t = x * x;
358
        if (t == x)
359
            return t;
360
        else
361
            return x;           /* raise underflow flag */
362
    }
363
    if (hx >= 0) {              /* x > 0 */
364
        if (hx > hy || ((hx == hy) && (lx > ly))) {     /* x > y, x -= ulp */
365
            if (lx == 0)
366
                hx -= 1;
367
            lx -= 1;
368
        } else {                /* x < y, x += ulp */
369
            lx += 1;
370
            if (lx == 0)
371
                hx += 1;
372
        }
373
    } else {                    /* x < 0 */
374
        if (hy >= 0 || hx > hy || ((hx == hy) && (lx > ly))) {  /* x < y, x -= ulp */
375
            if (lx == 0)
376
                hx -= 1;
377
            lx -= 1;
378
        } else {                /* x > y, x += ulp */
379
            lx += 1;
380
            if (lx == 0)
381
                hx += 1;
382
        }
383
    }
384
    hy = hx & 0x7ff00000;
385
    if (hy >= 0x7ff00000)
386
        return x + x;           /* overflow  */
387
    if (hy < 0x00100000) {      /* underflow */
388
        t = x * x;
389
        if (t != x) {           /* raise underflow flag */
390
            INSERT_WORDS(y, hx, lx);
391
            return y;
392
        }
393
    }
394
    INSERT_WORDS(x, hx, lx);
395
    return x;
396
}
397
#endif
398

399
#ifndef HAVE_NEXTAFTERF
400
float npy_nextafterf(float x, float y)
401
{
402
    volatile float t;
403
    npy_int32 hx, hy, ix, iy;
404

405
    GET_FLOAT_WORD(hx, x);
406
    GET_FLOAT_WORD(hy, y);
407
    ix = hx & 0x7fffffff;       /* |x| */
408
    iy = hy & 0x7fffffff;       /* |y| */
409

410
    if ((ix > 0x7f800000) ||    /* x is nan */
411
        (iy > 0x7f800000))      /* y is nan */
412
        return x + y;
413
    if (x == y)
414
        return y;               /* x=y, return y */
415
    if (ix == 0) {              /* x == 0 */
416
        SET_FLOAT_WORD(x, (hy & 0x80000000) | 1); /* return +-minsubnormal */
417
        t = x * x;
418
        if (t == x)
419
            return t;
420
        else
421
            return x;           /* raise underflow flag */
422
    }
423
    if (hx >= 0) {              /* x > 0 */
424
        if (hx > hy) {          /* x > y, x -= ulp */
425
            hx -= 1;
426
        } else {                /* x < y, x += ulp */
427
            hx += 1;
428
        }
429
    } else {                    /* x < 0 */
430
        if (hy >= 0 || hx > hy) {       /* x < y, x -= ulp */
431
            hx -= 1;
432
        } else {                /* x > y, x += ulp */
433
            hx += 1;
434
        }
435
    }
436
    hy = hx & 0x7f800000;
437
    if (hy >= 0x7f800000)
438
        return x + x;           /* overflow  */
439
    if (hy < 0x00800000) {      /* underflow */
440
        t = x * x;
441
        if (t != x) {           /* raise underflow flag */
442
            SET_FLOAT_WORD(y, hx);
443
            return y;
444
        }
445
    }
446
    SET_FLOAT_WORD(x, hx);
447
    return x;
448
}
449
#endif
450

451
#ifndef HAVE_NEXTAFTERL
452
npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y)
453
{
454
    volatile npy_longdouble t;
455
    union IEEEl2bitsrep ux;
456
    union IEEEl2bitsrep uy;
457

458
    ux.e = x;
459
    uy.e = y;
460

461
    if ((GET_LDOUBLE_EXP(ux) == 0x7fff &&
462
         ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) | GET_LDOUBLE_MANL(ux)) != 0) ||
463
        (GET_LDOUBLE_EXP(uy) == 0x7fff &&
464
         ((GET_LDOUBLE_MANH(uy) & ~LDBL_NBIT) | GET_LDOUBLE_MANL(uy)) != 0)) {
465
        return ux.e + uy.e;        /* x or y is nan */
466
    }
467
    if (ux.e == uy.e) {
468
        return uy.e;               /* x=y, return y */
469
    }
470
    if (ux.e == 0.0) {
471
        SET_LDOUBLE_MANH(ux, 0);              /* return +-minsubnormal */
472
        SET_LDOUBLE_MANL(ux, 1);
473
        SET_LDOUBLE_SIGN(ux, GET_LDOUBLE_SIGN(uy));
474
        t = ux.e * ux.e;
475
        if (t == ux.e) {
476
            return t;
477
        } else {
478
            return ux.e;           /* raise underflow flag */
479
        }
480
    }
481
    if ((ux.e > 0.0) ^ (ux.e < uy.e)) {      /* x -= ulp */
482
        if (GET_LDOUBLE_MANL(ux) == 0) {
483
            if ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) == 0) {
484
                SET_LDOUBLE_EXP(ux, GET_LDOUBLE_EXP(ux) - 1);
485
            }
486
            SET_LDOUBLE_MANH(ux,
487
                             (GET_LDOUBLE_MANH(ux) - 1) |
488
                             (GET_LDOUBLE_MANH(ux) & LDBL_NBIT));
489
        }
490
        SET_LDOUBLE_MANL(ux, GET_LDOUBLE_MANL(ux) - 1);
491
    } else {                    /* x += ulp */
492
        SET_LDOUBLE_MANL(ux, GET_LDOUBLE_MANL(ux) + 1);
493
        if (GET_LDOUBLE_MANL(ux) == 0) {
494
            SET_LDOUBLE_MANH(ux,
495
                             (GET_LDOUBLE_MANH(ux) + 1) |
496
                             (GET_LDOUBLE_MANH(ux) & LDBL_NBIT));
497
            if ((GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT) == 0) {
498
                SET_LDOUBLE_EXP(ux, GET_LDOUBLE_EXP(ux) + 1);
499
            }
500
        }
501
    }
502
    if (GET_LDOUBLE_EXP(ux) == 0x7fff) {
503
        return ux.e + ux.e;           /* overflow  */
504
    }
505
    if (GET_LDOUBLE_EXP(ux) == 0) {            /* underflow */
506
        if (LDBL_NBIT) {
507
            SET_LDOUBLE_MANH(ux, GET_LDOUBLE_MANH(ux) & ~LDBL_NBIT);
508
        }
509
        t = ux.e * ux.e;
510
        if (t != ux.e) {           /* raise underflow flag */
511
            return ux.e;
512
        }
513
    }
514

515
    return ux.e;
516
}
517
#endif
518

519
/**begin repeat
520
 * #suff = f,,l#
521
 * #SUFF = F,,L#
522
 * #type = npy_float, npy_double, npy_longdouble#
523
 */
524 1
@type@ npy_spacing@suff@(@type@ x)
525
{
526
    /* XXX: npy isnan/isinf may be optimized by bit twiddling */
527 1
    if (npy_isinf(x)) {
528
        return NPY_NAN@SUFF@;
529
    }
530

531 1
    return _next@suff@(x, 1) - x;
532
}
533
/**end repeat**/
534

535
/*
536
 * Decorate all the math functions which are available on the current platform
537
 */
538

539
#ifdef HAVE_NEXTAFTERF
540 1
float npy_nextafterf(float x, float y)
541
{
542 1
    return nextafterf(x, y);
543
}
544
#endif
545

546
#ifdef HAVE_NEXTAFTER
547 1
double npy_nextafter(double x, double y)
548
{
549 1
    return nextafter(x, y);
550
}
551
#endif
552

553
#ifdef HAVE_NEXTAFTERL
554 1
npy_longdouble npy_nextafterl(npy_longdouble x, npy_longdouble y)
555
{
556 1
    return nextafterl(x, y);
557
}
558
#endif
559

560 0
int npy_clear_floatstatus() {
561 0
    char x=0;
562 0
    return npy_clear_floatstatus_barrier(&x);
563
}
564 0
int npy_get_floatstatus() {
565 0
    char x=0;
566 0
    return npy_get_floatstatus_barrier(&x);
567
}
568

569
/*
570
 * Functions to set the floating point status word.
571
 */
572

573
#if (defined(__unix__) || defined(unix)) && !defined(USG)
574
#include <sys/param.h>
575
#endif
576

577

578
/*
579
 * Define floating point status functions. We must define
580
 * npy_get_floatstatus_barrier, npy_clear_floatstatus_barrier,
581
 * npy_set_floatstatus_{divbyzero, overflow, underflow, invalid}
582
 * for all supported platforms.
583
 */
584

585

586
/* Solaris --------------------------------------------------------*/
587
/* --------ignoring SunOS ieee_flags approach, someone else can
588
**         deal with that! */
589
#if defined(sun) || defined(__BSD__) || defined(__OpenBSD__) || \
590
    (defined(__FreeBSD__) && (__FreeBSD_version < 502114)) || \
591
    defined(__NetBSD__)
592
#include <ieeefp.h>
593

594
int npy_get_floatstatus_barrier(char * param)
595
{
596
    int fpstatus = fpgetsticky();
597
    /*
598
     * By using a volatile, the compiler cannot reorder this call
599
     */
600
    if (param != NULL) {
601
        volatile char NPY_UNUSED(c) = *(char*)param;
602
    }
603
    return ((FP_X_DZ  & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
604
           ((FP_X_OFL & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
605
           ((FP_X_UFL & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
606
           ((FP_X_INV & fpstatus) ? NPY_FPE_INVALID : 0);
607
}
608

609
int npy_clear_floatstatus_barrier(char * param)
610
{
611
    int fpstatus = npy_get_floatstatus_barrier(param);
612
    fpsetsticky(0);
613

614
    return fpstatus;
615
}
616

617
void npy_set_floatstatus_divbyzero(void)
618
{
619
    fpsetsticky(FP_X_DZ);
620
}
621

622
void npy_set_floatstatus_overflow(void)
623
{
624
    fpsetsticky(FP_X_OFL);
625
}
626

627
void npy_set_floatstatus_underflow(void)
628
{
629
    fpsetsticky(FP_X_UFL);
630
}
631

632
void npy_set_floatstatus_invalid(void)
633
{
634
    fpsetsticky(FP_X_INV);
635
}
636

637
#elif defined(_AIX) && !defined(__GNUC__)
638
#include <float.h>
639
#include <fpxcp.h>
640

641
int npy_get_floatstatus_barrier(char *param)
642
{
643
    int fpstatus = fp_read_flag();
644
    /*
645
     * By using a volatile, the compiler cannot reorder this call
646
     */
647
    if (param != NULL) {
648
        volatile char NPY_UNUSED(c) = *(char*)param;
649
    }
650
    return ((FP_DIV_BY_ZERO & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
651
           ((FP_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
652
           ((FP_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
653
           ((FP_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
654
}
655

656
int npy_clear_floatstatus_barrier(char * param)
657
{
658
    int fpstatus = npy_get_floatstatus_barrier(param);
659
    fp_swap_flag(0);
660

661
    return fpstatus;
662
}
663

664
void npy_set_floatstatus_divbyzero(void)
665
{
666
    fp_raise_xcp(FP_DIV_BY_ZERO);
667
}
668

669
void npy_set_floatstatus_overflow(void)
670
{
671
    fp_raise_xcp(FP_OVERFLOW);
672
}
673

674
void npy_set_floatstatus_underflow(void)
675
{
676
    fp_raise_xcp(FP_UNDERFLOW);
677
}
678

679
void npy_set_floatstatus_invalid(void)
680
{
681
    fp_raise_xcp(FP_INVALID);
682
}
683

684
#elif defined(_MSC_VER) || (defined(__osf__) && defined(__alpha)) || \
685
      defined (__UCLIBC__) || (defined(__arc__) && defined(__GLIBC__))
686

687
/*
688
 * By using a volatile floating point value,
689
 * the compiler is forced to actually do the requested
690
 * operations because of potential concurrency.
691
 *
692
 * We shouldn't write multiple values to a single
693
 * global here, because that would cause
694
 * a race condition.
695
 */
696
static volatile double _npy_floatstatus_x,
697
    _npy_floatstatus_zero = 0.0, _npy_floatstatus_big = 1e300,
698
    _npy_floatstatus_small = 1e-300, _npy_floatstatus_inf;
699

700
void npy_set_floatstatus_divbyzero(void)
701
{
702
    _npy_floatstatus_x = 1.0 / _npy_floatstatus_zero;
703
}
704

705
void npy_set_floatstatus_overflow(void)
706
{
707
    _npy_floatstatus_x = _npy_floatstatus_big * 1e300;
708
}
709

710
void npy_set_floatstatus_underflow(void)
711
{
712
    _npy_floatstatus_x = _npy_floatstatus_small * 1e-300;
713
}
714

715
void npy_set_floatstatus_invalid(void)
716
{
717
    _npy_floatstatus_inf = NPY_INFINITY;
718
    _npy_floatstatus_x = _npy_floatstatus_inf - NPY_INFINITY;
719
}
720

721
/* MS Windows -----------------------------------------------------*/
722
#if defined(_MSC_VER)
723

724
#include <float.h>
725

726
int npy_get_floatstatus_barrier(char *param)
727
{
728
    /*
729
     * By using a volatile, the compiler cannot reorder this call
730
     */
731
#if defined(_WIN64)
732
    int fpstatus = _statusfp();
733
#else
734
    /* windows enables sse on 32 bit, so check both flags */
735
    int fpstatus, fpstatus2;
736
    _statusfp2(&fpstatus, &fpstatus2);
737
    fpstatus |= fpstatus2;
738
#endif
739
    if (param != NULL) {
740
        volatile char NPY_UNUSED(c) = *(char*)param;
741
    }
742
    return ((SW_ZERODIVIDE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
743
           ((SW_OVERFLOW & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
744
           ((SW_UNDERFLOW & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
745
           ((SW_INVALID & fpstatus) ? NPY_FPE_INVALID : 0);
746
}
747

748
int npy_clear_floatstatus_barrier(char *param)
749
{
750
    int fpstatus = npy_get_floatstatus_barrier(param);
751
    _clearfp();
752

753
    return fpstatus;
754
}
755

756
/*  OSF/Alpha (Tru64)  ---------------------------------------------*/
757
#elif defined(__osf__) && defined(__alpha)
758

759
#include <machine/fpu.h>
760

761
int npy_get_floatstatus_barrier(char *param)
762
{
763
    unsigned long fpstatus = ieee_get_fp_control();
764
    /*
765
     * By using a volatile, the compiler cannot reorder this call
766
     */
767
    if (param != NULL) {
768
        volatile char NPY_UNUSED(c) = *(char*)param;
769
    }
770
    return  ((IEEE_STATUS_DZE & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
771
            ((IEEE_STATUS_OVF & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
772
            ((IEEE_STATUS_UNF & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
773
            ((IEEE_STATUS_INV & fpstatus) ? NPY_FPE_INVALID : 0);
774
}
775

776
int npy_clear_floatstatus_barrier(char *param)
777
{
778
    int fpstatus = npy_get_floatstatus_barrier(param);
779
    /* clear status bits as well as disable exception mode if on */
780
    ieee_set_fp_control(0);
781

782
    return fpstatus;
783
}
784

785
#endif
786
/* End of defined(_MSC_VER) || (defined(__osf__) && defined(__alpha)) */
787

788
#else
789
/* General GCC code, should work on most platforms */
790
#  include <fenv.h>
791

792 1
int npy_get_floatstatus_barrier(char* param)
793
{
794 1
    int fpstatus = fetestexcept(FE_DIVBYZERO | FE_OVERFLOW |
795
                                FE_UNDERFLOW | FE_INVALID);
796
    /*
797
     * By using a volatile, the compiler cannot reorder this call
798
     */
799 1
    if (param != NULL) {
800 1
        volatile char NPY_UNUSED(c) = *(char*)param;
801
    }
802

803 1
    return ((FE_DIVBYZERO  & fpstatus) ? NPY_FPE_DIVIDEBYZERO : 0) |
804 1
           ((FE_OVERFLOW   & fpstatus) ? NPY_FPE_OVERFLOW : 0) |
805 1
           ((FE_UNDERFLOW  & fpstatus) ? NPY_FPE_UNDERFLOW : 0) |
806 1
           ((FE_INVALID    & fpstatus) ? NPY_FPE_INVALID : 0);
807
}
808

809 1
int npy_clear_floatstatus_barrier(char * param)
810
{
811
    /* testing float status is 50-100 times faster than clearing on x86 */
812 1
    int fpstatus = npy_get_floatstatus_barrier(param);
813 1
    if (fpstatus != 0) {
814 1
        feclearexcept(FE_DIVBYZERO | FE_OVERFLOW |
815
                      FE_UNDERFLOW | FE_INVALID);
816
    }
817

818 1
    return fpstatus;
819
}
820

821

822 1
void npy_set_floatstatus_divbyzero(void)
823
{
824 1
    feraiseexcept(FE_DIVBYZERO);
825 1
}
826

827 1
void npy_set_floatstatus_overflow(void)
828
{
829 1
    feraiseexcept(FE_OVERFLOW);
830 1
}
831

832 1
void npy_set_floatstatus_underflow(void)
833
{
834 1
    feraiseexcept(FE_UNDERFLOW);
835 1
}
836

837 1
void npy_set_floatstatus_invalid(void)
838
{
839 1
    feraiseexcept(FE_INVALID);
840 1
}
841

842
#endif

Read our documentation on viewing source code .

Loading