1
/* -*- c -*- */
2

3
#define _UMATHMODULE
4
#define _MULTIARRAYMODULE
5
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
6

7
#include "Python.h"
8

9
#include "npy_config.h"
10
#include "numpy/npy_common.h"
11
#include "numpy/arrayobject.h"
12
#include "numpy/ufuncobject.h"
13
#include "numpy/npy_math.h"
14
#include "numpy/halffloat.h"
15
#include "lowlevel_strided_loops.h"
16

17
#include "npy_pycompat.h"
18

19
#include "ufunc_object.h"
20

21
#include <string.h> /* for memchr */
22

23
/*
24
 * cutoff blocksize for pairwise summation
25
 * decreasing it decreases errors slightly as more pairs are summed but
26
 * also lowers performance, as the inner loop is unrolled eight times it is
27
 * effectively 16
28
 */
29
#define PW_BLOCKSIZE    128
30

31

32
/*
33
 * largest simd vector size in bytes numpy supports
34
 * it is currently a extremely large value as it is only used for memory
35
 * overlap checks
36
 */
37
#ifndef NPY_MAX_SIMD_SIZE
38
#define NPY_MAX_SIMD_SIZE 1024
39
#endif
40

41
/** Provides the various *_LOOP macros */
42
#include "fast_loop_macros.h"
43

44
/*
45
 * include vectorized functions and dispatchers
46
 * this file is safe to include also for generic builds
47
 * platform specific instructions are either masked via the proprocessor or
48
 * runtime detected
49
 */
50
#include "simd.inc"
51

52
/******************************************************************************
53
 **                          GENERIC FLOAT LOOPS                             **
54
 *****************************************************************************/
55

56
/* direct loops using a suitable callback */
57

58
/**begin repeat
59
 * #c = e, f, d, g#
60
 * #type = npy_half, npy_float, npy_double, npy_longdouble#
61
 **/
62

63
/*UFUNC_API*/
64
NPY_NO_EXPORT void
65 1
PyUFunc_@c@_@c@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
66
{
67
    typedef @type@ func_type(@type@);
68 1
    func_type *f = (func_type *)func;
69 1
    UNARY_LOOP {
70 1
        const @type@ in1 = *(@type@ *)ip1;
71 1
        *(@type@ *)op1 = f(in1);
72
    }
73 1
}
74

75
/*UFUNC_API*/
76
NPY_NO_EXPORT void
77 1
PyUFunc_@c@@c@_@c@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
78
{
79
    typedef @type@ func_type(@type@, @type@);
80 1
    func_type *f = (func_type *)func;
81 1
    BINARY_LOOP {
82 1
        @type@ in1 = *(@type@ *)ip1;
83 1
        @type@ in2 = *(@type@ *)ip2;
84 1
        *(@type@ *)op1 = f(in1, in2);
85
    }
86 1
}
87

88
/**end repeat**/
89

90
/* indirect loops with casting */
91
/**begin repeat
92
 * #c1    = e,         e,          f#
93
 * #type1 = npy_half,  npy_half,   npy_float#
94
 * #c2    = f,         d,          d#
95
 * #type2 = npy_float, npy_double, npy_double#
96
 *
97
 * #conv12  = npy_half_to_float, npy_half_to_double, (double)#
98
 * #conv21  = npy_float_to_half, npy_double_to_half, (float)#
99
 **/
100

101
/*UFUNC_API*/
102
NPY_NO_EXPORT void
103 1
PyUFunc_@c1@_@c1@_As_@c2@_@c2@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
104
{
105
    typedef @type2@ func_type(@type2@);
106 1
    func_type *f = (func_type *)func;
107 1
    UNARY_LOOP {
108 1
        const @type2@ in1 = @conv12@(*(@type1@ *)ip1);
109 1
        *(@type1@ *)op1 = @conv21@(f(in1));
110
    }
111 1
}
112
/*UFUNC_API*/
113
NPY_NO_EXPORT void
114 1
PyUFunc_@c1@@c1@_@c1@_As_@c2@@c2@_@c2@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
115
{
116
    typedef @type2@ func_type(@type2@, @type2@);
117 1
    func_type *f = (func_type *)func;
118 1
    BINARY_LOOP {
119 1
        const @type2@ in1 = @conv12@(*(@type1@ *)ip1);
120 1
        const @type2@ in2 = @conv12@(*(@type1@ *)ip2);
121 1
        *(@type1@ *)op1 = @conv21@(f(in1, in2));
122
    }
123 1
}
124

125
/**end repeat**/
126

127
/******************************************************************************
128
 **                          GENERIC COMPLEX LOOPS                           **
129
 *****************************************************************************/
130

131
/* direct loops using a suitable callback */
132
/**begin repeat
133
 * #c = F, D, G#
134
 * #type = npy_cfloat, npy_cdouble, npy_clongdouble#
135
 **/
136

137
/*UFUNC_API*/
138
NPY_NO_EXPORT void
139 1
PyUFunc_@c@_@c@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
140
{
141
    typedef void func_type(@type@ *, @type@ *);
142 1
    func_type *f = (func_type *)func;
143 1
    UNARY_LOOP {
144 1
        @type@ in1 = *(@type@ *)ip1;
145 1
        @type@ *out = (@type@ *)op1;
146 1
        f(&in1, out);
147
    }
148 1
}
149

150
/*UFUNC_API*/
151
NPY_NO_EXPORT void
152 1
PyUFunc_@c@@c@_@c@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
153
{
154
    typedef void func_type(@type@ *, @type@ *, @type@ *);
155 1
    func_type *f = (func_type *)func;
156 1
    BINARY_LOOP {
157 1
        @type@ in1 = *(@type@ *)ip1;
158 1
        @type@ in2 = *(@type@ *)ip2;
159 1
        @type@ *out = (@type@ *)op1;
160 1
        f(&in1, &in2, out);
161
    }
162 1
}
163
/**end repeat**/
164

165

166
/* indirect loops with casting */
167
/*UFUNC_API*/
168
NPY_NO_EXPORT void
169 0
PyUFunc_F_F_As_D_D(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
170
{
171
    typedef void func_type(npy_cdouble *, npy_cdouble *);
172 0
    func_type *f = (func_type *)func;
173 0
    UNARY_LOOP {
174
        npy_cdouble tmp, out;
175 0
        tmp.real = (double)((float *)ip1)[0];
176 0
        tmp.imag = (double)((float *)ip1)[1];
177 0
        f(&tmp, &out);
178 0
        ((float *)op1)[0] = (float)out.real;
179 0
        ((float *)op1)[1] = (float)out.imag;
180
    }
181 0
}
182

183
/*UFUNC_API*/
184
NPY_NO_EXPORT void
185 0
PyUFunc_FF_F_As_DD_D(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
186
{
187
    typedef void func_type(npy_cdouble *, npy_cdouble *, npy_cdouble *);
188 0
    func_type *f = (func_type *)func;
189 0
    BINARY_LOOP {
190
        npy_cdouble tmp1, tmp2, out;
191 0
        tmp1.real = (double)((float *)ip1)[0];
192 0
        tmp1.imag = (double)((float *)ip1)[1];
193 0
        tmp2.real = (double)((float *)ip2)[0];
194 0
        tmp2.imag = (double)((float *)ip2)[1];
195 0
        f(&tmp1, &tmp2, &out);
196 0
        ((float *)op1)[0] = (float)out.real;
197 0
        ((float *)op1)[1] = (float)out.imag;
198
    }
199 0
}
200

201

202
/******************************************************************************
203
 **                         GENERIC OBJECT lOOPS                             **
204
 *****************************************************************************/
205

206
/*UFUNC_API*/
207
NPY_NO_EXPORT void
208 1
PyUFunc_O_O(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
209
{
210 1
    unaryfunc f = (unaryfunc)func;
211 1
    UNARY_LOOP {
212 1
        PyObject *in1 = *(PyObject **)ip1;
213 1
        PyObject **out = (PyObject **)op1;
214 1
        PyObject *ret = f(in1 ? in1 : Py_None);
215 1
        if (ret == NULL) {
216
            return;
217
        }
218 1
        Py_XDECREF(*out);
219 1
        *out = ret;
220
    }
221
}
222

223
/*UFUNC_API*/
224
NPY_NO_EXPORT void
225 1
PyUFunc_O_O_method(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
226
{
227 1
    char *meth = (char *)func;
228 1
    UNARY_LOOP {
229 1
        PyObject *in1 = *(PyObject **)ip1;
230 1
        PyObject **out = (PyObject **)op1;
231
        PyObject *ret, *func;
232 1
        func = PyObject_GetAttrString(in1 ? in1 : Py_None, meth);
233 1
        if (func != NULL && !PyCallable_Check(func)) {
234 0
            Py_DECREF(func);
235
            func = NULL;
236
        }
237 1
        if (func == NULL) {
238
            PyObject *exc, *val, *tb;
239 1
            PyTypeObject *type = in1 ? Py_TYPE(in1) : Py_TYPE(Py_None);
240 1
            PyErr_Fetch(&exc, &val, &tb);
241 1
            PyErr_Format(PyExc_TypeError,
242
                         "loop of ufunc does not support argument %d of "
243
                         "type %s which has no callable %s method",
244
                         i, type->tp_name, meth);
245 1
            npy_PyErr_ChainExceptionsCause(exc, val, tb);
246 1
            Py_XDECREF(func);
247
            return;
248
        }
249 1
        ret = PyObject_CallObject(func, NULL);
250 1
        Py_DECREF(func);
251 1
        if (ret == NULL) {
252
            return;
253
        }
254 1
        Py_XDECREF(*out);
255 1
        *out = ret;
256
    }
257
}
258

259
/*UFUNC_API*/
260
NPY_NO_EXPORT void
261 1
PyUFunc_OO_O(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
262
{
263 1
    binaryfunc f = (binaryfunc)func;
264 1
    BINARY_LOOP {
265 1
        PyObject *in1 = *(PyObject **)ip1;
266 1
        PyObject *in2 = *(PyObject **)ip2;
267 1
        PyObject **out = (PyObject **)op1;
268 1
        PyObject *ret = f(in1 ? in1 : Py_None, in2 ? in2 : Py_None);
269 1
        if (ret == NULL) {
270
            return;
271
        }
272 1
        Py_XDECREF(*out);
273 1
        *out = ret;
274
    }
275
}
276

277
NPY_NO_EXPORT void
278 1
PyUFunc_OOO_O(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
279
{
280 1
    ternaryfunc f = (ternaryfunc)func;
281 1
    TERNARY_LOOP {
282 1
        PyObject *in1 = *(PyObject **)ip1;
283 1
        PyObject *in2 = *(PyObject **)ip2;
284 1
        PyObject *in3 = *(PyObject **)ip3;
285 1
        PyObject **out = (PyObject **)op1;
286 1
        PyObject *ret = f(
287
            in1 ? in1 : Py_None,
288
            in2 ? in2 : Py_None,
289
            in3 ? in3 : Py_None
290
        );
291 1
        if (ret == NULL) {
292
            return;
293
        }
294 1
        Py_XDECREF(*out);
295 1
        *out = ret;
296
    }
297
}
298

299
/*UFUNC_API*/
300
NPY_NO_EXPORT void
301 1
PyUFunc_OO_O_method(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
302
{
303 1
    char *meth = (char *)func;
304 1
    BINARY_LOOP {
305 1
        PyObject *in1 = *(PyObject **)ip1;
306 1
        PyObject *in2 = *(PyObject **)ip2;
307 1
        PyObject **out = (PyObject **)op1;
308 1
        PyObject *ret = PyObject_CallMethod(in1 ? in1 : Py_None,
309
                                            meth, "(O)", in2);
310 1
        if (ret == NULL) {
311
            return;
312
        }
313 1
        Py_XDECREF(*out);
314 1
        *out = ret;
315
    }
316
}
317

318
/*
319
 * A general-purpose ufunc that deals with general-purpose Python callable.
320
 * func is a structure with nin, nout, and a Python callable function
321
 */
322

323
/*UFUNC_API*/
324
NPY_NO_EXPORT void
325 1
PyUFunc_On_Om(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
326
{
327 1
    npy_intp n =  dimensions[0];
328 1
    PyUFunc_PyFuncData *data = (PyUFunc_PyFuncData *)func;
329 1
    int nin = data->nin;
330 1
    int nout = data->nout;
331 1
    PyObject *tocall = data->callable;
332
    char *ptrs[NPY_MAXARGS];
333
    PyObject *arglist, *result;
334
    PyObject *in, **op;
335
    npy_intp i, j, ntot;
336

337 1
    ntot = nin+nout;
338

339 1
    for(j = 0; j < ntot; j++) {
340 1
        ptrs[j] = args[j];
341
    }
342 1
    for(i = 0; i < n; i++) {
343 1
        arglist = PyTuple_New(nin);
344 1
        if (arglist == NULL) {
345 0
            return;
346
        }
347 1
        for(j = 0; j < nin; j++) {
348 1
            in = *((PyObject **)ptrs[j]);
349 1
            if (in == NULL) {
350 0
                in = Py_None;
351
            }
352 1
            PyTuple_SET_ITEM(arglist, j, in);
353 1
            Py_INCREF(in);
354
        }
355 1
        result = PyObject_CallObject(tocall, arglist);
356 1
        Py_DECREF(arglist);
357 1
        if (result == NULL) {
358
            return;
359
        }
360 1
        if (nout == 0  && result == Py_None) {
361
            /* No output expected, no output received, continue */
362 1
            Py_DECREF(result);
363
        }
364 1
        else if (nout == 1) {
365
            /* Single output expected, assign and continue */
366 1
            op = (PyObject **)ptrs[nin];
367 1
            Py_XDECREF(*op);
368 1
            *op = result;
369
        }
370 1
        else if (PyTuple_Check(result) && nout == PyTuple_Size(result)) {
371
            /*
372
             * Multiple returns match expected number of outputs, assign
373
             * and continue. Will also gobble empty tuples if nout == 0.
374
             */
375 1
            for(j = 0; j < nout; j++) {
376 1
                op = (PyObject **)ptrs[j+nin];
377 1
                Py_XDECREF(*op);
378 1
                *op = PyTuple_GET_ITEM(result, j);
379 1
                Py_INCREF(*op);
380
            }
381 1
            Py_DECREF(result);
382
        }
383
        else {
384
            /* Mismatch between returns and expected outputs, exit */
385 0
            Py_DECREF(result);
386
            return;
387
        }
388 1
        for(j = 0; j < ntot; j++) {
389 1
            ptrs[j] += steps[j];
390
        }
391
    }
392
}
393

394
/*
395
 *****************************************************************************
396
 **                             BOOLEAN LOOPS                               **
397
 *****************************************************************************
398
 */
399

400
/**begin repeat
401
 * #kind = equal, not_equal, greater, greater_equal, less, less_equal#
402
 * #OP =  ==, !=, >, >=, <, <=#
403
 **/
404

405
NPY_NO_EXPORT void
406 1
BOOL_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
407
{
408 1
    BINARY_LOOP {
409 1
        npy_bool in1 = *((npy_bool *)ip1) != 0;
410 1
        npy_bool in2 = *((npy_bool *)ip2) != 0;
411 1
        *((npy_bool *)op1)= in1 @OP@ in2;
412
    }
413 1
}
414
/**end repeat**/
415

416

417
/**begin repeat
418
 * #kind = logical_and, logical_or#
419
 * #OP =  &&, ||#
420
 * #SC =  ==, !=#
421
 * #and = 1, 0#
422
 **/
423

424
NPY_NO_EXPORT void
425 1
BOOL_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
426
{
427 1
    if(IS_BINARY_REDUCE) {
428
#ifdef NPY_HAVE_SSE2_INTRINSICS
429
        /*
430
         * stick with our variant for more reliable performance, only known
431
         * platform which outperforms it by ~20% is an i7 with glibc 2.17
432
         */
433 1
        if (run_reduce_simd_@kind@_BOOL(args, dimensions, steps)) {
434
            return;
435
        }
436
#else
437
        /* for now only use libc on 32-bit/non-x86 */
438
        if (steps[1] == 1) {
439
            npy_bool * op = (npy_bool *)args[0];
440
#if @and@
441
            /* np.all(), search for a zero (false) */
442
            if (*op) {
443
                *op = memchr(args[1], 0, dimensions[0]) == NULL;
444
            }
445
#else
446
            /*
447
             * np.any(), search for a non-zero (true) via comparing against
448
             * zero blocks, memcmp is faster than memchr on SSE4 machines
449
             * with glibc >= 2.12 and memchr can only check for equal 1
450
             */
451
            static const npy_bool zero[4096]; /* zero by C standard */
452
            npy_uintp i, n = dimensions[0];
453

454
            for (i = 0; !*op && i < n - (n % sizeof(zero)); i += sizeof(zero)) {
455
                *op = memcmp(&args[1][i], zero, sizeof(zero)) != 0;
456
            }
457
            if (!*op && n - i > 0) {
458
                *op = memcmp(&args[1][i], zero, n - i) != 0;
459
            }
460
#endif
461
            return;
462
        }
463
#endif
464
        else {
465 1
            BINARY_REDUCE_LOOP(npy_bool) {
466 1
                const npy_bool in2 = *(npy_bool *)ip2;
467 1
                io1 = io1 @OP@ in2;
468 1
                if (io1 @SC@ 0) {
469
                    break;
470
                }
471
            }
472 1
            *((npy_bool *)iop1) = io1;
473
        }
474
    }
475
    else {
476 1
        if (run_binary_simd_@kind@_BOOL(args, dimensions, steps)) {
477
            return;
478
        }
479
        else {
480 1
            BINARY_LOOP {
481 1
                const npy_bool in1 = *(npy_bool *)ip1;
482 1
                const npy_bool in2 = *(npy_bool *)ip2;
483 1
                *((npy_bool *)op1) = in1 @OP@ in2;
484
            }
485
        }
486
    }
487
}
488
/**end repeat**/
489

490
/**begin repeat
491
 * #kind = absolute, logical_not#
492
 * #OP =  !=, ==#
493
 **/
494
NPY_NO_EXPORT void
495 1
BOOL_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
496
{
497 1
    if (run_unary_simd_@kind@_BOOL(args, dimensions, steps)) {
498
        return;
499
    }
500
    else {
501 1
        UNARY_LOOP {
502 1
            npy_bool in1 = *(npy_bool *)ip1;
503 1
            *((npy_bool *)op1) = in1 @OP@ 0;
504
        }
505
    }
506
}
507
/**end repeat**/
508

509
NPY_NO_EXPORT void
510 0
BOOL__ones_like(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
511
{
512 0
    OUTPUT_LOOP {
513 0
        *((npy_bool *)op1) = 1;
514
    }
515 0
}
516

517

518
/**begin repeat
519
 * #kind = isnan, isinf, isfinite#
520
 * #func = npy_isnan, npy_isinf, npy_isfinite#
521
 * #val = NPY_FALSE, NPY_FALSE, NPY_TRUE#
522
 **/
523
NPY_NO_EXPORT void
524 1
BOOL_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
525
{
526
    /*
527
     * The (void)in; suppresses an unused variable warning raised by gcc and allows
528
     * us to re-use this macro even though we do not depend on in
529
     */
530 1
    UNARY_LOOP_FAST(npy_bool, npy_bool, (void)in; *out = @val@);
531 1
}
532

533
/**end repeat**/
534

535
/*
536
 *****************************************************************************
537
 **                           INTEGER LOOPS
538
 *****************************************************************************
539
 */
540

541
/**begin repeat
542
 * #TYPE = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
543
 *         LONG, ULONG, LONGLONG, ULONGLONG#
544
 * #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
545
 *         npy_long, npy_ulong, npy_longlong, npy_ulonglong#
546
 * #ftype = npy_float, npy_float, npy_float, npy_float, npy_double, npy_double,
547
 *          npy_double, npy_double, npy_double, npy_double#
548
 * #SIGNED = 1, 0, 1, 0, 1, 0, 1, 0, 1, 0#
549
 * #c = hh,uhh,h,uh,,u,l,ul,ll,ull#
550
 */
551

552
#define @TYPE@_floor_divide @TYPE@_divide
553
#define @TYPE@_fmax @TYPE@_maximum
554
#define @TYPE@_fmin @TYPE@_minimum
555

556
NPY_NO_EXPORT void
557 0
@TYPE@__ones_like(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
558
{
559 0
    OUTPUT_LOOP {
560 0
        *((@type@ *)op1) = 1;
561
    }
562 0
}
563

564
NPY_NO_EXPORT void
565 1
@TYPE@_positive(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
566
{
567 1
    UNARY_LOOP_FAST(@type@, @type@, *out = +in);
568 1
}
569

570
/**begin repeat1
571
 * #isa = , _avx2#
572
 * #ISA = , AVX2#
573
 * #CHK = 1, HAVE_ATTRIBUTE_TARGET_AVX2#
574
 * #ATTR = , NPY_GCC_TARGET_AVX2#
575
 */
576

577
#if @CHK@
578
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
579 1
@TYPE@_square@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
580
{
581 1
    UNARY_LOOP_FAST(@type@, @type@, *out = in * in);
582 1
}
583
#endif
584

585
#if @CHK@
586
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
587 1
@TYPE@_reciprocal@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
588
{
589 1
    UNARY_LOOP_FAST(@type@, @type@, *out = 1.0 / in);
590 1
}
591
#endif
592

593
#if @CHK@
594
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
595 1
@TYPE@_conjugate@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
596
{
597 1
    UNARY_LOOP_FAST(@type@, @type@, *out = in);
598 1
}
599
#endif
600

601
#if @CHK@
602
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
603 1
@TYPE@_negative@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
604
{
605 1
    UNARY_LOOP_FAST(@type@, @type@, *out = -in);
606 1
}
607
#endif
608

609
#if @CHK@
610
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
611 1
@TYPE@_logical_not@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
612
{
613 1
    UNARY_LOOP_FAST(@type@, npy_bool, *out = !in);
614 1
}
615
#endif
616

617
#if @CHK@
618
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
619 1
@TYPE@_invert@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
620
{
621 1
    UNARY_LOOP_FAST(@type@, @type@, *out = ~in);
622 1
}
623
#endif
624

625
/**begin repeat2
626
 * Arithmetic
627
 * #kind = add, subtract, multiply, bitwise_and, bitwise_or, bitwise_xor#
628
 * #OP = +, -, *, &, |, ^#
629
 */
630

631
#if @CHK@
632
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
633 1
@TYPE@_@kind@@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
634
{
635 1
    if (IS_BINARY_REDUCE) {
636 1
        BINARY_REDUCE_LOOP(@type@) {
637 1
            io1 @OP@= *(@type@ *)ip2;
638
        }
639 1
        *((@type@ *)iop1) = io1;
640
    }
641
    else {
642 1
        BINARY_LOOP_FAST(@type@, @type@, *out = in1 @OP@ in2);
643
    }
644 1
}
645
#endif
646

647
/**end repeat2**/
648

649
/*
650
 * Arithmetic bit shift operations.
651
 *
652
 * Intel hardware masks bit shift values, so large shifts wrap around
653
 * and can produce surprising results. The special handling ensures that
654
 * behavior is independent of compiler or hardware.
655
 * TODO: We could implement consistent behavior for negative shifts,
656
 *       which is undefined in C.
657
 */
658

659
#define INT_left_shift_needs_clear_floatstatus
660
#define UINT_left_shift_needs_clear_floatstatus
661

662
NPY_NO_EXPORT NPY_GCC_OPT_3 void
663 1
@TYPE@_left_shift@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps,
664
                  void *NPY_UNUSED(func))
665
{
666 1
    BINARY_LOOP_FAST(@type@, @type@, *out = npy_lshift@c@(in1, in2));
667

668
#ifdef @TYPE@_left_shift_needs_clear_floatstatus
669
    // For some reason, our macOS CI sets an "invalid" flag here, but only
670
    // for some types.
671 1
    npy_clear_floatstatus_barrier((char*)dimensions);
672
#endif
673 1
}
674

675
#undef INT_left_shift_needs_clear_floatstatus
676
#undef UINT_left_shift_needs_clear_floatstatus
677

678
NPY_NO_EXPORT
679
#ifndef NPY_DO_NOT_OPTIMIZE_@TYPE@_right_shift
680
NPY_GCC_OPT_3
681
#endif
682
void
683 1
@TYPE@_right_shift@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps,
684
                   void *NPY_UNUSED(func))
685
{
686 1
    BINARY_LOOP_FAST(@type@, @type@, *out = npy_rshift@c@(in1, in2));
687 1
}
688

689

690
/**begin repeat2
691
 * #kind = equal, not_equal, greater, greater_equal, less, less_equal,
692
 *         logical_and, logical_or#
693
 * #OP =  ==, !=, >, >=, <, <=, &&, ||#
694
 */
695

696
#if @CHK@
697
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
698 1
@TYPE@_@kind@@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
699
{
700
    /*
701
     * gcc vectorization of this is not good (PR60575) but manual integer
702
     * vectorization is too tedious to be worthwhile
703
     */
704 1
    BINARY_LOOP_FAST(@type@, npy_bool, *out = in1 @OP@ in2);
705 1
}
706
#endif
707

708
/**end repeat2**/
709

710
#if @CHK@
711
NPY_NO_EXPORT NPY_GCC_OPT_3 @ATTR@ void
712 1
@TYPE@_logical_xor@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
713
{
714 1
    BINARY_LOOP {
715 1
        const int t1 = !!*(@type@ *)ip1;
716 1
        const int t2 = !!*(@type@ *)ip2;
717 1
        *((npy_bool *)op1) = (t1 != t2);
718
    }
719 1
}
720
#endif
721

722
/**end repeat1**/
723

724
/**begin repeat1
725
 * #kind = maximum, minimum#
726
 * #OP =  >, <#
727
 **/
728

729
NPY_NO_EXPORT void
730 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
731
{
732 1
    if (IS_BINARY_REDUCE) {
733 1
        BINARY_REDUCE_LOOP(@type@) {
734 1
            const @type@ in2 = *(@type@ *)ip2;
735 1
            io1 = (io1 @OP@ in2) ? io1 : in2;
736
        }
737 1
        *((@type@ *)iop1) = io1;
738
    }
739
    else {
740 1
        BINARY_LOOP {
741 1
            const @type@ in1 = *(@type@ *)ip1;
742 1
            const @type@ in2 = *(@type@ *)ip2;
743 1
            *((@type@ *)op1) = (in1 @OP@ in2) ? in1 : in2;
744
        }
745
    }
746 1
}
747

748
/**end repeat1**/
749

750
NPY_NO_EXPORT void
751 1
@TYPE@_power(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
752
{
753 1
    BINARY_LOOP {
754 1
        @type@ in1 = *(@type@ *)ip1;
755 1
        @type@ in2 = *(@type@ *)ip2;
756
        @type@ out;
757

758
#if @SIGNED@
759 1
        if (in2 < 0) {
760
            NPY_ALLOW_C_API_DEF
761 1
            NPY_ALLOW_C_API;
762 1
            PyErr_SetString(PyExc_ValueError,
763
                    "Integers to negative integer powers are not allowed.");
764 1
            NPY_DISABLE_C_API;
765 1
            return;
766
        }
767
#endif
768 1
        if (in2 == 0) {
769 1
            *((@type@ *)op1) = 1;
770 1
            continue;
771
        }
772 1
        if (in1 == 1) {
773 1
            *((@type@ *)op1) = 1;
774 1
            continue;
775
        }
776

777 1
        out = in2 & 1 ? in1 : 1;
778 1
        in2 >>= 1;
779 1
        while (in2 > 0) {
780 1
            in1 *= in1;
781 1
            if (in2 & 1) {
782 1
                out *= in1;
783
            }
784 1
            in2 >>= 1;
785
        }
786 1
        *((@type@ *) op1) = out;
787
    }
788 1
}
789

790
NPY_NO_EXPORT void
791 1
@TYPE@_fmod(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
792
{
793 1
    BINARY_LOOP {
794 1
        const @type@ in1 = *(@type@ *)ip1;
795 1
        const @type@ in2 = *(@type@ *)ip2;
796 1
        if (in2 == 0) {
797 0
            npy_set_floatstatus_divbyzero();
798 0
            *((@type@ *)op1) = 0;
799
        }
800
        else {
801 1
            *((@type@ *)op1)= in1 % in2;
802
        }
803

804
    }
805 1
}
806

807
/**begin repeat1
808
 * #kind = isnan, isinf, isfinite#
809
 * #func = npy_isnan, npy_isinf, npy_isfinite#
810
 * #val = NPY_FALSE, NPY_FALSE, NPY_TRUE#
811
 **/
812
NPY_NO_EXPORT void
813 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
814
{
815
    /*
816
     * The (void)in; suppresses an unused variable warning raised by gcc and allows
817
     * us to re-use this macro even though we do not depend on in
818
     */
819 1
    UNARY_LOOP_FAST(@type@, npy_bool, (void)in; *out = @val@);
820 1
}
821
/**end repeat1**/
822

823
/**end repeat**/
824

825
/**begin repeat
826
 * #TYPE = BYTE, SHORT, INT, LONG, LONGLONG#
827
 * #type = npy_byte, npy_short, npy_int, npy_long, npy_longlong#
828
 * #c    = ,,,l,ll#
829
 */
830

831
NPY_NO_EXPORT NPY_GCC_OPT_3 void
832 1
@TYPE@_absolute(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
833
{
834 1
    UNARY_LOOP_FAST(@type@, @type@, *out = (in >= 0) ? in : -in);
835 1
}
836

837
NPY_NO_EXPORT NPY_GCC_OPT_3 void
838 1
@TYPE@_sign(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
839
{
840 1
    UNARY_LOOP_FAST(@type@, @type@, *out = in > 0 ? 1 : (in < 0 ? -1 : 0));
841 1
}
842

843
NPY_NO_EXPORT void
844 1
@TYPE@_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
845
{
846 1
    BINARY_LOOP {
847 1
        const @type@ in1 = *(@type@ *)ip1;
848 1
        const @type@ in2 = *(@type@ *)ip2;
849
        /*
850
         * FIXME: On x86 at least, dividing the smallest representable integer
851
         * by -1 causes a SIFGPE (division overflow). We treat this case here
852
         * (to avoid a SIGFPE crash at python level), but a good solution would
853
         * be to treat integer division problems separately from FPU exceptions
854
         * (i.e. a different approach than npy_set_floatstatus_divbyzero()).
855
         */
856 1
        if (in2 == 0 || (in1 == NPY_MIN_@TYPE@ && in2 == -1)) {
857 1
            npy_set_floatstatus_divbyzero();
858 1
            *((@type@ *)op1) = 0;
859
        }
860 1
        else if (((in1 > 0) != (in2 > 0)) && (in1 % in2 != 0)) {
861 1
            *((@type@ *)op1) = in1/in2 - 1;
862
        }
863
        else {
864 1
            *((@type@ *)op1) = in1/in2;
865
        }
866
    }
867 1
}
868

869
NPY_NO_EXPORT void
870 1
@TYPE@_remainder(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
871
{
872 1
    BINARY_LOOP {
873 1
        const @type@ in1 = *(@type@ *)ip1;
874 1
        const @type@ in2 = *(@type@ *)ip2;
875 1
        if (in2 == 0) {
876 0
            npy_set_floatstatus_divbyzero();
877 0
            *((@type@ *)op1) = 0;
878
        }
879
        else {
880
            /* handle mixed case the way Python does */
881 1
            const @type@ rem = in1 % in2;
882 1
            if ((in1 > 0) == (in2 > 0) || rem == 0) {
883 1
                *((@type@ *)op1) = rem;
884
            }
885
            else {
886 1
                *((@type@ *)op1) = rem + in2;
887
            }
888
        }
889
    }
890 1
}
891

892
NPY_NO_EXPORT void
893 1
@TYPE@_divmod(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
894
{
895 1
    BINARY_LOOP_TWO_OUT {
896 1
        const @type@ in1 = *(@type@ *)ip1;
897 1
        const @type@ in2 = *(@type@ *)ip2;
898
        /* see FIXME note for divide above */
899 1
        if (in2 == 0 || (in1 == NPY_MIN_@TYPE@ && in2 == -1)) {
900 0
            npy_set_floatstatus_divbyzero();
901 0
            *((@type@ *)op1) = 0;
902 0
            *((@type@ *)op2) = 0;
903
        }
904
        else {
905
            /* handle mixed case the way Python does */
906 1
            const @type@ quo = in1 / in2;
907 1
            const @type@ rem = in1 % in2;
908 1
            if ((in1 > 0) == (in2 > 0) || rem == 0) {
909 1
                *((@type@ *)op1) = quo;
910 1
                *((@type@ *)op2) = rem;
911
            }
912
            else {
913 1
                *((@type@ *)op1) = quo - 1;
914 1
                *((@type@ *)op2) = rem + in2;
915
            }
916
        }
917
    }
918 1
}
919

920
/**begin repeat1
921
 * #kind = gcd, lcm#
922
 **/
923
NPY_NO_EXPORT void
924 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
925
{
926 1
    BINARY_LOOP {
927 1
        const @type@ in1 = *(@type@ *)ip1;
928 1
        const @type@ in2 = *(@type@ *)ip2;
929 1
        *((@type@ *)op1) = npy_@kind@@c@(in1, in2);
930
    }
931 1
}
932
/**end repeat1**/
933

934
/**end repeat**/
935

936
/**begin repeat
937
 * #TYPE = UBYTE, USHORT, UINT, ULONG, ULONGLONG#
938
 * #type = npy_ubyte, npy_ushort, npy_uint, npy_ulong, npy_ulonglong#
939
 * #c    = u,u,u,ul,ull#
940
 */
941

942
NPY_NO_EXPORT NPY_GCC_OPT_3 void
943 1
@TYPE@_absolute(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
944
{
945 1
    UNARY_LOOP_FAST(@type@, @type@, *out = in);
946 1
}
947

948
NPY_NO_EXPORT NPY_GCC_OPT_3 void
949 1
@TYPE@_sign(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
950
{
951 1
    UNARY_LOOP_FAST(@type@, @type@, *out = in > 0 ? 1 : 0);
952 1
}
953

954
NPY_NO_EXPORT void
955 1
@TYPE@_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
956
{
957 1
    BINARY_LOOP {
958 1
        const @type@ in1 = *(@type@ *)ip1;
959 1
        const @type@ in2 = *(@type@ *)ip2;
960 1
        if (in2 == 0) {
961 1
            npy_set_floatstatus_divbyzero();
962 1
            *((@type@ *)op1) = 0;
963
        }
964
        else {
965 1
            *((@type@ *)op1)= in1/in2;
966
        }
967
    }
968 1
}
969

970
NPY_NO_EXPORT void
971 1
@TYPE@_remainder(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
972
{
973 1
    BINARY_LOOP {
974 1
        const @type@ in1 = *(@type@ *)ip1;
975 1
        const @type@ in2 = *(@type@ *)ip2;
976 1
        if (in2 == 0) {
977 0
            npy_set_floatstatus_divbyzero();
978 0
            *((@type@ *)op1) = 0;
979
        }
980
        else {
981 1
            *((@type@ *)op1) = in1 % in2;
982
        }
983
    }
984 1
}
985

986
NPY_NO_EXPORT void
987 1
@TYPE@_divmod(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
988
{
989 1
    BINARY_LOOP_TWO_OUT {
990 1
        const @type@ in1 = *(@type@ *)ip1;
991 1
        const @type@ in2 = *(@type@ *)ip2;
992 1
        if (in2 == 0) {
993 0
            npy_set_floatstatus_divbyzero();
994 0
            *((@type@ *)op1) = 0;
995 0
            *((@type@ *)op2) = 0;
996
        }
997
        else {
998 1
            *((@type@ *)op1)= in1/in2;
999 1
            *((@type@ *)op2) = in1 % in2;
1000
        }
1001
    }
1002 1
}
1003

1004
/**begin repeat1
1005
 * #kind = gcd, lcm#
1006
 **/
1007
NPY_NO_EXPORT void
1008 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1009
{
1010 1
    BINARY_LOOP {
1011 1
        const @type@ in1 = *(@type@ *)ip1;
1012 1
        const @type@ in2 = *(@type@ *)ip2;
1013 1
        *((@type@ *)op1) = npy_@kind@@c@(in1, in2);
1014
    }
1015 1
}
1016
/**end repeat1**/
1017

1018
/**end repeat**/
1019

1020
/*
1021
 *****************************************************************************
1022
 **                           DATETIME LOOPS                                **
1023
 *****************************************************************************
1024
 */
1025

1026
NPY_NO_EXPORT void
1027 1
TIMEDELTA_negative(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1028
{
1029 1
    UNARY_LOOP {
1030 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1031 1
        if (in1 == NPY_DATETIME_NAT) {
1032 0
            *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1033
        }
1034
        else {
1035 1
            *((npy_timedelta *)op1) = -in1;
1036
        }
1037
    }
1038 1
}
1039

1040
NPY_NO_EXPORT void
1041 1
TIMEDELTA_positive(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1042
{
1043 1
    UNARY_LOOP {
1044 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1045 1
        *((npy_timedelta *)op1) = +in1;
1046
    }
1047 1
}
1048

1049
NPY_NO_EXPORT void
1050 1
TIMEDELTA_absolute(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1051
{
1052 1
    UNARY_LOOP {
1053 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1054 1
        if (in1 == NPY_DATETIME_NAT) {
1055 0
            *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1056
        }
1057
        else {
1058 1
            *((npy_timedelta *)op1) = (in1 >= 0) ? in1 : -in1;
1059
        }
1060
    }
1061 1
}
1062

1063
NPY_NO_EXPORT void
1064 1
TIMEDELTA_sign(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1065
{
1066 1
    UNARY_LOOP {
1067 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1068 1
        *((npy_timedelta *)op1) = in1 > 0 ? 1 : (in1 < 0 ? -1 : 0);
1069
    }
1070 1
}
1071

1072
/**begin repeat
1073
 * #type = npy_datetime, npy_timedelta#
1074
 * #TYPE = DATETIME, TIMEDELTA#
1075
 */
1076

1077
NPY_NO_EXPORT void
1078 1
@TYPE@_isnat(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1079
{
1080 1
    UNARY_LOOP {
1081 1
        const @type@ in1 = *(@type@ *)ip1;
1082 1
        *((npy_bool *)op1) = (in1 == NPY_DATETIME_NAT);
1083
    }
1084 1
}
1085

1086
NPY_NO_EXPORT void
1087 1
@TYPE@_isfinite(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1088
{
1089 1
    UNARY_LOOP {
1090 1
        const @type@ in1 = *(@type@ *)ip1;
1091 1
        *((npy_bool *)op1) = (in1 != NPY_DATETIME_NAT);
1092
    }
1093 1
}
1094

1095
NPY_NO_EXPORT void
1096 1
@TYPE@_isinf(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1097
{
1098 1
    UNARY_LOOP_FAST(npy_bool, npy_bool, (void)in; *out = NPY_FALSE);
1099 1
}
1100

1101
NPY_NO_EXPORT void
1102 0
@TYPE@__ones_like(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1103
{
1104 0
    OUTPUT_LOOP {
1105 0
        *((@type@ *)op1) = 1;
1106
    }
1107 0
}
1108

1109
/**begin repeat1
1110
 * #kind = equal, greater, greater_equal, less, less_equal#
1111
 * #OP =  ==, >, >=, <, <=#
1112
 */
1113
NPY_NO_EXPORT void
1114 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1115
{
1116 1
    BINARY_LOOP {
1117 1
        const @type@ in1 = *(@type@ *)ip1;
1118 1
        const @type@ in2 = *(@type@ *)ip2;
1119 1
        *((npy_bool *)op1) = (in1 @OP@ in2 &&
1120 1
                              in1 != NPY_DATETIME_NAT &&
1121
                              in2 != NPY_DATETIME_NAT);
1122
    }
1123 1
}
1124
/**end repeat1**/
1125

1126
NPY_NO_EXPORT void
1127 1
@TYPE@_not_equal(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1128
{
1129 1
    BINARY_LOOP {
1130 1
        const @type@ in1 = *(@type@ *)ip1;
1131 1
        const @type@ in2 = *(@type@ *)ip2;
1132 1
        *((npy_bool *)op1) = (in1 != in2 ||
1133 1
                              in1 == NPY_DATETIME_NAT ||
1134
                              in2 == NPY_DATETIME_NAT);
1135
    }
1136 1
}
1137

1138

1139
/**begin repeat1
1140
 * #kind = maximum, minimum#
1141
 * #OP =  >, <#
1142
 **/
1143
NPY_NO_EXPORT void
1144 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1145
{
1146 1
    BINARY_LOOP {
1147 1
        const @type@ in1 = *(@type@ *)ip1;
1148 1
        const @type@ in2 = *(@type@ *)ip2;
1149 1
        if (in1 == NPY_DATETIME_NAT) {
1150 1
            *((@type@ *)op1) = in1;
1151
        }
1152 1
        else if (in2 == NPY_DATETIME_NAT) {
1153 1
            *((@type@ *)op1) = in2;
1154
        }
1155
        else {
1156 1
            *((@type@ *)op1) = (in1 @OP@ in2) ? in1 : in2;
1157
        }
1158
    }
1159 1
}
1160
/**end repeat1**/
1161

1162
/**begin repeat1
1163
 * #kind = fmax, fmin#
1164
 * #OP =  >=, <=#
1165
 **/
1166
NPY_NO_EXPORT void
1167 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1168
{
1169 1
    BINARY_LOOP {
1170 1
        const @type@ in1 = *(@type@ *)ip1;
1171 1
        const @type@ in2 = *(@type@ *)ip2;
1172 1
        if (in1 == NPY_DATETIME_NAT) {
1173 1
            *((@type@ *)op1) = in2;
1174
        }
1175 1
        else if (in2 == NPY_DATETIME_NAT) {
1176 1
            *((@type@ *)op1) = in1;
1177
        }
1178
        else {
1179 1
            *((@type@ *)op1) = in1 @OP@ in2 ? in1 : in2;
1180
        }
1181
    }
1182 1
}
1183
/**end repeat1**/
1184

1185
/**end repeat**/
1186

1187
NPY_NO_EXPORT void
1188 1
DATETIME_Mm_M_add(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1189
{
1190 1
    BINARY_LOOP {
1191 1
        const npy_datetime in1 = *(npy_datetime *)ip1;
1192 1
        const npy_timedelta in2 = *(npy_timedelta *)ip2;
1193 1
        if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
1194 1
            *((npy_datetime *)op1) = NPY_DATETIME_NAT;
1195
        }
1196
        else {
1197 1
            *((npy_datetime *)op1) = in1 + in2;
1198
        }
1199
    }
1200 1
}
1201

1202
NPY_NO_EXPORT void
1203 1
DATETIME_mM_M_add(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1204
{
1205 1
    BINARY_LOOP {
1206 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1207 1
        const npy_datetime in2 = *(npy_datetime *)ip2;
1208 1
        if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
1209 1
            *((npy_datetime *)op1) = NPY_DATETIME_NAT;
1210
        }
1211
        else {
1212 1
            *((npy_datetime *)op1) = in1 + in2;
1213
        }
1214
    }
1215 1
}
1216

1217
NPY_NO_EXPORT void
1218 1
TIMEDELTA_mm_m_add(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1219
{
1220 1
    BINARY_LOOP {
1221 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1222 1
        const npy_timedelta in2 = *(npy_timedelta *)ip2;
1223 1
        if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
1224 0
            *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1225
        }
1226
        else {
1227 1
            *((npy_timedelta *)op1) = in1 + in2;
1228
        }
1229
    }
1230 1
}
1231

1232
NPY_NO_EXPORT void
1233 1
DATETIME_Mm_M_subtract(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1234
{
1235 1
    BINARY_LOOP {
1236 1
        const npy_datetime in1 = *(npy_datetime *)ip1;
1237 1
        const npy_timedelta in2 = *(npy_timedelta *)ip2;
1238 1
        if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
1239 1
            *((npy_datetime *)op1) = NPY_DATETIME_NAT;
1240
        }
1241
        else {
1242 1
            *((npy_datetime *)op1) = in1 - in2;
1243
        }
1244
    }
1245 1
}
1246

1247
NPY_NO_EXPORT void
1248 1
DATETIME_MM_m_subtract(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1249
{
1250 1
    BINARY_LOOP {
1251 1
        const npy_datetime in1 = *(npy_datetime *)ip1;
1252 1
        const npy_datetime in2 = *(npy_datetime *)ip2;
1253 1
        if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
1254 0
            *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1255
        }
1256
        else {
1257 1
            *((npy_timedelta *)op1) = in1 - in2;
1258
        }
1259
    }
1260 1
}
1261

1262
NPY_NO_EXPORT void
1263 1
TIMEDELTA_mm_m_subtract(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1264
{
1265 1
    BINARY_LOOP {
1266 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1267 1
        const npy_timedelta in2 = *(npy_timedelta *)ip2;
1268 1
        if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
1269 0
            *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1270
        }
1271
        else {
1272 1
            *((npy_timedelta *)op1) = in1 - in2;
1273
        }
1274
    }
1275 1
}
1276

1277
/* Note: Assuming 'q' == NPY_LONGLONG */
1278
NPY_NO_EXPORT void
1279 1
TIMEDELTA_mq_m_multiply(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1280
{
1281 1
    BINARY_LOOP {
1282 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1283 1
        const npy_int64 in2 = *(npy_int64 *)ip2;
1284 1
        if (in1 == NPY_DATETIME_NAT) {
1285 1
            *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1286
        }
1287
        else {
1288 1
            *((npy_timedelta *)op1) = in1 * in2;
1289
        }
1290
    }
1291 1
}
1292

1293
/* Note: Assuming 'q' == NPY_LONGLONG */
1294
NPY_NO_EXPORT void
1295 1
TIMEDELTA_qm_m_multiply(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1296
{
1297 1
    BINARY_LOOP {
1298 1
        const npy_int64 in1 = *(npy_int64 *)ip1;
1299 1
        const npy_timedelta in2 = *(npy_timedelta *)ip2;
1300 1
        if (in2 == NPY_DATETIME_NAT) {
1301 1
            *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1302
        }
1303
        else {
1304 1
            *((npy_timedelta *)op1) = in1 * in2;
1305
        }
1306
    }
1307 1
}
1308

1309
NPY_NO_EXPORT void
1310 1
TIMEDELTA_md_m_multiply(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1311
{
1312 1
    BINARY_LOOP {
1313 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1314 1
        const double in2 = *(double *)ip2;
1315 1
        if (in1 == NPY_DATETIME_NAT) {
1316 1
            *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1317
        }
1318
        else {
1319 1
            double result = in1 * in2;
1320 1
            if (npy_isfinite(result)) {
1321 1
                *((npy_timedelta *)op1) = (npy_timedelta)result;
1322
            }
1323
            else {
1324 1
                *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1325
            }
1326
        }
1327
    }
1328 1
}
1329

1330
NPY_NO_EXPORT void
1331 1
TIMEDELTA_dm_m_multiply(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1332
{
1333 1
    BINARY_LOOP {
1334 1
        const double in1 = *(double *)ip1;
1335 1
        const npy_timedelta in2 = *(npy_timedelta *)ip2;
1336 1
        if (in2 == NPY_DATETIME_NAT) {
1337 1
            *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1338
        }
1339
        else {
1340 1
            double result = in1 * in2;
1341 1
            if (npy_isfinite(result)) {
1342 1
                *((npy_timedelta *)op1) = (npy_timedelta)result;
1343
            }
1344
            else {
1345 1
                *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1346
            }
1347
        }
1348
    }
1349 1
}
1350

1351
/* Note: Assuming 'q' == NPY_LONGLONG */
1352
NPY_NO_EXPORT void
1353 1
TIMEDELTA_mq_m_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1354
{
1355 1
    BINARY_LOOP {
1356 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1357 1
        const npy_int64 in2 = *(npy_int64 *)ip2;
1358 1
        if (in1 == NPY_DATETIME_NAT || in2 == 0) {
1359 1
            *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1360
        }
1361
        else {
1362 1
            *((npy_timedelta *)op1) = in1 / in2;
1363
        }
1364
    }
1365 1
}
1366

1367
NPY_NO_EXPORT void
1368 1
TIMEDELTA_md_m_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1369
{
1370 1
    BINARY_LOOP {
1371 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1372 1
        const double in2 = *(double *)ip2;
1373 1
        if (in1 == NPY_DATETIME_NAT) {
1374 1
            *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1375
        }
1376
        else {
1377 1
            double result = in1 / in2;
1378 1
            if (npy_isfinite(result)) {
1379 1
                *((npy_timedelta *)op1) = (npy_timedelta)result;
1380
            }
1381
            else {
1382 1
                *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1383
            }
1384
        }
1385
    }
1386 1
}
1387

1388
NPY_NO_EXPORT void
1389 1
TIMEDELTA_mm_d_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1390
{
1391 1
    BINARY_LOOP {
1392 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1393 1
        const npy_timedelta in2 = *(npy_timedelta *)ip2;
1394 1
        if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
1395 0
            *((double *)op1) = NPY_NAN;
1396
        }
1397
        else {
1398 1
            *((double *)op1) = (double)in1 / (double)in2;
1399
        }
1400
    }
1401 1
}
1402

1403
NPY_NO_EXPORT void
1404 1
TIMEDELTA_mm_m_remainder(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1405
{
1406 1
    BINARY_LOOP {
1407 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1408 1
        const npy_timedelta in2 = *(npy_timedelta *)ip2;
1409 1
        if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
1410 1
            *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1411
        }
1412
        else {
1413 1
            if (in2 == 0) {
1414 1
                npy_set_floatstatus_divbyzero();
1415 1
                *((npy_timedelta *)op1) = NPY_DATETIME_NAT;
1416
            }
1417
            else {
1418
                /* handle mixed case the way Python does */
1419 1
                const npy_timedelta rem = in1 % in2;
1420 1
                if ((in1 > 0) == (in2 > 0) || rem == 0) {
1421 1
                    *((npy_timedelta *)op1) = rem;
1422
                }
1423
                else {
1424 1
                    *((npy_timedelta *)op1) = rem + in2;
1425
                }
1426
            }
1427
        }
1428
    }
1429 1
}
1430

1431
NPY_NO_EXPORT void
1432 1
TIMEDELTA_mm_q_floor_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1433
{
1434 1
    BINARY_LOOP {
1435 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1436 1
        const npy_timedelta in2 = *(npy_timedelta *)ip2;
1437 1
        if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
1438 1
            npy_set_floatstatus_invalid();
1439 1
            *((npy_int64 *)op1) = 0;
1440
        }
1441 1
        else if (in2 == 0) {
1442 1
            npy_set_floatstatus_divbyzero();
1443 1
            *((npy_int64 *)op1) = 0;
1444
        }
1445
        else {
1446 1
            if (((in1 > 0) != (in2 > 0)) && (in1 % in2 != 0)) {
1447 1
                *((npy_int64 *)op1) = in1/in2 - 1;
1448
            }
1449
            else {
1450 1
                *((npy_int64 *)op1) = in1/in2;
1451
            }
1452
        }
1453
    }
1454 1
}
1455

1456
NPY_NO_EXPORT void
1457 1
TIMEDELTA_mm_qm_divmod(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1458
{
1459 1
    BINARY_LOOP_TWO_OUT {
1460 1
        const npy_timedelta in1 = *(npy_timedelta *)ip1;
1461 1
        const npy_timedelta in2 = *(npy_timedelta *)ip2;
1462 1
        if (in1 == NPY_DATETIME_NAT || in2 == NPY_DATETIME_NAT) {
1463 1
            npy_set_floatstatus_invalid();
1464 1
            *((npy_int64 *)op1) = 0;
1465 1
            *((npy_timedelta *)op2) = NPY_DATETIME_NAT;
1466
        }
1467 1
        else if (in2 == 0) {
1468 1
            npy_set_floatstatus_divbyzero();
1469 1
            *((npy_int64 *)op1) = 0;
1470 1
            *((npy_timedelta *)op2) = NPY_DATETIME_NAT;
1471
        }
1472
        else {
1473 1
            const npy_int64 quo = in1 / in2;
1474 1
            const npy_timedelta rem = in1 % in2;
1475 1
            if ((in1 > 0) == (in2 > 0) || rem == 0) {
1476 1
                *((npy_int64 *)op1) = quo;
1477 1
                *((npy_timedelta *)op2) = rem;
1478
            }
1479
            else {
1480 1
                *((npy_int64 *)op1) = quo - 1;
1481 1
                *((npy_timedelta *)op2) = rem + in2;
1482
            }
1483
        }
1484
    }
1485 1
}
1486

1487
/*
1488
 *****************************************************************************
1489
 **                             FLOAT LOOPS                                 **
1490
 *****************************************************************************
1491
 */
1492

1493
/**begin repeat
1494
 * Float types
1495
 *  #type = npy_float, npy_double#
1496
 *  #TYPE = FLOAT, DOUBLE#
1497
 *  #scalarf = npy_sqrtf, npy_sqrt#
1498
 */
1499

1500
NPY_NO_EXPORT void
1501 0
@TYPE@_sqrt(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1502
{
1503 0
    if (!run_unary_simd_sqrt_@TYPE@(args, dimensions, steps)) {
1504 0
        UNARY_LOOP {
1505 0
            const @type@ in1 = *(@type@ *)ip1;
1506 0
            *(@type@ *)op1 = @scalarf@(in1);
1507
        }
1508
    }
1509 0
}
1510

1511
/**end repeat**/
1512

1513
/**begin repeat
1514
 *  #func = rint, ceil, floor, trunc#
1515
 *  #scalarf = npy_rint, npy_ceil, npy_floor, npy_trunc#
1516
 */
1517

1518
/**begin repeat1
1519
*  #TYPE = FLOAT, DOUBLE#
1520
*  #type = npy_float, npy_double#
1521
*  #typesub = f, #
1522
*/
1523

1524
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1525 0
@TYPE@_@func@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1526
{
1527 0
    UNARY_LOOP {
1528 0
        const @type@ in1 = *(@type@ *)ip1;
1529 0
        *(@type@ *)op1 = @scalarf@@typesub@(in1);
1530
    }
1531 0
}
1532

1533

1534
/**end repeat1**/
1535
/**end repeat**/
1536

1537
/**begin repeat
1538
 *  #func = sin, cos, exp, log#
1539
 *  #scalarf = npy_sinf, npy_cosf, npy_expf, npy_logf#
1540
 */
1541

1542
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1543 0
FLOAT_@func@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1544
{
1545 0
    UNARY_LOOP {
1546 0
	const npy_float in1 = *(npy_float *)ip1;
1547 0
	*(npy_float *)op1 = @scalarf@(in1);
1548
    }
1549 0
}
1550

1551
/**end repeat**/
1552

1553
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1554 0
DOUBLE_exp(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1555
{
1556 0
    UNARY_LOOP {
1557 0
        const npy_double in1 = *(npy_double *)ip1;
1558 0
        *(npy_double *)op1 = npy_exp(in1);
1559
    }
1560 0
}
1561
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1562 0
DOUBLE_log(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1563
{
1564 0
    UNARY_LOOP {
1565 0
        const npy_double in1 = *(npy_double *)ip1;
1566 0
        *(npy_double *)op1 = npy_log(in1);
1567
    }
1568 0
}
1569

1570
/**begin repeat
1571
 * #isa = avx512f, fma#
1572
 * #ISA = AVX512F, FMA#
1573
 * #CHK = HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS, HAVE_ATTRIBUTE_TARGET_AVX2_WITH_INTRINSICS#
1574
 */
1575

1576
/**begin repeat1
1577
 *  #TYPE = FLOAT, DOUBLE#
1578
 *  #type = npy_float, npy_double#
1579
 *  #typesub = f, #
1580
 */
1581

1582
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1583 1
@TYPE@_sqrt_@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1584
{
1585 1
    if (!run_unary_@isa@_sqrt_@TYPE@(args, dimensions, steps)) {
1586 1
        UNARY_LOOP {
1587 1
            const @type@ in1 = *(@type@ *)ip1;
1588 1
            *(@type@ *)op1 = npy_sqrt@typesub@(in1);
1589
        }
1590
    }
1591 1
}
1592

1593
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1594 1
@TYPE@_absolute_@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1595
{
1596 1
    if (!run_unary_@isa@_absolute_@TYPE@(args, dimensions, steps)) {
1597 1
        UNARY_LOOP {
1598 1
            const @type@ in1 = *(@type@ *)ip1;
1599 1
            const @type@ tmp = in1 > 0 ? in1 : -in1;
1600
            /* add 0 to clear -0.0 */
1601 1
            *((@type@ *)op1) = tmp + 0;
1602
        }
1603
    }
1604 1
    npy_clear_floatstatus_barrier((char*)dimensions);
1605 1
}
1606

1607
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1608 1
@TYPE@_square_@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1609
{
1610 1
    if (!run_unary_@isa@_square_@TYPE@(args, dimensions, steps)) {
1611 1
        UNARY_LOOP {
1612 1
            const @type@ in1 = *(@type@ *)ip1;
1613 1
            *(@type@ *)op1 = in1*in1;
1614
        }
1615
    }
1616 1
}
1617

1618
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1619 1
@TYPE@_reciprocal_@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1620
{
1621 1
    if (!run_unary_@isa@_reciprocal_@TYPE@(args, dimensions, steps)) {
1622 1
        UNARY_LOOP {
1623 1
            const @type@ in1 = *(@type@ *)ip1;
1624 1
            *(@type@ *)op1 = 1.0f/in1;
1625
        }
1626
    }
1627 1
}
1628

1629
/**begin repeat2
1630
 *  #func = rint, ceil, floor, trunc#
1631
 *  #scalarf = npy_rint, npy_ceil, npy_floor, npy_trunc#
1632
 */
1633

1634
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1635 1
@TYPE@_@func@_@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1636
{
1637 1
    if (!run_unary_@isa@_@func@_@TYPE@(args, dimensions, steps)) {
1638 1
        UNARY_LOOP {
1639 1
            const @type@ in1 = *(@type@ *)ip1;
1640 1
            *(@type@ *)op1 = @scalarf@@typesub@(in1);
1641
        }
1642
    }
1643 1
}
1644

1645
/**end repeat2**/
1646
/**end repeat1**/
1647

1648
/**begin repeat1
1649
 *  #func = exp, log#
1650
 *  #scalarf = npy_expf, npy_logf#
1651
 */
1652

1653
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1654 1
FLOAT_@func@_@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1655
{
1656 1
    if (!run_unary_@isa@_@func@_FLOAT(args, dimensions, steps)) {
1657 1
        UNARY_LOOP {
1658
            /*
1659
             * We use the AVX function to compute exp/log for scalar elements as well.
1660
             * This is needed to ensure the output of strided and non-strided
1661
             * cases match. SIMD code handles strided input cases, but not
1662
             * strided output.
1663
             */
1664
#if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS
1665 1
            @ISA@_@func@_FLOAT((npy_float *)op1, (npy_float *)ip1, 1, steps[0]);
1666
#else
1667
            const npy_float in1 = *(npy_float *)ip1;
1668
            *(npy_float *)op1 = @scalarf@(in1);
1669
#endif
1670
        }
1671
    }
1672 1
}
1673

1674
/**end repeat1**/
1675

1676
/**begin repeat1
1677
 *  #func = cos, sin#
1678
 *  #enum = npy_compute_cos, npy_compute_sin#
1679
 *  #scalarf = npy_cosf, npy_sinf#
1680
 */
1681

1682
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1683 1
FLOAT_@func@_@isa@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1684
{
1685 1
    if (!run_unary_@isa@_sincos_FLOAT(args, dimensions, steps, @enum@)) {
1686 1
        UNARY_LOOP {
1687
#if defined @CHK@ && defined NPY_HAVE_SSE2_INTRINSICS
1688 1
            @ISA@_sincos_FLOAT((npy_float *)op1, (npy_float *)ip1, 1, steps[0], @enum@);
1689
#else
1690
	        const npy_float in1 = *(npy_float *)ip1;
1691
	        *(npy_float *)op1 = @scalarf@(in1);
1692
#endif
1693
        }
1694
    }
1695 1
}
1696

1697
/**end repeat1**/
1698
/**end repeat**/
1699

1700
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1701 1
DOUBLE_exp_avx512f(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1702
{
1703 1
    if (!run_unary_avx512f_exp_DOUBLE(args, dimensions, steps)) {
1704 1
        UNARY_LOOP {
1705 1
            const npy_double in1 = *(npy_double *)ip1;
1706 1
            *(npy_double *)op1 = npy_exp(in1);
1707
        }
1708
    }
1709 1
}
1710

1711
NPY_NO_EXPORT NPY_GCC_OPT_3 void
1712 1
DOUBLE_log_avx512f(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
1713
{
1714 1
    if (!run_unary_avx512f_log_DOUBLE(args, dimensions, steps)) {
1715 1
        UNARY_LOOP {
1716 1
            const npy_double in1 = *(npy_double *)ip1;
1717 1
            *(npy_double *)op1 = npy_log(in1);
1718
        }
1719
    }
1720 1
}
1721

1722
/**begin repeat
1723
 * Float types
1724
 *  #type = npy_float, npy_double, npy_longdouble, npy_float#
1725
 *  #dtype = npy_float, npy_double, npy_longdouble, npy_half#
1726
 *  #TYPE = FLOAT, DOUBLE, LONGDOUBLE, HALF#
1727
 *  #c = f, , l, #
1728
 *  #C = F, , L, #
1729
 *  #trf = , , , npy_half_to_float#
1730
 */
1731

1732
/*
1733
 * Pairwise summation, rounding error O(lg n) instead of O(n).
1734
 * The recursion depth is O(lg n) as well.
1735
 * when updating also update similar complex floats summation
1736
 */
1737
static @type@
1738 1
pairwise_sum_@TYPE@(char *a, npy_intp n, npy_intp stride)
1739
{
1740 1
    if (n < 8) {
1741
        npy_intp i;
1742
        @type@ res = 0.;
1743

1744 1
        for (i = 0; i < n; i++) {
1745 1
            res += @trf@(*((@dtype@*)(a + i * stride)));
1746
        }
1747
        return res;
1748
    }
1749 1
    else if (n <= PW_BLOCKSIZE) {
1750
        npy_intp i;
1751
        @type@ r[8], res;
1752

1753
        /*
1754
         * sum a block with 8 accumulators
1755
         * 8 times unroll reduces blocksize to 16 and allows vectorization with
1756
         * avx without changing summation ordering
1757
         */
1758 1
        r[0] = @trf@(*((@dtype@ *)(a + 0 * stride)));
1759 1
        r[1] = @trf@(*((@dtype@ *)(a + 1 * stride)));
1760 1
        r[2] = @trf@(*((@dtype@ *)(a + 2 * stride)));
1761 1
        r[3] = @trf@(*((@dtype@ *)(a + 3 * stride)));
1762 1
        r[4] = @trf@(*((@dtype@ *)(a + 4 * stride)));
1763 1
        r[5] = @trf@(*((@dtype@ *)(a + 5 * stride)));
1764 1
        r[6] = @trf@(*((@dtype@ *)(a + 6 * stride)));
1765 1
        r[7] = @trf@(*((@dtype@ *)(a + 7 * stride)));
1766

1767 1
        for (i = 8; i < n - (n % 8); i += 8) {
1768
            /* small blocksizes seems to mess with hardware prefetch */
1769 1
            NPY_PREFETCH(a + (i + 512/(npy_intp)sizeof(@dtype@))*stride, 0, 3);
1770 1
            r[0] += @trf@(*((@dtype@ *)(a + (i + 0) * stride)));
1771 1
            r[1] += @trf@(*((@dtype@ *)(a + (i + 1) * stride)));
1772 1
            r[2] += @trf@(*((@dtype@ *)(a + (i + 2) * stride)));
1773 1
            r[3] += @trf@(*((@dtype@ *)(a + (i + 3) * stride)));
1774 1
            r[4] += @trf@(*((@dtype@ *)(a + (i + 4) * stride)));
1775 1
            r[5] += @trf@(*((@dtype@ *)(a + (i + 5) * stride)));
1776 1
            r[6] += @trf@(*((@dtype@ *)(a + (i + 6) * stride)));
1777 1
            r[7] += @trf@(*((@dtype@ *)(a + (i + 7) * stride)));
1778
        }
1779

1780
        /* accumulate now to avoid stack spills for single peel loop */
1781 1
        res = ((r[0] + r[1]) + (r[2] + r[3])) +
1782 1
              ((r[4] + r[5]) + (r[6] + r[7]));
1783

1784
        /* do non multiple of 8 rest */
1785 1
        for (; i < n; i++) {
1786 1
            res += @trf@(*((@dtype@ *)(a + i * stride)));
1787
        }
1788
        return res;
1789
    }
1790
    else {
1791
        /* divide by two but avoid non-multiples of unroll factor */
1792 1
        npy_intp n2 = n / 2;
1793

1794 1
        n2 -= n2 % 8;
1795 1
        return pairwise_sum_@TYPE@(a, n2, stride) +
1796 1
               pairwise_sum_@TYPE@(a + n2 * stride, n - n2, stride);
1797
    }
1798
}
1799

1800
/**end repeat**/
1801

1802
/**begin repeat
1803
 * Float types
1804
 *  #type = npy_float, npy_double, npy_longdouble#
1805
 *  #TYPE = FLOAT, DOUBLE, LONGDOUBLE#
1806
 *  #c = f, , l#
1807
 *  #C = F, , L#
1808
 */
1809

1810
/**begin repeat1
1811
 * Arithmetic
1812
 * # kind = add, subtract, multiply, divide#
1813
 * # OP = +, -, *, /#
1814
 * # PW = 1, 0, 0, 0#
1815
 */
1816
NPY_NO_EXPORT void
1817 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1818
{
1819 1
    if (IS_BINARY_REDUCE) {
1820
#if @PW@
1821 1
        @type@ * iop1 = (@type@ *)args[0];
1822 1
        npy_intp n = dimensions[0];
1823

1824 1
        *iop1 @OP@= pairwise_sum_@TYPE@(args[1], n, steps[1]);
1825
#else
1826 1
        BINARY_REDUCE_LOOP(@type@) {
1827 1
            io1 @OP@= *(@type@ *)ip2;
1828
        }
1829 1
        *((@type@ *)iop1) = io1;
1830
#endif
1831
    }
1832 1
    else if (!run_binary_simd_@kind@_@TYPE@(args, dimensions, steps)) {
1833 1
        BINARY_LOOP {
1834 1
            const @type@ in1 = *(@type@ *)ip1;
1835 1
            const @type@ in2 = *(@type@ *)ip2;
1836 1
            *((@type@ *)op1) = in1 @OP@ in2;
1837
        }
1838
    }
1839 1
}
1840
/**end repeat1**/
1841

1842
/**begin repeat1
1843
 * #kind = equal, not_equal, less, less_equal, greater, greater_equal,
1844
 *        logical_and, logical_or#
1845
 * #OP = ==, !=, <, <=, >, >=, &&, ||#
1846
 */
1847
NPY_NO_EXPORT void
1848 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1849
{
1850 1
    if (!run_binary_simd_@kind@_@TYPE@(args, dimensions, steps)) {
1851 1
        BINARY_LOOP {
1852 1
            const @type@ in1 = *(@type@ *)ip1;
1853 1
            const @type@ in2 = *(@type@ *)ip2;
1854 1
            *((npy_bool *)op1) = in1 @OP@ in2;
1855
        }
1856
    }
1857 1
    npy_clear_floatstatus_barrier((char*)dimensions);
1858 1
}
1859
/**end repeat1**/
1860

1861
NPY_NO_EXPORT void
1862 1
@TYPE@_logical_xor(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1863
{
1864 1
    BINARY_LOOP {
1865 1
        const int t1 = !!*(@type@ *)ip1;
1866 1
        const int t2 = !!*(@type@ *)ip2;
1867 1
        *((npy_bool *)op1) = (t1 != t2);
1868
    }
1869 1
}
1870

1871
NPY_NO_EXPORT void
1872 1
@TYPE@_logical_not(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1873
{
1874 1
    UNARY_LOOP {
1875 1
        const @type@ in1 = *(@type@ *)ip1;
1876 1
        *((npy_bool *)op1) = !in1;
1877
    }
1878 1
}
1879

1880
/**begin repeat1
1881
 * #kind = isnan, isinf, isfinite, signbit#
1882
 * #func = npy_isnan, npy_isinf, npy_isfinite, npy_signbit#
1883
 **/
1884

1885
/**begin repeat2
1886
 * #ISA  = , _avx512_skx#
1887
 * #isa  = simd, avx512_skx#
1888
 **/
1889
NPY_NO_EXPORT void
1890 1
@TYPE@_@kind@@ISA@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1891
{
1892 1
    if (!run_@kind@_@isa@_@TYPE@(args, dimensions, steps)) {
1893 1
        UNARY_LOOP {
1894 1
            const @type@ in1 = *(@type@ *)ip1;
1895 1
            *((npy_bool *)op1) = @func@(in1) != 0;
1896
        }
1897
    }
1898 1
    npy_clear_floatstatus_barrier((char*)dimensions);
1899 1
}
1900
/**end repeat2**/
1901
/**end repeat1**/
1902

1903
NPY_NO_EXPORT void
1904 1
@TYPE@_spacing(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1905
{
1906 1
    UNARY_LOOP {
1907 1
        const @type@ in1 = *(@type@ *)ip1;
1908 1
        *((@type@ *)op1) = npy_spacing@c@(in1);
1909
    }
1910 1
}
1911

1912
NPY_NO_EXPORT void
1913 1
@TYPE@_copysign(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1914
{
1915 1
    BINARY_LOOP {
1916 1
        const @type@ in1 = *(@type@ *)ip1;
1917 1
        const @type@ in2 = *(@type@ *)ip2;
1918 1
        *((@type@ *)op1)= npy_copysign@c@(in1, in2);
1919
    }
1920 1
}
1921

1922
NPY_NO_EXPORT void
1923 1
@TYPE@_nextafter(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1924
{
1925 1
    BINARY_LOOP {
1926 1
        const @type@ in1 = *(@type@ *)ip1;
1927 1
        const @type@ in2 = *(@type@ *)ip2;
1928 1
        *((@type@ *)op1)= npy_nextafter@c@(in1, in2);
1929
    }
1930 1
}
1931

1932
/**begin repeat1
1933
 * #kind = maximum, minimum#
1934
 * #OP =  >=, <=#
1935
 **/
1936
NPY_NO_EXPORT void
1937 1
@TYPE@_@kind@_avx512f(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1938
{
1939
    /*  */
1940 1
    if (IS_BINARY_REDUCE) {
1941 1
        if (!run_unary_reduce_simd_@kind@_@TYPE@(args, dimensions, steps)) {
1942 1
            BINARY_REDUCE_LOOP(@type@) {
1943 1
                const @type@ in2 = *(@type@ *)ip2;
1944
                /* Order of operations important for MSVC 2015 */
1945 1
                io1 = (io1 @OP@ in2 || npy_isnan(io1)) ? io1 : in2;
1946
            }
1947 1
            *((@type@ *)iop1) = io1;
1948
        }
1949
    }
1950
    else {
1951 1
        if (!run_binary_avx512f_@kind@_@TYPE@(args, dimensions, steps)) {
1952 1
            BINARY_LOOP {
1953 1
                @type@ in1 = *(@type@ *)ip1;
1954 1
                const @type@ in2 = *(@type@ *)ip2;
1955
                /* Order of operations important for MSVC 2015 */
1956 1
                in1 = (in1 @OP@ in2 || npy_isnan(in1)) ? in1 : in2;
1957 1
                *((@type@ *)op1) = in1;
1958
            }
1959
        }
1960
    }
1961 1
    npy_clear_floatstatus_barrier((char*)dimensions);
1962 1
}
1963

1964
NPY_NO_EXPORT void
1965 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1966
{
1967
    /*  */
1968 1
    if (IS_BINARY_REDUCE) {
1969 1
        if (!run_unary_reduce_simd_@kind@_@TYPE@(args, dimensions, steps)) {
1970 1
            BINARY_REDUCE_LOOP(@type@) {
1971 1
                const @type@ in2 = *(@type@ *)ip2;
1972
                /* Order of operations important for MSVC 2015 */
1973 1
                io1 = (io1 @OP@ in2 || npy_isnan(io1)) ? io1 : in2;
1974
            }
1975 1
            *((@type@ *)iop1) = io1;
1976
        }
1977
    }
1978
    else {
1979 1
        BINARY_LOOP {
1980 1
            @type@ in1 = *(@type@ *)ip1;
1981 1
            const @type@ in2 = *(@type@ *)ip2;
1982
            /* Order of operations important for MSVC 2015 */
1983 1
            in1 = (in1 @OP@ in2 || npy_isnan(in1)) ? in1 : in2;
1984 1
            *((@type@ *)op1) = in1;
1985
        }
1986
    }
1987 1
    npy_clear_floatstatus_barrier((char*)dimensions);
1988 1
}
1989
/**end repeat1**/
1990

1991
/**begin repeat1
1992
 * #kind = fmax, fmin#
1993
 * #OP =  >=, <=#
1994
 **/
1995
NPY_NO_EXPORT void
1996 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
1997
{
1998
    /*  */
1999 1
    if (IS_BINARY_REDUCE) {
2000 1
        BINARY_REDUCE_LOOP(@type@) {
2001 1
            const @type@ in2 = *(@type@ *)ip2;
2002
            /* Order of operations important for MSVC 2015 */
2003 1
            io1 = (io1 @OP@ in2 || npy_isnan(in2)) ? io1 : in2;
2004
        }
2005 1
        *((@type@ *)iop1) = io1;
2006
    }
2007
    else {
2008 1
        BINARY_LOOP {
2009 1
            const @type@ in1 = *(@type@ *)ip1;
2010 1
            const @type@ in2 = *(@type@ *)ip2;
2011
            /* Order of operations important for MSVC 2015 */
2012 1
            *((@type@ *)op1) = (in1 @OP@ in2 || npy_isnan(in2)) ? in1 : in2;
2013
        }
2014
    }
2015 1
    npy_clear_floatstatus_barrier((char*)dimensions);
2016 1
}
2017
/**end repeat1**/
2018

2019
NPY_NO_EXPORT void
2020 1
@TYPE@_floor_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2021
{
2022 1
    BINARY_LOOP {
2023 1
        const @type@ in1 = *(@type@ *)ip1;
2024 1
        const @type@ in2 = *(@type@ *)ip2;
2025
        @type@ mod;
2026 1
        *((@type@ *)op1) = npy_divmod@c@(in1, in2, &mod);
2027
    }
2028 1
}
2029

2030
NPY_NO_EXPORT void
2031 1
@TYPE@_remainder(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2032
{
2033 1
    BINARY_LOOP {
2034 1
        const @type@ in1 = *(@type@ *)ip1;
2035 1
        const @type@ in2 = *(@type@ *)ip2;
2036 1
        npy_divmod@c@(in1, in2, (@type@ *)op1);
2037
    }
2038 1
}
2039

2040
NPY_NO_EXPORT void
2041 1
@TYPE@_divmod(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2042
{
2043 1
    BINARY_LOOP_TWO_OUT {
2044 1
        const @type@ in1 = *(@type@ *)ip1;
2045 1
        const @type@ in2 = *(@type@ *)ip2;
2046 1
        *((@type@ *)op1) = npy_divmod@c@(in1, in2, (@type@ *)op2);
2047
    }
2048 1
}
2049

2050
NPY_NO_EXPORT void
2051 1
@TYPE@_square(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
2052
{
2053 1
    char * margs[] = {args[0], args[0], args[1]};
2054 1
    npy_intp msteps[] = {steps[0], steps[0], steps[1]};
2055 1
    if (!run_binary_simd_multiply_@TYPE@(margs, dimensions, msteps)) {
2056 1
        UNARY_LOOP {
2057 1
            const @type@ in1 = *(@type@ *)ip1;
2058 1
            *((@type@ *)op1) = in1*in1;
2059
        }
2060
    }
2061 1
}
2062

2063
NPY_NO_EXPORT void
2064 1
@TYPE@_reciprocal(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
2065
{
2066 1
    @type@ one = 1.@c@;
2067 1
    char * margs[] = {(char*)&one, args[0], args[1]};
2068 1
    npy_intp msteps[] = {0, steps[0], steps[1]};
2069 1
    if (!run_binary_simd_divide_@TYPE@(margs, dimensions, msteps)) {
2070 1
        UNARY_LOOP {
2071 1
            const @type@ in1 = *(@type@ *)ip1;
2072 1
            *((@type@ *)op1) = 1/in1;
2073
        }
2074
    }
2075 1
}
2076

2077
NPY_NO_EXPORT void
2078 1
@TYPE@__ones_like(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
2079
{
2080 1
    OUTPUT_LOOP {
2081 1
        *((@type@ *)op1) = 1;
2082
    }
2083 1
}
2084

2085
NPY_NO_EXPORT void
2086 1
@TYPE@_conjugate(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2087
{
2088 1
    UNARY_LOOP {
2089 1
        const @type@ in1 = *(@type@ *)ip1;
2090 1
        *((@type@ *)op1) = in1;
2091
    }
2092 1
}
2093

2094
NPY_NO_EXPORT void
2095 1
@TYPE@_absolute(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2096
{
2097 1
    if (!run_unary_simd_absolute_@TYPE@(args, dimensions, steps)) {
2098 1
        UNARY_LOOP {
2099 1
            const @type@ in1 = *(@type@ *)ip1;
2100 1
            const @type@ tmp = in1 > 0 ? in1 : -in1;
2101
            /* add 0 to clear -0.0 */
2102 1
            *((@type@ *)op1) = tmp + 0;
2103
        }
2104
    }
2105 1
    npy_clear_floatstatus_barrier((char*)dimensions);
2106 1
}
2107

2108
NPY_NO_EXPORT void
2109 1
@TYPE@_negative(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2110
{
2111 1
    if (!run_unary_simd_negative_@TYPE@(args, dimensions, steps)) {
2112 1
        UNARY_LOOP {
2113 1
            const @type@ in1 = *(@type@ *)ip1;
2114 1
            *((@type@ *)op1) = -in1;
2115
        }
2116
    }
2117 1
}
2118

2119
NPY_NO_EXPORT void
2120 1
@TYPE@_positive(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2121
{
2122 1
    UNARY_LOOP {
2123 1
        const @type@ in1 = *(@type@ *)ip1;
2124 1
        *((@type@ *)op1) = +in1;
2125
    }
2126 1
}
2127

2128
NPY_NO_EXPORT void
2129 1
@TYPE@_sign(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2130
{
2131
    /* Sign of nan is nan */
2132 1
    UNARY_LOOP {
2133 1
        const @type@ in1 = *(@type@ *)ip1;
2134 1
        *((@type@ *)op1) = in1 > 0 ? 1 : (in1 < 0 ? -1 : (in1 == 0 ? 0 : in1));
2135
    }
2136 1
    npy_clear_floatstatus_barrier((char*)dimensions);
2137 1
}
2138

2139
NPY_NO_EXPORT void
2140 1
@TYPE@_modf(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2141
{
2142 1
    UNARY_LOOP_TWO_OUT {
2143 1
        const @type@ in1 = *(@type@ *)ip1;
2144 1
        *((@type@ *)op1) = npy_modf@c@(in1, (@type@ *)op2);
2145
    }
2146 1
}
2147

2148
NPY_NO_EXPORT void
2149 1
@TYPE@_frexp(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2150
{
2151 1
    UNARY_LOOP_TWO_OUT {
2152 1
        const @type@ in1 = *(@type@ *)ip1;
2153 1
        *((@type@ *)op1) = npy_frexp@c@(in1, (int *)op2);
2154
    }
2155 1
}
2156

2157
NPY_NO_EXPORT void
2158 1
@TYPE@_frexp_avx512_skx(char **args, npy_intp const *dimensions, npy_intp const *steps, void *func)
2159
{
2160 1
    if (!run_unary_two_out_avx512_skx_frexp_@TYPE@(args, dimensions, steps)) {
2161 1
        @TYPE@_frexp(args, dimensions, steps, func);
2162
    }
2163 1
}
2164

2165
NPY_NO_EXPORT void
2166 1
@TYPE@_ldexp(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2167
{
2168 1
    BINARY_LOOP {
2169 1
        const @type@ in1 = *(@type@ *)ip1;
2170 1
        const int in2 = *(int *)ip2;
2171 1
        *((@type@ *)op1) = npy_ldexp@c@(in1, in2);
2172
    }
2173 1
}
2174

2175
NPY_NO_EXPORT void
2176 1
@TYPE@_ldexp_avx512_skx(char **args, const npy_intp *dimensions, const npy_intp *steps, void *func)
2177
{
2178 1
    if (!run_binary_avx512_skx_ldexp_@TYPE@(args, dimensions, steps)) {
2179 1
        @TYPE@_ldexp(args, dimensions, steps, func);
2180
    }
2181 1
}
2182

2183
NPY_NO_EXPORT void
2184 1
@TYPE@_ldexp_long(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2185
{
2186
    /*
2187
     * Additional loop to handle npy_long integer inputs (cf. #866, #1633).
2188
     * npy_long != npy_int on many 64-bit platforms, so we need this second loop
2189
     * to handle the default integer type.
2190
     */
2191 1
    BINARY_LOOP {
2192 1
        const @type@ in1 = *(@type@ *)ip1;
2193 1
        const long in2 = *(long *)ip2;
2194 1
        if (((int)in2) == in2) {
2195
            /* Range OK */
2196 1
            *((@type@ *)op1) = npy_ldexp@c@(in1, ((int)in2));
2197
        }
2198
        else {
2199
            /*
2200
             * Outside npy_int range -- also ldexp will overflow in this case,
2201
             * given that exponent has less bits than npy_int.
2202
             */
2203 1
            if (in2 > 0) {
2204 1
                *((@type@ *)op1) = npy_ldexp@c@(in1, NPY_MAX_INT);
2205
            }
2206
            else {
2207 1
                *((@type@ *)op1) = npy_ldexp@c@(in1, NPY_MIN_INT);
2208
            }
2209
        }
2210
    }
2211 1
}
2212

2213
#define @TYPE@_true_divide @TYPE@_divide
2214

2215
/**end repeat**/
2216

2217
/*
2218
 *****************************************************************************
2219
 **                          HALF-FLOAT LOOPS                               **
2220
 *****************************************************************************
2221
 */
2222

2223

2224
/**begin repeat
2225
 * Arithmetic
2226
 * # kind = add, subtract, multiply, divide#
2227
 * # OP = +, -, *, /#
2228
 * # PW = 1, 0, 0, 0#
2229
 */
2230
NPY_NO_EXPORT void
2231 1
HALF_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2232
{
2233 1
    if (IS_BINARY_REDUCE) {
2234 1
        char *iop1 = args[0];
2235 1
        float io1 = npy_half_to_float(*(npy_half *)iop1);
2236
#if @PW@
2237 1
        npy_intp n = dimensions[0];
2238

2239 1
        io1 @OP@= pairwise_sum_HALF(args[1], n, steps[1]);
2240
#else
2241 1
        BINARY_REDUCE_LOOP_INNER {
2242 1
            io1 @OP@= npy_half_to_float(*(npy_half *)ip2);
2243
        }
2244
#endif
2245 1
        *((npy_half *)iop1) = npy_float_to_half(io1);
2246
    }
2247
    else {
2248 1
        BINARY_LOOP {
2249 1
            const float in1 = npy_half_to_float(*(npy_half *)ip1);
2250 1
            const float in2 = npy_half_to_float(*(npy_half *)ip2);
2251 1
            *((npy_half *)op1) = npy_float_to_half(in1 @OP@ in2);
2252
        }
2253
    }
2254 1
}
2255
/**end repeat**/
2256

2257
#define _HALF_LOGICAL_AND(a,b) (!npy_half_iszero(a) && !npy_half_iszero(b))
2258
#define _HALF_LOGICAL_OR(a,b) (!npy_half_iszero(a) || !npy_half_iszero(b))
2259
/**begin repeat
2260
 * #kind = equal, not_equal, less, less_equal, greater,
2261
 *         greater_equal, logical_and, logical_or#
2262
 * #OP = npy_half_eq, npy_half_ne, npy_half_lt, npy_half_le, npy_half_gt,
2263
 *       npy_half_ge, _HALF_LOGICAL_AND, _HALF_LOGICAL_OR#
2264
 */
2265
NPY_NO_EXPORT void
2266 1
HALF_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2267
{
2268 1
    BINARY_LOOP {
2269 1
        const npy_half in1 = *(npy_half *)ip1;
2270 1
        const npy_half in2 = *(npy_half *)ip2;
2271 1
        *((npy_bool *)op1) = @OP@(in1, in2);
2272
    }
2273 1
}
2274
/**end repeat**/
2275
#undef _HALF_LOGICAL_AND
2276
#undef _HALF_LOGICAL_OR
2277

2278
NPY_NO_EXPORT void
2279 1
HALF_logical_xor(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2280
{
2281 1
    BINARY_LOOP {
2282 1
        const int in1 = !npy_half_iszero(*(npy_half *)ip1);
2283 1
        const int in2 = !npy_half_iszero(*(npy_half *)ip2);
2284 1
        *((npy_bool *)op1) = (in1 != in2);
2285
    }
2286 1
}
2287

2288
NPY_NO_EXPORT void
2289 1
HALF_logical_not(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2290
{
2291 1
    UNARY_LOOP {
2292 1
        const npy_half in1 = *(npy_half *)ip1;
2293 1
        *((npy_bool *)op1) = npy_half_iszero(in1);
2294
    }
2295 1
}
2296

2297
/**begin repeat
2298
 * #kind = isnan, isinf, isfinite, signbit#
2299
 * #func = npy_half_isnan, npy_half_isinf, npy_half_isfinite, npy_half_signbit#
2300
 **/
2301
NPY_NO_EXPORT void
2302 1
HALF_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2303
{
2304 1
    UNARY_LOOP {
2305 1
        const npy_half in1 = *(npy_half *)ip1;
2306 1
        *((npy_bool *)op1) = @func@(in1) != 0;
2307
    }
2308 1
    npy_clear_floatstatus_barrier((char*)dimensions);
2309 1
}
2310
/**end repeat**/
2311

2312
NPY_NO_EXPORT void
2313 1
HALF_spacing(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2314
{
2315 1
    UNARY_LOOP {
2316 1
        const npy_half in1 = *(npy_half *)ip1;
2317 1
        *((npy_half *)op1) = npy_half_spacing(in1);
2318
    }
2319 1
}
2320

2321
NPY_NO_EXPORT void
2322 1
HALF_copysign(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2323
{
2324 1
    BINARY_LOOP {
2325 1
        const npy_half in1 = *(npy_half *)ip1;
2326 1
        const npy_half in2 = *(npy_half *)ip2;
2327 1
        *((npy_half *)op1)= npy_half_copysign(in1, in2);
2328
    }
2329 1
}
2330

2331
NPY_NO_EXPORT void
2332 1
HALF_nextafter(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2333
{
2334 1
    BINARY_LOOP {
2335 1
        const npy_half in1 = *(npy_half *)ip1;
2336 1
        const npy_half in2 = *(npy_half *)ip2;
2337 1
        *((npy_half *)op1)= npy_half_nextafter(in1, in2);
2338
    }
2339 1
}
2340

2341
/**begin repeat
2342
 * #kind = maximum, minimum#
2343
 * #OP =  npy_half_ge, npy_half_le#
2344
 **/
2345
NPY_NO_EXPORT void
2346 1
HALF_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2347
{
2348
    /*  */
2349 1
    BINARY_LOOP {
2350 1
        const npy_half in1 = *(npy_half *)ip1;
2351 1
        const npy_half in2 = *(npy_half *)ip2;
2352 1
        *((npy_half *)op1) = (@OP@(in1, in2) || npy_half_isnan(in1)) ? in1 : in2;
2353
    }
2354
    /* npy_half_isnan will never set floatstatus_invalid, so do not clear */
2355 1
}
2356
/**end repeat**/
2357

2358
/**begin repeat
2359
 * #kind = fmax, fmin#
2360
 * #OP =  npy_half_ge, npy_half_le#
2361
 **/
2362
NPY_NO_EXPORT void
2363 1
HALF_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2364
{
2365
    /*  */
2366 1
    BINARY_LOOP {
2367 1
        const npy_half in1 = *(npy_half *)ip1;
2368 1
        const npy_half in2 = *(npy_half *)ip2;
2369 1
        *((npy_half *)op1) = (@OP@(in1, in2) || npy_half_isnan(in2)) ? in1 : in2;
2370
    }
2371
    /* npy_half_isnan will never set floatstatus_invalid, so do not clear */
2372 1
}
2373
/**end repeat**/
2374

2375
NPY_NO_EXPORT void
2376 1
HALF_floor_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2377
{
2378 1
    BINARY_LOOP {
2379 1
        const npy_half in1 = *(npy_half *)ip1;
2380 1
        const npy_half in2 = *(npy_half *)ip2;
2381
        npy_half mod;
2382 1
        *((npy_half *)op1) = npy_half_divmod(in1, in2, &mod);
2383
    }
2384 1
}
2385

2386
NPY_NO_EXPORT void
2387 1
HALF_remainder(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2388
{
2389 1
    BINARY_LOOP {
2390 1
        const npy_half in1 = *(npy_half *)ip1;
2391 1
        const npy_half in2 = *(npy_half *)ip2;
2392 1
        npy_half_divmod(in1, in2, (npy_half *)op1);
2393
    }
2394 1
}
2395

2396
NPY_NO_EXPORT void
2397 1
HALF_divmod(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2398
{
2399 1
    BINARY_LOOP_TWO_OUT {
2400 1
        const npy_half in1 = *(npy_half *)ip1;
2401 1
        const npy_half in2 = *(npy_half *)ip2;
2402 1
        *((npy_half *)op1) = npy_half_divmod(in1, in2, (npy_half *)op2);
2403
    }
2404 1
}
2405

2406
NPY_NO_EXPORT void
2407 1
HALF_square(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
2408
{
2409 1
    UNARY_LOOP {
2410 1
        const float in1 = npy_half_to_float(*(npy_half *)ip1);
2411 1
        *((npy_half *)op1) = npy_float_to_half(in1*in1);
2412
    }
2413 1
}
2414

2415
NPY_NO_EXPORT void
2416 1
HALF_reciprocal(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
2417
{
2418 1
    UNARY_LOOP {
2419 1
        const float in1 = npy_half_to_float(*(npy_half *)ip1);
2420 1
        *((npy_half *)op1) = npy_float_to_half(1/in1);
2421
    }
2422 1
}
2423

2424
NPY_NO_EXPORT void
2425 0
HALF__ones_like(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
2426
{
2427 0
    OUTPUT_LOOP {
2428 0
        *((npy_half *)op1) = NPY_HALF_ONE;
2429
    }
2430 0
}
2431

2432
NPY_NO_EXPORT void
2433 1
HALF_conjugate(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2434
{
2435 1
    UNARY_LOOP {
2436 1
        const npy_half in1 = *(npy_half *)ip1;
2437 1
        *((npy_half *)op1) = in1;
2438
    }
2439 1
}
2440

2441
NPY_NO_EXPORT NPY_GCC_OPT_3 void
2442 1
HALF_absolute(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2443
{
2444 1
    UNARY_LOOP_FAST(npy_half, npy_half, *out = in&0x7fffu);
2445 1
}
2446

2447
NPY_NO_EXPORT void
2448 1
HALF_negative(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2449
{
2450 1
    UNARY_LOOP {
2451 1
        const npy_half in1 = *(npy_half *)ip1;
2452 1
        *((npy_half *)op1) = in1^0x8000u;
2453
    }
2454 1
}
2455

2456
NPY_NO_EXPORT void
2457 1
HALF_positive(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2458
{
2459 1
    UNARY_LOOP {
2460 1
        const npy_half in1 = *(npy_half *)ip1;
2461 1
        *((npy_half *)op1) = +in1;
2462
    }
2463 1
}
2464

2465
NPY_NO_EXPORT void
2466 1
HALF_sign(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2467
{
2468
    /* Sign of nan is nan */
2469 1
    UNARY_LOOP {
2470 1
        const npy_half in1 = *(npy_half *)ip1;
2471 1
        *((npy_half *)op1) = npy_half_isnan(in1) ? in1 :
2472
                    (((in1&0x7fffu) == 0) ? 0 :
2473 1
                      (((in1&0x8000u) == 0) ? NPY_HALF_ONE : NPY_HALF_NEGONE));
2474
    }
2475 1
}
2476

2477
NPY_NO_EXPORT void
2478 1
HALF_modf(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2479
{
2480
    float temp;
2481

2482 1
    UNARY_LOOP_TWO_OUT {
2483 1
        const float in1 = npy_half_to_float(*(npy_half *)ip1);
2484 1
        *((npy_half *)op1) = npy_float_to_half(npy_modff(in1, &temp));
2485 1
        *((npy_half *)op2) = npy_float_to_half(temp);
2486
    }
2487 1
}
2488

2489
NPY_NO_EXPORT void
2490 1
HALF_frexp(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2491
{
2492 1
    UNARY_LOOP_TWO_OUT {
2493 1
        const float in1 = npy_half_to_float(*(npy_half *)ip1);
2494 1
        *((npy_half *)op1) = npy_float_to_half(npy_frexpf(in1, (int *)op2));
2495
    }
2496 1
}
2497

2498
NPY_NO_EXPORT void
2499 1
HALF_ldexp(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2500
{
2501 1
    BINARY_LOOP {
2502 1
        const float in1 = npy_half_to_float(*(npy_half *)ip1);
2503 1
        const int in2 = *(int *)ip2;
2504 1
        *((npy_half *)op1) = npy_float_to_half(npy_ldexpf(in1, in2));
2505
    }
2506 1
}
2507

2508
NPY_NO_EXPORT void
2509 1
HALF_ldexp_long(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2510
{
2511
    /*
2512
     * Additional loop to handle npy_long integer inputs (cf. #866, #1633).
2513
     * npy_long != npy_int on many 64-bit platforms, so we need this second loop
2514
     * to handle the default integer type.
2515
     */
2516 1
    BINARY_LOOP {
2517 1
        const float in1 = npy_half_to_float(*(npy_half *)ip1);
2518 1
        const long in2 = *(long *)ip2;
2519 1
        if (((int)in2) == in2) {
2520
            /* Range OK */
2521 1
            *((npy_half *)op1) = npy_float_to_half(npy_ldexpf(in1, ((int)in2)));
2522
        }
2523
        else {
2524
            /*
2525
             * Outside npy_int range -- also ldexp will overflow in this case,
2526
             * given that exponent has less bits than npy_int.
2527
             */
2528 0
            if (in2 > 0) {
2529 0
                *((npy_half *)op1) = npy_float_to_half(npy_ldexpf(in1, NPY_MAX_INT));
2530
            }
2531
            else {
2532 0
                *((npy_half *)op1) = npy_float_to_half(npy_ldexpf(in1, NPY_MIN_INT));
2533
            }
2534
        }
2535
    }
2536 1
}
2537

2538
#define HALF_true_divide HALF_divide
2539

2540

2541
/*
2542
 *****************************************************************************
2543
 **                           COMPLEX LOOPS                                 **
2544
 *****************************************************************************
2545
 */
2546

2547
#define CGE(xr,xi,yr,yi) ((xr > yr && !npy_isnan(xi) && !npy_isnan(yi)) \
2548
                          || (xr == yr && xi >= yi))
2549
#define CLE(xr,xi,yr,yi) ((xr < yr && !npy_isnan(xi) && !npy_isnan(yi)) \
2550
                          || (xr == yr && xi <= yi))
2551
#define CGT(xr,xi,yr,yi) ((xr > yr && !npy_isnan(xi) && !npy_isnan(yi)) \
2552
                          || (xr == yr && xi > yi))
2553
#define CLT(xr,xi,yr,yi) ((xr < yr && !npy_isnan(xi) && !npy_isnan(yi)) \
2554
                          || (xr == yr && xi < yi))
2555
#define CEQ(xr,xi,yr,yi) (xr == yr && xi == yi)
2556
#define CNE(xr,xi,yr,yi) (xr != yr || xi != yi)
2557

2558
/**begin repeat
2559
 * complex types
2560
 * #TYPE = CFLOAT, CDOUBLE, CLONGDOUBLE#
2561
 * #ftype = npy_float, npy_double, npy_longdouble#
2562
 * #c = f, , l#
2563
 * #C = F, , L#
2564
 * #SIMD = 1, 1, 0#
2565
 */
2566

2567
/* similar to pairwise sum of real floats */
2568
static void
2569 1
pairwise_sum_@TYPE@(@ftype@ *rr, @ftype@ * ri, char * a, npy_intp n,
2570
                    npy_intp stride)
2571
{
2572
    assert(n % 2 == 0);
2573 1
    if (n < 8) {
2574
        npy_intp i;
2575

2576 1
        *rr = 0.;
2577 1
        *ri = 0.;
2578 1
        for (i = 0; i < n; i += 2) {
2579 1
            *rr += *((@ftype@ *)(a + i * stride + 0));
2580 1
            *ri += *((@ftype@ *)(a + i * stride + sizeof(@ftype@)));
2581
        }
2582
        return;
2583
    }
2584 1
    else if (n <= PW_BLOCKSIZE) {
2585
        npy_intp i;
2586
        @ftype@ r[8];
2587

2588
        /*
2589
         * sum a block with 8 accumulators
2590
         * 8 times unroll reduces blocksize to 16 and allows vectorization with
2591
         * avx without changing summation ordering
2592
         */
2593 1
        r[0] = *((@ftype@ *)(a + 0 * stride));
2594 1
        r[1] = *((@ftype@ *)(a + 0 * stride + sizeof(@ftype@)));
2595 1
        r[2] = *((@ftype@ *)(a + 2 * stride));
2596 1
        r[3] = *((@ftype@ *)(a + 2 * stride + sizeof(@ftype@)));
2597 1
        r[4] = *((@ftype@ *)(a + 4 * stride));
2598 1
        r[5] = *((@ftype@ *)(a + 4 * stride + sizeof(@ftype@)));
2599 1
        r[6] = *((@ftype@ *)(a + 6 * stride));
2600 1
        r[7] = *((@ftype@ *)(a + 6 * stride + sizeof(@ftype@)));
2601

2602 1
        for (i = 8; i < n - (n % 8); i += 8) {
2603
            /* small blocksizes seems to mess with hardware prefetch */
2604 1
            NPY_PREFETCH(a + (i + 512/(npy_intp)sizeof(@ftype@))*stride, 0, 3);
2605 1
            r[0] += *((@ftype@ *)(a + (i + 0) * stride));
2606 1
            r[1] += *((@ftype@ *)(a + (i + 0) * stride + sizeof(@ftype@)));
2607 1
            r[2] += *((@ftype@ *)(a + (i + 2) * stride));
2608 1
            r[3] += *((@ftype@ *)(a + (i + 2) * stride + sizeof(@ftype@)));
2609 1
            r[4] += *((@ftype@ *)(a + (i + 4) * stride));
2610 1
            r[5] += *((@ftype@ *)(a + (i + 4) * stride + sizeof(@ftype@)));
2611 1
            r[6] += *((@ftype@ *)(a + (i + 6) * stride));
2612 1
            r[7] += *((@ftype@ *)(a + (i + 6) * stride + sizeof(@ftype@)));
2613
        }
2614

2615
        /* accumulate now to avoid stack spills for single peel loop */
2616 1
        *rr = ((r[0] + r[2]) + (r[4] + r[6]));
2617 1
        *ri = ((r[1] + r[3]) + (r[5] + r[7]));
2618

2619
        /* do non multiple of 8 rest */
2620 1
        for (; i < n; i+=2) {
2621 1
            *rr += *((@ftype@ *)(a + i * stride + 0));
2622 1
            *ri += *((@ftype@ *)(a + i * stride + sizeof(@ftype@)));
2623
        }
2624
        return;
2625
    }
2626
    else {
2627
        /* divide by two but avoid non-multiples of unroll factor */
2628
        @ftype@ rr1, ri1, rr2, ri2;
2629 1
        npy_intp n2 = n / 2;
2630

2631 1
        n2 -= n2 % 8;
2632 1
        pairwise_sum_@TYPE@(&rr1, &ri1, a, n2, stride);
2633 1
        pairwise_sum_@TYPE@(&rr2, &ri2, a + n2 * stride, n - n2, stride);
2634 1
        *rr = rr1 + rr2;
2635 1
        *ri = ri1 + ri2;
2636
        return;
2637
    }
2638
}
2639

2640

2641
/**begin repeat1
2642
 * arithmetic
2643
 * #kind = add, subtract#
2644
 * #OP = +, -#
2645
 * #PW = 1, 0#
2646
 */
2647
NPY_NO_EXPORT void
2648 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2649
{
2650 1
    if (IS_BINARY_REDUCE && @PW@) {
2651 1
        npy_intp n = dimensions[0];
2652 1
        @ftype@ * or = ((@ftype@ *)args[0]);
2653 1
        @ftype@ * oi = ((@ftype@ *)args[0]) + 1;
2654
        @ftype@ rr, ri;
2655

2656 1
        pairwise_sum_@TYPE@(&rr, &ri, args[1], n * 2, steps[1] / 2);
2657 1
        *or @OP@= rr;
2658 1
        *oi @OP@= ri;
2659
        return;
2660
    }
2661
    else {
2662 1
        BINARY_LOOP {
2663 1
            const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2664 1
            const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2665 1
            const @ftype@ in2r = ((@ftype@ *)ip2)[0];
2666 1
            const @ftype@ in2i = ((@ftype@ *)ip2)[1];
2667 1
            ((@ftype@ *)op1)[0] = in1r @OP@ in2r;
2668 1
            ((@ftype@ *)op1)[1] = in1i @OP@ in2i;
2669
        }
2670
    }
2671
}
2672
/**end repeat1**/
2673

2674
NPY_NO_EXPORT void
2675 1
@TYPE@_multiply(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2676
{
2677 1
    BINARY_LOOP {
2678 1
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2679 1
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2680 1
        const @ftype@ in2r = ((@ftype@ *)ip2)[0];
2681 1
        const @ftype@ in2i = ((@ftype@ *)ip2)[1];
2682 1
        ((@ftype@ *)op1)[0] = in1r*in2r - in1i*in2i;
2683 1
        ((@ftype@ *)op1)[1] = in1r*in2i + in1i*in2r;
2684
    }
2685 1
}
2686

2687
NPY_NO_EXPORT void
2688 1
@TYPE@_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2689
{
2690 1
    BINARY_LOOP {
2691 1
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2692 1
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2693 1
        const @ftype@ in2r = ((@ftype@ *)ip2)[0];
2694 1
        const @ftype@ in2i = ((@ftype@ *)ip2)[1];
2695 1
        const @ftype@ in2r_abs = npy_fabs@c@(in2r);
2696 1
        const @ftype@ in2i_abs = npy_fabs@c@(in2i);
2697 1
        if (in2r_abs >= in2i_abs) {
2698 1
            if (in2r_abs == 0 && in2i_abs == 0) {
2699
                /* divide by zero should yield a complex inf or nan */
2700 1
                ((@ftype@ *)op1)[0] = in1r/in2r_abs;
2701 1
                ((@ftype@ *)op1)[1] = in1i/in2i_abs;
2702
            }
2703
            else {
2704 1
                const @ftype@ rat = in2i/in2r;
2705 1
                const @ftype@ scl = 1.0@c@/(in2r + in2i*rat);
2706 1
                ((@ftype@ *)op1)[0] = (in1r + in1i*rat)*scl;
2707 1
                ((@ftype@ *)op1)[1] = (in1i - in1r*rat)*scl;
2708
            }
2709
        }
2710
        else {
2711 1
            const @ftype@ rat = in2r/in2i;
2712 1
            const @ftype@ scl = 1.0@c@/(in2i + in2r*rat);
2713 1
            ((@ftype@ *)op1)[0] = (in1r*rat + in1i)*scl;
2714 1
            ((@ftype@ *)op1)[1] = (in1i*rat - in1r)*scl;
2715
        }
2716
    }
2717 1
}
2718

2719
#if @SIMD@
2720
NPY_NO_EXPORT void
2721 1
@TYPE@_add_avx512f(char **args, const npy_intp *dimensions, const npy_intp *steps, void *func)
2722
{
2723 1
    if (IS_BINARY_REDUCE) {
2724 1
        @TYPE@_add(args, dimensions, steps, func);
2725
    }
2726 1
    else if (!run_binary_avx512f_add_@TYPE@(args, dimensions, steps)) {
2727 1
        @TYPE@_add(args, dimensions, steps, func);
2728
    }
2729 1
}
2730

2731
/**begin repeat1
2732
 * arithmetic
2733
 * #kind = subtract, multiply#
2734
 */
2735
NPY_NO_EXPORT void
2736 1
@TYPE@_@kind@_avx512f(char **args, const npy_intp *dimensions, const npy_intp *steps, void *func)
2737
{
2738 1
    if (!run_binary_avx512f_@kind@_@TYPE@(args, dimensions, steps)) {
2739 1
        @TYPE@_@kind@(args, dimensions, steps, func);
2740
    }
2741 1
}
2742
/**end repeat1**/
2743
#endif
2744

2745
NPY_NO_EXPORT void
2746 1
@TYPE@_floor_divide(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2747
{
2748 1
    BINARY_LOOP {
2749 1
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2750 1
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2751 1
        const @ftype@ in2r = ((@ftype@ *)ip2)[0];
2752 1
        const @ftype@ in2i = ((@ftype@ *)ip2)[1];
2753 1
        if (npy_fabs@c@(in2r) >= npy_fabs@c@(in2i)) {
2754 1
            const @ftype@ rat = in2i/in2r;
2755 1
            ((@ftype@ *)op1)[0] = npy_floor@c@((in1r + in1i*rat)/(in2r + in2i*rat));
2756 1
            ((@ftype@ *)op1)[1] = 0;
2757 1
        }
2758 1
        else {
2759 1
            const @ftype@ rat = in2r/in2i;
2760 1
            ((@ftype@ *)op1)[0] = npy_floor@c@((in1r*rat + in1i)/(in2i + in2r*rat));
2761 1
            ((@ftype@ *)op1)[1] = 0;
2762 1
        }
2763
    }
2764 1
}
2765 0

2766 0
/**begin repeat1
2767 0
 * #kind= greater, greater_equal, less, less_equal, equal, not_equal#
2768
 * #OP = CGT, CGE, CLT, CLE, CEQ, CNE#
2769
 */
2770 1
NPY_NO_EXPORT void
2771 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2772
{
2773 1
    BINARY_LOOP {
2774 1
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2775 1
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2776 1
        const @ftype@ in2r = ((@ftype@ *)ip2)[0];
2777 1
        const @ftype@ in2i = ((@ftype@ *)ip2)[1];
2778 1
        *((npy_bool *)op1) = @OP@(in1r,in1i,in2r,in2i);
2779
    }
2780 1
}
2781
/**end repeat1**/
2782

2783
/**begin repeat1
2784
   #kind = logical_and, logical_or#
2785
   #OP1 = ||, ||#
2786
   #OP2 = &&, ||#
2787
*/
2788
NPY_NO_EXPORT void
2789 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2790
{
2791 1
    BINARY_LOOP {
2792 1
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2793 1
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2794 1
        const @ftype@ in2r = ((@ftype@ *)ip2)[0];
2795 1
        const @ftype@ in2i = ((@ftype@ *)ip2)[1];
2796 1
        *((npy_bool *)op1) = (in1r @OP1@ in1i) @OP2@ (in2r @OP1@ in2i);
2797
    }
2798 1
}
2799
/**end repeat1**/
2800

2801
NPY_NO_EXPORT void
2802 1
@TYPE@_logical_xor(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2803
{
2804 1
    BINARY_LOOP {
2805 1
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2806 1
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2807 1
        const @ftype@ in2r = ((@ftype@ *)ip2)[0];
2808 1
        const @ftype@ in2i = ((@ftype@ *)ip2)[1];
2809 1
        const npy_bool tmp1 = (in1r || in1i);
2810 1
        const npy_bool tmp2 = (in2r || in2i);
2811 1
        *((npy_bool *)op1) = tmp1 != tmp2;
2812
    }
2813 1
}
2814

2815
NPY_NO_EXPORT void
2816 0
@TYPE@_logical_not(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2817
{
2818 0
    UNARY_LOOP {
2819 0
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2820 0
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2821 0
        *((npy_bool *)op1) = !(in1r || in1i);
2822
    }
2823 0
}
2824

2825
/**begin repeat1
2826
 * #kind = isnan, isinf, isfinite#
2827
 * #func = npy_isnan, npy_isinf, npy_isfinite#
2828
 * #OP = ||, ||, &&#
2829
 **/
2830
NPY_NO_EXPORT void
2831 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2832
{
2833 1
    UNARY_LOOP {
2834 1
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2835 1
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2836 1
        *((npy_bool *)op1) = @func@(in1r) @OP@ @func@(in1i);
2837
    }
2838 1
    npy_clear_floatstatus_barrier((char*)dimensions);
2839 1
}
2840
/**end repeat1**/
2841

2842
NPY_NO_EXPORT void
2843 1
@TYPE@_square(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
2844
{
2845 1
    UNARY_LOOP {
2846 1
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2847 1
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2848 1
        ((@ftype@ *)op1)[0] = in1r*in1r - in1i*in1i;
2849 1
        ((@ftype@ *)op1)[1] = in1r*in1i + in1i*in1r;
2850
    }
2851 1
}
2852

2853
NPY_NO_EXPORT void
2854 1
@TYPE@_reciprocal(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
2855
{
2856 1
    UNARY_LOOP {
2857 1
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2858 1
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2859 1
        if (npy_fabs@c@(in1i) <= npy_fabs@c@(in1r)) {
2860 1
            const @ftype@ r = in1i/in1r;
2861 1
            const @ftype@ d = in1r + in1i*r;
2862 1
            ((@ftype@ *)op1)[0] = 1/d;
2863 1
            ((@ftype@ *)op1)[1] = -r/d;
2864
        } else {
2865 1
            const @ftype@ r = in1r/in1i;
2866 1
            const @ftype@ d = in1r*r + in1i;
2867 1
            ((@ftype@ *)op1)[0] = r/d;
2868 1
            ((@ftype@ *)op1)[1] = -1/d;
2869
        }
2870
    }
2871 1
}
2872

2873
NPY_NO_EXPORT void
2874 1
@TYPE@__ones_like(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
2875
{
2876 1
    OUTPUT_LOOP {
2877 1
        ((@ftype@ *)op1)[0] = 1;
2878 1
        ((@ftype@ *)op1)[1] = 0;
2879
    }
2880 1
}
2881

2882
NPY_NO_EXPORT void
2883 1
@TYPE@_conjugate(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) {
2884 1
    UNARY_LOOP {
2885 1
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2886 1
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2887 1
        ((@ftype@ *)op1)[0] = in1r;
2888 1
        ((@ftype@ *)op1)[1] = -in1i;
2889
    }
2890 1
}
2891

2892
NPY_NO_EXPORT void
2893 1
@TYPE@_absolute(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2894
{
2895 1
    UNARY_LOOP {
2896 1
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2897 1
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2898 1
        *((@ftype@ *)op1) = npy_hypot@c@(in1r, in1i);
2899
    }
2900 1
}
2901

2902
#if @SIMD@
2903
/**begin repeat1
2904
 * arithmetic
2905
 * #kind = conjugate, square, absolute#
2906
 */
2907
NPY_NO_EXPORT void
2908 1
@TYPE@_@kind@_avx512f(char **args, const npy_intp *dimensions, const npy_intp *steps, void *func)
2909
{
2910 1
    if (!run_unary_avx512f_@kind@_@TYPE@(args, dimensions, steps)) {
2911 1
        @TYPE@_@kind@(args, dimensions, steps, func);
2912
    }
2913 1
}
2914
/**end repeat1**/
2915
#endif
2916

2917
NPY_NO_EXPORT void
2918 1
@TYPE@__arg(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2919
{
2920 1
    UNARY_LOOP {
2921 1
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2922 1
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2923 1
        *((@ftype@ *)op1) = npy_atan2@c@(in1i, in1r);
2924
    }
2925 1
}
2926

2927
NPY_NO_EXPORT void
2928 1
@TYPE@_sign(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2929
{
2930
    /* fixme: sign of nan is currently 0 */
2931 1
    UNARY_LOOP {
2932 1
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2933 1
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2934 1
        ((@ftype@ *)op1)[0] = CGT(in1r, in1i, 0.0, 0.0) ?  1 :
2935 1
                            (CLT(in1r, in1i, 0.0, 0.0) ? -1 :
2936 1
                            (CEQ(in1r, in1i, 0.0, 0.0) ?  0 : NPY_NAN@C@));
2937 1
        ((@ftype@ *)op1)[1] = 0;
2938 0
    }
2939 1
}
2940 0

2941
/**begin repeat1
2942
 * #kind = maximum, minimum#
2943 1
 * #OP = CGE, CLE#
2944
 */
2945
NPY_NO_EXPORT void
2946 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2947 1
{
2948 1
    BINARY_LOOP {
2949 1
        @ftype@ in1r = ((@ftype@ *)ip1)[0];
2950 1
        @ftype@ in1i = ((@ftype@ *)ip1)[1];
2951 1
        const @ftype@ in2r = ((@ftype@ *)ip2)[0];
2952 1
        const @ftype@ in2i = ((@ftype@ *)ip2)[1];
2953 1
        if ( !(npy_isnan(in1r) || npy_isnan(in1i) || @OP@(in1r, in1i, in2r, in2i))) {
2954 1
            in1r = in2r;
2955 1
            in1i = in2i;
2956
        }
2957 1
        ((@ftype@ *)op1)[0] = in1r;
2958 1
        ((@ftype@ *)op1)[1] = in1i;
2959
    }
2960 1
    npy_clear_floatstatus_barrier((char*)dimensions);
2961 1
}
2962
/**end repeat1**/
2963

2964
/**begin repeat1
2965
 * #kind = fmax, fmin#
2966
 * #OP = CGE, CLE#
2967
 */
2968
NPY_NO_EXPORT void
2969 1
@TYPE@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
2970
{
2971 1
    BINARY_LOOP {
2972 1
        const @ftype@ in1r = ((@ftype@ *)ip1)[0];
2973 1
        const @ftype@ in1i = ((@ftype@ *)ip1)[1];
2974 1
        const @ftype@ in2r = ((@ftype@ *)ip2)[0];
2975 1
        const @ftype@ in2i = ((@ftype@ *)ip2)[1];
2976 1
        if (npy_isnan(in2r) || npy_isnan(in2i) || @OP@(in1r, in1i, in2r, in2i)) {
2977 1
            ((@ftype@ *)op1)[0] = in1r;
2978 1
            ((@ftype@ *)op1)[1] = in1i;
2979
        }
2980
        else {
2981 1
            ((@ftype@ *)op1)[0] = in2r;
2982 1
            ((@ftype@ *)op1)[1] = in2i;
2983
        }
2984
    }
2985 1
    npy_clear_floatstatus_barrier((char*)dimensions);
2986 1
}
2987
/**end repeat1**/
2988

2989
#define @TYPE@_true_divide @TYPE@_divide
2990

2991
/**end repeat**/
2992

2993
#undef CGE
2994
#undef CLE
2995
#undef CGT
2996
#undef CLT
2997
#undef CEQ
2998
#undef CNE
2999

3000
/*
3001
 *****************************************************************************
3002
 **                            OBJECT LOOPS                                 **
3003
 *****************************************************************************
3004
 */
3005

3006
/**begin repeat
3007
 * #kind = equal, not_equal, greater, greater_equal, less, less_equal#
3008
 * #OP = EQ, NE, GT, GE, LT, LE#
3009
 * #identity = NPY_TRUE, NPY_FALSE, -1*4#
3010
 */
3011

3012
/**begin repeat1
3013
 * #suffix = , _OO_O#
3014
 * #as_bool = 1, 0#
3015
 */
3016
NPY_NO_EXPORT void
3017 1
OBJECT@suffix@_@kind@(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)) {
3018 1
    BINARY_LOOP {
3019
        PyObject *ret_obj;
3020 1
        PyObject *in1 = *(PyObject **)ip1;
3021 1
        PyObject *in2 = *(PyObject **)ip2;
3022

3023 1
        in1 = in1 ? in1 : Py_None;
3024 1
        in2 = in2 ? in2 : Py_None;
3025

3026
        /*
3027
         * Do not use RichCompareBool because it includes an identity check for
3028
         * == and !=. This is wrong for elementwise behaviour, since it means
3029
         * that NaN can be equal to NaN and an array is equal to itself.
3030
         */
3031 1
        ret_obj = PyObject_RichCompare(in1, in2, Py_@OP@);
3032 1
        if (ret_obj == NULL) {
3033
            return;
3034
        }
3035
#if @as_bool@
3036
        {
3037 1
            int ret = PyObject_IsTrue(ret_obj);
3038 1
            Py_DECREF(ret_obj);
3039 1
            if (ret == -1) {
3040
                return;
3041
            }
3042 1
            *((npy_bool *)op1) = (npy_bool)ret;
3043
        }
3044
#else
3045 1
        *((PyObject **)op1) = ret_obj;
3046
#endif
3047
    }
3048
}
3049
/**end repeat1**/
3050
/**end repeat**/
3051

3052
NPY_NO_EXPORT void
3053 1
OBJECT_sign(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func))
3054
{
3055 1
    PyObject *zero = PyLong_FromLong(0);
3056

3057 1
    UNARY_LOOP {
3058 1
        PyObject *in1 = *(PyObject **)ip1;
3059 1
        PyObject **out = (PyObject **)op1;
3060 1
        PyObject *ret = NULL;
3061
        int v;
3062

3063 1
        if (in1 == NULL) {
3064 0
            in1 = Py_None;
3065
        }
3066

3067 1
        if ((v = PyObject_RichCompareBool(in1, zero, Py_LT)) == 1) {
3068 1
            ret = PyLong_FromLong(-1);
3069
        }
3070 1
        else if (v == 0 &&
3071
                (v = PyObject_RichCompareBool(in1, zero, Py_GT)) == 1) {
3072 1
            ret = PyLong_FromLong(1);
3073
        }
3074 1
        else if (v == 0 &&
3075
                (v = PyObject_RichCompareBool(in1, zero, Py_EQ)) == 1) {
3076 1
            ret = PyLong_FromLong(0);
3077
        }
3078 1
        else if (v == 0) {
3079
            /* in1 is NaN */
3080 1
            PyErr_SetString(PyExc_TypeError,
3081
                    "unorderable types for comparison");
3082
        }
3083

3084 1
        if (ret == NULL) {
3085
            break;
3086
        }
3087 1
        Py_XDECREF(*out);
3088 1
        *out = ret;
3089
    }
3090 1
    Py_XDECREF(zero);
3091 1
}
3092

3093
/*
3094
 *****************************************************************************
3095
 **                              END LOOPS                                  **
3096
 *****************************************************************************
3097
 */

Read our documentation on viewing source code .

Loading