/*
 * This file implements most of the main API functions of NumPy's nditer.
 * This excludes functions specialized using the templating system.
 *
 * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com)
 * The University of British Columbia
 *
 * Copyright (c) 2011 Enthought, Inc
 *
 * See LICENSE.txt for the license.
 */
#define NPY_NO_DEPRECATED_API NPY_API_VERSION

/* Indicate that this .c file is allowed to include the header */
#define NPY_ITERATOR_IMPLEMENTATION_CODE
#include "nditer_impl.h"
#include "templ_common.h"
#include "ctors.h"

/* Internal helper functions private to this file */
static npy_intp
npyiter_checkreducesize(NpyIter *iter, npy_intp count,
                                npy_intp *reduce_innersize,
                                npy_intp *reduce_outerdim);

/*NUMPY_API
 * Removes an axis from iteration. This requires that NPY_ITER_MULTI_INDEX
 * was set for iterator creation, and does not work if buffering is
 * enabled. This function also resets the iterator to its initial state.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
NpyIter_RemoveAxis(NpyIter *iter, int axis)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    int xdim = 0;
    npy_int8 *perm = NIT_PERM(iter);
    NpyIter_AxisData *axisdata_del = NIT_AXISDATA(iter), *axisdata;
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    npy_intp *baseoffsets = NIT_BASEOFFSETS(iter);
    char **resetdataptr = NIT_RESETDATAPTR(iter);

    if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
        PyErr_SetString(PyExc_RuntimeError,
                "Iterator RemoveAxis may only be called "
                "if a multi-index is being tracked");
        return NPY_FAIL;
    }
    else if (itflags&NPY_ITFLAG_HASINDEX) {
        PyErr_SetString(PyExc_RuntimeError,
                "Iterator RemoveAxis may not be called on "
                "an iterator which is tracking an index");
        return NPY_FAIL;
    }
    else if (itflags&NPY_ITFLAG_BUFFER) {
        PyErr_SetString(PyExc_RuntimeError,
                "Iterator RemoveAxis may not be called on "
                "a buffered iterator");
        return NPY_FAIL;
    }
    else if (axis < 0 || axis >= ndim) {
        PyErr_SetString(PyExc_ValueError,
                "axis out of bounds in iterator RemoveAxis");
        return NPY_FAIL;
    }

    /* Reverse axis, since the iterator treats them that way */
    axis = ndim - 1 - axis;

    /* First find the axis in question */
    for (idim = 0; idim < ndim; ++idim) {
        /* If this is it, and it's iterated forward, done */
        if (perm[idim] == axis) {
            xdim = idim;
            break;
        }
        /* If this is it, but it's iterated backward, must reverse the axis */
        else if (-1 - perm[idim] == axis) {
            npy_intp *strides = NAD_STRIDES(axisdata_del);
            npy_intp shape = NAD_SHAPE(axisdata_del), offset;

            xdim = idim;

            /*
             * Adjust baseoffsets and resetbaseptr back to the start of
             * this axis.
             */
            for (iop = 0; iop < nop; ++iop) {
                offset = (shape-1)*strides[iop];
                baseoffsets[iop] += offset;
                resetdataptr[iop] += offset;
            }
            break;
        }

        NIT_ADVANCE_AXISDATA(axisdata_del, 1);
    }

    if (idim == ndim) {
        PyErr_SetString(PyExc_RuntimeError,
                "internal error in iterator perm");
        return NPY_FAIL;
    }

    /* Adjust the permutation */
    for (idim = 0; idim < ndim-1; ++idim) {
        npy_int8 p = (idim < xdim) ? perm[idim] : perm[idim+1];
        if (p >= 0) {
            if (p > axis) {
                --p;
            }
        }
        else {
119 1
            if (p < -1-axis) {
120 1
                ++p;
121
            }
122
        }
123 1
        perm[idim] = p;
124
    }
125

126
    /* Shift all the axisdata structures by one */
127 1
    axisdata = NIT_INDEX_AXISDATA(axisdata_del, 1);
128 1
    memmove(axisdata_del, axisdata, (ndim-1-xdim)*sizeof_axisdata);
129

130
    /* Adjust the iteration size and reset iterend */
131 1
    NIT_ITERSIZE(iter) = 1;
132 1
    axisdata = NIT_AXISDATA(iter);
133 1
    for (idim = 0; idim < ndim-1; ++idim) {
134 1
        if (npy_mul_with_overflow_intp(&NIT_ITERSIZE(iter),
135
                    NIT_ITERSIZE(iter), NAD_SHAPE(axisdata))) {
136 1
            NIT_ITERSIZE(iter) = -1;
137 1
            break;
138
        }
139 1
        NIT_ADVANCE_AXISDATA(axisdata, 1);
140
    }
141 1
    NIT_ITEREND(iter) = NIT_ITERSIZE(iter);
142

143
    /* Shrink the iterator */
144 1
    NIT_NDIM(iter) = ndim - 1;
145
    /* If it is now 0-d fill the singleton dimension */
146 1
    if (ndim == 1) {
147 1
        npy_intp *strides = NAD_STRIDES(axisdata_del);
148 1
        NAD_SHAPE(axisdata_del) = 1;
149 1
        for (iop = 0; iop < nop; ++iop) {
150 1
            strides[iop] = 0;
151
        }
152 1
        NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
153
    }
154

155 1
    return NpyIter_Reset(iter, NULL);
156
}
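
/*
 * Illustrative sketch (not part of the original source): a typical way to
 * drop the first axis from iteration so it can be handled manually.  The
 * iterator variable 'iter' is hypothetical; it must have been created with
 * NPY_ITER_MULTI_INDEX and without buffering, as the checks above require.
 *
 *     if (NpyIter_RemoveAxis(iter, 0) != NPY_SUCCEED) {
 *         NpyIter_Deallocate(iter);
 *         return NULL;
 *     }
 *     // The iterator has been reset and its ndim has shrunk by one.
 */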

/*NUMPY_API
 * Removes multi-index support from an iterator.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
NpyIter_RemoveMultiIndex(NpyIter *iter)
{
    npy_uint32 itflags;

    /* Make sure the iterator is reset */
    if (NpyIter_Reset(iter, NULL) != NPY_SUCCEED) {
        return NPY_FAIL;
    }

    itflags = NIT_ITFLAGS(iter);
    if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
        if (NIT_ITERSIZE(iter) < 0) {
            PyErr_SetString(PyExc_ValueError, "iterator is too large");
            return NPY_FAIL;
        }

        NIT_ITFLAGS(iter) = itflags & ~NPY_ITFLAG_HASMULTIINDEX;
        npyiter_coalesce_axes(iter);
    }

    return NPY_SUCCEED;
}

/*NUMPY_API
 * Removes the inner loop handling (so HasExternalLoop returns true)
 */
NPY_NO_EXPORT int
NpyIter_EnableExternalLoop(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int nop = NIT_NOP(iter);

    /* Check conditions under which this can be done */
    if (itflags&(NPY_ITFLAG_HASINDEX|NPY_ITFLAG_HASMULTIINDEX)) {
        PyErr_SetString(PyExc_ValueError,
                "Iterator flag EXTERNAL_LOOP cannot be used "
                "if an index or multi-index is being tracked");
        return NPY_FAIL;
    }
    if ((itflags&(NPY_ITFLAG_BUFFER|NPY_ITFLAG_RANGE|NPY_ITFLAG_EXLOOP))
                        == (NPY_ITFLAG_RANGE|NPY_ITFLAG_EXLOOP)) {
        PyErr_SetString(PyExc_ValueError,
                "Iterator flag EXTERNAL_LOOP cannot be used "
                "with ranged iteration unless buffering is also enabled");
        return NPY_FAIL;
    }
    /* Set the flag */
    if (!(itflags&NPY_ITFLAG_EXLOOP)) {
        itflags |= NPY_ITFLAG_EXLOOP;
        NIT_ITFLAGS(iter) = itflags;

        /*
         * Check whether we can apply the single iteration
         * optimization to the iternext function.
         */
        if (!(itflags&NPY_ITFLAG_BUFFER)) {
            NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
            if (NIT_ITERSIZE(iter) == NAD_SHAPE(axisdata)) {
                NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
            }
        }
    }

    /* Reset the iterator */
    return NpyIter_Reset(iter, NULL);
}
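
/*
 * Illustrative sketch (not part of the original source): a common pattern
 * is to construct the iterator with NPY_ITER_MULTI_INDEX for setup, then
 * switch to the faster external-loop mode once the multi-index is no
 * longer needed.  'iter' is a hypothetical iterator from the caller.
 *
 *     // ... use the multi-index while setting things up ...
 *     if (NpyIter_RemoveMultiIndex(iter) != NPY_SUCCEED ||
 *             NpyIter_EnableExternalLoop(iter) != NPY_SUCCEED) {
 *         NpyIter_Deallocate(iter);
 *         return NULL;
 *     }
 */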


static char *_reset_cast_error = (
        "Iterator reset failed due to a casting failure. "
        "This error is set as a Python error.");

/*NUMPY_API
 * Resets the iterator to its initial state
 *
 * The use of errmsg is discouraged; it cannot be guaranteed that the GIL
 * will not be grabbed on casting errors even when this is passed.
 *
 * If errmsg is non-NULL, it should point to a variable which will
 * receive the error message, and no Python exception will be set.
 * This is so that the function can be called from code not holding
 * the GIL. Note that cast errors may still lead to the GIL being
 * grabbed temporarily.
 */
NPY_NO_EXPORT int
NpyIter_Reset(NpyIter *iter, char **errmsg)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int nop = NIT_NOP(iter);

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata;

        /* If buffer allocation was delayed, do it now */
        if (itflags&NPY_ITFLAG_DELAYBUF) {
            if (!npyiter_allocate_buffers(iter, errmsg)) {
                if (errmsg != NULL) {
                    *errmsg = _reset_cast_error;
                }
                return NPY_FAIL;
            }
            NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF;
        }
        else {
            /*
             * If the iterindex is already right, no need to
             * do anything (and no cast error has previously occurred).
             */
            bufferdata = NIT_BUFFERDATA(iter);
            if (NIT_ITERINDEX(iter) == NIT_ITERSTART(iter) &&
                    NBF_BUFITEREND(bufferdata) <= NIT_ITEREND(iter) &&
                    NBF_SIZE(bufferdata) > 0) {
                return NPY_SUCCEED;
            }
            if (npyiter_copy_from_buffers(iter) < 0) {
                if (errmsg != NULL) {
                    *errmsg = _reset_cast_error;
                }
                return NPY_FAIL;
            }
        }
    }

    npyiter_goto_iterindex(iter, NIT_ITERSTART(iter));

    if (itflags&NPY_ITFLAG_BUFFER) {
        /* Prepare the next buffers and set iterend/size */
        if (npyiter_copy_to_buffers(iter, NULL) < 0) {
            if (errmsg != NULL) {
                *errmsg = _reset_cast_error;
            }
            return NPY_FAIL;
        }
    }

    return NPY_SUCCEED;
}
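
/*
 * Illustrative sketch (not part of the original source): resetting from
 * code that has released the GIL, using the errmsg mechanism described
 * above.  'iter' is hypothetical; the NPY_BEGIN_THREADS/NPY_END_THREADS
 * macros are the usual NumPy threading helpers.
 *
 *     char *errmsg = NULL;
 *     NPY_BEGIN_THREADS_DEF;
 *     NPY_BEGIN_THREADS;
 *     if (NpyIter_Reset(iter, &errmsg) != NPY_SUCCEED) {
 *         NPY_END_THREADS;
 *         PyErr_SetString(PyExc_ValueError, errmsg);
 *         return -1;
 *     }
 *     // ... iterate without the GIL ...
 *     NPY_END_THREADS;
 */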

/*NUMPY_API
 * Resets the iterator to its initial state, with new base data pointers.
 * This function requires great caution.
 *
 * If errmsg is non-NULL, it should point to a variable which will
 * receive the error message, and no Python exception will be set.
 * This is so that the function can be called from code not holding
 * the GIL. Note that cast errors may still lead to the GIL being
 * grabbed temporarily.
 */
NPY_NO_EXPORT int
NpyIter_ResetBasePointers(NpyIter *iter, char **baseptrs, char **errmsg)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int iop, nop = NIT_NOP(iter);

    char **resetdataptr = NIT_RESETDATAPTR(iter);
    npy_intp *baseoffsets = NIT_BASEOFFSETS(iter);

    if (itflags&NPY_ITFLAG_BUFFER) {
        /* If buffer allocation was delayed, do it now */
        if (itflags&NPY_ITFLAG_DELAYBUF) {
            if (!npyiter_allocate_buffers(iter, errmsg)) {
                return NPY_FAIL;
            }
            NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_DELAYBUF;
        }
        else {
            if (npyiter_copy_from_buffers(iter) < 0) {
                if (errmsg != NULL) {
                    *errmsg = _reset_cast_error;
                }
                return NPY_FAIL;
            }
        }
    }

    /* The new data pointers for resetting */
    for (iop = 0; iop < nop; ++iop) {
        resetdataptr[iop] = baseptrs[iop] + baseoffsets[iop];
    }

    npyiter_goto_iterindex(iter, NIT_ITERSTART(iter));

    if (itflags&NPY_ITFLAG_BUFFER) {
        /* Prepare the next buffers and set iterend/size */
        if (npyiter_copy_to_buffers(iter, NULL) < 0) {
            if (errmsg != NULL) {
                *errmsg = _reset_cast_error;
            }
            return NPY_FAIL;
        }
    }

    return NPY_SUCCEED;
}
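
/*
 * Illustrative sketch (not part of the original source): the usual caller
 * of this function is nested iteration, where the data pointers of an
 * outer iterator become the base pointers of an inner one (see
 * NpyIter_NestedIters).  'iter_outer', 'iter_inner' and the two iternext
 * function pointers are hypothetical names supplied by the caller.
 *
 *     char **outer_ptrs = NpyIter_GetDataPtrArray(iter_outer);
 *     do {
 *         if (NpyIter_ResetBasePointers(iter_inner, outer_ptrs, NULL)
 *                                                     != NPY_SUCCEED) {
 *             return -1;
 *         }
 *         do {
 *             // ... process the inner iterator's data ...
 *         } while (iternext_inner(iter_inner));
 *     } while (iternext_outer(iter_outer));
 */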

/*NUMPY_API
 * Resets the iterator to a new iterator index range
 *
 * If errmsg is non-NULL, it should point to a variable which will
 * receive the error message, and no Python exception will be set.
 * This is so that the function can be called from code not holding
 * the GIL. Note that cast errors may still lead to the GIL being
 * grabbed temporarily.
 */
NPY_NO_EXPORT int
NpyIter_ResetToIterIndexRange(NpyIter *iter,
                              npy_intp istart, npy_intp iend, char **errmsg)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    /*int nop = NIT_NOP(iter);*/

    if (!(itflags&NPY_ITFLAG_RANGE)) {
        if (errmsg == NULL) {
            PyErr_SetString(PyExc_ValueError,
                    "Cannot call ResetToIterIndexRange on an iterator without "
                    "requesting ranged iteration support in the constructor");
        }
        else {
            *errmsg = "Cannot call ResetToIterIndexRange on an iterator "
                      "without requesting ranged iteration support in the "
                    "constructor";
        }
        return NPY_FAIL;
    }

    if (istart < 0 || iend > NIT_ITERSIZE(iter)) {
        if (NIT_ITERSIZE(iter) < 0) {
            if (errmsg == NULL) {
                PyErr_SetString(PyExc_ValueError, "iterator is too large");
            }
            else {
                *errmsg = "iterator is too large";
            }
            return NPY_FAIL;
        }
        if (errmsg == NULL) {
            PyErr_Format(PyExc_ValueError,
                    "Out-of-bounds range [%" NPY_INTP_FMT ", %" NPY_INTP_FMT ") passed to "
                    "ResetToIterIndexRange", istart, iend);
        }
        else {
            *errmsg = "Out-of-bounds range passed to ResetToIterIndexRange";
        }
        return NPY_FAIL;
    }
    else if (iend < istart) {
        if (errmsg == NULL) {
            PyErr_Format(PyExc_ValueError,
                    "Invalid range [%" NPY_INTP_FMT ", %" NPY_INTP_FMT ") passed to ResetToIterIndexRange",
                    istart, iend);
        }
        else {
            *errmsg = "Invalid range passed to ResetToIterIndexRange";
        }
        return NPY_FAIL;
    }

    NIT_ITERSTART(iter) = istart;
    NIT_ITEREND(iter) = iend;

    return NpyIter_Reset(iter, errmsg);
}
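
/*
 * Illustrative sketch (not part of the original source): restricting an
 * iterator created with NPY_ITER_RANGED to the first half of the iteration
 * space, for example when splitting work between two threads.  'iter' is
 * hypothetical.
 *
 *     npy_intp size = NpyIter_GetIterSize(iter);
 *     if (NpyIter_ResetToIterIndexRange(iter, 0, size/2, NULL)
 *                                                 != NPY_SUCCEED) {
 *         return -1;
 *     }
 */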

/*NUMPY_API
 * Sets the iterator to the specified multi-index, which must have the
 * correct number of entries for 'ndim'.  It is only valid
 * when NPY_ITER_MULTI_INDEX was passed to the constructor.  This operation
 * fails if the multi-index is out of bounds.
 *
 * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
 */
NPY_NO_EXPORT int
NpyIter_GotoMultiIndex(NpyIter *iter, npy_intp const *multi_index)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp iterindex, factor;
    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;
    npy_int8 *perm;

    if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoMultiIndex on an iterator without "
                "requesting a multi-index in the constructor");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_BUFFER) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoMultiIndex on an iterator which "
                "is buffered");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_EXLOOP) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoMultiIndex on an iterator which "
                "has the flag EXTERNAL_LOOP");
        return NPY_FAIL;
    }

    perm = NIT_PERM(iter);
    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /* Compute the iterindex corresponding to the multi-index */
    iterindex = 0;
    factor = 1;
    for (idim = 0; idim < ndim; ++idim) {
        npy_int8 p = perm[idim];
        npy_intp i, shape;

        shape = NAD_SHAPE(axisdata);
        if (p < 0) {
            /* If the perm entry is negative, reverse the index */
            i = shape - multi_index[ndim+p] - 1;
        }
        else {
            i = multi_index[ndim-p-1];
        }

        /* Bounds-check this index */
        if (i >= 0 && i < shape) {
            iterindex += factor * i;
            factor *= shape;
        }
        else {
            PyErr_SetString(PyExc_IndexError,
                    "Iterator GotoMultiIndex called with an out-of-bounds "
                    "multi-index");
            return NPY_FAIL;
        }

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
        if (NIT_ITERSIZE(iter) < 0) {
            PyErr_SetString(PyExc_ValueError, "iterator is too large");
            return NPY_FAIL;
        }
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoMultiIndex called with a multi-index outside the "
                "restricted iteration range");
        return NPY_FAIL;
    }

    npyiter_goto_iterindex(iter, iterindex);

    return NPY_SUCCEED;
}
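
/*
 * Illustrative sketch (not part of the original source): positioning a
 * multi-index iterator at element (1, 0) of a 2-d broadcast result.
 * 'iter' is hypothetical; it must track a multi-index and be unbuffered,
 * as the checks above require.
 *
 *     npy_intp multi_index[2] = {1, 0};
 *     if (NpyIter_GotoMultiIndex(iter, multi_index) != NPY_SUCCEED) {
 *         return -1;
 *     }
 */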

/*NUMPY_API
 * If the iterator is tracking an index, sets the iterator
 * to the specified index.
 *
 * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
 */
NPY_NO_EXPORT int
NpyIter_GotoIndex(NpyIter *iter, npy_intp flat_index)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp iterindex, factor;
    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;

    if (!(itflags&NPY_ITFLAG_HASINDEX)) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIndex on an iterator without "
                "requesting a C or Fortran index in the constructor");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_BUFFER) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIndex on an iterator which "
                "is buffered");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_EXLOOP) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIndex on an iterator which "
                "has the flag EXTERNAL_LOOP");
        return NPY_FAIL;
    }

    if (flat_index < 0 || flat_index >= NIT_ITERSIZE(iter)) {
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoIndex called with an out-of-bounds "
                "index");
        return NPY_FAIL;
    }

    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /* Compute the iterindex corresponding to the flat_index */
    iterindex = 0;
    factor = 1;
    for (idim = 0; idim < ndim; ++idim) {
        npy_intp i, shape, iterstride;

        iterstride = NAD_STRIDES(axisdata)[nop];
        shape = NAD_SHAPE(axisdata);

        /* Extract the index from the flat_index */
        if (iterstride == 0) {
            i = 0;
        }
        else if (iterstride < 0) {
            i = shape - (flat_index/(-iterstride))%shape - 1;
        }
        else {
            i = (flat_index/iterstride)%shape;
        }

        /* Add its contribution to iterindex */
        iterindex += factor * i;
        factor *= shape;

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }


    if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoIndex called with an index outside the "
                "restricted iteration range.");
        return NPY_FAIL;
    }

    npyiter_goto_iterindex(iter, iterindex);

    return NPY_SUCCEED;
}

/*NUMPY_API
 * Sets the iterator position to the specified iterindex,
 * which matches the iteration order of the iterator.
 *
 * Returns NPY_SUCCEED on success, NPY_FAIL on failure.
 */
NPY_NO_EXPORT int
NpyIter_GotoIterIndex(NpyIter *iter, npy_intp iterindex)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int iop, nop = NIT_NOP(iter);

    if (itflags&NPY_ITFLAG_EXLOOP) {
        PyErr_SetString(PyExc_ValueError,
                "Cannot call GotoIterIndex on an iterator which "
                "has the flag EXTERNAL_LOOP");
        return NPY_FAIL;
    }

    if (iterindex < NIT_ITERSTART(iter) || iterindex >= NIT_ITEREND(iter)) {
        if (NIT_ITERSIZE(iter) < 0) {
            PyErr_SetString(PyExc_ValueError, "iterator is too large");
            return NPY_FAIL;
        }
        PyErr_SetString(PyExc_IndexError,
                "Iterator GotoIterIndex called with an iterindex outside the "
                "iteration range.");
        return NPY_FAIL;
    }

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
        npy_intp bufiterend, size;

        size = NBF_SIZE(bufferdata);
        bufiterend = NBF_BUFITEREND(bufferdata);
        /* Check if the new iterindex is already within the buffer */
        if (!(itflags&NPY_ITFLAG_REDUCE) && iterindex < bufiterend &&
                                        iterindex >= bufiterend - size) {
            npy_intp *strides, delta;
            char **ptrs;

            strides = NBF_STRIDES(bufferdata);
            ptrs = NBF_PTRS(bufferdata);
            delta = iterindex - NIT_ITERINDEX(iter);

            for (iop = 0; iop < nop; ++iop) {
                ptrs[iop] += delta * strides[iop];
            }

            NIT_ITERINDEX(iter) = iterindex;
        }
        /* Start the buffer at the provided iterindex */
        else {
            /* Write back to the arrays */
            if (npyiter_copy_from_buffers(iter) < 0) {
                return NPY_FAIL;
            }

            npyiter_goto_iterindex(iter, iterindex);

            /* Prepare the next buffers and set iterend/size */
            if (npyiter_copy_to_buffers(iter, NULL) < 0) {
                return NPY_FAIL;
            }
        }
    }
    else {
        npyiter_goto_iterindex(iter, iterindex);
    }

    return NPY_SUCCEED;
}

/*NUMPY_API
 * Gets the current iteration index
 */
NPY_NO_EXPORT npy_intp
NpyIter_GetIterIndex(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    /* iterindex is only used if NPY_ITER_RANGED or NPY_ITER_BUFFERED was set */
    if (itflags&(NPY_ITFLAG_RANGE|NPY_ITFLAG_BUFFER)) {
        return NIT_ITERINDEX(iter);
    }
    else {
        npy_intp iterindex;
        NpyIter_AxisData *axisdata;
        npy_intp sizeof_axisdata;

        iterindex = 0;
        if (ndim == 0) {
            return 0;
        }
        sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
        axisdata = NIT_INDEX_AXISDATA(NIT_AXISDATA(iter), ndim-1);

        for (idim = ndim-2; idim >= 0; --idim) {
            iterindex += NAD_INDEX(axisdata);
            NIT_ADVANCE_AXISDATA(axisdata, -1);
            iterindex *= NAD_SHAPE(axisdata);
        }
        iterindex += NAD_INDEX(axisdata);

        return iterindex;
    }
}

/*NUMPY_API
 * Whether the buffer allocation is being delayed
 */
NPY_NO_EXPORT npy_bool
NpyIter_HasDelayedBufAlloc(NpyIter *iter)
{
    return (NIT_ITFLAGS(iter)&NPY_ITFLAG_DELAYBUF) != 0;
}

/*NUMPY_API
 * Whether the caller, rather than the iterator, handles the inner loop
 */
NPY_NO_EXPORT npy_bool
NpyIter_HasExternalLoop(NpyIter *iter)
{
    return (NIT_ITFLAGS(iter)&NPY_ITFLAG_EXLOOP) != 0;
}

/*NUMPY_API
 * Whether the iterator is tracking a multi-index
 */
NPY_NO_EXPORT npy_bool
NpyIter_HasMultiIndex(NpyIter *iter)
{
    return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASMULTIINDEX) != 0;
}

/*NUMPY_API
 * Whether the iterator is tracking an index
 */
NPY_NO_EXPORT npy_bool
NpyIter_HasIndex(NpyIter *iter)
{
    return (NIT_ITFLAGS(iter)&NPY_ITFLAG_HASINDEX) != 0;
}

/*NUMPY_API
 * Checks whether the elements of the specified reduction operand
 * which the iterator points at are being seen for the first time.
 * The function returns
 * a reasonable answer for reduction operands and when buffering is
 * disabled. The answer may be incorrect for buffered non-reduction
 * operands.
 *
 * This function is intended to be used in EXTERNAL_LOOP mode only,
 * and will produce some wrong answers when that mode is not enabled.
 *
 * If this function returns true, the caller should also
 * check the inner loop stride of the operand, because if
 * that stride is 0, then only the first element of the innermost
 * external loop is being visited for the first time.
 *
 * WARNING: For performance reasons, 'iop' is not bounds-checked,
 *          it is not confirmed that 'iop' is actually a reduction
 *          operand, and it is not confirmed that EXTERNAL_LOOP
 *          mode is enabled. These checks are the responsibility of
 *          the caller, and should be done outside of any inner loops.
 */
NPY_NO_EXPORT npy_bool
NpyIter_IsFirstVisit(NpyIter *iter, int iop)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;

    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    axisdata = NIT_AXISDATA(iter);

    for (idim = 0; idim < ndim; ++idim) {
        npy_intp coord = NAD_INDEX(axisdata);
        npy_intp stride = NAD_STRIDES(axisdata)[iop];

        /*
         * If this is a reduction dimension and the coordinate
         * is not at the start, it's definitely not the first visit
         */
        if (stride == 0 && coord != 0) {
            return 0;
        }

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    /*
     * In reduction buffering mode, there's a double loop being
     * tracked in the buffer part of the iterator data structure.
     * We only need to check the outer level of this two-level loop,
     * because of the requirement that EXTERNAL_LOOP be enabled.
     */
    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
        /* The outer reduce loop */
        if (NBF_REDUCE_POS(bufferdata) != 0 &&
                NBF_REDUCE_OUTERSTRIDES(bufferdata)[iop] == 0) {
            return 0;
        }
    }

    return 1;
}
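
/*
 * Illustrative sketch (not part of the original source): using the
 * first-visit check inside an external loop to initialize a reduction
 * output only once, following the stride caveat in the comment above.
 * 'iter' and 'iop' are assumed to have been set up by the caller.
 *
 *     npy_intp *strides = NpyIter_GetInnerStrideArray(iter);
 *     if (NpyIter_IsFirstVisit(iter, iop) && strides[iop] != 0) {
 *         // every element of this inner-loop run is being seen for the
 *         // first time, so the reduction output can be initialized here
 *     }
 */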

/*NUMPY_API
 * Whether the iteration requires buffering, i.e. whether it could not
 * be done with buffering disabled.
 */
NPY_NO_EXPORT npy_bool
NpyIter_RequiresBuffering(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int iop, nop = NIT_NOP(iter);

    npyiter_opitflags *op_itflags;

    if (!(itflags&NPY_ITFLAG_BUFFER)) {
        return 0;
    }

    op_itflags = NIT_OPITFLAGS(iter);

    /* If any operand requires a cast, buffering is mandatory */
    for (iop = 0; iop < nop; ++iop) {
        if (op_itflags[iop]&NPY_OP_ITFLAG_CAST) {
            return 1;
        }
    }

    return 0;
}

/*NUMPY_API
 * Whether the iteration loop, and in particular the iternext()
 * function, needs API access.  If this is true, the GIL must
 * be retained while iterating.
 */
NPY_NO_EXPORT npy_bool
NpyIter_IterationNeedsAPI(NpyIter *iter)
{
    return (NIT_ITFLAGS(iter)&NPY_ITFLAG_NEEDSAPI) != 0;
}

/*NUMPY_API
 * Gets the number of dimensions being iterated
 */
NPY_NO_EXPORT int
NpyIter_GetNDim(NpyIter *iter)
{
    return NIT_NDIM(iter);
}

/*NUMPY_API
 * Gets the number of operands being iterated
 */
NPY_NO_EXPORT int
NpyIter_GetNOp(NpyIter *iter)
{
    return NIT_NOP(iter);
}

/*NUMPY_API
 * Gets the number of elements being iterated
 */
NPY_NO_EXPORT npy_intp
NpyIter_GetIterSize(NpyIter *iter)
{
    return NIT_ITERSIZE(iter);
}

/*NUMPY_API
 * Whether the iterator is buffered
 */
NPY_NO_EXPORT npy_bool
NpyIter_IsBuffered(NpyIter *iter)
{
    return (NIT_ITFLAGS(iter)&NPY_ITFLAG_BUFFER) != 0;
}

/*NUMPY_API
 * Whether the inner loop can grow if buffering is unneeded
 */
NPY_NO_EXPORT npy_bool
NpyIter_IsGrowInner(NpyIter *iter)
{
    return (NIT_ITFLAGS(iter)&NPY_ITFLAG_GROWINNER) != 0;
}

/*NUMPY_API
 * Gets the size of the buffer, or 0 if buffering is not enabled
 */
NPY_NO_EXPORT npy_intp
NpyIter_GetBufferSize(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int nop = NIT_NOP(iter);

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
        return NBF_BUFFERSIZE(bufferdata);
    }
    else {
        return 0;
    }

}

/*NUMPY_API
 * Gets the range of iteration indices being iterated
 */
NPY_NO_EXPORT void
NpyIter_GetIterIndexRange(NpyIter *iter,
                          npy_intp *istart, npy_intp *iend)
{
    *istart = NIT_ITERSTART(iter);
    *iend = NIT_ITEREND(iter);
}

/*NUMPY_API
 * Gets the broadcast shape if a multi-index is being tracked by the iterator,
 * otherwise gets the shape of the iteration as Fortran-order
 * (fastest-changing index first).
 *
 * The reason Fortran-order is returned when a multi-index
 * is not enabled is that this is providing a direct view into how
 * the iterator traverses the n-dimensional space. The iterator organizes
 * its memory from fastest index to slowest index, and when
 * a multi-index is enabled, it uses a permutation to recover the original
 * order.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
NpyIter_GetShape(NpyIter *iter, npy_intp *outshape)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    int idim, sizeof_axisdata;
    NpyIter_AxisData *axisdata;
    npy_int8 *perm;

    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
        perm = NIT_PERM(iter);
        for(idim = 0; idim < ndim; ++idim) {
            int axis = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL);
            outshape[axis] = NAD_SHAPE(axisdata);

            NIT_ADVANCE_AXISDATA(axisdata, 1);
        }
    }
    else {
        for(idim = 0; idim < ndim; ++idim) {
            outshape[idim] = NAD_SHAPE(axisdata);
            NIT_ADVANCE_AXISDATA(axisdata, 1);
        }
    }

    return NPY_SUCCEED;
}

/*NUMPY_API
 * Builds a set of strides which are the same as the strides of an
 * output array created using the NPY_ITER_ALLOCATE flag, where NULL
 * was passed for op_axes.  This is for data packed contiguously,
 * but not necessarily in C or Fortran order. This should be used
 * together with NpyIter_GetShape and NpyIter_GetNDim.
 *
 * A use case for this function is to match the shape and layout of
 * the iterator and tack on one or more dimensions.  For example,
 * in order to generate a vector per input value for a numerical gradient,
 * you pass in ndim*itemsize for itemsize, then add another dimension to
 * the end with size ndim and stride itemsize.  To do the Hessian matrix,
 * you do the same thing but add two dimensions, or take advantage of
 * the symmetry and pack it into 1 dimension with a particular encoding.
 *
 * This function may only be called if the iterator is tracking a multi-index
 * and if NPY_ITER_DONT_NEGATE_STRIDES was used to prevent an axis from
 * being iterated in reverse order.
 *
 * If an array is created with this method, simply adding 'itemsize'
 * for each iteration will traverse the new array matching the
 * iterator.
 *
 * Returns NPY_SUCCEED or NPY_FAIL.
 */
NPY_NO_EXPORT int
NpyIter_CreateCompatibleStrides(NpyIter *iter,
                            npy_intp itemsize, npy_intp *outstrides)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp sizeof_axisdata;
    NpyIter_AxisData *axisdata;
    npy_int8 *perm;

    if (!(itflags&NPY_ITFLAG_HASMULTIINDEX)) {
        PyErr_SetString(PyExc_RuntimeError,
                "Iterator CreateCompatibleStrides may only be called "
                "if a multi-index is being tracked");
        return NPY_FAIL;
    }

    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    perm = NIT_PERM(iter);
    for(idim = 0; idim < ndim; ++idim) {
        npy_bool flipped;
        npy_int8 axis = npyiter_undo_iter_axis_perm(idim, ndim, perm, &flipped);
        if (flipped) {
            PyErr_SetString(PyExc_RuntimeError,
                    "Iterator CreateCompatibleStrides may only be called "
                    "if DONT_NEGATE_STRIDES was used to prevent reverse "
                    "iteration of an axis");
            return NPY_FAIL;
        }
        else {
            outstrides[axis] = itemsize;
        }

        itemsize *= NAD_SHAPE(axisdata);
        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    return NPY_SUCCEED;
}
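
/*
 * Illustrative sketch (not part of the original source): building a
 * shape/strides pair that matches the iterator's traversal order before
 * allocating a compatible output.  'iter' and 'dtype' are hypothetical;
 * the iterator must track a multi-index and use DONT_NEGATE_STRIDES.
 *
 *     npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
 *     int ndim = NpyIter_GetNDim(iter);
 *     if (NpyIter_GetShape(iter, shape) != NPY_SUCCEED ||
 *             NpyIter_CreateCompatibleStrides(iter,
 *                         dtype->elsize, strides) != NPY_SUCCEED) {
 *         return NULL;
 *     }
 *     // ndim, shape and strides can now be passed to PyArray_NewFromDescr
 */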

/*NUMPY_API
 * Get the array of data pointers (1 per object being iterated)
 *
 * This function may be safely called without holding the Python GIL.
 */
NPY_NO_EXPORT char **
NpyIter_GetDataPtrArray(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int nop = NIT_NOP(iter);

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
        return NBF_PTRS(bufferdata);
    }
    else {
        NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
        return NAD_PTRS(axisdata);
    }
}

/*NUMPY_API
 * Get the array of data pointers (1 per object being iterated),
 * directly into the arrays (never pointing to a buffer), for starting
 * unbuffered iteration. This always returns the addresses for the
 * iterator position as reset to iterator index 0.
 *
 * These pointers are different from the pointers accepted by
 * NpyIter_ResetBasePointers, because the direction along some
 * axes may have been reversed, requiring base offsets.
 *
 * This function may be safely called without holding the Python GIL.
 */
NPY_NO_EXPORT char **
NpyIter_GetInitialDataPtrArray(NpyIter *iter)
{
    /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
    /*int ndim = NIT_NDIM(iter);*/
    int nop = NIT_NOP(iter);

    return NIT_RESETDATAPTR(iter);
}

/*NUMPY_API
 * Get the array of data type pointers (1 per object being iterated)
 */
NPY_NO_EXPORT PyArray_Descr **
NpyIter_GetDescrArray(NpyIter *iter)
{
    /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
    /*int ndim = NIT_NDIM(iter);*/
    /*int nop = NIT_NOP(iter);*/

    return NIT_DTYPES(iter);
}

/*NUMPY_API
 * Get the array of objects being iterated
 */
NPY_NO_EXPORT PyArrayObject **
NpyIter_GetOperandArray(NpyIter *iter)
{
    /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
    /*int ndim = NIT_NDIM(iter);*/
    int nop = NIT_NOP(iter);

    return NIT_OPERANDS(iter);
}

/*NUMPY_API
 * Returns a view to the i-th object with the iterator's internal axes
 */
NPY_NO_EXPORT PyArrayObject *
NpyIter_GetIterView(NpyIter *iter, npy_intp i)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_intp shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
    PyArrayObject *obj, *view;
    PyArray_Descr *dtype;
    char *dataptr;
    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;
    int writeable;

    if (i < 0 || i >= nop) {
        PyErr_SetString(PyExc_IndexError,
                "index provided for an iterator view was out of bounds");
        return NULL;
    }

    /* Don't provide views if buffering is enabled */
    if (itflags&NPY_ITFLAG_BUFFER) {
        PyErr_SetString(PyExc_ValueError,
                "cannot provide an iterator view when buffering is enabled");
        return NULL;
    }

    obj = NIT_OPERANDS(iter)[i];
    dtype = PyArray_DESCR(obj);
    writeable = NIT_OPITFLAGS(iter)[i]&NPY_OP_ITFLAG_WRITE;
    dataptr = NIT_RESETDATAPTR(iter)[i];
    axisdata = NIT_AXISDATA(iter);
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /* Retrieve the shape and strides from the axisdata */
    for (idim = 0; idim < ndim; ++idim) {
        shape[ndim-idim-1] = NAD_SHAPE(axisdata);
        strides[ndim-idim-1] = NAD_STRIDES(axisdata)[i];

        NIT_ADVANCE_AXISDATA(axisdata, 1);
    }

    Py_INCREF(dtype);
    view = (PyArrayObject *)PyArray_NewFromDescrAndBase(
            &PyArray_Type, dtype,
            ndim, shape, strides, dataptr,
            writeable ? NPY_ARRAY_WRITEABLE : 0, NULL, (PyObject *)obj);

    return view;
}

/*NUMPY_API
 * Get a pointer to the index, if it is being tracked
 */
NPY_NO_EXPORT npy_intp *
NpyIter_GetIndexPtr(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int nop = NIT_NOP(iter);

    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);

    if (itflags&NPY_ITFLAG_HASINDEX) {
        /* The index is just after the data pointers */
        return (npy_intp*)NAD_PTRS(axisdata) + nop;
    }
    else {
        return NULL;
    }
}

/*NUMPY_API
 * Gets an array of read flags (1 per object being iterated)
 */
NPY_NO_EXPORT void
NpyIter_GetReadFlags(NpyIter *iter, char *outreadflags)
{
    /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
    /*int ndim = NIT_NDIM(iter);*/
    int iop, nop = NIT_NOP(iter);

    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);

    for (iop = 0; iop < nop; ++iop) {
        outreadflags[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_READ) != 0;
    }
}

/*NUMPY_API
 * Gets an array of write flags (1 per object being iterated)
 */
NPY_NO_EXPORT void
NpyIter_GetWriteFlags(NpyIter *iter, char *outwriteflags)
{
    /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
    /*int ndim = NIT_NDIM(iter);*/
    int iop, nop = NIT_NOP(iter);

    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);

    for (iop = 0; iop < nop; ++iop) {
        outwriteflags[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_WRITE) != 0;
    }
}


/*NUMPY_API
 * Get the array of strides for the inner loop (when HasExternalLoop is true)
 *
 * This function may be safely called without holding the Python GIL.
 */
NPY_NO_EXPORT npy_intp *
NpyIter_GetInnerStrideArray(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int nop = NIT_NOP(iter);

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
        return NBF_STRIDES(data);
    }
    else {
        NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
        return NAD_STRIDES(axisdata);
    }
}

/*NUMPY_API
 * Gets the array of strides for the specified axis.
 * If the iterator is tracking a multi-index, gets the strides
 * for the axis specified, otherwise gets the strides for
 * the iteration axis as Fortran order (fastest-changing axis first).
 *
 * Returns NULL if an error occurs.
 */
NPY_NO_EXPORT npy_intp *
NpyIter_GetAxisStrideArray(NpyIter *iter, int axis)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int nop = NIT_NOP(iter);

    npy_int8 *perm = NIT_PERM(iter);
    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    if (axis < 0 || axis >= ndim) {
        PyErr_SetString(PyExc_ValueError,
                "axis out of bounds in iterator GetStrideAxisArray");
        return NULL;
    }

    if (itflags&NPY_ITFLAG_HASMULTIINDEX) {
        /* Reverse axis, since the iterator treats them that way */
        axis = ndim-1-axis;

        /* First find the axis in question */
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
            if (perm[idim] == axis || -1 - perm[idim] == axis) {
                return NAD_STRIDES(axisdata);
            }
        }
    }
    else {
        return NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, axis));
    }

    PyErr_SetString(PyExc_RuntimeError,
            "internal error in iterator perm");
    return  NULL;
}

/*NUMPY_API
 * Get an array of strides which are fixed.  Any strides which may
 * change during iteration receive the value NPY_MAX_INTP.  Once
 * the iterator is ready to iterate, call this to get the strides
 * which will always be fixed in the inner loop, then choose optimized
 * inner loop functions which take advantage of those fixed strides.
 *
 * This function may be safely called without holding the Python GIL.
 */
NPY_NO_EXPORT void
NpyIter_GetInnerFixedStrideArray(NpyIter *iter, npy_intp *out_strides)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    NpyIter_AxisData *axisdata0 = NIT_AXISDATA(iter);
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
        npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
        npy_intp stride, *strides = NBF_STRIDES(data),
                *ad_strides = NAD_STRIDES(axisdata0);
        PyArray_Descr **dtypes = NIT_DTYPES(iter);

        for (iop = 0; iop < nop; ++iop) {
            stride = strides[iop];
            /*
             * Operands which are always/never buffered have fixed strides,
             * and everything has fixed strides when ndim is 0 or 1
             */
            if (ndim <= 1 || (op_itflags[iop]&
                            (NPY_OP_ITFLAG_CAST|NPY_OP_ITFLAG_BUFNEVER))) {
                out_strides[iop] = stride;
            }
            /* If it's a reduction, 0-stride inner loop may have fixed stride */
            else if (stride == 0 && (itflags&NPY_ITFLAG_REDUCE)) {
                /* If it's a reduction operand, definitely fixed stride */
                if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
                    out_strides[iop] = stride;
                }
                /*
                 * Otherwise it's guaranteed to be a fixed stride if the
                 * stride is 0 for all the dimensions.
                 */
                else {
                    NpyIter_AxisData *axisdata = axisdata0;
                    int idim;
                    for (idim = 0; idim < ndim; ++idim) {
                        if (NAD_STRIDES(axisdata)[iop] != 0) {
                            break;
                        }
                        NIT_ADVANCE_AXISDATA(axisdata, 1);
                    }
                    /* If all the strides were 0, the stride won't change */
                    if (idim == ndim) {
                        out_strides[iop] = stride;
                    }
                    else {
                        out_strides[iop] = NPY_MAX_INTP;
                    }
                }
            }
            /*
             * Inner loop contiguous array means its stride won't change when
             * switching between buffering and not buffering
             */
            else if (ad_strides[iop] == dtypes[iop]->elsize) {
                out_strides[iop] = ad_strides[iop];
            }
            /*
             * Otherwise the strides can change if the operand is sometimes
             * buffered, sometimes not.
             */
            else {
                out_strides[iop] = NPY_MAX_INTP;
            }
        }
    }
    else {
        /* If there's no buffering, the strides are always fixed */
        memcpy(out_strides, NAD_STRIDES(axisdata0), nop*NPY_SIZEOF_INTP);
    }
}
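
/*
 * Illustrative sketch (not part of the original source): querying the
 * fixed strides once, before iterating, in order to pick a specialized
 * inner loop.  'iter', 'inner_loop', 'contig_loop' and 'generic_loop' are
 * hypothetical names.
 *
 *     npy_intp fixed_strides[NPY_MAXARGS];
 *     NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
 *     inner_loop = (fixed_strides[0] == sizeof(double)) ? contig_loop
 *                                                       : generic_loop;
 */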

/*NUMPY_API
 * Get a pointer to the size of the inner loop (when HasExternalLoop is true)
 *
 * This function may be safely called without holding the Python GIL.
 */
NPY_NO_EXPORT npy_intp *
NpyIter_GetInnerLoopSizePtr(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    /*int ndim = NIT_NDIM(iter);*/
    int nop = NIT_NOP(iter);

    if (itflags&NPY_ITFLAG_BUFFER) {
        NpyIter_BufferData *data = NIT_BUFFERDATA(iter);
        return &NBF_SIZE(data);
    }
    else {
        NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
        return &NAD_SHAPE(axisdata);
    }
}
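
/*
 * Illustrative sketch (not part of the original source): the canonical
 * external-loop iteration pattern built from the accessors above.  'iter'
 * is a hypothetical iterator created with NPY_ITER_EXTERNAL_LOOP.
 *
 *     NpyIter_IterNextFunc *iternext = NpyIter_GetIterNext(iter, NULL);
 *     char **dataptr = NpyIter_GetDataPtrArray(iter);
 *     npy_intp *strideptr = NpyIter_GetInnerStrideArray(iter);
 *     npy_intp *innersizeptr = NpyIter_GetInnerLoopSizePtr(iter);
 *
 *     do {
 *         char *data = *dataptr;
 *         npy_intp stride = *strideptr;
 *         npy_intp count = *innersizeptr;
 *         while (count--) {
 *             // ... process the element at 'data' ...
 *             data += stride;
 *         }
 *     } while (iternext(iter));
 */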
1413

1414

1415
/*NUMPY_API
1416
 * For debugging
1417
 */
1418
NPY_NO_EXPORT void
1419 0
NpyIter_DebugPrint(NpyIter *iter)
1420
{
1421 0
    npy_uint32 itflags = NIT_ITFLAGS(iter);
1422 0
    int idim, ndim = NIT_NDIM(iter);
1423 0
    int iop, nop = NIT_NOP(iter);
1424

1425
    NpyIter_AxisData *axisdata;
1426
    npy_intp sizeof_axisdata;
1427

1428
    NPY_ALLOW_C_API_DEF
1429 0
    NPY_ALLOW_C_API
1430

1431 0
    printf("\n------ BEGIN ITERATOR DUMP ------\n");
1432 0
    printf("| Iterator Address: %p\n", (void *)iter);
1433 0
    printf("| ItFlags: ");
1434 0
    if (itflags&NPY_ITFLAG_IDENTPERM)
1435
        printf("IDENTPERM ");
1436 0
    if (itflags&NPY_ITFLAG_NEGPERM)
1437
        printf("NEGPERM ");
1438 0
    if (itflags&NPY_ITFLAG_HASINDEX)
1439
        printf("HASINDEX ");
1440 0
    if (itflags&NPY_ITFLAG_HASMULTIINDEX)
1441
        printf("HASMULTIINDEX ");
1442 0
    if (itflags&NPY_ITFLAG_FORCEDORDER)
1443
        printf("FORCEDORDER ");
1444 0
    if (itflags&NPY_ITFLAG_EXLOOP)
1445
        printf("EXLOOP ");
1446 0
    if (itflags&NPY_ITFLAG_RANGE)
1447
        printf("RANGE ");
1448 0
    if (itflags&NPY_ITFLAG_BUFFER)
1449
        printf("BUFFER ");
1450 0
    if (itflags&NPY_ITFLAG_GROWINNER)
1451
        printf("GROWINNER ");
1452 0
    if (itflags&NPY_ITFLAG_ONEITERATION)
1453
        printf("ONEITERATION ");
1454 0
    if (itflags&NPY_ITFLAG_DELAYBUF)
1455
        printf("DELAYBUF ");
1456 0
    if (itflags&NPY_ITFLAG_NEEDSAPI)
1457
        printf("NEEDSAPI ");
1458 0
    if (itflags&NPY_ITFLAG_REDUCE)
1459
        printf("REDUCE ");
1460 0
    if (itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS)
1461
        printf("REUSE_REDUCE_LOOPS ");
1462

1463 0
    printf("\n");
1464 0
    printf("| NDim: %d\n", ndim);
1465 0
    printf("| NOp: %d\n", nop);
1466 0
    if (NIT_MASKOP(iter) >= 0) {
1467 0
        printf("| MaskOp: %d\n", (int)NIT_MASKOP(iter));
1468
    }
1469 0
    printf("| IterSize: %d\n", (int)NIT_ITERSIZE(iter));
1470 0
    printf("| IterStart: %d\n", (int)NIT_ITERSTART(iter));
1471 0
    printf("| IterEnd: %d\n", (int)NIT_ITEREND(iter));
1472 0
    printf("| IterIndex: %d\n", (int)NIT_ITERINDEX(iter));
1473 0
    printf("| Iterator SizeOf: %d\n",
1474 0
                            (int)NIT_SIZEOF_ITERATOR(itflags, ndim, nop));
1475 0
    printf("| BufferData SizeOf: %d\n",
1476 0
                            (int)NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop));
1477 0
    printf("| AxisData SizeOf: %d\n",
1478
                            (int)NIT_AXISDATA_SIZEOF(itflags, ndim, nop));
1479 0
    printf("|\n");
1480

1481 0
    printf("| Perm: ");
1482 0
    for (idim = 0; idim < ndim; ++idim) {
1483 0
        printf("%d ", (int)NIT_PERM(iter)[idim]);
1484
    }
1485 0
    printf("\n");
1486 0
    printf("| DTypes: ");
1487 0
    for (iop = 0; iop < nop; ++iop) {
1488 0
        printf("%p ", (void *)NIT_DTYPES(iter)[iop]);
1489
    }
1490 0
    printf("\n");
1491 0
    printf("| DTypes: ");
1492 0
    for (iop = 0; iop < nop; ++iop) {
1493 0
        if (NIT_DTYPES(iter)[iop] != NULL)
1494 0
            PyObject_Print((PyObject*)NIT_DTYPES(iter)[iop], stdout, 0);
1495
        else
1496
            printf("(nil) ");
1497 0
        printf(" ");
1498
    }
1499 0
    printf("\n");
1500 0
    printf("| InitDataPtrs: ");
1501 0
    for (iop = 0; iop < nop; ++iop) {
1502 0
        printf("%p ", (void *)NIT_RESETDATAPTR(iter)[iop]);
1503
    }
1504 0
    printf("\n");
1505 0
    printf("| BaseOffsets: ");
1506 0
    for (iop = 0; iop < nop; ++iop) {
1507 0
        printf("%i ", (int)NIT_BASEOFFSETS(iter)[iop]);
1508
    }
1509 0
    printf("\n");
1510 0
    if (itflags&NPY_ITFLAG_HASINDEX) {
1511 0
        printf("| InitIndex: %d\n",
1512 0
                        (int)(npy_intp)NIT_RESETDATAPTR(iter)[nop]);
1513
    }
1514 0
    printf("| Operands: ");
1515 0
    for (iop = 0; iop < nop; ++iop) {
1516 0
        printf("%p ", (void *)NIT_OPERANDS(iter)[iop]);
1517
    }
1518 0
    printf("\n");
1519 0
    printf("| Operand DTypes: ");
1520 0
    for (iop = 0; iop < nop; ++iop) {
1521
        PyArray_Descr *dtype;
1522 0
        if (NIT_OPERANDS(iter)[iop] != NULL) {
1523 0
            dtype = PyArray_DESCR(NIT_OPERANDS(iter)[iop]);
1524 0
            if (dtype != NULL)
1525 0
                PyObject_Print((PyObject *)dtype, stdout, 0);
1526
            else
1527
                printf("(nil) ");
1528
        }
1529
        else {
1530
            printf("(op nil) ");
1531
        }
1532 0
        printf(" ");
1533
    }
1534 0
    printf("\n");
1535 0
    printf("| OpItFlags:\n");
1536 0
    for (iop = 0; iop < nop; ++iop) {
1537 0
        printf("|   Flags[%d]: ", (int)iop);
1538 0
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_READ)
1539
            printf("READ ");
1540 0
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_WRITE)
1541
            printf("WRITE ");
1542 0
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_CAST)
1543
            printf("CAST ");
1544 0
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_BUFNEVER)
1545
            printf("BUFNEVER ");
1546 0
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_ALIGNED)
1547
            printf("ALIGNED ");
1548 0
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_REDUCE)
1549
            printf("REDUCE ");
1550 0
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_VIRTUAL)
1551
            printf("VIRTUAL ");
1552 0
        if ((NIT_OPITFLAGS(iter)[iop])&NPY_OP_ITFLAG_WRITEMASKED)
1553
            printf("WRITEMASKED ");
1554 0
        printf("\n");
1555
    }
1556 0
    printf("|\n");
1557

1558 0
    if (itflags&NPY_ITFLAG_BUFFER) {
1559 0
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
1560 0
        printf("| BufferData:\n");
1561 0
        printf("|   BufferSize: %d\n", (int)NBF_BUFFERSIZE(bufferdata));
1562 0
        printf("|   Size: %d\n", (int)NBF_SIZE(bufferdata));
1563 0
        printf("|   BufIterEnd: %d\n", (int)NBF_BUFITEREND(bufferdata));
1564 0
        if (itflags&NPY_ITFLAG_REDUCE) {
1565 0
            printf("|   REDUCE Pos: %d\n",
1566 0
                        (int)NBF_REDUCE_POS(bufferdata));
1567 0
            printf("|   REDUCE OuterSize: %d\n",
1568 0
                        (int)NBF_REDUCE_OUTERSIZE(bufferdata));
1569 0
            printf("|   REDUCE OuterDim: %d\n",
1570 0
                        (int)NBF_REDUCE_OUTERDIM(bufferdata));
1571
        }
1572 0
        printf("|   Strides: ");
1573 0
        for (iop = 0; iop < nop; ++iop)
1574 0
            printf("%d ", (int)NBF_STRIDES(bufferdata)[iop]);
1575 0
        printf("\n");
1576
        /* Print the fixed strides when there's no inner loop */
1577 0
        if (itflags&NPY_ITFLAG_EXLOOP) {
1578
            npy_intp fixedstrides[NPY_MAXDIMS];
1579 0
            printf("|   Fixed Strides: ");
1580 0
            NpyIter_GetInnerFixedStrideArray(iter, fixedstrides);
1581 0
            for (iop = 0; iop < nop; ++iop)
1582 0
                printf("%d ", (int)fixedstrides[iop]);
1583 0
            printf("\n");
1584
        }
1585 0
        printf("|   Ptrs: ");
1586 0
        for (iop = 0; iop < nop; ++iop)
1587 0
            printf("%p ", (void *)NBF_PTRS(bufferdata)[iop]);
1588 0
        printf("\n");
1589 0
        if (itflags&NPY_ITFLAG_REDUCE) {
1590 0
            printf("|   REDUCE Outer Strides: ");
1591 0
            for (iop = 0; iop < nop; ++iop)
1592 0
                printf("%d ", (int)NBF_REDUCE_OUTERSTRIDES(bufferdata)[iop]);
1593 0
            printf("\n");
1594 0
            printf("|   REDUCE Outer Ptrs: ");
1595 0
            for (iop = 0; iop < nop; ++iop)
1596 0
                printf("%p ", (void *)NBF_REDUCE_OUTERPTRS(bufferdata)[iop]);
1597
            printf("\n");
1598
        }
1599 0
        printf("|   ReadTransferFn: ");
1600 0
        for (iop = 0; iop < nop; ++iop)
1601 0
            printf("%p ", (void *)NBF_READTRANSFERFN(bufferdata)[iop]);
1602 0
        printf("\n");
1603 0
        printf("|   ReadTransferData: ");
1604 0
        for (iop = 0; iop < nop; ++iop)
1605 0
            printf("%p ", (void *)NBF_READTRANSFERDATA(bufferdata)[iop]);
1606 0
        printf("\n");
1607 0
        printf("|   WriteTransferFn: ");
1608 0
        for (iop = 0; iop < nop; ++iop)
1609 0
            printf("%p ", (void *)NBF_WRITETRANSFERFN(bufferdata)[iop]);
1610 0
        printf("\n");
1611 0
        printf("|   WriteTransferData: ");
1612 0
        for (iop = 0; iop < nop; ++iop)
1613 0
            printf("%p ", (void *)NBF_WRITETRANSFERDATA(bufferdata)[iop]);
1614 0
        printf("\n");
1615 0
        printf("|   Buffers: ");
1616 0
        for (iop = 0; iop < nop; ++iop)
1617 0
            printf("%p ", (void *)NBF_BUFFERS(bufferdata)[iop]);
1618 0
        printf("\n");
1619
        printf("|\n");
1620
    }
1621

1622 0
    axisdata = NIT_AXISDATA(iter);
1623 0
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1624 0
    for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
1625 0
        printf("| AxisData[%d]:\n", (int)idim);
1626 0
        printf("|   Shape: %d\n", (int)NAD_SHAPE(axisdata));
1627 0
        printf("|   Index: %d\n", (int)NAD_INDEX(axisdata));
1628 0
        printf("|   Strides: ");
1629 0
        for (iop = 0; iop < nop; ++iop) {
1630 0
            printf("%d ", (int)NAD_STRIDES(axisdata)[iop]);
1631
        }
1632 0
        printf("\n");
1633 0
        if (itflags&NPY_ITFLAG_HASINDEX) {
1634 0
            printf("|   Index Stride: %d\n", (int)NAD_STRIDES(axisdata)[nop]);
1635
        }
1636 0
        printf("|   Ptrs: ");
1637 0
        for (iop = 0; iop < nop; ++iop) {
1638 0
            printf("%p ", (void *)NAD_PTRS(axisdata)[iop]);
1639
        }
1640 0
        printf("\n");
1641 0
        if (itflags&NPY_ITFLAG_HASINDEX) {
1642 0
            printf("|   Index Value: %d\n",
1643 0
                               (int)((npy_intp*)NAD_PTRS(axisdata))[nop]);
1644
        }
1645
    }
1646

1647 0
    printf("------- END ITERATOR DUMP -------\n");
1648 0
    fflush(stdout);
1649

1650 0
    NPY_DISABLE_C_API
1651
}
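
/*
 * For reference, the dump above is easiest to interpret next to a concrete
 * iterator.  A minimal sketch of producing one (it assumes an existing
 * PyArrayObject *arr and is not part of the iterator implementation):
 */
#if 0
static void
example_dump_iterator(PyArrayObject *arr)
{
    /* Read-only iteration over a single operand, default order/casting. */
    NpyIter *it = NpyIter_New(arr, NPY_ITER_READONLY,
                              NPY_KEEPORDER, NPY_NO_CASTING, NULL);
    if (it == NULL) {
        return;
    }
    /* Prints the ITERATOR/BUFFER/AXISDATA sections shown above to stdout. */
    NpyIter_DebugPrint(it);
    NpyIter_Deallocate(it);
}
#endif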
1652

1653
NPY_NO_EXPORT void
1654 1
npyiter_coalesce_axes(NpyIter *iter)
1655
{
1656 1
    npy_uint32 itflags = NIT_ITFLAGS(iter);
1657 1
    int idim, ndim = NIT_NDIM(iter);
1658 1
    int nop = NIT_NOP(iter);
1659

1660 1
    npy_intp istrides, nstrides = NAD_NSTRIDES();
1661 1
    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
1662 1
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1663 1
    NpyIter_AxisData *ad_compress = axisdata;
1664 1
    npy_intp new_ndim = 1;
1665

1666
    /* The HASMULTIINDEX or IDENTPERM flags do not apply after coalescing */
1667 1
    NIT_ITFLAGS(iter) &= ~(NPY_ITFLAG_IDENTPERM|NPY_ITFLAG_HASMULTIINDEX);
1668

1669 1
    for (idim = 0; idim < ndim-1; ++idim) {
1670 1
        int can_coalesce = 1;
1671 1
        npy_intp shape0 = NAD_SHAPE(ad_compress);
1672 1
        npy_intp shape1 = NAD_SHAPE(NIT_INDEX_AXISDATA(axisdata, 1));
1673 1
        npy_intp *strides0 = NAD_STRIDES(ad_compress);
1674 1
        npy_intp *strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, 1));
1675

1676
        /* Check that all the axes can be coalesced */
1677 1
        for (istrides = 0; istrides < nstrides; ++istrides) {
1678 1
            if (!((shape0 == 1 && strides0[istrides] == 0) ||
1679 1
                  (shape1 == 1 && strides1[istrides] == 0)) &&
1680 1
                     (strides0[istrides]*shape0 != strides1[istrides])) {
1681
                can_coalesce = 0;
1682
                break;
1683
            }
1684
        }
1685

1686 1
        if (can_coalesce) {
1687 1
            npy_intp *strides = NAD_STRIDES(ad_compress);
1688

1689 1
            NIT_ADVANCE_AXISDATA(axisdata, 1);
1690 1
            NAD_SHAPE(ad_compress) *= NAD_SHAPE(axisdata);
1691 1
            for (istrides = 0; istrides < nstrides; ++istrides) {
1692 1
                if (strides[istrides] == 0) {
1693 1
                    strides[istrides] = NAD_STRIDES(axisdata)[istrides];
1694
                }
1695
            }
1696
        }
1697
        else {
1698 1
            NIT_ADVANCE_AXISDATA(axisdata, 1);
1699 1
            NIT_ADVANCE_AXISDATA(ad_compress, 1);
1700 1
            if (ad_compress != axisdata) {
1701 1
                memcpy(ad_compress, axisdata, sizeof_axisdata);
1702
            }
1703 1
            ++new_ndim;
1704
        }
1705
    }
1706

1707
    /*
1708
     * If the number of axes shrunk, reset the perm and
1709
     * compress the data into the new layout.
1710
     */
1711 1
    if (new_ndim < ndim) {
1712
        npy_int8 *perm = NIT_PERM(iter);
1713

1714
        /* Reset to an identity perm */
1715 1
        for (idim = 0; idim < new_ndim; ++idim) {
1716 1
            perm[idim] = (npy_int8)idim;
1717
        }
1718 1
        NIT_NDIM(iter) = new_ndim;
1719
    }
1720
}
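
/*
 * A worked example of the coalescing test: a C-contiguous double array of
 * shape (3, 4) has strides (32, 8).  With the inner axis stored first, the
 * check strides0*shape0 == strides1 becomes 8*4 == 32, so the two axes merge
 * into a single axis of shape 12 and stride 8.  A standalone sketch of the
 * per-axis-pair test (hypothetical helper, not used by the iterator):
 */
#if 0
static int
can_coalesce_pair(npy_intp shape0, npy_intp stride0,   /* faster axis */
                  npy_intp shape1, npy_intp stride1)   /* next axis */
{
    /* A length-1 axis with stride 0 never blocks coalescing. */
    if ((shape0 == 1 && stride0 == 0) || (shape1 == 1 && stride1 == 0)) {
        return 1;
    }
    /* Otherwise the outer stride must step exactly over the inner axis. */
    return stride0 * shape0 == stride1;
}
/* can_coalesce_pair(4, 8, 3, 32) -> 1 (merge to shape 12, stride 8)
 * can_coalesce_pair(4, 8, 3, 64) -> 0 (padding between rows)          */
#endif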
1721

1722
/*
1723
 * If errmsg is non-NULL, it should point to a variable which will
1724
 * receive the error message, and no Python exception will be set.
1725
 * This is so that the function can be called from code not holding
1726
 * the GIL.
1727
 */
1728
NPY_NO_EXPORT int
1729 1
npyiter_allocate_buffers(NpyIter *iter, char **errmsg)
1730
{
1731
    /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
1732
    /*int ndim = NIT_NDIM(iter);*/
1733 1
    int iop = 0, nop = NIT_NOP(iter);
1734

1735
    npy_intp i;
1736 1
    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
1737 1
    NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
1738 1
    PyArray_Descr **op_dtype = NIT_DTYPES(iter);
1739 1
    npy_intp buffersize = NBF_BUFFERSIZE(bufferdata);
1740 1
    char *buffer, **buffers = NBF_BUFFERS(bufferdata);
1741

1742 1
    for (iop = 0; iop < nop; ++iop) {
1743 1
        npyiter_opitflags flags = op_itflags[iop];
1744

1745
        /*
1746
         * If we have determined that a buffer may be needed,
1747
         * allocate one.
1748
         */
1749 1
        if (!(flags&NPY_OP_ITFLAG_BUFNEVER)) {
1750 1
            npy_intp itemsize = op_dtype[iop]->elsize;
1751 1
            buffer = PyArray_malloc(itemsize*buffersize);
1752 1
            if (buffer == NULL) {
1753 0
                if (errmsg == NULL) {
1754 0
                    PyErr_NoMemory();
1755
                }
1756
                else {
1757 0
                    *errmsg = "out of memory";
1758
                }
1759
                goto fail;
1760
            }
1761 1
            buffers[iop] = buffer;
1762
        }
1763
    }
1764

1765
    return 1;
1766

1767
fail:
1768 0
    for (i = 0; i < iop; ++i) {
1769 0
        if (buffers[i] != NULL) {
1770 0
            PyArray_free(buffers[i]);
1771 0
            buffers[i] = NULL;
1772
        }
1773
    }
1774
    return 0;
1775
}
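
/*
 * The loop above follows the usual allocate-or-roll-back idiom: on the first
 * failed allocation, everything already allocated is freed before reporting
 * the error.  A generic sketch of the same pattern (hypothetical helper,
 * not part of the iterator):
 */
#if 0
static int
allocate_n_buffers(char **bufs, int n, npy_intp itemsize, npy_intp buffersize)
{
    int i, j;
    for (i = 0; i < n; ++i) {
        bufs[i] = PyArray_malloc(itemsize * buffersize);
        if (bufs[i] == NULL) {
            /* Unwind the buffers allocated so far, then report failure. */
            for (j = 0; j < i; ++j) {
                PyArray_free(bufs[j]);
                bufs[j] = NULL;
            }
            PyErr_NoMemory();
            return 0;
        }
    }
    return 1;
}
#endif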
1776

1777
/*
1778
 * This sets the AXISDATA portion of the iterator to the specified
1779
 * iterindex, updating the pointers as well.  This function does
1780
 * no error checking.
1781
 */
1782
NPY_NO_EXPORT void
1783 1
npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex)
1784
{
1785 1
    npy_uint32 itflags = NIT_ITFLAGS(iter);
1786 1
    int idim, ndim = NIT_NDIM(iter);
1787 1
    int nop = NIT_NOP(iter);
1788

1789
    char **dataptr;
1790
    NpyIter_AxisData *axisdata;
1791
    npy_intp sizeof_axisdata;
1792
    npy_intp istrides, nstrides, i, shape;
1793

1794 1
    axisdata = NIT_AXISDATA(iter);
1795 1
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1796 1
    nstrides = NAD_NSTRIDES();
1797

1798 1
    NIT_ITERINDEX(iter) = iterindex;
1799

1800 1
    ndim = ndim ? ndim : 1;
1801

1802 1
    if (iterindex == 0) {
1803 1
        dataptr = NIT_RESETDATAPTR(iter);
1804

1805 1
        for (idim = 0; idim < ndim; ++idim) {
1806
            char **ptrs;
1807 1
            NAD_INDEX(axisdata) = 0;
1808 1
            ptrs = NAD_PTRS(axisdata);
1809 1
            for (istrides = 0; istrides < nstrides; ++istrides) {
1810 1
                ptrs[istrides] = dataptr[istrides];
1811
            }
1812

1813 1
            NIT_ADVANCE_AXISDATA(axisdata, 1);
1814
        }
1815
    }
1816
    else {
1817
        /*
1818
         * Set the multi-index, from the fastest-changing to the
1819
         * slowest-changing.
1820
         */
1821 1
        axisdata = NIT_AXISDATA(iter);
1822 1
        shape = NAD_SHAPE(axisdata);
1823 1
        i = iterindex;
1824 1
        iterindex /= shape;
1825 1
        NAD_INDEX(axisdata) = i - iterindex * shape;
1826 1
        for (idim = 0; idim < ndim-1; ++idim) {
1827 1
            NIT_ADVANCE_AXISDATA(axisdata, 1);
1828

1829 1
            shape = NAD_SHAPE(axisdata);
1830 1
            i = iterindex;
1831 1
            iterindex /= shape;
1832 1
            NAD_INDEX(axisdata) = i - iterindex * shape;
1833
        }
1834

1835 1
        dataptr = NIT_RESETDATAPTR(iter);
1836

1837
        /*
1838
         * Accumulate the successive pointers with their
1839
         * offsets in the opposite order, starting from the
1840
         * original data pointers.
1841
         */
1842 1
        for (idim = 0; idim < ndim; ++idim) {
1843
            npy_intp *strides;
1844
            char **ptrs;
1845

1846 1
            strides = NAD_STRIDES(axisdata);
1847 1
            ptrs = NAD_PTRS(axisdata);
1848

1849 1
            i = NAD_INDEX(axisdata);
1850

1851 1
            for (istrides = 0; istrides < nstrides; ++istrides) {
1852 1
                ptrs[istrides] = dataptr[istrides] + i*strides[istrides];
1853
            }
1854

1855 1
            dataptr = ptrs;
1856

1857 1
            NIT_ADVANCE_AXISDATA(axisdata, -1);
1858
        }
1859
    }
1860
}
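
/*
 * The index decomposition above is a mixed-radix split of the flat iterindex,
 * fastest-changing axis first, using i - (i/shape)*shape in place of i%shape.
 * A standalone sketch with a worked example (hypothetical helper):
 */
#if 0
static void
split_iterindex(npy_intp iterindex, const npy_intp *shape, int ndim,
                npy_intp *index_out)
{
    int idim;
    /* shape[] and index_out[] are ordered fastest-changing axis first,
     * matching the AXISDATA layout. */
    for (idim = 0; idim < ndim; ++idim) {
        npy_intp i = iterindex;
        iterindex /= shape[idim];
        index_out[idim] = i - iterindex * shape[idim];
    }
}
/* Example: shape {4, 3, 2}, iterindex 17 -> indices {1, 1, 1},
 * since 17 == 1 + 4*(1 + 3*1). */
#endif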
1861

1862
/*
1863
 * This gets called after the buffers have been exhausted, and
1864
 * their data needs to be written back to the arrays.  The multi-index
1865
 * must be positioned for the beginning of the buffer.
1866
 */
1867
NPY_NO_EXPORT int
1868 1
npyiter_copy_from_buffers(NpyIter *iter)
1869
{
1870 1
    npy_uint32 itflags = NIT_ITFLAGS(iter);
1871 1
    int ndim = NIT_NDIM(iter);
1872 1
    int iop, nop = NIT_NOP(iter);
1873 1
    int maskop = NIT_MASKOP(iter);
1874

1875 1
    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
1876 1
    NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
1877 1
    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter),
1878 1
                    *reduce_outeraxisdata = NULL;
1879

1880 1
    PyArray_Descr **dtypes = NIT_DTYPES(iter);
1881 1
    npy_intp transfersize = NBF_SIZE(bufferdata);
1882 1
    npy_intp *strides = NBF_STRIDES(bufferdata),
1883 1
             *ad_strides = NAD_STRIDES(axisdata);
1884 1
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1885 1
    char **ad_ptrs = NAD_PTRS(axisdata);
1886 1
    char **buffers = NBF_BUFFERS(bufferdata);
1887
    char *buffer;
1888

1889 1
    npy_intp reduce_outerdim = 0;
1890 1
    npy_intp *reduce_outerstrides = NULL;
1891

1892 1
    PyArray_StridedUnaryOp *stransfer = NULL;
1893 1
    NpyAuxData *transferdata = NULL;
1894

1895 1
    npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) /
1896
                                NPY_SIZEOF_INTP;
1897

1898
    /* If we're past the end, nothing to copy */
1899 1
    if (NBF_SIZE(bufferdata) == 0) {
1900
        return 0;
1901
    }
1902

1903
    NPY_IT_DBG_PRINT("Iterator: Copying buffers to outputs\n");
1904

1905 1
    if (itflags&NPY_ITFLAG_REDUCE) {
1906 1
        reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
1907 1
        reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
1908 1
        reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
1909 1
        transfersize *= NBF_REDUCE_OUTERSIZE(bufferdata);
1910
    }
1911

1912 1
    for (iop = 0; iop < nop; ++iop) {
1913 1
        stransfer = NBF_WRITETRANSFERFN(bufferdata)[iop];
1914 1
        transferdata = NBF_WRITETRANSFERDATA(bufferdata)[iop];
1915 1
        buffer = buffers[iop];
1916
        /*
1917
         * Copy the data back to the arrays.  If the type has refs,
1918
         * this function moves them so the buffer's refs are released.
1919
         *
1920
         * The flag USINGBUFFER is set when the buffer was used, so
1921
         * only copy back when this flag is on.
1922
         */
1923 1
        if ((stransfer != NULL) &&
1924 1
               (op_itflags[iop]&(NPY_OP_ITFLAG_WRITE|NPY_OP_ITFLAG_USINGBUFFER))
1925 1
                        == (NPY_OP_ITFLAG_WRITE|NPY_OP_ITFLAG_USINGBUFFER)) {
1926
            npy_intp op_transfersize;
1927

1928
            npy_intp src_stride, *dst_strides, *dst_coords, *dst_shape;
1929
            int ndim_transfer;
1930

1931
            NPY_IT_DBG_PRINT1("Iterator: Operand %d was buffered\n",
1932
                                        (int)iop);
1933

1934
            /*
1935
             * If this operand is being reduced in the inner loop,
1936
             * its buffering stride was set to zero, and just
1937
             * one element was copied.
1938
             */
1939 1
            if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
1940 1
                if (strides[iop] == 0) {
1941 1
                    if (reduce_outerstrides[iop] == 0) {
1942 1
                        op_transfersize = 1;
1943 1
                        src_stride = 0;
1944 1
                        dst_strides = &src_stride;
1945 1
                        dst_coords = &NAD_INDEX(reduce_outeraxisdata);
1946 1
                        dst_shape = &NAD_SHAPE(reduce_outeraxisdata);
1947 1
                        ndim_transfer = 1;
1948
                    }
1949
                    else {
1950 1
                        op_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata);
1951 1
                        src_stride = reduce_outerstrides[iop];
1952 1
                        dst_strides =
1953 1
                                &NAD_STRIDES(reduce_outeraxisdata)[iop];
1954 1
                        dst_coords = &NAD_INDEX(reduce_outeraxisdata);
1955 1
                        dst_shape = &NAD_SHAPE(reduce_outeraxisdata);
1956 1
                        ndim_transfer = ndim - reduce_outerdim;
1957
                    }
1958
                }
1959
                else {
1960 1
                    if (reduce_outerstrides[iop] == 0) {
1961 1
                        op_transfersize = NBF_SIZE(bufferdata);
1962 1
                        src_stride = strides[iop];
1963 1
                        dst_strides = &ad_strides[iop];
1964 1
                        dst_coords = &NAD_INDEX(axisdata);
1965 1
                        dst_shape = &NAD_SHAPE(axisdata);
1966 1
                        ndim_transfer = reduce_outerdim ?
1967
                                        reduce_outerdim : 1;
1968
                    }
1969
                    else {
1970 1
                        op_transfersize = transfersize;
1971 1
                        src_stride = strides[iop];
1972 1
                        dst_strides = &ad_strides[iop];
1973 1
                        dst_coords = &NAD_INDEX(axisdata);
1974 1
                        dst_shape = &NAD_SHAPE(axisdata);
1975 1
                        ndim_transfer = ndim;
1976
                    }
1977
                }
1978
            }
1979
            else {
1980 1
                op_transfersize = transfersize;
1981 1
                src_stride = strides[iop];
1982 1
                dst_strides = &ad_strides[iop];
1983 1
                dst_coords = &NAD_INDEX(axisdata);
1984 1
                dst_shape = &NAD_SHAPE(axisdata);
1985 1
                ndim_transfer = ndim;
1986
            }
1987

1988
            NPY_IT_DBG_PRINT2("Iterator: Copying buffer to "
1989
                                "operand %d (%d items)\n",
1990
                                (int)iop, (int)op_transfersize);
1991

1992
            /* WRITEMASKED operand */
1993 1
            if (op_itflags[iop] & NPY_OP_ITFLAG_WRITEMASKED) {
1994
                npy_bool *maskptr;
1995

1996
                /*
1997
                 * The mask pointer may be in the buffer or in
1998
                 * the array, detect which one.
1999
                 */
2000 1
                if ((op_itflags[maskop]&NPY_OP_ITFLAG_USINGBUFFER) != 0) {
2001 1
                    maskptr = (npy_bool *)buffers[maskop];
2002
                }
2003
                else {
2004 1
                    maskptr = (npy_bool *)ad_ptrs[maskop];
2005
                }
2006

2007 1
                if (PyArray_TransferMaskedStridedToNDim(ndim_transfer,
2008 1
                        ad_ptrs[iop], dst_strides, axisdata_incr,
2009
                        buffer, src_stride,
2010 1
                        maskptr, strides[maskop],
2011
                        dst_coords, axisdata_incr,
2012
                        dst_shape, axisdata_incr,
2013 1
                        op_transfersize, dtypes[iop]->elsize,
2014
                        (PyArray_MaskedStridedUnaryOp *)stransfer,
2015
                        transferdata) < 0) {
2016 1
                    return -1;
2017
                }
2018
            }
2019
            /* Regular operand */
2020
            else {
2021 1
                if (PyArray_TransferStridedToNDim(ndim_transfer,
2022 1
                        ad_ptrs[iop], dst_strides, axisdata_incr,
2023
                        buffer, src_stride,
2024
                        dst_coords, axisdata_incr,
2025
                        dst_shape, axisdata_incr,
2026 1
                        op_transfersize, dtypes[iop]->elsize,
2027
                        stransfer,
2028
                        transferdata) < 0) {
2029
                    return -1;
2030
                }
2031
            }
2032
        }
2033
        /* If there's no copy back, we may have to decrement refs.  In
2034
         * this case, the transfer function has a 'decsrcref' transfer
2035
         * function, so we can use it to do the decrement.
2036
         *
2037
         * The flag USINGBUFFER is set when the buffer was used, so
2038
         * only decrement refs when this flag is on.
2039
         */
2040 1
        else if (stransfer != NULL &&
2041 1
                       (op_itflags[iop]&NPY_OP_ITFLAG_USINGBUFFER) != 0) {
2042
            NPY_IT_DBG_PRINT1("Iterator: Freeing refs and zeroing buffer "
2043
                                "of operand %d\n", (int)iop);
2044
            /* Decrement refs */
2045 1
            if (stransfer(NULL, 0, buffer, dtypes[iop]->elsize,
2046 1
                          transfersize, dtypes[iop]->elsize,
2047
                          transferdata) < 0) {
2048
                /* Since this should only decrement, it should never error */
2049
                assert(0);
2050
                return -1;
2051
            }
2052
            /*
2053
             * Zero out the memory for safety.  For instance,
2054
             * if during iteration some Python code copied an
2055
             * array pointing into the buffer, it will get None
2056
             * values for its references after this.
2057
             */
2058 1
            memset(buffer, 0, dtypes[iop]->elsize*transfersize);
2059
        }
2060
    }
2061

2062
    NPY_IT_DBG_PRINT("Iterator: Finished copying buffers to outputs\n");
2063
    return 0;
2064
}
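
/*
 * The write-back itself goes through PyArray_TransferStridedToNDim (or the
 * masked variant), which also handles casting and reference moves.  For a
 * single dimension and a plain copy it reduces to the following loop
 * (hypothetical helper, shown only to illustrate the data movement):
 */
#if 0
static void
copyback_1d(char *dst, npy_intp dst_stride,
            const char *buffer, npy_intp itemsize, npy_intp count)
{
    npy_intp i;
    for (i = 0; i < count; ++i) {
        /* The buffer is packed contiguously; the operand may be strided. */
        memcpy(dst, buffer, itemsize);
        dst += dst_stride;
        buffer += itemsize;
    }
}
#endif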
2065

2066
/*
2067
 * This gets called after the iterator has been positioned to a multi-index
2068
 * for the start of a buffer.  It decides which operands need a buffer,
2069
 * and copies the data into the buffers.
2070
 */
2071
NPY_NO_EXPORT int
2072 1
npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs)
2073
{
2074 1
    npy_uint32 itflags = NIT_ITFLAGS(iter);
2075 1
    int ndim = NIT_NDIM(iter);
2076 1
    int iop, nop = NIT_NOP(iter);
2077

2078 1
    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
2079 1
    NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
2080 1
    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter),
2081 1
                    *reduce_outeraxisdata = NULL;
2082

2083 1
    PyArray_Descr **dtypes = NIT_DTYPES(iter);
2084 1
    PyArrayObject **operands = NIT_OPERANDS(iter);
2085 1
    npy_intp *strides = NBF_STRIDES(bufferdata),
2086 1
             *ad_strides = NAD_STRIDES(axisdata);
2087 1
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2088 1
    char **ptrs = NBF_PTRS(bufferdata), **ad_ptrs = NAD_PTRS(axisdata);
2089 1
    char **buffers = NBF_BUFFERS(bufferdata);
2090
    npy_intp iterindex, iterend, transfersize,
2091 1
            singlestridesize, reduce_innersize = 0, reduce_outerdim = 0;
2092 1
    int is_onestride = 0, any_buffered = 0;
2093

2094 1
    npy_intp *reduce_outerstrides = NULL;
2095 1
    char **reduce_outerptrs = NULL;
2096

2097 1
    PyArray_StridedUnaryOp *stransfer = NULL;
2098 1
    NpyAuxData *transferdata = NULL;
2099

2100
    /*
2101
     * Have to get this flag before npyiter_checkreducesize sets
2102
     * it for the next iteration.
2103
     */
2104 1
    npy_bool reuse_reduce_loops = (prev_dataptrs != NULL) &&
2105 1
                    ((itflags&NPY_ITFLAG_REUSE_REDUCE_LOOPS) != 0);
2106

2107 1
    npy_intp axisdata_incr = NIT_AXISDATA_SIZEOF(itflags, ndim, nop) /
2108
                                NPY_SIZEOF_INTP;
2109

2110
    NPY_IT_DBG_PRINT("Iterator: Copying inputs to buffers\n");
2111

2112
    /* Calculate the size if using any buffers */
2113 1
    iterindex = NIT_ITERINDEX(iter);
2114 1
    iterend = NIT_ITEREND(iter);
2115 1
    transfersize = NBF_BUFFERSIZE(bufferdata);
2116 1
    if (transfersize > iterend - iterindex) {
2117 1
        transfersize = iterend - iterindex;
2118
    }
2119

2120
    /* If the reduce loop structure was full last time around, we reuse it */
2121 1
    if (reuse_reduce_loops) {
2122
        npy_intp full_transfersize, prev_reduce_outersize;
2123

2124 1
        prev_reduce_outersize = NBF_REDUCE_OUTERSIZE(bufferdata);
2125 1
        reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
2126 1
        reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
2127 1
        reduce_outerdim = NBF_REDUCE_OUTERDIM(bufferdata);
2128 1
        reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
2129 1
        reduce_innersize = NBF_SIZE(bufferdata);
2130 1
        NBF_REDUCE_POS(bufferdata) = 0;
2131
        /*
2132
         * Try to make the outersize as big as possible. This allows
2133
         * it to shrink when processing the last bit of the outer reduce loop,
2134
         * then grow again at the beginning of the next outer reduce loop.
2135
         */
2136 1
        NBF_REDUCE_OUTERSIZE(bufferdata) = (NAD_SHAPE(reduce_outeraxisdata)-
2137 1
                                            NAD_INDEX(reduce_outeraxisdata));
2138 1
        full_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata)*reduce_innersize;
2139
        /* If the full transfer size doesn't fit in the buffer, truncate it */
2140 1
        if (full_transfersize > NBF_BUFFERSIZE(bufferdata)) {
2141 1
            NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
2142 1
            transfersize = NBF_REDUCE_OUTERSIZE(bufferdata)*reduce_innersize;
2143
        }
2144
        else {
2145
            transfersize = full_transfersize;
2146
        }
2147 1
        if (prev_reduce_outersize < NBF_REDUCE_OUTERSIZE(bufferdata)) {
2148
            /*
2149
             * If less data was copied the previous time around, it may not
2150
             * be safe to reuse the buffers even if the pointers match.
2151
             */
2152 1
            reuse_reduce_loops = 0;
2153
        }
2154 1
        NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
2155

2156
        NPY_IT_DBG_PRINT3("Reused reduce transfersize: %d innersize: %d "
2157
                        "itersize: %d\n",
2158
                            (int)transfersize,
2159
                            (int)reduce_innersize,
2160
                            (int)NpyIter_GetIterSize(iter));
2161
        NPY_IT_DBG_PRINT1("Reduced reduce outersize: %d",
2162
                            (int)NBF_REDUCE_OUTERSIZE(bufferdata));
2163
    }
2164
    /*
2165
     * If there are any reduction operands, we may have to make
2166
     * the size smaller so we don't copy the same value into
2167
     * a buffer twice, as the buffering does not have a mechanism
2168
     * to combine values itself.
2169
     */
2170 1
    else if (itflags&NPY_ITFLAG_REDUCE) {
2171
        NPY_IT_DBG_PRINT("Iterator: Calculating reduce loops\n");
2172 1
        transfersize = npyiter_checkreducesize(iter, transfersize,
2173
                                                &reduce_innersize,
2174
                                                &reduce_outerdim);
2175
        NPY_IT_DBG_PRINT3("Reduce transfersize: %d innersize: %d "
2176
                        "itersize: %d\n",
2177
                            (int)transfersize,
2178
                            (int)reduce_innersize,
2179
                            (int)NpyIter_GetIterSize(iter));
2180

2181 1
        reduce_outerstrides = NBF_REDUCE_OUTERSTRIDES(bufferdata);
2182 1
        reduce_outerptrs = NBF_REDUCE_OUTERPTRS(bufferdata);
2183 1
        reduce_outeraxisdata = NIT_INDEX_AXISDATA(axisdata, reduce_outerdim);
2184 1
        NBF_SIZE(bufferdata) = reduce_innersize;
2185 1
        NBF_REDUCE_POS(bufferdata) = 0;
2186 1
        NBF_REDUCE_OUTERDIM(bufferdata) = reduce_outerdim;
2187 1
        NBF_BUFITEREND(bufferdata) = iterindex + reduce_innersize;
2188 1
        if (reduce_innersize == 0) {
2189 1
            NBF_REDUCE_OUTERSIZE(bufferdata) = 0;
2190 1
            return 0;
2191
        }
2192
        else {
2193 1
            NBF_REDUCE_OUTERSIZE(bufferdata) = transfersize/reduce_innersize;
2194
        }
2195
    }
2196
    else {
2197 1
        NBF_SIZE(bufferdata) = transfersize;
2198 1
        NBF_BUFITEREND(bufferdata) = iterindex + transfersize;
2199
    }
2200

2201
    /* Calculate the maximum size if using a single stride and no buffers */
2202 1
    singlestridesize = NAD_SHAPE(axisdata)-NAD_INDEX(axisdata);
2203 1
    if (singlestridesize > iterend - iterindex) {
2204 1
        singlestridesize = iterend - iterindex;
2205
    }
2206 1
    if (singlestridesize >= transfersize) {
2207 1
        is_onestride = 1;
2208
    }
2209

2210 1
    for (iop = 0; iop < nop; ++iop) {
2211
        /*
2212
         * If the buffer is write-only, these two are NULL, and the buffer
2213
         * pointers will be set up but the read copy won't be done
2214
         */
2215 1
        stransfer = NBF_READTRANSFERFN(bufferdata)[iop];
2216 1
        transferdata = NBF_READTRANSFERDATA(bufferdata)[iop];
2217 1
        switch (op_itflags[iop]&
2218
                        (NPY_OP_ITFLAG_BUFNEVER|
2219
                         NPY_OP_ITFLAG_CAST|
2220
                         NPY_OP_ITFLAG_REDUCE)) {
2221
            /* Never need to buffer this operand */
2222 1
            case NPY_OP_ITFLAG_BUFNEVER:
2223 1
                ptrs[iop] = ad_ptrs[iop];
2224 1
                if (itflags&NPY_ITFLAG_REDUCE) {
2225 1
                    reduce_outerstrides[iop] = reduce_innersize *
2226 1
                                                 strides[iop];
2227 1
                    reduce_outerptrs[iop] = ptrs[iop];
2228
                }
2229
                /*
2230
                 * Should not adjust the stride - ad_strides[iop]
2231
                 * could be zero, but strides[iop] was initialized
2232
                 * to the first non-trivial stride.
2233
                 */
2234
                stransfer = NULL;
2235
                /* The flag NPY_OP_ITFLAG_USINGBUFFER can be ignored here */
2236
                break;
2237
            /* Never need to buffer this operand */
2238 1
            case NPY_OP_ITFLAG_BUFNEVER|NPY_OP_ITFLAG_REDUCE:
2239 1
                ptrs[iop] = ad_ptrs[iop];
2240 1
                reduce_outerptrs[iop] = ptrs[iop];
2241 1
                reduce_outerstrides[iop] = 0;
2242
                /*
2243
                 * Should not adjust the stride - ad_strides[iop]
2244
                 * could be zero, but strides[iop] was initialized
2245
                 * to the first non-trivial stride.
2246
                 */
2247 1
                stransfer = NULL;
2248
                /* The flag NPY_OP_ITFLAG_USINGBUFFER can be ignored here */
2249
                break;
2250
            /* Just a copy */
2251 1
            case 0:
2252
                /* Do not reuse buffer if it did not exist */
2253 1
                if (!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER) &&
2254
                                                (prev_dataptrs != NULL)) {
2255 1
                    prev_dataptrs[iop] = NULL;
2256
                }
2257
                /*
2258
                 * No copyswap or cast was requested, so all we're
2259
                 * doing is copying the data to fill the buffer and
2260
                 * produce a single stride.  If the underlying data
2261
                 * already does that, no need to copy it.
2262
                 */
2263 1
                if (is_onestride) {
2264 1
                    ptrs[iop] = ad_ptrs[iop];
2265 1
                    strides[iop] = ad_strides[iop];
2266 1
                    stransfer = NULL;
2267
                    /* Signal that the buffer is not being used */
2268 1
                    op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2269
                }
2270
                /* If some other op is reduced, we have a double reduce loop */
2271 1
                else if ((itflags&NPY_ITFLAG_REDUCE) &&
2272 1
                                (reduce_outerdim == 1) &&
2273 1
                                (transfersize/reduce_innersize <=
2274 1
                                            NAD_SHAPE(reduce_outeraxisdata) -
2275 1
                                            NAD_INDEX(reduce_outeraxisdata))) {
2276 1
                    ptrs[iop] = ad_ptrs[iop];
2277 1
                    reduce_outerptrs[iop] = ptrs[iop];
2278 1
                    strides[iop] = ad_strides[iop];
2279 1
                    reduce_outerstrides[iop] =
2280 1
                                    NAD_STRIDES(reduce_outeraxisdata)[iop];
2281 1
                    stransfer = NULL;
2282
                    /* Signal that the buffer is not being used */
2283 1
                    op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2284
                }
2285
                else {
2286
                    /* In this case, the buffer is being used */
2287 1
                    ptrs[iop] = buffers[iop];
2288 1
                    strides[iop] = dtypes[iop]->elsize;
2289 1
                    if (itflags&NPY_ITFLAG_REDUCE) {
2290 1
                        reduce_outerstrides[iop] = reduce_innersize *
2291
                                                     strides[iop];
2292 1
                        reduce_outerptrs[iop] = ptrs[iop];
2293
                    }
2294
                    /* Signal that the buffer is being used */
2295 1
                    op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
2296
                }
2297
                break;
2298
            /* Just a copy, but with a reduction */
2299 1
            case NPY_OP_ITFLAG_REDUCE:
2300
                /* Do not reuse buffer if it did not exist */
2301 1
                if (!(op_itflags[iop] & NPY_OP_ITFLAG_USINGBUFFER) &&
2302
                                                (prev_dataptrs != NULL)) {
2303 1
                    prev_dataptrs[iop] = NULL;
2304
                }
2305 1
                if (ad_strides[iop] == 0) {
2306 1
                    strides[iop] = 0;
2307
                    /* It's all in one stride in the inner loop dimension */
2308 1
                    if (is_onestride) {
2309
                        NPY_IT_DBG_PRINT1("reduce op %d all one stride\n", (int)iop);
2310 1
                        ptrs[iop] = ad_ptrs[iop];
2311 1
                        reduce_outerstrides[iop] = 0;
2312 1
                        stransfer = NULL;
2313
                        /* Signal that the buffer is not being used */
2314 1
                        op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2315
                    }
2316
                    /* It's all in one stride in the reduce outer loop */
2317 1
                    else if ((reduce_outerdim > 0) &&
2318 1
                                    (transfersize/reduce_innersize <=
2319 1
                                            NAD_SHAPE(reduce_outeraxisdata) -
2320 1
                                            NAD_INDEX(reduce_outeraxisdata))) {
2321
                        NPY_IT_DBG_PRINT1("reduce op %d all one outer stride\n",
2322
                                                            (int)iop);
2323 1
                        ptrs[iop] = ad_ptrs[iop];
2324
                        /* Outer reduce loop advances by one item */
2325 1
                        reduce_outerstrides[iop] =
2326 1
                                NAD_STRIDES(reduce_outeraxisdata)[iop];
2327 1
                        stransfer = NULL;
2328
                        /* Signal that the buffer is not being used */
2329 1
                        op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2330
                    }
2331
                    /* In this case, the buffer is being used */
2332
                    else {
2333
                        NPY_IT_DBG_PRINT1("reduce op %d must buffer\n", (int)iop);
2334 1
                        ptrs[iop] = buffers[iop];
2335
                        /* Both outer and inner reduce loops have stride 0 */
2336 1
                        if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
2337 1
                            reduce_outerstrides[iop] = 0;
2338
                        }
2339
                        /* Outer reduce loop advances by one item */
2340
                        else {
2341 1
                            reduce_outerstrides[iop] = dtypes[iop]->elsize;
2342
                        }
2343
                        /* Signal that the buffer is being used */
2344 1
                        op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
2345
                    }
2346

2347
                }
2348 1
                else if (is_onestride) {
2349
                    NPY_IT_DBG_PRINT1("reduce op %d all one stride in dim 0\n", (int)iop);
2350 1
                    ptrs[iop] = ad_ptrs[iop];
2351 1
                    strides[iop] = ad_strides[iop];
2352 1
                    reduce_outerstrides[iop] = 0;
2353 1
                    stransfer = NULL;
2354
                    /* Signal that the buffer is not being used */
2355 1
                    op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2356
                }
2357
                else {
2358
                    /* It's all in one stride in the reduce outer loop */
2359 1
                    if ((reduce_outerdim == 1) &&
2360 1
                                    (transfersize/reduce_innersize <=
2361 1
                                            NAD_SHAPE(reduce_outeraxisdata) -
2362 1
                                            NAD_INDEX(reduce_outeraxisdata))) {
2363 1
                        ptrs[iop] = ad_ptrs[iop];
2364 1
                        strides[iop] = ad_strides[iop];
2365
                        /* Outer reduce loop advances by one item */
2366 1
                        reduce_outerstrides[iop] =
2367 1
                                NAD_STRIDES(reduce_outeraxisdata)[iop];
2368 1
                        stransfer = NULL;
2369
                        /* Signal that the buffer is not being used */
2370 1
                        op_itflags[iop] &= (~NPY_OP_ITFLAG_USINGBUFFER);
2371
                    }
2372
                    /* In this case, the buffer is being used */
2373
                    else {
2374 1
                        ptrs[iop] = buffers[iop];
2375 1
                        strides[iop] = dtypes[iop]->elsize;
2376

2377 1
                        if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
2378
                            /* Reduction in outer reduce loop */
2379 1
                            reduce_outerstrides[iop] = 0;
2380
                        }
2381
                        else {
2382
                            /* Advance to next items in outer reduce loop */
2383 1
                            reduce_outerstrides[iop] = reduce_innersize *
2384
                                                         dtypes[iop]->elsize;
2385
                        }
2386
                        /* Signal that the buffer is being used */
2387 1
                        op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
2388
                    }
2389
                }
2390 1
                reduce_outerptrs[iop] = ptrs[iop];
2391 1
                break;
2392 1
            default:
2393
                /* In this case, the buffer is always being used */
2394 1
                any_buffered = 1;
2395

2396
                /* Signal that the buffer is being used */
2397 1
                op_itflags[iop] |= NPY_OP_ITFLAG_USINGBUFFER;
2398

2399 1
                if (!(op_itflags[iop]&NPY_OP_ITFLAG_REDUCE)) {
2400 1
                    ptrs[iop] = buffers[iop];
2401 1
                    strides[iop] = dtypes[iop]->elsize;
2402 1
                    if (itflags&NPY_ITFLAG_REDUCE) {
2403 1
                        reduce_outerstrides[iop] = reduce_innersize *
2404
                                                     strides[iop];
2405 1
                        reduce_outerptrs[iop] = ptrs[iop];
2406
                    }
2407
                }
2408
                /* The buffer is being used with reduction */
2409
                else {
2410 1
                    ptrs[iop] = buffers[iop];
2411 1
                    if (ad_strides[iop] == 0) {
2412
                        NPY_IT_DBG_PRINT1("cast op %d has innermost stride 0\n", (int)iop);
2413 1
                        strides[iop] = 0;
2414
                        /* Both outer and inner reduce loops have stride 0 */
2415 1
                        if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
2416
                            NPY_IT_DBG_PRINT1("cast op %d has outermost stride 0\n", (int)iop);
2417 1
                            reduce_outerstrides[iop] = 0;
2418
                        }
2419
                        /* Outer reduce loop advances by one item */
2420
                        else {
2421
                            NPY_IT_DBG_PRINT1("cast op %d has outermost stride !=0\n", (int)iop);
2422 1
                            reduce_outerstrides[iop] = dtypes[iop]->elsize;
2423
                        }
2424
                    }
2425
                    else {
2426
                        NPY_IT_DBG_PRINT1("cast op %d has innermost stride !=0\n", (int)iop);
2427 1
                        strides[iop] = dtypes[iop]->elsize;
2428

2429 1
                        if (NAD_STRIDES(reduce_outeraxisdata)[iop] == 0) {
2430
                            NPY_IT_DBG_PRINT1("cast op %d has outermost stride 0\n", (int)iop);
2431
                            /* Reduction in outer reduce loop */
2432 1
                            reduce_outerstrides[iop] = 0;
2433
                        }
2434
                        else {
2435
                            NPY_IT_DBG_PRINT1("cast op %d has outermost stride !=0\n", (int)iop);
2436
                            /* Advance to next items in outer reduce loop */
2437 1
                            reduce_outerstrides[iop] = reduce_innersize *
2438
                                                         dtypes[iop]->elsize;
2439
                        }
2440
                    }
2441 1
                    reduce_outerptrs[iop] = ptrs[iop];
2442
                }
2443
                break;
2444
        }
2445

2446 1
        if (stransfer != NULL) {
2447
            npy_intp src_itemsize;
2448
            npy_intp op_transfersize;
2449

2450
            npy_intp dst_stride, *src_strides, *src_coords, *src_shape;
2451
            int ndim_transfer;
2452

2453 1
            npy_bool skip_transfer = 0;
2454

2455 1
            src_itemsize = PyArray_DTYPE(operands[iop])->elsize;
2456

2457
            /* If stransfer wasn't set to NULL, buffering is required */
2458 1
            any_buffered = 1;
2459

2460
            /*
2461
             * If this operand is being reduced in the inner loop,
2462
             * set its buffering stride to zero, and just copy
2463
             * one element.
2464
             */
2465 1
            if (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) {
2466 1
                if (ad_strides[iop] == 0) {
2467 1
                    strides[iop] = 0;
2468 1
                    if (reduce_outerstrides[iop] == 0) {
2469 1
                        op_transfersize = 1;
2470 1
                        dst_stride = 0;
2471 1
                        src_strides = &dst_stride;
2472 1
                        src_coords = &NAD_INDEX(reduce_outeraxisdata);
2473 1
                        src_shape = &NAD_SHAPE(reduce_outeraxisdata);
2474 1
                        ndim_transfer = 1;
2475

2476
                        /*
2477
                         * When we're reducing a single element, and
2478
                         * it's still the same element, don't overwrite
2479
                         * it even when reuse reduce loops is unset.
2480
                         * This preserves the precision of the
2481
                         * intermediate calculation.
2482
                         */
2483 1
                        if (prev_dataptrs &&
2484 1
                                    prev_dataptrs[iop] == ad_ptrs[iop]) {
2485
                            NPY_IT_DBG_PRINT1("Iterator: skipping operand %d"
2486
                                    " copy because it's a 1-element reduce\n",
2487
                                    (int)iop);
2488

2489 1
                            skip_transfer = 1;
2490
                        }
2491
                    }
2492
                    else {
2493 1
                        op_transfersize = NBF_REDUCE_OUTERSIZE(bufferdata);
2494 1
                        dst_stride = reduce_outerstrides[iop];
2495 1
                        src_strides = &NAD_STRIDES(reduce_outeraxisdata)[iop];
2496 1
                        src_coords = &NAD_INDEX(reduce_outeraxisdata);
2497 1
                        src_shape = &NAD_SHAPE(reduce_outeraxisdata);
2498 1
                        ndim_transfer = ndim - reduce_outerdim;
2499
                    }
2500
                }
2501
                else {
2502 1
                    if (reduce_outerstrides[iop] == 0) {
2503 1
                        op_transfersize = NBF_SIZE(bufferdata);
2504 1
                        dst_stride = strides[iop];
2505 1
                        src_strides = &ad_strides[iop];
2506 1
                        src_coords = &NAD_INDEX(axisdata);
2507 1
                        src_shape = &NAD_SHAPE(axisdata);
2508 1
                        ndim_transfer = reduce_outerdim ? reduce_outerdim : 1;
2509
                    }
2510
                    else {
2511 1
                        op_transfersize = transfersize;
2512 1
                        dst_stride = strides[iop];
2513 1
                        src_strides = &ad_strides[iop];
2514 1
                        src_coords = &NAD_INDEX(axisdata);
2515 1
                        src_shape = &NAD_SHAPE(axisdata);
2516 1
                        ndim_transfer = ndim;
2517
                    }
2518
                }
2519
            }
2520
            else {
2521 1
                op_transfersize = transfersize;
2522 1
                dst_stride = strides[iop];
2523 1
                src_strides = &ad_strides[iop];
2524 1
                src_coords = &NAD_INDEX(axisdata);
2525 1
                src_shape = &NAD_SHAPE(axisdata);
2526 1
                ndim_transfer = ndim;
2527
            }
2528

2529
            /*
2530
             * If the whole buffered loop structure remains the same,
2531
             * and the source pointer for this data didn't change,
2532
             * we don't have to copy the data again.
2533
             */
2534 1
            if (reuse_reduce_loops && prev_dataptrs[iop] == ad_ptrs[iop]) {
2535
                NPY_IT_DBG_PRINT2("Iterator: skipping operands %d "
2536
                        "copy (%d items) because loops are reused and the data "
2537
                        "pointer didn't change\n",
2538
                        (int)iop, (int)op_transfersize);
2539 1
                skip_transfer = 1;
2540
            }
2541

2542
            /* If the data type requires zero-initialization */
2543 1
            if (PyDataType_FLAGCHK(dtypes[iop], NPY_NEEDS_INIT)) {
2544
                NPY_IT_DBG_PRINT("Iterator: Buffer requires init, "
2545
                                    "memsetting to 0\n");
2546 1
                memset(ptrs[iop], 0, dtypes[iop]->elsize*op_transfersize);
2547
                /* Can't skip the transfer in this case */
2548 1
                skip_transfer = 0;
2549
            }
2550

2551 1
            if (!skip_transfer) {
2552
                NPY_IT_DBG_PRINT2("Iterator: Copying operand %d to "
2553
                                "buffer (%d items)\n",
2554
                                (int)iop, (int)op_transfersize);
2555

2556 1
                if (PyArray_TransferNDimToStrided(
2557 1
                                ndim_transfer, ptrs[iop], dst_stride,
2558 1
                                ad_ptrs[iop], src_strides, axisdata_incr,
2559
                                src_coords, axisdata_incr,
2560
                                src_shape, axisdata_incr,
2561
                                op_transfersize, src_itemsize,
2562
                                stransfer, transferdata) < 0) {
2563 1
                    return -1;
2564
                }
2565
            }
2566
        }
2567 1
        else if (ptrs[iop] == buffers[iop]) {
2568
            /* If the data type requires zero-initialization */
2569 1
            if (PyDataType_FLAGCHK(dtypes[iop], NPY_NEEDS_INIT)) {
2570
                NPY_IT_DBG_PRINT1("Iterator: Write-only buffer for "
2571
                                    "operand %d requires init, "
2572
                                    "memsetting to 0\n", (int)iop);
2573 1
                memset(ptrs[iop], 0, dtypes[iop]->elsize*transfersize);
2574
            }
2575
        }
2576

2577
    }
2578

2579
    /*
2580
     * If buffering wasn't needed, we can grow the inner
2581
     * loop to as large as possible.
2582
     *
2583
     * TODO: Could grow REDUCE loop too with some more logic above.
2584
     */
2585 1
    if (!any_buffered && (itflags&NPY_ITFLAG_GROWINNER) &&
2586
                        !(itflags&NPY_ITFLAG_REDUCE)) {
2587 1
        if (singlestridesize > transfersize) {
2588
            NPY_IT_DBG_PRINT2("Iterator: Expanding inner loop size "
2589
                    "from %d to %d since buffering wasn't needed\n",
2590
                    (int)NBF_SIZE(bufferdata), (int)singlestridesize);
2591 1
            NBF_SIZE(bufferdata) = singlestridesize;
2592 1
            NBF_BUFITEREND(bufferdata) = iterindex + singlestridesize;
2593
        }
2594
    }
2595

2596
    NPY_IT_DBG_PRINT1("Any buffering needed: %d\n", any_buffered);
2597

2598
    NPY_IT_DBG_PRINT1("Iterator: Finished copying inputs to buffers "
2599
                        "(buffered size is %d)\n", (int)NBF_SIZE(bufferdata));
2600
    return 0;
2601
}
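
/*
 * The size bookkeeping above boils down to: clip the transfer to what is left
 * of the iteration, and when reducing, keep it a whole number of inner reduce
 * loops.  A sketch of that arithmetic (hypothetical helper, simplified; the
 * real code also handles loop reuse and growing the inner loop):
 */
#if 0
static npy_intp
clipped_transfersize(npy_intp buffersize, npy_intp iterindex, npy_intp iterend,
                     npy_intp reduce_innersize, npy_intp *reduce_outersize)
{
    npy_intp transfersize = buffersize;
    if (transfersize > iterend - iterindex) {
        transfersize = iterend - iterindex;
    }
    if (reduce_innersize > 0) {
        /* Keep the transfer a whole number of inner reduce loops. */
        *reduce_outersize = transfersize / reduce_innersize;
        transfersize = *reduce_outersize * reduce_innersize;
    }
    else {
        *reduce_outersize = 0;
    }
    return transfersize;
}
/* Example: buffersize 8192, 100 items left, reduce_innersize 30
 * -> reduce_outersize 3, transfersize 90. */
#endif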
2602

2603

2604
/**
2605
 * This function clears any references still held by the buffers and should
2606
 * only be used to discard buffers if an error occurred.
2607
 *
2608
 * @param iter Iterator
2609
 */
2610
NPY_NO_EXPORT void
2611 1
npyiter_clear_buffers(NpyIter *iter)
2612
{
2613 1
    int nop = iter->nop;
2614 1
    NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
2615

2616 1
    if (NBF_SIZE(bufferdata) == 0) {
2617
        /* if the buffers are empty already, there is nothing to do */
2618 1
        return;
2619
    }
2620

2621 1
    if (!(NIT_ITFLAGS(iter) & NPY_ITFLAG_NEEDSAPI)) {
2622
        /* Buffers do not require clearing, but should not be copied back */
2623 1
        NBF_SIZE(bufferdata) = 0;
2624 1
        return;
2625
    }
2626

2627
    /*
2628
     * The iterator may be using a dtype with references, which always
2629
     * requires the API. In that case, further cleanup may be necessary.
2630
     *
2631
     * TODO: At this time, we assume that a dtype having references
2632
     *       implies the need to hold the GIL at all times. In theory
2633
     *       we could broaden this definition for a new
2634
     *       `PyArray_Item_XDECREF` API and the assumption may become
2635
     *       incorrect.
2636
     */
2637
    PyObject *type, *value, *traceback;
2638 1
    PyErr_Fetch(&type,  &value, &traceback);
2639

2640
    /* Cleanup any buffers with references */
2641 1
    char **buffers = NBF_BUFFERS(bufferdata);
2642 1
    PyArray_Descr **dtypes = NIT_DTYPES(iter);
2643 1
    for (int iop = 0; iop < nop; ++iop, ++buffers) {
2644
        /*
2645
         * We may want to find a better way to do this; on the other hand,
2646
         * this cleanup seems rare and fairly special.  A dtype using
2647
         * references (right now only us) must always keep the buffer in
2648
         * a well defined state (either NULL or owning the reference).
2649
         * Only we implement cleanup.
2650
         */
2651 1
        if (!PyDataType_REFCHK(dtypes[iop])) {
2652 1
            continue;
2653
        }
2654 1
        if (*buffers == 0) {
2655 1
            continue;
2656
        }
2657 1
        int itemsize = dtypes[iop]->elsize;
2658 1
        for (npy_intp i = 0; i < NBF_SIZE(bufferdata); i++) {
2659
            /*
2660
             * See above comment, if this API is expanded the GIL assumption
2661
             * could become incorrect.
2662
             */
2663 1
            PyArray_Item_XDECREF(*buffers + (itemsize * i), dtypes[iop]);
2664
        }
2665
        /* Clear out the buffer just to be sure */
2666 1
        memset(*buffers, 0, NBF_SIZE(bufferdata) * itemsize);
2667
    }
2668
    /* Signal that the buffers are empty */
2669 1
    NBF_SIZE(bufferdata) = 0;
2670 1
    PyErr_Restore(type, value, traceback);
2671
}
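
/*
 * The PyErr_Fetch/PyErr_Restore pair above is the standard CPython pattern
 * for running cleanup that may call into the C-API while an exception is
 * already set.  A generic sketch of the pattern (hypothetical helper):
 */
#if 0
static void
cleanup_with_pending_exception(void)
{
    /* Save any live exception so the cleanup calls below are safe. */
    PyObject *type, *value, *traceback;
    PyErr_Fetch(&type, &value, &traceback);

    /* ... drop references, free buffers, etc. ... */

    /* Put the original exception (if any) back for the caller to see. */
    PyErr_Restore(type, value, traceback);
}
#endif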
2672

2673

2674
/*
2675
 * This checks how much space can be buffered without encountering the
2676
 * same value twice, or for operands whose innermost stride is zero,
2677
 * without encountering a different value.  By reducing the buffered
2678
 * amount to this size, reductions can be safely buffered.
2679
 *
2680
 * Reductions are buffered with two levels of looping, to avoid
2681
 * frequent copying to the buffers.  The return value is the over-all
2682
 * buffer size, and when the flag NPY_ITFLAG_REDUCE is set, reduce_innersize
2683
 * receives the size of the inner of the two levels of looping.
2684
 *
2685
 * The value placed in reduce_outerdim is the index into the AXISDATA
2686
 * for where the second level of the double loop begins.
2687
 *
2688
 * The return value is always a multiple of the value placed in
2689
 * reduce_innersize.
2690
 */
2691
static npy_intp
2692 1
npyiter_checkreducesize(NpyIter *iter, npy_intp count,
2693
                                npy_intp *reduce_innersize,
2694
                                npy_intp *reduce_outerdim)
2695
{
2696 1
    npy_uint32 itflags = NIT_ITFLAGS(iter);
2697 1
    int idim, ndim = NIT_NDIM(iter);
2698 1
    int iop, nop = NIT_NOP(iter);
2699

2700
    NpyIter_AxisData *axisdata;
2701
    npy_intp sizeof_axisdata;
2702
    npy_intp coord, shape, *strides;
2703 1
    npy_intp reducespace = 1, factor;
2704
    npy_bool nonzerocoord;
2705

2706 1
    npyiter_opitflags *op_itflags = NIT_OPITFLAGS(iter);
2707
    char stride0op[NPY_MAXARGS];
2708

2709
    /* Default to no outer axis */
2710 1
    *reduce_outerdim = 0;
2711

2712
    /* If there's only one dimension, no need to calculate anything */
2713 1
    if (ndim == 1 || count == 0) {
2714 1
        *reduce_innersize = count;
2715 1
        return count;
2716
    }
2717

2718 1
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2719 1
    axisdata = NIT_AXISDATA(iter);
2720

2721
    /* Indicate which REDUCE operands have stride 0 in the inner loop */
2722 1
    strides = NAD_STRIDES(axisdata);
2723 1
    for (iop = 0; iop < nop; ++iop) {
2724 1
        stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) &&
2725 1
                           (strides[iop] == 0);
2726
        NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in "
2727
                        "the inner loop? %d\n", iop, (int)stride0op[iop]);
2728
    }
2729 1
    shape = NAD_SHAPE(axisdata);
2730 1
    coord = NAD_INDEX(axisdata);
2731 1
    reducespace += (shape-coord-1);
2732 1
    factor = shape;
2733 1
    NIT_ADVANCE_AXISDATA(axisdata, 1);
2734

2735
    /* Initialize nonzerocoord based on the first coordinate */
2736 1
    nonzerocoord = (coord != 0);
2737

2738
    /* Go forward through axisdata, calculating the space available */
2739 1
    for (idim = 1; idim < ndim && reducespace < count;
2740 1
                                ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2741
        NPY_IT_DBG_PRINT2("Iterator: inner loop reducespace %d, count %d\n",
2742
                                (int)reducespace, (int)count);
2743

2744 1
        strides = NAD_STRIDES(axisdata);
2745 1
        for (iop = 0; iop < nop; ++iop) {
2746
            /*
2747
             * If a reduce stride switched from zero to non-zero, or
2748
             * vice versa, that's the point where the data will stop
2749
             * being the same element or will repeat, and if the
2750
             * buffer starts with an all zero multi-index up to this
2751
             * point, that gives us the reduce_innersize.
2752
             */
2753 1
            if((stride0op[iop] && (strides[iop] != 0)) ||
2754 1
                        (!stride0op[iop] &&
2755 1
                         (strides[iop] == 0) &&
2756 1
                         (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) {
2757
                NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits "
2758
                                    "buffer to %d\n", (int)reducespace);
2759
                /*
2760
                 * If we already found more elements than count, or
2761
                 * the starting coordinate wasn't zero, the two-level
2762
                 * looping is unnecessary/can't be done, so return.
2763
                 */
2764 1
                if (count <= reducespace) {
2765 0
                    *reduce_innersize = count;
2766 0
                    NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
2767 0
                    return count;
2768
                }
2769 1
                else if (nonzerocoord) {
2770
                    if (reducespace < count) {
2771 1
                        count = reducespace;
2772
                    }
2773 1
                    *reduce_innersize = count;
2774
                    /* NOTE: This is similar to the (coord != 0) case below. */
2775 1
                    NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
2776 1
                    return count;
2777
                }
2778
                else {
2779 1
                    *reduce_innersize = reducespace;
2780 1
                    break;
2781
                }
2782
            }
2783
        }
2784
        /* If we broke out of the loop early, we found reduce_innersize */
2785 1
        if (iop != nop) {
2786
            NPY_IT_DBG_PRINT2("Iterator: Found first dim not "
2787
                            "reduce (%d of %d)\n", iop, nop);
2788
            break;
2789
        }
2790

2791 1
        shape = NAD_SHAPE(axisdata);
2792 1
        coord = NAD_INDEX(axisdata);
2793 1
        if (coord != 0) {
2794 1
            nonzerocoord = 1;
2795
        }
2796 1
        reducespace += (shape-coord-1) * factor;
2797 1
        factor *= shape;
2798
    }

    /*
     * If there was any non-zero coordinate, the reduction inner
     * loop doesn't fit in the buffersize, or the reduction inner loop
     * covered the entire iteration size, can't do the double loop.
     */
    if (nonzerocoord || count < reducespace || idim == ndim) {
        if (reducespace < count) {
            count = reducespace;
        }
        *reduce_innersize = count;
        /* In this case, we can't reuse the reduce loops */
        NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
        return count;
    }

    coord = NAD_INDEX(axisdata);
    if (coord != 0) {
        /*
         * In this case, it is only safe to reuse the buffer if the amount
         * of data copied is not more than the current axes cover, as is
         * the case when reuse_reduce_loops was active already.
         * In principle it should also be OK when the idim loop above
         * returns immediately.
         */
        NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_REUSE_REDUCE_LOOPS;
    }
    else {
        /* In this case, we can reuse the reduce loops */
        NIT_ITFLAGS(iter) |= NPY_ITFLAG_REUSE_REDUCE_LOOPS;
    }

    *reduce_innersize = reducespace;
    count /= reducespace;
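    /*
     * From this point on, count is measured in units of inner reduce
     * chunks: it is the number of complete reduce_innersize-sized chunks
     * that fit in the originally requested element count.
     */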

    NPY_IT_DBG_PRINT2("Iterator: reduce_innersize %d count /ed %d\n",
                    (int)reducespace, (int)count);

    /*
     * Continue through the rest of the dimensions.  If there are
     * two separated reduction axes, we may have to cut the buffer
     * short again.
     */
    *reduce_outerdim = idim;
    reducespace = 1;
    factor = 1;
    /* Indicate which REDUCE operands have stride 0 at the current level */
    strides = NAD_STRIDES(axisdata);
    for (iop = 0; iop < nop; ++iop) {
        stride0op[iop] = (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE) &&
                           (strides[iop] == 0);
        NPY_IT_DBG_PRINT2("Iterator: Operand %d has stride 0 in "
                        "the outer loop? %d\n", iop, (int)stride0op[iop]);
    }
    shape = NAD_SHAPE(axisdata);
    reducespace += (shape-coord-1) * factor;
    factor *= shape;
    NIT_ADVANCE_AXISDATA(axisdata, 1);
    ++idim;
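    /*
     * The second level of the search starts here: reducespace and factor
     * have been reset and from now on count one unit per inner reduce
     * chunk (rather than per element), while idim/axisdata have advanced
     * to the first axis beyond *reduce_outerdim.
     */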

    for (; idim < ndim && reducespace < count;
                                ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
        NPY_IT_DBG_PRINT2("Iterator: outer loop reducespace %d, count %d\n",
                                (int)reducespace, (int)count);
        strides = NAD_STRIDES(axisdata);
        for (iop = 0; iop < nop; ++iop) {
            /*
             * If a reduce stride switched from zero to non-zero, or
             * vice versa, that's the point where the data will stop
             * being the same element or will repeat, and if the
             * buffer starts with an all zero multi-index up to this
             * point, gives us the size of the outer reduce loop.
             */
            if((stride0op[iop] && (strides[iop] != 0)) ||
                        (!stride0op[iop] &&
                         (strides[iop] == 0) &&
                         (op_itflags[iop]&NPY_OP_ITFLAG_REDUCE))) {
                NPY_IT_DBG_PRINT1("Iterator: Reduce operation limits "
                                    "buffer to %d\n", (int)reducespace);
                /*
                 * This terminates the outer level of our double loop.
                 */
                if (count <= reducespace) {
                    return count * (*reduce_innersize);
                }
                else {
                    return reducespace * (*reduce_innersize);
                }
            }
        }

        shape = NAD_SHAPE(axisdata);
        coord = NAD_INDEX(axisdata);
        if (coord != 0) {
            nonzerocoord = 1;
        }
        reducespace += (shape-coord-1) * factor;
        factor *= shape;
    }

    if (reducespace < count) {
        count = reducespace;
    }
    return count * (*reduce_innersize);
}
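/*
 * Sketch of how the two outputs are meant to be consumed (illustrative
 * only; the variable names below are placeholders, and the real call site
 * is in the iterator's buffer copying code):
 *
 *     npy_intp innersize, outerdim;
 *     npy_intp transfersize = npyiter_checkreducesize(iter, count,
 *                                             &innersize, &outerdim);
 *     // transfersize is always a whole multiple of innersize, so the
 *     // buffer can be filled as (transfersize / innersize) outer chunks
 *     // of innersize contiguous inner elements each.
 */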

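/*
 * Returns NPY_TRUE if any operand of the iterator still has the
 * NPY_OP_ITFLAG_HAS_WRITEBACK flag set (a pending writeback-if-copy
 * resolution), and NPY_FALSE otherwise.  A NULL iterator is treated as
 * having no writeback operands.
 */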
NPY_NO_EXPORT npy_bool
npyiter_has_writeback(NpyIter *iter)
{
    int iop, nop;
    npyiter_opitflags *op_itflags;
    if (iter == NULL) {
        return 0;
    }
    nop = NIT_NOP(iter);
    op_itflags = NIT_OPITFLAGS(iter);

    for (iop=0; iop<nop; iop++) {
        if (op_itflags[iop] & NPY_OP_ITFLAG_HAS_WRITEBACK) {
            return NPY_TRUE;
        }
    }
    return NPY_FALSE;
}
#undef NPY_ITERATOR_IMPLEMENTATION_CODE
