/*
 * This file implements the construction, copying, and destruction
 * aspects of NumPy's nditer.
 *
 * Copyright (c) 2010-2011 by Mark Wiebe (mwwiebe@gmail.com)
 * The University of British Columbia
 *
 * Copyright (c) 2011 Enthought, Inc
 *
 * See LICENSE.txt for the license.
 */
12
#define NPY_NO_DEPRECATED_API NPY_API_VERSION

/* Indicate that this .c file is allowed to include the header */
#define NPY_ITERATOR_IMPLEMENTATION_CODE
#include "nditer_impl.h"

#include "arrayobject.h"
#include "array_coercion.h"
#include "templ_common.h"
#include "array_assign.h"

/* Internal helper functions private to this file */
static int
npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags);
static int
npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
                        const npy_intp *itershape);
static int
npyiter_calculate_ndim(int nop, PyArrayObject **op_in,
                       int oa_ndim);
static int
npyiter_check_per_op_flags(npy_uint32 flags, npyiter_opitflags *op_itflags);
static int
npyiter_prepare_one_operand(PyArrayObject **op,
                        char **op_dataptr,
                        PyArray_Descr *op_request_dtype,
                        PyArray_Descr** op_dtype,
                        npy_uint32 flags,
                        npy_uint32 op_flags, npyiter_opitflags *op_itflags);
static int
npyiter_prepare_operands(int nop,
                    PyArrayObject **op_in,
                    PyArrayObject **op,
                    char **op_dataptr,
                    PyArray_Descr **op_request_dtypes,
                    PyArray_Descr **op_dtype,
                    npy_uint32 flags,
                    npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
                    npy_int8 *out_maskop);
static int
npyiter_check_casting(int nop, PyArrayObject **op,
                    PyArray_Descr **op_dtype,
                    NPY_CASTING casting,
                    npyiter_opitflags *op_itflags);
static int
npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
                    char **op_dataptr,
                    const npy_uint32 *op_flags, int **op_axes,
                    npy_intp const *itershape);
static NPY_INLINE int
npyiter_get_op_axis(int axis, npy_bool *reduction_axis);
static void
npyiter_replace_axisdata(
        NpyIter *iter, int iop, PyArrayObject *op,
        int orig_op_ndim, const int *op_axes);
static void
npyiter_compute_index_strides(NpyIter *iter, npy_uint32 flags);
static void
npyiter_apply_forced_iteration_order(NpyIter *iter, NPY_ORDER order);
static void
npyiter_flip_negative_strides(NpyIter *iter);
static void
npyiter_reverse_axis_ordering(NpyIter *iter);
static void
npyiter_find_best_axis_ordering(NpyIter *iter);
static PyArray_Descr *
npyiter_get_common_dtype(int nop, PyArrayObject **op,
                        const npyiter_opitflags *op_itflags, PyArray_Descr **op_dtype,
                        PyArray_Descr **op_request_dtypes,
                        int only_inputs);
static PyArrayObject *
npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
                npy_uint32 flags, npyiter_opitflags *op_itflags,
                int op_ndim, npy_intp const *shape,
                PyArray_Descr *op_dtype, const int *op_axes);
static int
npyiter_allocate_arrays(NpyIter *iter,
                        npy_uint32 flags,
                        PyArray_Descr **op_dtype, PyTypeObject *subtype,
                        const npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
                        int **op_axes);
static void
npyiter_get_priority_subtype(int nop, PyArrayObject **op,
                            const npyiter_opitflags *op_itflags,
                            double *subtype_priority, PyTypeObject **subtype);
static int
npyiter_allocate_transfer_functions(NpyIter *iter);

/*NUMPY_API
102
 * Allocate a new iterator for multiple array objects, and advanced
103
 * options for controlling the broadcasting, shape, and buffer size.
104
 */
105
NPY_NO_EXPORT NpyIter *
106 1
NpyIter_AdvancedNew(int nop, PyArrayObject **op_in, npy_uint32 flags,
107
                 NPY_ORDER order, NPY_CASTING casting,
108
                 npy_uint32 *op_flags,
109
                 PyArray_Descr **op_request_dtypes,
110
                 int oa_ndim, int **op_axes, npy_intp *itershape,
111
                 npy_intp buffersize)
112
{
113 1
    npy_uint32 itflags = NPY_ITFLAG_IDENTPERM;
114
    int idim, ndim;
115
    int iop;
116

117
    /* The iterator being constructed */
118
    NpyIter *iter;
119

120
    /* Per-operand values */
121
    PyArrayObject **op;
122
    PyArray_Descr **op_dtype;
123
    npyiter_opitflags *op_itflags;
124
    char **op_dataptr;
125

126
    npy_int8 *perm;
127 1
    NpyIter_BufferData *bufferdata = NULL;
128 1
    int any_allocate = 0, any_missing_dtypes = 0, need_subtype = 0;
129

130
    /* The subtype for automatically allocated outputs */
131 1
    double subtype_priority = NPY_PRIORITY;
132 1
    PyTypeObject *subtype = &PyArray_Type;
133

134
#if NPY_IT_CONSTRUCTION_TIMING
135
    npy_intp c_temp,
136
            c_start,
137
            c_check_op_axes,
138
            c_check_global_flags,
139
            c_calculate_ndim,
140
            c_malloc,
141
            c_prepare_operands,
142
            c_fill_axisdata,
143
            c_compute_index_strides,
144
            c_apply_forced_iteration_order,
145
            c_find_best_axis_ordering,
146
            c_get_priority_subtype,
147
            c_find_output_common_dtype,
148
            c_check_casting,
149
            c_allocate_arrays,
150
            c_coalesce_axes,
151
            c_prepare_buffers;
152
#endif
153

154
    NPY_IT_TIME_POINT(c_start);
155

156 1
    if (nop > NPY_MAXARGS) {
157 0
        PyErr_Format(PyExc_ValueError,
158
            "Cannot construct an iterator with more than %d operands "
159
            "(%d were requested)", NPY_MAXARGS, nop);
160 0
        return NULL;
161
    }
162

163
    /*
164
     * Before 1.8, if `oa_ndim == 0`, this meant `op_axes != NULL` was an error.
165
     * With 1.8, `oa_ndim == -1` takes this role, while op_axes in that case
166
     * enforces a 0-d iterator. Using `oa_ndim == 0` with `op_axes == NULL`
167
     * is thus an error in 1.13 after deprecation.
168
     */
169 1
    if ((oa_ndim == 0) && (op_axes == NULL)) {
170 0
        PyErr_Format(PyExc_ValueError,
171
            "Using `oa_ndim == 0` when `op_axes` is NULL. "
172
            "Use `oa_ndim == -1` or the MultiNew "
173
            "iterator for NumPy <1.8 compatibility");
174 0
        return NULL;
175
    }
176

177
    /* Error check 'oa_ndim' and 'op_axes', which must be used together */
178 1
    if (!npyiter_check_op_axes(nop, oa_ndim, op_axes, itershape)) {
179
        return NULL;
180
    }
181

182
    NPY_IT_TIME_POINT(c_check_op_axes);
183

184
    /* Check the global iterator flags */
185 1
    if (!npyiter_check_global_flags(flags, &itflags)) {
186
        return NULL;
187
    }
188

189
    NPY_IT_TIME_POINT(c_check_global_flags);
190

191
    /* Calculate how many dimensions the iterator should have */
192 1
    ndim = npyiter_calculate_ndim(nop, op_in, oa_ndim);
193

194
    NPY_IT_TIME_POINT(c_calculate_ndim);
195

196
    /* Allocate memory for the iterator */
197 1
    iter = (NpyIter*)
198 1
                PyObject_Malloc(NIT_SIZEOF_ITERATOR(itflags, ndim, nop));
199

200
    NPY_IT_TIME_POINT(c_malloc);
201

202
    /* Fill in the basic data */
203 1
    NIT_ITFLAGS(iter) = itflags;
204 1
    NIT_NDIM(iter) = ndim;
205 1
    NIT_NOP(iter) = nop;
206 1
    NIT_MASKOP(iter) = -1;
207 1
    NIT_ITERINDEX(iter) = 0;
208 1
    memset(NIT_BASEOFFSETS(iter), 0, (nop+1)*NPY_SIZEOF_INTP);
209

210 1
    op = NIT_OPERANDS(iter);
211 1
    op_dtype = NIT_DTYPES(iter);
212 1
    op_itflags = NIT_OPITFLAGS(iter);
213 1
    op_dataptr = NIT_RESETDATAPTR(iter);
214

215
    /* Prepare all the operands */
216 1
    if (!npyiter_prepare_operands(nop, op_in, op, op_dataptr,
217
                        op_request_dtypes, op_dtype,
218
                        flags,
219
                        op_flags, op_itflags,
220
                        &NIT_MASKOP(iter))) {
221 1
        PyObject_Free(iter);
222 1
        return NULL;
223
    }
224
    /* Set resetindex to zero as well (it's just after the resetdataptr) */
225 1
    op_dataptr[nop] = 0;
226

227
    NPY_IT_TIME_POINT(c_prepare_operands);
228

229
    /*
230
     * Initialize buffer data (must set the buffers and transferdata
231
     * to NULL before we might deallocate the iterator).
232
     */
233 1
    if (itflags & NPY_ITFLAG_BUFFER) {
234 1
        bufferdata = NIT_BUFFERDATA(iter);
235 1
        NBF_SIZE(bufferdata) = 0;
236 1
        memset(NBF_BUFFERS(bufferdata), 0, nop*NPY_SIZEOF_INTP);
237 1
        memset(NBF_PTRS(bufferdata), 0, nop*NPY_SIZEOF_INTP);
238 1
        memset(NBF_READTRANSFERDATA(bufferdata), 0, nop*NPY_SIZEOF_INTP);
239 1
        memset(NBF_WRITETRANSFERDATA(bufferdata), 0, nop*NPY_SIZEOF_INTP);
240
    }
241

242
    /* Fill in the AXISDATA arrays and set the ITERSIZE field */
243 1
    if (!npyiter_fill_axisdata(iter, flags, op_itflags, op_dataptr,
244
                                        op_flags, op_axes, itershape)) {
245 1
        NpyIter_Deallocate(iter);
246 1
        return NULL;
247
    }
248

249
    NPY_IT_TIME_POINT(c_fill_axisdata);
250

251 1
    if (itflags & NPY_ITFLAG_BUFFER) {
252
        /*
253
         * If buffering is enabled and no buffersize was given, use a default
254
         * chosen to be big enough to get some amortization benefits, but
255
         * small enough to be cache-friendly.
256
         */
257 1
        if (buffersize <= 0) {
258 1
            buffersize = NPY_BUFSIZE;
259
        }
260
        /* No point in a buffer bigger than the iteration size */
261 1
        if (buffersize > NIT_ITERSIZE(iter)) {
262 1
            buffersize = NIT_ITERSIZE(iter);
263
        }
264 1
        NBF_BUFFERSIZE(bufferdata) = buffersize;
265

266
        /*
267
         * Initialize for use in FirstVisit, which may be called before
268
         * the buffers are filled and the reduce pos is updated.
269
         */
270 1
        NBF_REDUCE_POS(bufferdata) = 0;
271
    }
272

273
    /*
274
     * If an index was requested, compute the strides for it.
275
     * Note that we must do this before changing the order of the
276
     * axes
277
     */
278 1
    npyiter_compute_index_strides(iter, flags);
279

280
    NPY_IT_TIME_POINT(c_compute_index_strides);
281

282
    /* Initialize the perm to the identity */
283 1
    perm = NIT_PERM(iter);
284 1
    for(idim = 0; idim < ndim; ++idim) {
285 1
        perm[idim] = (npy_int8)idim;
286
    }
287

288
    /*
289
     * If an iteration order is being forced, apply it.
290
     */
291 1
    npyiter_apply_forced_iteration_order(iter, order);
292 1
    itflags = NIT_ITFLAGS(iter);
293

294
    NPY_IT_TIME_POINT(c_apply_forced_iteration_order);
295

296
    /* Set some flags for allocated outputs */
297 1
    for (iop = 0; iop < nop; ++iop) {
298 1
        if (op[iop] == NULL) {
299
            /* Flag this so later we can avoid flipping axes */
300 1
            any_allocate = 1;
301
            /* If a subtype may be used, indicate so */
302 1
            if (!(op_flags[iop] & NPY_ITER_NO_SUBTYPE)) {
303 1
                need_subtype = 1;
304
            }
305
            /*
306
             * If the data type wasn't provided, will need to
307
             * calculate it.
308
             */
309 1
            if (op_dtype[iop] == NULL) {
310 1
                any_missing_dtypes = 1;
311
            }
312
        }
313
    }
314

315
    /*
316
     * If the ordering was not forced, reorder the axes
317
     * and flip negative strides to find the best one.
318
     */
319 1
    if (!(itflags & NPY_ITFLAG_FORCEDORDER)) {
320 1
        if (ndim > 1) {
321 1
            npyiter_find_best_axis_ordering(iter);
322
        }
323
        /*
324
         * If there's an output being allocated, we must not negate
325
         * any strides.
326
         */
327 1
        if (!any_allocate && !(flags & NPY_ITER_DONT_NEGATE_STRIDES)) {
328 1
            npyiter_flip_negative_strides(iter);
329
        }
330 1
        itflags = NIT_ITFLAGS(iter);
331
    }
332

333
    NPY_IT_TIME_POINT(c_find_best_axis_ordering);
334

335 1
    if (need_subtype) {
336 1
        npyiter_get_priority_subtype(nop, op, op_itflags,
337
                                     &subtype_priority, &subtype);
338
    }
339

340
    NPY_IT_TIME_POINT(c_get_priority_subtype);
341

342
    /*
343
     * If an automatically allocated output didn't have a specified
344
     * dtype, we need to figure it out now, before allocating the outputs.
345
     */
346 1
    if (any_missing_dtypes || (flags & NPY_ITER_COMMON_DTYPE)) {
347
        PyArray_Descr *dtype;
348 1
        int only_inputs = !(flags & NPY_ITER_COMMON_DTYPE);
349

350 1
        op = NIT_OPERANDS(iter);
351 1
        op_dtype = NIT_DTYPES(iter);
352

353 1
        dtype = npyiter_get_common_dtype(nop, op,
354
                                    op_itflags, op_dtype,
355
                                    op_request_dtypes,
356
                                    only_inputs);
357 1
        if (dtype == NULL) {
358 1
            NpyIter_Deallocate(iter);
359 1
            return NULL;
360
        }
361 1
        if (flags & NPY_ITER_COMMON_DTYPE) {
362
            NPY_IT_DBG_PRINT("Iterator: Replacing all data types\n");
363
            /* Replace all the data types */
364 1
            for (iop = 0; iop < nop; ++iop) {
365 1
                if (op_dtype[iop] != dtype) {
366 1
                    Py_XDECREF(op_dtype[iop]);
367 1
                    Py_INCREF(dtype);
368 1
                    op_dtype[iop] = dtype;
369
                }
370
            }
371
        }
372
        else {
373
            NPY_IT_DBG_PRINT("Iterator: Setting unset output data types\n");
374
            /* Replace the NULL data types */
375 1
            for (iop = 0; iop < nop; ++iop) {
376 1
                if (op_dtype[iop] == NULL) {
377 1
                    Py_INCREF(dtype);
378 1
                    op_dtype[iop] = dtype;
379
                }
380
            }
381
        }
382 1
        Py_DECREF(dtype);
383
    }
384

385
    NPY_IT_TIME_POINT(c_find_output_common_dtype);
386

387
    /*
388
     * All of the data types have been settled, so it's time
389
     * to check that data type conversions are following the
390
     * casting rules.
391
     */
392 1
    if (!npyiter_check_casting(nop, op, op_dtype, casting, op_itflags)) {
393 1
        NpyIter_Deallocate(iter);
394 1
        return NULL;
395
    }
396

397
    NPY_IT_TIME_POINT(c_check_casting);
398

399
    /*
400
     * At this point, the iteration order has been finalized. so
401
     * any allocation of ops that were NULL, or any temporary
402
     * copying due to casting/byte order/alignment can be
403
     * done now using a memory layout matching the iterator.
404
     */
405 1
    if (!npyiter_allocate_arrays(iter, flags, op_dtype, subtype, op_flags,
406
                            op_itflags, op_axes)) {
407 1
        NpyIter_Deallocate(iter);
408 1
        return NULL;
409
    }
410

411
    NPY_IT_TIME_POINT(c_allocate_arrays);
412

413
    /*
414
     * Finally, if a multi-index wasn't requested,
415
     * it may be possible to coalesce some axes together.
416
     */
417 1
    if (ndim > 1 && !(itflags & NPY_ITFLAG_HASMULTIINDEX)) {
418 1
        npyiter_coalesce_axes(iter);
419
        /*
420
         * The operation may have changed the layout, so we have to
421
         * get the internal pointers again.
422
         */
423 1
        itflags = NIT_ITFLAGS(iter);
424 1
        ndim = NIT_NDIM(iter);
425 1
        op = NIT_OPERANDS(iter);
426 1
        op_dtype = NIT_DTYPES(iter);
427 1
        op_itflags = NIT_OPITFLAGS(iter);
428 1
        op_dataptr = NIT_RESETDATAPTR(iter);
429
    }
430

431
    NPY_IT_TIME_POINT(c_coalesce_axes);
432

433
    /*
434
     * Now that the axes are finished, check whether we can apply
435
     * the single iteration optimization to the iternext function.
436
     */
437 1
    if (!(itflags & NPY_ITFLAG_BUFFER)) {
438 1
        NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
439 1
        if (itflags & NPY_ITFLAG_EXLOOP) {
440 1
            if (NIT_ITERSIZE(iter) == NAD_SHAPE(axisdata)) {
441 1
                NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
442
            }
443
        }
444 1
        else if (NIT_ITERSIZE(iter) == 1) {
445 1
            NIT_ITFLAGS(iter) |= NPY_ITFLAG_ONEITERATION;
446
        }
447
    }
448

449
    /*
450
     * If REFS_OK was specified, check whether there are any
451
     * reference arrays and flag it if so.
452
     */
453 1
    if (flags & NPY_ITER_REFS_OK) {
454 1
        for (iop = 0; iop < nop; ++iop) {
455 1
            PyArray_Descr *rdt = op_dtype[iop];
456 1
            if ((rdt->flags & (NPY_ITEM_REFCOUNT |
457
                                     NPY_ITEM_IS_POINTER |
458
                                     NPY_NEEDS_PYAPI)) != 0) {
459
                /* Iteration needs API access */
460 1
                NIT_ITFLAGS(iter) |= NPY_ITFLAG_NEEDSAPI;
461
            }
462
        }
463
    }
464

465
    /* If buffering is set without delayed allocation */
466 1
    if (itflags & NPY_ITFLAG_BUFFER) {
467 1
        if (!npyiter_allocate_transfer_functions(iter)) {
468 1
            NpyIter_Deallocate(iter);
469 1
            return NULL;
470
        }
471 1
        if (!(itflags & NPY_ITFLAG_DELAYBUF)) {
472
            /* Allocate the buffers */
473 1
            if (!npyiter_allocate_buffers(iter, NULL)) {
474 0
                NpyIter_Deallocate(iter);
475 0
                return NULL;
476
            }
477

478
            /* Prepare the next buffers and set iterend/size */
479 1
            if (npyiter_copy_to_buffers(iter, NULL) < 0) {
480 0
                NpyIter_Deallocate(iter);
481 0
                return NULL;
482
            }
483
        }
484
    }
485

486
    NPY_IT_TIME_POINT(c_prepare_buffers);
487

488
#if NPY_IT_CONSTRUCTION_TIMING
489
    printf("\nIterator construction timing:\n");
490
    NPY_IT_PRINT_TIME_START(c_start);
491
    NPY_IT_PRINT_TIME_VAR(c_check_op_axes);
492
    NPY_IT_PRINT_TIME_VAR(c_check_global_flags);
493
    NPY_IT_PRINT_TIME_VAR(c_calculate_ndim);
494
    NPY_IT_PRINT_TIME_VAR(c_malloc);
495
    NPY_IT_PRINT_TIME_VAR(c_prepare_operands);
496
    NPY_IT_PRINT_TIME_VAR(c_fill_axisdata);
497
    NPY_IT_PRINT_TIME_VAR(c_compute_index_strides);
498
    NPY_IT_PRINT_TIME_VAR(c_apply_forced_iteration_order);
499
    NPY_IT_PRINT_TIME_VAR(c_find_best_axis_ordering);
500
    NPY_IT_PRINT_TIME_VAR(c_get_priority_subtype);
501
    NPY_IT_PRINT_TIME_VAR(c_find_output_common_dtype);
502
    NPY_IT_PRINT_TIME_VAR(c_check_casting);
503
    NPY_IT_PRINT_TIME_VAR(c_allocate_arrays);
504
    NPY_IT_PRINT_TIME_VAR(c_coalesce_axes);
505
    NPY_IT_PRINT_TIME_VAR(c_prepare_buffers);
506
    printf("\n");
507
#endif
508

509
    return iter;
510
}
511

512
/*NUMPY_API
513
 * Allocate a new iterator for more than one array object, using
514
 * standard NumPy broadcasting rules and the default buffer size.
515
 */
516
NPY_NO_EXPORT NpyIter *
517 1
NpyIter_MultiNew(int nop, PyArrayObject **op_in, npy_uint32 flags,
518
                 NPY_ORDER order, NPY_CASTING casting,
519
                 npy_uint32 *op_flags,
520
                 PyArray_Descr **op_request_dtypes)
521
{
522 1
    return NpyIter_AdvancedNew(nop, op_in, flags, order, casting,
523
                            op_flags, op_request_dtypes,
524
                            -1, NULL, NULL, 0);
525
}
526

527
/*NUMPY_API
528
 * Allocate a new iterator for one array object.
529
 */
530
NPY_NO_EXPORT NpyIter *
531 1
NpyIter_New(PyArrayObject *op, npy_uint32 flags,
532
                  NPY_ORDER order, NPY_CASTING casting,
533
                  PyArray_Descr* dtype)
534
{
535
    /* Split the flags into separate global and op flags */
536 1
    npy_uint32 op_flags = flags & NPY_ITER_PER_OP_FLAGS;
537 1
    flags &= NPY_ITER_GLOBAL_FLAGS;
538

539 1
    return NpyIter_AdvancedNew(1, &op, flags, order, casting,
540
                            &op_flags, &dtype,
541
                            -1, NULL, NULL, 0);
542
}
543

544
/*NUMPY_API
545
 * Makes a copy of the iterator
546
 */
547
NPY_NO_EXPORT NpyIter *
548 1
NpyIter_Copy(NpyIter *iter)
549
{
550 1
    npy_uint32 itflags = NIT_ITFLAGS(iter);
551 1
    int ndim = NIT_NDIM(iter);
552 1
    int iop, nop = NIT_NOP(iter);
553 1
    int out_of_memory = 0;
554

555
    npy_intp size;
556
    NpyIter *newiter;
557
    PyArrayObject **objects;
558
    PyArray_Descr **dtypes;
559

560
    /* Allocate memory for the new iterator */
561 1
    size = NIT_SIZEOF_ITERATOR(itflags, ndim, nop);
562 1
    newiter = (NpyIter*)PyObject_Malloc(size);
563

564
    /* Copy the raw values to the new iterator */
565 1
    memcpy(newiter, iter, size);
566

567
    /* Take ownership of references to the operands and dtypes */
568 1
    objects = NIT_OPERANDS(newiter);
569 1
    dtypes = NIT_DTYPES(newiter);
570 1
    for (iop = 0; iop < nop; ++iop) {
571 1
        Py_INCREF(objects[iop]);
572 1
        Py_INCREF(dtypes[iop]);
573
    }
574

575
    /* Allocate buffers and make copies of the transfer data if necessary */
576 1
    if (itflags & NPY_ITFLAG_BUFFER) {
577
        NpyIter_BufferData *bufferdata;
578
        npy_intp buffersize, itemsize;
579
        char **buffers;
580
        NpyAuxData **readtransferdata, **writetransferdata;
581

582 1
        bufferdata = NIT_BUFFERDATA(newiter);
583 1
        buffers = NBF_BUFFERS(bufferdata);
584 1
        readtransferdata = NBF_READTRANSFERDATA(bufferdata);
585 1
        writetransferdata = NBF_WRITETRANSFERDATA(bufferdata);
586 1
        buffersize = NBF_BUFFERSIZE(bufferdata);
587

588 1
        for (iop = 0; iop < nop; ++iop) {
589 1
            if (buffers[iop] != NULL) {
590 1
                if (out_of_memory) {
591 0
                    buffers[iop] = NULL;
592
                }
593
                else {
594 1
                    itemsize = dtypes[iop]->elsize;
595 1
                    buffers[iop] = PyArray_malloc(itemsize*buffersize);
596 1
                    if (buffers[iop] == NULL) {
597 0
                        out_of_memory = 1;
598
                    }
599
                }
600
            }
601

602 1
            if (readtransferdata[iop] != NULL) {
603 1
                if (out_of_memory) {
604 0
                    readtransferdata[iop] = NULL;
605
                }
606
                else {
607 1
                    readtransferdata[iop] =
608 1
                          NPY_AUXDATA_CLONE(readtransferdata[iop]);
609 1
                    if (readtransferdata[iop] == NULL) {
610 0
                        out_of_memory = 1;
611
                    }
612
                }
613
            }
614

615 1
            if (writetransferdata[iop] != NULL) {
616 0
                if (out_of_memory) {
617 0
                    writetransferdata[iop] = NULL;
618
                }
619
                else {
620 0
                    writetransferdata[iop] =
621 0
                          NPY_AUXDATA_CLONE(writetransferdata[iop]);
622 0
                    if (writetransferdata[iop] == NULL) {
623 0
                        out_of_memory = 1;
624
                    }
625
                }
626
            }
627
        }
628

629
        /* Initialize the buffers to the current iterindex */
630 1
        if (!out_of_memory && NBF_SIZE(bufferdata) > 0) {
631 1
            npyiter_goto_iterindex(newiter, NIT_ITERINDEX(newiter));
632

633
            /* Prepare the next buffers and set iterend/size */
634 1
            npyiter_copy_to_buffers(newiter, NULL);
635
        }
636
    }
637

638 1
    if (out_of_memory) {
639 0
        NpyIter_Deallocate(newiter);
640 0
        PyErr_NoMemory();
641 0
        return NULL;
642
    }
643

644
    return newiter;
645
}
646

647
/*NUMPY_API
648
 * Deallocate an iterator.
649
 *
650
 * To correctly work when an error is in progress, we have to check
651
 * `PyErr_Occurred()`. This is necessary when buffers are not finalized
652
 * or WritebackIfCopy is used. We could avoid that check by exposing a new
653
 * function which is passed in whether or not a Python error is already set.
654
 */
655
NPY_NO_EXPORT int
656 1
NpyIter_Deallocate(NpyIter *iter)
657
{
658 1
    int success = PyErr_Occurred() == NULL;
659

660
    npy_uint32 itflags;
661
    /*int ndim = NIT_NDIM(iter);*/
662
    int iop, nop;
663
    PyArray_Descr **dtype;
664
    PyArrayObject **object;
665
    npyiter_opitflags *op_itflags;
666

667 1
    if (iter == NULL) {
668
        return success;
669
    }
670

671 1
    itflags = NIT_ITFLAGS(iter);
672 1
    nop = NIT_NOP(iter);
673 1
    dtype = NIT_DTYPES(iter);
674 1
    object = NIT_OPERANDS(iter);
675 1
    op_itflags = NIT_OPITFLAGS(iter);
676

677
    /* Deallocate any buffers and buffering data */
678 1
    if (itflags & NPY_ITFLAG_BUFFER) {
679
        /* Ensure no data is held by the buffers before they are cleared */
680 1
        if (success) {
681 1
            if (npyiter_copy_from_buffers(iter) < 0) {
682 0
                success = NPY_FAIL;
683
            }
684
        }
685
        else {
686 1
            npyiter_clear_buffers(iter);
687
        }
688

689 1
        NpyIter_BufferData *bufferdata = NIT_BUFFERDATA(iter);
690
        char **buffers;
691
        NpyAuxData **transferdata;
692

693
        /* buffers */
694 1
        buffers = NBF_BUFFERS(bufferdata);
695 1
        for (iop = 0; iop < nop; ++iop, ++buffers) {
696 1
            PyArray_free(*buffers);
697
        }
698
        /* read bufferdata */
699 1
        transferdata = NBF_READTRANSFERDATA(bufferdata);
700 1
        for(iop = 0; iop < nop; ++iop, ++transferdata) {
701 1
            if (*transferdata) {
702 1
                NPY_AUXDATA_FREE(*transferdata);
703
            }
704
        }
705
        /* write bufferdata */
706 1
        transferdata = NBF_WRITETRANSFERDATA(bufferdata);
707 1
        for(iop = 0; iop < nop; ++iop, ++transferdata) {
708 1
            if (*transferdata) {
709 1
                NPY_AUXDATA_FREE(*transferdata);
710
            }
711
        }
712
    }
713

714
    /*
715
     * Deallocate all the dtypes and objects that were iterated and resolve
716
     * any writeback buffers created by the iterator.
717
     */
718 1
    for (iop = 0; iop < nop; ++iop, ++dtype, ++object) {
719 1
        if (op_itflags[iop] & NPY_OP_ITFLAG_HAS_WRITEBACK) {
720 1
            if (success && PyArray_ResolveWritebackIfCopy(*object) < 0) {
721
                success = 0;
722
            }
723
            else {
724 1
                PyArray_DiscardWritebackIfCopy(*object);
725
            }
726
        }
727 1
        Py_XDECREF(*dtype);
728 1
        Py_XDECREF(*object);
729
    }
730

731
    /* Deallocate the iterator memory */
732 1
    PyObject_Free(iter);
733 1
    return success;
734
}
735

736

737
/* Checks 'flags' for (C|F)_ORDER_INDEX, MULTI_INDEX, and EXTERNAL_LOOP,
738
 * setting the appropriate internal flags in 'itflags'.
739
 *
740
 * Returns 1 on success, 0 on error.
741
 */
742
static int
743 1
npyiter_check_global_flags(npy_uint32 flags, npy_uint32* itflags)
744
{
745 1
    if ((flags & NPY_ITER_PER_OP_FLAGS) != 0) {
746 0
        PyErr_SetString(PyExc_ValueError,
747
                    "A per-operand flag was passed as a global flag "
748
                    "to the iterator constructor");
749 0
        return 0;
750
    }
751

752
    /* Check for an index */
753 1
    if (flags & (NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) {
754 1
        if ((flags & (NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) ==
755
                    (NPY_ITER_C_INDEX | NPY_ITER_F_INDEX)) {
756 1
            PyErr_SetString(PyExc_ValueError,
757
                    "Iterator flags C_INDEX and "
758
                    "F_INDEX cannot both be specified");
759 1
            return 0;
760
        }
761 1
        (*itflags) |= NPY_ITFLAG_HASINDEX;
762
    }
763
    /* Check if a multi-index was requested */
764 1
    if (flags & NPY_ITER_MULTI_INDEX) {
765
        /*
766
         * This flag primarily disables dimension manipulations that
767
         * would produce an incorrect multi-index.
768
         */
769 1
        (*itflags) |= NPY_ITFLAG_HASMULTIINDEX;
770
    }
771
    /* Check if the caller wants to handle inner iteration */
772 1
    if (flags & NPY_ITER_EXTERNAL_LOOP) {
773 1
        if ((*itflags) & (NPY_ITFLAG_HASINDEX | NPY_ITFLAG_HASMULTIINDEX)) {
774 1
            PyErr_SetString(PyExc_ValueError,
775
                    "Iterator flag EXTERNAL_LOOP cannot be used "
776
                    "if an index or multi-index is being tracked");
777 1
            return 0;
778
        }
779 1
        (*itflags) |= NPY_ITFLAG_EXLOOP;
780
    }
781
    /* Ranged */
782 1
    if (flags & NPY_ITER_RANGED) {
783 1
        (*itflags) |= NPY_ITFLAG_RANGE;
784 1
        if ((flags & NPY_ITER_EXTERNAL_LOOP) &&
785
                                    !(flags & NPY_ITER_BUFFERED)) {
786 0
            PyErr_SetString(PyExc_ValueError,
787
                    "Iterator flag RANGED cannot be used with "
788
                    "the flag EXTERNAL_LOOP unless "
789
                    "BUFFERED is also enabled");
790 0
            return 0;
791
        }
792
    }
793
    /* Buffering */
794 1
    if (flags & NPY_ITER_BUFFERED) {
795 1
        (*itflags) |= NPY_ITFLAG_BUFFER;
796 1
        if (flags & NPY_ITER_GROWINNER) {
797 1
            (*itflags) |= NPY_ITFLAG_GROWINNER;
798
        }
799 1
        if (flags & NPY_ITER_DELAY_BUFALLOC) {
800 1
            (*itflags) |= NPY_ITFLAG_DELAYBUF;
801
        }
802
    }
803

804
    return 1;
805
}
806

807
static int
808 1
npyiter_check_op_axes(int nop, int oa_ndim, int **op_axes,
809
                        const npy_intp *itershape)
810
{
811
    char axes_dupcheck[NPY_MAXDIMS];
812
    int iop, idim;
813

814 1
    if (oa_ndim < 0) {
815
        /*
816
         * If `oa_ndim < 0`, `op_axes` and `itershape` are signalled to
817
         * be unused and should be NULL. (Before NumPy 1.8 this was
818
         * signalled by `oa_ndim == 0`.)
819
         */
820 1
        if (op_axes != NULL || itershape != NULL) {
821 0
            PyErr_Format(PyExc_ValueError,
822
                    "If 'op_axes' or 'itershape' is not NULL in the iterator "
823
                    "constructor, 'oa_ndim' must be zero or greater");
824 0
            return 0;
825
        }
826
        return 1;
827
    }
828 1
    if (oa_ndim > NPY_MAXDIMS) {
829 0
        PyErr_Format(PyExc_ValueError,
830
                "Cannot construct an iterator with more than %d dimensions "
831
                "(%d were requested for op_axes)",
832
                NPY_MAXDIMS, oa_ndim);
833 0
        return 0;
834
    }
835 1
    if (op_axes == NULL) {
836 0
        PyErr_Format(PyExc_ValueError,
837
                "If 'oa_ndim' is zero or greater in the iterator "
838
                "constructor, then op_axes cannot be NULL");
839 0
        return 0;
840
    }
841

842
    /* Check that there are no duplicates in op_axes */
843 1
    for (iop = 0; iop < nop; ++iop) {
844 1
        int *axes = op_axes[iop];
845 1
        if (axes != NULL) {
846 1
            memset(axes_dupcheck, 0, NPY_MAXDIMS);
847 1
            for (idim = 0; idim < oa_ndim; ++idim) {
848 1
                int i = npyiter_get_op_axis(axes[idim], NULL);
849

850 1
                if (i >= 0) {
851 1
                    if (i >= NPY_MAXDIMS) {
852 0
                        PyErr_Format(PyExc_ValueError,
853
                                "The 'op_axes' provided to the iterator "
854
                                "constructor for operand %d "
855
                                "contained invalid "
856
                                "values %d", iop, i);
857 0
                        return 0;
858
                    }
859 1
                    else if (axes_dupcheck[i] == 1) {
860 1
                        PyErr_Format(PyExc_ValueError,
861
                                "The 'op_axes' provided to the iterator "
862
                                "constructor for operand %d "
863
                                "contained duplicate "
864
                                "value %d", iop, i);
865 1
                        return 0;
866
                    }
867
                    else {
868 1
                        axes_dupcheck[i] = 1;
869
                    }
870
                }
871
            }
872
        }
873
    }
874

875
    return 1;
876
}
877

878
static int
879
npyiter_calculate_ndim(int nop, PyArrayObject **op_in,
880
                       int oa_ndim)
881
{
882
    /* If 'op_axes' is being used, force 'ndim' */
883 1
    if (oa_ndim >= 0 ) {
884
        return oa_ndim;
885
    }
886
    /* Otherwise it's the maximum 'ndim' from the operands */
887
    else {
888
        int ndim = 0, iop;
889

890 1
        for (iop = 0; iop < nop; ++iop) {
891 1
            if (op_in[iop] != NULL) {
892 1
                int ondim = PyArray_NDIM(op_in[iop]);
893 1
                if (ondim > ndim) {
894 1
                    ndim = ondim;
895
                }
896
            }
897

898
        }
899

900
        return ndim;
901
    }
902
}
903

904
/*
905
 * Checks the per-operand input flags, and fills in op_itflags.
906
 *
907
 * Returns 1 on success, 0 on failure.
908
 */
909
static int
910 1
npyiter_check_per_op_flags(npy_uint32 op_flags, npyiter_opitflags *op_itflags)
911
{
912 1
    if ((op_flags & NPY_ITER_GLOBAL_FLAGS) != 0) {
913 0
        PyErr_SetString(PyExc_ValueError,
914
                    "A global iterator flag was passed as a per-operand flag "
915
                    "to the iterator constructor");
916 0
        return 0;
917
    }
918

919
    /* Check the read/write flags */
920 1
    if (op_flags & NPY_ITER_READONLY) {
921
        /* The read/write flags are mutually exclusive */
922 1
        if (op_flags & (NPY_ITER_READWRITE|NPY_ITER_WRITEONLY)) {
923 1
            PyErr_SetString(PyExc_ValueError,
924
                    "Only one of the iterator flags READWRITE, "
925
                    "READONLY, and WRITEONLY may be "
926
                    "specified for an operand");
927 1
            return 0;
928
        }
929

930 1
        *op_itflags = NPY_OP_ITFLAG_READ;
931
    }
932 1
    else if (op_flags & NPY_ITER_READWRITE) {
933
        /* The read/write flags are mutually exclusive */
934 1
        if (op_flags & NPY_ITER_WRITEONLY) {
935 1
            PyErr_SetString(PyExc_ValueError,
936
                    "Only one of the iterator flags READWRITE, "
937
                    "READONLY, and WRITEONLY may be "
938
                    "specified for an operand");
939 1
            return 0;
940
        }
941

942 1
        *op_itflags = NPY_OP_ITFLAG_READ|NPY_OP_ITFLAG_WRITE;
943
    }
944 1
    else if(op_flags & NPY_ITER_WRITEONLY) {
945 1
        *op_itflags = NPY_OP_ITFLAG_WRITE;
946
    }
947
    else {
948 1
        PyErr_SetString(PyExc_ValueError,
949
                "None of the iterator flags READWRITE, "
950
                "READONLY, or WRITEONLY were "
951
                "specified for an operand");
952 1
        return 0;
953
    }
954

955
    /* Check the flags for temporary copies */
956 1
    if (((*op_itflags) & NPY_OP_ITFLAG_WRITE) &&
957 1
                (op_flags & (NPY_ITER_COPY |
958
                           NPY_ITER_UPDATEIFCOPY)) == NPY_ITER_COPY) {
959 0
        PyErr_SetString(PyExc_ValueError,
960
                "If an iterator operand is writeable, must use "
961
                "the flag UPDATEIFCOPY instead of "
962
                "COPY");
963 0
        return 0;
964
    }
965

966
    /* Check the flag for a write masked operands */
967 1
    if (op_flags & NPY_ITER_WRITEMASKED) {
968 1
        if (!((*op_itflags) & NPY_OP_ITFLAG_WRITE)) {
969 1
            PyErr_SetString(PyExc_ValueError,
970
                "The iterator flag WRITEMASKED may only "
971
                "be used with READWRITE or WRITEONLY");
972 1
            return 0;
973
        }
974 1
        if ((op_flags & NPY_ITER_ARRAYMASK) != 0) {
975 1
            PyErr_SetString(PyExc_ValueError,
976
                "The iterator flag WRITEMASKED may not "
977
                "be used together with ARRAYMASK");
978 1
            return 0;
979
        }
980 1
        *op_itflags |= NPY_OP_ITFLAG_WRITEMASKED;
981
    }
982

983 1
    if ((op_flags & NPY_ITER_VIRTUAL) != 0) {
984 0
        if ((op_flags & NPY_ITER_READWRITE) == 0) {
985 0
            PyErr_SetString(PyExc_ValueError,
986
                "The iterator flag VIRTUAL should be "
987
                "be used together with READWRITE");
988 0
            return 0;
989
        }
990 0
        *op_itflags |= NPY_OP_ITFLAG_VIRTUAL;
991
    }
992

993
    return 1;
994
}
995

996
/*
997
 * Prepares a a constructor operand.  Assumes a reference to 'op'
998
 * is owned, and that 'op' may be replaced.  Fills in 'op_dataptr',
999
 * 'op_dtype', and may modify 'op_itflags'.
1000
 *
1001
 * Returns 1 on success, 0 on failure.
1002
 */
1003
static int
1004 1
npyiter_prepare_one_operand(PyArrayObject **op,
1005
                        char **op_dataptr,
1006
                        PyArray_Descr *op_request_dtype,
1007
                        PyArray_Descr **op_dtype,
1008
                        npy_uint32 flags,
1009
                        npy_uint32 op_flags, npyiter_opitflags *op_itflags)
1010
{
1011
    /* NULL operands must be automatically allocated outputs */
1012 1
    if (*op == NULL) {
1013
        /* ALLOCATE or VIRTUAL should be enabled */
1014 1
        if ((op_flags & (NPY_ITER_ALLOCATE|NPY_ITER_VIRTUAL)) == 0) {
1015 0
            PyErr_SetString(PyExc_ValueError,
1016
                    "Iterator operand was NULL, but neither the "
1017
                    "ALLOCATE nor the VIRTUAL flag was specified");
1018 0
            return 0;
1019
        }
1020

1021 1
        if (op_flags & NPY_ITER_ALLOCATE) {
1022
            /* Writing should be enabled */
1023 1
            if (!((*op_itflags) & NPY_OP_ITFLAG_WRITE)) {
1024 1
                PyErr_SetString(PyExc_ValueError,
1025
                        "Automatic allocation was requested for an iterator "
1026
                        "operand, but it wasn't flagged for writing");
1027 1
                return 0;
1028
            }
1029
            /*
1030
             * Reading should be disabled if buffering is enabled without
1031
             * also enabling NPY_ITER_DELAY_BUFALLOC.  In all other cases,
1032
             * the caller may initialize the allocated operand to a value
1033
             * before beginning iteration.
1034
             */
1035 1
            if (((flags & (NPY_ITER_BUFFERED |
1036 1
                            NPY_ITER_DELAY_BUFALLOC)) == NPY_ITER_BUFFERED) &&
1037
                    ((*op_itflags) & NPY_OP_ITFLAG_READ)) {
1038 1
                PyErr_SetString(PyExc_ValueError,
1039
                        "Automatic allocation was requested for an iterator "
1040
                        "operand, and it was flagged as readable, but "
1041
                        "buffering  without delayed allocation was enabled");
1042 1
                return 0;
1043
            }
1044

1045
            /* If a requested dtype was provided, use it, otherwise NULL */
1046 1
            Py_XINCREF(op_request_dtype);
1047 1
            *op_dtype = op_request_dtype;
1048
        }
1049
        else {
1050 0
            *op_dtype = NULL;
1051
        }
1052

1053
        /* Specify bool if no dtype was requested for the mask */
1054 1
        if (op_flags & NPY_ITER_ARRAYMASK) {
1055 0
            if (*op_dtype == NULL) {
1056 0
                *op_dtype = PyArray_DescrFromType(NPY_BOOL);
1057 0
                if (*op_dtype == NULL) {
1058
                    return 0;
1059
                }
1060
            }
1061
        }
1062

1063 1
        *op_dataptr = NULL;
1064

1065 1
        return 1;
1066
    }
1067

1068
    /* VIRTUAL operands must be NULL */
1069 1
    if (op_flags & NPY_ITER_VIRTUAL) {
1070 0
        PyErr_SetString(PyExc_ValueError,
1071
                "Iterator operand flag VIRTUAL was specified, "
1072
                "but the operand was not NULL");
1073 0
        return 0;
1074
    }
1075

1076

1077 1
    if (PyArray_Check(*op)) {
1078

1079 1
        if ((*op_itflags) & NPY_OP_ITFLAG_WRITE
1080 1
            && PyArray_FailUnlessWriteable(*op, "operand array with iterator "
1081
                                           "write flag set") < 0) {
1082
            return 0;
1083
        }
1084 1
        if (!(flags & NPY_ITER_ZEROSIZE_OK) && PyArray_SIZE(*op) == 0) {
1085 1
            PyErr_SetString(PyExc_ValueError,
1086
                    "Iteration of zero-sized operands is not enabled");
1087 1
            return 0;
1088
        }
1089 1
        *op_dataptr = PyArray_BYTES(*op);
1090
        /* PyArray_DESCR does not give us a reference */
1091 1
        *op_dtype = PyArray_DESCR(*op);
1092 1
        if (*op_dtype == NULL) {
1093 0
            PyErr_SetString(PyExc_ValueError,
1094
                    "Iterator input operand has no dtype descr");
1095 0
            return 0;
1096
        }
1097 1
        Py_INCREF(*op_dtype);
1098
        /*
1099
         * If references weren't specifically allowed, make sure there
1100
         * are no references in the inputs or requested dtypes.
1101
         */
1102 1
        if (!(flags & NPY_ITER_REFS_OK)) {
1103 1
            PyArray_Descr *dt = PyArray_DESCR(*op);
1104 1
            if (((dt->flags & (NPY_ITEM_REFCOUNT |
1105 1
                           NPY_ITEM_IS_POINTER)) != 0) ||
1106 0
                    (dt != *op_dtype &&
1107 0
                        (((*op_dtype)->flags & (NPY_ITEM_REFCOUNT |
1108
                                             NPY_ITEM_IS_POINTER))) != 0)) {
1109 1
                PyErr_SetString(PyExc_TypeError,
1110
                        "Iterator operand or requested dtype holds "
1111
                        "references, but the REFS_OK flag was not enabled");
1112 1
                return 0;
1113
            }
1114
        }
1115
        /*
1116
         * Checking whether casts are valid is done later, once the
1117
         * final data types have been selected.  For now, just store the
1118
         * requested type.
1119
         */
1120 1
        if (op_request_dtype != NULL) {
1121
            /* We just have a borrowed reference to op_request_dtype */
1122 1
            Py_SETREF(*op_dtype, PyArray_AdaptDescriptorToArray(
1123
                                            *op, (PyObject *)op_request_dtype));
1124 1
            if (*op_dtype == NULL) {
1125
                return 0;
1126
            }
1127
        }
1128

1129
        /* Check if the operand is in the byte order requested */
1130 1
        if (op_flags & NPY_ITER_NBO) {
1131
            /* Check byte order */
1132 1
            if (!PyArray_ISNBO((*op_dtype)->byteorder)) {
1133
                PyArray_Descr *nbo_dtype;
1134

1135
                /* Replace with a new descr which is in native byte order */
1136 1
                nbo_dtype = PyArray_DescrNewByteorder(*op_dtype, NPY_NATIVE);
1137 1
                Py_DECREF(*op_dtype);
1138 1
                *op_dtype = nbo_dtype;
1139

1140
                NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
1141
                                    "because of NPY_ITER_NBO\n");
1142
                /* Indicate that byte order or alignment needs fixing */
1143 1
                *op_itflags |= NPY_OP_ITFLAG_CAST;
1144
            }
1145
        }
1146
        /* Check if the operand is aligned */
1147 1
        if (op_flags & NPY_ITER_ALIGNED) {
1148
            /* Check alignment */
1149 1
            if (!IsAligned(*op)) {
1150
                NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
1151
                                    "because of NPY_ITER_ALIGNED\n");
1152 1
                *op_itflags |= NPY_OP_ITFLAG_CAST;
1153
            }
1154
        }
1155
        /*
1156
         * The check for NPY_ITER_CONTIG can only be done later,
1157
         * once the final iteration order is settled.
1158
         */
1159
    }
1160
    else {
1161 0
        PyErr_SetString(PyExc_ValueError,
1162
                "Iterator inputs must be ndarrays");
1163 0
        return 0;
1164
    }
1165

1166
    return 1;
1167
}
1168

1169
/*
 * Process all the operands, copying new references so further processing
 * can replace the arrays if copying is necessary.
 *
 * On success, 'op' holds new references to the operands, 'op_dtype' holds
 * new dtype references, 'op_dataptr' holds the data pointers, 'op_itflags'
 * is filled in, and '*out_maskop' is the index of the ARRAYMASK operand
 * (or left untouched if there is none).
 *
 * Returns 1 on success, 0 on failure (with all acquired references
 * released via the goto-cleanup at the bottom).
 */
static int
npyiter_prepare_operands(int nop, PyArrayObject **op_in,
                    PyArrayObject **op,
                    char **op_dataptr,
                    PyArray_Descr **op_request_dtypes,
                    PyArray_Descr **op_dtype,
                    npy_uint32 flags,
                    npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
                    npy_int8 *out_maskop)
{
    int iop, i;
    /* Index of the ARRAYMASK operand; -1 means none seen yet */
    npy_int8 maskop = -1;
    int any_writemasked_ops = 0;

    /*
     * Here we just prepare the provided operands.
     */
    for (iop = 0; iop < nop; ++iop) {
        op[iop] = op_in[iop];
        Py_XINCREF(op[iop]);
        /* NULL so the failure path can safely Py_XDECREF this slot */
        op_dtype[iop] = NULL;

        /* Check the readonly/writeonly flags, and fill in op_itflags */
        if (!npyiter_check_per_op_flags(op_flags[iop], &op_itflags[iop])) {
            goto fail_iop;
        }

        /* Extract the operand which is for masked iteration */
        if ((op_flags[iop] & NPY_ITER_ARRAYMASK) != 0) {
            if (maskop != -1) {
                PyErr_SetString(PyExc_ValueError,
                        "Only one iterator operand may receive an "
                        "ARRAYMASK flag");
                goto fail_iop;
            }

            maskop = iop;
            *out_maskop = iop;
        }

        if (op_flags[iop] & NPY_ITER_WRITEMASKED) {
            any_writemasked_ops = 1;
        }

        /*
         * Prepare the operand.  This produces an op_dtype[iop] reference
         * on success.
         */
        if (!npyiter_prepare_one_operand(&op[iop],
                        &op_dataptr[iop],
                        op_request_dtypes ? op_request_dtypes[iop] : NULL,
                        &op_dtype[iop],
                        flags,
                        op_flags[iop], &op_itflags[iop])) {
            goto fail_iop;
        }
    }

    /* If all the operands were NULL, it's an error */
    if (op[0] == NULL) {
        int all_null = 1;
        for (iop = 1; iop < nop; ++iop) {
            if (op[iop] != NULL) {
                all_null = 0;
                break;
            }
        }
        if (all_null) {
            PyErr_SetString(PyExc_ValueError,
                    "At least one iterator operand must be non-NULL");
            goto fail_nop;
        }
    }

    /* WRITEMASKED and ARRAYMASK must appear together or not at all */
    if (any_writemasked_ops && maskop < 0) {
        PyErr_SetString(PyExc_ValueError,
                "An iterator operand was flagged as WRITEMASKED, "
                "but no ARRAYMASK operand was given to supply "
                "the mask");
        goto fail_nop;
    }
    else if (!any_writemasked_ops && maskop >= 0) {
        PyErr_SetString(PyExc_ValueError,
                "An iterator operand was flagged as the ARRAYMASK, "
                "but no WRITEMASKED operands were given to use "
                "the mask");
        goto fail_nop;
    }

    return 1;

    /*
     * Cleanup: fail_nop releases all 'nop' slots; fail_iop releases only
     * the slots [0, iop] that were initialized before the failure.
     */
  fail_nop:
    iop = nop - 1;
  fail_iop:
    for (i = 0; i < iop+1; ++i) {
        Py_XDECREF(op[i]);
        Py_XDECREF(op_dtype[i]);
    }
    return 0;
}
1273

1274
static const char *
1275
npyiter_casting_to_string(NPY_CASTING casting)
1276
{
1277 1
    switch (casting) {
1278
        case NPY_NO_CASTING:
1279
            return "'no'";
1280 1
        case NPY_EQUIV_CASTING:
1281
            return "'equiv'";
1282 1
        case NPY_SAFE_CASTING:
1283
            return "'safe'";
1284 1
        case NPY_SAME_KIND_CASTING:
1285
            return "'same_kind'";
1286 1
        case NPY_UNSAFE_CASTING:
1287
            return "'unsafe'";
1288 0
        default:
1289
            return "<unknown>";
1290
    }
1291
}
1292

1293

1294
static int
1295 1
npyiter_check_casting(int nop, PyArrayObject **op,
1296
                    PyArray_Descr **op_dtype,
1297
                    NPY_CASTING casting,
1298
                    npyiter_opitflags *op_itflags)
1299
{
1300
    int iop;
1301

1302 1
    for(iop = 0; iop < nop; ++iop) {
1303
        NPY_IT_DBG_PRINT1("Iterator: Checking casting for operand %d\n",
1304
                            (int)iop);
1305
#if NPY_IT_DBG_TRACING
1306
        printf("op: ");
1307
        if (op[iop] != NULL) {
1308
            PyObject_Print((PyObject *)PyArray_DESCR(op[iop]), stdout, 0);
1309
        }
1310
        else {
1311
            printf("<null>");
1312
        }
1313
        printf(", iter: ");
1314
        PyObject_Print((PyObject *)op_dtype[iop], stdout, 0);
1315
        printf("\n");
1316
#endif
1317
        /* If the types aren't equivalent, a cast is necessary */
1318 1
        if (op[iop] != NULL && !PyArray_EquivTypes(PyArray_DESCR(op[iop]),
1319 1
                                                     op_dtype[iop])) {
1320
            /* Check read (op -> temp) casting */
1321 1
            if ((op_itflags[iop] & NPY_OP_ITFLAG_READ) &&
1322 1
                        !PyArray_CanCastArrayTo(op[iop],
1323
                                          op_dtype[iop],
1324
                                          casting)) {
1325 1
                PyErr_Format(PyExc_TypeError,
1326
                        "Iterator operand %d dtype could not be cast from "
1327
                        "%R to %R according to the rule %s",
1328
                        iop, PyArray_DESCR(op[iop]), op_dtype[iop],
1329
                        npyiter_casting_to_string(casting));
1330 1
                return 0;
1331
            }
1332
            /* Check write (temp -> op) casting */
1333 1
            if ((op_itflags[iop] & NPY_OP_ITFLAG_WRITE) &&
1334 1
                        !PyArray_CanCastTypeTo(op_dtype[iop],
1335
                                          PyArray_DESCR(op[iop]),
1336
                                          casting)) {
1337 1
                PyErr_Format(PyExc_TypeError,
1338
                        "Iterator requested dtype could not be cast from "
1339
                        "%R to %R, the operand %d dtype, "
1340
                        "according to the rule %s",
1341
                        op_dtype[iop], PyArray_DESCR(op[iop]), iop,
1342
                        npyiter_casting_to_string(casting));
1343 1
                return 0;
1344
            }
1345

1346
            NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
1347
                                "because the types aren't equivalent\n");
1348
            /* Indicate that this operand needs casting */
1349 1
            op_itflags[iop] |= NPY_OP_ITFLAG_CAST;
1350
        }
1351
    }
1352

1353
    return 1;
1354
}
1355

1356
/*
1357
 * Checks that the mask broadcasts to the WRITEMASK REDUCE
1358
 * operand 'iop', but 'iop' never broadcasts to the mask.
1359
 * If 'iop' broadcasts to the mask, the result would be more
1360
 * than one mask value per reduction element, something which
1361
 * is invalid.
1362
 *
1363
 * This check should only be called after all the operands
1364
 * have been filled in.
1365
 *
1366
 * Returns 1 on success, 0 on error.
1367
 */
1368
static int
1369 1
check_mask_for_writemasked_reduction(NpyIter *iter, int iop)
1370
{
1371 1
    npy_uint32 itflags = NIT_ITFLAGS(iter);
1372 1
    int idim, ndim = NIT_NDIM(iter);
1373 1
    int nop = NIT_NOP(iter);
1374 1
    int maskop = NIT_MASKOP(iter);
1375

1376
    NpyIter_AxisData *axisdata;
1377
    npy_intp sizeof_axisdata;
1378

1379 1
    axisdata = NIT_AXISDATA(iter);
1380 1
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1381

1382 1
    for(idim = 0; idim < ndim; ++idim) {
1383
        npy_intp maskstride, istride;
1384

1385 1
        istride = NAD_STRIDES(axisdata)[iop];
1386 1
        maskstride = NAD_STRIDES(axisdata)[maskop];
1387

1388
        /*
1389
         * If 'iop' is being broadcast to 'maskop', we have
1390
         * the invalid situation described above.
1391
         */
1392 1
        if (maskstride != 0 && istride == 0) {
1393 1
            PyErr_SetString(PyExc_ValueError,
1394
                    "Iterator reduction operand is WRITEMASKED, "
1395
                    "but also broadcasts to multiple mask values. "
1396
                    "There can be only one mask value per WRITEMASKED "
1397
                    "element.");
1398 1
            return 0;
1399
        }
1400

1401 1
        NIT_ADVANCE_AXISDATA(axisdata, 1);
1402
    }
1403

1404
    return 1;
1405
}
1406

1407
/*
1408
 * Check whether a reduction is OK based on the flags and the operand being
1409
 * readwrite. This path is deprecated, since usually only specific axes
1410
 * should be reduced. If axes are specified explicitely, the flag is
1411
 * unnecessary.
1412
 */
1413
static int
1414 1
npyiter_check_reduce_ok_and_set_flags(
1415
        NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
1416
        int dim) {
1417
    /* If it's writeable, this means a reduction */
1418 1
    if (*op_itflags & NPY_OP_ITFLAG_WRITE) {
1419 1
        if (!(flags & NPY_ITER_REDUCE_OK)) {
1420 1
            PyErr_Format(PyExc_ValueError,
1421
                    "output operand requires a reduction along dimension %d, "
1422
                    "but the reduction is not enabled. The dimension size of 1 "
1423
                    "does not match the expected output shape.", dim);
1424
            return 0;
1425
        }
1426 1
        if (!(*op_itflags & NPY_OP_ITFLAG_READ)) {
1427 0
            PyErr_SetString(PyExc_ValueError,
1428
                    "output operand requires a reduction, but is flagged as "
1429
                    "write-only, not read-write");
1430
            return 0;
1431
        }
1432
        NPY_IT_DBG_PRINT("Iterator: Indicating that a reduction is"
1433
                         "occurring\n");
1434

1435 1
        NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
1436 1
        *op_itflags |= NPY_OP_ITFLAG_REDUCE;
1437
    }
1438
    return 1;
1439
}
1440

1441
/**
1442
 * Removes the (additive) NPY_ITER_REDUCTION_AXIS indication and sets
1443
 * is_forced_broadcast to 1 if it is set. Otherwise to 0.
1444
 *
1445
 * @param axis The op_axes[i] to normalize.
1446
 * @param reduction_axis Output 1 if a reduction axis, otherwise 0.
1447
 * @returns The normalized axis (without reduce axis flag).
1448
 */
1449
static NPY_INLINE int
1450
npyiter_get_op_axis(int axis, npy_bool *reduction_axis) {
1451 1
    npy_bool forced_broadcast = axis >= NPY_ITER_REDUCTION_AXIS(-1);
1452

1453
    if (reduction_axis != NULL) {
1454 1
        *reduction_axis = forced_broadcast;
1455
    }
1456 1
    if (forced_broadcast) {
1457 1
        return axis - NPY_ITER_REDUCTION_AXIS(0);
1458
    }
1459
    return axis;
1460
}
1461

1462
/*
1463
 * Fills in the AXISDATA for the 'nop' operands, broadcasting
1464
 * the dimensionas as necessary.  Also fills
1465
 * in the ITERSIZE data member.
1466
 *
1467
 * If op_axes is not NULL, it should point to an array of ndim-sized
1468
 * arrays, one for each op.
1469
 *
1470
 * Returns 1 on success, 0 on failure.
1471
 */
1472
static int
1473 1
npyiter_fill_axisdata(NpyIter *iter, npy_uint32 flags, npyiter_opitflags *op_itflags,
1474
                    char **op_dataptr,
1475
                    const npy_uint32 *op_flags, int **op_axes,
1476
                    npy_intp const *itershape)
1477
{
1478 1
    npy_uint32 itflags = NIT_ITFLAGS(iter);
1479 1
    int idim, ndim = NIT_NDIM(iter);
1480 1
    int iop, nop = NIT_NOP(iter);
1481 1
    int maskop = NIT_MASKOP(iter);
1482

1483
    int ondim;
1484
    NpyIter_AxisData *axisdata;
1485
    npy_intp sizeof_axisdata;
1486 1
    PyArrayObject **op = NIT_OPERANDS(iter), *op_cur;
1487
    npy_intp broadcast_shape[NPY_MAXDIMS];
1488

1489
    /* First broadcast the shapes together */
1490 1
    if (itershape == NULL) {
1491 1
        for (idim = 0; idim < ndim; ++idim) {
1492 1
            broadcast_shape[idim] = 1;
1493
        }
1494
    }
1495
    else {
1496 1
        for (idim = 0; idim < ndim; ++idim) {
1497 1
            broadcast_shape[idim] = itershape[idim];
1498
            /* Negative shape entries are deduced from the operands */
1499 1
            if (broadcast_shape[idim] < 0) {
1500 1
                broadcast_shape[idim] = 1;
1501
            }
1502
        }
1503
    }
1504 1
    for (iop = 0; iop < nop; ++iop) {
1505 1
        op_cur = op[iop];
1506 1
        if (op_cur != NULL) {
1507 1
            npy_intp *shape = PyArray_DIMS(op_cur);
1508 1
            ondim = PyArray_NDIM(op_cur);
1509

1510 1
            if (op_axes == NULL || op_axes[iop] == NULL) {
1511
                /*
1512
                 * Possible if op_axes are being used, but
1513
                 * op_axes[iop] is NULL
1514
                 */
1515 1
                if (ondim > ndim) {
1516 1
                    PyErr_SetString(PyExc_ValueError,
1517
                            "input operand has more dimensions than allowed "
1518
                            "by the axis remapping");
1519 1
                    return 0;
1520
                }
1521 1
                for (idim = 0; idim < ondim; ++idim) {
1522 1
                    npy_intp bshape = broadcast_shape[idim+ndim-ondim];
1523 1
                    npy_intp op_shape = shape[idim];
1524

1525 1
                    if (bshape == 1) {
1526 1
                        broadcast_shape[idim+ndim-ondim] = op_shape;
1527
                    }
1528 1
                    else if (bshape != op_shape && op_shape != 1) {
1529
                        goto broadcast_error;
1530
                    }
1531
                }
1532
            }
1533
            else {
1534
                int *axes = op_axes[iop];
1535 1
                for (idim = 0; idim < ndim; ++idim) {
1536 1
                    int i = npyiter_get_op_axis(axes[idim], NULL);
1537

1538 1
                    if (i >= 0) {
1539 1
                        if (i < ondim) {
1540 1
                            npy_intp bshape = broadcast_shape[idim];
1541 1
                            npy_intp op_shape = shape[i];
1542

1543 1
                            if (bshape == 1) {
1544 1
                                broadcast_shape[idim] = op_shape;
1545
                            }
1546 1
                            else if (bshape != op_shape && op_shape != 1) {
1547
                                goto broadcast_error;
1548
                            }
1549
                        }
1550
                        else {
1551 1
                            PyErr_Format(PyExc_ValueError,
1552
                                    "Iterator input op_axes[%d][%d] (==%d) "
1553
                                    "is not a valid axis of op[%d], which "
1554
                                    "has %d dimensions ",
1555 1
                                    iop, (ndim-idim-1), i,
1556
                                    iop, ondim);
1557 1
                            return 0;
1558
                        }
1559
                    }
1560
                }
1561
            }
1562
        }
1563
    }
1564
    /*
1565
     * If a shape was provided with a 1 entry, make sure that entry didn't
1566
     * get expanded by broadcasting.
1567
     */
1568 1
    if (itershape != NULL) {
1569 1
        for (idim = 0; idim < ndim; ++idim) {
1570 1
            if (itershape[idim] == 1 && broadcast_shape[idim] != 1) {
1571
                goto broadcast_error;
1572
            }
1573
        }
1574
    }
1575

1576 1
    axisdata = NIT_AXISDATA(iter);
1577 1
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1578

1579 1
    if (ndim == 0) {
1580
        /* Need to fill the first axisdata, even if the iterator is 0-d */
1581 1
        NAD_SHAPE(axisdata) = 1;
1582 1
        NAD_INDEX(axisdata) = 0;
1583 1
        memcpy(NAD_PTRS(axisdata), op_dataptr, NPY_SIZEOF_INTP*nop);
1584 1
        memset(NAD_STRIDES(axisdata), 0, NPY_SIZEOF_INTP*nop);
1585
    }
1586

1587
    /* Now process the operands, filling in the axisdata */
1588 1
    for (idim = 0; idim < ndim; ++idim) {
1589 1
        npy_intp bshape = broadcast_shape[ndim-idim-1];
1590 1
        npy_intp *strides = NAD_STRIDES(axisdata);
1591

1592 1
        NAD_SHAPE(axisdata) = bshape;
1593 1
        NAD_INDEX(axisdata) = 0;
1594 1
        memcpy(NAD_PTRS(axisdata), op_dataptr, NPY_SIZEOF_INTP*nop);
1595

1596 1
        for (iop = 0; iop < nop; ++iop) {
1597 1
            op_cur = op[iop];
1598

1599 1
            if (op_axes == NULL || op_axes[iop] == NULL) {
1600 1
                if (op_cur == NULL) {
1601 1
                    strides[iop] = 0;
1602
                }
1603
                else {
1604 1
                    ondim = PyArray_NDIM(op_cur);
1605 1
                    if (bshape == 1) {
1606 1
                        strides[iop] = 0;
1607 1
                        if (idim >= ondim &&
1608 1
                                    (op_flags[iop] & NPY_ITER_NO_BROADCAST)) {
1609
                            goto operand_different_than_broadcast;
1610
                        }
1611
                    }
1612 1
                    else if (idim >= ondim ||
1613 1
                                    PyArray_DIM(op_cur, ondim-idim-1) == 1) {
1614 1
                        strides[iop] = 0;
1615 1
                        if (op_flags[iop] & NPY_ITER_NO_BROADCAST) {
1616
                            goto operand_different_than_broadcast;
1617
                        }
1618
                        /* If it's writeable, this means a reduction */
1619 1
                        if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
1620 1
                            if (!(flags & NPY_ITER_REDUCE_OK)) {
1621 1
                                PyErr_SetString(PyExc_ValueError,
1622
                                        "output operand requires a "
1623
                                        "reduction, but reduction is "
1624
                                        "not enabled");
1625 1
                                return 0;
1626
                            }
1627 1
                            if (!(op_itflags[iop] & NPY_OP_ITFLAG_READ)) {
1628 0
                                PyErr_SetString(PyExc_ValueError,
1629
                                        "output operand requires a "
1630
                                        "reduction, but is flagged as "
1631
                                        "write-only, not read-write");
1632 0
                                return 0;
1633
                            }
1634
                            /*
1635
                             * The ARRAYMASK can't be a reduction, because
1636
                             * it would be possible to write back to the
1637
                             * array once when the ARRAYMASK says 'True',
1638
                             * then have the reduction on the ARRAYMASK
1639
                             * later flip to 'False', indicating that the
1640
                             * write back should never have been done,
1641
                             * and violating the strict masking semantics
1642
                             */
1643 1
                            if (iop == maskop) {
1644 1
                                PyErr_SetString(PyExc_ValueError,
1645
                                        "output operand requires a "
1646
                                        "reduction, but is flagged as "
1647
                                        "the ARRAYMASK operand which "
1648
                                        "is not permitted to be the "
1649
                                        "result of a reduction");
1650 1
                                return 0;
1651
                            }
1652

1653 1
                            NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
1654 1
                            op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
1655
                        }
1656
                    }
1657
                    else {
1658 1
                        strides[iop] = PyArray_STRIDE(op_cur, ondim-idim-1);
1659
                    }
1660
                }
1661
            }
1662
            else {
1663 1
                int *axes = op_axes[iop];
1664
                npy_bool reduction_axis;
1665
                int i;
1666 1
                i = npyiter_get_op_axis(axes[ndim - idim - 1], &reduction_axis);
1667

1668 1
                if (reduction_axis) {
1669
                    /* This is explicitly a reduction axis */
1670 1
                    strides[iop] = 0;
1671 1
                    NIT_ITFLAGS(iter) |= NPY_ITFLAG_REDUCE;
1672 1
                    op_itflags[iop] |= NPY_OP_ITFLAG_REDUCE;
1673

1674 1
                    if (NPY_UNLIKELY((i >= 0) && (op_cur != NULL) &&
1675
                            (PyArray_DIM(op_cur, i) != 1))) {
1676 1
                        PyErr_Format(PyExc_ValueError,
1677
                                "operand was set up as a reduction along axis "
1678
                                "%d, but the length of the axis is %zd "
1679
                                "(it has to be 1)",
1680 1
                                i, (Py_ssize_t)PyArray_DIM(op_cur, i));
1681 1
                        return 0;
1682
                    }
1683
                }
1684 1
                else if (bshape == 1) {
1685
                    /*
1686
                     * If the full iterator shape is 1, zero always works.
1687
                     * NOTE: We thus always allow broadcast dimensions (i = -1)
1688
                     *       if the shape is 1.
1689
                     */
1690 1
                    strides[iop] = 0;
1691
                }
1692 1
                else if (i >= 0) {
1693 1
                    if (op_cur == NULL) {
1694
                        /* stride is filled later, shape will match `bshape` */
1695 1
                        strides[iop] = 0;
1696
                    }
1697 1
                    else if (PyArray_DIM(op_cur, i) == 1) {
1698 1
                        strides[iop] = 0;
1699 1
                        if (op_flags[iop] & NPY_ITER_NO_BROADCAST) {
1700
                            goto operand_different_than_broadcast;
1701
                        }
1702 1
                        if (!npyiter_check_reduce_ok_and_set_flags(
1703 1
                                iter, flags, &op_itflags[iop], i)) {
1704
                            return 0;
1705
                        }
1706
                    }
1707
                    else {
1708 1
                        strides[iop] = PyArray_STRIDE(op_cur, i);
1709
                    }
1710
                }
1711
                else {
1712 1
                    strides[iop] = 0;
1713 1
                    if (!npyiter_check_reduce_ok_and_set_flags(
1714 1
                            iter, flags, &op_itflags[iop], i)) {
1715
                        return 0;
1716
                    }
1717
                }
1718
            }
1719
        }
1720

1721 1
        NIT_ADVANCE_AXISDATA(axisdata, 1);
1722
    }
1723

1724
    /* Now fill in the ITERSIZE member */
1725 1
    NIT_ITERSIZE(iter) = 1;
1726 1
    for (idim = 0; idim < ndim; ++idim) {
1727 1
        if (npy_mul_with_overflow_intp(&NIT_ITERSIZE(iter),
1728
                    NIT_ITERSIZE(iter), broadcast_shape[idim])) {
1729 1
            if ((itflags & NPY_ITFLAG_HASMULTIINDEX) &&
1730 1
                    !(itflags & NPY_ITFLAG_HASINDEX) &&
1731
                    !(itflags & NPY_ITFLAG_BUFFER)) {
1732
                /*
1733
                 * If RemoveAxis may be called, the size check is delayed
1734
                 * until either the multi index is removed, or GetIterNext
1735
                 * is called.
1736
                 */
1737 1
                NIT_ITERSIZE(iter) = -1;
1738 1
                break;
1739
            }
1740
            else {
1741 1
                PyErr_SetString(PyExc_ValueError, "iterator is too large");
1742 1
                return 0;
1743
            }
1744
        }
1745
    }
1746
    /* The range defaults to everything */
1747 1
    NIT_ITERSTART(iter) = 0;
1748 1
    NIT_ITEREND(iter) = NIT_ITERSIZE(iter);
1749

1750 1
    return 1;
1751

1752 1
broadcast_error: {
1753
        PyObject *errmsg, *tmp;
1754
        npy_intp remdims[NPY_MAXDIMS];
1755
        char *tmpstr;
1756

1757 1
        if (op_axes == NULL) {
1758 1
            errmsg = PyUnicode_FromString("operands could not be broadcast "
1759
                                          "together with shapes ");
1760 1
            if (errmsg == NULL) {
1761
                return 0;
1762
            }
1763 1
            for (iop = 0; iop < nop; ++iop) {
1764 1
                if (op[iop] != NULL) {
1765 1
                    tmp = convert_shape_to_string(PyArray_NDIM(op[iop]),
1766 1
                                                    PyArray_DIMS(op[iop]),
1767
                                                    " ");
1768 1
                    if (tmp == NULL) {
1769 0
                        Py_DECREF(errmsg);
1770
                        return 0;
1771
                    }
1772 1
                    PyUString_ConcatAndDel(&errmsg, tmp);
1773 1
                    if (errmsg == NULL) {
1774
                        return 0;
1775
                    }
1776
                }
1777
            }
1778 1
            if (itershape != NULL) {
1779 0
                tmp = PyUnicode_FromString("and requested shape ");
1780 0
                if (tmp == NULL) {
1781 0
                    Py_DECREF(errmsg);
1782
                    return 0;
1783
                }
1784 0
                PyUString_ConcatAndDel(&errmsg, tmp);
1785 0
                if (errmsg == NULL) {
1786
                    return 0;
1787
                }
1788

1789 0
                tmp = convert_shape_to_string(ndim, itershape, "");
1790 0
                if (tmp == NULL) {
1791 0
                    Py_DECREF(errmsg);
1792
                    return 0;
1793
                }
1794 0
                PyUString_ConcatAndDel(&errmsg, tmp);
1795 0
                if (errmsg == NULL) {
1796
                    return 0;
1797
                }
1798

1799
            }
1800 1
            PyErr_SetObject(PyExc_ValueError, errmsg);
1801 1
            Py_DECREF(errmsg);
1802
        }
1803
        else {
1804 1
            errmsg = PyUnicode_FromString("operands could not be broadcast "
1805
                                          "together with remapped shapes "
1806
                                          "[original->remapped]: ");
1807 1
            for (iop = 0; iop < nop; ++iop) {
1808 1
                if (op[iop] != NULL) {
1809 1
                    int *axes = op_axes[iop];
1810

1811 1
                    tmpstr = (axes == NULL) ? " " : "->";
1812 1
                    tmp = convert_shape_to_string(PyArray_NDIM(op[iop]),
1813 1
                                                    PyArray_DIMS(op[iop]),
1814
                                                    tmpstr);
1815 1
                    if (tmp == NULL) {
1816
                        return 0;
1817
                    }
1818 1
                    PyUString_ConcatAndDel(&errmsg, tmp);
1819 1
                    if (errmsg == NULL) {
1820
                        return 0;
1821
                    }
1822

1823 1
                    if (axes != NULL) {
1824 1
                        for (idim = 0; idim < ndim; ++idim) {
1825 1
                            int i = npyiter_get_op_axis(axes[idim], NULL);
1826

1827 1
                            if (i >= 0 && i < PyArray_NDIM(op[iop])) {
1828 1
                                remdims[idim] = PyArray_DIM(op[iop], i);
1829
                            }
1830
                            else {
1831 1
                                remdims[idim] = -1;
1832
                            }
1833
                        }
1834 1
                        tmp = convert_shape_to_string(ndim, remdims, " ");
1835 1
                        if (tmp == NULL) {
1836
                            return 0;
1837
                        }
1838 1
                        PyUString_ConcatAndDel(&errmsg, tmp);
1839 1
                        if (errmsg == NULL) {
1840
                            return 0;
1841
                        }
1842
                    }
1843
                }
1844
            }
1845 1
            if (itershape != NULL) {
1846 1
                tmp = PyUnicode_FromString("and requested shape ");
1847 1
                if (tmp == NULL) {
1848 0
                    Py_DECREF(errmsg);
1849
                    return 0;
1850
                }
1851 1
                PyUString_ConcatAndDel(&errmsg, tmp);
1852 1
                if (errmsg == NULL) {
1853
                    return 0;
1854
                }
1855

1856 1
                tmp = convert_shape_to_string(ndim, itershape, "");
1857 1
                if (tmp == NULL) {
1858 0
                    Py_DECREF(errmsg);
1859
                    return 0;
1860
                }
1861 1
                PyUString_ConcatAndDel(&errmsg, tmp);
1862 1
                if (errmsg == NULL) {
1863
                    return 0;
1864
                }
1865

1866
            }
1867 1
            PyErr_SetObject(PyExc_ValueError, errmsg);
1868 1
            Py_DECREF(errmsg);
1869
        }
1870

1871
        return 0;
1872
    }
1873

1874 1
operand_different_than_broadcast: {
1875
        npy_intp remdims[NPY_MAXDIMS];
1876
        PyObject *errmsg, *tmp;
1877

1878
        /* Start of error message */
1879 1
        if (op_flags[iop] & NPY_ITER_READONLY) {
1880 1
            errmsg = PyUnicode_FromString("non-broadcastable operand "
1881
                                          "with shape ");
1882
        }
1883
        else {
1884 1
            errmsg = PyUnicode_FromString("non-broadcastable output "
1885
                                          "operand with shape ");
1886
        }
1887 1
        if (errmsg == NULL) {
1888
            return 0;
1889
        }
1890

1891
        /* Operand shape */
1892 1
        tmp = convert_shape_to_string(PyArray_NDIM(op[iop]),
1893 1
                                        PyArray_DIMS(op[iop]), "");
1894 1
        if (tmp == NULL) {
1895
            return 0;
1896
        }
1897 1
        PyUString_ConcatAndDel(&errmsg, tmp);
1898 1
        if (errmsg == NULL) {
1899
            return 0;
1900
        }
1901
        /* Remapped operand shape */
1902 1
        if (op_axes != NULL && op_axes[iop] != NULL) {
1903
            int *axes = op_axes[iop];
1904

1905 0
            for (idim = 0; idim < ndim; ++idim) {
1906 0
                npy_intp i = axes[ndim-idim-1];
1907

1908 0
                if (i >= 0 && i < PyArray_NDIM(op[iop])) {
1909 0
                    remdims[idim] = PyArray_DIM(op[iop], i);
1910
                }
1911
                else {
1912 0
                    remdims[idim] = -1;
1913
                }
1914
            }
1915

1916 0
            tmp = PyUnicode_FromString(" [remapped to ");
1917 0
            if (tmp == NULL) {
1918
                return 0;
1919
            }
1920 0
            PyUString_ConcatAndDel(&errmsg, tmp);
1921 0
            if (errmsg == NULL) {
1922
                return 0;
1923
            }
1924

1925 0
            tmp = convert_shape_to_string(ndim, remdims, "]");
1926 0
            if (tmp == NULL) {
1927
                return 0;
1928
            }
1929 0
            PyUString_ConcatAndDel(&errmsg, tmp);
1930 0
            if (errmsg == NULL) {
1931
                return 0;
1932
            }
1933
        }
1934

1935 1
        tmp = PyUnicode_FromString(" doesn't match the broadcast shape ");
1936 1
        if (tmp == NULL) {
1937
            return 0;
1938
        }
1939 1
        PyUString_ConcatAndDel(&errmsg, tmp);
1940 1
        if (errmsg == NULL) {
1941
            return 0;
1942
        }
1943

1944
        /* Broadcast shape */
1945 1
        tmp = convert_shape_to_string(ndim, broadcast_shape, "");
1946 1
        if (tmp == NULL) {
1947
            return 0;
1948
        }
1949 1
        PyUString_ConcatAndDel(&errmsg, tmp);
1950 1
        if (errmsg == NULL) {
1951
            return 0;
1952
        }
1953

1954 1
        PyErr_SetObject(PyExc_ValueError, errmsg);
1955 1
        Py_DECREF(errmsg);
1956

1957
        return 0;
1958
    }
1959
}

/*
 * Replaces the AXISDATA for the iop'th operand, broadcasting
 * the dimensions as necessary.  Assumes the replacement array is
 * exactly the same shape as the original array used when
 * npy_fill_axisdata was called.
 *
 * If op_axes is not NULL, it should point to an ndim-sized
 * array.
 */
static void
1971 1
npyiter_replace_axisdata(
1972
        NpyIter *iter, int iop, PyArrayObject *op,
1973
        int orig_op_ndim, const int *op_axes)
1974
{
1975 1
    npy_uint32 itflags = NIT_ITFLAGS(iter);
1976 1
    int idim, ndim = NIT_NDIM(iter);
1977 1
    int nop = NIT_NOP(iter);
1978 1
    char *op_dataptr = PyArray_DATA(op);
1979

1980
    NpyIter_AxisData *axisdata0, *axisdata;
1981
    npy_intp sizeof_axisdata;
1982
    npy_int8 *perm;
1983 1
    npy_intp baseoffset = 0;
1984

1985 1
    perm = NIT_PERM(iter);
1986 1
    axisdata0 = NIT_AXISDATA(iter);
1987 1
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
1988

1989
    /*
1990
     * Replace just the strides which were non-zero, and compute
1991
     * the base data address.
1992
     */
1993 1
    axisdata = axisdata0;
1994

1995 1
    if (op_axes != NULL) {
1996 1
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
1997
            int i;
1998
            npy_bool axis_flipped;
1999
            npy_intp shape;
2000

2001
            /* Apply perm to get the original axis, and check if its flipped */
2002 1
            i = npyiter_undo_iter_axis_perm(idim, ndim, perm, &axis_flipped);
2003

2004 1
            i = npyiter_get_op_axis(op_axes[i], NULL);
2005
            assert(i < orig_op_ndim);
2006 1
            if (i >= 0) {
2007 1
                shape = PyArray_DIM(op, i);
2008 1
                if (shape != 1) {
2009 1
                    npy_intp stride = PyArray_STRIDE(op, i);
2010 1
                    if (axis_flipped) {
2011 1
                        NAD_STRIDES(axisdata)[iop] = -stride;
2012 1
                        baseoffset += stride*(shape-1);
2013
                    }
2014
                    else {
2015 1
                        NAD_STRIDES(axisdata)[iop] = stride;
2016
                    }
2017
                }
2018
            }
2019
        }
2020
    }
2021
    else {
2022 1
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2023
            int i;
2024
            npy_bool axis_flipped;
2025
            npy_intp shape;
2026

2027 1
            i = npyiter_undo_iter_axis_perm(
2028
                    idim, orig_op_ndim, perm, &axis_flipped);
2029

2030 1
            if (i >= 0) {
2031 1
                shape = PyArray_DIM(op, i);
2032 1
                if (shape != 1) {
2033 1
                    npy_intp stride = PyArray_STRIDE(op, i);
2034 1
                    if (axis_flipped) {
2035 1
                        NAD_STRIDES(axisdata)[iop] = -stride;
2036 1
                        baseoffset += stride*(shape-1);
2037
                    }
2038
                    else {
2039 1
                        NAD_STRIDES(axisdata)[iop] = stride;
2040
                    }
2041
                }
2042
            }
2043
        }
2044
    }
2045

2046 1
    op_dataptr += baseoffset;
2047

2048
    /* Now the base data pointer is calculated, set it everywhere it's needed */
2049 1
    NIT_RESETDATAPTR(iter)[iop] = op_dataptr;
2050 1
    NIT_BASEOFFSETS(iter)[iop] = baseoffset;
2051 1
    axisdata = axisdata0;
2052
    /* Fill at least one axisdata, for the 0-d case */
2053 1
    NAD_PTRS(axisdata)[iop] = op_dataptr;
2054 1
    NIT_ADVANCE_AXISDATA(axisdata, 1);
2055 1
    for (idim = 1; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2056 1
        NAD_PTRS(axisdata)[iop] = op_dataptr;
2057
    }
2058
}

/*
 * Computes the iterator's index strides and initializes the index values
 * to zero.
 *
 * This must be called before the axes (i.e. the AXISDATA array) may
 * be reordered.
 */
static void
2068 1
npyiter_compute_index_strides(NpyIter *iter, npy_uint32 flags)
2069
{
2070 1
    npy_uint32 itflags = NIT_ITFLAGS(iter);
2071 1
    int idim, ndim = NIT_NDIM(iter);
2072 1
    int nop = NIT_NOP(iter);
2073

2074
    npy_intp indexstride;
2075
    NpyIter_AxisData *axisdata;
2076
    npy_intp sizeof_axisdata;
2077

2078
    /*
2079
     * If there is only one element being iterated, we just have
2080
     * to touch the first AXISDATA because nothing will ever be
2081
     * incremented. This also initializes the data for the 0-d case.
2082
     */
2083 1
    if (NIT_ITERSIZE(iter) == 1) {
2084 1
        if (itflags & NPY_ITFLAG_HASINDEX) {
2085 0
            axisdata = NIT_AXISDATA(iter);
2086 0
            NAD_PTRS(axisdata)[nop] = 0;
2087
        }
2088
        return;
2089
    }
2090

2091 1
    if (flags & NPY_ITER_C_INDEX) {
2092 1
        sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2093 1
        axisdata = NIT_AXISDATA(iter);
2094 1
        indexstride = 1;
2095 1
        for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2096 1
            npy_intp shape = NAD_SHAPE(axisdata);
2097

2098 1
            if (shape == 1) {
2099 0
                NAD_STRIDES(axisdata)[nop] = 0;
2100
            }
2101
            else {
2102 1
                NAD_STRIDES(axisdata)[nop] = indexstride;
2103
            }
2104 1
            NAD_PTRS(axisdata)[nop] = 0;
2105 1
            indexstride *= shape;
2106
        }
2107
    }
2108 1
    else if (flags & NPY_ITER_F_INDEX) {
2109 1
        sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2110 1
        axisdata = NIT_INDEX_AXISDATA(NIT_AXISDATA(iter), ndim-1);
2111 1
        indexstride = 1;
2112 1
        for(idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, -1)) {
2113 1
            npy_intp shape = NAD_SHAPE(axisdata);
2114

2115 1
            if (shape == 1) {
2116 0
                NAD_STRIDES(axisdata)[nop] = 0;
2117
            }
2118
            else {
2119 1
                NAD_STRIDES(axisdata)[nop] = indexstride;
2120
            }
2121 1
            NAD_PTRS(axisdata)[nop] = 0;
2122 1
            indexstride *= shape;
2123
        }
2124
    }
2125
}

/*
 * If the order is NPY_KEEPORDER, lets the iterator find the best
 * iteration order, otherwise forces it.  Indicates in the itflags
 * whether the iteration order was forced.
 */
static void
2133 1
npyiter_apply_forced_iteration_order(NpyIter *iter, NPY_ORDER order)
2134
{
2135
    /*npy_uint32 itflags = NIT_ITFLAGS(iter);*/
2136 1
    int ndim = NIT_NDIM(iter);
2137 1
    int iop, nop = NIT_NOP(iter);
2138

2139 1
    switch (order) {
2140 1
    case NPY_CORDER:
2141 1
        NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER;
2142 1
        break;
2143 1
    case NPY_FORTRANORDER:
2144 1
        NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER;
2145
        /* Only need to actually do something if there is more than 1 dim */
2146 1
        if (ndim > 1) {
2147 1
            npyiter_reverse_axis_ordering(iter);
2148
        }
2149
        break;
2150 1
    case NPY_ANYORDER:
2151 1
        NIT_ITFLAGS(iter) |= NPY_ITFLAG_FORCEDORDER;
2152
        /* Only need to actually do something if there is more than 1 dim */
2153 1
        if (ndim > 1) {
2154 1
            PyArrayObject **op = NIT_OPERANDS(iter);
2155 1
            int forder = 1;
2156

2157
            /* Check that all the array inputs are fortran order */
2158 1
            for (iop = 0; iop < nop; ++iop, ++op) {
2159 1
                if (*op && !PyArray_CHKFLAGS(*op, NPY_ARRAY_F_CONTIGUOUS)) {
2160
                    forder = 0;
2161
                    break;
2162
                }
2163
            }
2164

2165 1
            if (forder) {
2166 1
                npyiter_reverse_axis_ordering(iter);
2167
            }
2168
        }
2169
        break;
2170
    case NPY_KEEPORDER:
2171
        /* Don't set the forced order flag here... */
2172
        break;
2173
    }
2174
}

/*
 * This function negates any strides in the iterator
 * which are negative.  When iterating more than one
 * object, it only flips strides when they are all
 * negative or zero.
 */
static void
2183 1
npyiter_flip_negative_strides(NpyIter *iter)
2184
{
2185 1
    npy_uint32 itflags = NIT_ITFLAGS(iter);
2186 1
    int idim, ndim = NIT_NDIM(iter);
2187 1
    int iop, nop = NIT_NOP(iter);
2188

2189 1
    npy_intp istrides, nstrides = NAD_NSTRIDES();
2190
    NpyIter_AxisData *axisdata, *axisdata0;
2191
    npy_intp *baseoffsets;
2192 1
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
2193 1
    int any_flipped = 0;
2194

2195 1
    axisdata0 = axisdata = NIT_AXISDATA(iter);
2196 1
    baseoffsets = NIT_BASEOFFSETS(iter);
2197 1
    for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2198 1
        npy_intp *strides = NAD_STRIDES(axisdata);
2199 1
        int any_negative = 0;
2200

2201
        /*
2202
         * Check the signs of all the operand strides.
2203
         */
2204 1
        for (iop = 0; iop < nop; ++iop) {
2205 1
            if (strides[iop] < 0) {
2206
                any_negative = 1;
2207
            }
2208 1
            else if (strides[iop] != 0) {
2209
                break;
2210
            }
2211
        }
2212
        /*
2213
         * If at least one stride is negative and none are positive,
2214
         * flip all the strides for this dimension.
2215
         */
2216 1
        if (any_negative && iop == nop) {
2217 1
            npy_intp shapem1 = NAD_SHAPE(axisdata) - 1;
2218

2219 1
            for (istrides = 0; istrides < nstrides; ++istrides) {
2220 1
                npy_intp stride = strides[istrides];
2221

2222
                /* Adjust the base pointers to start at the end */
2223 1
                baseoffsets[istrides] += shapem1 * stride;
2224
                /* Flip the stride */
2225 1
                strides[istrides] = -stride;
2226
            }
2227
            /*
2228
             * Make the perm entry negative so get_multi_index
2229
             * knows it's flipped
2230
             */
2231 1
            NIT_PERM(iter)[idim] = -1-NIT_PERM(iter)[idim];
2232

2233 1
            any_flipped = 1;
2234
        }
2235
    }
2236

2237
    /*
2238
     * If any strides were flipped, the base pointers were adjusted
2239
     * in the first AXISDATA, and need to be copied to all the rest
2240
     */
2241 1
    if (any_flipped) {
2242 1
        char **resetdataptr = NIT_RESETDATAPTR(iter);
2243

2244 1
        for (istrides = 0; istrides < nstrides; ++istrides) {
2245 1
            resetdataptr[istrides] += baseoffsets[istrides];
2246
        }
2247
        axisdata = axisdata0;
2248 1
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
2249 1
            char **ptrs = NAD_PTRS(axisdata);
2250 1
            for (istrides = 0; istrides < nstrides; ++istrides) {
2251 1
                ptrs[istrides] = resetdataptr[istrides];
2252
            }
2253
        }
2254
        /*
2255
         * Indicate that some of the perm entries are negative,
2256
         * and that it's not (strictly speaking) the identity perm.
2257
         */
2258 1
        NIT_ITFLAGS(iter) = (NIT_ITFLAGS(iter)|NPY_ITFLAG_NEGPERM) &
2259
                            ~NPY_ITFLAG_IDENTPERM;
2260
    }
2261
}
static void
2264 1
npyiter_reverse_axis_ordering(NpyIter *iter)
2265
{
2266 1
    npy_uint32 itflags = NIT_ITFLAGS(iter);
2267 1
    int ndim = NIT_NDIM(iter);
2268 1
    int nop = NIT_NOP(iter);
2269

2270
    npy_intp i, temp, size;
2271
    npy_intp *first, *last;
2272
    npy_int8 *perm;
2273

2274 1
    size = NIT_AXISDATA_SIZEOF(itflags, ndim, nop)/NPY_SIZEOF_INTP;
2275 1
    first = (npy_intp*)NIT_AXISDATA(iter);
2276 1
    last = first + (ndim-1)*size;
2277

2278
    /* This loop reverses the order of the AXISDATA array */
2279 1
    while (first < last) {
2280 1
        for (i = 0; i < size; ++i) {
2281 1
            temp = first[i];
2282 1
            first[i] = last[i];
2283 1
            last[i] = temp;
2284
        }
2285 1
        first += size;
2286 1
        last -= size;
2287
    }
2288

2289
    /* Store the perm we applied */
2290
    perm = NIT_PERM(iter);
2291 1
    for(i = ndim-1; i >= 0; --i, ++perm) {
2292 1
        *perm = (npy_int8)i;
2293
    }
2294

2295 1
    NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM;
2296
}
static NPY_INLINE npy_intp
2299
intp_abs(npy_intp x)
2300
{
2301 1
    return (x < 0) ? -x : x;
2302
}
/*
 * Chooses the best iteration order for NPY_KEEPORDER by sorting the
 * axes (via NIT_PERM) according to the operands' stride magnitudes,
 * then applies the resulting permutation to the AXISDATA array in place.
 */
static void
npyiter_find_best_axis_ordering(NpyIter *iter)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int iop, nop = NIT_NOP(iter);

    npy_intp ax_i0, ax_i1, ax_ipos;
    npy_int8 ax_j0, ax_j1;
    npy_int8 *perm;
    NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
    npy_intp sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
    int permuted = 0;

    perm = NIT_PERM(iter);

    /*
     * Do a custom stable insertion sort.  Note that because
     * the AXISDATA has been reversed from C order, this
     * is sorting from smallest stride to biggest stride.
     */
    for (ax_i0 = 1; ax_i0 < ndim; ++ax_i0) {
        npy_intp *strides0;

        /* 'ax_ipos' is where perm[ax_i0] will get inserted */
        ax_ipos = ax_i0;
        ax_j0 = perm[ax_i0];

        strides0 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, ax_j0));
        for (ax_i1 = ax_i0-1; ax_i1 >= 0; --ax_i1) {
            /* 'ambig' means no operand has compared the two axes yet */
            int ambig = 1, shouldswap = 0;
            npy_intp *strides1;

            ax_j1 = perm[ax_i1];

            strides1 = NAD_STRIDES(NIT_INDEX_AXISDATA(axisdata, ax_j1));

            /* Compare the two axes across all operands with nonzero strides */
            for (iop = 0; iop < nop; ++iop) {
                if (strides0[iop] != 0 && strides1[iop] != 0) {
                    if (intp_abs(strides1[iop]) <=
                                            intp_abs(strides0[iop])) {
                        /*
                         * Set swap even if it's not ambiguous already,
                         * because in the case of conflicts between
                         * different operands, C-order wins.
                         */
                        shouldswap = 0;
                    }
                    else {
                        /* Only set swap if it's still ambiguous */
                        if (ambig) {
                            shouldswap = 1;
                        }
                    }

                    /*
                     * A comparison has been done, so it's
                     * no longer ambiguous
                     */
                    ambig = 0;
                }
            }
            /*
             * If the comparison was unambiguous, either shift
             * 'ax_ipos' to 'ax_i1' or stop looking for an insertion
             * point
             */
            if (!ambig) {
                if (shouldswap) {
                    ax_ipos = ax_i1;
                }
                else {
                    break;
                }
            }
        }

        /* Insert perm[ax_i0] into the right place */
        if (ax_ipos != ax_i0) {
            for (ax_i1 = ax_i0; ax_i1 > ax_ipos; --ax_i1) {
                perm[ax_i1] = perm[ax_i1-1];
            }
            perm[ax_ipos] = ax_j0;
            permuted = 1;
        }
    }

    /* Apply the computed permutation to the AXISDATA array */
    if (permuted == 1) {
        /* 'size' is the number of npy_intp words in one AXISDATA record */
        npy_intp i, size = sizeof_axisdata/NPY_SIZEOF_INTP;
        NpyIter_AxisData *ad_i;

        /* Use the index as a flag, set each to 1 */
        ad_i = axisdata;
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(ad_i, 1)) {
            NAD_INDEX(ad_i) = 1;
        }
        /* Apply the permutation by following the cycles */
        for (idim = 0; idim < ndim; ++idim) {
            ad_i = NIT_INDEX_AXISDATA(axisdata, idim);

            /* If this axis hasn't been touched yet, process it */
            if (NAD_INDEX(ad_i) == 1) {
                npy_int8 pidim = perm[idim];
                npy_intp tmp;
                NpyIter_AxisData *ad_p, *ad_q;

                if (pidim != idim) {
                    /* Follow the cycle, copying the data */
                    for (i = 0; i < size; ++i) {
                        pidim = perm[idim];
                        ad_q = ad_i;
                        tmp = *((npy_intp*)ad_q + i);
                        while (pidim != idim) {
                            ad_p = NIT_INDEX_AXISDATA(axisdata, pidim);
                            *((npy_intp*)ad_q + i) = *((npy_intp*)ad_p + i);

                            ad_q = ad_p;
                            pidim = perm[(int)pidim];
                        }
                        *((npy_intp*)ad_q + i) = tmp;
                    }
                    /* Follow the cycle again, marking it as done */
                    pidim = perm[idim];

                    while (pidim != idim) {
                        NAD_INDEX(NIT_INDEX_AXISDATA(axisdata, pidim)) = 0;
                        pidim = perm[(int)pidim];
                    }
                }
                NAD_INDEX(ad_i) = 0;
            }
        }
        /* Clear the identity perm flag */
        NIT_ITFLAGS(iter) &= ~NPY_ITFLAG_IDENTPERM;
    }
}

/*
 * Calculates a dtype that all the types can be promoted to, using the
 * ufunc rules.  If only_inputs is 1, it leaves any operands that
 * are not read from out of the calculation.
 */
static PyArray_Descr *
2448 1
npyiter_get_common_dtype(int nop, PyArrayObject **op,
2449
                        const npyiter_opitflags *op_itflags, PyArray_Descr **op_dtype,
2450
                        PyArray_Descr **op_request_dtypes,
2451
                        int only_inputs)
2452
{
2453
    int iop;
2454 1
    npy_intp narrs = 0, ndtypes = 0;
2455
    PyArrayObject *arrs[NPY_MAXARGS];
2456
    PyArray_Descr *dtypes[NPY_MAXARGS];
2457
    PyArray_Descr *ret;
2458

2459
    NPY_IT_DBG_PRINT("Iterator: Getting a common data type from operands\n");
2460

2461 1
    for (iop = 0; iop < nop; ++iop) {
2462 1
        if (op_dtype[iop] != NULL &&
2463 1
                    (!only_inputs || (op_itflags[iop] & NPY_OP_ITFLAG_READ))) {
2464
            /* If no dtype was requested and the op is a scalar, pass the op */
2465 1
            if ((op_request_dtypes == NULL ||
2466 1
                            op_request_dtypes[iop] == NULL) &&
2467 1
                                            PyArray_NDIM(op[iop]) == 0) {
2468 1
                arrs[narrs++] = op[iop];
2469
            }
2470
            /* Otherwise just pass in the dtype */
2471
            else {
2472 1
                dtypes[ndtypes++] = op_dtype[iop];
2473
            }
2474
        }
2475
    }
2476

2477 1
    if (narrs == 0) {
2478
        npy_intp i;
2479 1
        ret = dtypes[0];
2480 1
        for (i = 1; i < ndtypes; ++i) {
2481 1
            if (ret != dtypes[i])
2482
                break;
2483
        }
2484 1
        if (i == ndtypes) {
2485 1
            if (ndtypes == 1 || PyArray_ISNBO(ret->byteorder)) {
2486 1
                Py_INCREF(ret);
2487
            }
2488
            else {
2489 1
                ret = PyArray_DescrNewByteorder(ret, NPY_NATIVE);
2490
            }
2491
        }
2492
        else {
2493 1
            ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes);
2494
        }
2495
    }
2496
    else {
2497 1
        ret = PyArray_ResultType(narrs, arrs, ndtypes, dtypes);
2498
    }
2499

2500 1
    return ret;
2501
}
2502

2503
/*
2504
 * Allocates a temporary array which can be used to replace op
2505
 * in the iteration.  Its dtype will be op_dtype.
2506
 *
2507
 * The result array has a memory ordering which matches the iterator,
2508
 * which may or may not match that of op.  The parameter 'shape' may be
2509
 * NULL, in which case it is filled in from the iterator's shape.
2510
 *
2511
 * This function must be called before any axes are coalesced.
2512
 */
2513
/*
 * Allocates a temporary array which can be used to replace op
 * in the iteration.  Its dtype will be op_dtype.
 *
 * The result array has a memory ordering which matches the iterator,
 * which may or may not match that of op.  The parameter 'shape' may be
 * NULL, in which case it is filled in from the iterator's shape.
 *
 * This function must be called before any axes are coalesced.
 *
 * Returns a new array (new reference), or NULL with an error set.
 */
static PyArrayObject *
npyiter_new_temp_array(NpyIter *iter, PyTypeObject *subtype,
                npy_uint32 flags, npyiter_opitflags *op_itflags,
                int op_ndim, npy_intp const *shape,
                PyArray_Descr *op_dtype, const int *op_axes)
{
    npy_uint32 itflags = NIT_ITFLAGS(iter);
    int idim, ndim = NIT_NDIM(iter);
    int used_op_ndim;
    int nop = NIT_NOP(iter);

    npy_int8 *perm = NIT_PERM(iter);
    npy_intp new_shape[NPY_MAXDIMS], strides[NPY_MAXDIMS];
    /* Running stride accumulator, starting at one element */
    npy_intp stride = op_dtype->elsize;
    NpyIter_AxisData *axisdata;
    npy_intp sizeof_axisdata;
    int i;

    PyArrayObject *ret;

    /*
     * There is an interaction with array-dtypes here, which
     * generally works. Let's say you make an nditer with an
     * output dtype of a 2-double array. All-scalar inputs
     * will result in a 1-dimensional output with shape (2).
     * Everything still works out in the nditer, because the
     * new dimension is always added on the end, and it cares
     * about what happens at the beginning.
     */

    /* If it's a scalar, don't need to check the axes */
    if (op_ndim == 0) {
        /* PyArray_NewFromDescr steals a reference to op_dtype */
        Py_INCREF(op_dtype);
        ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, 0,
                               NULL, NULL, NULL, 0, NULL);

        return ret;
    }

    axisdata = NIT_AXISDATA(iter);
    /*
     * NOTE: sizeof_axisdata looks unused, but the NIT_ADVANCE_AXISDATA
     * macro below expands to code that references this local variable.
     */
    sizeof_axisdata = NIT_AXISDATA_SIZEOF(itflags, ndim, nop);

    /* Initialize the strides to invalid values */
    for (i = 0; i < op_ndim; ++i) {
        strides[i] = NPY_MAX_INTP;
    }

    if (op_axes != NULL) {
        /* Custom axis mapping: only the mapped axes consume output dims */
        used_op_ndim = 0;
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
            npy_bool reduction_axis;

            /* Apply the perm to get the original axis */
            i = npyiter_undo_iter_axis_perm(idim, ndim, perm, NULL);
            i = npyiter_get_op_axis(op_axes[i], &reduction_axis);

            if (i >= 0) {
                NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d "
                                    "for iterator dimension %d to %d\n", (int)i,
                                    (int)idim, (int)stride);
                used_op_ndim += 1;
                strides[i] = stride;
                if (shape == NULL) {
                    if (reduction_axis) {
                        /* reduction axes always have a length of 1 */
                        new_shape[i] = 1;
                    }
                    else {
                        new_shape[i] = NAD_SHAPE(axisdata);
                    }
                    stride *= new_shape[i];
                    /* Mapped axis must fit inside the iterator's ndim */
                    if (i >= ndim) {
                        PyErr_Format(PyExc_ValueError,
                                "automatically allocated output array "
                                "specified with an inconsistent axis mapping; "
                                "the axis mapping cannot include dimension %d "
                                "which is too large for the iterator dimension "
                                "of %d.", i, ndim);
                        return NULL;
                    }
                }
                else {
                    assert(!reduction_axis || shape[i] == 1);
                    stride *= shape[i];
                }
            }
            else {
                /* This iterator dimension is not mapped into the operand */
                if (shape == NULL) {
                    /*
                     * If deleting this axis produces a reduction, but
                     * reduction wasn't enabled, throw an error.
                     * NOTE: We currently always allow new-axis if the iteration
                     *       size is 1 (thus allowing broadcasting sometimes).
                     */
                    if (!reduction_axis && NAD_SHAPE(axisdata) != 1) {
                        if (!npyiter_check_reduce_ok_and_set_flags(
                                iter, flags, op_itflags, i)) {
                            return NULL;
                        }
                    }
                }
            }
        }
    }
    else {
        /* No axis mapping: every iterator dimension maps straight through */
        used_op_ndim = ndim;
        for (idim = 0; idim < ndim; ++idim, NIT_ADVANCE_AXISDATA(axisdata, 1)) {
            /* Apply the perm to get the original axis */
            i = npyiter_undo_iter_axis_perm(idim, op_ndim, perm, NULL);

            if (i >= 0) {
                NPY_IT_DBG_PRINT3("Iterator: Setting allocated stride %d "
                                    "for iterator dimension %d to %d\n", (int)i,
                                    (int)idim, (int)stride);
                strides[i] = stride;
                if (shape == NULL) {
                    new_shape[i] = NAD_SHAPE(axisdata);
                    stride *= new_shape[i];
                }
                else {
                    stride *= shape[i];
                }
            }
        }
    }

    if (shape == NULL) {
        /* If shape was NULL, use the shape we calculated */
        op_ndim = used_op_ndim;
        shape = new_shape;
        /*
         * If there's a gap in the array's dimensions, it's an error.
         * For instance, if op_axes [0, 2] is specified, there will a place
         * in the strides array where the value is not set.
         */
        for (i = 0; i < op_ndim; i++) {
            if (strides[i] == NPY_MAX_INTP) {
                PyErr_Format(PyExc_ValueError,
                        "automatically allocated output array "
                        "specified with an inconsistent axis mapping; "
                        "the axis mapping is missing an entry for "
                        "dimension %d.", i);
                return NULL;
            }
        }
    }
    else if (used_op_ndim < op_ndim) {
        /*
         * If custom axes were specified, some dimensions may not have
         * been used. These are additional axes which are ignored in the
         * iterator but need to be handled here.
         */
        npy_intp factor, itemsize, new_strides[NPY_MAXDIMS];

        /* Fill in the missing strides in C order */
        factor = 1;
        itemsize = op_dtype->elsize;
        for (i = op_ndim-1; i >= 0; --i) {
            if (strides[i] == NPY_MAX_INTP) {
                new_strides[i] = factor * itemsize;
                factor *= shape[i];
            }
        }

        /*
         * Copy the missing strides, and multiply the existing strides
         * by the calculated factor.  This way, the missing strides
         * are tighter together in memory, which is good for nested
         * loops.
         */
        for (i = 0; i < op_ndim; ++i) {
            if (strides[i] == NPY_MAX_INTP) {
                strides[i] = new_strides[i];
            }
            else {
                strides[i] *= factor;
            }
        }
    }

    /* Allocate the temporary array (NewFromDescr steals the dtype ref) */
    Py_INCREF(op_dtype);
    ret = (PyArrayObject *)PyArray_NewFromDescr(subtype, op_dtype, op_ndim,
                               shape, strides, NULL, 0, NULL);
    if (ret == NULL) {
        return NULL;
    }

    /* Double-check that the subtype didn't mess with the dimensions */
    if (subtype != &PyArray_Type) {
        /*
         * TODO: the dtype could have a subarray, which adds new dimensions
         *       to `ret`, that should typically be fine, but will break
         *       in this branch.
         */
        if (PyArray_NDIM(ret) != op_ndim ||
                    !PyArray_CompareLists(shape, PyArray_DIMS(ret), op_ndim)) {
            PyErr_SetString(PyExc_RuntimeError,
                    "Iterator automatic output has an array subtype "
                    "which changed the dimensions of the output");
            Py_DECREF(ret);
            return NULL;
        }
    }

    return ret;
}
2720

2721
static int
2722 1
npyiter_allocate_arrays(NpyIter *iter,
2723
                        npy_uint32 flags,
2724
                        PyArray_Descr **op_dtype, PyTypeObject *subtype,
2725
                        const npy_uint32 *op_flags, npyiter_opitflags *op_itflags,
2726
                        int **op_axes)
2727
{
2728 1
    npy_uint32 itflags = NIT_ITFLAGS(iter);
2729 1
    int idim, ndim = NIT_NDIM(iter);
2730 1
    int iop, nop = NIT_NOP(iter);
2731

2732 1
    int check_writemasked_reductions = 0;
2733

2734 1
    NpyIter_BufferData *bufferdata = NULL;
2735 1
    PyArrayObject **op = NIT_OPERANDS(iter);
2736

2737 1
    if (itflags & NPY_ITFLAG_BUFFER) {
2738 1
        bufferdata = NIT_BUFFERDATA(iter);
2739
    }
2740

2741 1
    if (flags & NPY_ITER_COPY_IF_OVERLAP) {
2742
        /*
2743
         * Perform operand memory overlap checks, if requested.
2744
         *
2745
         * If any write operand has memory overlap with any read operand,
2746
         * eliminate all overlap by making temporary copies, by enabling
2747
         * NPY_OP_ITFLAG_FORCECOPY for the write operand to force WRITEBACKIFCOPY.
2748
         *
2749
         * Operands with NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE enabled are not
2750
         * considered overlapping if the arrays are exactly the same. In this
2751
         * case, the iterator loops through them in the same order element by
2752
         * element.  (As usual, the user-provided inner loop is assumed to be
2753
         * able to deal with this level of simple aliasing.)
2754
         */
2755 1
        for (iop = 0; iop < nop; ++iop) {
2756 1
            int may_share_memory = 0;
2757
            int iother;
2758

2759 1
            if (op[iop] == NULL) {
2760
                /* Iterator will always allocate */
2761 1
                continue;
2762
            }
2763

2764 1
            if (!(op_itflags[iop] & NPY_OP_ITFLAG_WRITE)) {
2765
                /*
2766
                 * Copy output operands only, not inputs.
2767
                 * A more sophisticated heuristic could be
2768
                 * substituted here later.
2769
                 */
2770 1
                continue;
2771
            }
2772

2773 1
            for (iother = 0; iother < nop; ++iother) {
2774 1
                if (iother == iop || op[iother] == NULL) {
2775 1
                    continue;
2776
                }
2777

2778 1
                if (!(op_itflags[iother] & NPY_OP_ITFLAG_READ)) {
2779
                    /* No data dependence for arrays not read from */
2780 1
                    continue;
2781
                }
2782

2783 1
                if (op_itflags[iother] & NPY_OP_ITFLAG_FORCECOPY) {
2784
                    /* Already copied */
2785 0
                    continue;
2786
                }
2787

2788
                /*
2789
                 * If the arrays are views to exactly the same data, no need
2790
                 * to make copies, if the caller (eg ufunc) says it accesses
2791
                 * data only in the iterator order.
2792
                 *
2793
                 * However, if there is internal overlap (e.g. a zero stride on
2794
                 * a non-unit dimension), a copy cannot be avoided.
2795
                 */
2796 1
                if ((op_flags[iop] & NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE) &&
2797 1
                    (op_flags[iother] & NPY_ITER_OVERLAP_ASSUME_ELEMENTWISE) &&
2798 1
                    PyArray_BYTES(op[iop]) == PyArray_BYTES(op[iother]) &&
2799 1
                    PyArray_NDIM(op[iop]) == PyArray_NDIM(op[iother]) &&
2800 1
                    PyArray_CompareLists(PyArray_DIMS(op[iop]),
2801 1
                                         PyArray_DIMS(op[iother]),
2802 1
                                         PyArray_NDIM(op[iop])) &&
2803 1
                    PyArray_CompareLists(PyArray_STRIDES(op[iop]),
2804 1
                                         PyArray_STRIDES(op[iother]),
2805 1
                                         PyArray_NDIM(op[iop])) &&
2806 1
                    PyArray_DESCR(op[iop]) == PyArray_DESCR(op[iother]) &&
2807 1
                    solve_may_have_internal_overlap(op[iop], 1) == 0) {
2808

2809 1
                    continue;
2810
                }
2811

2812
                /*
2813
                 * Use max work = 1. If the arrays are large, it might
2814
                 * make sense to go further.
2815
                 */
2816 1
                may_share_memory = solve_may_share_memory(op[iop],
2817
                                                          op[iother],
2818
                                                          1);
2819

2820 1
                if (may_share_memory) {
2821 1
                    op_itflags[iop] |= NPY_OP_ITFLAG_FORCECOPY;
2822 1
                    break;
2823
                }
2824
            }
2825
        }
2826
    }
2827

2828 1
    for (iop = 0; iop < nop; ++iop) {
2829
        /*
2830
         * Check whether there are any WRITEMASKED REDUCE operands
2831
         * which should be validated after all the strides are filled
2832
         * in.
2833
         */
2834 1
        if ((op_itflags[iop] &
2835
                (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) ==
2836
                        (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) {
2837 1
            check_writemasked_reductions = 1;
2838
        }
2839

2840
        /* NULL means an output the iterator should allocate */
2841 1
        if (op[iop] == NULL) {
2842
            PyArrayObject *out;
2843
            PyTypeObject *op_subtype;
2844

2845
            /* Check whether the subtype was disabled */
2846 1
            op_subtype = (op_flags[iop] & NPY_ITER_NO_SUBTYPE) ?
2847 1
                                                &PyArray_Type : subtype;
2848

2849
            /*
2850
             * Allocate the output array.
2851
             *
2852
             * Note that here, ndim is always correct if no op_axes was given
2853
             * (but the actual dimension of op can be larger). If op_axes
2854
             * is given, ndim is not actually used.
2855
             */
2856 1
            out = npyiter_new_temp_array(iter, op_subtype,
2857
                                        flags, &op_itflags[iop],
2858
                                        ndim,
2859
                                        NULL,
2860 1
                                        op_dtype[iop],
2861 1
                                        op_axes ? op_axes[iop] : NULL);
2862 1
            if (out == NULL) {
2863
                return 0;
2864
            }
2865

2866 1
            op[iop] = out;
2867

2868
            /*
2869
             * Now we need to replace the pointers and strides with values
2870
             * from the new array.
2871
             */
2872 1
            npyiter_replace_axisdata(iter, iop, op[iop], ndim,
2873 1
                    op_axes ? op_axes[iop] : NULL);
2874

2875
            /*
2876
             * New arrays are guaranteed true-aligned, but copy/cast code
2877
             * needs uint-alignment in addition.
2878
             */
2879 1
            if (IsUintAligned(out)) {
2880 1
                op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
2881
            }
2882
            /* New arrays need no cast */
2883 1
            op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST;
2884
        }
2885
        /*
2886
         * If casting is required, the operand is read-only, and
2887
         * it's an array scalar, make a copy whether or not the
2888
         * copy flag is enabled.
2889
         */
2890 1
        else if ((op_itflags[iop] & (NPY_OP_ITFLAG_CAST |
2891
                         NPY_OP_ITFLAG_READ |
2892
                         NPY_OP_ITFLAG_WRITE)) == (NPY_OP_ITFLAG_CAST |
2893 1
                                                   NPY_OP_ITFLAG_READ) &&
2894 1
                          PyArray_NDIM(op[iop]) == 0) {
2895
            PyArrayObject *temp;
2896 1
            Py_INCREF(op_dtype[iop]);
2897 1
            temp = (PyArrayObject *)PyArray_NewFromDescr(
2898
                                        &PyArray_Type, op_dtype[iop],
2899
                                        0, NULL, NULL, NULL, 0, NULL);
2900 1
            if (temp == NULL) {
2901
                return 0;
2902
            }
2903 1
            if (PyArray_CopyInto(temp, op[iop]) != 0) {
2904 0
                Py_DECREF(temp);
2905
                return 0;
2906
            }
2907 1
            Py_DECREF(op[iop]);
2908 1
            op[iop] = temp;
2909

2910
            /*
2911
             * Now we need to replace the pointers and strides with values
2912
             * from the temporary array.
2913
             */
2914 1
            npyiter_replace_axisdata(iter, iop, op[iop], 0, NULL);
2915

2916
            /*
2917
             * New arrays are guaranteed true-aligned, but copy/cast code
2918
             * needs uint-alignment in addition.
2919
             */
2920 1
            if (IsUintAligned(temp)) {
2921 1
                op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
2922
            }
2923
            /*
2924
             * New arrays need no cast, and in the case
2925
             * of scalars, always have stride 0 so never need buffering
2926
             */
2927 1
            op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER;
2928 1
            op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST;
2929 1
            if (itflags & NPY_ITFLAG_BUFFER) {
2930 1
                NBF_STRIDES(bufferdata)[iop] = 0;
2931
            }
2932
        }
2933
        /*
2934
         * Make a temporary copy if,
2935
         * 1. If casting is required and permitted, or,
2936
         * 2. If force-copy is requested
2937
         */
2938 1
        else if (((op_itflags[iop] & NPY_OP_ITFLAG_CAST) &&
2939 1
                        (op_flags[iop] &
2940 1
                        (NPY_ITER_COPY|NPY_ITER_UPDATEIFCOPY))) ||
2941 1
                 (op_itflags[iop] & NPY_OP_ITFLAG_FORCECOPY)) {
2942
            PyArrayObject *temp;
2943 1
            int ondim = PyArray_NDIM(op[iop]);
2944

2945
            /* Allocate the temporary array, if possible */
2946 1
            temp = npyiter_new_temp_array(iter, &PyArray_Type,
2947
                                        flags, &op_itflags[iop],
2948
                                        ondim,
2949 1
                                        PyArray_DIMS(op[iop]),
2950 1
                                        op_dtype[iop],
2951 1
                                        op_axes ? op_axes[iop] : NULL);
2952 1
            if (temp == NULL) {
2953
                return 0;
2954
            }
2955

2956
            /*
2957
             * If the data will be read, copy it into temp.
2958
             * TODO: It might be possible to do a view into
2959
             *       op[iop]'s mask instead here.
2960
             */
2961 1
            if (op_itflags[iop] & NPY_OP_ITFLAG_READ) {
2962 1
                if (PyArray_CopyInto(temp, op[iop]) != 0) {
2963 0
                    Py_DECREF(temp);
2964
                    return 0;
2965
                }
2966
            }
2967
            /* If the data will be written to, set WRITEBACKIFCOPY
2968
               and require a context manager */
2969 1
            if (op_itflags[iop] & NPY_OP_ITFLAG_WRITE) {
2970 1
                Py_INCREF(op[iop]);
2971 1
                if (PyArray_SetWritebackIfCopyBase(temp, op[iop]) < 0) {
2972 0
                    Py_DECREF(temp);
2973
                    return 0;
2974
                }
2975 1
                op_itflags[iop] |= NPY_OP_ITFLAG_HAS_WRITEBACK;
2976
            }
2977

2978 1
            Py_DECREF(op[iop]);
2979 1
            op[iop] = temp;
2980

2981
            /*
2982
             * Now we need to replace the pointers and strides with values
2983
             * from the temporary array.
2984
             */
2985 1
            npyiter_replace_axisdata(iter, iop, op[iop], ondim,
2986 1
                    op_axes ? op_axes[iop] : NULL);
2987

2988
            /*
2989
             * New arrays are guaranteed true-aligned, but copy/cast code
2990
             * additionally needs uint-alignment in addition.
2991
             */
2992 1
            if (IsUintAligned(temp)) {
2993 1
                op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
2994
            }
2995
            /* The temporary copy needs no cast */
2996 1
            op_itflags[iop] &= ~NPY_OP_ITFLAG_CAST;
2997
        }
2998
        else {
2999
            /*
3000
             * Buffering must be enabled for casting/conversion if copy
3001
             * wasn't specified.
3002
             */
3003 1
            if ((op_itflags[iop] & NPY_OP_ITFLAG_CAST) &&
3004
                                  !(itflags & NPY_ITFLAG_BUFFER)) {
3005 1
                PyErr_SetString(PyExc_TypeError,
3006
                        "Iterator operand required copying or buffering, "
3007
                        "but neither copying nor buffering was enabled");
3008 1
                return 0;
3009
            }
3010

3011
            /*
3012
             * If the operand is aligned, any buffering can use aligned
3013
             * optimizations.
3014
             */
3015 1
            if (IsUintAligned(op[iop])) {
3016 1
                op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
3017
            }
3018
        }
3019

3020
        /* Here we can finally check for contiguous iteration */
3021 1
        if (op_flags[iop] & NPY_ITER_CONTIG) {
3022 1
            NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
3023 1
            npy_intp stride = NAD_STRIDES(axisdata)[iop];
3024

3025 1
            if (stride != op_dtype[iop]->elsize) {
3026
                NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
3027
                                    "because of NPY_ITER_CONTIG\n");
3028 1
                op_itflags[iop] |= NPY_OP_ITFLAG_CAST;
3029 1
                if (!(itflags & NPY_ITFLAG_BUFFER)) {
3030 0
                    PyErr_SetString(PyExc_TypeError,
3031
                            "Iterator operand required buffering, "
3032
                            "to be contiguous as requested, but "
3033
                            "buffering is not enabled");
3034 0
                    return 0;
3035
                }
3036
            }
3037
        }
3038

3039
        /*
3040
         * If no alignment, byte swap, or casting is needed,
3041
         * the inner stride of this operand works for the whole
3042
         * array, we can set NPY_OP_ITFLAG_BUFNEVER.
3043
         */
3044 1
        if ((itflags & NPY_ITFLAG_BUFFER) &&
3045 1
                                !(op_itflags[iop] & NPY_OP_ITFLAG_CAST)) {
3046 1
            NpyIter_AxisData *axisdata = NIT_AXISDATA(iter);
3047 1
            if (ndim <= 1) {
3048 1
                op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER;
3049 1
                NBF_STRIDES(bufferdata)[iop] = NAD_STRIDES(axisdata)[iop];
3050
            }
3051 1
            else if (PyArray_NDIM(op[iop]) > 0) {
3052 1
                npy_intp stride, shape, innerstride = 0, innershape;
3053 1
                npy_intp sizeof_axisdata =
3054 1
                                    NIT_AXISDATA_SIZEOF(itflags, ndim, nop);
3055
                /* Find stride of the first non-empty shape */
3056 1
                for (idim = 0; idim < ndim; ++idim) {
3057 1
                    innershape = NAD_SHAPE(axisdata);
3058 1
                    if (innershape != 1) {
3059 1
                        innerstride = NAD_STRIDES(axisdata)[iop];
3060 1
                        break;
3061
                    }
3062 1
                    NIT_ADVANCE_AXISDATA(axisdata, 1);
3063
                }
3064 1
                ++idim;
3065 1
                NIT_ADVANCE_AXISDATA(axisdata, 1);
3066
                /* Check that everything could have coalesced together */
3067 1
                for (; idim < ndim; ++idim) {
3068 1
                    stride = NAD_STRIDES(axisdata)[iop];
3069 1
                    shape = NAD_SHAPE(axisdata);
3070 1
                    if (shape != 1) {
3071
                        /*
3072
                         * If N times the inner stride doesn't equal this
3073
                         * stride, the multi-dimensionality is needed.
3074
                         */
3075 1
                        if (innerstride*innershape != stride) {
3076
                            break;
3077
                        }
3078
                        else {
3079 1
                            innershape *= shape;
3080
                        }
3081
                    }
3082 1
                    NIT_ADVANCE_AXISDATA(axisdata, 1);
3083
                }
3084
                /*
3085
                 * If we looped all the way to the end, one stride works.
3086
                 * Set that stride, because it may not belong to the first
3087
                 * dimension.
3088
                 */
3089 1
                if (idim == ndim) {
3090 1
                    op_itflags[iop] |= NPY_OP_ITFLAG_BUFNEVER;
3091 1
                    NBF_STRIDES(bufferdata)[iop] = innerstride;
3092
                }
3093
            }
3094
        }
3095
    }
3096

3097 1
    if (check_writemasked_reductions) {
3098 1
        for (iop = 0; iop < nop; ++iop) {
3099
            /*
3100
             * Check whether there are any WRITEMASKED REDUCE operands
3101
             * which should be validated now that all the strides are filled
3102
             * in.
3103
             */
3104 1
            if ((op_itflags[iop] &
3105
                    (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) ==
3106
                        (NPY_OP_ITFLAG_WRITEMASKED | NPY_OP_ITFLAG_REDUCE)) {
3107
                /*
3108
                 * If the ARRAYMASK has 'bigger' dimensions
3109
                 * than this REDUCE WRITEMASKED operand,
3110
                 * the result would be more than one mask
3111
                 * value per reduction element, something which
3112
                 * is invalid. This function provides validation
3113
                 * for that.
3114
                 */
3115 1
                if (!check_mask_for_writemasked_reduction(iter