1
|
|
/*
|
2
|
|
* This is a PRIVATE INTERNAL NumPy header, intended to be used *ONLY*
|
3
|
|
* by the iterator implementation code. All other internal NumPy code
|
4
|
|
* should use the exposed iterator API.
|
5
|
|
*/
|
6
|
|
#ifndef NPY_ITERATOR_IMPLEMENTATION_CODE
|
7
|
|
#error "This header is intended for use ONLY by iterator implementation code."
|
8
|
|
#endif
|
9
|
|
|
10
|
|
#ifndef _NPY_PRIVATE__NDITER_IMPL_H_
|
11
|
|
#define _NPY_PRIVATE__NDITER_IMPL_H_
|
12
|
|
|
13
|
|
#define PY_SSIZE_T_CLEAN
|
14
|
|
#include "Python.h"
|
15
|
|
#include "structmember.h"
|
16
|
|
|
17
|
|
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
|
18
|
|
#define _MULTIARRAYMODULE
|
19
|
|
#include <numpy/arrayobject.h>
|
20
|
|
#include <npy_pycompat.h>
|
21
|
|
#include "convert_datatype.h"
|
22
|
|
|
23
|
|
#include "lowlevel_strided_loops.h"
|
24
|
|
|
25
|
|
/********** ITERATOR CONSTRUCTION TIMING **************/
/*
 * Set to 1 to collect cycle counts while an iterator is constructed.
 * The enabled variant reads the time-stamp counter through GCC-style
 * inline assembly ("rdtsc"), so it only builds where that syntax and
 * instruction are available.
 */
#define NPY_IT_CONSTRUCTION_TIMING 0

#if NPY_IT_CONSTRUCTION_TIMING
/* Store the current 64-bit counter value (hi:lo from rdtsc) in `var` */
#define NPY_IT_TIME_POINT(var) { \
            unsigned int hi, lo; \
            __asm__ __volatile__ ( \
                "rdtsc" \
                : "=d" (hi), "=a" (lo)); \
            var = (((unsigned long long)hi) << 32) | lo; \
        }
/*
 * Print a "start" line labelled with the spelling of `var` and remember
 * `var` in `c_temp`, a variable the calling scope has to provide.
 */
#define NPY_IT_PRINT_TIME_START(var) { \
            printf("%30s: start\n", #var); \
            c_temp = var; \
        }
/* Print the clocks elapsed from `c_temp` to `var`, then set `c_temp` to `var` */
#define NPY_IT_PRINT_TIME_VAR(var) { \
            printf("%30s: %6.0f clocks\n", #var, \
                    ((double)(var-c_temp))); \
            c_temp = var; \
        }
#else
/*
 * Timing disabled: all three macros expand to nothing, so call sites
 * compile whether or not they are wrapped in their own #if.
 */
#define NPY_IT_TIME_POINT(var)
#define NPY_IT_PRINT_TIME_START(var)
#define NPY_IT_PRINT_TIME_VAR(var)
#endif

/******************************************************/
|
50
|
|
|
51
|
|
/********** PRINTF DEBUG TRACING **************/
/* Switch to 1 to make the NPY_IT_DBG_PRINT* macros call printf */
#define NPY_IT_DBG_TRACING 0

#if NPY_IT_DBG_TRACING
/* PRINT emits a plain string; PRINT1..PRINT3 take a format and 1..3 values */
#  define NPY_IT_DBG_PRINT(s) printf("%s", s)
#  define NPY_IT_DBG_PRINT1(s, p1) printf(s, p1)
#  define NPY_IT_DBG_PRINT2(s, p1, p2) printf(s, p1, p2)
#  define NPY_IT_DBG_PRINT3(s, p1, p2, p3) printf(s, p1, p2, p3)
#else
/* Tracing off: the macros vanish and their arguments are never evaluated */
#  define NPY_IT_DBG_PRINT(s)
#  define NPY_IT_DBG_PRINT1(s, p1)
#  define NPY_IT_DBG_PRINT2(s, p1, p2)
#  define NPY_IT_DBG_PRINT3(s, p1, p2, p3)
#endif
/**********************************************/
|
66
|
|
|
67
|
|
/*
 * Rounds a number of bytes up to the next multiple of the intp size
 * (4 when NPY_SIZEOF_INTP is 4, otherwise 8).
 *
 * The argument is parenthesized inside the expansion so that any
 * expression can be passed, e.g. NPY_INTP_ALIGNED(a | b).
 */
#if NPY_SIZEOF_INTP == 4
#define NPY_INTP_ALIGNED(size) (((size) + 0x3)&(-0x4))
#else
#define NPY_INTP_ALIGNED(size) (((size) + 0x7)&(-0x8))
#endif
|
73
|
|
|
74
|
|
/*
 * Internal iterator flags.
 * Each constant is a distinct bit; they are tested against `itflags`.
 */

/* Axis permutation is the identity */
#define NPY_ITFLAG_IDENTPERM            0x0001
/* Axis permutation contains negative entries, i.e. flipped axes */
#define NPY_ITFLAG_NEGPERM              0x0002
/* An index is being tracked */
#define NPY_ITFLAG_HASINDEX             0x0004
/* A multi-index is being tracked */
#define NPY_ITFLAG_HASMULTIINDEX        0x0008
/* Iteration order was forced when the iterator was constructed */
#define NPY_ITFLAG_FORCEDORDER          0x0010
/* Inner loop is handled outside the iterator */
#define NPY_ITFLAG_EXLOOP               0x0020
/* Iterator is ranged */
#define NPY_ITFLAG_RANGE                0x0040
/* Iterator is buffered */
#define NPY_ITFLAG_BUFFER               0x0080
/* Grow the buffered inner loop whenever possible */
#define NPY_ITFLAG_GROWINNER            0x0100
/* Only a single iteration exists; iternext can be specialized for it */
#define NPY_ITFLAG_ONEITERATION         0x0200
/* Buffer allocation is postponed until the first Reset* call */
#define NPY_ITFLAG_DELAYBUF             0x0400
/* iternext needs API access while iterating */
#define NPY_ITFLAG_NEEDSAPI             0x0800
/* At least one operand is being reduced */
#define NPY_ITFLAG_REDUCE               0x1000
/* Reduce loops need not be recalculated by the next reduce iteration */
#define NPY_ITFLAG_REUSE_REDUCE_LOOPS   0x2000
|
104
|
|
|
105
|
|
/*
 * Internal per-operand iterator flags.
 * Each constant is a distinct bit.
 */

/* Operand will be written to */
#define NPY_OP_ITFLAG_WRITE         0x0001
/* Operand will be read from */
#define NPY_OP_ITFLAG_READ          0x0002
/* Operand requires type conversion, byte swapping or alignment */
#define NPY_OP_ITFLAG_CAST          0x0004
/* Operand never requires buffering */
#define NPY_OP_ITFLAG_BUFNEVER      0x0008
/* Operand is aligned */
#define NPY_OP_ITFLAG_ALIGNED       0x0010
/* Operand is being reduced */
#define NPY_OP_ITFLAG_REDUCE        0x0020
/* Operand is temporary and has no backing array */
#define NPY_OP_ITFLAG_VIRTUAL       0x0040
/* Copying buffer -> array for this operand must apply a mask */
#define NPY_OP_ITFLAG_WRITEMASKED   0x0080
/* Operand's data pointer currently points into its buffer */
#define NPY_OP_ITFLAG_USINGBUFFER   0x0100
/* Operand has to be copied (with UPDATEIFCOPY if ITFLAG_WRITE is also set) */
#define NPY_OP_ITFLAG_FORCECOPY     0x0200
/* Operand holds temporary data that is written back at dealloc */
#define NPY_OP_ITFLAG_HAS_WRITEBACK 0x0400
|
129
|
|
|
130
|
|
/*
|
131
|
|
* The data layout of the iterator is fully specified by
|
132
|
|
* a triple (itflags, ndim, nop). These three variables
|
133
|
|
* are expected to exist in all functions calling these macros,
|
134
|
|
* either as true variables initialized to the correct values
|
135
|
|
* from the iterator, or as constants in the case of specialized
|
136
|
|
* functions such as the various iternext functions.
|
137
|
|
*/
|
138
|
|
|
139
|
|
struct NpyIter_InternalOnly {
|
140
|
|
/* Initial fixed position data */
|
141
|
|
npy_uint32 itflags;
|
142
|
|
npy_uint8 ndim, nop;
|
143
|
|
npy_int8 maskop;
|
144
|
|
npy_intp itersize, iterstart, iterend;
|
145
|
|
/* iterindex is only used if RANGED or BUFFERED is set */
|
146
|
|
npy_intp iterindex;
|
147
|
|
/* The rest is variable */
|
148
|
|
char iter_flexdata;
|
149
|
|
};
|
150
|
|
|
151
|
|
typedef struct NpyIter_AD NpyIter_AxisData;
|
152
|
|
typedef struct NpyIter_BD NpyIter_BufferData;
|
153
|
|
|
154
|
|
typedef npy_int16 npyiter_opitflags;
|
155
|
|
|
156
|
|
/*
 * Byte sizes of the iterator members.
 *
 * Every macro takes the (itflags, ndim, nop) triple, even where some of
 * the arguments are unused.  Arguments are parenthesized in the
 * expansions so that expressions may be passed safely.
 */
#define NIT_PERM_SIZEOF(itflags, ndim, nop) \
        NPY_INTP_ALIGNED(NPY_MAXDIMS)
#define NIT_DTYPES_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*(nop))
#define NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*((nop)+1))
#define NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*((nop)+1))
#define NIT_OPERANDS_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*(nop))
#define NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop) \
        (NPY_INTP_ALIGNED(sizeof(npyiter_opitflags) * (nop)))
/* Zero unless buffered: 6 fixed intp fields plus 9 per-operand arrays */
#define NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop) \
        (((itflags)&NPY_ITFLAG_BUFFER) ? \
                ((NPY_SIZEOF_INTP)*(6 + 9*(nop))) : 0)
|
171
|
|
|
172
|
|
/*
 * Byte offsets of the iterator members, measured from iter->iter_flexdata.
 * Each offset is the previous member's offset plus the previous
 * member's size, so the members are laid out back to back.
 */
#define NIT_PERM_OFFSET() \
        (0)
#define NIT_DTYPES_OFFSET(itflags, ndim, nop) \
        (NIT_PERM_OFFSET() \
         + NIT_PERM_SIZEOF(itflags, ndim, nop))
#define NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop) \
        (NIT_DTYPES_OFFSET(itflags, ndim, nop) \
         + NIT_DTYPES_SIZEOF(itflags, ndim, nop))
#define NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop) \
        (NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop) \
         + NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop))
#define NIT_OPERANDS_OFFSET(itflags, ndim, nop) \
        (NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop) \
         + NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop))
#define NIT_OPITFLAGS_OFFSET(itflags, ndim, nop) \
        (NIT_OPERANDS_OFFSET(itflags, ndim, nop) \
         + NIT_OPERANDS_SIZEOF(itflags, ndim, nop))
#define NIT_BUFFERDATA_OFFSET(itflags, ndim, nop) \
        (NIT_OPITFLAGS_OFFSET(itflags, ndim, nop) \
         + NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop))
#define NIT_AXISDATA_OFFSET(itflags, ndim, nop) \
        (NIT_BUFFERDATA_OFFSET(itflags, ndim, nop) \
         + NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop))
|
196
|
|
|
197
|
|
/*
 * Internal-only ITERATOR DATA MEMBER ACCESS
 *
 * The first group reads fixed header fields of the iterator struct.
 * The second group returns typed pointers into the variable-size area
 * that begins at iter_flexdata; those macros additionally require
 * `itflags`, `ndim` and `nop` to be visible in the calling scope.
 *
 * `iter` is parenthesized in every expansion so that any pointer
 * expression (for example `*piter`) may be passed.
 */
#define NIT_ITFLAGS(iter) \
        ((iter)->itflags)
#define NIT_NDIM(iter) \
        ((iter)->ndim)
#define NIT_NOP(iter) \
        ((iter)->nop)
#define NIT_MASKOP(iter) \
        ((iter)->maskop)
#define NIT_ITERSIZE(iter) \
        ((iter)->itersize)
#define NIT_ITERSTART(iter) \
        ((iter)->iterstart)
#define NIT_ITEREND(iter) \
        ((iter)->iterend)
#define NIT_ITERINDEX(iter) \
        ((iter)->iterindex)
#define NIT_PERM(iter)  ((npy_int8 *)( \
        &(iter)->iter_flexdata + NIT_PERM_OFFSET()))
#define NIT_DTYPES(iter) ((PyArray_Descr **)( \
        &(iter)->iter_flexdata + NIT_DTYPES_OFFSET(itflags, ndim, nop)))
#define NIT_RESETDATAPTR(iter) ((char **)( \
        &(iter)->iter_flexdata + NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop)))
#define NIT_BASEOFFSETS(iter) ((npy_intp *)( \
        &(iter)->iter_flexdata + NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop)))
#define NIT_OPERANDS(iter) ((PyArrayObject **)( \
        &(iter)->iter_flexdata + NIT_OPERANDS_OFFSET(itflags, ndim, nop)))
#define NIT_OPITFLAGS(iter) ((npyiter_opitflags *)( \
        &(iter)->iter_flexdata + NIT_OPITFLAGS_OFFSET(itflags, ndim, nop)))
#define NIT_BUFFERDATA(iter) ((NpyIter_BufferData *)( \
        &(iter)->iter_flexdata + NIT_BUFFERDATA_OFFSET(itflags, ndim, nop)))
#define NIT_AXISDATA(iter) ((NpyIter_AxisData *)( \
        &(iter)->iter_flexdata + NIT_AXISDATA_OFFSET(itflags, ndim, nop)))
|
230
|
|
|
231
|
|
/* Internal-only BUFFERDATA MEMBER ACCESS */
|
232
|
|
struct NpyIter_BD {
|
233
|
|
npy_intp buffersize, size, bufiterend,
|
234
|
|
reduce_pos, reduce_outersize, reduce_outerdim;
|
235
|
|
npy_intp bd_flexdata;
|
236
|
|
};
|
237
|
|
/* Fixed BUFFERDATA fields */
#define NBF_BUFFERSIZE(bufferdata) \
        ((bufferdata)->buffersize)
#define NBF_SIZE(bufferdata) \
        ((bufferdata)->size)
#define NBF_BUFITEREND(bufferdata) \
        ((bufferdata)->bufiterend)
#define NBF_REDUCE_POS(bufferdata) \
        ((bufferdata)->reduce_pos)
#define NBF_REDUCE_OUTERSIZE(bufferdata) \
        ((bufferdata)->reduce_outersize)
#define NBF_REDUCE_OUTERDIM(bufferdata) \
        ((bufferdata)->reduce_outerdim)

/*
 * Per-operand arrays stored after the fixed fields.  Array k starts
 * k*nop slots past bd_flexdata; all but NBF_STRIDES need `nop` to be
 * visible in the calling scope.
 */
#define NBF_STRIDES(bufferdata) \
        (&(bufferdata)->bd_flexdata + 0)
#define NBF_PTRS(bufferdata) \
        ((char **)(&(bufferdata)->bd_flexdata + 1*(nop)))
#define NBF_REDUCE_OUTERSTRIDES(bufferdata) \
        ((&(bufferdata)->bd_flexdata + 2*(nop)))
#define NBF_REDUCE_OUTERPTRS(bufferdata) \
        ((char **)(&(bufferdata)->bd_flexdata + 3*(nop)))
#define NBF_READTRANSFERFN(bufferdata) \
        ((PyArray_StridedUnaryOp **)(&(bufferdata)->bd_flexdata + 4*(nop)))
#define NBF_READTRANSFERDATA(bufferdata) \
        ((NpyAuxData **)(&(bufferdata)->bd_flexdata + 5*(nop)))
#define NBF_WRITETRANSFERFN(bufferdata) \
        ((PyArray_StridedUnaryOp **)(&(bufferdata)->bd_flexdata + 6*(nop)))
#define NBF_WRITETRANSFERDATA(bufferdata) \
        ((NpyAuxData **)(&(bufferdata)->bd_flexdata + 7*(nop)))
#define NBF_BUFFERS(bufferdata) \
        ((char **)(&(bufferdata)->bd_flexdata + 8*(nop)))
|
261
|
|
|
262
|
|
/* Internal-only AXISDATA MEMBER ACCESS. */
|
263
|
|
struct NpyIter_AD {
|
264
|
|
npy_intp shape, index;
|
265
|
|
npy_intp ad_flexdata;
|
266
|
|
};
|
267
|
|
/* Fixed AXISDATA fields */
#define NAD_SHAPE(axisdata) ((axisdata)->shape)
#define NAD_INDEX(axisdata) ((axisdata)->index)
/* Strides start at the first flexible slot */
#define NAD_STRIDES(axisdata) ( \
        &(axisdata)->ad_flexdata + 0)
/*
 * Pointers start nop+1 slots after the strides.  The offset is added
 * before the cast, matching NBF_PTRS, so the arithmetic is done in
 * units of the ad_flexdata element type.  Requires `nop` in scope.
 */
#define NAD_PTRS(axisdata) ((char **) \
        (&(axisdata)->ad_flexdata + 1*((nop)+1)))

/*
 * Number of strides per axis: one per operand plus one more when an
 * index is tracked.  Requires `itflags` and `nop` in scope.
 */
#define NAD_NSTRIDES() \
        ((nop) + (((itflags)&NPY_ITFLAG_HASINDEX) ? 1 : 0))
|
276
|
|
|
277
|
|
/*
 * Size in bytes of one AXISDATA struct within the iterator.
 * Counted in intp-sized slots: one for shape, one for index, then
 * nop+1 intp strides and nop+1 char* pointers.
 */
#define NIT_AXISDATA_SIZEOF(itflags, ndim, nop) \
        ((1 + 1 + 2*((nop)+1)) * NPY_SIZEOF_INTP)
|
286
|
|
|
287
|
|
/*
 * Helpers for stepping through the AXISDATA entries.
 *
 * Both expect a variable named `sizeof_axisdata` in the calling scope
 * that was previously set to NIT_AXISDATA_SIZEOF(itflags, ndim, nop).
 * NIT_INDEX_AXISDATA yields the entry `index` steps away from
 * `axisdata`; NIT_ADVANCE_AXISDATA assigns that entry back to
 * `axisdata`, moving it by `count` steps.
 */
#define NIT_INDEX_AXISDATA(axisdata, index) \
        ((NpyIter_AxisData *)(((char *)(axisdata)) + (index)*sizeof_axisdata))
#define NIT_ADVANCE_AXISDATA(axisdata, count) \
        axisdata = NIT_INDEX_AXISDATA(axisdata, count)
|
296
|
|
|
297
|
|
/*
 * Size of the whole iterator in bytes: the fixed struct, everything up
 * to the start of the AXISDATA area, and one AXISDATA entry per
 * dimension.  At least one entry is always counted, even if ndim is 0.
 *
 * `ndim` is parenthesized so that an expression argument (including a
 * conditional expression) is handled correctly; it is evaluated twice.
 */
#define NIT_SIZEOF_ITERATOR(itflags, ndim, nop) ( \
        sizeof(struct NpyIter_InternalOnly) + \
        NIT_AXISDATA_OFFSET(itflags, ndim, nop) + \
        NIT_AXISDATA_SIZEOF(itflags, ndim, nop)*((ndim) ? (ndim) : 1))
|
302
|
|
|
303
|
|
/* Internal helper functions shared between implementation files */
|
304
|
|
|
305
|
|
/**
|
306
|
|
* Undo the axis permutation of the iterator. When the operand has fewer
|
307
|
|
* dimensions then the iterator, this can return negative values for
|
308
|
|
* inserted (broadcast) dimensions.
|
309
|
|
*
|
310
|
|
* @param axis Axis for which to undo the iterator axis permutation.
|
311
|
|
* @param ndim If `op_axes` is being used, this is the iterator dimension,
|
312
|
|
* otherwise this is the operand dimension.
|
313
|
|
* @param perm The iterator axis permutation NIT_PERM(iter)
|
314
|
|
* @param axis_flipped Will be set to true if this is a flipped axis
|
315
|
|
* (i.e. is iterated in reversed order) and otherwise false.
|
316
|
|
* Can be NULL if the information is not needed.
|
317
|
|
* @return The unpermuted axis. Without `op_axes` this is correct, with
|
318
|
|
* `op_axes` this indexes into `op_axes` (unpermuted iterator axis)
|
319
|
|
*/
|
320
|
|
static NPY_INLINE int
|
321
|
|
npyiter_undo_iter_axis_perm(
|
322
|
|
int axis, int ndim, const npy_int8 *perm, npy_bool *axis_flipped)
|
323
|
|
{
|
324
|
1
|
npy_int8 p = perm[axis];
|
325
|
|
/* The iterator treats axis reversed, thus adjust by ndim */
|
326
|
1
|
npy_bool flipped = p < 0;
|
327
|
|
if (axis_flipped != NULL) {
|
328
|
1
|
*axis_flipped = flipped;
|
329
|
|
}
|
330
|
1
|
if (flipped) {
|
331
|
1
|
axis = ndim + p;
|
332
|
|
}
|
333
|
|
else {
|
334
|
1
|
axis = ndim - p - 1;
|
335
|
|
}
|
336
|
|
return axis;
|
337
|
|
}
|
338
|
|
|
339
|
|
NPY_NO_EXPORT void
|
340
|
|
npyiter_coalesce_axes(NpyIter *iter);
|
341
|
|
NPY_NO_EXPORT int
|
342
|
|
npyiter_allocate_buffers(NpyIter *iter, char **errmsg);
|
343
|
|
NPY_NO_EXPORT void
|
344
|
|
npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex);
|
345
|
|
NPY_NO_EXPORT int
|
346
|
|
npyiter_copy_from_buffers(NpyIter *iter);
|
347
|
|
NPY_NO_EXPORT int
|
348
|
|
npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs);
|
349
|
|
NPY_NO_EXPORT void
|
350
|
|
npyiter_clear_buffers(NpyIter *iter);
|
351
|
|
|
352
|
|
#endif
|