1
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
2
#define _UMATHMODULE
3
#define _MULTIARRAYMODULE
4

5
#include "Python.h"
6

7
#include "numpy/npy_3kcompat.h"
8

9
#include "lowlevel_strided_loops.h"
10
#include "numpy/arrayobject.h"
11

12
#include "descriptor.h"
13
#include "convert_datatype.h"
14
#include "dtypemeta.h"
15

16
#include "array_coercion.h"
17
#include "ctors.h"
18
#include "common.h"
19
#include "_datetime.h"
20
#include "npy_import.h"
21

22

23
/*
24
 * This file defines helpers for some of the ctors.c functions which
25
 * create an array from Python sequences and types.
26
 * When creating an array with ``np.array(...)`` we have to do two main things:
27
 *
28
 * 1. Find the exact shape of the resulting array
29
 * 2. Find the correct dtype of the resulting array.
30
 *
31
 * In most cases these two things can be done in a single processing step.
32
 * There are in principle three different calls that should be distinguished:
33
 *
34
 * 1. The user calls ``np.array(..., dtype=np.dtype("<f8"))``
35
 * 2. The user calls ``np.array(..., dtype="S")``
36
 * 3. The user calls ``np.array(...)``
37
 *
38
 * In the first case, in principle only the shape needs to be found. In the
39
 * second case, the DType class (e.g. string) is already known but the DType
40
 * instance (e.g. length of the string) has to be found.
41
 * In the last case the DType class needs to be found as well. Note that
42
 * it is not necessary to find the DType class of the entire array, but
43
 * the DType class needs to be found for each element before the actual
44
 * dtype instance can be found.
45
 *
46
 * Further, there are a few other things to keep in mind when coercing arrays:
47
 *
48
 *   * For UFunc promotion, Python scalars need to be handled specially to
49
 *     allow value based casting.  This requires python complex/float to
50
 *     have their own DTypes.
51
 *   * It is necessary to decide whether or not a sequence is an element.
52
 *     For example tuples are considered elements for structured dtypes, but
53
 *     otherwise are considered sequences.
54
 *     This means that if a dtype is given (either as a class or instance),
55
 *     it can affect the dimension discovery part.
56
 *     For the "special" NumPy types structured void and "c" (single character)
57
 *     this is special cased.  For future user-types, this is currently
58
 *     handled by calling an `is_known_scalar` method.  This method
59
 *     currently ensures that Python numerical types are handled quickly.
60
 *
61
 * In the initial version of this implementation, it is assumed that dtype
62
 * discovery can be implemented sufficiently fast.  That is, it is not
63
 * necessary to create fast paths that only find the correct shape e.g. when
64
 * ``dtype=np.dtype("f8")`` is given.
65
 *
66
 * The code here avoids multiple conversions of array-like objects (including
67
 * sequences). These objects are cached after conversion, which will require
68
 * additional memory, but can drastically speed up coercion from array
69
 * like objects.
70
 */
71

72

73
/*
74
 * For finding a DType quickly from a type, it is easiest to have a
75
 * mapping of pytype -> DType.
76
 * TODO: This mapping means that it is currently impossible to delete a
77
 *       pair of pytype <-> DType.  To resolve this, it is necessary to
78
 *       weakly reference the pytype. As long as the pytype is alive, we
79
 *       want to be able to use `np.array([pytype()])`.
80
 *       It should be possible to retrofit this without too much trouble
81
 *       (all type objects support weak references).
82
 */
83
/* Created lazily by _PyArray_MapPyTypeToDType() on the first registration. */
PyObject *_global_pytype_to_type_dict = NULL;
84

85

86
/*
 * Bit flags used to track or signal state during dtype and shape discovery.
 * Each member occupies its own bit so that they can be OR-ed together.
 */
enum _dtype_discovery_flags {
    /* Set by handle_scalar() when update_shape() reports a mismatch. */
    FOUND_RAGGED_ARRAY = 0x01,
    /* Records that the (currently disabled) subclass warning was given. */
    GAVE_SUBCLASS_WARNING = 0x02,
    /* Set by handle_promotion() when PyArray_PromoteTypes() failed. */
    PROMOTION_FAILED = 0x04,
    /* NOTE(review): not used in this part of the file; presumably strings
     * are then unpacked as sequences of characters -- confirm. */
    DISCOVER_STRINGS_AS_SEQUENCES = 0x08,
    /* NOTE(review): not used in this part of the file; presumably tuples
     * are then treated as elements (structured dtypes) -- confirm. */
    DISCOVER_TUPLES_AS_ELEMENTS = 0x10,
    /* Set by update_shape() once a non-sequence has fixed the shape. */
    MAX_DIMS_WAS_REACHED = 0x20,
    /* When set, handle_scalar() skips descriptor discovery and promotion. */
    DESCRIPTOR_WAS_SET = 0x40,
};
96

97

98
/**
99
 * Adds known sequence types to the global type dictionary, note that when
100
 * a DType is passed in, this lookup may be ignored.
101
 *
102
 * @return -1 on error 0 on success
103
 */
104
static int
105 1
_prime_global_pytype_to_type_dict(void)
106
{
107
    int res;
108

109
    /* Add the basic Python sequence types */
110 1
    res = PyDict_SetItem(_global_pytype_to_type_dict,
111
                         (PyObject *)&PyList_Type, Py_None);
112 1
    if (res < 0) {
113
        return -1;
114
    }
115 1
    res = PyDict_SetItem(_global_pytype_to_type_dict,
116
                         (PyObject *)&PyTuple_Type, Py_None);
117 1
    if (res < 0) {
118
        return -1;
119
    }
120
    /* NumPy Arrays are not handled as scalars */
121 1
    res = PyDict_SetItem(_global_pytype_to_type_dict,
122
                         (PyObject *)&PyArray_Type, Py_None);
123 1
    if (res < 0) {
124
        return -1;
125
    }
126 1
    return 0;
127
}
128

129

130
/**
131
 * Add a new mapping from a python type to the DType class.
132
 *
133
 * This assumes that the DType class is guaranteed to hold on the
134
 * python type (this assumption is guaranteed).
135
 * This functionality supercedes ``_typenum_fromtypeobj``.
136
 *
137
 * @param DType DType to map the python type to
138
 * @param pytype Python type to map from
139
 * @param userdef Whether or not it is user defined. We ensure that user
140
 *        defined scalars subclass from our scalars (for now).
141
 */
142
NPY_NO_EXPORT int
143 1
_PyArray_MapPyTypeToDType(
144
        PyArray_DTypeMeta *DType, PyTypeObject *pytype, npy_bool userdef)
145
{
146 1
    PyObject *Dtype_obj = (PyObject *)DType;
147

148 1
    if (userdef) {
149
        /*
150
         * It seems we did not strictly enforce this in the legacy dtype
151
         * API, but assume that it is always true. Further, this could be
152
         * relaxed in the future. In particular we should have a new
153
         * superclass of ``np.generic`` in order to note enforce the array
154
         * scalar behaviour.
155
         */
156 1
        if (!PyObject_IsSubclass((PyObject *)pytype, (PyObject *)&PyGenericArrType_Type)) {
157 0
            PyErr_Format(PyExc_RuntimeError,
158
                    "currently it is only possible to register a DType "
159
                    "for scalars deriving from `np.generic`, got '%S'.",
160
                    (PyObject *)pytype);
161 0
            return -1;
162
        }
163
    }
164

165
    /* Create the global dictionary if it does not exist */
166 1
    if (NPY_UNLIKELY(_global_pytype_to_type_dict == NULL)) {
167 1
        _global_pytype_to_type_dict = PyDict_New();
168 1
        if (_global_pytype_to_type_dict == NULL) {
169
            return -1;
170
        }
171 1
        if (_prime_global_pytype_to_type_dict() < 0) {
172
            return -1;
173
        }
174
    }
175

176 1
    int res = PyDict_Contains(_global_pytype_to_type_dict, (PyObject *)pytype);
177 1
    if (res < 0) {
178
        return -1;
179
    }
180 1
    else if (res) {
181 0
        PyErr_SetString(PyExc_RuntimeError,
182
                "Can only map one python type to DType.");
183 0
        return -1;
184
    }
185

186 1
    return PyDict_SetItem(_global_pytype_to_type_dict,
187
            (PyObject *)pytype, Dtype_obj);
188
}
189

190

191
/**
192
 * Lookup the DType for a registered known python scalar type.
193
 *
194
 * @param pytype Python Type to look up
195
 * @return DType, None if it a known non-scalar, or NULL if an unknown object.
196
 */
197
static NPY_INLINE PyArray_DTypeMeta *
198 1
discover_dtype_from_pytype(PyTypeObject *pytype)
199
{
200
    PyObject *DType;
201

202 1
    if (pytype == &PyArray_Type) {
203 1
        Py_INCREF(Py_None);
204 1
        return (PyArray_DTypeMeta *)Py_None;
205
    }
206

207 1
    DType = PyDict_GetItem(_global_pytype_to_type_dict, (PyObject *)pytype);
208 1
    if (DType == NULL) {
209
        /* the python type is not known */
210
        return NULL;
211
    }
212

213 1
    Py_INCREF(DType);
214 1
    if (DType == Py_None) {
215
        return (PyArray_DTypeMeta *)Py_None;
216
    }
217
    assert(PyObject_TypeCheck(DType, (PyTypeObject *)&PyArrayDTypeMeta_Type));
218 1
    return (PyArray_DTypeMeta *)DType;
219
}
220

221

222
/**
223
 * Find the correct DType class for the given python type. If flags is NULL
224
 * this is not used to discover a dtype, but only for conversion to an
225
 * existing dtype. In that case the Python (not NumPy) scalar subclass
226
 * checks are skipped.
227
 *
228
 * @param obj The python object, mainly type(pyobj) is used, the object
229
 *        is passed to reuse existing code at this time only.
230
 * @param flags Flags used to know if warnings were already given. If
231
 *        flags is NULL, this is not
232
 * @param fixed_DType if not NULL, will be checked first for whether or not
233
 *        it can/wants to handle the (possible) scalar value.
234
 * @return New reference to either a DType class, Py_None, or NULL on error.
235
 */
236
static NPY_INLINE PyArray_DTypeMeta *
237 1
discover_dtype_from_pyobject(
238
        PyObject *obj, enum _dtype_discovery_flags *flags,
239
        PyArray_DTypeMeta *fixed_DType)
240
{
241 1
    if (fixed_DType != NULL) {
242
        /*
243
         * Let the given DType handle the discovery.  This is when the
244
         * scalar-type matches exactly, or the DType signals that it can
245
         * handle the scalar-type.  (Even if it cannot handle here it may be
246
         * asked to attempt to do so later, if no other matching DType exists.)
247
         */
248 1
        if ((Py_TYPE(obj) == fixed_DType->scalar_type) ||
249 1
                (fixed_DType->is_known_scalar_type != NULL &&
250 1
                 fixed_DType->is_known_scalar_type(fixed_DType, Py_TYPE(obj)))) {
251 1
            Py_INCREF(fixed_DType);
252 1
            return fixed_DType;
253
        }
254
    }
255

256 1
    PyArray_DTypeMeta *DType = discover_dtype_from_pytype(Py_TYPE(obj));
257 1
    if (DType != NULL) {
258
        return DType;
259
    }
260
    /*
261
     * At this point we have not found a clear mapping, but mainly for
262
     * backward compatibility we have to make some further attempts at
263
     * interpreting the input as a known scalar type.
264
     */
265
    PyArray_Descr *legacy_descr;
266 1
    if (PyArray_IsScalar(obj, Generic)) {
267 1
        legacy_descr = PyArray_DescrFromScalar(obj);
268 1
        if (legacy_descr == NULL) {
269
            return NULL;
270
        }
271
    }
272 1
    else if (flags == NULL) {
273 1
        Py_INCREF(Py_None);
274 1
        return (PyArray_DTypeMeta *)Py_None;
275
    }
276 1
    else if (PyBytes_Check(obj)) {
277 0
        legacy_descr = PyArray_DescrFromType(NPY_BYTE);
278
    }
279 1
    else if (PyUnicode_Check(obj)) {
280 0
        legacy_descr = PyArray_DescrFromType(NPY_UNICODE);
281
    }
282
    else {
283 1
        legacy_descr = _array_find_python_scalar_type(obj);
284
    }
285

286 1
    if (legacy_descr != NULL) {
287 1
        DType = NPY_DTYPE(legacy_descr);
288 1
        Py_INCREF(DType);
289 1
        Py_DECREF(legacy_descr);
290
        /* TODO: Enable warning about subclass handling */
291
        if (0 && !((*flags) & GAVE_SUBCLASS_WARNING)) {
292
            if (DEPRECATE_FUTUREWARNING(
293
                    "in the future NumPy will not automatically find the "
294
                    "dtype for subclasses of scalars known to NumPy (i.e. "
295
                    "python types). Use the appropriate `dtype=...` to create "
296
                    "this array. This will use the `object` dtype or raise "
297
                    "an error in the future.") < 0) {
298
                return NULL;
299
            }
300
            *flags |= GAVE_SUBCLASS_WARNING;
301
        }
302
        return DType;
303
    }
304 1
    Py_INCREF(Py_None);
305 1
    return (PyArray_DTypeMeta *)Py_None;
306
}
307

308

309
/*
310
 * This function should probably become public API eventually.  At this
311
 * time it is implemented by falling back to `PyArray_AdaptFlexibleDType`.
312
 * We will use `CastingImpl[from, to].adjust_descriptors(...)` to implement
313
 * this logic.
314
 */
315
static NPY_INLINE PyArray_Descr *
316 1
cast_descriptor_to_fixed_dtype(
317
        PyArray_Descr *descr, PyArray_DTypeMeta *fixed_DType)
318
{
319 1
    if (fixed_DType == NULL) {
320
        /* Nothing to do, we only need to promote the new dtype */
321 1
        Py_INCREF(descr);
322 1
        return descr;
323
    }
324

325 1
    if (!fixed_DType->parametric) {
326
        /*
327
         * Don't actually do anything, the default is always the result
328
         * of any cast.
329
         */
330 0
        return fixed_DType->default_descr(fixed_DType);
331
    }
332 1
    if (PyObject_TypeCheck((PyObject *)descr, (PyTypeObject *)fixed_DType)) {
333 1
        Py_INCREF(descr);
334 1
        return descr;
335
    }
336
    /*
337
     * TODO: When this is implemented for all dtypes, the special cases
338
     *       can be removed...
339
     */
340 1
    if (fixed_DType->legacy && fixed_DType->parametric &&
341 1
            NPY_DTYPE(descr)->legacy) {
342 1
        PyArray_Descr *flex_dtype = PyArray_DescrFromType(fixed_DType->type_num);
343 1
        return PyArray_AdaptFlexibleDType(descr, flex_dtype);
344
    }
345

346 0
    PyErr_SetString(PyExc_NotImplementedError,
347
            "Must use casting to find the correct dtype, this is "
348
            "not yet implemented! "
349
            "(It should not be possible to hit this code currently!)");
350 0
    return NULL;
351
}
352

353

354
/**
355
 * Discover the correct descriptor from a known DType class and scalar.
356
 * If the fixed DType can discover a dtype instance/descr all is fine,
357
 * if it cannot and DType is used instead, a cast will have to be tried.
358
 *
359
 * @param fixed_DType A user provided fixed DType, can be NULL
360
 * @param DType A discovered DType (by discover_dtype_from_pyobject);
361
 *        this can be identical to `fixed_DType`, if it obj is a
362
 *        known scalar. Can be `NULL` indicating no known type.
363
 * @param obj The Python scalar object. At the time of calling this function
364
 *        it must be known that `obj` should represent a scalar.
365
 */
366
static NPY_INLINE PyArray_Descr *
367 1
find_scalar_descriptor(
368
        PyArray_DTypeMeta *fixed_DType, PyArray_DTypeMeta *DType,
369
        PyObject *obj)
370
{
371
    PyArray_Descr *descr;
372

373 1
    if (DType == NULL && fixed_DType == NULL) {
374
        /* No known DType and no fixed one means we go to object. */
375 1
        return PyArray_DescrFromType(NPY_OBJECT);
376
    }
377 1
    else if (DType == NULL) {
378
        /*
379
         * If no DType is known/found, give the fixed give one a second
380
         * chance.  This allows for example string, to call `str(obj)` to
381
         * figure out the length for arbitrary objects.
382
         */
383 1
        descr = fixed_DType->discover_descr_from_pyobject(fixed_DType, obj);
384
    }
385
    else {
386 1
        descr = DType->discover_descr_from_pyobject(DType, obj);
387
    }
388 1
    if (descr == NULL) {
389
        return NULL;
390
    }
391 1
    if (fixed_DType == NULL) {
392
        return descr;
393
    }
394

395 1
    Py_SETREF(descr, cast_descriptor_to_fixed_dtype(descr, fixed_DType));
396
    return descr;
397
}
398

399

400
/**
401
 * Assign a single element in an array from a python value.
402
 *
403
 * The dtypes SETITEM should only be trusted to generally do the right
404
 * thing if something is known to be a scalar *and* is of a python type known
405
 * to the DType (which should include all basic Python math types), but in
406
 * general a cast may be necessary.
407
 * This function handles the cast, which is for example hit when assigning
408
 * a float128 to complex128.
409
 *
410
 * At this time, this function does not support arrays (historically we
411
 * mainly supported arrays through `__float__()`, etc.). Such support should
412
 * possibly be added (although when called from `PyArray_AssignFromCache`
413
 * the input cannot be an array).
414
 * Note that this is also problematic for some array-likes, such as
415
 * `astropy.units.Quantity` and `np.ma.masked`.  These are used to us calling
416
 * `__float__`/`__int__` for 0-D instances in many cases.
417
 * Eventually, we may want to define this as wrong: They must use DTypes
418
 * instead of (only) subclasses.  Until then, here as well as in
419
 * `PyArray_AssignFromCache` (which already does this), we need to special
420
 * case 0-D array-likes to behave like arbitrary (unknown!) Python objects.
421
 *
422
 * @param descr
423
 * @param item
424
 * @param value
425
 * @return 0 on success -1 on failure.
426
 */
427
/*
428
 * TODO: This function should possibly be public API.
429
 */
430
NPY_NO_EXPORT int
431 1
PyArray_Pack(PyArray_Descr *descr, char *item, PyObject *value)
432
{
433 1
    PyArrayObject_fields arr_fields = {
434
            .flags = NPY_ARRAY_WRITEABLE,  /* assume array is not behaved. */
435
        };
436 1
    Py_SET_TYPE(&arr_fields, &PyArray_Type);
437 1
    Py_SET_REFCNT(&arr_fields, 1);
438

439 1
    if (NPY_UNLIKELY(descr->type_num == NPY_OBJECT)) {
440
        /*
441
         * We always have store objects directly, casting will lose some
442
         * type information. Any other dtype discards the type information.
443
         * TODO: For a Categorical[object] this path may be necessary?
444
         */
445 1
        arr_fields.descr = descr;
446 1
        return descr->f->setitem(value, item, &arr_fields);
447
    }
448

449
    /* discover_dtype_from_pyobject includes a check for is_known_scalar_type */
450 1
    PyArray_DTypeMeta *DType = discover_dtype_from_pyobject(
451 1
            value, NULL, NPY_DTYPE(descr));
452 1
    if (DType == NULL) {
453
        return -1;
454
    }
455 1
    if (DType == NPY_DTYPE(descr) || DType == (PyArray_DTypeMeta *)Py_None) {
456
        /* We can set the element directly (or at least will try to) */
457 1
        Py_XDECREF(DType);
458 1
        arr_fields.descr = descr;
459 1
        return descr->f->setitem(value, item, &arr_fields);
460
    }
461
    PyArray_Descr *tmp_descr;
462 1
    tmp_descr = DType->discover_descr_from_pyobject(DType, value);
463 1
    Py_DECREF(DType);
464 1
    if (tmp_descr == NULL) {
465
        return -1;
466
    }
467

468 1
    char *data = PyObject_Malloc(tmp_descr->elsize);
469 1
    if (data == NULL) {
470 0
        PyErr_NoMemory();
471 0
        Py_DECREF(tmp_descr);
472
        return -1;
473
    }
474 1
    if (PyDataType_FLAGCHK(tmp_descr, NPY_NEEDS_INIT)) {
475 1
        memset(data, 0, tmp_descr->elsize);
476
    }
477 1
    arr_fields.descr = tmp_descr;
478 1
    if (tmp_descr->f->setitem(value, data, &arr_fields) < 0) {
479 0
        PyObject_Free(data);
480 0
        Py_DECREF(tmp_descr);
481
        return -1;
482
    }
483 1
    if (PyDataType_REFCHK(tmp_descr)) {
484
        /* We could probably use move-references above */
485 0
        PyArray_Item_INCREF(data, tmp_descr);
486
    }
487

488 1
    int res = 0;
489 1
    int needs_api = 0;
490
    PyArray_StridedUnaryOp *stransfer;
491
    NpyAuxData *transferdata;
492 1
    if (PyArray_GetDTypeTransferFunction(
493
            0, 0, 0, tmp_descr, descr, 0, &stransfer, &transferdata,
494
            &needs_api) == NPY_FAIL) {
495
        res = -1;
496
        goto finish;
497
    }
498 1
    if (stransfer(item, 0, data, 0, 1, tmp_descr->elsize, transferdata) < 0) {
499 0
        res = -1;
500
    }
501 1
    NPY_AUXDATA_FREE(transferdata);
502

503 1
  finish:
504 1
    if (PyDataType_REFCHK(tmp_descr)) {
505
        /* We could probably use move-references above */
506 0
        PyArray_Item_XDECREF(data, tmp_descr);
507
    }
508 1
    PyObject_Free(data);
509 1
    Py_DECREF(tmp_descr);
510
    return res;
511
}
512

513

514
static int
515 1
update_shape(int curr_ndim, int *max_ndim,
516
             npy_intp out_shape[NPY_MAXDIMS], int new_ndim,
517
             const npy_intp new_shape[NPY_MAXDIMS], npy_bool sequence,
518
             enum _dtype_discovery_flags *flags)
519
{
520 1
    int success = 0;  /* unsuccessful if array is ragged */
521 1
    const npy_bool max_dims_reached = *flags & MAX_DIMS_WAS_REACHED;
522

523 1
    if (curr_ndim + new_ndim > *max_ndim) {
524 1
        success = -1;
525
        /* Only update/check as many dims as possible, max_ndim is unchanged */
526 1
        new_ndim = *max_ndim - curr_ndim;
527
    }
528 1
    else if (!sequence && (*max_ndim != curr_ndim + new_ndim)) {
529
        /*
530
         * Sequences do not update max_ndim, otherwise shrink and check.
531
         * This is depth first, so if it is already set, `out_shape` is filled.
532
         */
533 1
        *max_ndim = curr_ndim + new_ndim;
534
        /* If a shape was already set, this is also ragged */
535 1
        if (max_dims_reached) {
536 1
            success = -1;
537
        }
538
    }
539 1
    for (int i = 0; i < new_ndim; i++) {
540 1
        npy_intp curr_dim = out_shape[curr_ndim + i];
541 1
        npy_intp new_dim = new_shape[i];
542

543 1
        if (!max_dims_reached) {
544 1
            out_shape[curr_ndim + i] = new_dim;
545
        }
546 1
        else if (new_dim != curr_dim) {
547
            /* The array is ragged, and this dimension is unusable already */
548 1
            success = -1;
549 1
            if (!sequence) {
550
                /* Remove dimensions that we cannot use: */
551 1
                *max_ndim -= new_ndim - i;
552
            }
553
            else {
554
                assert(i == 0);
555
                /* max_ndim is usually not updated for sequences, so set now: */
556 1
                *max_ndim = curr_ndim;
557
            }
558
            break;
559
        }
560
    }
561 1
    if (!sequence) {
562 1
        *flags |= MAX_DIMS_WAS_REACHED;
563
    }
564 1
    return success;
565
}
566

567

568
#define COERCION_CACHE_CACHE_SIZE 5
569
static int _coercion_cache_num = 0;
570
static coercion_cache_obj *_coercion_cache_cache[COERCION_CACHE_CACHE_SIZE];
571

572
/*
573
 * Steals a reference to the object.
574
 */
575
static NPY_INLINE int
576 1
npy_new_coercion_cache(
577
        PyObject *converted_obj, PyObject *arr_or_sequence, npy_bool sequence,
578
        coercion_cache_obj ***next_ptr, int ndim)
579
{
580
    coercion_cache_obj *cache;
581 1
    if (_coercion_cache_num > 0) {
582 1
        _coercion_cache_num--;
583 1
        cache = _coercion_cache_cache[_coercion_cache_num];
584
    }
585
    else {
586 1
        cache = PyObject_MALLOC(sizeof(coercion_cache_obj));
587
    }
588 1
    if (cache == NULL) {
589 0
        PyErr_NoMemory();
590 0
        return -1;
591
    }
592 1
    cache->converted_obj = converted_obj;
593 1
    cache->arr_or_sequence = arr_or_sequence;
594 1
    cache->sequence = sequence;
595 1
    cache->depth = ndim;
596 1
    cache->next = NULL;
597 1
    **next_ptr = cache;
598 1
    *next_ptr = &(cache->next);
599 1
    return 0;
600
}
601

602
/**
603
 * Unlink coercion cache item.
604
 *
605
 * @param current
606
 * @return next coercion cache object (or NULL)
607
 */
608
NPY_NO_EXPORT NPY_INLINE coercion_cache_obj *
609 1
npy_unlink_coercion_cache(coercion_cache_obj *current)
610
{
611 1
    coercion_cache_obj *next = current->next;
612 1
    Py_DECREF(current->arr_or_sequence);
613 1
    if (_coercion_cache_num < COERCION_CACHE_CACHE_SIZE) {
614 1
        _coercion_cache_cache[_coercion_cache_num] = current;
615 1
        _coercion_cache_num++;
616
    }
617
    else {
618 1
        PyObject_FREE(current);
619
    }
620 1
    return next;
621
}
622

623
NPY_NO_EXPORT NPY_INLINE void
624 1
npy_free_coercion_cache(coercion_cache_obj *next) {
625
    /* We only need to check from the last used cache pos */
626 1
    while (next != NULL) {
627 1
        next = npy_unlink_coercion_cache(next);
628
    }
629
}
630

631
#undef COERCION_CACHE_CACHE_SIZE
632

633
/**
634
 * Do the promotion step and possible casting. This function should
635
 * never be called if a descriptor was requested. In that case the output
636
 * dtype is not of importance, so we must not risk promotion errors.
637
 *
638
 * @param out_descr The current descriptor.
639
 * @param descr The newly found descriptor to promote with
640
 * @param flags dtype discover flags to signal failed promotion.
641
 * @return -1 on error, 0 on success.
642
 */
643
static NPY_INLINE int
644 1
handle_promotion(PyArray_Descr **out_descr, PyArray_Descr *descr,
645
        enum _dtype_discovery_flags *flags)
646
{
647
    assert(!(*flags & DESCRIPTOR_WAS_SET));
648

649 1
    if (*out_descr == NULL) {
650 1
        Py_INCREF(descr);
651 1
        *out_descr = descr;
652 1
        return 0;
653
    }
654 1
    PyArray_Descr *new_descr = PyArray_PromoteTypes(descr, *out_descr);
655 1
    if (new_descr == NULL) {
656 1
        PyErr_Clear();
657 1
        *flags |= PROMOTION_FAILED;
658
        /* Continue with object, since we may need the dimensionality */
659 1
        new_descr = PyArray_DescrFromType(NPY_OBJECT);
660
    }
661 1
    Py_SETREF(*out_descr, new_descr);
662
    return 0;
663
}
664

665

666
/**
667
 * Handle a leave node (known scalar) during dtype and shape discovery.
668
 *
669
 * @param obj The python object or nested sequence to convert
670
 * @param max_dims The maximum number of dimensions.
671
 * @param curr_dims The current number of dimensions (depth in the recursion)
672
 * @param out_shape The discovered output shape, will be filled
673
 * @param coercion_cache The coercion cache object to use.
674
 * @param DType the DType class that should be used, or NULL, if not provided.
675
 * @param flags used signal that this is a ragged array, used internally and
676
 *        can be expanded if necessary.
677
 */
678
static NPY_INLINE int
679 1
handle_scalar(
680
        PyObject *obj, int curr_dims, int *max_dims,
681
        PyArray_Descr **out_descr, npy_intp *out_shape,
682
        PyArray_DTypeMeta *fixed_DType,
683
        enum _dtype_discovery_flags *flags, PyArray_DTypeMeta *DType)
684
{
685
    PyArray_Descr *descr;
686

687 1
    if (update_shape(curr_dims, max_dims, out_shape,
688
            0, NULL, NPY_FALSE, flags) < 0) {
689 1
        *flags |= FOUND_RAGGED_ARRAY;
690 1
        return *max_dims;
691
    }
692 1
    if (*flags & DESCRIPTOR_WAS_SET) {
693
        /* no need to do any promotion */
694 1
        return *max_dims;
695
    }
696
    /* This is a scalar, so find the descriptor */
697 1
    descr = find_scalar_descriptor(fixed_DType, DType, obj);
698 1
    if (descr == NULL) {
699
        return -1;
700
    }
701 1
    if (handle_promotion(out_descr, descr, flags) < 0) {
702 0
        Py_DECREF(descr);
703
        return -1;
704
    }
705 1
    Py_DECREF(descr);
706 1
    return *max_dims;
707
}
708

709

710
/**
711
 * Return the correct descriptor given an array object and a DType class.
712
 *
713
 * This is identical to casting the arrays descriptor/dtype to the new
714
 * DType class
715
 *
716
 * @param arr The array object.
717
 * @param DType The DType class to cast to (or NULL for convenience)
718
 * @param out_descr The output descriptor will set. The result can be NULL
719
 *        when the array is of object dtype and has no elements.
720
 *
721
 * @return -1 on failure, 0 on success.
722
 */
723
static int
724 1
find_descriptor_from_array(
725
        PyArrayObject *arr, PyArray_DTypeMeta *DType, PyArray_Descr **out_descr)
726
{
727 1
    enum _dtype_discovery_flags flags = 0;
728 1
    *out_descr = NULL;
729

730 1
    if (NPY_UNLIKELY(DType != NULL && DType->parametric &&
731
            PyArray_ISOBJECT(arr))) {
732
        /*
733
         * We have one special case, if (and only if) the input array is of
734
         * object DType and the dtype is not fixed already but parametric.
735
         * Then, we allow inspection of all elements, treating them as
736
         * elements. We do this recursively, so nested 0-D arrays can work,
737
         * but nested higher dimensional arrays will lead to an error.
738
         */
739
        assert(DType->type_num != NPY_OBJECT);  /* not parametric */
740

741
        PyArrayIterObject *iter;
742 1
        iter = (PyArrayIterObject *)PyArray_IterNew((PyObject *)arr);
743 1
        if (iter == NULL) {
744
            return -1;
745
        }
746 1
        while (iter->index < iter->size) {
747
            PyArray_DTypeMeta *item_DType;
748
            /*
749
             * Note: If the array contains typed objects we may need to use
750
             *       the dtype to use casting for finding the correct instance.
751
             */
752 1
            PyObject *elem = PyArray_GETITEM(arr, iter->dataptr);
753 1
            if (elem == NULL) {
754 0
                Py_DECREF(iter);
755 0
                return -1;
756
            }
757 1
            item_DType = discover_dtype_from_pyobject(elem, &flags, DType);
758 1
            if (item_DType == NULL) {
759 0
                Py_DECREF(iter);
760 0
                Py_DECREF(elem);
761
                return -1;
762
            }
763 1
            if (item_DType == (PyArray_DTypeMeta *)Py_None) {
764 1
                Py_SETREF(item_DType, NULL);
765
            }
766 1
            int flat_max_dims = 0;
767 1
            if (handle_scalar(elem, 0, &flat_max_dims, out_descr,
768
                    NULL, DType, &flags, item_DType) < 0) {
769 0
                Py_DECREF(iter);
770 0
                Py_DECREF(elem);
771 0
                Py_XDECREF(item_DType);
772
                return -1;
773
            }
774 1
            Py_XDECREF(item_DType);
775 1
            Py_DECREF(elem);
776 1
            PyArray_ITER_NEXT(iter);
777
        }
778 1
        Py_DECREF(iter);
779
    }
780 1
    else if (DType != NULL && NPY_UNLIKELY(DType->type_num == NPY_DATETIME) &&
781 1
                PyArray_ISSTRING(arr)) {
782
        /*
783
         * TODO: This branch should be deprecated IMO, the workaround is
784
         *       to cast to the object to a string array. Although a specific
785
         *       function (if there is even any need) would be better.
786
         *       This is value based casting!
787
         * Unless of course we actually want to support this kind of thing
788
         * in general (not just for object dtype)...
789
         */
790
        PyArray_DatetimeMetaData meta;
791 1
        meta.base = NPY_FR_GENERIC;
792 1
        meta.num = 1;
793

794 1
        if (find_string_array_datetime64_type(arr, &meta) < 0) {
795 0
            return -1;
796
        }
797
        else {
798 1
            *out_descr = create_datetime_dtype(NPY_DATETIME, &meta);
799 1
            if (*out_descr == NULL) {
800
                return -1;
801
            }
802
        }
803
    }
804
    else {
805
        /*
806
         * If this is not an object array figure out the dtype cast,
807
         * or simply use the returned DType.
808
         */
809 1
        *out_descr = cast_descriptor_to_fixed_dtype(
810
                     PyArray_DESCR(arr), DType);
811 1
        if (*out_descr == NULL) {
812
            return -1;
813
        }
814
    }
815
    return 0;
816
}
817

818
/**
819
 * Given a dtype or DType object, find the correct descriptor to cast the
820
 * array to.
821
 *
822
 * This function is identical to normal casting using only the dtype, however,
823
 * it supports inspecting the elements when the array has object dtype
824
 * (and the given datatype describes a parametric DType class).
825
 *
826
 * @param arr
827
 * @param dtype A dtype instance or class.
828
 * @return A concrete dtype instance or NULL
829
 */
830
NPY_NO_EXPORT PyArray_Descr *
831 1
PyArray_AdaptDescriptorToArray(PyArrayObject *arr, PyObject *dtype)
832
{
833
    /* If the requested dtype is flexible, adapt it */
834
    PyArray_Descr *new_dtype;
835
    PyArray_DTypeMeta *new_DType;
836
    int res;
837

838 1
    res = PyArray_ExtractDTypeAndDescriptor((PyObject *)dtype,
839
            &new_dtype, &new_DType);
840 1
    if (res < 0) {
841
        return NULL;
842
    }
843 1
    if (new_dtype == NULL) {
844 1
        res = find_descriptor_from_array(arr, new_DType, &new_dtype);
845 1
        if (res < 0) {
846 0
            Py_DECREF(new_DType);
847
            return NULL;
848
        }
849 1
        if (new_dtype == NULL) {
850
            /* This is an object array but contained no elements, use default */
851 0
            new_dtype = new_DType->default_descr(new_DType);
852
        }
853
    }
854 1
    Py_DECREF(new_DType);
855 1
    return new_dtype;
856
}
857

858

859
/**
860
 * Recursion helper for `PyArray_DiscoverDTypeAndShape`.  See its
861
 * documentation for additional details.
862
 *
863
 * @param obj The current (possibly nested) object
864
 * @param curr_dims The current depth, i.e. initially 0 and increasing.
865
 * @param max_dims Maximum number of dimensions, modified during discovery.
866
 * @param out_descr dtype instance (or NULL) to promoted and update.
867
 * @param out_shape The current shape (updated)
868
 * @param coercion_cache_tail_ptr The tail of the linked list of coercion
869
 *        cache objects, which hold on to converted sequences and arrays.
870
 *        This is a pointer to the `->next` slot of the previous cache so
871
 *        that we can append a new cache object (and update this pointer).
872
 *        (Initially it is a pointer to the user-provided head pointer).
873
 * @param fixed_DType User provided fixed DType class
874
 * @param flags Discovery flags (reporting and behaviour flags, see def.)
875
 * @return The updated number of maximum dimensions (i.e. scalars will set
876
 *         this to the current dimensions).
877
 */
878
NPY_NO_EXPORT int
879 1
PyArray_DiscoverDTypeAndShape_Recursive(
880
        PyObject *obj, int curr_dims, int max_dims, PyArray_Descr**out_descr,
881
        npy_intp out_shape[NPY_MAXDIMS],
882
        coercion_cache_obj ***coercion_cache_tail_ptr,
883
        PyArray_DTypeMeta *fixed_DType, enum _dtype_discovery_flags *flags)
884
{
885 1
    PyArrayObject *arr = NULL;
886
    PyObject *seq;
887

888
    /*
889
     * The first step is to find the DType class if it was not provided,
890
     * alternatively we have to find out that this is not a scalar at all
891
     * (which could fail and lead us to `object` dtype).
892
     */
893 1
    PyArray_DTypeMeta *DType = NULL;
894

895 1
    if (NPY_UNLIKELY(*flags & DISCOVER_STRINGS_AS_SEQUENCES)) {
896
        /*
897
         * We currently support that bytes/strings are considered sequences,
898
         * if the dtype is np.dtype('c'), this should be deprecated probably,
899
         * but requires hacks right now.
900
         */
901 1
        if (PyBytes_Check(obj) && PyBytes_Size(obj) != 1) {
902
            goto force_sequence_due_to_char_dtype;
903
        }
904 1
        else if (PyUnicode_Check(obj) && PyUnicode_GetLength(obj) != 1) {
905
            goto force_sequence_due_to_char_dtype;
906
        }
907
    }
908

909
    /* If this is a known scalar, find the corresponding DType class */
910 1
    DType = discover_dtype_from_pyobject(obj, flags, fixed_DType);
911 1
    if (DType == NULL) {
912
        return -1;
913
    }
914 1
    else if (DType == (PyArray_DTypeMeta *)Py_None) {
915 1
        Py_DECREF(Py_None);
916
    }
917
    else {
918 1
        max_dims = handle_scalar(
919
                obj, curr_dims, &max_dims, out_descr, out_shape, fixed_DType,
920
                flags, DType);
921 1
        Py_DECREF(DType);
922 1
        return max_dims;
923
    }
924

925
    /*
926
     * At this point we expect to find either a sequence, or an array-like.
927
     * Although it is still possible that this fails and we have to use
928
     * `object`.
929
     */
930 1
    if (PyArray_Check(obj)) {
931 1
        arr = (PyArrayObject *)obj;
932 1
        Py_INCREF(arr);
933
    }
934
    else {
935 1
        PyArray_Descr *requested_descr = NULL;
936 1
        if (*flags & DESCRIPTOR_WAS_SET) {
937
            /* __array__ may be passed the requested descriptor if provided */
938 1
            requested_descr = *out_descr;
939
        }
940 1
        arr = (PyArrayObject *)_array_from_array_like(obj,
941
                requested_descr, 0, NULL);
942 1
        if (arr == NULL) {
943
            return -1;
944
        }
945 1
        else if (arr == (PyArrayObject *)Py_NotImplemented) {
946 1
            Py_DECREF(arr);
947
            arr = NULL;
948
        }
949
    }
950 1
    if (arr != NULL) {
951
        /*
952
         * This is an array object which will be added to the cache, keeps
953
         * the reference to the array alive (takes ownership).
954
         */
955 1
        if (npy_new_coercion_cache(obj, (PyObject *)arr,
956
                0, coercion_cache_tail_ptr, curr_dims) < 0) {
957
            return -1;
958
        }
959

960 1
        if (curr_dims == 0) {
961
            /*
962
             * Special case for reverse broadcasting, ignore max_dims if this
963
             * is a single array-like object; needed for PyArray_CopyObject.
964
             */
965 1
            memcpy(out_shape, PyArray_SHAPE(arr),
966 1
                   PyArray_NDIM(arr) * sizeof(npy_intp));
967 1
            max_dims = PyArray_NDIM(arr);
968
        }
969 1
        else if (update_shape(curr_dims, &max_dims, out_shape,
970 1
                PyArray_NDIM(arr), PyArray_SHAPE(arr), NPY_FALSE, flags) < 0) {
971 1
            *flags |= FOUND_RAGGED_ARRAY;
972 1
            return max_dims;
973
        }
974

975 1
        if (*flags & DESCRIPTOR_WAS_SET) {
976 1
            return max_dims;
977
        }
978
        /*
979
         * For arrays we may not just need to cast the dtype to the user
980
         * provided fixed_DType. If this is an object array, the elements
981
         * may need to be inspected individually.
982
         * Note, this finds the descriptor of the array first and only then
983
         * promotes here (different associativity).
984
         */
985
        PyArray_Descr *cast_descr;
986 1
        if (find_descriptor_from_array(arr, fixed_DType, &cast_descr) < 0) {
987
            return -1;
988
        }
989 1
        if (cast_descr == NULL) {
990
            /* object array with no elements, no need to promote/adjust. */
991 0
            return max_dims;
992
        }
993 1
        if (handle_promotion(out_descr, cast_descr, flags) < 0) {
994 0
            Py_DECREF(cast_descr);
995
            return -1;
996
        }
997 1
        Py_DECREF(cast_descr);
998 1
        return max_dims;
999
    }
1000

1001
    /*
1002
     * The last step is to assume the input should be handled as a sequence
1003
     * and to handle it recursively. That is, unless we have hit the
1004
     * dimension limit.
1005
     */
1006 1
    npy_bool is_sequence = (PySequence_Check(obj) && PySequence_Size(obj) >= 0);
1007 1
    if (NPY_UNLIKELY(*flags & DISCOVER_TUPLES_AS_ELEMENTS) &&
1008 1
            PyTuple_Check(obj)) {
1009 1
        is_sequence = NPY_FALSE;
1010
    }
1011 1
    if (curr_dims == max_dims || !is_sequence) {
1012
        /* Clear any PySequence_Size error which would corrupts further calls */
1013 1
        PyErr_Clear();
1014 1
        max_dims = handle_scalar(
1015
                obj, curr_dims, &max_dims, out_descr, out_shape, fixed_DType,
1016
                flags, NULL);
1017 1
        if (is_sequence) {
1018
            /* Flag as ragged or too deep array */
1019 1
            *flags |= FOUND_RAGGED_ARRAY;
1020
        }
1021
        return max_dims;
1022
    }
1023
    /* If we stop supporting bytes/str subclasses, more may be required here: */
1024
    assert(!PyBytes_Check(obj) && !PyUnicode_Check(obj));
1025

1026 1
  force_sequence_due_to_char_dtype:
1027

1028
    /* Ensure we have a sequence (required for PyPy) */
1029 1
    seq = PySequence_Fast(obj, "Could not convert object to sequence");
1030 1
    if (seq == NULL) {
1031
        /*
1032
         * Specifically do not fail on things that look like a dictionary,
1033
         * instead treat them as scalar.
1034
         */
1035 1
        if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1036 1
            PyErr_Clear();
1037 1
            max_dims = handle_scalar(
1038
                    obj, curr_dims, &max_dims, out_descr, out_shape, fixed_DType,
1039
                    flags, NULL);
1040 1
            return max_dims;
1041
        }
1042
        return -1;
1043
    }
1044
    /* The cache takes ownership of the sequence here. */
1045 1
    if (npy_new_coercion_cache(obj, seq, 1, coercion_cache_tail_ptr, curr_dims) < 0) {
1046
        return -1;
1047
    }
1048

1049 1
    npy_intp size = PySequence_Fast_GET_SIZE(seq);
1050 1
    PyObject **objects = PySequence_Fast_ITEMS(seq);
1051

1052 1
    if (update_shape(curr_dims, &max_dims,
1053
                     out_shape, 1, &size, NPY_TRUE, flags) < 0) {
1054
        /* But do update, if there this is a ragged case */
1055 1
        *flags |= FOUND_RAGGED_ARRAY;
1056 1
        return max_dims;
1057
    }
1058 1
    if (size == 0) {
1059
        /* If the sequence is empty, this must be the last dimension */
1060 1
        *flags |= MAX_DIMS_WAS_REACHED;
1061 1
        return curr_dims + 1;
1062
    }
1063

1064
    /* Recursive call for each sequence item */
1065 1
    for (Py_ssize_t i = 0; i < size; i++) {
1066 1
        max_dims = PyArray_DiscoverDTypeAndShape_Recursive(
1067 1
                objects[i], curr_dims + 1, max_dims,
1068
                out_descr, out_shape, coercion_cache_tail_ptr, fixed_DType,
1069
                flags);
1070

1071 1
        if (max_dims < 0) {
1072
            return -1;
1073
        }
1074
    }
1075 1
    return max_dims;
1076
}
1077

1078

1079
/**
1080
 * Finds the DType and shape of an arbitrary nested sequence. This is the
1081
 * general purpose function to find the parameters of the array (but not
1082
 * the array itself) as returned by `np.array()`
1083
 *
1084
 * Note: Before considering to make part of this public, we should consider
1085
 *       whether things such as `out_descr != NULL` should be supported in
1086
 *       a public API.
1087
 *
1088
 * @param obj Scalar or nested sequences.
1089
 * @param max_dims Maximum number of dimensions (after this scalars are forced)
1090
 * @param out_shape Will be filled with the output shape (more than the actual
1091
 *        shape may be written).
1092
 * @param coercion_cache NULL initialized reference to a cache pointer.
1093
 *        May be set to the first coercion_cache, and has to be freed using
1094
 *        npy_free_coercion_cache.
1095
 *        This should be stored in a thread-safe manner (i.e. function static)
1096
 *        and is designed to be consumed by `PyArray_AssignFromCache`.
1097
 *        If not consumed, must be freed using `npy_free_coercion_cache`.
1098
 * @param fixed_DType A user provided fixed DType class.
1099
 * @param requested_descr A user provided fixed descriptor. This is always
1100
 *        returned as the discovered descriptor, but currently only used
1101
 *        for the ``__array__`` protocol.
1102
 * @param out_descr Set to the discovered output descriptor. This may be
1103
 *        non NULL but only when fixed_DType/requested_descr are not given.
1104
 *        If non NULL, it is the first dtype being promoted and used if there
1105
 *        are no elements.
1106
 *        The result may be unchanged (remain NULL) when converting a
1107
 *        sequence with no elements. In this case it is callers responsibility
1108
 *        to choose a default.
1109
 * @return dimensions of the discovered object or -1 on error.
1110
 *         WARNING: If (and only if) the output is a single array, the ndim
1111
 *         returned _can_ exceed the maximum allowed number of dimensions.
1112
 *         It might be nice to deprecate this? But it allows things such as
1113
 *         `arr1d[...] = np.array([[1,2,3,4]])`
1114
 */
1115
NPY_NO_EXPORT int
1116 1
PyArray_DiscoverDTypeAndShape(
1117
        PyObject *obj, int max_dims,
1118
        npy_intp out_shape[NPY_MAXDIMS],
1119
        coercion_cache_obj **coercion_cache,
1120
        PyArray_DTypeMeta *fixed_DType, PyArray_Descr *requested_descr,
1121
        PyArray_Descr **out_descr)
1122
{
1123 1
    coercion_cache_obj **coercion_cache_head = coercion_cache;
1124 1
    *coercion_cache = NULL;
1125 1
    enum _dtype_discovery_flags flags = 0;
1126

1127
    /*
1128
     * Support a passed in descriptor (but only if nothing was specified).
1129
     */
1130
    assert(*out_descr == NULL || fixed_DType == NULL);
1131
    /* Validate input of requested descriptor and DType */
1132
    if (fixed_DType != NULL) {
1133
        assert(PyObject_TypeCheck(
1134
                (PyObject *)fixed_DType, (PyTypeObject *)&PyArrayDTypeMeta_Type));
1135
    }
1136

1137 1
    if (requested_descr != NULL) {
1138
        assert(fixed_DType == NPY_DTYPE(requested_descr));
1139
        /* The output descriptor must be the input. */
1140 1
        Py_INCREF(requested_descr);
1141 1
        *out_descr = requested_descr;
1142 1
        flags |= DESCRIPTOR_WAS_SET;
1143
    }
1144

1145
    /*
1146
     * Call the recursive function, the setup for this may need expanding
1147
     * to handle caching better.
1148
     */
1149

1150
    /* Legacy discovery flags */
1151 1
    if (requested_descr != NULL) {
1152 1
        if (requested_descr->type_num == NPY_STRING &&
1153
                requested_descr->type == 'c') {
1154
            /* Character dtype variation of string (should be deprecated...) */
1155 1
            flags |= DISCOVER_STRINGS_AS_SEQUENCES;
1156
        }
1157 1
        else if (requested_descr->type_num == NPY_VOID &&
1158 1
                    (requested_descr->names || requested_descr->subarray))  {
1159
            /* Void is a chimera, in that it may or may not be structured... */
1160 1
            flags |= DISCOVER_TUPLES_AS_ELEMENTS;
1161
        }
1162
    }
1163

1164 1
    int ndim = PyArray_DiscoverDTypeAndShape_Recursive(
1165
            obj, 0, max_dims, out_descr, out_shape, &coercion_cache,
1166
            fixed_DType, &flags);
1167 1
    if (ndim < 0) {
1168
        goto fail;
1169
    }
1170

1171 1
    if (NPY_UNLIKELY(flags & FOUND_RAGGED_ARRAY)) {
1172
        /*
1173
         * If max-dims was reached and the dimensions reduced, this is ragged.
1174
         * Otherwise, we merely reached the maximum dimensions, which is
1175
         * slightly different. This happens for example for `[1, [2, 3]]`
1176
         * where the maximum dimensions is 1, but then a sequence found.
1177
         *
1178
         * In this case we need to inform the user and clean out the cache
1179
         * since it may be too deep.
1180
         */
1181

1182
        /* Handle reaching the maximum depth differently: */
1183 1
        int too_deep = ndim == max_dims;
1184

1185 1
        if (fixed_DType == NULL) {
1186
            /* This is discovered as object, but deprecated */
1187
            static PyObject *visibleDeprecationWarning = NULL;
1188 1
            npy_cache_import(
1189
                    "numpy", "VisibleDeprecationWarning",
1190
                    &visibleDeprecationWarning);
1191 1
            if (visibleDeprecationWarning == NULL) {
1192
                goto fail;
1193
            }
1194 1
            if (!too_deep) {
1195
                /* NumPy 1.19, 2019-11-01 */
1196 1
                if (PyErr_WarnEx(visibleDeprecationWarning,
1197
                        "Creating an ndarray from ragged nested sequences (which "
1198
                        "is a list-or-tuple of lists-or-tuples-or ndarrays with "
1199
                        "different lengths or shapes) is deprecated. If you "
1200
                        "meant to do this, you must specify 'dtype=object' "
1201
                        "when creating the ndarray.", 1) < 0) {
1202
                    goto fail;
1203
                }
1204
            }
1205
            else {
1206
                /* NumPy 1.20, 2020-05-08 */
1207
                /* Note, max_dims should normally always be NPY_MAXDIMS here */
1208 1
                if (PyErr_WarnFormat(visibleDeprecationWarning, 1,
1209
                        "Creating an ndarray from nested sequences exceeding "
1210
                        "the maximum number of dimensions of %d is deprecated. "
1211
                        "If you mean to do this, you must specify "
1212
                        "'dtype=object' when creating the ndarray.",
1213
                        max_dims) < 0) {
1214
                    goto fail;
1215
                }
1216
            }
1217
            /* Ensure that ragged arrays always return object dtype */
1218 1
            Py_XSETREF(*out_descr, PyArray_DescrFromType(NPY_OBJECT));
1219
        }
1220 1
        else if (fixed_DType->type_num != NPY_OBJECT) {
1221
            /* Only object DType supports ragged cases unify error */
1222

1223
            /*
1224
             * We used to let certain ragged arrays pass if they also
1225
             * support e.g. conversion using `float(arr)`, which currently
1226
             * works for arrays with only one element.
1227
             * Thus we catch at least most of such cases here and give a
1228
             * DeprecationWarning instead of an error.
1229
             * Note that some of these will actually error later on when
1230
             * attempting to do the actual assign.
1231
             */
1232 1
            int deprecate_single_element_ragged = 0;
1233 1
            coercion_cache_obj *current = *coercion_cache_head;
1234 1
            while (current != NULL) {
1235 1
                if (current->sequence) {
1236 1
                    if (current->depth == ndim) {
1237
                        /*
1238
                         * Assume that only array-likes will allow the deprecated
1239
                         * behaviour
1240
                         */
1241
                        deprecate_single_element_ragged = 0;
1242
                        break;
1243
                    }
1244
                    /* check next converted sequence/array-like */
1245 1
                    current = current->next;
1246 1
                    continue;
1247
                }
1248 1
                PyArrayObject *arr = (PyArrayObject *)(current->arr_or_sequence);
1249
                assert(PyArray_NDIM(arr) + current->depth >= ndim);
1250 1
                if (PyArray_NDIM(arr) != ndim - current->depth) {
1251
                    /* This array is not compatible with the final shape */
1252 1
                    if (PyArray_SIZE(arr) != 1) {
1253
                        deprecate_single_element_ragged = 0;
1254
                        break;
1255
                    }
1256
                    deprecate_single_element_ragged = 1;
1257
                }
1258 1
                current = current->next;
1259
            }
1260

1261 1
            if (deprecate_single_element_ragged) {
1262
                /* Deprecated 2020-07-24, NumPy 1.20 */
1263 1
                if (DEPRECATE(
1264
                        "setting an array element with a sequence. "
1265
                        "This was supported in some cases where the elements "
1266
                        "are arrays with a single element. For example "
1267
                        "`np.array([1, np.array([2])], dtype=int)`. "
1268
                        "In the future this will raise the same ValueError as "
1269
                        "`np.array([1, [2]], dtype=int)`.") < 0) {
1270
                    goto fail;
1271
                }
1272
            }
1273 1
            else if (!too_deep) {
1274 1
                PyObject *shape = PyArray_IntTupleFromIntp(ndim, out_shape);
1275 1
                PyErr_Format(PyExc_ValueError,
1276
                        "setting an array element with a sequence. The "
1277
                        "requested array has an inhomogeneous shape after "
1278
                        "%d dimensions. The detected shape was "
1279
                        "%R + inhomogeneous part.",
1280
                        ndim, shape);
1281 1
                Py_DECREF(shape);
1282
                goto fail;
1283
            }
1284
            else {
1285 1
                PyErr_Format(PyExc_ValueError,
1286
                        "setting an array element with a sequence. The "
1287
                        "requested array would exceed the maximum number of "
1288
                        "dimension of %d.",
1289
                        max_dims);
1290 1
                goto fail;
1291
            }
1292
        }
1293

1294
        /*
1295
         * If the array is ragged, the cache may be too deep, so clean it.
1296
         * The cache is left at the same depth as the array though.
1297
         */
1298 1
        coercion_cache_obj **next_ptr = coercion_cache_head;
1299 1
        coercion_cache_obj *current = *coercion_cache_head;  /* item to check */
1300 1
        while (current != NULL) {
1301 1
            if (current->depth > ndim) {
1302
                /* delete "next" cache item and advanced it (unlike later) */
1303 1
                current = npy_unlink_coercion_cache(current);
1304 1
                continue;
1305
            }
1306
            /* advance both prev and next, and set prev->next to new item */
1307 1
            *next_ptr = current;
1308 1
            next_ptr = &(current->next);
1309 1
            current = current->next;
1310
        }
1311 1
        *next_ptr = NULL;
1312
    }
1313
    /* We could check here for max-ndims being reached as well */
1314

1315 1
    if (requested_descr != NULL) {
1316
        /* descriptor was provided, we did not accidentally change it */
1317
        assert(*out_descr == requested_descr);
1318
    }
1319 1
    else if (NPY_UNLIKELY(*out_descr == NULL)) {
1320
        /*
1321
         * When the object contained no elements (sequence of length zero),
1322
         * the no descriptor may have been found. When a DType was requested
1323
         * we use it to define the output dtype.
1324
         * Otherwise, out_descr will remain NULL and the caller has to set
1325
         * the correct default.
1326
         */
1327 1
        if (fixed_DType != NULL) {
1328 1
            if (fixed_DType->default_descr == NULL) {
1329 0
                Py_INCREF(fixed_DType->singleton);
1330 0
                *out_descr = fixed_DType->singleton;
1331
            }
1332
            else {
1333 1
                *out_descr = fixed_DType->default_descr(fixed_DType);
1334 1
                if (*out_descr == NULL) {
1335
                    goto fail;
1336
                }
1337
            }
1338
        }
1339
    }
1340
    return ndim;
1341

1342 1
  fail:
1343 1
    npy_free_coercion_cache(*coercion_cache_head);
1344 1
    *coercion_cache_head = NULL;
1345 1
    Py_XSETREF(*out_descr, NULL);
1346
    return -1;
1347
}
1348

1349

1350

1351
/**
1352
 * Check the descriptor is a legacy "flexible" DType instance, this is
1353
 * an instance which is (normally) not attached to an array, such as a string
1354
 * of length 0 or a datetime with no unit.
1355
 * These should be largely deprecated, and represent only the DType class
1356
 * for most `dtype` parameters.
1357
 *
1358
 * TODO: This function should eventually recieve a deprecation warning and
1359
 *       be removed.
1360
 *
1361
 * @param descr
1362
 * @return 1 if this is not a concrete dtype instance 0 otherwise
1363
 */
1364
static int
1365 1
descr_is_legacy_parametric_instance(PyArray_Descr *descr)
1366
{
1367 1
    if (PyDataType_ISUNSIZED(descr)) {
1368
        return 1;
1369
    }
1370
    /* Flexible descr with generic time unit (which can be adapted) */
1371 1
    if (PyDataType_ISDATETIME(descr)) {
1372
        PyArray_DatetimeMetaData *meta;
1373 1
        meta = get_datetime_metadata_from_dtype(descr);
1374 1
        if (meta->base == NPY_FR_GENERIC) {
1375
            return 1;
1376
        }
1377
    }
1378
    return 0;
1379
}
1380

1381

1382
/**
1383
 * Given either a DType instance or class, (or legacy flexible instance),
1384
 * ands sets output dtype instance and DType class. Both results may be
1385
 * NULL, but if `out_descr` is set `out_DType` will always be the
1386
 * corresponding class.
1387
 *
1388
 * @param dtype
1389
 * @param out_descr
1390
 * @param out_DType
1391
 * @return 0 on success -1 on failure
1392
 */
1393
NPY_NO_EXPORT int
1394 1
PyArray_ExtractDTypeAndDescriptor(PyObject *dtype,
1395
        PyArray_Descr **out_descr, PyArray_DTypeMeta **out_DType)
1396
{
1397 1
    *out_DType = NULL;
1398 1
    *out_descr = NULL;
1399

1400 1
    if (dtype != NULL) {
1401 1
        if (PyObject_TypeCheck(dtype, (PyTypeObject *)&PyArrayDTypeMeta_Type)) {
1402
            assert(dtype != (PyObject * )&PyArrayDescr_Type);  /* not np.dtype */
1403 0
            *out_DType = (PyArray_DTypeMeta *)dtype;
1404 0
            Py_INCREF(*out_DType);
1405
        }
1406 1
        else if (PyObject_TypeCheck((PyObject *)Py_TYPE(dtype),
1407
                    (PyTypeObject *)&PyArrayDTypeMeta_Type)) {
1408 1
            *out_DType = NPY_DTYPE(dtype);
1409 1
            Py_INCREF(*out_DType);
1410 1
            if (!descr_is_legacy_parametric_instance((PyArray_Descr *)dtype)) {
1411 1
                *out_descr = (PyArray_Descr *)dtype;
1412 1
                Py_INCREF(*out_descr);
1413
            }
1414
        }
1415
        else {
1416 0
            PyErr_SetString(PyExc_TypeError,
1417
                    "dtype parameter must be a DType instance or class.");
1418 0
            return -1;
1419
        }
1420
    }
1421
    return 0;
1422
}
1423

1424

1425
/*
1426
 * Python API function to expose the dtype+shape discovery functionality
1427
 * directly.
1428
 */
1429
NPY_NO_EXPORT PyObject *
1430 1
_discover_array_parameters(PyObject *NPY_UNUSED(self),
1431
                           PyObject *args, PyObject *kwargs)
1432
{
1433
    static char *kwlist[] = {"obj", "dtype", NULL};
1434

1435
    PyObject *obj;
1436 1
    PyObject *dtype = NULL;
1437 1
    PyArray_Descr *fixed_descriptor = NULL;
1438 1
    PyArray_DTypeMeta *fixed_DType = NULL;
1439
    npy_intp shape[NPY_MAXDIMS];
1440

1441 1
    if (!PyArg_ParseTupleAndKeywords(
1442
            args, kwargs, "O|O:_discover_array_parameters", kwlist,
1443
            &obj, &dtype)) {
1444
        return NULL;
1445
    }
1446

1447 1
    if (PyArray_ExtractDTypeAndDescriptor(dtype,
1448
            &fixed_descriptor, &fixed_DType) < 0) {
1449
        return NULL;
1450
    }
1451

1452 1
    coercion_cache_obj *coercion_cache = NULL;
1453 1
    PyObject *out_dtype = NULL;
1454 1
    int ndim = PyArray_DiscoverDTypeAndShape(
1455
            obj, NPY_MAXDIMS, shape,
1456
            &coercion_cache,
1457
            fixed_DType, fixed_descriptor, (PyArray_Descr **)&out_dtype);
1458 1
    Py_XDECREF(fixed_DType);
1459 1
    Py_XDECREF(fixed_descriptor);
1460 1
    if (ndim < 0) {
1461
        return NULL;
1462
    }
1463 1
    npy_free_coercion_cache(coercion_cache);
1464 1
    if (out_dtype == NULL) {
1465
        /* Empty sequence, report this as None. */
1466 0
        out_dtype = Py_None;
1467 0
        Py_INCREF(Py_None);
1468
    }
1469

1470 1
    PyObject *shape_tuple = PyArray_IntTupleFromIntp(ndim, shape);
1471 1
    if (shape_tuple == NULL) {
1472
        return NULL;
1473
    }
1474

1475 1
    PyObject *res = PyTuple_Pack(2, (PyObject *)out_dtype, shape_tuple);
1476 1
    Py_DECREF(out_dtype);
1477 1
    Py_DECREF(shape_tuple);
1478
    return res;
1479
}

Read our documentation on viewing source code .

Loading