1
#include "npy_cpu_features.h"
2
#include "npy_cpu_dispatch.h" // To guarantee the CPU baseline definitions are in scope.
3
#include "numpy/npy_common.h" // for NPY_INLINE
4
#include "numpy/npy_cpu.h" // To guarantee the CPU definitions are in scope.
5

6
/******************** Private Definitions *********************/
7

8
// Hold all CPU features boolean values
9
static unsigned char npy__cpu_have[NPY_CPU_FEATURE_MAX];
10

11
/******************** Private Declarations *********************/
12

13
// Detect (almost) all CPU features at runtime
14
static void
15
npy__cpu_init_features(void);
16
/*
17
 * Disable CPU dispatched features at runtime if environment variable
18
 * 'NPY_DISABLE_CPU_FEATURES' is defined.
19
 * Multiple features can be present, and separated by space, comma, or tab.
20
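 * Raises a RuntimeError if the value is too long or names a baseline feature;
 * emits a RuntimeWarning for names that are not dispatched or not supported
 * by the running machine.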
21
*/
22
static int
23
npy__cpu_try_disable_env(void);
24

25
/* Ensure the build's CPU baseline features are supported at runtime */
26
static int
27
npy__cpu_validate_baseline(void);
28

29
/******************** Public Definitions *********************/
30

31
NPY_VISIBILITY_HIDDEN int
32 1
npy_cpu_have(int feature_id)
33
{
34 1
    if (feature_id <= NPY_CPU_FEATURE_NONE || feature_id >= NPY_CPU_FEATURE_MAX)
35
        return 0;
36 1
    return npy__cpu_have[feature_id];
37
}
38

39
/*
 * Populate the runtime CPU feature table, then run the two checks in order:
 * baseline validation first, environment-variable based disabling second.
 *
 * Returns 0 on success and -1 as soon as either check reports a failure
 * (the second check is not run when the first one fails).
 */
NPY_VISIBILITY_HIDDEN int
npy_cpu_init(void)
{
    npy__cpu_init_features();
    // '||' short-circuits, so the call order of the original is preserved
    if (npy__cpu_validate_baseline() < 0 || npy__cpu_try_disable_env() < 0) {
        return -1;
    }
    return 0;
}
51

52
/*
 * Build a new dict that maps every known CPU feature name to Py_True or
 * Py_False according to the runtime feature table.
 *
 * Returns NULL if the dict cannot be created or if inserting any entry
 * fails (the partially filled dict is released in that case).
 * The body between the repeat markers is expanded once per feature name by
 * the source templating step.
 */
NPY_VISIBILITY_HIDDEN PyObject *
npy_cpu_features_dict(void)
{
    PyObject *features = PyDict_New();
    if (features == NULL) {
        return NULL;
    }
    /**begin repeat
     * #feature = MMX, SSE, SSE2, SSE3, SSSE3, SSE41, POPCNT, SSE42,
     *            AVX, F16C, XOP, FMA4, FMA3, AVX2, AVX512F,
     *            AVX512CD, AVX512ER, AVX512PF, AVX5124FMAPS, AVX5124VNNIW,
     *            AVX512VPOPCNTDQ, AVX512VL, AVX512BW, AVX512DQ, AVX512VNNI,
     *            AVX512IFMA, AVX512VBMI, AVX512VBMI2, AVX512BITALG,
     *            AVX512_KNL, AVX512_KNM, AVX512_SKX, AVX512_CLX, AVX512_CNL, AVX512_ICL,
     *            VSX, VSX2, VSX3,
     *            NEON, NEON_FP16, NEON_VFPV4, ASIMD, FPHP, ASIMDHP, ASIMDDP, ASIMDFHM#
    */
    if (PyDict_SetItemString(features, "@feature@",
            npy__cpu_have[NPY_CPU_FEATURE_@feature@] ? Py_True : Py_False) < 0) {
        Py_DECREF(features);
        return NULL;
    }
    /**end repeat**/
    return features;
}
76

77
/*
 * Per-feature callback for the NPY_WITH_CPU_*_CALL iterators used below.
 *
 * Stores the stringified FEATURE name into the next free slot of LIST.
 * The expanding scope must provide two locals: 'item' (PyObject *) and
 * 'index' (the next slot, post-incremented here). On allocation failure the
 * macro releases LIST and returns NULL from the enclosing function.
 */
#define NPY__CPU_PYLIST_APPEND_CB(FEATURE, LIST)        \
    item = PyUnicode_FromString(NPY_TOSTRING(FEATURE)); \
    if (!item) {                                        \
        Py_DECREF(LIST);                                \
        return NULL;                                    \
    }                                                   \
    PyList_SET_ITEM(LIST, index++, item);
84

85
/*
 * Return a new list holding the names of the baseline CPU features.
 *
 * When optimizations are disabled or the baseline is empty, an empty list is
 * returned. Returns NULL if the list or any of its items cannot be created.
 */
NPY_VISIBILITY_HIDDEN PyObject *
npy_cpu_baseline_list(void)
{
#if defined(NPY_DISABLE_OPTIMIZATION) || NPY_WITH_CPU_BASELINE_N <= 0
    return PyList_New(0);
#else
    // 'item' and 'index' are referenced by name from NPY__CPU_PYLIST_APPEND_CB
    PyObject *item;
    PyObject *names = PyList_New(NPY_WITH_CPU_BASELINE_N);
    int index = 0;
    if (names == NULL) {
        return NULL;
    }
    NPY_WITH_CPU_BASELINE_CALL(NPY__CPU_PYLIST_APPEND_CB, names)
    return names;
#endif
}
99

100
/*
 * Return a new list holding the names of the dispatched CPU features.
 *
 * When optimizations are disabled or nothing is dispatched, an empty list is
 * returned. Returns NULL if the list or any of its items cannot be created.
 */
NPY_VISIBILITY_HIDDEN PyObject *
npy_cpu_dispatch_list(void)
{
#if defined(NPY_DISABLE_OPTIMIZATION) || NPY_WITH_CPU_DISPATCH_N <= 0
    return PyList_New(0);
#else
    // 'item' and 'index' are referenced by name from NPY__CPU_PYLIST_APPEND_CB
    PyObject *item;
    PyObject *names = PyList_New(NPY_WITH_CPU_DISPATCH_N);
    int index = 0;
    if (names == NULL) {
        return NULL;
    }
    NPY_WITH_CPU_DISPATCH_CALL(NPY__CPU_PYLIST_APPEND_CB, names)
    return names;
#endif
}
114

115
/******************** Private Definitions *********************/
116
/*
 * Per-feature callback: when the C string WITH_FEATURE equals the
 * stringified FEATURE name, return that feature's NPY_CPU_FEATURE_* id from
 * the enclosing function.
 */
#define NPY__CPU_FEATURE_ID_CB(FEATURE, WITH_FEATURE)       \
    if (!strcmp(WITH_FEATURE, NPY_TOSTRING(FEATURE))) {     \
        return NPY_CAT(NPY_CPU_FEATURE_, FEATURE);          \
    }
119
/**
120
 * Returns CPU feature's ID, if the 'feature' was part of baseline
121
 * features that had been configured via --cpu-baseline
122
 * otherwise it returns 0
123
*/
124
static NPY_INLINE int
125 0
npy__cpu_baseline_fid(const char *feature)
126
{
127
#if !defined(NPY_DISABLE_OPTIMIZATION) && NPY_WITH_CPU_BASELINE_N > 0
128 0
    NPY_WITH_CPU_BASELINE_CALL(NPY__CPU_FEATURE_ID_CB, feature)
129
#endif
130 0
    return 0;
131
}
132
/**
133
 * Returns CPU feature's ID, if the 'feature' was part of dispatched
134
 * features that had been configured via --cpu-dispatch
135
 * otherwise it returns 0
136
*/
137
static NPY_INLINE int
138 0
npy__cpu_dispatch_fid(const char *feature)
139
{
140
#if !defined(NPY_DISABLE_OPTIMIZATION) && NPY_WITH_CPU_DISPATCH_N > 0
141 0
    NPY_WITH_CPU_DISPATCH_CALL(NPY__CPU_FEATURE_ID_CB, feature)
142
#endif
143 0
    return 0;
144
}
145

146
/*
 * Check that every baseline CPU feature the build relies on is marked as
 * available in the runtime feature table.
 *
 * Returns 0 when all baseline features are present (or when the build has
 * no baseline / has optimizations disabled). Otherwise sets a RuntimeError
 * naming the missing features, separated by spaces, and returns -1.
 */
static int
npy__cpu_validate_baseline(void)
{
#if !defined(NPY_DISABLE_OPTIMIZATION) && NPY_WITH_CPU_BASELINE_N > 0
    /*
     * Each missing feature contributes its name plus one separating space,
     * followed by a single terminating NUL at the end.
     * NOTE(review): the size below is sufficient assuming
     * NPY_WITH_CPU_BASELINE is the space-joined list of exactly the names
     * iterated by NPY_WITH_CPU_BASELINE_CALL -- confirm against the
     * generated configuration header.
     */
    char baseline_failure[sizeof(NPY_WITH_CPU_BASELINE) + 1];
    char *fptr = &baseline_failure[0];

    /*
     * 'size' must exclude the string literal's terminating NUL: copying the
     * NUL would end the message after the first missing feature and would
     * consume one extra byte of the buffer per feature.
     */
    #define NPY__CPU_VALIDATE_CB(FEATURE, DUMMY)                  \
        if (!npy__cpu_have[NPY_CAT(NPY_CPU_FEATURE_, FEATURE)]) { \
            const int size = sizeof(NPY_TOSTRING(FEATURE)) - 1;   \
            memcpy(fptr, NPY_TOSTRING(FEATURE), size);            \
            fptr[size] = ' '; fptr += size + 1;                   \
        }
    NPY_WITH_CPU_BASELINE_CALL(NPY__CPU_VALIDATE_CB, DUMMY) // extra arg for msvc
    *fptr = '\0';

    if (baseline_failure[0] != '\0') {
        *(fptr-1) = '\0'; // trim the last space
        PyErr_Format(PyExc_RuntimeError,
            "NumPy was built with baseline optimizations: \n"
            "(" NPY_WITH_CPU_BASELINE ") but your machine doesn't support:\n(%s).",
            baseline_failure
        );
        return -1;
    }
#endif
    return 0;
}
174

175
static int
176 1
npy__cpu_try_disable_env(void)
177
{
178 1
    char *disenv = getenv("NPY_DISABLE_CPU_FEATURES");
179 1
    if (disenv == NULL || disenv[0] == 0) {
180
        return 0;
181
    }
182
    #define NPY__CPU_ENV_ERR_HEAD \
183
        "During parsing environment variable 'NPY_DISABLE_CPU_FEATURES':\n"
184

185
#if !defined(NPY_DISABLE_OPTIMIZATION) && NPY_WITH_CPU_DISPATCH_N > 0
186
    #define NPY__MAX_VAR_LEN 1024 // More than enough for this era
187 0
    size_t var_len = strlen(disenv) + 1;
188 0
    if (var_len > NPY__MAX_VAR_LEN) {
189 0
        PyErr_Format(PyExc_RuntimeError,
190
            "Length of environment variable 'NPY_DISABLE_CPU_FEATURES' is %d, only %d accepted",
191
            var_len, NPY__MAX_VAR_LEN - 1
192
        );
193 0
        return -1;
194
    }
195
    char disable_features[NPY__MAX_VAR_LEN];
196 0
    memcpy(disable_features, disenv, var_len);
197

198
    char nexist[NPY__MAX_VAR_LEN];
199 0
    char *nexist_cur = &nexist[0];
200

201
    char notsupp[sizeof(NPY_WITH_CPU_DISPATCH) + 1];
202 0
    char *notsupp_cur = &notsupp[0];
203

204
    //comma and space including (htab, vtab, CR, LF, FF)
205 0
    const char *delim = ", \t\v\r\n\f";
206 0
    char *feature = strtok(disable_features, delim);
207 0
    while (feature) {
208 0
        if (npy__cpu_baseline_fid(feature) > 0) {
209 0
            PyErr_Format(PyExc_RuntimeError,
210
                NPY__CPU_ENV_ERR_HEAD
211
                "You cannot disable CPU feature '%s', since it is part of "
212
                "the baseline optimizations:\n"
213
                "(" NPY_WITH_CPU_BASELINE ").",
214
                feature
215
            );
216 0
            return -1;
217
        }
218
        // check if the feature is part of dispatched features
219 0
        int feature_id = npy__cpu_dispatch_fid(feature);
220 0
        if (feature_id == 0) {
221 0
            int flen = strlen(feature);
222 0
            memcpy(nexist_cur, feature, flen);
223 0
            nexist_cur[flen] = ' '; nexist_cur += flen + 1;
224 0
            goto next;
225
        }
226
        // check if the feature supported by the running machine
227 0
        if (!npy__cpu_have[feature_id]) {
228 0
            int flen = strlen(feature);
229 0
            memcpy(notsupp_cur, feature, flen);
230 0
            notsupp_cur[flen] = ' '; notsupp_cur += flen + 1;
231 0
            goto next;
232
        }
233
        // Finaly we can disable it
234 0
        npy__cpu_have[feature_id] = 0;
235 0
    next:
236 0
        feature = strtok(NULL, delim);
237
    }
238

239 0
    *nexist_cur = '\0';
240 0
    if (nexist[0] != '\0') {
241 0
        *(nexist_cur-1) = '\0'; // trim the last space
242 0
        if (PyErr_WarnFormat(PyExc_RuntimeWarning, 1,
243
                NPY__CPU_ENV_ERR_HEAD
244
                "You cannot disable CPU features (%s), since "
245
                "they are not part of the dispatched optimizations\n"
246
                "(" NPY_WITH_CPU_DISPATCH ").",
247
                nexist
248
        ) < 0) {
249
            return -1;
250
        }
251
    }
252

253 0
    *notsupp_cur = '\0';
254 0
    if (notsupp[0] != '\0') {
255 0
        *(notsupp_cur-1) = '\0'; // trim the last space
256 0
        if (PyErr_WarnFormat(PyExc_RuntimeWarning, 1,
257
                NPY__CPU_ENV_ERR_HEAD
258
                "You cannot disable CPU features (%s), since "
259
                "they are not supported by your machine.",
260
                notsupp
261
        ) < 0) {
262
            return -1;
263
        }
264
    }
265
#else
266
    if (PyErr_WarnFormat(PyExc_RuntimeWarning, 1,
267
            NPY__CPU_ENV_ERR_HEAD
268
            "You cannot use environment variable 'NPY_DISABLE_CPU_FEATURES', since "
269
        #ifdef NPY_DISABLE_OPTIMIZATION
270
            "the NumPy library was compiled with optimization disabled."
271
        #else
272
            "the NumPy library was compiled without any dispatched optimizations."
273
        #endif
274
    ) < 0) {
275
        return -1;
276
    }
277
#endif
278
    return 0;
279
}
280

281
/****************************************************************
282
 * This section is reserved to defining @npy__cpu_init_features
283
 * for each CPU architecture, please try to keep it clean. Ty
284
 ****************************************************************/
285

286
/***************** X86 ******************/
287

288
#if defined(NPY_CPU_AMD64) || defined(NPY_CPU_X86)
289

290
#ifdef _MSC_VER
291
    #include <intrin.h>
292
#elif defined(__INTEL_COMPILER)
293
    #include <immintrin.h>
294
#endif
295

296
/*
 * Execute XGETBV with ECX = 0 and return the low 32 bits of the result.
 * Returns 0 on compilers for which no implementation is provided.
 */
static int
npy__cpu_getxcr0(void)
{
#if defined(_MSC_VER) || defined (__INTEL_COMPILER)
    return _xgetbv(0);
#elif defined(__GNUC__) || defined(__clang__)
    /*
     * The mnemonic form of xgetbv is not accepted on OSX, hence the raw
     * opcode bytes, see: https://github.com/asmjit/asmjit/issues/78
     */
    unsigned int xcr_lo, xcr_hi;
    __asm(".byte 0x0F, 0x01, 0xd0" : "=a"(xcr_lo), "=d"(xcr_hi) : "c"(0));
    // only the EAX half is used by the callers
    return (int)xcr_lo;
#else
    return 0;
#endif
}
312

313
/*
 * Run CPUID for leaf 'func_id' and store EAX, EBX, ECX, EDX into
 * reg[0], reg[1], reg[2], reg[3] respectively. The MSVC and GNU/clang
 * paths request sub-leaf 0 explicitly.
 *
 * On compilers without an implementation only reg[0] is written (set to 0).
 */
static void
npy__cpu_cpuid(int reg[4], int func_id)
{
#if defined(_MSC_VER)
    __cpuidex(reg, func_id, 0);
#elif defined(__INTEL_COMPILER)
    __cpuid(reg, func_id);
#elif (defined(__GNUC__) || defined(__clang__)) && defined(NPY_CPU_X86) && defined(__PIC__)
    // %ebx may be the PIC register, so swap it out around the instruction
    __asm__("xchg{l}\t{%%}ebx, %1\n\t"
            "cpuid\n\t"
            "xchg{l}\t{%%}ebx, %1\n\t"
            : "=a" (reg[0]), "=r" (reg[1]), "=c" (reg[2]),
              "=d" (reg[3])
            : "a" (func_id), "c" (0)
    );
#elif defined(__GNUC__) || defined(__clang__)
    __asm__("cpuid\n\t"
            : "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]),
              "=d" (reg[3])
            : "a" (func_id), "c" (0)
    );
#else
    reg[0] = 0;
#endif
}
341

342
static void
343 1
npy__cpu_init_features(void)
344
{
345 1
    memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX);
346

347
    // validate platform support
348 1
    int reg[] = {0, 0, 0, 0};
349 1
    npy__cpu_cpuid(reg, 0);
350 1
    if (reg[0] == 0) {
351 0
       npy__cpu_have[NPY_CPU_FEATURE_MMX]  = 1;
352 0
       npy__cpu_have[NPY_CPU_FEATURE_SSE]  = 1;
353 0
       npy__cpu_have[NPY_CPU_FEATURE_SSE2] = 1;
354
       #ifdef NPY_CPU_AMD64
355 0
           npy__cpu_have[NPY_CPU_FEATURE_SSE3] = 1;
356
       #endif
357 0
       return;
358
    }
359

360 1
    npy__cpu_cpuid(reg, 1);
361 1
    npy__cpu_have[NPY_CPU_FEATURE_MMX]    = (reg[3] & (1 << 23)) != 0;
362 1
    npy__cpu_have[NPY_CPU_FEATURE_SSE]    = (reg[3] & (1 << 25)) != 0;
363 1
    npy__cpu_have[NPY_CPU_FEATURE_SSE2]   = (reg[3] & (1 << 26)) != 0;
364 1
    npy__cpu_have[NPY_CPU_FEATURE_SSE3]   = (reg[2] & (1 << 0))  != 0;
365 1
    npy__cpu_have[NPY_CPU_FEATURE_SSSE3]  = (reg[2] & (1 << 9))  != 0;
366 1
    npy__cpu_have[NPY_CPU_FEATURE_SSE41]  = (reg[2] & (1 << 19)) != 0;
367 1
    npy__cpu_have[NPY_CPU_FEATURE_POPCNT] = (reg[2] & (1 << 23)) != 0;
368 1
    npy__cpu_have[NPY_CPU_FEATURE_SSE42]  = (reg[2] & (1 << 20)) != 0;
369 1
    npy__cpu_have[NPY_CPU_FEATURE_F16C]   = (reg[2] & (1 << 29)) != 0;
370

371
    // check OSXSAVE
372 1
    if ((reg[2] & (1 << 27)) == 0)
373
        return;
374
    // check AVX OS support
375 1
    int xcr = npy__cpu_getxcr0();
376 1
    if ((xcr & 6) != 6)
377
        return;
378 1
    npy__cpu_have[NPY_CPU_FEATURE_AVX]    = (reg[2] & (1 << 28)) != 0;
379 1
    if (!npy__cpu_have[NPY_CPU_FEATURE_AVX])
380
        return;
381 1
    npy__cpu_have[NPY_CPU_FEATURE_FMA3]   = (reg[2] & (1 << 12)) != 0;
382

383
    // second call to the cpuid to get extended AMD feature bits
384 1
    npy__cpu_cpuid(reg, 0x80000001);
385 1
    npy__cpu_have[NPY_CPU_FEATURE_XOP]    = (reg[2] & (1 << 11)) != 0;
386 1
    npy__cpu_have[NPY_CPU_FEATURE_FMA4]   = (reg[2] & (1 << 16)) != 0;
387

388
    // third call to the cpuid to get extended AVX2 & AVX512 feature bits
389 1
    npy__cpu_cpuid(reg, 7);
390 1
    npy__cpu_have[NPY_CPU_FEATURE_AVX2]   = (reg[1] & (1 << 5))  != 0;
391 1
    if (!npy__cpu_have[NPY_CPU_FEATURE_AVX2])
392
        return;
393
    // detect AVX2 & FMA3
394 1
    npy__cpu_have[NPY_CPU_FEATURE_FMA]    = npy__cpu_have[NPY_CPU_FEATURE_FMA3];
395

396
    // check AVX512 OS support
397 1
    if ((xcr & 0xe6) != 0xe6)
398
        return;
399 1
    npy__cpu_have[NPY_CPU_FEATURE_AVX512F]  = (reg[1] & (1 << 16)) != 0;
400 1
    npy__cpu_have[NPY_CPU_FEATURE_AVX512CD] = (reg[1] & (1 << 28)) != 0;
401 1
    if (npy__cpu_have[NPY_CPU_FEATURE_AVX512F] && npy__cpu_have[NPY_CPU_FEATURE_AVX512CD]) {
402
        // Knights Landing
403 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512PF]        = (reg[1] & (1 << 26)) != 0;
404 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512ER]        = (reg[1] & (1 << 27)) != 0;
405 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512_KNL]      = npy__cpu_have[NPY_CPU_FEATURE_AVX512ER] &&
406
                                                         npy__cpu_have[NPY_CPU_FEATURE_AVX512PF];
407
        // Knights Mill
408 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512VPOPCNTDQ] = (reg[2] & (1 << 14)) != 0;
409 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX5124VNNIW]    = (reg[3] & (1 << 2))  != 0;
410 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX5124FMAPS]    = (reg[3] & (1 << 3))  != 0;
411 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512_KNM]      = npy__cpu_have[NPY_CPU_FEATURE_AVX512_KNL] &&
412 0
                                                         npy__cpu_have[NPY_CPU_FEATURE_AVX5124FMAPS] &&
413 1
                                                         npy__cpu_have[NPY_CPU_FEATURE_AVX5124VNNIW] &&
414
                                                         npy__cpu_have[NPY_CPU_FEATURE_AVX512VPOPCNTDQ];
415

416
        // Skylake-X
417 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512DQ]        = (reg[1] & (1 << 17)) != 0;
418 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512BW]        = (reg[1] & (1 << 30)) != 0;
419 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512VL]        = (reg[1] & (1 << 31)) != 0;
420 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512_SKX]      = npy__cpu_have[NPY_CPU_FEATURE_AVX512BW] &&
421 1
                                                         npy__cpu_have[NPY_CPU_FEATURE_AVX512DQ] &&
422
                                                         npy__cpu_have[NPY_CPU_FEATURE_AVX512VL];
423
        // Cascade Lake
424 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512VNNI]      = (reg[2] & (1 << 11)) != 0;
425 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512_CLX]      = npy__cpu_have[NPY_CPU_FEATURE_AVX512_SKX] &&
426
                                                         npy__cpu_have[NPY_CPU_FEATURE_AVX512VNNI];
427

428
        // Cannon Lake
429 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512IFMA]      = (reg[1] & (1 << 21)) != 0;
430 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512VBMI]      = (reg[2] & (1 << 1))  != 0;
431 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512_CNL]      = npy__cpu_have[NPY_CPU_FEATURE_AVX512_SKX] &&
432 1
                                                         npy__cpu_have[NPY_CPU_FEATURE_AVX512IFMA] &&
433
                                                         npy__cpu_have[NPY_CPU_FEATURE_AVX512VBMI];
434
        // Ice Lake
435 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512VBMI2]     = (reg[2] & (1 << 6))  != 0;
436 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512BITALG]    = (reg[2] & (1 << 12)) != 0;
437 1
        npy__cpu_have[NPY_CPU_FEATURE_AVX512_ICL]      = npy__cpu_have[NPY_CPU_FEATURE_AVX512_CLX] &&
438 0
                                                         npy__cpu_have[NPY_CPU_FEATURE_AVX512_CNL] &&
439 0
                                                         npy__cpu_have[NPY_CPU_FEATURE_AVX512VBMI2] &&
440 1
                                                         npy__cpu_have[NPY_CPU_FEATURE_AVX512BITALG] &&
441
                                                         npy__cpu_have[NPY_CPU_FEATURE_AVX512VPOPCNTDQ];
442
    }
443
}
444

445
/***************** POWER ******************/
446

447
#elif defined(NPY_CPU_PPC64) || defined(NPY_CPU_PPC64LE)
448

449
#ifdef __linux__
450
    #include <sys/auxv.h>
451
    #ifndef AT_HWCAP2
452
        #define AT_HWCAP2 26
453
    #endif
454
    #ifndef PPC_FEATURE2_ARCH_3_00
455
        #define PPC_FEATURE2_ARCH_3_00 0x00800000
456
    #endif
457
#endif
458

459
static void
460
npy__cpu_init_features(void)
461
{
462
    memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX);
463
#ifdef __linux__
464
    unsigned int hwcap = getauxval(AT_HWCAP);
465
    if ((hwcap & PPC_FEATURE_HAS_VSX) == 0)
466
        return;
467

468
    hwcap = getauxval(AT_HWCAP2);
469
    if (hwcap & PPC_FEATURE2_ARCH_3_00)
470
    {
471
        npy__cpu_have[NPY_CPU_FEATURE_VSX]  =
472
        npy__cpu_have[NPY_CPU_FEATURE_VSX2] =
473
        npy__cpu_have[NPY_CPU_FEATURE_VSX3] = 1;
474
        return;
475
    }
476
    npy__cpu_have[NPY_CPU_FEATURE_VSX2] = (hwcap & PPC_FEATURE2_ARCH_2_07) != 0;
477
    npy__cpu_have[NPY_CPU_FEATURE_VSX]  = 1;
478
// TODO: AIX, FreeBSD
479
#else
480
    npy__cpu_have[NPY_CPU_FEATURE_VSX]  = 1;
481
    #if defined(NPY_CPU_PPC64LE) || defined(NPY_HAVE_VSX2)
482
    npy__cpu_have[NPY_CPU_FEATURE_VSX2] = 1;
483
    #endif
484
    #ifdef NPY_HAVE_VSX3
485
    npy__cpu_have[NPY_CPU_FEATURE_VSX3] = 1;
486
    #endif
487
#endif
488
}
489

490
/***************** ARM ******************/
491

492
#elif defined(__arm__) || defined(__aarch64__)
493

494
static NPY_INLINE void
495
npy__cpu_init_features_arm8(void)
496
{
497
    npy__cpu_have[NPY_CPU_FEATURE_NEON]       =
498
    npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16]  =
499
    npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] =
500
    npy__cpu_have[NPY_CPU_FEATURE_ASIMD]      = 1;
501
}
502

503
#ifdef __linux__
504
/*
505
 * we aren't sure of what kind kernel or clib we deal with
506
 * so we play it safe
507
*/
508
#include <stdio.h>
509
#include "npy_cpuinfo_parser.h"
510

511
__attribute__((weak)) unsigned long getauxval(unsigned long); // linker should handle it
512
static int
513
npy__cpu_init_features_linux(void)
514
{
515
    unsigned long hwcap = 0, hwcap2 = 0;
516
    if (getauxval != 0) {
517
        hwcap = getauxval(NPY__HWCAP);
518
    #ifdef __arm__
519
        hwcap2 = getauxval(NPY__HWCAP2);
520
    #endif
521
    } else {
522
        unsigned long auxv[2];
523
        int fd = open("/proc/self/auxv", O_RDONLY);
524
        if (fd >= 0) {
525
            while (read(fd, &auxv, sizeof(auxv)) == sizeof(auxv)) {
526
                if (auxv[0] == NPY__HWCAP) {
527
                    hwcap = auxv[1];
528
                }
529
            #ifdef __arm__
530
                else if (auxv[0] == NPY__HWCAP2) {
531
                    hwcap2 = auxv[1];
532
                }
533
            #endif
534
                // detect the end
535
                else if (auxv[0] == 0 && auxv[1] == 0) {
536
                    break;
537
                }
538
            }
539
            close(fd);
540
        }
541
    }
542
    if (hwcap == 0 && hwcap2 == 0) {
543
        /*
544
         * try parsing with /proc/cpuinfo, if sandboxed
545
         * failback to compiler definitions
546
        */
547
        if(!get_feature_from_proc_cpuinfo(&hwcap, &hwcap2)) {
548
            return 0;
549
        }
550
    }
551
#ifdef __arm__
552
    // Detect Arm8 (aarch32 state)
553
    if ((hwcap2 & NPY__HWCAP2_AES)  || (hwcap2 & NPY__HWCAP2_SHA1)  ||
554
        (hwcap2 & NPY__HWCAP2_SHA2) || (hwcap2 & NPY__HWCAP2_PMULL) ||
555
        (hwcap2 & NPY__HWCAP2_CRC32))
556
    {
557
        hwcap = hwcap2;
558
#else
559
    if (1)
560
    {
561
        if (!(hwcap & (NPY__HWCAP_FP | NPY__HWCAP_ASIMD))) {
562
            // Is this could happen? maybe disabled by kernel
563
            // BTW this will break the baseline of AARCH64
564
            return 1;
565
        }
566
#endif
567
        npy__cpu_have[NPY_CPU_FEATURE_FPHP]       = (hwcap & NPY__HWCAP_FPHP)     != 0;
568
        npy__cpu_have[NPY_CPU_FEATURE_ASIMDHP]    = (hwcap & NPY__HWCAP_ASIMDHP)  != 0;
569
        npy__cpu_have[NPY_CPU_FEATURE_ASIMDDP]    = (hwcap & NPY__HWCAP_ASIMDDP)  != 0;
570
        npy__cpu_have[NPY_CPU_FEATURE_ASIMDFHM]   = (hwcap & NPY__HWCAP_ASIMDFHM) != 0;
571
        npy__cpu_init_features_arm8();
572
    } else {
573
        npy__cpu_have[NPY_CPU_FEATURE_NEON]       = (hwcap & NPY__HWCAP_NEON)   != 0;
574
        if (npy__cpu_have[NPY_CPU_FEATURE_NEON]) {
575
            npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16]  = (hwcap & NPY__HWCAP_HALF) != 0;
576
            npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] = (hwcap & NPY__HWCAP_VFPv4) != 0;
577
        }
578
    }
579
    return 1;
580
}
581
#endif
582

583
static void
584
npy__cpu_init_features(void)
585
{
586
    memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX);
587
#ifdef __linux__
588
    if (npy__cpu_init_features_linux())
589
        return;
590
#endif
591
    // We have nothing else todo
592
#if defined(NPY_HAVE_ASIMD) || defined(__aarch64__) || (defined(__ARM_ARCH) && __ARM_ARCH >= 8)
593
    #if defined(NPY_HAVE_FPHP) || defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
594
    npy__cpu_have[NPY_CPU_FEATURE_FPHP] = 1;
595
    #endif
596
    #if defined(NPY_HAVE_ASIMDHP) || defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC)
597
    npy__cpu_have[NPY_CPU_FEATURE_ASIMDHP] = 1;
598
    #endif
599
    #if defined(NPY_HAVE_ASIMDDP) || defined(__ARM_FEATURE_DOTPROD)
600
    npy__cpu_have[NPY_CPU_FEATURE_ASIMDDP] = 1;
601
    #endif
602
    #if defined(NPY_HAVE_ASIMDFHM) || defined(__ARM_FEATURE_FP16FML)
603
    npy__cpu_have[NPY_CPU_FEATURE_ASIMDFHM] = 1;
604
    #endif
605
    npy__cpu_init_features_arm8();
606
#else
607
    #if defined(NPY_HAVE_NEON) || defined(__ARM_NEON__)
608
        npy__cpu_have[NPY_CPU_FEATURE_NEON] = 1;
609
    #endif
610
    #if defined(NPY_HAVE_NEON_FP16) || defined(__ARM_FP16_FORMAT_IEEE) || (defined(__ARM_FP) && (__ARM_FP & 2))
611
        npy__cpu_have[NPY_CPU_FEATURE_NEON_FP16] = npy__cpu_have[NPY_CPU_FEATURE_NEON];
612
    #endif
613
    #if defined(NPY_HAVE_NEON_VFPV4) || defined(__ARM_FEATURE_FMA)
614
        npy__cpu_have[NPY_CPU_FEATURE_NEON_VFPV4] = npy__cpu_have[NPY_CPU_FEATURE_NEON];
615
    #endif
616
#endif
617
}
618

619
/*********** Unsupported ARCH ***********/
620
#else
621
static void
622
npy__cpu_init_features(void)
623
{
624
    /*
625
     * just in case if the compiler doesn't respect ANSI
626
     * but for knowing paltforms it still nessecery, because @npy__cpu_init_features
627
     * may called multiple of times and we need to clear the disabled features by
628
     * ENV Var or maybe in the future we can support other methods like
629
     * global variables, go back to @npy__cpu_try_disable_env for more understanding
630
     */
631
    memset(npy__cpu_have, 0, sizeof(npy__cpu_have[0]) * NPY_CPU_FEATURE_MAX);
632
}
633
#endif

Read our documentation on viewing source code .

Loading