/*
 * This header provides NumPy with a consistent interface to CBLAS code. It is
 * needed because not all providers of CBLAS ship a cblas.h. For instance, MKL
 * provides mkl_cblas.h instead and also typedefs the CBLAS_XXX enums.
 */
#ifndef _NPY_CBLAS_H_
#define _NPY_CBLAS_H_

/* size_t is needed for the CBLAS_INDEX definition further down. */
#include <stddef.h>

/* Allow the use in C++ code. */
#ifdef __cplusplus
extern "C"
{
#endif

/*
 * Enumerated and derived types
 *
 * These are declared here, with explicit numeric values, so that callers do
 * not depend on any particular vendor's cblas.h being available.
 */
enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102};
enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113};
enum CBLAS_UPLO {CblasUpper=121, CblasLower=122};
enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132};
enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};

#define CBLAS_INDEX size_t  /* this may vary between platforms */

/*
 * Symbol-name decoration for BLAS/CBLAS entry points.
 *
 * BLAS_FORTRAN_SUFFIX is the suffix BLAS_FUNC places directly after the
 * routine name: a single underscore, or nothing when NO_APPEND_FORTRAN is
 * defined.
 */
#ifdef NO_APPEND_FORTRAN
#define BLAS_FORTRAN_SUFFIX
#else
#define BLAS_FORTRAN_SUFFIX _
#endif

/*
 * Optional prefix and suffix applied to every decorated symbol. Both default
 * to empty unless they were already defined before this point.
 */
#ifndef BLAS_SYMBOL_PREFIX
#define BLAS_SYMBOL_PREFIX
#endif

#ifndef BLAS_SYMBOL_SUFFIX
#define BLAS_SYMBOL_SUFFIX
#endif

/*
 * Two-level paste: going through BLAS_FUNC_EXPAND makes the preprocessor
 * expand the prefix/suffix macros first, so that BLAS_FUNC_CONCAT pastes
 * their values rather than their names.
 */
#define BLAS_FUNC_CONCAT(name,prefix,suffix,suffix2) prefix ## name ## suffix ## suffix2
#define BLAS_FUNC_EXPAND(name,prefix,suffix,suffix2) BLAS_FUNC_CONCAT(name,prefix,suffix,suffix2)

/*
 * CBLAS_FUNC(name): BLAS_SYMBOL_PREFIX + name + BLAS_SYMBOL_SUFFIX
 * BLAS_FUNC(name):  BLAS_SYMBOL_PREFIX + name + BLAS_FORTRAN_SUFFIX
 *                   + BLAS_SYMBOL_SUFFIX
 */
#define CBLAS_FUNC(name) BLAS_FUNC_EXPAND(name,BLAS_SYMBOL_PREFIX,,BLAS_SYMBOL_SUFFIX)
#define BLAS_FUNC(name) BLAS_FUNC_EXPAND(name,BLAS_SYMBOL_PREFIX,BLAS_FORTRAN_SUFFIX,BLAS_SYMBOL_SUFFIX)

/*
 * Integer type used for BLAS dimensions/strides, and its largest value.
 *
 * With HAVE_BLAS_ILP64 the 64-bit NumPy integer is used; npy_int64 and
 * NPY_MAX_INT64 are not defined in this header and must already be visible.
 * Otherwise plain int is used; INT_MAX comes from <limits.h>, which this
 * header does not include itself.
 */
#ifdef HAVE_BLAS_ILP64
#define CBLAS_INT npy_int64
#define CBLAS_INT_MAX NPY_MAX_INT64
#else
#define CBLAS_INT int
#define CBLAS_INT_MAX INT_MAX
#endif

/*
 * BLASNAME and BLASINT exist only while npy_cblas_base.h is being included
 * and are removed again right after it.
 * NOTE(review): presumably npy_cblas_base.h declares the CBLAS prototypes in
 * terms of these two macros -- confirm against that header.
 */
#define BLASNAME(name) CBLAS_FUNC(name)
#define BLASINT CBLAS_INT

#include "npy_cblas_base.h"

#undef BLASINT
#undef BLASNAME


65
|
|
/*
|
66
|
|
* Convert NumPy stride to BLAS stride. Returns 0 if conversion cannot be done
|
67
|
|
* (BLAS won't handle negative or zero strides the way we want).
|
68
|
|
*/
|
69
|
|
static NPY_INLINE CBLAS_INT
|
70
|
|
blas_stride(npy_intp stride, unsigned itemsize)
|
71
|
|
{
|
72
|
|
/*
|
73
|
|
* Should probably check pointer alignment also, but this may cause
|
74
|
|
* problems if we require complex to be 16 byte aligned.
|
75
|
|
*/
|
76
|
1
|
if (stride > 0 && (stride % itemsize) == 0) {
|
77
|
1
|
stride /= itemsize;
|
78
|
1
|
if (stride <= CBLAS_INT_MAX) {
|
79
|
1
|
return stride;
|
80
|
|
}
|
81
|
|
}
|
82
|
|
return 0;
|
83
|
|
}
|
84
|
|
|
/*
 * Define a chunksize for CBLAS.
 *
 * When npy_intp can hold values larger than CBLAS_INT_MAX, the chunksize is
 * CBLAS_INT_MAX / 2 + 1, which is the greatest power of two less than
 * CBLAS_INT_MAX (for a CBLAS_INT_MAX of the form 2**k - 1).
 * Otherwise every npy_intp value already fits in CBLAS_INT and the chunksize
 * is simply NPY_MAX_INTP.
 *
 * NPY_MAX_INTP is not defined in this header; it must be visible before this
 * point, since an undefined name in #if silently evaluates to 0.
 */
#if NPY_MAX_INTP > CBLAS_INT_MAX
# define NPY_CBLAS_CHUNK  (CBLAS_INT_MAX / 2 + 1)
#else
# define NPY_CBLAS_CHUNK  NPY_MAX_INTP
#endif


/* Close the extern "C" block opened near the top of this header. */
#ifdef __cplusplus
}
#endif

#endif  /* _NPY_CBLAS_H_ */