1
|
|
/* -*- c -*- */
|
2
|
|
|
3
|
|
/*
|
4
|
|
* The purpose of this module is to add faster sort functions
|
5
|
|
* that are type-specific. This is done by altering the
|
6
|
|
* function table for the builtin descriptors.
|
7
|
|
*
|
8
|
|
* These sorting functions are copied almost directly from numarray
|
9
|
|
* with a few modifications (complex comparisons compare the imaginary
|
10
|
|
* part if the real parts are equal, for example), and the names
|
11
|
|
* are changed.
|
12
|
|
*
|
13
|
|
* The original sorting code is due to Charles R. Harris who wrote
|
14
|
|
* it for numarray.
|
15
|
|
*/
|
16
|
|
|
17
|
|
/*
|
18
|
|
* Quick sort is usually the fastest, but the worst case scenario can
|
19
|
|
* be slower than the merge and heap sorts. The merge sort requires
|
20
|
|
* extra memory and so for large arrays may not be useful.
|
21
|
|
*
|
22
|
|
* The merge sort is *stable*, meaning that equal components
|
23
|
|
* are unmoved from their entry versions, so it can be used to
|
24
|
|
* implement lexigraphic sorting on multiple keys.
|
25
|
|
*
|
26
|
|
* The heap sort is included for completeness.
|
27
|
|
*/
|
28
|
|
|
29
|
|
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
|
30
|
|
|
31
|
|
#include "npy_sort.h"
|
32
|
|
#include "npysort_common.h"
|
33
|
|
#include <stdlib.h>
|
34
|
|
|
35
|
|
#define NOT_USED NPY_UNUSED(unused)
|
36
|
|
#define PYA_QS_STACK 100
|
37
|
|
#define SMALL_QUICKSORT 15
|
38
|
|
#define SMALL_MERGESORT 20
|
39
|
|
#define SMALL_STRING 16
|
40
|
|
|
41
|
|
|
42
|
|
/*
|
43
|
|
*****************************************************************************
|
44
|
|
** NUMERIC SORTS **
|
45
|
|
*****************************************************************************
|
46
|
|
*/
|
47
|
|
|
48
|
|
|
49
|
|
/**begin repeat
|
50
|
|
*
|
51
|
|
* #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG,
|
52
|
|
* LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE,
|
53
|
|
* CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA#
|
54
|
|
* #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong,
|
55
|
|
* longlong, ulonglong, half, float, double, longdouble,
|
56
|
|
* cfloat, cdouble, clongdouble, datetime, timedelta#
|
57
|
|
* #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int,
|
58
|
|
* npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong,
|
59
|
|
* npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat,
|
60
|
|
* npy_cdouble, npy_clongdouble, npy_datetime, npy_timedelta#
|
61
|
|
*/
|
62
|
|
|
63
|
|
static void
|
64
|
0
|
mergesort0_@suff@(@type@ *pl, @type@ *pr, @type@ *pw)
|
65
|
|
{
|
66
|
|
@type@ vp, *pi, *pj, *pk, *pm;
|
67
|
|
|
68
|
0
|
if (pr - pl > SMALL_MERGESORT) {
|
69
|
|
/* merge sort */
|
70
|
0
|
pm = pl + ((pr - pl) >> 1);
|
71
|
0
|
mergesort0_@suff@(pl, pm, pw);
|
72
|
0
|
mergesort0_@suff@(pm, pr, pw);
|
73
|
0
|
for (pi = pw, pj = pl; pj < pm;) {
|
74
|
0
|
*pi++ = *pj++;
|
75
|
|
}
|
76
|
0
|
pi = pw + (pm - pl);
|
77
|
0
|
pj = pw;
|
78
|
0
|
pk = pl;
|
79
|
0
|
while (pj < pi && pm < pr) {
|
80
|
0
|
if (@TYPE@_LT(*pm, *pj)) {
|
81
|
0
|
*pk++ = *pm++;
|
82
|
|
}
|
83
|
|
else {
|
84
|
0
|
*pk++ = *pj++;
|
85
|
|
}
|
86
|
|
}
|
87
|
0
|
while(pj < pi) {
|
88
|
0
|
*pk++ = *pj++;
|
89
|
|
}
|
90
|
|
}
|
91
|
|
else {
|
92
|
|
/* insertion sort */
|
93
|
0
|
for (pi = pl + 1; pi < pr; ++pi) {
|
94
|
0
|
vp = *pi;
|
95
|
0
|
pj = pi;
|
96
|
0
|
pk = pi - 1;
|
97
|
0
|
while (pj > pl && @TYPE@_LT(vp, *pk)) {
|
98
|
0
|
*pj-- = *pk--;
|
99
|
|
}
|
100
|
0
|
*pj = vp;
|
101
|
|
}
|
102
|
|
}
|
103
|
0
|
}
|
104
|
|
|
105
|
|
|
106
|
|
NPY_NO_EXPORT int
|
107
|
0
|
mergesort_@suff@(void *start, npy_intp num, void *NOT_USED)
|
108
|
|
{
|
109
|
|
@type@ *pl, *pr, *pw;
|
110
|
|
|
111
|
0
|
pl = start;
|
112
|
0
|
pr = pl + num;
|
113
|
0
|
pw = malloc((num/2) * sizeof(@type@));
|
114
|
0
|
if (pw == NULL) {
|
115
|
|
return -NPY_ENOMEM;
|
116
|
|
}
|
117
|
0
|
mergesort0_@suff@(pl, pr, pw);
|
118
|
|
|
119
|
0
|
free(pw);
|
120
|
0
|
return 0;
|
121
|
|
}
|
122
|
|
|
123
|
|
|
124
|
|
static void
|
125
|
0
|
amergesort0_@suff@(npy_intp *pl, npy_intp *pr, @type@ *v, npy_intp *pw)
|
126
|
|
{
|
127
|
|
@type@ vp;
|
128
|
|
npy_intp vi, *pi, *pj, *pk, *pm;
|
129
|
|
|
130
|
0
|
if (pr - pl > SMALL_MERGESORT) {
|
131
|
|
/* merge sort */
|
132
|
0
|
pm = pl + ((pr - pl) >> 1);
|
133
|
0
|
amergesort0_@suff@(pl, pm, v, pw);
|
134
|
0
|
amergesort0_@suff@(pm, pr, v, pw);
|
135
|
0
|
for (pi = pw, pj = pl; pj < pm;) {
|
136
|
0
|
*pi++ = *pj++;
|
137
|
|
}
|
138
|
0
|
pi = pw + (pm - pl);
|
139
|
0
|
pj = pw;
|
140
|
0
|
pk = pl;
|
141
|
0
|
while (pj < pi && pm < pr) {
|
142
|
0
|
if (@TYPE@_LT(v[*pm], v[*pj])) {
|
143
|
0
|
*pk++ = *pm++;
|
144
|
|
}
|
145
|
|
else {
|
146
|
0
|
*pk++ = *pj++;
|
147
|
|
}
|
148
|
|
}
|
149
|
0
|
while(pj < pi) {
|
150
|
0
|
*pk++ = *pj++;
|
151
|
|
}
|
152
|
|
}
|
153
|
|
else {
|
154
|
|
/* insertion sort */
|
155
|
0
|
for (pi = pl + 1; pi < pr; ++pi) {
|
156
|
0
|
vi = *pi;
|
157
|
0
|
vp = v[vi];
|
158
|
0
|
pj = pi;
|
159
|
0
|
pk = pi - 1;
|
160
|
0
|
while (pj > pl && @TYPE@_LT(vp, v[*pk])) {
|
161
|
0
|
*pj-- = *pk--;
|
162
|
|
}
|
163
|
0
|
*pj = vi;
|
164
|
|
}
|
165
|
|
}
|
166
|
0
|
}
|
167
|
|
|
168
|
|
|
169
|
|
NPY_NO_EXPORT int
|
170
|
0
|
amergesort_@suff@(void *v, npy_intp *tosort, npy_intp num, void *NOT_USED)
|
171
|
|
{
|
172
|
|
npy_intp *pl, *pr, *pw;
|
173
|
|
|
174
|
0
|
pl = tosort;
|
175
|
0
|
pr = pl + num;
|
176
|
0
|
pw = malloc((num/2) * sizeof(npy_intp));
|
177
|
0
|
if (pw == NULL) {
|
178
|
|
return -NPY_ENOMEM;
|
179
|
|
}
|
180
|
0
|
amergesort0_@suff@(pl, pr, v, pw);
|
181
|
0
|
free(pw);
|
182
|
|
|
183
|
0
|
return 0;
|
184
|
|
}
|
185
|
|
|
186
|
|
/**end repeat**/
|
187
|
|
|
188
|
|
|
189
|
|
/*
|
190
|
|
*****************************************************************************
|
191
|
|
** STRING SORTS **
|
192
|
|
*****************************************************************************
|
193
|
|
*/
|
194
|
|
|
195
|
|
|
196
|
|
/**begin repeat
|
197
|
|
*
|
198
|
|
* #TYPE = STRING, UNICODE#
|
199
|
|
* #suff = string, unicode#
|
200
|
|
* #type = npy_char, npy_ucs4#
|
201
|
|
*/
|
202
|
|
|
203
|
|
static void
|
204
|
0
|
mergesort0_@suff@(@type@ *pl, @type@ *pr, @type@ *pw, @type@ *vp, size_t len)
|
205
|
|
{
|
206
|
|
@type@ *pi, *pj, *pk, *pm;
|
207
|
|
|
208
|
0
|
if ((size_t)(pr - pl) > SMALL_MERGESORT*len) {
|
209
|
|
/* merge sort */
|
210
|
0
|
pm = pl + (((pr - pl)/len) >> 1)*len;
|
211
|
0
|
mergesort0_@suff@(pl, pm, pw, vp, len);
|
212
|
0
|
mergesort0_@suff@(pm, pr, pw, vp, len);
|
213
|
0
|
@TYPE@_COPY(pw, pl, pm - pl);
|
214
|
0
|
pi = pw + (pm - pl);
|
215
|
0
|
pj = pw;
|
216
|
0
|
pk = pl;
|
217
|
0
|
while (pj < pi && pm < pr) {
|
218
|
0
|
if (@TYPE@_LT(pm, pj, len)) {
|
219
|
0
|
@TYPE@_COPY(pk, pm, len);
|
220
|
0
|
pm += len;
|
221
|
0
|
pk += len;
|
222
|
|
}
|
223
|
|
else {
|
224
|
0
|
@TYPE@_COPY(pk, pj, len);
|
225
|
0
|
pj += len;
|
226
|
0
|
pk += len;
|
227
|
|
}
|
228
|
|
}
|
229
|
0
|
@TYPE@_COPY(pk, pj, pi - pj);
|
230
|
|
}
|
231
|
|
else {
|
232
|
|
/* insertion sort */
|
233
|
0
|
for (pi = pl + len; pi < pr; pi += len) {
|
234
|
0
|
@TYPE@_COPY(vp, pi, len);
|
235
|
0
|
pj = pi;
|
236
|
0
|
pk = pi - len;
|
237
|
0
|
while (pj > pl && @TYPE@_LT(vp, pk, len)) {
|
238
|
0
|
@TYPE@_COPY(pj, pk, len);
|
239
|
0
|
pj -= len;
|
240
|
0
|
pk -= len;
|
241
|
|
}
|
242
|
0
|
@TYPE@_COPY(pj, vp, len);
|
243
|
|
}
|
244
|
|
}
|
245
|
0
|
}
|
246
|
|
|
247
|
|
|
248
|
|
NPY_NO_EXPORT int
|
249
|
0
|
mergesort_@suff@(void *start, npy_intp num, void *varr)
|
250
|
|
{
|
251
|
0
|
PyArrayObject *arr = varr;
|
252
|
0
|
size_t elsize = PyArray_ITEMSIZE(arr);
|
253
|
0
|
size_t len = elsize / sizeof(@type@);
|
254
|
|
@type@ *pl, *pr, *pw, *vp;
|
255
|
0
|
int err = 0;
|
256
|
|
|
257
|
|
/* Items that have zero size don't make sense to sort */
|
258
|
0
|
if (elsize == 0) {
|
259
|
|
return 0;
|
260
|
|
}
|
261
|
|
|
262
|
0
|
pl = start;
|
263
|
0
|
pr = pl + num*len;
|
264
|
0
|
pw = malloc((num/2) * elsize);
|
265
|
0
|
if (pw == NULL) {
|
266
|
|
err = -NPY_ENOMEM;
|
267
|
|
goto fail_0;
|
268
|
|
}
|
269
|
0
|
vp = malloc(elsize);
|
270
|
0
|
if (vp == NULL) {
|
271
|
|
err = -NPY_ENOMEM;
|
272
|
|
goto fail_1;
|
273
|
|
}
|
274
|
0
|
mergesort0_@suff@(pl, pr, pw, vp, len);
|
275
|
|
|
276
|
0
|
free(vp);
|
277
|
0
|
fail_1:
|
278
|
0
|
free(pw);
|
279
|
|
fail_0:
|
280
|
|
return err;
|
281
|
|
}
|
282
|
|
|
283
|
|
|
284
|
|
static void
|
285
|
0
|
amergesort0_@suff@(npy_intp *pl, npy_intp *pr, @type@ *v, npy_intp *pw, size_t len)
|
286
|
|
{
|
287
|
|
@type@ *vp;
|
288
|
|
npy_intp vi, *pi, *pj, *pk, *pm;
|
289
|
|
|
290
|
0
|
if (pr - pl > SMALL_MERGESORT) {
|
291
|
|
/* merge sort */
|
292
|
0
|
pm = pl + ((pr - pl) >> 1);
|
293
|
0
|
amergesort0_@suff@(pl, pm, v, pw, len);
|
294
|
0
|
amergesort0_@suff@(pm, pr, v, pw, len);
|
295
|
0
|
for (pi = pw, pj = pl; pj < pm;) {
|
296
|
0
|
*pi++ = *pj++;
|
297
|
|
}
|
298
|
0
|
pi = pw + (pm - pl);
|
299
|
0
|
pj = pw;
|
300
|
0
|
pk = pl;
|
301
|
0
|
while (pj < pi && pm < pr) {
|
302
|
0
|
if (@TYPE@_LT(v + (*pm)*len, v + (*pj)*len, len)) {
|
303
|
0
|
*pk++ = *pm++;
|
304
|
|
}
|
305
|
|
else {
|
306
|
0
|
*pk++ = *pj++;
|
307
|
|
}
|
308
|
|
}
|
309
|
0
|
while (pj < pi) {
|
310
|
0
|
*pk++ = *pj++;
|
311
|
|
}
|
312
|
|
}
|
313
|
|
else {
|
314
|
|
/* insertion sort */
|
315
|
0
|
for (pi = pl + 1; pi < pr; ++pi) {
|
316
|
0
|
vi = *pi;
|
317
|
0
|
vp = v + vi*len;
|
318
|
0
|
pj = pi;
|
319
|
0
|
pk = pi - 1;
|
320
|
0
|
while (pj > pl && @TYPE@_LT(vp, v + (*pk)*len, len)) {
|
321
|
0
|
*pj-- = *pk--;
|
322
|
|
}
|
323
|
0
|
*pj = vi;
|
324
|
|
}
|
325
|
|
}
|
326
|
0
|
}
|
327
|
|
|
328
|
|
|
329
|
|
NPY_NO_EXPORT int
|
330
|
0
|
amergesort_@suff@(void *v, npy_intp *tosort, npy_intp num, void *varr)
|
331
|
|
{
|
332
|
0
|
PyArrayObject *arr = varr;
|
333
|
0
|
size_t elsize = PyArray_ITEMSIZE(arr);
|
334
|
0
|
size_t len = elsize / sizeof(@type@);
|
335
|
|
npy_intp *pl, *pr, *pw;
|
336
|
|
|
337
|
|
/* Items that have zero size don't make sense to sort */
|
338
|
0
|
if (elsize == 0) {
|
339
|
|
return 0;
|
340
|
|
}
|
341
|
|
|
342
|
0
|
pl = tosort;
|
343
|
0
|
pr = pl + num;
|
344
|
0
|
pw = malloc((num/2) * sizeof(npy_intp));
|
345
|
0
|
if (pw == NULL) {
|
346
|
|
return -NPY_ENOMEM;
|
347
|
|
}
|
348
|
0
|
amergesort0_@suff@(pl, pr, v, pw, len);
|
349
|
0
|
free(pw);
|
350
|
|
|
351
|
0
|
return 0;
|
352
|
|
}
|
353
|
|
|
354
|
|
/**end repeat**/
|
355
|
|
|
356
|
|
|
357
|
|
/*
|
358
|
|
*****************************************************************************
|
359
|
|
** GENERIC SORT **
|
360
|
|
*****************************************************************************
|
361
|
|
*/
|
362
|
|
|
363
|
|
|
364
|
|
static void
|
365
|
0
|
npy_mergesort0(char *pl, char *pr, char *pw, char *vp, npy_intp elsize,
|
366
|
|
PyArray_CompareFunc *cmp, PyArrayObject *arr)
|
367
|
|
{
|
368
|
|
char *pi, *pj, *pk, *pm;
|
369
|
|
|
370
|
0
|
if (pr - pl > SMALL_MERGESORT*elsize) {
|
371
|
|
/* merge sort */
|
372
|
0
|
pm = pl + (((pr - pl)/elsize) >> 1)*elsize;
|
373
|
0
|
npy_mergesort0(pl, pm, pw, vp, elsize, cmp, arr);
|
374
|
0
|
npy_mergesort0(pm, pr, pw, vp, elsize, cmp, arr);
|
375
|
0
|
GENERIC_COPY(pw, pl, pm - pl);
|
376
|
0
|
pi = pw + (pm - pl);
|
377
|
0
|
pj = pw;
|
378
|
0
|
pk = pl;
|
379
|
0
|
while (pj < pi && pm < pr) {
|
380
|
0
|
if (cmp(pm, pj, arr) < 0) {
|
381
|
0
|
GENERIC_COPY(pk, pm, elsize);
|
382
|
0
|
pm += elsize;
|
383
|
0
|
pk += elsize;
|
384
|
|
}
|
385
|
|
else {
|
386
|
0
|
GENERIC_COPY(pk, pj, elsize);
|
387
|
0
|
pj += elsize;
|
388
|
0
|
pk += elsize;
|
389
|
|
}
|
390
|
|
}
|
391
|
0
|
GENERIC_COPY(pk, pj, pi - pj);
|
392
|
|
}
|
393
|
|
else {
|
394
|
|
/* insertion sort */
|
395
|
0
|
for (pi = pl + elsize; pi < pr; pi += elsize) {
|
396
|
0
|
GENERIC_COPY(vp, pi, elsize);
|
397
|
0
|
pj = pi;
|
398
|
0
|
pk = pi - elsize;
|
399
|
0
|
while (pj > pl && cmp(vp, pk, arr) < 0) {
|
400
|
0
|
GENERIC_COPY(pj, pk, elsize);
|
401
|
0
|
pj -= elsize;
|
402
|
0
|
pk -= elsize;
|
403
|
|
}
|
404
|
0
|
GENERIC_COPY(pj, vp, elsize);
|
405
|
|
}
|
406
|
|
}
|
407
|
0
|
}
|
408
|
|
|
409
|
|
|
410
|
|
NPY_NO_EXPORT int
|
411
|
0
|
npy_mergesort(void *start, npy_intp num, void *varr)
|
412
|
|
{
|
413
|
0
|
PyArrayObject *arr = varr;
|
414
|
0
|
npy_intp elsize = PyArray_ITEMSIZE(arr);
|
415
|
0
|
PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare;
|
416
|
0
|
char *pl = start;
|
417
|
0
|
char *pr = pl + num*elsize;
|
418
|
|
char *pw;
|
419
|
|
char *vp;
|
420
|
0
|
int err = -NPY_ENOMEM;
|
421
|
|
|
422
|
|
/* Items that have zero size don't make sense to sort */
|
423
|
0
|
if (elsize == 0) {
|
424
|
|
return 0;
|
425
|
|
}
|
426
|
|
|
427
|
0
|
pw = malloc((num >> 1) *elsize);
|
428
|
0
|
vp = malloc(elsize);
|
429
|
|
|
430
|
0
|
if (pw != NULL && vp != NULL) {
|
431
|
0
|
npy_mergesort0(pl, pr, pw, vp, elsize, cmp, arr);
|
432
|
0
|
err = 0;
|
433
|
|
}
|
434
|
|
|
435
|
0
|
free(vp);
|
436
|
0
|
free(pw);
|
437
|
|
|
438
|
0
|
return err;
|
439
|
|
}
|
440
|
|
|
441
|
|
|
442
|
|
static void
|
443
|
0
|
npy_amergesort0(npy_intp *pl, npy_intp *pr, char *v, npy_intp *pw,
|
444
|
|
npy_intp elsize, PyArray_CompareFunc *cmp, PyArrayObject *arr)
|
445
|
|
{
|
446
|
|
char *vp;
|
447
|
|
npy_intp vi, *pi, *pj, *pk, *pm;
|
448
|
|
|
449
|
0
|
if (pr - pl > SMALL_MERGESORT) {
|
450
|
|
/* merge sort */
|
451
|
0
|
pm = pl + ((pr - pl) >> 1);
|
452
|
0
|
npy_amergesort0(pl, pm, v, pw, elsize, cmp, arr);
|
453
|
0
|
npy_amergesort0(pm, pr, v, pw, elsize, cmp, arr);
|
454
|
0
|
for (pi = pw, pj = pl; pj < pm;) {
|
455
|
0
|
*pi++ = *pj++;
|
456
|
|
}
|
457
|
0
|
pi = pw + (pm - pl);
|
458
|
0
|
pj = pw;
|
459
|
0
|
pk = pl;
|
460
|
0
|
while (pj < pi && pm < pr) {
|
461
|
0
|
if (cmp(v + (*pm)*elsize, v + (*pj)*elsize, arr) < 0) {
|
462
|
0
|
*pk++ = *pm++;
|
463
|
|
}
|
464
|
|
else {
|
465
|
0
|
*pk++ = *pj++;
|
466
|
|
}
|
467
|
|
}
|
468
|
0
|
while (pj < pi) {
|
469
|
0
|
*pk++ = *pj++;
|
470
|
|
}
|
471
|
|
}
|
472
|
|
else {
|
473
|
|
/* insertion sort */
|
474
|
0
|
for (pi = pl + 1; pi < pr; ++pi) {
|
475
|
0
|
vi = *pi;
|
476
|
0
|
vp = v + vi*elsize;
|
477
|
0
|
pj = pi;
|
478
|
0
|
pk = pi - 1;
|
479
|
0
|
while (pj > pl && cmp(vp, v + (*pk)*elsize, arr) < 0) {
|
480
|
0
|
*pj-- = *pk--;
|
481
|
|
}
|
482
|
0
|
*pj = vi;
|
483
|
|
}
|
484
|
|
}
|
485
|
0
|
}
|
486
|
|
|
487
|
|
|
488
|
|
NPY_NO_EXPORT int
|
489
|
0
|
npy_amergesort(void *v, npy_intp *tosort, npy_intp num, void *varr)
|
490
|
|
{
|
491
|
0
|
PyArrayObject *arr = varr;
|
492
|
0
|
npy_intp elsize = PyArray_ITEMSIZE(arr);
|
493
|
0
|
PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare;
|
494
|
|
npy_intp *pl, *pr, *pw;
|
495
|
|
|
496
|
|
/* Items that have zero size don't make sense to sort */
|
497
|
0
|
if (elsize == 0) {
|
498
|
|
return 0;
|
499
|
|
}
|
500
|
|
|
501
|
0
|
pl = tosort;
|
502
|
0
|
pr = pl + num;
|
503
|
0
|
pw = malloc((num >> 1) * sizeof(npy_intp));
|
504
|
0
|
if (pw == NULL) {
|
505
|
|
return -NPY_ENOMEM;
|
506
|
|
}
|
507
|
0
|
npy_amergesort0(pl, pr, v, pw, elsize, cmp, arr);
|
508
|
0
|
free(pw);
|
509
|
|
|
510
|
0
|
return 0;
|
511
|
|
}
|