1
/* -*- c -*- */
2

3
/*
4
 * The purpose of this module is to add faster sort functions
5
 * that are type-specific.  This is done by altering the
6
 * function table for the builtin descriptors.
7
 *
8
 * These sorting functions are copied almost directly from numarray
9
 * with a few modifications (complex comparisons compare the imaginary
10
 * part if the real parts are equal, for example), and the names
11
 * are changed.
12
 *
13
 * The original sorting code is due to Charles R. Harris who wrote
14
 * it for numarray.
15
 */
16

17
/*
18
 * Quick sort is usually the fastest, but the worst case scenario can
19
 * be slower than the merge and heap sorts.  The merge sort requires
20
 * extra memory and so for large arrays may not be useful.
21
 *
22
 * The merge sort is *stable*, meaning that equal components
23
 * are unmoved from their entry versions, so it can be used to
24
 * implement lexigraphic sorting on multiple keys.
25
 *
26
 * The heap sort is included for completeness.
27
 */
28

29

30
/* For details of Timsort, refer to
31
 * https://github.com/python/cpython/blob/3.7/Objects/listsort.txt
32
 */
33

34
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
35

36
#include "npy_sort.h"
37
#include "npysort_common.h"
38
#include <stdlib.h>
39

40
/* enough for 32 * 1.618 ** 128 elements */
41
#define TIMSORT_STACK_SIZE 128
42

43

44

45
static npy_intp compute_min_run(npy_intp num)
46
{
47 1
    npy_intp r = 0;
48

49 1
    while (64 < num) {
50 1
        r |= num & 1;
51 1
        num >>= 1;
52
    }
53

54 1
    return num + r;
55
}
56

57
typedef struct {
58
    npy_intp s; /* start pointer */
59
    npy_intp l; /* length */
60
} run;
61

62

63
/* buffer for argsort. Declared here to avoid multiple declarations. */
64
typedef struct {
65
    npy_intp *pw;
66
    npy_intp size;
67
} buffer_intp;
68

69

70
/* buffer method */
71
static NPY_INLINE int
72 1
resize_buffer_intp(buffer_intp *buffer, npy_intp new_size)
73
{
74 1
    if (new_size <= buffer->size) {
75
        return 0;
76
    }
77

78 1
    if (NPY_UNLIKELY(buffer->pw == NULL)) {
79 1
        buffer->pw = malloc(new_size * sizeof(npy_intp));
80
    } else {
81 1
        buffer->pw = realloc(buffer->pw, new_size * sizeof(npy_intp));
82
    }
83

84 1
    buffer->size = new_size;
85

86 1
    if (NPY_UNLIKELY(buffer->pw == NULL)) {
87
        return -NPY_ENOMEM;
88
    } else {
89 1
        return 0;
90
    }
91
}
92

93
/*
94
 *****************************************************************************
95
 **                            NUMERIC SORTS                                **
96
 *****************************************************************************
97
 */
98

99

100
/**begin repeat
101
 *
102
 * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG,
103
 *         LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE,
104
 *         CFLOAT, CDOUBLE, CLONGDOUBLE, DATETIME, TIMEDELTA#
105
 * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong,
106
 *         longlong, ulonglong, half, float, double, longdouble,
107
 *         cfloat, cdouble, clongdouble, datetime, timedelta#
108
 * #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int,
109
 *         npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong,
110
 *         npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat,
111
 *         npy_cdouble, npy_clongdouble, npy_datetime, npy_timedelta#
112
 */
113

114

115
typedef struct {
116
    @type@ * pw;
117
    npy_intp size;
118
} buffer_@suff@;
119

120

121
static NPY_INLINE int
122 0
resize_buffer_@suff@(buffer_@suff@ *buffer, npy_intp new_size)
123
{
124 0
    if (new_size <= buffer->size) {
125
        return 0;
126
    }
127

128 0
    if (NPY_UNLIKELY(buffer->pw == NULL)) {
129 0
        buffer->pw = malloc(new_size * sizeof(@type@));
130
    } else {
131 0
        buffer->pw = realloc(buffer->pw, new_size * sizeof(@type@));
132
    }
133

134 0
    buffer->size = new_size;
135

136 0
    if (NPY_UNLIKELY(buffer->pw == NULL)) {
137
        return -NPY_ENOMEM;
138
    } else {
139 0
        return 0;
140
    }
141
}
142

143

144
static npy_intp
145 1
count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun)
146
{
147
    npy_intp sz;
148
    @type@ vc, *pl, *pi, *pj, *pr;
149

150 1
    if (NPY_UNLIKELY(num - l == 1)) {
151
        return 1;
152
    }
153

154 1
    pl = arr + l;
155

156
    /* (not strictly) ascending sequence */
157 1
    if (!@TYPE@_LT(*(pl + 1), *pl)) {
158 1
        for (pi = pl + 1; pi < arr + num - 1 && !@TYPE@_LT(*(pi + 1), *pi); ++pi) {
159
        }
160
    } else {  /* (strictly) descending sequence */
161 1
        for (pi = pl + 1; pi < arr + num - 1 && @TYPE@_LT(*(pi + 1), *pi); ++pi) {
162
        }
163

164 1
        for (pj = pl, pr = pi; pj < pr; ++pj, --pr) {
165 1
            @TYPE@_SWAP(*pj, *pr);
166
        }
167
    }
168

169 1
    ++pi;
170 1
    sz = pi - pl;
171

172 1
    if (sz < minrun) {
173 0
        if (l + minrun < num) {
174
            sz = minrun;
175
        } else {
176 0
            sz = num - l;
177
        }
178

179 0
        pr = pl + sz;
180

181
        /* insertion sort */
182 0
        for (; pi < pr; ++pi) {
183 0
            vc = *pi;
184 0
            pj = pi;
185

186 0
            while (pl < pj && @TYPE@_LT(vc, *(pj - 1))) {
187 0
                *pj = *(pj - 1);
188 0
                --pj;
189
            }
190

191 0
            *pj = vc;
192
        }
193
    }
194

195
    return sz;
196
}
197

198

199
/* when the left part of the array (p1) is smaller, copy p1 to buffer
200
 * and merge from left to right
201
 */
202
static void
203 0
merge_left_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2,
204
                  @type@ *p3)
205
{
206 0
    @type@ *end = p2 + l2;
207 0
    memcpy(p3, p1, sizeof(@type@) * l1);
208
    /* first element must be in p2 otherwise skipped in the caller */
209 0
    *p1++ = *p2++;
210

211 0
    while (p1 < p2 && p2 < end) {
212 0
        if (@TYPE@_LT(*p2, *p3)) {
213 0
            *p1++ = *p2++;
214
        } else {
215 0
            *p1++ = *p3++;
216
        }
217
    }
218

219 0
    if (p1 != p2) {
220 0
        memcpy(p1, p3, sizeof(@type@) * (p2 - p1));
221
    }
222 0
}
223

224

225
/* when the right part of the array (p2) is smaller, copy p2 to buffer
226
 * and merge from right to left
227
 */
228
static void
229 0
merge_right_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2,
230
                   @type@ *p3)
231
{
232
    npy_intp ofs;
233 0
    @type@ *start = p1 - 1;
234 0
    memcpy(p3, p2, sizeof(@type@) * l2);
235 0
    p1 += l1 - 1;
236 0
    p2 += l2 - 1;
237 0
    p3 += l2 - 1;
238
    /* first element must be in p1 otherwise skipped in the caller */
239 0
    *p2-- = *p1--;
240

241 0
    while (p1 < p2 && start < p1) {
242 0
        if (@TYPE@_LT(*p3, *p1)) {
243 0
            *p2-- = *p1--;
244
        } else {
245 0
            *p2-- = *p3--;
246
        }
247
    }
248

249 0
    if (p1 != p2) {
250 0
        ofs = p2 - start;
251 0
        memcpy(start + 1, p3 - ofs + 1, sizeof(@type@) * ofs);
252
    }
253 0
}
254

255

256
/* Note: the naming convention of gallop functions are different from that of
257
 * CPython. For example, here gallop_right means gallop from left toward right,
258
 * whereas in CPython gallop_right means gallop
259
 * and find the right most element among equal elements
260
 */
261
static npy_intp
262 0
gallop_right_@suff@(const @type@ *arr, const npy_intp size, const @type@ key)
263
{
264
    npy_intp last_ofs, ofs, m;
265

266 0
    if (@TYPE@_LT(key, arr[0])) {
267
        return 0;
268
    }
269

270
    last_ofs = 0;
271
    ofs = 1;
272

273
    for (;;) {
274 0
        if (size <= ofs || ofs < 0) {
275
            ofs = size; /* arr[ofs] is never accessed */
276
            break;
277
        }
278

279 0
        if (@TYPE@_LT(key, arr[ofs])) {
280
            break;
281
        } else {
282 0
            last_ofs = ofs;
283
            /* ofs = 1, 3, 7, 15... */
284 0
            ofs = (ofs << 1) + 1;
285
        }
286
    }
287

288
    /* now that arr[last_ofs] <= key < arr[ofs] */
289 0
    while (last_ofs + 1 < ofs) {
290 0
        m = last_ofs + ((ofs - last_ofs) >> 1);
291

292 0
        if (@TYPE@_LT(key, arr[m])) {
293
            ofs = m;
294
        } else {
295 0
            last_ofs = m;
296
        }
297
    }
298

299
    /* now that arr[ofs-1] <= key < arr[ofs] */
300
    return ofs;
301
}
302

303

304
static npy_intp
305 0
gallop_left_@suff@(const @type@ *arr, const npy_intp size, const @type@ key)
306
{
307
    npy_intp last_ofs, ofs, l, m, r;
308

309 0
    if (@TYPE@_LT(arr[size - 1], key)) {
310
        return size;
311
    }
312

313
    last_ofs = 0;
314
    ofs = 1;
315

316
    for (;;) {
317 0
        if (size <= ofs || ofs < 0) {
318
            ofs = size;
319
            break;
320
        }
321

322 0
        if (@TYPE@_LT(arr[size - ofs - 1], key)) {
323
            break;
324
        } else {
325 0
            last_ofs = ofs;
326 0
            ofs = (ofs << 1) + 1;
327
        }
328
    }
329

330
    /* now that arr[size-ofs-1] < key <= arr[size-last_ofs-1] */
331 0
    l = size - ofs - 1;
332 0
    r = size - last_ofs - 1;
333

334 0
    while (l + 1 < r) {
335 0
        m = l + ((r - l) >> 1);
336

337 0
        if (@TYPE@_LT(arr[m], key)) {
338
            l = m;
339
        } else {
340 0
            r = m;
341
        }
342
    }
343

344
    /* now that arr[r-1] < key <= arr[r] */
345
    return r;
346
}
347

348

349
static int
350 0
merge_at_@suff@(@type@ *arr, const run *stack, const npy_intp at,
351
                buffer_@suff@ *buffer)
352
{
353
    int ret;
354
    npy_intp s1, l1, s2, l2, k;
355
    @type@ *p1, *p2;
356 0
    s1 = stack[at].s;
357 0
    l1 = stack[at].l;
358 0
    s2 = stack[at + 1].s;
359 0
    l2 = stack[at + 1].l;
360
    /* arr[s2] belongs to arr[s1+k].
361
     * if try to comment this out for debugging purpose, remember
362
     * in the merging process the first element is skipped
363
     */
364 0
    k = gallop_right_@suff@(arr + s1, l1, arr[s2]);
365

366 0
    if (l1 == k) {
367
        /* already sorted */
368
        return 0;
369
    }
370

371 0
    p1 = arr + s1 + k;
372 0
    l1 -= k;
373 0
    p2 = arr + s2;
374
    /* arr[s2-1] belongs to arr[s2+l2] */
375 0
    l2 = gallop_left_@suff@(arr + s2, l2, arr[s2 - 1]);
376

377 0
    if (l2 < l1) {
378 0
        ret = resize_buffer_@suff@(buffer, l2);
379

380 0
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
381

382 0
        merge_right_@suff@(p1, l1, p2, l2, buffer->pw);
383
    } else {
384 0
        ret = resize_buffer_@suff@(buffer, l1);
385

386 0
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
387

388 0
        merge_left_@suff@(p1, l1, p2, l2, buffer->pw);
389
    }
390

391
    return 0;
392
}
393

394

395
static int
396 1
try_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr,
397
                    buffer_@suff@ *buffer)
398
{
399
    int ret;
400
    npy_intp A, B, C, top;
401 1
    top = *stack_ptr;
402

403 1
    while (1 < top) {
404 0
        B = stack[top - 2].l;
405 0
        C = stack[top - 1].l;
406

407 0
        if ((2 < top && stack[top - 3].l <= B + C) ||
408 0
                (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) {
409 0
            A = stack[top - 3].l;
410

411 0
            if (A <= C) {
412 0
                ret = merge_at_@suff@(arr, stack, top - 3, buffer);
413

414 0
                if (NPY_UNLIKELY(ret < 0)) { return ret; }
415

416 0
                stack[top - 3].l += B;
417 0
                stack[top - 2] = stack[top - 1];
418 0
                --top;
419
            } else {
420 0
                ret = merge_at_@suff@(arr, stack, top - 2, buffer);
421

422 0
                if (NPY_UNLIKELY(ret < 0)) { return ret; }
423

424 0
                stack[top - 2].l += C;
425 0
                --top;
426
            }
427 0
        } else if (1 < top && B <= C) {
428 0
            ret = merge_at_@suff@(arr, stack, top - 2, buffer);
429

430 0
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
431

432 0
            stack[top - 2].l += C;
433 0
            --top;
434
        } else {
435
            break;
436
        }
437
    }
438

439 1
    *stack_ptr = top;
440 1
    return 0;
441
}
442

443
static int
444 1
force_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr,
445
                      buffer_@suff@ *buffer)
446
{
447
    int ret;
448 1
    npy_intp top = *stack_ptr;
449

450 1
    while (2 < top) {
451 0
        if (stack[top - 3].l <= stack[top - 1].l) {
452 0
            ret = merge_at_@suff@(arr, stack, top - 3, buffer);
453

454 0
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
455

456 0
            stack[top - 3].l += stack[top - 2].l;
457 0
            stack[top - 2] = stack[top - 1];
458 0
            --top;
459
        } else {
460 0
            ret = merge_at_@suff@(arr, stack, top - 2, buffer);
461

462 0
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
463

464 0
            stack[top - 2].l += stack[top - 1].l;
465 0
            --top;
466
        }
467
    }
468

469 1
    if (1 < top) {
470 0
        ret = merge_at_@suff@(arr, stack, top - 2, buffer);
471

472 0
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
473
    }
474

475
    return 0;
476
}
477

478

479
NPY_NO_EXPORT int
480 1
timsort_@suff@(void *start, npy_intp num, void *NPY_UNUSED(varr))
481
{
482
    int ret;
483
    npy_intp l, n, stack_ptr, minrun;
484
    buffer_@suff@ buffer;
485
    run stack[TIMSORT_STACK_SIZE];
486 1
    buffer.pw = NULL;
487 1
    buffer.size = 0;
488 1
    stack_ptr = 0;
489 1
    minrun = compute_min_run(num);
490

491 1
    for (l = 0; l < num;) {
492 1
        n = count_run_@suff@(start, l, num, minrun);
493 1
        stack[stack_ptr].s = l;
494 1
        stack[stack_ptr].l = n;
495 1
        ++stack_ptr;
496 1
        ret = try_collapse_@suff@(start, stack, &stack_ptr, &buffer);
497

498 1
        if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
499

500 1
        l += n;
501
    }
502

503 1
    ret = force_collapse_@suff@(start, stack, &stack_ptr, &buffer);
504

505 1
    if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
506

507 1
    ret = 0;
508 1
cleanup:
509

510 1
    if (buffer.pw != NULL) {
511 0
        free(buffer.pw);
512
    }
513

514 1
    return ret;
515
}
516

517

518
/* argsort */
519

520

521
static npy_intp
522 1
acount_run_@suff@(@type@ *arr, npy_intp *tosort, npy_intp l, npy_intp num,
523
                  npy_intp minrun)
524
{
525
    npy_intp sz;
526
    @type@ vc;
527
    npy_intp vi;
528
    npy_intp *pl, *pi, *pj, *pr;
529

530 1
    if (NPY_UNLIKELY(num - l == 1)) {
531
        return 1;
532
    }
533

534 1
    pl = tosort + l;
535

536
    /* (not strictly) ascending sequence */
537 1
    if (!@TYPE@_LT(arr[*(pl + 1)], arr[*pl])) {
538 1
        for (pi = pl + 1; pi < tosort + num - 1
539 1
                && !@TYPE@_LT(arr[*(pi + 1)], arr[*pi]); ++pi) {
540
        }
541
    } else {  /* (strictly) descending sequence */
542 1
        for (pi = pl + 1; pi < tosort + num - 1
543 1
                && @TYPE@_LT(arr[*(pi + 1)], arr[*pi]); ++pi) {
544
        }
545

546 1
        for (pj = pl, pr = pi; pj < pr; ++pj, --pr) {
547 1
            INTP_SWAP(*pj, *pr);
548
        }
549
    }
550

551 1
    ++pi;
552 1
    sz = pi - pl;
553

554 1
    if (sz < minrun) {
555 1
        if (l + minrun < num) {
556
            sz = minrun;
557
        } else {
558 1
            sz = num - l;
559
        }
560

561 1
        pr = pl + sz;
562

563
        /* insertion sort */
564 1
        for (; pi < pr; ++pi) {
565 1
            vi = *pi;
566 1
            vc = arr[*pi];
567 1
            pj = pi;
568

569 1
            while (pl < pj && @TYPE@_LT(vc, arr[*(pj - 1)])) {
570 1
                *pj = *(pj - 1);
571 1
                --pj;
572
            }
573

574 1
            *pj = vi;
575
        }
576
    }
577

578
    return sz;
579
}
580

581

582
static npy_intp
583 1
agallop_right_@suff@(const @type@ *arr, const npy_intp *tosort,
584
                     const npy_intp size, const @type@ key)
585
{
586
    npy_intp last_ofs, ofs, m;
587

588 1
    if (@TYPE@_LT(key, arr[tosort[0]])) {
589
        return 0;
590
    }
591

592
    last_ofs = 0;
593
    ofs = 1;
594

595
    for (;;) {
596 1
        if (size <= ofs || ofs < 0) {
597
            ofs = size; /* arr[ofs] is never accessed */
598
            break;
599
        }
600

601 1
        if (@TYPE@_LT(key, arr[tosort[ofs]])) {
602
            break;
603
        } else {
604 1
            last_ofs = ofs;
605
            /* ofs = 1, 3, 7, 15... */
606 1
            ofs = (ofs << 1) + 1;
607
        }
608
    }
609

610
    /* now that arr[tosort[last_ofs]] <= key < arr[tosort[ofs]] */
611 1
    while (last_ofs + 1 < ofs) {
612 1
        m = last_ofs + ((ofs - last_ofs) >> 1);
613

614 1
        if (@TYPE@_LT(key, arr[tosort[m]])) {
615
            ofs = m;
616
        } else {
617 1
            last_ofs = m;
618
        }
619
    }
620

621
    /* now that arr[tosort[ofs-1]] <= key < arr[tosort[ofs]] */
622
    return ofs;
623
}
624

625

626

627
static npy_intp
628 1
agallop_left_@suff@(const @type@ *arr, const npy_intp *tosort,
629
                    const npy_intp size, const @type@ key)
630
{
631
    npy_intp last_ofs, ofs, l, m, r;
632

633 1
    if (@TYPE@_LT(arr[tosort[size - 1]], key)) {
634
        return size;
635
    }
636

637
    last_ofs = 0;
638
    ofs = 1;
639

640
    for (;;) {
641 1
        if (size <= ofs || ofs < 0) {
642
            ofs = size;
643
            break;
644
        }
645

646 1
        if (@TYPE@_LT(arr[tosort[size - ofs - 1]], key)) {
647
            break;
648
        } else {
649 1
            last_ofs = ofs;
650 1
            ofs = (ofs << 1) + 1;
651
        }
652
    }
653

654
    /* now that arr[tosort[size-ofs-1]] < key <= arr[tosort[size-last_ofs-1]] */
655 1
    l = size - ofs - 1;
656 1
    r = size - last_ofs - 1;
657

658 1
    while (l + 1 < r) {
659 1
        m = l + ((r - l) >> 1);
660

661 1
        if (@TYPE@_LT(arr[tosort[m]], key)) {
662
            l = m;
663
        } else {
664 1
            r = m;
665
        }
666
    }
667

668
    /* now that arr[tosort[r-1]] < key <= arr[tosort[r]] */
669
    return r;
670
}
671

672

673
static void
674 1
amerge_left_@suff@(@type@ *arr, npy_intp *p1, npy_intp l1, npy_intp *p2,
675
                   npy_intp l2,
676
                   npy_intp *p3)
677
{
678 1
    npy_intp *end = p2 + l2;
679 1
    memcpy(p3, p1, sizeof(npy_intp) * l1);
680
    /* first element must be in p2 otherwise skipped in the caller */
681 1
    *p1++ = *p2++;
682

683 1
    while (p1 < p2 && p2 < end) {
684 1
        if (@TYPE@_LT(arr[*p2], arr[*p3])) {
685 1
            *p1++ = *p2++;
686
        } else {
687 1
            *p1++ = *p3++;
688
        }
689
    }
690

691 1
    if (p1 != p2) {
692 1
        memcpy(p1, p3, sizeof(npy_intp) * (p2 - p1));
693
    }
694 1
}
695

696

697
static void
698 1
amerge_right_@suff@(@type@ *arr, npy_intp* p1, npy_intp l1, npy_intp *p2,
699
                    npy_intp l2,
700
                    npy_intp *p3)
701
{
702
    npy_intp ofs;
703 1
    npy_intp *start = p1 - 1;
704 1
    memcpy(p3, p2, sizeof(npy_intp) * l2);
705 1
    p1 += l1 - 1;
706 1
    p2 += l2 - 1;
707 1
    p3 += l2 - 1;
708
    /* first element must be in p1 otherwise skipped in the caller */
709 1
    *p2-- = *p1--;
710

711 1
    while (p1 < p2 && start < p1) {
712 1
        if (@TYPE@_LT(arr[*p3], arr[*p1])) {
713 1
            *p2-- = *p1--;
714
        } else {
715 1
            *p2-- = *p3--;
716
        }
717
    }
718

719 1
    if (p1 != p2) {
720 1
        ofs = p2 - start;
721 1
        memcpy(start + 1, p3 - ofs + 1, sizeof(npy_intp) * ofs);
722
    }
723 1
}
724

725

726
static int
727 1
amerge_at_@suff@(@type@ *arr, npy_intp *tosort, const run *stack,
728
                 const npy_intp at,
729
                 buffer_intp *buffer)
730
{
731
    int ret;
732
    npy_intp s1, l1, s2, l2, k;
733
    npy_intp *p1, *p2;
734 1
    s1 = stack[at].s;
735 1
    l1 = stack[at].l;
736 1
    s2 = stack[at + 1].s;
737 1
    l2 = stack[at + 1].l;
738
    /* tosort[s2] belongs to tosort[s1+k] */
739 1
    k = agallop_right_@suff@(arr, tosort + s1, l1, arr[tosort[s2]]);
740

741 1
    if (l1 == k) {
742
        /* already sorted */
743
        return 0;
744
    }
745

746 1
    p1 = tosort + s1 + k;
747 1
    l1 -= k;
748 1
    p2 = tosort + s2;
749
    /* tosort[s2-1] belongs to tosort[s2+l2] */
750 1
    l2 = agallop_left_@suff@(arr, tosort + s2, l2, arr[tosort[s2 - 1]]);
751

752 1
    if (l2 < l1) {
753 1
        ret = resize_buffer_intp(buffer, l2);
754

755 1
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
756

757 1
        amerge_right_@suff@(arr, p1, l1, p2, l2, buffer->pw);
758
    } else {
759 1
        ret = resize_buffer_intp(buffer, l1);
760

761 1
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
762

763 1
        amerge_left_@suff@(arr, p1, l1, p2, l2, buffer->pw);
764
    }
765

766
    return 0;
767
}
768

769

770
static int
771 1
atry_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack,
772
                     npy_intp *stack_ptr,
773
                     buffer_intp *buffer)
774
{
775
    int ret;
776
    npy_intp A, B, C, top;
777 1
    top = *stack_ptr;
778

779 1
    while (1 < top) {
780 1
        B = stack[top - 2].l;
781 1
        C = stack[top - 1].l;
782

783 1
        if ((2 < top && stack[top - 3].l <= B + C) ||
784 1
                (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) {
785 1
            A = stack[top - 3].l;
786

787 1
            if (A <= C) {
788 0
                ret = amerge_at_@suff@(arr, tosort, stack, top - 3, buffer);
789

790 0
                if (NPY_UNLIKELY(ret < 0)) { return ret; }
791

792 0
                stack[top - 3].l += B;
793 0
                stack[top - 2] = stack[top - 1];
794 0
                --top;
795
            } else {
796 1
                ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer);
797

798 1
                if (NPY_UNLIKELY(ret < 0)) { return ret; }
799

800 1
                stack[top - 2].l += C;
801 1
                --top;
802
            }
803 1
        } else if (1 < top && B <= C) {
804 1
            ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer);
805

806 1
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
807

808 1
            stack[top - 2].l += C;
809 1
            --top;
810
        } else {
811
            break;
812
        }
813
    }
814

815 1
    *stack_ptr = top;
816 1
    return 0;
817
}
818

819

820
static int
821 1
aforce_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack,
822
                       npy_intp *stack_ptr,
823
                       buffer_intp *buffer)
824
{
825
    int ret;
826 1
    npy_intp top = *stack_ptr;
827

828 1
    while (2 < top) {
829 1
        if (stack[top - 3].l <= stack[top - 1].l) {
830 0
            ret = amerge_at_@suff@(arr, tosort, stack, top - 3, buffer);
831

832 0
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
833

834 0
            stack[top - 3].l += stack[top - 2].l;
835 0
            stack[top - 2] = stack[top - 1];
836 0
            --top;
837
        } else {
838 1
            ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer);
839

840 1
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
841

842 1
            stack[top - 2].l += stack[top - 1].l;
843 1
            --top;
844
        }
845
    }
846

847 1
    if (1 < top) {
848 1
        ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer);
849

850 1
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
851
    }
852

853
    return 0;
854
}
855

856

857
NPY_NO_EXPORT int
858 1
atimsort_@suff@(void *v, npy_intp *tosort, npy_intp num,
859
                void *NPY_UNUSED(varr))
860
{
861
    int ret;
862
    npy_intp l, n, stack_ptr, minrun;
863
    buffer_intp buffer;
864
    run stack[TIMSORT_STACK_SIZE];
865 1
    buffer.pw = NULL;
866 1
    buffer.size = 0;
867 1
    stack_ptr = 0;
868 1
    minrun = compute_min_run(num);
869

870 1
    for (l = 0; l < num;) {
871 1
        n = acount_run_@suff@(v, tosort, l, num, minrun);
872 1
        stack[stack_ptr].s = l;
873 1
        stack[stack_ptr].l = n;
874 1
        ++stack_ptr;
875 1
        ret = atry_collapse_@suff@(v, tosort, stack, &stack_ptr, &buffer);
876

877 1
        if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
878

879 1
        l += n;
880
    }
881

882 1
    ret = aforce_collapse_@suff@(v, tosort, stack, &stack_ptr, &buffer);
883

884 1
    if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
885

886 1
    ret = 0;
887 1
cleanup:
888

889 1
    if (buffer.pw != NULL) {
890 1
        free(buffer.pw);
891
    }
892

893 1
    return ret;
894
}
895

896
/**end repeat**/
897

898

899

900
/* For string sorts and generic sort, element comparisons are very expensive,
901
 * and the time cost of insertion sort (involves N**2 comparison) clearly hurts.
902
 * Implementing binary insertion sort and probably gallop mode during merging process
903
 * can hopefully boost the performance. Here as a temporary workaround we use shorter
904
 * run length to reduce the cost of insertion sort.
905
 */
906

907
static npy_intp compute_min_run_short(npy_intp num)
908
{
909 1
    npy_intp r = 0;
910

911 1
    while (16 < num) {
912 1
        r |= num & 1;
913 1
        num >>= 1;
914
    }
915

916 1
    return num + r;
917
}
918

919
/*
920
 *****************************************************************************
921
 **                             STRING SORTS                                **
922
 *****************************************************************************
923
 */
924

925

926
/**begin repeat
927
 *
928
 * #TYPE = STRING, UNICODE#
929
 * #suff = string, unicode#
930
 * #type = npy_char, npy_ucs4#
931
 */
932

933

934
typedef struct {
935
    @type@ * pw;
936
    npy_intp size;
937
    size_t len;
938
} buffer_@suff@;
939

940

941
static NPY_INLINE int
942 1
resize_buffer_@suff@(buffer_@suff@ *buffer, npy_intp new_size)
943
{
944 1
    if (new_size <= buffer->size) {
945
        return 0;
946
    }
947

948 1
    if (NPY_UNLIKELY(buffer->pw == NULL)) {
949 1
        buffer->pw = malloc(sizeof(@type@) * new_size * buffer->len);
950
    } else {
951 0
        buffer->pw = realloc(buffer->pw,  sizeof(@type@) * new_size * buffer->len);
952
    }
953

954 1
    buffer->size = new_size;
955

956 1
    if (NPY_UNLIKELY(buffer->pw == NULL)) {
957
        return -NPY_ENOMEM;
958
    } else {
959 1
        return 0;
960
    }
961
}
962

963

964
static npy_intp
965 1
count_run_@suff@(@type@ *arr, npy_intp l, npy_intp num, npy_intp minrun,
966
                 @type@ *vp, size_t len)
967
{
968
    npy_intp sz;
969
    @type@ *pl, *pi, *pj, *pr;
970

971 1
    if (NPY_UNLIKELY(num - l == 1)) {
972
        return 1;
973
    }
974

975 1
    pl = arr + l * len;
976

977
    /* (not strictly) ascending sequence */
978 1
    if (!@TYPE@_LT(pl + len, pl, len)) {
979 1
        for (pi = pl + len; pi < arr + (num - 1) * len
980 1
                && !@TYPE@_LT(pi + len, pi, len); pi += len) {
981
        }
982
    } else {  /* (strictly) descending sequence */
983 1
        for (pi = pl + len; pi < arr + (num - 1) * len
984 1
                && @TYPE@_LT(pi + len, pi, len); pi += len) {
985
        }
986

987 1
        for (pj = pl, pr = pi; pj < pr; pj += len, pr -= len) {
988 1
            @TYPE@_SWAP(pj, pr, len);
989
        }
990
    }
991

992 1
    pi += len;
993 1
    sz = (pi - pl) / len;
994

995 1
    if (sz < minrun) {
996 0
        if (l + minrun < num) {
997
            sz = minrun;
998
        } else {
999 0
            sz = num - l;
1000
        }
1001

1002 0
        pr = pl + sz * len;
1003

1004
        /* insertion sort */
1005 0
        for (; pi < pr; pi += len) {
1006 0
            @TYPE@_COPY(vp, pi, len);
1007 0
            pj = pi;
1008

1009 0
            while (pl < pj && @TYPE@_LT(vp, pj - len, len)) {
1010 0
                @TYPE@_COPY(pj, pj - len, len);
1011 0
                pj -= len;
1012
            }
1013

1014 0
            @TYPE@_COPY(pj, vp, len);
1015
        }
1016
    }
1017

1018
    return sz;
1019
}
1020

1021

1022
static npy_intp
1023 0
gallop_right_@suff@(const @type@ *arr, const npy_intp size,
1024
                    const @type@ *key, size_t len)
1025
{
1026
    npy_intp last_ofs, ofs, m;
1027

1028 0
    if (@TYPE@_LT(key, arr, len)) {
1029
        return 0;
1030
    }
1031

1032
    last_ofs = 0;
1033
    ofs = 1;
1034

1035
    for (;;) {
1036 0
        if (size <= ofs || ofs < 0) {
1037
            ofs = size; /* arr[ofs] is never accessed */
1038
            break;
1039
        }
1040

1041 0
        if (@TYPE@_LT(key, arr + ofs * len, len)) {
1042
            break;
1043
        } else {
1044 0
            last_ofs = ofs;
1045
            /* ofs = 1, 3, 7, 15... */
1046 0
            ofs = (ofs << 1) + 1;
1047
        }
1048
    }
1049

1050
    /* now that arr[last_ofs*len] <= key < arr[ofs*len] */
1051 0
    while (last_ofs + 1 < ofs) {
1052 0
        m = last_ofs + ((ofs - last_ofs) >> 1);
1053

1054 0
        if (@TYPE@_LT(key, arr + m * len, len)) {
1055
            ofs = m;
1056
        } else {
1057
            last_ofs = m;
1058
        }
1059
    }
1060

1061
    /* now that arr[(ofs-1)*len] <= key < arr[ofs*len] */
1062
    return ofs;
1063
}
1064

1065

1066

1067
static npy_intp
1068 0
gallop_left_@suff@(const @type@ *arr, const npy_intp size, const @type@ *key,
1069
                   size_t len)
1070
{
1071
    npy_intp last_ofs, ofs, l, m, r;
1072

1073 0
    if (@TYPE@_LT(arr + (size - 1) * len, key, len)) {
1074
        return size;
1075
    }
1076

1077
    last_ofs = 0;
1078
    ofs = 1;
1079

1080
    for (;;) {
1081 0
        if (size <= ofs || ofs < 0) {
1082
            ofs = size;
1083
            break;
1084
        }
1085

1086 0
        if (@TYPE@_LT(arr + (size - ofs - 1) * len, key, len)) {
1087
            break;
1088
        } else {
1089 0
            last_ofs = ofs;
1090 0
            ofs = (ofs << 1) + 1;
1091
        }
1092
    }
1093

1094
    /* now that arr[(size-ofs-1)*len] < key <= arr[(size-last_ofs-1)*len] */
1095 0
    l = size - ofs - 1;
1096 0
    r = size - last_ofs - 1;
1097

1098 0
    while (l + 1 < r) {
1099 0
        m = l + ((r - l) >> 1);
1100

1101 0
        if (@TYPE@_LT(arr + m * len, key, len)) {
1102
            l = m;
1103
        } else {
1104
            r = m;
1105
        }
1106
    }
1107

1108
    /* now that arr[(r-1)*len] < key <= arr[r*len] */
1109
    return r;
1110
}
1111

1112

1113
static void
1114 0
merge_left_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2,
1115
                  @type@ *p3, size_t len)
1116
{
1117 0
    @type@ *end = p2 + l2 * len;
1118 0
    memcpy(p3, p1, sizeof(@type@) * l1 * len);
1119
    /* first element must be in p2 otherwise skipped in the caller */
1120 0
    @TYPE@_COPY(p1, p2, len);
1121 0
    p1 += len;
1122 0
    p2 += len;
1123

1124 0
    while (p1 < p2 && p2 < end) {
1125 0
        if (@TYPE@_LT(p2, p3, len)) {
1126 0
            @TYPE@_COPY(p1, p2, len);
1127 0
            p1 += len;
1128 0
            p2 += len;
1129
        } else {
1130 0
            @TYPE@_COPY(p1, p3, len);
1131 0
            p1 += len;
1132 0
            p3 += len;
1133
        }
1134
    }
1135

1136 0
    if (p1 != p2) {
1137 0
        memcpy(p1, p3, sizeof(@type@) * (p2 - p1));
1138
    }
1139 0
}
1140

1141

1142
static void
1143 0
merge_right_@suff@(@type@ *p1, npy_intp l1, @type@ *p2, npy_intp l2,
1144
                   @type@ *p3, size_t len)
1145
{
1146
    npy_intp ofs;
1147 0
    @type@ *start = p1 - len;
1148 0
    memcpy(p3, p2, sizeof(@type@) * l2 * len);
1149 0
    p1 += (l1 - 1) * len;
1150 0
    p2 += (l2 - 1) * len;
1151 0
    p3 += (l2 - 1) * len;
1152
    /* first element must be in p1 otherwise skipped in the caller */
1153 0
    @TYPE@_COPY(p2, p1, len);
1154 0
    p2 -= len;
1155 0
    p1 -= len;
1156

1157 0
    while (p1 < p2 && start < p1) {
1158 0
        if (@TYPE@_LT(p3, p1, len)) {
1159 0
            @TYPE@_COPY(p2, p1, len);
1160 0
            p2 -= len;
1161 0
            p1 -= len;
1162
        } else {
1163 0
            @TYPE@_COPY(p2, p3, len);
1164 0
            p2 -= len;
1165 0
            p3 -= len;
1166
        }
1167
    }
1168

1169 0
    if (p1 != p2) {
1170 0
        ofs = p2 - start;
1171 0
        memcpy(start + len, p3 - ofs + len, sizeof(@type@) * ofs);
1172
    }
1173 0
}
1174

1175

1176
static int
1177 0
merge_at_@suff@(@type@ *arr, const run *stack, const npy_intp at,
1178
                buffer_@suff@ *buffer, size_t len)
1179
{
1180
    int ret;
1181
    npy_intp s1, l1, s2, l2, k;
1182
    @type@ *p1, *p2;
1183 0
    s1 = stack[at].s;
1184 0
    l1 = stack[at].l;
1185 0
    s2 = stack[at + 1].s;
1186 0
    l2 = stack[at + 1].l;
1187
    /* arr[s2] belongs to arr[s1+k] */
1188 0
    @TYPE@_COPY(buffer->pw, arr + s2 * len, len);
1189 0
    k = gallop_right_@suff@(arr + s1 * len, l1, buffer->pw, len);
1190

1191 0
    if (l1 == k) {
1192
        /* already sorted */
1193
        return 0;
1194
    }
1195

1196 0
    p1 = arr + (s1 + k) * len;
1197 0
    l1 -= k;
1198 0
    p2 = arr + s2 * len;
1199
    /* arr[s2-1] belongs to arr[s2+l2] */
1200 0
    @TYPE@_COPY(buffer->pw, arr + (s2 - 1) * len, len);
1201 0
    l2 = gallop_left_@suff@(arr + s2 * len, l2, buffer->pw, len);
1202

1203 0
    if (l2 < l1) {
1204 0
        ret = resize_buffer_@suff@(buffer, l2);
1205

1206 0
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
1207

1208 0
        merge_right_@suff@(p1, l1, p2, l2, buffer->pw, len);
1209
    } else {
1210 0
        ret = resize_buffer_@suff@(buffer, l1);
1211

1212 0
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
1213

1214 0
        merge_left_@suff@(p1, l1, p2, l2, buffer->pw, len);
1215
    }
1216

1217
    return 0;
1218
}
1219

1220

1221
static int
1222 1
try_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr,
1223
                    buffer_@suff@ *buffer, size_t len)
1224
{
1225
    int ret;
1226
    npy_intp A, B, C, top;
1227 1
    top = *stack_ptr;
1228

1229 1
    while (1 < top) {
1230 0
        B = stack[top - 2].l;
1231 0
        C = stack[top - 1].l;
1232

1233 0
        if ((2 < top && stack[top - 3].l <= B + C) ||
1234 0
                (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) {
1235 0
            A = stack[top - 3].l;
1236

1237 0
            if (A <= C) {
1238 0
                ret = merge_at_@suff@(arr, stack, top - 3, buffer, len);
1239

1240 0
                if (NPY_UNLIKELY(ret < 0)) { return ret; }
1241

1242 0
                stack[top - 3].l += B;
1243 0
                stack[top - 2] = stack[top - 1];
1244 0
                --top;
1245
            } else {
1246 0
                ret = merge_at_@suff@(arr, stack, top - 2, buffer, len);
1247

1248 0
                if (NPY_UNLIKELY(ret < 0)) { return ret; }
1249

1250 0
                stack[top - 2].l += C;
1251 0
                --top;
1252
            }
1253 0
        } else if (1 < top && B <= C) {
1254 0
            ret = merge_at_@suff@(arr, stack, top - 2, buffer, len);
1255

1256 0
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
1257

1258 0
            stack[top - 2].l += C;
1259 0
            --top;
1260
        } else {
1261
            break;
1262
        }
1263
    }
1264

1265 1
    *stack_ptr = top;
1266 1
    return 0;
1267
}
1268

1269

1270
static int
1271 1
force_collapse_@suff@(@type@ *arr, run *stack, npy_intp *stack_ptr,
1272
                      buffer_@suff@ *buffer, size_t len)
1273
{
1274
    int ret;
1275 1
    npy_intp top = *stack_ptr;
1276

1277 1
    while (2 < top) {
1278 0
        if (stack[top - 3].l <= stack[top - 1].l) {
1279 0
            ret = merge_at_@suff@(arr, stack, top - 3, buffer, len);
1280

1281 0
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
1282

1283 0
            stack[top - 3].l += stack[top - 2].l;
1284 0
            stack[top - 2] = stack[top - 1];
1285 0
            --top;
1286
        } else {
1287 0
            ret = merge_at_@suff@(arr, stack, top - 2, buffer, len);
1288

1289 0
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
1290

1291 0
            stack[top - 2].l += stack[top - 1].l;
1292 0
            --top;
1293
        }
1294
    }
1295

1296 1
    if (1 < top) {
1297 0
        ret = merge_at_@suff@(arr, stack, top - 2, buffer, len);
1298

1299 0
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
1300
    }
1301

1302
    return 0;
1303
}
1304

1305

1306
NPY_NO_EXPORT int
1307 1
timsort_@suff@(void *start, npy_intp num, void *varr)
1308
{
1309 1
    PyArrayObject *arr = varr;
1310 1
    size_t elsize = PyArray_ITEMSIZE(arr);
1311 1
    size_t len = elsize / sizeof(@type@);
1312
    int ret;
1313
    npy_intp l, n, stack_ptr, minrun;
1314
    run stack[TIMSORT_STACK_SIZE];
1315
    buffer_@suff@ buffer;
1316

1317
    /* Items that have zero size don't make sense to sort */
1318 1
    if (len == 0) {
1319
        return 0;
1320
    }
1321

1322 1
    buffer.pw = NULL;
1323 1
    buffer.size = 0;
1324 1
    buffer.len = len;
1325 1
    stack_ptr = 0;
1326 1
    minrun = compute_min_run_short(num);
1327
    /* used for insertion sort and gallop key */
1328 1
    ret = resize_buffer_@suff@(&buffer, 1);
1329

1330 1
    if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
1331

1332 1
    for (l = 0; l < num;) {
1333 1
        n = count_run_@suff@(start, l, num, minrun, buffer.pw, len);
1334
        /* both s and l are scaled by len */
1335 1
        stack[stack_ptr].s = l;
1336 1
        stack[stack_ptr].l = n;
1337 1
        ++stack_ptr;
1338 1
        ret = try_collapse_@suff@(start, stack, &stack_ptr, &buffer, len);
1339

1340 1
        if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
1341

1342 1
        l += n;
1343
    }
1344

1345 1
    ret = force_collapse_@suff@(start, stack, &stack_ptr, &buffer, len);
1346

1347 1
    if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
1348

1349 1
    ret = 0;
1350

1351 1
cleanup:
1352 1
    if (buffer.pw != NULL) {
1353 1
        free(buffer.pw);
1354
    }
1355
    return ret;
1356
}
1357

1358

1359
/* argsort */
1360

1361

1362
static npy_intp
1363 1
acount_run_@suff@(@type@ *arr, npy_intp *tosort, npy_intp l, npy_intp num,
1364
                  npy_intp minrun, size_t len)
1365
{
1366
    npy_intp sz;
1367
    npy_intp vi;
1368
    npy_intp *pl, *pi, *pj, *pr;
1369

1370 1
    if (NPY_UNLIKELY(num - l == 1)) {
1371
        return 1;
1372
    }
1373

1374 1
    pl = tosort + l;
1375

1376
    /* (not strictly) ascending sequence */
1377 1
    if (!@TYPE@_LT(arr + (*(pl + 1)) * len, arr + (*pl) * len, len)) {
1378 1
        for (pi = pl + 1; pi < tosort + num - 1
1379 1
                && !@TYPE@_LT(arr + (*(pi + 1)) * len, arr + (*pi) * len, len); ++pi) {
1380
        }
1381
    } else {  /* (strictly) descending sequence */
1382 1
        for (pi = pl + 1; pi < tosort + num - 1
1383 1
                && @TYPE@_LT(arr + (*(pi + 1)) * len, arr + (*pi) * len, len); ++pi) {
1384
        }
1385

1386 1
        for (pj = pl, pr = pi; pj < pr; ++pj, --pr) {
1387 1
            INTP_SWAP(*pj, *pr);
1388
        }
1389
    }
1390

1391 1
    ++pi;
1392 1
    sz = pi - pl;
1393

1394 1
    if (sz < minrun) {
1395 1
        if (l + minrun < num) {
1396
            sz = minrun;
1397
        } else {
1398 1
            sz = num - l;
1399
        }
1400

1401 1
        pr = pl + sz;
1402

1403
        /* insertion sort */
1404 1
        for (; pi < pr; ++pi) {
1405 1
            vi = *pi;
1406 1
            pj = pi;
1407

1408 1
            while (pl < pj && @TYPE@_LT(arr + vi * len, arr + (*(pj - 1)) * len, len)) {
1409 1
                *pj = *(pj - 1);
1410 1
                --pj;
1411
            }
1412

1413 1
            *pj = vi;
1414
        }
1415
    }
1416

1417
    return sz;
1418
}
1419

1420

1421
static npy_intp
1422 1
agallop_left_@suff@(const @type@ *arr, const npy_intp *tosort,
1423
                    const npy_intp size, const @type@ *key, size_t len)
1424
{
1425
    npy_intp last_ofs, ofs, l, m, r;
1426

1427 1
    if (@TYPE@_LT(arr + tosort[size - 1] * len, key, len)) {
1428
        return size;
1429
    }
1430

1431
    last_ofs = 0;
1432
    ofs = 1;
1433

1434
    for (;;) {
1435 1
        if (size <= ofs || ofs < 0) {
1436
            ofs = size;
1437
            break;
1438
        }
1439

1440 1
        if (@TYPE@_LT(arr + tosort[size - ofs - 1] * len, key, len)) {
1441
            break;
1442
        } else {
1443 1
            last_ofs = ofs;
1444 1
            ofs = (ofs << 1) + 1;
1445
        }
1446
    }
1447

1448
    /* now that arr[tosort[size-ofs-1]*len] < key <= arr[tosort[size-last_ofs-1]*len] */
1449 1
    l = size - ofs - 1;
1450 1
    r = size - last_ofs - 1;
1451

1452 1
    while (l + 1 < r) {
1453 1
        m = l + ((r - l) >> 1);
1454

1455 1
        if (@TYPE@_LT(arr + tosort[m] * len, key, len)) {
1456
            l = m;
1457
        } else {
1458
            r = m;
1459
        }
1460
    }
1461

1462
    /* now that arr[tosort[r-1]*len] < key <= arr[tosort[r]*len] */
1463
    return r;
1464
}
1465

1466

1467
static npy_intp
1468 1
agallop_right_@suff@(const @type@ *arr, const npy_intp *tosort,
1469
                     const npy_intp size, const @type@ *key, size_t len)
1470
{
1471
    npy_intp last_ofs, ofs, m;
1472

1473 1
    if (@TYPE@_LT(key, arr + tosort[0] * len, len)) {
1474
        return 0;
1475
    }
1476

1477
    last_ofs = 0;
1478
    ofs = 1;
1479

1480
    for (;;) {
1481 1
        if (size <= ofs || ofs < 0) {
1482
            ofs = size; /* arr[ofs] is never accessed */
1483
            break;
1484
        }
1485

1486 1
        if (@TYPE@_LT(key, arr + tosort[ofs] * len, len)) {
1487
            break;
1488
        } else {
1489 1
            last_ofs = ofs;
1490
            /* ofs = 1, 3, 7, 15... */
1491 1
            ofs = (ofs << 1) + 1;
1492
        }
1493
    }
1494

1495
    /* now that arr[tosort[last_ofs]*len] <= key < arr[tosort[ofs]*len] */
1496 1
    while (last_ofs + 1 < ofs) {
1497 1
        m = last_ofs + ((ofs - last_ofs) >> 1);
1498

1499 1
        if (@TYPE@_LT(key, arr + tosort[m] * len, len)) {
1500
            ofs = m;
1501
        } else {
1502
            last_ofs = m;
1503
        }
1504
    }
1505

1506
    /* now that arr[tosort[ofs-1]*len] <= key < arr[tosort[ofs]*len] */
1507
    return ofs;
1508
}
1509

1510

1511

1512
static void
1513 1
amerge_left_@suff@(@type@ *arr, npy_intp *p1, npy_intp l1, npy_intp *p2,
1514
                   npy_intp l2, npy_intp *p3, size_t len)
1515
{
1516 1
    npy_intp *end = p2 + l2;
1517 1
    memcpy(p3, p1, sizeof(npy_intp) * l1);
1518
    /* first element must be in p2 otherwise skipped in the caller */
1519 1
    *p1++ = *p2++;
1520

1521 1
    while (p1 < p2 && p2 < end) {
1522 1
        if (@TYPE@_LT(arr + (*p2) * len, arr + (*p3) * len, len)) {
1523 1
            *p1++ = *p2++;
1524
        } else {
1525 1
            *p1++ = *p3++;
1526
        }
1527
    }
1528

1529 1
    if (p1 != p2) {
1530 1
        memcpy(p1, p3, sizeof(npy_intp) * (p2 - p1));
1531
    }
1532 1
}
1533

1534

1535
static void
1536 1
amerge_right_@suff@(@type@ *arr, npy_intp* p1, npy_intp l1, npy_intp *p2,
1537
                    npy_intp l2, npy_intp *p3, size_t len)
1538
{
1539
    npy_intp ofs;
1540 1
    npy_intp *start = p1 - 1;
1541 1
    memcpy(p3, p2, sizeof(npy_intp) * l2);
1542 1
    p1 += l1 - 1;
1543 1
    p2 += l2 - 1;
1544 1
    p3 += l2 - 1;
1545
    /* first element must be in p1 otherwise skipped in the caller */
1546 1
    *p2-- = *p1--;
1547

1548 1
    while (p1 < p2 && start < p1) {
1549 1
        if (@TYPE@_LT(arr + (*p3) * len, arr + (*p1) * len, len)) {
1550 1
            *p2-- = *p1--;
1551
        } else {
1552 1
            *p2-- = *p3--;
1553
        }
1554
    }
1555

1556 1
    if (p1 != p2) {
1557 1
        ofs = p2 - start;
1558 1
        memcpy(start + 1, p3 - ofs + 1, sizeof(npy_intp) * ofs);
1559
    }
1560 1
}
1561

1562

1563

1564
static int
1565 1
amerge_at_@suff@(@type@ *arr, npy_intp *tosort, const run *stack,
1566
                 const npy_intp at, buffer_intp *buffer, size_t len)
1567
{
1568
    int ret;
1569
    npy_intp s1, l1, s2, l2, k;
1570
    npy_intp *p1, *p2;
1571 1
    s1 = stack[at].s;
1572 1
    l1 = stack[at].l;
1573 1
    s2 = stack[at + 1].s;
1574 1
    l2 = stack[at + 1].l;
1575
    /* tosort[s2] belongs to tosort[s1+k] */
1576 1
    k = agallop_right_@suff@(arr, tosort + s1, l1, arr + tosort[s2] * len, len);
1577

1578 1
    if (l1 == k) {
1579
        /* already sorted */
1580
        return 0;
1581
    }
1582

1583 1
    p1 = tosort + s1 + k;
1584 1
    l1 -= k;
1585 1
    p2 = tosort + s2;
1586
    /* tosort[s2-1] belongs to tosort[s2+l2] */
1587 1
    l2 = agallop_left_@suff@(arr, tosort + s2, l2, arr + tosort[s2 - 1] * len,
1588
                             len);
1589

1590 1
    if (l2 < l1) {
1591 1
        ret = resize_buffer_intp(buffer, l2);
1592

1593 1
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
1594

1595 1
        amerge_right_@suff@(arr, p1, l1, p2, l2, buffer->pw, len);
1596
    } else {
1597 1
        ret = resize_buffer_intp(buffer, l1);
1598

1599 1
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
1600

1601 1
        amerge_left_@suff@(arr, p1, l1, p2, l2, buffer->pw, len);
1602
    }
1603

1604
    return 0;
1605
}
1606

1607

1608
static int
1609 1
atry_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack,
1610
                     npy_intp *stack_ptr, buffer_intp *buffer, size_t len)
1611
{
1612
    int ret;
1613
    npy_intp A, B, C, top;
1614 1
    top = *stack_ptr;
1615

1616 1
    while (1 < top) {
1617 1
        B = stack[top - 2].l;
1618 1
        C = stack[top - 1].l;
1619

1620 1
        if ((2 < top && stack[top - 3].l <= B + C) ||
1621 1
                (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) {
1622 1
            A = stack[top - 3].l;
1623

1624 1
            if (A <= C) {
1625 0
                ret = amerge_at_@suff@(arr, tosort, stack, top - 3, buffer, len);
1626

1627 0
                if (NPY_UNLIKELY(ret < 0)) { return ret; }
1628

1629 0
                stack[top - 3].l += B;
1630 0
                stack[top - 2] = stack[top - 1];
1631 0
                --top;
1632
            } else {
1633 1
                ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer, len);
1634

1635 1
                if (NPY_UNLIKELY(ret < 0)) { return ret; }
1636

1637 1
                stack[top - 2].l += C;
1638 1
                --top;
1639
            }
1640 1
        } else if (1 < top && B <= C) {
1641 1
            ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer, len);
1642

1643 1
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
1644

1645 1
            stack[top - 2].l += C;
1646 1
            --top;
1647
        } else {
1648
            break;
1649
        }
1650
    }
1651

1652 1
    *stack_ptr = top;
1653 1
    return 0;
1654
}
1655

1656

1657

1658
static int
1659 1
aforce_collapse_@suff@(@type@ *arr, npy_intp *tosort, run *stack,
1660
                       npy_intp *stack_ptr, buffer_intp *buffer, size_t len)
1661
{
1662
    int ret;
1663 1
    npy_intp top = *stack_ptr;
1664

1665 1
    while (2 < top) {
1666 1
        if (stack[top - 3].l <= stack[top - 1].l) {
1667 0
            ret = amerge_at_@suff@(arr, tosort, stack, top - 3, buffer, len);
1668

1669 0
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
1670

1671 0
            stack[top - 3].l += stack[top - 2].l;
1672 0
            stack[top - 2] = stack[top - 1];
1673 0
            --top;
1674
        } else {
1675 1
            ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer, len);
1676

1677 1
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
1678

1679 1
            stack[top - 2].l += stack[top - 1].l;
1680 1
            --top;
1681
        }
1682
    }
1683

1684 1
    if (1 < top) {
1685 1
        ret = amerge_at_@suff@(arr, tosort, stack, top - 2, buffer, len);
1686

1687 1
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
1688
    }
1689

1690
    return 0;
1691
}
1692

1693

1694
NPY_NO_EXPORT int
1695 1
atimsort_@suff@(void *start, npy_intp *tosort, npy_intp num, void *varr)
1696
{
1697 1
    PyArrayObject *arr = varr;
1698 1
    size_t elsize = PyArray_ITEMSIZE(arr);
1699 1
    size_t len = elsize / sizeof(@type@);
1700
    int ret;
1701
    npy_intp l, n, stack_ptr, minrun;
1702
    run stack[TIMSORT_STACK_SIZE];
1703
    buffer_intp buffer;
1704

1705
    /* Items that have zero size don't make sense to sort */
1706 1
    if (len == 0) {
1707
        return 0;
1708
    }
1709

1710 1
    buffer.pw = NULL;
1711 1
    buffer.size = 0;
1712 1
    stack_ptr = 0;
1713 1
    minrun = compute_min_run_short(num);
1714

1715 1
    for (l = 0; l < num;) {
1716 1
        n = acount_run_@suff@(start, tosort, l, num, minrun, len);
1717
        /* both s and l are scaled by len */
1718 1
        stack[stack_ptr].s = l;
1719 1
        stack[stack_ptr].l = n;
1720 1
        ++stack_ptr;
1721 1
        ret = atry_collapse_@suff@(start, tosort, stack, &stack_ptr, &buffer, len);
1722

1723 1
        if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
1724

1725 1
        l += n;
1726
    }
1727

1728 1
    ret = aforce_collapse_@suff@(start, tosort, stack, &stack_ptr, &buffer, len);
1729

1730 1
    if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
1731

1732 1
    ret = 0;
1733

1734 1
cleanup:
1735 1
    if (buffer.pw != NULL) {
1736 1
        free(buffer.pw);
1737
    }
1738
    return ret;
1739
}
1740

1741

1742
/**end repeat**/
1743

1744

1745

1746
/*
1747
 *****************************************************************************
1748
 **                             GENERIC SORT                                **
1749
 *****************************************************************************
1750
 */
1751

1752

1753
typedef struct {
1754
    char *pw;
1755
    npy_intp size;
1756
    size_t len;
1757
} buffer_char;
1758

1759

1760
static NPY_INLINE int
1761 1
resize_buffer_char(buffer_char *buffer, npy_intp new_size)
1762
{
1763 1
    if (new_size <= buffer->size) {
1764
        return 0;
1765
    }
1766

1767 1
    if (NPY_UNLIKELY(buffer->pw == NULL)) {
1768 1
        buffer->pw = malloc(sizeof(char) * new_size * buffer->len);
1769
    } else {
1770 0
        buffer->pw = realloc(buffer->pw,  sizeof(char) * new_size * buffer->len);
1771
    }
1772

1773 1
    buffer->size = new_size;
1774

1775 1
    if (NPY_UNLIKELY(buffer->pw == NULL)) {
1776
        return -NPY_ENOMEM;
1777
    } else {
1778 1
        return 0;
1779
    }
1780
}
1781

1782

1783
static npy_intp
1784 1
npy_count_run(char *arr, npy_intp l, npy_intp num, npy_intp minrun,
1785
              char *vp, size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
1786
{
1787
    npy_intp sz;
1788
    char *pl, *pi, *pj, *pr;
1789

1790 1
    if (NPY_UNLIKELY(num - l == 1)) {
1791
        return 1;
1792
    }
1793

1794 1
    pl = arr + l * len;
1795

1796
    /* (not strictly) ascending sequence */
1797 1
    if (cmp(pl, pl + len, py_arr) <= 0) {
1798 1
        for (pi = pl + len; pi < arr + (num - 1) * len
1799 1
                && cmp(pi, pi + len, py_arr) <= 0; pi += len) {
1800
        }
1801
    } else {  /* (strictly) descending sequence */
1802 1
        for (pi = pl + len; pi < arr + (num - 1) * len
1803 1
                && cmp(pi + len, pi, py_arr) < 0; pi += len) {
1804
        }
1805

1806 1
        for (pj = pl, pr = pi; pj < pr; pj += len, pr -= len) {
1807 1
            GENERIC_SWAP(pj, pr, len);
1808
        }
1809
    }
1810

1811 1
    pi += len;
1812 1
    sz = (pi - pl) / len;
1813

1814 1
    if (sz < minrun) {
1815 0
        if (l + minrun < num) {
1816
            sz = minrun;
1817
        } else {
1818 0
            sz = num - l;
1819
        }
1820

1821 0
        pr = pl + sz * len;
1822

1823
        /* insertion sort */
1824 0
        for (; pi < pr; pi += len) {
1825 0
            GENERIC_COPY(vp, pi, len);
1826 0
            pj = pi;
1827

1828 0
            while (pl < pj && cmp(vp, pj - len, py_arr) < 0) {
1829 0
                GENERIC_COPY(pj, pj - len, len);
1830 0
                pj -= len;
1831
            }
1832

1833 0
            GENERIC_COPY(pj, vp, len);
1834
        }
1835
    }
1836

1837
    return sz;
1838
}
1839

1840

1841
static npy_intp
1842 0
npy_gallop_right(const char *arr, const npy_intp size, const char *key,
1843
                 size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
1844
{
1845
    npy_intp last_ofs, ofs, m;
1846

1847 0
    if (cmp(key, arr, py_arr) < 0) {
1848
        return 0;
1849
    }
1850

1851
    last_ofs = 0;
1852
    ofs = 1;
1853

1854
    for (;;) {
1855 0
        if (size <= ofs || ofs < 0) {
1856
            ofs = size; /* arr[ofs] is never accessed */
1857
            break;
1858
        }
1859

1860 0
        if (cmp(key, arr + ofs * len, py_arr) < 0) {
1861
            break;
1862
        } else {
1863 0
            last_ofs = ofs;
1864
            /* ofs = 1, 3, 7, 15... */
1865 0
            ofs = (ofs << 1) + 1;
1866
        }
1867
    }
1868

1869
    /* now that arr[last_ofs*len] <= key < arr[ofs*len] */
1870 0
    while (last_ofs + 1 < ofs) {
1871 0
        m = last_ofs + ((ofs - last_ofs) >> 1);
1872

1873 0
        if (cmp(key, arr + m * len, py_arr) < 0) {
1874
            ofs = m;
1875
        } else {
1876 0
            last_ofs = m;
1877
        }
1878
    }
1879

1880
    /* now that arr[(ofs-1)*len] <= key < arr[ofs*len] */
1881
    return ofs;
1882
}
1883

1884

1885

1886
static npy_intp
1887 0
npy_gallop_left(const char *arr, const npy_intp size, const char *key,
1888
                size_t len, PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
1889
{
1890
    npy_intp last_ofs, ofs, l, m, r;
1891

1892 0
    if (cmp(arr + (size - 1) * len, key, py_arr) < 0) {
1893
        return size;
1894
    }
1895

1896
    last_ofs = 0;
1897
    ofs = 1;
1898

1899
    for (;;) {
1900 0
        if (size <= ofs || ofs < 0) {
1901
            ofs = size;
1902
            break;
1903
        }
1904

1905 0
        if (cmp(arr + (size - ofs - 1) * len, key, py_arr) < 0) {
1906
            break;
1907
        } else {
1908 0
            last_ofs = ofs;
1909 0
            ofs = (ofs << 1) + 1;
1910
        }
1911
    }
1912

1913
    /* now that arr[(size-ofs-1)*len] < key <= arr[(size-last_ofs-1)*len] */
1914 0
    l = size - ofs - 1;
1915 0
    r = size - last_ofs - 1;
1916

1917 0
    while (l + 1 < r) {
1918 0
        m = l + ((r - l) >> 1);
1919

1920 0
        if (cmp(arr + m * len, key, py_arr) < 0) {
1921
            l = m;
1922
        } else {
1923 0
            r = m;
1924
        }
1925
    }
1926

1927
    /* now that arr[(r-1)*len] < key <= arr[r*len] */
1928
    return r;
1929
}
1930

1931

1932
static void
1933 0
npy_merge_left(char *p1, npy_intp l1, char *p2, npy_intp l2,
1934
               char *p3, size_t len,
1935
               PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
1936
{
1937 0
    char *end = p2 + l2 * len;
1938 0
    memcpy(p3, p1, sizeof(char) * l1 * len);
1939
    /* first element must be in p2 otherwise skipped in the caller */
1940 0
    GENERIC_COPY(p1, p2, len);
1941 0
    p1 += len;
1942 0
    p2 += len;
1943

1944 0
    while (p1 < p2 && p2 < end) {
1945 0
        if (cmp(p2, p3, py_arr) < 0) {
1946 0
            GENERIC_COPY(p1, p2, len);
1947 0
            p1 += len;
1948 0
            p2 += len;
1949
        } else {
1950 0
            GENERIC_COPY(p1, p3, len);
1951 0
            p1 += len;
1952 0
            p3 += len;
1953
        }
1954
    }
1955

1956 0
    if (p1 != p2) {
1957 0
        memcpy(p1, p3, sizeof(char) * (p2 - p1));
1958
    }
1959 0
}
1960

1961

1962
static void
1963 0
npy_merge_right(char *p1, npy_intp l1, char *p2, npy_intp l2,
1964
                char *p3, size_t len,
1965
                PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
1966
{
1967
    npy_intp ofs;
1968 0
    char *start = p1 - len;
1969 0
    memcpy(p3, p2, sizeof(char) * l2 * len);
1970 0
    p1 += (l1 - 1) * len;
1971 0
    p2 += (l2 - 1) * len;
1972 0
    p3 += (l2 - 1) * len;
1973
    /* first element must be in p1 otherwise skipped in the caller */
1974 0
    GENERIC_COPY(p2, p1, len);
1975 0
    p2 -= len;
1976 0
    p1 -= len;
1977

1978 0
    while (p1 < p2 && start < p1) {
1979 0
        if (cmp(p3, p1, py_arr) < 0) {
1980 0
            GENERIC_COPY(p2, p1, len);
1981 0
            p2 -= len;
1982 0
            p1 -= len;
1983
        } else {
1984 0
            GENERIC_COPY(p2, p3, len);
1985 0
            p2 -= len;
1986 0
            p3 -= len;
1987
        }
1988
    }
1989

1990 0
    if (p1 != p2) {
1991 0
        ofs = p2 - start;
1992 0
        memcpy(start + len, p3 - ofs + len, sizeof(char) * ofs);
1993
    }
1994 0
}
1995

1996

1997

1998
static int
1999 0
npy_merge_at(char *arr, const run *stack, const npy_intp at,
2000
             buffer_char *buffer, size_t len,
2001
             PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
2002
{
2003
    int ret;
2004
    npy_intp s1, l1, s2, l2, k;
2005
    char *p1, *p2;
2006 0
    s1 = stack[at].s;
2007 0
    l1 = stack[at].l;
2008 0
    s2 = stack[at + 1].s;
2009 0
    l2 = stack[at + 1].l;
2010
    /* arr[s2] belongs to arr[s1+k] */
2011 0
    GENERIC_COPY(buffer->pw, arr + s2 * len, len);
2012 0
    k = npy_gallop_right(arr + s1 * len, l1, buffer->pw, len, cmp, py_arr);
2013

2014 0
    if (l1 == k) {
2015
        /* already sorted */
2016
        return 0;
2017
    }
2018

2019 0
    p1 = arr + (s1 + k) * len;
2020 0
    l1 -= k;
2021 0
    p2 = arr + s2 * len;
2022
    /* arr[s2-1] belongs to arr[s2+l2] */
2023 0
    GENERIC_COPY(buffer->pw, arr + (s2 - 1) * len, len);
2024 0
    l2 = npy_gallop_left(arr + s2 * len, l2, buffer->pw, len, cmp, py_arr);
2025

2026 0
    if (l2 < l1) {
2027 0
        ret = resize_buffer_char(buffer, l2);
2028

2029 0
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
2030

2031 0
        npy_merge_right(p1, l1, p2, l2, buffer->pw, len, cmp, py_arr);
2032
    } else {
2033 0
        ret = resize_buffer_char(buffer, l1);
2034

2035 0
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
2036

2037 0
        npy_merge_left(p1, l1, p2, l2, buffer->pw, len, cmp, py_arr);
2038
    }
2039

2040
    return 0;
2041
}
2042

2043

2044
static int
2045 1
npy_try_collapse(char *arr, run *stack, npy_intp *stack_ptr,
2046
                 buffer_char *buffer, size_t len,
2047
                 PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
2048
{
2049
    int ret;
2050
    npy_intp A, B, C, top;
2051 1
    top = *stack_ptr;
2052

2053 1
    while (1 < top) {
2054 0
        B = stack[top - 2].l;
2055 0
        C = stack[top - 1].l;
2056

2057 0
        if ((2 < top && stack[top - 3].l <= B + C) ||
2058 0
                (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) {
2059 0
            A = stack[top - 3].l;
2060

2061 0
            if (A <= C) {
2062 0
                ret = npy_merge_at(arr, stack, top - 3, buffer, len, cmp, py_arr);
2063

2064 0
                if (NPY_UNLIKELY(ret < 0)) { return ret; }
2065

2066 0
                stack[top - 3].l += B;
2067 0
                stack[top - 2] = stack[top - 1];
2068 0
                --top;
2069
            } else {
2070 0
                ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr);
2071

2072 0
                if (NPY_UNLIKELY(ret < 0)) { return ret; }
2073

2074 0
                stack[top - 2].l += C;
2075 0
                --top;
2076
            }
2077 0
        } else if (1 < top && B <= C) {
2078 0
            ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr);
2079

2080 0
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
2081

2082 0
            stack[top - 2].l += C;
2083 0
            --top;
2084
        } else {
2085
            break;
2086
        }
2087
    }
2088

2089 1
    *stack_ptr = top;
2090 1
    return 0;
2091
}
2092

2093

2094
static int
2095 1
npy_force_collapse(char *arr, run *stack, npy_intp *stack_ptr,
2096
                   buffer_char *buffer, size_t len,
2097
                   PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
2098
{
2099
    int ret;
2100 1
    npy_intp top = *stack_ptr;
2101

2102 1
    while (2 < top) {
2103 0
        if (stack[top - 3].l <= stack[top - 1].l) {
2104 0
            ret = npy_merge_at(arr, stack, top - 3, buffer, len, cmp, py_arr);
2105

2106 0
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
2107

2108 0
            stack[top - 3].l += stack[top - 2].l;
2109 0
            stack[top - 2] = stack[top - 1];
2110 0
            --top;
2111
        } else {
2112 0
            ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr);
2113

2114 0
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
2115

2116 0
            stack[top - 2].l += stack[top - 1].l;
2117 0
            --top;
2118
        }
2119
    }
2120

2121 1
    if (1 < top) {
2122 0
        ret = npy_merge_at(arr, stack, top - 2, buffer, len, cmp, py_arr);
2123

2124 0
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
2125
    }
2126

2127
    return 0;
2128
}
2129

2130

2131
NPY_NO_EXPORT int
2132 1
npy_timsort(void *start, npy_intp num, void *varr)
2133
{
2134 1
    PyArrayObject *arr = varr;
2135 1
    size_t len = PyArray_ITEMSIZE(arr);
2136 1
    PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare;
2137
    int ret;
2138
    npy_intp l, n, stack_ptr, minrun;
2139
    run stack[TIMSORT_STACK_SIZE];
2140
    buffer_char buffer;
2141

2142
    /* Items that have zero size don't make sense to sort */
2143 1
    if (len == 0) {
2144
        return 0;
2145
    }
2146

2147 1
    buffer.pw = NULL;
2148 1
    buffer.size = 0;
2149 1
    buffer.len = len;
2150 1
    stack_ptr = 0;
2151 1
    minrun = compute_min_run_short(num);
2152

2153
    /* used for insertion sort and gallop key */
2154 1
    ret = resize_buffer_char(&buffer, len);
2155

2156 1
    if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
2157

2158 1
    for (l = 0; l < num;) {
2159 1
        n = npy_count_run(start, l, num, minrun, buffer.pw, len, cmp, arr);
2160

2161
        /* both s and l are scaled by len */
2162 1
        stack[stack_ptr].s = l;
2163 1
        stack[stack_ptr].l = n;
2164 1
        ++stack_ptr;
2165 1
        ret = npy_try_collapse(start, stack, &stack_ptr, &buffer, len, cmp, arr);
2166

2167 1
        if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
2168

2169 1
        l += n;
2170
    }
2171

2172 1
    ret = npy_force_collapse(start, stack, &stack_ptr, &buffer, len, cmp, arr);
2173

2174 1
    if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
2175

2176 1
    ret = 0;
2177

2178 1
cleanup:
2179 1
    if (buffer.pw != NULL) {
2180 1
        free(buffer.pw);
2181
    }
2182
    return ret;
2183
}
2184

2185

2186
/* argsort */
2187

2188
static npy_intp
2189 1
npy_acount_run(char *arr, npy_intp *tosort, npy_intp l, npy_intp num,
2190
               npy_intp minrun, size_t len,
2191
               PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
2192
{
2193
    npy_intp sz;
2194
    npy_intp vi;
2195
    npy_intp *pl, *pi, *pj, *pr;
2196

2197 1
    if (NPY_UNLIKELY(num - l == 1)) {
2198
        return 1;
2199
    }
2200

2201 1
    pl = tosort + l;
2202

2203
    /* (not strictly) ascending sequence */
2204 1
    if (cmp(arr + (*pl) * len, arr + (*(pl + 1)) * len, py_arr) <= 0) {
2205 1
        for (pi = pl + 1; pi < tosort + num - 1
2206 1
                && cmp(arr + (*pi) * len, arr + (*(pi + 1)) * len, py_arr) <= 0; ++pi) {
2207
        }
2208
    } else {  /* (strictly) descending sequence */
2209 1
        for (pi = pl + 1; pi < tosort + num - 1
2210 1
                && cmp(arr + (*(pi + 1)) * len, arr + (*pi) * len, py_arr) < 0; ++pi) {
2211
        }
2212

2213 1
        for (pj = pl, pr = pi; pj < pr; ++pj, --pr) {
2214 1
            INTP_SWAP(*pj, *pr);
2215
        }
2216
    }
2217

2218 1
    ++pi;
2219 1
    sz = pi - pl;
2220

2221 1
    if (sz < minrun) {
2222 1
        if (l + minrun < num) {
2223
            sz = minrun;
2224
        } else {
2225 1
            sz = num - l;
2226
        }
2227

2228 1
        pr = pl + sz;
2229

2230
        /* insertion sort */
2231 1
        for (; pi < pr; ++pi) {
2232 1
            vi = *pi;
2233 1
            pj = pi;
2234

2235 1
            while (pl < pj && cmp(arr + vi * len, arr + (*(pj - 1)) * len, py_arr) < 0) {
2236 1
                *pj = *(pj - 1);
2237 1
                --pj;
2238
            }
2239

2240 1
            *pj = vi;
2241
        }
2242
    }
2243

2244
    return sz;
2245
}
2246

2247

2248
static npy_intp
2249 1
npy_agallop_left(const char *arr, const npy_intp *tosort,
2250
                 const npy_intp size, const char *key, size_t len,
2251
                 PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
2252
{
2253
    npy_intp last_ofs, ofs, l, m, r;
2254

2255 1
    if (cmp(arr + tosort[size - 1] * len, key, py_arr) < 0) {
2256
        return size;
2257
    }
2258

2259
    last_ofs = 0;
2260
    ofs = 1;
2261

2262
    for (;;) {
2263 1
        if (size <= ofs || ofs < 0) {
2264
            ofs = size;
2265
            break;
2266
        }
2267

2268 1
        if (cmp(arr + tosort[size - ofs - 1] * len, key, py_arr) < 0) {
2269
            break;
2270
        } else {
2271 1
            last_ofs = ofs;
2272 1
            ofs = (ofs << 1) + 1;
2273
        }
2274
    }
2275

2276
    /* now that arr[tosort[size-ofs-1]*len] < key <= arr[tosort[size-last_ofs-1]*len] */
2277 1
    l = size - ofs - 1;
2278 1
    r = size - last_ofs - 1;
2279

2280 1
    while (l + 1 < r) {
2281 1
        m = l + ((r - l) >> 1);
2282

2283 1
        if (cmp(arr + tosort[m] * len, key, py_arr) < 0) {
2284
            l = m;
2285
        } else {
2286 1
            r = m;
2287
        }
2288
    }
2289

2290
    /* now that arr[tosort[r-1]*len] < key <= arr[tosort[r]*len] */
2291
    return r;
2292
}
2293

2294

2295
static npy_intp
2296 1
npy_agallop_right(const char *arr, const npy_intp *tosort,
2297
                  const npy_intp size, const char *key, size_t len,
2298
                  PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
2299
{
2300
    npy_intp last_ofs, ofs, m;
2301

2302 1
    if (cmp(key, arr + tosort[0] * len, py_arr) < 0) {
2303
        return 0;
2304
    }
2305

2306
    last_ofs = 0;
2307
    ofs = 1;
2308

2309
    for (;;) {
2310 1
        if (size <= ofs || ofs < 0) {
2311
            ofs = size; /* arr[ofs] is never accessed */
2312
            break;
2313
        }
2314

2315 1
        if (cmp(key, arr + tosort[ofs] * len, py_arr) < 0) {
2316
            break;
2317
        } else {
2318 1
            last_ofs = ofs;
2319
            /* ofs = 1, 3, 7, 15... */
2320 1
            ofs = (ofs << 1) + 1;
2321
        }
2322
    }
2323

2324
    /* now that arr[tosort[last_ofs]*len] <= key < arr[tosort[ofs]*len] */
2325 1
    while (last_ofs + 1 < ofs) {
2326 1
        m = last_ofs + ((ofs - last_ofs) >> 1);
2327

2328 1
        if (cmp(key, arr + tosort[m] * len, py_arr) < 0) {
2329
            ofs = m;
2330
        } else {
2331 1
            last_ofs = m;
2332
        }
2333
    }
2334

2335
    /* now that arr[tosort[ofs-1]*len] <= key < arr[tosort[ofs]*len] */
2336
    return ofs;
2337
}
2338

2339

2340
static void
2341 1
npy_amerge_left(char *arr, npy_intp *p1, npy_intp l1, npy_intp *p2,
2342
                npy_intp l2, npy_intp *p3, size_t len,
2343
                PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
2344
{
2345 1
    npy_intp *end = p2 + l2;
2346 1
    memcpy(p3, p1, sizeof(npy_intp) * l1);
2347
    /* first element must be in p2 otherwise skipped in the caller */
2348 1
    *p1++ = *p2++;
2349

2350 1
    while (p1 < p2 && p2 < end) {
2351 1
        if (cmp(arr + (*p2) * len, arr + (*p3) * len, py_arr) < 0) {
2352 1
            *p1++ = *p2++;
2353
        } else {
2354 1
            *p1++ = *p3++;
2355
        }
2356
    }
2357

2358 1
    if (p1 != p2) {
2359 1
        memcpy(p1, p3, sizeof(npy_intp) * (p2 - p1));
2360
    }
2361 1
}
2362

2363

2364
static void
2365 1
npy_amerge_right(char *arr, npy_intp* p1, npy_intp l1, npy_intp *p2,
2366
                 npy_intp l2, npy_intp *p3, size_t len,
2367
                 PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
2368
{
2369
    npy_intp ofs;
2370 1
    npy_intp *start = p1 - 1;
2371 1
    memcpy(p3, p2, sizeof(npy_intp) * l2);
2372 1
    p1 += l1 - 1;
2373 1
    p2 += l2 - 1;
2374 1
    p3 += l2 - 1;
2375
    /* first element must be in p1 otherwise skipped in the caller */
2376 1
    *p2-- = *p1--;
2377

2378 1
    while (p1 < p2 && start < p1) {
2379 1
        if (cmp(arr + (*p3) * len, arr + (*p1) * len, py_arr) < 0) {
2380 1
            *p2-- = *p1--;
2381
        } else {
2382 1
            *p2-- = *p3--;
2383
        }
2384
    }
2385

2386 1
    if (p1 != p2) {
2387 1
        ofs = p2 - start;
2388 1
        memcpy(start + 1, p3 - ofs + 1, sizeof(npy_intp) * ofs);
2389
    }
2390 1
}
2391

2392

2393

2394
static int
2395 1
npy_amerge_at(char *arr, npy_intp *tosort, const run *stack,
2396
              const npy_intp at, buffer_intp *buffer, size_t len,
2397
              PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
2398
{
2399
    int ret;
2400
    npy_intp s1, l1, s2, l2, k;
2401
    npy_intp *p1, *p2;
2402 1
    s1 = stack[at].s;
2403 1
    l1 = stack[at].l;
2404 1
    s2 = stack[at + 1].s;
2405 1
    l2 = stack[at + 1].l;
2406
    /* tosort[s2] belongs to tosort[s1+k] */
2407 1
    k = npy_agallop_right(arr, tosort + s1, l1, arr + tosort[s2] * len, len, cmp,
2408
                          py_arr);
2409

2410 1
    if (l1 == k) {
2411
        /* already sorted */
2412
        return 0;
2413
    }
2414

2415 1
    p1 = tosort + s1 + k;
2416 1
    l1 -= k;
2417 1
    p2 = tosort + s2;
2418
    /* tosort[s2-1] belongs to tosort[s2+l2] */
2419 1
    l2 = npy_agallop_left(arr, tosort + s2, l2, arr + tosort[s2 - 1] * len,
2420
                          len, cmp, py_arr);
2421

2422 1
    if (l2 < l1) {
2423 1
        ret = resize_buffer_intp(buffer, l2);
2424

2425 1
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
2426

2427 1
        npy_amerge_right(arr, p1, l1, p2, l2, buffer->pw, len, cmp, py_arr);
2428
    } else {
2429 1
        ret = resize_buffer_intp(buffer, l1);
2430

2431 1
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
2432

2433 1
        npy_amerge_left(arr, p1, l1, p2, l2, buffer->pw, len, cmp, py_arr);
2434
    }
2435

2436
    return 0;
2437
}
2438

2439

2440
static int
2441 1
npy_atry_collapse(char *arr, npy_intp *tosort, run *stack,
2442
                  npy_intp *stack_ptr, buffer_intp *buffer, size_t len,
2443
                  PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
2444
{
2445
    int ret;
2446
    npy_intp A, B, C, top;
2447 1
    top = *stack_ptr;
2448

2449 1
    while (1 < top) {
2450 1
        B = stack[top - 2].l;
2451 1
        C = stack[top - 1].l;
2452

2453 1
        if ((2 < top && stack[top - 3].l <= B + C) ||
2454 1
                (3 < top && stack[top - 4].l <= stack[top - 3].l + B)) {
2455 1
            A = stack[top - 3].l;
2456

2457 1
            if (A <= C) {
2458 0
                ret = npy_amerge_at(arr, tosort, stack, top - 3, buffer, len, cmp, py_arr);
2459

2460 0
                if (NPY_UNLIKELY(ret < 0)) { return ret; }
2461

2462 0
                stack[top - 3].l += B;
2463 0
                stack[top - 2] = stack[top - 1];
2464 0
                --top;
2465
            } else {
2466 1
                ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, py_arr);
2467

2468 1
                if (NPY_UNLIKELY(ret < 0)) { return ret; }
2469

2470 1
                stack[top - 2].l += C;
2471 1
                --top;
2472
            }
2473 1
        } else if (1 < top && B <= C) {
2474 1
            ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, py_arr);
2475

2476 1
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
2477

2478 1
            stack[top - 2].l += C;
2479 1
            --top;
2480
        } else {
2481
            break;
2482
        }
2483
    }
2484

2485 1
    *stack_ptr = top;
2486 1
    return 0;
2487
}
2488

2489

2490
static int
2491 1
npy_aforce_collapse(char *arr, npy_intp *tosort, run *stack,
2492
                    npy_intp *stack_ptr, buffer_intp *buffer, size_t len,
2493
                    PyArray_CompareFunc *cmp, PyArrayObject *py_arr)
2494
{
2495
    int ret;
2496 1
    npy_intp top = *stack_ptr;
2497

2498 1
    while (2 < top) {
2499 1
        if (stack[top - 3].l <= stack[top - 1].l) {
2500 0
            ret = npy_amerge_at(arr, tosort, stack, top - 3, buffer, len, cmp, py_arr);
2501

2502 0
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
2503

2504 0
            stack[top - 3].l += stack[top - 2].l;
2505 0
            stack[top - 2] = stack[top - 1];
2506 0
            --top;
2507
        } else {
2508 1
            ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, py_arr);
2509

2510 1
            if (NPY_UNLIKELY(ret < 0)) { return ret; }
2511

2512 1
            stack[top - 2].l += stack[top - 1].l;
2513 1
            --top;
2514
        }
2515
    }
2516

2517 1
    if (1 < top) {
2518 1
        ret = npy_amerge_at(arr, tosort, stack, top - 2, buffer, len, cmp, py_arr);
2519

2520 1
        if (NPY_UNLIKELY(ret < 0)) { return ret; }
2521
    }
2522

2523
    return 0;
2524
}
2525

2526

2527
NPY_NO_EXPORT int
2528 1
npy_atimsort(void *start, npy_intp *tosort, npy_intp num, void *varr)
2529
{
2530 1
    PyArrayObject *arr = varr;
2531 1
    size_t len = PyArray_ITEMSIZE(arr);
2532 1
    PyArray_CompareFunc *cmp = PyArray_DESCR(arr)->f->compare;
2533
    int ret;
2534
    npy_intp l, n, stack_ptr, minrun;
2535
    run stack[TIMSORT_STACK_SIZE];
2536
    buffer_intp buffer;
2537

2538
    /* Items that have zero size don't make sense to sort */
2539 1
    if (len == 0) {
2540
        return 0;
2541
    }
2542

2543 1
    buffer.pw = NULL;
2544 1
    buffer.size = 0;
2545 1
    stack_ptr = 0;
2546 1
    minrun = compute_min_run_short(num);
2547

2548 1
    for (l = 0; l < num;) {
2549 1
        n = npy_acount_run(start, tosort, l, num, minrun, len, cmp, arr);
2550
        /* both s and l are scaled by len */
2551 1
        stack[stack_ptr].s = l;
2552 1
        stack[stack_ptr].l = n;
2553 1
        ++stack_ptr;
2554 1
        ret = npy_atry_collapse(start, tosort, stack, &stack_ptr, &buffer, len, cmp,
2555
                                arr);
2556

2557 1
        if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
2558

2559 1
        l += n;
2560
    }
2561

2562 1
    ret = npy_aforce_collapse(start, tosort, stack, &stack_ptr, &buffer, len,
2563
                              cmp, arr);
2564

2565 1
    if (NPY_UNLIKELY(ret < 0)) { goto cleanup; }
2566

2567 1
    ret = 0;
2568

2569 1
cleanup:
2570 1
    if (buffer.pw != NULL) {
2571 1
        free(buffer.pw);
2572
    }
2573
    return ret;
2574
}

Read our documentation on viewing source code .

Loading