1 ```/* -*- c -*- */ ``` 2 3 ```/* ``` 4 ``` * ``` 5 ``` * The code is loosely based on the quickselect from ``` 6 ``` * Nicolas Devillard - 1998 public domain ``` 7 ``` * http://ndevilla.free.fr/median/median/ ``` 8 ``` * ``` 9 ``` * Quick select with median of 3 pivot is usually the fastest, ``` 10 ``` * but the worst case scenario can be quadratic complexity, ``` 11 ``` * e.g. np.roll(np.arange(x), x / 2) ``` 12 ``` * To avoid this if it recurses too much it falls back to the ``` 13 ``` * worst case linear median of median of group 5 pivot strategy. ``` 14 ``` */ ``` 15 16 17 ```#define NPY_NO_DEPRECATED_API NPY_API_VERSION ``` 18 19 ```#include "npy_sort.h" ``` 20 ```#include "npysort_common.h" ``` 21 ```#include "numpy/npy_math.h" ``` 22 ```#include "npy_partition.h" ``` 23 ```#include ``` 24 25 ```#define NOT_USED NPY_UNUSED(unused) ``` 26 27 28 ```/* ``` 29 ``` ***************************************************************************** ``` 30 ``` ** NUMERIC SORTS ** ``` 31 ``` ***************************************************************************** ``` 32 ``` */ ``` 33 34 35 ```static NPY_INLINE void store_pivot(npy_intp pivot, npy_intp kth, ``` 36 ``` npy_intp * pivots, npy_intp * npiv) ``` 37 ```{ ``` 38 1 ``` if (pivots == NULL) { ``` 39 ``` return; ``` 40 ``` } ``` 41 42 ``` /* ``` 43 ``` * If pivot is the requested kth store it, overwriting other pivots if ``` 44 ``` * required. This must be done so iterative partition can work without ``` 45 ``` * manually shifting lower data offset by kth each time ``` 46 ``` */ ``` 47 1 ``` if (pivot == kth && *npiv == NPY_MAX_PIVOT_STACK) { ``` 48 1 ``` pivots[*npiv - 1] = pivot; ``` 49 ``` } ``` 50 ``` /* ``` 51 ``` * we only need pivots larger than current kth, larger pivots are not ``` 52 ``` * useful as partitions on smaller kth would reorder the stored pivots ``` 53 ``` */ ``` 54 1 ``` else if (pivot >= kth && *npiv < NPY_MAX_PIVOT_STACK) { ``` 55 1 ``` pivots[*npiv] = pivot; ``` 56 1 ``` (*npiv) += 1; ``` 57 ``` } ``` 58 ```} ``` 59 60 ```/**begin repeat ``` 61 ``` * ``` 62 ``` * #TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG, ``` 63 ``` * LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, ``` 64 ``` * CFLOAT, CDOUBLE, CLONGDOUBLE# ``` 65 ``` * #suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong, ``` 66 ``` * longlong, ulonglong, half, float, double, longdouble, ``` 67 ``` * cfloat, cdouble, clongdouble# ``` 68 ``` * #type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, ``` 69 ``` * npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, ``` 70 ``` * npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat, ``` 71 ``` * npy_cdouble, npy_clongdouble# ``` 72 ``` * #inexact = 0*11, 1*7# ``` 73 ``` */ ``` 74 75 ```static npy_intp ``` 76 ```amedian_of_median5_@suff@(@type@ *v, npy_intp* tosort, const npy_intp num, ``` 77 ``` npy_intp * pivots, ``` 78 ``` npy_intp * npiv); ``` 79 80 ```static npy_intp ``` 81 ```median_of_median5_@suff@(@type@ *v, const npy_intp num, ``` 82 ``` npy_intp * pivots, ``` 83 ``` npy_intp * npiv); ``` 84 85 ```/**begin repeat1 ``` 86 ``` * #name = , a# ``` 87 ``` * #idx = , tosort# ``` 88 ``` * #arg = 0, 1# ``` 89 ``` */ ``` 90 ```#if @arg@ ``` 91 ```/* helper macros to avoid duplication of direct/indirect selection */ ``` 92 ```#define IDX(x) tosort[x] ``` 93 ```#define SORTEE(x) tosort[x] ``` 94 ```#define SWAP INTP_SWAP ``` 95 ```#define MEDIAN3_SWAP(v, tosort, low, mid, high) \ ``` 96 ``` amedian3_swap_@suff@(v, tosort, low, mid, high) ``` 97 ```#define MEDIAN5(v, tosort, subleft) \ ``` 98 ``` amedian5_@suff@(v, tosort + subleft) ``` 99 ```#define UNGUARDED_PARTITION(v, tosort, pivot, ll, hh) \ ``` 100 ``` aunguarded_partition_@suff@(v, tosort, pivot, ll, hh) ``` 101 ```#define INTROSELECT(v, tosort, num, kth, pivots, npiv) \ ``` 102 ``` aintroselect_@suff@(v, tosort, nmed, nmed / 2, pivots, npiv, NULL) ``` 103 ```#define DUMBSELECT(v, tosort, left, num, kth) \ ``` 104 ``` adumb_select_@suff@(v, tosort + left, num, kth) ``` 105 ```#else ``` 106 ```#define IDX(x) (x) ``` 107 ```#define SORTEE(x) v[x] ``` 108 ```#define SWAP @TYPE@_SWAP ``` 109 ```#define MEDIAN3_SWAP(v, tosort, low, mid, high) \ ``` 110 ``` median3_swap_@suff@(v, low, mid, high) ``` 111 ```#define MEDIAN5(v, tosort, subleft) \ ``` 112 ``` median5_@suff@(v + subleft) ``` 113 ```#define UNGUARDED_PARTITION(v, tosort, pivot, ll, hh) \ ``` 114 ``` unguarded_partition_@suff@(v, pivot, ll, hh) ``` 115 ```#define INTROSELECT(v, tosort, num, kth, pivots, npiv) \ ``` 116 ``` introselect_@suff@(v, nmed, nmed / 2, pivots, npiv, NULL) ``` 117 ```#define DUMBSELECT(v, tosort, left, num, kth) \ ``` 118 ``` dumb_select_@suff@(v + left, num, kth) ``` 119 ```#endif ``` 120 121 122 ```/* ``` 123 ``` * median of 3 pivot strategy ``` 124 ``` * gets min and median and moves median to low and min to low + 1 ``` 125 ``` * for efficient partitioning, see unguarded_partition ``` 126 ``` */ ``` 127 ```static NPY_INLINE void ``` 128 1 ```@name@median3_swap_@suff@(@type@ * v, ``` 129 ```#if @arg@ ``` 130 ``` npy_intp * tosort, ``` 131 ```#endif ``` 132 ``` npy_intp low, npy_intp mid, npy_intp high) ``` 133 ```{ ``` 134 1 ``` if (@TYPE@_LT(v[IDX(high)], v[IDX(mid)])) ``` 135 1 ``` SWAP(SORTEE(high), SORTEE(mid)); ``` 136 1 ``` if (@TYPE@_LT(v[IDX(high)], v[IDX(low)])) ``` 137 1 ``` SWAP(SORTEE(high), SORTEE(low)); ``` 138 ``` /* move pivot to low */ ``` 139 1 ``` if (@TYPE@_LT(v[IDX(low)], v[IDX(mid)])) ``` 140 1 ``` SWAP(SORTEE(low), SORTEE(mid)); ``` 141 ``` /* move 3-lowest element to low + 1 */ ``` 142 1 ``` SWAP(SORTEE(mid), SORTEE(low + 1)); ``` 143 1 ```} ``` 144 145 146 ```/* select index of median of five elements */ ``` 147 1 ```static npy_intp @name@median5_@suff@( ``` 148 ```#if @arg@ ``` 149 ``` const @type@ * v, npy_intp * tosort ``` 150 ```#else ``` 151 ``` @type@ * v ``` 152 ```#endif ``` 153 ``` ) ``` 154 ```{ ``` 155 ``` /* could be optimized as we only need the index (no swaps) */ ``` 156 1 ``` if (@TYPE@_LT(v[IDX(1)], v[IDX(0)])) { ``` 157 1 ``` SWAP(SORTEE(1), SORTEE(0)); ``` 158 ``` } ``` 159 1 ``` if (@TYPE@_LT(v[IDX(4)], v[IDX(3)])) { ``` 160 1 ``` SWAP(SORTEE(4), SORTEE(3)); ``` 161 ``` } ``` 162 1 ``` if (@TYPE@_LT(v[IDX(3)], v[IDX(0)])) { ``` 163 1 ``` SWAP(SORTEE(3), SORTEE(0)); ``` 164 ``` } ``` 165 1 ``` if (@TYPE@_LT(v[IDX(4)], v[IDX(1)])) { ``` 166 1 ``` SWAP(SORTEE(4), SORTEE(1)); ``` 167 ``` } ``` 168 1 ``` if (@TYPE@_LT(v[IDX(2)], v[IDX(1)])) { ``` 169 1 ``` SWAP(SORTEE(2), SORTEE(1)); ``` 170 ``` } ``` 171 1 ``` if (@TYPE@_LT(v[IDX(3)], v[IDX(2)])) { ``` 172 1 ``` if (@TYPE@_LT(v[IDX(3)], v[IDX(1)])) { ``` 173 ``` return 1; ``` 174 ``` } ``` 175 ``` else { ``` 176 1 ``` return 3; ``` 177 ``` } ``` 178 ``` } ``` 179 ``` else { ``` 180 ``` /* v[1] and v[2] swapped into order above */ ``` 181 ``` return 2; ``` 182 ``` } ``` 183 ```} ``` 184 185 186 ```/* ``` 187 ``` * partition and return the index were the pivot belongs ``` 188 ``` * the data must have following property to avoid bound checks: ``` 189 ``` * ll ... hh ``` 190 ``` * lower-than-pivot [x x x x] larger-than-pivot ``` 191 ``` */ ``` 192 ```static NPY_INLINE void ``` 193 1 ```@name@unguarded_partition_@suff@(@type@ * v, ``` 194 ```#if @arg@ ``` 195 ``` npy_intp * tosort, ``` 196 ```#endif ``` 197 ``` const @type@ pivot, ``` 198 ``` npy_intp * ll, npy_intp * hh) ``` 199 ```{ ``` 200 1 ``` for (;;) { ``` 201 1 ``` do (*ll)++; while (@TYPE@_LT(v[IDX(*ll)], pivot)); ``` 202 1 ``` do (*hh)--; while (@TYPE@_LT(pivot, v[IDX(*hh)])); ``` 203 204 1 ``` if (*hh < *ll) ``` 205 ``` break; ``` 206 207 1 ``` SWAP(SORTEE(*ll), SORTEE(*hh)); ``` 208 ``` } ``` 209 1 ```} ``` 210 211 212 ```/* ``` 213 ``` * select median of median of blocks of 5 ``` 214 ``` * if used as partition pivot it splits the range into at least 30%/70% ``` 215 ``` * allowing linear time worstcase quickselect ``` 216 ``` */ ``` 217 ```static npy_intp ``` 218 1 ```@name@median_of_median5_@suff@(@type@ *v, ``` 219 ```#if @arg@ ``` 220 ``` npy_intp* tosort, ``` 221 ```#endif ``` 222 ``` const npy_intp num, ``` 223 ``` npy_intp * pivots, ``` 224 ``` npy_intp * npiv) ``` 225 ```{ ``` 226 ``` npy_intp i, subleft; ``` 227 1 ``` npy_intp right = num - 1; ``` 228 1 ``` npy_intp nmed = (right + 1) / 5; ``` 229 1 ``` for (i = 0, subleft = 0; i < nmed; i++, subleft += 5) { ``` 230 1 ``` npy_intp m = MEDIAN5(v, tosort, subleft); ``` 231 1 ``` SWAP(SORTEE(subleft + m), SORTEE(i)); ``` 232 ``` } ``` 233 234 1 ``` if (nmed > 2) ``` 235 1 ``` INTROSELECT(v, tosort, nmed, nmed / 2, pivots, npiv); ``` 236 1 ``` return nmed / 2; ``` 237 ```} ``` 238 239 240 ```/* ``` 241 ``` * N^2 selection, fast only for very small kth ``` 242 ``` * useful for close multiple partitions ``` 243 ``` * (e.g. even element median, interpolating percentile) ``` 244 ``` */ ``` 245 ```static int ``` 246 1 ```@name@dumb_select_@suff@(@type@ *v, ``` 247 ```#if @arg@ ``` 248 ``` npy_intp * tosort, ``` 249 ```#endif ``` 250 ``` npy_intp num, npy_intp kth) ``` 251 ```{ ``` 252 ``` npy_intp i; ``` 253 1 ``` for (i = 0; i <= kth; i++) { ``` 254 1 ``` npy_intp minidx = i; ``` 255 1 ``` @type@ minval = v[IDX(i)]; ``` 256 ``` npy_intp k; ``` 257 1 ``` for (k = i + 1; k < num; k++) { ``` 258 1 ``` if (@TYPE@_LT(v[IDX(k)], minval)) { ``` 259 1 ``` minidx = k; ``` 260 1 ``` minval = v[IDX(k)]; ``` 261 ``` } ``` 262 ``` } ``` 263 1 ``` SWAP(SORTEE(i), SORTEE(minidx)); ``` 264 ``` } ``` 265 266 1 ``` return 0; ``` 267 ```} ``` 268 269 270 ```/* ``` 271 ``` * iterative median of 3 quickselect with cutoff to median-of-medians-of5 ``` 272 ``` * receives stack of already computed pivots in v to minimize the ``` 273 ``` * partition size were kth is searched in ``` 274 ``` * ``` 275 ``` * area that needs partitioning in [...] ``` 276 ``` * kth 0: [8 7 6 5 4 3 2 1 0] -> med3 partitions elements [4, 2, 0] ``` 277 ``` * 0 1 2 3 4 8 7 5 6 -> pop requested kth -> stack [4, 2] ``` 278 ``` * kth 3: 0 1 2 [3] 4 8 7 5 6 -> stack [4] ``` 279 ``` * kth 5: 0 1 2 3 4 [8 7 5 6] -> stack [6] ``` 280 ``` * kth 8: 0 1 2 3 4 5 6 [8 7] -> stack [] ``` 281 ``` * ``` 282 ``` */ ``` 283 ```NPY_NO_EXPORT int ``` 284 1 ```@name@introselect_@suff@(@type@ *v, ``` 285 ```#if @arg@ ``` 286 ``` npy_intp* tosort, ``` 287 ```#endif ``` 288 ``` npy_intp num, npy_intp kth, ``` 289 ``` npy_intp * pivots, ``` 290 ``` npy_intp * npiv, ``` 291 ``` void *NOT_USED) ``` 292 ```{ ``` 293 1 ``` npy_intp low = 0; ``` 294 1 ``` npy_intp high = num - 1; ``` 295 ``` int depth_limit; ``` 296 297 1 ``` if (npiv == NULL) ``` 298 1 ``` pivots = NULL; ``` 299 300 1 ``` while (pivots != NULL && *npiv > 0) { ``` 301 1 ``` if (pivots[*npiv - 1] > kth) { ``` 302 ``` /* pivot larger than kth set it as upper bound */ ``` 303 1 ``` high = pivots[*npiv - 1] - 1; ``` 304 1 ``` break; ``` 305 ``` } ``` 306 1 ``` else if (pivots[*npiv - 1] == kth) { ``` 307 ``` /* kth was already found in a previous iteration -> done */ ``` 308 ``` return 0; ``` 309 ``` } ``` 310 311 1 ``` low = pivots[*npiv - 1] + 1; ``` 312 313 ``` /* pop from stack */ ``` 314 1 ``` *npiv -= 1; ``` 315 ``` } ``` 316 317 ``` /* ``` 318 ``` * use a faster O(n*kth) algorithm for very small kth ``` 319 ``` * e.g. for interpolating percentile ``` 320 ``` */ ``` 321 1 ``` if (kth - low < 3) { ``` 322 1 ``` DUMBSELECT(v, tosort, low, high - low + 1, kth - low); ``` 323 ``` store_pivot(kth, kth, pivots, npiv); ``` 324 ``` return 0; ``` 325 ``` } ``` 326 1 ``` else if (@inexact@ && kth == num - 1) { ``` 327 ``` /* useful to check if NaN present via partition(d, (x, -1)) */ ``` 328 ``` npy_intp k; ``` 329 1 ``` npy_intp maxidx = low; ``` 330 1 ``` @type@ maxval = v[IDX(low)]; ``` 331 1 ``` for (k = low + 1; k < num; k++) { ``` 332 1 ``` if (!@TYPE@_LT(v[IDX(k)], maxval)) { ``` 333 1 ``` maxidx = k; ``` 334 1 ``` maxval = v[IDX(k)]; ``` 335 ``` } ``` 336 ``` } ``` 337 1 ``` SWAP(SORTEE(kth), SORTEE(maxidx)); ``` 338 1 ``` return 0; ``` 339 ``` } ``` 340 341 1 ``` depth_limit = npy_get_msb(num) * 2; ``` 342 343 ``` /* guarantee three elements */ ``` 344 1 ``` for (;low + 1 < high;) { ``` 345 1 ``` npy_intp ll = low + 1; ``` 346 1 ``` npy_intp hh = high; ``` 347 348 ``` /* ``` 349 ``` * if we aren't making sufficient progress with median of 3 ``` 350 ``` * fall back to median-of-median5 pivot for linear worst case ``` 351 ``` * med3 for small sizes is required to do unguarded partition ``` 352 ``` */ ``` 353 1 ``` if (depth_limit > 0 || hh - ll < 5) { ``` 354 1 ``` const npy_intp mid = low + (high - low) / 2; ``` 355 ``` /* median of 3 pivot strategy, ``` 356 ``` * swapping for efficient partition */ ``` 357 1 ``` MEDIAN3_SWAP(v, tosort, low, mid, high); ``` 358 ``` } ``` 359 ``` else { ``` 360 ``` npy_intp mid; ``` 361 ``` /* FIXME: always use pivots to optimize this iterative partition */ ``` 362 ```#if @arg@ ``` 363 0 ``` mid = ll + amedian_of_median5_@suff@(v, tosort + ll, hh - ll, NULL, NULL); ``` 364 ```#else ``` 365 1 ``` mid = ll + median_of_median5_@suff@(v + ll, hh - ll, NULL, NULL); ``` 366 ```#endif ``` 367 1 ``` SWAP(SORTEE(mid), SORTEE(low)); ``` 368 ``` /* adapt for the larger partition than med3 pivot */ ``` 369 1 ``` ll--; ``` 370 1 ``` hh++; ``` 371 ``` } ``` 372 373 1 ``` depth_limit--; ``` 374 375 ``` /* ``` 376 ``` * find place to put pivot (in low): ``` 377 ``` * previous swapping removes need for bound checks ``` 378 ``` * pivot 3-lowest [x x x] 3-highest ``` 379 ``` */ ``` 380 1 ``` UNGUARDED_PARTITION(v, tosort, v[IDX(low)], &ll, &hh); ``` 381 382 ``` /* move pivot into position */ ``` 383 1 ``` SWAP(SORTEE(low), SORTEE(hh)); ``` 384 385 ``` /* kth pivot stored later */ ``` 386 1 ``` if (hh != kth) { ``` 387 1 ``` store_pivot(hh, kth, pivots, npiv); ``` 388 ``` } ``` 389 390 1 ``` if (hh >= kth) ``` 391 1 ``` high = hh - 1; ``` 392 1 ``` if (hh <= kth) ``` 393 1 ``` low = ll; ``` 394 ``` } ``` 395 396 ``` /* two elements */ ``` 397 1 ``` if (high == low + 1) { ``` 398 1 ``` if (@TYPE@_LT(v[IDX(high)], v[IDX(low)])) { ``` 399 1 ``` SWAP(SORTEE(high), SORTEE(low)) ``` 400 ``` } ``` 401 ``` } ``` 402 ``` store_pivot(kth, kth, pivots, npiv); ``` 403 404 ``` return 0; ``` 405 ```} ``` 406 407 408 ```#undef IDX ``` 409 ```#undef SWAP ``` 410 ```#undef SORTEE ``` 411 ```#undef MEDIAN3_SWAP ``` 412 ```#undef MEDIAN5 ``` 413 ```#undef UNGUARDED_PARTITION ``` 414 ```#undef INTROSELECT ``` 415 ```#undef DUMBSELECT ``` 416 ```/**end repeat1**/ ``` 417 418 ```/**end repeat**/ ```

Read our documentation on viewing source code .