numpy  2.0.0
src/npysort/selection.c.src File Reference
#include "npy_sort.h"
#include "npysort_common.h"
#include "numpy/npy_math.h"
#include "npy_partition.h"
#include <stdlib.h>

Defines

#define NPY_NO_DEPRECATED_API   NPY_API_VERSION
#define NOT_USED   NPY_UNUSED(unused)
#define IDX(x)   (x)
#define SORTEE(x)   v[x]
#define SWAP   @TYPE@_SWAP
#define MEDIAN3_SWAP(v, tosort, low, mid, high)   median3_swap_@suff@(v, low, mid, high)
#define MEDIAN5(v, tosort, subleft)   median5_@suff@(v + subleft)
#define UNGUARDED_PARTITION(v, tosort, pivot, ll, hh)   unguarded_partition_@suff@(v, pivot, ll, hh)
#define INTROSELECT(v, tosort, num, kth, pivots, npiv)   introselect_@suff@(v, nmed, nmed / 2, pivots, npiv, NULL)
#define DUMBSELECT(v, tosort, left, num, kth)   dumb_select_@suff@(v + left, num, kth)

Functions

static NPY_INLINE void store_pivot (npy_intp pivot, npy_intp kth, npy_intp *pivots, npy_intp *npiv)
static npy_intp amedian_of_median5_ suff (@type @*v, npy_intp *tosort, const npy_intp num, npy_intp *pivots, npy_intp *npiv)
static npy_intp median_of_median5_ suff (@type @*v, const npy_intp num, npy_intp *pivots, npy_intp *npiv)
static NPY_INLINE void name
median3_swap_ 
suff (@type @*v, npy_intp low, npy_intp mid, npy_intp high)
static npy_intp name median5_ suff (@type @*v)
static NPY_INLINE void name
unguarded_partition_ 
suff (@type @*v, const @type @pivot, npy_intp *ll, npy_intp *hh)
static int name dumb_select_ suff (@type @*v, npy_intp num, npy_intp kth)
int name introselect_ suff (@type @*v, npy_intp num, npy_intp kth, npy_intp *pivots, npy_intp *npiv, void *NOT_USED)

Define Documentation

#define DUMBSELECT (   v,
  tosort,
  left,
  num,
  kth 
)    dumb_select_@suff@(v + left, num, kth)

Referenced by suff().

#define IDX (   x)    (x)
begin repeat1
name = , a# #idx = , tosort# #arg = 0, 1#

Referenced by suff().

#define INTROSELECT (   v,
  tosort,
  num,
  kth,
  pivots,
  npiv 
)    introselect_@suff@(v, nmed, nmed / 2, pivots, npiv, NULL)
#define MEDIAN3_SWAP (   v,
  tosort,
  low,
  mid,
  high 
)    median3_swap_@suff@(v, low, mid, high)
#define MEDIAN5 (   v,
  tosort,
  subleft 
)    median5_@suff@(v + subleft)
#define NOT_USED   NPY_UNUSED(unused)
#define NPY_NO_DEPRECATED_API   NPY_API_VERSION
The code is loosely based on the quickselect from Nicolas Devillard - 1998 public domain http://ndevilla.free.fr/median/median/
Quick select with median of 3 pivot is usually the fastest, but the worst case scenario can be quadratic complexity, e.g. np.roll(np.arange(x), x / 2) To avoid this if it recurses too much it falls back to the worst case linear median of median of group 5 pivot strategy.
#define SORTEE (   x)    v[x]

Referenced by suff().

#define SWAP   @TYPE@_SWAP

Referenced by suff().

#define UNGUARDED_PARTITION (   v,
  tosort,
  pivot,
  ll,
  hh 
)    unguarded_partition_@suff@(v, pivot, ll, hh)

Function Documentation

static NPY_INLINE void store_pivot ( npy_intp  pivot,
npy_intp  kth,
npy_intp pivots,
npy_intp npiv 
) [static]

* NUMERIC SORTS **

If pivot is the requested kth store it, overwritting other pivots if required. This must be done so iterative partition can work without manually shifting lower data offset by kth each time
we only need pivots larger than current kth, larger pivots are not useful as partitions on smaller kth would reorder the stored pivots

References NPY_MAX_PIVOT_STACK.

Referenced by suff().

static npy_intp amedian_of_median5_ suff ( @type @*  v,
npy_intp tosort,
const npy_intp  num,
npy_intp pivots,
npy_intp npiv 
) [static]
begin repeat <blockquote>

TYPE = BOOL, BYTE, UBYTE, SHORT, USHORT, INT, UINT, LONG, ULONG,
LONGLONG, ULONGLONG, HALF, FLOAT, DOUBLE, LONGDOUBLE, CFLOAT, CDOUBLE, CLONGDOUBLE#
suff = bool, byte, ubyte, short, ushort, int, uint, long, ulong,
longlong, ulonglong, half, float, double, longdouble, cfloat, cdouble, clongdouble#
#type = npy_bool, npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int,
npy_uint, npy_long, npy_ulong, npy_longlong, npy_ulonglong, npy_ushort, npy_float, npy_double, npy_longdouble, npy_cfloat, npy_cdouble, npy_clongdouble#
System Message: WARNING/2 (<string>, line 13) Definition list ends without a blank line; unexpected unindent.
#inexact = 0*11, 1*7# </blockquote>
static npy_intp name median_of_median5_ suff ( @type @*  v,
const npy_intp  num,
npy_intp pivots,
npy_intp npiv 
) [static]
select median of median of blocks of 5 if used as partition pivot it splits the range into at least 30%/70% allowing linear time worstcase quickselect

References IDX.

static NPY_INLINE void name median3_swap_ suff ( @type @*  v,
npy_intp  low,
npy_intp  mid,
npy_intp  high 
) [static]
median of 3 pivot strategy gets min and median and moves median to low and min to low + 1 for efficient partitioning, see unguarded_partition
move pivot to low
move 3-lowest element to low + 1

References IDX, SORTEE, SWAP, and TYPE.

static npy_intp name median5_ suff ( @type @*  v) [static]
select index of median of five elements
could be optimized as we only need the index (no swaps)
v[1] and v[2] swapped into order above
static NPY_INLINE void name unguarded_partition_ suff ( @type @*  v,
const @type @  pivot,
npy_intp ll,
npy_intp hh 
) [static]
partition and return the index were the pivot belongs the data must have following property to avoid bound checks:

System Message: ERROR/3 (<string>, line 3) Unexpected indentation.

<blockquote> ll ... hh</blockquote>

System Message: WARNING/2 (<string>, line 4) Block quote ends without a blank line; unexpected unindent.
lower-than-pivot [x x x x] larger-than-pivot
static int name dumb_select_ suff ( @type @*  v,
npy_intp  num,
npy_intp  kth 
) [static]
N^2 selection, fast only for very small kth useful for close multiple partitions (e.g. even element median, interpolating percentile)

References DUMBSELECT, IDX, and store_pivot().

int name introselect_ suff ( @type @*  v,
npy_intp  num,
npy_intp  kth,
npy_intp pivots,
npy_intp npiv,
void *  NOT_USED 
)
iterative median of 3 quickselect with cutoff to median-of-medians-of5 receives stack of already computed pivots in v to minimize the partition size were kth is searched in
area that needs partitioning in [...] kth 0: [8 7 6 5 4 3 2 1 0] -> med3 partitions elements [4, 2, 0]

System Message: ERROR/3 (<string>, line 7) Unexpected indentation.

<blockquote> 0 1 2 3 4 8 7 5 6 -> pop requested kth -> stack [4, 2]</blockquote>

System Message: WARNING/2 (<string>, line 8) Block quote ends without a blank line; unexpected unindent.
kth 3: 0 1 2 [3] 4 8 7 5 6 -> stack [4] kth 5: 0 1 2 3 4 [8 7 5 6] -> stack [6] kth 8: 0 1 2 3 4 5 6 [8 7] -> stack []
pivot larger than kth set it as upper bound
kth was already found in a previous iteration -> done
pop from stack
use a faster O(n*kth) algorithm for very small kth e.g. for interpolating percentile
useful to check if NaN present via partition(d, (x, -1))
guarantee three elements
if we aren't making sufficient progress with median of 3 fall back to median-of-median5 pivot for linear worst case med3 for small sizes is required to do unguarded partition
median of 3 pivot strategy, swapping for efficient partition
FIXME: always use pivots to optimize this iterative partition
adapt for the larger partition than med3 pivot
find place to put pivot (in low): previous swapping removes need for bound checks pivot 3-lowest [x x x] 3-highest
move pivot into position
kth pivot stored later
two elements

References IDX, and TYPE.