numpy  2.0.0
src/private/mem_overlap.c File Reference
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <Python.h>
#include "numpy/ndarraytypes.h"
#include "mem_overlap.h"
#include "npy_extint128.h"

Defines

#define NPY_NO_DEPRECATED_API   NPY_API_VERSION
#define MAX(a, b)   (((a) >= (b)) ? (a) : (b))
#define MIN(a, b)   (((a) <= (b)) ? (a) : (b))

Functions

static void euclid (npy_int64 a1, npy_int64 a2, npy_int64 *a_gcd, npy_int64 *gamma, npy_int64 *epsilon)
static int diophantine_precompute (unsigned int n, diophantine_term_t *E, diophantine_term_t *Ep, npy_int64 *Gamma, npy_int64 *Epsilon)
static mem_overlap_t diophantine_dfs (unsigned int n, unsigned int v, diophantine_term_t *E, diophantine_term_t *Ep, npy_int64 *Gamma, npy_int64 *Epsilon, npy_int64 b, Py_ssize_t max_work, int require_ub_nontrivial, npy_int64 *x, Py_ssize_t *count)
NPY_VISIBILITY_HIDDEN mem_overlap_t solve_diophantine (unsigned int n, diophantine_term_t *E, npy_int64 b, Py_ssize_t max_work, int require_ub_nontrivial, npy_int64 *x)
static int diophantine_sort_A (const void *xp, const void *yp)
NPY_VISIBILITY_HIDDEN int diophantine_simplify (unsigned int *n, diophantine_term_t *E, npy_int64 b)
NPY_VISIBILITY_HIDDEN void offset_bounds_from_strides (const int itemsize, const int nd, const npy_intp *dims, const npy_intp *strides, npy_intp *lower_offset, npy_intp *upper_offset)
static void get_array_memory_extents (PyArrayObject *arr, npy_uintp *out_start, npy_uintp *out_end, npy_uintp *num_bytes)
static int strides_to_terms (PyArrayObject *arr, diophantine_term_t *terms, unsigned int *nterms, int skip_empty)
NPY_VISIBILITY_HIDDEN mem_overlap_t solve_may_share_memory (PyArrayObject *a, PyArrayObject *b, Py_ssize_t max_work)
NPY_VISIBILITY_HIDDEN mem_overlap_t solve_may_have_internal_overlap (PyArrayObject *a, Py_ssize_t max_work)

Define Documentation

#define MAX (   a,
 
)    (((a) >= (b)) ? (a) : (b))
#define MIN (   a,
 
)    (((a) <= (b)) ? (a) : (b))

Referenced by diophantine_sort_A().

#define NPY_NO_DEPRECATED_API   NPY_API_VERSION
Copyright (c) 2015 Pauli Virtanen All rights reserved. Licensed under 3-clause BSD license, see LICENSE.txt.

Function Documentation

static mem_overlap_t diophantine_dfs ( unsigned int  n,
unsigned int  v,
diophantine_term_t E,
diophantine_term_t Ep,
npy_int64 *  Gamma,
npy_int64 *  Epsilon,
npy_int64  b,
Py_ssize_t  max_work,
int  require_ub_nontrivial,
npy_int64 *  x,
Py_ssize_t *  count 
) [static]
Depth-first bounded Euclid search
Fetch precomputed values for the reduced problem
Generate set of allowed solutions
The set to enumerate is: x1 = gamma*c + c1*t x2 = epsilon*c - c2*t t integer 0 <= x1 <= u1 0 <= x2 <= u2 and we have c, c1, c2 >= 0
The bounds t_l, t_u ensure the x computed below do not overflow
Base case
Ignore 'trivial' solution
Recurse to all candidates
b2 = b - a2*x[v];

References MEM_OVERLAP_NO.

Referenced by solve_diophantine().

static int diophantine_precompute ( unsigned int  n,
diophantine_term_t E,
diophantine_term_t Ep,
npy_int64 *  Gamma,
npy_int64 *  Epsilon 
) [static]
Precompute GCD and bounds transformations
Ep[0].ub = E[0].ub * c1 + E[1].ub * c2;
Ep[j-1].ub = c1 * Ep[j-2].ub + c2 * E[j].ub;

References diophantine_term_t::a, safe_add(), safe_mul(), and diophantine_term_t::ub.

Referenced by solve_diophantine().

NPY_VISIBILITY_HIDDEN int diophantine_simplify ( unsigned int *  n,
diophantine_term_t E,
npy_int64  b 
)
Simplify Diophantine decision problem.
Combine identical coefficients, remove unnecessary variables, and trim bounds.
The feasible/infeasible decision result is retained.
Returns: 0 (success), -1 (integer overflow).
Skip obviously infeasible cases
Sort vs. coefficients
Combine identical coefficients
Trim bounds and remove unnecessary variables
If the problem is feasible at all, x[i]=0
static int diophantine_sort_A ( const void *  xp,
const void *  yp 
) [static]

References MIN, and diophantine_term_t::ub.

static void euclid ( npy_int64  a1,
npy_int64  a2,
npy_int64 *  a_gcd,
npy_int64 *  gamma,
npy_int64 *  epsilon 
) [static]
Euclid's algorithm for GCD.
Solves for gamma*a1 + epsilon*a2 == gcd(a1, a2) providing |gamma| < |a2|/gcd, |epsilon| < |a1|/gcd.

Docutils System Messages

System Message: ERROR/3 (<string>, line 4); backlink Undefined substitution referenced: "gamma".
System Message: ERROR/3 (<string>, line 4); backlink Undefined substitution referenced: "a2".
System Message: ERROR/3 (<string>, line 4); backlink Undefined substitution referenced: "epsilon".
System Message: ERROR/3 (<string>, line 4); backlink Undefined substitution referenced: "a1".
The numbers remain bounded by |a1|, |a2| during
the iteration, so no integer overflows

Docutils System Messages

System Message: ERROR/3 (<string>, line 1); backlink Undefined substitution referenced: "a1".
System Message: ERROR/3 (<string>, line 1); backlink Undefined substitution referenced: "a2".
static void get_array_memory_extents ( PyArrayObject arr,
npy_uintp out_start,
npy_uintp out_end,
npy_uintp num_bytes 
) [static]
Gets a half-open range [start, end) which contains the array data
NPY_VISIBILITY_HIDDEN void offset_bounds_from_strides ( const int  itemsize,
const int  nd,
const npy_intp dims,
const npy_intp strides,
npy_intp lower_offset,
npy_intp upper_offset 
)
Gets a half-open range [start, end) of offsets from the data pointer
If the array size is zero, return an empty range
Expand either upwards or downwards depending on stride
Return a half-open range
NPY_VISIBILITY_HIDDEN mem_overlap_t solve_diophantine ( unsigned int  n,
diophantine_term_t E,
npy_int64  b,
Py_ssize_t  max_work,
int  require_ub_nontrivial,
npy_int64 *  x 
)
Solve bounded Diophantine equation
The problem considered is:

A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == b
0 <= x[i] <= U[i]
A[i] > 0
Solve via depth-first Euclid's algorithm, as explained in [1].
If require_ub_nontrivial!=0, look for solutions to the problem where b = A[0]*(U[0]/2) + ... + A[n]*(U[n-1]/2) but ignoring the trivial solution x[i] = U[i]/2. All U[i] must be divisible by 2. The value given for <cite>b</cite> is ignored in this case.
Only trivial solution for 0-variable problem
Only trivial solution for 1-variable problem

References diophantine_dfs(), diophantine_precompute(), MEM_OVERLAP_ERROR, and MEM_OVERLAP_OVERFLOW.

NPY_VISIBILITY_HIDDEN mem_overlap_t solve_may_have_internal_overlap ( PyArrayObject a,
Py_ssize_t  max_work 
)
Determine whether an array has internal overlap.
Returns: 0 (no overlap), 1 (overlap), or < 0 (failed to solve).
max_work and reasons for solver failures are as in solve_may_share_memory.
Quick case
The internal memory overlap problem is looking for two different

solutions to <blockquote> sum(a*x) = b, 0 <= x[i] <= ub[i]</blockquote>

for any b. Equivalently, <blockquote> sum(a*x0) - sum(a*x1) = 0</blockquote>

Mapping the coefficients on the left by x0'[i] = x0[i] if a[i] > 0 else ub[i]-x0[i] and opposite for x1, we have <blockquote> sum(abs(a)*(x0' + x1')) = sum(abs(a)*ub)</blockquote>

Now, x0!=x1 if for some i we have x0'[i] + x1'[i] != ub[i]. We can now change variables to z[i] = x0'[i] + x1'[i] so the problem becomes <blockquote> sum(abs(a)*z) = sum(abs(a)*ub), 0 <= z[i] <= 2*ub[i], z != ub</blockquote>

This can be solved with solve_diophantine.

Get rid of zero coefficients and empty terms
Double bounds to get the internal overlap problem
Sort vs. coefficients; cannot call diophantine_simplify because it may
change the decision problem inequality part
Solve
NPY_VISIBILITY_HIDDEN mem_overlap_t solve_may_share_memory ( PyArrayObject a,
PyArrayObject b,
Py_ssize_t  max_work 
)
Determine whether two arrays share some memory.
Returns: 0 (no shared memory), 1 (shared memory), or < 0 (failed to solve).
Note that failures to solve can occur due to integer overflows, or effort required solving the problem exceeding max_work. The general problem is NP-hard and worst case runtime is exponential in the number of dimensions. max_work controls the amount of work done, either exact (max_work == -1), only a simple memory extent check (max_work == 0), or set an upper bound max_work > 0 for the number of solution candidates considered.
Memory extents don't overlap
Too much work required, give up
Convert problem to Diophantine equation form with positive coefficients.

The bounds computed by offset_bounds_from_strides correspond to all-positive strides.

start1 + sum(abs(stride1)*x1) == start2 + sum(abs(stride2)*x2) == end1 - 1 - sum(abs(stride1)*x1') == end2 - 1 - sum(abs(stride2)*x2')

<=>

sum(abs(stride1)*x1) + sum(abs(stride2)*x2') == end2 - 1 - start1

OR

sum(abs(stride1)*x1') + sum(abs(stride2)*x2) == end1 - 1 - start2

We pick the problem with the smaller RHS (they are non-negative due to the extent check above.)

Integer overflow
Simplify, if possible
Integer overflow
Solve

References MEM_OVERLAP_NO.

Referenced by raw_array_is_aligned().

static int strides_to_terms ( PyArrayObject arr,
diophantine_term_t terms,
unsigned int *  nterms,
int  skip_empty 
) [static]
integer overflow