numpy
2.0.0
|
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <Python.h>
#include "numpy/ndarraytypes.h"
#include "mem_overlap.h"
#include "npy_extint128.h"
Defines | |
#define | NPY_NO_DEPRECATED_API NPY_API_VERSION |
#define | MAX(a, b) (((a) >= (b)) ? (a) : (b)) |
#define | MIN(a, b) (((a) <= (b)) ? (a) : (b)) |
Functions | |
static void | euclid (npy_int64 a1, npy_int64 a2, npy_int64 *a_gcd, npy_int64 *gamma, npy_int64 *epsilon) |
static int | diophantine_precompute (unsigned int n, diophantine_term_t *E, diophantine_term_t *Ep, npy_int64 *Gamma, npy_int64 *Epsilon) |
static mem_overlap_t | diophantine_dfs (unsigned int n, unsigned int v, diophantine_term_t *E, diophantine_term_t *Ep, npy_int64 *Gamma, npy_int64 *Epsilon, npy_int64 b, Py_ssize_t max_work, int require_ub_nontrivial, npy_int64 *x, Py_ssize_t *count) |
NPY_VISIBILITY_HIDDEN mem_overlap_t | solve_diophantine (unsigned int n, diophantine_term_t *E, npy_int64 b, Py_ssize_t max_work, int require_ub_nontrivial, npy_int64 *x) |
static int | diophantine_sort_A (const void *xp, const void *yp) |
NPY_VISIBILITY_HIDDEN int | diophantine_simplify (unsigned int *n, diophantine_term_t *E, npy_int64 b) |
NPY_VISIBILITY_HIDDEN void | offset_bounds_from_strides (const int itemsize, const int nd, const npy_intp *dims, const npy_intp *strides, npy_intp *lower_offset, npy_intp *upper_offset) |
static void | get_array_memory_extents (PyArrayObject *arr, npy_uintp *out_start, npy_uintp *out_end, npy_uintp *num_bytes) |
static int | strides_to_terms (PyArrayObject *arr, diophantine_term_t *terms, unsigned int *nterms, int skip_empty) |
NPY_VISIBILITY_HIDDEN mem_overlap_t | solve_may_share_memory (PyArrayObject *a, PyArrayObject *b, Py_ssize_t max_work) |
NPY_VISIBILITY_HIDDEN mem_overlap_t | solve_may_have_internal_overlap (PyArrayObject *a, Py_ssize_t max_work) |
#define MAX | ( | a, | |
b | |||
) | (((a) >= (b)) ? (a) : (b)) |
#define MIN | ( | a, | |
b | |||
) | (((a) <= (b)) ? (a) : (b)) |
Referenced by diophantine_sort_A().
#define NPY_NO_DEPRECATED_API NPY_API_VERSION |
static mem_overlap_t diophantine_dfs | ( | unsigned int | n, |
unsigned int | v, | ||
diophantine_term_t * | E, | ||
diophantine_term_t * | Ep, | ||
npy_int64 * | Gamma, | ||
npy_int64 * | Epsilon, | ||
npy_int64 | b, | ||
Py_ssize_t | max_work, | ||
int | require_ub_nontrivial, | ||
npy_int64 * | x, | ||
Py_ssize_t * | count | ||
) | [static] |
References MEM_OVERLAP_NO.
Referenced by solve_diophantine().
static int diophantine_precompute | ( | unsigned int | n, |
diophantine_term_t * | E, | ||
diophantine_term_t * | Ep, | ||
npy_int64 * | Gamma, | ||
npy_int64 * | Epsilon | ||
) | [static] |
References diophantine_term_t::a, safe_add(), safe_mul(), and diophantine_term_t::ub.
Referenced by solve_diophantine().
NPY_VISIBILITY_HIDDEN int diophantine_simplify | ( | unsigned int * | n, |
diophantine_term_t * | E, | ||
npy_int64 | b | ||
) |
static int diophantine_sort_A | ( | const void * | xp, |
const void * | yp | ||
) | [static] |
References MIN, and diophantine_term_t::ub.
static void euclid | ( | npy_int64 | a1, |
npy_int64 | a2, | ||
npy_int64 * | a_gcd, | ||
npy_int64 * | gamma, | ||
npy_int64 * | epsilon | ||
) | [static] |
static void get_array_memory_extents | ( | PyArrayObject * | arr, |
npy_uintp * | out_start, | ||
npy_uintp * | out_end, | ||
npy_uintp * | num_bytes | ||
) | [static] |
NPY_VISIBILITY_HIDDEN void offset_bounds_from_strides | ( | const int | itemsize, |
const int | nd, | ||
const npy_intp * | dims, | ||
const npy_intp * | strides, | ||
npy_intp * | lower_offset, | ||
npy_intp * | upper_offset | ||
) |
NPY_VISIBILITY_HIDDEN mem_overlap_t solve_diophantine | ( | unsigned int | n, |
diophantine_term_t * | E, | ||
npy_int64 | b, | ||
Py_ssize_t | max_work, | ||
int | require_ub_nontrivial, | ||
npy_int64 * | x | ||
) |
A[0] x[0] + A[1] x[1] + ... + A[n-1] x[n-1] == b 0 <= x[i] <= U[i] A[i] > 0
References diophantine_dfs(), diophantine_precompute(), MEM_OVERLAP_ERROR, and MEM_OVERLAP_OVERFLOW.
NPY_VISIBILITY_HIDDEN mem_overlap_t solve_may_have_internal_overlap | ( | PyArrayObject * | a, |
Py_ssize_t | max_work | ||
) |
solutions to <blockquote> sum(a*x) = b, 0 <= x[i] <= ub[i]</blockquote>
for any b. Equivalently, <blockquote> sum(a*x0) - sum(a*x1) = 0</blockquote>
Mapping the coefficients on the left by x0'[i] = x0[i] if a[i] > 0 else ub[i]-x0[i] and opposite for x1, we have <blockquote> sum(abs(a)*(x0' + x1')) = sum(abs(a)*ub)</blockquote>
Now, x0!=x1 if for some i we have x0'[i] + x1'[i] != ub[i]. We can now change variables to z[i] = x0'[i] + x1'[i] so the problem becomes <blockquote> sum(abs(a)*z) = sum(abs(a)*ub), 0 <= z[i] <= 2*ub[i], z != ub</blockquote>
This can be solved with solve_diophantine.
NPY_VISIBILITY_HIDDEN mem_overlap_t solve_may_share_memory | ( | PyArrayObject * | a, |
PyArrayObject * | b, | ||
Py_ssize_t | max_work | ||
) |
The bounds computed by offset_bounds_from_strides correspond to all-positive strides.
start1 + sum(abs(stride1)*x1) == start2 + sum(abs(stride2)*x2) == end1 - 1 - sum(abs(stride1)*x1') == end2 - 1 - sum(abs(stride2)*x2')
<=>
sum(abs(stride1)*x1) + sum(abs(stride2)*x2') == end2 - 1 - start1
OR
sum(abs(stride1)*x1') + sum(abs(stride2)*x2) == end1 - 1 - start2
We pick the problem with the smaller RHS (they are non-negative due to the extent check above.)
References MEM_OVERLAP_NO.
Referenced by raw_array_is_aligned().
static int strides_to_terms | ( | PyArrayObject * | arr, |
diophantine_term_t * | terms, | ||
unsigned int * | nterms, | ||
int | skip_empty | ||
) | [static] |