numpy  2.0.0
src/private/lowlevel_strided_loops.h
Go to the documentation of this file.
00001 #ifndef __LOWLEVEL_STRIDED_LOOPS_H
00002 #define __LOWLEVEL_STRIDED_LOOPS_H
00003 #include "common.h"
00004 #include <npy_config.h>
00005 
00006 /*
00007  * NOTE: This API should remain private for the time being, to allow
00008  *       for further refinement.  I think the 'aligned' mechanism
00009  *       needs changing, for example.
00010  */
00011 
00012 /*
00013  * This function pointer is for unary operations that input an
00014  * arbitrarily strided one-dimensional array segment and output
00015  * an arbitrarily strided array segment of the same size.
00016  * It may be a fully general function, or a specialized function
00017  * when the strides or item size have particular known values.
00018  *
00019  * Examples of unary operations are a straight copy, a byte-swap,
00020  * and a casting operation,
00021  *
00022  * The 'transferdata' parameter is slightly special, following a
00023  * generic auxiliary data pattern defined in ndarraytypes.h
00024  * Use NPY_AUXDATA_CLONE and NPY_AUXDATA_FREE to deal with this data.
00025  *
00026  */
00027 typedef void (PyArray_StridedUnaryOp)(char *dst, npy_intp dst_stride,
00028                                     char *src, npy_intp src_stride,
00029                                     npy_intp N, npy_intp src_itemsize,
00030                                     NpyAuxData *transferdata);
00031 
00032 /*
00033  * This is for pointers to functions which behave exactly as
00034  * for PyArray_StridedUnaryOp, but with an additional mask controlling
00035  * which values are transformed.
00036  *
00037  * In particular, the 'i'-th element is operated on if and only if
00038  * mask[i*mask_stride] is true.
00039  */
00040 typedef void (PyArray_MaskedStridedUnaryOp)(char *dst, npy_intp dst_stride,
00041                                     char *src, npy_intp src_stride,
00042                                     npy_bool *mask, npy_intp mask_stride,
00043                                     npy_intp N, npy_intp src_itemsize,
00044                                     NpyAuxData *transferdata);
00045 
00046 /*
00047  * This function pointer is for binary operations that input two
00048  * arbitrarily strided one-dimensional array segments and output
00049  * an arbitrarily strided array segment of the same size.
00050  * It may be a fully general function, or a specialized function
00051  * when the strides or item size have particular known values.
00052  *
00053  * Examples of binary operations are the basic arithmetic operations,
00054  * logical operators AND, OR, and many others.
00055  *
00056  * The 'transferdata' parameter is slightly special, following a
00057  * generic auxiliary data pattern defined in ndarraytypes.h
00058  * Use NPY_AUXDATA_CLONE and NPY_AUXDATA_FREE to deal with this data.
00059  *
00060  */
00061 typedef void (PyArray_StridedBinaryOp)(char *dst, npy_intp dst_stride,
00062                                     char *src0, npy_intp src0_stride,
00063                                     char *src1, npy_intp src1_stride,
00064                                     npy_intp N, NpyAuxData *transferdata);
00065 
00066 /*
00067  * Gives back a function pointer to a specialized function for copying
00068  * strided memory.  Returns NULL if there is a problem with the inputs.
00069  *
00070  * aligned:
00071  *      Should be 1 if the src and dst pointers are always aligned,
00072  *      0 otherwise.
00073  * src_stride:
00074  *      Should be the src stride if it will always be the same,
00075  *      NPY_MAX_INTP otherwise.
00076  * dst_stride:
00077  *      Should be the dst stride if it will always be the same,
00078  *      NPY_MAX_INTP otherwise.
00079  * itemsize:
00080  *      Should be the item size if it will always be the same, 0 otherwise.
00081  *
00082  */
00083 NPY_NO_EXPORT PyArray_StridedUnaryOp *
00084 PyArray_GetStridedCopyFn(int aligned,
00085                         npy_intp src_stride, npy_intp dst_stride,
00086                         npy_intp itemsize);
00087 
00088 /*
00089  * Gives back a function pointer to a specialized function for copying
00090  * and swapping strided memory.  This assumes each element is a single
00091  * value to be swapped.
00092  *
00093  * For information on the 'aligned', 'src_stride' and 'dst_stride' parameters
00094  * see above.
00095  *
00096  * Parameters are as for PyArray_GetStridedCopyFn.
00097  */
00098 NPY_NO_EXPORT PyArray_StridedUnaryOp *
00099 PyArray_GetStridedCopySwapFn(int aligned,
00100                             npy_intp src_stride, npy_intp dst_stride,
00101                             npy_intp itemsize);
00102 
00103 /*
00104  * Gives back a function pointer to a specialized function for copying
00105  * and swapping strided memory.  This assumes each element is a pair
00106  * of values, each of which needs to be swapped.
00107  *
00108  * For information on the 'aligned', 'src_stride' and 'dst_stride' parameters
00109  * see above.
00110  *
00111  * Parameters are as for PyArray_GetStridedCopyFn.
00112  */
00113 NPY_NO_EXPORT PyArray_StridedUnaryOp *
00114 PyArray_GetStridedCopySwapPairFn(int aligned,
00115                             npy_intp src_stride, npy_intp dst_stride,
00116                             npy_intp itemsize);
00117 
00118 /*
00119  * Gives back a transfer function and transfer data pair which copies
00120  * the data from source to dest, truncating it if the data doesn't
00121  * fit, and padding with zero bytes if there's too much space.
00122  *
00123  * For information on the 'aligned', 'src_stride' and 'dst_stride' parameters
00124  * see above.
00125  *
00126  * Returns NPY_SUCCEED or NPY_FAIL
00127  */
00128 NPY_NO_EXPORT int
00129 PyArray_GetStridedZeroPadCopyFn(int aligned, int unicode_swap,
00130                             npy_intp src_stride, npy_intp dst_stride,
00131                             npy_intp src_itemsize, npy_intp dst_itemsize,
00132                             PyArray_StridedUnaryOp **outstransfer,
00133                             NpyAuxData **outtransferdata);
00134 
00135 /*
00136  * For casts between built-in numeric types,
00137  * this produces a function pointer for casting from src_type_num
00138  * to dst_type_num.  If a conversion is unsupported, returns NULL
00139  * without setting a Python exception.
00140  */
00141 NPY_NO_EXPORT PyArray_StridedUnaryOp *
00142 PyArray_GetStridedNumericCastFn(int aligned,
00143                             npy_intp src_stride, npy_intp dst_stride,
00144                             int src_type_num, int dst_type_num);
00145 
00146 /*
00147  * Gets an operation which copies elements of the given dtype,
00148  * swapping if the dtype isn't in NBO.
00149  *
00150  * Returns NPY_SUCCEED or NPY_FAIL
00151  */
00152 NPY_NO_EXPORT int
00153 PyArray_GetDTypeCopySwapFn(int aligned,
00154                             npy_intp src_stride, npy_intp dst_stride,
00155                             PyArray_Descr *dtype,
00156                             PyArray_StridedUnaryOp **outstransfer,
00157                             NpyAuxData **outtransferdata);
00158 
00159 /*
00160  * If it's possible, gives back a transfer function which casts and/or
00161  * byte swaps data with the dtype 'src_dtype' into data with the dtype
00162  * 'dst_dtype'.  If the outtransferdata is populated with a non-NULL value,
00163  * it must be deallocated with the NPY_AUXDATA_FREE
00164  * function when the transfer function is no longer required.
00165  *
00166  * aligned:
00167  *      Should be 1 if the src and dst pointers are always aligned,
00168  *      0 otherwise.
00169  * src_stride:
00170  *      Should be the src stride if it will always be the same,
00171  *      NPY_MAX_INTP otherwise.
00172  * dst_stride:
00173  *      Should be the dst stride if it will always be the same,
00174  *      NPY_MAX_INTP otherwise.
00175  * src_dtype:
00176  *      The data type of source data.  If this is NULL, a transfer
00177  *      function which sets the destination to zeros is produced.
00178  * dst_dtype:
00179  *      The data type of destination data.  If this is NULL and
00180  *      move_references is 1, a transfer function which decrements
00181  *      source data references is produced.
00182  * move_references:
00183  *      If 0, the destination data gets new reference ownership.
00184  *      If 1, the references from the source data are moved to
00185  *      the destination data.
00186  * out_stransfer:
00187  *      The resulting transfer function is placed here.
00188  * out_transferdata:
00189  *      The auxiliary data for the transfer function is placed here.
00190  *      When finished with the transfer function, the caller must call
00191  *      NPY_AUXDATA_FREE on this data.
00192  * out_needs_api:
00193  *      If this is non-NULL, and the transfer function produced needs
00194  *      to call into the (Python) API, this gets set to 1.  This
00195  *      remains untouched if no API access is required.
00196  *
00197  * WARNING: If you set move_references to 1, it is best that src_stride is
00198  *          never zero when calling the transfer function.  Otherwise, the
00199  *          first destination reference will get the value and all the rest
00200  *          will get NULL.
00201  *
00202  * Returns NPY_SUCCEED or NPY_FAIL.
00203  */
00204 NPY_NO_EXPORT int
00205 PyArray_GetDTypeTransferFunction(int aligned,
00206                             npy_intp src_stride, npy_intp dst_stride,
00207                             PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
00208                             int move_references,
00209                             PyArray_StridedUnaryOp **out_stransfer,
00210                             NpyAuxData **out_transferdata,
00211                             int *out_needs_api);
00212 
00213 /*
00214  * This is identical to PyArray_GetDTypeTransferFunction, but returns a
00215  * transfer function which also takes a mask as a parameter.  The mask is used
00216  * to determine which values to copy, and data is transfered exactly when
00217  * mask[i*mask_stride] is true.
00218  *
00219  * If move_references is true, values which are not copied to the
00220  * destination will still have their source reference decremented.
00221  *
00222  * If mask_dtype is NPY_BOOL or NPY_UINT8, each full element is either
00223  * transferred or not according to the mask as described above. If
00224  * dst_dtype and mask_dtype are both struct dtypes, their names must
00225  * match exactly, and the dtype of each leaf field in mask_dtype must
00226  * be either NPY_BOOL or NPY_UINT8.
00227  */
00228 NPY_NO_EXPORT int
00229 PyArray_GetMaskedDTypeTransferFunction(int aligned,
00230                             npy_intp src_stride,
00231                             npy_intp dst_stride,
00232                             npy_intp mask_stride,
00233                             PyArray_Descr *src_dtype,
00234                             PyArray_Descr *dst_dtype,
00235                             PyArray_Descr *mask_dtype,
00236                             int move_references,
00237                             PyArray_MaskedStridedUnaryOp **out_stransfer,
00238                             NpyAuxData **out_transferdata,
00239                             int *out_needs_api);
00240 
00241 /*
00242  * Casts the specified number of elements from 'src' with data type
00243  * 'src_dtype' to 'dst' with 'dst_dtype'. See
00244  * PyArray_GetDTypeTransferFunction for more details.
00245  *
00246  * Returns NPY_SUCCEED or NPY_FAIL.
00247  */
00248 NPY_NO_EXPORT int
00249 PyArray_CastRawArrays(npy_intp count,
00250                       char *src, char *dst,
00251                       npy_intp src_stride, npy_intp dst_stride,
00252                       PyArray_Descr *src_dtype, PyArray_Descr *dst_dtype,
00253                       int move_references);
00254 
00255 /*
00256  * These two functions copy or convert the data of an n-dimensional array
00257  * to/from a 1-dimensional strided buffer.  These functions will only call
00258  * 'stransfer' with the provided dst_stride/src_stride and
00259  * dst_strides[0]/src_strides[0], so the caller can use those values to
00260  * specialize the function.
00261  * Note that even if ndim == 0, everything needs to be set as if ndim == 1.
00262  *
00263  * The return value is the number of elements it couldn't copy.  A return value
00264  * of 0 means all elements were copied, a larger value means the end of
00265  * the n-dimensional array was reached before 'count' elements were copied.
00266  *
00267  * ndim:
00268  *      The number of dimensions of the n-dimensional array.
00269  * dst/src/mask:
00270  *      The destination, source or mask starting pointer.
00271  * dst_stride/src_stride/mask_stride:
00272  *      The stride of the 1-dimensional strided buffer
00273  * dst_strides/src_strides:
00274  *      The strides of the n-dimensional array.
00275  * dst_strides_inc/src_strides_inc:
00276  *      How much to add to the ..._strides pointer to get to the next stride.
00277  * coords:
00278  *      The starting coordinates in the n-dimensional array.
00279  * coords_inc:
00280  *      How much to add to the coords pointer to get to the next coordinate.
00281  * shape:
00282  *      The shape of the n-dimensional array.
00283  * shape_inc:
00284  *      How much to add to the shape pointer to get to the next shape entry.
00285  * count:
00286  *      How many elements to transfer
00287  * src_itemsize:
00288  *      How big each element is.  If transfering between elements of different
00289  *      sizes, for example a casting operation, the 'stransfer' function
00290  *      should be specialized for that, in which case 'stransfer' will use
00291  *      this parameter as the source item size.
00292  * stransfer:
00293  *      The strided transfer function.
00294  * transferdata:
00295  *      An auxiliary data pointer passed to the strided transfer function.
00296  *      This follows the conventions of NpyAuxData objects.
00297  */
00298 NPY_NO_EXPORT npy_intp
00299 PyArray_TransferNDimToStrided(npy_intp ndim,
00300                 char *dst, npy_intp dst_stride,
00301                 char *src, npy_intp *src_strides, npy_intp src_strides_inc,
00302                 npy_intp *coords, npy_intp coords_inc,
00303                 npy_intp *shape, npy_intp shape_inc,
00304                 npy_intp count, npy_intp src_itemsize,
00305                 PyArray_StridedUnaryOp *stransfer,
00306                 NpyAuxData *transferdata);
00307 
00308 NPY_NO_EXPORT npy_intp
00309 PyArray_TransferStridedToNDim(npy_intp ndim,
00310                 char *dst, npy_intp *dst_strides, npy_intp dst_strides_inc,
00311                 char *src, npy_intp src_stride,
00312                 npy_intp *coords, npy_intp coords_inc,
00313                 npy_intp *shape, npy_intp shape_inc,
00314                 npy_intp count, npy_intp src_itemsize,
00315                 PyArray_StridedUnaryOp *stransfer,
00316                 NpyAuxData *transferdata);
00317 
00318 NPY_NO_EXPORT npy_intp
00319 PyArray_TransferMaskedStridedToNDim(npy_intp ndim,
00320                 char *dst, npy_intp *dst_strides, npy_intp dst_strides_inc,
00321                 char *src, npy_intp src_stride,
00322                 npy_bool *mask, npy_intp mask_stride,
00323                 npy_intp *coords, npy_intp coords_inc,
00324                 npy_intp *shape, npy_intp shape_inc,
00325                 npy_intp count, npy_intp src_itemsize,
00326                 PyArray_MaskedStridedUnaryOp *stransfer,
00327                 NpyAuxData *data);
00328 
00329 NPY_NO_EXPORT int
00330 mapiter_trivial_get(PyArrayObject *self, PyArrayObject *ind,
00331                        PyArrayObject *result);
00332 
00333 NPY_NO_EXPORT int
00334 mapiter_trivial_set(PyArrayObject *self, PyArrayObject *ind,
00335                        PyArrayObject *result);
00336 
00337 NPY_NO_EXPORT int
00338 mapiter_get(PyArrayMapIterObject *mit);
00339 
00340 NPY_NO_EXPORT int
00341 mapiter_set(PyArrayMapIterObject *mit);
00342 
00343 /*
00344  * Prepares shape and strides for a simple raw array iteration.
00345  * This sorts the strides into FORTRAN order, reverses any negative
00346  * strides, then coalesces axes where possible. The results are
00347  * filled in the output parameters.
00348  *
00349  * This is intended for simple, lightweight iteration over arrays
00350  * where no buffering of any kind is needed, and the array may
00351  * not be stored as a PyArrayObject.
00352  *
00353  * You can use this together with NPY_RAW_ITER_START and
00354  * NPY_RAW_ITER_ONE_NEXT to handle the looping boilerplate of everything
00355  * but the innermost loop (which is for idim == 0).
00356  *
00357  * Returns 0 on success, -1 on failure.
00358  */
00359 NPY_NO_EXPORT int
00360 PyArray_PrepareOneRawArrayIter(int ndim, npy_intp *shape,
00361                             char *data, npy_intp *strides,
00362                             int *out_ndim, npy_intp *out_shape,
00363                             char **out_data, npy_intp *out_strides);
00364 
00365 /*
00366  * The same as PyArray_PrepareOneRawArrayIter, but for two
00367  * operands instead of one. Any broadcasting of the two operands
00368  * should have already been done before calling this function,
00369  * as the ndim and shape is only specified once for both operands.
00370  *
00371  * Only the strides of the first operand are used to reorder
00372  * the dimensions, no attempt to consider all the strides together
00373  * is made, as is done in the NpyIter object.
00374  *
00375  * You can use this together with NPY_RAW_ITER_START and
00376  * NPY_RAW_ITER_TWO_NEXT to handle the looping boilerplate of everything
00377  * but the innermost loop (which is for idim == 0).
00378  *
00379  * Returns 0 on success, -1 on failure.
00380  */
00381 NPY_NO_EXPORT int
00382 PyArray_PrepareTwoRawArrayIter(int ndim, npy_intp *shape,
00383                             char *dataA, npy_intp *stridesA,
00384                             char *dataB, npy_intp *stridesB,
00385                             int *out_ndim, npy_intp *out_shape,
00386                             char **out_dataA, npy_intp *out_stridesA,
00387                             char **out_dataB, npy_intp *out_stridesB);
00388 
00389 /*
00390  * The same as PyArray_PrepareOneRawArrayIter, but for three
00391  * operands instead of one. Any broadcasting of the three operands
00392  * should have already been done before calling this function,
00393  * as the ndim and shape is only specified once for all operands.
00394  *
00395  * Only the strides of the first operand are used to reorder
00396  * the dimensions, no attempt to consider all the strides together
00397  * is made, as is done in the NpyIter object.
00398  *
00399  * You can use this together with NPY_RAW_ITER_START and
00400  * NPY_RAW_ITER_THREE_NEXT to handle the looping boilerplate of everything
00401  * but the innermost loop (which is for idim == 0).
00402  *
00403  * Returns 0 on success, -1 on failure.
00404  */
00405 NPY_NO_EXPORT int
00406 PyArray_PrepareThreeRawArrayIter(int ndim, npy_intp *shape,
00407                             char *dataA, npy_intp *stridesA,
00408                             char *dataB, npy_intp *stridesB,
00409                             char *dataC, npy_intp *stridesC,
00410                             int *out_ndim, npy_intp *out_shape,
00411                             char **out_dataA, npy_intp *out_stridesA,
00412                             char **out_dataB, npy_intp *out_stridesB,
00413                             char **out_dataC, npy_intp *out_stridesC);
00414 
00415 /*
00416  * Return number of elements that must be peeled from
00417  * the start of 'addr' with 'nvals' elements of size 'esize'
00418  * in order to reach 'alignment'.
00419  * alignment must be a power of two.
00420  * see npy_blocked_end for an example
00421  */
00422 static NPY_INLINE npy_uintp
00423 npy_aligned_block_offset(const void * addr, const npy_uintp esize,
00424                          const npy_uintp alignment, const npy_uintp nvals)
00425 {
00426     const npy_uintp offset = (npy_uintp)addr & (alignment - 1);
00427     npy_uintp peel = offset ? (alignment - offset) / esize : 0;
00428     peel = nvals < peel ? nvals : peel;
00429     return peel;
00430 }
00431 
00432 /*
00433  * Return upper loop bound for an array of 'nvals' elements
00434  * of size 'esize' peeled by 'offset' elements and blocking to
00435  * a vector size of 'vsz' in bytes
00436  *
00437  * example usage:
00438  * npy_intp i;
00439  * double v[101];
00440  * npy_intp esize = sizeof(v[0]);
00441  * npy_intp peel = npy_aligned_block_offset(v, esize, 16, n);
00442  * // peel to alignment 16
00443  * for (i = 0; i < peel; i++)
00444  *   <scalar-op>
00445  * // simd vectorized operation
00446  * for (; i < npy_blocked_end(peel, esize, 16, n); i += 16 / esize)
00447  *   <blocked-op>
00448  * // handle scalar rest
00449  * for(; i < n; i++)
00450  *   <scalar-op>
00451  */
00452 static NPY_INLINE npy_uintp
00453 npy_blocked_end(const npy_uintp offset, const npy_uintp esize,
00454                 const npy_uintp vsz, const npy_uintp nvals)
00455 {
00456     return nvals - offset - (nvals - offset) % (vsz / esize);
00457 }
00458 
00459 
00460 /* byte swapping functions */
00461 static NPY_INLINE npy_uint16
00462 npy_bswap2(npy_uint16 x)
00463 {
00464     return ((x & 0xffu) << 8) | (x >> 8);
00465 }
00466 
00467 /*
00468  * treat as int16 and byteswap unaligned memory,
00469  * some cpus don't support unaligned access
00470  */
00471 static NPY_INLINE void
00472 npy_bswap2_unaligned(char * x)
00473 {
00474     char a = x[0];
00475     x[0] = x[1];
00476     x[1] = a;
00477 }
00478 
00479 static NPY_INLINE npy_uint32
00480 npy_bswap4(npy_uint32 x)
00481 {
00482 #ifdef HAVE___BUILTIN_BSWAP32
00483     return __builtin_bswap32(x);
00484 #else
00485     return ((x & 0xffu) << 24) | ((x & 0xff00u) << 8) |
00486            ((x & 0xff0000u) >> 8) | (x >> 24);
00487 #endif
00488 }
00489 
00490 static NPY_INLINE void
00491 npy_bswap4_unaligned(char * x)
00492 {
00493     char a = x[0];
00494     x[0] = x[3];
00495     x[3] = a;
00496     a = x[1];
00497     x[1] = x[2];
00498     x[2] = a;
00499 }
00500 
00501 static NPY_INLINE npy_uint64
00502 npy_bswap8(npy_uint64 x)
00503 {
00504 #ifdef HAVE___BUILTIN_BSWAP64
00505     return __builtin_bswap64(x);
00506 #else
00507     return ((x & 0xffULL) << 56) |
00508            ((x & 0xff00ULL) << 40) |
00509            ((x & 0xff0000ULL) << 24) |
00510            ((x & 0xff000000ULL) << 8) |
00511            ((x & 0xff00000000ULL) >> 8) |
00512            ((x & 0xff0000000000ULL) >> 24) |
00513            ((x & 0xff000000000000ULL) >> 40) |
00514            ( x >> 56);
00515 #endif
00516 }
00517 
00518 static NPY_INLINE void
00519 npy_bswap8_unaligned(char * x)
00520 {
00521     char a = x[0]; x[0] = x[7]; x[7] = a;
00522     a = x[1]; x[1] = x[6]; x[6] = a;
00523     a = x[2]; x[2] = x[5]; x[5] = a;
00524     a = x[3]; x[3] = x[4]; x[4] = a;
00525 }
00526 
00527 
/*
 * Start raw iteration.
 *
 * Zeroes the first 'ndim' entries of 'coord' and opens a do-loop.  The
 * loop is deliberately left open: it is closed by the
 * "} while (...)" that ends one of the NPY_RAW_ITER_*_NEXT macros, so
 * the two must always be used as a pair around the innermost loop body.
 *
 * 'idim' and 'shape' are accepted for symmetry with the *_NEXT macros
 * but are not used here.  'coord' is parenthesized inside sizeof so
 * that an expression argument (e.g. "base + 1") expands correctly.
 */
#define NPY_RAW_ITER_START(idim, ndim, coord, shape) \
        memset((coord), 0, (ndim) * sizeof((coord)[0])); \
        do {
00532 
/*
 * Increment to the next n-dimensional coordinate for one raw array.
 *
 * Closes the do-loop opened by NPY_RAW_ITER_START.  Starting at axis 1
 * (axis 0 is the caller's innermost loop), the coordinate is advanced
 * odometer-style: an axis that still has room moves 'data' forward by
 * its stride and stops the carry; an exhausted axis is reset to 0,
 * 'data' is rewound to the start of that axis, and the carry continues.
 * Iteration ends once the carry runs past the last axis
 * (idim == ndim).
 */
#define NPY_RAW_ITER_ONE_NEXT(idim, ndim, coord, shape, data, strides) \
            for ((idim) = 1; (idim) < (ndim); ++(idim)) { \
                if (++(coord)[idim] != (shape)[idim]) { \
                    (data) += (strides)[idim]; \
                    break; \
                } \
                (coord)[idim] = 0; \
                (data) -= ((shape)[idim] - 1) * (strides)[idim]; \
            } \
        } while ((idim) < (ndim))
00546 
/*
 * Increment to the next n-dimensional coordinate for two raw arrays.
 *
 * Closes the do-loop opened by NPY_RAW_ITER_START.  Works like
 * NPY_RAW_ITER_ONE_NEXT, with both data pointers moved in lock-step:
 * an axis with room advances each pointer by its own stride and stops;
 * an exhausted axis is reset to 0 and each pointer is rewound to the
 * start of that axis before carrying into the next one.
 */
#define NPY_RAW_ITER_TWO_NEXT(idim, ndim, coord, shape, \
                              dataA, stridesA, dataB, stridesB) \
            for ((idim) = 1; (idim) < (ndim); ++(idim)) { \
                if (++(coord)[idim] != (shape)[idim]) { \
                    (dataA) += (stridesA)[idim]; \
                    (dataB) += (stridesB)[idim]; \
                    break; \
                } \
                (coord)[idim] = 0; \
                (dataA) -= ((shape)[idim] - 1) * (stridesA)[idim]; \
                (dataB) -= ((shape)[idim] - 1) * (stridesB)[idim]; \
            } \
        } while ((idim) < (ndim))
00563 
/*
 * Increment to the next n-dimensional coordinate for three raw arrays.
 *
 * Closes the do-loop opened by NPY_RAW_ITER_START.  Works like
 * NPY_RAW_ITER_ONE_NEXT, with all three data pointers moved in
 * lock-step, each by its own strides.
 */
#define NPY_RAW_ITER_THREE_NEXT(idim, ndim, coord, shape, \
                              dataA, stridesA, \
                              dataB, stridesB, \
                              dataC, stridesC) \
            for ((idim) = 1; (idim) < (ndim); ++(idim)) { \
                if (++(coord)[idim] != (shape)[idim]) { \
                    (dataA) += (stridesA)[idim]; \
                    (dataB) += (stridesB)[idim]; \
                    (dataC) += (stridesC)[idim]; \
                    break; \
                } \
                (coord)[idim] = 0; \
                (dataA) -= ((shape)[idim] - 1) * (stridesA)[idim]; \
                (dataB) -= ((shape)[idim] - 1) * (stridesB)[idim]; \
                (dataC) -= ((shape)[idim] - 1) * (stridesC)[idim]; \
            } \
        } while ((idim) < (ndim))
00584 
/*
 * Increment to the next n-dimensional coordinate for four raw arrays.
 *
 * Closes the do-loop opened by NPY_RAW_ITER_START.  Works like
 * NPY_RAW_ITER_ONE_NEXT, with all four data pointers moved in
 * lock-step, each by its own strides.
 */
#define NPY_RAW_ITER_FOUR_NEXT(idim, ndim, coord, shape, \
                              dataA, stridesA, \
                              dataB, stridesB, \
                              dataC, stridesC, \
                              dataD, stridesD) \
            for ((idim) = 1; (idim) < (ndim); ++(idim)) { \
                if (++(coord)[idim] != (shape)[idim]) { \
                    (dataA) += (stridesA)[idim]; \
                    (dataB) += (stridesB)[idim]; \
                    (dataC) += (stridesC)[idim]; \
                    (dataD) += (stridesD)[idim]; \
                    break; \
                } \
                (coord)[idim] = 0; \
                (dataA) -= ((shape)[idim] - 1) * (stridesA)[idim]; \
                (dataB) -= ((shape)[idim] - 1) * (stridesB)[idim]; \
                (dataC) -= ((shape)[idim] - 1) * (stridesC)[idim]; \
                (dataD) -= ((shape)[idim] - 1) * (stridesD)[idim]; \
            } \
        } while ((idim) < (ndim))
00608 
00609 
00610 /*
00611  *            TRIVIAL ITERATION
00612  *
00613  * In some cases when the iteration order isn't important, iteration over
00614  * arrays is trivial.  This is the case when:
00615  *   * The array has 0 or 1 dimensions.
00616  *   * The array is C or Fortran contiguous.
00617  * Use of an iterator can be skipped when this occurs.  These macros assist
00618  * in detecting and taking advantage of the situation.  Note that it may
00619  * be worthwhile to further check if the stride is a contiguous stride
00620  * and take advantage of that.
00621  *
00622  * Here is example code for a single array:
00623  *
00624  *      if (PyArray_TRIVIALLY_ITERABLE(self)) {
00625  *          char *data;
00626  *          npy_intp count, stride;
00627  *
00628  *          PyArray_PREPARE_TRIVIAL_ITERATION(self, count, data, stride);
00629  *
00630  *          while (count--) {
00631  *              // Use the data pointer
00632  *
00633  *              data += stride;
00634  *          }
00635  *      }
00636  *      else {
00637  *          // Create iterator, etc...
00638  *      }
00639  *
00640  * Here is example code for a pair of arrays:
00641  *
00642  *      if (PyArray_TRIVIALLY_ITERABLE_PAIR(a1, a2)) {
00643  *          char *data1, *data2;
00644  *          npy_intp count, stride1, stride2;
00645  *
00646  *          PyArray_PREPARE_TRIVIAL_PAIR_ITERATION(a1, a2, count,
00647  *                                  data1, data2, stride1, stride2);
00648  *
00649  *          while (count--) {
00650  *              // Use the data1 and data2 pointers
00651  *
00652  *              data1 += stride1;
00653  *              data2 += stride2;
00654  *          }
00655  *      }
00656  *      else {
00657  *          // Create iterator, etc...
00658  *      }
00659  */
00660 
/*
 * True when arr1 and arr2 have the same number of dimensions,
 * PyArray_CompareLists returns non-zero for their dimension lists, and
 * the two arrays have at least one of the C-contiguous /
 * Fortran-contiguous flags in common.
 *
 * Note: Equivalently iterable macro requires one of arr1 or arr2 be
 *       trivially iterable to be valid.
 */
#define PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2) ( \
            PyArray_NDIM(arr1) == PyArray_NDIM(arr2) && \
            PyArray_CompareLists(PyArray_DIMS(arr1), PyArray_DIMS(arr2), \
                                 PyArray_NDIM(arr1)) && \
            ((PyArray_FLAGS(arr1) & PyArray_FLAGS(arr2) & \
              (NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS)) != 0) \
        )
00675 
/*
 * True when 'arr' can be walked with a single pointer and stride:
 * it has at most one dimension, or it is flagged C-contiguous or
 * Fortran-contiguous.
 */
#define PyArray_TRIVIALLY_ITERABLE(arr) ( \
            !(PyArray_NDIM(arr) > 1 && \
              !PyArray_CHKFLAGS(arr, NPY_ARRAY_C_CONTIGUOUS) && \
              !PyArray_CHKFLAGS(arr, NPY_ARRAY_F_CONTIGUOUS)) \
        )
/*
 * Fills in the loop variables for trivial iteration over one array:
 *
 * count:   set to PyArray_SIZE(arr).
 * data:    set to PyArray_BYTES(arr).
 * stride:  0 for a 0-d array, the array's own stride for a 1-d array,
 *          and the item size otherwise.
 *
 * The three assignments are wrapped in a braced block, like the PAIR and
 * TRIPLE variants, so that the macro acts as one statement when it is
 * the body of an unbraced 'if' or loop.
 */
#define PyArray_PREPARE_TRIVIAL_ITERATION(arr, count, data, stride) { \
                    count = PyArray_SIZE(arr); \
                    data = PyArray_BYTES(arr); \
                    stride = ((PyArray_NDIM(arr) == 0) ? 0 : \
                                    ((PyArray_NDIM(arr) == 1) ? \
                                            PyArray_STRIDE(arr, 0) : \
                                            PyArray_ITEMSIZE(arr))); \
                }
00688 
00689 
/*
 * True when arr1 and arr2 can be iterated together trivially:
 * arr1 is trivially iterable, and additionally either
 *   - arr2 is 0-dimensional, or
 *   - arr1 and arr2 are equivalently iterable, or
 *   - arr1 is 0-dimensional and arr2 is trivially iterable.
 */
#define PyArray_TRIVIALLY_ITERABLE_PAIR(arr1, arr2) ( \
            PyArray_TRIVIALLY_ITERABLE(arr1) && ( \
                PyArray_NDIM(arr2) == 0 || \
                PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2) || \
                (PyArray_NDIM(arr1) == 0 && PyArray_TRIVIALLY_ITERABLE(arr2)) \
            ) \
        )
/*
 * Fills in the loop variables for trivial iteration over a pair of
 * arrays.
 *
 * count:     the size of arr1 if it is zero or larger than the size of
 *            arr2, otherwise the size of arr2.
 * data1/2:   set to PyArray_BYTES of the respective array.
 * stride1/2: 0 when the array holds exactly one element, the array's own
 *            stride when it is 1-dimensional, and the item size
 *            otherwise.
 */
#define PyArray_PREPARE_TRIVIAL_PAIR_ITERATION(arr1, arr2, \
                                        count, \
                                        data1, data2, \
                                        stride1, stride2) { \
            npy_intp size1 = PyArray_SIZE(arr1); \
            npy_intp size2 = PyArray_SIZE(arr2); \
            count = (size1 != 0 && size1 <= size2) ? size2 : size1; \
            data1 = PyArray_BYTES(arr1); \
            data2 = PyArray_BYTES(arr2); \
            stride1 = (size1 != 1) ? \
                        ((PyArray_NDIM(arr1) != 1) ? PyArray_ITEMSIZE(arr1) : \
                                                     PyArray_STRIDE(arr1, 0)) : \
                        0; \
            stride2 = (size2 != 1) ? \
                        ((PyArray_NDIM(arr2) != 1) ? PyArray_ITEMSIZE(arr2) : \
                                                     PyArray_STRIDE(arr2, 0)) : \
                        0; \
        }
00715 
/*
 * True when arr1, arr2 and arr3 can be iterated together trivially:
 * arr1 is trivially iterable, and additionally one of
 *   - arr2 is 0-dimensional, and arr3 is 0-dimensional or equivalently
 *     iterable with arr1;
 *   - arr1 and arr2 are equivalently iterable, and arr3 is
 *     0-dimensional or equivalently iterable with arr1;
 *   - arr1 is 0-dimensional, arr2 is trivially iterable, and arr3 is
 *     0-dimensional or equivalently iterable with arr2.
 */
#define PyArray_TRIVIALLY_ITERABLE_TRIPLE(arr1, arr2, arr3) ( \
            PyArray_TRIVIALLY_ITERABLE(arr1) && ( \
                (PyArray_NDIM(arr2) == 0 && \
                    (PyArray_NDIM(arr3) == 0 || \
                     PyArray_EQUIVALENTLY_ITERABLE(arr1, arr3))) || \
                (PyArray_EQUIVALENTLY_ITERABLE(arr1, arr2) && \
                    (PyArray_NDIM(arr3) == 0 || \
                     PyArray_EQUIVALENTLY_ITERABLE(arr1, arr3))) || \
                (PyArray_NDIM(arr1) == 0 && \
                    PyArray_TRIVIALLY_ITERABLE(arr2) && \
                    (PyArray_NDIM(arr3) == 0 || \
                     PyArray_EQUIVALENTLY_ITERABLE(arr2, arr3))) \
            ) \
        )
00736 
/*
 * Fills in the loop variables for trivial iteration over three arrays.
 *
 * count:       first chosen between arr1 and arr2 (the size of arr1 if
 *              it is zero or larger than the size of arr2, otherwise the
 *              size of arr2); then replaced by the size of arr3 if that
 *              is zero or larger than the value chosen so far.
 * data1/2/3:   set to PyArray_BYTES of the respective array.
 * stride1/2/3: 0 when the array holds exactly one element, the array's
 *              own stride when it is 1-dimensional, and the item size
 *              otherwise.
 */
#define PyArray_PREPARE_TRIVIAL_TRIPLE_ITERATION(arr1, arr2, arr3, \
                                        count, \
                                        data1, data2, data3, \
                                        stride1, stride2, stride3) { \
            npy_intp size1 = PyArray_SIZE(arr1); \
            npy_intp size2 = PyArray_SIZE(arr2); \
            npy_intp size3 = PyArray_SIZE(arr3); \
            count = (size1 != 0 && size1 <= size2) ? size2 : size1; \
            count = (size3 != 0 && size3 <= count) ? count : size3; \
            data1 = PyArray_BYTES(arr1); \
            data2 = PyArray_BYTES(arr2); \
            data3 = PyArray_BYTES(arr3); \
            stride1 = (size1 != 1) ? \
                        ((PyArray_NDIM(arr1) != 1) ? PyArray_ITEMSIZE(arr1) : \
                                                     PyArray_STRIDE(arr1, 0)) : \
                        0; \
            stride2 = (size2 != 1) ? \
                        ((PyArray_NDIM(arr2) != 1) ? PyArray_ITEMSIZE(arr2) : \
                                                     PyArray_STRIDE(arr2, 0)) : \
                        0; \
            stride3 = (size3 != 1) ? \
                        ((PyArray_NDIM(arr3) != 1) ? PyArray_ITEMSIZE(arr3) : \
                                                     PyArray_STRIDE(arr3, 0)) : \
                        0; \
        }
00759 
00760 #endif