numpy  2.0.0
src/multiarray/nditer_impl.h
Go to the documentation of this file.
00001 /*
00002  * This is a PRIVATE INTERNAL NumPy header, intended to be used *ONLY*
00003  * by the iterator implementation code. All other internal NumPy code
00004  * should use the exposed iterator API.
00005  */
00006 #ifndef NPY_ITERATOR_IMPLEMENTATION_CODE
00007 #error "This header is intended for use ONLY by iterator implementation code."
00008 #endif
00009 
00010 #ifndef _NPY_PRIVATE__NDITER_IMPL_H_
00011 #define _NPY_PRIVATE__NDITER_IMPL_H_
00012 
00013 #define PY_SSIZE_T_CLEAN
00014 #include "Python.h"
00015 #include "structmember.h"
00016 
00017 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
00018 #define _MULTIARRAYMODULE
00019 #include <numpy/arrayobject.h>
00020 #include <npy_pycompat.h>
00021 #include "convert_datatype.h"
00022 
00023 #include "lowlevel_strided_loops.h"
00024 
/********** ITERATOR CONSTRUCTION TIMING **************/
/*
 * Set NPY_IT_CONSTRUCTION_TIMING to 1 to have the macros below sample the
 * x86 time-stamp counter and print the differences with printf.  When it
 * is 0, every timing macro expands to nothing, so call sites do not need
 * their own #if guards.
 */
#define NPY_IT_CONSTRUCTION_TIMING 0

#if NPY_IT_CONSTRUCTION_TIMING
/* Store the current "rdtsc" counter value (GCC-style inline asm) in var. */
#define NPY_IT_TIME_POINT(var) { \
            unsigned int hi, lo; \
            __asm__ __volatile__ ( \
                "rdtsc" \
                : "=d" (hi), "=a" (lo)); \
            var = (((unsigned long long)hi) << 32) | lo; \
        }
/*
 * Print a "start" line labelled with the name of var and remember var
 * in c_temp, which must be declared by the code using the macro.
 */
#define NPY_IT_PRINT_TIME_START(var) { \
            printf("%30s: start\n", #var); \
            c_temp = var; \
        }
/*
 * Print the number of clocks between c_temp and var, labelled with the
 * name of var, then make var the new reference point in c_temp.
 */
#define NPY_IT_PRINT_TIME_VAR(var) { \
            printf("%30s: %6.0f clocks\n", #var, \
                    ((double)((var)-c_temp))); \
            c_temp = var; \
        }
#else
#define NPY_IT_TIME_POINT(var)
#define NPY_IT_PRINT_TIME_START(var)
#define NPY_IT_PRINT_TIME_VAR(var)
#endif

/******************************************************/
00050 
/********** PRINTF DEBUG TRACING **************/
/* Set to 1 to route the NPY_IT_DBG_PRINT* macros to printf. */
#define NPY_IT_DBG_TRACING 0

#if !NPY_IT_DBG_TRACING
/* Tracing disabled: the macros vanish and their arguments are not evaluated */
#define NPY_IT_DBG_PRINT(s)
#define NPY_IT_DBG_PRINT1(s, p1)
#define NPY_IT_DBG_PRINT2(s, p1, p2)
#define NPY_IT_DBG_PRINT3(s, p1, p2, p3)
#else
/* Tracing enabled: s is a plain string for PRINT, a format for PRINT1..3 */
#define NPY_IT_DBG_PRINT(s) printf("%s", s)
#define NPY_IT_DBG_PRINT1(s, p1) printf(s, p1)
#define NPY_IT_DBG_PRINT2(s, p1, p2) printf(s, p1, p2)
#define NPY_IT_DBG_PRINT3(s, p1, p2, p3) printf(s, p1, p2, p3)
#endif
/**********************************************/
00066 
/*
 * Rounds up a number of bytes to be divisible by sizeof intp.
 *
 * The argument is parenthesized so that expression arguments such as
 * `a << b` or `c ? x : y` are rounded as a whole.
 */
#if NPY_SIZEOF_INTP == 4
#define NPY_INTP_ALIGNED(size) (((size) + 0x3)&(-0x4))
#else
#define NPY_INTP_ALIGNED(size) (((size) + 0x7)&(-0x8))
#endif
00073 
/*
 * Internal iterator flags.
 *
 * Each flag occupies one bit; they are written as shifts so the bit
 * position is visible at a glance.
 */

/* bit 0: the perm is the identity */
#define NPY_ITFLAG_IDENTPERM          (1 << 0)
/* bit 1: the perm has negative entries (indicating flipped axes) */
#define NPY_ITFLAG_NEGPERM            (1 << 1)
/* bit 2: the iterator is tracking an index */
#define NPY_ITFLAG_HASINDEX           (1 << 2)
/* bit 3: the iterator is tracking a multi-index */
#define NPY_ITFLAG_HASMULTIINDEX      (1 << 3)
/* bit 4: the iteration order was forced on construction */
#define NPY_ITFLAG_FORCEDORDER        (1 << 4)
/* bit 5: the inner loop is handled outside the iterator */
#define NPY_ITFLAG_EXLOOP             (1 << 5)
/* bit 6: the iterator is ranged */
#define NPY_ITFLAG_RANGE              (1 << 6)
/* bit 7: the iterator is buffered */
#define NPY_ITFLAG_BUFFER             (1 << 7)
/* bit 8: grow the buffered inner loop when possible */
#define NPY_ITFLAG_GROWINNER          (1 << 8)
/* bit 9: there is just one iteration, iternext can specialize for that */
#define NPY_ITFLAG_ONEITERATION       (1 << 9)
/* bit 10: delay buffer allocation until the first Reset* call */
#define NPY_ITFLAG_DELAYBUF           (1 << 10)
/* bit 11: iteration needs API access during iternext */
#define NPY_ITFLAG_NEEDSAPI           (1 << 11)
/* bit 12: iteration includes one or more operands being reduced */
#define NPY_ITFLAG_REDUCE             (1 << 12)
/* bit 13: reduce iteration need not recalculate reduce loops next time */
#define NPY_ITFLAG_REUSE_REDUCE_LOOPS (1 << 13)
00104 
/*
 * Internal iterator per-operand iterator flags.
 *
 * One bit per flag, written as shifts so the bit position is explicit.
 */

/* bit 0: the operand will be written to */
#define NPY_OP_ITFLAG_WRITE        (1 << 0)
/* bit 1: the operand will be read from */
#define NPY_OP_ITFLAG_READ         (1 << 1)
/* bit 2: the operand needs type conversion/byte swapping/alignment */
#define NPY_OP_ITFLAG_CAST         (1 << 2)
/* bit 3: the operand never needs buffering */
#define NPY_OP_ITFLAG_BUFNEVER     (1 << 3)
/* bit 4: the operand is aligned */
#define NPY_OP_ITFLAG_ALIGNED      (1 << 4)
/* bit 5: the operand is being reduced */
#define NPY_OP_ITFLAG_REDUCE       (1 << 5)
/* bit 6: the operand is for temporary use, without a backing array */
#define NPY_OP_ITFLAG_VIRTUAL      (1 << 6)
/* bit 7: the operand requires masking when copying buffer -> array */
#define NPY_OP_ITFLAG_WRITEMASKED  (1 << 7)
/* bit 8: the operand's data pointer is pointing into its buffer */
#define NPY_OP_ITFLAG_USINGBUFFER  (1 << 8)
00125 
00126 /*
00127  * The data layout of the iterator is fully specified by
00128  * a triple (itflags, ndim, nop).  These three variables
00129  * are expected to exist in all functions calling these macros,
00130  * either as true variables initialized to the correct values
00131  * from the iterator, or as constants in the case of specialized
00132  * functions such as the various iternext functions.
00133  */
00134 
00135 struct NpyIter_InternalOnly {
00136     /* Initial fixed position data */
00137     npy_uint32 itflags;
00138     npy_uint8 ndim, nop;
00139     npy_int8 maskop;
00140     npy_intp itersize, iterstart, iterend;
00141     /* iterindex is only used if RANGED or BUFFERED is set */
00142     npy_intp iterindex;
00143     /* The rest is variable */
00144     char iter_flexdata;
00145 };
00146 
00147 typedef struct NpyIter_AD NpyIter_AxisData;
00148 typedef struct NpyIter_BD NpyIter_BufferData;
00149 
00150 typedef npy_int16 npyiter_opitflags;
00151 
/*
 * Byte sizes of the iterator members.
 *
 * Every macro takes the (itflags, ndim, nop) triple even when it ignores
 * part of it.  Arguments are parenthesized wherever they are used so that
 * expression arguments (e.g. `nop_a + nop_b`, `flags | extra`) evaluate
 * as a whole.  Pointer-valued members are sized with NPY_SIZEOF_INTP.
 */
#define NIT_PERM_SIZEOF(itflags, ndim, nop) \
        NPY_INTP_ALIGNED(NPY_MAXDIMS)
#define NIT_DTYPES_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*(nop))
#define NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*((nop)+1))
#define NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*((nop)+1))
#define NIT_OPERANDS_SIZEOF(itflags, ndim, nop) \
        ((NPY_SIZEOF_INTP)*(nop))
#define NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop) \
        (NPY_INTP_ALIGNED(sizeof(npyiter_opitflags) * (nop)))
/*
 * Zero unless the iterator is buffered; otherwise the 6 fixed fields of
 * struct NpyIter_BD plus the 9 per-operand arrays reached via NBF_*.
 */
#define NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop) \
        (((itflags)&NPY_ITFLAG_BUFFER) ? \
                ((NPY_SIZEOF_INTP)*(6 + 9*(nop))) : 0)
00167 
/*
 * Byte offsets of the iterator members starting from iter->iter_flexdata.
 *
 * The members are laid out back to back in the order
 *   perm, dtypes, resetdataptr, baseoffsets, operands, opitflags,
 *   bufferdata, axisdata
 * so each offset is the previous member's offset plus its size.
 */
#define NIT_PERM_OFFSET() (0)
#define NIT_DTYPES_OFFSET(itflags, ndim, nop) ( \
        NIT_PERM_OFFSET() + \
        NIT_PERM_SIZEOF(itflags, ndim, nop))
#define NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop) ( \
        NIT_DTYPES_OFFSET(itflags, ndim, nop) + \
        NIT_DTYPES_SIZEOF(itflags, ndim, nop))
#define NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop) ( \
        NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop) + \
        NIT_RESETDATAPTR_SIZEOF(itflags, ndim, nop))
#define NIT_OPERANDS_OFFSET(itflags, ndim, nop) ( \
        NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop) + \
        NIT_BASEOFFSETS_SIZEOF(itflags, ndim, nop))
#define NIT_OPITFLAGS_OFFSET(itflags, ndim, nop) ( \
        NIT_OPERANDS_OFFSET(itflags, ndim, nop) + \
        NIT_OPERANDS_SIZEOF(itflags, ndim, nop))
#define NIT_BUFFERDATA_OFFSET(itflags, ndim, nop) ( \
        NIT_OPITFLAGS_OFFSET(itflags, ndim, nop) + \
        NIT_OPITFLAGS_SIZEOF(itflags, ndim, nop))
#define NIT_AXISDATA_OFFSET(itflags, ndim, nop) ( \
        NIT_BUFFERDATA_OFFSET(itflags, ndim, nop) + \
        NIT_BUFFERDATA_SIZEOF(itflags, ndim, nop))
00192 
/*
 * Internal-only ITERATOR DATA MEMBER ACCESS
 *
 * The fixed-field accessors only need the iterator pointer.  The
 * accessors for the variable-size region additionally read `itflags`,
 * `ndim` and `nop` from the scope in which the macro is used; those must
 * hold the iterator's values.
 *
 * The iterator argument is parenthesized in every macro so that pointer
 * expressions such as `iters + i` can be passed.
 */
#define NIT_ITFLAGS(iter) \
        ((iter)->itflags)
#define NIT_NDIM(iter) \
        ((iter)->ndim)
#define NIT_NOP(iter) \
        ((iter)->nop)
#define NIT_MASKOP(iter) \
        ((iter)->maskop)
#define NIT_ITERSIZE(iter) \
        ((iter)->itersize)
#define NIT_ITERSTART(iter) \
        ((iter)->iterstart)
#define NIT_ITEREND(iter) \
        ((iter)->iterend)
#define NIT_ITERINDEX(iter) \
        ((iter)->iterindex)
#define NIT_PERM(iter)  ((npy_int8 *)( \
        &(iter)->iter_flexdata + NIT_PERM_OFFSET()))
#define NIT_DTYPES(iter) ((PyArray_Descr **)( \
        &(iter)->iter_flexdata + NIT_DTYPES_OFFSET(itflags, ndim, nop)))
#define NIT_RESETDATAPTR(iter) ((char **)( \
        &(iter)->iter_flexdata + NIT_RESETDATAPTR_OFFSET(itflags, ndim, nop)))
#define NIT_BASEOFFSETS(iter) ((npy_intp *)( \
        &(iter)->iter_flexdata + NIT_BASEOFFSETS_OFFSET(itflags, ndim, nop)))
#define NIT_OPERANDS(iter) ((PyArrayObject **)( \
        &(iter)->iter_flexdata + NIT_OPERANDS_OFFSET(itflags, ndim, nop)))
#define NIT_OPITFLAGS(iter) ((npyiter_opitflags *)( \
        &(iter)->iter_flexdata + NIT_OPITFLAGS_OFFSET(itflags, ndim, nop)))
#define NIT_BUFFERDATA(iter) ((NpyIter_BufferData *)( \
        &(iter)->iter_flexdata + NIT_BUFFERDATA_OFFSET(itflags, ndim, nop)))
#define NIT_AXISDATA(iter) ((NpyIter_AxisData *)( \
        &(iter)->iter_flexdata + NIT_AXISDATA_OFFSET(itflags, ndim, nop)))
00226 
00227 /* Internal-only BUFFERDATA MEMBER ACCESS */
00228 struct NpyIter_BD {
00229     npy_intp buffersize, size, bufiterend,
00230              reduce_pos, reduce_outersize, reduce_outerdim;
00231     npy_intp bd_flexdata;
00232 };
/* Fixed fields of the buffer data */
#define NBF_BUFFERSIZE(bd)        ((bd)->buffersize)
#define NBF_SIZE(bd)              ((bd)->size)
#define NBF_BUFITEREND(bd)        ((bd)->bufiterend)
#define NBF_REDUCE_POS(bd)        ((bd)->reduce_pos)
#define NBF_REDUCE_OUTERSIZE(bd)  ((bd)->reduce_outersize)
#define NBF_REDUCE_OUTERDIM(bd)   ((bd)->reduce_outerdim)

/*
 * Per-operand arrays.  Array number k starts k*nop intp-sized slots past
 * bd_flexdata; `nop` is read from the scope where the macro is used.
 */
#define NBF_STRIDES(bd) (&(bd)->bd_flexdata + 0)
#define NBF_PTRS(bd) \
        ((char **)(&(bd)->bd_flexdata + 1*(nop)))
#define NBF_REDUCE_OUTERSTRIDES(bd) \
        ((&(bd)->bd_flexdata + 2*(nop)))
#define NBF_REDUCE_OUTERPTRS(bd) \
        ((char **)(&(bd)->bd_flexdata + 3*(nop)))
#define NBF_READTRANSFERFN(bd) \
        ((PyArray_StridedUnaryOp **)(&(bd)->bd_flexdata + 4*(nop)))
#define NBF_READTRANSFERDATA(bd) \
        ((NpyAuxData **)(&(bd)->bd_flexdata + 5*(nop)))
#define NBF_WRITETRANSFERFN(bd) \
        ((PyArray_StridedUnaryOp **)(&(bd)->bd_flexdata + 6*(nop)))
#define NBF_WRITETRANSFERDATA(bd) \
        ((NpyAuxData **)(&(bd)->bd_flexdata + 7*(nop)))
#define NBF_BUFFERS(bd) \
        ((char **)(&(bd)->bd_flexdata + 8*(nop)))
00257 
00258 /* Internal-only AXISDATA MEMBER ACCESS. */
00259 struct NpyIter_AD {
00260     npy_intp shape, index;
00261     npy_intp ad_flexdata;
00262 };
/*
 * Accessors for one AXISDATA record.  NAD_PTRS and NAD_NSTRIDES read
 * `nop` (and NAD_NSTRIDES also `itflags`) from the scope where the macro
 * is used.
 */
#define NAD_SHAPE(axisdata) ((axisdata)->shape)
#define NAD_INDEX(axisdata) ((axisdata)->index)
#define NAD_STRIDES(axisdata) ( \
        &(axisdata)->ad_flexdata + 0)
/*
 * The ptrs array starts nop+1 intp slots after the strides array.  The
 * offset is applied in npy_intp units and the result is cast afterwards,
 * the same way NBF_PTRS is written.
 */
#define NAD_PTRS(axisdata) ((char **) \
        (&(axisdata)->ad_flexdata + 1*((nop)+1)))

/* Number of strides in use: one per operand, plus one if an index is tracked */
#define NAD_NSTRIDES() \
        ((nop) + (((itflags)&NPY_ITFLAG_HASINDEX) ? 1 : 0))
00272 
/*
 * Size in bytes of one AXISDATA struct within the iterator: every slot
 * is NPY_SIZEOF_INTP bytes wide.
 */
#define NIT_AXISDATA_SIZEOF(itflags, ndim, nop) \
        ((1 + 1             /* intp shape, intp index */ \
          + 2*((nop)+1)     /* intp stride[nop+1] AND char* ptr[nop+1] */ \
         )*NPY_SIZEOF_INTP)
00282 
/*
 * Macros to index and advance an AXISDATA pointer by a specified count.
 * Both require that a variable sizeof_axisdata be in scope and previously
 * initialized to NIT_AXISDATA_SIZEOF(itflags, ndim, nop).
 *
 * NIT_INDEX_AXISDATA yields the record `index` records away from axisdata.
 * NIT_ADVANCE_AXISDATA assigns that record back to axisdata; its expansion
 * is fully parenthesized so it can also appear inside a larger expression.
 */
#define NIT_INDEX_AXISDATA(axisdata, index) ((NpyIter_AxisData *) \
        (((char *)(axisdata)) + (index)*sizeof_axisdata))
#define NIT_ADVANCE_AXISDATA(axisdata, count) \
        ((axisdata) = NIT_INDEX_AXISDATA(axisdata, count))
00292 
/*
 * Size of the whole iterator in bytes: the fixed struct, everything in
 * the variable region up to the axisdata, and one AXISDATA record per
 * dimension.  At least one record is counted even when ndim is 0.
 * ndim is parenthesized so that an expression argument is tested and
 * multiplied as a whole.
 */
#define NIT_SIZEOF_ITERATOR(itflags, ndim, nop) ( \
        sizeof(struct NpyIter_InternalOnly) + \
        NIT_AXISDATA_OFFSET(itflags, ndim, nop) + \
        NIT_AXISDATA_SIZEOF(itflags, ndim, nop)*((ndim) ? (ndim) : 1))
00298 
00299 /* Internal helper functions shared between implementation files */
00300 NPY_NO_EXPORT void
00301 npyiter_coalesce_axes(NpyIter *iter);
00302 NPY_NO_EXPORT int
00303 npyiter_allocate_buffers(NpyIter *iter, char **errmsg);
00304 NPY_NO_EXPORT void
00305 npyiter_goto_iterindex(NpyIter *iter, npy_intp iterindex);
00306 NPY_NO_EXPORT void
00307 npyiter_copy_from_buffers(NpyIter *iter);
00308 NPY_NO_EXPORT void
00309 npyiter_copy_to_buffers(NpyIter *iter, char **prev_dataptrs);
00310 
00311 
00312 #endif