TensorFFT.h
00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra.
00003 //
00004 // Copyright (C) 2015 Jianwei Cui <thucjw@gmail.com>
00005 //
00006 // This Source Code Form is subject to the terms of the Mozilla
00007 // Public License v. 2.0. If a copy of the MPL was not distributed
00008 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
00009 
00010 #ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H
00011 #define EIGEN_CXX11_TENSOR_TENSOR_FFT_H
00012 
00013 // This code requires the ability to initialize arrays of constant
00014 // values directly inside a class.
00015 #if __cplusplus >= 201103L || EIGEN_COMP_MSVC >= 1900
00016 
00017 namespace Eigen {
00018 
00030 template <bool NeedUprade> struct MakeComplex {
00031   template <typename T>
00032   EIGEN_DEVICE_FUNC
00033   T operator() (const T& val) const { return val; }
00034 };
00035 
00036 template <> struct MakeComplex<true> {
00037   template <typename T>
00038   EIGEN_DEVICE_FUNC
00039   std::complex<T> operator() (const T& val) const { return std::complex<T>(val, 0); }
00040 };
00041 
00042 template <> struct MakeComplex<false> {
00043   template <typename T>
00044   EIGEN_DEVICE_FUNC
00045   std::complex<T> operator() (const std::complex<T>& val) const { return val; }
00046 };
00047 
00048 template <int ResultType> struct PartOf {
00049   template <typename T> T operator() (const T& val) const { return val; }
00050 };
00051 
00052 template <> struct PartOf<RealPart> {
00053   template <typename T> T operator() (const std::complex<T>& val) const { return val.real(); }
00054 };
00055 
00056 template <> struct PartOf<ImagPart> {
00057   template <typename T> T operator() (const std::complex<T>& val) const { return val.imag(); }
00058 };
00059 
00060 namespace internal {
00061 template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
00062 struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> {
00063   typedef traits<XprType> XprTraits;
00064   typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar;
00065   typedef typename std::complex<RealScalar> ComplexScalar;
00066   typedef typename XprTraits::Scalar InputScalar;
00067   typedef typename conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
00068   typedef typename XprTraits::StorageKind StorageKind;
00069   typedef typename XprTraits::Index Index;
00070   typedef typename XprType::Nested Nested;
00071   typedef typename remove_reference<Nested>::type _Nested;
00072   static const int NumDimensions = XprTraits::NumDimensions;
00073   static const int Layout = XprTraits::Layout;
00074 };
00075 
00076 template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
00077 struct eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, Eigen::Dense> {
00078   typedef const TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>& type;
00079 };
00080 
00081 template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
00082 struct nested<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, 1, typename eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> >::type> {
00083   typedef TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> type;
00084 };
00085 
00086 }  // end namespace internal
00087 
00088 template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
00089 class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir>, ReadOnlyAccessors> {
00090  public:
00091   typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar;
00092   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
00093   typedef typename std::complex<RealScalar> ComplexScalar;
00094   typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
00095   typedef OutputScalar CoeffReturnType;
00096   typedef typename Eigen::internal::nested<TensorFFTOp>::type Nested;
00097   typedef typename Eigen::internal::traits<TensorFFTOp>::StorageKind StorageKind;
00098   typedef typename Eigen::internal::traits<TensorFFTOp>::Index Index;
00099 
00100   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft)
00101       : m_xpr(expr), m_fft(fft) {}
00102 
00103   EIGEN_DEVICE_FUNC
00104   const FFT& fft() const { return m_fft; }
00105 
00106   EIGEN_DEVICE_FUNC
00107   const typename internal::remove_all<typename XprType::Nested>::type& expression() const {
00108     return m_xpr;
00109   }
00110 
00111  protected:
00112   typename XprType::Nested m_xpr;
00113   const FFT m_fft;
00114 };
00115 
00116 // Eval as rvalue
00117 template <typename FFT, typename ArgType, typename Device, int FFTResultType, int FFTDir>
00118 struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, Device> {
00119   typedef TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir> XprType;
00120   typedef typename XprType::Index Index;
00121   static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
00122   typedef DSizes<Index, NumDims> Dimensions;
00123   typedef typename XprType::Scalar Scalar;
00124   typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
00125   typedef typename std::complex<RealScalar> ComplexScalar;
00126   typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
00127   typedef internal::traits<XprType> XprTraits;
00128   typedef typename XprTraits::Scalar InputScalar;
00129   typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
00130   typedef OutputScalar CoeffReturnType;
00131   typedef typename PacketType<OutputScalar, Device>::type PacketReturnType;
00132   static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
00133 
00134   enum {
00135     IsAligned = false,
00136     PacketAccess = true,
00137     BlockAccess = false,
00138     Layout = TensorEvaluator<ArgType, Device>::Layout,
00139     CoordAccess = false,
00140     RawAccess = false
00141   };
00142 
00143   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) {
00144     const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
00145     for (int i = 0; i < NumDims; ++i) {
00146       eigen_assert(input_dims[i] > 0);
00147       m_dimensions[i] = input_dims[i];
00148     }
00149 
00150     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
00151       m_strides[0] = 1;
00152       for (int i = 1; i < NumDims; ++i) {
00153         m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
00154       }
00155     } else {
00156       m_strides[NumDims - 1] = 1;
00157       for (int i = NumDims - 2; i >= 0; --i) {
00158         m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
00159       }
00160     }
00161     m_size = m_dimensions.TotalSize();
00162   }
00163 
00164   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
00165     return m_dimensions;
00166   }
00167 
00168   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* data) {
00169     m_impl.evalSubExprsIfNeeded(NULL);
00170     if (data) {
00171       evalToBuf(data);
00172       return false;
00173     } else {
00174       m_data = (CoeffReturnType*)m_device.allocate(sizeof(CoeffReturnType) * m_size);
00175       evalToBuf(m_data);
00176       return true;
00177     }
00178   }
00179 
00180   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
00181     if (m_data) {
00182       m_device.deallocate(m_data);
00183       m_data = NULL;
00184     }
00185     m_impl.cleanup();
00186   }
00187 
00188   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const {
00189     return m_data[index];
00190   }
00191 
00192   template <int LoadMode>
00193   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType
00194   packet(Index index) const {
00195     return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
00196   }
00197 
00198   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
00199   costPerCoeff(bool vectorized) const {
00200     return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
00201   }
00202 
00203   EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; }
00204 
00205 
00206  private:
00207   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(OutputScalar* data) {
00208     const bool write_to_out = internal::is_same<OutputScalar, ComplexScalar>::value;
00209     ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size);
00210 
00211     for (Index i = 0; i < m_size; ++i) {
00212       buf[i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(m_impl.coeff(i));
00213     }
00214 
00215     for (size_t i = 0; i < m_fft.size(); ++i) {
00216       Index dim = m_fft[i];
00217       eigen_assert(dim >= 0 && dim < NumDims);
00218       Index line_len = m_dimensions[dim];
00219       eigen_assert(line_len >= 1);
00220       ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len);
00221       const bool is_power_of_two = isPowerOfTwo(line_len);
00222       const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len);
00223       const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite);
00224 
00225       ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
00226       ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
00227       ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1));
00228       if (!is_power_of_two) {
00229         // Compute twiddle factors
00230         //   t_n = exp(sqrt(-1) * pi * n^2 / line_len)
00231         // for n = 0, 1,..., line_len-1.
00232         // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2
00233         pos_j_base_powered[0] = ComplexScalar(1, 0);
00234         if (line_len > 1) {
00235           const RealScalar pi_over_len(EIGEN_PI / line_len);
00236           const ComplexScalar pos_j_base = ComplexScalar(
00237                std::cos(pi_over_len), std::sin(pi_over_len));
00238           pos_j_base_powered[1] = pos_j_base;
00239           if (line_len > 2) {
00240             const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base;
00241             for (int j = 2; j < line_len + 1; ++j) {
00242               pos_j_base_powered[j] = pos_j_base_powered[j - 1] *
00243                                       pos_j_base_powered[j - 1] /
00244                                       pos_j_base_powered[j - 2] * pos_j_base_sq;
00245             }
00246           }
00247         }
00248       }
00249 
00250       for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) {
00251         const Index base_offset = getBaseOffsetFromIndex(partial_index, dim);
00252 
00253         // get data into line_buf
00254         const Index stride = m_strides[dim];
00255         if (stride == 1) {
00256           memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar));
00257         } else {
00258           Index offset = base_offset;
00259           for (int j = 0; j < line_len; ++j, offset += stride) {
00260             line_buf[j] = buf[offset];
00261           }
00262         }
00263 
00264         // processs the line
00265         if (is_power_of_two) {
00266           processDataLineCooleyTukey(line_buf, line_len, log_len);
00267         }
00268         else {
00269           processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered);
00270         }
00271 
00272         // write back
00273         if (FFTDir == FFT_FORWARD && stride == 1) {
00274           memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar));
00275         } else {
00276           Index offset = base_offset;
00277           const ComplexScalar div_factor =  ComplexScalar(1.0 / line_len, 0);
00278           for (int j = 0; j < line_len; ++j, offset += stride) {
00279              buf[offset] = (FFTDir == FFT_FORWARD) ? line_buf[j] : line_buf[j] * div_factor;
00280           }
00281         }
00282       }
00283       m_device.deallocate(line_buf);
00284       if (!is_power_of_two) {
00285         m_device.deallocate(a);
00286         m_device.deallocate(b);
00287         m_device.deallocate(pos_j_base_powered);
00288       }
00289     }
00290 
00291     if(!write_to_out) {
00292       for (Index i = 0; i < m_size; ++i) {
00293         data[i] = PartOf<FFTResultType>()(buf[i]);
00294       }
00295       m_device.deallocate(buf);
00296     }
00297   }
00298 
00299   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(Index x) {
00300     eigen_assert(x > 0);
00301     return !(x & (x - 1));
00302   }
00303 
00304   // The composite number for padding, used in Bluestein's FFT algorithm
00305   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) {
00306     Index i = 2;
00307     while (i < 2 * n - 1) i *= 2;
00308     return i;
00309   }
00310 
00311   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) {
00312     Index log2m = 0;
00313     while (m >>= 1) log2m++;
00314     return log2m;
00315   }
00316 
00317   // Call Cooley Tukey algorithm directly, data length must be power of 2
00318   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len, Index log_len) {
00319     eigen_assert(isPowerOfTwo(line_len));
00320     scramble_FFT(line_buf, line_len);
00321     compute_1D_Butterfly<FFTDir>(line_buf, line_len, log_len);
00322   }
00323 
00324   // Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length
00325   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len, Index good_composite, Index log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) {
00326     Index n = line_len;
00327     Index m = good_composite;
00328     ComplexScalar* data = line_buf;
00329 
00330     for (Index i = 0; i < n; ++i) {
00331       if(FFTDir == FFT_FORWARD) {
00332         a[i] = data[i] * numext::conj(pos_j_base_powered[i]);
00333       }
00334       else {
00335         a[i] = data[i] * pos_j_base_powered[i];
00336       }
00337     }
00338     for (Index i = n; i < m; ++i) {
00339       a[i] = ComplexScalar(0, 0);
00340     }
00341 
00342     for (Index i = 0; i < n; ++i) {
00343       if(FFTDir == FFT_FORWARD) {
00344         b[i] = pos_j_base_powered[i];
00345       }
00346       else {
00347         b[i] = numext::conj(pos_j_base_powered[i]);
00348       }
00349     }
00350     for (Index i = n; i < m - n; ++i) {
00351       b[i] = ComplexScalar(0, 0);
00352     }
00353     for (Index i = m - n; i < m; ++i) {
00354       if(FFTDir == FFT_FORWARD) {
00355         b[i] = pos_j_base_powered[m-i];
00356       }
00357       else {
00358         b[i] = numext::conj(pos_j_base_powered[m-i]);
00359       }
00360     }
00361 
00362     scramble_FFT(a, m);
00363     compute_1D_Butterfly<FFT_FORWARD>(a, m, log_len);
00364 
00365     scramble_FFT(b, m);
00366     compute_1D_Butterfly<FFT_FORWARD>(b, m, log_len);
00367 
00368     for (Index i = 0; i < m; ++i) {
00369       a[i] *= b[i];
00370     }
00371 
00372     scramble_FFT(a, m);
00373     compute_1D_Butterfly<FFT_REVERSE>(a, m, log_len);
00374 
00375     //Do the scaling after ifft
00376     for (Index i = 0; i < m; ++i) {
00377       a[i] /= m;
00378     }
00379 
00380     for (Index i = 0; i < n; ++i) {
00381       if(FFTDir == FFT_FORWARD) {
00382         data[i] = a[i] * numext::conj(pos_j_base_powered[i]);
00383       }
00384       else {
00385         data[i] = a[i] * pos_j_base_powered[i];
00386       }
00387     }
00388   }
00389 
00390   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, Index n) {
00391     eigen_assert(isPowerOfTwo(n));
00392     Index j = 1;
00393     for (Index i = 1; i < n; ++i){
00394       if (j > i) {
00395         std::swap(data[j-1], data[i-1]);
00396       }
00397       Index m = n >> 1;
00398       while (m >= 2 && j > m) {
00399         j -= m;
00400         m >>= 1;
00401       }
00402       j += m;
00403     }
00404   }
00405 
00406   template <int Dir>
00407   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_2(ComplexScalar* data) {
00408     ComplexScalar tmp = data[1];
00409     data[1] = data[0] - data[1];
00410     data[0] += tmp;
00411   }
00412 
00413   template <int Dir>
00414   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_4(ComplexScalar* data) {
00415     ComplexScalar tmp[4];
00416     tmp[0] = data[0] + data[1];
00417     tmp[1] = data[0] - data[1];
00418     tmp[2] = data[2] + data[3];
00419     if (Dir == FFT_FORWARD) {
00420       tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]);
00421     } else {
00422       tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]);
00423     }
00424     data[0] = tmp[0] + tmp[2];
00425     data[1] = tmp[1] + tmp[3];
00426     data[2] = tmp[0] - tmp[2];
00427     data[3] = tmp[1] - tmp[3];
00428   }
00429 
00430   template <int Dir>
00431   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_8(ComplexScalar* data) {
00432     ComplexScalar tmp_1[8];
00433     ComplexScalar tmp_2[8];
00434 
00435     tmp_1[0] = data[0] + data[1];
00436     tmp_1[1] = data[0] - data[1];
00437     tmp_1[2] = data[2] + data[3];
00438     if (Dir == FFT_FORWARD) {
00439       tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1);
00440     } else {
00441       tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1);
00442     }
00443     tmp_1[4] = data[4] + data[5];
00444     tmp_1[5] = data[4] - data[5];
00445     tmp_1[6] = data[6] + data[7];
00446     if (Dir == FFT_FORWARD) {
00447       tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1);
00448     } else {
00449       tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1);
00450     }
00451     tmp_2[0] = tmp_1[0] + tmp_1[2];
00452     tmp_2[1] = tmp_1[1] + tmp_1[3];
00453     tmp_2[2] = tmp_1[0] - tmp_1[2];
00454     tmp_2[3] = tmp_1[1] - tmp_1[3];
00455     tmp_2[4] = tmp_1[4] + tmp_1[6];
00456 // SQRT2DIV2 = sqrt(2)/2
00457 #define SQRT2DIV2 0.7071067811865476
00458     if (Dir == FFT_FORWARD) {
00459       tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2);
00460       tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1);
00461       tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2);
00462     } else {
00463       tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2);
00464       tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1);
00465       tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2);
00466     }
00467     data[0] = tmp_2[0] + tmp_2[4];
00468     data[1] = tmp_2[1] + tmp_2[5];
00469     data[2] = tmp_2[2] + tmp_2[6];
00470     data[3] = tmp_2[3] + tmp_2[7];
00471     data[4] = tmp_2[0] - tmp_2[4];
00472     data[5] = tmp_2[1] - tmp_2[5];
00473     data[6] = tmp_2[2] - tmp_2[6];
00474     data[7] = tmp_2[3] - tmp_2[7];
00475   }
00476 
00477   template <int Dir>
00478   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_1D_merge(
00479       ComplexScalar* data, Index n, Index n_power_of_2) {
00480     // Original code:
00481     // RealScalar wtemp = std::sin(M_PI/n);
00482     // RealScalar wpi =  -std::sin(2 * M_PI/n);
00483     const RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2];
00484     const RealScalar wpi = (Dir == FFT_FORWARD)
00485                                ? m_minus_sin_2_PI_div_n_LUT[n_power_of_2]
00486                                : -m_minus_sin_2_PI_div_n_LUT[n_power_of_2];
00487 
00488     const ComplexScalar wp(wtemp, wpi);
00489     const ComplexScalar wp_one = wp + ComplexScalar(1, 0);
00490     const ComplexScalar wp_one_2 = wp_one * wp_one;
00491     const ComplexScalar wp_one_3 = wp_one_2 * wp_one;
00492     const ComplexScalar wp_one_4 = wp_one_3 * wp_one;
00493     const Index n2 = n / 2;
00494     ComplexScalar w(1.0, 0.0);
00495     for (Index i = 0; i < n2; i += 4) {
00496        ComplexScalar temp0(data[i + n2] * w);
00497        ComplexScalar temp1(data[i + 1 + n2] * w * wp_one);
00498        ComplexScalar temp2(data[i + 2 + n2] * w * wp_one_2);
00499        ComplexScalar temp3(data[i + 3 + n2] * w * wp_one_3);
00500        w = w * wp_one_4;
00501 
00502        data[i + n2] = data[i] - temp0;
00503        data[i] += temp0;
00504 
00505        data[i + 1 + n2] = data[i + 1] - temp1;
00506        data[i + 1] += temp1;
00507 
00508        data[i + 2 + n2] = data[i + 2] - temp2;
00509        data[i + 2] += temp2;
00510 
00511        data[i + 3 + n2] = data[i + 3] - temp3;
00512        data[i + 3] += temp3;
00513     }
00514   }
00515 
00516  template <int Dir>
00517   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(
00518       ComplexScalar* data, Index n, Index n_power_of_2) {
00519     eigen_assert(isPowerOfTwo(n));
00520     if (n > 8) {
00521       compute_1D_Butterfly<Dir>(data, n / 2, n_power_of_2 - 1);
00522       compute_1D_Butterfly<Dir>(data + n / 2, n / 2, n_power_of_2 - 1);
00523       butterfly_1D_merge<Dir>(data, n, n_power_of_2);
00524     } else if (n == 8) {
00525       butterfly_8<Dir>(data);
00526     } else if (n == 4) {
00527       butterfly_4<Dir>(data);
00528     } else if (n == 2) {
00529       butterfly_2<Dir>(data);
00530     }
00531   }
00532 
00533   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const {
00534     Index result = 0;
00535 
00536     if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
00537       for (int i = NumDims - 1; i > omitted_dim; --i) {
00538         const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
00539         const Index idx = index / partial_m_stride;
00540         index -= idx * partial_m_stride;
00541         result += idx * m_strides[i];
00542       }
00543       result += index;
00544     }
00545     else {
00546       for (Index i = 0; i < omitted_dim; ++i) {
00547         const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
00548         const Index idx = index / partial_m_stride;
00549         index -= idx * partial_m_stride;
00550         result += idx * m_strides[i];
00551       }
00552       result += index;
00553     }
00554     // Value of index_coords[omitted_dim] is not determined to this step
00555     return result;
00556   }
00557 
00558   EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const {
00559     Index result = base + offset * m_strides[omitted_dim] ;
00560     return result;
00561   }
00562 
00563  protected:
00564   Index m_size;
00565   const FFT& m_fft;
00566   Dimensions m_dimensions;
00567   array<Index, NumDims> m_strides;
00568   TensorEvaluator<ArgType, Device> m_impl;
00569   CoeffReturnType* m_data;
00570   const Device& m_device;
00571 
00572   // This will support a maximum FFT size of 2^32 for each dimension
00573   // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2;
00574   const RealScalar m_sin_PI_div_n_LUT[32] = {
00575     RealScalar(0.0),
00576     RealScalar(-2),
00577     RealScalar(-0.999999999999999),
00578     RealScalar(-0.292893218813453),
00579     RealScalar(-0.0761204674887130),
00580     RealScalar(-0.0192147195967696),
00581     RealScalar(-0.00481527332780311),
00582     RealScalar(-0.00120454379482761),
00583     RealScalar(-3.01181303795779e-04),
00584     RealScalar(-7.52981608554592e-05),
00585     RealScalar(-1.88247173988574e-05),
00586     RealScalar(-4.70619042382852e-06),
00587     RealScalar(-1.17654829809007e-06),
00588     RealScalar(-2.94137117780840e-07),
00589     RealScalar(-7.35342821488550e-08),
00590     RealScalar(-1.83835707061916e-08),
00591     RealScalar(-4.59589268710903e-09),
00592     RealScalar(-1.14897317243732e-09),
00593     RealScalar(-2.87243293150586e-10),
00594     RealScalar( -7.18108232902250e-11),
00595     RealScalar(-1.79527058227174e-11),
00596     RealScalar(-4.48817645568941e-12),
00597     RealScalar(-1.12204411392298e-12),
00598     RealScalar(-2.80511028480785e-13),
00599     RealScalar(-7.01277571201985e-14),
00600     RealScalar(-1.75319392800498e-14),
00601     RealScalar(-4.38298482001247e-15),
00602     RealScalar(-1.09574620500312e-15),
00603     RealScalar(-2.73936551250781e-16),
00604     RealScalar(-6.84841378126949e-17),
00605     RealScalar(-1.71210344531737e-17),
00606     RealScalar(-4.28025861329343e-18)
00607   };
00608 
00609   // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2,i));
00610   const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = {
00611     RealScalar(0.0),
00612     RealScalar(0.0),
00613     RealScalar(-1.00000000000000e+00),
00614     RealScalar(-7.07106781186547e-01),
00615     RealScalar(-3.82683432365090e-01),
00616     RealScalar(-1.95090322016128e-01),
00617     RealScalar(-9.80171403295606e-02),
00618     RealScalar(-4.90676743274180e-02),
00619     RealScalar(-2.45412285229123e-02),
00620     RealScalar(-1.22715382857199e-02),
00621     RealScalar(-6.13588464915448e-03),
00622     RealScalar(-3.06795676296598e-03),
00623     RealScalar(-1.53398018628477e-03),
00624     RealScalar(-7.66990318742704e-04),
00625     RealScalar(-3.83495187571396e-04),
00626     RealScalar(-1.91747597310703e-04),
00627     RealScalar(-9.58737990959773e-05),
00628     RealScalar(-4.79368996030669e-05),
00629     RealScalar(-2.39684498084182e-05),
00630     RealScalar(-1.19842249050697e-05),
00631     RealScalar(-5.99211245264243e-06),
00632     RealScalar(-2.99605622633466e-06),
00633     RealScalar(-1.49802811316901e-06),
00634     RealScalar(-7.49014056584716e-07),
00635     RealScalar(-3.74507028292384e-07),
00636     RealScalar(-1.87253514146195e-07),
00637     RealScalar(-9.36267570730981e-08),
00638     RealScalar(-4.68133785365491e-08),
00639     RealScalar(-2.34066892682746e-08),
00640     RealScalar(-1.17033446341373e-08),
00641     RealScalar(-5.85167231706864e-09),
00642     RealScalar(-2.92583615853432e-09)
00643   };
00644 };
00645 
00646 }  // end namespace Eigen
00647 
00648 #endif  // EIGEN_HAS_CONSTEXPR
00649 
00650 
00651 #endif  // EIGEN_CXX11_TENSOR_TENSOR_FFT_H
 All Classes Functions Variables Typedefs Enumerator