Eigen-unsupported 3.3.3
TensorFFT.h
```cpp
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2015 Jianwei Cui <thucjw@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H
#define EIGEN_CXX11_TENSOR_TENSOR_FFT_H

// This code requires the ability to initialize arrays of constant
// values directly inside a class.
#if __cplusplus >= 201103L || EIGEN_COMP_MSVC >= 1900

namespace Eigen {

template <bool NeedUprade> struct MakeComplex {
  template <typename T>
  EIGEN_DEVICE_FUNC
  T operator() (const T& val) const { return val; }
};

template <> struct MakeComplex<true> {
  template <typename T>
  EIGEN_DEVICE_FUNC
  std::complex<T> operator() (const T& val) const { return std::complex<T>(val, 0); }
};

template <> struct MakeComplex<false> {
  template <typename T>
  EIGEN_DEVICE_FUNC
  std::complex<T> operator() (const std::complex<T>& val) const { return val; }
};

template <int ResultType> struct PartOf {
  template <typename T> T operator() (const T& val) const { return val; }
};

template <> struct PartOf<RealPart> {
  template <typename T> T operator() (const std::complex<T>& val) const { return val.real(); }
};

template <> struct PartOf<ImagPart> {
  template <typename T> T operator() (const std::complex<T>& val) const { return val.imag(); }
};

namespace internal {

template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> {
  typedef traits<XprType> XprTraits;
  typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar;
  typedef typename std::complex<RealScalar> ComplexScalar;
  typedef typename XprTraits::Scalar InputScalar;
  typedef typename conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = XprTraits::NumDimensions;
  static const int Layout = XprTraits::Layout;
};

template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
struct eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, Eigen::Dense> {
  typedef const TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>& type;
};

template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
struct nested<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, 1, typename eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> >::type> {
  typedef TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> type;
};

}  // end namespace internal

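// TensorFFTOp is the expression node created by TensorBase::fft(). Its
// template parameters are:
//   FFT           - an index list naming the dimensions to transform,
//   FFTResultType - RealPart, ImagPart or BothParts, selecting the output scalar,
//   FFTDir        - FFT_FORWARD or FFT_REVERSE.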
template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir>, ReadOnlyAccessors> {
 public:
  typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename std::complex<RealScalar> ComplexScalar;
  typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
  typedef OutputScalar CoeffReturnType;
  typedef typename Eigen::internal::nested<TensorFFTOp>::type Nested;
  typedef typename Eigen::internal::traits<TensorFFTOp>::StorageKind StorageKind;
  typedef typename Eigen::internal::traits<TensorFFTOp>::Index Index;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft)
      : m_xpr(expr), m_fft(fft) {}

  EIGEN_DEVICE_FUNC
  const FFT& fft() const { return m_fft; }

  EIGEN_DEVICE_FUNC
  const typename internal::remove_all<typename XprType::Nested>::type& expression() const {
    return m_xpr;
  }

 protected:
  typename XprType::Nested m_xpr;
  const FFT m_fft;
};

// Eval as rvalue
template <typename FFT, typename ArgType, typename Device, int FFTResultType, int FFTDir>
struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, Device> {
  typedef TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir> XprType;
  typedef typename XprType::Index Index;
  static const int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
  typedef DSizes<Index, NumDims> Dimensions;
  typedef typename XprType::Scalar Scalar;
  typedef typename Eigen::NumTraits<Scalar>::Real RealScalar;
  typedef typename std::complex<RealScalar> ComplexScalar;
  typedef typename TensorEvaluator<ArgType, Device>::Dimensions InputDimensions;
  typedef internal::traits<XprType> XprTraits;
  typedef typename XprTraits::Scalar InputScalar;
  typedef typename internal::conditional<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar>::type OutputScalar;
  typedef OutputScalar CoeffReturnType;
  typedef typename PacketType<OutputScalar, Device>::type PacketReturnType;
  static const int PacketSize = internal::unpacket_traits<PacketReturnType>::size;

  enum {
    IsAligned = false,
    PacketAccess = true,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) {
    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    for (int i = 0; i < NumDims; ++i) {
      eigen_assert(input_dims[i] > 0);
      m_dimensions[i] = input_dims[i];
    }

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_strides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
      }
    } else {
      m_strides[NumDims - 1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
      }
    }
    m_size = m_dimensions.TotalSize();
  }

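  // Example (hypothetical sizes): for a 2x3x4 tensor the ColMajor strides are
  // {1, 2, 6} and the RowMajor strides are {12, 4, 1}; m_strides[d] is the
  // distance in the flat buffer between neighbouring coefficients along
  // dimension d.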
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
    return m_dimensions;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(OutputScalar* data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (data) {
      evalToBuf(data);
      return false;
    } else {
      m_data = (CoeffReturnType*)m_device.allocate(sizeof(CoeffReturnType) * m_size);
      evalToBuf(m_data);
      return true;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    if (m_data) {
      m_device.deallocate(m_data);
      m_data = NULL;
    }
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE CoeffReturnType coeff(Index index) const {
    return m_data[index];
  }

  template <int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE PacketReturnType packet(Index index) const {
    return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return m_data; }

 private:
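  // Evaluation strategy: evalToBuf() materialises the whole result at once.
  // The input is first copied (and, for real inputs, promoted) into a complex
  // buffer. Then, for every dimension listed in m_fft, each 1-D line along
  // that dimension is gathered through m_strides, transformed in place with a
  // radix-2 Cooley-Tukey FFT when its length is a power of two and with
  // Bluestein's algorithm otherwise, and scattered back (scaled by 1/line_len
  // in the reverse direction). Finally, if only the real or imaginary part
  // was requested, it is extracted into the output buffer.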
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalToBuf(OutputScalar* data) {
    const bool write_to_out = internal::is_same<OutputScalar, ComplexScalar>::value;
    ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size);

    for (Index i = 0; i < m_size; ++i) {
      buf[i] = MakeComplex<internal::is_same<InputScalar, RealScalar>::value>()(m_impl.coeff(i));
    }

    for (size_t i = 0; i < m_fft.size(); ++i) {
      Index dim = m_fft[i];
      eigen_assert(dim >= 0 && dim < NumDims);
      Index line_len = m_dimensions[dim];
      eigen_assert(line_len >= 1);
      ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len);
      const bool is_power_of_two = isPowerOfTwo(line_len);
      const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len);
      const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite);

      ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
      ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
      ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1));
      if (!is_power_of_two) {
        // Compute twiddle factors
        //   t_n = exp(sqrt(-1) * pi * n^2 / line_len)
        // for n = 0, 1, ..., line_len.
        // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2
        pos_j_base_powered[0] = ComplexScalar(1, 0);
        if (line_len > 1) {
          const RealScalar pi_over_len(EIGEN_PI / line_len);
          const ComplexScalar pos_j_base = ComplexScalar(
              std::cos(pi_over_len), std::sin(pi_over_len));
          pos_j_base_powered[1] = pos_j_base;
          if (line_len > 2) {
            const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base;
            for (int j = 2; j < line_len + 1; ++j) {
              pos_j_base_powered[j] = pos_j_base_powered[j - 1] *
                                      pos_j_base_powered[j - 1] /
                                      pos_j_base_powered[j - 2] * pos_j_base_sq;
            }
          }
        }
      }

      for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) {
        const Index base_offset = getBaseOffsetFromIndex(partial_index, dim);

        // get data into line_buf
        const Index stride = m_strides[dim];
        if (stride == 1) {
          memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar));
        } else {
          Index offset = base_offset;
          for (int j = 0; j < line_len; ++j, offset += stride) {
            line_buf[j] = buf[offset];
          }
        }

        // process the line
        if (is_power_of_two) {
          processDataLineCooleyTukey(line_buf, line_len, log_len);
        }
        else {
          processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered);
        }

        // write back
        if (FFTDir == FFT_FORWARD && stride == 1) {
          memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar));
        } else {
          Index offset = base_offset;
          const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0);
          for (int j = 0; j < line_len; ++j, offset += stride) {
            buf[offset] = (FFTDir == FFT_FORWARD) ? line_buf[j] : line_buf[j] * div_factor;
          }
        }
      }
      m_device.deallocate(line_buf);
      if (!is_power_of_two) {
        m_device.deallocate(a);
        m_device.deallocate(b);
        m_device.deallocate(pos_j_base_powered);
      }
    }

    if (!write_to_out) {
      for (Index i = 0; i < m_size; ++i) {
        data[i] = PartOf<FFTResultType>()(buf[i]);
      }
      m_device.deallocate(buf);
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(Index x) {
    eigen_assert(x > 0);
    return !(x & (x - 1));
  }

  // The padding length used in Bluestein's FFT algorithm: the smallest power
  // of two that is >= 2 * n - 1.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) {
    Index i = 2;
    while (i < 2 * n - 1) i *= 2;
    return i;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) {
    Index log2m = 0;
    while (m >>= 1) log2m++;
    return log2m;
  }

  // Call the Cooley-Tukey algorithm directly; the data length must be a power of 2.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len, Index log_len) {
    eigen_assert(isPowerOfTwo(line_len));
    scramble_FFT(line_buf, line_len);
    compute_1D_Butterfly<FFTDir>(line_buf, line_len, log_len);
  }

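  // Bluestein rewrites a length-n DFT as a convolution. With the chirp
  // t_k = exp(i*pi*k^2/n) and the identity j*k = (j^2 + k^2 - (k - j)^2) / 2,
  // the forward transform becomes
  //   X_k = conj(t_k) * sum_j (x_j * conj(t_j)) * t_{k-j},
  // i.e. a_j = x_j * conj(t_j) convolved with b_j = t_j, up to the outer chirp
  // factor (the reverse direction swaps the conjugations). The convolution is
  // evaluated with zero-padded power-of-two FFTs of length m >= 2*n - 1, which
  // is why findGoodComposite() rounds up to a power of two.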
  // Call Bluestein's FFT algorithm; m is a good composite number greater than (2 * n - 1), used as the padding length.
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len, Index good_composite, Index log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) {
    Index n = line_len;
    Index m = good_composite;
    ComplexScalar* data = line_buf;

    for (Index i = 0; i < n; ++i) {
      if (FFTDir == FFT_FORWARD) {
        a[i] = data[i] * numext::conj(pos_j_base_powered[i]);
      }
      else {
        a[i] = data[i] * pos_j_base_powered[i];
      }
    }
    for (Index i = n; i < m; ++i) {
      a[i] = ComplexScalar(0, 0);
    }

    for (Index i = 0; i < n; ++i) {
      if (FFTDir == FFT_FORWARD) {
        b[i] = pos_j_base_powered[i];
      }
      else {
        b[i] = numext::conj(pos_j_base_powered[i]);
      }
    }
    for (Index i = n; i < m - n; ++i) {
      b[i] = ComplexScalar(0, 0);
    }
    for (Index i = m - n; i < m; ++i) {
      if (FFTDir == FFT_FORWARD) {
        b[i] = pos_j_base_powered[m-i];
      }
      else {
        b[i] = numext::conj(pos_j_base_powered[m-i]);
      }
    }

    scramble_FFT(a, m);
    compute_1D_Butterfly<FFT_FORWARD>(a, m, log_len);

    scramble_FFT(b, m);
    compute_1D_Butterfly<FFT_FORWARD>(b, m, log_len);

    for (Index i = 0; i < m; ++i) {
      a[i] *= b[i];
    }

    scramble_FFT(a, m);
    compute_1D_Butterfly<FFT_REVERSE>(a, m, log_len);

    // Do the scaling after the inverse FFT
    for (Index i = 0; i < m; ++i) {
      a[i] /= m;
    }

    for (Index i = 0; i < n; ++i) {
      if (FFTDir == FFT_FORWARD) {
        data[i] = a[i] * numext::conj(pos_j_base_powered[i]);
      }
      else {
        data[i] = a[i] * pos_j_base_powered[i];
      }
    }
  }

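  // Reorder the data into bit-reversed index order, the standard preprocessing
  // step for an in-place radix-2 FFT. For example, for n = 8 the indices 0..7
  // are permuted to 0, 4, 2, 6, 1, 5, 3, 7 (each index is swapped with its
  // bit-reversed counterpart exactly once).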
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, Index n) {
    eigen_assert(isPowerOfTwo(n));
    Index j = 1;
    for (Index i = 1; i < n; ++i) {
      if (j > i) {
        std::swap(data[j-1], data[i-1]);
      }
      Index m = n >> 1;
      while (m >= 2 && j > m) {
        j -= m;
        m >>= 1;
      }
      j += m;
    }
  }

  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_2(ComplexScalar* data) {
    ComplexScalar tmp = data[1];
    data[1] = data[0] - data[1];
    data[0] += tmp;
  }

  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_4(ComplexScalar* data) {
    ComplexScalar tmp[4];
    tmp[0] = data[0] + data[1];
    tmp[1] = data[0] - data[1];
    tmp[2] = data[2] + data[3];
    if (Dir == FFT_FORWARD) {
      tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]);
    } else {
      tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]);
    }
    data[0] = tmp[0] + tmp[2];
    data[1] = tmp[1] + tmp[3];
    data[2] = tmp[0] - tmp[2];
    data[3] = tmp[1] - tmp[3];
  }

  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_8(ComplexScalar* data) {
    ComplexScalar tmp_1[8];
    ComplexScalar tmp_2[8];

    tmp_1[0] = data[0] + data[1];
    tmp_1[1] = data[0] - data[1];
    tmp_1[2] = data[2] + data[3];
    if (Dir == FFT_FORWARD) {
      tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1);
    } else {
      tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1);
    }
    tmp_1[4] = data[4] + data[5];
    tmp_1[5] = data[4] - data[5];
    tmp_1[6] = data[6] + data[7];
    if (Dir == FFT_FORWARD) {
      tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1);
    } else {
      tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1);
    }
    tmp_2[0] = tmp_1[0] + tmp_1[2];
    tmp_2[1] = tmp_1[1] + tmp_1[3];
    tmp_2[2] = tmp_1[0] - tmp_1[2];
    tmp_2[3] = tmp_1[1] - tmp_1[3];
    tmp_2[4] = tmp_1[4] + tmp_1[6];
// SQRT2DIV2 = sqrt(2)/2
#define SQRT2DIV2 0.7071067811865476
    if (Dir == FFT_FORWARD) {
      tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2);
      tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1);
      tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2);
    } else {
      tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2);
      tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1);
      tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2);
    }
    data[0] = tmp_2[0] + tmp_2[4];
    data[1] = tmp_2[1] + tmp_2[5];
    data[2] = tmp_2[2] + tmp_2[6];
    data[3] = tmp_2[3] + tmp_2[7];
    data[4] = tmp_2[0] - tmp_2[4];
    data[5] = tmp_2[1] - tmp_2[5];
    data[6] = tmp_2[2] - tmp_2[6];
    data[7] = tmp_2[3] - tmp_2[7];
  }

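  // Merge step of the radix-2 FFT. The look-up tables below provide
  //   wtemp = -2 * sin(pi/n)^2   and   wpi = -/+ sin(2*pi/n),
  // so wp_one = 1 + wp = cos(2*pi/n) -/+ i*sin(2*pi/n) = exp(-/+ 2*pi*i/n),
  // the unit twiddle factor for the chosen direction. The loop is unrolled by
  // four: w carries the running twiddle and advances by wp_one^4 per iteration.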
  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_1D_merge(
      ComplexScalar* data, Index n, Index n_power_of_2) {
    // Original code:
    // RealScalar wtemp = std::sin(M_PI/n);
    // RealScalar wpi = -std::sin(2 * M_PI/n);
    const RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2];
    const RealScalar wpi = (Dir == FFT_FORWARD)
                               ? m_minus_sin_2_PI_div_n_LUT[n_power_of_2]
                               : -m_minus_sin_2_PI_div_n_LUT[n_power_of_2];

    const ComplexScalar wp(wtemp, wpi);
    const ComplexScalar wp_one = wp + ComplexScalar(1, 0);
    const ComplexScalar wp_one_2 = wp_one * wp_one;
    const ComplexScalar wp_one_3 = wp_one_2 * wp_one;
    const ComplexScalar wp_one_4 = wp_one_3 * wp_one;
    const Index n2 = n / 2;
    ComplexScalar w(1.0, 0.0);
    for (Index i = 0; i < n2; i += 4) {
      ComplexScalar temp0(data[i + n2] * w);
      ComplexScalar temp1(data[i + 1 + n2] * w * wp_one);
      ComplexScalar temp2(data[i + 2 + n2] * w * wp_one_2);
      ComplexScalar temp3(data[i + 3 + n2] * w * wp_one_3);
      w = w * wp_one_4;

      data[i + n2] = data[i] - temp0;
      data[i] += temp0;

      data[i + 1 + n2] = data[i + 1] - temp1;
      data[i + 1] += temp1;

      data[i + 2 + n2] = data[i + 2] - temp2;
      data[i + 2] += temp2;

      data[i + 3 + n2] = data[i + 3] - temp3;
      data[i + 3] += temp3;
    }
  }

  template <int Dir>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(
      ComplexScalar* data, Index n, Index n_power_of_2) {
    eigen_assert(isPowerOfTwo(n));
    if (n > 8) {
      compute_1D_Butterfly<Dir>(data, n / 2, n_power_of_2 - 1);
      compute_1D_Butterfly<Dir>(data + n / 2, n / 2, n_power_of_2 - 1);
      butterfly_1D_merge<Dir>(data, n, n_power_of_2);
    } else if (n == 8) {
      butterfly_8<Dir>(data);
    } else if (n == 4) {
      butterfly_4<Dir>(data);
    } else if (n == 2) {
      butterfly_2<Dir>(data);
    }
  }

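  // Maps partial_index, an index that runs over every dimension except
  // omitted_dim, to the flat offset of the first coefficient of the
  // corresponding 1-D line (i.e. the coefficient whose coordinate along
  // omitted_dim is 0).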
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const {
    Index result = 0;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > omitted_dim; --i) {
        const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
        const Index idx = index / partial_m_stride;
        index -= idx * partial_m_stride;
        result += idx * m_strides[i];
      }
      result += index;
    }
    else {
      for (Index i = 0; i < omitted_dim; ++i) {
        const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
        const Index idx = index / partial_m_stride;
        index -= idx * partial_m_stride;
        result += idx * m_strides[i];
      }
      result += index;
    }
    // The coordinate along omitted_dim is not determined at this step.
    return result;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const {
    Index result = base + offset * m_strides[omitted_dim];
    return result;
  }

 protected:
  Index m_size;
  const FFT& m_fft;
  Dimensions m_dimensions;
  array<Index, NumDims> m_strides;
  TensorEvaluator<ArgType, Device> m_impl;
  CoeffReturnType* m_data;
  const Device& m_device;

  // This will support a maximum FFT size of 2^32 for each dimension
  // m_sin_PI_div_n_LUT[i] = -2 * std::pow(std::sin(M_PI / std::pow(2, i)), 2);
  const RealScalar m_sin_PI_div_n_LUT[32] = {
    RealScalar(0.0),
    RealScalar(-2),
    RealScalar(-0.999999999999999),
    RealScalar(-0.292893218813453),
    RealScalar(-0.0761204674887130),
    RealScalar(-0.0192147195967696),
    RealScalar(-0.00481527332780311),
    RealScalar(-0.00120454379482761),
    RealScalar(-3.01181303795779e-04),
    RealScalar(-7.52981608554592e-05),
    RealScalar(-1.88247173988574e-05),
    RealScalar(-4.70619042382852e-06),
    RealScalar(-1.17654829809007e-06),
    RealScalar(-2.94137117780840e-07),
    RealScalar(-7.35342821488550e-08),
    RealScalar(-1.83835707061916e-08),
    RealScalar(-4.59589268710903e-09),
    RealScalar(-1.14897317243732e-09),
    RealScalar(-2.87243293150586e-10),
    RealScalar(-7.18108232902250e-11),
    RealScalar(-1.79527058227174e-11),
    RealScalar(-4.48817645568941e-12),
    RealScalar(-1.12204411392298e-12),
    RealScalar(-2.80511028480785e-13),
    RealScalar(-7.01277571201985e-14),
    RealScalar(-1.75319392800498e-14),
    RealScalar(-4.38298482001247e-15),
    RealScalar(-1.09574620500312e-15),
    RealScalar(-2.73936551250781e-16),
    RealScalar(-6.84841378126949e-17),
    RealScalar(-1.71210344531737e-17),
    RealScalar(-4.28025861329343e-18)
  };

  // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2, i));
  const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = {
    RealScalar(0.0),
    RealScalar(0.0),
    RealScalar(-1.00000000000000e+00),
    RealScalar(-7.07106781186547e-01),
    RealScalar(-3.82683432365090e-01),
    RealScalar(-1.95090322016128e-01),
    RealScalar(-9.80171403295606e-02),
    RealScalar(-4.90676743274180e-02),
    RealScalar(-2.45412285229123e-02),
    RealScalar(-1.22715382857199e-02),
    RealScalar(-6.13588464915448e-03),
    RealScalar(-3.06795676296598e-03),
    RealScalar(-1.53398018628477e-03),
    RealScalar(-7.66990318742704e-04),
    RealScalar(-3.83495187571396e-04),
    RealScalar(-1.91747597310703e-04),
    RealScalar(-9.58737990959773e-05),
    RealScalar(-4.79368996030669e-05),
    RealScalar(-2.39684498084182e-05),
    RealScalar(-1.19842249050697e-05),
    RealScalar(-5.99211245264243e-06),
    RealScalar(-2.99605622633466e-06),
    RealScalar(-1.49802811316901e-06),
    RealScalar(-7.49014056584716e-07),
    RealScalar(-3.74507028292384e-07),
    RealScalar(-1.87253514146195e-07),
    RealScalar(-9.36267570730981e-08),
    RealScalar(-4.68133785365491e-08),
    RealScalar(-2.34066892682746e-08),
    RealScalar(-1.17033446341373e-08),
    RealScalar(-5.85167231706864e-09),
    RealScalar(-2.92583615853432e-09)
  };
};

}  // end namespace Eigen

#endif // __cplusplus >= 201103L || EIGEN_COMP_MSVC >= 1900

#endif // EIGEN_CXX11_TENSOR_TENSOR_FFT_H
```
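For reference, a minimal usage sketch of this operator through the public Tensor API. It assumes the `TensorBase::fft<FFTResultType, FFTDirection>(dims)` entry point provided by the same module and the usual unsupported include path; the tensor sizes and variable names are illustrative only.

```cpp
#include <unsupported/Eigen/CXX11/Tensor>
#include <complex>
#include <iostream>

int main() {
  // A small real-valued 2-D tensor (sizes chosen arbitrarily for illustration).
  Eigen::Tensor<float, 2> input(4, 8);
  input.setRandom();

  // The array lists the dimensions to transform; here both of them.
  Eigen::array<int, 2> fft_dims = {{0, 1}};

  // BothParts yields a complex tensor; RealPart/ImagPart would yield a real one.
  Eigen::Tensor<std::complex<float>, 2> freq =
      input.fft<Eigen::BothParts, Eigen::FFT_FORWARD>(fft_dims);

  // Round trip: the reverse transform scales each transformed line by 1/line_len,
  // so `back` should reproduce `input` (with a near-zero imaginary part).
  Eigen::Tensor<std::complex<float>, 2> back =
      freq.fft<Eigen::BothParts, Eigen::FFT_REVERSE>(fft_dims);

  std::cout << "input(0,0) = " << input(0, 0) << "\n"
            << "back(0,0)  = " << back(0, 0) << std::endl;
  return 0;
}
```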