TensorFFT.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2015 Jianwei Cui <thucjw@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_CXX11_TENSOR_TENSOR_FFT_H
11 #define EIGEN_CXX11_TENSOR_TENSOR_FFT_H
12 
13 #include "./InternalHeaderCheck.h"
14 
15 namespace Eigen {
16 
28 template <bool NeedUprade> struct MakeComplex {
29  template <typename T>
31  T operator() (const T& val) const { return val; }
32 };
33 
34 template <> struct MakeComplex<true> {
35  template <typename T>
37  std::complex<T> operator() (const T& val) const { return std::complex<T>(val, 0); }
38 };
39 
40 template <> struct MakeComplex<false> {
41  template <typename T>
43  std::complex<T> operator() (const std::complex<T>& val) const { return val; }
44 };
45 
46 template <int ResultType> struct PartOf {
47  template <typename T> T operator() (const T& val) const { return val; }
48 };
49 
50 template <> struct PartOf<RealPart> {
51  template <typename T> T operator() (const std::complex<T>& val) const { return val.real(); }
52 };
53 
54 template <> struct PartOf<ImagPart> {
55  template <typename T> T operator() (const std::complex<T>& val) const { return val.imag(); }
56 };
57 
58 namespace internal {
59 template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
60 struct traits<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir> > : public traits<XprType> {
61  typedef traits<XprType> XprTraits;
62  typedef typename NumTraits<typename XprTraits::Scalar>::Real RealScalar;
63  typedef typename std::complex<RealScalar> ComplexScalar;
64  typedef typename XprTraits::Scalar InputScalar;
65  typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar> OutputScalar;
66  typedef typename XprTraits::StorageKind StorageKind;
67  typedef typename XprTraits::Index Index;
68  typedef typename XprType::Nested Nested;
69  typedef std::remove_reference_t<Nested> Nested_;
70  static constexpr int NumDimensions = XprTraits::NumDimensions;
71  static constexpr int Layout = XprTraits::Layout;
72  typedef typename traits<XprType>::PointerType PointerType;
73 };
74 
75 template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
76 struct eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, Eigen::Dense> {
77  typedef const TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>& type;
78 };
79 
80 template <typename FFT, typename XprType, int FFTResultType, int FFTDirection>
81 struct nested<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection>, 1, typename eval<TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> >::type> {
82  typedef TensorFFTOp<FFT, XprType, FFTResultType, FFTDirection> type;
83 };
84 
85 } // end namespace internal
86 
87 template <typename FFT, typename XprType, int FFTResultType, int FFTDir>
88 class TensorFFTOp : public TensorBase<TensorFFTOp<FFT, XprType, FFTResultType, FFTDir>, ReadOnlyAccessors> {
89  public:
90  typedef typename Eigen::internal::traits<TensorFFTOp>::Scalar Scalar;
92  typedef typename std::complex<RealScalar> ComplexScalar;
93  typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar> OutputScalar;
95  typedef typename Eigen::internal::nested<TensorFFTOp>::type Nested;
96  typedef typename Eigen::internal::traits<TensorFFTOp>::StorageKind StorageKind;
97  typedef typename Eigen::internal::traits<TensorFFTOp>::Index Index;
98 
99  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorFFTOp(const XprType& expr, const FFT& fft)
100  : m_xpr(expr), m_fft(fft) {}
101 
103  const FFT& fft() const { return m_fft; }
104 
107  return m_xpr;
108  }
109 
110  protected:
111  typename XprType::Nested m_xpr;
112  const FFT m_fft;
113 };
114 
115 // Eval as rvalue
116 template <typename FFT, typename ArgType, typename Device, int FFTResultType, int FFTDir>
117 struct TensorEvaluator<const TensorFFTOp<FFT, ArgType, FFTResultType, FFTDir>, Device> {
119  typedef typename XprType::Index Index;
120  static constexpr int NumDims = internal::array_size<typename TensorEvaluator<ArgType, Device>::Dimensions>::value;
122  typedef typename XprType::Scalar Scalar;
124  typedef typename std::complex<RealScalar> ComplexScalar;
126  typedef internal::traits<XprType> XprTraits;
127  typedef typename XprTraits::Scalar InputScalar;
128  typedef std::conditional_t<FFTResultType == RealPart || FFTResultType == ImagPart, RealScalar, ComplexScalar> OutputScalar;
131  static constexpr int PacketSize = internal::unpacket_traits<PacketReturnType>::size;
134 
136  enum {
137  IsAligned = false,
138  PacketAccess = true,
139  BlockAccess = false,
140  PreferBlockAccess = false,
141  CoordAccess = false,
142  RawAccess = false
143  };
144 
145  //===- Tensor block evaluation strategy (see TensorBlock.h) -------------===//
146  typedef internal::TensorBlockNotImplemented TensorBlock;
147  //===--------------------------------------------------------------------===//
148 
149  EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device) : m_fft(op.fft()), m_impl(op.expression(), device), m_data(NULL), m_device(device) {
150  const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
151  for (int i = 0; i < NumDims; ++i) {
152  eigen_assert(input_dims[i] > 0);
153  m_dimensions[i] = input_dims[i];
154  }
155 
156  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
157  m_strides[0] = 1;
158  for (int i = 1; i < NumDims; ++i) {
159  m_strides[i] = m_strides[i - 1] * m_dimensions[i - 1];
160  }
161  } else {
162  m_strides[NumDims - 1] = 1;
163  for (int i = NumDims - 2; i >= 0; --i) {
164  m_strides[i] = m_strides[i + 1] * m_dimensions[i + 1];
165  }
166  }
167  m_size = m_dimensions.TotalSize();
168  }
169 
170  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const {
171  return m_dimensions;
172  }
173 
174  EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(EvaluatorPointerType data) {
175  m_impl.evalSubExprsIfNeeded(NULL);
176  if (data) {
177  evalToBuf(data);
178  return false;
179  } else {
180  m_data = (EvaluatorPointerType)m_device.get((CoeffReturnType*)(m_device.allocate_temp(sizeof(CoeffReturnType) * m_size)));
181  evalToBuf(m_data);
182  return true;
183  }
184  }
185 
186  EIGEN_STRONG_INLINE void cleanup() {
187  if (m_data) {
188  m_device.deallocate(m_data);
189  m_data = NULL;
190  }
191  m_impl.cleanup();
192  }
193 
195  return m_data[index];
196  }
197 
198  template <int LoadMode>
200  packet(Index index) const {
201  return internal::ploadt<PacketReturnType, LoadMode>(m_data + index);
202  }
203 
204  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost
205  costPerCoeff(bool vectorized) const {
206  return TensorOpCost(sizeof(CoeffReturnType), 0, 0, vectorized, PacketSize);
207  }
208 
210 
211  private:
213  const bool write_to_out = internal::is_same<OutputScalar, ComplexScalar>::value;
214  ComplexScalar* buf = write_to_out ? (ComplexScalar*)data : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * m_size);
215 
216  for (Index i = 0; i < m_size; ++i) {
218  }
219 
220  for (size_t i = 0; i < m_fft.size(); ++i) {
221  Index dim = m_fft[i];
222  eigen_assert(dim >= 0 && dim < NumDims);
223  Index line_len = m_dimensions[dim];
224  eigen_assert(line_len >= 1);
225  ComplexScalar* line_buf = (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * line_len);
226  const bool is_power_of_two = isPowerOfTwo(line_len);
227  const Index good_composite = is_power_of_two ? 0 : findGoodComposite(line_len);
228  const Index log_len = is_power_of_two ? getLog2(line_len) : getLog2(good_composite);
229 
230  ComplexScalar* a = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
231  ComplexScalar* b = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * good_composite);
232  ComplexScalar* pos_j_base_powered = is_power_of_two ? NULL : (ComplexScalar*)m_device.allocate(sizeof(ComplexScalar) * (line_len + 1));
233  if (!is_power_of_two) {
234  // Compute twiddle factors
235  // t_n = exp(sqrt(-1) * pi * n^2 / line_len)
236  // for n = 0, 1,..., line_len-1.
237  // For n > 2 we use the recurrence t_n = t_{n-1}^2 / t_{n-2} * t_1^2
238 
239  // The recurrence is correct in exact arithmetic, but causes
240  // numerical issues for large transforms, especially in
241  // single-precision floating point.
242  //
243  // pos_j_base_powered[0] = ComplexScalar(1, 0);
244  // if (line_len > 1) {
245  // const ComplexScalar pos_j_base = ComplexScalar(
246  // numext::cos(M_PI / line_len), numext::sin(M_PI / line_len));
247  // pos_j_base_powered[1] = pos_j_base;
248  // if (line_len > 2) {
249  // const ComplexScalar pos_j_base_sq = pos_j_base * pos_j_base;
250  // for (int i = 2; i < line_len + 1; ++i) {
251  // pos_j_base_powered[i] = pos_j_base_powered[i - 1] *
252  // pos_j_base_powered[i - 1] /
253  // pos_j_base_powered[i - 2] *
254  // pos_j_base_sq;
255  // }
256  // }
257  // }
258  // TODO(rmlarsen): Find a way to use Eigen's vectorized sin
259  // and cosine functions here.
260  for (int j = 0; j < line_len + 1; ++j) {
261  double arg = ((EIGEN_PI * j) * j) / line_len;
262  std::complex<double> tmp(numext::cos(arg), numext::sin(arg));
263  pos_j_base_powered[j] = static_cast<ComplexScalar>(tmp);
264  }
265  }
266 
267  for (Index partial_index = 0; partial_index < m_size / line_len; ++partial_index) {
268  const Index base_offset = getBaseOffsetFromIndex(partial_index, dim);
269 
270  // get data into line_buf
271  const Index stride = m_strides[dim];
272  if (stride == 1) {
273  m_device.memcpy(line_buf, &buf[base_offset], line_len*sizeof(ComplexScalar));
274  } else {
275  Index offset = base_offset;
276  for (int j = 0; j < line_len; ++j, offset += stride) {
277  line_buf[j] = buf[offset];
278  }
279  }
280 
281  // process the line
282  if (is_power_of_two) {
283  processDataLineCooleyTukey(line_buf, line_len, log_len);
284  }
285  else {
286  processDataLineBluestein(line_buf, line_len, good_composite, log_len, a, b, pos_j_base_powered);
287  }
288 
289  // write back
290  if (FFTDir == FFT_FORWARD && stride == 1) {
291  m_device.memcpy(&buf[base_offset], line_buf, line_len*sizeof(ComplexScalar));
292  } else {
293  Index offset = base_offset;
294  const ComplexScalar div_factor = ComplexScalar(1.0 / line_len, 0);
295  for (int j = 0; j < line_len; ++j, offset += stride) {
296  buf[offset] = (FFTDir == FFT_FORWARD) ? line_buf[j] : line_buf[j] * div_factor;
297  }
298  }
299  }
300  m_device.deallocate(line_buf);
301  if (!is_power_of_two) {
302  m_device.deallocate(a);
303  m_device.deallocate(b);
304  m_device.deallocate(pos_j_base_powered);
305  }
306  }
307 
308  if(!write_to_out) {
309  for (Index i = 0; i < m_size; ++i) {
310  data[i] = PartOf<FFTResultType>()(buf[i]);
311  }
312  m_device.deallocate(buf);
313  }
314  }
315 
316  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static bool isPowerOfTwo(Index x) {
317  eigen_assert(x > 0);
318  return !(x & (x - 1));
319  }
320 
321  // The composite number for padding, used in Bluestein's FFT algorithm
322  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index findGoodComposite(Index n) {
323  Index i = 2;
324  while (i < 2 * n - 1) i *= 2;
325  return i;
326  }
327 
328  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static Index getLog2(Index m) {
329  Index log2m = 0;
330  while (m >>= 1) log2m++;
331  return log2m;
332  }
333 
334  // Call Cooley Tukey algorithm directly, data length must be power of 2
335  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineCooleyTukey(ComplexScalar* line_buf, Index line_len, Index log_len) {
336  eigen_assert(isPowerOfTwo(line_len));
337  scramble_FFT(line_buf, line_len);
338  compute_1D_Butterfly<FFTDir>(line_buf, line_len, log_len);
339  }
340 
341  // Call Bluestein's FFT algorithm, m is a good composite number greater than (2 * n - 1), used as the padding length
342  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void processDataLineBluestein(ComplexScalar* line_buf, Index line_len, Index good_composite, Index log_len, ComplexScalar* a, ComplexScalar* b, const ComplexScalar* pos_j_base_powered) {
343  Index n = line_len;
344  Index m = good_composite;
345  ComplexScalar* data = line_buf;
346 
347  for (Index i = 0; i < n; ++i) {
348  if(FFTDir == FFT_FORWARD) {
349  a[i] = data[i] * numext::conj(pos_j_base_powered[i]);
350  }
351  else {
352  a[i] = data[i] * pos_j_base_powered[i];
353  }
354  }
355  for (Index i = n; i < m; ++i) {
356  a[i] = ComplexScalar(0, 0);
357  }
358 
359  for (Index i = 0; i < n; ++i) {
360  if(FFTDir == FFT_FORWARD) {
361  b[i] = pos_j_base_powered[i];
362  }
363  else {
364  b[i] = numext::conj(pos_j_base_powered[i]);
365  }
366  }
367  for (Index i = n; i < m - n; ++i) {
368  b[i] = ComplexScalar(0, 0);
369  }
370  for (Index i = m - n; i < m; ++i) {
371  if(FFTDir == FFT_FORWARD) {
372  b[i] = pos_j_base_powered[m-i];
373  }
374  else {
375  b[i] = numext::conj(pos_j_base_powered[m-i]);
376  }
377  }
378 
379  scramble_FFT(a, m);
380  compute_1D_Butterfly<FFT_FORWARD>(a, m, log_len);
381 
382  scramble_FFT(b, m);
383  compute_1D_Butterfly<FFT_FORWARD>(b, m, log_len);
384 
385  for (Index i = 0; i < m; ++i) {
386  a[i] *= b[i];
387  }
388 
389  scramble_FFT(a, m);
390  compute_1D_Butterfly<FFT_REVERSE>(a, m, log_len);
391 
392  //Do the scaling after ifft
393  for (Index i = 0; i < m; ++i) {
394  a[i] /= m;
395  }
396 
397  for (Index i = 0; i < n; ++i) {
398  if(FFTDir == FFT_FORWARD) {
399  data[i] = a[i] * numext::conj(pos_j_base_powered[i]);
400  }
401  else {
402  data[i] = a[i] * pos_j_base_powered[i];
403  }
404  }
405  }
406 
407  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE static void scramble_FFT(ComplexScalar* data, Index n) {
408  eigen_assert(isPowerOfTwo(n));
409  Index j = 1;
410  for (Index i = 1; i < n; ++i){
411  if (j > i) {
412  std::swap(data[j-1], data[i-1]);
413  }
414  Index m = n >> 1;
415  while (m >= 2 && j > m) {
416  j -= m;
417  m >>= 1;
418  }
419  j += m;
420  }
421  }
422 
423  template <int Dir>
424  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_2(ComplexScalar* data) {
425  ComplexScalar tmp = data[1];
426  data[1] = data[0] - data[1];
427  data[0] += tmp;
428  }
429 
430  template <int Dir>
431  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_4(ComplexScalar* data) {
432  ComplexScalar tmp[4];
433  tmp[0] = data[0] + data[1];
434  tmp[1] = data[0] - data[1];
435  tmp[2] = data[2] + data[3];
436  if (Dir == FFT_FORWARD) {
437  tmp[3] = ComplexScalar(0.0, -1.0) * (data[2] - data[3]);
438  } else {
439  tmp[3] = ComplexScalar(0.0, 1.0) * (data[2] - data[3]);
440  }
441  data[0] = tmp[0] + tmp[2];
442  data[1] = tmp[1] + tmp[3];
443  data[2] = tmp[0] - tmp[2];
444  data[3] = tmp[1] - tmp[3];
445  }
446 
447  template <int Dir>
448  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_8(ComplexScalar* data) {
449  ComplexScalar tmp_1[8];
450  ComplexScalar tmp_2[8];
451 
452  tmp_1[0] = data[0] + data[1];
453  tmp_1[1] = data[0] - data[1];
454  tmp_1[2] = data[2] + data[3];
455  if (Dir == FFT_FORWARD) {
456  tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, -1);
457  } else {
458  tmp_1[3] = (data[2] - data[3]) * ComplexScalar(0, 1);
459  }
460  tmp_1[4] = data[4] + data[5];
461  tmp_1[5] = data[4] - data[5];
462  tmp_1[6] = data[6] + data[7];
463  if (Dir == FFT_FORWARD) {
464  tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, -1);
465  } else {
466  tmp_1[7] = (data[6] - data[7]) * ComplexScalar(0, 1);
467  }
468  tmp_2[0] = tmp_1[0] + tmp_1[2];
469  tmp_2[1] = tmp_1[1] + tmp_1[3];
470  tmp_2[2] = tmp_1[0] - tmp_1[2];
471  tmp_2[3] = tmp_1[1] - tmp_1[3];
472  tmp_2[4] = tmp_1[4] + tmp_1[6];
473 // SQRT2DIV2 = sqrt(2)/2
474 #define SQRT2DIV2 0.7071067811865476
475  if (Dir == FFT_FORWARD) {
476  tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, -SQRT2DIV2);
477  tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, -1);
478  tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, -SQRT2DIV2);
479  } else {
480  tmp_2[5] = (tmp_1[5] + tmp_1[7]) * ComplexScalar(SQRT2DIV2, SQRT2DIV2);
481  tmp_2[6] = (tmp_1[4] - tmp_1[6]) * ComplexScalar(0, 1);
482  tmp_2[7] = (tmp_1[5] - tmp_1[7]) * ComplexScalar(-SQRT2DIV2, SQRT2DIV2);
483  }
484  data[0] = tmp_2[0] + tmp_2[4];
485  data[1] = tmp_2[1] + tmp_2[5];
486  data[2] = tmp_2[2] + tmp_2[6];
487  data[3] = tmp_2[3] + tmp_2[7];
488  data[4] = tmp_2[0] - tmp_2[4];
489  data[5] = tmp_2[1] - tmp_2[5];
490  data[6] = tmp_2[2] - tmp_2[6];
491  data[7] = tmp_2[3] - tmp_2[7];
492  }
493 
494  template <int Dir>
495  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void butterfly_1D_merge(
496  ComplexScalar* data, Index n, Index n_power_of_2) {
497  // Original code:
498  // RealScalar wtemp = std::sin(M_PI/n);
499  // RealScalar wpi = -std::sin(2 * M_PI/n);
500  const RealScalar wtemp = m_sin_PI_div_n_LUT[n_power_of_2];
501  const RealScalar wpi = (Dir == FFT_FORWARD)
502  ? m_minus_sin_2_PI_div_n_LUT[n_power_of_2]
503  : -m_minus_sin_2_PI_div_n_LUT[n_power_of_2];
504 
505  const ComplexScalar wp(wtemp, wpi);
506  const ComplexScalar wp_one = wp + ComplexScalar(1, 0);
507  const ComplexScalar wp_one_2 = wp_one * wp_one;
508  const ComplexScalar wp_one_3 = wp_one_2 * wp_one;
509  const ComplexScalar wp_one_4 = wp_one_3 * wp_one;
510  const Index n2 = n / 2;
511  ComplexScalar w(1.0, 0.0);
512  for (Index i = 0; i < n2; i += 4) {
513  ComplexScalar temp0(data[i + n2] * w);
514  ComplexScalar temp1(data[i + 1 + n2] * w * wp_one);
515  ComplexScalar temp2(data[i + 2 + n2] * w * wp_one_2);
516  ComplexScalar temp3(data[i + 3 + n2] * w * wp_one_3);
517  w = w * wp_one_4;
518 
519  data[i + n2] = data[i] - temp0;
520  data[i] += temp0;
521 
522  data[i + 1 + n2] = data[i + 1] - temp1;
523  data[i + 1] += temp1;
524 
525  data[i + 2 + n2] = data[i + 2] - temp2;
526  data[i + 2] += temp2;
527 
528  data[i + 3 + n2] = data[i + 3] - temp3;
529  data[i + 3] += temp3;
530  }
531  }
532 
533  template <int Dir>
534  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void compute_1D_Butterfly(
535  ComplexScalar* data, Index n, Index n_power_of_2) {
536  eigen_assert(isPowerOfTwo(n));
537  if (n > 8) {
538  compute_1D_Butterfly<Dir>(data, n / 2, n_power_of_2 - 1);
539  compute_1D_Butterfly<Dir>(data + n / 2, n / 2, n_power_of_2 - 1);
540  butterfly_1D_merge<Dir>(data, n, n_power_of_2);
541  } else if (n == 8) {
542  butterfly_8<Dir>(data);
543  } else if (n == 4) {
544  butterfly_4<Dir>(data);
545  } else if (n == 2) {
546  butterfly_2<Dir>(data);
547  }
548  }
549 
550  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getBaseOffsetFromIndex(Index index, Index omitted_dim) const {
551  Index result = 0;
552 
553  if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
554  for (int i = NumDims - 1; i > omitted_dim; --i) {
555  const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
556  const Index idx = index / partial_m_stride;
557  index -= idx * partial_m_stride;
558  result += idx * m_strides[i];
559  }
560  result += index;
561  }
562  else {
563  for (Index i = 0; i < omitted_dim; ++i) {
564  const Index partial_m_stride = m_strides[i] / m_dimensions[omitted_dim];
565  const Index idx = index / partial_m_stride;
566  index -= idx * partial_m_stride;
567  result += idx * m_strides[i];
568  }
569  result += index;
570  }
571  // Value of index_coords[omitted_dim] is not determined to this step
572  return result;
573  }
574 
575  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const {
576  Index result = base + offset * m_strides[omitted_dim] ;
577  return result;
578  }
579 
580  protected:
588 
589  // This will support a maximum FFT size of 2^32 for each dimension
590  // m_sin_PI_div_n_LUT[i] = (-2) * std::sin(M_PI / std::pow(2,i)) ^ 2;
591  const RealScalar m_sin_PI_div_n_LUT[32] = {
592  RealScalar(0.0),
593  RealScalar(-2),
594  RealScalar(-0.999999999999999),
595  RealScalar(-0.292893218813453),
596  RealScalar(-0.0761204674887130),
597  RealScalar(-0.0192147195967696),
598  RealScalar(-0.00481527332780311),
599  RealScalar(-0.00120454379482761),
600  RealScalar(-3.01181303795779e-04),
601  RealScalar(-7.52981608554592e-05),
602  RealScalar(-1.88247173988574e-05),
603  RealScalar(-4.70619042382852e-06),
604  RealScalar(-1.17654829809007e-06),
605  RealScalar(-2.94137117780840e-07),
606  RealScalar(-7.35342821488550e-08),
607  RealScalar(-1.83835707061916e-08),
608  RealScalar(-4.59589268710903e-09),
609  RealScalar(-1.14897317243732e-09),
610  RealScalar(-2.87243293150586e-10),
611  RealScalar( -7.18108232902250e-11),
612  RealScalar(-1.79527058227174e-11),
613  RealScalar(-4.48817645568941e-12),
614  RealScalar(-1.12204411392298e-12),
615  RealScalar(-2.80511028480785e-13),
616  RealScalar(-7.01277571201985e-14),
617  RealScalar(-1.75319392800498e-14),
618  RealScalar(-4.38298482001247e-15),
619  RealScalar(-1.09574620500312e-15),
620  RealScalar(-2.73936551250781e-16),
621  RealScalar(-6.84841378126949e-17),
622  RealScalar(-1.71210344531737e-17),
623  RealScalar(-4.28025861329343e-18)
624  };
625 
626  // m_minus_sin_2_PI_div_n_LUT[i] = -std::sin(2 * M_PI / std::pow(2,i));
627  const RealScalar m_minus_sin_2_PI_div_n_LUT[32] = {
628  RealScalar(0.0),
629  RealScalar(0.0),
630  RealScalar(-1.00000000000000e+00),
631  RealScalar(-7.07106781186547e-01),
632  RealScalar(-3.82683432365090e-01),
633  RealScalar(-1.95090322016128e-01),
634  RealScalar(-9.80171403295606e-02),
635  RealScalar(-4.90676743274180e-02),
636  RealScalar(-2.45412285229123e-02),
637  RealScalar(-1.22715382857199e-02),
638  RealScalar(-6.13588464915448e-03),
639  RealScalar(-3.06795676296598e-03),
640  RealScalar(-1.53398018628477e-03),
641  RealScalar(-7.66990318742704e-04),
642  RealScalar(-3.83495187571396e-04),
643  RealScalar(-1.91747597310703e-04),
644  RealScalar(-9.58737990959773e-05),
645  RealScalar(-4.79368996030669e-05),
646  RealScalar(-2.39684498084182e-05),
647  RealScalar(-1.19842249050697e-05),
648  RealScalar(-5.99211245264243e-06),
649  RealScalar(-2.99605622633466e-06),
650  RealScalar(-1.49802811316901e-06),
651  RealScalar(-7.49014056584716e-07),
652  RealScalar(-3.74507028292384e-07),
653  RealScalar(-1.87253514146195e-07),
654  RealScalar(-9.36267570730981e-08),
655  RealScalar(-4.68133785365491e-08),
656  RealScalar(-2.34066892682746e-08),
657  RealScalar(-1.17033446341373e-08),
658  RealScalar(-5.85167231706864e-09),
659  RealScalar(-2.92583615853432e-09)
660  };
661 };
662 
663 } // end namespace Eigen
664 
665 #endif // EIGEN_CXX11_TENSOR_TENSOR_FFT_H
Matrix3f m
ArrayXXi a
int n
int i
#define EIGEN_ALWAYS_INLINE
#define EIGEN_DEVICE_FUNC
#define eigen_assert(x)
#define EIGEN_PI
RowVector3d w
#define SQRT2DIV2
#define EIGEN_DEVICE_REF
Definition: TensorMacros.h:36
Definition: FFT:177
The tensor base class.
const FFT m_fft
Definition: TensorFFT.h:112
const internal::remove_all_t< typename XprType::Nested > & expression() const
Definition: TensorFFT.h:106
Eigen::internal::nested< TensorFFTOp >::type Nested
Definition: TensorFFT.h:95
OutputScalar CoeffReturnType
Definition: TensorFFT.h:94
Eigen::NumTraits< Scalar >::Real RealScalar
Definition: TensorFFT.h:91
TensorFFTOp(const XprType &expr, const FFT &fft)
Definition: TensorFFT.h:99
Eigen::internal::traits< TensorFFTOp >::Scalar Scalar
Definition: TensorFFT.h:90
Eigen::internal::traits< TensorFFTOp >::StorageKind StorageKind
Definition: TensorFFT.h:96
std::conditional_t< FFTResultType==RealPart||FFTResultType==ImagPart, RealScalar, ComplexScalar > OutputScalar
Definition: TensorFFT.h:93
Eigen::internal::traits< TensorFFTOp >::Index Index
Definition: TensorFFT.h:97
std::complex< RealScalar > ComplexScalar
Definition: TensorFFT.h:92
XprType::Nested m_xpr
Definition: TensorFFT.h:111
const FFT & fft() const
Definition: TensorFFT.h:103
typename remove_all< T >::type remove_all_t
EIGEN_ALWAYS_INLINE T sin(const T &x)
EIGEN_ALWAYS_INLINE T cos(const T &x)
: TensorContractionSycl.h, provides various tensor contraction kernel for SYCL backend
std::array< T, N > array
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_arg_op< typename Derived::Scalar >, const Derived > arg(const Eigen::ArrayBase< Derived > &x)
T operator()(const T &val) const
Definition: TensorFFT.h:31
internal::packet_traits< Scalar >::type type
Definition: TensorMeta.h:55
T operator()(const T &val) const
Definition: TensorFFT.h:47
void processDataLineBluestein(ComplexScalar *line_buf, Index line_len, Index good_composite, Index log_len, ComplexScalar *a, ComplexScalar *b, const ComplexScalar *pos_j_base_powered)
Definition: TensorFFT.h:342
Index getIndexFromOffset(Index base, Index omitted_dim, Index offset) const
Definition: TensorFFT.h:575
void butterfly_1D_merge(ComplexScalar *data, Index n, Index n_power_of_2)
Definition: TensorFFT.h:495
void processDataLineCooleyTukey(ComplexScalar *line_buf, Index line_len, Index log_len)
Definition: TensorFFT.h:335
std::conditional_t< FFTResultType==RealPart||FFTResultType==ImagPart, RealScalar, ComplexScalar > OutputScalar
Definition: TensorFFT.h:128
void compute_1D_Butterfly(ComplexScalar *data, Index n, Index n_power_of_2)
Definition: TensorFFT.h:534
A cost model used to limit the number of threads used for evaluating tensor expression.
const Dimensions & dimensions() const
static constexpr int Layout
const Device EIGEN_DEVICE_REF m_device
Storage::Type EvaluatorPointerType
static constexpr int PacketSize
EvaluatorPointerType data() const
PacketType< CoeffReturnType, Device >::type PacketReturnType
EvaluatorPointerType m_data
std::ptrdiff_t j