NEON/Complex.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_COMPLEX_NEON_H
12 #define EIGEN_COMPLEX_NEON_H
13 
14 #include "../../InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
18 namespace internal {
19 
20 inline uint32x4_t p4ui_CONJ_XOR()
21 {
22 // See bug 1325, clang fails to call vld1q_u64.
23 #if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML
24  uint32x4_t ret = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
25  return ret;
26 #else
27  static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
28  return vld1q_u32( conj_XOR_DATA );
29 #endif
30 }
31 
32 inline uint32x2_t p2ui_CONJ_XOR()
33 {
34  static const uint32_t conj_XOR_DATA[] = { 0x00000000, 0x80000000 };
35  return vld1_u32( conj_XOR_DATA );
36 }
37 
38 //---------- float ----------
39 
40 struct Packet1cf
41 {
42  EIGEN_STRONG_INLINE Packet1cf() {}
43  EIGEN_STRONG_INLINE explicit Packet1cf(const Packet2f& a) : v(a) {}
44  Packet2f v;
45 };
46 struct Packet2cf
47 {
48  EIGEN_STRONG_INLINE Packet2cf() {}
49  EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
50  Packet4f v;
51 };
52 
53 template<> struct packet_traits<std::complex<float> > : default_packet_traits
54 {
55  typedef Packet2cf type;
56  typedef Packet1cf half;
57  enum
58  {
59  Vectorizable = 1,
60  AlignedOnScalar = 1,
61  size = 2,
62 
63  HasAdd = 1,
64  HasSub = 1,
65  HasMul = 1,
66  HasDiv = 1,
67  HasNegate = 1,
68  HasSqrt = 1,
69  HasAbs = 0,
70  HasAbs2 = 0,
71  HasMin = 0,
72  HasMax = 0,
73  HasSetLinear = 0
74  };
75 };
76 
77 template<> struct unpacket_traits<Packet1cf>
78 {
79  typedef std::complex<float> type;
80  typedef Packet1cf half;
81  typedef Packet2f as_real;
82  enum
83  {
84  size = 1,
85  alignment = Aligned16,
86  vectorizable = true,
87  masked_load_available = false,
88  masked_store_available = false
89  };
90 };
91 template<> struct unpacket_traits<Packet2cf>
92 {
93  typedef std::complex<float> type;
94  typedef Packet1cf half;
95  typedef Packet4f as_real;
96  enum
97  {
98  size = 2,
99  alignment = Aligned16,
100  vectorizable = true,
101  masked_load_available = false,
102  masked_store_available = false
103  };
104 };
105 
106 template<> EIGEN_STRONG_INLINE Packet1cf pcast<float,Packet1cf>(const float& a)
107 { return Packet1cf(vset_lane_f32(a, vdup_n_f32(0.f), 0)); }
108 template<> EIGEN_STRONG_INLINE Packet2cf pcast<Packet2f,Packet2cf>(const Packet2f& a)
109 { return Packet2cf(vreinterpretq_f32_u64(vmovl_u32(vreinterpret_u32_f32(a)))); }
110 
111 template<> EIGEN_STRONG_INLINE Packet1cf pset1<Packet1cf>(const std::complex<float>& from)
112 { return Packet1cf(vld1_f32(reinterpret_cast<const float*>(&from))); }
113 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
114 {
115  const float32x2_t r64 = vld1_f32(reinterpret_cast<const float*>(&from));
116  return Packet2cf(vcombine_f32(r64, r64));
117 }
118 
119 template<> EIGEN_STRONG_INLINE Packet1cf padd<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
120 { return Packet1cf(padd<Packet2f>(a.v, b.v)); }
121 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
122 { return Packet2cf(padd<Packet4f>(a.v, b.v)); }
123 
124 template<> EIGEN_STRONG_INLINE Packet1cf psub<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
125 { return Packet1cf(psub<Packet2f>(a.v, b.v)); }
126 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
127 { return Packet2cf(psub<Packet4f>(a.v, b.v)); }
128 
129 template<> EIGEN_STRONG_INLINE Packet1cf pnegate(const Packet1cf& a) { return Packet1cf(pnegate<Packet2f>(a.v)); }
130 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
131 
132 template<> EIGEN_STRONG_INLINE Packet1cf pconj(const Packet1cf& a)
133 {
134  const Packet2ui b = Packet2ui(vreinterpret_u32_f32(a.v));
135  return Packet1cf(vreinterpret_f32_u32(veor_u32(b, p2ui_CONJ_XOR())));
136 }
137 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
138 {
139  const Packet4ui b = Packet4ui(vreinterpretq_u32_f32(a.v));
140  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR())));
141 }
142 
143 template<> EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
144 {
145  Packet2f v1, v2;
146 
147  // Get the real values of a | a1_re | a1_re |
148  v1 = vdup_lane_f32(a.v, 0);
149  // Get the imag values of a | a1_im | a1_im |
150  v2 = vdup_lane_f32(a.v, 1);
151  // Multiply the real a with b
152  v1 = vmul_f32(v1, b.v);
153  // Multiply the imag a with b
154  v2 = vmul_f32(v2, b.v);
155  // Conjugate v2
156  v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
157  // Swap real/imag elements in v2.
158  v2 = vrev64_f32(v2);
159  // Add and return the result
160  return Packet1cf(vadd_f32(v1, v2));
161 }
162 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
163 {
164  Packet4f v1, v2;
165 
166  // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
167  v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
168  // Get the imag values of a | a1_im | a1_im | a2_im | a2_im |
169  v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
170  // Multiply the real a with b
171  v1 = vmulq_f32(v1, b.v);
172  // Multiply the imag a with b
173  v2 = vmulq_f32(v2, b.v);
174  // Conjugate v2
175  v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
176  // Swap real/imag elements in v2.
177  v2 = vrev64q_f32(v2);
178  // Add and return the result
179  return Packet2cf(vaddq_f32(v1, v2));
180 }
181 
182 template<> EIGEN_STRONG_INLINE Packet1cf pcmp_eq(const Packet1cf& a, const Packet1cf& b)
183 {
184  // Compare real and imaginary parts of a and b to get the mask vector:
185  // [re(a[0])==re(b[0]), im(a[0])==im(b[0])]
186  Packet2f eq = pcmp_eq<Packet2f>(a.v, b.v);
187  // Swap real/imag elements in the mask in to get:
188  // [im(a[0])==im(b[0]), re(a[0])==re(b[0])]
189  Packet2f eq_swapped = vrev64_f32(eq);
190  // Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped
191  return Packet1cf(pand<Packet2f>(eq, eq_swapped));
192 }
193 template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b)
194 {
195  // Compare real and imaginary parts of a and b to get the mask vector:
196  // [re(a[0])==re(b[0]), im(a[0])==im(b[0]), re(a[1])==re(b[1]), im(a[1])==im(b[1])]
197  Packet4f eq = pcmp_eq<Packet4f>(a.v, b.v);
198  // Swap real/imag elements in the mask in to get:
199  // [im(a[0])==im(b[0]), re(a[0])==re(b[0]), im(a[1])==im(b[1]), re(a[1])==re(b[1])]
200  Packet4f eq_swapped = vrev64q_f32(eq);
201  // Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped
202  return Packet2cf(pand<Packet4f>(eq, eq_swapped));
203 }
204 
205 template<> EIGEN_STRONG_INLINE Packet1cf pand<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
206 { return Packet1cf(vreinterpret_f32_u32(vand_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
207 template<> EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
208 { return Packet2cf(vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
209 
210 template<> EIGEN_STRONG_INLINE Packet1cf por<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
211 { return Packet1cf(vreinterpret_f32_u32(vorr_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
212 template<> EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
213 { return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
214 
215 template<> EIGEN_STRONG_INLINE Packet1cf pxor<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
216 { return Packet1cf(vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
217 template<> EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
218 { return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
219 
220 template<> EIGEN_STRONG_INLINE Packet1cf pandnot<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
221 { return Packet1cf(vreinterpret_f32_u32(vbic_u32(vreinterpret_u32_f32(a.v), vreinterpret_u32_f32(b.v)))); }
222 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
223 { return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v), vreinterpretq_u32_f32(b.v)))); }
224 
225 template<> EIGEN_STRONG_INLINE Packet1cf pload<Packet1cf>(const std::complex<float>* from)
226 { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cf(pload<Packet2f>((const float*)from)); }
227 template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from)
228 { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>(reinterpret_cast<const float*>(from))); }
229 
230 template<> EIGEN_STRONG_INLINE Packet1cf ploadu<Packet1cf>(const std::complex<float>* from)
231 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cf(ploadu<Packet2f>((const float*)from)); }
232 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from)
233 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>(reinterpret_cast<const float*>(from))); }
234 
235 template<> EIGEN_STRONG_INLINE Packet1cf ploaddup<Packet1cf>(const std::complex<float>* from)
236 { return pset1<Packet1cf>(*from); }
237 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from)
238 { return pset1<Packet2cf>(*from); }
239 
240 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *to, const Packet1cf& from)
241 { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
242 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> *to, const Packet2cf& from)
243 { EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<float*>(to), from.v); }
244 
245 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *to, const Packet1cf& from)
246 { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
247 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> *to, const Packet2cf& from)
248 { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<float*>(to), from.v); }
249 
250 template<> EIGEN_DEVICE_FUNC inline Packet1cf pgather<std::complex<float>, Packet1cf>(
251  const std::complex<float>* from, Index stride)
252 {
253  const Packet2f tmp = vdup_n_f32(std::real(from[0*stride]));
254  return Packet1cf(vset_lane_f32(std::imag(from[0*stride]), tmp, 1));
255 }
256 template<> EIGEN_DEVICE_FUNC inline Packet2cf pgather<std::complex<float>, Packet2cf>(
257  const std::complex<float>* from, Index stride)
258 {
259  Packet4f res = vdupq_n_f32(std::real(from[0*stride]));
260  res = vsetq_lane_f32(std::imag(from[0*stride]), res, 1);
261  res = vsetq_lane_f32(std::real(from[1*stride]), res, 2);
262  res = vsetq_lane_f32(std::imag(from[1*stride]), res, 3);
263  return Packet2cf(res);
264 }
265 
266 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet1cf>(
267  std::complex<float>* to, const Packet1cf& from, Index stride)
268 { to[stride*0] = std::complex<float>(vget_lane_f32(from.v, 0), vget_lane_f32(from.v, 1)); }
269 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet2cf>(
270  std::complex<float>* to, const Packet2cf& from, Index stride)
271 {
272  to[stride*0] = std::complex<float>(vgetq_lane_f32(from.v, 0), vgetq_lane_f32(from.v, 1));
273  to[stride*1] = std::complex<float>(vgetq_lane_f32(from.v, 2), vgetq_lane_f32(from.v, 3));
274 }
275 
276 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> *addr)
277 { EIGEN_ARM_PREFETCH(reinterpret_cast<const float*>(addr)); }
278 
279 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet1cf>(const Packet1cf& a)
280 {
281  EIGEN_ALIGN16 std::complex<float> x;
282  vst1_f32(reinterpret_cast<float*>(&x), a.v);
283  return x;
284 }
285 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
286 {
287  EIGEN_ALIGN16 std::complex<float> x[2];
288  vst1q_f32(reinterpret_cast<float*>(x), a.v);
289  return x[0];
290 }
291 
292 template<> EIGEN_STRONG_INLINE Packet1cf preverse(const Packet1cf& a) { return a; }
293 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
294 { return Packet2cf(vcombine_f32(vget_high_f32(a.v), vget_low_f32(a.v))); }
295 
296 template<> EIGEN_STRONG_INLINE Packet1cf pcplxflip<Packet1cf>(const Packet1cf& a)
297 { return Packet1cf(vrev64_f32(a.v)); }
298 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
299 { return Packet2cf(vrev64q_f32(a.v)); }
300 
301 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet1cf>(const Packet1cf& a)
302 {
303  std::complex<float> s;
304  vst1_f32((float *)&s, a.v);
305  return s;
306 }
307 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
308 {
309  std::complex<float> s;
310  vst1_f32(reinterpret_cast<float*>(&s), vadd_f32(vget_low_f32(a.v), vget_high_f32(a.v)));
311  return s;
312 }
313 
314 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet1cf>(const Packet1cf& a)
315 {
316  std::complex<float> s;
317  vst1_f32((float *)&s, a.v);
318  return s;
319 }
320 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
321 {
322  float32x2_t a1, a2, v1, v2, prod;
323  std::complex<float> s;
324 
325  a1 = vget_low_f32(a.v);
326  a2 = vget_high_f32(a.v);
327  // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
328  v1 = vdup_lane_f32(a1, 0);
329  // Get the real values of a | a1_im | a1_im | a2_im | a2_im |
330  v2 = vdup_lane_f32(a1, 1);
331  // Multiply the real a with b
332  v1 = vmul_f32(v1, a2);
333  // Multiply the imag a with b
334  v2 = vmul_f32(v2, a2);
335  // Conjugate v2
336  v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
337  // Swap real/imag elements in v2.
338  v2 = vrev64_f32(v2);
339  // Add v1, v2
340  prod = vadd_f32(v1, v2);
341 
342  vst1_f32(reinterpret_cast<float*>(&s), prod);
343 
344  return s;
345 }
346 
349 
350 template<> EIGEN_STRONG_INLINE Packet1cf pdiv<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
351 {
352  return pdiv_complex(a, b);
353 }
354 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
355 {
356  return pdiv_complex(a, b);
357 }
358 
359 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet1cf, 1>& /*kernel*/) {}
360 EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet2cf, 2>& kernel)
361 {
362  Packet4f tmp = vcombine_f32(vget_high_f32(kernel.packet[0].v), vget_high_f32(kernel.packet[1].v));
363  kernel.packet[0].v = vcombine_f32(vget_low_f32(kernel.packet[0].v), vget_low_f32(kernel.packet[1].v));
364  kernel.packet[1].v = tmp;
365 }
366 
367 template<> EIGEN_STRONG_INLINE Packet1cf psqrt<Packet1cf>(const Packet1cf& a) {
368  return psqrt_complex<Packet1cf>(a);
369 }
370 
371 template<> EIGEN_STRONG_INLINE Packet2cf psqrt<Packet2cf>(const Packet2cf& a) {
372  return psqrt_complex<Packet2cf>(a);
373 }
374 
375 //---------- double ----------
376 #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
377 
378 // See bug 1325, clang fails to call vld1q_u64.
379 #if EIGEN_COMP_CLANG || EIGEN_COMP_CASTXML || EIGEN_COMP_CPE
380  static uint64x2_t p2ul_CONJ_XOR = {0x0, 0x8000000000000000};
381 #else
382  const uint64_t p2ul_conj_XOR_DATA[] = { 0x0, 0x8000000000000000 };
383  static uint64x2_t p2ul_CONJ_XOR = vld1q_u64( p2ul_conj_XOR_DATA );
384 #endif
385 
386 struct Packet1cd
387 {
388  EIGEN_STRONG_INLINE Packet1cd() {}
389  EIGEN_STRONG_INLINE explicit Packet1cd(const Packet2d& a) : v(a) {}
390  Packet2d v;
391 };
392 
393 template<> struct packet_traits<std::complex<double> > : default_packet_traits
394 {
395  typedef Packet1cd type;
396  typedef Packet1cd half;
397  enum
398  {
399  Vectorizable = 1,
400  AlignedOnScalar = 0,
401  size = 1,
402 
403  HasAdd = 1,
404  HasSub = 1,
405  HasMul = 1,
406  HasDiv = 1,
407  HasNegate = 1,
408  HasSqrt = 1,
409  HasAbs = 0,
410  HasAbs2 = 0,
411  HasMin = 0,
412  HasMax = 0,
413  HasSetLinear = 0
414  };
415 };
416 
417 template<> struct unpacket_traits<Packet1cd>
418 {
419  typedef std::complex<double> type;
420  typedef Packet1cd half;
421  typedef Packet2d as_real;
422  enum
423  {
424  size=1,
425  alignment=Aligned16,
426  vectorizable=true,
427  masked_load_available=false,
428  masked_store_available=false
429  };
430 };
431 
432 template<> EIGEN_STRONG_INLINE Packet1cd pload<Packet1cd>(const std::complex<double>* from)
433 { EIGEN_DEBUG_ALIGNED_LOAD return Packet1cd(pload<Packet2d>(reinterpret_cast<const double*>(from))); }
434 
435 template<> EIGEN_STRONG_INLINE Packet1cd ploadu<Packet1cd>(const std::complex<double>* from)
436 { EIGEN_DEBUG_UNALIGNED_LOAD return Packet1cd(ploadu<Packet2d>(reinterpret_cast<const double*>(from))); }
437 
438 template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
439 {
440  /* here we really have to use unaligned loads :( */
441  return ploadu<Packet1cd>(&from);
442 }
443 
444 template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
445 { return Packet1cd(padd<Packet2d>(a.v, b.v)); }
446 
447 template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
448 { return Packet1cd(psub<Packet2d>(a.v, b.v)); }
449 
450 template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a)
451 { return Packet1cd(pnegate<Packet2d>(a.v)); }
452 
453 template<> EIGEN_STRONG_INLINE Packet1cd pconj(const Packet1cd& a)
454 { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v), p2ul_CONJ_XOR))); }
455 
456 template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
457 {
458  Packet2d v1, v2;
459 
460  // Get the real values of a
461  v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
462  // Get the imag values of a
463  v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
464  // Multiply the real a with b
465  v1 = vmulq_f64(v1, b.v);
466  // Multiply the imag a with b
467  v2 = vmulq_f64(v2, b.v);
468  // Conjugate v2
469  v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
470  // Swap real/imag elements in v2.
471  v2 = preverse<Packet2d>(v2);
472  // Add and return the result
473  return Packet1cd(vaddq_f64(v1, v2));
474 }
475 
476 template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b)
477 {
478  // Compare real and imaginary parts of a and b to get the mask vector:
479  // [re(a)==re(b), im(a)==im(b)]
480  Packet2d eq = pcmp_eq<Packet2d>(a.v, b.v);
481  // Swap real/imag elements in the mask in to get:
482  // [im(a)==im(b), re(a)==re(b)]
483  Packet2d eq_swapped = vreinterpretq_f64_u32(vrev64q_u32(vreinterpretq_u32_f64(eq)));
484  // Return re(a)==re(b) & im(a)==im(b) by computing bitwise AND of eq and eq_swapped
485  return Packet1cd(pand<Packet2d>(eq, eq_swapped));
486 }
487 
488 template<> EIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
489 { return Packet1cd(vreinterpretq_f64_u64(vandq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
490 
491 template<> EIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
492 { return Packet1cd(vreinterpretq_f64_u64(vorrq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
493 
494 template<> EIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
495 { return Packet1cd(vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
496 
497 template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
498 { return Packet1cd(vreinterpretq_f64_u64(vbicq_u64(vreinterpretq_u64_f64(a.v),vreinterpretq_u64_f64(b.v)))); }
499 
500 template<> EIGEN_STRONG_INLINE Packet1cd ploaddup<Packet1cd>(const std::complex<double>* from)
501 { return pset1<Packet1cd>(*from); }
502 
503 template<> EIGEN_STRONG_INLINE void pstore <std::complex<double> >(std::complex<double> *to, const Packet1cd& from)
504 { EIGEN_DEBUG_ALIGNED_STORE pstore(reinterpret_cast<double*>(to), from.v); }
505 
506 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<double> >(std::complex<double> *to, const Packet1cd& from)
507 { EIGEN_DEBUG_UNALIGNED_STORE pstoreu(reinterpret_cast<double*>(to), from.v); }
508 
509 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<double> >(const std::complex<double> *addr)
510 { EIGEN_ARM_PREFETCH(reinterpret_cast<const double*>(addr)); }
511 
512 template<> EIGEN_DEVICE_FUNC inline Packet1cd pgather<std::complex<double>, Packet1cd>(
513  const std::complex<double>* from, Index stride)
514 {
516  res = vsetq_lane_f64(std::real(from[0*stride]), res, 0);
517  res = vsetq_lane_f64(std::imag(from[0*stride]), res, 1);
518  return Packet1cd(res);
519 }
520 
521 template<> EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet1cd>(
522  std::complex<double>* to, const Packet1cd& from, Index stride)
523 { to[stride*0] = std::complex<double>(vgetq_lane_f64(from.v, 0), vgetq_lane_f64(from.v, 1)); }
524 
525 template<> EIGEN_STRONG_INLINE std::complex<double> pfirst<Packet1cd>(const Packet1cd& a)
526 {
527  EIGEN_ALIGN16 std::complex<double> res;
528  pstore<std::complex<double> >(&res, a);
529  return res;
530 }
531 
532 template<> EIGEN_STRONG_INLINE Packet1cd preverse(const Packet1cd& a) { return a; }
533 
534 template<> EIGEN_STRONG_INLINE std::complex<double> predux<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
535 
536 template<> EIGEN_STRONG_INLINE std::complex<double> predux_mul<Packet1cd>(const Packet1cd& a) { return pfirst(a); }
537 
539 
540 template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
541 {
542  return pdiv_complex(a, b);
543 }
544 
545 EIGEN_STRONG_INLINE Packet1cd pcplxflip/*<Packet1cd>*/(const Packet1cd& x)
546 { return Packet1cd(preverse(Packet2d(x.v))); }
547 
548 EIGEN_STRONG_INLINE void ptranspose(PacketBlock<Packet1cd,2>& kernel)
549 {
550  Packet2d tmp = vcombine_f64(vget_high_f64(kernel.packet[0].v), vget_high_f64(kernel.packet[1].v));
551  kernel.packet[0].v = vcombine_f64(vget_low_f64(kernel.packet[0].v), vget_low_f64(kernel.packet[1].v));
552  kernel.packet[1].v = tmp;
553 }
554 
555 template<> EIGEN_STRONG_INLINE Packet1cd psqrt<Packet1cd>(const Packet1cd& a) {
556  return psqrt_complex<Packet1cd>(a);
557 }
558 
559 #endif // EIGEN_ARCH_ARM64
560 
561 } // end namespace internal
562 
563 } // end namespace Eigen
564 
565 #endif // EIGEN_COMPLEX_NEON_H
Array< int, Dynamic, 1 > v
Array< int, 3, 1 > b
const ImagReturnType imag() const
RealReturnType real() const
#define EIGEN_ALIGN16
#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL)
Definition: ConjHelper.h:14
#define EIGEN_DEBUG_ALIGNED_STORE
#define EIGEN_DEBUG_ALIGNED_LOAD
#define EIGEN_DEBUG_UNALIGNED_STORE
#define EIGEN_DEBUG_UNALIGNED_LOAD
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:883
#define EIGEN_ARM_PREFETCH(ADDR)
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Map< RowVectorXf > v2(M2.data(), M2.size())
M1<< 1, 2, 3, 4, 5, 6, 7, 8, 9;Map< RowVectorXf > v1(M1.data(), M1.size())
@ Aligned16
Definition: Constants.h:237
std::complex< float > predux< Packet1cf >(const Packet1cf &a)
Definition: NEON/Complex.h:301
Packet1cf por< Packet1cf >(const Packet1cf &a, const Packet1cf &b)
Definition: NEON/Complex.h:210
uint32x2_t p2ui_CONJ_XOR()
Definition: NEON/Complex.h:32
Packet1cd pxor< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: MSA/Complex.h:516
Packet2cf pandnot< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
void pstore(Scalar *to, const Packet &from)
Packet1cf psqrt< Packet1cf >(const Packet1cf &a)
Definition: NEON/Complex.h:367
Packet1cf ploadu< Packet1cf >(const std::complex< float > *from)
Definition: NEON/Complex.h:230
std::complex< float > predux< Packet2cf >(const Packet2cf &a)
uint32x2_t Packet2ui
Packet1cd pload< Packet1cd >(const std::complex< double > *from)
Definition: MSA/Complex.h:446
Packet2cf padd< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Packet2cf ploaddup< Packet2cf >(const std::complex< float > *from)
std::complex< float > pfirst< Packet2cf >(const Packet2cf &a)
Packet2cf pset1< Packet2cf >(const std::complex< float > &from)
Packet1cf pandnot< Packet1cf >(const Packet1cf &a, const Packet1cf &b)
Definition: NEON/Complex.h:220
Packet1cd pmul< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: MSA/Complex.h:495
Packet1cf ploaddup< Packet1cf >(const std::complex< float > *from)
Definition: NEON/Complex.h:235
Packet1cd pdiv< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: MSA/Complex.h:611
Packet1cd pandnot< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: MSA/Complex.h:523
Packet2cf ploadu< Packet2cf >(const std::complex< float > *from)
Packet1cd padd< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: MSA/Complex.h:467
Packet2cf pnegate(const Packet2cf &a)
Packet1cd ploadu< Packet1cd >(const std::complex< double > *from)
Definition: MSA/Complex.h:453
Packet1cf pxor< Packet1cf >(const Packet1cf &a, const Packet1cf &b)
Definition: NEON/Complex.h:215
Packet4f pand< Packet4f >(const Packet4f &a, const Packet4f &b)
Packet2cf por< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Packet1cf pmul< Packet1cf >(const Packet1cf &a, const Packet1cf &b)
Definition: NEON/Complex.h:143
Packet1cd pcplxflip(const Packet1cd &x)
Definition: MSA/Complex.h:617
Packet4f pcmp_eq< Packet4f >(const Packet4f &a, const Packet4f &b)
Packet2d pand< Packet2d >(const Packet2d &a, const Packet2d &b)
std::complex< double > pfirst< Packet1cd >(const Packet1cd &a)
Definition: MSA/Complex.h:581
void pstoreu(Scalar *to, const Packet &from)
Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Packet1cf pset1< Packet1cf >(const std::complex< float > &from)
Definition: NEON/Complex.h:111
bfloat16 pfirst(const Packet8bf &a)
Packet2cf pmul< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Definition: MSA/Complex.h:171
__vector unsigned int Packet4ui
void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
static Packet4ui p4ui_CONJ_XOR
Packet2f padd< Packet2f >(const Packet2f &a, const Packet2f &b)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pdiv_complex(const Packet &x, const Packet &y)
Packet2cf pcplxflip< Packet2cf >(const Packet2cf &x)
Packet1cd pand< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: MSA/Complex.h:502
Packet2cf pxor< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Packet4f padd< Packet4f >(const Packet4f &a, const Packet4f &b)
Packet4f ploadu< Packet4f >(const float *from)
std::complex< double > predux_mul< Packet1cd >(const Packet1cd &a)
Definition: MSA/Complex.h:602
Packet2d padd< Packet2d >(const Packet2d &a, const Packet2d &b)
std::complex< float > predux_mul< Packet1cf >(const Packet1cf &a)
Definition: NEON/Complex.h:314
Packet1cf pand< Packet1cf >(const Packet1cf &a, const Packet1cf &b)
Definition: NEON/Complex.h:205
Packet2f pand< Packet2f >(const Packet2f &a, const Packet2f &b)
std::complex< double > predux< Packet1cd >(const Packet1cd &a)
Definition: MSA/Complex.h:595
Packet2f psub< Packet2f >(const Packet2f &a, const Packet2f &b)
Packet2d pset1< Packet2d >(const double &from)
Packet1cd por< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: MSA/Complex.h:509
Packet2d ploadu< Packet2d >(const double *from)
Packet1cf pcast< float, Packet1cf >(const float &a)
Definition: NEON/Complex.h:106
Packet2cf pload< Packet2cf >(const std::complex< float > *from)
float32x2_t Packet2f
Packet2d psub< Packet2d >(const Packet2d &a, const Packet2d &b)
Packet2cf pand< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Packet1cd ploaddup< Packet1cd >(const std::complex< double > *from)
Definition: MSA/Complex.h:530
Packet1cd psqrt< Packet1cd >(const Packet1cd &a)
Definition: SSE/Complex.h:319
Packet pdiv(const Packet &a, const Packet &b)
Packet1cd pset1< Packet1cd >(const std::complex< double > &from)
Definition: MSA/Complex.h:460
Packet2cf pconj(const Packet2cf &a)
std::complex< float > pfirst< Packet1cf >(const Packet1cf &a)
Definition: NEON/Complex.h:279
Packet2cf pdiv< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Packet2f ploadu< Packet2f >(const float *from)
Packet2d pload< Packet2d >(const double *from)
Packet4f psub< Packet4f >(const Packet4f &a, const Packet4f &b)
Packet2cf preverse(const Packet2cf &a)
Packet4f pload< Packet4f >(const float *from)
Packet2f pload< Packet2f >(const float *from)
__vector float Packet4f
Packet1cf pload< Packet1cf >(const std::complex< float > *from)
Definition: NEON/Complex.h:225
Packet1cf psub< Packet1cf >(const Packet1cf &a, const Packet1cf &b)
Definition: NEON/Complex.h:124
Packet2cf pcast< Packet2f, Packet2cf >(const Packet2f &a)
Definition: NEON/Complex.h:108
Packet2f pcmp_eq< Packet2f >(const Packet2f &a, const Packet2f &b)
Packet1cf pcplxflip< Packet1cf >(const Packet1cf &a)
Definition: NEON/Complex.h:296
Packet2cf psub< Packet2cf >(const Packet2cf &a, const Packet2cf &b)
Packet1cd psub< Packet1cd >(const Packet1cd &a, const Packet1cd &b)
Definition: MSA/Complex.h:474
Packet1cf padd< Packet1cf >(const Packet1cf &a, const Packet1cf &b)
Definition: NEON/Complex.h:119
Packet2cf psqrt< Packet2cf >(const Packet2cf &a)
std::complex< float > predux_mul< Packet2cf >(const Packet2cf &a)
std::uint32_t uint32_t
Definition: Meta.h:39
std::uint64_t uint64_t
Definition: Meta.h:41
: InteropHeaders
Definition: Core:139
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:82
Definition: BFloat16.h:222