10 #ifndef EIGEN_COMPLEX_AVX_H
11 #define EIGEN_COMPLEX_AVX_H
13 #include "../../InternalHeaderCheck.h"
// Default constructor: empty body and no member-initializer list, so it does
// not assign `v` itself.
22 EIGEN_STRONG_INLINE Packet4cf() {}
23 EIGEN_STRONG_INLINE
explicit Packet4cf(
const __m256&
a) :
v(
a) {}
// Traits for std::complex<float>; this specialization is compiled only when
// EIGEN_VECTORIZE_AVX512 is not defined.
27 #ifndef EIGEN_VECTORIZE_AVX512
28 template<>
struct packet_traits<
std::complex<float> > : default_packet_traits
// Full-width packet type associated with std::complex<float>.
30 typedef Packet4cf type;
// Packet type registered as its half-width counterpart.
31 typedef Packet2cf half;
// Reverse mapping: properties of the Packet4cf type itself.
52 template<>
struct unpacket_traits<Packet4cf> {
// Scalar type carried by the packet.
53 typedef std::complex<float> type;
// Half-width counterpart of this packet.
54 typedef Packet2cf half;
// Masked loads and masked stores are both declared unavailable for this packet.
60 masked_load_available=
false,
61 masked_store_available=
false
65 template<> EIGEN_STRONG_INLINE Packet4cf
padd<Packet4cf>(
const Packet4cf&
a,
const Packet4cf&
b) {
return Packet4cf(_mm256_add_ps(
a.v,
b.v)); }
66 template<> EIGEN_STRONG_INLINE Packet4cf
psub<Packet4cf>(
const Packet4cf&
a,
const Packet4cf&
b) {
return Packet4cf(_mm256_sub_ps(
a.v,
b.v)); }
// Explicit specialization of pnegate taking a Packet4cf by const reference.
67 template<> EIGEN_STRONG_INLINE Packet4cf
pnegate(
const Packet4cf&
a)
// Conjugates every complex element of a Packet4cf.
71 template<> EIGEN_STRONG_INLINE Packet4cf
pconj(
const Packet4cf&
a)
// Sign-bit mask (0x80000000) in every odd 32-bit lane and zero in every even
// lane; _mm256_setr_epi32 lists lanes lowest-first.
73 const __m256 mask = _mm256_castsi256_ps(_mm256_setr_epi32(0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000,0x00000000,0x80000000));
// XOR flips the sign of the odd lanes (the imaginary parts in the interleaved
// re/im layout) and leaves the even lanes untouched.
74 return Packet4cf(_mm256_xor_ps(
a.v,mask));
// Complex product of two Packet4cf values, computed on interleaved (re, im) floats.
77 template<> EIGEN_STRONG_INLINE Packet4cf
pmul<Packet4cf>(
const Packet4cf&
a,
const Packet4cf&
b)
// moveldup duplicates the even lanes of a, so tmp1 = (a.re*b.re, a.re*b.im) per element.
79 __m256 tmp1 = _mm256_mul_ps(_mm256_moveldup_ps(
a.v),
b.v);
// movehdup duplicates the odd lanes of a and the permute swaps each (re, im)
// pair of b, so tmp2 = (a.im*b.im, a.im*b.re) per element.
80 __m256 tmp2 = _mm256_mul_ps(_mm256_movehdup_ps(
a.v), _mm256_permute_ps(
b.v, _MM_SHUFFLE(2,3,0,1)));
// addsub subtracts in even lanes and adds in odd lanes:
// (a.re*b.re - a.im*b.im, a.re*b.im + a.im*b.re).
81 __m256 result = _mm256_addsub_ps(tmp1, tmp2);
82 return Packet4cf(result);
// Element-wise equality test of two Packet4cf values, returned as a bit mask packet.
86 EIGEN_STRONG_INLINE Packet4cf
pcmp_eq(
const Packet4cf&
a,
const Packet4cf&
b) {
// Per-float comparison; _CMP_EQ_OQ is the ordered, non-signaling "equal"
// predicate, so a lane containing NaN compares false.
87 __m256 eq = _mm256_cmp_ps(
a.v,
b.v, _CMP_EQ_OQ);
// 0xb1 swaps the two floats of each adjacent pair; ANDing with the swapped
// mask leaves a pair set only when both of its floats compared equal.
88 return Packet4cf(_mm256_and_ps(eq, _mm256_permute_ps(eq, 0xb1)));
92 template<> EIGEN_STRONG_INLINE Packet4cf
pand <Packet4cf>(
const Packet4cf&
a,
const Packet4cf&
b) {
return Packet4cf(_mm256_and_ps(
a.v,
b.v)); }
93 template<> EIGEN_STRONG_INLINE Packet4cf
por <Packet4cf>(
const Packet4cf&
a,
const Packet4cf&
b) {
return Packet4cf(_mm256_or_ps(
a.v,
b.v)); }
94 template<> EIGEN_STRONG_INLINE Packet4cf
pxor <Packet4cf>(
const Packet4cf&
a,
const Packet4cf&
b) {
return Packet4cf(_mm256_xor_ps(
a.v,
b.v)); }
95 template<> EIGEN_STRONG_INLINE Packet4cf
pandnot<Packet4cf>(
const Packet4cf&
a,
const Packet4cf&
b) {
return Packet4cf(_mm256_andnot_ps(
b.v,
a.v)); }
// _mm256_set_ps takes its arguments highest lane first, so the resulting lane
// order from lowest to highest is re, im repeated four times.
105 return Packet4cf(_mm256_set_ps(im, re, im, re, im, re, im, re));
// Builds the 256-bit result from two 128-bit halves: a.v becomes the lower
// half (via the cast) and b.v is inserted as the upper half (index 1).
113 return Packet4cf(_mm256_insertf128_ps(_mm256_castps128_ps256(
a.v),
b.v, 1));
// Explicit specialization of pgather producing a Packet4cf from a
// std::complex<float> pointer and a stride.
119 template<>
EIGEN_DEVICE_FUNC inline Packet4cf pgather<std::complex<float>, Packet4cf>(
const std::complex<float>* from,
Index stride)
// Writes the four complex elements of `from` to to[0], to[stride],
// to[2*stride] and to[3*stride].
127 template<>
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<float>, Packet4cf>(std::complex<float>* to,
const Packet4cf& from,
Index stride)
// Lower 128 bits hold elements 0 and 1.
129 __m128 low = _mm256_extractf128_ps(from.v, 0);
// _mm_shuffle_ps(x, x, k) moves float k of x into position 0, which
// _mm_cvtss_f32 then extracts; floats (0, 1) form the first complex value.
130 to[stride*0] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 0)),
131 _mm_cvtss_f32(_mm_shuffle_ps(low, low, 1)));
// Floats (2, 3) of the lower half form the second complex value.
132 to[stride*1] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(low, low, 2)),
133 _mm_cvtss_f32(_mm_shuffle_ps(low, low, 3)));
// Upper 128 bits hold elements 2 and 3; same extraction pattern as above.
135 __m128 high = _mm256_extractf128_ps(from.v, 1);
136 to[stride*2] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 0)),
137 _mm_cvtss_f32(_mm_shuffle_ps(high, high, 1)));
138 to[stride*3] = std::complex<float>(_mm_cvtss_f32(_mm_shuffle_ps(high, high, 2)),
139 _mm_cvtss_f32(_mm_shuffle_ps(high, high, 3)));
// Returns the first complex element of a Packet4cf.
143 template<> EIGEN_STRONG_INLINE std::complex<float>
pfirst<Packet4cf>(
const Packet4cf&
a)
// Reinterprets the lower 128 bits as a Packet2cf and delegates to its pfirst.
145 return pfirst(Packet2cf(_mm256_castps256_ps128(
a.v)));
// Reverses the order of the four complex elements of a Packet4cf.
148 template<> EIGEN_STRONG_INLINE Packet4cf
preverse(
const Packet4cf&
a) {
149 __m128 low = _mm256_extractf128_ps(
a.v, 0);
150 __m128 high = _mm256_extractf128_ps(
a.v, 1);
// View each 128-bit half as two 64-bit values so that one complex<float>
// (two floats) moves as a unit.
151 __m128d lowd = _mm_castps_pd(low);
152 __m128d highd = _mm_castps_pd(high);
// Shuffle mask 0x1 swaps the two 64-bit values inside each half.
153 low = _mm_castpd_ps(_mm_shuffle_pd(lowd,lowd,0x1));
154 high = _mm_castpd_ps(_mm_shuffle_pd(highd,highd,0x1));
// Zero-initialised placeholder; both halves are overwritten below.
155 __m256 result = _mm256_setzero_ps();
// Swap the halves: the former low half goes to the upper position (1) and the
// former high half to the lower position (0).
156 result = _mm256_insertf128_ps(result, low, 1);
157 result = _mm256_insertf128_ps(result, high, 0);
158 return Packet4cf(result);
// Reduces a Packet4cf to a single std::complex<float> via padd/predux.
161 template<> EIGEN_STRONG_INLINE std::complex<float>
predux<Packet4cf>(
const Packet4cf&
a)
// Adds the lower and upper 128-bit halves as two Packet2cf values, then
// delegates the remaining reduction to predux on the Packet2cf result.
163 return predux(
padd(Packet2cf(_mm256_extractf128_ps(
a.v,0)),
164 Packet2cf(_mm256_extractf128_ps(
a.v,1))));
// Second operand of the enclosing expression: the upper 128-bit half of a.v
// wrapped as a Packet2cf.
170 Packet2cf(_mm256_extractf128_ps(
a.v, 1))));
// Explicit specialization of pdiv for two Packet4cf operands.
176 template<> EIGEN_STRONG_INLINE Packet4cf
pdiv<Packet4cf>(const Packet4cf&
a, const Packet4cf&
b)
// _MM_SHUFFLE(2, 3, 0, 1) swaps the two floats of every adjacent pair within
// each 128-bit lane, i.e. exchanges the two components of each complex element.
183 return Packet4cf(_mm256_shuffle_ps(
x.v,
x.v, _MM_SHUFFLE(2, 3, 0 ,1)));
// Default constructor: empty body and no member-initializer list, so it does
// not assign `v` itself.
189 EIGEN_STRONG_INLINE Packet2cd() {}
190 EIGEN_STRONG_INLINE
explicit Packet2cd(
const __m256d&
a) :
v(
a) {}
// Traits for std::complex<double>; this specialization is compiled only when
// EIGEN_VECTORIZE_AVX512 is not defined.
194 #ifndef EIGEN_VECTORIZE_AVX512
195 template<>
struct packet_traits<
std::complex<double> > : default_packet_traits
// Full-width packet type associated with std::complex<double>.
197 typedef Packet2cd type;
// Packet type registered as its half-width counterpart.
198 typedef Packet1cd half;
// Reverse mapping: properties of the Packet2cd type itself.
219 template<>
struct unpacket_traits<Packet2cd> {
// Scalar type carried by the packet.
220 typedef std::complex<double> type;
// Half-width counterpart of this packet.
221 typedef Packet1cd half;
// Masked loads and masked stores are both declared unavailable for this packet.
227 masked_load_available=
false,
228 masked_store_available=
false
232 template<> EIGEN_STRONG_INLINE Packet2cd
padd<Packet2cd>(
const Packet2cd&
a,
const Packet2cd&
b) {
return Packet2cd(_mm256_add_pd(
a.v,
b.v)); }
233 template<> EIGEN_STRONG_INLINE Packet2cd
psub<Packet2cd>(
const Packet2cd&
a,
const Packet2cd&
b) {
return Packet2cd(_mm256_sub_pd(
a.v,
b.v)); }
234 template<> EIGEN_STRONG_INLINE Packet2cd
pnegate(
const Packet2cd&
a) {
return Packet2cd(
pnegate(
a.v)); }
// Conjugates both complex elements of a Packet2cd.
235 template<> EIGEN_STRONG_INLINE Packet2cd
pconj(
const Packet2cd&
a)
// _mm256_set_epi32 lists 32-bit lanes highest-first, so 0x80000000 lands in
// lanes 7 and 3: the upper words (sign bits) of doubles 3 and 1.
237 const __m256d mask = _mm256_castsi256_pd(_mm256_set_epi32(0x80000000,0x0,0x0,0x0,0x80000000,0x0,0x0,0x0));
// XOR flips the sign of the odd doubles (the imaginary parts in the
// interleaved re/im layout) and leaves the even doubles untouched.
238 return Packet2cd(_mm256_xor_pd(
a.v,mask));
// Complex product of two Packet2cd values, computed on interleaved (re, im) doubles.
241 template<> EIGEN_STRONG_INLINE Packet2cd
pmul<Packet2cd>(
const Packet2cd&
a,
const Packet2cd&
b)
// Mask 0x0 duplicates the even double of each pair: (a.re, a.re).
243 __m256d tmp1 = _mm256_shuffle_pd(
a.v,
a.v,0x0);
// even = (a.re*b.re, a.re*b.im)
244 __m256d even = _mm256_mul_pd(tmp1,
b.v);
// Mask 0xF duplicates the odd double of each pair: (a.im, a.im).
245 __m256d tmp2 = _mm256_shuffle_pd(
a.v,
a.v,0xF);
// Mask 0x5 swaps the two doubles of each pair: (b.im, b.re).
246 __m256d tmp3 = _mm256_shuffle_pd(
b.v,
b.v,0x5);
// odd = (a.im*b.im, a.im*b.re)
247 __m256d odd = _mm256_mul_pd(tmp2, tmp3);
// addsub subtracts in even lanes and adds in odd lanes:
// (a.re*b.re - a.im*b.im, a.re*b.im + a.im*b.re).
248 return Packet2cd(_mm256_addsub_pd(even, odd));
// Element-wise equality test of two Packet2cd values, returned as a bit mask packet.
252 EIGEN_STRONG_INLINE Packet2cd
pcmp_eq(
const Packet2cd&
a,
const Packet2cd&
b) {
// Per-double comparison; _CMP_EQ_OQ is the ordered, non-signaling "equal"
// predicate, so a lane containing NaN compares false.
253 __m256d eq = _mm256_cmp_pd(
a.v,
b.v, _CMP_EQ_OQ);
// 0x5 swaps the two doubles of each pair; ANDing with the swapped mask leaves
// a pair set only when both of its doubles compared equal.
254 return Packet2cd(
pand(eq, _mm256_permute_pd(eq, 0x5)));
258 template<> EIGEN_STRONG_INLINE Packet2cd
pand <Packet2cd>(
const Packet2cd&
a,
const Packet2cd&
b) {
return Packet2cd(_mm256_and_pd(
a.v,
b.v)); }
259 template<> EIGEN_STRONG_INLINE Packet2cd
por <Packet2cd>(
const Packet2cd&
a,
const Packet2cd&
b) {
return Packet2cd(_mm256_or_pd(
a.v,
b.v)); }
260 template<> EIGEN_STRONG_INLINE Packet2cd
pxor <Packet2cd>(
const Packet2cd&
a,
const Packet2cd&
b) {
return Packet2cd(_mm256_xor_pd(
a.v,
b.v)); }
261 template<> EIGEN_STRONG_INLINE Packet2cd
pandnot<Packet2cd>(
const Packet2cd&
a,
const Packet2cd&
b) {
return Packet2cd(_mm256_andnot_pd(
b.v,
a.v)); }
// Fills both complex slots of a Packet2cd with the same value.
268 template<> EIGEN_STRONG_INLINE Packet2cd
pset1<Packet2cd>(
const std::complex<double>& from)
// Reads the 128 bits at &from (one complex<double>) and broadcasts them to
// both halves of the 256-bit register; the cast goes through const void* to
// reach const __m128d*.
272 return Packet2cd(_mm256_broadcast_pd((
const __m128d*)(
const void*)&from));
277 template<> EIGEN_STRONG_INLINE
void pstore <std::complex<double> >(std::complex<double> * to,
const Packet2cd& from) {
EIGEN_DEBUG_ALIGNED_STORE pstore((
double*)to, from.v); }
// Explicit specialization of pgather producing a Packet2cd from a
// std::complex<double> pointer and a stride.
280 template<>
EIGEN_DEVICE_FUNC inline Packet2cd pgather<std::complex<double>, Packet2cd>(
const std::complex<double>* from,
Index stride)
// Writes the two complex elements of `from` to to[0] and to[stride].
286 template<>
EIGEN_DEVICE_FUNC inline void pscatter<std::complex<double>, Packet2cd>(std::complex<double>* to,
const Packet2cd& from,
Index stride)
// Lower 128 bits hold the first complex value.
288 __m128d low = _mm256_extractf128_pd(from.v, 0);
// _mm_cvtsd_f64 reads double 0; shuffle mask 1 moves double 1 into position 0
// so that it can be read the same way.
289 to[stride*0] = std::complex<double>(_mm_cvtsd_f64(low), _mm_cvtsd_f64(_mm_shuffle_pd(low, low, 1)));
// Upper 128 bits hold the second complex value; same extraction pattern.
290 __m128d high = _mm256_extractf128_pd(from.v, 1);
291 to[stride*1] = std::complex<double>(_mm_cvtsd_f64(high), _mm_cvtsd_f64(_mm_shuffle_pd(high, high, 1)));
// Returns the first complex element of a Packet2cd.
294 template<> EIGEN_STRONG_INLINE std::complex<double>
pfirst<Packet2cd>(
const Packet2cd&
a)
// Lower 128 bits hold the first complex value.
296 __m128d low = _mm256_extractf128_pd(
a.v, 0);
// _mm_store_pd is an aligned store, so `res` must be a 16-byte-aligned buffer
// of at least two doubles.
// NOTE(review): the declaration of `res` is not shown here - confirm its alignment.
298 _mm_store_pd(
res, low);
299 return std::complex<double>(
res[0],
res[1]);
// Reverses the order of the two complex elements of a Packet2cd.
302 template<> EIGEN_STRONG_INLINE Packet2cd
preverse(
const Packet2cd&
a) {
// Control value 1 selects the upper 128-bit half of a.v for the low half of
// the result and the lower half for the high half, i.e. swaps the two halves.
303 __m256d result = _mm256_permute2f128_pd(
a.v,
a.v, 1);
304 return Packet2cd(result);
// Reduces a Packet2cd to a single std::complex<double> via padd/predux.
307 template<> EIGEN_STRONG_INLINE std::complex<double>
predux<Packet2cd>(
const Packet2cd&
a)
// Adds the lower and upper 128-bit halves as two Packet1cd values, then
// delegates to predux on the Packet1cd result.
309 return predux(
padd(Packet1cd(_mm256_extractf128_pd(
a.v,0)),
310 Packet1cd(_mm256_extractf128_pd(
a.v,1))));
// Multiplies the lower and upper 128-bit halves as two Packet1cd values (a
// complex product via pmul), then delegates to predux on the Packet1cd result.
315 return predux(
pmul(Packet1cd(_mm256_extractf128_pd(
a.v,0)),
316 Packet1cd(_mm256_extractf128_pd(
a.v,1))));
// Explicit specialization of pdiv for two Packet2cd operands.
321 template<> EIGEN_STRONG_INLINE Packet2cd
pdiv<Packet2cd>(const Packet2cd&
a, const Packet2cd&
b)
// Mask 0x5 swaps the two doubles of each pair, i.e. exchanges the two
// components of each complex element.
328 return Packet2cd(_mm256_shuffle_pd(
x.v,
x.v, 0x5));
// 4x4 transpose of kernel.packet[0..3]. Each packet is viewed as four 64-bit
// values so that a (re, im) float pair moves as one unit.
333 __m256d P0 = _mm256_castps_pd(kernel.packet[0].v);
334 __m256d P1 = _mm256_castps_pd(kernel.packet[1].v);
335 __m256d P2 = _mm256_castps_pd(kernel.packet[2].v);
336 __m256d P3 = _mm256_castps_pd(kernel.packet[3].v);
// Mask 15 interleaves the odd 64-bit values: T0 = (P0[1], P1[1], P0[3], P1[3]).
// Mask 0 interleaves the even ones:        T1 = (P0[0], P1[0], P0[2], P1[2]).
// T2 and T3 are the same combinations built from P2 and P3.
338 __m256d T0 = _mm256_shuffle_pd(P0, P1, 15);
339 __m256d T1 = _mm256_shuffle_pd(P0, P1, 0);
340 __m256d T2 = _mm256_shuffle_pd(P2, P3, 15);
341 __m256d T3 = _mm256_shuffle_pd(P2, P3, 0);
// Control 32 (0x20) joins the two lower 128-bit halves and control 49 (0x31)
// joins the two upper halves, so output packet k collects element k of
// P0, P1, P2 and P3 in that order.
343 kernel.packet[1].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 32));
344 kernel.packet[3].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T0, T2, 49));
345 kernel.packet[0].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 32));
346 kernel.packet[2].v = _mm256_castpd_ps(_mm256_permute2f128_pd(T1, T3, 49));
// 2x2 transpose of kernel.packet[0..1], moving whole 128-bit halves.
// Control 0+(2<<4) joins the lower halves of both packets. The result is held
// in tmp because packet[0] is still read by the next statement.
351 __m256d tmp = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 0+(2<<4));
// Control 1+(3<<4) joins the upper halves of both packets.
352 kernel.packet[1].v = _mm256_permute2f128_pd(kernel.packet[0].v, kernel.packet[1].v, 1+(3<<4));
353 kernel.packet[0].v = tmp;
// Forwards to the generic psqrt_complex helper instantiated for Packet2cd.
357 return psqrt_complex<Packet2cd>(
a);
// Forwards to the generic psqrt_complex helper instantiated for Packet4cf.
361 return psqrt_complex<Packet4cf>(
a);
Array< int, Dynamic, 1 > v
const ImagReturnType imag() const
RealReturnType real() const
#define EIGEN_MAKE_CONJ_HELPER_CPLX_REAL(PACKET_CPLX, PACKET_REAL)
#define EIGEN_DEBUG_ALIGNED_STORE
#define EIGEN_DEBUG_ALIGNED_LOAD
#define EIGEN_DEBUG_UNALIGNED_STORE
#define EIGEN_DEBUG_UNALIGNED_LOAD
#define EIGEN_DEVICE_FUNC
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
Packet4d ploadu< Packet4d >(const double *from)
Packet2cd psub< Packet2cd >(const Packet2cd &a, const Packet2cd &b)
Packet2cd ptrue< Packet2cd >(const Packet2cd &a)
Packet padd(const Packet &a, const Packet &b)
void pstore(Scalar *to, const Packet &from)
Packet4d pload< Packet4d >(const double *from)
unpacket_traits< Packet >::type predux(const Packet &a)
Packet8h ptrue(const Packet8h &a)
Packet4cf pcplxflip< Packet4cf >(const Packet4cf &x)
Packet2cf ploaddup< Packet2cf >(const std::complex< float > *from)
Packet4cf pand< Packet4cf >(const Packet4cf &a, const Packet4cf &b)
Packet2cd pload< Packet2cd >(const std::complex< double > *from)
Packet4cf pandnot< Packet4cf >(const Packet4cf &a, const Packet4cf &b)
std::complex< double > predux_mul< Packet2cd >(const Packet2cd &a)
Packet2cf pnegate(const Packet2cf &a)
Packet4cf ploaddup< Packet4cf >(const std::complex< float > *from)
std::complex< float > pfirst< Packet4cf >(const Packet4cf &a)
Packet2cd ploaddup< Packet2cd >(const std::complex< double > *from)
Packet8f ploadu< Packet8f >(const float *from)
void pstoreu(Scalar *to, const Packet &from)
Packet4cf por< Packet4cf >(const Packet4cf &a, const Packet4cf &b)
Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
bfloat16 pfirst(const Packet8bf &a)
std::complex< double > predux< Packet2cd >(const Packet2cd &a)
std::complex< float > predux_mul< Packet4cf >(const Packet4cf &a)
Packet pmul(const Packet &a, const Packet &b)
void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Packet2cd pset1< Packet2cd >(const std::complex< double > &from)
Packet4cf psub< Packet4cf >(const Packet4cf &a, const Packet4cf &b)
Packet4cf pload< Packet4cf >(const std::complex< float > *from)
Packet2cd por< Packet2cd >(const Packet2cd &a, const Packet2cd &b)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pdiv_complex(const Packet &x, const Packet &y)
std::complex< float > predux< Packet4cf >(const Packet4cf &a)
Packet8f pload< Packet8f >(const float *from)
std::complex< double > pfirst< Packet2cd >(const Packet2cd &a)
Packet2cd ploadu< Packet2cd >(const std::complex< double > *from)
unpacket_traits< Packet >::type predux_mul(const Packet &a)
Packet8h pand(const Packet8h &a, const Packet8h &b)
Packet4cf pxor< Packet4cf >(const Packet4cf &a, const Packet4cf &b)
Packet2cd pxor< Packet2cd >(const Packet2cd &a, const Packet2cd &b)
Packet2cd padd< Packet2cd >(const Packet2cd &a, const Packet2cd &b)
Packet4cf pmul< Packet4cf >(const Packet4cf &a, const Packet4cf &b)
Packet pdiv(const Packet &a, const Packet &b)
Packet4cf padd< Packet4cf >(const Packet4cf &a, const Packet4cf &b)
Packet2cf pconj(const Packet2cf &a)
Packet2cd pcplxflip< Packet2cd >(const Packet2cd &x)
Packet4cf ptrue< Packet4cf >(const Packet4cf &a)
Packet2cd pmul< Packet2cd >(const Packet2cd &a, const Packet2cd &b)
Packet2cf preverse(const Packet2cf &a)
Packet4cf ploadu< Packet4cf >(const std::complex< float > *from)
Packet2cd psqrt< Packet2cd >(const Packet2cd &a)
Packet2cd pand< Packet2cd >(const Packet2cd &a, const Packet2cd &b)
Packet2cd pandnot< Packet2cd >(const Packet2cd &a, const Packet2cd &b)
Packet4cf psqrt< Packet4cf >(const Packet4cf &a)
Packet4cf pset1< Packet4cf >(const std::complex< float > &from)
internal::add_const_on_value_type_t< EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) > real_ref(const Scalar &x)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.