AVX512/TypeCasting.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2019 Rasmus Munk Larsen <rmlarsen@google.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_TYPE_CASTING_AVX512_H
11 #define EIGEN_TYPE_CASTING_AVX512_H
12 
13 #include "../../InternalHeaderCheck.h"
14 
15 namespace Eigen {
16 
17 namespace internal {
18 
19 template <>
20 struct type_casting_traits<float, bool> {
21  enum {
22  VectorizedCast = 1,
23  SrcCoeffRatio = 1,
24  TgtCoeffRatio = 1
25  };
26 };
27 
28 template <>
29 struct type_casting_traits<bool, float> {
30  enum {
31  VectorizedCast = 1,
32  SrcCoeffRatio = 1,
33  TgtCoeffRatio = 1
34  };
35 };
36 
37 template<> EIGEN_STRONG_INLINE Packet16b pcast<Packet16f, Packet16b>(const Packet16f& a) {
38  __mmask16 mask = _mm512_cmpneq_ps_mask(a, pzero(a));
39  return _mm512_maskz_cvtepi32_epi8(mask, _mm512_set1_epi32(1));
40 }
41 
42 template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16b, Packet16f>(const Packet16b& a) {
43  return _mm512_cvtepi32_ps(_mm512_and_si512(_mm512_cvtepi8_epi32(a), _mm512_set1_epi32(1)));
44 }
45 
46 template<> EIGEN_STRONG_INLINE Packet16i pcast<Packet16f, Packet16i>(const Packet16f& a) {
47  return _mm512_cvttps_epi32(a);
48 }
49 
50 template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16i, Packet16f>(const Packet16i& a) {
51  return _mm512_cvtepi32_ps(a);
52 }
53 
54 template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet8d, Packet16f>(const Packet8d& a, const Packet8d& b) {
55  return cat256(_mm512_cvtpd_ps(a), _mm512_cvtpd_ps(b));
56 }
57 
58 template<> EIGEN_STRONG_INLINE Packet16i pcast<Packet8d, Packet16i>(const Packet8d& a, const Packet8d& b) {
59  return cat256i(_mm512_cvttpd_epi32(a), _mm512_cvttpd_epi32(b));
60 }
61 
62 template<> EIGEN_STRONG_INLINE Packet8i pcast<Packet8d, Packet8i>(const Packet8d& a) {
63  return _mm512_cvtpd_epi32(a);
64 }
65 template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8d, Packet8f>(const Packet8d& a) {
66  return _mm512_cvtpd_ps(a);
67 }
68 
69 template<> EIGEN_STRONG_INLINE Packet16i preinterpret<Packet16i, Packet16f>(const Packet16f& a) {
70  return _mm512_castps_si512(a);
71 }
72 
73 template<> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet16i>(const Packet16i& a) {
74  return _mm512_castsi512_ps(a);
75 }
76 
77 template<> EIGEN_STRONG_INLINE Packet8d preinterpret<Packet8d, Packet16f>(const Packet16f& a) {
78  return _mm512_castps_pd(a);
79 }
80 
81 template<> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet8d>(const Packet8d& a) {
82  return _mm512_castpd_ps(a);
83 }
84 
85 template<> EIGEN_STRONG_INLINE Packet8f preinterpret<Packet8f, Packet16f>(const Packet16f& a) {
86  return _mm512_castps512_ps256(a);
87 }
88 
89 template<> EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet16f>(const Packet16f& a) {
90  return _mm512_castps512_ps128(a);
91 }
92 
93 template<> EIGEN_STRONG_INLINE Packet4d preinterpret<Packet4d, Packet8d>(const Packet8d& a) {
94  return _mm512_castpd512_pd256(a);
95 }
96 
97 template<> EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet8d>(const Packet8d& a) {
98  return _mm512_castpd512_pd128(a);
99 }
100 
101 template<> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet8f>(const Packet8f& a) {
102  return _mm512_castps256_ps512(a);
103 }
104 
105 template<> EIGEN_STRONG_INLINE Packet16f preinterpret<Packet16f, Packet4f>(const Packet4f& a) {
106  return _mm512_castps128_ps512(a);
107 }
108 
109 template<> EIGEN_STRONG_INLINE Packet8d preinterpret<Packet8d, Packet4d>(const Packet4d& a) {
110  return _mm512_castpd256_pd512(a);
111 }
112 
113 template<> EIGEN_STRONG_INLINE Packet8d preinterpret<Packet8d, Packet2d>(const Packet2d& a) {
114  return _mm512_castpd128_pd512(a);
115 }
116 
117 template<> EIGEN_STRONG_INLINE Packet8i preinterpret<Packet8i, Packet16i>(const Packet16i& a) {
118  return _mm512_castsi512_si256(a);
119 }
120 template<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet16i>(const Packet16i& a) {
121  return _mm512_castsi512_si128(a);
122 }
123 
124 template<> EIGEN_STRONG_INLINE Packet8h preinterpret<Packet8h, Packet16h>(const Packet16h& a) {
125  return _mm256_castsi256_si128(a);
126 }
127 
128 template<> EIGEN_STRONG_INLINE Packet8bf preinterpret<Packet8bf, Packet16bf>(const Packet16bf& a) {
129  return _mm256_castsi256_si128(a);
130 }
131 
132 #ifndef EIGEN_VECTORIZE_AVX512FP16
133 
134 template <>
135 struct type_casting_traits<half, float> {
136  enum {
137  VectorizedCast = 1,
138  SrcCoeffRatio = 1,
139  TgtCoeffRatio = 1
140  };
141 };
142 
143 template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16h, Packet16f>(const Packet16h& a) {
144  return half2float(a);
145 }
146 
147 template <>
148 struct type_casting_traits<float, half> {
149  enum {
150  VectorizedCast = 1,
151  SrcCoeffRatio = 1,
152  TgtCoeffRatio = 1
153  };
154 };
155 
156 template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packet16f& a) {
157  return float2half(a);
158 }
159 
160 #endif
161 
162 template <>
163 struct type_casting_traits<bfloat16, float> {
164  enum {
165  VectorizedCast = 1,
166  SrcCoeffRatio = 1,
167  TgtCoeffRatio = 1
168  };
169 };
170 
171 template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16bf, Packet16f>(const Packet16bf& a) {
172  return Bf16ToF32(a);
173 }
174 
175 template <>
176 struct type_casting_traits<float, bfloat16> {
177  enum {
178  VectorizedCast = 1,
179  SrcCoeffRatio = 1,
180  TgtCoeffRatio = 1
181  };
182 };
183 
184 template<> EIGEN_STRONG_INLINE Packet16bf pcast<Packet16f, Packet16bf>(const Packet16f& a) {
185  return F32ToBf16(a);
186 }
187 
188 #ifdef EIGEN_VECTORIZE_AVX512FP16
189 
190 template <>
191 struct type_casting_traits<half, float> {
192  enum {
193  VectorizedCast = 1,
194  SrcCoeffRatio = 1,
195  TgtCoeffRatio = 2
196  };
197 };
198 
199 template <>
200 struct type_casting_traits<float, half> {
201  enum {
202  VectorizedCast = 1,
203  SrcCoeffRatio = 2,
204  TgtCoeffRatio = 1
205  };
206 };
207 
208 template<> EIGEN_STRONG_INLINE Packet16h preinterpret<Packet16h, Packet32h>(const Packet32h& a) {
209  return _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(a), 0));
210 }
211 template<> EIGEN_STRONG_INLINE Packet8h preinterpret<Packet8h, Packet32h>(const Packet32h& a) {
212  return _mm256_castsi256_si128(preinterpret<Packet16h>(a));
213 }
214 
215 template <>
216 EIGEN_STRONG_INLINE Packet16f pcast<Packet32h, Packet16f>(const Packet32h& a) {
217  // Discard second-half of input.
218  Packet16h low = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(a), 0));
219  return _mm512_cvtxph_ps(_mm256_castsi256_ph(low));
220 }
221 
222 
223 template <>
224 EIGEN_STRONG_INLINE Packet32h pcast<Packet16f, Packet32h>(const Packet16f& a, const Packet16f& b) {
225  __m512d result = _mm512_undefined_pd();
226  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(_mm512_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC)), 0);
227  result = _mm512_insertf64x4(result, _mm256_castsi256_pd(_mm512_cvtps_ph(b, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC)), 1);
228  return _mm512_castpd_ph(result);
229 }
230 
231 template <>
232 EIGEN_STRONG_INLINE Packet8f pcast<Packet16h, Packet8f>(const Packet16h& a) {
233  // Discard second-half of input.
234  Packet8h low = _mm_castps_si128(_mm256_extractf32x4_ps(_mm256_castsi256_ps(a), 0));
235  return _mm256_cvtxph_ps(_mm_castsi128_ph(low));
236 }
237 
238 
239 template <>
240 EIGEN_STRONG_INLINE Packet16h pcast<Packet8f, Packet16h>(const Packet8f& a, const Packet8f& b) {
241  __m256d result = _mm256_undefined_pd();
242  result = _mm256_insertf64x2(result, _mm_castsi128_pd(_mm256_cvtps_ph(a, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC)), 0);
243  result = _mm256_insertf64x2(result, _mm_castsi128_pd(_mm256_cvtps_ph(b, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC)), 1);
244  return _mm256_castpd_si256(result);
245 }
246 
247 template <>
248 EIGEN_STRONG_INLINE Packet4f pcast<Packet8h, Packet4f>(const Packet8h& a) {
249  Packet8f full = _mm256_cvtxph_ps(_mm_castsi128_ph(a));
250  // Discard second-half of input.
251  return _mm256_extractf32x4_ps(full, 0);
252 }
253 
254 
255 template <>
256 EIGEN_STRONG_INLINE Packet8h pcast<Packet4f, Packet8h>(const Packet4f& a, const Packet4f& b) {
257  __m256 result = _mm256_undefined_ps();
258  result = _mm256_insertf128_ps(result, a, 0);
259  result = _mm256_insertf128_ps(result, b, 1);
260  return _mm256_cvtps_ph(result, _MM_FROUND_TO_NEAREST_INT|_MM_FROUND_NO_EXC);
261 }
262 
263 
264 #endif
265 
266 } // end namespace internal
267 
268 } // end namespace Eigen
269 
270 #endif // EIGEN_TYPE_CASTING_AVX512_H
Array< int, 3, 1 > b
Packet8i preinterpret< Packet8i, Packet16i >(const Packet16i &a)
Packet8f pzero(const Packet8f &)
__vector int Packet4i
Packet4d preinterpret< Packet4d, Packet8d >(const Packet8d &a)
Packet8f preinterpret< Packet8f, Packet16f >(const Packet16f &a)
Packet16f pcast< Packet16bf, Packet16f >(const Packet16bf &a)
Packet8bf F32ToBf16(Packet4f p4f)
Packet16f pcast< Packet16i, Packet16f >(const Packet16i &a)
Packet8h preinterpret< Packet8h, Packet16h >(const Packet16h &a)
Packet16bf pcast< Packet16f, Packet16bf >(const Packet16f &a)
Packet8f Bf16ToF32(const Packet8bf &a)
eigen_packet_wrapper< __m128i, 1 > Packet16b
Packet4f preinterpret< Packet4f, Packet16f >(const Packet16f &a)
Packet4i preinterpret< Packet4i, Packet16i >(const Packet16i &a)
Packet16f pcast< Packet16h, Packet16f >(const Packet16h &a)
eigen_packet_wrapper< __m256i, 2 > Packet16bf
Packet8bf preinterpret< Packet8bf, Packet16bf >(const Packet16bf &a)
Packet16f preinterpret< Packet16f, Packet8f >(const Packet8f &a)
Packet8f pcast< Packet8d, Packet8f >(const Packet8d &a)
Packet16f preinterpret< Packet16f, Packet4f >(const Packet4f &a)
eigen_packet_wrapper< __vector unsigned short int, 0 > Packet8bf
Packet8h float2half(const Packet8f &a)
Packet8i pcast< Packet8d, Packet8i >(const Packet8d &a)
Packet16f pcast< Packet8d, Packet16f >(const Packet8d &a, const Packet8d &b)
Packet8f half2float(const Packet8h &a)
Packet16f pcast< Packet16b, Packet16f >(const Packet16b &a)
Packet8d preinterpret< Packet8d, Packet4d >(const Packet4d &a)
eigen_packet_wrapper< __m256i, 0 > Packet8i
Packet16f preinterpret< Packet16f, Packet8d >(const Packet8d &a)
Packet16b pcast< Packet16f, Packet16b >(const Packet16f &a)
Packet16i cat256i(Packet8i a, Packet8i b)
Packet16f cat256(Packet8f a, Packet8f b)
Packet16i pcast< Packet16f, Packet16i >(const Packet16f &a)
Packet8d preinterpret< Packet8d, Packet16f >(const Packet16f &a)
Packet8d preinterpret< Packet8d, Packet2d >(const Packet2d &a)
Packet16f preinterpret< Packet16f, Packet16i >(const Packet16i &a)
Packet16i preinterpret< Packet16i, Packet16f >(const Packet16f &a)
Packet2d preinterpret< Packet2d, Packet8d >(const Packet8d &a)
Packet16h pcast< Packet16f, Packet16h >(const Packet16f &a)
__vector float Packet4f
eigen_packet_wrapper< __m256i, 1 > Packet16h
Packet16i pcast< Packet8d, Packet16i >(const Packet8d &a, const Packet8d &b)
eigen_packet_wrapper< __m128i, 2 > Packet8h
: InteropHeaders
Definition: Core:139