SYCL/PacketMath.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Mehdi Goli Codeplay Software Ltd.
5 // Ralph Potter Codeplay Software Ltd.
6 // Luke Iwanski Codeplay Software Ltd.
7 // Contact: <eigen@codeplay.com>
8 //
9 // This Source Code Form is subject to the terms of the Mozilla
10 // Public License v. 2.0. If a copy of the MPL was not distributed
11 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
12 
13 
21 #ifndef EIGEN_PACKET_MATH_SYCL_H
22 #define EIGEN_PACKET_MATH_SYCL_H
23 #include <type_traits>
24 
25 #include "../../InternalHeaderCheck.h"
26 
27 namespace Eigen {
28 
29 namespace internal {
30 #ifdef SYCL_DEVICE_ONLY
31 #define SYCL_PLOAD(packet_type, AlignedType) \
32  template <> \
33  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type \
34  pload##AlignedType<packet_type>( \
35  const typename unpacket_traits<packet_type>::type* from) { \
36  auto ptr = cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(from);\
37  packet_type res{}; \
38  res.load(0, ptr); \
39  return res; \
40  }
41 
42 SYCL_PLOAD(cl::sycl::cl_float4, u)
43 SYCL_PLOAD(cl::sycl::cl_float4, )
44 SYCL_PLOAD(cl::sycl::cl_double2, u)
45 SYCL_PLOAD(cl::sycl::cl_double2, )
46 
47 #undef SYCL_PLOAD
48 
49 #define SYCL_PSTORE(scalar, packet_type, alignment) \
50  template <> \
51  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstore##alignment( \
52  scalar* to, const packet_type& from) { \
53  auto ptr = cl::sycl::address_space_cast<cl::sycl::access::address_space::generic_space, cl::sycl::access::decorated::no>(to);\
54  from.store(0, ptr); \
55  }
56 
57 SYCL_PSTORE(float, cl::sycl::cl_float4, )
58 SYCL_PSTORE(float, cl::sycl::cl_float4, u)
59 SYCL_PSTORE(double, cl::sycl::cl_double2, )
60 SYCL_PSTORE(double, cl::sycl::cl_double2, u)
61 
62 #undef SYCL_PSTORE
63 
64 #define SYCL_PSET1(packet_type) \
65  template <> \
66  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pset1<packet_type>( \
67  const typename unpacket_traits<packet_type>::type& from) { \
68  return packet_type(from); \
69  }
70 
71 // global space
72 SYCL_PSET1(cl::sycl::cl_float4)
73 SYCL_PSET1(cl::sycl::cl_double2)
74 
75 #undef SYCL_PSET1
76 
77 template <typename packet_type>
78 struct get_base_packet {
79  template <typename sycl_multi_pointer>
80  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
81  get_ploaddup(sycl_multi_pointer) {}
82 
83  template <typename sycl_multi_pointer>
84  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type
85  get_pgather(sycl_multi_pointer, Index) {}
86 };
87 
88 template <>
89 struct get_base_packet<cl::sycl::cl_float4> {
90  template <typename sycl_multi_pointer>
91  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_ploaddup(
92  sycl_multi_pointer from) {
93  return cl::sycl::cl_float4(from[0], from[0], from[1], from[1]);
94  }
95  template <typename sycl_multi_pointer>
96  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 get_pgather(
97  sycl_multi_pointer from, Index stride) {
98  return cl::sycl::cl_float4(from[0 * stride], from[1 * stride],
99  from[2 * stride], from[3 * stride]);
100  }
101 
102  template <typename sycl_multi_pointer>
103  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
104  sycl_multi_pointer to, const cl::sycl::cl_float4& from, Index stride) {
105  auto tmp = stride;
106  to[0] = from.x();
107  to[tmp] = from.y();
108  to[tmp += stride] = from.z();
109  to[tmp += stride] = from.w();
110  }
111  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_float4 set_plset(
112  const float& a) {
113  return cl::sycl::cl_float4(static_cast<float>(a), static_cast<float>(a + 1),
114  static_cast<float>(a + 2),
115  static_cast<float>(a + 3));
116  }
117 };
118 
119 template <>
120 struct get_base_packet<cl::sycl::cl_double2> {
121  template <typename sycl_multi_pointer>
122  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2
123  get_ploaddup(const sycl_multi_pointer from) {
124  return cl::sycl::cl_double2(from[0], from[0]);
125  }
126 
127  template <typename sycl_multi_pointer, typename Index>
128  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 get_pgather(
129  const sycl_multi_pointer from, Index stride) {
130  return cl::sycl::cl_double2(from[0 * stride], from[1 * stride]);
131  }
132 
133  template <typename sycl_multi_pointer>
134  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void set_pscatter(
135  sycl_multi_pointer to, const cl::sycl::cl_double2& from, Index stride) {
136  to[0] = from.x();
137  to[stride] = from.y();
138  }
139 
140  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE cl::sycl::cl_double2 set_plset(
141  const double& a) {
142  return cl::sycl::cl_double2(static_cast<double>(a),
143  static_cast<double>(a + 1));
144  }
145 };
146 
147 #define SYCL_PLOAD_DUP_SPECILIZE(packet_type) \
148  template <> \
149  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type ploaddup<packet_type>( \
150  const typename unpacket_traits<packet_type>::type* from) { \
151  return get_base_packet<packet_type>::get_ploaddup(from); \
152  }
153 
154 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_float4)
155 SYCL_PLOAD_DUP_SPECILIZE(cl::sycl::cl_double2)
156 
157 #undef SYCL_PLOAD_DUP_SPECILIZE
158 
159 #define SYCL_PLSET(packet_type) \
160  template <> \
161  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type plset<packet_type>( \
162  const typename unpacket_traits<packet_type>::type& a) { \
163  return get_base_packet<packet_type>::set_plset(a); \
164  }
165 SYCL_PLSET(cl::sycl::cl_float4)
166 SYCL_PLSET(cl::sycl::cl_double2)
167 
168 #undef SYCL_PLSET
169 
170 #define SYCL_PGATHER_SPECILIZE(scalar, packet_type) \
171  template <> \
172  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE packet_type \
173  pgather<scalar, packet_type>( \
174  const typename unpacket_traits<packet_type>::type* from, Index stride) { \
175  return get_base_packet<packet_type>::get_pgather(from, stride); \
176  }
177 
178 SYCL_PGATHER_SPECILIZE(float, cl::sycl::cl_float4)
179 SYCL_PGATHER_SPECILIZE(double, cl::sycl::cl_double2)
180 
181 #undef SYCL_PGATHER_SPECILIZE
182 
183 #define SYCL_PSCATTER_SPECILIZE(scalar, packet_type) \
184  template <> \
185  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void pscatter<scalar, packet_type>( \
186  typename unpacket_traits<packet_type>::type * to, \
187  const packet_type& from, Index stride) { \
188  get_base_packet<packet_type>::set_pscatter(to, from, stride); \
189  }
190 
191 SYCL_PSCATTER_SPECILIZE(float, cl::sycl::cl_float4)
192 SYCL_PSCATTER_SPECILIZE(double, cl::sycl::cl_double2)
193 
194 #undef SYCL_PSCATTER_SPECILIZE
195 
196 #define SYCL_PMAD(packet_type) \
197  template <> \
198  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE packet_type pmadd( \
199  const packet_type& a, const packet_type& b, const packet_type& c) { \
200  return cl::sycl::mad(a, b, c); \
201  }
202 
203 SYCL_PMAD(cl::sycl::cl_float4)
204 SYCL_PMAD(cl::sycl::cl_double2)
205 #undef SYCL_PMAD
206 
207 template <>
208 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float pfirst<cl::sycl::cl_float4>(
209  const cl::sycl::cl_float4& a) {
210  return a.x();
211 }
212 template <>
213 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double pfirst<cl::sycl::cl_double2>(
214  const cl::sycl::cl_double2& a) {
215  return a.x();
216 }
217 
218 template <>
219 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux<cl::sycl::cl_float4>(
220  const cl::sycl::cl_float4& a) {
221  return a.x() + a.y() + a.z() + a.w();
222 }
223 
224 template <>
225 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux<cl::sycl::cl_double2>(
226  const cl::sycl::cl_double2& a) {
227  return a.x() + a.y();
228 }
229 
230 template <>
231 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_max<cl::sycl::cl_float4>(
232  const cl::sycl::cl_float4& a) {
233  return cl::sycl::fmax(cl::sycl::fmax(a.x(), a.y()),
234  cl::sycl::fmax(a.z(), a.w()));
235 }
236 template <>
237 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_max<cl::sycl::cl_double2>(
238  const cl::sycl::cl_double2& a) {
239  return cl::sycl::fmax(a.x(), a.y());
240 }
241 
242 template <>
243 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_min<cl::sycl::cl_float4>(
244  const cl::sycl::cl_float4& a) {
245  return cl::sycl::fmin(cl::sycl::fmin(a.x(), a.y()),
246  cl::sycl::fmin(a.z(), a.w()));
247 }
248 template <>
249 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_min<cl::sycl::cl_double2>(
250  const cl::sycl::cl_double2& a) {
251  return cl::sycl::fmin(a.x(), a.y());
252 }
253 
254 template <>
255 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float predux_mul<cl::sycl::cl_float4>(
256  const cl::sycl::cl_float4& a) {
257  return a.x() * a.y() * a.z() * a.w();
258 }
259 template <>
260 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double predux_mul<cl::sycl::cl_double2>(
261  const cl::sycl::cl_double2& a) {
262  return a.x() * a.y();
263 }
264 
265 template <>
266 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4
267 pabs<cl::sycl::cl_float4>(const cl::sycl::cl_float4& a) {
268  return cl::sycl::cl_float4(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()),
269  cl::sycl::fabs(a.z()), cl::sycl::fabs(a.w()));
270 }
271 template <>
272 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_double2
273 pabs<cl::sycl::cl_double2>(const cl::sycl::cl_double2& a) {
274  return cl::sycl::cl_double2(cl::sycl::fabs(a.x()), cl::sycl::fabs(a.y()));
275 }
276 
277 template <typename Packet>
278 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_le(const Packet &a,
279  const Packet &b) {
280  return (a <= b).template as<Packet>();
281 }
282 
283 template <typename Packet>
284 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_lt(const Packet &a,
285  const Packet &b) {
286  return (a < b).template as<Packet>();
287 }
288 
289 template <typename Packet>
290 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet sycl_pcmp_eq(const Packet &a,
291  const Packet &b) {
292  return (a == b).template as<Packet>();
293 }
294 
295 #define SYCL_PCMP(OP, TYPE) \
296  template <> \
297  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE TYPE pcmp_##OP<TYPE>(const TYPE &a, \
298  const TYPE &b) { \
299  return sycl_pcmp_##OP<TYPE>(a, b); \
300  }
301 
302 SYCL_PCMP(le, cl::sycl::cl_float4)
303 SYCL_PCMP(lt, cl::sycl::cl_float4)
304 SYCL_PCMP(eq, cl::sycl::cl_float4)
305 SYCL_PCMP(le, cl::sycl::cl_double2)
306 SYCL_PCMP(lt, cl::sycl::cl_double2)
307 SYCL_PCMP(eq, cl::sycl::cl_double2)
308 #undef SYCL_PCMP
309 
311  PacketBlock<cl::sycl::cl_float4, 4>& kernel) {
312  float tmp = kernel.packet[0].y();
313  kernel.packet[0].y() = kernel.packet[1].x();
314  kernel.packet[1].x() = tmp;
315 
316  tmp = kernel.packet[0].z();
317  kernel.packet[0].z() = kernel.packet[2].x();
318  kernel.packet[2].x() = tmp;
319 
320  tmp = kernel.packet[0].w();
321  kernel.packet[0].w() = kernel.packet[3].x();
322  kernel.packet[3].x() = tmp;
323 
324  tmp = kernel.packet[1].z();
325  kernel.packet[1].z() = kernel.packet[2].y();
326  kernel.packet[2].y() = tmp;
327 
328  tmp = kernel.packet[1].w();
329  kernel.packet[1].w() = kernel.packet[3].y();
330  kernel.packet[3].y() = tmp;
331 
332  tmp = kernel.packet[2].w();
333  kernel.packet[2].w() = kernel.packet[3].z();
334  kernel.packet[3].z() = tmp;
335 }
336 
338  PacketBlock<cl::sycl::cl_double2, 2>& kernel) {
339  double tmp = kernel.packet[0].y();
340  kernel.packet[0].y() = kernel.packet[1].x();
341  kernel.packet[1].x() = tmp;
342 }
343 
344 template <>
345 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE cl::sycl::cl_float4 pblend(
346  const Selector<unpacket_traits<cl::sycl::cl_float4>::size>& ifPacket,
347  const cl::sycl::cl_float4& thenPacket,
348  const cl::sycl::cl_float4& elsePacket) {
349  cl::sycl::cl_int4 condition(
350  ifPacket.select[0] ? 0 : -1, ifPacket.select[1] ? 0 : -1,
351  ifPacket.select[2] ? 0 : -1, ifPacket.select[3] ? 0 : -1);
352  return cl::sycl::select(thenPacket, elsePacket, condition);
353 }
354 
355 template <>
356 inline cl::sycl::cl_double2 pblend(
357  const Selector<unpacket_traits<cl::sycl::cl_double2>::size>& ifPacket,
358  const cl::sycl::cl_double2& thenPacket,
359  const cl::sycl::cl_double2& elsePacket) {
360  cl::sycl::cl_long2 condition(ifPacket.select[0] ? 0 : -1,
361  ifPacket.select[1] ? 0 : -1);
362  return cl::sycl::select(thenPacket, elsePacket, condition);
363 }
364 #endif // SYCL_DEVICE_ONLY
365 
366 } // end namespace internal
367 
368 } // end namespace Eigen
369 
370 #endif // EIGEN_PACKET_MATH_SYCL_H
Array< int, 3, 1 > b
#define EIGEN_ALWAYS_INLINE
Definition: Macros.h:836
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:883
bfloat16 fmax(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:702
bfloat16 fmin(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:696
void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Packet4i pblend(const Selector< 4 > &ifPacket, const Packet4i &thenPacket, const Packet4i &elsePacket)
: InteropHeaders
Definition: Core:139
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:82