10 #ifndef EIGEN_TYPE_CASTING_GPU_H
11 #define EIGEN_TYPE_CASTING_GPU_H
13 #include "../../InternalHeaderCheck.h"
19 #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
20 (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
// Specialization of type_casting_traits for the type pair (Eigen::half, float).
// NOTE(review): neither the `template<>` introducer nor the struct body is present in the
// visible lines, so the members of this specialization cannot be documented from here.
23 struct type_casting_traits<
Eigen::half, float> {
// pcast<half2, float4>: builds a single float4 out of two half2 inputs.
//   a - converted with __half22float2; supplies the first two arguments to make_float4
//   b - converted with __half22float2; supplies the last two arguments to make_float4
// Returns make_float4(r1.x, r1.y, r2.x, r2.y), i.e. the converted lanes of `a`
// followed by the converted lanes of `b`.
// NOTE(review): the closing brace of this function is not among the visible lines.
31 template<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(
const half2&
a,
const half2&
b) {
// Convert each half2 argument to a float2 independently.
32 float2 r1 = __half22float2(
a);
33 float2 r2 = __half22float2(
b);
// Pack the four floats in order: a's pair first, then b's pair.
34 return make_float4(r1.x, r1.y, r2.x, r2.y);
// pcast<float4, Packet4h2>: converts two float4 inputs into four half2 values that are
// written through a half2* view of `r`.
//   a - its (x, y) and (z, w) pairs become half2 elements 0 and 1
//   b - its (x, y) and (z, w) pairs become half2 elements 2 and 3
// NOTE(review): `r` is used below but its declaration, the return statement, and the
// closing brace are not among the visible lines; presumably `r` is the Packet4h2 result
// declared on a line missing from this excerpt -- confirm against the full file.
38 template<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcast<float4, Packet4h2>(
const float4&
a,
const float4&
b) {
// View the storage of `r` as an array of half2 so each pair can be stored by index.
40 half2* r_alias=
reinterpret_cast<half2*
>(&r);
// Each __floats2half2_rn call turns one pair of floats into one half2.
41 r_alias[0]=__floats2half2_rn(
a.x,
a.y);
42 r_alias[1]=__floats2half2_rn(
a.z,
a.w);
43 r_alias[2]=__floats2half2_rn(
b.x,
b.y);
44 r_alias[3]=__floats2half2_rn(
b.z,
b.w);
// Specialization of type_casting_traits for the type pair (float, Eigen::half).
// NOTE(review): neither the `template<>` introducer nor the struct body is present in the
// visible lines, so the members of this specialization cannot be documented from here.
49 struct type_casting_traits<float,
Eigen::half> {
// pcast<Packet4h2, float4>: converts the first two half2 elements of a Packet4h2 into
// the four components of `r`.
//   a - input packet, read through a const half2* view; only a_alias[0] and a_alias[1]
//       are accessed in the visible lines, so any remaining elements are not used here.
// NOTE(review): `r` is assigned below but its declaration, the return statement, and the
// closing brace are not among the visible lines; presumably `r` is the float4 result
// declared on a line missing from this excerpt -- confirm against the full file.
57 template<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<Packet4h2, float4>(
const Packet4h2&
a) {
// View the input packet's storage as an array of half2 for indexed access.
60 const half2* a_alias=
reinterpret_cast<const half2*
>(&
a);
// Convert the first two half2 elements to float2.
61 float2 r1 = __half22float2(a_alias[0]);
62 float2 r2 = __half22float2(a_alias[1]);
// Copy the four converted values into r's x, y, z, w components in order.
63 r.x=
static_cast<float>(r1.x);
64 r.y=
static_cast<float>(r1.y);
65 r.z=
static_cast<float>(r2.x);
66 r.w=
static_cast<float>(r2.y);
// pcast<float4, half2>: converts a float4 to a single half2.
//   a - input; only a.x and a.y are read, a.z and a.w do not contribute to the result.
// Returns __floats2half2_rn(a.x, a.y).
// NOTE(review): the closing brace of this function is not among the visible lines.
70 template<>
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(
const float4&
a) {
// Only the first pair of components is converted.
72 return __floats2half2_rn(
a.x,
a.y);
#define EIGEN_DEVICE_FUNC