GPU/TypeCasting.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_TYPE_CASTING_GPU_H
11 #define EIGEN_TYPE_CASTING_GPU_H
12 
13 #include "../../InternalHeaderCheck.h"
14 
15 namespace Eigen {
16 
17 namespace internal {
18 
19 #if (defined(EIGEN_HAS_CUDA_FP16) && defined(EIGEN_CUDA_ARCH) && EIGEN_CUDA_ARCH >= 300) || \
20  (defined(EIGEN_HAS_HIP_FP16) && defined(EIGEN_HIP_DEVICE_COMPILE))
21 
22 template <>
23 struct type_casting_traits<Eigen::half, float> {
24  enum {
25  VectorizedCast = 1,
26  SrcCoeffRatio = 1,
27  TgtCoeffRatio = 2
28  };
29 };
30 
31 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) {
32  float2 r1 = __half22float2(a);
33  float2 r2 = __half22float2(b);
34  return make_float4(r1.x, r1.y, r2.x, r2.y);
35 }
36 
37 
38 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet4h2 pcast<float4, Packet4h2>(const float4& a, const float4& b) {
39  Packet4h2 r;
40  half2* r_alias=reinterpret_cast<half2*>(&r);
41  r_alias[0]=__floats2half2_rn(a.x,a.y);
42  r_alias[1]=__floats2half2_rn(a.z,a.w);
43  r_alias[2]=__floats2half2_rn(b.x,b.y);
44  r_alias[3]=__floats2half2_rn(b.z,b.w);
45  return r;
46 }
47 
48 template <>
49 struct type_casting_traits<float, Eigen::half> {
50  enum {
51  VectorizedCast = 1,
52  SrcCoeffRatio = 2,
53  TgtCoeffRatio = 1
54  };
55 };
56 
57 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<Packet4h2, float4>(const Packet4h2& a) {
58  // Simply discard the second half of the input
59  float4 r;
60  const half2* a_alias=reinterpret_cast<const half2*>(&a);
61  float2 r1 = __half22float2(a_alias[0]);
62  float2 r2 = __half22float2(a_alias[1]);
63  r.x=static_cast<float>(r1.x);
64  r.y=static_cast<float>(r1.y);
65  r.z=static_cast<float>(r2.x);
66  r.w=static_cast<float>(r2.y);
67  return r;
68 }
69 
70 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) {
71  // Simply discard the second half of the input
72  return __floats2half2_rn(a.x, a.y);
73 }
74 
75 #endif
76 
77 } // end namespace internal
78 
79 } // end namespace Eigen
80 
81 #endif // EIGEN_TYPE_CASTING_GPU_H
Array< int, 3, 1 > b
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:883
: InteropHeaders
Definition: Core:139