AltiVec/TypeCasting.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2019 Rasmus Munk Larsen <rmlarsen@google.com>
5 // Copyright (C) 2023 Chip Kerchner (chip.kerchner@ibm.com)
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_TYPE_CASTING_ALTIVEC_H
12 #define EIGEN_TYPE_CASTING_ALTIVEC_H
13 
14 #include "../../InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
18 namespace internal {
19 template <>
20 struct type_casting_traits<float, int> {
21  enum {
22  VectorizedCast = 1,
23  SrcCoeffRatio = 1,
24  TgtCoeffRatio = 1
25  };
26 };
27 
28 template <>
29 struct type_casting_traits<int, float> {
30  enum {
31  VectorizedCast = 1,
32  SrcCoeffRatio = 1,
33  TgtCoeffRatio = 1
34  };
35 };
36 
37 template <>
38 struct type_casting_traits<bfloat16, unsigned short int> {
39  enum {
40  VectorizedCast = 1,
41  SrcCoeffRatio = 1,
42  TgtCoeffRatio = 1
43  };
44 };
45 
46 template <>
47 struct type_casting_traits<unsigned short int, bfloat16> {
48  enum {
49  VectorizedCast = 1,
50  SrcCoeffRatio = 1,
51  TgtCoeffRatio = 1
52  };
53 };
54 
55 template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
56  return vec_cts(a,0);
57 }
58 
59 template<> EIGEN_STRONG_INLINE Packet4ui pcast<Packet4f, Packet4ui>(const Packet4f& a) {
60  return vec_ctu(a,0);
61 }
62 
63 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
64  return vec_ctf(a,0);
65 }
66 
67 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4ui, Packet4f>(const Packet4ui& a) {
68  return vec_ctf(a,0);
69 }
70 
71 template<> EIGEN_STRONG_INLINE Packet8us pcast<Packet8bf, Packet8us>(const Packet8bf& a) {
72  Packet4f float_even = Bf16ToF32Even(a);
73  Packet4f float_odd = Bf16ToF32Odd(a);
74  Packet4ui int_even = pcast<Packet4f, Packet4ui>(float_even);
75  Packet4ui int_odd = pcast<Packet4f, Packet4ui>(float_odd);
76  const EIGEN_DECLARE_CONST_FAST_Packet4ui(low_mask, 0x0000FFFF);
77  Packet4ui low_even = pand<Packet4ui>(int_even, p4ui_low_mask);
78  Packet4ui low_odd = pand<Packet4ui>(int_odd, p4ui_low_mask);
79 
80  //Check values that are bigger than USHRT_MAX (0xFFFF)
81  Packet4bi overflow_selector;
82  if(vec_any_gt(int_even, p4ui_low_mask)){
83  overflow_selector = vec_cmpgt(int_even, p4ui_low_mask);
84  low_even = vec_sel(low_even, p4ui_low_mask, overflow_selector);
85  }
86  if(vec_any_gt(int_odd, p4ui_low_mask)){
87  overflow_selector = vec_cmpgt(int_odd, p4ui_low_mask);
88  low_odd = vec_sel(low_even, p4ui_low_mask, overflow_selector);
89  }
90 
91  return pmerge(low_even, low_odd);
92 }
93 
94 template<> EIGEN_STRONG_INLINE Packet8bf pcast<Packet8us, Packet8bf>(const Packet8us& a) {
95  //short -> int -> float -> bfloat16
96  const EIGEN_DECLARE_CONST_FAST_Packet4ui(low_mask, 0x0000FFFF);
97  Packet4ui int_cast = reinterpret_cast<Packet4ui>(a);
98  Packet4ui int_even = pand<Packet4ui>(int_cast, p4ui_low_mask);
99  Packet4ui int_odd = plogical_shift_right<16>(int_cast);
100  Packet4f float_even = pcast<Packet4ui, Packet4f>(int_even);
101  Packet4f float_odd = pcast<Packet4ui, Packet4f>(int_odd);
102  return F32ToBf16(float_even, float_odd);
103 }
104 
105 template <>
106 struct type_casting_traits<bfloat16, float> {
107  enum {
108  VectorizedCast = 1,
109  SrcCoeffRatio = 1,
110  TgtCoeffRatio = 2
111  };
112 };
113 
114 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet8bf, Packet4f>(const Packet8bf& a) {
116 #ifdef _BIG_ENDIAN
117  return reinterpret_cast<Packet4f>(vec_mergeh(a.m_val, z));
118 #else
119  return reinterpret_cast<Packet4f>(vec_mergeh(z, a.m_val));
120 #endif
121 }
122 
123 template <>
124 struct type_casting_traits<float, bfloat16> {
125  enum {
126  VectorizedCast = 1,
127  SrcCoeffRatio = 2,
128  TgtCoeffRatio = 1
129  };
130 };
131 
132 template<> EIGEN_STRONG_INLINE Packet8bf pcast<Packet4f, Packet8bf>(const Packet4f& a, const Packet4f &b) {
133  return F32ToBf16Both(a, b);
134 }
135 
136 template<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i,Packet4f>(const Packet4f& a) {
137  return reinterpret_cast<Packet4i>(a);
138 }
139 
140 template<> EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f,Packet4i>(const Packet4i& a) {
141  return reinterpret_cast<Packet4f>(a);
142 }
143 
144 #ifdef EIGEN_VECTORIZE_VSX
145 // VSX support varies between different compilers and even different
146 // versions of the same compiler. For gcc version >= 4.9.3, we can use
147 // vec_cts to efficiently convert Packet2d to Packet2l. Otherwise, use
148 // a slow version that works with older compilers.
149 // Update: apparently vec_cts/vec_ctf intrinsics for 64-bit doubles
150 // are buggy, https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70963
151 template<>
152 inline Packet2l pcast<Packet2d, Packet2l>(const Packet2d& x) {
153 #if EIGEN_GNUC_STRICT_AT_LEAST(7,1,0)
154  return vec_cts(x, 0); // TODO: check clang version.
155 #else
156  double tmp[2];
157  memcpy(tmp, &x, sizeof(tmp));
158  Packet2l l = { static_cast<long long>(tmp[0]),
159  static_cast<long long>(tmp[1]) };
160  return l;
161 #endif
162 }
163 
164 template<>
165 inline Packet2d pcast<Packet2l, Packet2d>(const Packet2l& x) {
166  unsigned long long tmp[2];
167  memcpy(tmp, &x, sizeof(tmp));
168  Packet2d d = { static_cast<double>(tmp[0]),
169  static_cast<double>(tmp[1]) };
170  return d;
171 }
172 #endif
173 
174 } // end namespace internal
175 
176 } // end namespace Eigen
177 
178 #endif // EIGEN_TYPE_CASTING_ALTIVEC_H
Array< int, 3, 1 > b
__vector int Packet4i
Packet8bf F32ToBf16(Packet4f p4f)
Packet8bf pcast< Packet8us, Packet8bf >(const Packet8us &a)
Packet4f Bf16ToF32Even(const Packet8bf &bf)
Packet4f preinterpret< Packet4f, Packet4i >(const Packet4i &a)
EIGEN_ALWAYS_INLINE Packet8us pmerge(Packet4ui even, Packet4ui odd)
__vector unsigned short int Packet8us
static EIGEN_DECLARE_CONST_FAST_Packet4ui(SIGN, 0x80000000u)
Packet4i pcast< Packet4f, Packet4i >(const Packet4f &a)
__vector unsigned int Packet4ui
__vector __bool int Packet4bi
eigen_packet_wrapper< __vector unsigned short int, 0 > Packet8bf
Packet4f pcast< Packet4ui, Packet4f >(const Packet4ui &a)
Packet4f pcast< Packet4i, Packet4f >(const Packet4i &a)
Packet8us pset1< Packet8us >(const unsigned short int &from)
Packet4i preinterpret< Packet4i, Packet4f >(const Packet4f &a)
Packet4f Bf16ToF32Odd(const Packet8bf &bf)
Packet4f pcast< Packet8bf, Packet4f >(const Packet8bf &a)
Packet4ui pcast< Packet4f, Packet4ui >(const Packet4f &a)
__vector float Packet4f
Packet8bf F32ToBf16Both(Packet4f lo, Packet4f hi)
Packet4ui pand< Packet4ui >(const Packet4ui &a, const Packet4ui &b)
Packet8us pcast< Packet8bf, Packet8us >(const Packet8bf &a)
Packet8bf pcast< Packet4f, Packet8bf >(const Packet4f &a, const Packet4f &b)
: InteropHeaders
Definition: Core:139