SSE/TypeCasting.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2015 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_TYPE_CASTING_SSE_H
11 #define EIGEN_TYPE_CASTING_SSE_H
12 
13 #include "../../InternalHeaderCheck.h"
14 
15 namespace Eigen {
16 
17 namespace internal {
18 
19 #ifndef EIGEN_VECTORIZE_AVX
20 template <>
21 struct type_casting_traits<float, bool> {
22  enum {
23  VectorizedCast = 1,
24  SrcCoeffRatio = 4,
25  TgtCoeffRatio = 1
26  };
27 };
28 
29 template <>
30 struct type_casting_traits<float, double> {
31  enum {
32  VectorizedCast = 1,
33  SrcCoeffRatio = 1,
34  TgtCoeffRatio = 2
35  };
36 };
37 #endif
38 
39 template <>
40 struct type_casting_traits<int, float> {
41  enum {
42  VectorizedCast = 1,
43  SrcCoeffRatio = 1,
44  TgtCoeffRatio = 1
45  };
46 };
47 
48 template <>
49 struct type_casting_traits<float, int> {
50  enum {
51  VectorizedCast = 1,
52  SrcCoeffRatio = 1,
53  TgtCoeffRatio = 1
54  };
55 };
56 
57 template <>
58 struct type_casting_traits<double, int> {
59  enum {
60  VectorizedCast = 1,
61  SrcCoeffRatio = 2,
62  TgtCoeffRatio = 1
63  };
64 };
65 
66 template <>
67 struct type_casting_traits<double, float> {
68  enum {
69  VectorizedCast = 1,
70  SrcCoeffRatio = 2,
71  TgtCoeffRatio = 1
72  };
73 };
74 
75 template <>
76 EIGEN_STRONG_INLINE Packet16b pcast<Packet4f, Packet16b>(const Packet4f& a,
77  const Packet4f& b,
78  const Packet4f& c,
79  const Packet4f& d) {
80  __m128 zero = pzero(a);
81  __m128 nonzero_a = _mm_cmpneq_ps(a, zero);
82  __m128 nonzero_b = _mm_cmpneq_ps(b, zero);
83  __m128 nonzero_c = _mm_cmpneq_ps(c, zero);
84  __m128 nonzero_d = _mm_cmpneq_ps(d, zero);
85  __m128i ab_bytes = _mm_packs_epi32(_mm_castps_si128(nonzero_a), _mm_castps_si128(nonzero_b));
86  __m128i cd_bytes = _mm_packs_epi32(_mm_castps_si128(nonzero_c), _mm_castps_si128(nonzero_d));
87  __m128i merged = _mm_packs_epi16(ab_bytes, cd_bytes);
88  return _mm_and_si128(merged, _mm_set1_epi8(1));
89 }
90 
91 template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet4f, Packet4i>(const Packet4f& a) {
92  return _mm_cvttps_epi32(a);
93 }
94 
95 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4i, Packet4f>(const Packet4i& a) {
96  return _mm_cvtepi32_ps(a);
97 }
98 
99 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet2d, Packet4f>(const Packet2d& a, const Packet2d& b) {
100  return _mm_shuffle_ps(_mm_cvtpd_ps(a), _mm_cvtpd_ps(b), (1 << 2) | (1 << 6));
101 }
102 
103 template<> EIGEN_STRONG_INLINE Packet4i pcast<Packet2d, Packet4i>(const Packet2d& a, const Packet2d& b) {
104  return _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(_mm_cvttpd_epi32(a)),
105  _mm_castsi128_ps(_mm_cvttpd_epi32(b)),
106  (1 << 2) | (1 << 6)));
107 }
108 
109 template<> EIGEN_STRONG_INLINE Packet2d pcast<Packet4f, Packet2d>(const Packet4f& a) {
110  // Simply discard the second half of the input
111  return _mm_cvtps_pd(a);
112 }
113 
114 template<> EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d, Packet4f>(const Packet4f& a) {
115  return _mm_castps_pd(a);
116 }
117 
118 template<> EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f, Packet2d>(const Packet2d& a) {
119  return _mm_castpd_ps(a);
120 }
121 
122 template<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i,Packet4f>(const Packet4f& a) {
123  return _mm_castps_si128(a);
124 }
125 
126 template<> EIGEN_STRONG_INLINE Packet4f preinterpret<Packet4f,Packet4i>(const Packet4i& a) {
127  return _mm_castsi128_ps(a);
128 }
129 
130 template<> EIGEN_STRONG_INLINE Packet2d preinterpret<Packet2d,Packet4i>(const Packet4i& a) {
131  return _mm_castsi128_pd(a);
132 }
133 
134 template<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i,Packet2d>(const Packet2d& a) {
135  return _mm_castpd_si128(a);
136 }
137 
138 template<> EIGEN_STRONG_INLINE Packet4ui preinterpret<Packet4ui, Packet4i>(const Packet4i& a) {
139  return Packet4ui(a);
140 }
141 
142 template<> EIGEN_STRONG_INLINE Packet4i preinterpret<Packet4i, Packet4ui>(const Packet4ui& a) {
143  return Packet4i(a);
144 }
145 // Disable the following code since it's broken on too many platforms / compilers.
146 //#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC)
147 #if 0
148 
149 template <>
150 struct type_casting_traits<Eigen::half, float> {
151  enum {
152  VectorizedCast = 1,
153  SrcCoeffRatio = 1,
154  TgtCoeffRatio = 1
155  };
156 };
157 
158 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) {
159  __int64_t a64 = _mm_cvtm64_si64(a.x);
160  Eigen::half h = raw_uint16_to_half(static_cast<unsigned short>(a64));
161  float f1 = static_cast<float>(h);
162  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16));
163  float f2 = static_cast<float>(h);
164  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32));
165  float f3 = static_cast<float>(h);
166  h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48));
167  float f4 = static_cast<float>(h);
168  return _mm_set_ps(f4, f3, f2, f1);
169 }
170 
171 template <>
172 struct type_casting_traits<float, Eigen::half> {
173  enum {
174  VectorizedCast = 1,
175  SrcCoeffRatio = 1,
176  TgtCoeffRatio = 1
177  };
178 };
179 
180 template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) {
181  EIGEN_ALIGN16 float aux[4];
182  pstore(aux, a);
183  Eigen::half h0(aux[0]);
184  Eigen::half h1(aux[1]);
185  Eigen::half h2(aux[2]);
186  Eigen::half h3(aux[3]);
187 
188  Packet4h result;
189  result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x);
190  return result;
191 }
192 
193 #endif
194 
195 } // end namespace internal
196 
197 } // end namespace Eigen
198 
199 #endif // EIGEN_TYPE_CASTING_SSE_H
Array< int, 3, 1 > b
#define EIGEN_ALIGN16
Array33i c
EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x)
Definition: Half.h:551
Packet8f pzero(const Packet8f &)
void pstore(Scalar *to, const Packet &from)
__vector int Packet4i
Packet2d pcast< Packet4f, Packet2d >(const Packet4f &a)
Packet4f pcast< Packet2d, Packet4f >(const Packet2d &a, const Packet2d &b)
Packet4f preinterpret< Packet4f, Packet4i >(const Packet4i &a)
eigen_packet_wrapper< __m128i, 1 > Packet16b
Packet4ui preinterpret< Packet4ui, Packet4i >(const Packet4i &a)
Packet4i pcast< Packet2d, Packet4i >(const Packet2d &a, const Packet2d &b)
Packet2d preinterpret< Packet2d, Packet4i >(const Packet4i &a)
Packet4i pcast< Packet4f, Packet4i >(const Packet4f &a)
__vector unsigned int Packet4ui
Packet4f pcast< Packet4i, Packet4f >(const Packet4i &a)
Packet4i preinterpret< Packet4i, Packet4f >(const Packet4f &a)
Packet16b pcast< Packet4f, Packet16b >(const Packet4f &a, const Packet4f &b, const Packet4f &c, const Packet4f &d)
Packet2d preinterpret< Packet2d, Packet4f >(const Packet4f &a)
Packet4i preinterpret< Packet4i, Packet2d >(const Packet2d &a)
__vector float Packet4f
Packet4f preinterpret< Packet4f, Packet2d >(const Packet2d &a)
Packet4i preinterpret< Packet4i, Packet4ui >(const Packet4ui &a)
: InteropHeaders
Definition: Core:139