arch/AVX/MathFunctions.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef EIGEN_MATH_FUNCTIONS_AVX_H
11 #define EIGEN_MATH_FUNCTIONS_AVX_H
12 
13 /* The sin and cos functions of this file are loosely derived from
14  * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
15  */
16 
17 #include "../../InternalHeaderCheck.h"
18 
19 namespace Eigen {
20 
21 namespace internal {
22 
25 
26 // Notice that for newer processors, it is counterproductive to use Newton
27 // iteration for square root. In particular, Skylake and Zen2 processors
28 // have approximately doubled throughput of the _mm_sqrt_ps instruction
29 // compared to their predecessors.
32  return _mm256_sqrt_ps(_x);
33 }
36  return _mm256_sqrt_pd(_x);
37 }
38 
39 
40 // Even on Skylake, using Newton iteration is a win for reciprocal square root.
41 #if EIGEN_FAST_MATH
// Packet8f reciprocal square root, compiled only when EIGEN_FAST_MATH is
// non-zero: the _mm256_rsqrt_ps result is handed to
// generic_rsqrt_newton_step<Packet8f, 1>::run as the starting approximation.
// NOTE(review): no `template <>` / inline-qualifier line is visible in front
// of this signature in this listing -- confirm the full specialization header
// against the upstream file before relying on this text.
43 Packet8f prsqrt<Packet8f>(const Packet8f& a) {
44  // _mm256_rsqrt_ps returns -inf for negative denormals.
45  // _mm512_rsqrt**_ps returns -NaN for negative denormals. We may want
46  // consistency here.
  // The disabled snippet below would select -quiet_NaN wherever a < 0 and the
  // raw _mm256_rsqrt_ps estimate elsewhere; it is currently not in effect.
47  // const Packet8f rsqrt = pselect(pcmp_lt(a, pzero(a)),
48  // pset1<Packet8f>(-NumTraits<float>::quiet_NaN()),
49  // _mm256_rsqrt_ps(a));
  // Steps=1: a single refinement step is requested on top of the estimate.
50  return generic_rsqrt_newton_step<Packet8f, /*Steps=*/1>::run(a, _mm256_rsqrt_ps(a));
51 }
52 
53 template<> EIGEN_STRONG_INLINE Packet8f preciprocal<Packet8f>(const Packet8f& a) {
54  return generic_reciprocal_newton_step<Packet8f, /*Steps=*/1>::run(a, _mm256_rcp_ps(a));
55 }
56 
57 #endif
58 
59 template <>
60 EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h& a, Packet8h& exponent) {
61  Packet8f fexponent;
62  const Packet8h out = float2half(pfrexp<Packet8f>(half2float(a), fexponent));
63  exponent = float2half(fexponent);
64  return out;
65 }
66 
67 template <>
68 EIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h& a, const Packet8h& exponent) {
69  return float2half(pldexp<Packet8f>(half2float(a), half2float(exponent)));
70 }
71 
72 template <>
73 EIGEN_STRONG_INLINE Packet8bf pfrexp(const Packet8bf& a, Packet8bf& exponent) {
74  Packet8f fexponent;
75  const Packet8bf out = F32ToBf16(pfrexp<Packet8f>(Bf16ToF32(a), fexponent));
76  exponent = F32ToBf16(fexponent);
77  return out;
78 }
79 
80 template <>
81 EIGEN_STRONG_INLINE Packet8bf pldexp(const Packet8bf& a, const Packet8bf& exponent) {
82  return F32ToBf16(pldexp<Packet8f>(Bf16ToF32(a), Bf16ToF32(exponent)));
83 }
84 
107 
108 
109 } // end namespace internal
110 
111 } // end namespace Eigen
112 
113 #endif // EIGEN_MATH_FUNCTIONS_AVX_H
#define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD)
Definition: BFloat16.h:33
#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PACKET)
#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PACKET)
#define F16_PACKET_FUNCTION(PACKET_F, PACKET_F16, METHOD)
Definition: Half.h:53
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Definition: Macros.h:892
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet &a)
Packet8bf F32ToBf16(Packet4f p4f)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet &a)
Packet8f Bf16ToF32(const Packet8bf &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet &a)
Packet8f pldexp< Packet8f >(const Packet8f &a, const Packet8f &exponent)
Packet8h pfrexp(const Packet8h &a, Packet8h &exponent)
eigen_packet_wrapper< __vector unsigned short int, 0 > Packet8bf
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet8f psqrt< Packet8f >(const Packet8f &_x)
Packet8h float2half(const Packet8f &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet &a)
Packet8f pfrexp< Packet8f >(const Packet8f &a, Packet8f &exponent)
Packet4f psqrt(const Packet4f &a)
Packet8f half2float(const Packet8h &a)
Packet8h pldexp(const Packet8h &a, const Packet8h &exponent)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet &a)
Packet preciprocal(const Packet &a)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4d psqrt< Packet4d >(const Packet4d &_x)
eigen_packet_wrapper< __m128i, 2 > Packet8h
Packet4f prsqrt(const Packet4f &a)
: InteropHeaders
Definition: Core:139