arch/AVX512/MathFunctions.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016 Pedro Gonnet (pedro.gonnet@gmail.com)
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
11 #define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
12 
13 #include "../../InternalHeaderCheck.h"
14 
15 namespace Eigen {
16 
17 namespace internal {
20 
21 template <>
22 EIGEN_STRONG_INLINE Packet16h pfrexp(const Packet16h& a, Packet16h& exponent) {
23  Packet16f fexponent;
24  const Packet16h out = float2half(pfrexp<Packet16f>(half2float(a), fexponent));
25  exponent = float2half(fexponent);
26  return out;
27 }
28 
29 template <>
30 EIGEN_STRONG_INLINE Packet16h pldexp(const Packet16h& a, const Packet16h& exponent) {
31  return float2half(pldexp<Packet16f>(half2float(a), half2float(exponent)));
32 }
33 
34 template <>
35 EIGEN_STRONG_INLINE Packet16bf pfrexp(const Packet16bf& a, Packet16bf& exponent) {
36  Packet16f fexponent;
37  const Packet16bf out = F32ToBf16(pfrexp<Packet16f>(Bf16ToF32(a), fexponent));
38  exponent = F32ToBf16(fexponent);
39  return out;
40 }
41 
42 template <>
43 EIGEN_STRONG_INLINE Packet16bf pldexp(const Packet16bf& a, const Packet16bf& exponent) {
44  return F32ToBf16(pldexp<Packet16f>(Bf16ToF32(a), Bf16ToF32(exponent)));
45 }
46 
47 #if EIGEN_FAST_MATH
48 template <>
50 psqrt<Packet16f>(const Packet16f& _x) {
51  return generic_sqrt_newton_step<Packet16f>::run(_x, _mm512_rsqrt14_ps(_x));
52 }
53 
54 template <>
56 psqrt<Packet8d>(const Packet8d& _x) {
57 #ifdef EIGEN_VECTORIZE_AVX512ER
58  return generic_sqrt_newton_step<Packet8d, /*Steps=*/1>::run(_x, _mm512_rsqrt28_pd(_x));
59 #else
60  return generic_sqrt_newton_step<Packet8d, /*Steps=*/2>::run(_x, _mm512_rsqrt14_pd(_x));
61 #endif
62 }
63 #else
64 template <>
65 EIGEN_STRONG_INLINE Packet16f psqrt<Packet16f>(const Packet16f& x) {
66  return _mm512_sqrt_ps(x);
67 }
68 
69 template <>
70 EIGEN_STRONG_INLINE Packet8d psqrt<Packet8d>(const Packet8d& x) {
71  return _mm512_sqrt_pd(x);
72 }
73 #endif
74 
75 // prsqrt for float.
76 #if defined(EIGEN_VECTORIZE_AVX512ER)
77 template <>
78 EIGEN_STRONG_INLINE Packet16f prsqrt<Packet16f>(const Packet16f& x) {
79  return _mm512_rsqrt28_ps(x);
80 }
81 #elif EIGEN_FAST_MATH
82 
83 template <>
85 prsqrt<Packet16f>(const Packet16f& _x) {
86  return generic_rsqrt_newton_step<Packet16f, /*Steps=*/1>::run(_x, _mm512_rsqrt14_ps(_x));
87 }
88 #endif
89 
90 
91 // prsqrt for double.
92 #if EIGEN_FAST_MATH
93 template <>
95 prsqrt<Packet8d>(const Packet8d& _x) {
96  #ifdef EIGEN_VECTORIZE_AVX512ER
97  return generic_rsqrt_newton_step<Packet8d, /*Steps=*/1>::run(_x, _mm512_rsqrt28_pd(_x));
98  #else
99  return generic_rsqrt_newton_step<Packet8d, /*Steps=*/2>::run(_x, _mm512_rsqrt14_pd(_x));
100  #endif
101 }
102 
103 template<> EIGEN_STRONG_INLINE Packet16f preciprocal<Packet16f>(const Packet16f& a) {
104 #ifdef EIGEN_VECTORIZE_AVX512ER
105  return _mm512_rcp28_ps(a);
106 #else
107  return generic_reciprocal_newton_step<Packet16f, /*Steps=*/1>::run(a, _mm512_rcp14_ps(a));
108 #endif
109 }
110 #endif
111 
134 
135 } // end namespace internal
136 
137 } // end namespace Eigen
138 
139 #endif // THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
#define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD)
Definition: BFloat16.h:33
#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PACKET)
#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PACKET)
#define F16_PACKET_FUNCTION(PACKET_F, PACKET_F16, METHOD)
Definition: Half.h:53
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Definition: Macros.h:892
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet &a)
Packet8bf F32ToBf16(Packet4f p4f)
Packet8d psqrt< Packet8d >(const Packet8d &x)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet &a)
Packet16f psqrt< Packet16f >(const Packet16f &x)
Packet8f Bf16ToF32(const Packet8bf &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet &a)
eigen_packet_wrapper< __m256i, 2 > Packet16bf
Packet16f pfrexp< Packet16f >(const Packet16f &a, Packet16f &exponent)
Packet8h pfrexp(const Packet8h &a, Packet8h &exponent)
Packet16f pldexp< Packet16f >(const Packet16f &a, const Packet16f &exponent)
Packet8h float2half(const Packet8f &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet &a)
Packet4f psqrt(const Packet4f &a)
Packet8f half2float(const Packet8h &a)
Packet8h pldexp(const Packet8h &a, const Packet8h &exponent)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet &a)
Packet preciprocal(const Packet &a)
eigen_packet_wrapper< __m256i, 1 > Packet16h
Packet4f prsqrt(const Packet4f &a)
: InteropHeaders
Definition: Core:139