11 #ifndef EIGEN_MATHFUNCTIONSIMPL_H
12 #define EIGEN_MATHFUNCTIONSIMPL_H
34 template <
typename Packet,
int Steps>
35 struct generic_reciprocal_newton_step {
36 static_assert(Steps > 0,
"Steps must be at least 1.");
38 run(
const Packet&
a,
const Packet& approx_a_recip) {
39 using Scalar =
typename unpacket_traits<Packet>::type;
40 const Packet two = pset1<Packet>(Scalar(2));
44 generic_reciprocal_newton_step<Packet,Steps - 1>::run(
a, approx_a_recip);
45 const Packet tmp =
pnmadd(
a,
x, two);
48 const Packet is_not_nan =
pcmp_eq(tmp, tmp);
53 template<
typename Packet>
54 struct generic_reciprocal_newton_step<Packet, 0> {
56 run(
const Packet& ,
const Packet& approx_rsqrt) {
77 template <
typename Packet,
int Steps>
78 struct generic_rsqrt_newton_step {
79 static_assert(Steps > 0,
"Steps must be at least 1.");
80 using Scalar =
typename unpacket_traits<Packet>::type;
82 run(
const Packet&
a,
const Packet& approx_rsqrt) {
83 constexpr Scalar kMinusHalf = Scalar(-1)/Scalar(2);
84 const Packet cst_minus_half = pset1<Packet>(kMinusHalf);
85 const Packet cst_minus_one = pset1<Packet>(Scalar(-1));
87 Packet inv_sqrt = approx_rsqrt;
88 for (
int step = 0; step < Steps; ++step) {
92 Packet r2 =
pmul(
a, inv_sqrt);
93 Packet half_r =
pmul(inv_sqrt, cst_minus_half);
94 Packet h_n =
pmadd(r2, inv_sqrt, cst_minus_one);
95 inv_sqrt =
pmadd(half_r, h_n, inv_sqrt);
106 template<
typename Packet>
107 struct generic_rsqrt_newton_step<Packet, 0> {
109 run(
const Packet& ,
const Packet& approx_rsqrt) {
129 template <
typename Packet,
int Steps=1>
130 struct generic_sqrt_newton_step {
131 static_assert(Steps > 0,
"Steps must be at least 1.");
134 run(
const Packet&
a,
const Packet& approx_rsqrt) {
135 using Scalar =
typename unpacket_traits<Packet>::type;
136 const Packet one_point_five = pset1<Packet>(Scalar(1.5));
137 const Packet minus_half = pset1<Packet>(Scalar(-0.5));
139 const Packet inf_mask =
pcmp_eq(
a, pset1<Packet>(NumTraits<Scalar>::infinity()));
144 Packet
rsqrt =
pmul(approx_rsqrt,
pmadd(
pmul(minus_half, approx_rsqrt),
pmul(
a, approx_rsqrt), one_point_five));
145 for (
int step = 1; step < Steps; ++step) {
169 #ifdef EIGEN_VECTORIZE_FMA
170 const T plus_clamp = pset1<T>(7.99881172180175781f);
171 const T minus_clamp = pset1<T>(-7.99881172180175781f);
173 const T plus_clamp = pset1<T>(7.90531110763549805f);
174 const T minus_clamp = pset1<T>(-7.90531110763549805f);
176 const T tiny = pset1<T>(0.0004f);
177 const T x =
pmax(
pmin(a_x, plus_clamp), minus_clamp);
180 const T alpha_1 = pset1<T>(4.89352455891786e-03f);
181 const T alpha_3 = pset1<T>(6.37261928875436e-04f);
182 const T alpha_5 = pset1<T>(1.48572235717979e-05f);
183 const T alpha_7 = pset1<T>(5.12229709037114e-08f);
184 const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
185 const T alpha_11 = pset1<T>(2.00018790482477e-13f);
186 const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
189 const T beta_0 = pset1<T>(4.89352518554385e-03f);
190 const T beta_2 = pset1<T>(2.26843463243900e-03f);
191 const T beta_4 = pset1<T>(1.18534705686654e-04f);
192 const T beta_6 = pset1<T>(1.19825839466702e-06f);
198 T p =
pmadd(x2, alpha_13, alpha_11);
207 T q =
pmadd(x2, beta_6, beta_4);
208 q =
pmadd(x2, q, beta_2);
209 q =
pmadd(x2, q, beta_0);
215 template<
typename RealScalar>
230 return p *
sqrt(RealScalar(1) + qp*qp);
233 template<
typename Scalar>
238 inline RealScalar run(
const Scalar&
x,
const Scalar&
y)
241 return positive_real_hypot<RealScalar>(
abs(
x),
abs(
y));
279 :
x > zero ? std::complex<T>(
w,
y / (2 *
w))
312 const T abs_z = numext::hypot(
x,
y);
314 const T woz =
w / abs_z;
320 :
x > zero ? std::complex<T>(woz, -
y / (2 *
w * abs_z))
321 : std::complex<T>(
numext::abs(
y) / (2 *
w * abs_z),
y < zero ? woz : -woz );
329 T b =
atan2(z.imag(), z.real());
const CwiseBinaryOp< atan2< Scalar >, const Derived, const OtherDerived > atan2(const EIGEN_CURRENT_STORAGE_BASE_CLASS< OtherDerived > &other) const
const ImagReturnType imag() const
RealReturnType real() const
#define EIGEN_USING_STD(FUNC)
#define EIGEN_DEVICE_FUNC
Eigen::Triplet< double > T
Packet pmin(const Packet &a, const Packet &b)
Packet8f pzero(const Packet8f &)
Packet4f pabs(const Packet4f &a)
Packet pmax(const Packet &a, const Packet &b)
Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Packet pmul(const Packet &a, const Packet &b)
std::complex< T > complex_log(const std::complex< T > &z)
std::complex< T > complex_rsqrt(const std::complex< T > &a_x)
Packet pnmadd(const Packet &a, const Packet &b, const Packet &c)
std::complex< T > complex_sqrt(const std::complex< T > &a_x)
T generic_fast_tanh_float(const T &a_x)
Packet pdiv(const Packet &a, const Packet &b)
Packet8h por(const Packet8h &a, const Packet8h &b)
Packet4i pcmp_lt(const Packet4i &a, const Packet4i &b)
RealScalar positive_real_hypot(const RealScalar &x, const RealScalar &y)
Packet8f pisnan(const Packet8f &a)
EIGEN_ALWAYS_INLINE bool() isinf(const Eigen::bfloat16 &h)
EIGEN_ALWAYS_INLINE T maxi(const T &x, const T &y)
EIGEN_ALWAYS_INLINE float sqrt(const float &x)
EIGEN_ALWAYS_INLINE bool() isnan(const Eigen::bfloat16 &h)
EIGEN_ALWAYS_INLINE T mini(const T &x, const T &y)
bool is_exactly_zero(const X &x)
EIGEN_ALWAYS_INLINE std::enable_if_t< NumTraits< T >::IsSigned||NumTraits< T >::IsComplex, typename NumTraits< T >::Real > abs(const T &x)
EIGEN_ALWAYS_INLINE T log(const T &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_rsqrt_op< typename Derived::Scalar >, const Derived > rsqrt(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_abs_op< typename Derived::Scalar >, const Derived > abs(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sqrt_op< typename Derived::Scalar >, const Derived > sqrt(const Eigen::ArrayBase< Derived > &x)
Holds information about the various numeric (i.e. scalar) types allowed by Eigen.