MathFunctionsImpl.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
5 // Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_MATHFUNCTIONSIMPL_H
12 #define EIGEN_MATHFUNCTIONSIMPL_H
13 
14 #include "./InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
18 namespace internal {
19 
34 template <typename Packet, int Steps>
35 struct generic_reciprocal_newton_step {
36  static_assert(Steps > 0, "Steps must be at least 1.");
37  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
38  run(const Packet& a, const Packet& approx_a_recip) {
39  using Scalar = typename unpacket_traits<Packet>::type;
40  const Packet two = pset1<Packet>(Scalar(2));
41  // Refine the approximation using one Newton-Raphson step:
42  // x_{i} = x_{i-1} * (2 - a * x_{i-1})
43  const Packet x =
44  generic_reciprocal_newton_step<Packet,Steps - 1>::run(a, approx_a_recip);
45  const Packet tmp = pnmadd(a, x, two);
46  // If tmp is NaN, it means that a is either +/-0 or +/-Inf.
47  // In this case return the approximation directly.
48  const Packet is_not_nan = pcmp_eq(tmp, tmp);
49  return pselect(is_not_nan, pmul(x, tmp), x);
50  }
51 };
52 
53 template<typename Packet>
54 struct generic_reciprocal_newton_step<Packet, 0> {
55  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
56  run(const Packet& /*unused*/, const Packet& approx_rsqrt) {
57  return approx_rsqrt;
58  }
59 };
60 
61 
77 template <typename Packet, int Steps>
78 struct generic_rsqrt_newton_step {
79  static_assert(Steps > 0, "Steps must be at least 1.");
80  using Scalar = typename unpacket_traits<Packet>::type;
81  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
82  run(const Packet& a, const Packet& approx_rsqrt) {
83  constexpr Scalar kMinusHalf = Scalar(-1)/Scalar(2);
84  const Packet cst_minus_half = pset1<Packet>(kMinusHalf);
85  const Packet cst_minus_one = pset1<Packet>(Scalar(-1));
86 
87  Packet inv_sqrt = approx_rsqrt;
88  for (int step = 0; step < Steps; ++step) {
89  // Refine the approximation using one Newton-Raphson step:
90  // h_n = (x * inv_sqrt) * inv_sqrt - 1 (so that h_n is nearly 0).
91  // inv_sqrt = inv_sqrt - 0.5 * inv_sqrt * h_n
92  Packet r2 = pmul(a, inv_sqrt);
93  Packet half_r = pmul(inv_sqrt, cst_minus_half);
94  Packet h_n = pmadd(r2, inv_sqrt, cst_minus_one);
95  inv_sqrt = pmadd(half_r, h_n, inv_sqrt);
96  }
97 
98  // If x is NaN, then either:
99  // 1) the input is NaN
100  // 2) zero and infinity were multiplied
101  // In either of these cases, return approx_rsqrt
102  return pselect(pisnan(inv_sqrt), approx_rsqrt, inv_sqrt);
103  }
104 };
105 
106 template<typename Packet>
107 struct generic_rsqrt_newton_step<Packet, 0> {
108  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
109  run(const Packet& /*unused*/, const Packet& approx_rsqrt) {
110  return approx_rsqrt;
111  }
112 };
113 
129 template <typename Packet, int Steps=1>
130 struct generic_sqrt_newton_step {
131  static_assert(Steps > 0, "Steps must be at least 1.");
132 
133  EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet
134  run(const Packet& a, const Packet& approx_rsqrt) {
135  using Scalar = typename unpacket_traits<Packet>::type;
136  const Packet one_point_five = pset1<Packet>(Scalar(1.5));
137  const Packet minus_half = pset1<Packet>(Scalar(-0.5));
138  // If a is inf or zero, return a directly.
139  const Packet inf_mask = pcmp_eq(a, pset1<Packet>(NumTraits<Scalar>::infinity()));
140  const Packet return_a = por(pcmp_eq(a, pzero(a)), inf_mask);
141  // Do a single step of Newton's iteration for reciprocal square root:
142  // x_{n+1} = x_n * (1.5 + (-0.5 * x_n) * (a * x_n))).
143  // The Newton's step is computed this way to avoid over/under-flows.
144  Packet rsqrt = pmul(approx_rsqrt, pmadd(pmul(minus_half, approx_rsqrt), pmul(a, approx_rsqrt), one_point_five));
145  for (int step = 1; step < Steps; ++step) {
146  rsqrt = pmul(rsqrt, pmadd(pmul(minus_half, rsqrt), pmul(a, rsqrt), one_point_five));
147  }
148 
149  // Return sqrt(x) = x * rsqrt(x) for non-zero finite positive arguments.
150  // Return a itself for 0 or +inf, NaN for negative arguments.
151  return pselect(return_a, a, pmul(a, rsqrt));
152  }
153 };
154 
165 template<typename T>
167 {
168  // Clamp the inputs to the range [-c, c]
169 #ifdef EIGEN_VECTORIZE_FMA
170  const T plus_clamp = pset1<T>(7.99881172180175781f);
171  const T minus_clamp = pset1<T>(-7.99881172180175781f);
172 #else
173  const T plus_clamp = pset1<T>(7.90531110763549805f);
174  const T minus_clamp = pset1<T>(-7.90531110763549805f);
175 #endif
176  const T tiny = pset1<T>(0.0004f);
177  const T x = pmax(pmin(a_x, plus_clamp), minus_clamp);
178  const T tiny_mask = pcmp_lt(pabs(a_x), tiny);
179  // The monomial coefficients of the numerator polynomial (odd).
180  const T alpha_1 = pset1<T>(4.89352455891786e-03f);
181  const T alpha_3 = pset1<T>(6.37261928875436e-04f);
182  const T alpha_5 = pset1<T>(1.48572235717979e-05f);
183  const T alpha_7 = pset1<T>(5.12229709037114e-08f);
184  const T alpha_9 = pset1<T>(-8.60467152213735e-11f);
185  const T alpha_11 = pset1<T>(2.00018790482477e-13f);
186  const T alpha_13 = pset1<T>(-2.76076847742355e-16f);
187 
188  // The monomial coefficients of the denominator polynomial (even).
189  const T beta_0 = pset1<T>(4.89352518554385e-03f);
190  const T beta_2 = pset1<T>(2.26843463243900e-03f);
191  const T beta_4 = pset1<T>(1.18534705686654e-04f);
192  const T beta_6 = pset1<T>(1.19825839466702e-06f);
193 
194  // Since the polynomials are odd/even, we need x^2.
195  const T x2 = pmul(x, x);
196 
197  // Evaluate the numerator polynomial p.
198  T p = pmadd(x2, alpha_13, alpha_11);
199  p = pmadd(x2, p, alpha_9);
200  p = pmadd(x2, p, alpha_7);
201  p = pmadd(x2, p, alpha_5);
202  p = pmadd(x2, p, alpha_3);
203  p = pmadd(x2, p, alpha_1);
204  p = pmul(x, p);
205 
206  // Evaluate the denominator polynomial q.
207  T q = pmadd(x2, beta_6, beta_4);
208  q = pmadd(x2, q, beta_2);
209  q = pmadd(x2, q, beta_0);
210 
211  // Divide the numerator by the denominator.
212  return pselect(tiny_mask, x, pdiv(p, q));
213 }
214 
215 template<typename RealScalar>
216 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
217 RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y)
218 {
219  // IEEE IEC 6059 special cases.
220  if ((numext::isinf)(x) || (numext::isinf)(y))
222  if ((numext::isnan)(x) || (numext::isnan)(y))
224 
226  RealScalar p, qp;
227  p = numext::maxi(x,y);
228  if(numext::is_exactly_zero(p)) return RealScalar(0);
229  qp = numext::mini(y,x) / p;
230  return p * sqrt(RealScalar(1) + qp*qp);
231 }
232 
233 template<typename Scalar>
234 struct hypot_impl
235 {
236  typedef typename NumTraits<Scalar>::Real RealScalar;
237  static EIGEN_DEVICE_FUNC
238  inline RealScalar run(const Scalar& x, const Scalar& y)
239  {
241  return positive_real_hypot<RealScalar>(abs(x), abs(y));
242  }
243 };
244 
245 // Generic complex sqrt implementation that correctly handles corner cases
246 // according to https://en.cppreference.com/w/cpp/numeric/complex/sqrt
247 template<typename T>
248 EIGEN_DEVICE_FUNC std::complex<T> complex_sqrt(const std::complex<T>& z) {
249  // Computes the principal sqrt of the input.
250  //
251  // For a complex square root of the number x + i*y. We want to find real
252  // numbers u and v such that
253  // (u + i*v)^2 = x + i*y <=>
254  // u^2 - v^2 + i*2*u*v = x + i*v.
255  // By equating the real and imaginary parts we get:
256  // u^2 - v^2 = x
257  // 2*u*v = y.
258  //
259  // For x >= 0, this has the numerically stable solution
260  // u = sqrt(0.5 * (x + sqrt(x^2 + y^2)))
261  // v = y / (2 * u)
262  // and for x < 0,
263  // v = sign(y) * sqrt(0.5 * (-x + sqrt(x^2 + y^2)))
264  // u = y / (2 * v)
265  //
266  // Letting w = sqrt(0.5 * (|x| + |z|)),
267  // if x == 0: u = w, v = sign(y) * w
268  // if x > 0: u = w, v = y / (2 * w)
269  // if x < 0: u = |y| / (2 * w), v = sign(y) * w
270 
271  const T x = numext::real(z);
272  const T y = numext::imag(z);
273  const T zero = T(0);
274  const T w = numext::sqrt(T(0.5) * (numext::abs(x) + numext::hypot(x, y)));
275 
276  return
277  (numext::isinf)(y) ? std::complex<T>(NumTraits<T>::infinity(), y)
278  : numext::is_exactly_zero(x) ? std::complex<T>(w, y < zero ? -w : w)
279  : x > zero ? std::complex<T>(w, y / (2 * w))
280  : std::complex<T>(numext::abs(y) / (2 * w), y < zero ? -w : w );
281 }
282 
283 // Generic complex rsqrt implementation.
284 template<typename T>
285 EIGEN_DEVICE_FUNC std::complex<T> complex_rsqrt(const std::complex<T>& z) {
286  // Computes the principal reciprocal sqrt of the input.
287  //
288  // For a complex reciprocal square root of the number z = x + i*y. We want to
289  // find real numbers u and v such that
290  // (u + i*v)^2 = 1 / (x + i*y) <=>
291  // u^2 - v^2 + i*2*u*v = x/|z|^2 - i*v/|z|^2.
292  // By equating the real and imaginary parts we get:
293  // u^2 - v^2 = x/|z|^2
294  // 2*u*v = y/|z|^2.
295  //
296  // For x >= 0, this has the numerically stable solution
297  // u = sqrt(0.5 * (x + |z|)) / |z|
298  // v = -y / (2 * u * |z|)
299  // and for x < 0,
300  // v = -sign(y) * sqrt(0.5 * (-x + |z|)) / |z|
301  // u = -y / (2 * v * |z|)
302  //
303  // Letting w = sqrt(0.5 * (|x| + |z|)),
304  // if x == 0: u = w / |z|, v = -sign(y) * w / |z|
305  // if x > 0: u = w / |z|, v = -y / (2 * w * |z|)
306  // if x < 0: u = |y| / (2 * w * |z|), v = -sign(y) * w / |z|
307 
308  const T x = numext::real(z);
309  const T y = numext::imag(z);
310  const T zero = T(0);
311 
312  const T abs_z = numext::hypot(x, y);
313  const T w = numext::sqrt(T(0.5) * (numext::abs(x) + abs_z));
314  const T woz = w / abs_z;
315  // Corner cases consistent with 1/sqrt(z) on gcc/clang.
316  return
318  : ((numext::isinf)(x) || (numext::isinf)(y)) ? std::complex<T>(zero, zero)
319  : numext::is_exactly_zero(x) ? std::complex<T>(woz, y < zero ? woz : -woz)
320  : x > zero ? std::complex<T>(woz, -y / (2 * w * abs_z))
321  : std::complex<T>(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz );
322 }
323 
324 template<typename T>
325 EIGEN_DEVICE_FUNC std::complex<T> complex_log(const std::complex<T>& z) {
326  // Computes complex log.
327  T a = numext::abs(z);
329  T b = atan2(z.imag(), z.real());
330  return std::complex<T>(numext::log(a), b);
331 }
332 
333 } // end namespace internal
334 
335 } // end namespace Eigen
336 
337 #endif // EIGEN_MATHFUNCTIONSIMPL_H
const CwiseBinaryOp< atan2< Scalar >, const Derived, const OtherDerived > atan2(const EIGEN_CURRENT_STORAGE_BASE_CLASS< OtherDerived > &other) const
Array< int, 3, 1 > b
const ImagReturnType imag() const
RealReturnType real() const
#define EIGEN_USING_STD(FUNC)
Definition: Macros.h:1080
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:883
RowVector3d w
float * p
Eigen::Triplet< double > T
Packet pmin(const Packet &a, const Packet &b)
Packet8f pzero(const Packet8f &)
const Scalar & y
Packet4f pabs(const Packet4f &a)
Packet pmax(const Packet &a, const Packet &b)
Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Packet pmul(const Packet &a, const Packet &b)
std::complex< T > complex_log(const std::complex< T > &z)
std::complex< T > complex_rsqrt(const std::complex< T > &a_x)
Packet pnmadd(const Packet &a, const Packet &b, const Packet &c)
std::complex< T > complex_sqrt(const std::complex< T > &a_x)
T generic_fast_tanh_float(const T &a_x)
Packet pdiv(const Packet &a, const Packet &b)
Packet8h por(const Packet8h &a, const Packet8h &b)
Packet4i pcmp_lt(const Packet4i &a, const Packet4i &b)
RealScalar positive_real_hypot(const RealScalar &x, const RealScalar &y)
Packet8f pisnan(const Packet8f &a)
EIGEN_ALWAYS_INLINE bool() isinf(const Eigen::bfloat16 &h)
Definition: BFloat16.h:784
EIGEN_ALWAYS_INLINE T maxi(const T &x, const T &y)
EIGEN_ALWAYS_INLINE float sqrt(const float &x)
EIGEN_ALWAYS_INLINE bool() isnan(const Eigen::bfloat16 &h)
Definition: BFloat16.h:778
EIGEN_ALWAYS_INLINE T mini(const T &x, const T &y)
bool is_exactly_zero(const X &x)
Definition: Meta.h:475
EIGEN_ALWAYS_INLINE std::enable_if_t< NumTraits< T >::IsSigned||NumTraits< T >::IsComplex, typename NumTraits< T >::Real > abs(const T &x)
EIGEN_ALWAYS_INLINE T log(const T &x)
: InteropHeaders
Definition: Core:139
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_rsqrt_op< typename Derived::Scalar >, const Derived > rsqrt(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_abs_op< typename Derived::Scalar >, const Derived > abs(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sqrt_op< typename Derived::Scalar >, const Derived > sqrt(const Eigen::ArrayBase< Derived > &x)
Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
Definition: NumTraits.h:231