16 #ifndef EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H
17 #define EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H
19 #include "../../InternalHeaderCheck.h"
25 template<
typename T>
struct make_integer;
26 template<>
struct make_integer<float> {
typedef numext::int32_t type; };
27 template<>
struct make_integer<double> {
typedef numext::int64_t type; };
29 template<>
struct make_integer<bfloat16> {
typedef numext::int16_t type; };
33 typedef typename unpacket_traits<Packet>::type Scalar;
34 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
35 static constexpr
int mantissa_bits = numext::numeric_limits<Scalar>::digits - 1;
36 return pcast<PacketI, Packet>(plogical_shift_right<mantissa_bits>(preinterpret<PacketI>(
pabs(
a))));
43 typedef typename unpacket_traits<Packet>::type Scalar;
44 typedef typename make_unsigned<typename make_integer<Scalar>::type>::type ScalarUI;
46 TotalBits =
sizeof(Scalar) * CHAR_BIT,
47 MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
48 ExponentBits = TotalBits - MantissaBits - 1;
51 ~(((ScalarUI(1) << ExponentBits) - ScalarUI(1)) << MantissaBits);
52 const Packet sign_mantissa_mask = pset1frombits<Packet>(
static_cast<ScalarUI
>(scalar_sign_mantissa_mask));
53 const Packet
half = pset1<Packet>(Scalar(0.5));
54 const Packet zero =
pzero(
a);
59 EIGEN_CONSTEXPR ScalarUI scalar_normalization_offset = ScalarUI(MantissaBits + 1);
61 const Scalar scalar_normalization_factor = Scalar(ScalarUI(1) <<
int(scalar_normalization_offset));
62 const Packet normalization_factor = pset1<Packet>(scalar_normalization_factor);
63 const Packet normalized_a =
pselect(is_denormal,
pmul(
a, normalization_factor),
a);
66 const Scalar scalar_exponent_offset = -Scalar((ScalarUI(1)<<(ExponentBits-1)) - ScalarUI(2));
67 Packet exponent_offset = pset1<Packet>(scalar_exponent_offset);
68 const Packet normalization_offset = pset1<Packet>(-Scalar(scalar_normalization_offset));
69 exponent_offset =
pselect(is_denormal,
padd(exponent_offset, normalization_offset), exponent_offset);
75 const Scalar scalar_non_finite_exponent = Scalar((ScalarUI(1) << ExponentBits) - ScalarUI(1));
76 const Packet non_finite_exponent = pset1<Packet>(scalar_non_finite_exponent);
77 const Packet is_zero_or_not_finite =
por(
pcmp_eq(
a, zero),
pcmp_eq(exponent, non_finite_exponent));
78 const Packet
m =
pselect(is_zero_or_not_finite,
a,
por(
pand(normalized_a, sign_mantissa_mask),
half));
79 exponent =
pselect(is_zero_or_not_finite, zero,
padd(exponent, exponent_offset));
109 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
110 typedef typename unpacket_traits<Packet>::type Scalar;
111 typedef typename unpacket_traits<PacketI>::type ScalarI;
113 TotalBits =
sizeof(Scalar) * CHAR_BIT,
114 MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
115 ExponentBits = TotalBits - MantissaBits - 1;
117 const Packet max_exponent = pset1<Packet>(Scalar((ScalarI(1)<<ExponentBits) + ScalarI(MantissaBits - 1)));
118 const PacketI bias = pset1<PacketI>((ScalarI(1)<<(ExponentBits-1)) - ScalarI(1));
119 const PacketI
e = pcast<Packet, PacketI>(
pmin(
pmax(exponent,
pnegate(max_exponent)), max_exponent));
120 PacketI
b = parithmetic_shift_right<2>(
e);
121 Packet
c = preinterpret<Packet>(plogical_shift_left<MantissaBits>(
padd(
b, bias)));
124 c = preinterpret<Packet>(plogical_shift_left<MantissaBits>(
padd(
b, bias)));
138 template<
typename Packet>
139 struct pldexp_fast_impl {
140 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
141 typedef typename unpacket_traits<Packet>::type Scalar;
142 typedef typename unpacket_traits<PacketI>::type ScalarI;
144 TotalBits =
sizeof(Scalar) * CHAR_BIT,
145 MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
146 ExponentBits = TotalBits - MantissaBits - 1;
149 Packet run(
const Packet&
a,
const Packet& exponent) {
150 const Packet bias = pset1<Packet>(Scalar((ScalarI(1)<<(ExponentBits-1)) - ScalarI(1)));
151 const Packet limit = pset1<Packet>(Scalar((ScalarI(1)<<ExponentBits) - ScalarI(1)));
153 const PacketI
e = pcast<Packet, PacketI>(
pmin(
pmax(
padd(exponent, bias),
pzero(limit)), limit));
155 return pmul(
a, preinterpret<Packet>(plogical_shift_left<MantissaBits>(
e)));
165 template <
typename Packet,
bool base2>
169 const Packet cst_1 = pset1<Packet>(1.0f);
173 const Packet cst_cephes_SQRTHF = pset1<Packet>(0.707106781186547524f);
185 Packet mask =
pcmp_lt(
x, cst_cephes_SQRTHF);
186 Packet tmp =
pand(
x, mask);
193 const Packet cst_p1 = pset1<Packet>(1.0000000190281136f);
194 const Packet cst_p2 = pset1<Packet>(1.0000000190281063f);
195 const Packet cst_p3 = pset1<Packet>(0.18256296349849254f);
196 const Packet cst_q1 = pset1<Packet>(1.4999999999999927f);
197 const Packet cst_q2 = pset1<Packet>(0.59923249590823520f);
198 const Packet cst_q3 = pset1<Packet>(0.049616247954120038f);
200 Packet
p =
pmadd(
x, cst_p3, cst_p2);
203 Packet q =
pmadd(
x, cst_q3, cst_q2);
210 const Packet cst_log2e = pset1<Packet>(
static_cast<float>(
EIGEN_LOG2E));
213 const Packet cst_ln2 = pset1<Packet>(
static_cast<float>(
EIGEN_LN2));
219 Packet pos_inf_mask =
pcmp_eq(_x,cst_pos_inf);
224 return pselect(iszero_mask, cst_minus_inf,
225 por(
pselect(pos_inf_mask,cst_pos_inf,
x), invalid_mask));
228 template <
typename Packet>
235 template <
typename Packet>
251 template <
typename Packet,
bool base2>
257 const Packet cst_1 = pset1<Packet>(1.0);
258 const Packet cst_neg_half = pset1<Packet>(-0.5);
259 const Packet cst_minus_inf = pset1frombits<Packet>(
static_cast<uint64_t>(0xfff0000000000000ull));
260 const Packet cst_pos_inf = pset1frombits<Packet>(
static_cast<uint64_t>(0x7ff0000000000000ull));
265 const Packet cst_cephes_SQRTHF = pset1<Packet>(0.70710678118654752440E0);
266 const Packet cst_cephes_log_p0 = pset1<Packet>(1.01875663804580931796E-4);
267 const Packet cst_cephes_log_p1 = pset1<Packet>(4.97494994976747001425E-1);
268 const Packet cst_cephes_log_p2 = pset1<Packet>(4.70579119878881725854E0);
269 const Packet cst_cephes_log_p3 = pset1<Packet>(1.44989225341610930846E1);
270 const Packet cst_cephes_log_p4 = pset1<Packet>(1.79368678507819816313E1);
271 const Packet cst_cephes_log_p5 = pset1<Packet>(7.70838733755885391666E0);
273 const Packet cst_cephes_log_q0 = pset1<Packet>(1.0);
274 const Packet cst_cephes_log_q1 = pset1<Packet>(1.12873587189167450590E1);
275 const Packet cst_cephes_log_q2 = pset1<Packet>(4.52279145837532221105E1);
276 const Packet cst_cephes_log_q3 = pset1<Packet>(8.29875266912776603211E1);
277 const Packet cst_cephes_log_q4 = pset1<Packet>(7.11544750618563894466E1);
278 const Packet cst_cephes_log_q5 = pset1<Packet>(2.31251620126765340583E1);
291 Packet mask =
pcmp_lt(
x, cst_cephes_SQRTHF);
292 Packet tmp =
pand(
x, mask);
298 Packet x3 =
pmul(x2,
x);
303 y =
pmadd(cst_cephes_log_p0,
x, cst_cephes_log_p1);
304 y1 =
pmadd(cst_cephes_log_p3,
x, cst_cephes_log_p4);
306 y1 =
pmadd(y1,
x, cst_cephes_log_p5);
309 y =
pmadd(cst_cephes_log_q0,
x, cst_cephes_log_q1);
310 y1 =
pmadd(cst_cephes_log_q3,
x, cst_cephes_log_q4);
312 y1 =
pmadd(y1,
x, cst_cephes_log_q5);
318 y =
pmadd(cst_neg_half, x2,
y);
323 const Packet cst_log2e = pset1<Packet>(
static_cast<double>(
EIGEN_LOG2E));
326 const Packet cst_ln2 = pset1<Packet>(
static_cast<double>(
EIGEN_LN2));
332 Packet pos_inf_mask =
pcmp_eq(_x,cst_pos_inf);
337 return pselect(iszero_mask, cst_minus_inf,
338 por(
pselect(pos_inf_mask,cst_pos_inf,
x), invalid_mask));
341 template <
typename Packet>
348 template <
typename Packet>
358 template<
typename Packet>
361 typedef typename unpacket_traits<Packet>::type ScalarType;
362 const Packet one = pset1<Packet>(ScalarType(1));
363 Packet xp1 =
padd(
x, one);
364 Packet small_mask =
pcmp_eq(xp1, one);
365 Packet log1 =
plog(xp1);
366 Packet inf_mask =
pcmp_eq(xp1, log1);
368 return pselect(
por(small_mask, inf_mask),
x, log_large);
374 template<
typename Packet>
377 typedef typename unpacket_traits<Packet>::type ScalarType;
378 const Packet one = pset1<Packet>(ScalarType(1));
379 const Packet neg_one = pset1<Packet>(ScalarType(-1));
381 Packet one_mask =
pcmp_eq(u, one);
382 Packet u_minus_one =
psub(u, one);
383 Packet neg_one_mask =
pcmp_eq(u_minus_one, neg_one);
384 Packet logu =
plog(u);
389 Packet pos_inf_mask =
pcmp_eq(logu, u);
404 template <
typename Packet>
408 const Packet cst_zero = pset1<Packet>(0.0f);
409 const Packet cst_one = pset1<Packet>(1.0f);
410 const Packet cst_half = pset1<Packet>(0.5f);
411 const Packet cst_exp_hi = pset1<Packet>(88.723f);
412 const Packet cst_exp_lo = pset1<Packet>(-104.f);
414 const Packet cst_cephes_LOG2EF = pset1<Packet>(1.44269504088896341f);
415 const Packet cst_p2 = pset1<Packet>(0.49999988079071044921875f);
416 const Packet cst_p3 = pset1<Packet>(0.16666518151760101318359375f);
417 const Packet cst_p4 = pset1<Packet>(4.166965186595916748046875e-2f);
418 const Packet cst_p5 = pset1<Packet>(8.36894474923610687255859375e-3f);
419 const Packet cst_p6 = pset1<Packet>(1.37449637986719608306884765625e-3f);
422 Packet zero_mask =
pcmp_lt(_x, cst_exp_lo);
423 Packet
x =
pmin(_x, cst_exp_hi);
432 const Packet cst_cephes_exp_C1 = pset1<Packet>(-0.693359375f);
433 const Packet cst_cephes_exp_C2 = pset1<Packet>(2.12194440e-4f);
434 Packet r =
pmadd(
m, cst_cephes_exp_C1,
x);
435 r =
pmadd(
m, cst_cephes_exp_C2, r);
439 const Packet r2 =
pmul(r, r);
440 Packet p_even =
pmadd(r2, cst_p6, cst_p4);
441 const Packet p_odd =
pmadd(r2, cst_p5, cst_p3);
442 p_even =
pmadd(r2, p_even, cst_p2);
443 const Packet p_low =
padd(r, cst_one);
444 Packet
y =
pmadd(r, p_odd, p_even);
452 template <
typename Packet>
457 const Packet cst_zero = pset1<Packet>(0.0);
458 const Packet cst_1 = pset1<Packet>(1.0);
459 const Packet cst_2 = pset1<Packet>(2.0);
460 const Packet cst_half = pset1<Packet>(0.5);
462 const Packet cst_exp_hi = pset1<Packet>(709.784);
463 const Packet cst_exp_lo = pset1<Packet>(-709.784);
465 const Packet cst_cephes_LOG2EF = pset1<Packet>(1.4426950408889634073599);
466 const Packet cst_cephes_exp_p0 = pset1<Packet>(1.26177193074810590878e-4);
467 const Packet cst_cephes_exp_p1 = pset1<Packet>(3.02994407707441961300e-2);
468 const Packet cst_cephes_exp_p2 = pset1<Packet>(9.99999999999999999910e-1);
469 const Packet cst_cephes_exp_q0 = pset1<Packet>(3.00198505138664455042e-6);
470 const Packet cst_cephes_exp_q1 = pset1<Packet>(2.52448340349684104192e-3);
471 const Packet cst_cephes_exp_q2 = pset1<Packet>(2.27265548208155028766e-1);
472 const Packet cst_cephes_exp_q3 = pset1<Packet>(2.00000000000000000009e0);
473 const Packet cst_cephes_exp_C1 = pset1<Packet>(0.693145751953125);
474 const Packet cst_cephes_exp_C2 = pset1<Packet>(1.42860682030941723212e-6);
479 Packet zero_mask =
pcmp_lt(_x, cst_exp_lo);
482 fx =
pmadd(cst_cephes_LOG2EF,
x, cst_half);
490 tmp =
pmul(fx, cst_cephes_exp_C1);
491 Packet z =
pmul(fx, cst_cephes_exp_C2);
498 Packet px = cst_cephes_exp_p0;
499 px =
pmadd(px, x2, cst_cephes_exp_p1);
500 px =
pmadd(px, x2, cst_cephes_exp_p2);
504 Packet qx = cst_cephes_exp_q0;
505 qx =
pmadd(qx, x2, cst_cephes_exp_q1);
506 qx =
pmadd(qx, x2, cst_cephes_exp_q2);
507 qx =
pmadd(qx, x2, cst_cephes_exp_q3);
537 const double pio2_62 = 3.4061215800865545e-19;
542 static const uint32_t two_over_pi [] =
544 0x00000028, 0x000028be, 0x0028be60, 0x28be60db,
545 0xbe60db93, 0x60db9391, 0xdb939105, 0x9391054a,
546 0x91054a7f, 0x054a7f09, 0x4a7f09d5, 0x7f09d5f4,
547 0x09d5f47d, 0xd5f47d4d, 0xf47d4d37, 0x7d4d3770,
548 0x4d377036, 0x377036d8, 0x7036d8a5, 0x36d8a566,
549 0xd8a5664f, 0xa5664f10, 0x664f10e4, 0x4f10e410,
550 0x10e41000, 0xe4100000
553 uint32_t xi = numext::bit_cast<uint32_t>(xf);
560 xi = ((xi & 0x007fffffu)| 0x00800000u) << (
e & 0x7);
581 return float(
double(
int64_t(
p)) * pio2_62);
584 template<
bool ComputeSine,
typename Packet>
586 #if EIGEN_COMP_GNUC_STRICT
591 typedef typename unpacket_traits<Packet>::integer_packet PacketI;
593 const Packet cst_2oPI = pset1<Packet>(0.636619746685028076171875f);
594 const Packet cst_rounding_magic = pset1<Packet>(12582912);
595 const PacketI csti_1 = pset1<PacketI>(1);
601 Packet
y =
pmul(
x, cst_2oPI);
604 Packet y_round =
padd(
y, cst_rounding_magic);
606 PacketI y_int = preinterpret<PacketI>(y_round);
607 y =
psub(y_round, cst_rounding_magic);
611 #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
614 const float huge_th = ComputeSine ? 117435.992f : 71476.0625f;
615 x =
pmadd(
y, pset1<Packet>(-1.57079601287841796875f),
x);
616 x =
pmadd(
y, pset1<Packet>(-3.1391647326017846353352069854736328125e-07f),
x);
617 x =
pmadd(
y, pset1<Packet>(-5.390302529957764765544681040410068817436695098876953125e-15f),
x);
625 const float huge_th = ComputeSine ? 25966.f : 18838.f;
626 x =
pmadd(
y, pset1<Packet>(-1.5703125),
x);
628 x =
pmadd(
y, pset1<Packet>(-0.000483989715576171875),
x);
630 x =
pmadd(
y, pset1<Packet>(1.62865035235881805419921875e-07),
x);
631 x =
pmadd(
y, pset1<Packet>(5.5644315544167710640977020375430583953857421875e-11),
x);
656 for(
int k=0; k<PacketSize;++k)
662 x = ploadu<Packet>(x_cpy);
663 y_int = ploadu<PacketI>(y_int2);
669 Packet sign_bit = ComputeSine ?
pxor(_x, preinterpret<Packet>(plogical_shift_left<30>(y_int)))
670 : preinterpret<Packet>(plogical_shift_left<30>(
padd(y_int,csti_1)));
671 sign_bit =
pand(sign_bit, cst_sign_mask);
675 Packet poly_mask = preinterpret<Packet>(
pcmp_eq(
pand(y_int, csti_1),
pzero(y_int)));
680 Packet y1 = pset1<Packet>(2.4372266125283204019069671630859375e-05f);
681 y1 =
pmadd(y1, x2, pset1<Packet>(-0.00138865201734006404876708984375f ));
682 y1 =
pmadd(y1, x2, pset1<Packet>(0.041666619479656219482421875f ));
683 y1 =
pmadd(y1, x2, pset1<Packet>(-0.5f));
684 y1 =
pmadd(y1, x2, pset1<Packet>(1.f));
694 Packet y2 = pset1<Packet>(-0.0001959234114083702898469196984621021329076029360294342041015625f);
695 y2 =
pmadd(y2, x2, pset1<Packet>( 0.0083326873655616851693794799871284340042620897293090820312500000f));
696 y2 =
pmadd(y2, x2, pset1<Packet>(-0.1666666203982298255503735617821803316473960876464843750000000000f));
701 y = ComputeSine ?
pselect(poly_mask,y2,y1)
705 return pxor(
y, sign_bit);
708 template<
typename Packet>
712 return psincos_float<true>(
x);
715 template<
typename Packet>
719 return psincos_float<false>(
x);
723 template<
typename Packet>
726 typedef typename unpacket_traits<Packet>::type Scalar;
727 static_assert(std::is_same<Scalar, float>::value,
"Scalar type must be float");
729 const Packet cst_one = pset1<Packet>(Scalar(1));
730 const Packet cst_pi = pset1<Packet>(Scalar(
EIGEN_PI));
731 const Packet p6 = pset1<Packet>(Scalar(2.36423197202384471893310546875e-3));
732 const Packet p5 = pset1<Packet>(Scalar(-1.1368644423782825469970703125e-2));
733 const Packet p4 = pset1<Packet>(Scalar(2.717843465507030487060546875e-2));
734 const Packet p3 = pset1<Packet>(Scalar(-4.8969544470310211181640625e-2));
735 const Packet p2 = pset1<Packet>(Scalar(8.8804088532924652099609375e-2));
736 const Packet
p1 = pset1<Packet>(Scalar(-0.214591205120086669921875));
737 const Packet
p0 = pset1<Packet>(Scalar(1.57079637050628662109375));
742 const Packet neg_mask =
psignbit(x_in);
743 const Packet abs_x =
pabs(x_in);
749 Packet x2 =
pmul(x_in,x_in);
750 Packet p_even =
pmadd(p6, x2, p4);
751 Packet p_odd =
pmadd(p5, x2, p3);
752 p_even =
pmadd(p_even, x2, p2);
754 p_even =
pmadd(p_even, x2,
p0);
755 Packet
p =
pmadd(p_odd, abs_x, p_even);
760 Packet denom =
psqrt(
psub(cst_one, abs_x));
761 Packet result =
pmul(denom,
p);
763 return pselect(neg_mask,
psub(cst_pi, result), result);
767 template<
typename Packet>
770 typedef typename unpacket_traits<Packet>::type Scalar;
771 static_assert(std::is_same<Scalar, float>::value,
"Scalar type must be float");
773 constexpr
float kPiOverTwo =
static_cast<float>(
EIGEN_PI / 2);
775 const Packet cst_half = pset1<Packet>(0.5f);
776 const Packet cst_one = pset1<Packet>(1.0f);
777 const Packet cst_two = pset1<Packet>(2.0f);
778 const Packet cst_pi_over_two = pset1<Packet>(kPiOverTwo);
781 const Packet p9 = pset1<Packet>(5.08838854730129241943359375e-2f);
782 const Packet p7 = pset1<Packet>(3.95139865577220916748046875e-2f);
783 const Packet p5 = pset1<Packet>(7.550220191478729248046875e-2f);
784 const Packet p3 = pset1<Packet>(0.16664917767047882080078125f);
785 const Packet
p1 = pset1<Packet>(1.00000011920928955078125f);
787 const Packet abs_x =
pabs(x_in);
788 const Packet sign_mask =
pandnot(x_in, abs_x);
789 const Packet invalid_mask =
pcmp_lt(cst_one, abs_x);
796 const Packet x_large =
psqrt(
pnmadd(cst_half, abs_x, cst_half));
797 const Packet large_mask =
pcmp_lt(cst_half, abs_x);
798 const Packet
x =
pselect(large_mask, x_large, abs_x);
799 const Packet x2 =
pmul(
x,
x);
804 Packet
p =
pmadd(p9, x2, p7);
810 const Packet p_large =
pnmadd(cst_two,
p, cst_pi_over_two);
815 return por(invalid_mask,
p);
819 template<
typename Packet>
822 const Packet q0 = pset1<Packet>(-0.3333314359188079833984375f);
823 const Packet q2 = pset1<Packet>(0.19993579387664794921875f);
824 const Packet q4 = pset1<Packet>(-0.14209578931331634521484375f);
825 const Packet q6 = pset1<Packet>(0.1066047251224517822265625f);
826 const Packet q8 = pset1<Packet>(-7.5408883392810821533203125e-2f);
827 const Packet q10 = pset1<Packet>(4.3082617223262786865234375e-2f);
828 const Packet q12 = pset1<Packet>(-1.62907354533672332763671875e-2f);
829 const Packet q14 = pset1<Packet>(2.90188402868807315826416015625e-3f);
839 const Packet x2 =
pmul(
x,
x);
840 const Packet x4 =
pmul(x2, x2);
841 Packet q_odd =
pmadd(q14, x4, q10);
842 Packet q_even =
pmadd(q12, x4, q8);
843 q_odd =
pmadd(q_odd, x4, q6);
844 q_even =
pmadd(q_even, x4, q4);
845 q_odd =
pmadd(q_odd, x4, q2);
846 q_even =
pmadd(q_even, x4, q0);
847 const Packet q =
pmadd(q_odd, x2, q_even);
851 template<
typename Packet>
854 typedef typename unpacket_traits<Packet>::type Scalar;
855 static_assert(std::is_same<Scalar, float>::value,
"Scalar type must be float");
857 constexpr
float kPiOverTwo =
static_cast<float>(
EIGEN_PI / 2);
859 const Packet cst_signmask = pset1<Packet>(-0.0f);
860 const Packet cst_one = pset1<Packet>(1.0f);
861 const Packet cst_pi_over_two = pset1<Packet>(kPiOverTwo);
867 const Packet abs_x =
pabs(x_in);
868 const Packet x_signmask =
pand(x_in, cst_signmask);
869 const Packet large_mask =
pcmp_lt(cst_one, abs_x);
873 Packet result =
pselect(large_mask,
psub(cst_pi_over_two,
p),
p);
875 return pxor(result, x_signmask);
880 template <
typename Packet>
884 pset1<Packet>(-0.33333333333330028569463365784031338989734649658203);
886 pset1<Packet>(0.199999999990664090177006073645316064357757568359375);
888 pset1<Packet>(-0.142857141937123677255527809393242932856082916259766);
890 pset1<Packet>(0.111111065991039953404495577160560060292482376098633);
892 pset1<Packet>(-9.0907812986129224452902519715280504897236824035645e-2);
894 pset1<Packet>(7.6900542950704739442180368769186316058039665222168e-2);
896 pset1<Packet>(-6.6410112986494976294871150912513257935643196105957e-2);
898 pset1<Packet>(5.6920144995467943094258345126945641823112964630127e-2);
900 pset1<Packet>(-4.3577020814990513608577771265117917209863662719727e-2);
902 pset1<Packet>(2.1244050233624342527427586446719942614436149597168e-2);
910 const Packet x2 =
pmul(
x,
x);
911 const Packet x4 =
pmul(x2, x2);
912 Packet q_odd =
pmadd(q18, x4, q14);
913 Packet q_even =
pmadd(q16, x4, q12);
914 q_odd =
pmadd(q_odd, x4, q10);
915 q_even =
pmadd(q_even, x4, q8);
916 q_odd =
pmadd(q_odd, x4, q6);
917 q_even =
pmadd(q_even, x4, q4);
918 q_odd =
pmadd(q_odd, x4, q2);
919 q_even =
pmadd(q_even, x4, q0);
920 const Packet
p =
pmadd(q_odd, x2, q_even);
924 template<
typename Packet>
927 typedef typename unpacket_traits<Packet>::type Scalar;
928 static_assert(std::is_same<Scalar, double>::value,
"Scalar type must be double");
930 constexpr
double kPiOverTwo =
static_cast<double>(
EIGEN_PI / 2);
931 constexpr
double kPiOverFour =
static_cast<double>(
EIGEN_PI / 4);
932 constexpr
double kTanPiOverEight = 0.4142135623730950488016887;
933 constexpr
double kTan3PiOverEight = 2.4142135623730950488016887;
935 const Packet cst_signmask = pset1<Packet>(-0.0);
936 const Packet cst_one = pset1<Packet>(1.0);
937 const Packet cst_pi_over_two = pset1<Packet>(kPiOverTwo);
938 const Packet cst_pi_over_four = pset1<Packet>(kPiOverFour);
939 const Packet cst_large = pset1<Packet>(kTan3PiOverEight);
940 const Packet cst_medium = pset1<Packet>(kTanPiOverEight);
950 const Packet abs_x =
pabs(x_in);
951 const Packet x_signmask =
pand(x_in, cst_signmask);
952 const Packet large_mask =
pcmp_lt(cst_large, abs_x);
953 const Packet medium_mask =
pandnot(
pcmp_lt(cst_medium, abs_x), large_mask);
967 return pxor(
p, x_signmask);
970 template<
typename Packet>
973 typedef typename unpacket_traits<Packet>::type Scalar;
974 static_assert(std::is_same<Scalar, float>::value,
"Scalar type must be float");
975 const Packet
half = pset1<Packet>(0.5f);
979 const Packet C3 = pset1<Packet>(0.3333373963832855224609375f);
980 const Packet C5 = pset1<Packet>(0.1997792422771453857421875f);
981 const Packet C7 = pset1<Packet>(0.14672131836414337158203125f);
982 const Packet C9 = pset1<Packet>(8.2311116158962249755859375e-2f);
983 const Packet C11 = pset1<Packet>(0.1819281280040740966796875f);
984 const Packet x2 =
pmul(
x,
x);
985 Packet
p =
pmadd(C11, x2, C9);
992 const Packet one = pset1<Packet>(1.0f);
998 template<
typename Packet>
1001 typedef typename unpacket_traits<Packet>::as_real RealPacket;
1005 const RealPacket y_abs =
pabs(
y.v);
1006 const RealPacket y_abs_flip =
pcplxflip(Packet(y_abs)).v;
1007 const RealPacket y_max =
pmax(y_abs, y_abs_flip);
1008 const RealPacket y_scaled =
pdiv(
y.v, y_max);
1010 const RealPacket y_scaled_sq =
pmul(y_scaled, y_scaled);
1011 const RealPacket denom =
padd(y_scaled_sq,
pcplxflip(Packet(y_scaled_sq)).
v);
1012 Packet result_scaled =
pmul(
x,
pconj(Packet(y_scaled)));
1014 result_scaled = Packet(
pdiv(result_scaled.v, denom));
1016 return Packet(
pdiv(result_scaled.v, y_max));
1019 template<
typename Packet>
1022 typedef typename unpacket_traits<Packet>::type Scalar;
1023 typedef typename Scalar::value_type RealScalar;
1024 typedef typename unpacket_traits<Packet>::as_real RealPacket;
1062 RealPacket a_abs =
pabs(
a.v);
1063 RealPacket a_abs_flip =
pcplxflip(Packet(a_abs)).v;
1064 RealPacket a_max =
pmax(a_abs, a_abs_flip);
1065 RealPacket a_min =
pmin(a_abs, a_abs_flip);
1066 RealPacket a_min_zero_mask =
pcmp_eq(a_min,
pzero(a_min));
1067 RealPacket a_max_zero_mask =
pcmp_eq(a_max,
pzero(a_max));
1068 RealPacket r =
pdiv(a_min, a_max);
1069 const RealPacket cst_one = pset1<RealPacket>(RealScalar(1));
1072 l =
pselect(a_min_zero_mask, a_max, l);
1077 const RealPacket cst_half = pset1<RealPacket>(RealScalar(0.5));
1086 Packet positive_real_result;
1088 positive_real_result.v =
pselect(real_mask, rho.v, eta);
1092 const RealPacket cst_imag_sign_mask =
1093 pset1<Packet>(Scalar(RealScalar(0.0), RealScalar(-0.0))).v;
1094 RealPacket imag_signs =
pand(
a.v, cst_imag_sign_mask);
1095 Packet negative_real_result;
1097 negative_real_result.v =
por(
pabs(
pcplxflip(positive_real_result).
v), imag_signs);
1100 Packet negative_real_mask;
1102 negative_real_mask.v =
por(negative_real_mask.v,
pcplxflip(negative_real_mask).
v);
1103 Packet result =
pselect(negative_real_mask, negative_real_result, positive_real_result);
1112 is_inf.v =
pcmp_eq(a_abs, cst_pos_inf);
1114 is_real_inf.v =
pand(is_inf.v, real_mask);
1115 is_real_inf =
por(is_real_inf,
pcplxflip(is_real_inf));
1117 Packet real_inf_result;
1118 real_inf_result.v =
pmul(a_abs, pset1<Packet>(Scalar(RealScalar(1.0), RealScalar(0.0))).
v);
1119 real_inf_result.v =
pselect(negative_real_mask.v,
pcplxflip(real_inf_result).
v, real_inf_result.v);
1122 is_imag_inf.v =
pandnot(is_inf.v, real_mask);
1123 is_imag_inf =
por(is_imag_inf,
pcplxflip(is_imag_inf));
1124 Packet imag_inf_result;
1125 imag_inf_result.v =
por(
pand(cst_pos_inf, real_mask),
pandnot(
a.v, real_mask));
1127 Packet result_is_nan =
pisnan(result);
1128 result =
por(result_is_nan, result);
1130 return pselect(is_imag_inf, imag_inf_result,
pselect(is_real_inf, real_inf_result, result));
1134 template <
typename Packet>
1135 struct psign_impl<Packet,
std::enable_if_t<!NumTraits<typename unpacket_traits<Packet>::type>::IsComplex &&
1136 !NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>> {
1138 using Scalar =
typename unpacket_traits<Packet>::type;
1139 const Packet cst_one = pset1<Packet>(Scalar(1));
1140 const Packet cst_zero =
pzero(
a);
1142 const Packet abs_a =
pabs(
a);
1143 const Packet sign_mask =
pandnot(
a, abs_a);
1144 const Packet nonzero_mask =
pcmp_lt(cst_zero, abs_a);
1146 return pselect(nonzero_mask,
por(sign_mask, cst_one), abs_a);
1150 template <
typename Packet>
1151 struct psign_impl<Packet,
std::enable_if_t<!NumTraits<typename unpacket_traits<Packet>::type>::IsComplex &&
1152 NumTraits<typename unpacket_traits<Packet>::type>::IsSigned &&
1153 NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>> {
1155 using Scalar =
typename unpacket_traits<Packet>::type;
1156 const Packet cst_one = pset1<Packet>(Scalar(1));
1157 const Packet cst_minus_one = pset1<Packet>(Scalar(-1));
1158 const Packet cst_zero =
pzero(
a);
1160 const Packet positive_mask =
pcmp_lt(cst_zero,
a);
1161 const Packet positive =
pand(positive_mask, cst_one);
1162 const Packet negative_mask =
pcmp_lt(
a, cst_zero);
1163 const Packet negative =
pand(negative_mask, cst_minus_one);
1165 return por(positive, negative);
1169 template <
typename Packet>
1170 struct psign_impl<Packet,
std::enable_if_t<!NumTraits<typename unpacket_traits<Packet>::type>::IsComplex &&
1171 !NumTraits<typename unpacket_traits<Packet>::type>::IsSigned &&
1172 NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>> {
1174 using Scalar =
typename unpacket_traits<Packet>::type;
1175 const Packet cst_one = pset1<Packet>(Scalar(1));
1176 const Packet cst_zero =
pzero(
a);
1178 const Packet zero_mask =
pcmp_eq(cst_zero,
a);
1179 return pandnot(cst_one, zero_mask);
1184 template <
typename Packet>
1185 struct psign_impl<Packet,
std::enable_if_t<NumTraits<typename unpacket_traits<Packet>::type>::IsComplex &&
1186 unpacket_traits<Packet>::vectorizable>> {
1188 typedef typename unpacket_traits<Packet>::type Scalar;
1189 typedef typename Scalar::value_type RealScalar;
1190 typedef typename unpacket_traits<Packet>::as_real RealPacket;
1197 RealPacket a_abs =
pabs(
a.v);
1198 RealPacket a_abs_flip =
pcplxflip(Packet(a_abs)).v;
1199 RealPacket a_max =
pmax(a_abs, a_abs_flip);
1200 RealPacket a_min =
pmin(a_abs, a_abs_flip);
1201 RealPacket a_min_zero_mask =
pcmp_eq(a_min,
pzero(a_min));
1202 RealPacket a_max_zero_mask =
pcmp_eq(a_max,
pzero(a_max));
1203 RealPacket r =
pdiv(a_min, a_max);
1204 const RealPacket cst_one = pset1<RealPacket>(RealScalar(1));
1208 l =
pselect(a_min_zero_mask, a_max, l);
1210 RealPacket sign_as_real =
pandnot(
pdiv(
a.v, l), a_max_zero_mask);
1212 sign.v = sign_as_real;
1224 template<
typename Packet>
1233 template<
typename Packet>
1235 void fast_twosum(
const Packet&
x,
const Packet&
y, Packet& s_hi, Packet& s_lo) {
1237 const Packet t =
psub(s_hi,
x);
1241 #ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
1246 template<
typename Packet>
1248 void twoprod(
const Packet&
x,
const Packet&
y,
1249 Packet& p_hi, Packet& p_lo) {
1261 template<
typename Packet>
1264 typedef typename unpacket_traits<Packet>::type Scalar;
1266 const Scalar shift_scale = Scalar(
uint64_t(1) << shift);
1267 const Packet gamma =
pmul(pset1<Packet>(shift_scale + Scalar(1)),
x);
1268 Packet rho =
psub(
x, gamma);
1269 x_hi =
padd(rho, gamma);
1270 x_lo =
psub(
x, x_hi);
1277 template<
typename Packet>
1280 Packet& p_hi, Packet& p_lo) {
1281 Packet x_hi, x_lo, y_hi, y_lo;
1287 p_lo =
pmadd(x_hi, y_lo, p_lo);
1288 p_lo =
pmadd(x_lo, y_hi, p_lo);
1289 p_lo =
pmadd(x_lo, y_lo, p_lo);
1301 template<
typename Packet>
1303 void twosum(
const Packet& x_hi,
const Packet& x_lo,
1304 const Packet& y_hi,
const Packet& y_lo,
1305 Packet& s_hi, Packet& s_lo) {
1307 Packet r_hi_1, r_lo_1;
1309 Packet r_hi_2, r_lo_2;
1311 const Packet r_hi =
pselect(x_greater_mask, r_hi_1, r_hi_2);
1313 const Packet s1 =
padd(
padd(y_lo, r_lo_1), x_lo);
1314 const Packet s2 =
padd(
padd(x_lo, r_lo_2), y_lo);
1315 const Packet s =
pselect(x_greater_mask, s1, s2);
1322 template<
typename Packet>
1325 const Packet& y_hi,
const Packet& y_lo,
1326 Packet& s_hi, Packet& s_lo) {
1329 const Packet s =
padd(
padd(y_lo, r_lo), x_lo);
1336 template<
typename Packet>
1339 const Packet& y_hi,
const Packet& y_lo,
1340 Packet& s_hi, Packet& s_lo) {
1343 const Packet s =
padd(y_lo, r_lo);
1355 template<
typename Packet>
1357 void twoprod(
const Packet& x_hi,
const Packet& x_lo,
const Packet&
y,
1358 Packet& p_hi, Packet& p_lo) {
1361 const Packet c_lo2 =
pmul(x_lo,
y);
1364 const Packet t_lo2 =
padd(t_lo1, c_lo1);
1374 template<
typename Packet>
1376 void twoprod(
const Packet& x_hi,
const Packet& x_lo,
1377 const Packet& y_hi,
const Packet& y_lo,
1378 Packet& p_hi, Packet& p_lo) {
1379 Packet p_hi_hi, p_hi_lo;
1380 twoprod(x_hi, x_lo, y_hi, p_hi_hi, p_hi_lo);
1381 Packet p_lo_hi, p_lo_lo;
1382 twoprod(x_hi, x_lo, y_lo, p_lo_hi, p_lo_lo);
1383 fast_twosum(p_hi_hi, p_hi_lo, p_lo_hi, p_lo_lo, p_hi, p_lo);
1390 template <
typename Packet>
1392 Packet& z_hi, Packet& z_lo) {
1393 const Packet t_hi =
pdiv(x_hi,
y);
1394 Packet pi_hi, pi_lo;
1396 const Packet delta_hi =
psub(x_hi, pi_hi);
1397 const Packet delta_t =
psub(delta_hi, pi_lo);
1398 const Packet delta =
padd(delta_t, x_lo);
1399 const Packet t_lo =
pdiv(delta,
y);
1404 template <
typename Scalar>
1405 struct accurate_log2 {
1406 template <
typename Packet>
1408 void operator()(
const Packet&
x, Packet& log2_x_hi, Packet& log2_x_lo) {
1421 struct accurate_log2<float> {
1422 template <
typename Packet>
1424 void operator()(
const Packet& z, Packet& log2_x_hi, Packet& log2_x_lo) {
1440 const Packet p6 = pset1<Packet>( 9.703654795885e-2f);
1441 const Packet p5 = pset1<Packet>(-0.1690667718648f);
1442 const Packet p4 = pset1<Packet>( 0.1720575392246f);
1443 const Packet p3 = pset1<Packet>(-0.1789081543684f);
1444 const Packet p2 = pset1<Packet>( 0.2050433009862f);
1445 const Packet
p1 = pset1<Packet>(-0.2404672354459f);
1446 const Packet
p0 = pset1<Packet>( 0.2885761857032f);
1448 const Packet C3_hi = pset1<Packet>(-0.360674142838f);
1449 const Packet C3_lo = pset1<Packet>(-6.13283912543e-09f);
1450 const Packet C2_hi = pset1<Packet>(0.480897903442f);
1451 const Packet C2_lo = pset1<Packet>(-1.44861207474e-08f);
1452 const Packet C1_hi = pset1<Packet>(-0.721347510815f);
1453 const Packet C1_lo = pset1<Packet>(-4.84483164698e-09f);
1454 const Packet C0_hi = pset1<Packet>(1.44269502163f);
1455 const Packet C0_lo = pset1<Packet>(2.01711713999e-08f);
1456 const Packet one = pset1<Packet>(1.0f);
1458 const Packet
x =
psub(z, one);
1463 Packet p_even =
pmadd(p6, x2, p4);
1464 p_even =
pmadd(p_even, x2, p2);
1465 p_even =
pmadd(p_even, x2,
p0);
1466 Packet p_odd =
pmadd(p5, x2, p3);
1468 Packet
p =
pmadd(p_odd,
x, p_even);
1478 fast_twosum(C3_hi, C3_lo, t_hi, t_lo, q_hi, q_lo);
1480 twoprod(q_hi, q_lo,
x, t_hi, t_lo);
1481 fast_twosum(C2_hi, C2_lo, t_hi, t_lo, q_hi, q_lo);
1483 twoprod(q_hi, q_lo,
x, t_hi, t_lo);
1484 fast_twosum(C1_hi, C1_lo, t_hi, t_lo, q_hi, q_lo);
1486 twoprod(q_hi, q_lo,
x, t_hi, t_lo);
1487 fast_twosum(C0_hi, C0_lo, t_hi, t_lo, q_hi, q_lo);
1490 twoprod(q_hi, q_lo,
x, log2_x_hi, log2_x_lo);
1502 struct accurate_log2<double> {
1503 template <
typename Packet>
1505 void operator()(
const Packet&
x, Packet& log2_x_hi, Packet& log2_x_lo) {
1527 const Packet q12 = pset1<Packet>(2.87074255468000586e-9);
1528 const Packet q10 = pset1<Packet>(2.38957980901884082e-8);
1529 const Packet q8 = pset1<Packet>(2.31032094540014656e-7);
1530 const Packet q6 = pset1<Packet>(2.27279857398537278e-6);
1531 const Packet q4 = pset1<Packet>(2.31271023278625638e-5);
1532 const Packet q2 = pset1<Packet>(2.47556738444535513e-4);
1533 const Packet q0 = pset1<Packet>(2.88543873228900172e-3);
1534 const Packet C_hi = pset1<Packet>(0.0400377511598501157);
1535 const Packet C_lo = pset1<Packet>(-4.77726582251425391e-19);
1536 const Packet one = pset1<Packet>(1.0);
1538 const Packet cst_2_log2e_hi = pset1<Packet>(2.88539008177792677);
1539 const Packet cst_2_log2e_lo = pset1<Packet>(4.07660016854549667e-17);
1543 twoprod(cst_2_log2e_hi, cst_2_log2e_lo,
psub(
x, one), t_hi, t_lo);
1549 Packet r2_hi, r2_lo;
1550 twoprod(r_hi, r_lo, r_hi, r_lo, r2_hi, r2_lo);
1552 Packet r4_hi, r4_lo;
1553 twoprod(r2_hi, r2_lo, r2_hi, r2_lo, r4_hi, r4_lo);
1557 Packet q_even =
pmadd(q12, r4_hi, q8);
1558 Packet q_odd =
pmadd(q10, r4_hi, q6);
1559 q_even =
pmadd(q_even, r4_hi, q4);
1560 q_odd =
pmadd(q_odd, r4_hi, q2);
1561 q_even =
pmadd(q_even, r4_hi, q0);
1562 Packet q =
pmadd(q_odd, r2_hi, q_even);
1570 twoprod(r2_hi, r2_lo, q, p_hi, p_lo);
1572 Packet p1_hi, p1_lo;
1573 fast_twosum(C_hi, C_lo, p_hi, p_lo, p1_hi, p1_lo);
1575 Packet p2_hi, p2_lo;
1576 twoprod(r2_hi, r2_lo, p1_hi, p1_lo, p2_hi, p2_lo);
1578 Packet p3_hi, p3_lo;
1582 twoprod(p3_hi, p3_lo, r_hi, r_lo, log2_x_hi, log2_x_lo);
1587 template <
typename Scalar>
1588 struct fast_accurate_exp2 {
1589 template <
typename Packet>
1602 struct fast_accurate_exp2<float> {
1603 template <
typename Packet>
1617 const Packet p4 = pset1<Packet>(1.539513905e-4f);
1618 const Packet p3 = pset1<Packet>(1.340007293e-3f);
1619 const Packet p2 = pset1<Packet>(9.618283249e-3f);
1620 const Packet
p1 = pset1<Packet>(5.550328270e-2f);
1621 const Packet
p0 = pset1<Packet>(0.2402264923f);
1623 const Packet C_hi = pset1<Packet>(0.6931471825f);
1624 const Packet C_lo = pset1<Packet>(2.36836577e-08f);
1625 const Packet one = pset1<Packet>(1.0f);
1631 Packet p_even =
pmadd(p4, x2, p2);
1632 Packet p_odd =
pmadd(p3, x2,
p1);
1633 p_even =
pmadd(p_even, x2,
p0);
1634 Packet
p =
pmadd(p_odd,
x, p_even);
1642 Packet q1_hi, q1_lo;
1643 twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);
1645 Packet q2_hi, q2_lo;
1646 twoprod(q1_hi, q1_lo,
x, q2_hi, q2_lo);
1648 Packet q3_hi, q3_lo;
1652 return padd(q3_hi,
padd(q2_lo, q3_lo));
1660 struct fast_accurate_exp2<double> {
1661 template <
typename Packet>
1675 const Packet p9 = pset1<Packet>(4.431642109085495276e-10);
1676 const Packet p8 = pset1<Packet>(7.073829923303358410e-9);
1677 const Packet p7 = pset1<Packet>(1.017822306737031311e-7);
1678 const Packet p6 = pset1<Packet>(1.321543498017646657e-6);
1679 const Packet p5 = pset1<Packet>(1.525273342728892877e-5);
1680 const Packet p4 = pset1<Packet>(1.540353045780084423e-4);
1681 const Packet p3 = pset1<Packet>(1.333355814685869807e-3);
1682 const Packet p2 = pset1<Packet>(9.618129107593478832e-3);
1683 const Packet
p1 = pset1<Packet>(5.550410866481961247e-2);
1684 const Packet
p0 = pset1<Packet>(0.240226506959101332);
1685 const Packet C_hi = pset1<Packet>(0.693147180559945286);
1686 const Packet C_lo = pset1<Packet>(4.81927865669806721e-17);
1687 const Packet one = pset1<Packet>(1.0);
1693 Packet p_even =
pmadd(p8, x2, p6);
1694 Packet p_odd =
pmadd(p9, x2, p7);
1695 p_even =
pmadd(p_even, x2, p4);
1696 p_odd =
pmadd(p_odd, x2, p5);
1697 p_even =
pmadd(p_even, x2, p2);
1698 p_odd =
pmadd(p_odd, x2, p3);
1699 p_even =
pmadd(p_even, x2,
p0);
1701 Packet
p =
pmadd(p_odd,
x, p_even);
1709 Packet q1_hi, q1_lo;
1710 twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);
1712 Packet q2_hi, q2_lo;
1713 twoprod(q1_hi, q1_lo,
x, q2_hi, q2_lo);
1715 Packet q3_hi, q3_lo;
1719 return padd(q3_hi,
padd(q2_lo, q3_lo));
1728 template <
typename Packet>
1730 typedef typename unpacket_traits<Packet>::type Scalar;
1737 const Packet m_x_scale_mask =
pcmp_lt(m_x, pset1<Packet>(sqrt_half));
1738 m_x =
pselect(m_x_scale_mask,
pmul(pset1<Packet>(Scalar(2)), m_x), m_x);
1739 e_x =
pselect(m_x_scale_mask,
psub(e_x, pset1<Packet>(Scalar(1))), e_x);
1742 Packet rx_hi, rx_lo;
1743 accurate_log2<Scalar>()(m_x, rx_hi, rx_lo);
1747 Packet f1_hi, f1_lo, f2_hi, f2_lo;
1749 twoprod(rx_hi, rx_lo,
y, f2_hi, f2_lo);
1757 fast_twosum(f1_hi, f1_lo, f2_hi, f2_lo, f_hi, f_lo);
1762 r_z =
padd(r_z, f_lo);
1765 n_z =
padd(n_z, n_r);
1772 const Packet e_r = fast_accurate_exp2<Scalar>()(r_z);
1777 template <
typename Packet>
1779 typedef typename unpacket_traits<Packet>::type Scalar;
1783 const Packet cst_zero = pset1<Packet>(Scalar(0));
1784 const Packet cst_one = pset1<Packet>(Scalar(1));
1787 const Packet abs_x =
pabs(
x);
1789 const Packet abs_x_is_zero =
pcmp_eq(abs_x, cst_zero);
1790 const Packet x_has_signbit =
psignbit(
x);
1791 const Packet x_is_neg =
pandnot(x_has_signbit, abs_x_is_zero);
1792 const Packet x_is_neg_zero =
pand(x_has_signbit, abs_x_is_zero);
1793 const Packet abs_x_is_inf =
pcmp_eq(abs_x, cst_pos_inf);
1794 const Packet abs_x_is_one =
pcmp_eq(abs_x, cst_one);
1795 const Packet abs_x_is_gt_one =
pcmp_lt(cst_one, abs_x);
1796 const Packet abs_x_is_lt_one =
pcmp_lt(abs_x, cst_one);
1797 const Packet x_is_one =
pandnot(abs_x_is_one, x_is_neg);
1798 const Packet x_is_neg_one =
pand(abs_x_is_one, x_is_neg);
1799 const Packet x_is_nan =
pisnan(
x);
1802 const Packet abs_y =
pabs(
y);
1803 const Packet y_is_one =
pcmp_eq(
y, cst_one);
1804 const Packet abs_y_is_zero =
pcmp_eq(abs_y, cst_zero);
1805 const Packet y_is_neg =
pcmp_lt(
y, cst_zero);
1807 const Packet y_is_nan =
pisnan(
y);
1808 const Packet abs_y_is_inf =
pcmp_eq(abs_y, cst_pos_inf);
1811 const Packet abs_y_is_huge =
pcmp_le(pset1<Packet>(huge_exponent),
pabs(
y));
1815 const Packet y_div_2 =
pmul(
y, pset1<Packet>(Scalar(0.5)));
1819 const Packet invalid_negative_x =
pandnot(
pandnot(
pandnot(x_is_neg, abs_x_is_inf), y_is_int), abs_y_is_inf);
1820 const Packet pow_is_nan =
por(invalid_negative_x,
por(x_is_nan, y_is_nan));
1821 const Packet pow_is_one =
1822 por(
por(x_is_one, abs_y_is_zero),
pand(x_is_neg_one,
por(abs_y_is_inf,
pandnot(y_is_even, invalid_negative_x))));
1823 const Packet pow_is_zero =
por(
por(
por(
pand(abs_x_is_zero, y_is_pos),
pand(abs_x_is_inf, y_is_neg)),
1824 pand(
pand(abs_x_is_lt_one, abs_y_is_huge), y_is_pos)),
1825 pand(
pand(abs_x_is_gt_one, abs_y_is_huge), y_is_neg));
1826 const Packet pow_is_inf =
por(
por(
por(
pand(abs_x_is_zero, y_is_neg),
pand(abs_x_is_inf, y_is_pos)),
1827 pand(
pand(abs_x_is_lt_one, abs_y_is_huge), y_is_neg)),
1828 pand(
pand(abs_x_is_gt_one, abs_y_is_huge), y_is_pos));
1829 const Packet pow_is_neg_zero =
pand(
pandnot(y_is_int, y_is_even),
1830 por(
pand(y_is_neg,
pand(abs_x_is_inf, x_is_neg)),
pand(y_is_pos, x_is_neg_zero)));
1831 const Packet inf_val =
1833 cst_neg_inf, cst_pos_inf);
1835 const Packet negate_pow_abs =
pandnot(x_is_neg, y_is_even);
1842 pselect(pow_is_zero, cst_zero,
1885 template <
typename Packet,
int N>
1887 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(
const Packet&
x,
const typename unpacket_traits<Packet>::type coeff[]) {
1889 return pmadd(ppolevl<Packet, N-1>::run(
x, coeff),
x, pset1<Packet>(coeff[N]));
1893 template <
typename Packet>
1894 struct ppolevl<Packet, 0> {
1895 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(
const Packet&
x,
const typename unpacket_traits<Packet>::type coeff[]) {
1897 return pset1<Packet>(coeff[0]);
1953 template <
typename Packet,
int N>
1956 static EIGEN_STRONG_INLINE Packet run(Packet
x,
const typename unpacket_traits<Packet>::type coef[]) {
1957 typedef typename unpacket_traits<Packet>::type Scalar;
1958 Packet b0 = pset1<Packet>(coef[0]);
1959 Packet b1 = pset1<Packet>(
static_cast<Scalar
>(0.f));
1962 for (
int i = 1;
i < N;
i++) {
1965 b0 =
psub(
pmadd(
x, b1, pset1<Packet>(coef[
i])), b2);
1968 return pmul(pset1<Packet>(
static_cast<Scalar
>(0.5f)),
psub(b0, b2));
1972 namespace unary_pow {
1974 template <typename ScalarExponent, bool IsInteger = NumTraits<ScalarExponent>::IsInteger>
1975 struct exponent_helper {
1976 using safe_abs_type = ScalarExponent;
1977 static constexpr ScalarExponent one_half = ScalarExponent(0.5);
1979 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScalarExponent safe_abs(
const ScalarExponent&
exp) {
1984 ScalarExponent exp_div_2 =
exp * one_half;
1986 return exp_div_2 != floor_exp_div_2;
1988 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ScalarExponent floor_div_two(
const ScalarExponent&
exp) {
1989 ScalarExponent exp_div_2 =
exp * one_half;
1994 template <
typename ScalarExponent>
1995 struct exponent_helper<ScalarExponent, true> {
1998 using safe_abs_type =
typename numext::get_integer_by_size<
sizeof(ScalarExponent)>::unsigned_type;
1999 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE safe_abs_type safe_abs(
const ScalarExponent&
exp) {
2001 safe_abs_type result = safe_abs_type(
exp ^ mask);
2002 return result + safe_abs_type(ScalarExponent(1) & mask);
2005 return exp % safe_abs_type(2) != safe_abs_type(0);
2007 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE safe_abs_type floor_div_two(
const safe_abs_type&
exp) {
2008 return exp >> safe_abs_type(1);
2012 template <
typename Packet,
typename ScalarExponent,
2013 bool ReciprocateIfExponentIsNegative =
2015 struct reciprocate {
2016 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(
const Packet&
x,
const ScalarExponent& exponent) {
2017 using Scalar =
typename unpacket_traits<Packet>::type;
2018 const Packet cst_pos_one = pset1<Packet>(Scalar(1));
2019 return exponent < 0 ?
pdiv(cst_pos_one,
x) :
x;
2023 template <
typename Packet,
typename ScalarExponent>
2024 struct reciprocate<Packet, ScalarExponent, false> {
2027 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(
const Packet&
x,
const ScalarExponent&) {
return x; }
2030 template <
typename Packet,
typename ScalarExponent>
2031 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet int_pow(
const Packet&
x,
const ScalarExponent& exponent) {
2032 using Scalar =
typename unpacket_traits<Packet>::type;
2033 using ExponentHelper = exponent_helper<ScalarExponent>;
2034 using AbsExponentType =
typename ExponentHelper::safe_abs_type;
2035 const Packet cst_pos_one = pset1<Packet>(Scalar(1));
2036 if (exponent == ScalarExponent(0))
return cst_pos_one;
2038 Packet result = reciprocate<Packet, ScalarExponent>::run(
x, exponent);
2039 Packet
y = cst_pos_one;
2040 AbsExponentType
m = ExponentHelper::safe_abs(exponent);
2043 bool odd = ExponentHelper::is_odd(
m);
2044 if (odd)
y =
pmul(
y, result);
2045 result =
pmul(result, result);
2046 m = ExponentHelper::floor_div_two(
m);
2049 return pmul(
y, result);
2052 template <
typename Packet>
2054 const typename unpacket_traits<Packet>::type& exponent) {
2055 const Packet exponent_packet = pset1<Packet>(exponent);
2059 template <
typename Packet,
typename ScalarExponent>
2060 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet handle_nonint_nonint_errors(
const Packet&
x,
const Packet& powx,
2061 const ScalarExponent& exponent) {
2062 using Scalar =
typename unpacket_traits<Packet>::type;
2066 const Scalar pos_zero = Scalar(0);
2067 const Scalar all_ones = ptrue<Scalar>(Scalar());
2068 const Scalar pos_one = Scalar(1);
2069 const Scalar pos_inf = NumTraits<Scalar>::infinity();
2071 const Packet cst_pos_zero =
pzero(
x);
2072 const Packet cst_pos_one = pset1<Packet>(pos_one);
2073 const Packet cst_pos_inf = pset1<Packet>(pos_inf);
2076 const bool exponent_is_neg = exponent < ScalarExponent(0);
2077 const bool exponent_is_pos = exponent > ScalarExponent(0);
2079 const Packet exp_is_not_fin = pset1<Packet>(exponent_is_not_fin ? all_ones : pos_zero);
2080 const Packet exp_is_neg = pset1<Packet>(exponent_is_neg ? all_ones : pos_zero);
2081 const Packet exp_is_pos = pset1<Packet>(exponent_is_pos ? all_ones : pos_zero);
2082 const Packet exp_is_inf =
pand(exp_is_not_fin,
por(exp_is_neg, exp_is_pos));
2083 const Packet exp_is_nan =
pandnot(exp_is_not_fin,
por(exp_is_neg, exp_is_pos));
2085 const Packet x_is_le_zero =
pcmp_le(
x, cst_pos_zero);
2086 const Packet x_is_ge_zero =
pcmp_le(cst_pos_zero,
x);
2087 const Packet x_is_zero =
pand(x_is_le_zero, x_is_ge_zero);
2089 const Packet abs_x =
pabs(
x);
2090 const Packet abs_x_is_le_one =
pcmp_le(abs_x, cst_pos_one);
2091 const Packet abs_x_is_ge_one =
pcmp_le(cst_pos_one, abs_x);
2092 const Packet abs_x_is_inf =
pcmp_eq(abs_x, cst_pos_inf);
2093 const Packet abs_x_is_one =
pand(abs_x_is_le_one, abs_x_is_ge_one);
2095 Packet pow_is_inf_if_exp_is_neg =
por(x_is_zero,
pand(abs_x_is_le_one, exp_is_inf));
2096 Packet pow_is_inf_if_exp_is_pos =
por(abs_x_is_inf,
pand(abs_x_is_ge_one, exp_is_inf));
2097 Packet pow_is_one =
pand(abs_x_is_one,
por(exp_is_inf, x_is_ge_zero));
2099 Packet result = powx;
2100 result =
por(x_is_le_zero, result);
2101 result =
pselect(pow_is_inf_if_exp_is_neg,
pand(cst_pos_inf, exp_is_neg), result);
2102 result =
pselect(pow_is_inf_if_exp_is_pos,
pand(cst_pos_inf, exp_is_pos), result);
2103 result =
por(exp_is_nan, result);
2104 result =
pselect(pow_is_one, cst_pos_one, result);
2108 template <
typename Packet,
typename ScalarExponent,
2109 std::enable_if_t<NumTraits<typename unpacket_traits<Packet>::type>::IsSigned,
bool> =
true>
2110 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet handle_negative_exponent(
const Packet&
x,
const ScalarExponent& exponent) {
2111 using Scalar =
typename unpacket_traits<Packet>::type;
2118 const Scalar pos_zero = Scalar(0);
2119 const Scalar all_ones = ptrue<Scalar>(Scalar());
2120 const Scalar pos_one = Scalar(1);
2122 const Packet cst_pos_one = pset1<Packet>(pos_one);
2124 const bool exponent_is_odd = exponent % ScalarExponent(2) != ScalarExponent(0);
2126 const Packet exp_is_odd = pset1<Packet>(exponent_is_odd ? all_ones : pos_zero);
2128 const Packet abs_x =
pabs(
x);
2129 const Packet abs_x_is_one =
pcmp_eq(abs_x, cst_pos_one);
2131 Packet result =
pselect(exp_is_odd,
x, abs_x);
2132 result =
pand(abs_x_is_one, result);
2136 template <
typename Packet,
typename ScalarExponent,
2137 std::enable_if_t<!NumTraits<typename unpacket_traits<Packet>::type>::IsSigned,
bool> =
true>
2138 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet handle_negative_exponent(
const Packet&
x,
const ScalarExponent&) {
2139 using Scalar =
typename unpacket_traits<Packet>::type;
2146 const Scalar pos_one = Scalar(1);
2148 const Packet cst_pos_one = pset1<Packet>(pos_one);
2150 const Packet x_is_one =
pcmp_eq(
x, cst_pos_one);
2152 return pand(x_is_one,
x);
2158 template <
typename Packet,
typename ScalarExponent,
2159 bool BaseIsIntegerType = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger,
2162 struct unary_pow_impl;
2164 template <
typename Packet,
typename ScalarExponent,
bool ExponentIsSigned>
2165 struct unary_pow_impl<Packet, ScalarExponent, false, false, ExponentIsSigned> {
2166 typedef typename unpacket_traits<Packet>::type Scalar;
2167 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(
const Packet&
x,
const ScalarExponent& exponent) {
2169 if (exponent_is_integer) {
2170 return unary_pow::int_pow(
x, exponent);
2172 Packet result = unary_pow::gen_pow(
x, exponent);
2173 result = unary_pow::handle_nonint_nonint_errors(
x, result, exponent);
2179 template <
typename Packet,
typename ScalarExponent,
bool ExponentIsSigned>
2180 struct unary_pow_impl<Packet, ScalarExponent, false, true, ExponentIsSigned> {
2181 typedef typename unpacket_traits<Packet>::type Scalar;
2182 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(
const Packet&
x,
const ScalarExponent& exponent) {
2183 return unary_pow::int_pow(
x, exponent);
2187 template <
typename Packet,
typename ScalarExponent>
2188 struct unary_pow_impl<Packet, ScalarExponent, true, true, true> {
2189 typedef typename unpacket_traits<Packet>::type Scalar;
2190 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(
const Packet&
x,
const ScalarExponent& exponent) {
2191 if (exponent < ScalarExponent(0)) {
2192 return unary_pow::handle_negative_exponent(
x, exponent);
2194 return unary_pow::int_pow(
x, exponent);
2199 template <
typename Packet,
typename ScalarExponent>
2200 struct unary_pow_impl<Packet, ScalarExponent, true, true, false> {
2201 typedef typename unpacket_traits<Packet>::type Scalar;
2202 static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(
const Packet&
x,
const ScalarExponent& exponent) {
2203 return unary_pow::int_pow(
x, exponent);
Array< int, Dynamic, 1 > v
Array< double, 1, 3 > e(1./3., 0.5, 2.)
IndexedView_or_Block operator()(const RowIndices &rowIndices, const ColIndices &colIndices)
#define EIGEN_UNUSED_VARIABLE(var)
#define EIGEN_DEVICE_FUNC
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
#define EIGEN_OPTIMIZATION_BARRIER(X)
#define EIGEN_STATIC_ASSERT(X, MSG)
bfloat16() min(const bfloat16 &a, const bfloat16 &b)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2_double(const Packet _x)
Packet pmin(const Packet &a, const Packet &b)
void twoprod(const Packet &x, const Packet &y, Packet &p_hi, Packet &p_lo)
Packet padd(const Packet &a, const Packet &b)
void veltkamp_splitting(const Packet &x, Packet &x_hi, Packet &x_lo)
Packet8f pzero(const Packet8f &)
void doubleword_div_fp(const Packet &x_hi, const Packet &x_lo, const Packet &y, Packet &z_hi, Packet &z_lo)
void twosum(const Packet &x_hi, const Packet &x_lo, const Packet &y_hi, const Packet &y_lo, Packet &s_hi, Packet &s_lo)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin_float(const Packet &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_double(const Packet _x)
Packet8h ptrue(const Packet8h &a)
Packet4f pcmp_lt_or_nan(const Packet4f &a, const Packet4f &b)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos_float(const Packet &x_in)
float trig_reduce_huge(float xf, Eigen::numext::int32_t *quadrant)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_float(const Packet &x_in)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_impl_double(const Packet _x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh_float(const Packet &x)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet &a)
Packet8h pandnot(const Packet8h &a, const Packet8h &b)
Packet4f pabs(const Packet4f &a)
Packet pmax(const Packet &a, const Packet &b)
Packet2cf pnegate(const Packet2cf &a)
Packet generic_pow_impl(const Packet &x, const Packet &y)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_reduced_double(const Packet &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_double(const Packet &x_in)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet &a)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt_complex(const Packet &a)
Packet1cd pcplxflip(const Packet1cd &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2_float(const Packet _x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet generic_pow(const Packet &x, const Packet &y)
Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Packet generic_plog1p(const Packet &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos_float(const Packet &x)
void pstoreu(Scalar *to, const Packet &from)
Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Packet pmul(const Packet &a, const Packet &b)
Packet4d pfrexp_generic_get_biased_exponent(const Packet4d &a)
Packet pmsub(const Packet &a, const Packet &b, const Packet &c)
Packet pfrexp_generic(const Packet &a, Packet &exponent)
Packet8h pfrexp(const Packet8h &a, Packet8h &exponent)
Packet pldexp_generic(const Packet &a, const Packet &exponent)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pround(const Packet &a)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pdiv_complex(const Packet &x, const Packet &y)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_impl_float(const Packet _x)
Packet generic_expm1(const Packet &x)
Packet4f psqrt(const Packet4f &a)
Packet pnmadd(const Packet &a, const Packet &b, const Packet &c)
Packet psub(const Packet &a, const Packet &b)
Packet8h pand(const Packet8h &a, const Packet8h &b)
Packet8h pldexp(const Packet8h &a, const Packet8h &exponent)
Packet8h pxor(const Packet8h &a, const Packet8h &b)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp_float(const Packet _x)
Packet8bf psignbit(const Packet8bf &a)
Packet pdiv(const Packet &a, const Packet &b)
Packet preciprocal(const Packet &a)
Packet2cf pconj(const Packet2cf &a)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_double(const Packet _x)
void absolute_split(const Packet &x, Packet &n, Packet &r)
void fast_twosum(const Packet &x, const Packet &y, Packet &s_hi, Packet &s_lo)
Packet8h por(const Packet8h &a, const Packet8h &b)
Packet4i pcmp_lt(const Packet4i &a, const Packet4i &b)
svint32_t PacketXi __attribute__((arm_sve_vector_bits(EIGEN_ARM64_SVE_VL)))
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan_reduced_float(const Packet &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog_float(const Packet _x)
Packet8f pisnan(const Packet8f &a)
bool predux_any(const Packet4f &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin_float(const Packet &x_in)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pfloor(const Packet &a)
Packet4f pcmp_le(const Packet4f &a, const Packet4f &b)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psincos_float(const Packet &_x)
Packet8f peven_mask(const Packet8f &)
Scalar round(const Scalar &x)
Scalar() floor(const Scalar &x)
static constexpr EIGEN_ALWAYS_INLINE Scalar signbit(const Scalar &x)
EIGEN_ALWAYS_INLINE bool() isfinite(const Eigen::bfloat16 &h)
EIGEN_ALWAYS_INLINE std::enable_if_t< NumTraits< T >::IsSigned||NumTraits< T >::IsComplex, typename NumTraits< T >::Real > abs(const T &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sign_op< typename Derived::Scalar >, const Derived > sign(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_exp_op< typename Derived::Scalar >, const Derived > exp(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_expm1_op< typename Derived::Scalar >, const Derived > expm1(const Eigen::ArrayBase< Derived > &x)
Holds information about the various numeric (i.e. scalar) types allowed by Eigen.