26 #ifndef EIGEN_MATH_FUNCTIONS_MSA_H
27 #define EIGEN_MATH_FUNCTIONS_MSA_H
29 #include "../../InternalHeaderCheck.h"
55 Packet4i neg_mask = __builtin_msa_fclt_w(_x, zero);
56 Packet4i zero_mask = __builtin_msa_fceq_w(_x, zero);
62 Packet4i e_int = __builtin_msa_ftint_s_w(__builtin_msa_flog2_w(
x));
64 x = __builtin_msa_fexp2_w(
x, (
Packet4i)__builtin_msa_nori_b((v16u8)e_int, 0));
75 Packet4i ge_mask = __builtin_msa_fcle_w(p4f_cephes_SQRTHF,
x);
76 e_int =
psub(e_int, ge_mask);
77 x = (
Packet4f)__builtin_msa_bsel_v((v16u8)ge_mask, (v16u8)xx, (v16u8)
x);
79 Packet4f e = __builtin_msa_ffint_s_w(e_int);
85 y =
pmadd(p4f_cephes_log_p0,
x, p4f_cephes_log_p1);
86 y1 =
pmadd(p4f_cephes_log_p3,
x, p4f_cephes_log_p4);
87 y2 =
pmadd(p4f_cephes_log_p6,
x, p4f_cephes_log_p7);
89 y1 =
pmadd(y1,
x, p4f_cephes_log_p5);
90 y2 =
pmadd(y2,
x, p4f_cephes_log_p8);
96 x = __builtin_msa_fmsub_w(
x, x2, p4f_half);
114 x = __builtin_msa_fmin_w(
x, non_neg_x_or_nan);
117 Packet4i neg_infs = __builtin_msa_slli_w(zero_mask, 23);
118 x = (
Packet4f)__builtin_msa_bsel_v((v16u8)zero_mask, (v16u8)
x, (v16u8)neg_infs);
145 x = (
Packet4f)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_w(
x, p4f_exp_lo), (v16u8)
x,
147 x = (
Packet4f)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_w(p4f_exp_hi,
x), (v16u8)
x,
151 Packet4f x2_add = (
Packet4f)__builtin_msa_binsli_w((v4u32)p4f_half, (v4u32)
x, 0);
153 Packet4i x2_int = __builtin_msa_ftrunc_s_w(x2);
154 Packet4f x2_int_f = __builtin_msa_ffint_s_w(x2_int);
156 x = __builtin_msa_fmsub_w(
x, x2_int_f, p4f_cephes_exp_C1);
157 x = __builtin_msa_fmsub_w(
x, x2_int_f, p4f_cephes_exp_C2);
171 y = __builtin_msa_fexp2_w(
y, x2_int);
196 Packet4i tiny_mask = __builtin_msa_fclt_w(
x, p4f_tanh_tiny);
200 x = (
Packet4f)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_w(p4f_tanh_hi,
x), (v16u8)
x,
217 q =
pmadd(x2, q, p4f_beta_2);
218 q =
pmadd(x2, q, p4f_beta_0);
224 p = (
Packet4f)__builtin_msa_binsli_w((v4u32)
p, (v4u32)_x, 0);
227 p = (
Packet4f)__builtin_msa_bsel_v((v16u8)tiny_mask, (v16u8)
p, (v16u8)_x);
252 x =
padd(
x, zero_or_nan_if_inf);
255 Packet4i small_or_nan_mask = __builtin_msa_fcult_w(
x, p4f_sincos_max_arg);
261 Packet4i y_int = __builtin_msa_ftrunc_s_w(
y);
265 Packet4i y_int1 = __builtin_msa_addvi_w(y_int, 1);
267 y = __builtin_msa_ffint_s_w(y_int2);
271 : __builtin_msa_slli_w(__builtin_msa_addvi_w(y_int, 3), 29);
275 Packet4i poly_mask = __builtin_msa_ceqi_w(__builtin_msa_slli_w(y_int2, 30), 0);
290 y =
pmadd(
y, z, p4f_coscof_p1);
291 y =
pmadd(
y, z, p4f_coscof_p2);
294 y = __builtin_msa_fmsub_w(
y, z, p4f_half);
299 y2 =
pmadd(y2, z, p4f_sincof_p1);
300 y2 =
pmadd(y2, z, p4f_sincof_p2);
305 y = sine ? (
Packet4f)__builtin_msa_bsel_v((v16u8)poly_mask, (v16u8)
y, (v16u8)y2)
306 : (
Packet4f)__builtin_msa_bsel_v((v16u8)poly_mask, (v16u8)y2, (v16u8)
y);
310 y = (
Packet4f)__builtin_msa_binsli_w((v4u32)
y, (v4u32)sign_mask, 0);
350 x = (
Packet2d)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_d(
x, p2d_exp_lo), (v16u8)
x,
352 x = (
Packet2d)__builtin_msa_bsel_v((v16u8)__builtin_msa_fclt_d(p2d_exp_hi,
x), (v16u8)
x,
356 Packet2d x2_add = (
Packet2d)__builtin_msa_binsli_d((v2u64)p2d_half, (v2u64)
x, 0);
358 Packet2l x2_long = __builtin_msa_ftrunc_s_d(x2);
359 Packet2d x2_long_d = __builtin_msa_ffint_s_d(x2_long);
361 x = __builtin_msa_fmsub_d(
x, x2_long_d, p2d_cephes_exp_C1);
362 x = __builtin_msa_fmsub_d(
x, x2_long_d, p2d_cephes_exp_C2);
367 px =
pmadd(px, x2, p2d_cephes_exp_p1);
368 px =
pmadd(px, x2, p2d_cephes_exp_p2);
372 qx =
pmadd(qx, x2, p2d_cephes_exp_q1);
373 qx =
pmadd(qx, x2, p2d_cephes_exp_q2);
374 qx =
pmadd(qx, x2, p2d_cephes_exp_q3);
380 x = __builtin_msa_fexp2_d(
x, x2_long);
Array< double, 1, 3 > e(1./3., 0.5, 2.)
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Packet padd(const Packet &a, const Packet &b)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pcos< Packet4f >(const Packet4f &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d pexp< Packet2d >(const Packet2d &_x)
static EIGEN_DECLARE_CONST_Packet4f(1, 1.0f)
Packet4f pabs(const Packet4f &a)
Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
__vector unsigned int Packet4ui
Packet pmul(const Packet &a, const Packet &b)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f plog< Packet4f >(const Packet4f &_x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f ptanh< Packet4f >(const Packet4f &_x)
Packet psub(const Packet &a, const Packet &b)
Packet8h pand(const Packet8h &a, const Packet8h &b)
Packet8h pxor(const Packet8h &a, const Packet8h &b)
Packet pdiv(const Packet &a, const Packet &b)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psin< Packet4f >(const Packet4f &x)
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f pexp< Packet4f >(const Packet4f &_x)
static EIGEN_DECLARE_CONST_Packet2d(1, 1.0)
Packet4f psincos_inner_msa_float(const Packet4f &_x)