34 #ifndef EIGEN_ASSIGN_VML_H
35 #define EIGEN_ASSIGN_VML_H
43 template<
typename Dst,
typename Src>
44 class vml_assign_traits
50 StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
51 InnerSize =
int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
52 : int(Dst::Flags)&
RowMajorBit ? int(Dst::ColsAtCompileTime)
53 : int(Dst::RowsAtCompileTime),
54 InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
55 : int(Dst::Flags)&
RowMajorBit ? int(Dst::MaxColsAtCompileTime)
56 : int(Dst::MaxRowsAtCompileTime),
57 MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
59 MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
60 MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) &
LinearAccessBit),
61 VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
62 LargeEnough = VmlSize==
Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD
66 EnableVml = MightEnableVml && LargeEnough,
71 #define EIGEN_PP_EXPAND(ARG) ARG
72 #if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
73 #define EIGEN_VMLMODE_EXPAND_xLA , VML_HA
75 #define EIGEN_VMLMODE_EXPAND_xLA , VML_LA
78 #define EIGEN_VMLMODE_EXPAND_x_
80 #define EIGEN_VMLMODE_PREFIX_xLA vm
81 #define EIGEN_VMLMODE_PREFIX_x_ v
82 #define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_x,VMLMODE)
84 #define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
85 template< typename DstXprType, typename SrcXprNested> \
86 struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \
87 Dense2Dense, std::enable_if_t<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>> { \
88 typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
89 static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
90 resize_if_allowed(dst, src, func); \
91 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
92 if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
93 VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
94 (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE) ); \
96 const Index outerSize = dst.outerSize(); \
97 for(Index outer = 0; outer < outerSize; ++outer) { \
98 const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \
99 &(src.nestedExpression().coeffRef(0, outer)); \
100 EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
101 VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \
102 (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
109 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
110 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \
111 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE)
113 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \
114 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \
115 EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE)
117 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \
118 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
119 EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
143 #define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
144 template< typename DstXprType, typename SrcXprNested, typename Plain> \
145 struct Assignment<DstXprType, CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
146 const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> >, assign_op<EIGENTYPE,EIGENTYPE>, \
147 Dense2Dense, std::enable_if_t<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>> { \
148 typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
149 const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
150 static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
151 resize_if_allowed(dst, src, func); \
152 eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
153 VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \
154 if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
156 VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent, \
157 (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE) ); \
159 const Index outerSize = dst.outerSize(); \
160 for(Index outer = 0; outer < outerSize; ++outer) { \
161 const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.lhs().coeffRef(outer,0)) : \
162 &(src.lhs().coeffRef(0, outer)); \
163 EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
164 VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \
165 (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE)
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
const unsigned int LinearAccessBit
const unsigned int DirectAccessBit
const unsigned int RowMajorBit
bfloat16 pow(const bfloat16 &a, const bfloat16 &b)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tanh_op< typename Derived::Scalar >, const Derived > tanh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_atan_op< typename Derived::Scalar >, const Derived > atan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log10_op< typename Derived::Scalar >, const Derived > log10(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cosh_op< typename Derived::Scalar >, const Derived > cosh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tan_op< typename Derived::Scalar >, const Derived > tan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_acos_op< typename Derived::Scalar >, const Derived > acos(const Eigen::ArrayBase< Derived > &x)
std::complex< double > dcomplex
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_asin_op< typename Derived::Scalar >, const Derived > asin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_arg_op< typename Derived::Scalar >, const Derived > arg(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_ceil_op< typename Derived::Scalar >, const Derived > ceil(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_floor_op< typename Derived::Scalar >, const Derived > floor(const Eigen::ArrayBase< Derived > &x)
std::complex< float > scomplex
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cos_op< typename Derived::Scalar >, const Derived > cos(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_round_op< typename Derived::Scalar >, const Derived > round(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_exp_op< typename Derived::Scalar >, const Derived > exp(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sin_op< typename Derived::Scalar >, const Derived > sin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log_op< typename Derived::Scalar >, const Derived > log(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sinh_op< typename Derived::Scalar >, const Derived > sinh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_square_op< typename Derived::Scalar >, const Derived > square(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sqrt_op< typename Derived::Scalar >, const Derived > sqrt(const Eigen::ArrayBase< Derived > &x)