Assign_MKL.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2011, Intel Corporation. All rights reserved.
3  Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
4 
5  Redistribution and use in source and binary forms, with or without modification,
6  are permitted provided that the following conditions are met:
7 
8  * Redistributions of source code must retain the above copyright notice, this
9  list of conditions and the following disclaimer.
10  * Redistributions in binary form must reproduce the above copyright notice,
11  this list of conditions and the following disclaimer in the documentation
12  and/or other materials provided with the distribution.
13  * Neither the name of Intel Corporation nor the names of its contributors may
14  be used to endorse or promote products derived from this software without
15  specific prior written permission.
16 
17  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
21  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
24  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 
28  ********************************************************************************
29  * Content : Eigen bindings to Intel(R) MKL
30  * MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin()
31  ********************************************************************************
32 */
33 
34 #ifndef EIGEN_ASSIGN_VML_H
35 #define EIGEN_ASSIGN_VML_H
36 
37 #include "./InternalHeaderCheck.h"
38 
39 namespace Eigen {
40 
41 namespace internal {
42 
43 template<typename Dst, typename Src>
44 class vml_assign_traits
45 {
46  private:
47  enum {
48  DstHasDirectAccess = Dst::Flags & DirectAccessBit,
49  SrcHasDirectAccess = Src::Flags & DirectAccessBit,
50  StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)),
51  InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime)
52  : int(Dst::Flags)&RowMajorBit ? int(Dst::ColsAtCompileTime)
53  : int(Dst::RowsAtCompileTime),
54  InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime)
55  : int(Dst::Flags)&RowMajorBit ? int(Dst::MaxColsAtCompileTime)
56  : int(Dst::MaxRowsAtCompileTime),
57  MaxSizeAtCompileTime = Dst::SizeAtCompileTime,
58 
59  MightEnableVml = StorageOrdersAgree && DstHasDirectAccess && SrcHasDirectAccess && Src::InnerStrideAtCompileTime==1 && Dst::InnerStrideAtCompileTime==1,
60  MightLinearize = MightEnableVml && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit),
61  VmlSize = MightLinearize ? MaxSizeAtCompileTime : InnerMaxSize,
62  LargeEnough = VmlSize==Dynamic || VmlSize>=EIGEN_MKL_VML_THRESHOLD
63  };
64  public:
65  enum {
66  EnableVml = MightEnableVml && LargeEnough,
67  Traversal = MightLinearize ? LinearTraversal : DefaultTraversal
68  };
69 };
70 
71 #define EIGEN_PP_EXPAND(ARG) ARG
72 #if !defined (EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1)
73 #define EIGEN_VMLMODE_EXPAND_xLA , VML_HA
74 #else
75 #define EIGEN_VMLMODE_EXPAND_xLA , VML_LA
76 #endif
77 
78 #define EIGEN_VMLMODE_EXPAND_x_
79 
80 #define EIGEN_VMLMODE_PREFIX_xLA vm
81 #define EIGEN_VMLMODE_PREFIX_x_ v
82 #define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_x,VMLMODE)
83 
84 #define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
85  template< typename DstXprType, typename SrcXprNested> \
86  struct Assignment<DstXprType, CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested>, assign_op<EIGENTYPE,EIGENTYPE>, \
87  Dense2Dense, std::enable_if_t<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>> { \
88  typedef CwiseUnaryOp<scalar_##EIGENOP##_op<EIGENTYPE>, SrcXprNested> SrcXprType; \
89  static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
90  resize_if_allowed(dst, src, func); \
91  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
92  if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) { \
93  VMLOP(dst.size(), (const VMLTYPE*)src.nestedExpression().data(), \
94  (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE) ); \
95  } else { \
96  const Index outerSize = dst.outerSize(); \
97  for(Index outer = 0; outer < outerSize; ++outer) { \
98  const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer,0)) : \
99  &(src.nestedExpression().coeffRef(0, outer)); \
100  EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
101  VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, \
102  (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
103  } \
104  } \
105  } \
106  }; \
107 
108 
109 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
110  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),s##VMLOP), float, float, VMLMODE) \
111  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),d##VMLOP), double, double, VMLMODE)
112 
113 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \
114  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),c##VMLOP), scomplex, MKL_Complex8, VMLMODE) \
115  EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE),z##VMLOP), dcomplex, MKL_Complex16, VMLMODE)
116 
117 #define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \
118  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \
119  EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
120 
121 
131 // EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _)
136 
142 
143 #define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \
144  template< typename DstXprType, typename SrcXprNested, typename Plain> \
145  struct Assignment<DstXprType, CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
146  const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> >, assign_op<EIGENTYPE,EIGENTYPE>, \
147  Dense2Dense, std::enable_if_t<vml_assign_traits<DstXprType,SrcXprNested>::EnableVml>> { \
148  typedef CwiseBinaryOp<scalar_##EIGENOP##_op<EIGENTYPE,EIGENTYPE>, SrcXprNested, \
149  const CwiseNullaryOp<internal::scalar_constant_op<EIGENTYPE>,Plain> > SrcXprType; \
150  static void run(DstXprType &dst, const SrcXprType &src, const assign_op<EIGENTYPE,EIGENTYPE> &func) { \
151  resize_if_allowed(dst, src, func); \
152  eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \
153  VMLTYPE exponent = reinterpret_cast<const VMLTYPE&>(src.rhs().functor().m_other); \
154  if(vml_assign_traits<DstXprType,SrcXprNested>::Traversal==LinearTraversal) \
155  { \
156  VMLOP( dst.size(), (const VMLTYPE*)src.lhs().data(), exponent, \
157  (VMLTYPE*)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE) ); \
158  } else { \
159  const Index outerSize = dst.outerSize(); \
160  for(Index outer = 0; outer < outerSize; ++outer) { \
161  const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.lhs().coeffRef(outer,0)) : \
162  &(src.lhs().coeffRef(0, outer)); \
163  EIGENTYPE *dst_ptr = dst.IsRowMajor ? &(dst.coeffRef(outer,0)) : &(dst.coeffRef(0, outer)); \
164  VMLOP( dst.innerSize(), (const VMLTYPE*)src_ptr, exponent, \
165  (VMLTYPE*)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \
166  } \
167  } \
168  } \
169  };
170 
171 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA)
172 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA)
173 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA)
174 EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA)
175 
176 } // end namespace internal
177 
178 } // end namespace Eigen
179 
180 #endif // EIGEN_ASSIGN_VML_H
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE)
Definition: Assign_MKL.h:109
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE)
Definition: Assign_MKL.h:117
#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE)
Definition: Assign_MKL.h:143
#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE)
Definition: Assign_MKL.h:113
const unsigned int LinearAccessBit
Definition: Constants.h:132
const unsigned int DirectAccessBit
Definition: Constants.h:157
const unsigned int RowMajorBit
Definition: Constants.h:68
bfloat16 pow(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:626
: InteropHeaders
Definition: Core:139
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tanh_op< typename Derived::Scalar >, const Derived > tanh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_atan_op< typename Derived::Scalar >, const Derived > atan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log10_op< typename Derived::Scalar >, const Derived > log10(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cosh_op< typename Derived::Scalar >, const Derived > cosh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tan_op< typename Derived::Scalar >, const Derived > tan(const Eigen::ArrayBase< Derived > &x)
@ DefaultTraversal
Definition: Constants.h:279
@ LinearTraversal
Definition: Constants.h:281
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_acos_op< typename Derived::Scalar >, const Derived > acos(const Eigen::ArrayBase< Derived > &x)
std::complex< double > dcomplex
Definition: MKL_support.h:127
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_asin_op< typename Derived::Scalar >, const Derived > asin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_arg_op< typename Derived::Scalar >, const Derived > arg(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_ceil_op< typename Derived::Scalar >, const Derived > ceil(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_floor_op< typename Derived::Scalar >, const Derived > floor(const Eigen::ArrayBase< Derived > &x)
std::complex< float > scomplex
Definition: MKL_support.h:128
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cos_op< typename Derived::Scalar >, const Derived > cos(const Eigen::ArrayBase< Derived > &x)
const int Dynamic
Definition: Constants.h:24
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_round_op< typename Derived::Scalar >, const Derived > round(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_exp_op< typename Derived::Scalar >, const Derived > exp(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sin_op< typename Derived::Scalar >, const Derived > sin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log_op< typename Derived::Scalar >, const Derived > log(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sinh_op< typename Derived::Scalar >, const Derived > sinh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_square_op< typename Derived::Scalar >, const Derived > square(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sqrt_op< typename Derived::Scalar >, const Derived > sqrt(const Eigen::ArrayBase< Derived > &x)