10 #ifndef EIGEN_SELFADJOINT_MATRIX_VECTOR_H
11 #define EIGEN_SELFADJOINT_MATRIX_VECTOR_H
13 #include "../InternalHeaderCheck.h"
25 template<
typename Scalar,
typename Index,
int StorageOrder,
int UpLo,
bool ConjugateLhs,
bool ConjugateRhs,
int Version=Specialized>
26 struct selfadjoint_matrix_vector_product;
28 template<
typename Scalar,
typename Index,
int StorageOrder,
int UpLo,
bool ConjugateLhs,
bool ConjugateRhs,
int Version>
29 struct selfadjoint_matrix_vector_product
35 const Scalar* lhs,
Index lhsStride,
41 template<
typename Scalar,
typename Index,
int StorageOrder,
int UpLo,
bool ConjugateLhs,
bool ConjugateRhs,
int Version>
43 void selfadjoint_matrix_vector_product<Scalar,Index,StorageOrder,UpLo,ConjugateLhs,ConjugateRhs,Version>::run(
45 const Scalar* lhs,
Index lhsStride,
50 typedef typename packet_traits<Scalar>::type Packet;
52 const Index PacketSize =
sizeof(Packet)/
sizeof(Scalar);
55 IsRowMajor = StorageOrder==
RowMajor ? 1 : 0,
56 IsLower = UpLo ==
Lower ? 1 : 0,
57 FirstTriangular = IsRowMajor == IsLower
60 conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex &&
logical_xor(ConjugateLhs, IsRowMajor), ConjugateRhs> cj0;
61 conj_helper<Scalar,Scalar,NumTraits<Scalar>::IsComplex &&
logical_xor(ConjugateLhs, !IsRowMajor), ConjugateRhs> cj1;
62 conj_helper<RealScalar,Scalar,false, ConjugateRhs> cjd;
64 conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex &&
logical_xor(ConjugateLhs, IsRowMajor), ConjugateRhs> pcj0;
65 conj_helper<Packet,Packet,NumTraits<Scalar>::IsComplex &&
logical_xor(ConjugateLhs, !IsRowMajor), ConjugateRhs> pcj1;
67 Scalar cjAlpha = ConjugateRhs ?
numext::conj(alpha) : alpha;
73 for (
Index j=FirstTriangular ? bound : 0;
74 j<(FirstTriangular ?
size : bound);
j+=2)
79 Scalar t0 = cjAlpha * rhs[
j];
80 Packet ptmp0 = pset1<Packet>(t0);
81 Scalar t1 = cjAlpha * rhs[
j+1];
82 Packet ptmp1 = pset1<Packet>(t1);
85 Packet ptmp2 = pset1<Packet>(t2);
87 Packet ptmp3 = pset1<Packet>(t3);
89 Index starti = FirstTriangular ? 0 :
j+2;
92 Index alignedEnd = alignedStart + ((endi-alignedStart)/(PacketSize))*(PacketSize);
98 res[
j] += cj0.pmul(A1[
j], t1);
99 t3 += cj1.pmul(A1[
j], rhs[
j]);
103 res[
j+1] += cj0.pmul(A0[
j+1],t0);
104 t2 += cj1.pmul(A0[
j+1], rhs[
j+1]);
107 for (
Index i=starti;
i<alignedStart; ++
i)
109 res[
i] += cj0.pmul(A0[
i], t0) + cj0.pmul(A1[
i],t1);
110 t2 += cj1.pmul(A0[
i], rhs[
i]);
111 t3 += cj1.pmul(A1[
i], rhs[
i]);
119 for (
Index i=alignedStart;
i<alignedEnd;
i+=PacketSize)
121 Packet A0i = ploadu<Packet>(a0It); a0It += PacketSize;
122 Packet A1i = ploadu<Packet>(a1It); a1It += PacketSize;
123 Packet Bi = ploadu<Packet>(rhsIt); rhsIt += PacketSize;
124 Packet Xi = pload <Packet>(resIt);
126 Xi = pcj0.pmadd(A0i,ptmp0, pcj0.pmadd(A1i,ptmp1,Xi));
127 ptmp2 = pcj1.pmadd(A0i, Bi, ptmp2);
128 ptmp3 = pcj1.pmadd(A1i, Bi, ptmp3);
129 pstore(resIt,Xi); resIt += PacketSize;
131 for (
Index i=alignedEnd;
i<endi;
i++)
133 res[
i] += cj0.pmul(A0[
i], t0) + cj0.pmul(A1[
i],t1);
134 t2 += cj1.pmul(A0[
i], rhs[
i]);
135 t3 += cj1.pmul(A1[
i], rhs[
i]);
141 for (
Index j=FirstTriangular ? 0 : bound;
j<(FirstTriangular ? bound :
size);
j++)
145 Scalar t1 = cjAlpha * rhs[
j];
148 for (
Index i=FirstTriangular ? 0 :
j+1;
i<(FirstTriangular ?
j :
size);
i++)
150 res[
i] += cj0.pmul(A0[
i], t1);
151 t2 += cj1.pmul(A0[
i], rhs[
i]);
153 res[
j] += alpha * t2;
165 template<
typename Lhs,
int LhsMode,
typename Rhs>
166 struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,0,true>
168 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
170 typedef internal::blas_traits<Lhs> LhsBlasTraits;
171 typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
172 typedef internal::remove_all_t<ActualLhsType> ActualLhsTypeCleaned;
174 typedef internal::blas_traits<Rhs> RhsBlasTraits;
175 typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;
176 typedef internal::remove_all_t<ActualRhsType> ActualRhsTypeCleaned;
180 template<
typename Dest>
182 void run(Dest& dest,
const Lhs &a_lhs,
const Rhs &a_rhs,
const Scalar& alpha)
184 typedef typename Dest::Scalar ResScalar;
185 typedef typename Rhs::Scalar RhsScalar;
188 eigen_assert(dest.rows()==a_lhs.rows() && dest.cols()==a_rhs.cols());
190 add_const_on_value_type_t<ActualLhsType> lhs = LhsBlasTraits::extract(a_lhs);
191 add_const_on_value_type_t<ActualRhsType> rhs = RhsBlasTraits::extract(a_rhs);
193 Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
194 * RhsBlasTraits::extractScalarFactor(a_rhs);
197 EvalToDest = (Dest::InnerStrideAtCompileTime==1),
198 UseRhs = (ActualRhsTypeCleaned::InnerStrideAtCompileTime==1)
201 internal::gemv_static_vector_if<ResScalar,Dest::SizeAtCompileTime,Dest::MaxSizeAtCompileTime,!EvalToDest> static_dest;
202 internal::gemv_static_vector_if<RhsScalar,ActualRhsTypeCleaned::SizeAtCompileTime,ActualRhsTypeCleaned::MaxSizeAtCompileTime,!UseRhs> static_rhs;
205 EvalToDest ? dest.data() : static_dest.data());
208 UseRhs ?
const_cast<RhsScalar*
>(rhs.data()) : static_rhs.data());
212 #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
214 EIGEN_DENSE_STORAGE_CTOR_PLUGIN
216 MappedDest(actualDestPtr, dest.size()) = dest;
221 #ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN
223 EIGEN_DENSE_STORAGE_CTOR_PLUGIN
225 Map<typename ActualRhsTypeCleaned::PlainObject>(actualRhsPtr, rhs.size()) = rhs;
230 int(LhsUpLo),
bool(LhsBlasTraits::NeedToConjugate),
bool(RhsBlasTraits::NeedToConjugate)>::run
233 &lhs.coeffRef(0,0), lhs.outerStride(),
240 dest = MappedDest(actualDestPtr, dest.size());
244 template<
typename Lhs,
typename Rhs,
int RhsMode>
245 struct selfadjoint_product_impl<Lhs,0,true,Rhs,RhsMode,false>
247 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
250 template<
typename Dest>
251 static void run(Dest& dest,
const Lhs &a_lhs,
const Rhs &a_rhs,
const Scalar& alpha)
254 Transpose<Dest> destT(dest);
255 selfadjoint_product_impl<Transpose<const Rhs>, int(RhsUpLo)==
Upper ?
Lower :
Upper,
false,
256 Transpose<const Lhs>, 0,
true>::run(destT, a_rhs.transpose(), a_lhs.transpose(), alpha);
RealReturnType real() const
#define EIGEN_DEVICE_FUNC
#define EIGEN_DONT_INLINE
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
const unsigned int RowMajorBit
constexpr int plain_enum_min(A a, B b)
void pstore(Scalar *to, const Packet &from)
unpacket_traits< Packet >::type predux(const Packet &a)
constexpr bool logical_xor(bool a, bool b)
static Index first_default_aligned(const DenseBase< Derived > &m)
EIGEN_ALWAYS_INLINE T maxi(const T &x, const T &y)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_conjugate_op< typename Derived::Scalar >, const Derived > conj(const Eigen::ArrayBase< Derived > &x)