SelfadjointMatrixMatrix.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_SELFADJOINT_MATRIX_MATRIX_H
#define EIGEN_SELFADJOINT_MATRIX_MATRIX_H

#include "../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

// pack a selfadjoint block diagonal for use with the gebp_kernel
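// Only one triangular half of the selfadjoint lhs is referenced; the other
// half is reconstructed on the fly by conjugate transposition during packing,
// so the gebp kernel can consume the packed block as a plain dense panel.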
template<typename Scalar, typename Index, int Pack1, int Pack2_dummy, int StorageOrder>
struct symm_pack_lhs
{
  template<int BlockRows> inline
  void pack(Scalar* blockA, const const_blas_data_mapper<Scalar,Index,StorageOrder>& lhs, Index cols, Index i, Index& count)
  {
    // normal copy
    for(Index k=0; k<i; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = lhs(i+w,k);           // normal
    // symmetric copy
    Index h = 0;
    for(Index k=i; k<i+BlockRows; k++)
    {
      for(Index w=0; w<h; w++)
        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed

      blockA[count++] = numext::real(lhs(k,k));   // real (diagonal)

      for(Index w=h+1; w<BlockRows; w++)
        blockA[count++] = lhs(i+w, k);          // normal
      ++h;
    }
    // transposed copy
    for(Index k=i+BlockRows; k<cols; k++)
      for(Index w=0; w<BlockRows; w++)
        blockA[count++] = numext::conj(lhs(k, i+w)); // transposed
  }
  void operator()(Scalar* blockA, const Scalar* _lhs, Index lhsStride, Index cols, Index rows)
  {
    typedef typename unpacket_traits<typename packet_traits<Scalar>::type>::half HalfPacket;
    typedef typename unpacket_traits<typename unpacket_traits<typename packet_traits<Scalar>::type>::half>::half QuarterPacket;
    enum { PacketSize = packet_traits<Scalar>::size,
           HalfPacketSize = unpacket_traits<HalfPacket>::size,
           QuarterPacketSize = unpacket_traits<QuarterPacket>::size,
           HasHalf = (int)HalfPacketSize < (int)PacketSize,
           HasQuarter = (int)QuarterPacketSize < (int)HalfPacketSize};

    const_blas_data_mapper<Scalar,Index,StorageOrder> lhs(_lhs,lhsStride);
    Index count = 0;
    //Index peeled_mc3 = (rows/Pack1)*Pack1;

    const Index peeled_mc3 = Pack1>=3*PacketSize ? (rows/(3*PacketSize))*(3*PacketSize) : 0;
    const Index peeled_mc2 = Pack1>=2*PacketSize ? peeled_mc3+((rows-peeled_mc3)/(2*PacketSize))*(2*PacketSize) : 0;
    const Index peeled_mc1 = Pack1>=1*PacketSize ? peeled_mc2+((rows-peeled_mc2)/(1*PacketSize))*(1*PacketSize) : 0;
    const Index peeled_mc_half = Pack1>=HalfPacketSize ? peeled_mc1+((rows-peeled_mc1)/(HalfPacketSize))*(HalfPacketSize) : 0;
    const Index peeled_mc_quarter = Pack1>=QuarterPacketSize ? peeled_mc_half+((rows-peeled_mc_half)/(QuarterPacketSize))*(QuarterPacketSize) : 0;

    if(Pack1>=3*PacketSize)
      for(Index i=0; i<peeled_mc3; i+=3*PacketSize)
        pack<3*PacketSize>(blockA, lhs, cols, i, count);

    if(Pack1>=2*PacketSize)
      for(Index i=peeled_mc3; i<peeled_mc2; i+=2*PacketSize)
        pack<2*PacketSize>(blockA, lhs, cols, i, count);

    if(Pack1>=1*PacketSize)
      for(Index i=peeled_mc2; i<peeled_mc1; i+=1*PacketSize)
        pack<1*PacketSize>(blockA, lhs, cols, i, count);

    if(HasHalf && Pack1>=HalfPacketSize)
      for(Index i=peeled_mc1; i<peeled_mc_half; i+=HalfPacketSize)
        pack<HalfPacketSize>(blockA, lhs, cols, i, count);

    if(HasQuarter && Pack1>=QuarterPacketSize)
      for(Index i=peeled_mc_half; i<peeled_mc_quarter; i+=QuarterPacketSize)
        pack<QuarterPacketSize>(blockA, lhs, cols, i, count);

    // do the same with mr==1
    for(Index i=peeled_mc_quarter; i<rows; i++)
    {
      for(Index k=0; k<i; k++)
        blockA[count++] = lhs(i, k);                 // normal

      blockA[count++] = numext::real(lhs(i, i));     // real (diagonal)

      for(Index k=i+1; k<cols; k++)
        blockA[count++] = numext::conj(lhs(k, i));   // transposed
    }
  }
};

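// Packs a block of rows of the selfadjoint rhs: columns to the left of the
// current diagonal block are copied directly, the diagonal block mixes direct
// and conjugate-transposed reads around the diagonal, and columns to the right
// of the block are read from the conjugate transpose.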
template<typename Scalar, typename Index, int nr, int StorageOrder>
struct symm_pack_rhs
{
  enum { PacketSize = packet_traits<Scalar>::size };
  void operator()(Scalar* blockB, const Scalar* _rhs, Index rhsStride, Index rows, Index cols, Index k2)
  {
    Index end_k = k2 + rows;
    Index count = 0;
    const_blas_data_mapper<Scalar,Index,StorageOrder> rhs(_rhs,rhsStride);
    Index packet_cols8 = nr>=8 ? (cols/8) * 8 : 0;
    Index packet_cols4 = nr>=4 ? (cols/4) * 4 : 0;

    // first part: normal case
    for(Index j2=0; j2<k2; j2+=nr)
    {
      for(Index k=k2; k<end_k; k++)
      {
        blockB[count+0] = rhs(k,j2+0);
        blockB[count+1] = rhs(k,j2+1);
        if (nr>=4)
        {
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
        }
        if (nr>=8)
        {
          blockB[count+4] = rhs(k,j2+4);
          blockB[count+5] = rhs(k,j2+5);
          blockB[count+6] = rhs(k,j2+6);
          blockB[count+7] = rhs(k,j2+7);
        }
        count += nr;
      }
    }

    // second part: diagonal block
    Index end8 = nr>=8 ? (std::min)(k2+rows,packet_cols8) : k2;
    if(nr>=8)
    {
      for(Index j2=k2; j2<end8; j2+=8)
      {
        // again we can split vertically into three different parts (transpose, symmetric, normal)
        // transpose
        for(Index k=k2; k<j2; k++)
        {
          blockB[count+0] = numext::conj(rhs(j2+0,k));
          blockB[count+1] = numext::conj(rhs(j2+1,k));
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
          blockB[count+4] = numext::conj(rhs(j2+4,k));
          blockB[count+5] = numext::conj(rhs(j2+5,k));
          blockB[count+6] = numext::conj(rhs(j2+6,k));
          blockB[count+7] = numext::conj(rhs(j2+7,k));
          count += 8;
        }
        // symmetric
        Index h = 0;
        for(Index k=j2; k<j2+8; k++)
        {
          // normal
          for (Index w=0 ; w<h; ++w)
            blockB[count+w] = rhs(k,j2+w);

          blockB[count+h] = numext::real(rhs(k,k));

          // transpose
          for (Index w=h+1 ; w<8; ++w)
            blockB[count+w] = numext::conj(rhs(j2+w,k));
          count += 8;
          ++h;
        }
        // normal
        for(Index k=j2+8; k<end_k; k++)
        {
          blockB[count+0] = rhs(k,j2+0);
          blockB[count+1] = rhs(k,j2+1);
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
          blockB[count+4] = rhs(k,j2+4);
          blockB[count+5] = rhs(k,j2+5);
          blockB[count+6] = rhs(k,j2+6);
          blockB[count+7] = rhs(k,j2+7);
          count += 8;
        }
      }
    }
    if(nr>=4)
    {
      for(Index j2=end8; j2<(std::min)(k2+rows,packet_cols4); j2+=4)
      {
        // again we can split vertically into three different parts (transpose, symmetric, normal)
        // transpose
        for(Index k=k2; k<j2; k++)
        {
          blockB[count+0] = numext::conj(rhs(j2+0,k));
          blockB[count+1] = numext::conj(rhs(j2+1,k));
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
          count += 4;
        }
        // symmetric
        Index h = 0;
        for(Index k=j2; k<j2+4; k++)
        {
          // normal
          for (Index w=0 ; w<h; ++w)
            blockB[count+w] = rhs(k,j2+w);

          blockB[count+h] = numext::real(rhs(k,k));

          // transpose
          for (Index w=h+1 ; w<4; ++w)
            blockB[count+w] = numext::conj(rhs(j2+w,k));
          count += 4;
          ++h;
        }
        // normal
        for(Index k=j2+4; k<end_k; k++)
        {
          blockB[count+0] = rhs(k,j2+0);
          blockB[count+1] = rhs(k,j2+1);
          blockB[count+2] = rhs(k,j2+2);
          blockB[count+3] = rhs(k,j2+3);
          count += 4;
        }
      }
    }

    // third part: transposed
    if(nr>=8)
    {
      for(Index j2=k2+rows; j2<packet_cols8; j2+=8)
      {
        for(Index k=k2; k<end_k; k++)
        {
          blockB[count+0] = numext::conj(rhs(j2+0,k));
          blockB[count+1] = numext::conj(rhs(j2+1,k));
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
          blockB[count+4] = numext::conj(rhs(j2+4,k));
          blockB[count+5] = numext::conj(rhs(j2+5,k));
          blockB[count+6] = numext::conj(rhs(j2+6,k));
          blockB[count+7] = numext::conj(rhs(j2+7,k));
          count += 8;
        }
      }
    }
    if(nr>=4)
    {
      for(Index j2=(std::max)(packet_cols8,k2+rows); j2<packet_cols4; j2+=4)
      {
        for(Index k=k2; k<end_k; k++)
        {
          blockB[count+0] = numext::conj(rhs(j2+0,k));
          blockB[count+1] = numext::conj(rhs(j2+1,k));
          blockB[count+2] = numext::conj(rhs(j2+2,k));
          blockB[count+3] = numext::conj(rhs(j2+3,k));
          count += 4;
        }
      }
    }

    // copy the remaining columns one at a time (=> the same with nr==1)
    for(Index j2=packet_cols4; j2<cols; ++j2)
    {
      // transpose
      Index half = (std::min)(end_k,j2);
      for(Index k=k2; k<half; k++)
      {
        blockB[count] = numext::conj(rhs(j2,k));
        count += 1;
      }

      if(half==j2 && half<k2+rows)
      {
        blockB[count] = numext::real(rhs(j2,j2));
        count += 1;
      }
      else
        half--;

      // normal
      for(Index k=half+1; k<k2+rows; k++)
      {
        blockB[count] = rhs(k,j2);
        count += 1;
      }
    }
  }
};

/* Optimized selfadjoint matrix * matrix (_SYMM) product built on top of
 * the general matrix matrix product.
 */
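// The generic declaration below is specialized on the storage order of the
// result: a row-major destination is handled by transposing the whole product
// (swapping the roles of lhs and rhs), so only the column-major case needs a
// full implementation.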
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
          int ResStorageOrder, int ResInnerStride>
struct product_selfadjoint_matrix;

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool LhsSelfAdjoint, bool ConjugateLhs,
          int RhsStorageOrder, bool RhsSelfAdjoint, bool ConjugateRhs,
          int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,LhsSelfAdjoint,ConjugateLhs, RhsStorageOrder,RhsSelfAdjoint,ConjugateRhs,RowMajor,ResInnerStride>
{

  static EIGEN_STRONG_INLINE void run(
    Index rows, Index cols,
    const Scalar* lhs, Index lhsStride,
    const Scalar* rhs, Index rhsStride,
    Scalar* res, Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
  {
    product_selfadjoint_matrix<Scalar, Index,
      logical_xor(RhsSelfAdjoint,RhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      RhsSelfAdjoint, NumTraits<Scalar>::IsComplex && logical_xor(RhsSelfAdjoint, ConjugateRhs),
      logical_xor(LhsSelfAdjoint,LhsStorageOrder==RowMajor) ? ColMajor : RowMajor,
      LhsSelfAdjoint, NumTraits<Scalar>::IsComplex && logical_xor(LhsSelfAdjoint, ConjugateLhs),
      ColMajor,ResInnerStride>
      ::run(cols, rows, rhs, rhsStride, lhs, lhsStride, res, resIncr, resStride, alpha, blocking);
  }
};

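// Case 1: the lhs is the selfadjoint operand and the rhs is a general dense
// matrix, with a column-major result.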
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs,
          int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs,
          int ResInnerStride>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,true,ConjugateLhs, RhsStorageOrder,false,ConjugateRhs,ColMajor,ResInnerStride>::run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* _res, Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
  {
    Index size = rows;

    typedef gebp_traits<Scalar,Scalar> Traits;

    typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
    typedef const_blas_data_mapper<Scalar, Index, (LhsStorageOrder == RowMajor) ? ColMajor : RowMajor> LhsTransposeMapper;
    typedef const_blas_data_mapper<Scalar, Index, RhsStorageOrder> RhsMapper;
    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
    LhsMapper lhs(_lhs,lhsStride);
    LhsTransposeMapper lhs_transpose(_lhs,lhsStride);
    RhsMapper rhs(_rhs,rhsStride);
    ResMapper res(_res, resStride, resIncr);

    Index kc = blocking.kc();                   // cache block size along the K direction
    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
    // kc must not be larger than mc
    kc = (std::min)(kc,mc);
    std::size_t sizeA = kc*mc;
    std::size_t sizeB = kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());

    gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    symm_pack_lhs<Scalar, Index, Traits::mr, Traits::LhsProgress, LhsStorageOrder> pack_lhs;
    gemm_pack_rhs<Scalar, Index, RhsMapper, Traits::nr,RhsStorageOrder> pack_rhs;
    gemm_pack_lhs<Scalar, Index, LhsTransposeMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder==RowMajor?ColMajor:RowMajor, true> pack_lhs_transposed;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      // we have selected one row panel of rhs and one column panel of lhs
      // pack rhs's panel into a sequential chunk of memory
      // and expand each coeff to a constant packet for further reuse
      pack_rhs(blockB, rhs.getSubMapper(k2,0), actual_kc, cols);

      // the selected lhs's panel has to be split into three different parts:
      //  1 - the transposed panel above the diagonal block => transposed packed copy
      //  2 - the diagonal block => special packed copy
      //  3 - the panel below the diagonal block => generic packed copy
      for(Index i2=0; i2<k2; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,k2)-i2;
        // transposed packed copy
        pack_lhs_transposed(blockA, lhs_transpose.getSubMapper(i2, k2), actual_kc, actual_mc);

        gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
      // the block diagonal
      {
        const Index actual_mc = (std::min)(k2+kc,size)-k2;
        // symmetric packed copy
        pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc);

        gebp_kernel(res.getSubMapper(k2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }

      for(Index i2=k2+kc; i2<size; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,size)-i2;
        gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder,false>()
          (blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);

        gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }

// matrix * selfadjoint product
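// Case 2: the rhs is the selfadjoint operand and the lhs is a general dense
// matrix; the rhs is packed with symm_pack_rhs while the lhs uses the
// standard gemm packing.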
template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs,
          int ResInnerStride>
struct product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>
{

  static EIGEN_DONT_INLINE void run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* res, Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking);
};

template <typename Scalar, typename Index,
          int LhsStorageOrder, bool ConjugateLhs,
          int RhsStorageOrder, bool ConjugateRhs,
          int ResInnerStride>
EIGEN_DONT_INLINE void product_selfadjoint_matrix<Scalar,Index,LhsStorageOrder,false,ConjugateLhs, RhsStorageOrder,true,ConjugateRhs,ColMajor,ResInnerStride>::run(
    Index rows, Index cols,
    const Scalar* _lhs, Index lhsStride,
    const Scalar* _rhs, Index rhsStride,
    Scalar* _res, Index resIncr, Index resStride,
    const Scalar& alpha, level3_blocking<Scalar,Scalar>& blocking)
  {
    Index size = cols;

    typedef gebp_traits<Scalar,Scalar> Traits;

    typedef const_blas_data_mapper<Scalar, Index, LhsStorageOrder> LhsMapper;
    typedef blas_data_mapper<typename Traits::ResScalar, Index, ColMajor, Unaligned, ResInnerStride> ResMapper;
    LhsMapper lhs(_lhs,lhsStride);
    ResMapper res(_res,resStride, resIncr);

    Index kc = blocking.kc();                   // cache block size along the K direction
    Index mc = (std::min)(rows,blocking.mc());  // cache block size along the M direction
    std::size_t sizeA = kc*mc;
    std::size_t sizeB = kc*cols;
    ei_declare_aligned_stack_constructed_variable(Scalar, blockA, sizeA, blocking.blockA());
    ei_declare_aligned_stack_constructed_variable(Scalar, blockB, sizeB, blocking.blockB());

    gebp_kernel<Scalar, Scalar, Index, ResMapper, Traits::mr, Traits::nr, ConjugateLhs, ConjugateRhs> gebp_kernel;
    gemm_pack_lhs<Scalar, Index, LhsMapper, Traits::mr, Traits::LhsProgress, typename Traits::LhsPacket4Packing, LhsStorageOrder> pack_lhs;
    symm_pack_rhs<Scalar, Index, Traits::nr,RhsStorageOrder> pack_rhs;

    for(Index k2=0; k2<size; k2+=kc)
    {
      const Index actual_kc = (std::min)(k2+kc,size)-k2;

      pack_rhs(blockB, _rhs, rhsStride, actual_kc, cols, k2);

      // => GEPP
      for(Index i2=0; i2<rows; i2+=mc)
      {
        const Index actual_mc = (std::min)(i2+mc,rows)-i2;
        pack_lhs(blockA, lhs.getSubMapper(i2, k2), actual_kc, actual_mc);

        gebp_kernel(res.getSubMapper(i2, 0), blockA, blockB, actual_mc, actual_kc, cols, alpha);
      }
    }
  }

} // end namespace internal

/***************************************************************************
* Wrapper to product_selfadjoint_matrix
***************************************************************************/

namespace internal {

template<typename Lhs, int LhsMode, typename Rhs, int RhsMode>
struct selfadjoint_product_impl<Lhs,LhsMode,false,Rhs,RhsMode,false>
{
  typedef typename Product<Lhs,Rhs>::Scalar Scalar;

  typedef internal::blas_traits<Lhs> LhsBlasTraits;
  typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType;
  typedef internal::blas_traits<Rhs> RhsBlasTraits;
  typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType;

  enum {
    LhsIsUpper = (LhsMode&(Upper|Lower))==Upper,
    LhsIsSelfAdjoint = (LhsMode&SelfAdjoint)==SelfAdjoint,
    RhsIsUpper = (RhsMode&(Upper|Lower))==Upper,
    RhsIsSelfAdjoint = (RhsMode&SelfAdjoint)==SelfAdjoint
  };

  template<typename Dest>
  static void run(Dest &dst, const Lhs &a_lhs, const Rhs &a_rhs, const Scalar& alpha)
  {
    eigen_assert(dst.rows()==a_lhs.rows() && dst.cols()==a_rhs.cols());

    add_const_on_value_type_t<ActualLhsType> lhs = LhsBlasTraits::extract(a_lhs);
    add_const_on_value_type_t<ActualRhsType> rhs = RhsBlasTraits::extract(a_rhs);

    Scalar actualAlpha = alpha * LhsBlasTraits::extractScalarFactor(a_lhs)
                               * RhsBlasTraits::extractScalarFactor(a_rhs);

    typedef internal::gemm_blocking_space<(Dest::Flags&RowMajorBit) ? RowMajor : ColMajor,Scalar,Scalar,
              Lhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, Lhs::MaxColsAtCompileTime,1> BlockingType;

    BlockingType blocking(lhs.rows(), rhs.cols(), lhs.cols(), 1, false);

    internal::product_selfadjoint_matrix<Scalar, Index,
      internal::logical_xor(LhsIsUpper, internal::traits<Lhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, LhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && internal::logical_xor(LhsIsUpper, bool(LhsBlasTraits::NeedToConjugate)),
      internal::logical_xor(RhsIsUpper, internal::traits<Rhs>::Flags &RowMajorBit) ? RowMajor : ColMajor, RhsIsSelfAdjoint,
      NumTraits<Scalar>::IsComplex && internal::logical_xor(RhsIsUpper, bool(RhsBlasTraits::NeedToConjugate)),
      internal::traits<Dest>::Flags&RowMajorBit ? RowMajor : ColMajor,
      Dest::InnerStrideAtCompileTime>
      ::run(
        lhs.rows(), rhs.cols(),                 // sizes
        &lhs.coeffRef(0,0), lhs.outerStride(),  // lhs info
        &rhs.coeffRef(0,0), rhs.outerStride(),  // rhs info
        &dst.coeffRef(0,0), dst.innerStride(), dst.outerStride(), // result info
        actualAlpha, blocking                   // alpha
      );
  }
};

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_SELFADJOINT_MATRIX_MATRIX_H
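
Usage note (not part of the header above): a minimal sketch of how these kernels are reached through the public API, via a SelfAdjointView-times-dense product. The matrix names and sizes below are purely illustrative.

#include <Eigen/Dense>
#include <iostream>

int main()
{
  // Build a symmetric matrix; only its lower triangle is read by the view.
  Eigen::MatrixXd A = Eigen::MatrixXd::Random(4,4);
  Eigen::MatrixXd S = A + A.transpose();
  Eigen::MatrixXd B = Eigen::MatrixXd::Random(4,3);

  // This product is evaluated through selfadjoint_product_impl and
  // product_selfadjoint_matrix from the header above (the selfadjoint
  // operand is on the left, so symm_pack_lhs packs its panels).
  Eigen::MatrixXd C = S.selfadjointView<Eigen::Lower>() * B;
  std::cout << C << "\n";
  return 0;
}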