MatrixVectorProduct.h File Reference

Go to the source code of this file.

Classes

struct  alpha_store< PResPacket, ResPacket, ResScalar, Scalar >
 
struct  loadColData_impl< RhsMapper, linear >
 
struct  loadColData_impl< RhsMapper, true >
 
struct  alpha_store< PResPacket, ResPacket, ResScalar, Scalar >::ri
 
struct  ScalarBlock< Scalar, N >
 

Macros

#define COMPLEX_DELTA
 
#define EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL(Scalar, LhsScalar, RhsScalar)
 
#define EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW(Scalar, LhsScalar, RhsScalar)
 
#define EIGEN_POWER_GEMV_PREFETCH(p)
 
#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL(Scalar)
 
#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL_BFLOAT16()
 
#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW(Scalar)
 
#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW_BFLOAT16()
 
#define gemv_bf16_col
 
#define gemv_bf16_row
 
#define GEMV_BUILDPAIR_MMA(dst, src1, src2)
 
#define GEMV_GETN(N)
 
#define GEMV_GETN_COMPLEX(N)
 
#define GEMV_INIT(iter, N)
 
#define GEMV_INIT_COMPLEX(iter, N)
 
#define GEMV_INIT_COMPLEX_OLD(iter, N)
 
#define GEMV_INIT_ROW(iter, N)
 
#define GEMV_IS_COMPLEX_COMPLEX
 
#define GEMV_IS_COMPLEX_FLOAT
 
#define GEMV_IS_FLOAT
 
#define GEMV_IS_SCALAR
 
#define GEMV_LOADPACKET_COL(iter)
 
#define GEMV_LOADPACKET_COL_COMPLEX(iter)
 
#define GEMV_LOADPACKET_COL_COMPLEX_DATA(iter)
 
#define GEMV_LOADPACKET_ROW(iter)
 
#define GEMV_LOADPACKET_ROW_COMPLEX(iter)
 
#define GEMV_LOADPACKET_ROW_COMPLEX_DATA(iter)
 
#define GEMV_LOADPACKET_ROW_COMPLEX_OLD(iter)
 
#define GEMV_MULT(iter1, iter2, iter3, N)
 
#define GEMV_MULT_COMPLEX(iter1, iter2, iter3, N)
 
#define GEMV_MULT_COMPLEX_COMPLEX(LhsType, RhsType, ResType)
 
#define GEMV_MULT_COMPLEX_REAL(LhsType, RhsType, ResType1, ResType2)
 
#define GEMV_MULT_REAL_COMPLEX(LhsType, RhsType, ResType)
 
#define GEMV_PREDUX2(iter1, iter2, iter3, N)
 
#define GEMV_PREDUX4_COMPLEX(iter1, iter2, iter3, N)
 
#define GEMV_PREDUX4_COMPLEX_OLD(iter1, iter2, iter3, N)
 
#define GEMV_PREFETCH(iter, N)
 
#define GEMV_PROCESS_COL(N)
 
#define GEMV_PROCESS_COL_COMPLEX(N)
 
#define GEMV_PROCESS_COL_COMPLEX_ONE(N)
 
#define GEMV_PROCESS_COL_ONE(N)
 
#define GEMV_PROCESS_END_ROW_COMPLEX(N)
 
#define GEMV_PROCESS_ROW(N)
 
#define GEMV_PROCESS_ROW_COMPLEX(N)
 
#define GEMV_PROCESS_ROW_COMPLEX_IS_NEW
 
#define GEMV_PROCESS_ROW_COMPLEX_ONE(N)
 
#define GEMV_PROCESS_ROW_COMPLEX_ONE_NEW(N)
 
#define GEMV_PROCESS_ROW_COMPLEX_ONE_OLD(N)
 
#define GEMV_PROCESS_ROW_COMPLEX_PREDUX(iter)
 
#define GEMV_PROCESS_ROW_COMPLEX_PREDUX_NEW(iter)
 
#define GEMV_PROCESS_ROW_COMPLEX_PREDUX_OLD(iter)
 
#define GEMV_PROCESS_ROW_COMPLEX_SINGLE(N)
 
#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW(N)
 
#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD(N)
 
#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_WORK(which, N)
 
#define GEMV_STORE_COL(iter, N)
 
#define GEMV_STORE_COL_COMPLEX(iter, N)
 
#define GEMV_STORE_ROW(iter1, iter2, iter3, N)
 
#define GEMV_STORE_ROW_COMPLEX(iter1, iter2, iter3, N)
 
#define GEMV_UNROLL(func, N)
 
#define GEMV_UNROLL_HALF(func, N)
 
#define GEMV_UNROLL_ROW(func, N)
 
#define GEMV_UNROLL_ROW_HALF(func, N)
 
#define GEMV_WORK_COL(iter, N)
 
#define GEMV_WORK_COL_COMPLEX(iter, N)
 
#define GEMV_WORK_ROW(iter, N)
 
#define GEMV_WORK_ROW_COMPLEX(iter, N)
 
#define GEMV_WORK_ROW_COMPLEX_OLD(iter, N)
 
#define MAX_BFLOAT16_VEC_ACC_VSX
 

Functions

template<Index num_acc>
EIGEN_ALWAYS_INLINE void addResultsVSX (Packet4f(&acc)[num_acc][2])
 
template<typename LhsMapper , typename RhsMapper , bool linear>
EIGEN_ALWAYS_INLINE void calcVSXVecColLoops (Index cend, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void calcVSXVecLoops (Index cols, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<const Index num_acc, typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
void colVSXVecColLoopBody (Index &row, Index cend, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
EIGEN_ALWAYS_INLINE void colVSXVecColLoopBodyExtra (Index &row, Index cend, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<const Index num_acc, typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
EIGEN_ALWAYS_INLINE void colVSXVecColLoopBodyExtraN (Index &row, Index cend, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<const Index num_acc, typename LhsMapper , typename RhsMapper >
void colVSXVecLoopBody (Index &row, Index cols, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void colVSXVecLoopBodyExtra (Index &row, Index cols, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<const Index num_acc, typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void colVSXVecLoopBodyExtraN (Index &row, Index cols, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)
 
template<bool inc = false>
EIGEN_ALWAYS_INLINE void convertArrayPointerF32toBF16VSX (float *result, Index rows, bfloat16 *dst, Index resInc=1)
 
template<const Index size, bool inc = false>
EIGEN_ALWAYS_INLINE void convertPointerF32toBF16VSX (Index &i, float *result, Index rows, bfloat16 *&dst, Index resInc=1)
 
template<typename LhsMapper , typename RhsMapper >
void gemv_bfloat16_col (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, bfloat16 *res, Index resIncr, bfloat16 alpha)
 
template<typename LhsMapper , typename RhsMapper >
void gemv_bfloat16_row (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, bfloat16 *res, Index resIncr, bfloat16 alpha)
 
template<typename LhsScalar , typename LhsMapper , typename RhsScalar , typename RhsMapper , typename ResScalar >
void gemv_col (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, ResScalar *res, Index resIncr, ResScalar alpha)
 
template<typename Scalar , typename LhsScalar , typename LhsMapper , bool ConjugateLhs, bool LhsIsReal, typename RhsScalar , typename RhsMapper , bool ConjugateRhs, bool RhsIsReal, typename ResScalar >
void gemv_complex_col (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, ResScalar *res, Index resIncr, ResScalar alpha)
 
template<typename Scalar , typename LhsScalar , typename LhsMapper , bool ConjugateLhs, bool LhsIsReal, typename RhsScalar , typename RhsMapper , bool ConjugateRhs, bool RhsIsReal, typename ResScalar >
void gemv_complex_row (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, ResScalar *res, Index resIncr, ResScalar alpha)
 
template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_complex_complex (LhsPacket &a0, RhsScalar *b, PResPacket &c0, ResPacket &c1)
 
template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_complex_real (LhsPacket &a0, RhsScalar *b, PResPacket &c0)
 
template<typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_generic (LhsPacket &a0, RhsScalar *b, PResPacket &c0)
 
template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_real_complex (LhsPacket &a0, RhsScalar *b, PResPacket &c0)
 
template<typename LhsScalar , typename LhsMapper , typename RhsScalar , typename RhsMapper , typename ResScalar >
void gemv_row (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, ResScalar *res, Index resIncr, ResScalar alpha)
 
template<typename RhsMapper , bool linear>
EIGEN_ALWAYS_INLINE Packet8bf loadColData (RhsMapper &rhs, Index j)
 
template<typename Scalar , typename LhsScalar , typename LhsMapper , typename LhsPacket >
EIGEN_ALWAYS_INLINE LhsPacket loadLhsPacket (LhsMapper &lhs, Index i, Index j)
 
EIGEN_ALWAYS_INLINE Packet8us loadPacketPartialZero (Packet8us data, Index extra_cols)
 
template<Index num_acc, typename LhsMapper , bool zero>
EIGEN_ALWAYS_INLINE void loadVecLoopVSX (Index k, LhsMapper &lhs, Packet4f(&a0)[num_acc][2])
 
template<Index num_acc, bool zero>
EIGEN_ALWAYS_INLINE void multVecVSX (Packet4f(&acc)[num_acc][2], Packet4f(&a0)[num_acc][2], Packet4f(&b0)[2])
 
template<Index num_acc, typename LhsMapper , typename RhsMapper , bool extra>
EIGEN_ALWAYS_INLINE void multVSXVecLoop (Packet4f(&acc)[num_acc][2], const LhsMapper &lhs, RhsMapper &rhs, Index j, Index extra_cols)
 
template<bool extraRows>
EIGEN_ALWAYS_INLINE void outputVecCol (Packet4f acc, float *result, Packet4f pAlpha, Index extra_rows)
 
template<Index num_acc, bool extraRows, Index size>
EIGEN_ALWAYS_INLINE void outputVecColResults (Packet4f(&acc)[num_acc][size], float *result, Packet4f pAlpha, Index extra_rows)
 
template<Index num_acc, Index size>
EIGEN_ALWAYS_INLINE void outputVecResults (Packet4f(&acc)[num_acc][size], float *result, Packet4f pAlpha)
 
EIGEN_ALWAYS_INLINE Packet1cd padd (Packet1cd &a, std::complex< double > &b)
 
EIGEN_ALWAYS_INLINE Packet2cf padd (Packet2cf &a, std::complex< float > &b)
 
EIGEN_ALWAYS_INLINE Packet1cd pconj2 (const Packet1cd &a)
 
EIGEN_ALWAYS_INLINE Packet2cf pconj2 (const Packet2cf &a)
 
EIGEN_ALWAYS_INLINE Packet1cd pconjinv (const Packet1cd &a)
 
EIGEN_ALWAYS_INLINE Packet2cf pconjinv (const Packet2cf &a)
 
EIGEN_ALWAYS_INLINE Packet1cd pcplxconjflip (Packet1cd a)
 
EIGEN_ALWAYS_INLINE Packet2cf pcplxconjflip (Packet2cf a)
 
EIGEN_ALWAYS_INLINE Packet1cd pcplxflip2 (Packet1cd a)
 
EIGEN_ALWAYS_INLINE Packet2cf pcplxflip2 (Packet2cf a)
 
EIGEN_ALWAYS_INLINE Packet1cd pcplxflipconj (Packet1cd a)
 
EIGEN_ALWAYS_INLINE Packet2cf pcplxflipconj (Packet2cf a)
 
EIGEN_ALWAYS_INLINE Packet1cd pcplxflipnegate (Packet1cd a)
 
EIGEN_ALWAYS_INLINE Packet2cf pcplxflipnegate (Packet2cf a)
 
template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d pload_complex (Packet1cd *src)
 
template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f pload_complex (Packet2cf *src)
 
template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d pload_complex (std::complex< double > *src)
 
template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f pload_complex (std::complex< float > *src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_complex_full (std::complex< double > *src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_complex_full (std::complex< float > *src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_complex_full_row (std::complex< double > *src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_complex_full_row (std::complex< float > *src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_complex_half (std::complex< float > *src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_real (double *src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_real (float *src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_real (Packet2d &src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_real (Packet4f &src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_real_full (double *src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_real_full (float *src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_real_full (std::complex< double > *src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_real_full (std::complex< float > *src)
 
template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d pload_real_row (double *src)
 
template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f pload_real_row (float *src)
 
template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag (RhsScalar *src, Packet2d &r, Packet2d &i)
 
template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag (RhsScalar *src, Packet4f &r, Packet4f &i)
 
EIGEN_ALWAYS_INLINE Packet2d pload_realimag_combine (std::complex< double > *src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_realimag_combine (std::complex< float > *src)
 
EIGEN_ALWAYS_INLINE Packet2d pload_realimag_combine_row (std::complex< double > *src)
 
EIGEN_ALWAYS_INLINE Packet4f pload_realimag_combine_row (std::complex< float > *src)
 
template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag_row (RhsScalar *src, Packet2d &r, Packet2d &i)
 
template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag_row (RhsScalar *src, Packet4f &r, Packet4f &i)
 
template<typename ScalarPacket , typename AlphaData >
EIGEN_ALWAYS_INLINE ScalarPacket pmadd_complex (ScalarPacket &c0, ScalarPacket &c2, ScalarPacket &c4, AlphaData &b0)
 
template<typename ComplexPacket , typename RealPacket , bool ConjugateLhs, bool ConjugateRhs, bool Negate>
EIGEN_ALWAYS_INLINE RealPacket pmadd_complex_complex (RealPacket &a, RealPacket &b, RealPacket &c)
 
template<typename ComplexPacket , typename RealPacket , bool Conjugate>
EIGEN_ALWAYS_INLINE RealPacket pmadd_complex_real (RealPacket &a, RealPacket &b, RealPacket &c)
 
EIGEN_ALWAYS_INLINE Packet1cd pnegate2 (Packet1cd a)
 
EIGEN_ALWAYS_INLINE Packet2cf pnegate2 (Packet2cf a)
 
template<typename ResScalar , typename PResPacket , typename ResPacket , typename LhsPacket , typename RhsPacket >
EIGEN_ALWAYS_INLINE ScalarBlock< ResScalar, 2 > predux_complex (PResPacket &a0, PResPacket &b0, ResPacket &a1, ResPacket &b1)
 
template<typename ResScalar , typename ResPacket >
EIGEN_ALWAYS_INLINE ScalarBlock< ResScalar, 2 > predux_complex (ResPacket &a, ResPacket &b)
 
template<typename ResScalar , typename ResPacket >
EIGEN_ALWAYS_INLINE ScalarBlock< ResScalar, 2 > predux_real (ResPacket &a, ResPacket &b)
 
template<Index num_acc>
EIGEN_ALWAYS_INLINE void preduxVecResults2VSX (Packet4f(&acc)[num_acc][2], Index k)
 
template<Index num_acc>
EIGEN_ALWAYS_INLINE void preduxVecResultsVSX (Packet4f(&acc)[num_acc][2])
 
template<typename Scalar , typename ResScalar , typename ResPacket , int which>
EIGEN_ALWAYS_INLINE Packet1cd pset1_complex (std::complex< double > &alpha)
 
template<typename Scalar , typename ResScalar , typename ResPacket , int which>
EIGEN_ALWAYS_INLINE Packet2cf pset1_complex (std::complex< float > &alpha)
 
template<typename Scalar , typename ResScalar >
EIGEN_ALWAYS_INLINE Scalar pset1_realimag (ResScalar &alpha, int which, int conj)
 
template<typename Packet , typename LhsPacket , typename RhsPacket >
EIGEN_ALWAYS_INLINE Packet pset_init (Packet &c1)
 
template<typename Packet >
EIGEN_ALWAYS_INLINE Packet pset_zero ()
 
template<>
EIGEN_ALWAYS_INLINE Packet1cd pset_zero< Packet1cd > ()
 
template<>
EIGEN_ALWAYS_INLINE Packet2cf pset_zero< Packet2cf > ()
 
template<typename Scalar , typename ScalarPacket , typename PResPacket , typename ResPacket , typename ResScalar , typename AlphaData >
EIGEN_ALWAYS_INLINE void pstoreu_pmadd_complex (PResPacket &c0, AlphaData &b0, ResScalar *res)
 
template<typename ScalarPacket , typename PResPacket , typename ResPacket , typename ResScalar , typename AlphaData , Index ResPacketSize, Index iter2>
EIGEN_ALWAYS_INLINE void pstoreu_pmadd_complex (PResPacket &c0, PResPacket &c1, AlphaData &b0, ResScalar *res)
 
template<const Index size, bool inc, Index delta>
EIGEN_ALWAYS_INLINE void storeBF16fromResult (bfloat16 *dst, Packet8bf data, Index resInc, Index extra)
 
template<typename ResPacket , typename ResScalar >
EIGEN_ALWAYS_INLINE void storeMaddData (ResScalar *res, ResPacket &palpha, ResPacket &data)
 
template<typename ResScalar >
EIGEN_ALWAYS_INLINE void storeMaddData (ResScalar *res, ResScalar &alpha, ResScalar &data)
 
template<Index num_acc, typename LhsMapper , typename RhsMapper , bool zero, bool linear>
EIGEN_ALWAYS_INLINE void vecColLoopVSX (Index j, LhsMapper &lhs, RhsMapper &rhs, Packet4f(&acc)[num_acc][2])
 
template<Index num_acc, typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void vecVSXLoop (Index cols, const LhsMapper &lhs, RhsMapper &rhs, Packet4f(&acc)[num_acc][2], Index extra_cols)
 

Variables

const Packet16uc p16uc_COMPLEX32_CONJ_XOR
 
const Packet16uc p16uc_COMPLEX32_CONJ_XOR2
 
const Packet16uc p16uc_COMPLEX32_NEGATE
 
const Packet16uc p16uc_COMPLEX32_XORFLIP
 
const Packet16uc p16uc_COMPLEX64_CONJ_XOR
 
const Packet16uc p16uc_COMPLEX64_CONJ_XOR2
 
const Packet16uc p16uc_COMPLEX64_NEGATE
 
const Packet16uc p16uc_COMPLEX64_XORFLIP
 
static Packet16uc p16uc_MERGE16_32_V1
 
static Packet16uc p16uc_MERGE16_32_V2
 
const Packet16uc p16uc_MERGEE
 
const Packet16uc p16uc_MERGEO
 

Macro Definition Documentation

◆ COMPLEX_DELTA

#define COMPLEX_DELTA

Definition at line 1008 of file MatrixVectorProduct.h.

◆ EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL

#define EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL (   Scalar,
  LhsScalar,
  RhsScalar 
)

Definition at line 2912 of file MatrixVectorProduct.h.

◆ EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW

#define EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW (   Scalar,
  LhsScalar,
  RhsScalar 
)

Definition at line 2928 of file MatrixVectorProduct.h.

◆ EIGEN_POWER_GEMV_PREFETCH

#define EIGEN_POWER_GEMV_PREFETCH (   p)

Definition at line 32 of file MatrixVectorProduct.h.

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL

#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL (   Scalar)

Definition at line 2546 of file MatrixVectorProduct.h.

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL_BFLOAT16

#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL_BFLOAT16 ( )

Definition at line 2591 of file MatrixVectorProduct.h.

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW

#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW (   Scalar)

Definition at line 2562 of file MatrixVectorProduct.h.

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW_BFLOAT16

#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW_BFLOAT16 ( )

Definition at line 2605 of file MatrixVectorProduct.h.

◆ gemv_bf16_col

#define gemv_bf16_col

Definition at line 2587 of file MatrixVectorProduct.h.

◆ gemv_bf16_row

#define gemv_bf16_row

Definition at line 2588 of file MatrixVectorProduct.h.

◆ GEMV_BUILDPAIR_MMA

#define GEMV_BUILDPAIR_MMA (   dst,
  src1,
  src2 
)

Definition at line 53 of file MatrixVectorProduct.h.

◆ GEMV_GETN

#define GEMV_GETN (   N)

Definition at line 87 of file MatrixVectorProduct.h.

◆ GEMV_GETN_COMPLEX

#define GEMV_GETN_COMPLEX (   N)

Definition at line 1918 of file MatrixVectorProduct.h.

◆ GEMV_INIT

#define GEMV_INIT (   iter,
  N 
)

Definition at line 299 of file MatrixVectorProduct.h.

◆ GEMV_INIT_COMPLEX

#define GEMV_INIT_COMPLEX (   iter,
  N 
)

Definition at line 2075 of file MatrixVectorProduct.h.

◆ GEMV_INIT_COMPLEX_OLD

#define GEMV_INIT_COMPLEX_OLD (   iter,
  N 
)

Definition at line 2756 of file MatrixVectorProduct.h.

◆ GEMV_INIT_ROW

#define GEMV_INIT_ROW (   iter,
  N 
)

Definition at line 2426 of file MatrixVectorProduct.h.

◆ GEMV_IS_COMPLEX_COMPLEX

#define GEMV_IS_COMPLEX_COMPLEX

Definition at line 62 of file MatrixVectorProduct.h.

◆ GEMV_IS_COMPLEX_FLOAT

#define GEMV_IS_COMPLEX_FLOAT

Definition at line 65 of file MatrixVectorProduct.h.

◆ GEMV_IS_FLOAT

#define GEMV_IS_FLOAT

Definition at line 63 of file MatrixVectorProduct.h.

◆ GEMV_IS_SCALAR

#define GEMV_IS_SCALAR

Definition at line 64 of file MatrixVectorProduct.h.

◆ GEMV_LOADPACKET_COL

#define GEMV_LOADPACKET_COL (   iter)

Definition at line 89 of file MatrixVectorProduct.h.

◆ GEMV_LOADPACKET_COL_COMPLEX

#define GEMV_LOADPACKET_COL_COMPLEX (   iter)

Definition at line 1920 of file MatrixVectorProduct.h.

◆ GEMV_LOADPACKET_COL_COMPLEX_DATA

#define GEMV_LOADPACKET_COL_COMPLEX_DATA (   iter)

Definition at line 1923 of file MatrixVectorProduct.h.

◆ GEMV_LOADPACKET_ROW

#define GEMV_LOADPACKET_ROW (   iter)

Definition at line 2385 of file MatrixVectorProduct.h.

◆ GEMV_LOADPACKET_ROW_COMPLEX

#define GEMV_LOADPACKET_ROW_COMPLEX (   iter)

Definition at line 2632 of file MatrixVectorProduct.h.

◆ GEMV_LOADPACKET_ROW_COMPLEX_DATA

#define GEMV_LOADPACKET_ROW_COMPLEX_DATA (   iter)

Definition at line 2635 of file MatrixVectorProduct.h.

◆ GEMV_LOADPACKET_ROW_COMPLEX_OLD

#define GEMV_LOADPACKET_ROW_COMPLEX_OLD (   iter)

Definition at line 2753 of file MatrixVectorProduct.h.

◆ GEMV_MULT

#define GEMV_MULT (   iter1,
  iter2,
  iter3,
  N 
)

Definition at line 2446 of file MatrixVectorProduct.h.

◆ GEMV_MULT_COMPLEX

#define GEMV_MULT_COMPLEX (   iter1,
  iter2,
  iter3,
  N 
)

Definition at line 2710 of file MatrixVectorProduct.h.

◆ GEMV_MULT_COMPLEX_COMPLEX

#define GEMV_MULT_COMPLEX_COMPLEX (   LhsType,
  RhsType,
  ResType 
)

Definition at line 1581 of file MatrixVectorProduct.h.

◆ GEMV_MULT_COMPLEX_REAL

#define GEMV_MULT_COMPLEX_REAL (   LhsType,
  RhsType,
  ResType1,
  ResType2 
)

Definition at line 1603 of file MatrixVectorProduct.h.

◆ GEMV_MULT_REAL_COMPLEX

#define GEMV_MULT_REAL_COMPLEX (   LhsType,
  RhsType,
  ResType 
)

Definition at line 1591 of file MatrixVectorProduct.h.

◆ GEMV_PREDUX2

#define GEMV_PREDUX2 (   iter1,
  iter2,
  iter3,
  N 
)

Definition at line 2438 of file MatrixVectorProduct.h.

◆ GEMV_PREDUX4_COMPLEX

#define GEMV_PREDUX4_COMPLEX (   iter1,
  iter2,
  iter3,
  N 
)

Definition at line 2703 of file MatrixVectorProduct.h.

◆ GEMV_PREDUX4_COMPLEX_OLD

#define GEMV_PREDUX4_COMPLEX_OLD (   iter1,
  iter2,
  iter3,
  N 
)

Definition at line 2770 of file MatrixVectorProduct.h.

◆ GEMV_PREFETCH

#define GEMV_PREFETCH (   iter,
  N 
)

Definition at line 312 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_COL

#define GEMV_PROCESS_COL (   N)

Definition at line 341 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_COL_COMPLEX

#define GEMV_PROCESS_COL_COMPLEX (   N)

Definition at line 2129 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_COL_COMPLEX_ONE

#define GEMV_PROCESS_COL_COMPLEX_ONE (   N)

Definition at line 2101 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_COL_ONE

#define GEMV_PROCESS_COL_ONE (   N)

Definition at line 326 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_END_ROW_COMPLEX

#define GEMV_PROCESS_END_ROW_COMPLEX (   N)

Definition at line 2646 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW

#define GEMV_PROCESS_ROW (   N)

Definition at line 2459 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX

#define GEMV_PROCESS_ROW_COMPLEX (   N)

Definition at line 2829 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_IS_NEW

#define GEMV_PROCESS_ROW_COMPLEX_IS_NEW

Definition at line 2799 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_ONE

#define GEMV_PROCESS_ROW_COMPLEX_ONE (   N)

Definition at line 2810 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_ONE_NEW

#define GEMV_PROCESS_ROW_COMPLEX_ONE_NEW (   N)

Definition at line 2727 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_ONE_OLD

#define GEMV_PROCESS_ROW_COMPLEX_ONE_OLD (   N)

Definition at line 2786 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_PREDUX

#define GEMV_PROCESS_ROW_COMPLEX_PREDUX (   iter)

Definition at line 2817 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_PREDUX_NEW

#define GEMV_PROCESS_ROW_COMPLEX_PREDUX_NEW (   iter)

Definition at line 2734 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_PREDUX_OLD

#define GEMV_PROCESS_ROW_COMPLEX_PREDUX_OLD (   iter)

Definition at line 2793 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE

#define GEMV_PROCESS_ROW_COMPLEX_SINGLE (   N)

Definition at line 2803 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW

#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW (   N)

Definition at line 2722 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD

#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD (   N)

Definition at line 2778 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE_WORK

#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_WORK (   which,
  N 
)

Definition at line 2638 of file MatrixVectorProduct.h.

◆ GEMV_STORE_COL

#define GEMV_STORE_COL (   iter,
  N 
)

Definition at line 320 of file MatrixVectorProduct.h.

◆ GEMV_STORE_COL_COMPLEX

#define GEMV_STORE_COL_COMPLEX (   iter,
  N 
)

Definition at line 2092 of file MatrixVectorProduct.h.

◆ GEMV_STORE_ROW

#define GEMV_STORE_ROW (   iter1,
  iter2,
  iter3,
  N 
)

Definition at line 2452 of file MatrixVectorProduct.h.

◆ GEMV_STORE_ROW_COMPLEX

#define GEMV_STORE_ROW_COMPLEX (   iter1,
  iter2,
  iter3,
  N 
)

Definition at line 2716 of file MatrixVectorProduct.h.

◆ GEMV_UNROLL

#define GEMV_UNROLL (   func,
  N 
)

Definition at line 80 of file MatrixVectorProduct.h.

◆ GEMV_UNROLL_HALF

#define GEMV_UNROLL_HALF (   func,
  N 
)

Definition at line 84 of file MatrixVectorProduct.h.

◆ GEMV_UNROLL_ROW

#define GEMV_UNROLL_ROW (   func,
  N 
)

Definition at line 2379 of file MatrixVectorProduct.h.

◆ GEMV_UNROLL_ROW_HALF

#define GEMV_UNROLL_ROW_HALF (   func,
  N 
)

Definition at line 2382 of file MatrixVectorProduct.h.

◆ GEMV_WORK_COL

#define GEMV_WORK_COL (   iter,
  N 
)

Definition at line 315 of file MatrixVectorProduct.h.

◆ GEMV_WORK_COL_COMPLEX

#define GEMV_WORK_COL_COMPLEX (   iter,
  N 
)

Definition at line 2084 of file MatrixVectorProduct.h.

◆ GEMV_WORK_ROW

#define GEMV_WORK_ROW (   iter,
  N 
)

Definition at line 2433 of file MatrixVectorProduct.h.

◆ GEMV_WORK_ROW_COMPLEX

#define GEMV_WORK_ROW_COMPLEX (   iter,
  N 
)

Definition at line 2697 of file MatrixVectorProduct.h.

◆ GEMV_WORK_ROW_COMPLEX_OLD

#define GEMV_WORK_ROW_COMPLEX_OLD (   iter,
  N 
)

Definition at line 2764 of file MatrixVectorProduct.h.

◆ MAX_BFLOAT16_VEC_ACC_VSX

#define MAX_BFLOAT16_VEC_ACC_VSX

Definition at line 578 of file MatrixVectorProduct.h.

Function Documentation

◆ addResultsVSX()

template<Index num_acc>
EIGEN_ALWAYS_INLINE void addResultsVSX ( Packet4f(&)  acc[num_acc][2])

Definition at line 570 of file MatrixVectorProduct.h.

571 {
572  for(Index i = 0; i < num_acc; i++) {
573  acc[i][0] = acc[i][0] + acc[i][1];
574  }
575 }
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:82

◆ calcVSXVecColLoops()

template<typename LhsMapper , typename RhsMapper , bool linear>
EIGEN_ALWAYS_INLINE void calcVSXVecColLoops ( Index  cend,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)

Definition at line 650 of file MatrixVectorProduct.h.

651 {
652  Index row = 0;
653  if (rows >= (MAX_BFLOAT16_VEC_ACC_VSX * 4)) {
654  colVSXVecColLoopBody<MAX_BFLOAT16_VEC_ACC_VSX, LhsMapper, RhsMapper, false, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
655  result += row;
656  }
657  if (rows & 3) {
658  colVSXVecColLoopBodyExtra<LhsMapper, RhsMapper, true, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
659  } else {
660  colVSXVecColLoopBodyExtra<LhsMapper, RhsMapper, false, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
661  }
662 }
RowXpr row(Index i)
This is the const version of row().
#define MAX_BFLOAT16_VEC_ACC_VSX

◆ calcVSXVecLoops()

template<typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void calcVSXVecLoops ( Index  cols,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)

Definition at line 940 of file MatrixVectorProduct.h.

941 {
942  Index row = 0;
943  if (rows >= MAX_BFLOAT16_VEC_ACC_VSX) {
944  colVSXVecLoopBody<MAX_BFLOAT16_VEC_ACC_VSX, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
945  result += row;
946  }
947  colVSXVecLoopBodyExtra<LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
948 }

◆ colVSXVecColLoopBody()

template<const Index num_acc, typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
void colVSXVecColLoopBody ( Index &  row,
Index  cend,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)

Definition at line 581 of file MatrixVectorProduct.h.

582 {
583  constexpr Index step = (num_acc * 4);
584  const Index extra_rows = (extraRows) ? (rows & 3) : 0;
585  constexpr bool multiIters = !extraRows && (num_acc == MAX_BFLOAT16_VEC_ACC_VSX);
586 
587  do{
588  Packet4f acc[num_acc][2];
589 
590  zeroAccumulators<num_acc, 2>(acc);
591 
592  LhsMapper lhs2 = lhs.getSubMapper(row, 0);
593  for(Index j = 0; j + 2 <= cend; j += 2) {
594  vecColLoopVSX<num_acc, LhsMapper, RhsMapper, false, linear>(j, lhs2, rhs, acc);
595  }
596  if (cend & 1) {
597  vecColLoopVSX<num_acc, LhsMapper, RhsMapper, true, linear>(cend - 1, lhs2, rhs, acc);
598  }
599 
600  addResultsVSX<num_acc>(acc);
601 
602  outputVecColResults<num_acc, extraRows, 2>(acc, result, pAlpha, extra_rows);
603 
604  result += step;
605  } while(multiIters && (step <= rows - (row += step)));
606 }
__vector float Packet4f
std::ptrdiff_t j

◆ colVSXVecColLoopBodyExtra()

template<typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
EIGEN_ALWAYS_INLINE void colVSXVecColLoopBodyExtra ( Index &  row,
Index  cend,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)

Definition at line 617 of file MatrixVectorProduct.h.

618 {
619  switch ((rows - row) >> 2) {
620  case 7:
621  colVSXVecColLoopBodyExtraN<7, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
622  break;
623  case 6:
624  colVSXVecColLoopBodyExtraN<6, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
625  break;
626  case 5:
627  colVSXVecColLoopBodyExtraN<5, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
628  break;
629  case 4:
630  colVSXVecColLoopBodyExtraN<4, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
631  break;
632  case 3:
633  colVSXVecColLoopBodyExtraN<3, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
634  break;
635  case 2:
636  colVSXVecColLoopBodyExtraN<2, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
637  break;
638  case 1:
639  colVSXVecColLoopBodyExtraN<1, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
640  break;
641  default:
642  if (extraRows) {
643  colVSXVecColLoopBody<1, LhsMapper, RhsMapper, true, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
644  }
645  break;
646  }
647 }

◆ colVSXVecColLoopBodyExtraN()

template<const Index num_acc, typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
EIGEN_ALWAYS_INLINE void colVSXVecColLoopBodyExtraN ( Index &  row,
Index  cend,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)

Definition at line 609 of file MatrixVectorProduct.h.

610 {
611  if (MAX_BFLOAT16_VEC_ACC_VSX > num_acc) {
612  colVSXVecColLoopBody<num_acc + (extraRows ? 1 : 0), LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
613  }
614 }

◆ colVSXVecLoopBody()

template<const Index num_acc, typename LhsMapper , typename RhsMapper >
void colVSXVecLoopBody ( Index &  row,
Index  cols,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)

Definition at line 880 of file MatrixVectorProduct.h.

881 {
882  constexpr bool multiIters = (num_acc == MAX_BFLOAT16_VEC_ACC_VSX);
883  const Index extra_cols = (cols & 7);
884 
885  do{
886  Packet4f acc[num_acc][2];
887 
888  zeroAccumulators<num_acc, 2>(acc);
889 
890  const LhsMapper lhs2 = lhs.getSubMapper(row, 0);
891  vecVSXLoop<num_acc, LhsMapper, RhsMapper>(cols, lhs2, rhs, acc, extra_cols);
892 
893  addResultsVSX<num_acc>(acc);
894 
895  preduxVecResultsVSX<num_acc>(acc);
896 
897  outputVecResults<num_acc, 2>(acc, result, pAlpha);
898 
899  result += num_acc;
900  } while(multiIters && (num_acc <= rows - (row += num_acc)));
901 }

◆ colVSXVecLoopBodyExtra()

template<typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void colVSXVecLoopBodyExtra ( Index &  row,
Index  cols,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)

Definition at line 912 of file MatrixVectorProduct.h.

913 {
914  switch (rows - row) {
915  case 7:
916  colVSXVecLoopBodyExtraN<7, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
917  break;
918  case 6:
919  colVSXVecLoopBodyExtraN<6, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
920  break;
921  case 5:
922  colVSXVecLoopBodyExtraN<5, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
923  break;
924  case 4:
925  colVSXVecLoopBodyExtraN<4, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
926  break;
927  case 3:
928  colVSXVecLoopBodyExtraN<3, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
929  break;
930  case 2:
931  colVSXVecLoopBodyExtraN<2, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
932  break;
933  case 1:
934  colVSXVecLoopBodyExtraN<1, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
935  break;
936  }
937 }

◆ colVSXVecLoopBodyExtraN()

template<const Index num_acc, typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void colVSXVecLoopBodyExtraN ( Index &  row,
Index  cols,
Index  rows,
LhsMapper &  lhs,
RhsMapper &  rhs,
const Packet4f  pAlpha,
float *  result 
)

Definition at line 904 of file MatrixVectorProduct.h.

905 {
906  if (MAX_BFLOAT16_VEC_ACC_VSX > num_acc) {
907  colVSXVecLoopBody<num_acc, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
908  }
909 }

◆ convertArrayPointerF32toBF16VSX()

template<bool inc = false>
EIGEN_ALWAYS_INLINE void convertArrayPointerF32toBF16VSX ( float *  result,
Index  rows,
bfloat16 *  dst,
Index  resInc = 1 
)

Definition at line 710 of file MatrixVectorProduct.h.

711 {
712  Index i = 0;
713  convertPointerF32toBF16VSX<32,inc>(i, result, rows, dst, resInc);
714  convertPointerF32toBF16VSX<16,inc>(i, result, rows, dst, resInc);
715  convertPointerF32toBF16VSX<8,inc>(i, result, rows, dst, resInc);
716  convertPointerF32toBF16VSX<1,inc>(i, result, rows, dst, resInc);
717 }

◆ convertPointerF32toBF16VSX()

template<const Index size, bool inc = false>
EIGEN_ALWAYS_INLINE void convertPointerF32toBF16VSX ( Index &  i,
float *  result,
Index  rows,
bfloat16 *&  dst,
Index  resInc = 1 
)

Definition at line 683 of file MatrixVectorProduct.h.

684 {
685  constexpr Index extra = ((size < 8) ? 8 : size);
686  while (i + size <= rows) {
687  PacketBlock<Packet8bf,(size+7)/8> r32;
688  r32.packet[0] = convertF32toBF16VSX(result + i + 0);
689  if (size >= 16) {
690  r32.packet[1] = convertF32toBF16VSX(result + i + 8);
691  }
692  if (size >= 32) {
693  r32.packet[2] = convertF32toBF16VSX(result + i + 16);
694  r32.packet[3] = convertF32toBF16VSX(result + i + 24);
695  }
696  storeBF16fromResult<size, inc, 0>(dst, r32.packet[0], resInc, rows & 7);
697  if (size >= 16) {
698  storeBF16fromResult<size, inc, 8>(dst, r32.packet[1], resInc);
699  }
700  if (size >= 32) {
701  storeBF16fromResult<size, inc, 16>(dst, r32.packet[2], resInc);
702  storeBF16fromResult<size, inc, 24>(dst, r32.packet[3], resInc);
703  }
704  i += extra; dst += extra*resInc;
705  if (size != 32) break;
706  }
707 }
eigen_packet_wrapper< __vector unsigned short int, 0 > Packet8bf
EIGEN_ALWAYS_INLINE Packet8bf convertF32toBF16VSX(const float *res)

◆ gemv_bfloat16_col()

template<typename LhsMapper , typename RhsMapper >
void gemv_bfloat16_col ( Index  rows,
Index  cols,
const LhsMapper &  alhs,
const RhsMapper &  rhs,
bfloat16 *  res,
Index  resIncr,
bfloat16  alpha 
)

Definition at line 720 of file MatrixVectorProduct.h.

726 {
727  typedef typename RhsMapper::LinearMapper LinearMapper;
728 
729  EIGEN_UNUSED_VARIABLE(resIncr);
730  eigen_internal_assert(resIncr == 1);
731 
732  // The following copy tells the compiler that lhs's attributes are not modified outside this function
733  // This helps GCC to generate proper code.
734  LhsMapper lhs(alhs);
735  RhsMapper rhs2(rhs);
736 
737  const Index lhsStride = lhs.stride();
738 
739  // TODO: improve the following heuristic:
740  const Index block_cols = cols < 128 ? cols : (lhsStride * sizeof(bfloat16) < 16000 ? 16 : 8);
741  float falpha = Eigen::bfloat16_impl::bfloat16_to_float(alpha);
742  Packet4f pAlpha = pset1<Packet4f>(falpha);
743 
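744  ei_declare_aligned_stack_constructed_variable(float, result, rows, 0);
745 
746  convertArrayPointerBF16toF32(result, 1, rows, res);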
747 
748  for (Index j2 = 0; j2 < cols; j2 += block_cols)
749  {
750  Index jend = numext::mini(j2 + block_cols, cols);
751 
752  LhsMapper lhs2 = lhs.getSubMapper(0, j2);
753  if (rhs.stride() == 1) {
754  LinearMapper rhs3 = rhs2.getLinearMapper(j2, 0);
755  calcVSXVecColLoops<LhsMapper, LinearMapper, true>(jend - j2, rows, lhs2, rhs3, pAlpha, result);
756  } else {
757  RhsMapper rhs3 = rhs2.getSubMapper(j2, 0);
758  calcVSXVecColLoops<LhsMapper, RhsMapper, false>(jend - j2, rows, lhs2, rhs3, pAlpha, result);
759  }
760  }
761 
762  convertArrayPointerF32toBF16VSX(result, rows, res);
763 }
#define eigen_internal_assert(x)
Definition: Macros.h:908
#define EIGEN_UNUSED_VARIABLE(var)
Definition: Macros.h:957
EIGEN_ALWAYS_INLINE void convertArrayPointerF32toBF16VSX(float *result, Index rows, bfloat16 *dst, Index resInc=1)
#define ei_declare_aligned_stack_constructed_variable(TYPE, NAME, SIZE, BUFFER)
Definition: Memory.h:847
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
float bfloat16_to_float(__bfloat16_raw h)
Definition: BFloat16.h:571
EIGEN_ALWAYS_INLINE void convertArrayPointerBF16toF32(float *result, Index cols, Index rows, bfloat16 *src, Index resInc)
Packet4f pset1< Packet4f >(const float &from)
EIGEN_ALWAYS_INLINE T mini(const T &x, const T &y)

◆ gemv_bfloat16_row()

template<typename LhsMapper , typename RhsMapper >
void gemv_bfloat16_row ( Index  rows,
Index  cols,
const LhsMapper &  alhs,
const RhsMapper &  rhs,
bfloat16 *  res,
Index  resIncr,
bfloat16  alpha 
)
inline

Definition at line 951 of file MatrixVectorProduct.h.

957 {
958  typedef typename RhsMapper::LinearMapper LinearMapper;
959 
960  // The following copy tells the compiler that lhs's attributes are not modified outside this function
961  // This helps GCC to generate proper code.
962  LhsMapper lhs(alhs);
963  LinearMapper rhs2 = rhs.getLinearMapper(0, 0);
964 
965  eigen_internal_assert(rhs.stride() == 1);
966 
967  float falpha = Eigen::bfloat16_impl::bfloat16_to_float(alpha);
968  const Packet4f pAlpha = pset1<Packet4f>(falpha);
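969 
970  ei_declare_aligned_stack_constructed_variable(float, result, rows, 0);
971  if (resIncr == 1) {
972  convertArrayPointerBF16toF32(result, 1, rows, res);
973  } else {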
974  convertArrayPointerBF16toF32<true>(result, 1, rows, res, resIncr);
975  }
976  calcVSXVecLoops<LhsMapper, LinearMapper>(cols, rows, lhs, rhs2, pAlpha, result);
977  if (resIncr == 1) {
978  convertArrayPointerF32toBF16VSX(result, rows, res);
979  } else {
980  convertArrayPointerF32toBF16VSX<true>(result, rows, res, resIncr);
981  }
982 }

◆ gemv_col()

template<typename LhsScalar , typename LhsMapper , typename RhsScalar , typename RhsMapper , typename ResScalar >
void gemv_col ( Index  rows,
Index  cols,
const LhsMapper &  alhs,
const RhsMapper &  rhs,
ResScalar *  res,
Index  resIncr,
ResScalar  alpha 
)
inline

Definition at line 376 of file MatrixVectorProduct.h.

382 {
383  typedef gemv_traits<LhsScalar, RhsScalar> Traits;
384 
385  typedef typename Traits::LhsPacket LhsPacket;
386  typedef typename Traits::RhsPacket RhsPacket;
387  typedef typename Traits::ResPacket ResPacket;
388 
389  EIGEN_UNUSED_VARIABLE(resIncr);
390  eigen_internal_assert(resIncr == 1);
391 
392  // The following copy tells the compiler that lhs's attributes are not modified outside this function
393  // This helps GCC to generate proper code.
394  LhsMapper lhs(alhs);
395  RhsMapper rhs2(rhs);
396 
397  conj_helper<LhsScalar, RhsScalar, false, false> cj;
398  conj_helper<LhsPacket, RhsPacket, false, false> pcj;
399 
400  const Index lhsStride = lhs.stride();
401  // TODO: for padded aligned inputs, we could enable aligned reads
402  enum {
403  LhsAlignment = Unaligned,
404  ResPacketSize = Traits::ResPacketSize,
405  LhsPacketSize = Traits::LhsPacketSize,
406  RhsPacketSize = Traits::RhsPacketSize,
407  };
408 
409 #ifndef GCC_ONE_VECTORPAIR_BUG
410  const Index n8 = rows - 8 * ResPacketSize + 1;
411  const Index n4 = rows - 4 * ResPacketSize + 1;
412  const Index n2 = rows - 2 * ResPacketSize + 1;
413 #endif
414  const Index n1 = rows - 1 * ResPacketSize + 1;
415 #ifdef EIGEN_POWER_USE_GEMV_PREFETCH
416  const Index prefetch_dist = 64 * LhsPacketSize;
417 #endif
418 
419  // TODO: improve the following heuristic:
420  const Index block_cols = cols < 128 ? cols : (lhsStride * sizeof(LhsScalar) < 16000 ? 16 : 8);
421  ResPacket palpha = pset1<ResPacket>(alpha);
422 
423  for (Index j2 = 0; j2 < cols; j2 += block_cols)
424  {
425  Index jend = numext::mini(j2 + block_cols, cols);
426  Index i = 0;
427  ResPacket c0, c1, c2, c3, c4, c5, c6, c7;
428 #ifdef USE_GEMV_MMA
429  __vector_quad e0, e1, e2, e3, e4, e5, e6, e7;
430  PacketBlock<ResPacket, 4> result0, result1, result2, result3, result4, result5, result6, result7;
431  GEMV_UNUSED(8, e)
432  GEMV_UNUSED(8, result)
433  GEMV_UNUSED_EXTRA(1, c)
434 #endif
435 #ifndef GCC_ONE_VECTORPAIR_BUG
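436  while (i < n8)
437  {
438  GEMV_PROCESS_COL(8)
439  }
440  if (i < n4)
441  {
442  GEMV_PROCESS_COL(4)
443  }
444  if (i < n2)
445  {
446  GEMV_PROCESS_COL(2)
447  }
448  if (i < n1)
449 #else
450  while (i < n1)
451 #endif
452  {
453  GEMV_PROCESS_COL_ONE(1)
454  }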
455  for (;i < rows;++i)
456  {
457  ResScalar d0(0);
458  Index j = j2;
459  do {
460  d0 += cj.pmul(lhs(i, j), rhs2(j, 0));
461  } while (++j < jend);
462  res[i] += alpha * d0;
463  }
464  }
465 }
Array< double, 1, 3 > e(1./3., 0.5, 2.)
Array33i c
#define GEMV_PROCESS_COL(N)
#define GEMV_PROCESS_COL_ONE(N)
@ Unaligned
Definition: Constants.h:235

◆ gemv_complex_col()

template<typename Scalar , typename LhsScalar , typename LhsMapper , bool ConjugateLhs, bool LhsIsReal, typename RhsScalar , typename RhsMapper , bool ConjugateRhs, bool RhsIsReal, typename ResScalar >
void gemv_complex_col ( Index  rows,
Index  cols,
const LhsMapper &  alhs,
const RhsMapper &  rhs,
ResScalar *  res,
Index  resIncr,
ResScalar  alpha 
)
inline

Definition at line 2135 of file MatrixVectorProduct.h.

2141 {
2142  typedef gemv_traits<LhsScalar, RhsScalar> Traits;
2143 
2144  typedef typename Traits::LhsPacket LhsPacket;
2145  typedef typename Traits::RhsPacket RhsPacket;
2146  typedef typename Traits::ResPacket ResPacket;
2147 
2148  typedef typename packet_traits<Scalar>::type ScalarPacket;
2149  typedef typename packet_traits<LhsScalar>::type PLhsPacket;
2150  typedef typename packet_traits<ResScalar>::type PResPacket;
2151  typedef gemv_traits<ResPacket, ResPacket> PTraits;
2152 
2153  EIGEN_UNUSED_VARIABLE(resIncr);
2154  eigen_internal_assert(resIncr == 1);
2155 
2156  // The following copy tells the compiler that lhs's attributes are not modified outside this function
2157  // This helps GCC to generate proper code.
2158  LhsMapper lhs(alhs);
2159  RhsMapper rhs2(rhs);
2160 
2161  conj_helper<LhsScalar, RhsScalar, ConjugateLhs, ConjugateRhs> cj;
2162 
2163  const Index lhsStride = lhs.stride();
2164  // TODO: for padded aligned inputs, we could enable aligned reads
2165  enum {
2166  LhsAlignment = Unaligned,
2167  ResPacketSize = PTraits::ResPacketSize,
2168  LhsPacketSize = PTraits::LhsPacketSize,
2169  RhsPacketSize = PTraits::RhsPacketSize,
2170  };
2171 #ifdef EIGEN_POWER_USE_GEMV_PREFETCH
2172  const Index prefetch_dist = 64 * LhsPacketSize;
2173 #endif
2174 
2175 #ifndef GCC_ONE_VECTORPAIR_BUG
2176  const Index n8 = rows - 8 * ResPacketSize + 1;
2177  const Index n4 = rows - 4 * ResPacketSize + 1;
2178  const Index n2 = rows - 2 * ResPacketSize + 1;
2179 #endif
2180  const Index n1 = rows - 1 * ResPacketSize + 1;
2181 
2182  // TODO: improve the following heuristic:
2183  const Index block_cols = cols < 128 ? cols : (lhsStride * sizeof(LhsScalar) < 16000 ? 16 : 8);
2184 
2185  typedef alpha_store<PResPacket, ResPacket, ResScalar, Scalar> AlphaData;
2186  AlphaData alpha_data(alpha);
2187 
2188  for (Index j2 = 0; j2 < cols; j2 += block_cols)
2189  {
2190  Index jend = numext::mini(j2 + block_cols, cols);
2191  Index i = 0;
2192  PResPacket c00, c01, c02, c03, c04, c05, c06, c07;
2193  ResPacket c10, c11, c12, c13, c14, c15, c16, c17;
2194  PLhsPacket f0, f1, f2, f3, f4, f5, f6, f7;
2195 #ifdef USE_GEMV_MMA
2196  __vector_quad e00, e01, e02, e03, e04, e05, e06, e07;
2197  __vector_pair a0, a1, a2, a3, a4, a5, a6, a7;
2198  PacketBlock<ScalarPacket, 4> result00, result01, result02, result03, result04, result05, result06, result07;
2199  GEMV_UNUSED(8, e0)
2200  GEMV_UNUSED(8, result0)
2201  GEMV_UNUSED(8, a)
2202  GEMV_UNUSED(8, f)
2203 #if !defined(GCC_ONE_VECTORPAIR_BUG) && defined(USE_GEMV_COL_COMPLEX_MMA)
2205 #endif
2206 #endif
2207 #ifndef GCC_ONE_VECTORPAIR_BUG
2208  {
2209  while (i < n8)
2210  {
2212  }
2213  }
2214  while (i < n4)
2215  {
2217  }
2218  if (i < n2)
2219  {
2221  }
2222  if (i < n1)
2223 #else
2224  while (i < n1)
2225 #endif
2226  {
2228  }
2229  for (;i < rows;++i)
2230  {
2231  ResScalar d0(0);
2232  Index j = j2;
2233  do {
2234  d0 += cj.pmul(lhs(i, j), rhs2(j, 0));
2235  } while (++j < jend);
2236  res[i] += alpha * d0;
2237  }
2238  }
2239 }
#define GEMV_PROCESS_COL_COMPLEX(N)
#define GEMV_IS_COMPLEX_FLOAT
#define GEMV_PROCESS_COL_COMPLEX_ONE(N)
#define GEMV_IS_COMPLEX_COMPLEX

◆ gemv_complex_row()

template<typename Scalar , typename LhsScalar , typename LhsMapper , bool ConjugateLhs, bool LhsIsReal, typename RhsScalar , typename RhsMapper , bool ConjugateRhs, bool RhsIsReal, typename ResScalar >
void gemv_complex_row ( Index  rows,
Index  cols,
const LhsMapper &  alhs,
const RhsMapper &  rhs,
ResScalar *  res,
Index  resIncr,
ResScalar  alpha 
)
inline

Definition at line 2834 of file MatrixVectorProduct.h.

2840 {
2841  typedef gemv_traits<LhsScalar, RhsScalar> Traits;
2842 
2843  typedef typename Traits::LhsPacket LhsPacket;
2844  typedef typename Traits::RhsPacket RhsPacket;
2845  typedef typename Traits::ResPacket ResPacket;
2846 
2847  typedef typename packet_traits<Scalar>::type ScalarPacket;
2848  typedef typename packet_traits<LhsScalar>::type PLhsPacket;
2849  typedef typename packet_traits<ResScalar>::type PResPacket;
2850  typedef gemv_traits<ResPacket, ResPacket> PTraits;
2851 
2852  // The following copy tells the compiler that lhs's attributes are not modified outside this function
2853  // This helps GCC to generate proper code.
2854  LhsMapper lhs(alhs);
2855  typename RhsMapper::LinearMapper rhs2 = rhs.getLinearMapper(0, 0);
2856 
2857  eigen_internal_assert(rhs.stride() == 1);
2858  conj_helper<LhsScalar, RhsScalar, ConjugateLhs, ConjugateRhs> cj;
2859 #if !EIGEN_COMP_LLVM
2860  conj_helper<LhsPacket, RhsPacket, ConjugateLhs, ConjugateRhs> pcj;
2861 #endif
2862 
2863  // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large,
2864  // processing 8 rows at once might be counter productive wrt cache.
2865 #ifndef GCC_ONE_VECTORPAIR_BUG
2866  const Index n8 = lhs.stride() * sizeof(LhsScalar) > 32000 ? (rows - 7) : (rows - 7);
2867  const Index n4 = rows - 3;
2868  const Index n2 = rows - 1;
2869 #endif
2870 
2871  // TODO: for padded aligned inputs, we could enable aligned reads
2872  enum {
2873  LhsAlignment = Unaligned,
2874  ResPacketSize = PTraits::ResPacketSize,
2875  LhsPacketSize = PTraits::LhsPacketSize,
2876  RhsPacketSize = PTraits::RhsPacketSize,
2877  };
2878 
2879  Index i = 0, j;
2880  PResPacket c00, c01, c02, c03, c04, c05, c06, c07;
2881  ResPacket c10, c11, c12, c13, c14, c15, c16, c17;
2882 #ifdef USE_GEMV_MMA
2883  __vector_quad e00, e01, e02, e03, e04, e05, e06, e07;
2884  GEMV_UNUSED_ROW(8, e0)
2885  GEMV_UNUSED_EXTRA(1, c0)
2886  GEMV_UNUSED_EXTRA(1, c1)
2887 #endif
2888  ResScalar dd0;
2889 #ifndef GCC_ONE_VECTORPAIR_BUG
2890  ScalarBlock<ResScalar, 2> cc0, cc1, cc2, cc3;
2891 #ifdef USE_GEMV_MMA
2893 #endif
2894  {
2896  }
2899 #endif
2900  for (; i < rows; ++i)
2901  {
2904  for (; j < cols; ++j)
2905  {
2906  dd0 += cj.pmul(lhs(i, j), rhs2(j));
2907  }
2908  res[i * resIncr] += alpha * dd0;
2909  }
2910 }
for(int i=0;i< 24;++i) array[i]
#define GEMV_PROCESS_ROW_COMPLEX_PREDUX(iter)
#define GEMV_PROCESS_ROW_COMPLEX(N)
#define GEMV_PROCESS_ROW_COMPLEX_SINGLE(N)

◆ gemv_mult_complex_complex()

template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_complex_complex ( LhsPacket &  a0,
RhsScalar *  b,
PResPacket &  c0,
ResPacket &  c1 
)

Definition at line 1533 of file MatrixVectorProduct.h.

1534 {
1535  ScalarPacket br, bi;
1536  if (StorageOrder == ColMajor) {
1537  pload_realimag<RhsScalar>(b, br, bi);
1538  }
1539  else {
1540  pload_realimag_row<RhsScalar>(b, br, bi);
1541  }
1542  if (ConjugateLhs && !ConjugateRhs) a0 = pconj2(a0);
1543  LhsPacket a1 = pcplxflipconj(a0);
1544  ScalarPacket cr = pmadd_complex_complex<LhsPacket, ScalarPacket, ConjugateLhs, ConjugateRhs, false>(a0.v, br, c0.v);
1545  ScalarPacket ci = pmadd_complex_complex<LhsPacket, ScalarPacket, ConjugateLhs, ConjugateRhs, true>(a1.v, bi, c1.v);
1546  c1 = ResPacket(ci);
1547  c0 = PResPacket(cr);
1548 }
Array< int, 3, 1 > b
EIGEN_ALWAYS_INLINE Packet2cf pconj2(const Packet2cf &a)
EIGEN_ALWAYS_INLINE Packet2cf pcplxflipconj(Packet2cf a)
@ ColMajor
Definition: Constants.h:321

◆ gemv_mult_complex_real()

template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_complex_real ( LhsPacket &  a0,
RhsScalar *  b,
PResPacket &  c0 
)

Definition at line 1567 of file MatrixVectorProduct.h.

1568 {
1569  ScalarPacket a1 = pload_complex<ResPacket>(&a0);
1570  ScalarPacket b0;
1571  if (StorageOrder == ColMajor) {
1572  b0 = pload_real(b);
1573  }
1574  else {
1575  b0 = pload_real_row<ResPacket>(b);
1576  }
1577  ScalarPacket cri = pmadd_complex_real<PResPacket, ScalarPacket, ConjugateLhs>(a1, b0, c0.v);
1578  c0 = PResPacket(cri);
1579 }
EIGEN_ALWAYS_INLINE Packet4f pload_real(float *src)

◆ gemv_mult_generic()

template<typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_generic ( LhsPacket &  a0,
RhsScalar *  b,
PResPacket &  c0 
)

Definition at line 1518 of file MatrixVectorProduct.h.

1519 {
1520  conj_helper<LhsPacket, RhsPacket, ConjugateLhs, ConjugateRhs> pcj;
1521  RhsPacket b0;
1522  if (StorageOrder == ColMajor) {
1523  b0 = pset1<RhsPacket>(*b);
1524  }
1525  else {
1526  b0 = ploadu<RhsPacket>(b);
1527  }
1528  c0 = pcj.pmadd(a0, b0, c0);
1529 }

◆ gemv_mult_real_complex()

template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void gemv_mult_real_complex ( LhsPacket &  a0,
RhsScalar *  b,
PResPacket &  c0 
)

Definition at line 1552 of file MatrixVectorProduct.h.

1553 {
1554  ScalarPacket b0;
1555  if (StorageOrder == ColMajor) {
1556  b0 = pload_complex_full(b);
1557  }
1558  else {
1559  b0 = pload_complex_full_row(b);
1560  }
1561  ScalarPacket cri = pmadd_complex_real<PResPacket, ScalarPacket, ConjugateRhs>(a0, b0, c0.v);
1562  c0 = PResPacket(cri);
1563 }
EIGEN_ALWAYS_INLINE Packet4f pload_complex_full(std::complex< float > *src)
EIGEN_ALWAYS_INLINE Packet4f pload_complex_full_row(std::complex< float > *src)

◆ gemv_row()

template<typename LhsScalar , typename LhsMapper , typename RhsScalar , typename RhsMapper , typename ResScalar >
void gemv_row ( Index  rows,
Index  cols,
const LhsMapper &  alhs,
const RhsMapper &  rhs,
ResScalar *  res,
Index  resIncr,
ResScalar  alpha 
)
inline

Definition at line 2476 of file MatrixVectorProduct.h.

2482 {
2483  typedef gemv_traits<LhsScalar, RhsScalar> Traits;
2484 
2485  typedef typename Traits::LhsPacket LhsPacket;
2486  typedef typename Traits::RhsPacket RhsPacket;
2487  typedef typename Traits::ResPacket ResPacket;
2488 
2489  // The following copy tells the compiler that lhs's attributes are not modified outside this function
2490  // This helps GCC to generate proper code.
2491  LhsMapper lhs(alhs);
2492  typename RhsMapper::LinearMapper rhs2 = rhs.getLinearMapper(0, 0);
2493 
2494  eigen_internal_assert(rhs.stride() == 1);
2495  conj_helper<LhsScalar, RhsScalar, false, false> cj;
2496  conj_helper<LhsPacket, RhsPacket, false, false> pcj;
2497 
2498  // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large,
2499  // processing 8 rows at once might be counter productive wrt cache.
2500 #ifndef GCC_ONE_VECTORPAIR_BUG
2501  const Index n8 = lhs.stride() * sizeof(LhsScalar) > 32000 ? (rows - 7) : (rows - 7);
2502  const Index n4 = rows - 3;
2503  const Index n2 = rows - 1;
2504 #endif
2505 
2506  // TODO: for padded aligned inputs, we could enable aligned reads
2507  enum {
2508  LhsAlignment = Unaligned,
2509  ResPacketSize = Traits::ResPacketSize,
2510  LhsPacketSize = Traits::LhsPacketSize,
2511  RhsPacketSize = Traits::RhsPacketSize,
2512  };
2513 
2514  Index i = 0;
2515 #ifdef USE_GEMV_MMA
2516  __vector_quad c0, c1, c2, c3, c4, c5, c6, c7;
2517  GEMV_UNUSED_ROW(8, c)
2518 #else
2519  ResPacket c0, c1, c2, c3, c4, c5, c6, c7;
2520 #endif
2521 #ifndef GCC_ONE_VECTORPAIR_BUG
2522  ScalarBlock<ResScalar, 2> cc0, cc1, cc2, cc3;
2523  GEMV_PROCESS_ROW(8)
2524  GEMV_PROCESS_ROW(4)
2525  GEMV_PROCESS_ROW(2)
2526 #endif
2527  for (; i < rows; ++i)
2528  {
2529  ResPacket d0 = pset1<ResPacket>(ResScalar(0));
2530  Index j = 0;
2531  for (; j + LhsPacketSize <= cols; j += LhsPacketSize)
2532  {
2533  RhsPacket b0 = rhs2.template load<RhsPacket, Unaligned>(j);
2534 
2535  d0 = pcj.pmadd(lhs.template load<LhsPacket, LhsAlignment>(i + 0, j), b0, d0);
2536  }
2537  ResScalar dd0 = predux(d0);
2538  for (; j < cols; ++j)
2539  {
2540  dd0 += cj.pmul(lhs(i, j), rhs2(j));
2541  }
2542  res[i * resIncr] += alpha * dd0;
2543  }
2544 }
#define GEMV_PROCESS_ROW(N)
unpacket_traits< Packet >::type predux(const Packet &a)

◆ loadColData()

template<typename RhsMapper , bool linear>
EIGEN_ALWAYS_INLINE Packet8bf loadColData ( RhsMapper &  rhs,
Index  j 
)

Definition at line 545 of file MatrixVectorProduct.h.

546 {
547  return loadColData_impl<RhsMapper, linear>::run(rhs, j);
548 }
static EIGEN_ALWAYS_INLINE Packet8bf run(RhsMapper &rhs, Index j)

◆ loadLhsPacket()

template<typename Scalar , typename LhsScalar , typename LhsMapper , typename LhsPacket >
EIGEN_ALWAYS_INLINE LhsPacket loadLhsPacket ( LhsMapper &  lhs,
Index  i,
Index  j 
)

Definition at line 1481 of file MatrixVectorProduct.h.

1482 {
1483  if (sizeof(Scalar) == sizeof(LhsScalar)) {
1484  const LhsScalar& src = lhs(i + 0, j);
1485  return LhsPacket(pload_real_full(const_cast<LhsScalar*>(&src)));
1486  }
1487  return lhs.template load<LhsPacket, Unaligned>(i + 0, j);
1488 }
EIGEN_ALWAYS_INLINE Packet4f pload_real_full(float *src)

◆ loadPacketPartialZero()

EIGEN_ALWAYS_INLINE Packet8us loadPacketPartialZero ( Packet8us  data,
Index  extra_cols 
)

Definition at line 821 of file MatrixVectorProduct.h.

822 {
823  Packet16uc shift = pset1<Packet16uc>(8 * 2 * (8 - extra_cols));
824 #ifdef _BIG_ENDIAN
825  return reinterpret_cast<Packet8us>(vec_slo(vec_sro(reinterpret_cast<Packet16uc>(data), shift), shift));
826 #else
827  return reinterpret_cast<Packet8us>(vec_sro(vec_slo(reinterpret_cast<Packet16uc>(data), shift), shift));
828 #endif
829 }
int data[]
__vector unsigned char Packet16uc
Packet16uc pset1< Packet16uc >(const unsigned char &from)
__vector unsigned short int Packet8us

◆ loadVecLoopVSX()

template<Index num_acc, typename LhsMapper , bool zero>
EIGEN_ALWAYS_INLINE void loadVecLoopVSX ( Index  k,
LhsMapper &  lhs,
Packet4f(&)  a0[num_acc][2] 
)

Definition at line 495 of file MatrixVectorProduct.h.

496 {
497  Packet8bf c0 = lhs.template loadPacket<Packet8bf>(k*4, 0);
498  Packet8bf b1;
499  if (!zero) {
500  b1 = lhs.template loadPacket<Packet8bf>(k*4, 1);
501 
502  a0[k + 0][1] = oneConvertBF16Hi(b1.m_val);
503  }
504  a0[k + 0][0] = oneConvertBF16Hi(c0.m_val);
505 
506  if (num_acc > (k + 1)) {
507  a0[k + 1][0] = oneConvertBF16Lo(c0.m_val);
508  if (!zero) {
509  a0[k + 1][1] = oneConvertBF16Lo(b1.m_val);
510  }
511  }
512 }
EIGEN_ALWAYS_INLINE Packet4f oneConvertBF16Lo(Packet8us data)
EIGEN_ALWAYS_INLINE Packet4f oneConvertBF16Hi(Packet8us data)

◆ multVecVSX()

template<Index num_acc, bool zero>
EIGEN_ALWAYS_INLINE void multVecVSX ( Packet4f(&)  acc[num_acc][2],
Packet4f(&)  a0[num_acc][2],
Packet4f(&)  b0[2] 
)

Definition at line 515 of file MatrixVectorProduct.h.

516 {
517  for(Index k = 0; k < num_acc; k++) {
518  for(Index i = 0; i < (zero ? 1 : 2); i++) {
519  acc[k][i] = pmadd(b0[i], a0[k][i], acc[k][i]);
520  }
521  }
522 }
Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)

◆ multVSXVecLoop()

template<Index num_acc, typename LhsMapper , typename RhsMapper , bool extra>
EIGEN_ALWAYS_INLINE void multVSXVecLoop ( Packet4f(&)  acc[num_acc][2],
const LhsMapper &  lhs,
RhsMapper &  rhs,
Index  j,
Index  extra_cols 
)

Definition at line 833 of file MatrixVectorProduct.h.

834 {
835  Packet4f a0[num_acc][2], b0[2];
836  Packet8bf a1, b1;
837 
838  if (extra) {
839  b1 = rhs.template loadPacketPartial<Packet8bf>(j, extra_cols);
840 #ifndef _ARCH_PWR9
841  b1 = loadPacketPartialZero(b1.m_val, extra_cols);
842 #endif
843  } else {
844  b1 = rhs.template loadPacket<Packet8bf>(j);
845  }
846  b0[0] = oneConvertBF16Hi(b1.m_val);
847  b0[1] = oneConvertBF16Lo(b1.m_val);
848 
849  const LhsMapper lhs2 = lhs.getSubMapper(0, j);
850  for(Index k = 0; k < num_acc; k++) {
851  if (extra) {
852  a1 = lhs2.template loadPacketPartial<Packet8bf>(k, 0, extra_cols);
853 #ifndef _ARCH_PWR9
854  a1 = loadPacketPartialZero(a1.m_val, extra_cols);
855 #endif
856  } else {
857  a1 = lhs2.template loadPacket<Packet8bf>(k, 0);
858  }
859  a0[k][0] = oneConvertBF16Hi(a1.m_val);
860  a0[k][1] = oneConvertBF16Lo(a1.m_val);
861  }
862 
863  multVecVSX<num_acc, false>(acc, a0, b0);
864 }
EIGEN_ALWAYS_INLINE Packet8us loadPacketPartialZero(Packet8us data, Index extra_cols)

◆ outputVecCol()

template<bool extraRows>
EIGEN_ALWAYS_INLINE void outputVecCol ( Packet4f  acc,
float *  result,
Packet4f  pAlpha,
Index  extra_rows 
)

Definition at line 468 of file MatrixVectorProduct.h.

469 {
470  Packet4f d0 = ploadu<Packet4f>(result);
471  d0 = pmadd(acc, pAlpha, d0);
472  if (extraRows) {
473  pstoreu_partial(result, d0, extra_rows);
474  } else {
475  pstoreu(result, d0);
476  }
477 }
void pstoreu(Scalar *to, const Packet &from)
void pstoreu_partial(Scalar *to, const Packet &from, const Index n, const Index offset=0)
Packet4f ploadu< Packet4f >(const float *from)

◆ outputVecColResults()

template<Index num_acc, bool extraRows, Index size>
EIGEN_ALWAYS_INLINE void outputVecColResults ( Packet4f(&)  acc[num_acc][size],
float *  result,
Packet4f  pAlpha,
Index  extra_rows 
)

Definition at line 480 of file MatrixVectorProduct.h.

481 {
482  constexpr Index real_acc = (num_acc - (extraRows ? 1 : 0));
483  for(Index k = 0; k < real_acc; k++) {
484  outputVecCol<false>(acc[k][0], result + k*4, pAlpha, extra_rows);
485  }
486  if (extraRows) {
487  outputVecCol<true>(acc[real_acc][0], result + real_acc*4, pAlpha, extra_rows);
488  }
489 }

◆ outputVecResults()

template<Index num_acc, Index size>
EIGEN_ALWAYS_INLINE void outputVecResults ( Packet4f(&)  acc[num_acc][size],
float *  result,
Packet4f  pAlpha 
)

Definition at line 766 of file MatrixVectorProduct.h.

767 {
768  constexpr Index extra = num_acc & 3;
769 
770  for(Index k = 0; k < num_acc; k += 4) {
771  Packet4f d0 = ploadu<Packet4f>(result + k);
772  d0 = pmadd(acc[k + 0][0], pAlpha, d0);
773 
774  if (num_acc > (k + 3)) {
775  pstoreu(result + k, d0);
776  } else {
777  if (extra == 3) {
778  pstoreu_partial(result + k, d0, extra);
779  } else {
780  memcpy((void *)(result + k), (void *)(&d0), sizeof(float) * extra);
781  }
782  }
783  }
784 }

◆ padd() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd padd ( Packet1cd &  a,
std::complex< double > &  b 
)

Definition at line 1343 of file MatrixVectorProduct.h.

1344 {
1346  return a; // Just for compilation
1347 }

◆ padd() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf padd ( Packet2cf &  a,
std::complex< float > &  b 
)

Definition at line 1337 of file MatrixVectorProduct.h.

1338 {
1340  return a; // Just for compilation
1341 }

◆ pconj2() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pconj2 ( const Packet1cd &  a)

Definition at line 1016 of file MatrixVectorProduct.h.

1016  {
1017  return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p16uc_COMPLEX64_CONJ_XOR)));
1018 }
const Packet16uc p16uc_COMPLEX64_CONJ_XOR
Packet8h pxor(const Packet8h &a, const Packet8h &b)

◆ pconj2() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pconj2 ( const Packet2cf &  a)

Definition at line 1012 of file MatrixVectorProduct.h.

1012  {
1013  return Packet2cf(pxor(a.v, reinterpret_cast<Packet4f>(p16uc_COMPLEX32_CONJ_XOR)));
1014 }
const Packet16uc p16uc_COMPLEX32_CONJ_XOR

◆ pconjinv() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pconjinv ( const Packet1cd &  a)

Definition at line 1029 of file MatrixVectorProduct.h.

1029  {
1030  return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p16uc_COMPLEX64_CONJ_XOR2)));
1031 }
const Packet16uc p16uc_COMPLEX64_CONJ_XOR2

◆ pconjinv() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pconjinv ( const Packet2cf &  a)

Definition at line 1021 of file MatrixVectorProduct.h.

1021  {
1022 #ifdef __POWER8_VECTOR__
1023  return Packet2cf(Packet4f(vec_neg(Packet2d(a.v))));
1024 #else
1025  return Packet2cf(pxor(a.v, reinterpret_cast<Packet4f>(p16uc_COMPLEX32_CONJ_XOR2)));
1026 #endif
1027 }
const Packet16uc p16uc_COMPLEX32_CONJ_XOR2

◆ pcplxconjflip() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pcplxconjflip ( Packet1cd  a)

Definition at line 1066 of file MatrixVectorProduct.h.

1067 {
1068 #ifdef PERMXOR_GOOD
1069  return Packet1cd(Packet2d(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX64_CONJ_XOR2, p16uc_COMPLEX64_XORFLIP)));
1070 #else
1071  return pconj2(pcplxflip(a));
1072 #endif
1073 }
const Packet16uc p16uc_COMPLEX64_XORFLIP
Packet1cd pcplxflip(const Packet1cd &x)
Definition: MSA/Complex.h:617

◆ pcplxconjflip() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pcplxconjflip ( Packet2cf  a)

Definition at line 1057 of file MatrixVectorProduct.h.

1058 {
1059 #ifdef PERMXOR_GOOD
1060  return Packet2cf(Packet4f(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX32_CONJ_XOR2, p16uc_COMPLEX32_XORFLIP)));
1061 #else
1062  return pconj2(pcplxflip(a));
1063 #endif
1064 }
const Packet16uc p16uc_COMPLEX32_XORFLIP

◆ pcplxflip2() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pcplxflip2 ( Packet1cd  a)

Definition at line 1119 of file MatrixVectorProduct.h.

1120 {
1121 #ifdef EIGEN_VECTORIZE_VSX
1122  return Packet1cd(__builtin_vsx_xxpermdi(a.v, a.v, 2));
1123 #else
1124  return Packet1cd(Packet2d(vec_perm(Packet16uc(a.v), Packet16uc(a.v), p16uc_COMPLEX64_XORFLIP)));
1125 #endif
1126 }

◆ pcplxflip2() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pcplxflip2 ( Packet2cf  a)

Definition at line 1114 of file MatrixVectorProduct.h.

1115 {
1116  return Packet2cf(Packet4f(vec_perm(Packet16uc(a.v), Packet16uc(a.v), p16uc_COMPLEX32_XORFLIP)));
1117 }

◆ pcplxflipconj() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pcplxflipconj ( Packet1cd  a)

Definition at line 1047 of file MatrixVectorProduct.h.

1048 {
1049 #ifdef PERMXOR_GOOD
1050  return Packet1cd(Packet2d(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX64_CONJ_XOR, p16uc_COMPLEX64_XORFLIP)));
1051 #else
1052  return pcplxflip(pconj2(a));
1053 #endif
1054 }

◆ pcplxflipconj() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pcplxflipconj ( Packet2cf  a)

Definition at line 1038 of file MatrixVectorProduct.h.

1039 {
1040 #ifdef PERMXOR_GOOD
1041  return Packet2cf(Packet4f(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX32_CONJ_XOR, p16uc_COMPLEX32_XORFLIP)));
1042 #else
1043  return pcplxflip(pconj2(a));
1044 #endif
1045 }

◆ pcplxflipnegate() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pcplxflipnegate ( Packet1cd  a)

Definition at line 1104 of file MatrixVectorProduct.h.

1105 {
1106 #ifdef PERMXOR_GOOD
1107  return Packet1cd(Packet2d(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX64_NEGATE, p16uc_COMPLEX64_XORFLIP)));
1108 #else
1109  return pcplxflip(pnegate2(a));
1110 #endif
1111 }
EIGEN_ALWAYS_INLINE Packet2cf pnegate2(Packet2cf a)
const Packet16uc p16uc_COMPLEX64_NEGATE

◆ pcplxflipnegate() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pcplxflipnegate ( Packet2cf  a)

Definition at line 1095 of file MatrixVectorProduct.h.

1096 {
1097 #ifdef PERMXOR_GOOD
1098  return Packet2cf(Packet4f(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX32_NEGATE, p16uc_COMPLEX32_XORFLIP)));
1099 #else
1100  return pcplxflip(pnegate2(a));
1101 #endif
1102 }
const Packet16uc p16uc_COMPLEX32_NEGATE

◆ pload_complex() [1/4]

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d pload_complex ( Packet1cd *  src)

Definition at line 1249 of file MatrixVectorProduct.h.

1250 {
1251  return src->v;
1252 }

◆ pload_complex() [2/4]

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f pload_complex ( Packet2cf *  src)

Definition at line 1243 of file MatrixVectorProduct.h.

1244 {
1245  return src->v;
1246 }

◆ pload_complex() [3/4]

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d pload_complex ( std::complex< double > *  src)

Definition at line 1236 of file MatrixVectorProduct.h.

1237 {
1238  return ploadu<Packet2d>(reinterpret_cast<double*>(src));
1239 }
Packet2d ploadu< Packet2d >(const double *from)

◆ pload_complex() [4/4]

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f pload_complex ( std::complex< float > *  src)

Definition at line 1224 of file MatrixVectorProduct.h.

1225 {
1226  if (GEMV_IS_SCALAR) {
1227  return pload_complex_half(src);
1228  }
1229  else
1230  {
1231  return ploadu<Packet4f>(reinterpret_cast<float*>(src));
1232  }
1233 }
EIGEN_ALWAYS_INLINE Packet4f pload_complex_half(std::complex< float > *src)
#define GEMV_IS_SCALAR

◆ pload_complex_full() [1/2]

EIGEN_ALWAYS_INLINE Packet2d pload_complex_full ( std::complex< double > *  src)

Definition at line 1260 of file MatrixVectorProduct.h.

1261 {
1262  return ploadu<Packet1cd>(src).v;
1263 }
Packet1cd ploadu< Packet1cd >(const std::complex< double > *from)
Definition: MSA/Complex.h:453

◆ pload_complex_full() [2/2]

EIGEN_ALWAYS_INLINE Packet4f pload_complex_full ( std::complex< float > *  src)

Definition at line 1255 of file MatrixVectorProduct.h.

1256 {
1257  return Packet4f(ploaddup<Packet2d>(reinterpret_cast<double *>(src)));
1258 }
Packet2d ploaddup< Packet2d >(const double *from)

◆ pload_complex_full_row() [1/2]

EIGEN_ALWAYS_INLINE Packet2d pload_complex_full_row ( std::complex< double > *  src)

Definition at line 1271 of file MatrixVectorProduct.h.

1272 {
1273  return pload_complex_full(src);
1274 }

◆ pload_complex_full_row() [2/2]

EIGEN_ALWAYS_INLINE Packet4f pload_complex_full_row ( std::complex< float > *  src)

Definition at line 1266 of file MatrixVectorProduct.h.

1267 {
1268  return ploadu<Packet2cf>(src).v;
1269 }
Packet2cf ploadu< Packet2cf >(const std::complex< float > *from)

◆ pload_complex_half()

EIGEN_ALWAYS_INLINE Packet4f pload_complex_half ( std::complex< float > *  src)

Definition at line 1129 of file MatrixVectorProduct.h.

1130 {
1131  Packet4f t;
1132 #ifdef EIGEN_VECTORIZE_VSX
1133  // Load float64/two float32 (doubleword alignment)
1134  __asm__("lxsdx %x0,%y1" : "=wa" (t) : "Z" (*src));
1135 #else
1136  *reinterpret_cast<std::complex<float>*>(reinterpret_cast<float*>(&t) + COMPLEX_DELTA) = *src;
1137 #endif
1138  return t;
1139 }
#define COMPLEX_DELTA

◆ pload_real() [1/4]

EIGEN_ALWAYS_INLINE Packet2d pload_real ( double *  src)

Definition at line 1282 of file MatrixVectorProduct.h.

1283 {
1284  return pset1<Packet2d>(*src);
1285 }
Packet2d pset1< Packet2d >(const double &from)

◆ pload_real() [2/4]

EIGEN_ALWAYS_INLINE Packet4f pload_real ( float *  src)

Definition at line 1277 of file MatrixVectorProduct.h.

1278 {
1279  return pset1<Packet4f>(*src);
1280 }

◆ pload_real() [3/4]

EIGEN_ALWAYS_INLINE Packet2d pload_real ( Packet2d &  src)

Definition at line 1292 of file MatrixVectorProduct.h.

1293 {
1294  return src;
1295 }

◆ pload_real() [4/4]

EIGEN_ALWAYS_INLINE Packet4f pload_real ( Packet4f &  src)

Definition at line 1287 of file MatrixVectorProduct.h.

1288 {
1289  return src;
1290 }

◆ pload_real_full() [1/4]

EIGEN_ALWAYS_INLINE Packet2d pload_real_full ( double *  src)

Definition at line 1304 of file MatrixVectorProduct.h.

1305 {
1306  return pload_real(src);
1307 }

◆ pload_real_full() [2/4]

EIGEN_ALWAYS_INLINE Packet4f pload_real_full ( float *  src)

Definition at line 1298 of file MatrixVectorProduct.h.

1299 {
1300  Packet4f ret = ploadu<Packet4f>(src);
1301  return vec_mergeh(ret, ret);
1302 }

◆ pload_real_full() [3/4]

EIGEN_ALWAYS_INLINE Packet2d pload_real_full ( std::complex< double > *  src)

Definition at line 1314 of file MatrixVectorProduct.h.

1315 {
1316  return pload_complex_full(src); // Just for compilation
1317 }

◆ pload_real_full() [4/4]

EIGEN_ALWAYS_INLINE Packet4f pload_real_full ( std::complex< float > *  src)

Definition at line 1309 of file MatrixVectorProduct.h.

1310 {
1311  return pload_complex_full(src); // Just for compilation
1312 }

◆ pload_real_row() [1/2]

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d pload_real_row ( double *  src)

Definition at line 1332 of file MatrixVectorProduct.h.

1333 {
1334  return pload_real(src);
1335 }

◆ pload_real_row() [2/2]

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f pload_real_row ( float *  src)

Definition at line 1321 of file MatrixVectorProduct.h.

1322 {
1323  if (GEMV_IS_SCALAR) {
1324  return pload_real_full(src);
1325  }
1326  else {
1327  return ploadu<Packet4f>(src);
1328  }
1329 }

◆ pload_realimag() [1/2]

template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag ( RhsScalar *  src,
Packet2d &  r,
Packet2d &  i 
)

Definition at line 1156 of file MatrixVectorProduct.h.

1157 {
1158 #ifdef EIGEN_VECTORIZE_VSX
1159  __asm__("lxvdsx %x0,%y1" : "=wa" (r) : "Z" (*(reinterpret_cast<double*>(src) + 0)));
1160  __asm__("lxvdsx %x0,%y1" : "=wa" (i) : "Z" (*(reinterpret_cast<double*>(src) + 1)));
1161 #else
1162  Packet2d t = ploadu<Packet2d>(reinterpret_cast<double*>(src));
1163  r = vec_splat(t, 0);
1164  i = vec_splat(t, 1);
1165 #endif
1166 }

◆ pload_realimag() [2/2]

template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag ( RhsScalar *  src,
Packet4f &  r,
Packet4f &  i 
)

Definition at line 1143 of file MatrixVectorProduct.h.

1144 {
1145 #ifdef _ARCH_PWR9
1146  __asm__("lxvwsx %x0,%y1" : "=wa" (r) : "Z" (*(reinterpret_cast<float*>(src) + 0)));
1147  __asm__("lxvwsx %x0,%y1" : "=wa" (i) : "Z" (*(reinterpret_cast<float*>(src) + 1)));
1148 #else
1149  Packet4f t = pload_complex_half(src);
1150  r = vec_splat(t, COMPLEX_DELTA + 0);
1151  i = vec_splat(t, COMPLEX_DELTA + 1);
1152 #endif
1153 }

◆ pload_realimag_combine() [1/2]

EIGEN_ALWAYS_INLINE Packet2d pload_realimag_combine ( std::complex< double > *  src)

Definition at line 1206 of file MatrixVectorProduct.h.

1207 {
1208  return ploadu<Packet1cd>(src).v;
1209 }

◆ pload_realimag_combine() [2/2]

EIGEN_ALWAYS_INLINE Packet4f pload_realimag_combine ( std::complex< float > *  src)

Definition at line 1195 of file MatrixVectorProduct.h.

1196 {
1197 #ifdef EIGEN_VECTORIZE_VSX
1198  Packet4f ret;
1199  __asm__("lxvdsx %x0,%y1" : "=wa" (ret) : "Z" (*(reinterpret_cast<double*>(src) + 0)));
1200  return ret;
1201 #else
1202  return Packet4f(ploaddup<Packet2d>(reinterpret_cast<double *>(src)));
1203 #endif
1204 }

◆ pload_realimag_combine_row() [1/2]

EIGEN_ALWAYS_INLINE Packet2d pload_realimag_combine_row ( std::complex< double > *  src)

Definition at line 1217 of file MatrixVectorProduct.h.

1218 {
1219  return ploadu<Packet1cd>(src).v;
1220 }

◆ pload_realimag_combine_row() [2/2]

EIGEN_ALWAYS_INLINE Packet4f pload_realimag_combine_row ( std::complex< float > *  src)

Definition at line 1212 of file MatrixVectorProduct.h.

1213 {
1214  return ploadu<Packet2cf>(src).v;
1215 }

◆ pload_realimag_row() [1/2]

template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag_row ( RhsScalar *  src,
Packet2d &  r,
Packet2d &  i 
)

Definition at line 1189 of file MatrixVectorProduct.h.

1190 {
1191  return pload_realimag(src, r, i);
1192 }
EIGEN_ALWAYS_INLINE void pload_realimag(RhsScalar *src, Packet4f &r, Packet4f &i)

◆ pload_realimag_row() [2/2]

template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void pload_realimag_row ( RhsScalar *  src,
Packet4f &  r,
Packet4f &  i 
)

Definition at line 1176 of file MatrixVectorProduct.h.

1177 {
1178  Packet4f t = ploadu<Packet4f>(reinterpret_cast<float*>(src));
1179 #ifdef __POWER8_VECTOR__
1180  r = vec_mergee(t, t);
1181  i = vec_mergeo(t, t);
1182 #else
1183  r = vec_perm(t, t, p16uc_MERGEE);
1184  i = vec_perm(t, t, p16uc_MERGEO);
1185 #endif
1186 }
const Packet16uc p16uc_MERGEE
const Packet16uc p16uc_MERGEO

◆ pmadd_complex()

template<typename ScalarPacket , typename AlphaData >
EIGEN_ALWAYS_INLINE ScalarPacket pmadd_complex ( ScalarPacket &  c0,
ScalarPacket &  c2,
ScalarPacket &  c4,
AlphaData &  b0 
)

Definition at line 1425 of file MatrixVectorProduct.h.

1426 {
1427  return pmadd(c2, b0.separate.i.v, pmadd(c0, b0.separate.r.v, c4));
1428 }

◆ pmadd_complex_complex()

template<typename ComplexPacket , typename RealPacket , bool ConjugateLhs, bool ConjugateRhs, bool Negate>
EIGEN_ALWAYS_INLINE RealPacket pmadd_complex_complex ( RealPacket &  a,
RealPacket &  b,
RealPacket &  c 
)

Definition at line 1492 of file MatrixVectorProduct.h.

1493 {
1494  if (ConjugateLhs && ConjugateRhs) {
1495  return vec_madd(a, pconj2(ComplexPacket(b)).v, c);
1496  }
1497  else if (Negate && !ConjugateLhs && ConjugateRhs) {
1498  return vec_nmsub(a, b, c);
1499  }
1500  else {
1501  return vec_madd(a, b, c);
1502  }
1503 }
Array< int, Dynamic, 1 > v

◆ pmadd_complex_real()

template<typename ComplexPacket , typename RealPacket , bool Conjugate>
EIGEN_ALWAYS_INLINE RealPacket pmadd_complex_real ( RealPacket &  a,
RealPacket &  b,
RealPacket &  c 
)

Definition at line 1507 of file MatrixVectorProduct.h.

1508 {
1509  if (Conjugate) {
1510  return vec_madd(a, pconj2(ComplexPacket(b)).v, c);
1511  }
1512  else {
1513  return vec_madd(a, b, c);
1514  }
1515 }

◆ pnegate2() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pnegate2 ( Packet1cd  a)

Definition at line 1085 of file MatrixVectorProduct.h.

1086 {
1087 #ifdef __POWER8_VECTOR__
1088  return Packet1cd(vec_neg(a.v));
1089 #else
1090  return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p16uc_COMPLEX64_NEGATE)));
1091 #endif
1092 }

◆ pnegate2() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pnegate2 ( Packet2cf  a)

Definition at line 1076 of file MatrixVectorProduct.h.

1077 {
1078 #ifdef __POWER8_VECTOR__
1079  return Packet2cf(vec_neg(a.v));
1080 #else
1081  return Packet2cf(pxor(a.v, reinterpret_cast<Packet4f>(p16uc_COMPLEX32_NEGATE)));
1082 #endif
1083 }

◆ predux_complex() [1/2]

template<typename ResScalar , typename PResPacket , typename ResPacket , typename LhsPacket , typename RhsPacket >
EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_complex ( PResPacket &  a0,
PResPacket &  b0,
ResPacket &  a1,
ResPacket &  b1 
)

Definition at line 2623 of file MatrixVectorProduct.h.

2624 {
2625  if (GEMV_IS_COMPLEX_COMPLEX) {
2626  a0 = padd(a0, a1);
2627  b0 = padd(b0, b1);
2628  }
2629  return predux_complex<ResScalar, PResPacket>(a0, b0);
2630 }
EIGEN_ALWAYS_INLINE Packet2cf padd(Packet2cf &a, std::complex< float > &b)

◆ predux_complex() [2/2]

template<typename ResScalar , typename ResPacket >
EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_complex ( ResPacket &  a,
ResPacket &  b 
)

Definition at line 2374 of file MatrixVectorProduct.h.

2375 {
2376  return predux_real<ResScalar, ResPacket>(a, b);
2377 }

◆ predux_real()

template<typename ResScalar , typename ResPacket >
EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_real ( ResPacket &  a,
ResPacket &  b 
)

Definition at line 2365 of file MatrixVectorProduct.h.

2366 {
2367  ScalarBlock<ResScalar, 2> cc0;
2368  cc0.scalar[0] = predux(a);
2369  cc0.scalar[1] = predux(b);
2370  return cc0;
2371 }

◆ preduxVecResults2VSX()

template<Index num_acc>
EIGEN_ALWAYS_INLINE void preduxVecResults2VSX ( Packet4f(&)  acc[num_acc][2],
Index  k 
)

Definition at line 787 of file MatrixVectorProduct.h.

788 {
789  if (num_acc > (k + 1)) {
790  acc[k][1] = vec_mergel(acc[k + 0][0], acc[k + 1][0]);
791  acc[k][0] = vec_mergeh(acc[k + 0][0], acc[k + 1][0]);
792  acc[k][0] = acc[k][0] + acc[k][1];
793  acc[k][0] += vec_sld(acc[k][0], acc[k][0], 8);
794  } else {
795  acc[k][0] += vec_sld(acc[k][0], acc[k][0], 8);
796 #ifdef _BIG_ENDIAN
797  acc[k][0] += vec_sld(acc[k][0], acc[k][0], 12);
798 #else
799  acc[k][0] += vec_sld(acc[k][0], acc[k][0], 4);
800 #endif
801  }
802 }

◆ preduxVecResultsVSX()

template<Index num_acc>
EIGEN_ALWAYS_INLINE void preduxVecResultsVSX ( Packet4f(&)  acc[num_acc][2])

Definition at line 805 of file MatrixVectorProduct.h.

806 {
807  for(Index k = 0; k < num_acc; k += 4) {
808  preduxVecResults2VSX<num_acc>(acc, k + 0);
809  if (num_acc > (k + 2)) {
810  preduxVecResults2VSX<num_acc>(acc, k + 2);
811 #ifdef EIGEN_VECTORIZE_VSX
812  acc[k + 0][0] = reinterpret_cast<Packet4f>(vec_mergeh(reinterpret_cast<Packet2ul>(acc[k + 0][0]), reinterpret_cast<Packet2ul>(acc[k + 2][0])));
813 #else
814  acc[k + 0][0] = reinterpret_cast<Packet4f>(vec_perm(acc[k + 0][0],acc[k + 2][0],p16uc_TRANSPOSE64_HI));
815 #endif
816  }
817  }
818 }
static Packet16uc p16uc_TRANSPOSE64_HI

◆ pset1_complex() [1/2]

template<typename Scalar , typename ResScalar , typename ResPacket , int which>
EIGEN_ALWAYS_INLINE Packet1cd pset1_complex ( std::complex< double > &  alpha)

Definition at line 1369 of file MatrixVectorProduct.h.

1370 {
1371  Packet1cd ret;
1372  ret.v[0] = pset1_realimag<Scalar, ResScalar>(alpha, (which & 0x01), (which & 0x04));
1373  ret.v[1] = pset1_realimag<Scalar, ResScalar>(alpha, (which & 0x02), (which & 0x08));
1374  return ret;
1375 }

◆ pset1_complex() [2/2]

template<typename Scalar , typename ResScalar , typename ResPacket , int which>
EIGEN_ALWAYS_INLINE Packet2cf pset1_complex ( std::complex< float > &  alpha)

Definition at line 1358 of file MatrixVectorProduct.h.

1359 {
1360  Packet2cf ret;
1361  ret.v[COMPLEX_DELTA + 0] = pset1_realimag<Scalar, ResScalar>(alpha, (which & 0x01), (which & 0x04));
1362  ret.v[COMPLEX_DELTA + 1] = pset1_realimag<Scalar, ResScalar>(alpha, (which & 0x02), (which & 0x08));
1363  ret.v[2 - COMPLEX_DELTA] = ret.v[COMPLEX_DELTA + 0];
1364  ret.v[3 - COMPLEX_DELTA] = ret.v[COMPLEX_DELTA + 1];
1365  return ret;
1366 }

◆ pset1_realimag()

template<typename Scalar , typename ResScalar >
EIGEN_ALWAYS_INLINE Scalar pset1_realimag ( ResScalar &  alpha,
int  which,
int  conj 
)

Definition at line 1351 of file MatrixVectorProduct.h.

1352 {
1353  return (which) ? ((conj) ? -alpha.real() : alpha.real()) : ((conj) ? -alpha.imag() : alpha.imag());
1354 }
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_conjugate_op< typename Derived::Scalar >, const Derived > conj(const Eigen::ArrayBase< Derived > &x)

◆ pset_init()

template<typename Packet , typename LhsPacket , typename RhsPacket >
EIGEN_ALWAYS_INLINE Packet pset_init ( Packet &  c1)

Definition at line 1398 of file MatrixVectorProduct.h.

1399 {
1400  if (GEMV_IS_COMPLEX_COMPLEX) {
1401  EIGEN_UNUSED_VARIABLE(c1);
1402  return pset_zero<Packet>();
1403  }
1404  else
1405  {
1406  return c1; // Intentionally left uninitialized
1407  }
1408 }

◆ pset_zero()

template<typename Packet >
EIGEN_ALWAYS_INLINE Packet pset_zero ( )

Definition at line 1379 of file MatrixVectorProduct.h.

1380 {
1381  return pset1<Packet>(__UNPACK_TYPE__(Packet)(0));
1382 }
#define __UNPACK_TYPE__(PACKETNAME)

◆ pset_zero< Packet1cd >()

template<>
EIGEN_ALWAYS_INLINE Packet1cd pset_zero< Packet1cd > ( )

Definition at line 1391 of file MatrixVectorProduct.h.

1392 {
1393  return Packet1cd(pset1<Packet2d>(double(0)));
1394 }

◆ pset_zero< Packet2cf >()

template<>
EIGEN_ALWAYS_INLINE Packet2cf pset_zero< Packet2cf > ( )

Definition at line 1385 of file MatrixVectorProduct.h.

1386 {
1387  return Packet2cf(pset1<Packet4f>(float(0)));
1388 }

◆ pstoreu_pmadd_complex() [1/2]

template<typename Scalar , typename ScalarPacket , typename PResPacket , typename ResPacket , typename ResScalar , typename AlphaData >
EIGEN_ALWAYS_INLINE void pstoreu_pmadd_complex ( PResPacket &  c0,
AlphaData &  b0,
ResScalar *  res 
)

Definition at line 1432 of file MatrixVectorProduct.h.

1433 {
1434  PResPacket c2 = pcplxflipconj(c0);
1435  if (GEMV_IS_SCALAR) {
1436  ScalarPacket c4 = ploadu<ScalarPacket>(reinterpret_cast<Scalar*>(res));
1437  ScalarPacket c3 = pmadd_complex<ScalarPacket, AlphaData>(c0.v, c2.v, c4, b0);
1438  pstoreu(reinterpret_cast<Scalar*>(res), c3);
1439  } else {
1440  ScalarPacket c4 = pload_complex<ResPacket>(res);
1441  PResPacket c3 = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c0.v, c2.v, c4, b0));
1442  pstoreu(res, c3);
1443  }
1444 }

◆ pstoreu_pmadd_complex() [2/2]

template<typename ScalarPacket , typename PResPacket , typename ResPacket , typename ResScalar , typename AlphaData , Index ResPacketSize, Index iter2>
EIGEN_ALWAYS_INLINE void pstoreu_pmadd_complex ( PResPacket &  c0,
PResPacket &  c1,
AlphaData &  b0,
ResScalar *  res 
)

Definition at line 1447 of file MatrixVectorProduct.h.

1448 {
1449  PResPacket c2 = pcplxflipconj(c0);
1450  PResPacket c3 = pcplxflipconj(c1);
1451 #if !defined(_ARCH_PWR10)
1452  ScalarPacket c4 = pload_complex<ResPacket>(res + (iter2 * ResPacketSize));
1453  ScalarPacket c5 = pload_complex<ResPacket>(res + ((iter2 + 1) * ResPacketSize));
1454  PResPacket c6 = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c0.v, c2.v, c4, b0));
1455  PResPacket c7 = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c1.v, c3.v, c5, b0));
1456  pstoreu(res + (iter2 * ResPacketSize), c6);
1457  pstoreu(res + ((iter2 + 1) * ResPacketSize), c7);
1458 #else
1459  __vector_pair a = *reinterpret_cast<__vector_pair *>(res + (iter2 * ResPacketSize));
1460 #if EIGEN_COMP_LLVM
1461  PResPacket c6[2];
1462  __builtin_vsx_disassemble_pair(reinterpret_cast<void*>(c6), &a);
1463  c6[0] = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c0.v, c2.v, c6[0].v, b0));
1464  c6[1] = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c1.v, c3.v, c6[1].v, b0));
1465  GEMV_BUILDPAIR_MMA(a, c6[0].v, c6[1].v);
1466 #else
1467  if (GEMV_IS_COMPLEX_FLOAT) {
1468  __asm__ ("xvmaddasp %L0,%x1,%x2\n\txvmaddasp %0,%x1,%x3" : "+&d" (a) : "wa" (b0.separate.r.v), "wa" (c0.v), "wa" (c1.v));
1469  __asm__ ("xvmaddasp %L0,%x1,%x2\n\txvmaddasp %0,%x1,%x3" : "+&d" (a) : "wa" (b0.separate.i.v), "wa" (c2.v), "wa" (c3.v));
1470  } else {
1471  __asm__ ("xvmaddadp %L0,%x1,%x2\n\txvmaddadp %0,%x1,%x3" : "+&d" (a) : "wa" (b0.separate.r.v), "wa" (c0.v), "wa" (c1.v));
1472  __asm__ ("xvmaddadp %L0,%x1,%x2\n\txvmaddadp %0,%x1,%x3" : "+&d" (a) : "wa" (b0.separate.i.v), "wa" (c2.v), "wa" (c3.v));
1473  }
1474 #endif
1475  *reinterpret_cast<__vector_pair *>(res + (iter2 * ResPacketSize)) = a;
1476 #endif
1477 }
#define GEMV_BUILDPAIR_MMA(dst, src1, src2)

◆ storeBF16fromResult()

template<const Index size, bool inc, Index delta>
EIGEN_ALWAYS_INLINE void storeBF16fromResult ( bfloat16 *  dst,
Packet8bf  data,
Index  resInc,
Index  extra 
)

Definition at line 665 of file MatrixVectorProduct.h.

666 {
667  if (inc) {
668  if (size < 8) {
669  pscatter_partial(dst + delta*resInc, data, resInc, extra);
670  } else {
671  pscatter(dst + delta*resInc, data, resInc);
672  }
673  } else {
674  if (size < 8) {
675  pstoreu_partial(dst + delta, data, extra);
676  } else {
677  pstoreu(dst + delta, data);
678  }
679  }
680 }
void pscatter(Scalar *to, const Packet &from, Index stride, typename unpacket_traits< Packet >::mask_t umask)
void pscatter_partial(Scalar *to, const Packet &from, Index stride, const Index n)

◆ storeMaddData() [1/2]

template<typename ResPacket , typename ResScalar >
EIGEN_ALWAYS_INLINE void storeMaddData ( ResScalar *  res,
ResPacket &  palpha,
ResPacket &  data 
)

Definition at line 69 of file MatrixVectorProduct.h.

70 {
71  pstoreu(res, pmadd(data, palpha, ploadu<ResPacket>(res)));
72 }

◆ storeMaddData() [2/2]

template<typename ResScalar >
EIGEN_ALWAYS_INLINE void storeMaddData ( ResScalar *  res,
ResScalar &  alpha,
ResScalar &  data 
)

Definition at line 75 of file MatrixVectorProduct.h.

76 {
77  *res += (alpha * data);
78 }

◆ vecColLoopVSX()

template<Index num_acc, typename LhsMapper , typename RhsMapper , bool zero, bool linear>
EIGEN_ALWAYS_INLINE void vecColLoopVSX ( Index  j,
LhsMapper &  lhs,
RhsMapper &  rhs,
Packet4f(&)  acc[num_acc][2] 
)

Definition at line 551 of file MatrixVectorProduct.h.

552 {
553  Packet4f a0[num_acc][2], b0[2];
554  Packet8bf b2 = loadColData<RhsMapper, linear>(rhs, j);
555 
556  b0[0] = oneConvertBF16Perm(b2.m_val, p16uc_MERGE16_32_V1);
557  if (!zero) {
558  b0[1] = oneConvertBF16Perm(b2.m_val, p16uc_MERGE16_32_V2);
559  }
560 
561  LhsMapper lhs2 = lhs.getSubMapper(0, j);
562  for(Index k = 0; k < num_acc; k += 2) {
563  loadVecLoopVSX<num_acc, LhsMapper, zero>(k, lhs2, a0);
564  }
565 
566  multVecVSX<num_acc, zero>(acc, a0, b0);
567 }
static Packet16uc p16uc_MERGE16_32_V1
static Packet16uc p16uc_MERGE16_32_V2
EIGEN_ALWAYS_INLINE Packet4f oneConvertBF16Perm(Packet8us data, Packet16uc mask)

◆ vecVSXLoop()

template<Index num_acc, typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void vecVSXLoop ( Index  cols,
const LhsMapper &  lhs,
RhsMapper &  rhs,
Packet4f(&)  acc[num_acc][2],
Index  extra_cols 
)

Definition at line 867 of file MatrixVectorProduct.h.

868 {
869  Index j = 0;
870  for(; j + 8 <= cols; j += 8){
871  multVSXVecLoop<num_acc, LhsMapper, RhsMapper, false>(acc, lhs, rhs, j, extra_cols);
872  }
873 
874  if (extra_cols) {
875  multVSXVecLoop<num_acc, LhsMapper, RhsMapper, true>(acc, lhs, rhs, j, extra_cols);
876  }
877 }

Variable Documentation

◆ p16uc_COMPLEX32_CONJ_XOR

const Packet16uc p16uc_COMPLEX32_CONJ_XOR

Definition at line 997 of file MatrixVectorProduct.h.

◆ p16uc_COMPLEX32_CONJ_XOR2

const Packet16uc p16uc_COMPLEX32_CONJ_XOR2

Definition at line 999 of file MatrixVectorProduct.h.

◆ p16uc_COMPLEX32_NEGATE

const Packet16uc p16uc_COMPLEX32_NEGATE

Definition at line 1001 of file MatrixVectorProduct.h.

◆ p16uc_COMPLEX32_XORFLIP

const Packet16uc p16uc_COMPLEX32_XORFLIP

Definition at line 986 of file MatrixVectorProduct.h.

◆ p16uc_COMPLEX64_CONJ_XOR

const Packet16uc p16uc_COMPLEX64_CONJ_XOR

Definition at line 998 of file MatrixVectorProduct.h.

◆ p16uc_COMPLEX64_CONJ_XOR2

const Packet16uc p16uc_COMPLEX64_CONJ_XOR2

Definition at line 1000 of file MatrixVectorProduct.h.

◆ p16uc_COMPLEX64_NEGATE

const Packet16uc p16uc_COMPLEX64_NEGATE

Definition at line 1002 of file MatrixVectorProduct.h.

◆ p16uc_COMPLEX64_XORFLIP

const Packet16uc p16uc_COMPLEX64_XORFLIP

Definition at line 987 of file MatrixVectorProduct.h.

◆ p16uc_MERGE16_32_V1

Packet16uc p16uc_MERGE16_32_V1
static

Definition at line 491 of file MatrixVectorProduct.h.

◆ p16uc_MERGE16_32_V2

Packet16uc p16uc_MERGE16_32_V2
static

Definition at line 492 of file MatrixVectorProduct.h.

◆ p16uc_MERGEE

const Packet16uc p16uc_MERGEE

Definition at line 1169 of file MatrixVectorProduct.h.

◆ p16uc_MERGEO

const Packet16uc p16uc_MERGEO

Definition at line 1171 of file MatrixVectorProduct.h.