Classes
struct	alpha_store< PResPacket, ResPacket, ResScalar, Scalar >

struct	loadColData_impl< RhsMapper, linear >

struct	loadColData_impl< RhsMapper, true >

struct	alpha_store< PResPacket, ResPacket, ResScalar, Scalar >::ri

struct	ScalarBlock< Scalar, N >

Macros
#define	COMPLEX_DELTA

#define	EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL(Scalar, LhsScalar, RhsScalar)

#define	EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW(Scalar, LhsScalar, RhsScalar)

#define	EIGEN_POWER_GEMV_PREFETCH(p)

#define	EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL(Scalar)

#define	EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL_BFLOAT16()

#define	EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW(Scalar)

#define	EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW_BFLOAT16()

#define	gemv_bf16_col

#define	gemv_bf16_row

#define	GEMV_BUILDPAIR_MMA(dst, src1, src2)

#define	GEMV_GETN(N)

#define	GEMV_GETN_COMPLEX(N)

#define	GEMV_INIT(iter, N)

#define	GEMV_INIT_COMPLEX(iter, N)

#define	GEMV_INIT_COMPLEX_OLD(iter, N)

#define	GEMV_INIT_ROW(iter, N)

#define	GEMV_IS_COMPLEX_COMPLEX

#define	GEMV_IS_COMPLEX_FLOAT

#define	GEMV_IS_FLOAT

#define	GEMV_IS_SCALAR

#define	GEMV_LOADPACKET_COL(iter)

#define	GEMV_LOADPACKET_COL_COMPLEX(iter)

#define	GEMV_LOADPACKET_COL_COMPLEX_DATA(iter)

#define	GEMV_LOADPACKET_ROW(iter)

#define	GEMV_LOADPACKET_ROW_COMPLEX(iter)

#define	GEMV_LOADPACKET_ROW_COMPLEX_DATA(iter)

#define	GEMV_LOADPACKET_ROW_COMPLEX_OLD(iter)

#define	GEMV_MULT(iter1, iter2, iter3, N)

#define	GEMV_MULT_COMPLEX(iter1, iter2, iter3, N)

#define	GEMV_MULT_COMPLEX_COMPLEX(LhsType, RhsType, ResType)

#define	GEMV_MULT_COMPLEX_REAL(LhsType, RhsType, ResType1, ResType2)

#define	GEMV_MULT_REAL_COMPLEX(LhsType, RhsType, ResType)

#define	GEMV_PREDUX2(iter1, iter2, iter3, N)

#define	GEMV_PREDUX4_COMPLEX(iter1, iter2, iter3, N)

#define	GEMV_PREDUX4_COMPLEX_OLD(iter1, iter2, iter3, N)

#define	GEMV_PREFETCH(iter, N)

#define	GEMV_PROCESS_COL(N)

#define	GEMV_PROCESS_COL_COMPLEX(N)

#define	GEMV_PROCESS_COL_COMPLEX_ONE(N)

#define	GEMV_PROCESS_COL_ONE(N)

#define	GEMV_PROCESS_END_ROW_COMPLEX(N)

#define	GEMV_PROCESS_ROW(N)

#define	GEMV_PROCESS_ROW_COMPLEX(N)

#define	GEMV_PROCESS_ROW_COMPLEX_IS_NEW

#define	GEMV_PROCESS_ROW_COMPLEX_ONE(N)

#define	GEMV_PROCESS_ROW_COMPLEX_ONE_NEW(N)

#define	GEMV_PROCESS_ROW_COMPLEX_ONE_OLD(N)

#define	GEMV_PROCESS_ROW_COMPLEX_PREDUX(iter)

#define	GEMV_PROCESS_ROW_COMPLEX_PREDUX_NEW(iter)

#define	GEMV_PROCESS_ROW_COMPLEX_PREDUX_OLD(iter)

#define	GEMV_PROCESS_ROW_COMPLEX_SINGLE(N)

#define	GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW(N)

#define	GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD(N)

#define	GEMV_PROCESS_ROW_COMPLEX_SINGLE_WORK(which, N)

#define	GEMV_STORE_COL(iter, N)

#define	GEMV_STORE_COL_COMPLEX(iter, N)

#define	GEMV_STORE_ROW(iter1, iter2, iter3, N)

#define	GEMV_STORE_ROW_COMPLEX(iter1, iter2, iter3, N)

#define	GEMV_UNROLL(func, N)

#define	GEMV_UNROLL_HALF(func, N)

#define	GEMV_UNROLL_ROW(func, N)

#define	GEMV_UNROLL_ROW_HALF(func, N)

#define	GEMV_WORK_COL(iter, N)

#define	GEMV_WORK_COL_COMPLEX(iter, N)

#define	GEMV_WORK_ROW(iter, N)

#define	GEMV_WORK_ROW_COMPLEX(iter, N)

#define	GEMV_WORK_ROW_COMPLEX_OLD(iter, N)

#define	MAX_BFLOAT16_VEC_ACC_VSX

Functions
template<Index num_acc>
EIGEN_ALWAYS_INLINE void	addResultsVSX (Packet4f(&acc)[num_acc][2])

template<typename LhsMapper , typename RhsMapper , bool linear>
EIGEN_ALWAYS_INLINE void	calcVSXVecColLoops (Index cend, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)

template<typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void	calcVSXVecLoops (Index cols, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)

template<const Index num_acc, typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
void	colVSXVecColLoopBody (Index &row, Index cend, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)

template<typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
EIGEN_ALWAYS_INLINE void	colVSXVecColLoopBodyExtra (Index &row, Index cend, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)

template<const Index num_acc, typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>
EIGEN_ALWAYS_INLINE void	colVSXVecColLoopBodyExtraN (Index &row, Index cend, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)

template<const Index num_acc, typename LhsMapper , typename RhsMapper >
void	colVSXVecLoopBody (Index &row, Index cols, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)

template<typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void	colVSXVecLoopBodyExtra (Index &row, Index cols, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)

template<const Index num_acc, typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void	colVSXVecLoopBodyExtraN (Index &row, Index cols, Index rows, LhsMapper &lhs, RhsMapper &rhs, const Packet4f pAlpha, float *result)

template<bool inc = false>
EIGEN_ALWAYS_INLINE void	convertArrayPointerF32toBF16VSX (float *result, Index rows, bfloat16 *dst, Index resInc=1)

template<const Index size, bool inc = false>
EIGEN_ALWAYS_INLINE void	convertPointerF32toBF16VSX (Index &i, float *result, Index rows, bfloat16 *&dst, Index resInc=1)

template<typename LhsMapper , typename RhsMapper >
void	gemv_bfloat16_col (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, bfloat16 *res, Index resIncr, bfloat16 alpha)

template<typename LhsMapper , typename RhsMapper >
void	gemv_bfloat16_row (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, bfloat16 *res, Index resIncr, bfloat16 alpha)

template<typename LhsScalar , typename LhsMapper , typename RhsScalar , typename RhsMapper , typename ResScalar >
void	gemv_col (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, ResScalar *res, Index resIncr, ResScalar alpha)

template<typename Scalar , typename LhsScalar , typename LhsMapper , bool ConjugateLhs, bool LhsIsReal, typename RhsScalar , typename RhsMapper , bool ConjugateRhs, bool RhsIsReal, typename ResScalar >
void	gemv_complex_col (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, ResScalar *res, Index resIncr, ResScalar alpha)

template<typename Scalar , typename LhsScalar , typename LhsMapper , bool ConjugateLhs, bool LhsIsReal, typename RhsScalar , typename RhsMapper , bool ConjugateRhs, bool RhsIsReal, typename ResScalar >
void	gemv_complex_row (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, ResScalar *res, Index resIncr, ResScalar alpha)

template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void	gemv_mult_complex_complex (LhsPacket &a0, RhsScalar *b, PResPacket &c0, ResPacket &c1)

template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void	gemv_mult_complex_real (LhsPacket &a0, RhsScalar *b, PResPacket &c0)

template<typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void	gemv_mult_generic (LhsPacket &a0, RhsScalar *b, PResPacket &c0)

template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>
EIGEN_ALWAYS_INLINE void	gemv_mult_real_complex (LhsPacket &a0, RhsScalar *b, PResPacket &c0)

template<typename LhsScalar , typename LhsMapper , typename RhsScalar , typename RhsMapper , typename ResScalar >
void	gemv_row (Index rows, Index cols, const LhsMapper &alhs, const RhsMapper &rhs, ResScalar *res, Index resIncr, ResScalar alpha)

template<typename RhsMapper , bool linear>
EIGEN_ALWAYS_INLINE Packet8bf	loadColData (RhsMapper &rhs, Index j)

template<typename Scalar , typename LhsScalar , typename LhsMapper , typename LhsPacket >
EIGEN_ALWAYS_INLINE LhsPacket	loadLhsPacket (LhsMapper &lhs, Index i, Index j)

EIGEN_ALWAYS_INLINE Packet8us	loadPacketPartialZero (Packet8us data, Index extra_cols)

template<Index num_acc, typename LhsMapper , bool zero>
EIGEN_ALWAYS_INLINE void	loadVecLoopVSX (Index k, LhsMapper &lhs, Packet4f(&a0)[num_acc][2])

template<Index num_acc, bool zero>
EIGEN_ALWAYS_INLINE void	multVecVSX (Packet4f(&acc)[num_acc][2], Packet4f(&a0)[num_acc][2], Packet4f(&b0)[2])

template<Index num_acc, typename LhsMapper , typename RhsMapper , bool extra>
EIGEN_ALWAYS_INLINE void	multVSXVecLoop (Packet4f(&acc)[num_acc][2], const LhsMapper &lhs, RhsMapper &rhs, Index j, Index extra_cols)

template<bool extraRows>
EIGEN_ALWAYS_INLINE void	outputVecCol (Packet4f acc, float *result, Packet4f pAlpha, Index extra_rows)

template<Index num_acc, bool extraRows, Index size>
EIGEN_ALWAYS_INLINE void	outputVecColResults (Packet4f(&acc)[num_acc][size], float *result, Packet4f pAlpha, Index extra_rows)

template<Index num_acc, Index size>
EIGEN_ALWAYS_INLINE void	outputVecResults (Packet4f(&acc)[num_acc][size], float *result, Packet4f pAlpha)

EIGEN_ALWAYS_INLINE Packet1cd	padd (Packet1cd &a, std::complex< double > &b)

EIGEN_ALWAYS_INLINE Packet2cf	padd (Packet2cf &a, std::complex< float > &b)

EIGEN_ALWAYS_INLINE Packet1cd	pconj2 (const Packet1cd &a)

EIGEN_ALWAYS_INLINE Packet2cf	pconj2 (const Packet2cf &a)

EIGEN_ALWAYS_INLINE Packet1cd	pconjinv (const Packet1cd &a)

EIGEN_ALWAYS_INLINE Packet2cf	pconjinv (const Packet2cf &a)

EIGEN_ALWAYS_INLINE Packet1cd	pcplxconjflip (Packet1cd a)

EIGEN_ALWAYS_INLINE Packet2cf	pcplxconjflip (Packet2cf a)

EIGEN_ALWAYS_INLINE Packet1cd	pcplxflip2 (Packet1cd a)

EIGEN_ALWAYS_INLINE Packet2cf	pcplxflip2 (Packet2cf a)

EIGEN_ALWAYS_INLINE Packet1cd	pcplxflipconj (Packet1cd a)

EIGEN_ALWAYS_INLINE Packet2cf	pcplxflipconj (Packet2cf a)

EIGEN_ALWAYS_INLINE Packet1cd	pcplxflipnegate (Packet1cd a)

EIGEN_ALWAYS_INLINE Packet2cf	pcplxflipnegate (Packet2cf a)

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d	pload_complex (Packet1cd *src)

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f	pload_complex (Packet2cf *src)

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d	pload_complex (std::complex< double > *src)

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f	pload_complex (std::complex< float > *src)

EIGEN_ALWAYS_INLINE Packet2d	pload_complex_full (std::complex< double > *src)

EIGEN_ALWAYS_INLINE Packet4f	pload_complex_full (std::complex< float > *src)

EIGEN_ALWAYS_INLINE Packet2d	pload_complex_full_row (std::complex< double > *src)

EIGEN_ALWAYS_INLINE Packet4f	pload_complex_full_row (std::complex< float > *src)

EIGEN_ALWAYS_INLINE Packet4f	pload_complex_half (std::complex< float > *src)

EIGEN_ALWAYS_INLINE Packet2d	pload_real (double *src)

EIGEN_ALWAYS_INLINE Packet4f	pload_real (float *src)

EIGEN_ALWAYS_INLINE Packet2d	pload_real (Packet2d &src)

EIGEN_ALWAYS_INLINE Packet4f	pload_real (Packet4f &src)

EIGEN_ALWAYS_INLINE Packet2d	pload_real_full (double *src)

EIGEN_ALWAYS_INLINE Packet4f	pload_real_full (float *src)

EIGEN_ALWAYS_INLINE Packet2d	pload_real_full (std::complex< double > *src)

EIGEN_ALWAYS_INLINE Packet4f	pload_real_full (std::complex< float > *src)

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet2d	pload_real_row (double *src)

template<typename ResPacket >
EIGEN_ALWAYS_INLINE Packet4f	pload_real_row (float *src)

template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void	pload_realimag (RhsScalar *src, Packet2d &r, Packet2d &i)

template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void	pload_realimag (RhsScalar *src, Packet4f &r, Packet4f &i)

EIGEN_ALWAYS_INLINE Packet2d	pload_realimag_combine (std::complex< double > *src)

EIGEN_ALWAYS_INLINE Packet4f	pload_realimag_combine (std::complex< float > *src)

EIGEN_ALWAYS_INLINE Packet2d	pload_realimag_combine_row (std::complex< double > *src)

EIGEN_ALWAYS_INLINE Packet4f	pload_realimag_combine_row (std::complex< float > *src)

template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void	pload_realimag_row (RhsScalar *src, Packet2d &r, Packet2d &i)

template<typename RhsScalar >
EIGEN_ALWAYS_INLINE void	pload_realimag_row (RhsScalar *src, Packet4f &r, Packet4f &i)

template<typename ScalarPacket , typename AlphaData >
EIGEN_ALWAYS_INLINE ScalarPacket	pmadd_complex (ScalarPacket &c0, ScalarPacket &c2, ScalarPacket &c4, AlphaData &b0)

template<typename ComplexPacket , typename RealPacket , bool ConjugateLhs, bool ConjugateRhs, bool Negate>
EIGEN_ALWAYS_INLINE RealPacket	pmadd_complex_complex (RealPacket &a, RealPacket &b, RealPacket &c)

template<typename ComplexPacket , typename RealPacket , bool Conjugate>
EIGEN_ALWAYS_INLINE RealPacket	pmadd_complex_real (RealPacket &a, RealPacket &b, RealPacket &c)

EIGEN_ALWAYS_INLINE Packet1cd	pnegate2 (Packet1cd a)

EIGEN_ALWAYS_INLINE Packet2cf	pnegate2 (Packet2cf a)

template<typename ResScalar , typename PResPacket , typename ResPacket , typename LhsPacket , typename RhsPacket >
EIGEN_ALWAYS_INLINE ScalarBlock< ResScalar, 2 >	predux_complex (PResPacket &a0, PResPacket &b0, ResPacket &a1, ResPacket &b1)

template<typename ResScalar , typename ResPacket >
EIGEN_ALWAYS_INLINE ScalarBlock< ResScalar, 2 >	predux_complex (ResPacket &a, ResPacket &b)

template<typename ResScalar , typename ResPacket >
EIGEN_ALWAYS_INLINE ScalarBlock< ResScalar, 2 >	predux_real (ResPacket &a, ResPacket &b)

template<Index num_acc>
EIGEN_ALWAYS_INLINE void	preduxVecResults2VSX (Packet4f(&acc)[num_acc][2], Index k)

template<Index num_acc>
EIGEN_ALWAYS_INLINE void	preduxVecResultsVSX (Packet4f(&acc)[num_acc][2])

template<typename Scalar , typename ResScalar , typename ResPacket , int which>
EIGEN_ALWAYS_INLINE Packet1cd	pset1_complex (std::complex< double > &alpha)

template<typename Scalar , typename ResScalar , typename ResPacket , int which>
EIGEN_ALWAYS_INLINE Packet2cf	pset1_complex (std::complex< float > &alpha)

template<typename Scalar , typename ResScalar >
EIGEN_ALWAYS_INLINE Scalar	pset1_realimag (ResScalar &alpha, int which, int conj)

template<typename Packet , typename LhsPacket , typename RhsPacket >
EIGEN_ALWAYS_INLINE Packet	pset_init (Packet &c1)

template<typename Packet >
EIGEN_ALWAYS_INLINE Packet	pset_zero ()

template<>
EIGEN_ALWAYS_INLINE Packet1cd	pset_zero< Packet1cd > ()

template<>
EIGEN_ALWAYS_INLINE Packet2cf	pset_zero< Packet2cf > ()

template<typename Scalar , typename ScalarPacket , typename PResPacket , typename ResPacket , typename ResScalar , typename AlphaData >
EIGEN_ALWAYS_INLINE void	pstoreu_pmadd_complex (PResPacket &c0, AlphaData &b0, ResScalar *res)

template<typename ScalarPacket , typename PResPacket , typename ResPacket , typename ResScalar , typename AlphaData , Index ResPacketSize, Index iter2>
EIGEN_ALWAYS_INLINE void	pstoreu_pmadd_complex (PResPacket &c0, PResPacket &c1, AlphaData &b0, ResScalar *res)

template<const Index size, bool inc, Index delta>
EIGEN_ALWAYS_INLINE void	storeBF16fromResult (bfloat16 *dst, Packet8bf data, Index resInc, Index extra)

template<typename ResPacket , typename ResScalar >
EIGEN_ALWAYS_INLINE void	storeMaddData (ResScalar *res, ResPacket &palpha, ResPacket &data)

template<typename ResScalar >
EIGEN_ALWAYS_INLINE void	storeMaddData (ResScalar *res, ResScalar &alpha, ResScalar &data)

template<Index num_acc, typename LhsMapper , typename RhsMapper , bool zero, bool linear>
EIGEN_ALWAYS_INLINE void	vecColLoopVSX (Index j, LhsMapper &lhs, RhsMapper &rhs, Packet4f(&acc)[num_acc][2])

template<Index num_acc, typename LhsMapper , typename RhsMapper >
EIGEN_ALWAYS_INLINE void	vecVSXLoop (Index cols, const LhsMapper &lhs, RhsMapper &rhs, Packet4f(&acc)[num_acc][2], Index extra_cols)

Variables
const Packet16uc	p16uc_COMPLEX32_CONJ_XOR

const Packet16uc	p16uc_COMPLEX32_CONJ_XOR2

const Packet16uc	p16uc_COMPLEX32_NEGATE

const Packet16uc	p16uc_COMPLEX32_XORFLIP

const Packet16uc	p16uc_COMPLEX64_CONJ_XOR

const Packet16uc	p16uc_COMPLEX64_CONJ_XOR2

const Packet16uc	p16uc_COMPLEX64_NEGATE

const Packet16uc	p16uc_COMPLEX64_XORFLIP

static Packet16uc	p16uc_MERGE16_32_V1

static Packet16uc	p16uc_MERGE16_32_V2

const Packet16uc	p16uc_MERGEE

const Packet16uc	p16uc_MERGEO

Macro Definition Documentation

◆ COMPLEX_DELTA

#define COMPLEX_DELTA

Definition at line 1008 of file MatrixVectorProduct.h.

◆ EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL

#define EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL	(	Scalar,
		LhsScalar,
		RhsScalar
	)

Definition at line 2912 of file MatrixVectorProduct.h.

◆ EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW

#define EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW	(	Scalar,
		LhsScalar,
		RhsScalar
	)

Definition at line 2928 of file MatrixVectorProduct.h.

◆ EIGEN_POWER_GEMV_PREFETCH

#define EIGEN_POWER_GEMV_PREFETCH ( p )

Definition at line 32 of file MatrixVectorProduct.h.

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL

#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL ( Scalar )

Definition at line 2546 of file MatrixVectorProduct.h.

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL_BFLOAT16

#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL_BFLOAT16 ( )

Definition at line 2591 of file MatrixVectorProduct.h.

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW

#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW ( Scalar )

Definition at line 2562 of file MatrixVectorProduct.h.

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW_BFLOAT16

#define EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW_BFLOAT16 ( )

Definition at line 2605 of file MatrixVectorProduct.h.

◆ gemv_bf16_col

#define gemv_bf16_col

Definition at line 2587 of file MatrixVectorProduct.h.

◆ gemv_bf16_row

#define gemv_bf16_row

Definition at line 2588 of file MatrixVectorProduct.h.

◆ GEMV_BUILDPAIR_MMA

#define GEMV_BUILDPAIR_MMA	(	dst,
		src1,
		src2
	)

Definition at line 53 of file MatrixVectorProduct.h.

◆ GEMV_GETN

#define GEMV_GETN ( N )

Definition at line 87 of file MatrixVectorProduct.h.

◆ GEMV_GETN_COMPLEX

#define GEMV_GETN_COMPLEX ( N )

Definition at line 1918 of file MatrixVectorProduct.h.

◆ GEMV_INIT

#define GEMV_INIT	(	iter,
		N
	)

Definition at line 299 of file MatrixVectorProduct.h.

◆ GEMV_INIT_COMPLEX

#define GEMV_INIT_COMPLEX	(	iter,
		N
	)

Definition at line 2075 of file MatrixVectorProduct.h.

◆ GEMV_INIT_COMPLEX_OLD

#define GEMV_INIT_COMPLEX_OLD	(	iter,
		N
	)

Definition at line 2756 of file MatrixVectorProduct.h.

◆ GEMV_INIT_ROW

#define GEMV_INIT_ROW	(	iter,
		N
	)

Definition at line 2426 of file MatrixVectorProduct.h.

◆ GEMV_IS_COMPLEX_COMPLEX

#define GEMV_IS_COMPLEX_COMPLEX

Definition at line 62 of file MatrixVectorProduct.h.

◆ GEMV_IS_COMPLEX_FLOAT

#define GEMV_IS_COMPLEX_FLOAT

Definition at line 65 of file MatrixVectorProduct.h.

◆ GEMV_IS_FLOAT

#define GEMV_IS_FLOAT

Definition at line 63 of file MatrixVectorProduct.h.

◆ GEMV_IS_SCALAR

#define GEMV_IS_SCALAR

Definition at line 64 of file MatrixVectorProduct.h.

◆ GEMV_LOADPACKET_COL

#define GEMV_LOADPACKET_COL ( iter )

Definition at line 89 of file MatrixVectorProduct.h.

◆ GEMV_LOADPACKET_COL_COMPLEX

#define GEMV_LOADPACKET_COL_COMPLEX ( iter )

Definition at line 1920 of file MatrixVectorProduct.h.

◆ GEMV_LOADPACKET_COL_COMPLEX_DATA

#define GEMV_LOADPACKET_COL_COMPLEX_DATA ( iter )

Definition at line 1923 of file MatrixVectorProduct.h.

◆ GEMV_LOADPACKET_ROW

#define GEMV_LOADPACKET_ROW ( iter )

Definition at line 2385 of file MatrixVectorProduct.h.

◆ GEMV_LOADPACKET_ROW_COMPLEX

#define GEMV_LOADPACKET_ROW_COMPLEX ( iter )

Definition at line 2632 of file MatrixVectorProduct.h.

◆ GEMV_LOADPACKET_ROW_COMPLEX_DATA

#define GEMV_LOADPACKET_ROW_COMPLEX_DATA ( iter )

Definition at line 2635 of file MatrixVectorProduct.h.

◆ GEMV_LOADPACKET_ROW_COMPLEX_OLD

#define GEMV_LOADPACKET_ROW_COMPLEX_OLD ( iter )

Definition at line 2753 of file MatrixVectorProduct.h.

◆ GEMV_MULT

#define GEMV_MULT	(	iter1,
		iter2,
		iter3,
		N
	)

Definition at line 2446 of file MatrixVectorProduct.h.

◆ GEMV_MULT_COMPLEX

#define GEMV_MULT_COMPLEX	(	iter1,
		iter2,
		iter3,
		N
	)

Definition at line 2710 of file MatrixVectorProduct.h.

◆ GEMV_MULT_COMPLEX_COMPLEX

#define GEMV_MULT_COMPLEX_COMPLEX	(	LhsType,
		RhsType,
		ResType
	)

Definition at line 1581 of file MatrixVectorProduct.h.

◆ GEMV_MULT_COMPLEX_REAL

#define GEMV_MULT_COMPLEX_REAL	(	LhsType,
		RhsType,
		ResType1,
		ResType2
	)

Definition at line 1603 of file MatrixVectorProduct.h.

◆ GEMV_MULT_REAL_COMPLEX

#define GEMV_MULT_REAL_COMPLEX	(	LhsType,
		RhsType,
		ResType
	)

Definition at line 1591 of file MatrixVectorProduct.h.

◆ GEMV_PREDUX2

#define GEMV_PREDUX2	(	iter1,
		iter2,
		iter3,
		N
	)

Definition at line 2438 of file MatrixVectorProduct.h.

◆ GEMV_PREDUX4_COMPLEX

#define GEMV_PREDUX4_COMPLEX	(	iter1,
		iter2,
		iter3,
		N
	)

Definition at line 2703 of file MatrixVectorProduct.h.

◆ GEMV_PREDUX4_COMPLEX_OLD

#define GEMV_PREDUX4_COMPLEX_OLD	(	iter1,
		iter2,
		iter3,
		N
	)

Definition at line 2770 of file MatrixVectorProduct.h.

◆ GEMV_PREFETCH

#define GEMV_PREFETCH	(	iter,
		N
	)

Definition at line 312 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_COL

#define GEMV_PROCESS_COL ( N )

Definition at line 341 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_COL_COMPLEX

#define GEMV_PROCESS_COL_COMPLEX ( N )

Definition at line 2129 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_COL_COMPLEX_ONE

#define GEMV_PROCESS_COL_COMPLEX_ONE ( N )

Definition at line 2101 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_COL_ONE

#define GEMV_PROCESS_COL_ONE ( N )

Definition at line 326 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_END_ROW_COMPLEX

#define GEMV_PROCESS_END_ROW_COMPLEX ( N )

Definition at line 2646 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW

#define GEMV_PROCESS_ROW ( N )

Definition at line 2459 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX

#define GEMV_PROCESS_ROW_COMPLEX ( N )

Definition at line 2829 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_IS_NEW

#define GEMV_PROCESS_ROW_COMPLEX_IS_NEW

Definition at line 2799 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_ONE

#define GEMV_PROCESS_ROW_COMPLEX_ONE ( N )

Definition at line 2810 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_ONE_NEW

#define GEMV_PROCESS_ROW_COMPLEX_ONE_NEW ( N )

Definition at line 2727 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_ONE_OLD

#define GEMV_PROCESS_ROW_COMPLEX_ONE_OLD ( N )

Definition at line 2786 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_PREDUX

#define GEMV_PROCESS_ROW_COMPLEX_PREDUX ( iter )

Definition at line 2817 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_PREDUX_NEW

#define GEMV_PROCESS_ROW_COMPLEX_PREDUX_NEW ( iter )

Definition at line 2734 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_PREDUX_OLD

#define GEMV_PROCESS_ROW_COMPLEX_PREDUX_OLD ( iter )

Definition at line 2793 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE

#define GEMV_PROCESS_ROW_COMPLEX_SINGLE ( N )

Definition at line 2803 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW

#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW ( N )

Definition at line 2722 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD

#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD ( N )

Definition at line 2778 of file MatrixVectorProduct.h.

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE_WORK

#define GEMV_PROCESS_ROW_COMPLEX_SINGLE_WORK	(	which,
		N
	)

Definition at line 2638 of file MatrixVectorProduct.h.

◆ GEMV_STORE_COL

#define GEMV_STORE_COL	(	iter,
		N
	)

Definition at line 320 of file MatrixVectorProduct.h.

◆ GEMV_STORE_COL_COMPLEX

#define GEMV_STORE_COL_COMPLEX	(	iter,
		N
	)

Definition at line 2092 of file MatrixVectorProduct.h.

◆ GEMV_STORE_ROW

#define GEMV_STORE_ROW	(	iter1,
		iter2,
		iter3,
		N
	)

Definition at line 2452 of file MatrixVectorProduct.h.

◆ GEMV_STORE_ROW_COMPLEX

#define GEMV_STORE_ROW_COMPLEX	(	iter1,
		iter2,
		iter3,
		N
	)

Definition at line 2716 of file MatrixVectorProduct.h.

◆ GEMV_UNROLL

#define GEMV_UNROLL	(	func,
		N
	)

Definition at line 80 of file MatrixVectorProduct.h.

◆ GEMV_UNROLL_HALF

#define GEMV_UNROLL_HALF	(	func,
		N
	)

Definition at line 84 of file MatrixVectorProduct.h.

◆ GEMV_UNROLL_ROW

#define GEMV_UNROLL_ROW	(	func,
		N
	)

Definition at line 2379 of file MatrixVectorProduct.h.

◆ GEMV_UNROLL_ROW_HALF

#define GEMV_UNROLL_ROW_HALF	(	func,
		N
	)

Definition at line 2382 of file MatrixVectorProduct.h.

◆ GEMV_WORK_COL

#define GEMV_WORK_COL	(	iter,
		N
	)

Definition at line 315 of file MatrixVectorProduct.h.

◆ GEMV_WORK_COL_COMPLEX

#define GEMV_WORK_COL_COMPLEX	(	iter,
		N
	)

Definition at line 2084 of file MatrixVectorProduct.h.

◆ GEMV_WORK_ROW

#define GEMV_WORK_ROW	(	iter,
		N
	)

Definition at line 2433 of file MatrixVectorProduct.h.

◆ GEMV_WORK_ROW_COMPLEX

#define GEMV_WORK_ROW_COMPLEX	(	iter,
		N
	)

Definition at line 2697 of file MatrixVectorProduct.h.

◆ GEMV_WORK_ROW_COMPLEX_OLD

#define GEMV_WORK_ROW_COMPLEX_OLD	(	iter,
		N
	)

Definition at line 2764 of file MatrixVectorProduct.h.

◆ MAX_BFLOAT16_VEC_ACC_VSX

#define MAX_BFLOAT16_VEC_ACC_VSX

Definition at line 578 of file MatrixVectorProduct.h.

Function Documentation

◆ addResultsVSX()

template<Index num_acc>

EIGEN_ALWAYS_INLINE void addResultsVSX ( Packet4f(&acc)[num_acc][2] )

Definition at line 570 of file MatrixVectorProduct.h.

 {
   for(Index i = 0; i < num_acc; i++) {
     acc[i][0] = acc[i][0] + acc[i][1];
   }
 }

Code

◆ calcVSXVecColLoops()

template<typename LhsMapper , typename RhsMapper , bool linear>

EIGEN_ALWAYS_INLINE void calcVSXVecColLoops	(	Index	cend,
		Index	rows,
		LhsMapper &	lhs,
		RhsMapper &	rhs,
		const Packet4f	pAlpha,
		float *	result
	)

Definition at line 650 of file MatrixVectorProduct.h.

 {
   Index row = 0;
   if (rows >= (MAX_BFLOAT16_VEC_ACC_VSX * 4)) {
     colVSXVecColLoopBody<MAX_BFLOAT16_VEC_ACC_VSX, LhsMapper, RhsMapper, false, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
     result += row;
   }
   if (rows & 3) {
     colVSXVecColLoopBodyExtra<LhsMapper, RhsMapper, true, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
   } else {
     colVSXVecColLoopBodyExtra<LhsMapper, RhsMapper, false, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
   }
 }

Code

◆ calcVSXVecLoops()

template<typename LhsMapper , typename RhsMapper >

EIGEN_ALWAYS_INLINE void calcVSXVecLoops	(	Index	cols,
		Index	rows,
		LhsMapper &	lhs,
		RhsMapper &	rhs,
		const Packet4f	pAlpha,
		float *	result
	)

Definition at line 940 of file MatrixVectorProduct.h.

 {
   Index row = 0;
   if (rows >= MAX_BFLOAT16_VEC_ACC_VSX) {
     colVSXVecLoopBody<MAX_BFLOAT16_VEC_ACC_VSX, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
     result += row;
   }
   colVSXVecLoopBodyExtra<LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
 }

Code

◆ colVSXVecColLoopBody()

template<const Index num_acc, typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>

void colVSXVecColLoopBody	(	Index &	row,
		Index	cend,
		Index	rows,
		LhsMapper &	lhs,
		RhsMapper &	rhs,
		const Packet4f	pAlpha,
		float *	result
	)

Definition at line 581 of file MatrixVectorProduct.h.

 {
   // Processes blocks of `num_acc * 4` rows starting at `row`, accumulating
   // over columns [0, cend) and emitting each block through outputVecColResults.
   // `row` is an in/out reference; `result` is a by-value pointer, so the
   // caller must advance its own copy.

   // Rows written per loop iteration.
   constexpr Index step = (num_acc * 4);
   // Leftover row count (rows mod 4), only meaningful when extraRows is set.
   const Index extra_rows = (extraRows) ? (rows & 3) : 0;
   // Only the max-accumulator, no-extra-rows instantiation iterates more than once.
   constexpr bool multiIters = !extraRows && (num_acc == MAX_BFLOAT16_VEC_ACC_VSX);
  
   do{
     Packet4f acc[num_acc][2];
  
     zeroAccumulators<num_acc, 2>(acc);
  
     // View of lhs whose row origin is the current block.
     LhsMapper lhs2 = lhs.getSubMapper(row, 0);
     // Columns are consumed two at a time.
     for(Index j = 0; j + 2 <= cend; j += 2) {
       vecColLoopVSX<num_acc, LhsMapper, RhsMapper, false, linear>(j, lhs2, rhs, acc);
     }
     // Odd column count: handle the final column with the `zero` template
     // flag set (presumably pads the missing second column - TODO confirm).
     if (cend & 1) {
       vecColLoopVSX<num_acc, LhsMapper, RhsMapper, true, linear>(cend - 1, lhs2, rhs, acc);
     }
  
     // Merge acc[i][1] into acc[i][0] before output.
     addResultsVSX<num_acc>(acc);
  
     outputVecColResults<num_acc, extraRows, 2>(acc, result, pAlpha, extra_rows);
  
     result += step;
     // NOTE: `row += step` lives inside the short-circuited condition, so when
     // multiIters is false `row` is NOT advanced by this function.
   } while(multiIters && (step <= rows - (row += step)));
 }

Code

◆ colVSXVecColLoopBodyExtra()

template<typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>

EIGEN_ALWAYS_INLINE void colVSXVecColLoopBodyExtra	(	Index &	row,
		Index	cend,
		Index	rows,
		LhsMapper &	lhs,
		RhsMapper &	rhs,
		const Packet4f	pAlpha,
		float *	result
	)

Definition at line 617 of file MatrixVectorProduct.h.

 {
   switch ((rows - row) >> 2) {
   case 7:
     colVSXVecColLoopBodyExtraN<7, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
     break;
   case 6:
     colVSXVecColLoopBodyExtraN<6, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
     break;
   case 5:
     colVSXVecColLoopBodyExtraN<5, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
     break;
   case 4:
     colVSXVecColLoopBodyExtraN<4, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
     break;
   case 3:
     colVSXVecColLoopBodyExtraN<3, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
     break;
   case 2:
     colVSXVecColLoopBodyExtraN<2, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
     break;
   case 1:
     colVSXVecColLoopBodyExtraN<1, LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
     break;
   default:
     if (extraRows) {
       colVSXVecColLoopBody<1, LhsMapper, RhsMapper, true, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
     }
     break;
   }
 }

Code

◆ colVSXVecColLoopBodyExtraN()

template<const Index num_acc, typename LhsMapper , typename RhsMapper , bool extraRows, bool linear>

EIGEN_ALWAYS_INLINE void colVSXVecColLoopBodyExtraN	(	Index &	row,
		Index	cend,
		Index	rows,
		LhsMapper &	lhs,
		RhsMapper &	rhs,
		const Packet4f	pAlpha,
		float *	result
	)

Definition at line 609 of file MatrixVectorProduct.h.

 {
   if (MAX_BFLOAT16_VEC_ACC_VSX > num_acc) {
     colVSXVecColLoopBody<num_acc + (extraRows ? 1 : 0), LhsMapper, RhsMapper, extraRows, linear>(row, cend, rows, lhs, rhs, pAlpha, result);
   }
 }

Code

◆ colVSXVecLoopBody()

template<const Index num_acc, typename LhsMapper , typename RhsMapper >

void colVSXVecLoopBody	(	Index &	row,
		Index	cols,
		Index	rows,
		LhsMapper &	lhs,
		RhsMapper &	rhs,
		const Packet4f	pAlpha,
		float *	result
	)

Definition at line 880 of file MatrixVectorProduct.h.

 {
   // Processes `num_acc` rows per iteration starting at `row`: accumulates
   // across `cols`, reduces the accumulators, and writes `num_acc` results.
   // `row` is an in/out reference; `result` is a by-value pointer, so the
   // caller must advance its own copy.

   // Only the max-accumulator instantiation iterates more than once.
   constexpr bool multiIters = (num_acc == MAX_BFLOAT16_VEC_ACC_VSX);
   // Columns left over after full groups of 8 (cols mod 8).
   const Index extra_cols = (cols & 7);
  
   do{
     Packet4f acc[num_acc][2];
  
     zeroAccumulators<num_acc, 2>(acc);
  
     // View of lhs whose row origin is the current block.
     const LhsMapper lhs2 = lhs.getSubMapper(row, 0);
     vecVSXLoop<num_acc, LhsMapper, RhsMapper>(cols, lhs2, rhs, acc, extra_cols);
  
     // Merge acc[i][1] into acc[i][0], then reduce before output.
     addResultsVSX<num_acc>(acc);
  
     preduxVecResultsVSX<num_acc>(acc);
  
     outputVecResults<num_acc, 2>(acc, result, pAlpha);
  
     result += num_acc;
     // NOTE: `row += num_acc` lives inside the short-circuited condition, so
     // when multiIters is false `row` is NOT advanced by this function.
   } while(multiIters && (num_acc <= rows - (row += num_acc)));
 }

Code

◆ colVSXVecLoopBodyExtra()

template<typename LhsMapper , typename RhsMapper >

EIGEN_ALWAYS_INLINE void colVSXVecLoopBodyExtra	(	Index &	row,
		Index	cols,
		Index	rows,
		LhsMapper &	lhs,
		RhsMapper &	rhs,
		const Packet4f	pAlpha,
		float *	result
	)

Definition at line 912 of file MatrixVectorProduct.h.

 {
   switch (rows - row) {
   case 7:
     colVSXVecLoopBodyExtraN<7, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
     break;
   case 6:
     colVSXVecLoopBodyExtraN<6, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
     break;
   case 5:
     colVSXVecLoopBodyExtraN<5, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
     break;
   case 4:
     colVSXVecLoopBodyExtraN<4, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
     break;
   case 3:
     colVSXVecLoopBodyExtraN<3, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
     break;
   case 2:
     colVSXVecLoopBodyExtraN<2, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
     break;
   case 1:
     colVSXVecLoopBodyExtraN<1, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
     break;
   }
 }

Code

◆ colVSXVecLoopBodyExtraN()

template<const Index num_acc, typename LhsMapper , typename RhsMapper >

EIGEN_ALWAYS_INLINE void colVSXVecLoopBodyExtraN	(	Index &	row,
		Index	cols,
		Index	rows,
		LhsMapper &	lhs,
		RhsMapper &	rhs,
		const Packet4f	pAlpha,
		float *	result
	)

Definition at line 904 of file MatrixVectorProduct.h.

 {
   if (MAX_BFLOAT16_VEC_ACC_VSX > num_acc) {
     colVSXVecLoopBody<num_acc, LhsMapper, RhsMapper>(row, cols, rows, lhs, rhs, pAlpha, result);
   }
 }

Code

◆ convertArrayPointerF32toBF16VSX()

template<bool inc = false>

EIGEN_ALWAYS_INLINE void convertArrayPointerF32toBF16VSX	(	float *	result,
		Index	rows,
		bfloat16 *	dst,
		Index	resInc = `1`
	)

Definition at line 710 of file MatrixVectorProduct.h.

 {
   Index i = 0;
   convertPointerF32toBF16VSX<32,inc>(i, result, rows, dst, resInc);
   convertPointerF32toBF16VSX<16,inc>(i, result, rows, dst, resInc);
   convertPointerF32toBF16VSX<8,inc>(i, result, rows, dst, resInc);
   convertPointerF32toBF16VSX<1,inc>(i, result, rows, dst, resInc);
 }

Code

◆ convertPointerF32toBF16VSX()

template<const Index size, bool inc = false>

EIGEN_ALWAYS_INLINE void convertPointerF32toBF16VSX	(	Index &	i,
		float *	result,
		Index	rows,
		bfloat16 *&	dst,
		Index	resInc = `1`
	)

Definition at line 683 of file MatrixVectorProduct.h.

 {
   constexpr Index extra = ((size < 8) ? 8 : size);
   while (i + size <= rows) {
     PacketBlock<Packet8bf,(size+7)/8> r32;
     r32.packet[0] = convertF32toBF16VSX(result + i +  0);
     if (size >= 16) {
       r32.packet[1] = convertF32toBF16VSX(result + i +  8);
     }
     if (size >= 32) {
       r32.packet[2] = convertF32toBF16VSX(result + i + 16);
       r32.packet[3] = convertF32toBF16VSX(result + i + 24);
     }
     storeBF16fromResult<size, inc, 0>(dst, r32.packet[0], resInc, rows & 7);
     if (size >= 16) {
       storeBF16fromResult<size, inc, 8>(dst, r32.packet[1], resInc);
     }
     if (size >= 32) {
       storeBF16fromResult<size, inc, 16>(dst, r32.packet[2], resInc);
       storeBF16fromResult<size, inc, 24>(dst, r32.packet[3], resInc);
     }
     i += extra; dst += extra*resInc;
     if (size != 32) break;
   }
 }

Code

◆ gemv_bfloat16_col()

template<typename LhsMapper , typename RhsMapper >

void gemv_bfloat16_col	(	Index	rows,
		Index	cols,
		const LhsMapper &	alhs,
		const RhsMapper &	rhs,
		bfloat16 *	res,
		Index	resIncr,
		bfloat16	alpha
	)

Definition at line 720 of file MatrixVectorProduct.h.

 {
   typedef typename RhsMapper::LinearMapper LinearMapper;
  
   EIGEN_UNUSED_VARIABLE(resIncr);
   eigen_internal_assert(resIncr == 1);
  
   // The following copy tells the compiler that lhs's attributes are not modified outside this function
   // This helps GCC to generate proper code.
   LhsMapper lhs(alhs);
   RhsMapper rhs2(rhs);
  
   const Index lhsStride = lhs.stride();
  
   // TODO: improve the following heuristic:
   const Index block_cols = cols < 128 ? cols : (lhsStride * sizeof(bfloat16) < 16000 ? 16 : 8);
   float falpha = Eigen::bfloat16_impl::bfloat16_to_float(alpha);
   Packet4f pAlpha = pset1<Packet4f>(falpha);
  
   ei_declare_aligned_stack_constructed_variable(float, result, rows, 0);
  
   convertArrayPointerBF16toF32(result, 1, rows, res);
  
   for (Index j2 = 0; j2 < cols; j2 += block_cols)
   {
     Index jend = numext::mini(j2 + block_cols, cols);
  
     LhsMapper lhs2 = lhs.getSubMapper(0, j2);
     if (rhs.stride() == 1) {
       LinearMapper rhs3 = rhs2.getLinearMapper(j2, 0);
       calcVSXVecColLoops<LhsMapper, LinearMapper, true>(jend - j2, rows, lhs2, rhs3, pAlpha, result);
     } else {
       RhsMapper rhs3 = rhs2.getSubMapper(j2, 0);
       calcVSXVecColLoops<LhsMapper, RhsMapper, false>(jend - j2, rows, lhs2, rhs3, pAlpha, result);
     }
   }
  
   convertArrayPointerF32toBF16VSX(result, rows, res);
 }

Code

◆ gemv_bfloat16_row()

template<typename LhsMapper , typename RhsMapper >

void gemv_bfloat16_row	(	Index	rows,
		Index	cols,
		const LhsMapper &	alhs,
		const RhsMapper &	rhs,
		bfloat16 *	res,
		Index	resIncr,
		bfloat16	alpha
	)

inline

Definition at line 951 of file MatrixVectorProduct.h.

 {
   typedef typename RhsMapper::LinearMapper LinearMapper;
  
   // The following copy tells the compiler that lhs's attributes are not modified outside this function
   // This helps GCC to generate proper code.
   LhsMapper lhs(alhs);
   LinearMapper rhs2 = rhs.getLinearMapper(0, 0);
  
   eigen_internal_assert(rhs.stride() == 1);
  
   float falpha = Eigen::bfloat16_impl::bfloat16_to_float(alpha);
   const Packet4f pAlpha = pset1<Packet4f>(falpha);
  
   ei_declare_aligned_stack_constructed_variable(float, result, rows, 0);
   if (resIncr == 1) {
     convertArrayPointerBF16toF32(result, 1, rows, res);
   } else {
     convertArrayPointerBF16toF32<true>(result, 1, rows, res, resIncr);
   }
   calcVSXVecLoops<LhsMapper, LinearMapper>(cols, rows, lhs, rhs2, pAlpha, result);
   if (resIncr == 1) {
     convertArrayPointerF32toBF16VSX(result, rows, res);
   } else {
     convertArrayPointerF32toBF16VSX<true>(result, rows, res, resIncr);
   }
 }

Code

◆ gemv_col()

template<typename LhsScalar , typename LhsMapper , typename RhsScalar , typename RhsMapper , typename ResScalar >

void gemv_col	(	Index	rows,
		Index	cols,
		const LhsMapper &	alhs,
		const RhsMapper &	rhs,
		ResScalar *	res,
		Index	resIncr,
		ResScalar	alpha
	)

inline

Definition at line 376 of file MatrixVectorProduct.h.

 {
     // Column-blocked matrix-vector kernel. For each block of columns
     // [j2, jend) it adds alpha * sum_j lhs(i, j) * rhs(j, 0) into res[i]
     // (this is what the scalar tail loop at the bottom does explicitly).
     // The packet-wide paths come from the GEMV_PROCESS_COL* macros, whose
     // expansions are not shown in this function.
     typedef gemv_traits<LhsScalar, RhsScalar> Traits;
  
     typedef typename Traits::LhsPacket LhsPacket;
     typedef typename Traits::RhsPacket RhsPacket;
     typedef typename Traits::ResPacket ResPacket;
  
     // Only a unit-stride result is supported by this kernel.
     EIGEN_UNUSED_VARIABLE(resIncr);
     eigen_internal_assert(resIncr == 1);
  
     // The following copy tells the compiler that lhs's attributes are not modified outside this function
     // This helps GCC to generate proper code.
     LhsMapper lhs(alhs);
     RhsMapper rhs2(rhs);
  
     // cj is used by the scalar tail loop; pcj is not referenced directly in
     // this body (presumably consumed by the macros - confirm against their definitions).
     conj_helper<LhsScalar, RhsScalar, false, false> cj;
     conj_helper<LhsPacket, RhsPacket, false, false> pcj;
  
     const Index lhsStride = lhs.stride();
     // TODO: for padded aligned inputs, we could enable aligned reads
     enum {
         LhsAlignment = Unaligned,
         ResPacketSize = Traits::ResPacketSize,
         LhsPacketSize = Traits::LhsPacketSize,
         RhsPacketSize = Traits::RhsPacketSize,
     };
  
     // nK is the exclusive upper bound on i such that K full result packets
     // still fit: i < nK  <=>  i + K * ResPacketSize <= rows.
 #ifndef GCC_ONE_VECTORPAIR_BUG
     const Index n8 = rows - 8 * ResPacketSize + 1;
     const Index n4 = rows - 4 * ResPacketSize + 1;
     const Index n2 = rows - 2 * ResPacketSize + 1;
 #endif
     const Index n1 = rows - 1 * ResPacketSize + 1;
 #ifdef EIGEN_POWER_USE_GEMV_PREFETCH
     const Index prefetch_dist = 64 * LhsPacketSize;
 #endif
  
     // TODO: improve the following heuristic:
     // Fewer than 128 columns are handled as a single block; otherwise blocks
     // of 16 or 8 columns are used depending on the lhs stride in bytes.
     const Index block_cols = cols < 128 ? cols : (lhsStride * sizeof(LhsScalar) < 16000 ? 16 : 8);
     ResPacket palpha = pset1<ResPacket>(alpha);
  
     for (Index j2 = 0; j2 < cols; j2 += block_cols)
     {
         Index jend = numext::mini(j2 + block_cols, cols);
         // Row cursor; restarted for every column block.
         Index i = 0;
         ResPacket c0, c1, c2, c3, c4, c5, c6, c7;
 #ifdef USE_GEMV_MMA
         __vector_quad e0, e1, e2, e3, e4, e5, e6, e7;
         PacketBlock<ResPacket, 4> result0, result1, result2, result3, result4, result5, result6, result7;
         GEMV_UNUSED(8, e)
         GEMV_UNUSED(8, result)
         GEMV_UNUSED_EXTRA(1, c)
 #endif
         // Without the GCC workaround: loop on the 8-packet path, then at most
         // one pass each of the 4-, 2- and 1-packet paths. With the workaround:
         // loop on the 1-packet path only.
 #ifndef GCC_ONE_VECTORPAIR_BUG
         while (i < n8)
         {
             GEMV_PROCESS_COL(8)
         }
         if (i < n4)
         {
             GEMV_PROCESS_COL(4)
         }
         if (i < n2)
         {
             GEMV_PROCESS_COL(2)
         }
         if (i < n1)
 #else
         while (i < n1)
 #endif
         {
             GEMV_PROCESS_COL_ONE(1)
         }
         // Scalar tail for the rows that do not fill a whole packet.
         for (;i < rows;++i)
         {
             ResScalar d0(0);
             Index j = j2;
             // do/while: the outer loop condition guarantees j2 < jend here.
             do {
                 d0 += cj.pmul(lhs(i, j), rhs2(j, 0));
             } while (++j < jend);
             res[i] += alpha * d0;
         }
     }
 }

Code

◆ gemv_complex_col()

template<typename Scalar , typename LhsScalar , typename LhsMapper , bool ConjugateLhs, bool LhsIsReal, typename RhsScalar , typename RhsMapper , bool ConjugateRhs, bool RhsIsReal, typename ResScalar >

void gemv_complex_col	(	Index	rows,
		Index	cols,
		const LhsMapper &	alhs,
		const RhsMapper &	rhs,
		ResScalar *	res,
		Index	resIncr,
		ResScalar	alpha
	)

inline

Definition at line 2135 of file MatrixVectorProduct.h.

 {
     // Column-blocked matrix-vector kernel for complex operands. For each
     // block of columns [j2, jend) it adds alpha * sum_j cj.pmul(lhs(i, j), rhs(j, 0))
     // into res[i], where cj applies the ConjugateLhs/ConjugateRhs flags.
     // The packet-wide paths come from the GEMV_PROCESS_COL_COMPLEX* macros,
     // whose expansions are not shown in this function.
     typedef gemv_traits<LhsScalar, RhsScalar> Traits;
  
     typedef typename Traits::LhsPacket LhsPacket;
     typedef typename Traits::RhsPacket RhsPacket;
     typedef typename Traits::ResPacket ResPacket;
  
     typedef typename packet_traits<Scalar>::type ScalarPacket;
     typedef typename packet_traits<LhsScalar>::type PLhsPacket;
     typedef typename packet_traits<ResScalar>::type PResPacket;
     // Packet sizes below are taken from the traits of the result packet type.
     typedef gemv_traits<ResPacket, ResPacket> PTraits;
  
     // Only a unit-stride result is supported by this kernel.
     EIGEN_UNUSED_VARIABLE(resIncr);
     eigen_internal_assert(resIncr == 1);
  
     // The following copy tells the compiler that lhs's attributes are not modified outside this function
     // This helps GCC to generate proper code.
     LhsMapper lhs(alhs);
     RhsMapper rhs2(rhs);
  
     conj_helper<LhsScalar, RhsScalar, ConjugateLhs, ConjugateRhs> cj;
  
     const Index lhsStride = lhs.stride();
     // TODO: for padded aligned inputs, we could enable aligned reads
     enum {
         LhsAlignment = Unaligned,
         ResPacketSize = PTraits::ResPacketSize,
         LhsPacketSize = PTraits::LhsPacketSize,
         RhsPacketSize = PTraits::RhsPacketSize,
     };
 #ifdef EIGEN_POWER_USE_GEMV_PREFETCH
     const Index prefetch_dist = 64 * LhsPacketSize;
 #endif
  
     // nK is the exclusive upper bound on i such that K full result packets
     // still fit: i < nK  <=>  i + K * ResPacketSize <= rows.
 #ifndef GCC_ONE_VECTORPAIR_BUG
     const Index n8 = rows - 8 * ResPacketSize + 1;
     const Index n4 = rows - 4 * ResPacketSize + 1;
     const Index n2 = rows - 2 * ResPacketSize + 1;
 #endif
     const Index n1 = rows - 1 * ResPacketSize + 1;
  
     // TODO: improve the following heuristic:
     const Index block_cols = cols < 128 ? cols : (lhsStride * sizeof(LhsScalar) < 16000 ? 16 : 8);
  
     typedef alpha_store<PResPacket, ResPacket, ResScalar, Scalar> AlphaData;
     AlphaData alpha_data(alpha);
  
     for (Index j2 = 0; j2 < cols; j2 += block_cols)
     {
         Index jend = numext::mini(j2 + block_cols, cols);
         // Row cursor; restarted for every column block.
         Index i = 0;
         PResPacket c00, c01, c02, c03, c04, c05, c06, c07;
         ResPacket c10, c11, c12, c13, c14, c15, c16, c17;
         PLhsPacket f0, f1, f2, f3, f4, f5, f6, f7;
 #ifdef USE_GEMV_MMA
         __vector_quad e00, e01, e02, e03, e04, e05, e06, e07;
         __vector_pair a0, a1, a2, a3, a4, a5, a6, a7;
         PacketBlock<ScalarPacket, 4> result00, result01, result02, result03, result04, result05, result06, result07;
         GEMV_UNUSED(8, e0)
         GEMV_UNUSED(8, result0)
         GEMV_UNUSED(8, a)
         GEMV_UNUSED(8, f)
         // When active, this `if` guards only the braced 8-packet block that
         // follows, so that block can be skipped entirely.
 #if !defined(GCC_ONE_VECTORPAIR_BUG) && defined(USE_GEMV_COL_COMPLEX_MMA)
         if (GEMV_IS_COMPLEX_COMPLEX || !GEMV_IS_COMPLEX_FLOAT)
 #endif
 #endif
 #ifndef GCC_ONE_VECTORPAIR_BUG
         {
             while (i < n8)
             {
                 GEMV_PROCESS_COL_COMPLEX(8)
             }
         }
         // The 4-packet path is a loop (not a single pass) because the
         // 8-packet block above may have been skipped.
         while (i < n4)
         {
             GEMV_PROCESS_COL_COMPLEX(4)
         }
         if (i < n2)
         {
             GEMV_PROCESS_COL_COMPLEX(2)
         }
         if (i < n1)
 #else
         while (i < n1)
 #endif
         {
             GEMV_PROCESS_COL_COMPLEX_ONE(1)
         }
         // Scalar tail for the rows that do not fill a whole packet.
         for (;i < rows;++i)
         {
             ResScalar d0(0);
             Index j = j2;
             // do/while: the outer loop condition guarantees j2 < jend here.
             do {
                 d0 += cj.pmul(lhs(i, j), rhs2(j, 0));
             } while (++j < jend);
             res[i] += alpha * d0;
         }
     }
 }

Code

◆ gemv_complex_row()

template<typename Scalar , typename LhsScalar , typename LhsMapper , bool ConjugateLhs, bool LhsIsReal, typename RhsScalar , typename RhsMapper , bool ConjugateRhs, bool RhsIsReal, typename ResScalar >

void gemv_complex_row	(	Index	rows,
		Index	cols,
		const LhsMapper &	alhs,
		const RhsMapper &	rhs,
		ResScalar *	res,
		Index	resIncr,
		ResScalar	alpha
	)

inline

Definition at line 2834 of file MatrixVectorProduct.h.

 {
     // Row-oriented matrix-vector kernel for complex operands: for each row i
     // it adds alpha * sum_j cj.pmul(lhs(i, j), rhs(j)) into res[i * resIncr].
     // The multi-row and packet-wide parts come from the
     // GEMV_PROCESS_ROW_COMPLEX* macros, whose expansions are not shown in
     // this function and which use the locals declared below (i, j, dd0, cc*, c0*, c1*).
     typedef gemv_traits<LhsScalar, RhsScalar> Traits;
  
     typedef typename Traits::LhsPacket LhsPacket;
     typedef typename Traits::RhsPacket RhsPacket;
     typedef typename Traits::ResPacket ResPacket;
  
     typedef typename packet_traits<Scalar>::type ScalarPacket;
     typedef typename packet_traits<LhsScalar>::type PLhsPacket;
     typedef typename packet_traits<ResScalar>::type PResPacket;
     typedef gemv_traits<ResPacket, ResPacket> PTraits;
  
     // The following copy tells the compiler that lhs's attributes are not modified outside this function
     // This helps GCC to generate proper code.
     LhsMapper lhs(alhs);
     typename RhsMapper::LinearMapper rhs2 = rhs.getLinearMapper(0, 0);
  
     // The rhs vector must be contiguous.
     eigen_internal_assert(rhs.stride() == 1);
     conj_helper<LhsScalar, RhsScalar, ConjugateLhs, ConjugateRhs> cj;
 #if !EIGEN_COMP_LLVM
     conj_helper<LhsPacket, RhsPacket, ConjugateLhs, ConjugateRhs> pcj;
 #endif
  
     // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large,
     //       processing 8 rows at once might be counter productive wrt cache.
     // NOTE(review): both branches of the n8 conditional are identical, so the
     // stride test currently has no effect.
 #ifndef GCC_ONE_VECTORPAIR_BUG
     const Index n8 = lhs.stride() * sizeof(LhsScalar) > 32000 ? (rows - 7) : (rows - 7);
     const Index n4 = rows - 3;
     const Index n2 = rows - 1;
 #endif
  
     // TODO: for padded aligned inputs, we could enable aligned reads
     enum {
         LhsAlignment = Unaligned,
         ResPacketSize = PTraits::ResPacketSize,
         LhsPacketSize = PTraits::LhsPacketSize,
         RhsPacketSize = PTraits::RhsPacketSize,
     };
  
     // i is the row cursor shared by all stages; j is the column cursor, which
     // is not initialised here (presumably set by the macros - confirm).
     Index i = 0, j;
     PResPacket c00, c01, c02, c03, c04, c05, c06, c07;
     ResPacket c10, c11, c12, c13, c14, c15, c16, c17;
 #ifdef USE_GEMV_MMA
     __vector_quad e00, e01, e02, e03, e04, e05, e06, e07;
     GEMV_UNUSED_ROW(8, e0)
     GEMV_UNUSED_EXTRA(1, c0)
     GEMV_UNUSED_EXTRA(1, c1)
 #endif
     ResScalar dd0;
 #ifndef GCC_ONE_VECTORPAIR_BUG
     ScalarBlock<ResScalar, 2> cc0, cc1, cc2, cc3;
     // With MMA enabled the 8-row stage is skipped for the complex-complex case.
 #ifdef USE_GEMV_MMA
     if (!GEMV_IS_COMPLEX_COMPLEX)
 #endif
     {
         GEMV_PROCESS_ROW_COMPLEX(8)
     }
     GEMV_PROCESS_ROW_COMPLEX(4)
     GEMV_PROCESS_ROW_COMPLEX(2)
 #endif
     // Remaining rows, one at a time.
     for (; i < rows; ++i)
     {
         GEMV_PROCESS_ROW_COMPLEX_SINGLE(1)
         GEMV_PROCESS_ROW_COMPLEX_PREDUX(0)
         // Scalar tail over the columns left after the macro stages.
         for (; j < cols; ++j)
         {
             dd0 += cj.pmul(lhs(i, j), rhs2(j));
         }
         res[i * resIncr] += alpha * dd0;
     }
 }

Code

◆ gemv_mult_complex_complex()

template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>

EIGEN_ALWAYS_INLINE void gemv_mult_complex_complex	(	LhsPacket &	a0,
		RhsScalar *	b,
		PResPacket &	c0,
		ResPacket &	c1
	)

Definition at line 1533 of file MatrixVectorProduct.h.

 {
     ScalarPacket br, bi;
     if (StorageOrder == ColMajor) {
         pload_realimag<RhsScalar>(b, br, bi);
     }
     else {
         pload_realimag_row<RhsScalar>(b, br, bi);
     }
     if (ConjugateLhs && !ConjugateRhs) a0 = pconj2(a0);
     LhsPacket a1 = pcplxflipconj(a0);
     ScalarPacket cr = pmadd_complex_complex<LhsPacket, ScalarPacket, ConjugateLhs, ConjugateRhs, false>(a0.v, br, c0.v);
     ScalarPacket ci = pmadd_complex_complex<LhsPacket, ScalarPacket, ConjugateLhs, ConjugateRhs, true>(a1.v, bi, c1.v);
     c1 = ResPacket(ci);
     c0 = PResPacket(cr);
 }

Code

◆ gemv_mult_complex_real()

template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>

EIGEN_ALWAYS_INLINE void gemv_mult_complex_real	(	LhsPacket &	a0,
		RhsScalar *	b,
		PResPacket &	c0
	)

Definition at line 1567 of file MatrixVectorProduct.h.

 {
     ScalarPacket a1 = pload_complex<ResPacket>(&a0);
     ScalarPacket b0;
     if (StorageOrder == ColMajor) {
         b0 = pload_real(b);
     }
     else {
         b0 = pload_real_row<ResPacket>(b);
     }
     ScalarPacket cri = pmadd_complex_real<PResPacket, ScalarPacket, ConjugateLhs>(a1, b0, c0.v);
     c0 = PResPacket(cri);
 }

Code

◆ gemv_mult_generic()

template<typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>

EIGEN_ALWAYS_INLINE void gemv_mult_generic	(	LhsPacket &	a0,
		RhsScalar *	b,
		PResPacket &	c0
	)

Definition at line 1518 of file MatrixVectorProduct.h.

 {
     conj_helper<LhsPacket, RhsPacket, ConjugateLhs, ConjugateRhs> pcj;
     RhsPacket b0;
     if (StorageOrder == ColMajor) {
         b0 = pset1<RhsPacket>(*b);
     }
     else {
         b0 = ploadu<RhsPacket>(b);
     }
     c0 = pcj.pmadd(a0, b0, c0);
 }

Code

◆ gemv_mult_real_complex()

template<typename ScalarPacket , typename LhsPacket , typename RhsScalar , typename RhsPacket , typename PResPacket , typename ResPacket , bool ConjugateLhs, bool ConjugateRhs, int StorageOrder>

EIGEN_ALWAYS_INLINE void gemv_mult_real_complex	(	LhsPacket &	a0,
		RhsScalar *	b,
		PResPacket &	c0
	)

Definition at line 1552 of file MatrixVectorProduct.h.

 {
     ScalarPacket b0;
     if (StorageOrder == ColMajor) {
         b0 = pload_complex_full(b);
     }
     else {
         b0 = pload_complex_full_row(b);
     }
     ScalarPacket cri = pmadd_complex_real<PResPacket, ScalarPacket, ConjugateRhs>(a0, b0, c0.v);
     c0 = PResPacket(cri);
 }

Code

◆ gemv_row()

template<typename LhsScalar , typename LhsMapper , typename RhsScalar , typename RhsMapper , typename ResScalar >

void gemv_row	(	Index	rows,
		Index	cols,
		const LhsMapper &	alhs,
		const RhsMapper &	rhs,
		ResScalar *	res,
		Index	resIncr,
		ResScalar	alpha
	)

inline

Definition at line 2476 of file MatrixVectorProduct.h.

 {
     // Row-oriented matrix-vector kernel: for each row i it adds
     // alpha * sum_j lhs(i, j) * rhs(j) into res[i * resIncr] (this is what
     // the final per-row loop does explicitly). The multi-row stages come from
     // the GEMV_PROCESS_ROW macros, whose expansions are not shown in this function.
     typedef gemv_traits<LhsScalar, RhsScalar> Traits;
  
     typedef typename Traits::LhsPacket LhsPacket;
     typedef typename Traits::RhsPacket RhsPacket;
     typedef typename Traits::ResPacket ResPacket;
  
     // The following copy tells the compiler that lhs's attributes are not modified outside this function
     // This helps GCC to generate proper code.
     LhsMapper lhs(alhs);
     typename RhsMapper::LinearMapper rhs2 = rhs.getLinearMapper(0, 0);
  
     // The rhs vector must be contiguous.
     eigen_internal_assert(rhs.stride() == 1);
     conj_helper<LhsScalar, RhsScalar, false, false> cj;
     conj_helper<LhsPacket, RhsPacket, false, false> pcj;
  
     // TODO: fine tune the following heuristic. The rationale is that if the matrix is very large,
     //       processing 8 rows at once might be counter productive wrt cache.
     // NOTE(review): both branches of the n8 conditional are identical, so the
     // stride test currently has no effect.
 #ifndef GCC_ONE_VECTORPAIR_BUG
     const Index n8 = lhs.stride() * sizeof(LhsScalar) > 32000 ? (rows - 7) : (rows - 7);
     const Index n4 = rows - 3;
     const Index n2 = rows - 1;
 #endif
  
     // TODO: for padded aligned inputs, we could enable aligned reads
     enum {
         LhsAlignment = Unaligned,
         ResPacketSize = Traits::ResPacketSize,
         LhsPacketSize = Traits::LhsPacketSize,
         RhsPacketSize = Traits::RhsPacketSize,
     };
  
     // Row cursor shared by the macro stages and the final per-row loop.
     Index i = 0;
 #ifdef USE_GEMV_MMA
     __vector_quad c0, c1, c2, c3, c4, c5, c6, c7;
     GEMV_UNUSED_ROW(8, c)
 #else
     ResPacket c0, c1, c2, c3, c4, c5, c6, c7;
 #endif
 #ifndef GCC_ONE_VECTORPAIR_BUG
     ScalarBlock<ResScalar, 2> cc0, cc1, cc2, cc3;
     GEMV_PROCESS_ROW(8)
     GEMV_PROCESS_ROW(4)
     GEMV_PROCESS_ROW(2)
 #endif
     // Remaining rows, one at a time.
     for (; i < rows; ++i)
     {
         ResPacket d0 = pset1<ResPacket>(ResScalar(0));
         Index j = 0;
         // Packet-wide dot product over as many full packets as fit in the row.
         for (; j + LhsPacketSize <= cols; j += LhsPacketSize)
         {
             RhsPacket b0 = rhs2.template load<RhsPacket, Unaligned>(j);
  
             d0 = pcj.pmadd(lhs.template load<LhsPacket, LhsAlignment>(i + 0, j), b0, d0);
         }
         // Horizontal reduction of the packet accumulator, then the scalar
         // tail over the leftover columns.
         ResScalar dd0 = predux(d0);
         for (; j < cols; ++j)
         {
             dd0 += cj.pmul(lhs(i, j), rhs2(j));
         }
         res[i * resIncr] += alpha * dd0;
     }
 }

Code

◆ loadColData()

template<typename RhsMapper , bool linear>

EIGEN_ALWAYS_INLINE Packet8bf loadColData	(	RhsMapper &	rhs,
		Index	j
	)

Definition at line 545 of file MatrixVectorProduct.h.

 {
   return loadColData_impl<RhsMapper, linear>::run(rhs, j);
 }

Code

◆ loadLhsPacket()

template<typename Scalar , typename LhsScalar , typename LhsMapper , typename LhsPacket >

EIGEN_ALWAYS_INLINE LhsPacket loadLhsPacket	(	LhsMapper &	lhs,
		Index	i,
		Index	j
	)

Definition at line 1481 of file MatrixVectorProduct.h.

 {
     if (sizeof(Scalar) == sizeof(LhsScalar)) {
         const LhsScalar& src = lhs(i + 0, j);
         return LhsPacket(pload_real_full(const_cast<LhsScalar*>(&src)));
     }
     return lhs.template load<LhsPacket, Unaligned>(i + 0, j);
 }

Code

◆ loadPacketPartialZero()

EIGEN_ALWAYS_INLINE Packet8us loadPacketPartialZero	(	Packet8us	data,
		Index	extra_cols
	)

Definition at line 821 of file MatrixVectorProduct.h.

 {
   Packet16uc shift = pset1<Packet16uc>(8 * 2 * (8 - extra_cols));
 #ifdef _BIG_ENDIAN
   return reinterpret_cast<Packet8us>(vec_slo(vec_sro(reinterpret_cast<Packet16uc>(data), shift), shift));
 #else
   return reinterpret_cast<Packet8us>(vec_sro(vec_slo(reinterpret_cast<Packet16uc>(data), shift), shift));
 #endif
 }

Code

◆ loadVecLoopVSX()

template<Index num_acc, typename LhsMapper , bool zero>

EIGEN_ALWAYS_INLINE void loadVecLoopVSX	(	Index	k,
		LhsMapper &	lhs,
		Packet4f(&)	a0[num_acc][2]
	)

Definition at line 495 of file MatrixVectorProduct.h.

 {
   Packet8bf c0 = lhs.template loadPacket<Packet8bf>(k*4, 0);
   Packet8bf b1;
   if (!zero) {
     b1 = lhs.template loadPacket<Packet8bf>(k*4, 1);
  
     a0[k + 0][1] = oneConvertBF16Hi(b1.m_val);
   }
   a0[k + 0][0] = oneConvertBF16Hi(c0.m_val);
  
   if (num_acc > (k + 1)) {
     a0[k + 1][0] = oneConvertBF16Lo(c0.m_val);
     if (!zero) {
       a0[k + 1][1] = oneConvertBF16Lo(b1.m_val);
     }
   }
 }

Code

◆ multVecVSX()

template<Index num_acc, bool zero>

EIGEN_ALWAYS_INLINE void multVecVSX	(	Packet4f(&)	acc[num_acc][2],
		Packet4f(&)	a0[num_acc][2],
		Packet4f(&)	b0[2]
	)

Definition at line 515 of file MatrixVectorProduct.h.

 {
   for(Index k = 0; k < num_acc; k++) {
     for(Index i = 0; i < (zero ? 1 : 2); i++) {
       acc[k][i] = pmadd(b0[i], a0[k][i], acc[k][i]);
     }
   }
 }

Code

◆ multVSXVecLoop()

template<Index num_acc, typename LhsMapper , typename RhsMapper , bool extra>

EIGEN_ALWAYS_INLINE void multVSXVecLoop	(	Packet4f(&)	acc[num_acc][2],
		const LhsMapper &	lhs,
		RhsMapper &	rhs,
		Index	j,
		Index	extra_cols
	)

Definition at line 833 of file MatrixVectorProduct.h.

 {
   Packet4f a0[num_acc][2], b0[2];
   Packet8bf a1, b1;
  
   if (extra) {
     b1 = rhs.template loadPacketPartial<Packet8bf>(j, extra_cols);
 #ifndef _ARCH_PWR9
     b1 = loadPacketPartialZero(b1.m_val, extra_cols);
 #endif
   } else {
     b1 = rhs.template loadPacket<Packet8bf>(j);
   }
   b0[0] = oneConvertBF16Hi(b1.m_val);
   b0[1] = oneConvertBF16Lo(b1.m_val);
  
   const LhsMapper lhs2 = lhs.getSubMapper(0, j);
   for(Index k = 0; k < num_acc; k++) {
     if (extra) {
       a1 = lhs2.template loadPacketPartial<Packet8bf>(k, 0, extra_cols);
 #ifndef _ARCH_PWR9
       a1 = loadPacketPartialZero(a1.m_val, extra_cols);
 #endif
     } else {
       a1 = lhs2.template loadPacket<Packet8bf>(k, 0);
     }
     a0[k][0] = oneConvertBF16Hi(a1.m_val);
     a0[k][1] = oneConvertBF16Lo(a1.m_val);
   }
  
   multVecVSX<num_acc, false>(acc, a0, b0);
 }

Code

◆ outputVecCol()

template<bool extraRows>

EIGEN_ALWAYS_INLINE void outputVecCol	(	Packet4f	acc,
		float *	result,
		Packet4f	pAlpha,
		Index	extra_rows
	)

Definition at line 468 of file MatrixVectorProduct.h.

 {
   Packet4f d0 = ploadu<Packet4f>(result);
   d0 = pmadd(acc, pAlpha, d0);
   if (extraRows) {
     pstoreu_partial(result, d0, extra_rows);
   } else {
     pstoreu(result, d0);
   }
 }

Code

◆ outputVecColResults()

template<Index num_acc, bool extraRows, Index size>

EIGEN_ALWAYS_INLINE void outputVecColResults	(	Packet4f(&)	acc[num_acc][size],
		float *	result,
		Packet4f	pAlpha,
		Index	extra_rows
	)

Definition at line 480 of file MatrixVectorProduct.h.

 {
   constexpr Index real_acc = (num_acc - (extraRows ? 1 : 0));
   for(Index k = 0; k < real_acc; k++) {
     outputVecCol<false>(acc[k][0], result + k*4, pAlpha, extra_rows);
   }
   if (extraRows) {
     outputVecCol<true>(acc[real_acc][0], result + real_acc*4, pAlpha, extra_rows);
   }
 }

Code

◆ outputVecResults()

template<Index num_acc, Index size>

EIGEN_ALWAYS_INLINE void outputVecResults	(	Packet4f(&)	acc[num_acc][size],
		float *	result,
		Packet4f	pAlpha
	)

Definition at line 766 of file MatrixVectorProduct.h.

 {
   constexpr Index extra = num_acc & 3;
  
   for(Index k = 0; k < num_acc; k += 4) {
     Packet4f d0 = ploadu<Packet4f>(result + k);
     d0 = pmadd(acc[k + 0][0], pAlpha, d0);
  
     if (num_acc > (k + 3)) {
       pstoreu(result + k, d0);
     } else {
       if (extra == 3) {
         pstoreu_partial(result + k, d0, extra);
       } else {
         memcpy((void *)(result + k), (void *)(&d0), sizeof(float) * extra);
       }
     }
   }
 }

Code

◆ padd() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd padd	(	Packet1cd &	a,
		std::complex< double > &	b
	)

Definition at line 1343 of file MatrixVectorProduct.h.

 {
     EIGEN_UNUSED_VARIABLE(b);
     return a;  // Just for compilation
 }

Code

◆ padd() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf padd	(	Packet2cf &	a,
		std::complex< float > &	b
	)

Definition at line 1337 of file MatrixVectorProduct.h.

 {
     EIGEN_UNUSED_VARIABLE(b);
     return a;  // Just for compilation
 }

Code

◆ pconj2() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pconj2 ( const Packet1cd & a )

Definition at line 1016 of file MatrixVectorProduct.h.

                                                          {
     return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p16uc_COMPLEX64_CONJ_XOR)));
 }

Code

◆ pconj2() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pconj2 ( const Packet2cf & a )

Definition at line 1012 of file MatrixVectorProduct.h.

                                                          {
     return Packet2cf(pxor(a.v, reinterpret_cast<Packet4f>(p16uc_COMPLEX32_CONJ_XOR)));
 }

Code

◆ pconjinv() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pconjinv ( const Packet1cd & a )

Definition at line 1029 of file MatrixVectorProduct.h.

                                                            {
     return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p16uc_COMPLEX64_CONJ_XOR2)));
 }

Code

◆ pconjinv() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pconjinv ( const Packet2cf & a )

Definition at line 1021 of file MatrixVectorProduct.h.

                                                            {
 #ifdef __POWER8_VECTOR__
     return Packet2cf(Packet4f(vec_neg(Packet2d(a.v))));
 #else
     return Packet2cf(pxor(a.v, reinterpret_cast<Packet4f>(p16uc_COMPLEX32_CONJ_XOR2)));
 #endif
 }

Code

◆ pcplxconjflip() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pcplxconjflip ( Packet1cd a )

Definition at line 1066 of file MatrixVectorProduct.h.

 {
 #ifdef PERMXOR_GOOD
     return Packet1cd(Packet2d(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX64_CONJ_XOR2, p16uc_COMPLEX64_XORFLIP)));
 #else
     return pconj2(pcplxflip(a));
 #endif
 }

Code

◆ pcplxconjflip() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pcplxconjflip ( Packet2cf a )

Definition at line 1057 of file MatrixVectorProduct.h.

 {
 #ifdef PERMXOR_GOOD
     return Packet2cf(Packet4f(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX32_CONJ_XOR2, p16uc_COMPLEX32_XORFLIP)));
 #else
     return pconj2(pcplxflip(a));
 #endif
 }

Code

◆ pcplxflip2() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pcplxflip2 ( Packet1cd a )

Definition at line 1119 of file MatrixVectorProduct.h.

 {
 #ifdef EIGEN_VECTORIZE_VSX
     return Packet1cd(__builtin_vsx_xxpermdi(a.v, a.v, 2));
 #else
     return Packet1cd(Packet2d(vec_perm(Packet16uc(a.v), Packet16uc(a.v), p16uc_COMPLEX64_XORFLIP)));
 #endif
 }

Code

◆ pcplxflip2() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pcplxflip2 ( Packet2cf a )

Definition at line 1114 of file MatrixVectorProduct.h.

 {
     return Packet2cf(Packet4f(vec_perm(Packet16uc(a.v), Packet16uc(a.v), p16uc_COMPLEX32_XORFLIP)));
 }

Code

◆ pcplxflipconj() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pcplxflipconj ( Packet1cd a )

Definition at line 1047 of file MatrixVectorProduct.h.

 {
 #ifdef PERMXOR_GOOD
     return Packet1cd(Packet2d(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX64_CONJ_XOR, p16uc_COMPLEX64_XORFLIP)));
 #else
     return pcplxflip(pconj2(a));
 #endif
 }

Code

◆ pcplxflipconj() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pcplxflipconj ( Packet2cf a )

Definition at line 1038 of file MatrixVectorProduct.h.

 {
 #ifdef PERMXOR_GOOD
     return Packet2cf(Packet4f(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX32_CONJ_XOR, p16uc_COMPLEX32_XORFLIP)));
 #else
     return pcplxflip(pconj2(a));
 #endif
 }

Code

◆ pcplxflipnegate() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pcplxflipnegate ( Packet1cd a )

Definition at line 1104 of file MatrixVectorProduct.h.

 {
 #ifdef PERMXOR_GOOD
     return Packet1cd(Packet2d(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX64_NEGATE, p16uc_COMPLEX64_XORFLIP)));
 #else
     return pcplxflip(pnegate2(a));
 #endif
 }

Code

◆ pcplxflipnegate() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pcplxflipnegate ( Packet2cf a )

Definition at line 1095 of file MatrixVectorProduct.h.

 {
 #ifdef PERMXOR_GOOD
     return Packet2cf(Packet4f(vec_permxor(Packet16uc(a.v), p16uc_COMPLEX32_NEGATE, p16uc_COMPLEX32_XORFLIP)));
 #else
     return pcplxflip(pnegate2(a));
 #endif
 }

Code

◆ pload_complex() [1/4]

template<typename ResPacket >

EIGEN_ALWAYS_INLINE Packet2d pload_complex ( Packet1cd * src )

Definition at line 1249 of file MatrixVectorProduct.h.

 {
     return src->v;
 }

Code

◆ pload_complex() [2/4]

template<typename ResPacket >

EIGEN_ALWAYS_INLINE Packet4f pload_complex ( Packet2cf * src )

Definition at line 1243 of file MatrixVectorProduct.h.

 {
     return src->v;
 }

Code

◆ pload_complex() [3/4]

template<typename ResPacket >

EIGEN_ALWAYS_INLINE Packet2d pload_complex ( std::complex< double > * src )

Definition at line 1236 of file MatrixVectorProduct.h.

 {
     return ploadu<Packet2d>(reinterpret_cast<double*>(src));
 }

Code

◆ pload_complex() [4/4]

template<typename ResPacket >

EIGEN_ALWAYS_INLINE Packet4f pload_complex ( std::complex< float > * src )

Definition at line 1224 of file MatrixVectorProduct.h.

 {
     // GEMV_IS_SCALAR is a macro defined elsewhere in the file
     // (NOTE(review): presumably evaluated from ResPacket — confirm).
     // When it holds, only one complex<float> is loaded via
     // pload_complex_half; otherwise four floats are loaded with ploadu.
     if (GEMV_IS_SCALAR) {
         return pload_complex_half(src);
     }
     else
     {
         return ploadu<Packet4f>(reinterpret_cast<float*>(src));
     }
 }

Code

◆ pload_complex_full() [1/2]

EIGEN_ALWAYS_INLINE Packet2d pload_complex_full ( std::complex< double > * src )

Definition at line 1260 of file MatrixVectorProduct.h.

 {
     // Load one Packet1cd with ploadu and return its raw Packet2d member.
     return ploadu<Packet1cd>(src).v;
 }

Code

◆ pload_complex_full() [2/2]

EIGEN_ALWAYS_INLINE Packet4f pload_complex_full ( std::complex< float > * src )

Definition at line 1255 of file MatrixVectorProduct.h.

 {
     // The complex<float> is addressed as a double so that ploaddup<Packet2d>
     // operates on it as one 64-bit element; the result is then
     // reinterpreted as four floats.
     // NOTE(review): relies on Eigen's ploaddup duplicating the loaded
     // element across the packet — confirm against PacketMath docs.
     return Packet4f(ploaddup<Packet2d>(reinterpret_cast<double *>(src)));
 }

Code

◆ pload_complex_full_row() [1/2]

EIGEN_ALWAYS_INLINE Packet2d pload_complex_full_row ( std::complex< double > * src )

Definition at line 1271 of file MatrixVectorProduct.h.

 {
     // For double precision the row variant simply forwards to
     // pload_complex_full.
     return pload_complex_full(src);
 }

Code

◆ pload_complex_full_row() [2/2]

EIGEN_ALWAYS_INLINE Packet4f pload_complex_full_row ( std::complex< float > * src )

Definition at line 1266 of file MatrixVectorProduct.h.

 {
     // Load one Packet2cf with ploadu and return its raw Packet4f member.
     return ploadu<Packet2cf>(src).v;
 }

Code

◆ pload_complex_half()

EIGEN_ALWAYS_INLINE Packet4f pload_complex_half ( std::complex< float > * src )

Definition at line 1129 of file MatrixVectorProduct.h.

 {
     // Loads a single complex<float> (8 bytes) into a Packet4f.
     // t is declared without an initializer; the fallback branch below writes
     // only the two floats starting at index COMPLEX_DELTA, so the other two
     // lanes are not assigned there.
     Packet4f t;
 #ifdef EIGEN_VECTORIZE_VSX
     // Load float64/two float32 (doubleword alignment)
     __asm__("lxsdx %x0,%y1" : "=wa" (t) : "Z" (*src));
 #else
     // Store *src into t at float offset COMPLEX_DELTA through a pointer cast.
     *reinterpret_cast<std::complex<float>*>(reinterpret_cast<float*>(&t) + COMPLEX_DELTA) = *src;
 #endif
     return t;
 }

Code

◆ pload_real() [1/4]

EIGEN_ALWAYS_INLINE Packet2d pload_real ( double * src )

Definition at line 1282 of file MatrixVectorProduct.h.

 {
     // Build a Packet2d from the single scalar *src via pset1.
     return pset1<Packet2d>(*src);
 }

Code

◆ pload_real() [2/4]

EIGEN_ALWAYS_INLINE Packet4f pload_real ( float * src )

Definition at line 1277 of file MatrixVectorProduct.h.

 {
     // Build a Packet4f from the single scalar *src via pset1.
     return pset1<Packet4f>(*src);
 }

Code

◆ pload_real() [3/4]

EIGEN_ALWAYS_INLINE Packet2d pload_real ( Packet2d & src )

Definition at line 1292 of file MatrixVectorProduct.h.

 {
     // Input is already a packet: return a copy unchanged.
     return src;
 }

Code

◆ pload_real() [4/4]

EIGEN_ALWAYS_INLINE Packet4f pload_real ( Packet4f & src )

Definition at line 1287 of file MatrixVectorProduct.h.

 {
     // Input is already a packet: return a copy unchanged.
     return src;
 }

Code

◆ pload_real_full() [1/4]

EIGEN_ALWAYS_INLINE Packet2d pload_real_full ( double * src )

Definition at line 1304 of file MatrixVectorProduct.h.

 {
     // For double the "full" variant forwards to pload_real(double*).
     return pload_real(src);
 }

Code

◆ pload_real_full() [2/4]

EIGEN_ALWAYS_INLINE Packet4f pload_real_full ( float * src )

Definition at line 1298 of file MatrixVectorProduct.h.

 {
     // Load four floats, then merge the high half of the vector with itself.
     // Per the AltiVec vec_mergeh definition this yields
     // {ret[0], ret[0], ret[1], ret[1]}, i.e. only the first two loaded floats
     // contribute to the result.
     Packet4f ret = ploadu<Packet4f>(src);
     return vec_mergeh(ret, ret);
 }

Code

◆ pload_real_full() [3/4]

EIGEN_ALWAYS_INLINE Packet2d pload_real_full ( std::complex< double > * src )

Definition at line 1314 of file MatrixVectorProduct.h.

 {
     // Complex-pointer overload that forwards to pload_complex_full; per the
     // original trailing comment it exists only so that code compiles.
     return pload_complex_full(src);   // Just for compilation
 }

Code

◆ pload_real_full() [4/4]

EIGEN_ALWAYS_INLINE Packet4f pload_real_full ( std::complex< float > * src )

Definition at line 1309 of file MatrixVectorProduct.h.

 {
     // Complex-pointer overload that forwards to pload_complex_full; per the
     // original trailing comment it exists only so that code compiles.
     return pload_complex_full(src);   // Just for compilation
 }

Code

◆ pload_real_row() [1/2]

template<typename ResPacket >

EIGEN_ALWAYS_INLINE Packet2d pload_real_row ( double * src )

Definition at line 1332 of file MatrixVectorProduct.h.

 {
     // Double-precision row load forwards to pload_real(double*).
     // ResPacket is not referenced in this overload.
     return pload_real(src);
 }

Code

◆ pload_real_row() [2/2]

template<typename ResPacket >

EIGEN_ALWAYS_INLINE Packet4f pload_real_row ( float * src )

Definition at line 1321 of file MatrixVectorProduct.h.

 {
     // GEMV_IS_SCALAR (macro defined elsewhere; NOTE(review): presumably
     // derived from ResPacket — confirm) picks between pload_real_full(src)
     // and a plain four-float ploadu.
     if (GEMV_IS_SCALAR) {
         return pload_real_full(src);
     }
     else {
         return ploadu<Packet4f>(src);
     }
 }

Code

◆ pload_realimag() [1/2]

template<typename RhsScalar >

EIGEN_ALWAYS_INLINE void pload_realimag	(	RhsScalar *	src,
		Packet2d &	r,
		Packet2d &	i
	)

Definition at line 1156 of file MatrixVectorProduct.h.

 {
     // Fills r from the double at index 0 of src and i from the double at
     // index 1 (src is viewed as double*).
     // NOTE(review): assumes RhsScalar is std::complex<double>, so index 0/1
     // are the real/imaginary parts — confirm at call sites.
 #ifdef EIGEN_VECTORIZE_VSX
     // One lxvdsx instruction per output packet.
     __asm__("lxvdsx %x0,%y1" : "=wa" (r) : "Z" (*(reinterpret_cast<double*>(src) + 0)));
     __asm__("lxvdsx %x0,%y1" : "=wa" (i) : "Z" (*(reinterpret_cast<double*>(src) + 1)));
 #else
     // Load both doubles once, then splat element 0 into r and element 1 into i.
     Packet2d t = ploadu<Packet2d>(reinterpret_cast<double*>(src));
     r = vec_splat(t, 0);
     i = vec_splat(t, 1);
 #endif
 }

Code

◆ pload_realimag() [2/2]

template<typename RhsScalar >

EIGEN_ALWAYS_INLINE void pload_realimag	(	RhsScalar *	src,
		Packet4f &	r,
		Packet4f &	i
	)

Definition at line 1143 of file MatrixVectorProduct.h.

 {
     // Fills r from the float at index 0 of src and i from the float at
     // index 1 (src is viewed as float*).
     // NOTE(review): assumes RhsScalar is std::complex<float> — confirm.
 #ifdef _ARCH_PWR9
     // One lxvwsx instruction per output packet.
     __asm__("lxvwsx %x0,%y1" : "=wa" (r) : "Z" (*(reinterpret_cast<float*>(src) + 0)));
     __asm__("lxvwsx %x0,%y1" : "=wa" (i) : "Z" (*(reinterpret_cast<float*>(src) + 1)));
 #else
     // Load the pair with pload_complex_half, then splat the two floats that
     // sit at lane offsets COMPLEX_DELTA + 0 and COMPLEX_DELTA + 1.
     Packet4f t = pload_complex_half(src);
     r = vec_splat(t, COMPLEX_DELTA + 0);
     i = vec_splat(t, COMPLEX_DELTA + 1);
 #endif
 }

Code

◆ pload_realimag_combine() [1/2]

EIGEN_ALWAYS_INLINE Packet2d pload_realimag_combine ( std::complex< double > * src )

Definition at line 1206 of file MatrixVectorProduct.h.

 {
     // Load one Packet1cd with ploadu and return its raw Packet2d member.
     return ploadu<Packet1cd>(src).v;
 }

Code

◆ pload_realimag_combine() [2/2]

EIGEN_ALWAYS_INLINE Packet4f pload_realimag_combine ( std::complex< float > * src )

Definition at line 1195 of file MatrixVectorProduct.h.

 {
     // The complex<float> is addressed as one double in both branches:
     //  - VSX: a single lxvdsx instruction loads it into ret;
     //  - otherwise: ploaddup<Packet2d>, reinterpreted as Packet4f.
 #ifdef EIGEN_VECTORIZE_VSX
     Packet4f ret;
     __asm__("lxvdsx %x0,%y1" : "=wa" (ret) : "Z" (*(reinterpret_cast<double*>(src) + 0)));
     return ret;
 #else
     return Packet4f(ploaddup<Packet2d>(reinterpret_cast<double *>(src)));
 #endif
 }

Code

◆ pload_realimag_combine_row() [1/2]

EIGEN_ALWAYS_INLINE Packet2d pload_realimag_combine_row ( std::complex< double > * src )

Definition at line 1217 of file MatrixVectorProduct.h.

 {
     // Load one Packet1cd with ploadu and return its raw Packet2d member.
     return ploadu<Packet1cd>(src).v;
 }

Code

◆ pload_realimag_combine_row() [2/2]

EIGEN_ALWAYS_INLINE Packet4f pload_realimag_combine_row ( std::complex< float > * src )

Definition at line 1212 of file MatrixVectorProduct.h.

 {
     // Load one Packet2cf with ploadu and return its raw Packet4f member.
     return ploadu<Packet2cf>(src).v;
 }

Code

◆ pload_realimag_row() [1/2]

template<typename RhsScalar >

EIGEN_ALWAYS_INLINE void pload_realimag_row	(	RhsScalar *	src,
		Packet2d &	r,
		Packet2d &	i
	)

Definition at line 1189 of file MatrixVectorProduct.h.

 {
     // Double-precision row variant forwards to pload_realimag, which writes
     // r and i through the references. The function itself returns void.
     return pload_realimag(src, r, i);
 }

Code

◆ pload_realimag_row() [2/2]

template<typename RhsScalar >

EIGEN_ALWAYS_INLINE void pload_realimag_row	(	RhsScalar *	src,
		Packet4f &	r,
		Packet4f &	i
	)

Definition at line 1176 of file MatrixVectorProduct.h.

 {
     // Load four consecutive floats, then build r from the even-indexed
     // elements and i from the odd-indexed ones.
     // NOTE(review): assumes src holds two interleaved complex<float> values,
     // so even = real and odd = imaginary — confirm at call sites.
     Packet4f t = ploadu<Packet4f>(reinterpret_cast<float*>(src));
 #ifdef __POWER8_VECTOR__
     r = vec_mergee(t, t);
     i = vec_mergeo(t, t);
 #else
     // Without POWER8 vector support the same selection is done with
     // vec_perm and the p16uc_MERGEE / p16uc_MERGEO byte masks.
     r = vec_perm(t, t, p16uc_MERGEE);
     i = vec_perm(t, t, p16uc_MERGEO);
 #endif
 }

Code

◆ pmadd_complex()

template<typename ScalarPacket , typename AlphaData >

EIGEN_ALWAYS_INLINE ScalarPacket pmadd_complex	(	ScalarPacket &	c0,
		ScalarPacket &	c2,
		ScalarPacket &	c4,
		AlphaData &	b0
	)

Definition at line 1425 of file MatrixVectorProduct.h.

 {
     // Two chained multiply-adds: the inner one combines c0, b0.separate.r.v
     // and c4; the outer one combines c2, b0.separate.i.v and the inner result.
     // With Eigen's pmadd(a, b, c) = a * b + c this is
     //   c2 * alpha_i + (c0 * alpha_r + c4).
     return pmadd(c2, b0.separate.i.v, pmadd(c0, b0.separate.r.v, c4));
 }

Code

◆ pmadd_complex_complex()

template<typename ComplexPacket , typename RealPacket , bool ConjugateLhs, bool ConjugateRhs, bool Negate>

EIGEN_ALWAYS_INLINE RealPacket pmadd_complex_complex	(	RealPacket &	a,
		RealPacket &	b,
		RealPacket &	c
	)

Definition at line 1492 of file MatrixVectorProduct.h.

 {
     // Branch selection depends only on template bool parameters, so each
     // instantiation takes exactly one path.
     if (ConjugateLhs && ConjugateRhs) {
         // Both sides conjugated: b is passed through pconj2 before the
         // multiply-add.
         return vec_madd(a, pconj2(ComplexPacket(b)).v, c);
     }
     else if (Negate && !ConjugateLhs && ConjugateRhs) {
         // Negated product: vec_nmsub computes -(a * b - c).
         return vec_nmsub(a, b, c);
     }
     else {
         // Plain fused multiply-add: a * b + c.
         return vec_madd(a, b, c);
     }
 }

Code

◆ pmadd_complex_real()

template<typename ComplexPacket , typename RealPacket , bool Conjugate>

EIGEN_ALWAYS_INLINE RealPacket pmadd_complex_real	(	RealPacket &	a,
		RealPacket &	b,
		RealPacket &	c
	)

Definition at line 1507 of file MatrixVectorProduct.h.

 {
     // Conjugate is a template bool, so one branch is chosen per instantiation:
     // either b is passed through pconj2 first, or a plain a * b + c is done.
     if (Conjugate) {
         return vec_madd(a, pconj2(ComplexPacket(b)).v, c);
     }
     else {
         return vec_madd(a, b, c);
     }
 }

Code

◆ pnegate2() [1/2]

EIGEN_ALWAYS_INLINE Packet1cd pnegate2 ( Packet1cd a )

Definition at line 1085 of file MatrixVectorProduct.h.

 {
     // With __POWER8_VECTOR__ the vector is negated using vec_neg; otherwise
     // a.v is XORed with the p16uc_COMPLEX64_NEGATE mask
     // (NOTE(review): presumably a sign-bit mask — defined elsewhere).
 #ifdef __POWER8_VECTOR__
     return Packet1cd(vec_neg(a.v));
 #else
     return Packet1cd(pxor(a.v, reinterpret_cast<Packet2d>(p16uc_COMPLEX64_NEGATE)));
 #endif
 }

Code

◆ pnegate2() [2/2]

EIGEN_ALWAYS_INLINE Packet2cf pnegate2 ( Packet2cf a )

Definition at line 1076 of file MatrixVectorProduct.h.

 {
     // With __POWER8_VECTOR__ the vector is negated using vec_neg; otherwise
     // a.v is XORed with the p16uc_COMPLEX32_NEGATE mask
     // (NOTE(review): presumably a sign-bit mask — defined elsewhere).
 #ifdef __POWER8_VECTOR__
     return Packet2cf(vec_neg(a.v));
 #else
     return Packet2cf(pxor(a.v, reinterpret_cast<Packet4f>(p16uc_COMPLEX32_NEGATE)));
 #endif
 }

Code

◆ predux_complex() [1/2]

template<typename ResScalar , typename PResPacket , typename ResPacket , typename LhsPacket , typename RhsPacket >

EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_complex	(	PResPacket &	a0,
		PResPacket &	b0,
		ResPacket &	a1,
		ResPacket &	b1
	)

Definition at line 2623 of file MatrixVectorProduct.h.

 {
     // When GEMV_IS_COMPLEX_COMPLEX (macro defined elsewhere) holds, the
     // second accumulator pair is folded into the first; a0 and b0 are
     // references, so the caller's packets are modified. Otherwise a1 and b1
     // are not used.
     if (GEMV_IS_COMPLEX_COMPLEX) {
         a0 = padd(a0, a1);
         b0 = padd(b0, b1);
     }
     // Delegate the reduction to the two-argument overload.
     return predux_complex<ResScalar, PResPacket>(a0, b0);
 }

Code

◆ predux_complex() [2/2]

template<typename ResScalar , typename ResPacket >

EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_complex	(	ResPacket &	a,
		ResPacket &	b
	)

Definition at line 2374 of file MatrixVectorProduct.h.

 {
     // Thin forwarder: the reduction itself is done by predux_real.
     return predux_real<ResScalar, ResPacket>(a, b);
 }

Code

◆ predux_real()

template<typename ResScalar , typename ResPacket >

EIGEN_ALWAYS_INLINE ScalarBlock<ResScalar, 2> predux_real	(	ResPacket &	a,
		ResPacket &	b
	)

Definition at line 2365 of file MatrixVectorProduct.h.

 {
     // Reduce each packet with predux and return the two results together:
     // scalar[0] comes from a, scalar[1] from b.
     ScalarBlock<ResScalar, 2> cc0;
     cc0.scalar[0] = predux(a);
     cc0.scalar[1] = predux(b);
     return cc0;
 }

Code

◆ preduxVecResults2VSX()

template<Index num_acc>

EIGEN_ALWAYS_INLINE void preduxVecResults2VSX	(	Packet4f(&)	acc[num_acc][2],
		Index	k
	)

Definition at line 787 of file MatrixVectorProduct.h.

 {
   // Partial reduction step on acc[k][0] (and acc[k + 1][0] when it exists).
   // The result is left in acc[k][0]; acc[k][1] is overwritten as scratch in
   // the two-accumulator branch.
   if (num_acc > (k + 1)) {
     // Two accumulators available: interleave their low and high halves,
     // add the two interleaved vectors, then add the sum to itself shifted
     // by 8 bytes (vec_sld with both operands equal).
     acc[k][1] = vec_mergel(acc[k + 0][0], acc[k + 1][0]);
     acc[k][0] = vec_mergeh(acc[k + 0][0], acc[k + 1][0]);
     acc[k][0] = acc[k][0] + acc[k][1];
     acc[k][0] += vec_sld(acc[k][0], acc[k][0], 8);
   } else {
     // Only acc[k] is available: two shift-and-add steps, first by 8 bytes,
     // then by 12 or 4 bytes depending on endianness.
     acc[k][0] += vec_sld(acc[k][0], acc[k][0], 8);
 #ifdef _BIG_ENDIAN
     acc[k][0] += vec_sld(acc[k][0], acc[k][0], 12);
 #else
     acc[k][0] += vec_sld(acc[k][0], acc[k][0], 4);
 #endif
   }
 }

Code

◆ preduxVecResultsVSX()

template<Index num_acc>

EIGEN_ALWAYS_INLINE void preduxVecResultsVSX ( Packet4f(&) acc[num_acc][2] )

Definition at line 805 of file MatrixVectorProduct.h.

 {
   // Walk the accumulators in groups of four. Each group is reduced by up to
   // two preduxVecResults2VSX calls (at k and at k + 2), and the combined
   // result is written to acc[k][0].
   for(Index k = 0; k < num_acc; k += 4) {
     preduxVecResults2VSX<num_acc>(acc, k + 0);
     if (num_acc > (k + 2)) {
       preduxVecResults2VSX<num_acc>(acc, k + 2);
       // Combine acc[k][0] and acc[k + 2][0] at 64-bit granularity: either a
       // vec_mergeh on their Packet2ul views or a vec_perm with the
       // p16uc_TRANSPOSE64_HI mask.
 #ifdef EIGEN_VECTORIZE_VSX
       acc[k + 0][0] = reinterpret_cast<Packet4f>(vec_mergeh(reinterpret_cast<Packet2ul>(acc[k + 0][0]), reinterpret_cast<Packet2ul>(acc[k + 2][0])));
 #else
       acc[k + 0][0] = reinterpret_cast<Packet4f>(vec_perm(acc[k + 0][0],acc[k + 2][0],p16uc_TRANSPOSE64_HI));
 #endif
     }
   }
 }

Code

◆ pset1_complex() [1/2]

template<typename Scalar , typename ResScalar , typename ResPacket , int which>

EIGEN_ALWAYS_INLINE Packet1cd pset1_complex ( std::complex< double > & alpha )

Definition at line 1369 of file MatrixVectorProduct.h.

 {
     // Each lane is filled by pset1_realimag, steered by bits of the template
     // constant `which`:
     //   lane 0: bit 0x01 -> `which` argument, bit 0x04 -> `conj` argument
     //   lane 1: bit 0x02 -> `which` argument, bit 0x08 -> `conj` argument
     // ResPacket is not referenced in this overload.
     Packet1cd ret;
     ret.v[0] = pset1_realimag<Scalar, ResScalar>(alpha, (which & 0x01), (which & 0x04));
     ret.v[1] = pset1_realimag<Scalar, ResScalar>(alpha, (which & 0x02), (which & 0x08));
     return ret;
 }

Code

◆ pset1_complex() [2/2]

template<typename Scalar , typename ResScalar , typename ResPacket , int which>

EIGEN_ALWAYS_INLINE Packet2cf pset1_complex ( std::complex< float > & alpha )

Definition at line 1358 of file MatrixVectorProduct.h.

 {
     // Two lanes (at offsets COMPLEX_DELTA + 0 / + 1) are computed by
     // pset1_realimag from bits of the template constant `which`
     // (0x01/0x04 for the first, 0x02/0x08 for the second); the two lanes at
     // 2 - COMPLEX_DELTA and 3 - COMPLEX_DELTA are then filled with copies of
     // those values.
     Packet2cf ret;
     ret.v[COMPLEX_DELTA + 0] = pset1_realimag<Scalar, ResScalar>(alpha, (which & 0x01), (which & 0x04));
     ret.v[COMPLEX_DELTA + 1] = pset1_realimag<Scalar, ResScalar>(alpha, (which & 0x02), (which & 0x08));
     ret.v[2 - COMPLEX_DELTA] = ret.v[COMPLEX_DELTA + 0];
     ret.v[3 - COMPLEX_DELTA] = ret.v[COMPLEX_DELTA + 1];
     return ret;
 }

Code

◆ pset1_realimag()

template<typename Scalar , typename ResScalar >

EIGEN_ALWAYS_INLINE Scalar pset1_realimag	(	ResScalar &	alpha,
		int	which,
		int	conj
	)

Definition at line 1351 of file MatrixVectorProduct.h.

 {
     // which != 0 selects alpha.real(), which == 0 selects alpha.imag();
     // conj != 0 negates whichever component was selected.
     return (which) ? ((conj) ? -alpha.real() : alpha.real()) : ((conj) ? -alpha.imag() : alpha.imag());
 }

Code

◆ pset_init()

template<typename Packet , typename LhsPacket , typename RhsPacket >

EIGEN_ALWAYS_INLINE Packet pset_init ( Packet & c1 )

Definition at line 1398 of file MatrixVectorProduct.h.

 {
     // GEMV_IS_COMPLEX_COMPLEX is a macro defined elsewhere
     // (NOTE(review): presumably derived from LhsPacket/RhsPacket — confirm).
     // When it holds, the accumulator starts from a zero packet and c1 is
     // ignored; otherwise c1 is returned as passed in, without being assigned
     // a value here.
     if (GEMV_IS_COMPLEX_COMPLEX) {
         EIGEN_UNUSED_VARIABLE(c1);
         return pset_zero<Packet>();
     }
     else
     {
         return c1;  // Intentionally left uninitialized
     }
 }

Code

◆ pset_zero()

template<typename Packet >

EIGEN_ALWAYS_INLINE Packet pset_zero ( )

Definition at line 1379 of file MatrixVectorProduct.h.

 {
     // Generic case: build the packet with pset1 from a zero of the packet's
     // scalar type (__UNPACK_TYPE__(Packet)).
     return pset1<Packet>(__UNPACK_TYPE__(Packet)(0));
 }

Code

◆ pset_zero< Packet1cd >()

template<>

EIGEN_ALWAYS_INLINE Packet1cd pset_zero< Packet1cd > ( )

Definition at line 1391 of file MatrixVectorProduct.h.

 {
     // Complex-double specialization: wrap a zero Packet2d in a Packet1cd.
     return Packet1cd(pset1<Packet2d>(double(0)));
 }

Code

◆ pset_zero< Packet2cf >()

template<>

EIGEN_ALWAYS_INLINE Packet2cf pset_zero< Packet2cf > ( )

Definition at line 1385 of file MatrixVectorProduct.h.

 {
     // Complex-float specialization: wrap a zero Packet4f in a Packet2cf.
     return Packet2cf(pset1<Packet4f>(float(0)));
 }

Code

◆ pstoreu_pmadd_complex() [1/2]

template<typename Scalar , typename ScalarPacket , typename PResPacket , typename ResPacket , typename ResScalar , typename AlphaData >

EIGEN_ALWAYS_INLINE void pstoreu_pmadd_complex	(	PResPacket &	c0,
		AlphaData &	b0,
		ResScalar *	res
	)

Definition at line 1432 of file MatrixVectorProduct.h.

 {
     // Read-modify-write of the result at `res`: the stored value is
     // pmadd_complex(c0.v, c2.v, <current contents of res>, b0), where c2 is
     // c0 passed through pcplxflipconj.
     PResPacket c2 = pcplxflipconj(c0);
     if (GEMV_IS_SCALAR) {
         // Scalar-result path: res is accessed as Scalar* for both the load
         // and the store of a ScalarPacket.
         ScalarPacket c4 = ploadu<ScalarPacket>(reinterpret_cast<Scalar*>(res));
         ScalarPacket c3 = pmadd_complex<ScalarPacket, AlphaData>(c0.v, c2.v, c4, b0);
         pstoreu(reinterpret_cast<Scalar*>(res), c3);
     } else {
         // Packet-result path: load through pload_complex, wrap the result in
         // PResPacket and store it back to res.
         ScalarPacket c4 = pload_complex<ResPacket>(res);
         PResPacket c3 = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c0.v, c2.v, c4, b0));
         pstoreu(res, c3);
     }
 }

Code

◆ pstoreu_pmadd_complex() [2/2]

template<typename ScalarPacket , typename PResPacket , typename ResPacket , typename ResScalar , typename AlphaData , Index ResPacketSize, Index iter2>

EIGEN_ALWAYS_INLINE void pstoreu_pmadd_complex	(	PResPacket &	c0,
		PResPacket &	c1,
		AlphaData &	b0,
		ResScalar *	res
	)

Definition at line 1447 of file MatrixVectorProduct.h.

 {
     // Two-packet read-modify-write of res at packet offsets iter2 and
     // iter2 + 1 (in units of ResPacketSize). c2/c3 are c0/c1 passed through
     // pcplxflipconj.
     PResPacket c2 = pcplxflipconj(c0);
     PResPacket c3 = pcplxflipconj(c1);
 #if !defined(_ARCH_PWR10)
     // Pre-POWER10: load, pmadd_complex and store each packet separately.
     ScalarPacket c4 = pload_complex<ResPacket>(res + (iter2 * ResPacketSize));
     ScalarPacket c5 = pload_complex<ResPacket>(res + ((iter2 + 1) * ResPacketSize));
     PResPacket c6 = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c0.v, c2.v, c4, b0));
     PResPacket c7 = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c1.v, c3.v, c5, b0));
     pstoreu(res + (iter2 * ResPacketSize), c6);
     pstoreu(res + ((iter2 + 1) * ResPacketSize), c7);
 #else
     // POWER10: both result packets are read as one __vector_pair.
     __vector_pair a = *reinterpret_cast<__vector_pair *>(res + (iter2 * ResPacketSize));
 #if EIGEN_COMP_LLVM
     // LLVM: split the pair into two packets, update each with pmadd_complex,
     // then rebuild the pair via GEMV_BUILDPAIR_MMA.
     PResPacket c6[2];
     __builtin_vsx_disassemble_pair(reinterpret_cast<void*>(c6), &a);
     c6[0] = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c0.v, c2.v, c6[0].v, b0));
     c6[1] = PResPacket(pmadd_complex<ScalarPacket, AlphaData>(c1.v, c3.v, c6[1].v, b0));
     GEMV_BUILDPAIR_MMA(a, c6[0].v, c6[1].v);
 #else
     // Other compilers: inline asm multiply-adds applied in place to the two
     // registers of the pair (%L0 and %0). The first statement uses
     // b0.separate.r.v with c0/c1, the second b0.separate.i.v with c2/c3;
     // single- vs double-precision instructions are chosen by
     // GEMV_IS_COMPLEX_FLOAT.
     if (GEMV_IS_COMPLEX_FLOAT) {
         __asm__ ("xvmaddasp %L0,%x1,%x2\n\txvmaddasp %0,%x1,%x3" : "+&d" (a) : "wa" (b0.separate.r.v), "wa" (c0.v), "wa" (c1.v));
         __asm__ ("xvmaddasp %L0,%x1,%x2\n\txvmaddasp %0,%x1,%x3" : "+&d" (a) : "wa" (b0.separate.i.v), "wa" (c2.v), "wa" (c3.v));
     } else {
         __asm__ ("xvmaddadp %L0,%x1,%x2\n\txvmaddadp %0,%x1,%x3" : "+&d" (a) : "wa" (b0.separate.r.v), "wa" (c0.v), "wa" (c1.v));
         __asm__ ("xvmaddadp %L0,%x1,%x2\n\txvmaddadp %0,%x1,%x3" : "+&d" (a) : "wa" (b0.separate.i.v), "wa" (c2.v), "wa" (c3.v));
     }
 #endif
     // Write the updated pair back to the same location.
     *reinterpret_cast<__vector_pair *>(res + (iter2 * ResPacketSize)) = a;
 #endif
 }

Code

◆ storeBF16fromResult()

template<const Index size, bool inc, Index delta>

EIGEN_ALWAYS_INLINE void storeBF16fromResult	(	bfloat16 *	dst,
		Packet8bf	data,
		Index	resInc,
		Index	extra
	)

Definition at line 665 of file MatrixVectorProduct.h.

 {
   // Store `data` to dst, choosing among four store flavours from the
   // template parameters:
   //   inc  -> strided store (pscatter*) starting at dst + delta*resInc,
   //           using resInc as the stride argument;
   //   !inc -> contiguous store (pstoreu*) starting at dst + delta;
   //   size < 8 -> the *_partial variant, which additionally receives `extra`.
   if (inc) {
     if (size < 8) {
       pscatter_partial(dst + delta*resInc, data, resInc, extra);
     } else {
       pscatter(dst + delta*resInc, data, resInc);
     }
   } else {
     if (size < 8) {
       pstoreu_partial(dst + delta, data, extra);
     } else {
       pstoreu(dst + delta, data);
     }
   }
 }

Code

◆ storeMaddData() [1/2]

template<typename ResPacket , typename ResScalar >

EIGEN_ALWAYS_INLINE void storeMaddData	(	ResScalar *	res,
		ResPacket &	palpha,
		ResPacket &	data
	)

Definition at line 69 of file MatrixVectorProduct.h.

 {
     // Packet form of res += alpha * data: load the current packet at res,
     // combine it with data and palpha via pmadd, and store it back.
     pstoreu(res, pmadd(data, palpha, ploadu<ResPacket>(res)));
 }

Code

◆ storeMaddData() [2/2]

template<typename ResScalar >

EIGEN_ALWAYS_INLINE void storeMaddData	(	ResScalar *	res,
		ResScalar &	alpha,
		ResScalar &	data
	)

Definition at line 75 of file MatrixVectorProduct.h.

 {
     // Scalar form: accumulate alpha * data into the single element at res.
     *res += (alpha * data);
 }

Code

◆ vecColLoopVSX()

template<Index num_acc, typename LhsMapper , typename RhsMapper , bool zero, bool linear>

EIGEN_ALWAYS_INLINE void vecColLoopVSX	(	Index	j,
		LhsMapper &	lhs,
		RhsMapper &	rhs,
		Packet4f(&)	acc[num_acc][2]
	)

Definition at line 551 of file MatrixVectorProduct.h.

 {
   // One step of the column loop at position j: load rhs data, load lhs data
   // into a0, then hand both to multVecVSX together with acc.
   Packet4f a0[num_acc][2], b0[2];
   Packet8bf b2 = loadColData<RhsMapper, linear>(rhs, j);
  
   // Convert the loaded Packet8bf into Packet4f values via
   // oneConvertBF16Perm and the MERGE16_32 masks. b0[1] is only assigned
   // when `zero` is false.
   b0[0] = oneConvertBF16Perm(b2.m_val, p16uc_MERGE16_32_V1);
   if (!zero) {
     b0[1] = oneConvertBF16Perm(b2.m_val, p16uc_MERGE16_32_V2);
   }
  
   // Sub-mapper of lhs obtained with getSubMapper(0, j); a0 is filled two
   // accumulator slots per loadVecLoopVSX call.
   LhsMapper lhs2 = lhs.getSubMapper(0, j);
   for(Index k = 0; k < num_acc; k += 2) {
     loadVecLoopVSX<num_acc, LhsMapper, zero>(k, lhs2, a0);
   }
  
   multVecVSX<num_acc, zero>(acc, a0, b0);
 }

Code

◆ vecVSXLoop()

template<Index num_acc, typename LhsMapper , typename RhsMapper >

EIGEN_ALWAYS_INLINE void vecVSXLoop	(	Index	cols,
		const LhsMapper &	lhs,
		RhsMapper &	rhs,
		Packet4f(&)	acc[num_acc][2],
		Index	extra_cols
	)

Definition at line 867 of file MatrixVectorProduct.h.

 {
   // Process the columns in full chunks of 8 with the last template flag of
   // multVSXVecLoop set to false.
   Index j = 0;
   for(; j + 8 <= cols; j += 8){
     multVSXVecLoop<num_acc, LhsMapper, RhsMapper, false>(acc, lhs, rhs, j, extra_cols);
   }
  
   // If extra_cols is non-zero, make one more call at the current j with the
   // flag set to true.
   if (extra_cols) {
     multVSXVecLoop<num_acc, LhsMapper, RhsMapper, true>(acc, lhs, rhs, j, extra_cols);
   }
 }

Code

Variable Documentation

◆ p16uc_COMPLEX32_CONJ_XOR

const Packet16uc p16uc_COMPLEX32_CONJ_XOR

Definition at line 997 of file MatrixVectorProduct.h.

◆ p16uc_COMPLEX32_CONJ_XOR2

const Packet16uc p16uc_COMPLEX32_CONJ_XOR2

Definition at line 999 of file MatrixVectorProduct.h.

◆ p16uc_COMPLEX32_NEGATE

const Packet16uc p16uc_COMPLEX32_NEGATE

Definition at line 1001 of file MatrixVectorProduct.h.

◆ p16uc_COMPLEX32_XORFLIP

const Packet16uc p16uc_COMPLEX32_XORFLIP

Definition at line 986 of file MatrixVectorProduct.h.

◆ p16uc_COMPLEX64_CONJ_XOR

const Packet16uc p16uc_COMPLEX64_CONJ_XOR

Definition at line 998 of file MatrixVectorProduct.h.

◆ p16uc_COMPLEX64_CONJ_XOR2

const Packet16uc p16uc_COMPLEX64_CONJ_XOR2

Definition at line 1000 of file MatrixVectorProduct.h.

◆ p16uc_COMPLEX64_NEGATE

const Packet16uc p16uc_COMPLEX64_NEGATE

Definition at line 1002 of file MatrixVectorProduct.h.

◆ p16uc_COMPLEX64_XORFLIP

const Packet16uc p16uc_COMPLEX64_XORFLIP

Definition at line 987 of file MatrixVectorProduct.h.

◆ p16uc_MERGE16_32_V1

Packet16uc p16uc_MERGE16_32_V1

static

Definition at line 491 of file MatrixVectorProduct.h.

◆ p16uc_MERGE16_32_V2

Packet16uc p16uc_MERGE16_32_V2

static

Definition at line 492 of file MatrixVectorProduct.h.

◆ p16uc_MERGEE

const Packet16uc p16uc_MERGEE

Definition at line 1169 of file MatrixVectorProduct.h.

◆ p16uc_MERGEO

const Packet16uc p16uc_MERGEO

Definition at line 1171 of file MatrixVectorProduct.h.

Classes

Macros

Functions

Variables

Macro Definition Documentation

◆ COMPLEX_DELTA

◆ EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_COL

◆ EIGEN_POWER_GEMV_COMPLEX_SPECIALIZE_ROW

◆ EIGEN_POWER_GEMV_PREFETCH

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_COL_BFLOAT16

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW

◆ EIGEN_POWER_GEMV_REAL_SPECIALIZE_ROW_BFLOAT16

◆ gemv_bf16_col

◆ gemv_bf16_row

◆ GEMV_BUILDPAIR_MMA

◆ GEMV_GETN

◆ GEMV_GETN_COMPLEX

◆ GEMV_INIT

◆ GEMV_INIT_COMPLEX

◆ GEMV_INIT_COMPLEX_OLD

◆ GEMV_INIT_ROW

◆ GEMV_IS_COMPLEX_COMPLEX

◆ GEMV_IS_COMPLEX_FLOAT

◆ GEMV_IS_FLOAT

◆ GEMV_IS_SCALAR

◆ GEMV_LOADPACKET_COL

◆ GEMV_LOADPACKET_COL_COMPLEX

◆ GEMV_LOADPACKET_COL_COMPLEX_DATA

◆ GEMV_LOADPACKET_ROW

◆ GEMV_LOADPACKET_ROW_COMPLEX

◆ GEMV_LOADPACKET_ROW_COMPLEX_DATA

◆ GEMV_LOADPACKET_ROW_COMPLEX_OLD

◆ GEMV_MULT

◆ GEMV_MULT_COMPLEX

◆ GEMV_MULT_COMPLEX_COMPLEX

◆ GEMV_MULT_COMPLEX_REAL

◆ GEMV_MULT_REAL_COMPLEX

◆ GEMV_PREDUX2

◆ GEMV_PREDUX4_COMPLEX

◆ GEMV_PREDUX4_COMPLEX_OLD

◆ GEMV_PREFETCH

◆ GEMV_PROCESS_COL

◆ GEMV_PROCESS_COL_COMPLEX

◆ GEMV_PROCESS_COL_COMPLEX_ONE

◆ GEMV_PROCESS_COL_ONE

◆ GEMV_PROCESS_END_ROW_COMPLEX

◆ GEMV_PROCESS_ROW

◆ GEMV_PROCESS_ROW_COMPLEX

◆ GEMV_PROCESS_ROW_COMPLEX_IS_NEW

◆ GEMV_PROCESS_ROW_COMPLEX_ONE

◆ GEMV_PROCESS_ROW_COMPLEX_ONE_NEW

◆ GEMV_PROCESS_ROW_COMPLEX_ONE_OLD

◆ GEMV_PROCESS_ROW_COMPLEX_PREDUX

◆ GEMV_PROCESS_ROW_COMPLEX_PREDUX_NEW

◆ GEMV_PROCESS_ROW_COMPLEX_PREDUX_OLD

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE_NEW

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE_OLD

◆ GEMV_PROCESS_ROW_COMPLEX_SINGLE_WORK

◆ GEMV_STORE_COL

◆ GEMV_STORE_COL_COMPLEX

◆ GEMV_STORE_ROW

◆ GEMV_STORE_ROW_COMPLEX

◆ GEMV_UNROLL

◆ GEMV_UNROLL_HALF

◆ GEMV_UNROLL_ROW

◆ GEMV_UNROLL_ROW_HALF

◆ GEMV_WORK_COL

◆ GEMV_WORK_COL_COMPLEX

◆ GEMV_WORK_ROW

◆ GEMV_WORK_ROW_COMPLEX

◆ GEMV_WORK_ROW_COMPLEX_OLD

◆ MAX_BFLOAT16_VEC_ACC_VSX

Function Documentation

◆ addResultsVSX()

◆ calcVSXVecColLoops()

◆ calcVSXVecLoops()

◆ colVSXVecColLoopBody()

◆ colVSXVecColLoopBodyExtra()