1 #include "../../InternalHeaderCheck.h"
6 #if EIGEN_ARCH_ARM && EIGEN_COMP_CLANG
13 : gebp_traits<float,float,false,false,Architecture::Generic,GEBPPacketFull>
15 EIGEN_STRONG_INLINE
void acc(
const AccPacket&
c,
const ResPacket& alpha, ResPacket& r)
const
20 "vmla.f32 %q[r], %q[c], %q[alpha]"
27 template <
typename LaneIdType>
30 const LaneIdType&)
const {
34 template <
typename LaneIdType>
35 EIGEN_STRONG_INLINE
void madd(
const Packet4f&
a,
const QuadPacket<Packet4f>&
b,
37 const LaneIdType& lane)
const {
38 madd(
a,
b.get(lane),
c, tmp, lane);
46 #ifndef EIGEN_NEON_GEBP_NR
47 #define EIGEN_NEON_GEBP_NR 8
52 : gebp_traits<float,float,false,false,Architecture::Generic,GEBPPacketFull>
// Type/size settings visible for this float specialization: the RHS "packet"
// is a single float, RhsPacketx4 is one float32x4_t, and nr is taken from the
// EIGEN_NEON_GEBP_NR macro.
54 typedef float RhsPacket;
55 typedef float32x4_t RhsPacketx4;
56 enum { nr = EIGEN_NEON_GEBP_NR };
57 EIGEN_STRONG_INLINE
void loadRhs(
const RhsScalar*
b, RhsPacket& dest)
const {
61 EIGEN_STRONG_INLINE
void loadRhs(
const RhsScalar*
b, RhsPacketx4& dest)
const
66 EIGEN_STRONG_INLINE
void updateRhs(
const RhsScalar*
b, RhsPacket& dest)
const
71 EIGEN_STRONG_INLINE
void updateRhs(
const RhsScalar*, RhsPacketx4&)
const
74 EIGEN_STRONG_INLINE
void loadRhsQuad(
const RhsScalar*
b, RhsPacket& dest)
const
79 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacket&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<0>&)
const
81 c = vfmaq_n_f32(
c,
a,
b);
// madd overload selected by the FixedInt<0> tag for an RhsPacketx4 right-hand
// side: forwards to madd_helper<0>(a, b, c), which updates c in place.
// The fourth parameter (an unnamed RhsPacket&) and the tag argument itself are
// not used in the body; the tag only picks the overload.
86 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<0>&)
const
87 { madd_helper<0>(
a,
b,
c); }
// madd overload selected by the FixedInt<1> tag for an RhsPacketx4 right-hand
// side: forwards to madd_helper<1>(a, b, c), which updates c in place.
// The unnamed RhsPacket& parameter and the tag argument are not used in the body.
88 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<1>&)
const
89 { madd_helper<1>(
a,
b,
c); }
// madd overload selected by the FixedInt<2> tag for an RhsPacketx4 right-hand
// side: forwards to madd_helper<2>(a, b, c), which updates c in place.
// The unnamed RhsPacket& parameter and the tag argument are not used in the body.
90 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<2>&)
const
91 { madd_helper<2>(
a,
b,
c); }
// madd overload selected by the FixedInt<3> tag for an RhsPacketx4 right-hand
// side: forwards to madd_helper<3>(a, b, c), which updates c in place.
// The unnamed RhsPacket& parameter and the tag argument are not used in the body.
92 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<3>&)
const
93 { madd_helper<3>(
a,
b,
c); }
97 EIGEN_STRONG_INLINE
void madd_helper(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c)
const
99 #if EIGEN_GNUC_STRICT_LESS_THAN(9,0,0)
103 if(LaneID==0)
asm(
"fmla %0.4s, %1.4s, %2.s[0]\n" :
"+w" (
c) :
"w" (
a),
"w" (
b) : );
104 else if(LaneID==1)
asm(
"fmla %0.4s, %1.4s, %2.s[1]\n" :
"+w" (
c) :
"w" (
a),
"w" (
b) : );
105 else if(LaneID==2)
asm(
"fmla %0.4s, %1.4s, %2.s[2]\n" :
"+w" (
c) :
"w" (
a),
"w" (
b) : );
106 else if(LaneID==3)
asm(
"fmla %0.4s, %1.4s, %2.s[3]\n" :
"+w" (
c) :
"w" (
a),
"w" (
b) : );
108 c = vfmaq_laneq_f32(
c,
a,
b, LaneID);
115 struct gebp_traits <double,double,false,false,Architecture::
NEON>
116 : gebp_traits<double,double,false,false,Architecture::Generic>
118 typedef double RhsPacket;
119 enum { nr = EIGEN_NEON_GEBP_NR };
121 float64x2_t B_0, B_1;
124 EIGEN_STRONG_INLINE
void loadRhs(
const RhsScalar*
b, RhsPacket& dest)
const
129 EIGEN_STRONG_INLINE
void loadRhs(
const RhsScalar*
b, RhsPacketx4& dest)
const
131 dest.B_0 = vld1q_f64(
b);
132 dest.B_1 = vld1q_f64(
b+2);
135 EIGEN_STRONG_INLINE
void updateRhs(
const RhsScalar*
b, RhsPacket& dest)
const
140 EIGEN_STRONG_INLINE
void updateRhs(
const RhsScalar*, RhsPacketx4&)
const
143 EIGEN_STRONG_INLINE
void loadRhsQuad(
const RhsScalar*
b, RhsPacket& dest)
const
148 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacket&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<0>&)
const
150 c = vfmaq_n_f64(
c,
a,
b);
// Double-precision madd overload selected by the FixedInt<0> tag for an
// RhsPacketx4 right-hand side: forwards to madd_helper<0>(a, b, c), which
// updates c in place. The unnamed RhsPacket& parameter and the tag argument
// are not used in the body; the tag only picks the overload.
156 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<0>&)
const
157 { madd_helper<0>(
a,
b,
c); }
// Double-precision madd overload selected by the FixedInt<1> tag: forwards to
// madd_helper<1>(a, b, c), which updates c in place. The unnamed RhsPacket&
// parameter and the tag argument are not used in the body.
158 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<1>&)
const
159 { madd_helper<1>(
a,
b,
c); }
// Double-precision madd overload selected by the FixedInt<2> tag: forwards to
// madd_helper<2>(a, b, c), which updates c in place. The unnamed RhsPacket&
// parameter and the tag argument are not used in the body.
160 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<2>&)
const
161 { madd_helper<2>(
a,
b,
c); }
// Double-precision madd overload selected by the FixedInt<3> tag: forwards to
// madd_helper<3>(a, b, c), which updates c in place. The unnamed RhsPacket&
// parameter and the tag argument are not used in the body.
162 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<3>&)
const
163 { madd_helper<3>(
a,
b,
c); }
166 template <
int LaneID>
167 EIGEN_STRONG_INLINE
void madd_helper(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c)
const
169 #if EIGEN_GNUC_STRICT_LESS_THAN(9,0,0)
173 if(LaneID==0)
asm(
"fmla %0.2d, %1.2d, %2.d[0]\n" :
"+w" (
c) :
"w" (
a),
"w" (
b.B_0) : );
174 else if(LaneID==1)
asm(
"fmla %0.2d, %1.2d, %2.d[1]\n" :
"+w" (
c) :
"w" (
a),
"w" (
b.B_0) : );
175 else if(LaneID==2)
asm(
"fmla %0.2d, %1.2d, %2.d[0]\n" :
"+w" (
c) :
"w" (
a),
"w" (
b.B_1) : );
176 else if(LaneID==3)
asm(
"fmla %0.2d, %1.2d, %2.d[1]\n" :
"+w" (
c) :
"w" (
a),
"w" (
b.B_1) : );
178 if(LaneID==0)
c = vfmaq_laneq_f64(
c,
a,
b.B_0, 0);
179 else if(LaneID==1)
c = vfmaq_laneq_f64(
c,
a,
b.B_0, 1);
180 else if(LaneID==2)
c = vfmaq_laneq_f64(
c,
a,
b.B_1, 0);
181 else if(LaneID==3)
c = vfmaq_laneq_f64(
c,
a,
b.B_1, 1);
190 #if EIGEN_HAS_ARM64_FP16_VECTOR_ARITHMETIC && EIGEN_COMP_CLANG
193 struct gebp_traits <half,half,false,false,Architecture::
NEON>
194 : gebp_traits<half,half,false,false,Architecture::Generic>
// Type/size settings visible for this half specialization: the RHS "packet" is
// a single half, RhsPacketx4 and PacketHalf are both float16x4_t, and nr is
// taken from the EIGEN_NEON_GEBP_NR macro.
196 typedef half RhsPacket;
197 typedef float16x4_t RhsPacketx4;
198 typedef float16x4_t PacketHalf;
199 enum { nr = EIGEN_NEON_GEBP_NR };
201 EIGEN_STRONG_INLINE
void loadRhs(
const RhsScalar*
b, RhsPacket& dest)
const
206 EIGEN_STRONG_INLINE
void loadRhs(
const RhsScalar*
b, RhsPacketx4& dest)
const
208 dest = vld1_f16((
const __fp16 *)
b);
211 EIGEN_STRONG_INLINE
void updateRhs(
const RhsScalar*
b, RhsPacket& dest)
const
216 EIGEN_STRONG_INLINE
void updateRhs(
const RhsScalar*, RhsPacketx4&)
const
219 EIGEN_STRONG_INLINE
void loadRhsQuad(
const RhsScalar*, RhsPacket&)
const
223 eigen_assert(
false &&
"Cannot loadRhsQuad for a scalar RHS.");
226 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacket&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<0>&)
const
228 c = vfmaq_n_f16(
c,
a,
b);
230 EIGEN_STRONG_INLINE
void madd(
const PacketHalf&
a,
const RhsPacket&
b, PacketHalf&
c, RhsPacket& ,
const FixedInt<0>&)
const
232 c = vfma_n_f16(
c,
a,
b);
// Half-precision madd overload selected by the FixedInt<0> tag for an
// RhsPacketx4 right-hand side: forwards to madd_helper<0>(a, b, c), which
// updates c in place. The unnamed RhsPacket& parameter and the tag argument
// are not used in the body; the tag only picks the overload.
237 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<0>&)
const
238 { madd_helper<0>(
a,
b,
c); }
// Half-precision madd overload selected by the FixedInt<1> tag: forwards to
// madd_helper<1>(a, b, c), which updates c in place. The unnamed RhsPacket&
// parameter and the tag argument are not used in the body.
239 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<1>&)
const
240 { madd_helper<1>(
a,
b,
c); }
// Half-precision madd overload selected by the FixedInt<2> tag: forwards to
// madd_helper<2>(a, b, c), which updates c in place. The unnamed RhsPacket&
// parameter and the tag argument are not used in the body.
241 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<2>&)
const
242 { madd_helper<2>(
a,
b,
c); }
// Half-precision madd overload selected by the FixedInt<3> tag: forwards to
// madd_helper<3>(a, b, c), which updates c in place. The unnamed RhsPacket&
// parameter and the tag argument are not used in the body.
243 EIGEN_STRONG_INLINE
void madd(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c, RhsPacket& ,
const FixedInt<3>&)
const
244 { madd_helper<3>(
a,
b,
c); }
247 EIGEN_STRONG_INLINE
void madd_helper(
const LhsPacket&
a,
const RhsPacketx4&
b, AccPacket&
c)
const
249 c = vfmaq_lane_f16(
c,
a,
b, LaneID);