35 #ifndef EIGEN_INVERSE_SIZE_4_H
36 #define EIGEN_INVERSE_SIZE_4_H
38 #include "../InternalHeaderCheck.h"
// Only when EIGEN_COMP_GNUC_STRICT is set: save the current GCC optimization options and
// turn fast-math off for the code that follows.
// NOTE(review): presumably needed because the kernels below flip signs by XOR-ing with
// sign masks (the pxor calls), which fast-math may not preserve -- confirm. The closing
// #endif for this #if is not visible in this listing.
40 #if EIGEN_COMP_GNUC_STRICT
43 #pragma GCC push_options
44 #pragma GCC optimize ("no-fast-math")
// Specialization of compute_inverse_size4 for float scalars on Architecture::Target.
// NOTE(review): this listing is an extract. The embedded source line numbers jump
// (52 -> 56 -> 64 -> ...), so braces, the enclosing enum, the definitions of
// ActualMatrixType and StorageOrdersMatch, the enclosing function signature and many
// intermediate statements are not visible. Comments below describe only visible lines.
51 template <
typename MatrixType,
typename ResultType>
52 struct compute_inverse_size4<Architecture::
Target, float,
MatrixType, ResultType>
// Alignment of the input and of the result, taken from their traits. They are passed
// below as the alignment template argument of ploadt and pstoret respectively.
56 MatrixAlignment = traits<MatrixType>::Alignment,
57 ResultAlignment = traits<ResultType>::Alignment,
// Construct `matrix` from the input `mat`. ActualMatrixType is declared on a line not
// shown (presumably either a reference to or a plain copy of the input -- TODO confirm).
64 ActualMatrixType matrix(
mat);
// Raw coefficient pointer and inner stride of the input; all loads are relative to these.
66 const float*
data = matrix.data();
67 const Index stride = matrix.innerStride();
// Load the 4-float packets L2, L3, L4 from offsets 4, 8 and 12 strides past `data`.
// (A first packet is presumably loaded from `data` itself on listing line 68, not shown.)
69 Packet4f L2 = ploadt<Packet4f,MatrixAlignment>(
data + stride*4);
70 Packet4f L3 = ploadt<Packet4f,MatrixAlignment>(
data + stride*8);
71 Packet4f L4 = ploadt<Packet4f,MatrixAlignment>(
data + stride*12);
// Branch taken when StorageOrdersMatch is false; neither its body nor any else-branch is
// visible. Presumably it selects how the loaded packets are rearranged into the A, B, C, D
// operands used below -- TODO confirm.
78 if (!StorageOrdersMatch)
// Subtract from AB the packet product of A swizzled (1, 1, 2, 2) and B swizzled
// (2, 3, 0, 1). AB's initial value is assigned on a line not shown. Presumably this
// completes a 2x2 adjugate-times-matrix product -- TODO confirm against the full source.
97 AB =
psub(AB,
pmul(
vec4f_swizzle2(
A,
A, 1, 1, 2, 2),
vec4f_swizzle2(
B,
B, 2, 3, 0, 1)));
// Same update pattern for DC, with D in place of A and C in place of B.
101 DC =
psub(DC,
pmul(
vec4f_swizzle2(
D,
D, 1, 1, 2, 2),
vec4f_swizzle2(C, C, 2, 3, 0, 1)));
// Subtract from iB the packet product of D swizzled (1, 0, 3, 2) and AB swizzled
// (2, 1, 2, 1). iB's initial value and any later update are on lines not shown.
147 iB =
psub(iB,
pmul(
vec4f_swizzle2(
D,
D, 1, 0, 3, 2),
vec4f_swizzle2(AB, AB, 2, 1, 2, 1)));
// Same update pattern for iC, using A and DC.
152 iC =
psub(iC,
pmul(
vec4f_swizzle2(
A,
A, 1, 0, 3, 2),
vec4f_swizzle2(DC, DC, 2, 1, 2, 1)));
// XOR rd with the mask p4f_sign_PNNP. The mask definition is not visible; going by its
// name it presumably flips the sign of lanes 1 and 2 (+, -, -, +) -- TODO confirm.
157 rd =
pxor(rd, p4f_sign_PNNP);
// Destination pointer and outer stride of the result.
163 Index res_stride = result.outerStride();
164 float *
res = result.data();
// Write four 4-float packets at res + k * res_stride for k = 0..3. The first two are
// assembled from iA and iB, the last two from iC and iD, using the swizzle patterns
// (3, 1, 3, 1) and (2, 0, 2, 0) alternately.
166 pstoret<float, Packet4f, ResultAlignment>(
res + 0,
vec4f_swizzle2(iA, iB, 3, 1, 3, 1));
167 pstoret<float, Packet4f, ResultAlignment>(
res + res_stride,
vec4f_swizzle2(iA, iB, 2, 0, 2, 0));
168 pstoret<float, Packet4f, ResultAlignment>(
res + 2 * res_stride,
vec4f_swizzle2(iC, iD, 3, 1, 3, 1));
169 pstoret<float, Packet4f, ResultAlignment>(
res + 3 * res_stride,
vec4f_swizzle2(iC, iD, 2, 0, 2, 0));
// The double specialization below is compiled unless EIGEN_VECTORIZE_NEON is defined and
// the target is not (ARM64 without EIGEN_APPLE_DOUBLE_NEON_BUG). In other words it is left
// out on NEON builds that are not ARM64 or that have the Apple double-NEON bug flag set.
// NOTE(review): presumably because Packet2d is unavailable there -- confirm. The matching
// #endif is not visible in this listing.
173 #if !(defined EIGEN_VECTORIZE_NEON && !(EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG))
// Specialization of compute_inverse_size4 for double scalars on Architecture::Target.
// NOTE(review): as with the rest of this listing, the embedded line numbers jump, so
// braces, the enum/typedef scaffolding, the enclosing function signature and most of the
// arithmetic (listing lines 235-341) are not visible. Comments describe visible lines only.
176 template <
typename MatrixType,
typename ResultType>
177 struct compute_inverse_size4<Architecture::
Target, double,
MatrixType, ResultType>
// Alignment of the input and of the result, taken from their traits. They are passed
// below as the alignment template argument of ploadt and pstoret respectively.
181 MatrixAlignment = traits<MatrixType>::Alignment,
182 ResultAlignment = traits<ResultType>::Alignment,
// Construct `matrix` from the input `mat`. ActualMatrixType is declared on a line not
// shown (presumably either a reference to or a plain copy of the input -- TODO confirm).
192 ActualMatrixType matrix(
mat);
// Eight 2-double packets holding the input. Presumably two packets per 2x2 sub-block
// A, B, C, D of the 4x4 input -- TODO confirm.
201 Packet2d A1, A2, B1, B2, C1, C2, D1, D2;
// Raw coefficient pointer and inner stride of the input; all loads are relative to these.
203 const double*
data = matrix.data();
204 const Index stride = matrix.innerStride();
// When StorageOrdersMatch is true, consecutive 2-double packets (offsets 0, 2, ..., 14
// strides) are loaded in the order A1, B1, A2, B2, C1, D1, C2, D2.
205 if (StorageOrdersMatch)
207 A1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*0);
208 B1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*2);
209 A2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*4);
210 B2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*6);
211 C1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*8);
212 D1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*10);
213 C2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*12);
214 D2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*14);
// Second set of loads from the same offsets, this time in the order A1, C1, A2, C2,
// B1, D1, B2, D2 (the roles of B and C are exchanged relative to the branch above).
// NOTE(review): presumably the else-branch of the test above; the `else`, the braces and
// listing lines 223-230 (likely a rearrangement of the first four packets) are not shown.
219 A1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*0);
220 C1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*2);
221 A2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*4);
222 C2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*6);
231 B1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*8);
232 D1 = ploadt<Packet2d,MatrixAlignment>(
data + stride*10);
233 B2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*12);
234 D2 = ploadt<Packet2d,MatrixAlignment>(
data + stride*14);
// Eight 2-double packets for the pieces of the result. They are filled on lines not
// shown; the iB, iC and iD pairs are consumed by the stores below.
300 Packet2d iA1, iA2, iB1, iB2, iC1, iC2, iD1, iD2;
// d1 and d2 are rd XORed with the masks sign_PN and sign_NP respectively. The mask
// definitions are not visible; by their names each presumably flips the sign of one of
// the two lanes -- TODO confirm.
342 d1 =
pxor(rd, sign_PN);
343 d2 =
pxor(rd, sign_NP);
// Destination pointer and outer stride of the result.
345 Index res_stride = result.outerStride();
346 double *
res = result.data();
// Each store writes one 2-double packet: a vec2d_swizzle2 of an (iX2, iX1) pair with
// mask 3 multiplied by d1, or with mask 0 multiplied by d2. Targets visible here are
// res + res_stride + 2, then res + {2, 3} * res_stride, then res + {2, 3} * res_stride + 2.
// The stores on listing lines 347-349 are not shown.
350 pstoret<double, Packet2d, ResultAlignment>(
res + res_stride + 2,
pmul(
vec2d_swizzle2(iB2, iB1, 0), d2));
351 pstoret<double, Packet2d, ResultAlignment>(
res + 2 * res_stride,
pmul(
vec2d_swizzle2(iC2, iC1, 3), d1));
352 pstoret<double, Packet2d, ResultAlignment>(
res + 3 * res_stride,
pmul(
vec2d_swizzle2(iC2, iC1, 0), d2));
353 pstoret<double, Packet2d, ResultAlignment>(
res + 2 * res_stride + 2,
pmul(
vec2d_swizzle2(iD2, iD1, 3), d1));
354 pstoret<double, Packet2d, ResultAlignment>(
res + 3 * res_stride + 2,
pmul(
vec2d_swizzle2(iD2, iD1, 0), d2));
// Only when EIGEN_COMP_GNUC_STRICT is set: restore the GCC options saved by the
// `#pragma GCC push_options` near the top of the file, ending the no-fast-math region.
// The closing #endif is not visible in this listing.
361 #if EIGEN_COMP_GNUC_STRICT
362 #pragma GCC pop_options
#define vec4f_duplane(a, p)
cout<< "Here is the matrix m:"<< endl<< m<< endl;Matrix< ptrdiff_t, 3, 1 > res
#define vec2d_duplane(a, p)
#define vec2d_swizzle2(a, b, mask)
Matrix< float, 1, Dynamic > MatrixType
Base::PlainObject PlainObject
const unsigned int LinearAccessBit
const unsigned int RowMajorBit
Packet padd(const Packet &a, const Packet &b)
Packet4f vec4f_unpackhi(const Packet4f &a, const Packet4f &b)
Packet4f vec4f_unpacklo(const Packet4f &a, const Packet4f &b)
Packet pmul(const Packet &a, const Packet &b)
Packet2d vec2d_unpackhi(const Packet2d &a, const Packet2d &b)
Packet psub(const Packet &a, const Packet &b)
Packet4f vec4f_swizzle2(const Packet4f &a, const Packet4f &b, int p, int q, int r, int s)
Packet2d pset1< Packet2d >(const double &from)
Packet8h pxor(const Packet8h &a, const Packet8h &b)
Packet pdiv(const Packet &a, const Packet &b)
Packet preciprocal(const Packet &a)
Packet2d vec2d_unpacklo(const Packet2d &a, const Packet2d &b)
Packet2d pload< Packet2d >(const double *from)
Packet4f pload< Packet4f >(const float *from)
Packet4f vec4f_movelh(const Packet4f &a, const Packet4f &b)
Packet4f vec4f_movehl(const Packet4f &a, const Packet4f &b)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.