10 #ifndef EIGEN_PACKET_MATH_FP16_AVX512_H
11 #define EIGEN_PACKET_MATH_FP16_AVX512_H
13 #include "../../InternalHeaderCheck.h"
// Packet aliases stored in integer vector registers: Packet16h wraps a 256-bit __m256i and
// Packet8h wraps a 128-bit __m128i. The second template argument (1 vs. 2) differs between
// the two aliases — presumably a tag that keeps the wrapper types distinct; confirm against
// the eigen_packet_wrapper definition.
20 typedef eigen_packet_wrapper<__m256i, 1>
Packet16h;
21 typedef eigen_packet_wrapper<__m128i, 2>
Packet8h;
25 enum { value =
true };
29 struct packet_traits<
half> : default_packet_traits {
78 masked_load_available =
false,
79 masked_store_available =
false
91 masked_load_available =
false,
92 masked_store_available =
false
104 masked_load_available =
false,
105 masked_store_available =
false
115 return _mm512_set1_ph(
static_cast<_Float16
>(from));
121 return _mm512_castsi512_ph(_mm512_set1_epi16(from));
128 #ifdef EIGEN_VECTORIZE_AVX512DQ
130 static_cast<unsigned short>(_mm256_extract_epi16(_mm512_extracti32x8_epi32(_mm512_castph_si512(from), 0), 0)));
133 _mm512_storeu_ph(dest, from);
169 __m512h
a = _mm512_castph256_ph512(_mm256_loadu_ph(from));
170 return _mm512_permutexvar_ph(_mm512_set_epi16(15, 15, 14, 14, 13, 13, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6,
171 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0, 0),
178 __m512h
a = _mm512_castph128_ph512(_mm_loadu_ph(from));
179 return _mm512_permutexvar_ph(
180 _mm512_set_epi16(7, 7, 7, 7, 6, 6, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0),
188 return _mm512_abs_ph(
a);
195 return _mm512_castsi512_ph(_mm512_srai_epi16(_mm512_castph_si512(
a), 15));
202 return _mm512_min_ph(
a,
b);
209 return _mm512_max_ph(
a,
b);
215 return _mm512_add_ph(_mm512_set1_ph(
a),
216 _mm512_set_ph(31.0f, 30.0f, 29.0f, 28.0f, 27.0f, 26.0f, 25.0f, 24.0f, 23.0f, 22.0f, 21.0f, 20.0f,
217 19.0f, 18.0f, 17.0f, 16.0f, 15.0f, 14.0f, 13.0f, 12.0f, 11.0f, 10.0f, 9.0f, 8.0f,
218 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f, 0.0f));
225 return _mm512_castsi512_ph(_mm512_or_si512(_mm512_castph_si512(
a), _mm512_castph_si512(
b)));
232 return _mm512_castsi512_ph(_mm512_xor_si512(_mm512_castph_si512(
a), _mm512_castph_si512(
b)));
239 return _mm512_castsi512_ph(_mm512_and_si512(_mm512_castph_si512(
a), _mm512_castph_si512(
b)));
246 return _mm512_castsi512_ph(_mm512_andnot_si512(_mm512_castph_si512(
b), _mm512_castph_si512(
a)));
253 __mmask32 mask32 = _mm512_cmp_epi16_mask(_mm512_castph_si512(mask), _mm512_setzero_epi32(), _MM_CMPINT_EQ);
254 return _mm512_mask_blend_ph(mask32,
a,
b);
261 __mmask32 mask = _mm512_cmp_ph_mask(
a,
b, _CMP_EQ_OQ);
262 return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
269 __mmask32 mask = _mm512_cmp_ph_mask(
a,
b, _CMP_LE_OQ);
270 return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
277 __mmask32 mask = _mm512_cmp_ph_mask(
a,
b, _CMP_LT_OQ);
278 return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi32(0), mask, 0xffffu));
285 __mmask32 mask = _mm512_cmp_ph_mask(
a,
b, _CMP_NGE_UQ);
286 return _mm512_castsi512_ph(_mm512_mask_set1_epi16(_mm512_set1_epi16(0), mask, 0xffffu));
293 return _mm512_add_ph(
a,
b);
298 return _mm256_castph_si256(_mm256_add_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b)));
303 return _mm_castph_si128(_mm_add_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b)));
310 return _mm512_sub_ph(
a,
b);
315 return _mm256_castph_si256(_mm256_sub_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b)));
320 return _mm_castph_si128(_mm_sub_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b)));
327 return _mm512_mul_ph(
a,
b);
332 return _mm256_castph_si256(_mm256_mul_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b)));
337 return _mm_castph_si128(_mm_mul_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b)));
344 return _mm512_div_ph(
a,
b);
349 return _mm256_castph_si256(_mm256_div_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b)));
354 return _mm_castph_si128(_mm_div_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b)));
368 return _mm512_roundscale_ph(
padd(
por(
pand(
a, signMask), prev0dot5),
a), _MM_FROUND_TO_ZERO);
375 return _mm512_roundscale_ph(
a, _MM_FROUND_CUR_DIRECTION);
382 return _mm512_roundscale_ph(
a, _MM_FROUND_TO_POS_INF);
389 return _mm512_roundscale_ph(
a, _MM_FROUND_TO_NEG_INF);
395 return (
half)_mm512_reduce_add_ph(
a);
400 return (
half)_mm256_reduce_add_ph(_mm256_castsi256_ph(
a));
405 return (half)_mm_reduce_add_ph(_mm_castsi128_ph(
a));
411 #ifdef EIGEN_VECTORIZE_AVX512DQ
412 __m256i lowHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(
a), 0));
413 __m256i highHalf = _mm256_castps_si256(_mm512_extractf32x8_ps(_mm512_castph_ps(
a), 1));
418 _mm512_storeu_ph(
data,
a);
420 __m256i lowHalf = _mm256_castph_si256(_mm256_loadu_ph(
data));
421 __m256i highHalf = _mm256_castph_si256(_mm256_loadu_ph(
data + 16));
433 #ifdef EIGEN_VECTORIZE_FMA
439 return _mm512_fmadd_ph(
a,
b,
c);
444 return _mm256_castph_si256(_mm256_fmadd_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b), _mm256_castsi256_ph(
c)));
449 return _mm_castph_si128(_mm_fmadd_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b), _mm_castsi128_ph(
c)));
456 return _mm512_fmsub_ph(
a,
b,
c);
461 return _mm256_castph_si256(_mm256_fmsub_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b), _mm256_castsi256_ph(
c)));
466 return _mm_castph_si128(_mm_fmsub_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b), _mm_castsi128_ph(
c)));
473 return _mm512_fnmadd_ph(
a,
b,
c);
478 return _mm256_castph_si256(_mm256_fnmadd_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b), _mm256_castsi256_ph(
c)));
483 return _mm_castph_si128(_mm_fnmadd_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b), _mm_castsi128_ph(
c)));
490 return _mm512_fnmsub_ph(
a,
b,
c);
495 return _mm256_castph_si256(_mm256_fnmsub_ph(_mm256_castsi256_ph(
a), _mm256_castsi256_ph(
b), _mm256_castsi256_ph(
c)));
500 return _mm_castph_si128(_mm_fnmsub_ph(_mm_castsi128_ph(
a), _mm_castsi128_ph(
b), _mm_castsi128_ph(
c)));
// Computes 0.0 - a in every lane (presumably the body of pnegate<Packet32h>; the enclosing
// signature is not visible here).
// NOTE(review): with IEEE-754 round-to-nearest, 0.0 - (+0.0) evaluates to +0.0, so a +0.0
// lane does not come back as -0.0. Flipping the sign bit with an integer XOR would negate
// signed zeros (and NaN sign bits) exactly — confirm whether callers depend on that.
509 return _mm512_sub_ph(_mm512_set1_ph(0.0),
a);
523 return _mm512_sqrt_ph(
a);
530 return _mm512_rsqrt_ph(
a);
537 return _mm512_rcp_ph(
a);
546 for (
int i = 0;
i < 16;
i++) {
547 t[2 *
i] = _mm512_unpacklo_epi16(_mm512_castph_si512(
a.packet[2 *
i]), _mm512_castph_si512(
a.packet[2 *
i + 1]));
549 _mm512_unpackhi_epi16(_mm512_castph_si512(
a.packet[2 *
i]), _mm512_castph_si512(
a.packet[2 *
i + 1]));
555 for (
int i = 0;
i < 8;
i++) {
556 p[4 *
i] = _mm512_unpacklo_epi32(t[4 *
i], t[4 *
i + 2]);
557 p[4 *
i + 1] = _mm512_unpackhi_epi32(t[4 *
i], t[4 *
i + 2]);
558 p[4 *
i + 2] = _mm512_unpacklo_epi32(t[4 *
i + 1], t[4 *
i + 3]);
559 p[4 *
i + 3] = _mm512_unpackhi_epi32(t[4 *
i + 1], t[4 *
i + 3]);
565 for (
int i = 0;
i < 4;
i++) {
566 q[8 *
i] = _mm512_unpacklo_epi64(
p[8 *
i],
p[8 *
i + 4]);
567 q[8 *
i + 1] = _mm512_unpackhi_epi64(
p[8 *
i],
p[8 *
i + 4]);
568 q[8 *
i + 2] = _mm512_unpacklo_epi64(
p[8 *
i + 1],
p[8 *
i + 5]);
569 q[8 *
i + 3] = _mm512_unpackhi_epi64(
p[8 *
i + 1],
p[8 *
i + 5]);
570 q[8 *
i + 4] = _mm512_unpacklo_epi64(
p[8 *
i + 2],
p[8 *
i + 6]);
571 q[8 *
i + 5] = _mm512_unpackhi_epi64(
p[8 *
i + 2],
p[8 *
i + 6]);
572 q[8 *
i + 6] = _mm512_unpacklo_epi64(
p[8 *
i + 3],
p[8 *
i + 7]);
573 q[8 *
i + 7] = _mm512_unpackhi_epi64(
p[8 *
i + 3],
p[8 *
i + 7]);
578 #define PACKET32H_TRANSPOSE_HELPER(X, Y) \
580 f[Y * 8] = _mm512_inserti32x4(f[Y * 8], _mm512_extracti32x4_epi32(q[X * 8], Y), X); \
581 f[Y * 8 + 1] = _mm512_inserti32x4(f[Y * 8 + 1], _mm512_extracti32x4_epi32(q[X * 8 + 1], Y), X); \
582 f[Y * 8 + 2] = _mm512_inserti32x4(f[Y * 8 + 2], _mm512_extracti32x4_epi32(q[X * 8 + 2], Y), X); \
583 f[Y * 8 + 3] = _mm512_inserti32x4(f[Y * 8 + 3], _mm512_extracti32x4_epi32(q[X * 8 + 3], Y), X); \
584 f[Y * 8 + 4] = _mm512_inserti32x4(f[Y * 8 + 4], _mm512_extracti32x4_epi32(q[X * 8 + 4], Y), X); \
585 f[Y * 8 + 5] = _mm512_inserti32x4(f[Y * 8 + 5], _mm512_extracti32x4_epi32(q[X * 8 + 5], Y), X); \
586 f[Y * 8 + 6] = _mm512_inserti32x4(f[Y * 8 + 6], _mm512_extracti32x4_epi32(q[X * 8 + 6], Y), X); \
587 f[Y * 8 + 7] = _mm512_inserti32x4(f[Y * 8 + 7], _mm512_extracti32x4_epi32(q[X * 8 + 7], Y), X); \
609 #undef PACKET32H_TRANSPOSE_HELPER
612 for (
int i = 0;
i < 32;
i++) {
613 a.packet[
i] = _mm512_castsi512_ph(f[
i]);
618 __m512i
p0,
p1, p2, p3, t0, t1, t2, t3, a0, a1, a2, a3;
619 t0 = _mm512_unpacklo_epi16(_mm512_castph_si512(
a.packet[0]), _mm512_castph_si512(
a.packet[1]));
620 t1 = _mm512_unpackhi_epi16(_mm512_castph_si512(
a.packet[0]), _mm512_castph_si512(
a.packet[1]));
621 t2 = _mm512_unpacklo_epi16(_mm512_castph_si512(
a.packet[2]), _mm512_castph_si512(
a.packet[3]));
622 t3 = _mm512_unpackhi_epi16(_mm512_castph_si512(
a.packet[2]), _mm512_castph_si512(
a.packet[3]));
624 p0 = _mm512_unpacklo_epi32(t0, t2);
625 p1 = _mm512_unpackhi_epi32(t0, t2);
626 p2 = _mm512_unpacklo_epi32(t1, t3);
627 p3 = _mm512_unpackhi_epi32(t1, t3);
634 a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(
p1, 0), 1);
635 a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(
p0, 1), 0);
637 a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p2, 0), 2);
638 a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(
p0, 2), 0);
640 a0 = _mm512_inserti32x4(a0, _mm512_extracti32x4_epi32(p3, 0), 3);
641 a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(
p0, 3), 0);
643 a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p2, 1), 2);
644 a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(
p1, 2), 1);
646 a2 = _mm512_inserti32x4(a2, _mm512_extracti32x4_epi32(p3, 2), 3);
647 a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(p2, 3), 2);
649 a1 = _mm512_inserti32x4(a1, _mm512_extracti32x4_epi32(p3, 1), 3);
650 a3 = _mm512_inserti32x4(a3, _mm512_extracti32x4_epi32(
p1, 3), 1);
652 a.packet[0] = _mm512_castsi512_ph(a0);
653 a.packet[1] = _mm512_castsi512_ph(a1);
654 a.packet[2] = _mm512_castsi512_ph(a2);
655 a.packet[3] = _mm512_castsi512_ph(a3);
662 return _mm512_permutexvar_ph(_mm512_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
663 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31),
675 for (
int i = 0;
i < 32;
i++) {
676 to[stride *
i] = aux[
i];
684 return _mm512_castsi512_ph(_mm512_set_epi16(
685 from[31 * stride].
x, from[30 * stride].
x, from[29 * stride].
x, from[28 * stride].
x, from[27 * stride].
x,
686 from[26 * stride].
x, from[25 * stride].
x, from[24 * stride].
x, from[23 * stride].
x, from[22 * stride].
x,
687 from[21 * stride].
x, from[20 * stride].
x, from[19 * stride].
x, from[18 * stride].
x, from[17 * stride].
x,
688 from[16 * stride].
x, from[15 * stride].
x, from[14 * stride].
x, from[13 * stride].
x, from[12 * stride].
x,
689 from[11 * stride].
x, from[10 * stride].
x, from[9 * stride].
x, from[8 * stride].
x, from[7 * stride].
x,
690 from[6 * stride].
x, from[5 * stride].
x, from[4 * stride].
x, from[3 * stride].
x, from[2 * stride].
x,
691 from[1 * stride].
x, from[0 * stride].
x));
// Assembles one 512-bit value from two 256-bit inputs: `a` is inserted at 256-bit position 0
// and `b` at position 1, then the result is reinterpreted as a packed-half vector.
// The accumulator starts undefined because the two inserts below overwrite both 256-bit
// positions before it is returned.
716 __m512d result = _mm512_undefined_pd();
717 result = _mm512_insertf64x4(result, _mm256_castsi256_pd(
a), 0);
718 result = _mm512_insertf64x4(result, _mm256_castsi256_pd(
b), 1);
// The pd <-> si/ph casts only reinterpret the register; they are used so the 64x4 insert
// intrinsic can move the 256-bit halves.
719 return _mm512_castpd_ph(result);
// Splits `x` into its two 256-bit positions: position 0 is written to `a`, position 1 to `b`.
// `x` is reinterpreted as packed doubles only so the 64x4 extract intrinsic can be used; the
// extracted bits are then reinterpreted back to an integer vector.
723 a = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(
x), 0));
724 b = _mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castph_pd(
x), 1));
838 Packet16h exp1 = _mm256_undefined_si256();
839 Packet16h exp2 = _mm256_undefined_si256();
#define EIGEN_DEBUG_ALIGNED_STORE
#define EIGEN_DEBUG_ALIGNED_LOAD
#define EIGEN_DEBUG_UNALIGNED_STORE
#define EIGEN_DEBUG_UNALIGNED_LOAD
#define EIGEN_UNROLL_LOOP
#define EIGEN_DEVICE_FUNC
#define PACKET32H_TRANSPOSE_HELPER(X, Y)
EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x)
void pscatter< half, Packet32h >(half *to, const Packet32h &from, Index stride)
Packet16h pfrexp< Packet16h >(const Packet16h &, Packet16h &)
Packet16h pexpm1< Packet16h >(const Packet16h &)
Packet pnmsub(const Packet &a, const Packet &b, const Packet &c)
Packet32h ploadu< Packet32h >(const Eigen::half *from)
Packet32h plog1p< Packet32h >(const Packet32h &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet &a)
Packet padd(const Packet &a, const Packet &b)
Packet8h pdiv< Packet8h >(const Packet8h &a, const Packet8h &b)
void pstore(Scalar *to, const Packet &from)
Packet32h pceil< Packet32h >(const Packet32h &a)
Packet32h pround< Packet32h >(const Packet32h &a)
Packet32h plset< Packet32h >(const half &a)
void pstoreu< half >(Eigen::half *to, const Packet16h &from)
Packet32h pmul< Packet32h >(const Packet32h &a, const Packet32h &b)
Packet4f pcmp_lt_or_nan(const Packet4f &a, const Packet4f &b)
Packet16h pldexp< Packet16h >(const Packet16h &, const Packet16h &)
half predux< Packet32h >(const Packet32h &a)
Packet32h prsqrt< Packet32h >(const Packet32h &a)
Packet8h padd< Packet8h >(const Packet8h &a, const Packet8h &b)
Packet16h plog1p< Packet16h >(const Packet16h &)
Packet32h pexpm1< Packet32h >(const Packet32h &a)
Packet32h print< Packet32h >(const Packet32h &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet &a)
Packet8h pandnot(const Packet8h &a, const Packet8h &b)
Packet32h ptanh< Packet32h >(const Packet32h &a)
Packet16h plog2< Packet16h >(const Packet16h &)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet &a)
void extract2Packet16h(const Packet32h &x, Packet16h &a, Packet16h &b)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet &a)
Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet &a)
Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Packet32h pset1< Packet32h >(const Eigen::half &from)
Packet16h plog< Packet16h >(const Packet16h &)
Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Packet32h pdiv< Packet32h >(const Packet32h &a, const Packet32h &b)
Packet32h pload< Packet32h >(const Eigen::half *from)
Packet pmsub(const Packet &a, const Packet &b, const Packet &c)
Packet16h ptanh< Packet16h >(const Packet16h &)
Packet32h pnegate< Packet32h >(const Packet32h &a)
Packet32h ploaddup< Packet32h >(const Eigen::half *from)
Packet8h pfrexp(const Packet8h &a, Packet8h &exponent)
Eigen::half predux< Packet8h >(const Packet8h &a)
Packet8h psub< Packet8h >(const Packet8h &a, const Packet8h &b)
Packet32h psin< Packet32h >(const Packet32h &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet &a)
Packet32h pexp< Packet32h >(const Packet32h &a)
Packet pnmadd(const Packet &a, const Packet &b, const Packet &c)
Packet32h pmin< Packet32h >(const Packet32h &a, const Packet32h &b)
void pstore< half >(Eigen::half *to, const Packet16h &from)
Packet32h plog2< Packet32h >(const Packet32h &a)
Packet16h padd< Packet16h >(const Packet16h &a, const Packet16h &b)
Packet8h pand(const Packet8h &a, const Packet8h &b)
Packet8h pldexp(const Packet8h &a, const Packet8h &exponent)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet &a)
Packet16h pmul< Packet16h >(const Packet16h &a, const Packet16h &b)
Packet16h predux_half_dowto4< Packet32h >(const Packet32h &a)
half predux< Packet16h >(const Packet16h &from)
Packet16h pcos< Packet16h >(const Packet16h &)
Packet8h pxor(const Packet8h &a, const Packet8h &b)
Packet32h psqrt< Packet32h >(const Packet32h &a)
Packet32h pmax< Packet32h >(const Packet32h &a, const Packet32h &b)
Packet16h psub< Packet16h >(const Packet16h &a, const Packet16h &b)
Packet16h psin< Packet16h >(const Packet16h &)
Packet32h padd< Packet32h >(const Packet32h &a, const Packet32h &b)
Packet32h pldexp< Packet32h >(const Packet32h &a, const Packet32h &exponent)
Packet2cf preverse(const Packet2cf &a)
Packet8h pmul< Packet8h >(const Packet8h &a, const Packet8h &b)
Packet8h por(const Packet8h &a, const Packet8h &b)
Packet32h plog< Packet32h >(const Packet32h &a)
Packet4i pcmp_lt(const Packet4i &a, const Packet4i &b)
Packet32h pcos< Packet32h >(const Packet32h &a)
Packet32h pconj< Packet32h >(const Packet32h &a)
Packet32h psignbit< Packet32h >(const Packet32h &a)
Eigen::half pfirst< Packet32h >(const Packet32h &from)
Packet32h pset1frombits< Packet32h >(unsigned short from)
Packet16h pexp< Packet16h >(const Packet16h &)
eigen_packet_wrapper< __m256i, 1 > Packet16h
Packet32h preciprocal< Packet32h >(const Packet32h &a)
Packet32h pfloor< Packet32h >(const Packet32h &a)
Packet32h psub< Packet32h >(const Packet32h &a, const Packet32h &b)
Packet16h pdiv< Packet16h >(const Packet16h &a, const Packet16h &b)
Packet32h ploadquad< Packet32h >(const Eigen::half *from)
eigen_packet_wrapper< __m128i, 2 > Packet8h
Packet32h combine2Packet16h(const Packet16h &a, const Packet16h &b)
Packet4f pcmp_le(const Packet4f &a, const Packet4f &b)
Packet32h pabs< Packet32h >(const Packet32h &a)
Packet32h pfrexp< Packet32h >(const Packet32h &a, Packet32h &exponent)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.