// Include guard for this configuration header.
11 #ifndef EIGEN_CONFIGURE_VECTORIZATION_H
12 #define EIGEN_CONFIGURE_VECTORIZATION_H
// Alignment helpers. When EIGEN_CUDACC is defined they expand to the
// __align__ / __alignof spellings.
34 #if (defined EIGEN_CUDACC)
35 #define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
36 #define EIGEN_ALIGNOF(x) __alignof(x)
// Alternative definitions built on the alignas / alignof keywords.
// NOTE(review): the #else and #endif lines that separate and close the two
// variants are not visible in this excerpt - confirm against the full file.
38 #define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
39 #define EIGEN_ALIGNOF(x) alignof(x)
// EIGEN_IDEAL_MAX_ALIGN_BYTES is chosen from the vectorization settings.
// With EIGEN_DONT_VECTORIZE defined, the candidates are 16 and 0, selected by
// an EIGEN_GPUCC test.
// NOTE(review): presumably 16 applies under EIGEN_GPUCC and 0 otherwise; the
// #else/#endif lines are not visible in this excerpt - confirm.
43 #if defined(EIGEN_DONT_VECTORIZE)
44 #if defined(EIGEN_GPUCC)
47 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
49 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
// Otherwise the value follows the compiler feature macros: 64 for __AVX512F__,
// 32 for __AVX__.
51 #elif defined(__AVX512F__)
53 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
54 #elif defined(__AVX__)
56 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
// NOTE(review): presumably the fallback branch of the chain (its #else is not
// visible here).
58 #define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
// Fixed lower bound used alongside the ideal value above.
63 #define EIGEN_MIN_ALIGN_BYTES 16
// Reject contradictory settings: EIGEN_DONT_ALIGN_STATICALLY or
// EIGEN_DONT_ALIGN together with a positive EIGEN_MAX_STATIC_ALIGN_BYTES.
69 #if (defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)) && defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES>0
70 #error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
// When either of those macros is defined, any earlier definition of
// EIGEN_MAX_STATIC_ALIGN_BYTES is dropped and the value is set to 0.
75 #if defined(EIGEN_DONT_ALIGN_STATICALLY) || defined(EIGEN_DONT_ALIGN)
76 #ifdef EIGEN_MAX_STATIC_ALIGN_BYTES
77 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
79 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
// A default is derived below only when no value has been defined so far.
82 #ifndef EIGEN_MAX_STATIC_ALIGN_BYTES
// Flag set to 1 when EIGEN_COMP_GNUC is nonzero and none of the listed
// EIGEN_ARCH_* macros is; the second define supplies 0.
// NOTE(review): the #else/#endif between the two defines is not visible here.
92 #if EIGEN_COMP_GNUC && !(EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS)
93 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
95 #define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
// EIGEN_ARCH_WANTS_STACK_ALIGNMENT is derived from the flag above combined
// with an EIGEN_COMP_SUNCC check.
// NOTE(review): the condition ends in a line continuation, so at least one
// further clause exists in the full file; it and the #else/#endif lines are
// not visible in this excerpt.
99 #if !EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT \
100 && !EIGEN_COMP_SUNCC \
102 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
104 #define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
// Default static alignment: the ideal alignment when stack alignment is
// wanted, with 0 as the other candidate (presumably the #else branch).
107 #if EIGEN_ARCH_WANTS_STACK_ALIGNMENT
108 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
110 #define EIGEN_MAX_STATIC_ALIGN_BYTES 0
// Clamp the static alignment to EIGEN_MAX_ALIGN_BYTES when that macro is
// defined and smaller.
116 #if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES<EIGEN_MAX_STATIC_ALIGN_BYTES
117 #undef EIGEN_MAX_STATIC_ALIGN_BYTES
118 #define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
// A static alignment of 0 also defines EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT,
// unless it is already defined.
121 #if EIGEN_MAX_STATIC_ALIGN_BYTES==0 && !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT)
122 #define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
// Shorthand alignment specifiers for fixed boundaries of 8, 16, 32 and 64.
132 #define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)
133 #define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
134 #define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
135 #define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
// EIGEN_ALIGN_MAX aligns to EIGEN_MAX_STATIC_ALIGN_BYTES when that value is
// positive; the other definition expands to nothing.
// NOTE(review): the #else/#endif separating the two definitions is not
// visible in this excerpt.
136 #if EIGEN_MAX_STATIC_ALIGN_BYTES>0
137 #define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
139 #define EIGEN_ALIGN_MAX
// Reject contradictory settings: EIGEN_DONT_ALIGN together with a positive
// EIGEN_MAX_ALIGN_BYTES.
145 #if defined(EIGEN_DONT_ALIGN) && defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES>0
146 #error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
// EIGEN_DONT_ALIGN forces EIGEN_MAX_ALIGN_BYTES to 0 (dropping any earlier
// definition); if the macro is still undefined it defaults to the ideal value.
149 #ifdef EIGEN_DONT_ALIGN
150 #ifdef EIGEN_MAX_ALIGN_BYTES
151 #undef EIGEN_MAX_ALIGN_BYTES
153 #define EIGEN_MAX_ALIGN_BYTES 0
154 #elif !defined(EIGEN_MAX_ALIGN_BYTES)
155 #define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
// EIGEN_DEFAULT_ALIGN_BYTES takes the ideal value when it exceeds
// EIGEN_MAX_ALIGN_BYTES; the second define uses EIGEN_MAX_ALIGN_BYTES.
// NOTE(review): presumably the second define is the #else branch, making the
// result the larger of the two values - the #else/#endif is not visible here.
158 #if EIGEN_IDEAL_MAX_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
159 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
161 #define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
// EIGEN_UNALIGNED_VECTORIZE defaults to 1 unless it was defined beforehand.
165 #ifndef EIGEN_UNALIGNED_VECTORIZE
166 #define EIGEN_UNALIGNED_VECTORIZE 1
// An EIGEN_MAX_ALIGN_BYTES of 0 defines EIGEN_DONT_VECTORIZE if it is not
// already defined.
173 #if EIGEN_MAX_ALIGN_BYTES==0
174 #ifndef EIGEN_DONT_VECTORIZE
175 #define EIGEN_DONT_VECTORIZE
// SSE2 detection, first form: _M_IX86_FP >= 2 or a nonzero EIGEN_ARCH_x86_64.
// NOTE(review): the macro name suggests this test sits inside an MSVC-only
// guard, but no such guard is visible in this excerpt - confirm.
185 #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || EIGEN_ARCH_x86_64
186 #define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
// SSE2 detection, second form: the __SSE2__ compiler feature macro.
189 #if defined(__SSE2__)
190 #define EIGEN_SSE2_ON_NON_MSVC
// Backend selection runs only when vectorization has not been disabled and
// EIGEN_GPUCC is not defined.
194 #if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))
// x86 branch: taken when either SSE2 detection macro is defined. It always
// defines EIGEN_VECTORIZE plus the SSE and SSE2 markers.
196 #if defined (EIGEN_SSE2_ON_NON_MSVC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
201 #define EIGEN_VECTORIZE
202 #define EIGEN_VECTORIZE_SSE
203 #define EIGEN_VECTORIZE_SSE2
// Markers for the later SSE generations.
// NOTE(review): the feature-macro guard around each of these defines is not
// visible in this excerpt.
210 #define EIGEN_VECTORIZE_SSE3
213 #define EIGEN_VECTORIZE_SSSE3
216 #define EIGEN_VECTORIZE_SSE4_1
219 #define EIGEN_VECTORIZE_SSE4_2
// AVX marker, skipped when EIGEN_USE_SYCL is defined. The SSE3..SSE4.2 markers
// are defined again next to it.
// NOTE(review): the enclosing AVX guard and the #endif closing the SYCL check
// are not visible here.
222 #ifndef EIGEN_USE_SYCL
223 #define EIGEN_VECTORIZE_AVX
225 #define EIGEN_VECTORIZE_SSE3
226 #define EIGEN_VECTORIZE_SSSE3
227 #define EIGEN_VECTORIZE_SSE4_1
228 #define EIGEN_VECTORIZE_SSE4_2
// AVX2 markers, following the same pattern as the AVX group above.
// NOTE(review): the enclosing AVX2 guard is not visible here.
231 #ifndef EIGEN_USE_SYCL
232 #define EIGEN_VECTORIZE_AVX2
233 #define EIGEN_VECTORIZE_AVX
235 #define EIGEN_VECTORIZE_SSE3
236 #define EIGEN_VECTORIZE_SSSE3
237 #define EIGEN_VECTORIZE_SSE4_1
238 #define EIGEN_VECTORIZE_SSE4_2
// FMA marker: __FMA__ is defined, or EIGEN_COMP_MSVC is nonzero with __AVX2__.
240 #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
243 #define EIGEN_VECTORIZE_FMA
// AVX512 branch: compilation stops with a diagnostic when the FMA marker is
// missing.
// NOTE(review): two alternative #error texts follow; the condition choosing
// between them is not visible in this excerpt.
245 #if defined(__AVX512F__)
246 #ifndef EIGEN_VECTORIZE_FMA
248 #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
250 #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
// AVX512 markers together with every older x86 marker; the first group is
// skipped when EIGEN_USE_SYCL is defined.
253 #ifndef EIGEN_USE_SYCL
254 #define EIGEN_VECTORIZE_AVX512
255 #define EIGEN_VECTORIZE_AVX2
256 #define EIGEN_VECTORIZE_AVX
258 #define EIGEN_VECTORIZE_FMA
259 #define EIGEN_VECTORIZE_SSE3
260 #define EIGEN_VECTORIZE_SSSE3
261 #define EIGEN_VECTORIZE_SSE4_1
262 #define EIGEN_VECTORIZE_SSE4_2
// Optional AVX512 sub-extensions.
// NOTE(review): the guards for the DQ and ER markers are not visible here.
263 #ifndef EIGEN_USE_SYCL
265 #define EIGEN_VECTORIZE_AVX512DQ
268 #define EIGEN_VECTORIZE_AVX512ER
270 #ifdef __AVX512BF16__
271 #define EIGEN_VECTORIZE_AVX512BF16
// AVX512-FP16 marker, followed by diagnostics stating that AVX512-VL is
// required alongside it.
// NOTE(review): the conditions guarding the two #error texts are not visible
// in this excerpt.
273 #ifdef __AVX512FP16__
275 #define EIGEN_VECTORIZE_AVX512FP16
278 #error Please add -mavx512vl to your compiler flags: compiling with -mavx512fp16 alone without AVX512-VL is not supported.
280 #error Please enable AVX512-VL in your compiler flags (e.g. -mavx512vl): compiling with AVX512-FP16 alone without AVX512-VL is not supported.
// Workaround for one specific toolchain: EIGEN_COMP_CLANGAPPLE equal to
// 11000033 with __MAC_OS_X_VERSION_MIN_REQUIRED equal to 101500. The AVX-family
// markers that are defined get undefined, and a warning is emitted when the
// AVX marker itself is removed.
// NOTE(review): the #endif lines closing each #ifdef are not visible in this
// excerpt.
288 #if ( EIGEN_COMP_CLANGAPPLE == 11000033 ) && ( __MAC_OS_X_VERSION_MIN_REQUIRED == 101500 )
291 #ifdef EIGEN_VECTORIZE_AVX
292 #undef EIGEN_VECTORIZE_AVX
293 #warning "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
294 #ifdef EIGEN_VECTORIZE_AVX2
295 #undef EIGEN_VECTORIZE_AVX2
297 #ifdef EIGEN_VECTORIZE_FMA
298 #undef EIGEN_VECTORIZE_FMA
300 #ifdef EIGEN_VECTORIZE_AVX512
301 #undef EIGEN_VECTORIZE_AVX512
303 #ifdef EIGEN_VECTORIZE_AVX512DQ
304 #undef EIGEN_VECTORIZE_AVX512DQ
306 #ifdef EIGEN_VECTORIZE_AVX512ER
307 #undef EIGEN_VECTORIZE_AVX512ER
// Intrinsic headers for the x86 backends. When EIGEN_COMP_ICC >= 1110 or
// EIGEN_COMP_EMSCRIPTEN is nonzero, immintrin.h alone is included.
331 #if EIGEN_COMP_ICC >= 1110 || EIGEN_COMP_EMSCRIPTEN
332 #include <immintrin.h>
// NOTE(review): presumably the #else branch - the individual headers are
// pulled in one by one, each optional one guarded by its EIGEN_VECTORIZE_*
// marker. The #else/#endif lines are not visible in this excerpt.
334 #include <mmintrin.h>
335 #include <emmintrin.h>
336 #include <xmmintrin.h>
337 #ifdef EIGEN_VECTORIZE_SSE3
338 #include <pmmintrin.h>
340 #ifdef EIGEN_VECTORIZE_SSSE3
341 #include <tmmintrin.h>
343 #ifdef EIGEN_VECTORIZE_SSE4_1
344 #include <smmintrin.h>
346 #ifdef EIGEN_VECTORIZE_SSE4_2
347 #include <nmmintrin.h>
349 #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
350 #include <immintrin.h>
// VSX branch: __VSX__ is defined and __APPLE__ is not.
355 #elif defined(__VSX__) && !defined(__APPLE__)
357 #define EIGEN_VECTORIZE
358 #define EIGEN_VECTORIZE_VSX 1
// AltiVec branch: __ALTIVEC__ is defined.
366 #elif defined __ALTIVEC__
368 #define EIGEN_VECTORIZE
369 #define EIGEN_VECTORIZE_ALTIVEC
// NEON branch: either NEON feature macro is defined and EIGEN_ARM64_USE_SVE
// is not. This branch also includes arm_neon.h.
377 #elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)
379 #define EIGEN_VECTORIZE
380 #define EIGEN_VECTORIZE_NEON
381 #include <arm_neon.h>
385 #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)
387 #define EIGEN_VECTORIZE
388 #define EIGEN_VECTORIZE_SVE
393 #if defined __ARM_FEATURE_SVE_BITS
394 #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
396 #error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
// z/Architecture branch: __s390x__ and __VEC__ are both defined. This branch
// also includes vecintrin.h.
399 #elif (defined __s390x__ && defined __VEC__)
401 #define EIGEN_VECTORIZE
402 #define EIGEN_VECTORIZE_ZVECTOR
403 #include <vecintrin.h>
// MIPS MSA branch: __mips_msa is defined. Inside a little-endian check,
// __LP64__ selects between the EIGEN_MIPS_64 and EIGEN_MIPS_32 markers.
// NOTE(review): the #else/#endif lines of the nested checks are not visible
// in this excerpt, so it cannot be confirmed here whether the two vectorize
// markers below sit inside the little-endian check.
405 #elif defined __mips_msa
409 #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
410 #if defined(__LP64__)
411 #define EIGEN_MIPS_64
413 #define EIGEN_MIPS_32
415 #define EIGEN_VECTORIZE
416 #define EIGEN_VECTORIZE_MSA
// ARM64 scalar FP16 arithmetic: include arm_fp16.h when the corresponding
// EIGEN_HAS_* macro is defined.
426 #if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
427 #include <arm_fp16.h>
// EIGEN_HAS_FP16_C is defined when __F16C__ is defined, EIGEN_GPUCC is not,
// and the compiler is either not strict clang or is strict clang >= 3.8.0.
430 #if defined(__F16C__) && !defined(EIGEN_GPUCC) && (!EIGEN_COMP_CLANG_STRICT || EIGEN_CLANG_STRICT_AT_LEAST(3,8,0))
432 #define EIGEN_HAS_FP16_C
// NOTE(review): the guard deciding whether this include is needed is not
// visible in this excerpt.
440 #include <immintrin.h>
// CUDA: mark GPU vectorization and include vector_types.h. The FP16 marker is
// defined for EIGEN_CUDA_SDK_VER >= 70500.
444 #if defined EIGEN_CUDACC
445 #define EIGEN_VECTORIZE_GPU
446 #include <vector_types.h>
447 #if EIGEN_CUDA_SDK_VER >= 70500
448 #define EIGEN_HAS_CUDA_FP16
// CUDA FP16 headers, included only when the marker above is defined.
452 #if defined(EIGEN_HAS_CUDA_FP16)
453 #include <cuda_runtime_api.h>
454 #include <cuda_fp16.h>
// HIP: mark GPU vectorization, define the FP16 and BF16 markers, and include
// the matching HIP headers.
457 #if defined(EIGEN_HIPCC)
458 #define EIGEN_VECTORIZE_GPU
459 #include <hip/hip_vector_types.h>
460 #define EIGEN_HAS_HIP_FP16
461 #include <hip/hip_fp16.h>
462 #define EIGEN_HAS_HIP_BF16
463 #include <hip/hip_bfloat16.h>
// Project-local header, included by relative path.
468 #include "../InternalHeaderCheck.h"
// Return chain of SimdInstructionSetsInUse: each branch yields a
// comma-separated, human-readable list of instruction sets, keyed on the
// EIGEN_VECTORIZE_* markers and tested from AVX512 downwards.
473 #if defined(EIGEN_VECTORIZE_AVX512)
474 return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
475 #elif defined(EIGEN_VECTORIZE_AVX)
// Every entry is comma-separated, matching the AVX512 string above.
476 return "AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
477 #elif defined(EIGEN_VECTORIZE_SSE4_2)
478 return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
479 #elif defined(EIGEN_VECTORIZE_SSE4_1)
480 return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
481 #elif defined(EIGEN_VECTORIZE_SSSE3)
482 return "SSE, SSE2, SSE3, SSSE3";
483 #elif defined(EIGEN_VECTORIZE_SSE3)
484 return "SSE, SSE2, SSE3";
// NOTE(review): the return lines for the SSE2, ALTIVEC, VSX, NEON, SVE and MSA
// branches, and the closing #else/#endif, are not visible in this excerpt.
485 #elif defined(EIGEN_VECTORIZE_SSE2)
487 #elif defined(EIGEN_VECTORIZE_ALTIVEC)
489 #elif defined(EIGEN_VECTORIZE_VSX)
491 #elif defined(EIGEN_VECTORIZE_NEON)
493 #elif defined(EIGEN_VECTORIZE_SVE)
495 #elif defined(EIGEN_VECTORIZE_ZVECTOR)
496 return "S390X ZVECTOR";
497 #elif defined(EIGEN_VECTORIZE_MSA)
// NOTE(review): the signature appears after the return chain in this excerpt;
// presumably it precedes the chain in the full file - confirm.
static const char * SimdInstructionSetsInUse(void)