ConfigureVectorization.h
Go to the documentation of this file.
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008-2018 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2020, Arm Limited and Contributors
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_CONFIGURE_VECTORIZATION_H
12 #define EIGEN_CONFIGURE_VECTORIZATION_H
13 
14 //------------------------------------------------------------------------------------------
15 // Static and dynamic alignment control
16 //
17 // The main purpose of this section is to define EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES
18 // as the maximal boundary in bytes on which dynamically and statically allocated data may be aligned, respectively.
19 // The values of EIGEN_MAX_ALIGN_BYTES and EIGEN_MAX_STATIC_ALIGN_BYTES can be specified by the user. If not,
20 // a default value is automatically computed based on architecture, compiler, and OS.
21 //
22 // This section also defines macros EIGEN_ALIGN_TO_BOUNDARY(N) and the shortcuts EIGEN_ALIGN{8,16,32,_MAX}
23 // to be used to declare statically aligned buffers.
24 //------------------------------------------------------------------------------------------
25 
26 
/* EIGEN_ALIGN_TO_BOUNDARY(n) requests n-byte alignment for a declaration (needed to satisfy SIMD
 * requirements); EIGEN_ALIGNOF(x) queries the alignment of x.
 *
 * Both are defined even when vectorization (EIGEN_VECTORIZE) is disabled. If alignment depended on
 * EIGEN_VECTORIZE, vectorized and non-vectorized code would not be binary compatible and could
 * not be linked together.
 */
#if !defined(EIGEN_CUDACC)
// Regular host compilers: use the standard keywords.
#define EIGEN_ALIGN_TO_BOUNDARY(n) alignas(n)
#define EIGEN_ALIGNOF(x) alignof(x)
#else
// EIGEN_CUDACC is defined: use the __align__/__alignof spellings instead.
#define EIGEN_ALIGN_TO_BOUNDARY(n) __align__(n)
#define EIGEN_ALIGNOF(x) __alignof(x)
#endif
41 
// EIGEN_IDEAL_MAX_ALIGN_BYTES: the alignment (in bytes) this file falls back to when the user
// does not request a specific one. The first matching case below wins.
#if defined(EIGEN_DONT_VECTORIZE) && !defined(EIGEN_GPUCC)
// The user explicitly disabled vectorization, so alignment is disabled as well.
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 0
#elif defined(EIGEN_DONT_VECTORIZE)
// EIGEN_DONT_VECTORIZE with a GPU compiler: GPU code is always vectorized and requires
// memory alignment for statically allocated buffers.
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
#elif defined(__AVX512F__)
// 64 bytes static alignment is preferred only if really required
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 64
#elif defined(__AVX__)
// 32 bytes static alignment is preferred only if really required
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 32
#else
#define EIGEN_IDEAL_MAX_ALIGN_BYTES 16
#endif
60 
61 
// Smallest value for which the notion of explicit alignment makes sense.
#define EIGEN_MIN_ALIGN_BYTES 16

// Reject contradictory settings: one of the "don't align" switches combined with a
// strictly positive, user-provided EIGEN_MAX_STATIC_ALIGN_BYTES.
#if defined(EIGEN_MAX_STATIC_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES > 0 && (defined(EIGEN_DONT_ALIGN) || defined(EIGEN_DONT_ALIGN_STATICALLY))
#error EIGEN_MAX_STATIC_ALIGN_BYTES and EIGEN_DONT_ALIGN[_STATICALLY] are both defined with EIGEN_MAX_STATIC_ALIGN_BYTES!=0. Use EIGEN_MAX_STATIC_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN_STATICALLY.
#endif

// EIGEN_DONT_ALIGN_STATICALLY and EIGEN_DONT_ALIGN are deprecated;
// either one is translated into EIGEN_MAX_STATIC_ALIGN_BYTES=0.
#if defined(EIGEN_DONT_ALIGN) || defined(EIGEN_DONT_ALIGN_STATICALLY)
// #undef is a no-op when the macro is not defined, so no guard is needed.
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
#endif
81 
#if !defined(EIGEN_MAX_STATIC_ALIGN_BYTES)

// No value chosen so far: guess a sensible default for EIGEN_MAX_STATIC_ALIGN_BYTES.
//
// 16 byte alignment is only useful for vectorization, but because it affects the ABI it has to be
// enabled on every platform where vectorization might be enabled. Always aligning would work in
// theory, yet it causes problems on some compiler+architecture combinations, so static alignment
// is switched off for those. Only static alignment is really problematic (it relies on nonstandard
// compiler extensions); heap alignment is kept where possible.

// GCC-like compiler on an architecture outside the list below => no stack alignment.
#if !EIGEN_COMP_GNUC || EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM_OR_ARM64 || EIGEN_ARCH_PPC || EIGEN_ARCH_IA64 || EIGEN_ARCH_MIPS
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 0
#else
#define EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT 1
#endif

// Static alignment is also completely disabled with Sun Studio and on QNX.
#if EIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT || EIGEN_COMP_SUNCC || EIGEN_OS_QNX
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 0
#else
#define EIGEN_ARCH_WANTS_STACK_ALIGNMENT 1
#endif

// Platforms that want stack alignment get the ideal value, all others get none.
#if !EIGEN_ARCH_WANTS_STACK_ALIGNMENT
#define EIGEN_MAX_STATIC_ALIGN_BYTES 0
#else
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#endif

#endif
114 
// An EIGEN_MAX_ALIGN_BYTES that is already defined at this point acts as an upper bound for
// EIGEN_MAX_STATIC_ALIGN_BYTES.
#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_STATIC_ALIGN_BYTES > EIGEN_MAX_ALIGN_BYTES
#undef EIGEN_MAX_STATIC_ALIGN_BYTES
#define EIGEN_MAX_STATIC_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
#endif

// With static alignment switched off, also define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
// (unless it is defined already).
#if !defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) && EIGEN_MAX_STATIC_ALIGN_BYTES == 0
#define EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT
#endif

// From this point on, EIGEN_MAX_STATIC_ALIGN_BYTES>0 is the true test of whether arrays on the
// stack are to be aligned. It combines the user's explicit choice (a user-set
// EIGEN_MAX_STATIC_ALIGN_BYTES) with the architecture config (EIGEN_ARCH_WANTS_STACK_ALIGNMENT).
// Henceforth, only EIGEN_MAX_STATIC_ALIGN_BYTES should be used.
129 
130 
// Fixed-boundary shortcuts for EIGEN_ALIGN_TO_BOUNDARY.
#define EIGEN_ALIGN64 EIGEN_ALIGN_TO_BOUNDARY(64)
#define EIGEN_ALIGN32 EIGEN_ALIGN_TO_BOUNDARY(32)
#define EIGEN_ALIGN16 EIGEN_ALIGN_TO_BOUNDARY(16)
#define EIGEN_ALIGN8 EIGEN_ALIGN_TO_BOUNDARY(8)

// EIGEN_ALIGN_MAX aligns to EIGEN_MAX_STATIC_ALIGN_BYTES, and expands to nothing
// when that value is not strictly positive.
#if EIGEN_MAX_STATIC_ALIGN_BYTES <= 0
#define EIGEN_ALIGN_MAX
#else
#define EIGEN_ALIGN_MAX EIGEN_ALIGN_TO_BOUNDARY(EIGEN_MAX_STATIC_ALIGN_BYTES)
#endif
141 
142 
// Dynamic alignment control

// EIGEN_DONT_ALIGN cannot be combined with a strictly positive EIGEN_MAX_ALIGN_BYTES.
#if defined(EIGEN_MAX_ALIGN_BYTES) && EIGEN_MAX_ALIGN_BYTES > 0 && defined(EIGEN_DONT_ALIGN)
#error EIGEN_MAX_ALIGN_BYTES and EIGEN_DONT_ALIGN are both defined with EIGEN_MAX_ALIGN_BYTES!=0. Use EIGEN_MAX_ALIGN_BYTES=0 as a synonym of EIGEN_DONT_ALIGN.
#endif

#if defined(EIGEN_DONT_ALIGN)
// EIGEN_DONT_ALIGN forces the value to 0 (#undef is a no-op if the macro is not defined).
#undef EIGEN_MAX_ALIGN_BYTES
#define EIGEN_MAX_ALIGN_BYTES 0
#elif !defined(EIGEN_MAX_ALIGN_BYTES)
// Nothing requested: fall back to the ideal value.
#define EIGEN_MAX_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#endif

// EIGEN_DEFAULT_ALIGN_BYTES is the larger of EIGEN_IDEAL_MAX_ALIGN_BYTES and EIGEN_MAX_ALIGN_BYTES.
#if EIGEN_MAX_ALIGN_BYTES < EIGEN_IDEAL_MAX_ALIGN_BYTES
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_IDEAL_MAX_ALIGN_BYTES
#else
#define EIGEN_DEFAULT_ALIGN_BYTES EIGEN_MAX_ALIGN_BYTES
#endif
163 
164 
// EIGEN_UNALIGNED_VECTORIZE defaults to 1 unless it was defined beforehand.
#if !defined(EIGEN_UNALIGNED_VECTORIZE)
#define EIGEN_UNALIGNED_VECTORIZE 1
#endif

//----------------------------------------------------------------------

// Disabled alignment implies disabled vectorization. EIGEN_MAX_ALIGN_BYTES is the proper check:
// it accounts for both the user's will (EIGEN_MAX_ALIGN_BYTES, EIGEN_DONT_ALIGN) and our own
// platform checks.
#if !defined(EIGEN_DONT_VECTORIZE) && EIGEN_MAX_ALIGN_BYTES == 0
#define EIGEN_DONT_VECTORIZE
#endif
178 
179 
// SSE2 availability, detected separately for MSVC and for every other compiler.
// Apart from #include <malloc.h> (and possibly the _M_IX86_FP test) this can likely be
// removed, as gcc 4.1 and msvc 2008 are not supported anyways.
#if !EIGEN_COMP_MSVC
// Non-MSVC compilers announce SSE2 through __SSE2__.
#ifdef __SSE2__
#define EIGEN_SSE2_ON_NON_MSVC
#endif
#else
#include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
// A user reported that in 64-bit mode MSVC doesn't care to define _M_IX86_FP,
// hence the extra architecture test.
#if EIGEN_ARCH_x86_64 || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2))
#define EIGEN_SSE2_ON_MSVC_2008_OR_LATER
#endif
#endif
193 
194 #if !(defined(EIGEN_DONT_VECTORIZE) || defined(EIGEN_GPUCC))
195 
196  #if defined (EIGEN_SSE2_ON_NON_MSVC) || defined(EIGEN_SSE2_ON_MSVC_2008_OR_LATER)
197 
198  // Defines symbols for compile-time detection of which instructions are
199  // used.
200  // EIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
201  #define EIGEN_VECTORIZE
202  #define EIGEN_VECTORIZE_SSE
203  #define EIGEN_VECTORIZE_SSE2
204 
205  // Detect sse3/ssse3/sse4:
206  // gcc and icc defines __SSE3__, ...
207  // there is no way to know about this on msvc. You can define EIGEN_VECTORIZE_SSE* if you
208  // want to force the use of those instructions with msvc.
209  #ifdef __SSE3__
210  #define EIGEN_VECTORIZE_SSE3
211  #endif
212  #ifdef __SSSE3__
213  #define EIGEN_VECTORIZE_SSSE3
214  #endif
215  #ifdef __SSE4_1__
216  #define EIGEN_VECTORIZE_SSE4_1
217  #endif
218  #ifdef __SSE4_2__
219  #define EIGEN_VECTORIZE_SSE4_2
220  #endif
221  #ifdef __AVX__
222  #ifndef EIGEN_USE_SYCL
223  #define EIGEN_VECTORIZE_AVX
224  #endif
225  #define EIGEN_VECTORIZE_SSE3
226  #define EIGEN_VECTORIZE_SSSE3
227  #define EIGEN_VECTORIZE_SSE4_1
228  #define EIGEN_VECTORIZE_SSE4_2
229  #endif
230  #ifdef __AVX2__
231  #ifndef EIGEN_USE_SYCL
232  #define EIGEN_VECTORIZE_AVX2
233  #define EIGEN_VECTORIZE_AVX
234  #endif
235  #define EIGEN_VECTORIZE_SSE3
236  #define EIGEN_VECTORIZE_SSSE3
237  #define EIGEN_VECTORIZE_SSE4_1
238  #define EIGEN_VECTORIZE_SSE4_2
239  #endif
240  #if defined(__FMA__) || (EIGEN_COMP_MSVC && defined(__AVX2__))
241  // MSVC does not expose a switch dedicated for FMA
242  // For MSVC, AVX2 => FMA
243  #define EIGEN_VECTORIZE_FMA
244  #endif
245  #if defined(__AVX512F__)
246  #ifndef EIGEN_VECTORIZE_FMA
247  #if EIGEN_COMP_GNUC
248  #error Please add -mfma to your compiler flags: compiling with -mavx512f alone without SSE/AVX FMA is not supported (bug 1638).
249  #else
250  #error Please enable FMA in your compiler flags (e.g. -mfma): compiling with AVX512 alone without SSE/AVX FMA is not supported (bug 1638).
251  #endif
252  #endif
253  #ifndef EIGEN_USE_SYCL
254  #define EIGEN_VECTORIZE_AVX512
255  #define EIGEN_VECTORIZE_AVX2
256  #define EIGEN_VECTORIZE_AVX
257  #endif
258  #define EIGEN_VECTORIZE_FMA
259  #define EIGEN_VECTORIZE_SSE3
260  #define EIGEN_VECTORIZE_SSSE3
261  #define EIGEN_VECTORIZE_SSE4_1
262  #define EIGEN_VECTORIZE_SSE4_2
263  #ifndef EIGEN_USE_SYCL
264  #ifdef __AVX512DQ__
265  #define EIGEN_VECTORIZE_AVX512DQ
266  #endif
267  #ifdef __AVX512ER__
268  #define EIGEN_VECTORIZE_AVX512ER
269  #endif
270  #ifdef __AVX512BF16__
271  #define EIGEN_VECTORIZE_AVX512BF16
272  #endif
273  #ifdef __AVX512FP16__
274  #ifdef __AVX512VL__
275  #define EIGEN_VECTORIZE_AVX512FP16
276  #else
277  #if EIGEN_COMP_GNUC
278  #error Please add -mavx512vl to your compiler flags: compiling with -mavx512fp16 alone without AVX512-VL is not supported.
279  #else
280  #error Please enable AVX512-VL in your compiler flags (e.g. -mavx512vl): compiling with AVX512-FP16 alone without AVX512-VL is not supported.
281  #endif
282  #endif
283  #endif
284  #endif
285  #endif
286 
// Disable AVX support on broken Xcode versions.
#if (EIGEN_COMP_CLANGAPPLE == 11000033) && (__MAC_OS_X_VERSION_MIN_REQUIRED == 101500)
// The clang compiler shipped with Xcode 11.0 has a nasty bug that shows up in a common
// compilation situation: a Mac deployment target of macOS 10.15.
// See https://trac.macports.org/ticket/58776#no1
#ifdef EIGEN_VECTORIZE_AVX
#undef EIGEN_VECTORIZE_AVX
#warning "Disabling AVX support: clang compiler shipped with XCode 11.[012] generates broken assembly with -macosx-version-min=10.15 and AVX enabled. "
// Also drop every feature macro this file only defines together with AVX/AVX512, including the
// AVX512 BF16/FP16 ones, so that none of them stays defined without EIGEN_VECTORIZE_AVX.
// (#undef of a macro that is not defined is a no-op, so no #ifdef guards are needed.)
#undef EIGEN_VECTORIZE_AVX2
#undef EIGEN_VECTORIZE_FMA
#undef EIGEN_VECTORIZE_AVX512
#undef EIGEN_VECTORIZE_AVX512DQ
#undef EIGEN_VECTORIZE_AVX512ER
#undef EIGEN_VECTORIZE_AVX512BF16
#undef EIGEN_VECTORIZE_AVX512FP16
#endif
// NOTE: Confirmed test failures in XCode 11.0, and XCode 11.2 with -macosx-version-min=10.15 and AVX
// NOTE using -macosx-version-min=10.15 with Xcode 11.0 results in runtime segmentation faults in many tests, 11.2 produce core dumps in 3 tests
// NOTE using -macosx-version-min=10.14 produces functioning and passing tests in all cases
// NOTE __clang_version__ "11.0.0 (clang-1100.0.33.8)" XCode 11.0 <- Produces many segfault and core dumping tests
// with -macosx-version-min=10.15 and AVX
// NOTE __clang_version__ "11.0.0 (clang-1100.0.33.12)" XCode 11.2 <- Produces 3 core dumping tests with
// -macosx-version-min=10.15 and AVX
#endif
318 
319  // include files
320 
321  // This extern "C" works around a MINGW-w64 compilation issue
322  // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
323  // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
324  // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
325  // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
326  // so, to avoid compile errors when windows.h is included after Eigen/Core, ensure intrinsics are extern "C" here too.
327  // notice that since these are C headers, the extern "C" is theoretically needed anyways.
328  extern "C" {
329  // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
330  // Doing so triggers some issues with ICC. However old gcc versions seems to not have this file, thus:
331  #if EIGEN_COMP_ICC >= 1110 || EIGEN_COMP_EMSCRIPTEN
332  #include <immintrin.h>
333  #else
334  #include <mmintrin.h>
335  #include <emmintrin.h>
336  #include <xmmintrin.h>
337  #ifdef EIGEN_VECTORIZE_SSE3
338  #include <pmmintrin.h>
339  #endif
340  #ifdef EIGEN_VECTORIZE_SSSE3
341  #include <tmmintrin.h>
342  #endif
343  #ifdef EIGEN_VECTORIZE_SSE4_1
344  #include <smmintrin.h>
345  #endif
346  #ifdef EIGEN_VECTORIZE_SSE4_2
347  #include <nmmintrin.h>
348  #endif
349  #if defined(EIGEN_VECTORIZE_AVX) || defined(EIGEN_VECTORIZE_AVX512)
350  #include <immintrin.h>
351  #endif
352  #endif
353  } // end extern "C"
354 
355  #elif defined(__VSX__) && !defined(__APPLE__)
356 
357  #define EIGEN_VECTORIZE
358  #define EIGEN_VECTORIZE_VSX 1
359  #include <altivec.h>
360  // We need to #undef all these ugly tokens defined in <altivec.h>
361  // => use __vector instead of vector
362  #undef bool
363  #undef vector
364  #undef pixel
365 
366  #elif defined __ALTIVEC__
367 
368  #define EIGEN_VECTORIZE
369  #define EIGEN_VECTORIZE_ALTIVEC
370  #include <altivec.h>
371  // We need to #undef all these ugly tokens defined in <altivec.h>
372  // => use __vector instead of vector
373  #undef bool
374  #undef vector
375  #undef pixel
376 
377  #elif ((defined __ARM_NEON) || (defined __ARM_NEON__)) && !(defined EIGEN_ARM64_USE_SVE)
378 
379  #define EIGEN_VECTORIZE
380  #define EIGEN_VECTORIZE_NEON
381  #include <arm_neon.h>
382 
383  // We currently require SVE to be enabled explicitly via EIGEN_ARM64_USE_SVE and
384  // will not select the backend automatically
385  #elif (defined __ARM_FEATURE_SVE) && (defined EIGEN_ARM64_USE_SVE)
386 
387  #define EIGEN_VECTORIZE
388  #define EIGEN_VECTORIZE_SVE
389  #include <arm_sve.h>
390 
391  // Since we depend on knowing SVE vector lengths at compile-time, we need
392  // to ensure a fixed lengths is set
393  #if defined __ARM_FEATURE_SVE_BITS
394  #define EIGEN_ARM64_SVE_VL __ARM_FEATURE_SVE_BITS
395  #else
396 #error "Eigen requires a fixed SVE lector length but EIGEN_ARM64_SVE_VL is not set."
397 #endif
398 
399 #elif (defined __s390x__ && defined __VEC__)
400 
401 #define EIGEN_VECTORIZE
402 #define EIGEN_VECTORIZE_ZVECTOR
403 #include <vecintrin.h>
404 
405 #elif defined __mips_msa
406 
407 // Limit MSA optimizations to little-endian CPUs for now.
408 // TODO: Perhaps, eventually support MSA optimizations on big-endian CPUs?
409 #if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
410 #if defined(__LP64__)
411 #define EIGEN_MIPS_64
412 #else
413 #define EIGEN_MIPS_32
414 #endif
415 #define EIGEN_VECTORIZE
416 #define EIGEN_VECTORIZE_MSA
417 #include <msa.h>
418 #endif
419 
420 #endif
421 #endif
422 
423 // Following the Arm ACLE arm_neon.h should also include arm_fp16.h but not all
424 // compilers seem to follow this. We therefore include it explicitly.
425 // See also: https://bugs.llvm.org/show_bug.cgi?id=47955
426 #if defined(EIGEN_HAS_ARM64_FP16_SCALAR_ARITHMETIC)
427  #include <arm_fp16.h>
428 #endif
429 
430 #if defined(__F16C__) && !defined(EIGEN_GPUCC) && (!EIGEN_COMP_CLANG_STRICT || EIGEN_CLANG_STRICT_AT_LEAST(3,8,0))
431  // We can use the optimized fp16 to float and float to fp16 conversion routines
432  #define EIGEN_HAS_FP16_C
433 
434  #if EIGEN_COMP_GNUC
435  // Make sure immintrin.h is included, even if e.g. vectorization is
436  // explicitly disabled (see also issue #2395).
437  // Note that FP16C intrinsics for gcc and clang are included by immintrin.h,
438  // as opposed to emmintrin.h as suggested by Intel:
439  // https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=FP16C&expand=1711
440  #include <immintrin.h>
441  #endif
442 #endif
443 
444 #if defined EIGEN_CUDACC
445  #define EIGEN_VECTORIZE_GPU
446  #include <vector_types.h>
447  #if EIGEN_CUDA_SDK_VER >= 70500
448  #define EIGEN_HAS_CUDA_FP16
449  #endif
450 #endif
451 
452 #if defined(EIGEN_HAS_CUDA_FP16)
453  #include <cuda_runtime_api.h>
454  #include <cuda_fp16.h>
455 #endif
456 
457 #if defined(EIGEN_HIPCC)
458  #define EIGEN_VECTORIZE_GPU
459  #include <hip/hip_vector_types.h>
460  #define EIGEN_HAS_HIP_FP16
461  #include <hip/hip_fp16.h>
462  #define EIGEN_HAS_HIP_BF16
463  #include <hip/hip_bfloat16.h>
464 #endif
465 
466 
468 #include "../InternalHeaderCheck.h"
469 
470 namespace Eigen {
471 
/**
 * Returns a human-readable, comma-separated list of the SIMD instruction sets selected at
 * compile time through the EIGEN_VECTORIZE_* macros.
 *
 * The branches are tested from the most to the least capable x86 level, followed by the
 * other architectures; the first macro that is defined decides the result.
 *
 * @return a string literal naming the instruction sets, or "None" if no
 *         EIGEN_VECTORIZE_* macro tested here is defined.
 */
static inline const char *SimdInstructionSetsInUse(void) {
#if defined(EIGEN_VECTORIZE_AVX512)
  return "AVX512, FMA, AVX2, AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_AVX)
  // Separator after "AVX" added so the list is comma-separated like every other branch.
  return "AVX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_2)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
#elif defined(EIGEN_VECTORIZE_SSE4_1)
  return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
#elif defined(EIGEN_VECTORIZE_SSSE3)
  return "SSE, SSE2, SSE3, SSSE3";
#elif defined(EIGEN_VECTORIZE_SSE3)
  return "SSE, SSE2, SSE3";
#elif defined(EIGEN_VECTORIZE_SSE2)
  return "SSE, SSE2";
#elif defined(EIGEN_VECTORIZE_ALTIVEC)
  return "AltiVec";
#elif defined(EIGEN_VECTORIZE_VSX)
  return "VSX";
#elif defined(EIGEN_VECTORIZE_NEON)
  return "ARM NEON";
#elif defined(EIGEN_VECTORIZE_SVE)
  return "ARM SVE";
#elif defined(EIGEN_VECTORIZE_ZVECTOR)
  return "S390X ZVECTOR";
#elif defined(EIGEN_VECTORIZE_MSA)
  return "MIPS MSA";
#else
  return "None";
#endif
}
503 
504 } // end namespace Eigen
505 
506 
507 #endif // EIGEN_CONFIGURE_VECTORIZATION_H
: InteropHeaders
Definition: Core:139
static const char * SimdInstructionSetsInUse(void)