Memory.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2015 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2008-2009 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2009 Kenneth Riddile <kfriddile@yahoo.com>
// Copyright (C) 2010 Hauke Heibel <hauke.heibel@gmail.com>
// Copyright (C) 2010 Thomas Capricelli <orzel@freehackers.org>
// Copyright (C) 2013 Pavel Holoborodko <pavel@holoborodko.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


/*****************************************************************************
*** Platform checks for aligned malloc functions                           ***
*****************************************************************************/
#ifndef EIGEN_MEMORY_H
#define EIGEN_MEMORY_H

#ifndef EIGEN_MALLOC_ALREADY_ALIGNED

// Try to determine automatically if malloc is already aligned.

// On 64-bit systems, glibc's malloc returns 16-byte-aligned pointers, see:
// http://www.gnu.org/s/libc/manual/html_node/Aligned-Memory-Blocks.html
// This is true at least since glibc 2.8.
// That leaves the question of how to detect 64-bit. According to this document,
// http://gcc.fyxm.net/summit/2003/Porting%20to%2064%20bit.pdf
// page 114, "[The] LP64 model [...] is used by all 64-bit UNIX ports", so it is indeed
// quite safe, at least within the context of glibc, to equate 64-bit with LP64.
#if defined(__GLIBC__) && ((__GLIBC__>=2 && __GLIBC_MINOR__ >= 8) || __GLIBC__>2) \
 && defined(__LP64__) && ! defined( __SANITIZE_ADDRESS__ ) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED 0
#endif

// FreeBSD 6 seems to have 16-byte aligned malloc.
// See http://svn.freebsd.org/viewvc/base/stable/6/lib/libc/stdlib/malloc.c?view=markup
// FreeBSD 7 seems to have 16-byte aligned malloc except on ARM and MIPS architectures.
// See http://svn.freebsd.org/viewvc/base/stable/7/lib/libc/stdlib/malloc.c?view=markup
#if defined(__FreeBSD__) && !(EIGEN_ARCH_ARM || EIGEN_ARCH_MIPS) && (EIGEN_DEFAULT_ALIGN_BYTES == 16)
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED 0
#endif

#if (EIGEN_OS_MAC && (EIGEN_DEFAULT_ALIGN_BYTES == 16))   \
 || (EIGEN_OS_WIN64 && (EIGEN_DEFAULT_ALIGN_BYTES == 16)) \
 || EIGEN_GLIBC_MALLOC_ALREADY_ALIGNED                    \
 || EIGEN_FREEBSD_MALLOC_ALREADY_ALIGNED
  #define EIGEN_MALLOC_ALREADY_ALIGNED 1
#else
  #define EIGEN_MALLOC_ALREADY_ALIGNED 0
#endif

#endif

#ifndef EIGEN_MALLOC_CHECK_THREAD_LOCAL

// Check whether we can use the thread_local keyword to allow or disallow
// allocating memory with per-thread granularity, by means of the
// set_is_malloc_allowed() function.
#ifndef EIGEN_AVOID_THREAD_LOCAL

#if ((EIGEN_COMP_GNUC) || __has_feature(cxx_thread_local) || EIGEN_COMP_MSVC >= 1900) && !defined(EIGEN_GPU_COMPILE_PHASE)
#define EIGEN_MALLOC_CHECK_THREAD_LOCAL thread_local
#else
#define EIGEN_MALLOC_CHECK_THREAD_LOCAL
#endif

#else // EIGEN_AVOID_THREAD_LOCAL
#define EIGEN_MALLOC_CHECK_THREAD_LOCAL
#endif // EIGEN_AVOID_THREAD_LOCAL

#endif

#include "../InternalHeaderCheck.h"

namespace Eigen {

namespace internal {

// Helpers to assert, at runtime, that heap allocation is allowed. With
// EIGEN_NO_MALLOC every allocation triggers an assertion failure; with
// EIGEN_RUNTIME_NO_MALLOC allocation can be switched on and off, per thread,
// via set_is_malloc_allowed().
#ifdef EIGEN_NO_MALLOC
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
  eigen_assert(false && "heap allocation is forbidden (EIGEN_NO_MALLOC is defined)");
}
#elif defined EIGEN_RUNTIME_NO_MALLOC
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed_impl(bool update, bool new_value = false)
{
  EIGEN_MALLOC_CHECK_THREAD_LOCAL static bool value = true;
  if (update)
    value = new_value;
  return value;
}
EIGEN_DEVICE_FUNC inline bool is_malloc_allowed() { return is_malloc_allowed_impl(false); }
EIGEN_DEVICE_FUNC inline bool set_is_malloc_allowed(bool new_value) { return is_malloc_allowed_impl(true, new_value); }
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{
  eigen_assert(is_malloc_allowed() && "heap allocation is forbidden (EIGEN_RUNTIME_NO_MALLOC is defined and g_is_malloc_allowed is false)");
}
#else
EIGEN_DEVICE_FUNC inline void check_that_malloc_is_allowed()
{}
#endif

EIGEN_DEVICE_FUNC
inline void throw_std_bad_alloc()
{
  #ifdef EIGEN_EXCEPTIONS
    throw std::bad_alloc();
  #else
    std::size_t huge = static_cast<std::size_t>(-1);
    #if defined(EIGEN_HIPCC)
    //
    // Calls to "::operator new" are to be treated as opaque function calls (i.e., no inlining),
    // and as a consequence the code in the #else block triggers the hipcc warning:
    // "no overloaded function has restriction specifiers that are compatible with the ambient context"
    //
    // "throw_std_bad_alloc" has the EIGEN_DEVICE_FUNC attribute, so it seems that hipcc expects
    // the same on "operator new".
    // Reverting the code back to the old version in this #if block for the hipcc compiler.
    //
    new int[huge];
    #else
    void* unused = ::operator new(huge);
    EIGEN_UNUSED_VARIABLE(unused);
    #endif
  #endif
}


/*****************************************************************************
*** Implementation of handmade aligned functions                           ***
*****************************************************************************/

/* ----- Hand made implementations of aligned malloc/free and realloc ----- */

/** \internal Like malloc, but the returned pointer is guaranteed to be aligned to `alignment`.
  * Fast, but wastes `alignment` additional bytes of memory. Does not throw any exception.
  */
EIGEN_DEVICE_FUNC inline void* handmade_aligned_malloc(std::size_t size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
{
  eigen_assert(alignment >= sizeof(void*) && alignment <= 128 && (alignment & (alignment-1)) == 0 && "Alignment must be at least sizeof(void*), less than or equal to 128, and a power of 2");

  check_that_malloc_is_allowed();
  EIGEN_USING_STD(malloc)
  void* original = malloc(size + alignment);
  if (original == 0) return 0;
  uint8_t offset = static_cast<uint8_t>(alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1)));
  void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);
  *(static_cast<uint8_t*>(aligned) - 1) = offset;
  return aligned;
}
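
// Usage sketch (illustrative, not from the Eigen sources): over-allocating
// `alignment` extra bytes guarantees room both for rounding the address up and
// for the one-byte offset tag stored just before the returned pointer, which
// handmade_aligned_free reads back to recover the original malloc'ed block:
// \code
//   void* p = Eigen::internal::handmade_aligned_malloc(256, 64);
//   eigen_assert(reinterpret_cast<std::uintptr_t>(p) % 64 == 0); // 64-byte aligned
//   Eigen::internal::handmade_aligned_free(p);
// \endcode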

/** \internal Frees memory allocated with handmade_aligned_malloc */
EIGEN_DEVICE_FUNC inline void handmade_aligned_free(void *ptr)
{
  if (ptr) {
    uint8_t offset = static_cast<uint8_t>(*(static_cast<uint8_t*>(ptr) - 1));
    void* original = static_cast<void*>(static_cast<uint8_t*>(ptr) - offset);

    check_that_malloc_is_allowed();
    EIGEN_USING_STD(free)
    free(original);
  }
}

/** \internal
  * Reallocates an aligned block of memory.
  * Since we know that our handmade version is based on std::malloc
  * we can use std::realloc to implement efficient reallocation.
  */
EIGEN_DEVICE_FUNC inline void* handmade_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size, std::size_t alignment = EIGEN_DEFAULT_ALIGN_BYTES)
{
  if (ptr == nullptr) return handmade_aligned_malloc(new_size, alignment);
  uint8_t old_offset = *(static_cast<uint8_t*>(ptr) - 1);
  void* old_original = static_cast<uint8_t*>(ptr) - old_offset;

  check_that_malloc_is_allowed();
  EIGEN_USING_STD(realloc)
  void* original = realloc(old_original, new_size + alignment);
  if (original == nullptr) return nullptr;
  if (original == old_original) return ptr;
  uint8_t offset = static_cast<uint8_t>(alignment - (reinterpret_cast<std::size_t>(original) & (alignment - 1)));
  void* aligned = static_cast<void*>(static_cast<uint8_t*>(original) + offset);
  if (offset != old_offset) {
    const void* src = static_cast<const void*>(static_cast<uint8_t*>(original) + old_offset);
    std::size_t count = (std::min)(new_size, old_size);
    std::memmove(aligned, src, count);
  }
  *(static_cast<uint8_t*>(aligned) - 1) = offset;
  return aligned;
}
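
// Worked example (illustrative commentary, not from the Eigen sources):
// std::realloc may move the block to a base address with a different
// misalignment, so the payload can land at a different offset from the new
// base. With alignment == 16, if the base moves from an address ending in 0x8
// (offset 8) to one ending in 0x4 (offset 12), the std::memmove above shifts
// the contents by 4 bytes in place before the new offset byte is recorded.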

/** \internal Allocates \a size bytes. The returned pointer is guaranteed to have 16 or 32 bytes alignment depending on the requirements.
  * On allocation error, the returned pointer is null, and std::bad_alloc is thrown.
  */
EIGEN_DEVICE_FUNC inline void* aligned_malloc(std::size_t size)
{
  if (size == 0) return nullptr;

  void *result;
  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED

    check_that_malloc_is_allowed();
    EIGEN_USING_STD(malloc)
    result = malloc(size);

    #if EIGEN_DEFAULT_ALIGN_BYTES==16
    eigen_assert((size<16 || (std::size_t(result)%16)==0) && "System's malloc returned an unaligned pointer. Compile with EIGEN_MALLOC_ALREADY_ALIGNED=0 to fallback to handmade aligned memory allocator.");
    #endif
  #else
    result = handmade_aligned_malloc(size);
  #endif

  if(!result && size)
    throw_std_bad_alloc();

  return result;
}

/** \internal Frees memory allocated with aligned_malloc. */
EIGEN_DEVICE_FUNC inline void aligned_free(void *ptr)
{
  #if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED

    if(ptr)
      check_that_malloc_is_allowed();
    EIGEN_USING_STD(free)
    free(ptr);

  #else
    handmade_aligned_free(ptr);
  #endif
}
/**
  * \internal
  * \brief Reallocates an aligned block of memory.
  * \throws std::bad_alloc on allocation failure
  */
EIGEN_DEVICE_FUNC inline void* aligned_realloc(void *ptr, std::size_t new_size, std::size_t old_size)
{
  if (ptr == nullptr) return aligned_malloc(new_size);
  if (old_size == new_size) return ptr;
  if (new_size == 0) { aligned_free(ptr); return nullptr; }

  void *result;
#if (EIGEN_DEFAULT_ALIGN_BYTES==0) || EIGEN_MALLOC_ALREADY_ALIGNED
  EIGEN_UNUSED_VARIABLE(old_size)

  check_that_malloc_is_allowed();
  EIGEN_USING_STD(realloc)
  result = realloc(ptr,new_size);
#else
  result = handmade_aligned_realloc(ptr,new_size,old_size);
#endif

  if (!result && new_size)
    throw_std_bad_alloc();

  return result;
}
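
// Usage sketch (illustrative; the buffer and sizes are made up): these three
// functions form the untyped allocation layer used by the typed helpers below.
// \code
//   float* buf = static_cast<float*>(Eigen::internal::aligned_malloc(100 * sizeof(float)));
//   buf = static_cast<float*>(Eigen::internal::aligned_realloc(buf, 200 * sizeof(float), 100 * sizeof(float)));
//   Eigen::internal::aligned_free(buf);
// \endcode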


/*****************************************************************************
*** Implementation of conditionally aligned functions                      ***
*****************************************************************************/

/** \internal Allocates \a size bytes. If Align is true, then the returned pointer is guaranteed to be aligned as by aligned_malloc.
  * On allocation error, the returned pointer is null, and a std::bad_alloc is thrown.
  */
template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc(std::size_t size)
{
  return aligned_malloc(size);
}

template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_malloc<false>(std::size_t size)
{
  if (size == 0) return nullptr;

  check_that_malloc_is_allowed();
  EIGEN_USING_STD(malloc)
  void *result = malloc(size);

  if(!result && size)
    throw_std_bad_alloc();
  return result;
}

/** \internal Frees memory allocated with conditional_aligned_malloc */
template<bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_free(void *ptr)
{
  aligned_free(ptr);
}

template<> EIGEN_DEVICE_FUNC inline void conditional_aligned_free<false>(void *ptr)
{
  if(ptr)
    check_that_malloc_is_allowed();
  EIGEN_USING_STD(free)
  free(ptr);
}

template<bool Align> EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc(void* ptr, std::size_t new_size, std::size_t old_size)
{
  return aligned_realloc(ptr, new_size, old_size);
}

template<> EIGEN_DEVICE_FUNC inline void* conditional_aligned_realloc<false>(void* ptr, std::size_t new_size, std::size_t old_size)
{
  if (ptr == nullptr) return conditional_aligned_malloc<false>(new_size);
  if (old_size == new_size) return ptr;
  if (new_size == 0) { conditional_aligned_free<false>(ptr); return nullptr; }

  check_that_malloc_is_allowed();
  EIGEN_USING_STD(realloc)
  return realloc(ptr, new_size);
}


/*****************************************************************************
*** Construction/destruction of array elements                             ***
*****************************************************************************/

/** \internal Destructs the elements of an array.
  * The \a size parameter gives the number of elements on which to call the destructor of T.
  */
template<typename T> EIGEN_DEVICE_FUNC inline void destruct_elements_of_array(T *ptr, std::size_t size)
{
  // always destruct an array starting from the end.
  if(ptr)
    while(size) ptr[--size].~T();
}

/** \internal Default-constructs the elements of an array.
  * The \a size parameter gives the number of elements on which to call the default constructor of T.
  */
template<typename T> EIGEN_DEVICE_FUNC inline T* default_construct_elements_of_array(T *ptr, std::size_t size)
{
  std::size_t i=0;
  EIGEN_TRY
  {
    for (i = 0; i < size; ++i) ::new (ptr + i) T;
  }
  EIGEN_CATCH(...)
  {
    destruct_elements_of_array(ptr, i);
    EIGEN_THROW;
  }
  return ptr;
}

/** \internal Copy-constructs the elements of an array from a source array.
  * The \a size parameter gives the number of elements to copy-construct.
  */
template<typename T> EIGEN_DEVICE_FUNC inline T* copy_construct_elements_of_array(T *ptr, const T* src, std::size_t size)
{
  std::size_t i=0;
  EIGEN_TRY
  {
    for (i = 0; i < size; ++i) ::new (ptr + i) T(*(src + i));
  }
  EIGEN_CATCH(...)
  {
    destruct_elements_of_array(ptr, i);
    EIGEN_THROW;
  }
  return ptr;
}

/** \internal Move-constructs the elements of an array from a source array.
  * The \a size parameter gives the number of elements to move-construct.
  */
template<typename T> EIGEN_DEVICE_FUNC inline T* move_construct_elements_of_array(T *ptr, T* src, std::size_t size)
{
  std::size_t i=0;
  EIGEN_TRY
  {
    for (i = 0; i < size; ++i) ::new (ptr + i) T(std::move(*(src + i)));
  }
  EIGEN_CATCH(...)
  {
    destruct_elements_of_array(ptr, i);
    EIGEN_THROW;
  }
  return ptr;
}
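
// Note on exception safety (illustrative commentary, not from the Eigen
// sources): the index i is declared outside the EIGEN_TRY block so that, if the
// constructor of element i throws, the catch handler destroys exactly the i
// elements already constructed before re-throwing. For example, if T's
// constructor throws at i == 3, elements 0..2 are destructed and the buffer is
// left fully unconstructed, so the caller can free it safely.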


/*****************************************************************************
*** Implementation of aligned new/delete-like functions                    ***
*****************************************************************************/

template<typename T>
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void check_size_for_overflow(std::size_t size)
{
  if(size > std::size_t(-1) / sizeof(T))
    throw_std_bad_alloc();
}

/** \internal Allocates \a size objects of type T. The returned pointer is guaranteed to be aligned.
  * On allocation error, a std::bad_alloc is thrown.
  * The default constructor of T is called.
  */
template<typename T> EIGEN_DEVICE_FUNC inline T* aligned_new(std::size_t size)
{
  check_size_for_overflow<T>(size);
  T *result = static_cast<T*>(aligned_malloc(sizeof(T)*size));
  EIGEN_TRY
  {
    return default_construct_elements_of_array(result, size);
  }
  EIGEN_CATCH(...)
  {
    aligned_free(result);
    EIGEN_THROW;
  }
  return result;
}
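
// Usage sketch (illustrative, not from the Eigen sources): aligned_new and
// aligned_delete are the typed counterparts of aligned_malloc and aligned_free,
// pairing allocation with construction and destruction with deallocation.
// \code
//   Eigen::Vector4f* v = Eigen::internal::aligned_new<Eigen::Vector4f>(8);
//   v[0].setZero();
//   Eigen::internal::aligned_delete(v, 8);
// \endcode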

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new(std::size_t size)
{
  check_size_for_overflow<T>(size);
  T *result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  EIGEN_TRY
  {
    return default_construct_elements_of_array(result, size);
  }
  EIGEN_CATCH(...)
  {
    conditional_aligned_free<Align>(result);
    EIGEN_THROW;
  }
  return result;
}

/** \internal Deletes objects constructed with aligned_new.
  * The \a size parameter gives the number of elements on which to call the destructor of T.
  */
template<typename T> EIGEN_DEVICE_FUNC inline void aligned_delete(T *ptr, std::size_t size)
{
  destruct_elements_of_array<T>(ptr, size);
  aligned_free(ptr);
}

/** \internal Deletes objects constructed with conditional_aligned_new.
  * The \a size parameter gives the number of elements on which to call the destructor of T.
  */
template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete(T *ptr, std::size_t size)
{
  destruct_elements_of_array<T>(ptr, size);
  conditional_aligned_free<Align>(ptr);
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new(T* pts, std::size_t new_size, std::size_t old_size)
{
  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);

  // If elements need to be explicitly initialized, we cannot simply realloc
  // (or memcpy) the memory block - each element needs to be reconstructed.
  // Otherwise, objects that contain internal pointers like mpfr or
  // AnnoyingScalar can end up pointing to the wrong thing.
  T* result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*new_size));
  EIGEN_TRY
  {
    // Move-construct initial elements.
    std::size_t copy_size = (std::min)(old_size, new_size);
    move_construct_elements_of_array(result, pts, copy_size);

    // Default-construct remaining elements.
    if (new_size > old_size) {
      default_construct_elements_of_array(result + copy_size, new_size - old_size);
    }

    // Delete old elements.
    conditional_aligned_delete<T, Align>(pts, old_size);
  }
  EIGEN_CATCH(...)
  {
    conditional_aligned_free<Align>(result);
    EIGEN_THROW;
  }

  return result;
}


template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_new_auto(std::size_t size)
{
  if(size==0)
    return 0; // short-cut. Also fixes Bug 884
  check_size_for_overflow<T>(size);
  T *result = static_cast<T*>(conditional_aligned_malloc<Align>(sizeof(T)*size));
  if(NumTraits<T>::RequireInitialization)
  {
    EIGEN_TRY
    {
      default_construct_elements_of_array(result, size);
    }
    EIGEN_CATCH(...)
    {
      conditional_aligned_free<Align>(result);
      EIGEN_THROW;
    }
  }
  return result;
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline T* conditional_aligned_realloc_new_auto(T* pts, std::size_t new_size, std::size_t old_size)
{
  if (NumTraits<T>::RequireInitialization) {
    return conditional_aligned_realloc_new<T, Align>(pts, new_size, old_size);
  }

  check_size_for_overflow<T>(new_size);
  check_size_for_overflow<T>(old_size);
  return static_cast<T*>(conditional_aligned_realloc<Align>(static_cast<void*>(pts), sizeof(T)*new_size, sizeof(T)*old_size));
}

template<typename T, bool Align> EIGEN_DEVICE_FUNC inline void conditional_aligned_delete_auto(T *ptr, std::size_t size)
{
  if(NumTraits<T>::RequireInitialization)
    destruct_elements_of_array<T>(ptr, size);
  conditional_aligned_free<Align>(ptr);
}


/****************************************************************************/

/** \internal Returns the index of the first element of the array that is well aligned with respect to the requested \a Alignment.
  *
  * \tparam Alignment requested alignment in Bytes.
  * \param array the address of the start of the array
  * \param size the size of the array
  *
  * \note If no element of the array is well aligned, or the requested alignment is not a multiple of a scalar,
  * the size of the array is returned. For example with SSE, the requested alignment is typically 16-bytes. If
  * the packet size for the given scalar type is 1, then everything is considered well aligned.
  *
  * \note Otherwise, if the Alignment is larger than the scalar size, we rely on the assumption that sizeof(Scalar) is a
  * power of 2. On the other hand, we do not assume that the array address is a multiple of sizeof(Scalar), as that fails for
  * example with Scalar=double on certain 32-bit platforms, see bug #79.
  *
  * There is also the variant first_aligned(const MatrixBase&) defined in DenseCoeffsBase.h.
  * \sa first_default_aligned()
  */
template<int Alignment, typename Scalar, typename Index>
EIGEN_DEVICE_FUNC inline Index first_aligned(const Scalar* array, Index size)
{
  const Index ScalarSize = sizeof(Scalar);
  const Index AlignmentSize = Alignment / ScalarSize;
  const Index AlignmentMask = AlignmentSize-1;

  if(AlignmentSize<=1)
  {
    // Either the requested alignment is smaller than a scalar, or it exactly matches a single scalar,
    // so that all elements of the array have the same alignment.
    return 0;
  }
  else if( (std::uintptr_t(array) & (sizeof(Scalar)-1)) || (Alignment%ScalarSize)!=0)
  {
    // The array is not aligned to the size of a single scalar, or the requested alignment is not a multiple of the scalar size.
    // Consequently, no element of the array is well aligned.
    return size;
  }
  else
  {
    Index first = (AlignmentSize - (Index((std::uintptr_t(array)/sizeof(Scalar))) & AlignmentMask)) & AlignmentMask;
    return (first < size) ? first : size;
  }
}
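
// Worked example (illustrative, not from the Eigen sources): for Scalar = float
// and Alignment = 16, AlignmentSize == 4. If `array` sits at an address whose
// value counted in floats is congruent to 3 modulo 4, then
// first = (4 - 3) & 3 == 1: skipping one element brings the rest of the array
// onto a 16-byte boundary, so a vectorized loop can start at index 1 and handle
// element 0 in a scalar prologue.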

/** \internal Returns the index of the first element of the array that is well aligned with respect to the largest packet requirement.
  * \sa first_aligned(Scalar*,Index) and first_default_aligned(DenseBase<Derived>) */
template<typename Scalar, typename Index>
EIGEN_DEVICE_FUNC inline Index first_default_aligned(const Scalar* array, Index size)
{
  typedef typename packet_traits<Scalar>::type DefaultPacketType;
  return first_aligned<unpacket_traits<DefaultPacketType>::alignment>(array, size);
}

/** \internal Returns the smallest integer multiple of \a base that is greater than or equal to \a size
  */
template<typename Index>
inline Index first_multiple(Index size, Index base)
{
  return ((size+base-1)/base)*base;
}
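
// For instance (illustrative, not from the Eigen sources), first_multiple(13, 4)
// computes ((13 + 3) / 4) * 4 == 16 with integer division, i.e., 13 rounded up
// to the next multiple of 4; a size that is already a multiple, such as 12, is
// returned unchanged.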

// std::copy is much slower than memcpy, so let's introduce a smart_copy which
// uses memcpy on trivial types, i.e., on types that do not require an initialization ctor.
template<typename T, bool UseMemcpy> struct smart_copy_helper;

template<typename T> EIGEN_DEVICE_FUNC void smart_copy(const T* start, const T* end, T* target)
{
  smart_copy_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
}

template<typename T> struct smart_copy_helper<T,true> {
  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
  {
    std::intptr_t size = std::intptr_t(end)-std::intptr_t(start);
    if(size==0) return;
    eigen_internal_assert(start!=0 && end!=0 && target!=0);
    EIGEN_USING_STD(memcpy)
    memcpy(target, start, size);
  }
};

template<typename T> struct smart_copy_helper<T,false> {
  EIGEN_DEVICE_FUNC static inline void run(const T* start, const T* end, T* target)
  { std::copy(start, end, target); }
};

// Intelligent memmove: falls back to std::memmove for POD types, uses std::copy otherwise.
template<typename T, bool UseMemmove> struct smart_memmove_helper;

template<typename T> void smart_memmove(const T* start, const T* end, T* target)
{
  smart_memmove_helper<T,!NumTraits<T>::RequireInitialization>::run(start, end, target);
}

template<typename T> struct smart_memmove_helper<T,true> {
  static inline void run(const T* start, const T* end, T* target)
  {
    std::intptr_t size = std::intptr_t(end)-std::intptr_t(start);
    if(size==0) return;
    eigen_internal_assert(start!=0 && end!=0 && target!=0);
    std::memmove(target, start, size);
  }
};

template<typename T> struct smart_memmove_helper<T,false> {
  static inline void run(const T* start, const T* end, T* target)
  {
    if (std::uintptr_t(target) < std::uintptr_t(start))
    {
      std::copy(start, end, target);
    }
    else
    {
      std::ptrdiff_t count = (std::ptrdiff_t(end)-std::ptrdiff_t(start)) / sizeof(T);
      std::copy_backward(start, end, target + count);
    }
  }
};

template<typename T> EIGEN_DEVICE_FUNC T* smart_move(T* start, T* end, T* target)
{
  return std::move(start, end, target);
}
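
// Illustration (not from the Eigen sources): the overlap handling above mirrors
// memmove's contract. When the destination precedes the source a forward
// std::copy is safe; when the destination follows the source, copying backward
// avoids overwriting not-yet-copied elements. For example:
// \code
//   int data[5] = {1, 2, 3, 4, 5};
//   Eigen::internal::smart_memmove(data, data + 4, data + 1); // data: {1, 1, 2, 3, 4}
// \endcode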


/*****************************************************************************
*** Implementation of runtime stack allocation (falling back to malloc)    ***
*****************************************************************************/

// You can overwrite Eigen's default behavior regarding alloca by defining EIGEN_ALLOCA
// to the appropriate stack allocation function.
#if ! defined EIGEN_ALLOCA && ! defined EIGEN_GPU_COMPILE_PHASE
  #if EIGEN_OS_LINUX || EIGEN_OS_MAC || (defined alloca)
    #define EIGEN_ALLOCA alloca
  #elif EIGEN_COMP_MSVC
    #define EIGEN_ALLOCA _alloca
  #endif
#endif

// With clang -Oz -mthumb, alloca changes the stack pointer in a way that is
// not allowed in Thumb2. -DEIGEN_STACK_ALLOCATION_LIMIT=0 doesn't work because
// the compiler still emits bad code, as stack allocation checks use "<=".
// TODO: Eliminate after https://bugs.llvm.org/show_bug.cgi?id=23772
// is fixed.
#if defined(__clang__) && defined(__thumb__)
  #undef EIGEN_ALLOCA
#endif

// This helper class constructs the allocated memory, and takes care of destructing and freeing the handled data
// at destruction time. In practice this helper class is mainly useful to avoid memory leaks in case of exceptions.
template<typename T> class aligned_stack_memory_handler : noncopyable
{
  public:
    /* Creates a stack_memory_handler responsible for the buffer \a ptr of size \a size.
     * Note that \a ptr can be 0 regardless of the other parameters.
     * This constructor takes care of constructing/initializing the elements of the buffer if required by the scalar type T (see NumTraits<T>::RequireInitialization).
     * In this case, the buffer elements will also be destructed when this handler is destructed.
     * Finally, if \a dealloc is true, then the pointer \a ptr is freed.
     **/
    EIGEN_DEVICE_FUNC
    aligned_stack_memory_handler(T* ptr, std::size_t size, bool dealloc)
      : m_ptr(ptr), m_size(size), m_deallocate(dealloc)
    {
      if(NumTraits<T>::RequireInitialization && m_ptr)
        Eigen::internal::default_construct_elements_of_array(m_ptr, size);
    }
    EIGEN_DEVICE_FUNC
    ~aligned_stack_memory_handler()
    {
      if(NumTraits<T>::RequireInitialization && m_ptr)
        Eigen::internal::destruct_elements_of_array<T>(m_ptr, m_size);
      if(m_deallocate)
        Eigen::internal::aligned_free(m_ptr);
    }
  protected:
    T* m_ptr;
    std::size_t m_size;
    bool m_deallocate;
};

#ifdef EIGEN_ALLOCA

template<typename Xpr, int NbEvaluations,
         bool MapExternalBuffer = nested_eval<Xpr,NbEvaluations>::Evaluate && Xpr::MaxSizeAtCompileTime==Dynamic
         >
struct local_nested_eval_wrapper
{
  static constexpr bool NeedExternalBuffer = false;
  typedef typename Xpr::Scalar Scalar;
  typedef typename nested_eval<Xpr,NbEvaluations>::type ObjectType;
  ObjectType object;

  EIGEN_DEVICE_FUNC
  local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr) : object(xpr)
  {
    EIGEN_UNUSED_VARIABLE(ptr);
    eigen_internal_assert(ptr==0);
  }
};

template<typename Xpr, int NbEvaluations>
struct local_nested_eval_wrapper<Xpr,NbEvaluations,true>
{
  static constexpr bool NeedExternalBuffer = true;
  typedef typename Xpr::Scalar Scalar;
  typedef typename plain_object_eval<Xpr>::type PlainObject;
  typedef Map<PlainObject,EIGEN_DEFAULT_ALIGN_BYTES> ObjectType;
  ObjectType object;

  EIGEN_DEVICE_FUNC
  local_nested_eval_wrapper(const Xpr& xpr, Scalar* ptr)
    : object(ptr==0 ? reinterpret_cast<Scalar*>(Eigen::internal::aligned_malloc(sizeof(Scalar)*xpr.size())) : ptr, xpr.rows(), xpr.cols()),
      m_deallocate(ptr==0)
  {
    if(NumTraits<Scalar>::RequireInitialization && object.data())
      Eigen::internal::default_construct_elements_of_array(object.data(), object.size());
    object = xpr;
  }

  EIGEN_DEVICE_FUNC
  ~local_nested_eval_wrapper()
  {
    if(NumTraits<Scalar>::RequireInitialization && object.data())
      Eigen::internal::destruct_elements_of_array(object.data(), object.size());
    if(m_deallocate)
      Eigen::internal::aligned_free(object.data());
  }

private:
  bool m_deallocate;
};

#endif // EIGEN_ALLOCA

template<typename T> class scoped_array : noncopyable
{
  T* m_ptr;
public:
  explicit scoped_array(std::ptrdiff_t size)
  {
    m_ptr = new T[size];
  }
  ~scoped_array()
  {
    delete[] m_ptr;
  }
  T& operator[](std::ptrdiff_t i) { return m_ptr[i]; }
  const T& operator[](std::ptrdiff_t i) const { return m_ptr[i]; }
  T* &ptr() { return m_ptr; }
  const T* ptr() const { return m_ptr; }
  operator const T*() const { return m_ptr; }
};

template<typename T> void swap(scoped_array<T> &a,scoped_array<T> &b)
{
  std::swap(a.ptr(),b.ptr());
}

} // end namespace internal

/** \internal
  * Declares, allocates and constructs an aligned buffer named NAME of SIZE elements of type TYPE on the stack
  * if the size in bytes is smaller than EIGEN_STACK_ALLOCATION_LIMIT, and if stack allocation is supported by the platform
  * (currently, this is Linux, OSX and Visual Studio only). Otherwise the memory is allocated on the heap.
  * The allocated buffer is automatically deleted when exiting the scope of this declaration.
  * If BUFFER is non null, then the declared variable is simply an alias for BUFFER, and no allocation/deletion occurs.
  * Here is an example:
  * \code
  * {
  *   ei_declare_aligned_stack_constructed_variable(float,data,size,0);
  *   // use data[0] to data[size-1]
  * }
  * \endcode
  * The underlying stack allocation function can be controlled with the EIGEN_ALLOCA preprocessor token.
  *
  * The macro ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) is analogous to
  * \code
  *   typename internal::nested_eval<XPR_T,N>::type NAME(XPR);
  * \endcode
  * with the advantage of using aligned stack allocation even if the maximal size of XPR at compile time is unknown.
  */
#ifdef EIGEN_ALLOCA

  #if EIGEN_DEFAULT_ALIGN_BYTES>0
    // We always manually re-align the result of EIGEN_ALLOCA.
    // If alloca is already aligned, the compiler should be smart enough to optimize away the re-alignment.
    #define EIGEN_ALIGNED_ALLOCA(SIZE) reinterpret_cast<void*>((std::uintptr_t(EIGEN_ALLOCA(SIZE+EIGEN_DEFAULT_ALIGN_BYTES-1)) + EIGEN_DEFAULT_ALIGN_BYTES-1) & ~(std::size_t(EIGEN_DEFAULT_ALIGN_BYTES-1)))
  #else
    #define EIGEN_ALIGNED_ALLOCA(SIZE) EIGEN_ALLOCA(SIZE)
  #endif

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? (BUFFER) \
               : reinterpret_cast<TYPE*>( \
                      (sizeof(TYPE)*SIZE<=EIGEN_STACK_ALLOCATION_LIMIT) ? EIGEN_ALIGNED_ALLOCA(sizeof(TYPE)*SIZE) \
                    : Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE) ); \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,sizeof(TYPE)*SIZE>EIGEN_STACK_ALLOCATION_LIMIT)


  #define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) \
    Eigen::internal::local_nested_eval_wrapper<XPR_T,N> EIGEN_CAT(NAME,_wrapper)(XPR, reinterpret_cast<typename XPR_T::Scalar*>( \
      ( (Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::NeedExternalBuffer) && ((sizeof(typename XPR_T::Scalar)*XPR.size())<=EIGEN_STACK_ALLOCATION_LIMIT) ) \
        ? EIGEN_ALIGNED_ALLOCA( sizeof(typename XPR_T::Scalar)*XPR.size() ) : 0 ) ) ; \
    typename Eigen::internal::local_nested_eval_wrapper<XPR_T,N>::ObjectType NAME(EIGEN_CAT(NAME,_wrapper).object)

#else

  #define ei_declare_aligned_stack_constructed_variable(TYPE,NAME,SIZE,BUFFER) \
    Eigen::internal::check_size_for_overflow<TYPE>(SIZE); \
    TYPE* NAME = (BUFFER)!=0 ? BUFFER : reinterpret_cast<TYPE*>(Eigen::internal::aligned_malloc(sizeof(TYPE)*SIZE)); \
    Eigen::internal::aligned_stack_memory_handler<TYPE> EIGEN_CAT(NAME,_stack_memory_destructor)((BUFFER)==0 ? NAME : 0,SIZE,true)


#define ei_declare_local_nested_eval(XPR_T,XPR,N,NAME) typename Eigen::internal::nested_eval<XPR_T,N>::type NAME(XPR)

#endif
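
// Illustration (not from the Eigen sources): EIGEN_ALIGNED_ALLOCA over-allocates
// by EIGEN_DEFAULT_ALIGN_BYTES-1 bytes and rounds the resulting address up, the
// classic "allocate extra, then mask" trick. For instance, with a 16-byte
// default alignment, an alloca result of 0x1004 becomes
// (0x1004 + 15) & ~15 == 0x1010, which is 16-byte aligned and still lies within
// the over-allocated region.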


/*****************************************************************************
*** Implementation of EIGEN_MAKE_ALIGNED_OPERATOR_NEW [_IF]                ***
*****************************************************************************/

#if EIGEN_HAS_CXX17_OVERALIGN

// C++17 -> no need to bother about alignment anymore :)

#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size)

#else

// HIP does not support new/delete on device.
#if EIGEN_MAX_ALIGN_BYTES!=0 && !defined(EIGEN_HIP_DEVICE_COMPILE)
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      EIGEN_DEVICE_FUNC \
      void* operator new(std::size_t size, const std::nothrow_t&) EIGEN_NO_THROW { \
        EIGEN_TRY { return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); } \
        EIGEN_CATCH (...) { return 0; } \
      }
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) \
      EIGEN_DEVICE_FUNC \
      void *operator new(std::size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      EIGEN_DEVICE_FUNC \
      void *operator new[](std::size_t size) { \
        return Eigen::internal::conditional_aligned_malloc<NeedsToAlign>(size); \
      } \
      EIGEN_DEVICE_FUNC \
      void operator delete(void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete[](void * ptr) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete(void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete[](void * ptr, std::size_t /* sz */) EIGEN_NO_THROW { Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); } \
      /* in-place new and delete. since (at least afaik) there is no actual   */ \
      /* memory allocated we can safely let the default implementation handle */ \
      /* this particular case. */ \
      EIGEN_DEVICE_FUNC \
      static void *operator new(std::size_t size, void *ptr) { return ::operator new(size,ptr); } \
      EIGEN_DEVICE_FUNC \
      static void *operator new[](std::size_t size, void* ptr) { return ::operator new[](size,ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete(void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete(memory,ptr); } \
      EIGEN_DEVICE_FUNC \
      void operator delete[](void * memory, void *ptr) EIGEN_NO_THROW { return ::operator delete[](memory,ptr); } \
      /* nothrow-new (returns zero instead of std::bad_alloc) */ \
      EIGEN_MAKE_ALIGNED_OPERATOR_NEW_NOTHROW(NeedsToAlign) \
      EIGEN_DEVICE_FUNC \
      void operator delete(void *ptr, const std::nothrow_t&) EIGEN_NO_THROW { \
        Eigen::internal::conditional_aligned_free<NeedsToAlign>(ptr); \
      } \
      typedef void eigen_aligned_operator_new_marker_type;
#else
  #define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign)
#endif

#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(true)
#define EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF_VECTORIZABLE_FIXED_SIZE(Scalar,Size) \
  EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(bool( \
        ((Size)!=Eigen::Dynamic) && \
        (((EIGEN_MAX_ALIGN_BYTES>=16) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES  )==0)) || \
         ((EIGEN_MAX_ALIGN_BYTES>=32) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/2)==0)) || \
         ((EIGEN_MAX_ALIGN_BYTES>=64) && ((sizeof(Scalar)*(Size))%(EIGEN_MAX_ALIGN_BYTES/4)==0)) )))

#endif
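
// Usage sketch (illustrative, not from the Eigen sources): pre-C++17, a class
// with a fixed-size vectorizable Eigen member opts into aligned dynamic
// allocation like so:
// \code
//   struct Camera {
//     Eigen::Matrix4f projection;  // fixed-size vectorizable member
//     EIGEN_MAKE_ALIGNED_OPERATOR_NEW
//   };
//   Camera* cam = new Camera;      // storage is suitably over-aligned
//   delete cam;
// \endcode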


/** \class aligned_allocator
  * \ingroup Core_Module
  *
  * \brief STL compatible allocator to use with types requiring a non-standard alignment.
  *
  * The memory is aligned as for dynamically aligned matrix/array types such as MatrixXd.
  * By default, it will thus provide at least 16 bytes alignment and more in following cases:
  *  - 32 bytes alignment if AVX is enabled.
  *  - 64 bytes alignment if AVX512 is enabled.
  *
  * This can be controlled using the \c EIGEN_MAX_ALIGN_BYTES macro as documented
  * \link TopicPreprocessorDirectivesPerformance there \endlink.
  *
  * Example:
  * \code
  * // Matrix4f requires 16 bytes alignment:
  * std::map< int, Matrix4f, std::less<int>, aligned_allocator<std::pair<const int, Matrix4f> > > my_map_mat4;
  * // Vector3f does not require 16 bytes alignment, no need to use Eigen's allocator:
  * std::map< int, Vector3f > my_map_vec3;
  * \endcode
  *
  * \sa \blank \ref TopicStlContainers.
  */
template<class T>
class aligned_allocator : public std::allocator<T>
{
public:
  typedef std::size_t     size_type;
  typedef std::ptrdiff_t  difference_type;
  typedef T*              pointer;
  typedef const T*        const_pointer;
  typedef T&              reference;
  typedef const T&        const_reference;
  typedef T               value_type;

  template<class U>
  struct rebind
  {
    typedef aligned_allocator<U> other;
  };

  aligned_allocator() : std::allocator<T>() {}

  aligned_allocator(const aligned_allocator& other) : std::allocator<T>(other) {}

  template<class U>
  aligned_allocator(const aligned_allocator<U>& other) : std::allocator<T>(other) {}

  ~aligned_allocator() {}

  #if EIGEN_COMP_GNUC_STRICT && EIGEN_GNUC_STRICT_AT_LEAST(7,0,0)
  // In gcc std::allocator::max_size() is bugged, making gcc trigger a warning:
  // eigen/Eigen/src/Core/util/Memory.h:189:12: warning: argument 1 value '18446744073709551612' exceeds maximum object size 9223372036854775807
  // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87544
  size_type max_size() const {
    return (std::numeric_limits<std::ptrdiff_t>::max)()/sizeof(T);
  }
  #endif

  pointer allocate(size_type num, const void* /*hint*/ = 0)
  {
    internal::check_size_for_overflow<T>(num);
    return static_cast<pointer>( internal::aligned_malloc(num * sizeof(T)) );
  }

  void deallocate(pointer p, size_type /*num*/)
  {
    internal::aligned_free(p);
  }
};
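
// Usage sketch (illustrative, not from the Eigen sources): the allocator drops
// into standard containers, which is the usual pre-C++17 way to keep
// fixed-size vectorizable Eigen types over-aligned inside a std::vector:
// \code
//   std::vector<Eigen::Vector4f, Eigen::aligned_allocator<Eigen::Vector4f> > v(100);
//   v[0].setZero();
// \endcode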

//---------- Cache sizes ----------

#if !defined(EIGEN_NO_CPUID)
#  if EIGEN_COMP_GNUC && EIGEN_ARCH_i386_OR_x86_64
#    if defined(__PIC__) && EIGEN_ARCH_i386
       // Case for x86 with PIC
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchgl %%ebx, %k1;cpuid; xchgl %%ebx,%k1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "a" (func), "c" (id));
#    elif defined(__PIC__) && EIGEN_ARCH_x86_64
       // Case for x64 with PIC. In theory this is only a problem with recent gcc and with medium or large code model, not with the default small code model.
       // However, we cannot detect which code model is used, and the xchg overhead is negligible anyway.
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("xchg{q}\t{%%}rbx, %q1; cpuid; xchg{q}\t{%%}rbx, %q1": "=a" (abcd[0]), "=&r" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id));
#    else
       // Case for x86_64 or x86 w/o PIC
#      define EIGEN_CPUID(abcd,func,id) \
         __asm__ __volatile__ ("cpuid": "=a" (abcd[0]), "=b" (abcd[1]), "=c" (abcd[2]), "=d" (abcd[3]) : "0" (func), "2" (id) );
#    endif
#  elif EIGEN_COMP_MSVC
#    if EIGEN_ARCH_i386_OR_x86_64
#      define EIGEN_CPUID(abcd,func,id) __cpuidex((int*)abcd,func,id)
#    endif
#  endif
#endif

namespace internal {

#ifdef EIGEN_CPUID

inline bool cpuid_is_vendor(int abcd[4], const int vendor[3])
{
  return abcd[1]==vendor[0] && abcd[3]==vendor[1] && abcd[2]==vendor[2];
}

inline void queryCacheSizes_intel_direct(int& l1, int& l2, int& l3)
{
  int abcd[4];
  l1 = l2 = l3 = 0;
  int cache_id = 0;
  int cache_type = 0;
  do {
    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
    EIGEN_CPUID(abcd,0x4,cache_id);
    cache_type = (abcd[0] & 0x0F) >> 0;
    if(cache_type==1||cache_type==3) // data or unified cache
    {
      int cache_level = (abcd[0] & 0xE0) >> 5;        // A[7:5]
      int ways        = (abcd[1] & 0xFFC00000) >> 22; // B[31:22]
      int partitions  = (abcd[1] & 0x003FF000) >> 12; // B[21:12]
      int line_size   = (abcd[1] & 0x00000FFF) >>  0; // B[11:0]
      int sets        = (abcd[2]);                    // C[31:0]

      int cache_size = (ways+1) * (partitions+1) * (line_size+1) * (sets+1);

      switch(cache_level)
      {
        case 1: l1 = cache_size; break;
        case 2: l2 = cache_size; break;
        case 3: l3 = cache_size; break;
        default: break;
      }
    }
    cache_id++;
  } while(cache_type>0 && cache_id<16);
}

inline void queryCacheSizes_intel_codes(int& l1, int& l2, int& l3)
{
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
  l1 = l2 = l3 = 0;
  EIGEN_CPUID(abcd,0x00000002,0);
  unsigned char * bytes = reinterpret_cast<unsigned char *>(abcd)+2;
  bool check_for_p2_core2 = false;
  for(int i=0; i<14; ++i)
  {
    switch(bytes[i])
    {
      case 0x0A: l1 = 8; break;     // 0Ah   data L1 cache, 8 KB, 2 ways, 32 byte lines
      case 0x0C: l1 = 16; break;    // 0Ch   data L1 cache, 16 KB, 4 ways, 32 byte lines
      case 0x0E: l1 = 24; break;    // 0Eh   data L1 cache, 24 KB, 6 ways, 64 byte lines
      case 0x10: l1 = 16; break;    // 10h   data L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
      case 0x15: l1 = 16; break;    // 15h   code L1 cache, 16 KB, 4 ways, 32 byte lines (IA-64)
      case 0x2C: l1 = 32; break;    // 2Ch   data L1 cache, 32 KB, 8 ways, 64 byte lines
      case 0x30: l1 = 32; break;    // 30h   code L1 cache, 32 KB, 8 ways, 64 byte lines
      case 0x60: l1 = 16; break;    // 60h   data L1 cache, 16 KB, 8 ways, 64 byte lines, sectored
      case 0x66: l1 = 8; break;     // 66h   data L1 cache, 8 KB, 4 ways, 64 byte lines, sectored
      case 0x67: l1 = 16; break;    // 67h   data L1 cache, 16 KB, 4 ways, 64 byte lines, sectored
      case 0x68: l1 = 32; break;    // 68h   data L1 cache, 32 KB, 4 ways, 64 byte lines, sectored
      case 0x1A: l2 = 96; break;    // code and data L2 cache, 96 KB, 6 ways, 64 byte lines (IA-64)
      case 0x22: l3 = 512; break;   // code and data L3 cache, 512 KB, 4 ways (!), 64 byte lines, dual-sectored
      case 0x23: l3 = 1024; break;  // code and data L3 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x25: l3 = 2048; break;  // code and data L3 cache, 2048 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x29: l3 = 4096; break;  // code and data L3 cache, 4096 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x39: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 64 byte lines, sectored
      case 0x3A: l2 = 192; break;   // code and data L2 cache, 192 KB, 6 ways, 64 byte lines, sectored
      case 0x3B: l2 = 128; break;   // code and data L2 cache, 128 KB, 2 ways, 64 byte lines, sectored
      case 0x3C: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 64 byte lines, sectored
      case 0x3D: l2 = 384; break;   // code and data L2 cache, 384 KB, 6 ways, 64 byte lines, sectored
      case 0x3E: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines, sectored
      case 0x40: l2 = 0; break;     // no integrated L2 cache (P6 core) or L3 cache (P4 core)
      case 0x41: l2 = 128; break;   // code and data L2 cache, 128 KB, 4 ways, 32 byte lines
      case 0x42: l2 = 256; break;   // code and data L2 cache, 256 KB, 4 ways, 32 byte lines
      case 0x43: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 32 byte lines
      case 0x44: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 4 ways, 32 byte lines
      case 0x45: l2 = 2048; break;  // code and data L2 cache, 2048 KB, 4 ways, 32 byte lines
      case 0x46: l3 = 4096; break;  // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines
      case 0x47: l3 = 8192; break;  // code and data L3 cache, 8192 KB, 8 ways, 64 byte lines
      case 0x48: l2 = 3072; break;  // code and data L2 cache, 3072 KB, 12 ways, 64 byte lines
      case 0x49: if(l2!=0) l3 = 4096; else {check_for_p2_core2=true; l3 = l2 = 4096;} break; // code and data L3 cache, 4096 KB, 16 ways, 64 byte lines (P4) or L2 for core2
      case 0x4A: l3 = 6144; break;  // code and data L3 cache, 6144 KB, 12 ways, 64 byte lines
      case 0x4B: l3 = 8192; break;  // code and data L3 cache, 8192 KB, 16 ways, 64 byte lines
      case 0x4C: l3 = 12288; break; // code and data L3 cache, 12288 KB, 12 ways, 64 byte lines
      case 0x4D: l3 = 16384; break; // code and data L3 cache, 16384 KB, 16 ways, 64 byte lines
      case 0x4E: l2 = 6144; break;  // code and data L2 cache, 6144 KB, 24 ways, 64 byte lines
      case 0x78: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 4 ways, 64 byte lines
      case 0x79: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7A: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7B: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7C: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines, dual-sectored
      case 0x7D: l2 = 2048; break;  // code and data L2 cache, 2048 KB, 8 ways, 64 byte lines
      case 0x7E: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 128 byte lines, sect. (IA-64)
      case 0x7F: l2 = 512; break;   // code and data L2 cache, 512 KB, 2 ways, 64 byte lines
      case 0x80: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 64 byte lines
      case 0x81: l2 = 128; break;   // code and data L2 cache, 128 KB, 8 ways, 32 byte lines
      case 0x82: l2 = 256; break;   // code and data L2 cache, 256 KB, 8 ways, 32 byte lines
      case 0x83: l2 = 512; break;   // code and data L2 cache, 512 KB, 8 ways, 32 byte lines
      case 0x84: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 8 ways, 32 byte lines
      case 0x85: l2 = 2048; break;  // code and data L2 cache, 2048 KB, 8 ways, 32 byte lines
      case 0x86: l2 = 512; break;   // code and data L2 cache, 512 KB, 4 ways, 64 byte lines
      case 0x87: l2 = 1024; break;  // code and data L2 cache, 1024 KB, 8 ways, 64 byte lines
      case 0x88: l3 = 2048; break;  // code and data L3 cache, 2048 KB, 4 ways, 64 byte lines (IA-64)
      case 0x89: l3 = 4096; break;  // code and data L3 cache, 4096 KB, 4 ways, 64 byte lines (IA-64)
      case 0x8A: l3 = 8192; break;  // code and data L3 cache, 8192 KB, 4 ways, 64 byte lines (IA-64)
      case 0x8D: l3 = 3072; break;  // code and data L3 cache, 3072 KB, 12 ways, 128 byte lines (IA-64)

      default: break;
    }
  }
  if(check_for_p2_core2 && l2 == l3)
    l3 = 0;
  l1 *= 1024;
  l2 *= 1024;
  l3 *= 1024;
}

inline void queryCacheSizes_intel(int& l1, int& l2, int& l3, int max_std_funcs)
{
  if(max_std_funcs>=4)
    queryCacheSizes_intel_direct(l1,l2,l3);
  else if(max_std_funcs>=2)
    queryCacheSizes_intel_codes(l1,l2,l3);
  else
    l1 = l2 = l3 = 0;
}

inline void queryCacheSizes_amd(int& l1, int& l2, int& l3)
{
  int abcd[4];
  abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;

  // First query the max supported function.
  EIGEN_CPUID(abcd,0x80000000,0);
  if(static_cast<numext::uint32_t>(abcd[0]) >= static_cast<numext::uint32_t>(0x80000006))
  {
    EIGEN_CPUID(abcd,0x80000005,0);
    l1 = (abcd[2] >> 24) * 1024; // C[31:24] = L1 size in KB
    abcd[0] = abcd[1] = abcd[2] = abcd[3] = 0;
    EIGEN_CPUID(abcd,0x80000006,0);
    l2 = (abcd[2] >> 16) * 1024; // C[31:16] = L2 cache size in KB
    l3 = ((abcd[3] & 0xFFFC0000) >> 18) * 512 * 1024; // D[31:18] = L3 cache size in units of 512 KB
  }
  else
  {
    l1 = l2 = l3 = 0;
  }
}
#endif

/** \internal
  * Queries and returns the cache sizes in Bytes of the L1, L2, and L3 data caches respectively. */
inline void queryCacheSizes(int& l1, int& l2, int& l3)
{
  #ifdef EIGEN_CPUID
  int abcd[4];
  const int GenuineIntel[] = {0x756e6547, 0x49656e69, 0x6c65746e};
  const int AuthenticAMD[] = {0x68747541, 0x69746e65, 0x444d4163};
  const int AMDisbetter_[] = {0x69444d41, 0x74656273, 0x21726574}; // "AMDisbetter!"

  // identify the CPU vendor
  EIGEN_CPUID(abcd,0x0,0);
  int max_std_funcs = abcd[0];
  if(cpuid_is_vendor(abcd,GenuineIntel))
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);
  else if(cpuid_is_vendor(abcd,AuthenticAMD) || cpuid_is_vendor(abcd,AMDisbetter_))
    queryCacheSizes_amd(l1,l2,l3);
  else
    // by default let's use Intel's API
    queryCacheSizes_intel(l1,l2,l3,max_std_funcs);

  // here is the list of other vendors:
//   ||cpuid_is_vendor(abcd,"VIA VIA VIA ")
//   ||cpuid_is_vendor(abcd,"CyrixInstead")
//   ||cpuid_is_vendor(abcd,"CentaurHauls")
//   ||cpuid_is_vendor(abcd,"GenuineTMx86")
//   ||cpuid_is_vendor(abcd,"TransmetaCPU")
//   ||cpuid_is_vendor(abcd,"RiseRiseRise")
//   ||cpuid_is_vendor(abcd,"Geode by NSC")
//   ||cpuid_is_vendor(abcd,"SiS SiS SiS ")
//   ||cpuid_is_vendor(abcd,"UMC UMC UMC ")
//   ||cpuid_is_vendor(abcd,"NexGenDriven")
  #else
  l1 = l2 = l3 = -1;
  #endif
}

/** \internal
  * \returns the size in Bytes of the L1 data cache. */
inline int queryL1CacheSize()
{
  int l1(-1), l2, l3;
  queryCacheSizes(l1,l2,l3);
  return l1;
}

/** \internal
  * \returns the size in Bytes of the L2 or L3 cache, whichever is the largest top-level cache present. */
inline int queryTopLevelCacheSize()
{
  int l1, l2(-1), l3(-1);
  queryCacheSizes(l1,l2,l3);
  return (std::max)(l2,l3);
}
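
// Usage sketch (illustrative, not from the Eigen sources): these queries feed
// Eigen's cache-aware blocking heuristics for matrix products, but they can
// also be called directly:
// \code
//   int l1, l2, l3;
//   Eigen::internal::queryCacheSizes(l1, l2, l3); // -1 per level if CPUID is unavailable
//   int top = Eigen::internal::queryTopLevelCacheSize();
// \endcode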


/** \internal
  * This wraps C++20's std::construct_at, with a placement-new fallback for earlier standards.
  */
#if EIGEN_COMP_CXXVER >= 20
using std::construct_at;
#else
template<class T, class... Args>
EIGEN_DEVICE_FUNC T* construct_at( T* p, Args&&... args )
{
  return ::new (const_cast<void*>(static_cast<const volatile void*>(p)))
    T(std::forward<Args>(args)...);
}
#endif

/** \internal
  * This wraps C++17's std::destroy_at. If it is not available, the destructor is called directly.
  */
#if EIGEN_COMP_CXXVER >= 17
using std::destroy_at;
#else
template<class T>
EIGEN_DEVICE_FUNC void destroy_at(T* p)
{
  p->~T();
}
#endif
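
// Usage sketch (illustrative, not from the Eigen sources): construct_at and
// destroy_at pair placement construction with explicit destruction on raw,
// suitably aligned storage:
// \code
//   alignas(double) unsigned char storage[sizeof(double)];
//   double* d = Eigen::internal::construct_at(reinterpret_cast<double*>(storage), 3.14);
//   Eigen::internal::destroy_at(d);
// \endcode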

} // end namespace internal

} // end namespace Eigen

#endif // EIGEN_MEMORY_H