// GenericPacketMath.h
// (Listing taken from the generated documentation page for this file.)
1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
5 // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
6 //
7 // This Source Code Form is subject to the terms of the Mozilla
8 // Public License v. 2.0. If a copy of the MPL was not distributed
9 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 
11 #ifndef EIGEN_GENERIC_PACKET_MATH_H
12 #define EIGEN_GENERIC_PACKET_MATH_H
13 
14 #include "./InternalHeaderCheck.h"
15 
16 namespace Eigen {
17 
18 namespace internal {
19 
// Hook macros named after aligned/unaligned packet loads and stores.
// Each one defaults to an empty expansion unless it has already been defined
// before this header is processed.
#if !defined(EIGEN_DEBUG_ALIGNED_LOAD)
#define EIGEN_DEBUG_ALIGNED_LOAD
#endif

#if !defined(EIGEN_DEBUG_UNALIGNED_LOAD)
#define EIGEN_DEBUG_UNALIGNED_LOAD
#endif

#if !defined(EIGEN_DEBUG_ALIGNED_STORE)
#define EIGEN_DEBUG_ALIGNED_STORE
#endif

#if !defined(EIGEN_DEBUG_UNALIGNED_STORE)
#define EIGEN_DEBUG_UNALIGNED_STORE
#endif
43 
/** Baseline capability flags shared by packet traits.
  * A flag equal to 1 is on by default, a flag equal to 0 is off by default.
  * Traits structs deriving from this one may shadow individual flags with
  * their own enumerators (the generic packet_traits<T> does so). */
struct default_packet_traits {
  enum {
    // --- flags that default to 1 ---
    HasAdd = 1, HasSub = 1, HasShift = 1, HasMul = 1,
    HasNegate = 1, HasAbs = 1, HasAbs2 = 1,
    HasMin = 1, HasMax = 1,
    HasConj = 1, HasSetLinear = 1, HasSign = 1,

    // --- flags that default to 0 ---
    HasArg = 0, HasAbsDiff = 0, HasBlend = 0,
    // This flag is used to indicate whether packet comparison is supported.
    // pcmp_eq, pcmp_lt and pcmp_le should be defined for it to be true.
    HasCmp = 0,

    HasDiv = 0, HasReciprocal = 0, HasSqrt = 0, HasRsqrt = 0,
    HasExp = 0, HasExpm1 = 0,
    HasLog = 0, HasLog1p = 0, HasLog10 = 0,
    HasPow = 0,

    HasSin = 0, HasCos = 0, HasTan = 0,
    HasASin = 0, HasACos = 0, HasATan = 0, HasATanh = 0,
    HasSinh = 0, HasCosh = 0, HasTanh = 0,
    HasLGamma = 0, HasDiGamma = 0, HasZeta = 0, HasPolygamma = 0,
    HasErf = 0, HasErfc = 0, HasNdtri = 0, HasBessel = 0,
    HasIGamma = 0, HasIGammaDerA = 0, HasGammaSampleDerAlpha = 0,
    HasIGammac = 0, HasBetaInc = 0,

    HasRound = 0, HasRint = 0, HasFloor = 0, HasCeil = 0
  };
};
107 
108 template<typename T> struct packet_traits : default_packet_traits
109 {
110  typedef T type;
111  typedef T half;
112  enum {
113  Vectorizable = 0,
114  size = 1,
115  AlignedOnScalar = 0,
116  };
117  enum {
118  HasAdd = 0,
119  HasSub = 0,
120  HasMul = 0,
121  HasNegate = 0,
122  HasAbs = 0,
123  HasAbs2 = 0,
124  HasMin = 0,
125  HasMax = 0,
126  HasConj = 0,
127  HasSetLinear = 0
128  };
129 };
130 
131 template<typename T> struct packet_traits<const T> : packet_traits<T> { };
132 
/** Generic reverse traits: T is treated as a packet holding a single T
  * (size 1, alignment 1), not vectorizable, without masked load/store. */
template <typename T>
struct unpacket_traits {
  using type = T;
  using half = T;
  enum {
    size = 1,
    alignment = 1,
    vectorizable = false,
    masked_load_available = false,
    masked_store_available = false
  };
};

/** A const-qualified type reuses the reverse traits of the unqualified type. */
template <typename T>
struct unpacket_traits<const T> : unpacket_traits<T> {};
148 
152 template <typename Packet>
153 struct is_scalar {
154  using Scalar = typename unpacket_traits<Packet>::type;
155  enum { value = internal::is_same<Packet, Scalar>::value };
156 };
157 
158 // automatically and succinctly define combinations of pcast<SrcPacket,TgtPacket> when
159 // 1) the packets are the same type, or
160 // 2) the packets differ only in sign.
161 // In both of these cases, preinterpret (bit_cast) is equivalent to pcast (static_cast)
162 template <typename SrcPacket, typename TgtPacket,
163  bool Scalar = is_scalar<SrcPacket>::value && is_scalar<TgtPacket>::value>
164 struct is_degenerate_helper : is_same<SrcPacket, TgtPacket> {};
165 template <>
166 struct is_degenerate_helper<int8_t, uint8_t, true> : std::true_type {};
167 template <>
168 struct is_degenerate_helper<int16_t, uint16_t, true> : std::true_type {};
169 template <>
170 struct is_degenerate_helper<int32_t, uint32_t, true> : std::true_type {};
171 template <>
172 struct is_degenerate_helper<int64_t, uint64_t, true> : std::true_type {};
173 
174 template <typename SrcPacket, typename TgtPacket>
175 struct is_degenerate_helper<SrcPacket, TgtPacket, false> {
176  using SrcScalar = typename unpacket_traits<SrcPacket>::type;
177  static constexpr int SrcSize = unpacket_traits<SrcPacket>::size;
178  using TgtScalar = typename unpacket_traits<TgtPacket>::type;
179  static constexpr int TgtSize = unpacket_traits<TgtPacket>::size;
180  static constexpr bool value = is_degenerate_helper<SrcScalar, TgtScalar, true>::value && (SrcSize == TgtSize);
181 };
182 
183 // is_degenerate<T1,T2>::value == is_degenerate<T2,T1>::value
184 template <typename SrcPacket, typename TgtPacket>
185 struct is_degenerate {
186  static constexpr bool value =
187  is_degenerate_helper<SrcPacket, TgtPacket>::value || is_degenerate_helper<TgtPacket, SrcPacket>::value;
188 };
189 
190 template <typename Packet>
191 struct is_half {
192  using Scalar = typename unpacket_traits<Packet>::type;
193  static constexpr int Size = unpacket_traits<Packet>::size;
194  using DefaultPacket = typename packet_traits<Scalar>::type;
195  static constexpr int DefaultSize = unpacket_traits<DefaultPacket>::size;
196  static constexpr bool value = Size < DefaultSize;
197 };
198 
199 template <typename Src, typename Tgt>
200 struct type_casting_traits {
201  enum {
202  VectorizedCast =
203  is_degenerate<Src, Tgt>::value && packet_traits<Src>::Vectorizable && packet_traits<Tgt>::Vectorizable,
204  SrcCoeffRatio = 1,
205  TgtCoeffRatio = 1
206  };
207 };
208 
209 
212 template<typename T, int unique_id = 0>
213 struct eigen_packet_wrapper
214 {
215  EIGEN_ALWAYS_INLINE operator T&() { return m_val; }
216  EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; }
217  EIGEN_ALWAYS_INLINE eigen_packet_wrapper() = default;
218  EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T &v) : m_val(v) {}
219  EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T &v) {
220  m_val = v;
221  return *this;
222  }
223 
224  T m_val;
225 };
226 
227 template <typename Target, typename Packet, bool IsSame = is_same<Target, Packet>::value>
228 struct preinterpret_generic;
229 
230 template <typename Target, typename Packet>
231 struct preinterpret_generic<Target, Packet, false> {
232  // the packets are not the same, attempt scalar bit_cast
233  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Target run(const Packet& a) {
234  return numext::bit_cast<Target, Packet>(a);
235  }
236 };
237 
238 template <typename Packet>
239 struct preinterpret_generic<Packet, Packet, true> {
240  // the packets are the same type: do nothing
241  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
242 };
243 
245 template <typename Target, typename Packet>
246 EIGEN_DEVICE_FUNC inline Target preinterpret(const Packet& a) {
247  return preinterpret_generic<Target, Packet>::run(a);
248 }
249 
250 template <typename SrcPacket, typename TgtPacket, bool Degenerate = is_degenerate<SrcPacket, TgtPacket>::value, bool TgtIsHalf = is_half<TgtPacket>::value>
251 struct pcast_generic;
252 
253 template <typename SrcPacket, typename TgtPacket>
254 struct pcast_generic<SrcPacket, TgtPacket, false, false> {
255  // the packets are not degenerate: attempt scalar static_cast
256  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
257  return cast_impl<SrcPacket, TgtPacket>::run(a);
258  }
259 };
260 
261 template <typename Packet>
262 struct pcast_generic<Packet, Packet, true, false> {
263  // the packets are the same: do nothing
264  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; }
265 };
266 
267 template <typename SrcPacket, typename TgtPacket, bool TgtIsHalf>
268 struct pcast_generic<SrcPacket, TgtPacket, true, TgtIsHalf> {
269  // the packets are degenerate: preinterpret is equivalent to pcast
270  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) { return preinterpret<TgtPacket>(a); }
271 };
272 
273 
274 
276 template <typename SrcPacket, typename TgtPacket>
277 EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a) {
278  return pcast_generic<SrcPacket, TgtPacket>::run(a);
279 }
280 template <typename SrcPacket, typename TgtPacket>
281 EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b) {
282  return pcast_generic<SrcPacket, TgtPacket>::run(a, b);
283 }
284 template <typename SrcPacket, typename TgtPacket>
285 EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c,
286  const SrcPacket& d) {
287  return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d);
288 }
289 template <typename SrcPacket, typename TgtPacket>
290 EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c, const SrcPacket& d,
291  const SrcPacket& e, const SrcPacket& f, const SrcPacket& g,
292  const SrcPacket& h) {
293  return pcast_generic<SrcPacket, TgtPacket>::run(a, b, c, d, e, f, g, h);
294 }
295 
296 template <typename SrcPacket, typename TgtPacket>
297 struct pcast_generic<SrcPacket, TgtPacket, false, true> {
298  // TgtPacket is a half packet of some other type
299  // perform cast and truncate result
300  using DefaultTgtPacket = typename is_half<TgtPacket>::DefaultPacket;
301  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) {
302  return preinterpret<TgtPacket>(pcast<SrcPacket, DefaultTgtPacket>(a));
303  }
304 };
305 
307 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
308 padd(const Packet& a, const Packet& b) { return a+b; }
309 // Avoid compiler warning for boolean algebra.
310 template<> EIGEN_DEVICE_FUNC inline bool
311 padd(const bool& a, const bool& b) { return a || b; }
312 
317 template<typename Packet> EIGEN_DEVICE_FUNC inline
318 std::enable_if_t<unpacket_traits<Packet>::masked_fpops_available, Packet>
319 padd(const Packet& a, const Packet& b, typename unpacket_traits<Packet>::mask_t umask);
320 
321 
323 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
324 psub(const Packet& a, const Packet& b) { return a-b; }
325 
327 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
328 pnegate(const Packet& a) { return -a; }
329 
330 template<> EIGEN_DEVICE_FUNC inline bool
331 pnegate(const bool& a) { return !a; }
332 
334 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
335 pconj(const Packet& a) { return numext::conj(a); }
336 
338 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
339 pmul(const Packet& a, const Packet& b) { return a*b; }
340 // Avoid compiler warning for boolean algebra.
341 template<> EIGEN_DEVICE_FUNC inline bool
342 pmul(const bool& a, const bool& b) { return a && b; }
343 
345 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
346 pdiv(const Packet& a, const Packet& b) { return a/b; }
347 
348 // In the generic case, memset to all one bits.
349 template<typename Packet, typename EnableIf = void>
350 struct ptrue_impl {
351  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/){
352  Packet b;
353  memset(static_cast<void*>(&b), 0xff, sizeof(Packet));
354  return b;
355  }
356 };
357 
358 // For non-trivial scalars, set to Scalar(1) (i.e. a non-zero value).
359 // Although this is technically not a valid bitmask, the scalar path for pselect
360 // uses a comparison to zero, so this should still work in most cases. We don't
361 // have another option, since the scalar type requires initialization.
362 template<typename T>
363 struct ptrue_impl<T,
364  std::enable_if_t<is_scalar<T>::value && NumTraits<T>::RequireInitialization> > {
365  static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/){
366  return T(1);
367  }
368 };
369 
371 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
372 ptrue(const Packet& a) {
373  return ptrue_impl<Packet>::run(a);
374 }
375 
376 // In the general case, memset to zero.
377 template<typename Packet, typename EnableIf = void>
378 struct pzero_impl {
379  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) {
380  Packet b;
381  memset(static_cast<void*>(&b), 0x00, sizeof(Packet));
382  return b;
383  }
384 };
385 
386 // For scalars, explicitly set to Scalar(0), since the underlying representation
387 // for zero may not consist of all-zero bits.
388 template<typename T>
389 struct pzero_impl<T,
390  std::enable_if_t<is_scalar<T>::value>> {
391  static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) {
392  return T(0);
393  }
394 };
395 
397 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
398 pzero(const Packet& a) {
399  return pzero_impl<Packet>::run(a);
400 }
401 
403 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
404 pcmp_le(const Packet& a, const Packet& b) { return a<=b ? ptrue(a) : pzero(a); }
405 
407 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
408 pcmp_lt(const Packet& a, const Packet& b) { return a<b ? ptrue(a) : pzero(a); }
409 
411 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
412 pcmp_eq(const Packet& a, const Packet& b) { return a==b ? ptrue(a) : pzero(a); }
413 
415 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
416 pcmp_lt_or_nan(const Packet& a, const Packet& b) { return a>=b ? pzero(a) : ptrue(a); }
417 
418 template<typename T>
419 struct bit_and {
421  return a & b;
422  }
423 };
424 
425 template<typename T>
426 struct bit_or {
428  return a | b;
429  }
430 };
431 
432 template<typename T>
433 struct bit_xor {
435  return a ^ b;
436  }
437 };
438 
439 template<typename T>
440 struct bit_not {
442  return ~a;
443  }
444 };
445 
446 // Use operators &, |, ^, ~.
447 template<typename T>
448 struct operator_bitwise_helper {
449  EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and<T>()(a, b); }
450  EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or<T>()(a, b); }
451  EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor<T>()(a, b); }
452  EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not<T>()(a); }
453 };
454 
455 // Apply binary operations byte-by-byte
456 template<typename T>
457 struct bytewise_bitwise_helper {
458  EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) {
459  return binary(a, b, bit_and<unsigned char>());
460  }
461  EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) {
462  return binary(a, b, bit_or<unsigned char>());
463  }
464  EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) {
465  return binary(a, b, bit_xor<unsigned char>());
466  }
467  EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) {
468  return unary(a,bit_not<unsigned char>());
469  }
470 
471  private:
472  template<typename Op>
473  EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) {
474  const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
475  T c;
476  unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
477  for (size_t i = 0; i < sizeof(T); ++i) {
478  *c_ptr++ = op(*a_ptr++);
479  }
480  return c;
481  }
482 
483  template<typename Op>
484  EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) {
485  const unsigned char* a_ptr = reinterpret_cast<const unsigned char*>(&a);
486  const unsigned char* b_ptr = reinterpret_cast<const unsigned char*>(&b);
487  T c;
488  unsigned char* c_ptr = reinterpret_cast<unsigned char*>(&c);
489  for (size_t i = 0; i < sizeof(T); ++i) {
490  *c_ptr++ = op(*a_ptr++, *b_ptr++);
491  }
492  return c;
493  }
494 };
495 
496 // In the general case, use byte-by-byte manipulation.
497 template<typename T, typename EnableIf = void>
498 struct bitwise_helper : public bytewise_bitwise_helper<T> {};
499 
500 // For integers or non-trivial scalars, use binary operators.
501 template<typename T>
502 struct bitwise_helper<T,
503  typename std::enable_if_t<
504  is_scalar<T>::value && (NumTraits<T>::IsInteger || NumTraits<T>::RequireInitialization)>
505  > : public operator_bitwise_helper<T> {};
506 
508 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
509 pand(const Packet& a, const Packet& b) {
510  return bitwise_helper<Packet>::bitwise_and(a, b);
511 }
512 
514 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
515 por(const Packet& a, const Packet& b) {
516  return bitwise_helper<Packet>::bitwise_or(a, b);
517 }
518 
520 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
521 pxor(const Packet& a, const Packet& b) {
522  return bitwise_helper<Packet>::bitwise_xor(a, b);
523 }
524 
526 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
527 pnot(const Packet& a) {
528  return bitwise_helper<Packet>::bitwise_not(a);
529 }
530 
532 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
533 pandnot(const Packet& a, const Packet& b) { return pand(a, pnot(b)); }
534 
536 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
537 pisnan(const Packet& a) {
538  return pandnot(ptrue(a), pcmp_eq(a, a));
539 }
540 
541 // In the general case, use bitwise select.
542 template<typename Packet, typename EnableIf = void>
543 struct pselect_impl {
544  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
545  return por(pand(a,mask),pandnot(b,mask));
546  }
547 };
548 
549 // For scalars, use ternary select.
550 template<typename Packet>
551 struct pselect_impl<Packet,
552  std::enable_if_t<is_scalar<Packet>::value> > {
553  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) {
554  return numext::equal_strict(mask, Packet(0)) ? b : a;
555  }
556 };
557 
559 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
560 pselect(const Packet& mask, const Packet& a, const Packet& b) {
561  return pselect_impl<Packet>::run(mask, a, b);
562 }
563 
564 template<> EIGEN_DEVICE_FUNC inline bool pselect<bool>(
565  const bool& cond, const bool& a, const bool& b) {
566  return cond ? a : b;
567 }
568 
571 template<int NaNPropagation>
572 struct pminmax_impl {
573  template <typename Packet, typename Op>
574  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
575  return op(a,b);
576  }
577 };
578 
581 template<>
582 struct pminmax_impl<PropagateNaN> {
583  template <typename Packet, typename Op>
584  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
585  Packet not_nan_mask_a = pcmp_eq(a, a);
586  Packet not_nan_mask_b = pcmp_eq(b, b);
587  return pselect(not_nan_mask_a,
588  pselect(not_nan_mask_b, op(a, b), b),
589  a);
590  }
591 };
592 
596 template<>
597 struct pminmax_impl<PropagateNumbers> {
598  template <typename Packet, typename Op>
599  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) {
600  Packet not_nan_mask_a = pcmp_eq(a, a);
601  Packet not_nan_mask_b = pcmp_eq(b, b);
602  return pselect(not_nan_mask_a,
603  pselect(not_nan_mask_b, op(a, b), a),
604  b);
605  }
606 };
607 
608 
// Turns a binary function Func(const Type&, const Type&) into something that
// can be passed as a callable: a lambda forwarding to Func when
// SYCL_DEVICE_ONLY is defined, and Func itself otherwise.
#if defined(SYCL_DEVICE_ONLY)
#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) \
  [](const Type& a, const Type& b) {                \
    return Func(a, b);}
#else
#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) Func
#endif
616 
619 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
620 pmin(const Packet& a, const Packet& b) { return numext::mini(a,b); }
621 
624 template <int NaNPropagation, typename Packet>
625 EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) {
626  return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin<Packet>)));
627 }
628 
631 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
632 pmax(const Packet& a, const Packet& b) { return numext::maxi(a, b); }
633 
636 template <int NaNPropagation, typename Packet>
637 EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) {
638  return pminmax_impl<NaNPropagation>::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet,(pmax<Packet>)));
639 }
640 
642 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
643 pabs(const Packet& a) { return numext::abs(a); }
644 template<> EIGEN_DEVICE_FUNC inline unsigned int
645 pabs(const unsigned int& a) { return a; }
646 template<> EIGEN_DEVICE_FUNC inline unsigned long
647 pabs(const unsigned long& a) { return a; }
648 template<> EIGEN_DEVICE_FUNC inline unsigned long long
649 pabs(const unsigned long long& a) { return a; }
650 
652 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
653 paddsub(const Packet& a, const Packet& b) {
654  return pselect(peven_mask(a), padd(a, b), psub(a, b));
655  }
656 
658 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
659 parg(const Packet& a) { using numext::arg; return arg(a); }
660 
661 
663 template<int N> EIGEN_DEVICE_FUNC inline int
664 parithmetic_shift_right(const int& a) { return a >> N; }
665 template<int N> EIGEN_DEVICE_FUNC inline long int
666 parithmetic_shift_right(const long int& a) { return a >> N; }
667 
669 template<int N> EIGEN_DEVICE_FUNC inline int
670 plogical_shift_right(const int& a) { return static_cast<int>(static_cast<unsigned int>(a) >> N); }
671 template<int N> EIGEN_DEVICE_FUNC inline long int
672 plogical_shift_right(const long int& a) { return static_cast<long>(static_cast<unsigned long>(a) >> N); }
673 
675 template<int N> EIGEN_DEVICE_FUNC inline int
676 plogical_shift_left(const int& a) { return a << N; }
677 template<int N> EIGEN_DEVICE_FUNC inline long int
678 plogical_shift_left(const long int& a) { return a << N; }
679 
683 template <typename Packet>
684 EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) {
685  int exp;
686  EIGEN_USING_STD(frexp);
687  Packet result = static_cast<Packet>(frexp(a, &exp));
688  exponent = static_cast<Packet>(exp);
689  return result;
690 }
691 
695 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
696 pldexp(const Packet &a, const Packet &exponent) {
697  EIGEN_USING_STD(ldexp)
698  return static_cast<Packet>(ldexp(a, static_cast<int>(exponent)));
699 }
700 
702 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
703 pabsdiff(const Packet& a, const Packet& b) { return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); }
704 
706 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
707 pload(const typename unpacket_traits<Packet>::type* from) { return *from; }
708 
713 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
714 pload_partial(const typename unpacket_traits<Packet>::type* from, const Index n, const Index offset = 0)
715 {
716  const Index packet_size = unpacket_traits<Packet>::size;
717  eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
718  typedef typename unpacket_traits<Packet>::type Scalar;
719  EIGEN_ALIGN_MAX Scalar elements[packet_size] = { Scalar(0) };
720  for (Index i = offset; i < numext::mini(n+offset,packet_size); i++) {
721  elements[i] = from[i-offset];
722  }
723  return pload<Packet>(elements);
724 }
725 
727 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
728 ploadu(const typename unpacket_traits<Packet>::type* from) { return *from; }
729 
732 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
733 ploadu_partial(const typename unpacket_traits<Packet>::type* from, const Index n, const Index offset = 0)
734 {
735  const Index packet_size = unpacket_traits<Packet>::size;
736  eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet");
737  typedef typename unpacket_traits<Packet>::type Scalar;
738  EIGEN_ALIGN_MAX Scalar elements[packet_size] = { Scalar(0) };
739  for (Index i = offset; i < numext::mini(n+offset,packet_size); i++) {
740  elements[i] = from[i-offset];
741  }
742  return pload<Packet>(elements);
743 }
744 
749 template<typename Packet> EIGEN_DEVICE_FUNC inline
750 std::enable_if_t<unpacket_traits<Packet>::masked_load_available, Packet>
751 ploadu(const typename unpacket_traits<Packet>::type* from, typename unpacket_traits<Packet>::mask_t umask);
752 
754 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
755 pset1(const typename unpacket_traits<Packet>::type& a) { return a; }
756 
758 template<typename Packet,typename BitsType> EIGEN_DEVICE_FUNC inline Packet
759 pset1frombits(BitsType a);
760 
762 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
763 pload1(const typename unpacket_traits<Packet>::type *a) { return pset1<Packet>(*a); }
764 
770 template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
771 ploaddup(const typename unpacket_traits<Packet>::type* from) { return *from; }
772 
779 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
780 ploadquad(const typename unpacket_traits<Packet>::type* from)
781 { return pload1<Packet>(from); }
782 
792 template<typename Packet> EIGEN_DEVICE_FUNC
793 inline void pbroadcast4(const typename unpacket_traits<Packet>::type *a,
794  Packet& a0, Packet& a1, Packet& a2, Packet& a3)
795 {
796  a0 = pload1<Packet>(a+0);
797  a1 = pload1<Packet>(a+1);
798  a2 = pload1<Packet>(a+2);
799  a3 = pload1<Packet>(a+3);
800 }
801 
809 template<typename Packet> EIGEN_DEVICE_FUNC
810 inline void pbroadcast2(const typename unpacket_traits<Packet>::type *a,
811  Packet& a0, Packet& a1)
812 {
813  a0 = pload1<Packet>(a+0);
814  a1 = pload1<Packet>(a+1);
815 }
816 
818 template<typename Packet> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet
819 plset(const typename unpacket_traits<Packet>::type& a) { return a; }
820 
823 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
824 peven_mask(const Packet& /*a*/) {
825  typedef typename unpacket_traits<Packet>::type Scalar;
826  const size_t n = unpacket_traits<Packet>::size;
827  EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
828  for(size_t i = 0; i < n; ++i) {
829  memset(elements+i, ((i & 1) == 0 ? 0xff : 0), sizeof(Scalar));
830  }
831  return ploadu<Packet>(elements);
832 }
833 
834 
836 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from)
837 { (*to) = from; }
838 
842 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstore_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0)
843 {
844  const Index packet_size = unpacket_traits<Packet>::size;
845  eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
846  EIGEN_ALIGN_MAX Scalar elements[packet_size];
847  pstore<Scalar>(elements, from);
848  for (Index i = 0; i < numext::mini(n,packet_size-offset); i++) {
849  to[i] = elements[i + offset];
850  }
851 }
852 
854 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from)
855 { (*to) = from; }
856 
858 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pstoreu_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0)
859 {
860  const Index packet_size = unpacket_traits<Packet>::size;
861  eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet");
862  EIGEN_ALIGN_MAX Scalar elements[packet_size];
863  pstore<Scalar>(elements, from);
864  for (Index i = 0; i < numext::mini(n,packet_size-offset); i++) {
865  to[i] = elements[i + offset];
866  }
867 }
868 
873 template<typename Scalar, typename Packet>
874 EIGEN_DEVICE_FUNC inline
875 std::enable_if_t<unpacket_traits<Packet>::masked_store_available, void>
876 pstoreu(Scalar* to, const Packet& from, typename unpacket_traits<Packet>::mask_t umask);
877 
878 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/)
879 { return ploadu<Packet>(from); }
880 
881 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline Packet pgather_partial(const Scalar* from, Index stride, const Index n)
882 {
883  const Index packet_size = unpacket_traits<Packet>::size;
884  EIGEN_ALIGN_MAX Scalar elements[packet_size] = { Scalar(0) };
885  for (Index i = 0; i < numext::mini(n,packet_size); i++) {
886  elements[i] = from[i*stride];
887  }
888  return pload<Packet>(elements);
889 }
890 
891 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/)
892 { pstore(to, from); }
893 
894 template<typename Scalar, typename Packet> EIGEN_DEVICE_FUNC inline void pscatter_partial(Scalar* to, const Packet& from, Index stride, const Index n)
895 {
896  const Index packet_size = unpacket_traits<Packet>::size;
897  EIGEN_ALIGN_MAX Scalar elements[packet_size];
898  pstore<Scalar>(elements, from);
899  for (Index i = 0; i < numext::mini(n,packet_size); i++) {
900  to[i*stride] = elements[i];
901  }
902 }
903 
905 template<typename Scalar> EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr)
906 {
907 #if defined(EIGEN_HIP_DEVICE_COMPILE)
908  // do nothing
909 #elif defined(EIGEN_CUDA_ARCH)
910 #if defined(__LP64__) || EIGEN_OS_WIN64
911  // 64-bit pointer operand constraint for inlined asm
912  asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr));
913 #else
914  // 32-bit pointer operand constraint for inlined asm
915  asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr));
916 #endif
917 #elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC)
918  __builtin_prefetch(addr);
919 #endif
920 }
921 
923 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a)
924 { return a; }
925 
927 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a)
928 {
929  return Packet(numext::imag(a),numext::real(a));
930 }
931 
932 
938 Packet psin(const Packet& a) { EIGEN_USING_STD(sin); return sin(a); }
939 
942 Packet pcos(const Packet& a) { EIGEN_USING_STD(cos); return cos(a); }
943 
946 Packet ptan(const Packet& a) { EIGEN_USING_STD(tan); return tan(a); }
947 
950 Packet pasin(const Packet& a) { EIGEN_USING_STD(asin); return asin(a); }
951 
954 Packet pacos(const Packet& a) { EIGEN_USING_STD(acos); return acos(a); }
955 
956 
959 Packet psinh(const Packet& a) { EIGEN_USING_STD(sinh); return sinh(a); }
960 
963 Packet pcosh(const Packet& a) { EIGEN_USING_STD(cosh); return cosh(a); }
964 
967 Packet patan(const Packet& a) { EIGEN_USING_STD(atan); return atan(a); }
968 
971 Packet ptanh(const Packet& a) { EIGEN_USING_STD(tanh); return tanh(a); }
972 
975 Packet patanh(const Packet& a) { EIGEN_USING_STD(atanh); return atanh(a); }
976 
979 Packet pexp(const Packet& a) { EIGEN_USING_STD(exp); return exp(a); }
980 
983 Packet pexpm1(const Packet& a) { return numext::expm1(a); }
984 
987 Packet plog(const Packet& a) { EIGEN_USING_STD(log); return log(a); }
988 
991 Packet plog1p(const Packet& a) { return numext::log1p(a); }
992 
995 Packet plog10(const Packet& a) { EIGEN_USING_STD(log10); return log10(a); }
996 
999 Packet plog2(const Packet& a) {
1000  typedef typename internal::unpacket_traits<Packet>::type Scalar;
1001  return pmul(pset1<Packet>(Scalar(EIGEN_LOG2E)), plog(a));
1002 }
1003 
1006 Packet psqrt(const Packet& a) { return numext::sqrt(a); }
1007 
1010 Packet pround(const Packet& a) { using numext::round; return round(a); }
1011 
1014 Packet pfloor(const Packet& a) { using numext::floor; return floor(a); }
1015 
1019 Packet print(const Packet& a) { using numext::rint; return rint(a); }
1020 
1023 Packet pceil(const Packet& a) { using numext::ceil; return ceil(a); }
1024 
1025 template<typename Packet, typename EnableIf = void>
1026 struct psign_impl {
1027  static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) {
1028  return numext::sign(a);
1029  }
1030 };
1031 
1033 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
1034 psign(const Packet& a) {
1035  return psign_impl<Packet>::run(a);
1036 }
1037 
1038 template<> EIGEN_DEVICE_FUNC inline bool
1039 psign(const bool& a) {
1040  return a;
1041 }
1042 
1044 template<typename Packet>
1045 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
1046 pfirst(const Packet& a)
1047 { return a; }
1048 
1053 template<typename Packet>
1054 EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits<Packet>::size%8)==0,typename unpacket_traits<Packet>::half,Packet>
1055 predux_half_dowto4(const Packet& a)
1056 { return a; }
1057 
1058 // Slow generic implementation of Packet reduction.
1059 template <typename Packet, typename Op>
1060 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
1061 predux_helper(const Packet& a, Op op) {
1062  typedef typename unpacket_traits<Packet>::type Scalar;
1063  const size_t n = unpacket_traits<Packet>::size;
1064  EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n];
1065  pstoreu<Scalar>(elements, a);
1066  for(size_t k = n / 2; k > 0; k /= 2) {
1067  for(size_t i = 0; i < k; ++i) {
1068  elements[i] = op(elements[i], elements[i + k]);
1069  }
1070  }
1071  return elements[0];
1072 }
1073 
1075 template<typename Packet>
1076 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type
1077 predux(const Packet& a)
1078 {
1079  return a;
1080 }
1081 
1083 template <typename Packet>
1084 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_mul(
1085  const Packet& a) {
1086  typedef typename unpacket_traits<Packet>::type Scalar;
1087  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul<Scalar>)));
1088 }
1089 
1091 template <typename Packet>
1092 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
1093  const Packet &a) {
1094  typedef typename unpacket_traits<Packet>::type Scalar;
1095  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<PropagateFast, Scalar>)));
1096 }
1097 
1098 template <int NaNPropagation, typename Packet>
1099 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_min(
1100  const Packet& a) {
1101  typedef typename unpacket_traits<Packet>::type Scalar;
1102  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin<NaNPropagation, Scalar>)));
1103 }
1104 
1106 template <typename Packet>
1107 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
1108  const Packet &a) {
1109  typedef typename unpacket_traits<Packet>::type Scalar;
1110  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<PropagateFast, Scalar>)));
1111 }
1112 
1113 template <int NaNPropagation, typename Packet>
1114 EIGEN_DEVICE_FUNC inline typename unpacket_traits<Packet>::type predux_max(
1115  const Packet& a) {
1116  typedef typename unpacket_traits<Packet>::type Scalar;
1117  return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax<NaNPropagation, Scalar>)));
1118 }
1119 
1120 #undef EIGEN_BINARY_OP_NAN_PROPAGATION
1121 
1125 // not needed yet
1126 // template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a)
1127 // { return bool(a); }
1128 
1132 template<typename Packet> EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a)
1133 {
1134  // Dirty but generic implementation where "true" is assumed to be non 0 and all the sames.
1135  // It is expected that "true" is either:
1136  // - Scalar(1)
1137  // - bits full of ones (NaN for floats),
1138  // - or first bit equals to 1 (1 for ints, smallest denormal for floats).
1139  // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars.
1140  typedef typename unpacket_traits<Packet>::type Scalar;
1141  return numext::not_equal_strict(predux(a), Scalar(0));
1142 }
1143 
1144 
1148 // FMA instructions.
1150 template <typename Packet>
1151 EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b,
1152  const Packet& c) {
1153  return padd(pmul(a, b), c);
1154 }
1155 
1157 template <typename Packet>
1158 EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b,
1159  const Packet& c) {
1160  return psub(pmul(a, b), c);
1161 }
1162 
1164 template <typename Packet>
1165 EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b,
1166  const Packet& c) {
1167  return padd(pnegate(pmul(a, b)), c);
1168 }
1169 
1171 template <typename Packet>
1172 EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b,
1173  const Packet& c) {
1174  return psub(pnegate(pmul(a, b)), c);
1175 }
1176 
1178 // NOTE: this function must really be templated on the packet type (think about different packet types for the same scalar type)
1179 template<typename Packet>
1180 inline void pstore1(typename unpacket_traits<Packet>::type* to, const typename unpacket_traits<Packet>::type& a)
1181 {
1182  pstore(to, pset1<Packet>(a));
1183 }
1184 
1187 template<typename Packet, int Alignment>
1188 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits<Packet>::type* from)
1189 {
1190  if(Alignment >= unpacket_traits<Packet>::alignment)
1191  return pload<Packet>(from);
1192  else
1193  return ploadu<Packet>(from);
1194 }
1195 
1198 template<typename Packet, int Alignment>
1199 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpacket_traits<Packet>::type* from, const Index n, const Index offset = 0)
1200 {
1201  if(Alignment >= unpacket_traits<Packet>::alignment)
1202  return pload_partial<Packet>(from, n, offset);
1203  else
1204  return ploadu_partial<Packet>(from, n, offset);
1205 }
1206 
1209 template<typename Scalar, typename Packet, int Alignment>
1210 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from)
1211 {
1212  if(Alignment >= unpacket_traits<Packet>::alignment)
1213  pstore(to, from);
1214  else
1215  pstoreu(to, from);
1216 }
1217 
1220 template<typename Scalar, typename Packet, int Alignment>
1221 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0)
1222 {
1223  if(Alignment >= unpacket_traits<Packet>::alignment)
1224  pstore_partial(to, from, n, offset);
1225  else
1226  pstoreu_partial(to, from, n, offset);
1227 }
1228 
1234 template<typename Packet, int LoadMode>
1235 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits<Packet>::type* from)
1236 {
1237  return ploadt<Packet, LoadMode>(from);
1238 }
1239 
1240 
1244 // Eigen+CUDA does not support complexes.
1245 #if !defined(EIGEN_GPUCC)
1246 
1247 template<> inline std::complex<float> pmul(const std::complex<float>& a, const std::complex<float>& b)
1248 { return std::complex<float>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
1249 
1250 template<> inline std::complex<double> pmul(const std::complex<double>& a, const std::complex<double>& b)
1251 { return std::complex<double>(a.real()*b.real() - a.imag()*b.imag(), a.imag()*b.real() + a.real()*b.imag()); }
1252 
1253 #endif
1254 
1255 
1256 
1261  Packet packet[N];
1262 };
1263 
1264 template<typename Packet> EIGEN_DEVICE_FUNC inline void
1265 ptranspose(PacketBlock<Packet,1>& /*kernel*/) {
1266  // Nothing to do in the scalar case, i.e. a 1x1 matrix.
1267 }
1268 
1269 
/** \internal Array of N boolean flags; it is the first argument of pblend. */
template <size_t N>
struct Selector {
  bool select[N];
};
1276 
1277 template<typename Packet> EIGEN_DEVICE_FUNC inline Packet
1278 pblend(const Selector<unpacket_traits<Packet>::size>& ifPacket, const Packet& thenPacket, const Packet& elsePacket) {
1279  return ifPacket.select[0] ? thenPacket : elsePacket;
1280 }
1281 
1283 template <typename Packet>
1284 EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) {
1285  using Scalar = typename unpacket_traits<Packet>::type;
1286  return pdiv(pset1<Packet>(Scalar(1)), a);
1287 }
1288 
1291 Packet prsqrt(const Packet& a) {
1292  return preciprocal<Packet>(psqrt(a));
1293 }
1294 
1295 template <typename Packet, bool IsScalar = is_scalar<Packet>::value,
1296  bool IsInteger = NumTraits<typename unpacket_traits<Packet>::type>::IsInteger>
1297 struct psignbit_impl;
1298 template <typename Packet, bool IsInteger>
1299 struct psignbit_impl<Packet, true, IsInteger> {
1300  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return numext::signbit(a); }
1301 };
1302 template <typename Packet>
1303 struct psignbit_impl<Packet, false, false> {
1304  // generic implementation if not specialized in PacketMath.h
1305  // slower than arithmetic shift
1306  typedef typename unpacket_traits<Packet>::type Scalar;
1307  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Packet run(const Packet& a) {
1308  const Packet cst_pos_one = pset1<Packet>(Scalar(1));
1309  const Packet cst_neg_one = pset1<Packet>(Scalar(-1));
1310  return pcmp_eq(por(pand(a, cst_neg_one), cst_pos_one), cst_neg_one);
1311  }
1312 };
1313 template <typename Packet>
1314 struct psignbit_impl<Packet, false, true> {
1315  // generic implementation for integer packets
1316  EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return pcmp_lt(a, pzero(a)); }
1317 };
1319 template <typename Packet>
1320 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr Packet
1321 psignbit(const Packet& a) { return psignbit_impl<Packet>::run(a); }
1322 
1324 template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
1325 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
1326  return numext::atan2(y, x);
1327 }
1328 
1330 template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
1331 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) {
1332  typedef typename internal::unpacket_traits<Packet>::type Scalar;
1333 
1334  // See https://en.cppreference.com/w/cpp/numeric/math/atan2
1335  // for how corner cases are supposed to be handled according to the
1336  // IEEE floating-point standard (IEC 60559).
1337  const Packet kSignMask = pset1<Packet>(-Scalar(0));
1338  const Packet kZero = pzero(x);
1339  const Packet kOne = pset1<Packet>(Scalar(1));
1340  const Packet kPi = pset1<Packet>(Scalar(EIGEN_PI));
1341 
1342  const Packet x_has_signbit = psignbit(x);
1343  const Packet y_signmask = pand(y, kSignMask);
1344  const Packet x_signmask = pand(x, kSignMask);
1345  const Packet result_signmask = pxor(y_signmask, x_signmask);
1346  const Packet shift = por(pand(x_has_signbit, kPi), y_signmask);
1347 
1348  const Packet x_and_y_are_same = pcmp_eq(pabs(x), pabs(y));
1349  const Packet x_and_y_are_zero = pcmp_eq(por(x, y), kZero);
1350 
1351  Packet arg = pdiv(y, x);
1352  arg = pselect(x_and_y_are_same, por(kOne, result_signmask), arg);
1353  arg = pselect(x_and_y_are_zero, result_signmask, arg);
1354 
1355  Packet result = patan(arg);
1356  result = padd(result, shift);
1357  return result;
1358 }
1359 
1361 template <typename Packet, std::enable_if_t<is_scalar<Packet>::value, int> = 0>
1363  return Packet(numext::arg(a));
1364 }
1365 
1367 template <typename Packet, std::enable_if_t<!is_scalar<Packet>::value, int> = 0>
1368 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) {
1369  EIGEN_STATIC_ASSERT(NumTraits<typename unpacket_traits<Packet>::type>::IsComplex, THIS METHOD IS FOR COMPLEX TYPES ONLY)
1370  using RealPacket = typename unpacket_traits<Packet>::as_real;
1371  // a // r i r i ...
1372  RealPacket aflip = pcplxflip(a).v; // i r i r ...
1373  RealPacket result = patan2(aflip, a.v); // atan2 crap atan2 crap ...
1374  return (Packet)pand(result, peven_mask(result)); // atan2 0 atan2 0 ...
1375 }
1376 
1377 } // end namespace internal
1378 
1379 } // end namespace Eigen
1380 
1381 #endif // EIGEN_GENERIC_PACKET_MATH_H
const SignReturnType sign() const
const ArgReturnType arg() const
const Log1pReturnType log1p() const
const Expm1ReturnType expm1() const
Array< int, Dynamic, 1 > v
Array< int, 3, 1 > b
int n
const ImagReturnType imag() const
RealReturnType real() const
#define EIGEN_ALIGN_MAX
#define EIGEN_ALIGN_TO_BOUNDARY(n)
Array< double, 1, 3 > e(1./3., 0.5, 2.)
Array33i c
#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func)
IndexedView_or_Block operator()(const RowIndices &rowIndices, const ColIndices &colIndices)
#define EIGEN_ALWAYS_INLINE
Definition: Macros.h:836
#define EIGEN_USING_STD(FUNC)
Definition: Macros.h:1080
#define EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Definition: Macros.h:891
#define EIGEN_CONSTEXPR
Definition: Macros.h:747
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:883
#define eigen_assert(x)
Definition: Macros.h:902
#define EIGEN_LOG2E
Definition: MathFunctions.h:17
#define EIGEN_PI
Definition: MathFunctions.h:16
#define EIGEN_STATIC_ASSERT(X, MSG)
Definition: StaticAssert.h:26
Eigen::Triplet< double > T
@ PropagateNaN
Definition: Constants.h:345
@ PropagateNumbers
Definition: Constants.h:347
Packet pmin(const Packet &a, const Packet &b)
Packet pnmsub(const Packet &a, const Packet &b, const Packet &c)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet &a)
Packet padd(const Packet &a, const Packet &b)
Packet8f pzero(const Packet8f &)
void pstore(Scalar *to, const Packet &from)
EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar *to, const Packet &from, const Index n, const Index offset=0)
Packet pload_partial(const typename unpacket_traits< Packet >::type *from, const Index n, const Index offset=0)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet &a)
Packet pgather_partial(const Scalar *from, Index stride, const Index n)
unpacket_traits< Packet >::type predux(const Packet &a)
Packet8h ptrue(const Packet8h &a)
Packet4f pcmp_lt_or_nan(const Packet4f &a, const Packet4f &b)
EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits< Packet >::type *from)
Packet ploadu(const typename unpacket_traits< Packet >::type *from)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet &a)
const Scalar & y
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet &a)
Packet8h pandnot(const Packet8h &a, const Packet8h &b)
void pstore_partial(Scalar *to, const Packet &from, const Index n, const Index offset=0)
Packet4f pabs(const Packet4f &a)
Packet pmax(const Packet &a, const Packet &b)
void pbroadcast4(const typename unpacket_traits< Packet >::type *a, Packet &a0, Packet &a1, Packet &a2, Packet &a3)
Packet2cf pnegate(const Packet2cf &a)
TgtPacket pcast(const SrcPacket &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet &a)
Packet1cd pcplxflip(const Packet1cd &x)
Definition: MSA/Complex.h:617
Packet4i plogical_shift_right(const Packet4i &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet &a)
EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits< Packet >::type *from)
EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpacket_traits< Packet >::type *from, const Index n, const Index offset=0)
Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
void pstoreu(Scalar *to, const Packet &from)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet &a)
EIGEN_ALWAYS_INLINE Packet patan2(const Packet &y, const Packet &x)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet &a)
Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
bfloat16 pfirst(const Packet8bf &a)
Packet psign(const Packet &a)
void pstoreu_partial(Scalar *to, const Packet &from, const Index n, const Index offset=0)
Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
bool pselect< bool >(const bool &cond, const bool &a, const bool &b)
Packet pmul(const Packet &a, const Packet &b)
void pscatter(Scalar *to, const Packet &from, Index stride, typename unpacket_traits< Packet >::mask_t umask)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan(const Packet &a)
void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Packet pmsub(const Packet &a, const Packet &b, const Packet &c)
Packet8h pfrexp(const Packet8h &a, Packet8h &exponent)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcosh(const Packet &a)
Packet pset1frombits(BitsType a)
void pscatter_partial(Scalar *to, const Packet &from, Index stride, const Index n)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pround(const Packet &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet &a)
void pbroadcast2(const typename unpacket_traits< Packet >::type *a, Packet &a0, Packet &a1)
Packet4f psqrt(const Packet4f &a)
Packet pnmadd(const Packet &a, const Packet &b, const Packet &c)
Packet4f print(const Packet4f &a)
Packet psub(const Packet &a, const Packet &b)
Packet ploadu_partial(const typename unpacket_traits< Packet >::type *from, const Index n, const Index offset=0)
void prefetch(const Scalar *addr)
Packet pgather(const Packet &src, const Scalar *from, Index stride, typename unpacket_traits< Packet >::mask_t umask)
unpacket_traits< Packet >::type predux_mul(const Packet &a)
EIGEN_ALWAYS_INLINE Packet pcarg(const Packet &a)
Packet parg(const Packet &a)
Packet8h pand(const Packet8h &a, const Packet8h &b)
Packet16h ploadquad(const Eigen::half *from)
Packet8h pldexp(const Packet8h &a, const Packet8h &exponent)
Packet pnot(const Packet &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pceil(const Packet &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet &a)
Packet pabsdiff(const Packet &a, const Packet &b)
EIGEN_ALWAYS_INLINE void pstoret(Scalar *to, const Packet &from)
Packet8h pxor(const Packet8h &a, const Packet8h &b)
unpacket_traits< Packet >::type predux_helper(const Packet &a, Op op)
Packet ploaddup(const typename unpacket_traits< Packet >::type *from)
Packet pset1(const typename unpacket_traits< Packet >::type &a)
Packet8bf psignbit(const Packet8bf &a)
Packet pload1(const typename unpacket_traits< Packet >::type *a)
Packet pdiv(const Packet &a, const Packet &b)
Packet4i pblend(const Selector< 4 > &ifPacket, const Packet4i &thenPacket, const Packet4i &elsePacket)
Packet preciprocal(const Packet &a)
unpacket_traits< Packet >::type predux_max(const Packet &a)
Packet2cf pconj(const Packet2cf &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psinh(const Packet &a)
Packet paddsub(const Packet &a, const Packet &b)
Packet plset(const typename unpacket_traits< Packet >::type &a)
Packet4i plogical_shift_left(const Packet4i &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin(const Packet &a)
Packet2cf preverse(const Packet2cf &a)
Packet4i parithmetic_shift_right(const Packet4i &a)
Packet8h por(const Packet8h &a, const Packet8h &b)
Packet4i pcmp_lt(const Packet4i &a, const Packet4i &b)
Packet pload(const typename unpacket_traits< Packet >::type *from)
Target preinterpret(const Packet &a)
void pstore1(typename unpacket_traits< Packet >::type *to, const typename unpacket_traits< Packet >::type &a)
Packet8f pisnan(const Packet8f &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet &a)
Packet4c predux_half_dowto4(const Packet8c &a)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan(const Packet &a)
unpacket_traits< Packet >::type predux_min(const Packet &a)
bool predux_any(const Packet4f &x)
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pfloor(const Packet &a)
Packet4f pcmp_le(const Packet4f &a, const Packet4f &b)
Packet4f prsqrt(const Packet4f &a)
Packet8f peven_mask(const Packet8f &)
Scalar round(const Scalar &x)
std::int32_t int32_t
Definition: Meta.h:40
std::int8_t int8_t
Definition: Meta.h:36
bool equal_strict(const X &x, const Y &y)
Definition: Meta.h:460
std::uint8_t uint8_t
Definition: Meta.h:35
bool not_equal_strict(const X &x, const Y &y)
Definition: Meta.h:485
std::int16_t int16_t
Definition: Meta.h:38
std::int64_t int64_t
Definition: Meta.h:42
Scalar rint(const Scalar &x)
EIGEN_ALWAYS_INLINE T maxi(const T &x, const T &y)
std::uint16_t uint16_t
Definition: Meta.h:37
EIGEN_ALWAYS_INLINE float sqrt(const float &x)
Scalar() floor(const Scalar &x)
EIGEN_ALWAYS_INLINE T atan2(const T &y, const T &x)
Scalar() ceil(const Scalar &x)
static constexpr EIGEN_ALWAYS_INLINE Scalar signbit(const Scalar &x)
std::uint32_t uint32_t
Definition: Meta.h:39
std::uint64_t uint64_t
Definition: Meta.h:41
EIGEN_ALWAYS_INLINE T mini(const T &x, const T &y)
EIGEN_ALWAYS_INLINE std::enable_if_t< NumTraits< T >::IsSigned||NumTraits< T >::IsComplex, typename NumTraits< T >::Real > abs(const T &x)
: InteropHeaders
Definition: Core:139
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tanh_op< typename Derived::Scalar >, const Derived > tanh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_rint_op< typename Derived::Scalar >, const Derived > rint(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_atan_op< typename Derived::Scalar >, const Derived > atan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log10_op< typename Derived::Scalar >, const Derived > log10(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cosh_op< typename Derived::Scalar >, const Derived > cosh(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_tan_op< typename Derived::Scalar >, const Derived > tan(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_acos_op< typename Derived::Scalar >, const Derived > acos(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_atanh_op< typename Derived::Scalar >, const Derived > atanh(const Eigen::ArrayBase< Derived > &x)
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:82
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_asin_op< typename Derived::Scalar >, const Derived > asin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_arg_op< typename Derived::Scalar >, const Derived > arg(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_ceil_op< typename Derived::Scalar >, const Derived > ceil(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_conjugate_op< typename Derived::Scalar >, const Derived > conj(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_floor_op< typename Derived::Scalar >, const Derived > floor(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_cos_op< typename Derived::Scalar >, const Derived > cos(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_round_op< typename Derived::Scalar >, const Derived > round(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_exp_op< typename Derived::Scalar >, const Derived > exp(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sin_op< typename Derived::Scalar >, const Derived > sin(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_log_op< typename Derived::Scalar >, const Derived > log(const Eigen::ArrayBase< Derived > &x)
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_sinh_op< typename Derived::Scalar >, const Derived > sinh(const Eigen::ArrayBase< Derived > &x)
Definition: BFloat16.h:222
Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
Definition: NumTraits.h:231