eigen/arch_2SSE_2MathFunctions_8h_source.html

 // This file is part of Eigen, a lightweight C++ template library

 // for linear algebra.

 //

 // Copyright (C) 2007 Julien Pommier

 // Copyright (C) 2009 Gael Guennebaud <gael.guennebaud@inria.fr>

 //

 // This Source Code Form is subject to the terms of the Mozilla

 // Public License v. 2.0. If a copy of the MPL was not distributed

 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


 /* The sin and cos functions of this file come from

  * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/

  */


 #ifndef EIGEN_MATH_FUNCTIONS_SSE_H

 #define EIGEN_MATH_FUNCTIONS_SSE_H


 #include "../../InternalHeaderCheck.h"


 namespace Eigen {


 namespace internal {


 // Invoke the generic math-function instantiation macros for the two SSE
 // floating-point packet types used in this file: Packet4f for the FLOAT
 // variant and Packet2d for the DOUBLE variant.
 // NOTE(review): the macro bodies are not visible here; presumably they
 // emit the packet specializations (exp, log, sin, cos, ...) that forward
 // to the generic implementations -- confirm against
 // GenericPacketMathFunctionsFwd.h.
 EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(Packet4f)

 EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(Packet2d)


 // Notice that for newer processors, it is counterproductive to use Newton

 // iteration for square root. In particular, Skylake and Zen2 processors

 // have approximately doubled throughput of the _mm_sqrt_ps instruction

 // compared to their predecessors.

 // Square root of a Packet4f: delegates directly to the _mm_sqrt_ps
 // intrinsic, with no Newton refinement step.
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet4f psqrt<Packet4f>(const Packet4f& x)
 {
   const Packet4f root = _mm_sqrt_ps(x);
   return root;
 }

 // Square root of a Packet2d: delegates directly to the _mm_sqrt_pd
 // intrinsic.
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet2d psqrt<Packet2d>(const Packet2d& x)
 {
   const Packet2d root = _mm_sqrt_pd(x);
   return root;
 }

 // Square root of a Packet16b: the input packet is returned unchanged.
 // NOTE(review): presumably Packet16b holds 16 boolean lanes, for which
 // sqrt(0) == 0 and sqrt(1) == 1, making the identity exact -- confirm.
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
 Packet16b psqrt<Packet16b>(const Packet16b& x)
 {
   const Packet16b unchanged = x;
   return unchanged;
 }


 #if EIGEN_FAST_MATH

 // Even on Skylake, using Newton iteration is a win for reciprocal square root.

 // Reciprocal square root of a Packet4f (only compiled under EIGEN_FAST_MATH):
 // takes the _mm_rsqrt_ps estimate and hands it, together with the input,
 // to generic_rsqrt_newton_step configured for one step.
 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 Packet4f prsqrt<Packet4f>(const Packet4f& x)
 {
   typedef generic_rsqrt_newton_step<Packet4f, /*Steps=*/1> OneStepRefiner;
   const Packet4f estimate = _mm_rsqrt_ps(x);
   return OneStepRefiner::run(x, estimate);
 }


 #ifdef EIGEN_VECTORIZE_FMA

 // Trying to speed up reciprocal using Newton-Raphson is counterproductive

 // unless FMA is available. Without FMA pdiv(pset1<Packet>(Scalar(1)),a) is

 // 30% faster.

 // Reciprocal of a Packet4f (only compiled when EIGEN_FAST_MATH and
 // EIGEN_VECTORIZE_FMA are both enabled): takes the _mm_rcp_ps estimate and
 // hands it, together with the input, to generic_reciprocal_newton_step
 // configured for one step.
 template<> EIGEN_STRONG_INLINE Packet4f preciprocal<Packet4f>(const Packet4f& x)
 {
   typedef generic_reciprocal_newton_step<Packet4f, /*Steps=*/1> OneStepRefiner;
   const Packet4f estimate = _mm_rcp_ps(x);
   return OneStepRefiner::run(x, estimate);
 }

 #endif


 #endif


 } // end namespace internal


 namespace numext {


 // Scalar float specialization of numext::sqrt.
 // Places x in an SSE register with _mm_set_ss, applies the scalar
 // square-root intrinsic _mm_sqrt_ss, and reads the result back out
 // through internal::pfirst.
 template<>
 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE
 float sqrt(const float &x)
 {
   const internal::Packet4f root(_mm_sqrt_ss(_mm_set_ss(x)));
   return internal::pfirst(root);
 }


 // Scalar double specialization of numext::sqrt.
 // Places x in an SSE register with _mm_set_sd, takes the square root, and
 // reads the result back out through internal::pfirst. Two code paths are
 // selected at preprocessing time:
 //  - EIGEN_COMP_GNUC_STRICT: calls the __builtin_ia32_sqrtsd builtin
 //    directly (see the workaround note inside the branch).
 //  - otherwise: uses the packed _mm_sqrt_pd intrinsic on the register
 //    produced by _mm_set_sd; only the first element is returned.
 template<>

 EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE

 double sqrt(const double &x)

 {

 #if EIGEN_COMP_GNUC_STRICT

   // This works around a GCC bug generating poor code for _mm_sqrt_pd

   // See https://gitlab.com/libeigen/eigen/commit/8dca9f97e38970

   return internal::pfirst(internal::Packet2d(__builtin_ia32_sqrtsd(_mm_set_sd(x))));

 #else

   return internal::pfirst(internal::Packet2d(_mm_sqrt_pd(_mm_set_sd(x))));

 #endif

 }


 } // end namespace numext


 } // end namespace Eigen


 #endif // EIGEN_MATH_FUNCTIONS_SSE_H

x
x
Definition: BiCGSTAB_simple.cpp:7

EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE
#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_DOUBLE(PACKET)
Definition: GenericPacketMathFunctionsFwd.h:162

EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT
#define EIGEN_INSTANTIATE_GENERIC_MATH_FUNCS_FLOAT(PACKET)
Definition: GenericPacketMathFunctionsFwd.h:139

EIGEN_ALWAYS_INLINE
#define EIGEN_ALWAYS_INLINE
Definition: Macros.h:836

EIGEN_UNUSED
#define EIGEN_UNUSED
Definition: Macros.h:932

EIGEN_DEVICE_FUNC
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:883

EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Definition: Macros.h:892

Eigen::internal::Packet2d
v2f64 Packet2d
Definition: MSA/PacketMath.h:820

Eigen::internal::psqrt< Packet4f >
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f psqrt< Packet4f >(const Packet4f &x)
Definition: arch/AltiVec/MathFunctions.h:69

Eigen::internal::Packet16b
eigen_packet_wrapper< __m128i, 1 > Packet16b
Definition: SSE/PacketMath.h:50

Eigen::internal::pfirst
bfloat16 pfirst(const Packet8bf &a)
Definition: AltiVec/PacketMath.h:2011

Eigen::internal::psqrt< Packet16b >
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet16b psqrt< Packet16b >(const Packet16b &x)
Definition: arch/SSE/MathFunctions.h:36

Eigen::internal::psqrt< Packet2d >
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet2d psqrt< Packet2d >(const Packet2d &x)
Definition: arch/SSE/MathFunctions.h:34

Eigen::internal::prsqrt< Packet4f >
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet4f prsqrt< Packet4f >(const Packet4f &x)
Definition: arch/ZVector/MathFunctions.h:213

Eigen::internal::Packet4f
__vector float Packet4f
Definition: AltiVec/PacketMath.h:32

Eigen::numext::sqrt
EIGEN_ALWAYS_INLINE float sqrt(const float &x)
Definition: arch/SSE/MathFunctions.h:62

Eigen
: InteropHeaders
Definition: Core:139

internal
Definition: Eigen_Colamd.h:50