BFloat16.h
Go to the documentation of this file.
1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7  http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef EIGEN_BFLOAT16_H
17 #define EIGEN_BFLOAT16_H
18 
19 #include "../../InternalHeaderCheck.h"
20 
21 #if defined(EIGEN_HAS_HIP_BF16)
22 // When compiling with GPU support, the "hip_bfloat16" base class as well as
23 // some other routines are defined in the GPU compiler header files
24 // (hip_bfloat16.h), and they are not tagged constexpr
25 // As a consequence, we get compile failures when compiling Eigen with
26 // GPU support. Hence the need to disable EIGEN_CONSTEXPR when building
27 // Eigen with GPU support
28  #pragma push_macro("EIGEN_CONSTEXPR")
29  #undef EIGEN_CONSTEXPR
30  #define EIGEN_CONSTEXPR
31 #endif
32 
33 #define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD) \
34  template <> \
35  EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED \
36  PACKET_BF16 METHOD<PACKET_BF16>(const PACKET_BF16& _x) { \
37  return F32ToBf16(METHOD<PACKET_F>(Bf16ToF32(_x))); \
38  }
39 
40 // Only use HIP GPU bf16 in kernels
41 #if defined(EIGEN_HAS_HIP_BF16) && defined(EIGEN_GPU_COMPILE_PHASE)
42 #define EIGEN_USE_HIP_BF16
43 #endif
44 
45 namespace Eigen {
46 
47 struct bfloat16;
48 
49 namespace numext {
50 template <>
51 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src);
52 
53 template <>
54 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat16>(const Eigen::bfloat16& src);
55 } // namespace numext
56 namespace bfloat16_impl {
57 
58 #if defined(EIGEN_USE_HIP_BF16)
59 
60 struct __bfloat16_raw : public hip_bfloat16 {
62  EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(hip_bfloat16 hb) : hip_bfloat16(hb) {}
63  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : hip_bfloat16(raw) {}
64 };
65 
66 #else
67 
68 // Make our own __bfloat16_raw definition.
70 #if defined(EIGEN_HAS_HIP_BF16) && !defined(EIGEN_GPU_COMPILE_PHASE)
72 #else
74 #endif
75  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw) : value(raw) {}
76  unsigned short value;
77 };
78 
79 #endif // defined(EIGEN_USE_HIP_BF16)
80 
81 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value);
82 template <bool AssumeArgumentIsNormalOrInfinityOrZero>
84 // Forward declarations of template specializations, to avoid Visual C++ 2019 errors, saying:
85 // > error C2908: explicit specialization; 'float_to_bfloat16_rtne' has already been instantiated
86 template <>
88 template <>
90 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h);
91 
92 struct bfloat16_base : public __bfloat16_raw {
95 };
96 
97 } // namespace bfloat16_impl
98 
99 // Class definition.
101 
103 
105 
107 
109  : bfloat16_impl::bfloat16_base(bfloat16_impl::raw_uint16_to_bfloat16(b ? 0x3f80 : 0)) {}
110 
111  template<class T>
113  : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<internal::is_integral<T>::value>(static_cast<float>(val))) {}
114 
115  explicit EIGEN_DEVICE_FUNC bfloat16(float f)
116  : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(f)) {}
117 
118  // Following the convention of numpy, converting between complex and
119  // float will lead to loss of imag value.
120  template<typename RealScalar>
121  explicit EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR bfloat16(const std::complex<RealScalar>& val)
122  : bfloat16_impl::bfloat16_base(bfloat16_impl::float_to_bfloat16_rtne<false>(static_cast<float>(val.real()))) {}
123 
124  EIGEN_DEVICE_FUNC operator float() const { // NOLINT: Allow implicit conversion to float, because it is lossless.
125  return bfloat16_impl::bfloat16_to_float(*this);
126  }
127 };
128 
129 // TODO(majnemer): Get rid of this once we can rely on C++17 inline variables do
130 // solve the ODR issue.
131 namespace bfloat16_impl {
132 template <typename = void>
134  static EIGEN_CONSTEXPR const bool is_specialized = true;
135  static EIGEN_CONSTEXPR const bool is_signed = true;
136  static EIGEN_CONSTEXPR const bool is_integer = false;
137  static EIGEN_CONSTEXPR const bool is_exact = false;
138  static EIGEN_CONSTEXPR const bool has_infinity = true;
139  static EIGEN_CONSTEXPR const bool has_quiet_NaN = true;
140  static EIGEN_CONSTEXPR const bool has_signaling_NaN = true;
141  static EIGEN_CONSTEXPR const std::float_denorm_style has_denorm = std::denorm_present;
142  static EIGEN_CONSTEXPR const bool has_denorm_loss = false;
143  static EIGEN_CONSTEXPR const std::float_round_style round_style = std::numeric_limits<float>::round_style;
144  static EIGEN_CONSTEXPR const bool is_iec559 = true;
145  // The C++ standard defines this as "true if the set of values representable
146  // by the type is finite." BFloat16 has finite precision.
147  static EIGEN_CONSTEXPR const bool is_bounded = true;
148  static EIGEN_CONSTEXPR const bool is_modulo = false;
149  static EIGEN_CONSTEXPR const int digits = 8;
150  static EIGEN_CONSTEXPR const int digits10 = 2;
151  static EIGEN_CONSTEXPR const int max_digits10 = 4;
152  static EIGEN_CONSTEXPR const int radix = std::numeric_limits<float>::radix;
153  static EIGEN_CONSTEXPR const int min_exponent = std::numeric_limits<float>::min_exponent;
154  static EIGEN_CONSTEXPR const int min_exponent10 = std::numeric_limits<float>::min_exponent10;
155  static EIGEN_CONSTEXPR const int max_exponent = std::numeric_limits<float>::max_exponent;
156  static EIGEN_CONSTEXPR const int max_exponent10 = std::numeric_limits<float>::max_exponent10;
157  static EIGEN_CONSTEXPR const bool traps = std::numeric_limits<float>::traps;
158  // IEEE754: "The implementer shall choose how tininess is detected, but shall
159  // detect tininess in the same way for all operations in radix two"
160  static EIGEN_CONSTEXPR const bool tinyness_before = std::numeric_limits<float>::tinyness_before;
161 
171 };
172 
173 template<typename T>
175 template<typename T>
177 template<typename T>
179 template<typename T>
181 template<typename T>
183 template<typename T>
185 template<typename T>
187 template<typename T>
189 template<typename T>
191 template<typename T>
193 template<typename T>
195 template<typename T>
197 template<typename T>
199 template<typename T>
201 template<typename T>
203 template<typename T>
205 template<typename T>
207 template<typename T>
209 template<typename T>
211 template<typename T>
213 template<typename T>
215 template<typename T>
217 template<typename T>
219 } // end namespace bfloat16_impl
220 } // end namespace Eigen
221 
222 namespace std {
223 // If std::numeric_limits<T> is specialized, should also specialize
224 // std::numeric_limits<const T>, std::numeric_limits<volatile T>, and
225 // std::numeric_limits<const volatile T>
226 // https://stackoverflow.com/a/16519653/
227 template<>
228 class numeric_limits<Eigen::bfloat16> : public Eigen::bfloat16_impl::numeric_limits_bfloat16_impl<> {};
229 template<>
230 class numeric_limits<const Eigen::bfloat16> : public numeric_limits<Eigen::bfloat16> {};
231 template<>
232 class numeric_limits<volatile Eigen::bfloat16> : public numeric_limits<Eigen::bfloat16> {};
233 template<>
234 class numeric_limits<const volatile Eigen::bfloat16> : public numeric_limits<Eigen::bfloat16> {};
235 } // end namespace std
236 
237 namespace Eigen {
238 
239 namespace bfloat16_impl {
240 
241 // We need to distinguish ‘clang as the CUDA compiler’ from ‘clang as the host compiler,
242 // invoked by NVCC’ (e.g. on MacOS). The former needs to see both host and device implementation
243 // of the functions, while the latter can only deal with one of them.
244 #if !defined(EIGEN_HAS_NATIVE_BF16) || (EIGEN_COMP_CLANG && !EIGEN_COMP_NVCC) // Emulate support for bfloat16 floats
245 
246 #if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
247 // We need to provide emulated *host-side* BF16 operators for clang.
248 #pragma push_macro("EIGEN_DEVICE_FUNC")
249 #undef EIGEN_DEVICE_FUNC
250 #if (defined(EIGEN_HAS_GPU_BF16) && defined(EIGEN_HAS_NATIVE_BF16))
251 #define EIGEN_DEVICE_FUNC __host__
252 #else // both host and device need emulated ops.
253 #define EIGEN_DEVICE_FUNC __host__ __device__
254 #endif
255 #endif
256 
257 // Definitions for CPUs, mostly working through conversion
258 // to/from fp32.
259 
260 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const bfloat16& b) {
261  return bfloat16(float(a) + float(b));
262 }
263 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const bfloat16& a, const int& b) {
264  return bfloat16(float(a) + static_cast<float>(b));
265 }
266 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator + (const int& a, const bfloat16& b) {
267  return bfloat16(static_cast<float>(a) + float(b));
268 }
269 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator * (const bfloat16& a, const bfloat16& b) {
270  return bfloat16(float(a) * float(b));
271 }
272 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a, const bfloat16& b) {
273  return bfloat16(float(a) - float(b));
274 }
275 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, const bfloat16& b) {
276  return bfloat16(float(a) / float(b));
277 }
278 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator - (const bfloat16& a) {
279  numext::uint16_t x = numext::bit_cast<uint16_t>(a) ^ 0x8000;
280  return numext::bit_cast<bfloat16>(x);
281 }
282 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator += (bfloat16& a, const bfloat16& b) {
283  a = bfloat16(float(a) + float(b));
284  return a;
285 }
286 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator *= (bfloat16& a, const bfloat16& b) {
287  a = bfloat16(float(a) * float(b));
288  return a;
289 }
290 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator -= (bfloat16& a, const bfloat16& b) {
291  a = bfloat16(float(a) - float(b));
292  return a;
293 }
294 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16& operator /= (bfloat16& a, const bfloat16& b) {
295  a = bfloat16(float(a) / float(b));
296  return a;
297 }
299  a += bfloat16(1);
300  return a;
301 }
303  a -= bfloat16(1);
304  return a;
305 }
306 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator++(bfloat16& a, int) {
307  bfloat16 original_value = a;
308  ++a;
309  return original_value;
310 }
311 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator--(bfloat16& a, int) {
312  bfloat16 original_value = a;
313  --a;
314  return original_value;
315 }
316 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator == (const bfloat16& a, const bfloat16& b) {
317  return numext::equal_strict(float(a),float(b));
318 }
319 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator != (const bfloat16& a, const bfloat16& b) {
320  return numext::not_equal_strict(float(a), float(b));
321 }
322 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator < (const bfloat16& a, const bfloat16& b) {
323  return float(a) < float(b);
324 }
325 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator <= (const bfloat16& a, const bfloat16& b) {
326  return float(a) <= float(b);
327 }
328 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator > (const bfloat16& a, const bfloat16& b) {
329  return float(a) > float(b);
330 }
331 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool operator >= (const bfloat16& a, const bfloat16& b) {
332  return float(a) >= float(b);
333 }
334 
335 #if EIGEN_COMP_CLANG && defined(EIGEN_CUDACC)
336 #pragma pop_macro("EIGEN_DEVICE_FUNC")
337 #endif
338 #endif // Emulate support for bfloat16 floats
339 
340 // Division by an index. Do it in full float precision to avoid accuracy
341 // issues in converting the denominator to bfloat16.
342 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 operator / (const bfloat16& a, Index b) {
343  return bfloat16(static_cast<float>(a) / static_cast<float>(b));
344 }
345 
346 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC __bfloat16_raw truncate_to_bfloat16(const float v) {
347 #if defined(EIGEN_USE_HIP_BF16)
348  return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(v, __bfloat16_raw::truncate));
349 #else
350  __bfloat16_raw output;
352  output.value = std::signbit(v) ? 0xFFC0: 0x7FC0;
353  return output;
354  }
355  output.value = static_cast<numext::uint16_t>(numext::bit_cast<numext::uint32_t>(v) >> 16);
356  return output;
357 #endif
358 }
359 
361 #if defined(EIGEN_USE_HIP_BF16)
362  __bfloat16_raw bf;
363  bf.data = value;
364  return bf;
365 #else
366  return __bfloat16_raw(value);
367 #endif
368 }
369 
371 #if defined(EIGEN_USE_HIP_BF16)
372  return bf.data;
373 #else
374  return bf.value;
375 #endif
376 }
377 
378 // float_to_bfloat16_rtne template specialization that does not make any
379 // assumption about the value of its function argument (ff).
380 template <>
382 #if defined(EIGEN_USE_HIP_BF16)
383  return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(ff));
384 #else
385  __bfloat16_raw output;
386 
388  // If the value is a NaN, squash it to a qNaN with msb of fraction set,
389  // this makes sure after truncation we don't end up with an inf.
390  //
391  // qNaN magic: All exponent bits set + most significant bit of fraction
392  // set.
393  output.value = std::signbit(ff) ? 0xFFC0: 0x7FC0;
394  } else {
395  // Fast rounding algorithm that rounds a half value to nearest even. This
396  // reduces expected error when we convert a large number of floats. Here
397  // is how it works:
398  //
399  // Definitions:
400  // To convert a float 32 to bfloat16, a float 32 can be viewed as 32 bits
401  // with the following tags:
402  //
403  // Sign | Exp (8 bits) | Frac (23 bits)
404  // S EEEEEEEE FFFFFFLRTTTTTTTTTTTTTTT
405  //
406  // S: Sign bit.
407  // E: Exponent bits.
408  // F: First 6 bits of fraction.
409  // L: Least significant bit of resulting bfloat16 if we truncate away the
410  // rest of the float32. This is also the 7th bit of fraction
411  // R: Rounding bit, 8th bit of fraction.
412  // T: Sticky bits, rest of fraction, 15 bits.
413  //
414  // To round half to nearest even, there are 3 cases where we want to round
415  // down (simply truncate the result of the bits away, which consists of
416  // rounding bit and sticky bits) and two cases where we want to round up
417  // (truncate then add one to the result).
418  //
419  // The fast converting algorithm simply adds lsb (L) to 0x7fff (15 bits of
420  // 1s) as the rounding bias, adds the rounding bias to the input, then
421  // truncates the last 16 bits away.
422  //
423  // To understand how it works, we can analyze this algorithm case by case:
424  //
425  // 1. L = 0, R = 0:
426  // Expect: round down, this is less than half value.
427  //
428  // Algorithm:
429  // - Rounding bias: 0x7fff + 0 = 0x7fff
430  // - Adding rounding bias to input may create any carry, depending on
431  // whether there is any value set to 1 in T bits.
432  // - R may be set to 1 if there is a carry.
433  // - L remains 0.
434  // - Note that this case also handles Inf and -Inf, where all fraction
435  // bits, including L, R and Ts are all 0. The output remains Inf after
436  // this algorithm.
437  //
438  // 2. L = 1, R = 0:
439  // Expect: round down, this is less than half value.
440  //
441  // Algorithm:
442  // - Rounding bias: 0x7fff + 1 = 0x8000
443  // - Adding rounding bias to input doesn't change sticky bits but
444  // adds 1 to rounding bit.
445  // - L remains 1.
446  //
447  // 3. L = 0, R = 1, all of T are 0:
448  // Expect: round down, this is exactly at half, the result is already
449  // even (L=0).
450  //
451  // Algorithm:
452  // - Rounding bias: 0x7fff + 0 = 0x7fff
453  // - Adding rounding bias to input sets all sticky bits to 1, but
454  // doesn't create a carry.
455  // - R remains 1.
456  // - L remains 0.
457  //
458  // 4. L = 1, R = 1:
459  // Expect: round up, this is exactly at half, the result needs to be
460  // round to the next even number.
461  //
462  // Algorithm:
463  // - Rounding bias: 0x7fff + 1 = 0x8000
464  // - Adding rounding bias to input doesn't change sticky bits, but
465  // creates a carry from rounding bit.
466  // - The carry sets L to 0, creates another carry bit and propagate
467  // forward to F bits.
468  // - If all the F bits are 1, a carry then propagates to the exponent
469  // bits, which then creates the minimum value with the next exponent
470  // value. Note that we won't have the case where exponents are all 1,
471  // since that's either a NaN (handled in the other if condition) or inf
472  // (handled in case 1).
473  //
474  // 5. L = 0, R = 1, any of T is 1:
475  // Expect: round up, this is greater than half.
476  //
477  // Algorithm:
478  // - Rounding bias: 0x7fff + 0 = 0x7fff
479  // - Adding rounding bias to input creates a carry from sticky bits,
480  // sets rounding bit to 0, then create another carry.
481  // - The second carry sets L to 1.
482  //
483  // Examples:
484  //
485  // Exact half value that is already even:
486  // Input:
487  // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
488  // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
489  // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1000000000000000
490  //
491  // This falls into case 3. We truncate the rest of 16 bits and no
492  // carry is created into F and L:
493  //
494  // Output:
495  // Sign | Exp (8 bit) | Frac (first 7 bit)
496  // S E E E E E E E E F F F F F F L
497  // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
498  //
499  // Exact half value, round to next even number:
500  // Input:
501  // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
502  // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
503  // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1000000000000000
504  //
505  // This falls into case 4. We create a carry from R and T,
506  // which then propagates into L and F:
507  //
508  // Output:
509  // Sign | Exp (8 bit) | Frac (first 7 bit)
510  // S E E E E E E E E F F F F F F L
511  // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
512  //
513  //
514  // Max denormal value round to min normal value:
515  // Input:
516  // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
517  // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
518  // 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1111111111111111
519  //
520  // This falls into case 4. We create a carry from R and T,
521  // propagate into L and F, which then propagates into exponent
522  // bits:
523  //
524  // Output:
525  // Sign | Exp (8 bit) | Frac (first 7 bit)
526  // S E E E E E E E E F F F F F F L
527  // 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
528  //
529  // Max normal value round to Inf:
530  // Input:
531  // Sign | Exp (8 bit) | Frac (first 7 bit) | Frac (last 16 bit)
532  // S E E E E E E E E F F F F F F L RTTTTTTTTTTTTTTT
533  // 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1111111111111111
534  //
535  // This falls into case 4. We create a carry from R and T,
536  // propagate into L and F, which then propagates into exponent
537  // bits:
538  //
539  // Sign | Exp (8 bit) | Frac (first 7 bit)
540  // S E E E E E E E E F F F F F F L
541  // 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0
542 
543  // At this point, ff must be either a normal float, or +/-infinity.
544  output = float_to_bfloat16_rtne<true>(ff);
545  }
546  return output;
547 #endif
548 }
549 
550 // float_to_bfloat16_rtne template specialization that assumes that its function
551 // argument (ff) is either a normal floating point number, or +/-infinity, or
552 // zero. Used to improve the runtime performance of conversion from an integer
553 // type to bfloat16.
554 template <>
556 #if defined(EIGEN_USE_HIP_BF16)
557  return __bfloat16_raw(__bfloat16_raw::round_to_bfloat16(ff));
558 #else
559  numext::uint32_t input = numext::bit_cast<numext::uint32_t>(ff);
560  __bfloat16_raw output;
561 
562  // Least significant bit of resulting bfloat.
563  numext::uint32_t lsb = (input >> 16) & 1;
564  numext::uint32_t rounding_bias = 0x7fff + lsb;
565  input += rounding_bias;
566  output.value = static_cast<numext::uint16_t>(input >> 16);
567  return output;
568 #endif
569 }
570 
571 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC float bfloat16_to_float(__bfloat16_raw h) {
572 #if defined(EIGEN_USE_HIP_BF16)
573  return static_cast<float>(h);
574 #else
575  return numext::bit_cast<float>(static_cast<numext::uint32_t>(h.value) << 16);
576 #endif
577 }
578 
579 // --- standard functions ---
580 
581 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isinf)(const bfloat16& a) {
583 #if defined(EIGEN_USE_HIP_BF16)
584  return (isinf)(a); // Uses HIP hip_bfloat16 isinf operator
585 #else
586  return (isinf)(float(a));
587 #endif
588 }
589 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isnan)(const bfloat16& a) {
591 #if defined(EIGEN_USE_HIP_BF16)
592  return (isnan)(a); // Uses HIP hip_bfloat16 isnan operator
593 #else
594  return (isnan)(float(a));
595 #endif
596 }
597 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bool (isfinite)(const bfloat16& a) {
598  return !(isinf EIGEN_NOT_A_MACRO (a)) && !(isnan EIGEN_NOT_A_MACRO (a));
599 }
600 
601 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 abs(const bfloat16& a) {
602  numext::uint16_t x = numext::bit_cast<numext::uint16_t>(a) & 0x7FFF;
603  return numext::bit_cast<bfloat16>(x);
604 }
605 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 exp(const bfloat16& a) {
606  return bfloat16(::expf(float(a)));
607 }
608 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 expm1(const bfloat16& a) {
609  return bfloat16(numext::expm1(float(a)));
610 }
611 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log(const bfloat16& a) {
612  return bfloat16(::logf(float(a)));
613 }
614 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log1p(const bfloat16& a) {
615  return bfloat16(numext::log1p(float(a)));
616 }
617 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log10(const bfloat16& a) {
618  return bfloat16(::log10f(float(a)));
619 }
620 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 log2(const bfloat16& a) {
621  return bfloat16(static_cast<float>(EIGEN_LOG2E) * ::logf(float(a)));
622 }
623 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sqrt(const bfloat16& a) {
624  return bfloat16(::sqrtf(float(a)));
625 }
626 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 pow(const bfloat16& a, const bfloat16& b) {
627  return bfloat16(::powf(float(a), float(b)));
628 }
629 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan2(const bfloat16& a, const bfloat16& b) {
630  return bfloat16(::atan2f(float(a), float(b)));
631 }
632 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sin(const bfloat16& a) {
633  return bfloat16(::sinf(float(a)));
634 }
635 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cos(const bfloat16& a) {
636  return bfloat16(::cosf(float(a)));
637 }
638 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tan(const bfloat16& a) {
639  return bfloat16(::tanf(float(a)));
640 }
641 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asin(const bfloat16& a) {
642  return bfloat16(::asinf(float(a)));
643 }
644 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acos(const bfloat16& a) {
645  return bfloat16(::acosf(float(a)));
646 }
647 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atan(const bfloat16& a) {
648  return bfloat16(::atanf(float(a)));
649 }
650 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 sinh(const bfloat16& a) {
651  return bfloat16(::sinhf(float(a)));
652 }
653 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 cosh(const bfloat16& a) {
654  return bfloat16(::coshf(float(a)));
655 }
656 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 tanh(const bfloat16& a) {
657  return bfloat16(::tanhf(float(a)));
658 }
659 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 asinh(const bfloat16& a) {
660  return bfloat16(::asinhf(float(a)));
661 }
662 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 acosh(const bfloat16& a) {
663  return bfloat16(::acoshf(float(a)));
664 }
665 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 atanh(const bfloat16& a) {
666  return bfloat16(::atanhf(float(a)));
667 }
668 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 floor(const bfloat16& a) {
669  return bfloat16(::floorf(float(a)));
670 }
671 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 ceil(const bfloat16& a) {
672  return bfloat16(::ceilf(float(a)));
673 }
674 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 rint(const bfloat16& a) {
675  return bfloat16(::rintf(float(a)));
676 }
677 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 round(const bfloat16& a) {
678  return bfloat16(::roundf(float(a)));
679 }
680 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmod(const bfloat16& a, const bfloat16& b) {
681  return bfloat16(::fmodf(float(a), float(b)));
682 }
683 
684 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (min)(const bfloat16& a, const bfloat16& b) {
685  const float f1 = static_cast<float>(a);
686  const float f2 = static_cast<float>(b);
687  return f2 < f1 ? b : a;
688 }
689 
690 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 (max)(const bfloat16& a, const bfloat16& b) {
691  const float f1 = static_cast<float>(a);
692  const float f2 = static_cast<float>(b);
693  return f1 < f2 ? b : a;
694 }
695 
696 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmin(const bfloat16& a, const bfloat16& b) {
697  const float f1 = static_cast<float>(a);
698  const float f2 = static_cast<float>(b);
699  return bfloat16(::fminf(f1, f2));
700 }
701 
702 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16 fmax(const bfloat16& a, const bfloat16& b) {
703  const float f1 = static_cast<float>(a);
704  const float f2 = static_cast<float>(b);
705  return bfloat16(::fmaxf(f1, f2));
706 }
707 
708 #ifndef EIGEN_NO_IO
709 EIGEN_ALWAYS_INLINE std::ostream& operator << (std::ostream& os, const bfloat16& v) {
710  os << static_cast<float>(v);
711  return os;
712 }
713 #endif
714 
715 } // namespace bfloat16_impl
716 
717 namespace internal {
718 
719 template<>
720 struct random_default_impl<bfloat16, false, false>
721 {
722  static inline bfloat16 run(const bfloat16& x, const bfloat16& y)
723  {
724  return x + (y-x) * bfloat16(float(std::rand()) / float(RAND_MAX));
725  }
726  static inline bfloat16 run()
727  {
728  return run(bfloat16(-1.f), bfloat16(1.f));
729  }
730 };
731 
732 template<> struct is_arithmetic<bfloat16> { enum { value = true }; };
733 
734 } // namespace internal
735 
736 template<> struct NumTraits<Eigen::bfloat16>
737  : GenericNumTraits<Eigen::bfloat16>
738 {
739  enum {
740  IsSigned = true,
741  IsInteger = false,
742  IsComplex = false,
743  RequireInitialization = false
744  };
745 
748  }
750  return bfloat16_impl::raw_uint16_to_bfloat16(0x3D4D); // bfloat16(5e-2f);
751  }
754  }
757  }
760  }
763  }
764 };
765 
766 } // namespace Eigen
767 
768 
769 #if defined(EIGEN_HAS_HIP_BF16)
770  #pragma pop_macro("EIGEN_CONSTEXPR")
771 #endif
772 
773 namespace Eigen {
774 namespace numext {
775 
776 template<>
779  return (bfloat16_impl::isnan)(h);
780 }
781 
782 template<>
785  return (bfloat16_impl::isinf)(h);
786 }
787 
788 template<>
791  return (bfloat16_impl::isfinite)(h);
792 }
793 
794 template <>
795 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Eigen::bfloat16 bit_cast<Eigen::bfloat16, uint16_t>(const uint16_t& src) {
797 }
798 
799 template <>
800 EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC uint16_t bit_cast<uint16_t, Eigen::bfloat16>(const Eigen::bfloat16& src) {
802 }
803 
804 } // namespace numext
805 } // namespace Eigen
806 
807 #if EIGEN_HAS_STD_HASH
808 namespace std {
809 template <>
810 struct hash<Eigen::bfloat16> {
811  EIGEN_STRONG_INLINE std::size_t operator()(const Eigen::bfloat16& a) const {
812  return static_cast<std::size_t>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(a));
813  }
814 };
815 } // namespace std
816 #endif
817 
818 // Add the missing shfl* intrinsics.
819 // The __shfl* functions are only valid on HIP or _CUDA_ARCH_ >= 300.
820 // CUDA defines them for (__CUDA_ARCH__ >= 300 || !defined(__CUDA_ARCH__))
821 //
822 // HIP and CUDA prior to SDK 9.0 define
823 // __shfl, __shfl_up, __shfl_down, __shfl_xor for int and float
824 // CUDA since 9.0 deprecates those and instead defines
825 // __shfl_sync, __shfl_up_sync, __shfl_down_sync, __shfl_xor_sync,
826 // with native support for __half and __nv_bfloat16
827 //
828 // Note that the following are __device__ - only functions.
829 #if defined(EIGEN_HIPCC)
830 
831 #if defined(EIGEN_HAS_HIP_BF16)
832 
833 __device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl(Eigen::bfloat16 var, int srcLane, int width=warpSize) {
834  const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));
835  return Eigen::numext::bit_cast<Eigen::bfloat16>(static_cast<Eigen::numext::uint16_t>(__shfl(ivar, srcLane, width)));
836 }
837 
838 __device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_up(Eigen::bfloat16 var, unsigned int delta, int width=warpSize) {
839  const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));
840  return Eigen::numext::bit_cast<Eigen::bfloat16>(static_cast<Eigen::numext::uint16_t>(__shfl_up(ivar, delta, width)));
841 }
842 
843 __device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_down(Eigen::bfloat16 var, unsigned int delta, int width=warpSize) {
844  const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));
845  return Eigen::numext::bit_cast<Eigen::bfloat16>(static_cast<Eigen::numext::uint16_t>(__shfl_down(ivar, delta, width)));
846 }
847 
848 __device__ EIGEN_STRONG_INLINE Eigen::bfloat16 __shfl_xor(Eigen::bfloat16 var, int laneMask, int width=warpSize) {
849  const int ivar = static_cast<int>(Eigen::numext::bit_cast<Eigen::numext::uint16_t>(var));
850  return Eigen::numext::bit_cast<Eigen::bfloat16>(static_cast<Eigen::numext::uint16_t>(__shfl_xor(ivar, laneMask, width)));
851 }
852 
853 #endif // HIP
854 
855 #endif // __shfl*
856 
857 #if defined(EIGEN_HIPCC)
858 EIGEN_STRONG_INLINE __device__ Eigen::bfloat16 __ldg(const Eigen::bfloat16* ptr) {
859  return Eigen::bfloat16_impl::raw_uint16_to_bfloat16(__ldg(Eigen::numext::bit_cast<const Eigen::numext::uint16_t*>(ptr)));
860 }
861 #endif // __ldg
862 
863 #endif // EIGEN_BFLOAT16_H
const Log1pReturnType log1p() const
const Expm1ReturnType expm1() const
Array< int, Dynamic, 1 > v
Array< int, 3, 1 > b
IndexedView_or_Block operator()(const RowIndices &rowIndices, const ColIndices &colIndices)
#define EIGEN_ALWAYS_INLINE
Definition: Macros.h:836
#define EIGEN_USING_STD(FUNC)
Definition: Macros.h:1080
#define EIGEN_CONSTEXPR
Definition: Macros.h:747
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:883
#define EIGEN_NOT_A_MACRO
Definition: Macros.h:804
#define EIGEN_LOG2E
Definition: MathFunctions.h:17
bfloat16 asin(const bfloat16 &a)
Definition: BFloat16.h:641
bfloat16 cos(const bfloat16 &a)
Definition: BFloat16.h:635
bfloat16 rint(const bfloat16 &a)
Definition: BFloat16.h:674
bfloat16 acosh(const bfloat16 &a)
Definition: BFloat16.h:662
bfloat16 acos(const bfloat16 &a)
Definition: BFloat16.h:644
float bfloat16_to_float(__bfloat16_raw h)
Definition: BFloat16.h:571
bool() isinf(const bfloat16 &a)
Definition: BFloat16.h:581
bfloat16 sin(const bfloat16 &a)
Definition: BFloat16.h:632
bfloat16 tanh(const bfloat16 &a)
Definition: BFloat16.h:656
bfloat16 fmax(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:702
bfloat16 asinh(const bfloat16 &a)
Definition: BFloat16.h:659
bfloat16 floor(const bfloat16 &a)
Definition: BFloat16.h:668
bfloat16 expm1(const bfloat16 &a)
Definition: BFloat16.h:608
bfloat16 operator+(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:260
bfloat16 & operator/=(bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:294
bfloat16() max(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:690
bfloat16 ceil(const bfloat16 &a)
Definition: BFloat16.h:671
__bfloat16_raw truncate_to_bfloat16(const float v)
Definition: BFloat16.h:346
bfloat16 & operator*=(bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:286
bool operator==(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:316
bfloat16 log1p(const bfloat16 &a)
Definition: BFloat16.h:614
bfloat16 atanh(const bfloat16 &a)
Definition: BFloat16.h:665
bfloat16 atan(const bfloat16 &a)
Definition: BFloat16.h:647
bfloat16 abs(const bfloat16 &a)
Definition: BFloat16.h:601
bfloat16 cosh(const bfloat16 &a)
Definition: BFloat16.h:653
bfloat16 log2(const bfloat16 &a)
Definition: BFloat16.h:620
__bfloat16_raw float_to_bfloat16_rtne< false >(float ff)
Definition: BFloat16.h:381
EIGEN_ALWAYS_INLINE std::ostream & operator<<(std::ostream &os, const bfloat16 &v)
Definition: BFloat16.h:709
bfloat16 log10(const bfloat16 &a)
Definition: BFloat16.h:617
EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(numext::uint16_t value)
Definition: BFloat16.h:360
bool operator>=(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:331
bool operator>(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:328
bfloat16 sinh(const bfloat16 &a)
Definition: BFloat16.h:650
bfloat16 operator*(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:269
bfloat16 operator++(bfloat16 &a)
Definition: BFloat16.h:298
bool() isfinite(const bfloat16 &a)
Definition: BFloat16.h:597
EIGEN_CONSTEXPR numext::uint16_t raw_bfloat16_as_uint16(const __bfloat16_raw &bf)
Definition: BFloat16.h:370
EIGEN_CONSTEXPR __bfloat16_raw raw_uint16_to_bfloat16(unsigned short value)
bfloat16 round(const bfloat16 &a)
Definition: BFloat16.h:677
bfloat16 & operator+=(bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:282
bfloat16 operator--(bfloat16 &a)
Definition: BFloat16.h:302
bfloat16 exp(const bfloat16 &a)
Definition: BFloat16.h:605
__bfloat16_raw float_to_bfloat16_rtne(float ff)
bfloat16 pow(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:626
bool operator<(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:322
bfloat16 & operator-=(bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:290
bfloat16 atan2(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:629
bfloat16 tan(const bfloat16 &a)
Definition: BFloat16.h:638
bool operator<=(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:325
__bfloat16_raw float_to_bfloat16_rtne< true >(float ff)
Definition: BFloat16.h:555
bool operator!=(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:319
bfloat16 fmin(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:696
bfloat16 operator/(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:275
bfloat16 log(const bfloat16 &a)
Definition: BFloat16.h:611
bool() isnan(const bfloat16 &a)
Definition: BFloat16.h:589
bfloat16 fmod(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:680
bfloat16 operator-(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:272
bfloat16() min(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:684
bfloat16 sqrt(const bfloat16 &a)
Definition: BFloat16.h:623
const Scalar & y
bool equal_strict(const X &x, const Y &y)
Definition: Meta.h:460
bool not_equal_strict(const X &x, const Y &y)
Definition: Meta.h:485
EIGEN_ALWAYS_INLINE bool() isinf(const Eigen::bfloat16 &h)
Definition: BFloat16.h:784
std::uint16_t uint16_t
Definition: Meta.h:37
static constexpr EIGEN_ALWAYS_INLINE Scalar signbit(const Scalar &x)
EIGEN_ALWAYS_INLINE bool() isnan(const Eigen::bfloat16 &h)
Definition: BFloat16.h:778
std::uint32_t uint32_t
Definition: Meta.h:39
EIGEN_ALWAYS_INLINE bool() isfinite(const Eigen::bfloat16 &h)
Definition: BFloat16.h:790
: InteropHeaders
Definition: Core:139
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:82
const Eigen::CwiseUnaryOp< Eigen::internal::scalar_real_op< typename Derived::Scalar >, const Derived > real(const Eigen::ArrayBase< Derived > &x)
Definition: BFloat16.h:222
static EIGEN_CONSTEXPR Eigen::bfloat16 dummy_precision()
Definition: BFloat16.h:749
static EIGEN_CONSTEXPR Eigen::bfloat16 lowest()
Definition: BFloat16.h:755
static EIGEN_CONSTEXPR Eigen::bfloat16 quiet_NaN()
Definition: BFloat16.h:761
static EIGEN_CONSTEXPR Eigen::bfloat16 infinity()
Definition: BFloat16.h:758
static EIGEN_CONSTEXPR Eigen::bfloat16 highest()
Definition: BFloat16.h:752
static EIGEN_CONSTEXPR Eigen::bfloat16 epsilon()
Definition: BFloat16.h:746
Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
Definition: NumTraits.h:231
EIGEN_CONSTEXPR __bfloat16_raw(unsigned short raw)
Definition: BFloat16.h:75
EIGEN_CONSTEXPR __bfloat16_raw()
Definition: BFloat16.h:73
EIGEN_CONSTEXPR bfloat16_base(const __bfloat16_raw &h)
Definition: BFloat16.h:94
EIGEN_CONSTEXPR bfloat16_base()
Definition: BFloat16.h:93
static EIGEN_CONSTEXPR const bool tinyness_before
Definition: BFloat16.h:160
static EIGEN_CONSTEXPR Eigen::bfloat16() min()
Definition: BFloat16.h:162
static EIGEN_CONSTEXPR const std::float_round_style round_style
Definition: BFloat16.h:143
static EIGEN_CONSTEXPR const bool has_denorm_loss
Definition: BFloat16.h:142
static EIGEN_CONSTEXPR const int min_exponent
Definition: BFloat16.h:153
static EIGEN_CONSTEXPR const bool has_infinity
Definition: BFloat16.h:138
static EIGEN_CONSTEXPR const int radix
Definition: BFloat16.h:152
static EIGEN_CONSTEXPR const bool is_iec559
Definition: BFloat16.h:144
static EIGEN_CONSTEXPR const bool is_bounded
Definition: BFloat16.h:147
static EIGEN_CONSTEXPR const bool is_exact
Definition: BFloat16.h:137
static EIGEN_CONSTEXPR const bool is_integer
Definition: BFloat16.h:136
static EIGEN_CONSTEXPR Eigen::bfloat16 denorm_min()
Definition: BFloat16.h:170
static EIGEN_CONSTEXPR Eigen::bfloat16 quiet_NaN()
Definition: BFloat16.h:168
static EIGEN_CONSTEXPR Eigen::bfloat16 epsilon()
Definition: BFloat16.h:165
static EIGEN_CONSTEXPR const int max_digits10
Definition: BFloat16.h:151
static EIGEN_CONSTEXPR const int max_exponent10
Definition: BFloat16.h:156
static EIGEN_CONSTEXPR const int max_exponent
Definition: BFloat16.h:155
static EIGEN_CONSTEXPR const int digits10
Definition: BFloat16.h:150
static EIGEN_CONSTEXPR Eigen::bfloat16 lowest()
Definition: BFloat16.h:163
static EIGEN_CONSTEXPR const bool has_signaling_NaN
Definition: BFloat16.h:140
static EIGEN_CONSTEXPR Eigen::bfloat16 infinity()
Definition: BFloat16.h:167
static EIGEN_CONSTEXPR Eigen::bfloat16() max()
Definition: BFloat16.h:164
static EIGEN_CONSTEXPR Eigen::bfloat16 signaling_NaN()
Definition: BFloat16.h:169
static EIGEN_CONSTEXPR Eigen::bfloat16 round_error()
Definition: BFloat16.h:166
static EIGEN_CONSTEXPR const bool traps
Definition: BFloat16.h:157
static EIGEN_CONSTEXPR const bool is_modulo
Definition: BFloat16.h:148
static EIGEN_CONSTEXPR const bool is_specialized
Definition: BFloat16.h:134
static EIGEN_CONSTEXPR const bool is_signed
Definition: BFloat16.h:135
static EIGEN_CONSTEXPR const bool has_quiet_NaN
Definition: BFloat16.h:139
static EIGEN_CONSTEXPR const std::float_denorm_style has_denorm
Definition: BFloat16.h:141
static EIGEN_CONSTEXPR const int min_exponent10
Definition: BFloat16.h:154
static EIGEN_CONSTEXPR const int digits
Definition: BFloat16.h:149
EIGEN_CONSTEXPR bfloat16(bool b)
Definition: BFloat16.h:108
bfloat16_impl::__bfloat16_raw __bfloat16_raw
Definition: BFloat16.h:102
EIGEN_CONSTEXPR bfloat16(T val)
Definition: BFloat16.h:112
EIGEN_CONSTEXPR bfloat16()
Definition: BFloat16.h:104
bfloat16(float f)
Definition: BFloat16.h:115
EIGEN_CONSTEXPR bfloat16(const __bfloat16_raw &h)
Definition: BFloat16.h:106
EIGEN_CONSTEXPR bfloat16(const std::complex< RealScalar > &val)
Definition: BFloat16.h:121