Struct half::bf16

source ·

#[repr(transparent)]
pub struct bf16(_);

Expand description

A 16-bit floating point type implementing the bfloat16 format.

The bfloat16 floating point format is a truncated 16-bit version of the IEEE 754 standard binary32, a.k.a. f32. bf16 has approximately the same dynamic range as f32, at the cost of a lower precision than f16: while f16 has a precision of 11 bits, bf16 has a precision of only 8 bits.

Like f16, bf16 is intended for compact storage rather than calculations. Although arithmetic operator traits such as Add and AddAssign are implemented for bf16 (see Trait Implementations below), operations should preferably be performed with f32 or higher-precision types and converted to/from bf16 as necessary.

Struct half::bf16

Implementations§

impl bf16

pub const fn from_bits(bits: u16) -> bf16

pub fn from_f32(value: f32) -> bf16

pub fn from_f64(value: f64) -> bf16

pub const fn to_bits(self) -> u16

pub const fn to_le_bytes(self) -> [u8; 2]

pub const fn to_be_bytes(self) -> [u8; 2]

pub const fn to_ne_bytes(self) -> [u8; 2]

pub const fn from_le_bytes(bytes: [u8; 2]) -> bf16

pub const fn from_be_bytes(bytes: [u8; 2]) -> bf16

pub const fn from_ne_bytes(bytes: [u8; 2]) -> bf16

pub fn to_f32(self) -> f32

pub fn to_f64(self) -> f64

pub const fn is_nan(self) -> bool

pub const fn is_infinite(self) -> bool

pub const fn is_finite(self) -> bool

pub const fn is_normal(self) -> bool

pub const fn classify(self) -> FpCategory

pub const fn signum(self) -> bf16

pub const fn is_sign_positive(self) -> bool

pub const fn is_sign_negative(self) -> bool

pub const fn copysign(self, sign: bf16) -> bf16

pub fn max(self, other: bf16) -> bf16

pub fn min(self, other: bf16) -> bf16

pub fn clamp(self, min: bf16, max: bf16) -> bf16

pub const DIGITS: u32 = 2u32

pub const EPSILON: bf16 = _

pub const INFINITY: bf16 = _

pub const MANTISSA_DIGITS: u32 = 8u32

pub const MAX: bf16 = _

pub const MAX_10_EXP: i32 = 38i32

pub const MAX_EXP: i32 = 128i32

pub const MIN: bf16 = _

pub const MIN_10_EXP: i32 = -37i32

pub const MIN_EXP: i32 = -125i32

pub const MIN_POSITIVE: bf16 = _

pub const NAN: bf16 = _

pub const NEG_INFINITY: bf16 = _

pub const RADIX: u32 = 2u32

pub const MIN_POSITIVE_SUBNORMAL: bf16 = _

pub const MAX_SUBNORMAL: bf16 = _

pub const ONE: bf16 = _

pub const ZERO: bf16 = _

pub const NEG_ZERO: bf16 = _

pub const NEG_ONE: bf16 = _

pub const E: bf16 = _

pub const PI: bf16 = _

pub const FRAC_1_PI: bf16 = _

pub const FRAC_1_SQRT_2: bf16 = _

pub const FRAC_2_PI: bf16 = _

pub const FRAC_2_SQRT_PI: bf16 = _

pub const FRAC_PI_2: bf16 = _

pub const FRAC_PI_3: bf16 = _

pub const FRAC_PI_4: bf16 = _

pub const FRAC_PI_6: bf16 = _

pub const FRAC_PI_8: bf16 = _

pub const LN_10: bf16 = _

pub const LN_2: bf16 = _

pub const LOG10_E: bf16 = _

pub const LOG10_2: bf16 = _

pub const LOG2_E: bf16 = _

pub const LOG2_10: bf16 = _

pub const SQRT_2: bf16 = _

Trait Implementations§

impl Add<&bf16> for &bf16

type Output = <bf16 as Add<bf16>>::Output

fn add(self, rhs: &bf16) -> Self::Output

impl Add<&bf16> for bf16

type Output = <bf16 as Add<bf16>>::Output

fn add(self, rhs: &bf16) -> Self::Output

impl Add<bf16> for &bf16

type Output = <bf16 as Add<bf16>>::Output

fn add(self, rhs: bf16) -> Self::Output

impl Add<bf16> for bf16

type Output = bf16

fn add(self, rhs: Self) -> Self::Output

impl AddAssign<&bf16> for bf16

fn add_assign(&mut self, rhs: &bf16)