Library Flocq.IEEE754.Binary
This file is part of the Flocq formalization of floating-point
arithmetic in Coq: https://flocq.gitlabpages.inria.fr/
Copyright (C) 2010-2018 Sylvie Boldo
Copyright (C) 2010-2018 Guillaume Melquiond
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
COPYING file for more details.
Copyright (C) 2010-2018 Guillaume Melquiond
IEEE-754 arithmetic
From Coq Require Import ZArith Reals Psatz SpecFloat.
Require Import Core Round Bracket Operations Div Sqrt Relative BinarySingleNaN.
Arguments BinarySingleNaN.B754_zero {prec emax}.
Arguments BinarySingleNaN.B754_infinity {prec emax}.
Arguments BinarySingleNaN.B754_nan {prec emax}.
Arguments BinarySingleNaN.B754_finite {prec emax}.
Section AnyRadix.
Inductive full_float :=
| F754_zero (s : bool)
| F754_infinity (s : bool)
| F754_nan (s : bool) (m : positive)
| F754_finite (s : bool) (m : positive) (e : Z).
Definition FF2SF x :=
match x with
| F754_zero s ⇒ S754_zero s
| F754_infinity s ⇒ S754_infinity s
| F754_nan _ _ ⇒ S754_nan
| F754_finite s m e ⇒ S754_finite s m e
end.
Definition FF2R beta x :=
match x with
| F754_finite s m e ⇒ F2R (Float beta (cond_Zopp s (Zpos m)) e)
| _ ⇒ 0%R
end.
Lemma SF2R_FF2SF :
∀ beta x,
SF2R beta (FF2SF x) = FF2R beta x.
Definition SF2FF x :=
match x with
| S754_zero s ⇒ F754_zero s
| S754_infinity s ⇒ F754_infinity s
| S754_nan ⇒ F754_nan false xH
| S754_finite s m e ⇒ F754_finite s m e
end.
Lemma FF2SF_SF2FF :
∀ x,
FF2SF (SF2FF x) = x.
Lemma FF2R_SF2FF :
∀ beta x,
FF2R beta (SF2FF x) = SF2R beta x.
Definition is_nan_FF f :=
match f with
| F754_nan _ _ ⇒ true
| _ ⇒ false
end.
Lemma is_nan_SF2FF :
∀ x,
is_nan_FF (SF2FF x) = is_nan_SF x.
Lemma is_nan_FF2SF :
∀ x,
is_nan_SF (FF2SF x) = is_nan_FF x.
Lemma SF2FF_FF2SF :
∀ x,
is_nan_FF x = false →
SF2FF (FF2SF x) = x.
Definition sign_FF x :=
match x with
| F754_nan s _ ⇒ s
| F754_zero s ⇒ s
| F754_infinity s ⇒ s
| F754_finite s _ _ ⇒ s
end.
Definition is_finite_FF f :=
match f with
| F754_finite _ _ _ ⇒ true
| F754_zero _ ⇒ true
| _ ⇒ false
end.
Lemma is_finite_SF2FF :
∀ x,
is_finite_FF (SF2FF x) = is_finite_SF x.
Lemma sign_SF2FF :
∀ x,
sign_FF (SF2FF x) = sign_SF x.
End AnyRadix.
Section Binary.
Arguments exist {A} {P}.
prec is the number of bits of the mantissa including the implicit one;
emax is the exponent of the infinities.
For instance, binary32 is defined by prec = 24 and emax = 128.
Variable prec emax : Z.
Context (prec_gt_0_ : Prec_gt_0 prec).
Context (prec_lt_emax_ : Prec_lt_emax prec emax).
Notation emin := (emin prec emax) (only parsing).
Notation fexp := (fexp prec emax) (only parsing).
Instance fexp_correct : Valid_exp fexp := FLT_exp_valid emin prec.
Instance fexp_monotone : Monotone_exp fexp := FLT_exp_monotone emin prec.
Notation canonical_mantissa := (canonical_mantissa prec emax).
Notation bounded := (SpecFloat.bounded prec emax).
Definition nan_pl pl :=
Zlt_bool (Zpos (digits2_pos pl)) prec.
Notation valid_binary_SF := (valid_binary prec emax).
Definition valid_binary x :=
match x with
| F754_finite _ m e ⇒ bounded m e
| F754_nan _ pl ⇒ nan_pl pl
| _ ⇒ true
end.
Lemma valid_binary_SF2FF :
∀ x,
is_nan_SF x = false →
valid_binary (SF2FF x) = valid_binary_SF x.
Context (prec_gt_0_ : Prec_gt_0 prec).
Context (prec_lt_emax_ : Prec_lt_emax prec emax).
Notation emin := (emin prec emax) (only parsing).
Notation fexp := (fexp prec emax) (only parsing).
Instance fexp_correct : Valid_exp fexp := FLT_exp_valid emin prec.
Instance fexp_monotone : Monotone_exp fexp := FLT_exp_monotone emin prec.
Notation canonical_mantissa := (canonical_mantissa prec emax).
Notation bounded := (SpecFloat.bounded prec emax).
Definition nan_pl pl :=
Zlt_bool (Zpos (digits2_pos pl)) prec.
Notation valid_binary_SF := (valid_binary prec emax).
Definition valid_binary x :=
match x with
| F754_finite _ m e ⇒ bounded m e
| F754_nan _ pl ⇒ nan_pl pl
| _ ⇒ true
end.
Lemma valid_binary_SF2FF :
∀ x,
is_nan_SF x = false →
valid_binary (SF2FF x) = valid_binary_SF x.
Basic type used for representing binary FP numbers.
Note that there is exactly one such object per FP datum.
Inductive binary_float :=
| B754_zero (s : bool)
| B754_infinity (s : bool)
| B754_nan (s : bool) (pl : positive) :
nan_pl pl = true → binary_float
| B754_finite (s : bool) (m : positive) (e : Z) :
bounded m e = true → binary_float.
Definition B2BSN (x : binary_float) : BinarySingleNaN.binary_float prec emax :=
match x with
| B754_zero s ⇒ BinarySingleNaN.B754_zero s
| B754_infinity s ⇒ BinarySingleNaN.B754_infinity s
| B754_nan _ _ _ ⇒ BinarySingleNaN.B754_nan
| B754_finite s m e H ⇒ BinarySingleNaN.B754_finite s m e H
end.
Definition FF2B x :=
match x as x return valid_binary x = true → binary_float with
| F754_finite s m e ⇒ B754_finite s m e
| F754_infinity s ⇒ fun _ ⇒ B754_infinity s
| F754_zero s ⇒ fun _ ⇒ B754_zero s
| F754_nan b pl ⇒ fun H ⇒ B754_nan b pl H
end.
Definition B2FF x :=
match x with
| B754_finite s m e _ ⇒ F754_finite s m e
| B754_infinity s ⇒ F754_infinity s
| B754_zero s ⇒ F754_zero s
| B754_nan b pl _ ⇒ F754_nan b pl
end.
Definition B2R f :=
match f with
| B754_finite s m e _ ⇒ F2R (Float radix2 (cond_Zopp s (Zpos m)) e)
| _ ⇒ 0%R
end.
Definition B2SF (x : binary_float) :=
match x with
| B754_finite s m e _ ⇒ S754_finite s m e
| B754_infinity s ⇒ S754_infinity s
| B754_zero s ⇒ S754_zero s
| B754_nan _ _ _ ⇒ S754_nan
end.
Lemma B2SF_B2BSN :
∀ x,
BinarySingleNaN.B2SF (B2BSN x) = B2SF x.
Lemma B2SF_FF2B :
∀ x Bx,
B2SF (FF2B x Bx) = FF2SF x.
Lemma B2R_B2BSN :
∀ x, BinarySingleNaN.B2R (B2BSN x) = B2R x.
Lemma FF2SF_B2FF :
∀ x,
FF2SF (B2FF x) = B2SF x.
Theorem FF2R_B2FF :
∀ x,
FF2R radix2 (B2FF x) = B2R x.
Theorem B2FF_FF2B :
∀ x Hx,
B2FF (FF2B x Hx) = x.
Theorem valid_binary_B2FF :
∀ x,
valid_binary (B2FF x) = true.
Theorem FF2B_B2FF :
∀ x H,
FF2B (B2FF x) H = x.
Theorem FF2B_B2FF_valid :
∀ x,
FF2B (B2FF x) (valid_binary_B2FF x) = x.
Theorem B2R_FF2B :
∀ x Hx,
B2R (FF2B x Hx) = FF2R radix2 x.
Theorem match_FF2B :
∀ {T} fz fi fn ff x Hx,
match FF2B x Hx return T with
| B754_zero sx ⇒ fz sx
| B754_infinity sx ⇒ fi sx
| B754_nan b p _ ⇒ fn b p
| B754_finite sx mx ex _ ⇒ ff sx mx ex
end =
match x with
| F754_zero sx ⇒ fz sx
| F754_infinity sx ⇒ fi sx
| F754_nan b p ⇒ fn b p
| F754_finite sx mx ex ⇒ ff sx mx ex
end.
Theorem canonical_canonical_mantissa :
∀ (sx : bool) mx ex,
canonical_mantissa mx ex = true →
canonical radix2 fexp (Float radix2 (cond_Zopp sx (Zpos mx)) ex).
Theorem generic_format_B2R :
∀ x,
generic_format radix2 fexp (B2R x).
Theorem FLT_format_B2R :
∀ x,
FLT_format radix2 emin prec (B2R x).
Theorem B2FF_inj :
∀ x y : binary_float,
B2FF x = B2FF y →
x = y.
Definition is_finite_strict f :=
match f with
| B754_finite _ _ _ _ ⇒ true
| _ ⇒ false
end.
Lemma is_finite_strict_B2BSN :
∀ x, BinarySingleNaN.is_finite_strict (B2BSN x) = is_finite_strict x.
Theorem B2R_inj:
∀ x y : binary_float,
is_finite_strict x = true →
is_finite_strict y = true →
B2R x = B2R y →
x = y.
Definition Bsign x :=
match x with
| B754_nan s _ _ ⇒ s
| B754_zero s ⇒ s
| B754_infinity s ⇒ s
| B754_finite s _ _ _ ⇒ s
end.
Theorem Bsign_FF2B :
∀ x H,
Bsign (FF2B x H) = sign_FF x.
Definition is_finite f :=
match f with
| B754_finite _ _ _ _ ⇒ true
| B754_zero _ ⇒ true
| _ ⇒ false
end.
Lemma is_finite_B2BSN :
∀ x, BinarySingleNaN.is_finite (B2BSN x) = is_finite x.
Theorem is_finite_FF2B :
∀ x Hx,
is_finite (FF2B x Hx) = is_finite_FF x.
Theorem is_finite_B2FF :
∀ x,
is_finite_FF (B2FF x) = is_finite x.
Theorem B2R_Bsign_inj:
∀ x y : binary_float,
is_finite x = true →
is_finite y = true →
B2R x = B2R y →
Bsign x = Bsign y →
x = y.
Definition is_nan f :=
match f with
| B754_nan _ _ _ ⇒ true
| _ ⇒ false
end.
Lemma is_nan_B2BSN :
∀ x,
BinarySingleNaN.is_nan (B2BSN x) = is_nan x.
Theorem is_nan_FF2B :
∀ x Hx,
is_nan (FF2B x Hx) = is_nan_FF x.
Theorem is_nan_B2FF :
∀ x,
is_nan_FF (B2FF x) = is_nan x.
Definition get_nan_pl (x : binary_float) : positive :=
match x with B754_nan _ pl _ ⇒ pl | _ ⇒ xH end.
Definition build_nan (x : { x | is_nan x = true }) : binary_float.
Theorem build_nan_correct :
∀ x : { x | is_nan x = true },
build_nan x = proj1_sig x.
Theorem B2R_build_nan :
∀ x, B2R (build_nan x) = 0%R.
Theorem is_finite_build_nan :
∀ x, is_finite (build_nan x) = false.
Theorem is_nan_build_nan :
∀ x, is_nan (build_nan x) = true.
Definition BSN2B (nan : {x : binary_float | is_nan x = true }) (x : BinarySingleNaN.binary_float prec emax) : binary_float :=
match x with
| BinarySingleNaN.B754_nan ⇒ build_nan nan
| BinarySingleNaN.B754_zero s ⇒ B754_zero s
| BinarySingleNaN.B754_infinity s ⇒ B754_infinity s
| BinarySingleNaN.B754_finite s m e H ⇒ B754_finite s m e H
end.
Lemma B2BSN_BSN2B :
∀ nan x,
B2BSN (BSN2B nan x) = x.
Lemma B2R_BSN2B :
∀ nan x, B2R (BSN2B nan x) = BinarySingleNaN.B2R x.
Lemma is_finite_BSN2B :
∀ nan x, is_finite (BSN2B nan x) = BinarySingleNaN.is_finite x.
Lemma is_nan_BSN2B :
∀ nan x, is_nan (BSN2B nan x) = BinarySingleNaN.is_nan x.
Lemma Bsign_B2BSN :
∀ x, is_nan x = false → BinarySingleNaN.Bsign (B2BSN x) = Bsign x.
Lemma Bsign_BSN2B :
∀ nan x, BinarySingleNaN.is_nan x = false →
Bsign (BSN2B nan x) = BinarySingleNaN.Bsign x.
Definition BSN2B' (x : BinarySingleNaN.binary_float prec emax) : BinarySingleNaN.is_nan x = false → binary_float.
Lemma B2BSN_BSN2B' :
∀ x Nx,
B2BSN (BSN2B' x Nx) = x.
Lemma B2R_BSN2B' :
∀ x Nx,
B2R (BSN2B' x Nx) = BinarySingleNaN.B2R x.
Lemma B2FF_BSN2B' :
∀ x Nx,
B2FF (BSN2B' x Nx) = SF2FF (BinarySingleNaN.B2SF x).
Lemma Bsign_BSN2B' :
∀ x Nx,
Bsign (BSN2B' x Nx) = BinarySingleNaN.Bsign x.
Lemma is_finite_BSN2B' :
∀ x Nx,
is_finite (BSN2B' x Nx) = BinarySingleNaN.is_finite x.
Lemma is_nan_BSN2B' :
∀ x Nx,
is_nan (BSN2B' x Nx) = false.
Definition erase (x : binary_float) : binary_float.
Theorem erase_correct :
∀ x, erase x = x.
Opposite
Definition Bopp opp_nan x :=
match x with
| B754_nan _ _ _ ⇒ build_nan (opp_nan x)
| B754_infinity sx ⇒ B754_infinity (negb sx)
| B754_finite sx mx ex Hx ⇒ B754_finite (negb sx) mx ex Hx
| B754_zero sx ⇒ B754_zero (negb sx)
end.
Theorem Bopp_involutive :
∀ opp_nan x,
is_nan x = false →
Bopp opp_nan (Bopp opp_nan x) = x.
Theorem B2R_Bopp :
∀ opp_nan x,
B2R (Bopp opp_nan x) = (- B2R x)%R.
Theorem is_finite_Bopp :
∀ opp_nan x,
is_finite (Bopp opp_nan x) = is_finite x.
Lemma Bsign_Bopp :
∀ opp_nan x, is_nan x = false → Bsign (Bopp opp_nan x) = negb (Bsign x).
Absolute value
Definition Babs abs_nan (x : binary_float) : binary_float :=
match x with
| B754_nan _ _ _ ⇒ build_nan (abs_nan x)
| B754_infinity sx ⇒ B754_infinity false
| B754_finite sx mx ex Hx ⇒ B754_finite false mx ex Hx
| B754_zero sx ⇒ B754_zero false
end.
Theorem B2R_Babs :
∀ abs_nan x,
B2R (Babs abs_nan x) = Rabs (B2R x).
Theorem is_finite_Babs :
∀ abs_nan x,
is_finite (Babs abs_nan x) = is_finite x.
Theorem Bsign_Babs :
∀ abs_nan x,
is_nan x = false →
Bsign (Babs abs_nan x) = false.
Theorem Babs_idempotent :
∀ abs_nan (x: binary_float),
is_nan x = false →
Babs abs_nan (Babs abs_nan x) = Babs abs_nan x.
Theorem Babs_Bopp :
∀ abs_nan opp_nan x,
is_nan x = false →
Babs abs_nan (Bopp opp_nan x) = Babs abs_nan x.
Definition Bcompare (f1 f2 : binary_float) : option comparison :=
BinarySingleNaN.Bcompare (B2BSN f1) (B2BSN f2).
Theorem Bcompare_correct :
∀ f1 f2,
is_finite f1 = true → is_finite f2 = true →
Bcompare f1 f2 = Some (Rcompare (B2R f1) (B2R f2)).
Theorem Bcompare_swap :
∀ x y,
Bcompare y x = match Bcompare x y with Some c ⇒ Some (CompOpp c) | None ⇒ None end.
Theorem bounded_le_emax_minus_prec :
∀ mx ex,
bounded mx ex = true →
(F2R (Float radix2 (Zpos mx) ex)
≤ bpow radix2 emax - bpow radix2 (emax - prec))%R.
Theorem bounded_lt_emax :
∀ mx ex,
bounded mx ex = true →
(F2R (Float radix2 (Zpos mx) ex) < bpow radix2 emax)%R.
Theorem bounded_ge_emin :
∀ mx ex,
bounded mx ex = true →
(bpow radix2 emin ≤ F2R (Float radix2 (Zpos mx) ex))%R.
Theorem abs_B2R_le_emax_minus_prec :
∀ x,
(Rabs (B2R x) ≤ bpow radix2 emax - bpow radix2 (emax - prec))%R.
Theorem abs_B2R_lt_emax :
∀ x,
(Rabs (B2R x) < bpow radix2 emax)%R.
Theorem abs_B2R_ge_emin :
∀ x,
is_finite_strict x = true →
(bpow radix2 emin ≤ Rabs (B2R x))%R.
Theorem bounded_canonical_lt_emax :
∀ mx ex,
canonical radix2 fexp (Float radix2 (Zpos mx) ex) →
(F2R (Float radix2 (Zpos mx) ex) < bpow radix2 emax)%R →
bounded mx ex = true.
Truncation
Notation shr_fexp := (shr_fexp prec emax) (only parsing).
Theorem shr_fexp_truncate :
∀ m e l,
(0 ≤ m)%Z →
shr_fexp m e l =
let '(m', e', l') := truncate radix2 fexp (m, e, l) in (shr_record_of_loc m' l', e').
Rounding modes
Definition binary_overflow m s :=
SF2FF (binary_overflow prec emax m s).
Lemma eq_binary_overflow_FF2SF :
∀ x m s,
FF2SF x = BinarySingleNaN.binary_overflow prec emax m s →
x = binary_overflow m s.
Definition binary_round_aux mode sx mx ex lx :=
SF2FF (binary_round_aux prec emax mode sx mx ex lx).
Theorem binary_round_aux_correct' :
∀ mode x mx ex lx,
(x ≠ 0)%R →
inbetween_float radix2 mx ex (Rabs x) lx →
(ex ≤ cexp radix2 fexp x)%Z →
let z := binary_round_aux mode (Rlt_bool x 0) mx ex lx in
valid_binary z = true ∧
if Rlt_bool (Rabs (round radix2 fexp (round_mode mode) x)) (bpow radix2 emax) then
FF2R radix2 z = round radix2 fexp (round_mode mode) x ∧
is_finite_FF z = true ∧ sign_FF z = Rlt_bool x 0
else
z = binary_overflow mode (Rlt_bool x 0).
Theorem binary_round_aux_correct :
∀ mode x mx ex lx,
inbetween_float radix2 (Zpos mx) ex (Rabs x) lx →
(ex ≤ fexp (Zdigits radix2 (Zpos mx) + ex))%Z →
let z := binary_round_aux mode (Rlt_bool x 0) (Zpos mx) ex lx in
valid_binary z = true ∧
if Rlt_bool (Rabs (round radix2 fexp (round_mode mode) x)) (bpow radix2 emax) then
FF2R radix2 z = round radix2 fexp (round_mode mode) x ∧
is_finite_FF z = true ∧ sign_FF z = Rlt_bool x 0
else
z = binary_overflow mode (Rlt_bool x 0).
Multiplication
Definition Bmult mult_nan m x y :=
BSN2B (mult_nan x y) (Bmult m (B2BSN x) (B2BSN y)).
Theorem Bmult_correct :
∀ mult_nan m x y,
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x × B2R y))) (bpow radix2 emax) then
B2R (Bmult mult_nan m x y) = round radix2 fexp (round_mode m) (B2R x × B2R y) ∧
is_finite (Bmult mult_nan m x y) = andb (is_finite x) (is_finite y) ∧
(is_nan (Bmult mult_nan m x y) = false →
Bsign (Bmult mult_nan m x y) = xorb (Bsign x) (Bsign y))
else
B2FF (Bmult mult_nan m x y) = binary_overflow m (xorb (Bsign x) (Bsign y)).
Normalization and rounding
Definition shl_align_fexp mx ex :=
shl_align mx ex (fexp (Zpos (digits2_pos mx) + ex)).
Lemma shl_align_fexp_correct :
∀ mx ex,
let (mx', ex') := shl_align_fexp mx ex in
F2R (Float radix2 (Zpos mx) ex) = F2R (Float radix2 (Zpos mx') ex') ∧
(ex' ≤ fexp (Zdigits radix2 (Zpos mx') + ex'))%Z.
Definition binary_round m sx mx ex :=
SF2FF (binary_round prec emax m sx mx ex).
Theorem binary_round_correct :
∀ m sx mx ex,
let z := binary_round m sx mx ex in
valid_binary z = true ∧
let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) x)) (bpow radix2 emax) then
FF2R radix2 z = round radix2 fexp (round_mode m) x ∧
is_finite_FF z = true ∧
sign_FF z = sx
else
z = binary_overflow m sx.
Definition binary_normalize mode m e szero :=
BSN2B' _ (is_nan_binary_normalize prec emax _ _ mode m e szero).
Theorem binary_normalize_correct :
∀ m mx ex szero,
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (F2R (Float radix2 mx ex)))) (bpow radix2 emax) then
B2R (binary_normalize m mx ex szero) = round radix2 fexp (round_mode m) (F2R (Float radix2 mx ex)) ∧
is_finite (binary_normalize m mx ex szero) = true ∧
Bsign (binary_normalize m mx ex szero) =
match Rcompare (F2R (Float radix2 mx ex)) 0 with
| Eq ⇒ szero
| Lt ⇒ true
| Gt ⇒ false
end
else
B2FF (binary_normalize m mx ex szero) = binary_overflow m (Rlt_bool (F2R (Float radix2 mx ex)) 0).
Addition
Definition Bplus plus_nan m x y :=
BSN2B (plus_nan x y) (Bplus m (B2BSN x) (B2BSN y)).
Theorem Bplus_correct :
∀ plus_nan m x y,
is_finite x = true →
is_finite y = true →
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x + B2R y))) (bpow radix2 emax) then
B2R (Bplus plus_nan m x y) = round radix2 fexp (round_mode m) (B2R x + B2R y) ∧
is_finite (Bplus plus_nan m x y) = true ∧
Bsign (Bplus plus_nan m x y) =
match Rcompare (B2R x + B2R y) 0 with
| Eq ⇒ match m with mode_DN ⇒ orb (Bsign x) (Bsign y)
| _ ⇒ andb (Bsign x) (Bsign y) end
| Lt ⇒ true
| Gt ⇒ false
end
else
(B2FF (Bplus plus_nan m x y) = binary_overflow m (Bsign x) ∧ Bsign x = Bsign y).
Subtraction
Definition Bminus minus_nan m x y :=
BSN2B (minus_nan x y) (Bminus m (B2BSN x) (B2BSN y)).
Theorem Bminus_correct :
∀ minus_nan m x y,
is_finite x = true →
is_finite y = true →
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x - B2R y))) (bpow radix2 emax) then
B2R (Bminus minus_nan m x y) = round radix2 fexp (round_mode m) (B2R x - B2R y) ∧
is_finite (Bminus minus_nan m x y) = true ∧
Bsign (Bminus minus_nan m x y) =
match Rcompare (B2R x - B2R y) 0 with
| Eq ⇒ match m with mode_DN ⇒ orb (Bsign x) (negb (Bsign y))
| _ ⇒ andb (Bsign x) (negb (Bsign y)) end
| Lt ⇒ true
| Gt ⇒ false
end
else
(B2FF (Bminus minus_nan m x y) = binary_overflow m (Bsign x) ∧ Bsign x = negb (Bsign y)).
Fused Multiply-Add
Definition Bfma_szero m (x y z : binary_float) :=
Bfma_szero prec emax m (B2BSN x) (B2BSN y) (B2BSN z).
Definition Bfma fma_nan m (x y z: binary_float) :=
BSN2B (fma_nan x y z) (Bfma m (B2BSN x) (B2BSN y) (B2BSN z)).
Theorem Bfma_correct:
∀ fma_nan m x y z,
is_finite x = true →
is_finite y = true →
is_finite z = true →
let res := (B2R x × B2R y + B2R z)%R in
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) res)) (bpow radix2 emax) then
B2R (Bfma fma_nan m x y z) = round radix2 fexp (round_mode m) res ∧
is_finite (Bfma fma_nan m x y z) = true ∧
Bsign (Bfma fma_nan m x y z) =
match Rcompare res 0 with
| Eq ⇒ Bfma_szero m x y z
| Lt ⇒ true
| Gt ⇒ false
end
else
B2FF (Bfma fma_nan m x y z) = binary_overflow m (Rlt_bool res 0).
Division
Definition Bdiv div_nan m x y :=
BSN2B (div_nan x y) (Bdiv m (B2BSN x) (B2BSN y)).
Theorem Bdiv_correct :
∀ div_nan m x y,
B2R y ≠ 0%R →
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x / B2R y))) (bpow radix2 emax) then
B2R (Bdiv div_nan m x y) = round radix2 fexp (round_mode m) (B2R x / B2R y) ∧
is_finite (Bdiv div_nan m x y) = is_finite x ∧
(is_nan (Bdiv div_nan m x y) = false →
Bsign (Bdiv div_nan m x y) = xorb (Bsign x) (Bsign y))
else
B2FF (Bdiv div_nan m x y) = binary_overflow m (xorb (Bsign x) (Bsign y)).
Square root
Definition Bsqrt sqrt_nan m x :=
BSN2B (sqrt_nan x) (Bsqrt m (B2BSN x)).
Theorem Bsqrt_correct :
∀ sqrt_nan m x,
B2R (Bsqrt sqrt_nan m x) = round radix2 fexp (round_mode m) (sqrt (B2R x)) ∧
is_finite (Bsqrt sqrt_nan m x) = match x with B754_zero _ ⇒ true | B754_finite false _ _ _ ⇒ true | _ ⇒ false end ∧
(is_nan (Bsqrt sqrt_nan m x) = false → Bsign (Bsqrt sqrt_nan m x) = Bsign x).
NearbyInt and Trunc
Definition Bnearbyint nearbyint_nan m x :=
BSN2B (nearbyint_nan x) (Bnearbyint m (B2BSN x)).
Theorem Bnearbyint_correct :
∀ nearbyint_nan md x,
B2R (Bnearbyint nearbyint_nan md x) = round radix2 (FIX_exp 0) (round_mode md) (B2R x) ∧
is_finite (Bnearbyint nearbyint_nan md x) = is_finite x ∧
(is_nan (Bnearbyint nearbyint_nan md x) = false → Bsign (Bnearbyint nearbyint_nan md x) = Bsign x).
Definition Btrunc x := Btrunc (B2BSN x).
Theorem Btrunc_correct :
∀ x,
IZR (Btrunc x) = round radix2 (FIX_exp 0) Ztrunc (B2R x).
A few values
Definition Bone :=
BSN2B' _ (@is_nan_Bone prec emax _ _).
Theorem Bone_correct : B2R Bone = 1%R.
Lemma is_finite_Bone : is_finite Bone = true.
Lemma Bsign_Bone : Bsign Bone = false.
Definition Bmax_float :=
BSN2B' Bmax_float eq_refl.
Extraction/modification of mantissa/exponent
Definition Bnormfr_mantissa x :=
Bnormfr_mantissa (B2BSN x).
Definition lift x y (Ny : @BinarySingleNaN.is_nan prec emax y = is_nan x) : binary_float.
Lemma B2BSN_lift :
∀ x y Ny,
B2BSN (lift x y Ny) = y.
Definition Bldexp (mode : mode) (x : binary_float) (e : Z) : binary_float.
Theorem Bldexp_correct :
∀ m (f : binary_float) e,
if Rlt_bool
(Rabs (round radix2 fexp (round_mode m) (B2R f × bpow radix2 e)))
(bpow radix2 emax) then
(B2R (Bldexp m f e)
= round radix2 fexp (round_mode m) (B2R f × bpow radix2 e))%R ∧
is_finite (Bldexp m f e) = is_finite f ∧
Bsign (Bldexp m f e) = Bsign f
else
B2FF (Bldexp m f e) = binary_overflow m (Bsign f).
Section Bfrexp.
This hypothesis is needed to implement Bfrexp
(otherwise, we have emin > - prec
and Bfrexp cannot fit the mantissa in interval [0.5, 1))
Hypothesis Hemax : (2 < emax)%Z.
Definition Bfrexp (x : binary_float) : binary_float × Z.
Theorem Bfrexp_correct :
∀ f,
is_finite_strict f = true →
let x := B2R f in
let z := fst (Bfrexp f) in
let e := snd (Bfrexp f) in
(/2 ≤ Rabs (B2R z) < 1)%R ∧
(x = B2R z × bpow radix2 e)%R ∧
e = mag radix2 x.
End Bfrexp.
Definition Bfrexp (x : binary_float) : binary_float × Z.
Theorem Bfrexp_correct :
∀ f,
is_finite_strict f = true →
let x := B2R f in
let z := fst (Bfrexp f) in
let e := snd (Bfrexp f) in
(/2 ≤ Rabs (B2R z) < 1)%R ∧
(x = B2R z × bpow radix2 e)%R ∧
e = mag radix2 x.
End Bfrexp.
Ulp
Definition Bulp (x : binary_float) : binary_float.
Theorem Bulp_correct :
∀ x,
is_finite x = true →
B2R (Bulp x) = ulp radix2 fexp (B2R x) ∧
is_finite (Bulp x) = true ∧
Bsign (Bulp x) = false.
Successor (and predecessor)
Definition Bsucc (x : binary_float) : binary_float.
Lemma Bsucc_correct :
∀ x,
is_finite x = true →
if Rlt_bool (succ radix2 fexp (B2R x)) (bpow radix2 emax) then
B2R (Bsucc x) = succ radix2 fexp (B2R x) ∧
is_finite (Bsucc x) = true ∧
(Bsign (Bsucc x) = Bsign x && is_finite_strict x)%bool
else
B2FF (Bsucc x) = F754_infinity false.
Definition Bpred (x : binary_float) : binary_float.
Lemma Bpred_correct :
∀ x,
is_finite x = true →
if Rlt_bool (- bpow radix2 emax) (pred radix2 fexp (B2R x)) then
B2R (Bpred x) = pred radix2 fexp (B2R x) ∧
is_finite (Bpred x) = true ∧
(Bsign (Bpred x) = Bsign x || negb (is_finite_strict x))%bool
else
B2FF (Bpred x) = F754_infinity true.
End Binary.