Library Flocq.IEEE754.BinarySingleNaN
This file is part of the Flocq formalization of floating-point
arithmetic in Coq: https://flocq.gitlabpages.inria.fr/
Copyright (C) 2010-2018 Sylvie Boldo
Copyright (C) 2010-2018 Guillaume Melquiond
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
COPYING file for more details.
Copyright (C) 2010-2018 Guillaume Melquiond
IEEE-754 arithmetic
From Coq Require Import ZArith Reals Psatz SpecFloat.
Require Import Core Round Bracket Operations Div Sqrt Relative.
Definition SF2R beta x :=
match x with
| S754_finite s m e ⇒ F2R (Float beta (cond_Zopp s (Zpos m)) e)
| _ ⇒ 0%R
end.
Class Prec_lt_emax prec emax := prec_lt_emax : (prec < emax)%Z.
Arguments prec_lt_emax prec emax {Prec_lt_emax}.
Section Binary.
prec is the number of bits of the mantissa including the implicit one;
emax is the exponent of the infinities.
For instance, binary32 is defined by prec = 24 and emax = 128.
Variable prec emax : Z.
Context (prec_gt_0_ : Prec_gt_0 prec).
Context (prec_lt_emax_ : Prec_lt_emax prec emax).
Notation emin := (emin prec emax).
Notation fexp := (fexp prec emax).
Instance fexp_correct : Valid_exp fexp := FLT_exp_valid emin prec.
Instance fexp_monotone : Monotone_exp fexp := FLT_exp_monotone emin prec.
Notation canonical_mantissa := (canonical_mantissa prec emax).
Notation bounded := (SpecFloat.bounded prec emax).
Notation valid_binary := (valid_binary prec emax).
Context (prec_gt_0_ : Prec_gt_0 prec).
Context (prec_lt_emax_ : Prec_lt_emax prec emax).
Notation emin := (emin prec emax).
Notation fexp := (fexp prec emax).
Instance fexp_correct : Valid_exp fexp := FLT_exp_valid emin prec.
Instance fexp_monotone : Monotone_exp fexp := FLT_exp_monotone emin prec.
Notation canonical_mantissa := (canonical_mantissa prec emax).
Notation bounded := (SpecFloat.bounded prec emax).
Notation valid_binary := (valid_binary prec emax).
Basic type used for representing binary FP numbers.
Note that there is exactly one such object per FP datum.
Inductive binary_float :=
| B754_zero (s : bool)
| B754_infinity (s : bool)
| B754_nan : binary_float
| B754_finite (s : bool) (m : positive) (e : Z) :
bounded m e = true → binary_float.
Definition SF2B x :=
match x as x return valid_binary x = true → binary_float with
| S754_finite s m e ⇒ B754_finite s m e
| S754_infinity s ⇒ fun _ ⇒ B754_infinity s
| S754_zero s ⇒ fun _ ⇒ B754_zero s
| S754_nan ⇒ fun _ ⇒ B754_nan
end.
Definition SF2B' x :=
match x with
| S754_zero s ⇒ B754_zero s
| S754_infinity s ⇒ B754_infinity s
| S754_nan ⇒ B754_nan
| S754_finite s m e ⇒
match bounded m e as b return bounded m e = b → _ with
| true ⇒ B754_finite s m e
| false ⇒ fun H ⇒ B754_nan
end eq_refl
end.
Definition B2SF x :=
match x with
| B754_finite s m e _ ⇒ S754_finite s m e
| B754_infinity s ⇒ S754_infinity s
| B754_zero s ⇒ S754_zero s
| B754_nan ⇒ S754_nan
end.
Definition B2R f :=
match f with
| B754_finite s m e _ ⇒ F2R (Float radix2 (cond_Zopp s (Zpos m)) e)
| _ ⇒ 0%R
end.
Theorem SF2R_B2SF :
∀ x,
SF2R radix2 (B2SF x) = B2R x.
Theorem B2SF_SF2B :
∀ x Hx,
B2SF (SF2B x Hx) = x.
Theorem valid_binary_B2SF :
∀ x,
valid_binary (B2SF x) = true.
Theorem SF2B_B2SF :
∀ x H,
SF2B (B2SF x) H = x.
Theorem SF2B_B2SF_valid :
∀ x,
SF2B (B2SF x) (valid_binary_B2SF x) = x.
Theorem B2R_SF2B :
∀ x Hx,
B2R (SF2B x Hx) = SF2R radix2 x.
Theorem match_SF2B :
∀ {T} fz fi fn ff x Hx,
match SF2B x Hx return T with
| B754_zero sx ⇒ fz sx
| B754_infinity sx ⇒ fi sx
| B754_nan ⇒ fn
| B754_finite sx mx ex _ ⇒ ff sx mx ex
end =
match x with
| S754_zero sx ⇒ fz sx
| S754_infinity sx ⇒ fi sx
| S754_nan ⇒ fn
| S754_finite sx mx ex ⇒ ff sx mx ex
end.
Theorem canonical_canonical_mantissa :
∀ (sx : bool) mx ex,
canonical_mantissa mx ex = true →
canonical radix2 fexp (Float radix2 (cond_Zopp sx (Zpos mx)) ex).
Theorem canonical_bounded :
∀ sx mx ex,
bounded mx ex = true →
canonical radix2 fexp (Float radix2 (cond_Zopp sx (Zpos mx)) ex).
Lemma emin_lt_emax :
(emin < emax)%Z.
Lemma fexp_emax :
fexp emax = (emax - prec)%Z.
Theorem generic_format_B2R :
∀ x,
generic_format radix2 fexp (B2R x).
Theorem FLT_format_B2R :
∀ x,
FLT_format radix2 emin prec (B2R x).
Theorem B2SF_inj :
∀ x y : binary_float,
B2SF x = B2SF y →
x = y.
Theorem SF2B'_B2SF :
∀ x,
SF2B' (B2SF x) = x.
Definition is_finite_strict f :=
match f with
| B754_finite _ _ _ _ ⇒ true
| _ ⇒ false
end.
Definition is_finite_strict_SF f :=
match f with
| S754_finite _ _ _ ⇒ true
| _ ⇒ false
end.
Theorem is_finite_strict_B2R :
∀ x,
B2R x ≠ 0%R →
is_finite_strict x = true.
Theorem is_finite_strict_SF2B :
∀ x Hx,
is_finite_strict (SF2B x Hx) = is_finite_strict_SF x.
Theorem B2R_inj:
∀ x y : binary_float,
is_finite_strict x = true →
is_finite_strict y = true →
B2R x = B2R y →
x = y.
Definition Bsign x :=
match x with
| B754_nan ⇒ false
| B754_zero s ⇒ s
| B754_infinity s ⇒ s
| B754_finite s _ _ _ ⇒ s
end.
Definition sign_SF x :=
match x with
| S754_nan ⇒ false
| S754_zero s ⇒ s
| S754_infinity s ⇒ s
| S754_finite s _ _ ⇒ s
end.
Theorem Bsign_SF2B :
∀ x H,
Bsign (SF2B x H) = sign_SF x.
Definition is_finite f :=
match f with
| B754_finite _ _ _ _ ⇒ true
| B754_zero _ ⇒ true
| _ ⇒ false
end.
Definition is_finite_SF f :=
match f with
| S754_finite _ _ _ ⇒ true
| S754_zero _ ⇒ true
| _ ⇒ false
end.
Theorem is_finite_SF2B :
∀ x Hx,
is_finite (SF2B x Hx) = is_finite_SF x.
Theorem is_finite_SF_B2SF :
∀ x,
is_finite_SF (B2SF x) = is_finite x.
Theorem B2R_Bsign_inj:
∀ x y : binary_float,
is_finite x = true →
is_finite y = true →
B2R x = B2R y →
Bsign x = Bsign y →
x = y.
Definition is_nan f :=
match f with
| B754_nan ⇒ true
| _ ⇒ false
end.
Definition is_nan_SF f :=
match f with
| S754_nan ⇒ true
| _ ⇒ false
end.
Theorem is_nan_SF2B :
∀ x Hx,
is_nan (SF2B x Hx) = is_nan_SF x.
Theorem is_nan_SF_B2SF :
∀ x,
is_nan_SF (B2SF x) = is_nan x.
Definition erase (x : binary_float) : binary_float.
Theorem erase_correct :
∀ x, erase x = x.
Opposite
Definition Bopp x :=
match x with
| B754_nan ⇒ x
| B754_infinity sx ⇒ B754_infinity (negb sx)
| B754_finite sx mx ex Hx ⇒ B754_finite (negb sx) mx ex Hx
| B754_zero sx ⇒ B754_zero (negb sx)
end.
Theorem Bopp_involutive :
∀ x,
Bopp (Bopp x) = x.
Theorem B2R_Bopp :
∀ x,
B2R (Bopp x) = (- B2R x)%R.
Theorem is_nan_Bopp :
∀ x,
is_nan (Bopp x) = is_nan x.
Theorem is_finite_Bopp :
∀ x,
is_finite (Bopp x) = is_finite x.
Theorem is_finite_strict_Bopp :
∀ x,
is_finite_strict (Bopp x) = is_finite_strict x.
Lemma Bsign_Bopp :
∀ x, is_nan x = false → Bsign (Bopp x) = negb (Bsign x).
Absolute value
Definition Babs (x : binary_float) : binary_float :=
match x with
| B754_nan ⇒ x
| B754_infinity sx ⇒ B754_infinity false
| B754_finite sx mx ex Hx ⇒ B754_finite false mx ex Hx
| B754_zero sx ⇒ B754_zero false
end.
Theorem B2R_Babs :
∀ x,
B2R (Babs x) = Rabs (B2R x).
Theorem is_nan_Babs :
∀ x,
is_nan (Babs x) = is_nan x.
Theorem is_finite_Babs :
∀ x,
is_finite (Babs x) = is_finite x.
Theorem is_finite_strict_Babs :
∀ x,
is_finite_strict (Babs x) = is_finite_strict x.
Theorem Bsign_Babs :
∀ x,
Bsign (Babs x) = false.
Theorem Babs_idempotent :
∀ (x: binary_float),
Babs (Babs x) = Babs x.
Theorem Babs_Bopp :
∀ x,
Babs (Bopp x) = Babs x.
Definition Bcompare (f1 f2 : binary_float) : option comparison :=
SFcompare (B2SF f1) (B2SF f2).
Theorem Bcompare_correct :
∀ f1 f2,
is_finite f1 = true → is_finite f2 = true →
Bcompare f1 f2 = Some (Rcompare (B2R f1) (B2R f2)).
Theorem Bcompare_swap :
∀ x y,
Bcompare y x = match Bcompare x y with Some c ⇒ Some (CompOpp c) | None ⇒ None end.
Definition Beqb (f1 f2 : binary_float) : bool := SFeqb (B2SF f1) (B2SF f2).
Theorem Beqb_correct :
∀ f1 f2,
is_finite f1 = true → is_finite f2 = true →
Beqb f1 f2 = Req_bool (B2R f1) (B2R f2).
Theorem Beqb_refl :
∀ f, Beqb f f = negb (is_nan f).
Definition Bltb (f1 f2 : binary_float) : bool := SFltb (B2SF f1) (B2SF f2).
Theorem Bltb_correct :
∀ f1 f2,
is_finite f1 = true → is_finite f2 = true →
Bltb f1 f2 = Rlt_bool (B2R f1) (B2R f2).
Definition Bleb (f1 f2 : binary_float) : bool := SFleb (B2SF f1) (B2SF f2).
Theorem Bleb_correct :
∀ f1 f2,
is_finite f1 = true → is_finite f2 = true →
Bleb f1 f2 = Rle_bool (B2R f1) (B2R f2).
Theorem bounded_le_emax_minus_prec :
∀ mx ex,
bounded mx ex = true →
(F2R (Float radix2 (Zpos mx) ex)
≤ bpow radix2 emax - bpow radix2 (emax - prec))%R.
Theorem bounded_lt_emax :
∀ mx ex,
bounded mx ex = true →
(F2R (Float radix2 (Zpos mx) ex) < bpow radix2 emax)%R.
Theorem bounded_ge_emin :
∀ mx ex,
bounded mx ex = true →
(bpow radix2 emin ≤ F2R (Float radix2 (Zpos mx) ex))%R.
Theorem abs_B2R_le_emax_minus_prec :
∀ x,
(Rabs (B2R x) ≤ bpow radix2 emax - bpow radix2 (emax - prec))%R.
Theorem abs_B2R_lt_emax :
∀ x,
(Rabs (B2R x) < bpow radix2 emax)%R.
Theorem abs_B2R_ge_emin :
∀ x,
is_finite_strict x = true →
(bpow radix2 emin ≤ Rabs (B2R x))%R.
Theorem bounded_canonical_lt_emax :
∀ mx ex,
canonical radix2 fexp (Float radix2 (Zpos mx) ex) →
(F2R (Float radix2 (Zpos mx) ex) < bpow radix2 emax)%R →
bounded mx ex = true.
Truncation
Theorem shr_m_shr_record_of_loc :
∀ m l,
shr_m (shr_record_of_loc m l) = m.
Theorem loc_of_shr_record_of_loc :
∀ m l,
loc_of_shr_record (shr_record_of_loc m l) = l.
Lemma inbetween_shr_1 :
∀ x mrs e,
(0 ≤ shr_m mrs)%Z →
inbetween_float radix2 (shr_m mrs) e x (loc_of_shr_record mrs) →
inbetween_float radix2 (shr_m (shr_1 mrs)) (e + 1) x (loc_of_shr_record (shr_1 mrs)).
Theorem inbetween_shr :
∀ x m e l n,
(0 ≤ m)%Z →
inbetween_float radix2 m e x l →
let '(mrs, e') := shr (shr_record_of_loc m l) e n in
inbetween_float radix2 (shr_m mrs) e' x (loc_of_shr_record mrs).
Lemma le_shr1_le :
∀ mrs, (0 ≤ shr_m mrs)%Z →
(0 ≤ 2 × shr_m (shr_1 mrs) ≤ shr_m mrs)%Z ∧
(shr_m mrs < 2 × (shr_m (shr_1 mrs) + 1))%Z.
Lemma le_shr_le :
∀ mrs e n,
(0 ≤ shr_m mrs)%Z → (0 ≤ n)%Z →
(0 ≤ 2 ^ n × shr_m (fst (shr mrs e n)) ≤ shr_m mrs)%Z ∧
(shr_m mrs < 2 ^ n × (shr_m (fst (shr mrs e n)) + 1))%Z.
Lemma shr_limit :
∀ mrs e n,
((0 < shr_m mrs)%Z ∨ shr_m mrs = 0%Z ∧ (shr_r mrs || shr_s mrs = true)%bool) →
(shr_m mrs < radix2 ^ (n - 1))%Z →
fst (shr mrs e n) = {| shr_m := 0%Z; shr_r := false; shr_s := true |}.
Theorem shr_truncate :
∀ f m e l,
Valid_exp f →
(0 ≤ m)%Z →
shr (shr_record_of_loc m l) e (f (Zdigits2 m + e) - e)%Z =
let '(m', e', l') := truncate radix2 f (m, e, l) in (shr_record_of_loc m' l', e').
Notation shr_fexp := (shr_fexp prec emax).
Theorem shr_fexp_truncate :
∀ m e l,
(0 ≤ m)%Z →
shr_fexp m e l =
let '(m', e', l') := truncate radix2 fexp (m, e, l) in (shr_record_of_loc m' l', e').
Rounding modes
Inductive mode := mode_NE | mode_ZR | mode_DN | mode_UP | mode_NA.
Definition round_mode m :=
match m with
| mode_NE ⇒ ZnearestE
| mode_ZR ⇒ Ztrunc
| mode_DN ⇒ Zfloor
| mode_UP ⇒ Zceil
| mode_NA ⇒ ZnearestA
end.
Definition choice_mode m sx mx lx :=
match m with
| mode_NE ⇒ cond_incr (round_N (negb (Z.even mx)) lx) mx
| mode_ZR ⇒ mx
| mode_DN ⇒ cond_incr (round_sign_DN sx lx) mx
| mode_UP ⇒ cond_incr (round_sign_UP sx lx) mx
| mode_NA ⇒ cond_incr (round_N true lx) mx
end.
Lemma le_choice_mode_le :
∀ m sx mx lx, (mx ≤ choice_mode m sx mx lx ≤ mx + 1)%Z.
Lemma round_mode_choice_mode :
∀ md x m l,
inbetween_int m (Rabs x) l →
round_mode md x = cond_Zopp (Rlt_bool x 0) (choice_mode md (Rlt_bool x 0) m l).
Global Instance valid_rnd_round_mode : ∀ m, Valid_rnd (round_mode m).
Definition overflow_to_inf m s :=
match m with
| mode_NE ⇒ true
| mode_NA ⇒ true
| mode_ZR ⇒ false
| mode_UP ⇒ negb s
| mode_DN ⇒ s
end.
Definition binary_overflow m s :=
if overflow_to_inf m s then S754_infinity s
else S754_finite s (Z.to_pos (Zpower 2 prec - 1)%Z) (emax - prec).
Theorem is_nan_binary_overflow :
∀ mode s,
is_nan_SF (binary_overflow mode s) = false.
Theorem binary_overflow_correct :
∀ m s,
valid_binary (binary_overflow m s) = true.
Definition binary_fit_aux mode sx mx ex :=
if Zle_bool ex (emax - prec) then S754_finite sx mx ex
else binary_overflow mode sx.
Theorem binary_fit_aux_correct :
∀ mode sx mx ex,
canonical_mantissa mx ex = true →
let x := SF2R radix2 (S754_finite sx mx ex) in
let z := binary_fit_aux mode sx mx ex in
valid_binary z = true ∧
if Rlt_bool (Rabs x) (bpow radix2 emax) then
SF2R radix2 z = x ∧ is_finite_SF z = true ∧ sign_SF z = sx
else
z = binary_overflow mode sx.
Definition binary_round_aux mode sx mx ex lx :=
let '(mrs', e') := shr_fexp mx ex lx in
let '(mrs'', e'') := shr_fexp (choice_mode mode sx (shr_m mrs') (loc_of_shr_record mrs')) e' loc_Exact in
match shr_m mrs'' with
| Z0 ⇒ S754_zero sx
| Zpos m ⇒ binary_fit_aux mode sx m e''
| _ ⇒ S754_nan
end.
Theorem binary_round_aux_correct' :
∀ mode x mx ex lx,
(x ≠ 0)%R →
inbetween_float radix2 mx ex (Rabs x) lx →
(ex ≤ cexp radix2 fexp x)%Z →
let z := binary_round_aux mode (Rlt_bool x 0) mx ex lx in
valid_binary z = true ∧
if Rlt_bool (Rabs (round radix2 fexp (round_mode mode) x)) (bpow radix2 emax) then
SF2R radix2 z = round radix2 fexp (round_mode mode) x ∧
is_finite_SF z = true ∧ sign_SF z = Rlt_bool x 0
else
z = binary_overflow mode (Rlt_bool x 0).
Theorem binary_round_aux_correct :
∀ mode x mx ex lx,
inbetween_float radix2 (Zpos mx) ex (Rabs x) lx →
(ex ≤ fexp (Zdigits radix2 (Zpos mx) + ex))%Z →
let z := binary_round_aux mode (Rlt_bool x 0) (Zpos mx) ex lx in
valid_binary z = true ∧
if Rlt_bool (Rabs (round radix2 fexp (round_mode mode) x)) (bpow radix2 emax) then
SF2R radix2 z = round radix2 fexp (round_mode mode) x ∧
is_finite_SF z = true ∧ sign_SF z = Rlt_bool x 0
else
z = binary_overflow mode (Rlt_bool x 0).
Multiplication
Lemma Bmult_correct_aux :
∀ m sx mx ex (Hx : bounded mx ex = true) sy my ey (Hy : bounded my ey = true),
let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
let y := F2R (Float radix2 (cond_Zopp sy (Zpos my)) ey) in
let z := binary_round_aux m (xorb sx sy) (Zpos (mx × my)) (ex + ey) loc_Exact in
valid_binary z = true ∧
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (x × y))) (bpow radix2 emax) then
SF2R radix2 z = round radix2 fexp (round_mode m) (x × y) ∧
is_finite_SF z = true ∧ sign_SF z = xorb sx sy
else
z = binary_overflow m (xorb sx sy).
Definition Bmult m x y :=
match x, y with
| B754_nan, _ | _, B754_nan ⇒ B754_nan
| B754_infinity sx, B754_infinity sy ⇒ B754_infinity (xorb sx sy)
| B754_infinity sx, B754_finite sy _ _ _ ⇒ B754_infinity (xorb sx sy)
| B754_finite sx _ _ _, B754_infinity sy ⇒ B754_infinity (xorb sx sy)
| B754_infinity _, B754_zero _ ⇒ B754_nan
| B754_zero _, B754_infinity _ ⇒ B754_nan
| B754_finite sx _ _ _, B754_zero sy ⇒ B754_zero (xorb sx sy)
| B754_zero sx, B754_finite sy _ _ _ ⇒ B754_zero (xorb sx sy)
| B754_zero sx, B754_zero sy ⇒ B754_zero (xorb sx sy)
| B754_finite sx mx ex Hx, B754_finite sy my ey Hy ⇒
SF2B _ (proj1 (Bmult_correct_aux m sx mx ex Hx sy my ey Hy))
end.
Theorem Bmult_correct :
∀ m x y,
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x × B2R y))) (bpow radix2 emax) then
B2R (Bmult m x y) = round radix2 fexp (round_mode m) (B2R x × B2R y) ∧
is_finite (Bmult m x y) = andb (is_finite x) (is_finite y) ∧
(is_nan (Bmult m x y) = false →
Bsign (Bmult m x y) = xorb (Bsign x) (Bsign y))
else
B2SF (Bmult m x y) = binary_overflow m (xorb (Bsign x) (Bsign y)).
Normalization and rounding
Theorem shl_align_correct':
∀ mx ex e,
(e ≤ ex)%Z →
let (mx', ex') := shl_align mx ex e in
F2R (Float radix2 (Zpos mx') e) = F2R (Float radix2 (Zpos mx) ex) ∧
ex' = e.
Theorem shl_align_correct :
∀ mx ex ex',
let (mx', ex'') := shl_align mx ex ex' in
F2R (Float radix2 (Zpos mx) ex) = F2R (Float radix2 (Zpos mx') ex'') ∧
(ex'' ≤ ex')%Z.
Theorem snd_shl_align :
∀ mx ex ex',
(ex' ≤ ex)%Z →
snd (shl_align mx ex ex') = ex'.
Definition shl_align_fexp mx ex :=
shl_align mx ex (fexp (Zpos (digits2_pos mx) + ex)).
Theorem shl_align_fexp_correct :
∀ mx ex,
let (mx', ex') := shl_align_fexp mx ex in
F2R (Float radix2 (Zpos mx) ex) = F2R (Float radix2 (Zpos mx') ex') ∧
(ex' ≤ fexp (Zdigits radix2 (Zpos mx') + ex'))%Z.
Definition binary_round m sx mx ex :=
let '(mz, ez) := shl_align_fexp mx ex in binary_round_aux m sx (Zpos mz) ez loc_Exact.
Theorem binary_round_correct :
∀ m sx mx ex,
let z := binary_round m sx mx ex in
valid_binary z = true ∧
let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) x)) (bpow radix2 emax) then
SF2R radix2 z = round radix2 fexp (round_mode m) x ∧
is_finite_SF z = true ∧
sign_SF z = sx
else
z = binary_overflow m sx.
Theorem is_nan_binary_round :
∀ mode sx mx ex,
is_nan_SF (binary_round mode sx mx ex) = false.
Definition binary_normalize mode m e szero :=
match m with
| Z0 ⇒ B754_zero szero
| Zpos m ⇒ SF2B _ (proj1 (binary_round_correct mode false m e))
| Zneg m ⇒ SF2B _ (proj1 (binary_round_correct mode true m e))
end.
Theorem binary_normalize_correct :
∀ m mx ex szero,
let x := F2R (Float radix2 mx ex) in
let z := binary_normalize m mx ex szero in
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) x)) (bpow radix2 emax) then
B2R z = round radix2 fexp (round_mode m) x ∧
is_finite z = true ∧
Bsign z =
match Rcompare x 0 with
| Eq ⇒ szero
| Lt ⇒ true
| Gt ⇒ false
end
else
B2SF z = binary_overflow m (Rlt_bool x 0).
Theorem is_nan_binary_normalize :
∀ mode m e szero,
is_nan (binary_normalize mode m e szero) = false.
Addition
Definition Fplus_naive sx mx ex sy my ey ez :=
(Zplus (cond_Zopp sx (Zpos (fst (shl_align mx ex ez)))) (cond_Zopp sy (Zpos (fst (shl_align my ey ez))))).
Lemma Fplus_naive_correct :
∀ sx mx ex sy my ey ez,
(ez ≤ ex)%Z → (ez ≤ ey)%Z →
let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
let y := F2R (Float radix2 (cond_Zopp sy (Zpos my)) ey) in
F2R (Float radix2 (Fplus_naive sx mx ex sy my ey ez) ez) = (x + y)%R.
Lemma sign_plus_overflow :
∀ m sx mx ex sy my ey,
bounded mx ex = true →
bounded my ey = true →
let z := (F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) + F2R (Float radix2 (cond_Zopp sy (Zpos my)) ey))%R in
(bpow radix2 emax ≤ Rabs (round radix2 fexp (round_mode m) z))%R →
sx = Rlt_bool z 0 ∧ sx = sy.
Definition Bplus m x y :=
match x, y with
| B754_nan, _ | _, B754_nan ⇒ B754_nan
| B754_infinity sx, B754_infinity sy ⇒ if Bool.eqb sx sy then x else B754_nan
| B754_infinity _, _ ⇒ x
| _, B754_infinity _ ⇒ y
| B754_zero sx, B754_zero sy ⇒
if Bool.eqb sx sy then x else
match m with mode_DN ⇒ B754_zero true | _ ⇒ B754_zero false end
| B754_zero _, _ ⇒ y
| _, B754_zero _ ⇒ x
| B754_finite sx mx ex Hx, B754_finite sy my ey Hy ⇒
let ez := Z.min ex ey in
binary_normalize m (Fplus_naive sx mx ex sy my ey ez)
ez (match m with mode_DN ⇒ true | _ ⇒ false end)
end.
Theorem Bplus_correct :
∀ m x y,
is_finite x = true →
is_finite y = true →
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x + B2R y))) (bpow radix2 emax) then
B2R (Bplus m x y) = round radix2 fexp (round_mode m) (B2R x + B2R y) ∧
is_finite (Bplus m x y) = true ∧
Bsign (Bplus m x y) =
match Rcompare (B2R x + B2R y) 0 with
| Eq ⇒ match m with mode_DN ⇒ orb (Bsign x) (Bsign y)
| _ ⇒ andb (Bsign x) (Bsign y) end
| Lt ⇒ true
| Gt ⇒ false
end
else
(B2SF (Bplus m x y) = binary_overflow m (Bsign x) ∧ Bsign x = Bsign y).
Subtraction
Definition Bminus m x y :=
match x, y with
| B754_nan, _ | _, B754_nan ⇒ B754_nan
| B754_infinity sx, B754_infinity sy ⇒
if Bool.eqb sx (negb sy) then x else B754_nan
| B754_infinity _, _ ⇒ x
| _, B754_infinity sy ⇒ B754_infinity (negb sy)
| B754_zero sx, B754_zero sy ⇒
if Bool.eqb sx (negb sy) then x else
match m with mode_DN ⇒ B754_zero true | _ ⇒ B754_zero false end
| B754_zero _, B754_finite sy my ey Hy ⇒ B754_finite (negb sy) my ey Hy
| _, B754_zero _ ⇒ x
| B754_finite sx mx ex Hx, B754_finite sy my ey Hy ⇒
let ez := Z.min ex ey in
binary_normalize m (Fplus_naive sx mx ex (negb sy) my ey ez)
ez (match m with mode_DN ⇒ true | _ ⇒ false end)
end.
Theorem Bminus_correct :
∀ m x y,
is_finite x = true →
is_finite y = true →
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x - B2R y))) (bpow radix2 emax) then
B2R (Bminus m x y) = round radix2 fexp (round_mode m) (B2R x - B2R y) ∧
is_finite (Bminus m x y) = true ∧
Bsign (Bminus m x y) =
match Rcompare (B2R x - B2R y) 0 with
| Eq ⇒ match m with mode_DN ⇒ orb (Bsign x) (negb (Bsign y))
| _ ⇒ andb (Bsign x) (negb (Bsign y)) end
| Lt ⇒ true
| Gt ⇒ false
end
else
(B2SF (Bminus m x y) = binary_overflow m (Bsign x) ∧ Bsign x = negb (Bsign y)).
Fused Multiply-Add
Definition Bfma_szero m (x y z: binary_float) : bool :=
let s_xy := xorb (Bsign x) (Bsign y) in
if Bool.eqb s_xy (Bsign z) then s_xy
else match m with mode_DN ⇒ true | _ ⇒ false end.
Definition Bfma m (x y z: binary_float) :=
match x, y with
| B754_nan, _ | _, B754_nan
| B754_infinity _, B754_zero _
| B754_zero _, B754_infinity _ ⇒
B754_nan
| B754_infinity sx, B754_infinity sy
| B754_infinity sx, B754_finite sy _ _ _
| B754_finite sx _ _ _, B754_infinity sy ⇒
let s := xorb sx sy in
match z with
| B754_nan ⇒ B754_nan
| B754_infinity sz ⇒ if Bool.eqb s sz then z else B754_nan
| _ ⇒ B754_infinity s
end
| B754_finite sx _ _ _, B754_zero sy
| B754_zero sx, B754_finite sy _ _ _
| B754_zero sx, B754_zero sy ⇒
match z with
| B754_nan ⇒ B754_nan
| B754_zero _ ⇒ B754_zero (Bfma_szero m x y z)
| _ ⇒ z
end
| B754_finite sx mx ex _, B754_finite sy my ey _ ⇒
match z with
| B754_nan ⇒ B754_nan
| B754_infinity sz ⇒ z
| B754_zero _ ⇒
let X := Float radix2 (cond_Zopp sx (Zpos mx)) ex in
let Y := Float radix2 (cond_Zopp sy (Zpos my)) ey in
let '(Float _ mr er) := Fmult X Y in
binary_normalize m mr er (Bfma_szero m x y z)
| B754_finite sz mz ez _ ⇒
let X := Float radix2 (cond_Zopp sx (Zpos mx)) ex in
let Y := Float radix2 (cond_Zopp sy (Zpos my)) ey in
let Z := Float radix2 (cond_Zopp sz (Zpos mz)) ez in
let '(Float _ mr er) := Fplus (Fmult X Y) Z in
binary_normalize m mr er (Bfma_szero m x y z)
end
end.
Theorem Bfma_correct:
∀ m x y z,
is_finite x = true →
is_finite y = true →
is_finite z = true →
let res := (B2R x × B2R y + B2R z)%R in
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) res)) (bpow radix2 emax) then
B2R (Bfma m x y z) = round radix2 fexp (round_mode m) res ∧
is_finite (Bfma m x y z) = true ∧
Bsign (Bfma m x y z) =
match Rcompare res 0 with
| Eq ⇒ Bfma_szero m x y z
| Lt ⇒ true
| Gt ⇒ false
end
else
B2SF (Bfma m x y z) = binary_overflow m (Rlt_bool res 0).
Division
Lemma Bdiv_correct_aux :
∀ m sx mx ex sy my ey,
let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
let y := F2R (Float radix2 (cond_Zopp sy (Zpos my)) ey) in
let z :=
let '(mz, ez, lz) := SFdiv_core_binary prec emax (Zpos mx) ex (Zpos my) ey in
binary_round_aux m (xorb sx sy) mz ez lz in
valid_binary z = true ∧
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (x / y))) (bpow radix2 emax) then
SF2R radix2 z = round radix2 fexp (round_mode m) (x / y) ∧
is_finite_SF z = true ∧ sign_SF z = xorb sx sy
else
z = binary_overflow m (xorb sx sy).
Definition Bdiv m x y :=
match x, y with
| B754_nan, _ | _, B754_nan ⇒ B754_nan
| B754_infinity sx, B754_infinity sy ⇒ B754_nan
| B754_infinity sx, B754_finite sy _ _ _ ⇒ B754_infinity (xorb sx sy)
| B754_finite sx _ _ _, B754_infinity sy ⇒ B754_zero (xorb sx sy)
| B754_infinity sx, B754_zero sy ⇒ B754_infinity (xorb sx sy)
| B754_zero sx, B754_infinity sy ⇒ B754_zero (xorb sx sy)
| B754_finite sx _ _ _, B754_zero sy ⇒ B754_infinity (xorb sx sy)
| B754_zero sx, B754_finite sy _ _ _ ⇒ B754_zero (xorb sx sy)
| B754_zero sx, B754_zero sy ⇒ B754_nan
| B754_finite sx mx ex _, B754_finite sy my ey _ ⇒
SF2B _ (proj1 (Bdiv_correct_aux m sx mx ex sy my ey))
end.
Theorem Bdiv_correct :
∀ m x y,
B2R y ≠ 0%R →
if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x / B2R y))) (bpow radix2 emax) then
B2R (Bdiv m x y) = round radix2 fexp (round_mode m) (B2R x / B2R y) ∧
is_finite (Bdiv m x y) = is_finite x ∧
(is_nan (Bdiv m x y) = false →
Bsign (Bdiv m x y) = xorb (Bsign x) (Bsign y))
else
B2SF (Bdiv m x y) = binary_overflow m (xorb (Bsign x) (Bsign y)).
Square root
Lemma Bsqrt_correct_aux :
∀ m mx ex (Hx : bounded mx ex = true),
let x := F2R (Float radix2 (Zpos mx) ex) in
let z :=
let '(mz, ez, lz) := SFsqrt_core_binary prec emax (Zpos mx) ex in
binary_round_aux m false mz ez lz in
valid_binary z = true ∧
SF2R radix2 z = round radix2 fexp (round_mode m) (sqrt x) ∧
is_finite_SF z = true ∧ sign_SF z = false.
Definition Bsqrt m x :=
match x with
| B754_nan ⇒ B754_nan
| B754_infinity false ⇒ x
| B754_infinity true ⇒ B754_nan
| B754_finite true _ _ _ ⇒ B754_nan
| B754_zero _ ⇒ x
| B754_finite sx mx ex Hx ⇒
SF2B _ (proj1 (Bsqrt_correct_aux m mx ex Hx))
end.
Theorem Bsqrt_correct :
∀ m x,
B2R (Bsqrt m x) = round radix2 fexp (round_mode m) (sqrt (B2R x)) ∧
is_finite (Bsqrt m x) = match x with B754_zero _ ⇒ true | B754_finite false _ _ _ ⇒ true | _ ⇒ false end ∧
(is_nan (Bsqrt m x) = false → Bsign (Bsqrt m x) = Bsign x).
NearbyInt and Trunc
Definition SFnearbyint_binary_aux m sx mx ex :=
if (0 <=? ex)%Z then ((Z.pos mx) × 2 ^ ex)%Z else
let mrs := {| shr_m := Z.pos mx; shr_r := false; shr_s := false |} in
let mrs' := if (ex <? - prec)%Z then
{| shr_m := Z0; shr_r := false; shr_s := true |} else
fst (shr mrs ex (- ex)) in
let l' := loc_of_shr_record mrs' in
let mx' := shr_m mrs' in
choice_mode m sx mx' l'.
Definition SFnearbyint_binary m sx mx ex :=
if (0 <=? ex)%Z then S754_finite sx mx ex else
let mx'' := SFnearbyint_binary_aux m sx mx ex in
match mx'' with
| Z.pos n ⇒
let (mx''', ex''') := shl_align_fexp n 0 in
S754_finite sx mx''' ex'''
| Z.neg n ⇒ S754_nan
| Z0 ⇒ S754_zero sx
end.
Lemma Bnearbyint_correct_aux :
∀ md sx mx ex (Hx : bounded mx ex = true),
let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
let z := SFnearbyint_binary md sx mx ex in
valid_binary z = true ∧
SF2R radix2 z = (round radix2 (FIX_exp 0) (round_mode md) x) ∧
is_finite_SF z = true ∧ (is_nan_SF z = false → sign_SF z = sx).
Definition Bnearbyint md (x : binary_float) :=
match x with
| B754_nan ⇒ B754_nan
| B754_zero s ⇒ B754_zero s
| B754_infinity s ⇒ B754_infinity s
| B754_finite s m e H ⇒
SF2B _ (proj1 (Bnearbyint_correct_aux md s m e H))
end.
Theorem Bnearbyint_correct :
∀ md x,
B2R (Bnearbyint md x) = round radix2 (FIX_exp 0) (round_mode md) (B2R x) ∧
is_finite (Bnearbyint md x) = is_finite x ∧
(is_nan (Bnearbyint md x) = false → Bsign (Bnearbyint md x) = Bsign x).
Definition Btrunc (x : binary_float) :=
match x with
| B754_finite s m e _ ⇒
cond_Zopp s (SFnearbyint_binary_aux mode_ZR s m e)
| _ ⇒ 0%Z
end.
Theorem Btrunc_correct :
∀ x,
IZR (Btrunc x) = round radix2 (FIX_exp 0) Ztrunc (B2R x).
A few values
Definition Bone := SF2B _ (proj1 (binary_round_correct mode_NE false 1 0)).
Theorem Bone_correct : B2R Bone = 1%R.
Theorem is_finite_strict_Bone :
is_finite_strict Bone = true.
Theorem is_nan_Bone :
is_nan Bone = false.
Theorem is_finite_Bone :
is_finite Bone = true.
Theorem Bsign_Bone :
Bsign Bone = false.
Lemma Bmax_float_proof :
valid_binary
(S754_finite false (shift_pos (Z.to_pos prec) 1 - 1) (emax - prec))
= true.
Definition Bmax_float := SF2B _ Bmax_float_proof.
Extraction/modification of mantissa/exponent
Definition Bnormfr_mantissa x := SFnormfr_mantissa prec (B2SF x).
Lemma Bnormfr_mantissa_correct :
∀ x,
(/ 2 ≤ Rabs (B2R x) < 1)%R →
match x with
| B754_finite _ m e _ ⇒
Bnormfr_mantissa x = N.pos m
∧ Z.pos (digits2_pos m) = prec ∧ (e = - prec)%Z
| _ ⇒ False
end.
Definition Bldexp mode f e :=
match f with
| B754_finite sx mx ex _ ⇒
SF2B _ (proj1 (binary_round_correct mode sx mx (ex+e)))
| _ ⇒ f
end.
Theorem is_nan_Bldexp :
∀ mode x e,
is_nan (Bldexp mode x e) = is_nan x.
Theorem Bldexp_correct :
∀ m (f : binary_float) e,
if Rlt_bool
(Rabs (round radix2 fexp (round_mode m) (B2R f × bpow radix2 e)))
(bpow radix2 emax) then
(B2R (Bldexp m f e)
= round radix2 fexp (round_mode m) (B2R f × bpow radix2 e))%R ∧
is_finite (Bldexp m f e) = is_finite f ∧
Bsign (Bldexp m f e) = Bsign f
else
B2SF (Bldexp m f e) = binary_overflow m (Bsign f).
Lemma Bldexp_Bopp_NE x e : Bldexp mode_NE (Bopp x) e = Bopp (Bldexp mode_NE x e).
Definition Ffrexp_core_binary s m e :=
if Zlt_bool (-prec) emin then
(S754_finite s m e, 0%Z)
else if (Z.to_pos prec <=? digits2_pos m)%positive then
(S754_finite s m (-prec), (e + prec)%Z)
else
let d := (prec - Z.pos (digits2_pos m))%Z in
(S754_finite s (shift_pos (Z.to_pos d) m) (-prec), (e + prec - d)%Z).
Lemma Bfrexp_correct_aux :
∀ sx mx ex (Hx : bounded mx ex = true),
let x := F2R (Float radix2 (cond_Zopp sx (Z.pos mx)) ex) in
let z := fst (Ffrexp_core_binary sx mx ex) in
let e := snd (Ffrexp_core_binary sx mx ex) in
valid_binary z = true ∧
((2 < emax)%Z → (/2 ≤ Rabs (SF2R radix2 z) < 1)%R) ∧
(x = SF2R radix2 z × bpow radix2 e)%R.
Definition Bfrexp f :=
match f with
| B754_finite s m e H ⇒
let e' := snd (Ffrexp_core_binary s m e) in
(SF2B _ (proj1 (Bfrexp_correct_aux s m e H)), e')
| _ ⇒ (f, (-2×emax-prec)%Z)
end.
Theorem is_nan_Bfrexp :
∀ x,
is_nan (fst (Bfrexp x)) = is_nan x.
Theorem Bfrexp_correct :
∀ f,
is_finite_strict f = true →
let (z, e) := Bfrexp f in
(B2R f = B2R z × bpow radix2 e)%R ∧
( (2 < emax)%Z → (/2 ≤ Rabs (B2R z) < 1)%R ∧ e = mag radix2 (B2R f) ).
Ulp
Lemma Bulp_correct_aux :
bounded 1 emin = true.
Definition Bulp x :=
match x with
| B754_zero _ ⇒ B754_finite false 1 emin Bulp_correct_aux
| B754_infinity _ ⇒ B754_infinity false
| B754_nan ⇒ B754_nan
| B754_finite _ _ e _ ⇒ binary_normalize mode_ZR 1 e false
end.
Theorem is_nan_Bulp :
∀ x,
is_nan (Bulp x) = is_nan x.
Theorem Bulp_correct :
∀ x,
is_finite x = true →
B2R (Bulp x) = ulp radix2 fexp (B2R x) ∧
is_finite (Bulp x) = true ∧
Bsign (Bulp x) = false.
Theorem is_finite_strict_Bulp :
∀ x,
is_finite_strict (Bulp x) = is_finite x.
Definition Bulp' x := Bldexp mode_NE Bone (fexp (snd (Bfrexp x))).
Theorem Bulp'_correct :
(2 < emax)%Z →
∀ x,
is_finite x = true →
Bulp' x = Bulp x.
Successor (and predecessor)
Definition Bsucc x :=
match x with
| B754_zero _ ⇒ B754_finite false 1 emin Bulp_correct_aux
| B754_infinity false ⇒ x
| B754_infinity true ⇒ Bopp Bmax_float
| B754_nan ⇒ B754_nan
| B754_finite false mx ex _ ⇒
SF2B _ (proj1 (binary_round_correct mode_UP false (mx + 1) ex))
| B754_finite true mx ex _ ⇒
SF2B _ (proj1 (binary_round_correct mode_ZR true (xO mx - 1) (ex - 1)))
end.
Theorem is_nan_Bsucc :
∀ x,
is_nan (Bsucc x) = is_nan x.
Theorem Bsucc_correct :
∀ x,
is_finite x = true →
if Rlt_bool (succ radix2 fexp (B2R x)) (bpow radix2 emax) then
B2R (Bsucc x) = succ radix2 fexp (B2R x) ∧
is_finite (Bsucc x) = true ∧
(Bsign (Bsucc x) = Bsign x && is_finite_strict x)%bool
else
B2SF (Bsucc x) = S754_infinity false.
Definition Bpred x := Bopp (Bsucc (Bopp x)).
Theorem is_nan_Bpred :
∀ x,
is_nan (Bpred x) = is_nan x.
Theorem Bpred_correct :
∀ x,
is_finite x = true →
if Rlt_bool (- bpow radix2 emax) (pred radix2 fexp (B2R x)) then
B2R (Bpred x) = pred radix2 fexp (B2R x) ∧
is_finite (Bpred x) = true ∧
(Bsign (Bpred x) = Bsign x || negb (is_finite_strict x))%bool
else
B2SF (Bpred x) = S754_infinity true.
Definition Bpred_pos' x :=
match x with
| B754_finite _ mx _ _ ⇒
let d :=
if (mx~0 =? shift_pos (Z.to_pos prec) 1)%positive then
Bldexp mode_NE Bone (fexp (snd (Bfrexp x) - 1))
else
Bulp' x in
Bminus mode_NE x d
| _ ⇒ x
end.
Theorem Bpred_pos'_correct :
(2 < emax)%Z →
∀ x,
(0 < B2R x)%R →
Bpred_pos' x = Bpred x.
Definition Bsucc' x :=
match x with
| B754_zero _ ⇒ Bldexp mode_NE Bone emin
| B754_infinity false ⇒ x
| B754_infinity true ⇒ Bopp Bmax_float
| B754_nan ⇒ B754_nan
| B754_finite false _ _ _ ⇒ Bplus mode_NE x (Bulp x)
| B754_finite true _ _ _ ⇒ Bopp (Bpred_pos' (Bopp x))
end.
Theorem Bsucc'_correct :
(2 < emax)%Z →
∀ x,
is_finite x = true →
Bsucc' x = Bsucc x.
End Binary.
Arguments B754_zero {prec} {emax}.
Arguments B754_infinity {prec} {emax}.
Arguments B754_nan {prec} {emax}.
Arguments B754_finite {prec} {emax}.
Arguments SF2B {prec} {emax}.
Arguments SF2B' {prec} {emax}.
Arguments B2SF {prec} {emax}.
Arguments B2R {prec} {emax}.
Arguments is_finite_strict {prec} {emax}.
Arguments is_finite {prec} {emax}.
Arguments is_nan {prec} {emax}.
Arguments erase {prec} {emax}.
Arguments Bsign {prec} {emax}.
Arguments Bcompare {prec} {emax}.
Arguments Beqb {prec} {emax}.
Arguments Bltb {prec} {emax}.
Arguments Bleb {prec} {emax}.
Arguments Bopp {prec} {emax}.
Arguments Babs {prec} {emax}.
Arguments Bone {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bmax_float {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bplus {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bminus {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bmult {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bfma {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bdiv {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bsqrt {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bnearbyint {prec} {emax} {prec_lt_emax_}.
Arguments Btrunc {prec} {emax}.
Arguments Bldexp {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bnormfr_mantissa {prec} {emax}.
Arguments Bfrexp {prec} {emax} {prec_gt_0_}.
Arguments Bulp {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bulp' {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bsucc {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bpred {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bpred_pos' {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.