Library Flocq.IEEE754.Bits
This file is part of the Flocq formalization of floating-point
arithmetic in Coq: https://flocq.gitlabpages.inria.fr/
Copyright (C) 2011-2018 Sylvie Boldo
Copyright (C) 2011-2018 Guillaume Melquiond
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
COPYING file for more details.
Copyright (C) 2011-2018 Guillaume Melquiond
IEEE-754 encoding of binary floating-point data
From Coq Require Import ZArith Reals Lia SpecFloat.
Require Import Core BinarySingleNaN Binary.
Section Binary_Bits.
Arguments exist {A} {P}.
Arguments B754_zero {prec} {emax}.
Arguments B754_infinity {prec} {emax}.
Arguments B754_nan {prec} {emax}.
Arguments B754_finite {prec} {emax}.
Number of bits for the fraction and exponent
Variable mw ew : Z.
Definition join_bits (s : bool) m e :=
(Z.shiftl ((if s then Zpower 2 ew else 0) + e) mw + m)%Z.
Lemma join_bits_range :
∀ s m e,
(0 ≤ m < 2^mw)%Z →
(0 ≤ e < 2^ew)%Z →
(0 ≤ join_bits s m e < 2 ^ (mw + ew + 1))%Z.
Definition split_bits x :=
let mm := Zpower 2 mw in
let em := Zpower 2 ew in
(Zle_bool (mm × em) x, Zmod x mm, Zmod (Z.div x mm) em)%Z.
Theorem split_join_bits :
∀ s m e,
(0 ≤ m < Zpower 2 mw)%Z →
(0 ≤ e < Zpower 2 ew)%Z →
split_bits (join_bits s m e) = (s, m, e).
Hypothesis Hmw : (0 < mw)%Z.
Hypothesis Hew : (0 < ew)%Z.
Let prec := (mw + 1)%Z.
Let emax := Zpower 2 (ew - 1).
Notation emin := (emin prec emax) (only parsing).
Notation fexp := (fexp prec emax) (only parsing).
Notation binary_float := (binary_float prec emax) (only parsing).
Let Hprec : (0 < prec)%Z.
Let Hm_gt_0 : (0 < 2^mw)%Z.
Let He_gt_0 : (0 < 2^ew)%Z.
Hypothesis Hmax : (prec < emax)%Z.
Theorem join_split_bits :
∀ x,
(0 ≤ x < Zpower 2 (mw + ew + 1))%Z →
let '(s, m, e) := split_bits x in
join_bits s m e = x.
Theorem split_bits_inj :
∀ x y,
(0 ≤ x < Zpower 2 (mw + ew + 1))%Z →
(0 ≤ y < Zpower 2 (mw + ew + 1))%Z →
split_bits x = split_bits y →
x = y.
Definition bits_of_binary_float (x : binary_float) :=
match x with
| B754_zero sx ⇒ join_bits sx 0 0
| B754_infinity sx ⇒ join_bits sx 0 (Zpower 2 ew - 1)
| B754_nan sx plx _ ⇒ join_bits sx (Zpos plx) (Zpower 2 ew - 1)
| B754_finite sx mx ex _ ⇒
let m := (Zpos mx - Zpower 2 mw)%Z in
if Zle_bool 0 m then
join_bits sx m (ex - emin + 1)
else
join_bits sx (Zpos mx) 0
end.
Definition split_bits_of_binary_float (x : binary_float) :=
match x with
| B754_zero sx ⇒ (sx, 0, 0)%Z
| B754_infinity sx ⇒ (sx, 0, Zpower 2 ew - 1)%Z
| B754_nan sx plx _ ⇒ (sx, Zpos plx, Zpower 2 ew - 1)%Z
| B754_finite sx mx ex _ ⇒
let m := (Zpos mx - Zpower 2 mw)%Z in
if Zle_bool 0 m then
(sx, m, ex - emin + 1)%Z
else
(sx, Zpos mx, 0)%Z
end.
Theorem split_bits_of_binary_float_correct :
∀ x,
split_bits (bits_of_binary_float x) = split_bits_of_binary_float x.
Theorem bits_of_binary_float_range:
∀ x, (0 ≤ bits_of_binary_float x < 2^(mw+ew+1))%Z.
Definition binary_float_of_bits_aux x :=
let '(sx, mx, ex) := split_bits x in
if Zeq_bool ex 0 then
match mx with
| Z0 ⇒ F754_zero sx
| Zpos px ⇒ F754_finite sx px emin
| Zneg _ ⇒ F754_nan false xH
end
else if Zeq_bool ex (Zpower 2 ew - 1) then
match mx with
| Z0 ⇒ F754_infinity sx
| Zpos plx ⇒ F754_nan sx plx
| Zneg _ ⇒ F754_nan false xH
end
else
match (mx + Zpower 2 mw)%Z with
| Zpos px ⇒ F754_finite sx px (ex + emin - 1)
| _ ⇒ F754_nan false xH
end.
Lemma binary_float_of_bits_aux_correct :
∀ x,
valid_binary prec emax (binary_float_of_bits_aux x) = true.
Definition binary_float_of_bits x :=
FF2B prec emax _ (binary_float_of_bits_aux_correct x).
Theorem binary_float_of_bits_of_binary_float :
∀ x,
binary_float_of_bits (bits_of_binary_float x) = x.
Theorem bits_of_binary_float_of_bits :
∀ x,
(0 ≤ x < 2^(mw+ew+1))%Z →
bits_of_binary_float (binary_float_of_bits x) = x.
End Binary_Bits.
Definition join_bits (s : bool) m e :=
(Z.shiftl ((if s then Zpower 2 ew else 0) + e) mw + m)%Z.
Lemma join_bits_range :
∀ s m e,
(0 ≤ m < 2^mw)%Z →
(0 ≤ e < 2^ew)%Z →
(0 ≤ join_bits s m e < 2 ^ (mw + ew + 1))%Z.
Definition split_bits x :=
let mm := Zpower 2 mw in
let em := Zpower 2 ew in
(Zle_bool (mm × em) x, Zmod x mm, Zmod (Z.div x mm) em)%Z.
Theorem split_join_bits :
∀ s m e,
(0 ≤ m < Zpower 2 mw)%Z →
(0 ≤ e < Zpower 2 ew)%Z →
split_bits (join_bits s m e) = (s, m, e).
Hypothesis Hmw : (0 < mw)%Z.
Hypothesis Hew : (0 < ew)%Z.
Let prec := (mw + 1)%Z.
Let emax := Zpower 2 (ew - 1).
Notation emin := (emin prec emax) (only parsing).
Notation fexp := (fexp prec emax) (only parsing).
Notation binary_float := (binary_float prec emax) (only parsing).
Let Hprec : (0 < prec)%Z.
Let Hm_gt_0 : (0 < 2^mw)%Z.
Let He_gt_0 : (0 < 2^ew)%Z.
Hypothesis Hmax : (prec < emax)%Z.
Theorem join_split_bits :
∀ x,
(0 ≤ x < Zpower 2 (mw + ew + 1))%Z →
let '(s, m, e) := split_bits x in
join_bits s m e = x.
Theorem split_bits_inj :
∀ x y,
(0 ≤ x < Zpower 2 (mw + ew + 1))%Z →
(0 ≤ y < Zpower 2 (mw + ew + 1))%Z →
split_bits x = split_bits y →
x = y.
Definition bits_of_binary_float (x : binary_float) :=
match x with
| B754_zero sx ⇒ join_bits sx 0 0
| B754_infinity sx ⇒ join_bits sx 0 (Zpower 2 ew - 1)
| B754_nan sx plx _ ⇒ join_bits sx (Zpos plx) (Zpower 2 ew - 1)
| B754_finite sx mx ex _ ⇒
let m := (Zpos mx - Zpower 2 mw)%Z in
if Zle_bool 0 m then
join_bits sx m (ex - emin + 1)
else
join_bits sx (Zpos mx) 0
end.
Definition split_bits_of_binary_float (x : binary_float) :=
match x with
| B754_zero sx ⇒ (sx, 0, 0)%Z
| B754_infinity sx ⇒ (sx, 0, Zpower 2 ew - 1)%Z
| B754_nan sx plx _ ⇒ (sx, Zpos plx, Zpower 2 ew - 1)%Z
| B754_finite sx mx ex _ ⇒
let m := (Zpos mx - Zpower 2 mw)%Z in
if Zle_bool 0 m then
(sx, m, ex - emin + 1)%Z
else
(sx, Zpos mx, 0)%Z
end.
Theorem split_bits_of_binary_float_correct :
∀ x,
split_bits (bits_of_binary_float x) = split_bits_of_binary_float x.
Theorem bits_of_binary_float_range:
∀ x, (0 ≤ bits_of_binary_float x < 2^(mw+ew+1))%Z.
Definition binary_float_of_bits_aux x :=
let '(sx, mx, ex) := split_bits x in
if Zeq_bool ex 0 then
match mx with
| Z0 ⇒ F754_zero sx
| Zpos px ⇒ F754_finite sx px emin
| Zneg _ ⇒ F754_nan false xH
end
else if Zeq_bool ex (Zpower 2 ew - 1) then
match mx with
| Z0 ⇒ F754_infinity sx
| Zpos plx ⇒ F754_nan sx plx
| Zneg _ ⇒ F754_nan false xH
end
else
match (mx + Zpower 2 mw)%Z with
| Zpos px ⇒ F754_finite sx px (ex + emin - 1)
| _ ⇒ F754_nan false xH
end.
Lemma binary_float_of_bits_aux_correct :
∀ x,
valid_binary prec emax (binary_float_of_bits_aux x) = true.
Definition binary_float_of_bits x :=
FF2B prec emax _ (binary_float_of_bits_aux_correct x).
Theorem binary_float_of_bits_of_binary_float :
∀ x,
binary_float_of_bits (bits_of_binary_float x) = x.
Theorem bits_of_binary_float_of_bits :
∀ x,
(0 ≤ x < 2^(mw+ew+1))%Z →
bits_of_binary_float (binary_float_of_bits x) = x.
End Binary_Bits.
Specialization for IEEE single precision operations
Section B32_Bits.
Arguments B754_nan {prec} {emax}.
Definition binary32 := binary_float 24 128.
Let Hprec : (0 < 24)%Z.
Let Hprec_emax : (24 < 128)%Z.
Let Hemax : (3 ≤ 128)%Z.
Definition default_nan_pl32 : { nan : binary32 | is_nan 24 128 nan = true } :=
exist _ (@B754_nan 24 128 false (iter_nat xO 22 xH) (refl_equal true)) (refl_equal true).
Definition unop_nan_pl32 (f : binary32) : { nan : binary32 | is_nan 24 128 nan = true } :=
match f as f with
| B754_nan s pl Hpl ⇒ exist _ (B754_nan s pl Hpl) (refl_equal true)
| _ ⇒ default_nan_pl32
end.
Definition binop_nan_pl32 (f1 f2 : binary32) : { nan : binary32 | is_nan 24 128 nan = true } :=
match f1, f2 with
| B754_nan s1 pl1 Hpl1, _ ⇒ exist _ (B754_nan s1 pl1 Hpl1) (refl_equal true)
| _, B754_nan s2 pl2 Hpl2 ⇒ exist _ (B754_nan s2 pl2 Hpl2) (refl_equal true)
| _, _ ⇒ default_nan_pl32
end.
Definition ternop_nan_pl32 (f1 f2 f3 : binary32) : { nan : binary32 | is_nan 24 128 nan = true } :=
match f1, f2, f3 with
| B754_nan s1 pl1 Hpl1, _, _ ⇒ exist _ (B754_nan s1 pl1 Hpl1) (refl_equal true)
| _, B754_nan s2 pl2 Hpl2, _ ⇒ exist _ (B754_nan s2 pl2 Hpl2) (refl_equal true)
| _, _, B754_nan s3 pl3 Hpl3 ⇒ exist _ (B754_nan s3 pl3 Hpl3) (refl_equal true)
| _, _, _ ⇒ default_nan_pl32
end.
Definition b32_erase : binary32 → binary32 := erase 24 128.
Definition b32_opp : binary32 → binary32 := Bopp 24 128 unop_nan_pl32.
Definition b32_abs : binary32 → binary32 := Babs 24 128 unop_nan_pl32.
Definition b32_pred : binary32 → binary32 := Bpred _ _ Hprec Hprec_emax.
Definition b32_succ : binary32 → binary32 := Bsucc _ _ Hprec Hprec_emax.
Definition b32_sqrt : mode → binary32 → binary32 := Bsqrt _ _ Hprec Hprec_emax unop_nan_pl32.
Definition b32_plus : mode → binary32 → binary32 → binary32 := Bplus _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_minus : mode → binary32 → binary32 → binary32 := Bminus _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_mult : mode → binary32 → binary32 → binary32 := Bmult _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_div : mode → binary32 → binary32 → binary32 := Bdiv _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_fma : mode → binary32 → binary32 → binary32 → binary32 := Bfma _ _ Hprec Hprec_emax ternop_nan_pl32.
Definition b32_compare : binary32 → binary32 → option comparison := Bcompare 24 128.
Definition b32_of_bits : Z → binary32 := binary_float_of_bits 23 8 (refl_equal _) (refl_equal _) (refl_equal _).
Definition bits_of_b32 : binary32 → Z := bits_of_binary_float 23 8.
End B32_Bits.
Arguments B754_nan {prec} {emax}.
Definition binary32 := binary_float 24 128.
Let Hprec : (0 < 24)%Z.
Let Hprec_emax : (24 < 128)%Z.
Let Hemax : (3 ≤ 128)%Z.
Definition default_nan_pl32 : { nan : binary32 | is_nan 24 128 nan = true } :=
exist _ (@B754_nan 24 128 false (iter_nat xO 22 xH) (refl_equal true)) (refl_equal true).
Definition unop_nan_pl32 (f : binary32) : { nan : binary32 | is_nan 24 128 nan = true } :=
match f as f with
| B754_nan s pl Hpl ⇒ exist _ (B754_nan s pl Hpl) (refl_equal true)
| _ ⇒ default_nan_pl32
end.
Definition binop_nan_pl32 (f1 f2 : binary32) : { nan : binary32 | is_nan 24 128 nan = true } :=
match f1, f2 with
| B754_nan s1 pl1 Hpl1, _ ⇒ exist _ (B754_nan s1 pl1 Hpl1) (refl_equal true)
| _, B754_nan s2 pl2 Hpl2 ⇒ exist _ (B754_nan s2 pl2 Hpl2) (refl_equal true)
| _, _ ⇒ default_nan_pl32
end.
Definition ternop_nan_pl32 (f1 f2 f3 : binary32) : { nan : binary32 | is_nan 24 128 nan = true } :=
match f1, f2, f3 with
| B754_nan s1 pl1 Hpl1, _, _ ⇒ exist _ (B754_nan s1 pl1 Hpl1) (refl_equal true)
| _, B754_nan s2 pl2 Hpl2, _ ⇒ exist _ (B754_nan s2 pl2 Hpl2) (refl_equal true)
| _, _, B754_nan s3 pl3 Hpl3 ⇒ exist _ (B754_nan s3 pl3 Hpl3) (refl_equal true)
| _, _, _ ⇒ default_nan_pl32
end.
Definition b32_erase : binary32 → binary32 := erase 24 128.
Definition b32_opp : binary32 → binary32 := Bopp 24 128 unop_nan_pl32.
Definition b32_abs : binary32 → binary32 := Babs 24 128 unop_nan_pl32.
Definition b32_pred : binary32 → binary32 := Bpred _ _ Hprec Hprec_emax.
Definition b32_succ : binary32 → binary32 := Bsucc _ _ Hprec Hprec_emax.
Definition b32_sqrt : mode → binary32 → binary32 := Bsqrt _ _ Hprec Hprec_emax unop_nan_pl32.
Definition b32_plus : mode → binary32 → binary32 → binary32 := Bplus _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_minus : mode → binary32 → binary32 → binary32 := Bminus _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_mult : mode → binary32 → binary32 → binary32 := Bmult _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_div : mode → binary32 → binary32 → binary32 := Bdiv _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_fma : mode → binary32 → binary32 → binary32 → binary32 := Bfma _ _ Hprec Hprec_emax ternop_nan_pl32.
Definition b32_compare : binary32 → binary32 → option comparison := Bcompare 24 128.
Definition b32_of_bits : Z → binary32 := binary_float_of_bits 23 8 (refl_equal _) (refl_equal _) (refl_equal _).
Definition bits_of_b32 : binary32 → Z := bits_of_binary_float 23 8.
End B32_Bits.
Specialization for IEEE double precision operations
Section B64_Bits.
Arguments B754_nan {prec} {emax}.
Definition binary64 := binary_float 53 1024.
Let Hprec : (0 < 53)%Z.
Let Hprec_emax : (53 < 1024)%Z.
Let Hemax : (3 ≤ 1024)%Z.
Definition default_nan_pl64 : { nan : binary64 | is_nan 53 1024 nan = true } :=
exist _ (@B754_nan 53 1024 false (iter_nat xO 51 xH) (refl_equal true)) (refl_equal true).
Definition unop_nan_pl64 (f : binary64) : { nan : binary64 | is_nan 53 1024 nan = true } :=
match f as f with
| B754_nan s pl Hpl ⇒ exist _ (B754_nan s pl Hpl) (refl_equal true)
| _ ⇒ default_nan_pl64
end.
Definition binop_nan_pl64 (f1 f2 : binary64) : { nan : binary64 | is_nan 53 1024 nan = true } :=
match f1, f2 with
| B754_nan s1 pl1 Hpl1, _ ⇒ exist _ (B754_nan s1 pl1 Hpl1) (refl_equal true)
| _, B754_nan s2 pl2 Hpl2 ⇒ exist _ (B754_nan s2 pl2 Hpl2) (refl_equal true)
| _, _ ⇒ default_nan_pl64
end.
Definition ternop_nan_pl64 (f1 f2 f3 : binary64) : { nan : binary64 | is_nan 53 1024 nan = true } :=
match f1, f2, f3 with
| B754_nan s1 pl1 Hpl1, _, _ ⇒ exist _ (B754_nan s1 pl1 Hpl1) (refl_equal true)
| _, B754_nan s2 pl2 Hpl2, _ ⇒ exist _ (B754_nan s2 pl2 Hpl2) (refl_equal true)
| _, _, B754_nan s3 pl3 Hpl3 ⇒ exist _ (B754_nan s3 pl3 Hpl3) (refl_equal true)
| _, _, _ ⇒ default_nan_pl64
end.
Definition b64_erase : binary64 → binary64 := erase 53 1024.
Definition b64_opp : binary64 → binary64 := Bopp 53 1024 unop_nan_pl64.
Definition b64_abs : binary64 → binary64 := Babs 53 1024 unop_nan_pl64.
Definition b64_pred : binary64 → binary64 := Bpred _ _ Hprec Hprec_emax.
Definition b64_succ : binary64 → binary64 := Bsucc _ _ Hprec Hprec_emax.
Definition b64_sqrt : mode → binary64 → binary64 := Bsqrt _ _ Hprec Hprec_emax unop_nan_pl64.
Definition b64_plus : mode → binary64 → binary64 → binary64 := Bplus _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_minus : mode → binary64 → binary64 → binary64 := Bminus _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_mult : mode → binary64 → binary64 → binary64 := Bmult _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_div : mode → binary64 → binary64 → binary64 := Bdiv _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_fma : mode → binary64 → binary64 → binary64 → binary64 := Bfma _ _ Hprec Hprec_emax ternop_nan_pl64.
Definition b64_compare : binary64 → binary64 → option comparison := Bcompare 53 1024.
Definition b64_of_bits : Z → binary64 := binary_float_of_bits 52 11 (refl_equal _) (refl_equal _) (refl_equal _).
Definition bits_of_b64 : binary64 → Z := bits_of_binary_float 52 11.
End B64_Bits.
Arguments B754_nan {prec} {emax}.
Definition binary64 := binary_float 53 1024.
Let Hprec : (0 < 53)%Z.
Let Hprec_emax : (53 < 1024)%Z.
Let Hemax : (3 ≤ 1024)%Z.
Definition default_nan_pl64 : { nan : binary64 | is_nan 53 1024 nan = true } :=
exist _ (@B754_nan 53 1024 false (iter_nat xO 51 xH) (refl_equal true)) (refl_equal true).
Definition unop_nan_pl64 (f : binary64) : { nan : binary64 | is_nan 53 1024 nan = true } :=
match f as f with
| B754_nan s pl Hpl ⇒ exist _ (B754_nan s pl Hpl) (refl_equal true)
| _ ⇒ default_nan_pl64
end.
Definition binop_nan_pl64 (f1 f2 : binary64) : { nan : binary64 | is_nan 53 1024 nan = true } :=
match f1, f2 with
| B754_nan s1 pl1 Hpl1, _ ⇒ exist _ (B754_nan s1 pl1 Hpl1) (refl_equal true)
| _, B754_nan s2 pl2 Hpl2 ⇒ exist _ (B754_nan s2 pl2 Hpl2) (refl_equal true)
| _, _ ⇒ default_nan_pl64
end.
Definition ternop_nan_pl64 (f1 f2 f3 : binary64) : { nan : binary64 | is_nan 53 1024 nan = true } :=
match f1, f2, f3 with
| B754_nan s1 pl1 Hpl1, _, _ ⇒ exist _ (B754_nan s1 pl1 Hpl1) (refl_equal true)
| _, B754_nan s2 pl2 Hpl2, _ ⇒ exist _ (B754_nan s2 pl2 Hpl2) (refl_equal true)
| _, _, B754_nan s3 pl3 Hpl3 ⇒ exist _ (B754_nan s3 pl3 Hpl3) (refl_equal true)
| _, _, _ ⇒ default_nan_pl64
end.
Definition b64_erase : binary64 → binary64 := erase 53 1024.
Definition b64_opp : binary64 → binary64 := Bopp 53 1024 unop_nan_pl64.
Definition b64_abs : binary64 → binary64 := Babs 53 1024 unop_nan_pl64.
Definition b64_pred : binary64 → binary64 := Bpred _ _ Hprec Hprec_emax.
Definition b64_succ : binary64 → binary64 := Bsucc _ _ Hprec Hprec_emax.
Definition b64_sqrt : mode → binary64 → binary64 := Bsqrt _ _ Hprec Hprec_emax unop_nan_pl64.
Definition b64_plus : mode → binary64 → binary64 → binary64 := Bplus _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_minus : mode → binary64 → binary64 → binary64 := Bminus _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_mult : mode → binary64 → binary64 → binary64 := Bmult _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_div : mode → binary64 → binary64 → binary64 := Bdiv _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_fma : mode → binary64 → binary64 → binary64 → binary64 := Bfma _ _ Hprec Hprec_emax ternop_nan_pl64.
Definition b64_compare : binary64 → binary64 → option comparison := Bcompare 53 1024.
Definition b64_of_bits : Z → binary64 := binary_float_of_bits 52 11 (refl_equal _) (refl_equal _) (refl_equal _).
Definition bits_of_b64 : binary64 → Z := bits_of_binary_float 52 11.
End B64_Bits.