Library Flocq.IEEE754.Bits

This file is part of the Flocq formalization of floating-point arithmetic in Coq: https://flocq.gitlabpages.inria.fr/
Copyright (C) 2011-2018 Sylvie Boldo
Copyright (C) 2011-2018 Guillaume Melquiond
This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the COPYING file for more details.

IEEE-754 encoding of binary floating-point data


From Coq Require Import ZArith Reals Lia SpecFloat.

Require Import Core BinarySingleNaN Binary.

Section Binary_Bits.

Arguments exist {A} {P}.
Arguments B754_zero {prec} {emax}.
Arguments B754_infinity {prec} {emax}.
Arguments B754_nan {prec} {emax}.
Arguments B754_finite {prec} {emax}.

Number of bits for the fraction and exponent
Variable mw ew : Z.

Definition join_bits (s : bool) m e :=
  (Z.shiftl ((if s then Zpower 2 ew else 0) + e) mw + m)%Z.

Lemma join_bits_range :
   s m e,
  (0 m < 2^mw)%Z
  (0 e < 2^ew)%Z
  (0 join_bits s m e < 2 ^ (mw + ew + 1))%Z.

Definition split_bits x :=
  let mm := Zpower 2 mw in
  let em := Zpower 2 ew in
  (Zle_bool (mm × em) x, Zmod x mm, Zmod (Z.div x mm) em)%Z.

Theorem split_join_bits :
   s m e,
  (0 m < Zpower 2 mw)%Z
  (0 e < Zpower 2 ew)%Z
  split_bits (join_bits s m e) = (s, m, e).

Hypothesis Hmw : (0 < mw)%Z.
Hypothesis Hew : (0 < ew)%Z.

Let prec := (mw + 1)%Z.
Let emax := Zpower 2 (ew - 1).
Notation emin := (emin prec emax) (only parsing).
Notation fexp := (fexp prec emax) (only parsing).
Notation binary_float := (binary_float prec emax) (only parsing).

Let Hprec : (0 < prec)%Z.

Let Hm_gt_0 : (0 < 2^mw)%Z.

Let He_gt_0 : (0 < 2^ew)%Z.

Hypothesis Hmax : (prec < emax)%Z.

Theorem join_split_bits :
   x,
  (0 x < Zpower 2 (mw + ew + 1))%Z
  let '(s, m, e) := split_bits x in
  join_bits s m e = x.

Theorem split_bits_inj :
   x y,
  (0 x < Zpower 2 (mw + ew + 1))%Z
  (0 y < Zpower 2 (mw + ew + 1))%Z
  split_bits x = split_bits y
  x = y.

Definition bits_of_binary_float (x : binary_float) :=
  match x with
  | B754_zero sxjoin_bits sx 0 0
  | B754_infinity sxjoin_bits sx 0 (Zpower 2 ew - 1)
  | B754_nan sx plx _join_bits sx (Zpos plx) (Zpower 2 ew - 1)
  | B754_finite sx mx ex _
    let m := (Zpos mx - Zpower 2 mw)%Z in
    if Zle_bool 0 m then
      join_bits sx m (ex - emin + 1)
    else
      join_bits sx (Zpos mx) 0
  end.

Definition split_bits_of_binary_float (x : binary_float) :=
  match x with
  | B754_zero sx(sx, 0, 0)%Z
  | B754_infinity sx(sx, 0, Zpower 2 ew - 1)%Z
  | B754_nan sx plx _(sx, Zpos plx, Zpower 2 ew - 1)%Z
  | B754_finite sx mx ex _
    let m := (Zpos mx - Zpower 2 mw)%Z in
    if Zle_bool 0 m then
      (sx, m, ex - emin + 1)%Z
    else
      (sx, Zpos mx, 0)%Z
  end.

Theorem split_bits_of_binary_float_correct :
   x,
  split_bits (bits_of_binary_float x) = split_bits_of_binary_float x.

Theorem bits_of_binary_float_range:
   x, (0 bits_of_binary_float x < 2^(mw+ew+1))%Z.

Definition binary_float_of_bits_aux x :=
  let '(sx, mx, ex) := split_bits x in
  if Zeq_bool ex 0 then
    match mx with
    | Z0F754_zero sx
    | Zpos pxF754_finite sx px emin
    | Zneg _F754_nan false xH
    end
  else if Zeq_bool ex (Zpower 2 ew - 1) then
    match mx with
      | Z0F754_infinity sx
      | Zpos plxF754_nan sx plx
      | Zneg _F754_nan false xH
    end
  else
    match (mx + Zpower 2 mw)%Z with
    | Zpos pxF754_finite sx px (ex + emin - 1)
    | _F754_nan false xH
    end.

Lemma binary_float_of_bits_aux_correct :
   x,
  valid_binary prec emax (binary_float_of_bits_aux x) = true.

Definition binary_float_of_bits x :=
  FF2B prec emax _ (binary_float_of_bits_aux_correct x).

Theorem binary_float_of_bits_of_binary_float :
   x,
  binary_float_of_bits (bits_of_binary_float x) = x.

Theorem bits_of_binary_float_of_bits :
   x,
  (0 x < 2^(mw+ew+1))%Z
  bits_of_binary_float (binary_float_of_bits x) = x.

End Binary_Bits.

Specialization for IEEE single precision operations
Section B32_Bits.

Arguments B754_nan {prec} {emax}.

Definition binary32 := binary_float 24 128.

Let Hprec : (0 < 24)%Z.

Let Hprec_emax : (24 < 128)%Z.

Let Hemax : (3 128)%Z.

Definition default_nan_pl32 : { nan : binary32 | is_nan 24 128 nan = true } :=
  exist _ (@B754_nan 24 128 false (iter_nat xO 22 xH) (refl_equal true)) (refl_equal true).

Definition unop_nan_pl32 (f : binary32) : { nan : binary32 | is_nan 24 128 nan = true } :=
  match f as f with
  | B754_nan s pl Hplexist _ (B754_nan s pl Hpl) (refl_equal true)
  | _default_nan_pl32
  end.

Definition binop_nan_pl32 (f1 f2 : binary32) : { nan : binary32 | is_nan 24 128 nan = true } :=
  match f1, f2 with
  | B754_nan s1 pl1 Hpl1, _exist _ (B754_nan s1 pl1 Hpl1) (refl_equal true)
  | _, B754_nan s2 pl2 Hpl2exist _ (B754_nan s2 pl2 Hpl2) (refl_equal true)
  | _, _default_nan_pl32
  end.

Definition ternop_nan_pl32 (f1 f2 f3 : binary32) : { nan : binary32 | is_nan 24 128 nan = true } :=
  match f1, f2, f3 with
  | B754_nan s1 pl1 Hpl1, _, _exist _ (B754_nan s1 pl1 Hpl1) (refl_equal true)
  | _, B754_nan s2 pl2 Hpl2, _exist _ (B754_nan s2 pl2 Hpl2) (refl_equal true)
  | _, _, B754_nan s3 pl3 Hpl3exist _ (B754_nan s3 pl3 Hpl3) (refl_equal true)
  | _, _, _default_nan_pl32
  end.

Definition b32_erase : binary32 binary32 := erase 24 128.
Definition b32_opp : binary32 binary32 := Bopp 24 128 unop_nan_pl32.
Definition b32_abs : binary32 binary32 := Babs 24 128 unop_nan_pl32.
Definition b32_pred : binary32 binary32 := Bpred _ _ Hprec Hprec_emax.
Definition b32_succ : binary32 binary32 := Bsucc _ _ Hprec Hprec_emax.
Definition b32_sqrt : mode binary32 binary32 := Bsqrt _ _ Hprec Hprec_emax unop_nan_pl32.

Definition b32_plus : mode binary32 binary32 binary32 := Bplus _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_minus : mode binary32 binary32 binary32 := Bminus _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_mult : mode binary32 binary32 binary32 := Bmult _ _ Hprec Hprec_emax binop_nan_pl32.
Definition b32_div : mode binary32 binary32 binary32 := Bdiv _ _ Hprec Hprec_emax binop_nan_pl32.

Definition b32_fma : mode binary32 binary32 binary32 binary32 := Bfma _ _ Hprec Hprec_emax ternop_nan_pl32.

Definition b32_compare : binary32 binary32 option comparison := Bcompare 24 128.
Definition b32_of_bits : Z binary32 := binary_float_of_bits 23 8 (refl_equal _) (refl_equal _) (refl_equal _).
Definition bits_of_b32 : binary32 Z := bits_of_binary_float 23 8.

End B32_Bits.

Specialization for IEEE double precision operations
Section B64_Bits.

Arguments B754_nan {prec} {emax}.

Definition binary64 := binary_float 53 1024.

Let Hprec : (0 < 53)%Z.

Let Hprec_emax : (53 < 1024)%Z.

Let Hemax : (3 1024)%Z.

Definition default_nan_pl64 : { nan : binary64 | is_nan 53 1024 nan = true } :=
  exist _ (@B754_nan 53 1024 false (iter_nat xO 51 xH) (refl_equal true)) (refl_equal true).

Definition unop_nan_pl64 (f : binary64) : { nan : binary64 | is_nan 53 1024 nan = true } :=
  match f as f with
  | B754_nan s pl Hplexist _ (B754_nan s pl Hpl) (refl_equal true)
  | _default_nan_pl64
  end.

Definition binop_nan_pl64 (f1 f2 : binary64) : { nan : binary64 | is_nan 53 1024 nan = true } :=
  match f1, f2 with
  | B754_nan s1 pl1 Hpl1, _exist _ (B754_nan s1 pl1 Hpl1) (refl_equal true)
  | _, B754_nan s2 pl2 Hpl2exist _ (B754_nan s2 pl2 Hpl2) (refl_equal true)
  | _, _default_nan_pl64
  end.

Definition ternop_nan_pl64 (f1 f2 f3 : binary64) : { nan : binary64 | is_nan 53 1024 nan = true } :=
  match f1, f2, f3 with
  | B754_nan s1 pl1 Hpl1, _, _exist _ (B754_nan s1 pl1 Hpl1) (refl_equal true)
  | _, B754_nan s2 pl2 Hpl2, _exist _ (B754_nan s2 pl2 Hpl2) (refl_equal true)
  | _, _, B754_nan s3 pl3 Hpl3exist _ (B754_nan s3 pl3 Hpl3) (refl_equal true)
  | _, _, _default_nan_pl64
  end.

Definition b64_erase : binary64 binary64 := erase 53 1024.
Definition b64_opp : binary64 binary64 := Bopp 53 1024 unop_nan_pl64.
Definition b64_abs : binary64 binary64 := Babs 53 1024 unop_nan_pl64.
Definition b64_pred : binary64 binary64 := Bpred _ _ Hprec Hprec_emax.
Definition b64_succ : binary64 binary64 := Bsucc _ _ Hprec Hprec_emax.
Definition b64_sqrt : mode binary64 binary64 := Bsqrt _ _ Hprec Hprec_emax unop_nan_pl64.

Definition b64_plus : mode binary64 binary64 binary64 := Bplus _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_minus : mode binary64 binary64 binary64 := Bminus _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_mult : mode binary64 binary64 binary64 := Bmult _ _ Hprec Hprec_emax binop_nan_pl64.
Definition b64_div : mode binary64 binary64 binary64 := Bdiv _ _ Hprec Hprec_emax binop_nan_pl64.

Definition b64_fma : mode binary64 binary64 binary64 binary64 := Bfma _ _ Hprec Hprec_emax ternop_nan_pl64.

Definition b64_compare : binary64 binary64 option comparison := Bcompare 53 1024.
Definition b64_of_bits : Z binary64 := binary_float_of_bits 52 11 (refl_equal _) (refl_equal _) (refl_equal _).
Definition bits_of_b64 : binary64 Z := bits_of_binary_float 52 11.

End B64_Bits.