Library Flocq.IEEE754.Bits

This file is part of the Flocq formalization of floating-point arithmetic in Coq: https://flocq.gitlabpages.inria.fr/

This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the COPYING file for more details.

IEEE-754 encoding of binary floating-point data

From Coq Require Import ZArith Reals Lia SpecFloat.

Require Import Core BinarySingleNaN Binary.

Section Binary_Bits.

Arguments exist {A} {P}.
Arguments B754_zero {prec} {emax}.
Arguments B754_infinity {prec} {emax}.
Arguments B754_nan {prec} {emax}.
Arguments B754_finite {prec} {emax}.

Number of bits for the fraction and exponent

Variable mw ew : Z.

Definition join_bits (s : bool) m e :=
  (Z.shiftl ((if s then Zpower 2 ew else 0) + e) mw + m)%Z.

Lemma join_bits_range :
  ∀ s m e,
  (0 ≤ m < 2^mw)%Z →
  (0 ≤ e < 2^ew)%Z →
  (0 ≤ join_bits s m e < 2 ^ (mw + ew + 1))%Z.

Definition split_bits x :=
  let mm := Zpower 2 mw in
  let em := Zpower 2 ew in
  (Zle_bool (mm × em) x , Z.modulo x mm , Z.modulo (Z.div x mm) em )%Z.

Theorem split_join_bits :
  ∀ s m e,
  (0 ≤ m < Zpower 2 mw)%Z →
  (0 ≤ e < Zpower 2 ew)%Z →
  split_bits (join_bits s m e) = (s , m , e ).

Hypothesis Hmw : (0 < mw)%Z.
Hypothesis Hew : (0 < ew)%Z.

Let prec := (mw + 1)%Z.
Let emax := Zpower 2 (ew - 1).
Notation emin := (emin prec emax) (only parsing).
Notation fexp := (fexp prec emax) (only parsing).
Notation binary_float := (binary_float prec emax) (only parsing).

Let Hprec : (0 < prec)%Z.

Let Hm_gt_0 : (0 < 2^mw)%Z.

Let He_gt_0 : (0 < 2^ew)%Z.

Hypothesis Hmax : (prec < emax)%Z.

Theorem join_split_bits :
  ∀ x,
  (0 ≤ x < Zpower 2 (mw + ew + 1))%Z →
  let '(s, m, e) := split_bits x in
  join_bits s m e = x.

Theorem split_bits_inj :
  ∀ x y,
  (0 ≤ x < Zpower 2 (mw + ew + 1))%Z →
  (0 ≤ y < Zpower 2 (mw + ew + 1))%Z →
  split_bits x = split_bits y →
  x = y.

Definition bits_of_binary_float (x : binary_float) :=
  match x with
  | B754_zero sx ⇒ join_bits sx 0 0
  | B754_infinity sx ⇒ join_bits sx 0 (Zpower 2 ew - 1)
  | B754_nan sx plx _ ⇒ join_bits sx (Zpos plx) (Zpower 2 ew - 1)
  | B754_finite sx mx ex _ ⇒
    let m := (Zpos mx - Zpower 2 mw)%Z in
    if Zle_bool 0 m then
      join_bits sx m (ex - emin + 1)
    else
      join_bits sx (Zpos mx) 0
  end.

Definition split_bits_of_binary_float (x : binary_float) :=
  match x with
  | B754_zero sx ⇒ (sx, 0, 0)%Z
  | B754_infinity sx ⇒ (sx, 0, Zpower 2 ew - 1)%Z
  | B754_nan sx plx _ ⇒ (sx, Zpos plx, Zpower 2 ew - 1)%Z
  | B754_finite sx mx ex _ ⇒
    let m := (Zpos mx - Zpower 2 mw)%Z in
    if Zle_bool 0 m then
      (sx, m , ex - emin + 1)%Z
    else
      (sx, Zpos mx, 0)%Z
  end.

Theorem split_bits_of_binary_float_correct :
  ∀ x,
  split_bits (bits_of_binary_float x) = split_bits_of_binary_float x.

Theorem bits_of_binary_float_range:
  ∀ x, (0 ≤ bits_of_binary_float x < 2^(mw +ew +1))%Z.

Definition binary_float_of_bits_aux x :=
  let '(sx, mx, ex) := split_bits x in
  if Zeq_bool ex 0 then
    match mx with
    | Z0 ⇒ F754_zero sx
    | Zpos px ⇒ F754_finite sx px emin
    | Zneg _ ⇒ F754_nan false xH
    end
  else if Zeq_bool ex (Zpower 2 ew - 1) then
    match mx with
      | Z0 ⇒ F754_infinity sx
      | Zpos plx ⇒ F754_nan sx plx
      | Zneg _ ⇒ F754_nan false xH
    end
  else
    match (mx + Zpower 2 mw)%Z with
    | Zpos px ⇒ F754_finite sx px (ex + emin - 1)
    | _ ⇒ F754_nan false xH
    end.

Lemma binary_float_of_bits_aux_correct :
  ∀ x,
  valid_binary prec emax (binary_float_of_bits_aux x) = true.

Definition binary_float_of_bits x :=
  FF2B prec emax _ (binary_float_of_bits_aux_correct x).

Theorem binary_float_of_bits_of_binary_float :
  ∀ x,
  binary_float_of_bits (bits_of_binary_float x) = x.

Theorem bits_of_binary_float_of_bits :
  ∀ x,
  (0 ≤ x < 2^(mw +ew +1))%Z →
  bits_of_binary_float (binary_float_of_bits x) = x.

End Binary_Bits.

Specialization for IEEE single precision operations

Specialization for IEEE double precision operations