Library Flocq.IEEE754.BinarySingleNaN

This file is part of the Flocq formalization of floating-point arithmetic in Coq: https://flocq.gitlabpages.inria.fr/
Copyright (C) 2010-2018 Sylvie Boldo
Copyright (C) 2010-2018 Guillaume Melquiond
This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the COPYING file for more details.

IEEE-754 arithmetic


From Coq Require Import ZArith Reals Psatz SpecFloat.

Require Import Core Round Bracket Operations Div Sqrt Relative.

Definition SF2R beta x :=
  match x with
  | S754_finite s m eF2R (Float beta (cond_Zopp s (Zpos m)) e)
  | _ ⇒ 0%R
  end.

Class Prec_lt_emax prec emax := prec_lt_emax : (prec < emax)%Z.
Arguments prec_lt_emax prec emax {Prec_lt_emax}.

Section Binary.

prec is the number of bits of the mantissa including the implicit one; emax is the exponent of the infinities. For instance, binary32 is defined by prec = 24 and emax = 128.
Basic type used for representing binary FP numbers. Note that there is exactly one such object per FP datum.

Inductive binary_float :=
  | B754_zero (s : bool)
  | B754_infinity (s : bool)
  | B754_nan : binary_float
  | B754_finite (s : bool) (m : positive) (e : Z) :
    bounded m e = true binary_float.

Definition SF2B x :=
  match x as x return valid_binary x = true binary_float with
  | S754_finite s m eB754_finite s m e
  | S754_infinity sfun _B754_infinity s
  | S754_zero sfun _B754_zero s
  | S754_nanfun _B754_nan
  end.

Definition B2SF x :=
  match x with
  | B754_finite s m e _S754_finite s m e
  | B754_infinity sS754_infinity s
  | B754_zero sS754_zero s
  | B754_nanS754_nan
  end.

Definition B2R f :=
  match f with
  | B754_finite s m e _F2R (Float radix2 (cond_Zopp s (Zpos m)) e)
  | _ ⇒ 0%R
  end.

Theorem SF2R_B2SF :
   x,
  SF2R radix2 (B2SF x) = B2R x.

Theorem B2SF_SF2B :
   x Hx,
  B2SF (SF2B x Hx) = x.

Theorem valid_binary_B2SF :
   x,
  valid_binary (B2SF x) = true.

Theorem SF2B_B2SF :
   x H,
  SF2B (B2SF x) H = x.

Theorem SF2B_B2SF_valid :
   x,
  SF2B (B2SF x) (valid_binary_B2SF x) = x.

Theorem B2R_SF2B :
   x Hx,
  B2R (SF2B x Hx) = SF2R radix2 x.

Theorem match_SF2B :
   {T} fz fi fn ff x Hx,
  match SF2B x Hx return T with
  | B754_zero sxfz sx
  | B754_infinity sxfi sx
  | B754_nanfn
  | B754_finite sx mx ex _ff sx mx ex
  end =
  match x with
  | S754_zero sxfz sx
  | S754_infinity sxfi sx
  | S754_nanfn
  | S754_finite sx mx exff sx mx ex
  end.

Theorem canonical_canonical_mantissa :
   (sx : bool) mx ex,
  canonical_mantissa mx ex = true
  canonical radix2 fexp (Float radix2 (cond_Zopp sx (Zpos mx)) ex).

Theorem canonical_bounded :
   sx mx ex,
  bounded mx ex = true
  canonical radix2 fexp (Float radix2 (cond_Zopp sx (Zpos mx)) ex).

Lemma emin_lt_emax :
  (emin < emax)%Z.

Lemma fexp_emax :
  fexp emax = (emax - prec)%Z.

Theorem generic_format_B2R :
   x,
  generic_format radix2 fexp (B2R x).

Theorem FLT_format_B2R :
   x,
  FLT_format radix2 emin prec (B2R x).

Theorem B2SF_inj :
   x y : binary_float,
  B2SF x = B2SF y
  x = y.

Definition is_finite_strict f :=
  match f with
  | B754_finite _ _ _ _true
  | _false
  end.

Definition is_finite_strict_SF f :=
  match f with
  | S754_finite _ _ _true
  | _false
  end.

Theorem is_finite_strict_B2R :
   x,
  B2R x 0%R
  is_finite_strict x = true.

Theorem is_finite_strict_SF2B :
   x Hx,
  is_finite_strict (SF2B x Hx) = is_finite_strict_SF x.

Theorem B2R_inj:
   x y : binary_float,
  is_finite_strict x = true
  is_finite_strict y = true
  B2R x = B2R y
  x = y.

Definition Bsign x :=
  match x with
  | B754_nanfalse
  | B754_zero ss
  | B754_infinity ss
  | B754_finite s _ _ _s
  end.

Definition sign_SF x :=
  match x with
  | S754_nanfalse
  | S754_zero ss
  | S754_infinity ss
  | S754_finite s _ _s
  end.

Theorem Bsign_SF2B :
   x H,
  Bsign (SF2B x H) = sign_SF x.

Definition is_finite f :=
  match f with
  | B754_finite _ _ _ _true
  | B754_zero _true
  | _false
  end.

Definition is_finite_SF f :=
  match f with
  | S754_finite _ _ _true
  | S754_zero _true
  | _false
  end.

Theorem is_finite_SF2B :
   x Hx,
  is_finite (SF2B x Hx) = is_finite_SF x.

Theorem is_finite_SF_B2SF :
   x,
  is_finite_SF (B2SF x) = is_finite x.

Theorem B2R_Bsign_inj:
   x y : binary_float,
    is_finite x = true
    is_finite y = true
    B2R x = B2R y
    Bsign x = Bsign y
    x = y.

Definition is_nan f :=
  match f with
  | B754_nantrue
  | _false
  end.

Definition is_nan_SF f :=
  match f with
  | S754_nantrue
  | _false
  end.

Theorem is_nan_SF2B :
   x Hx,
  is_nan (SF2B x Hx) = is_nan_SF x.

Theorem is_nan_SF_B2SF :
   x,
  is_nan_SF (B2SF x) = is_nan x.

Definition erase (x : binary_float) : binary_float.

Theorem erase_correct :
   x, erase x = x.

Opposite

Definition Bopp x :=
  match x with
  | B754_nanx
  | B754_infinity sxB754_infinity (negb sx)
  | B754_finite sx mx ex HxB754_finite (negb sx) mx ex Hx
  | B754_zero sxB754_zero (negb sx)
  end.

Theorem Bopp_involutive :
   x,
  Bopp (Bopp x) = x.

Theorem B2R_Bopp :
   x,
  B2R (Bopp x) = (- B2R x)%R.

Theorem is_nan_Bopp :
   x,
  is_nan (Bopp x) = is_nan x.

Theorem is_finite_Bopp :
   x,
  is_finite (Bopp x) = is_finite x.

Theorem is_finite_strict_Bopp :
   x,
  is_finite_strict (Bopp x) = is_finite_strict x.

Lemma Bsign_Bopp :
   x, is_nan x = false Bsign (Bopp x) = negb (Bsign x).

Absolute value

Definition Babs (x : binary_float) : binary_float :=
  match x with
  | B754_nanx
  | B754_infinity sxB754_infinity false
  | B754_finite sx mx ex HxB754_finite false mx ex Hx
  | B754_zero sxB754_zero false
  end.

Theorem B2R_Babs :
   x,
  B2R (Babs x) = Rabs (B2R x).

Theorem is_nan_Babs :
   x,
  is_nan (Babs x) = is_nan x.

Theorem is_finite_Babs :
   x,
  is_finite (Babs x) = is_finite x.

Theorem is_finite_strict_Babs :
   x,
  is_finite_strict (Babs x) = is_finite_strict x.

Theorem Bsign_Babs :
   x,
  Bsign (Babs x) = false.

Theorem Babs_idempotent :
   (x: binary_float),
  Babs (Babs x) = Babs x.

Theorem Babs_Bopp :
   x,
  Babs (Bopp x) = Babs x.

Comparison
Some c means ordered as per c; None means unordered.

Definition Bcompare (f1 f2 : binary_float) : option comparison :=
  SFcompare (B2SF f1) (B2SF f2).

Theorem Bcompare_correct :
   f1 f2,
  is_finite f1 = true is_finite f2 = true
  Bcompare f1 f2 = Some (Rcompare (B2R f1) (B2R f2)).

Theorem Bcompare_swap :
   x y,
  Bcompare y x = match Bcompare x y with Some cSome (CompOpp c) | NoneNone end.

Definition Beqb (f1 f2 : binary_float) : bool := SFeqb (B2SF f1) (B2SF f2).

Theorem Beqb_correct :
   f1 f2,
  is_finite f1 = true is_finite f2 = true
  Beqb f1 f2 = Req_bool (B2R f1) (B2R f2).

Theorem Beqb_refl :
   f, Beqb f f = negb (is_nan f).

Definition Bltb (f1 f2 : binary_float) : bool := SFltb (B2SF f1) (B2SF f2).

Theorem Bltb_correct :
   f1 f2,
  is_finite f1 = true is_finite f2 = true
  Bltb f1 f2 = Rlt_bool (B2R f1) (B2R f2).

Definition Bleb (f1 f2 : binary_float) : bool := SFleb (B2SF f1) (B2SF f2).

Theorem Bleb_correct :
   f1 f2,
  is_finite f1 = true is_finite f2 = true
  Bleb f1 f2 = Rle_bool (B2R f1) (B2R f2).

Theorem bounded_le_emax_minus_prec :
   mx ex,
  bounded mx ex = true
  (F2R (Float radix2 (Zpos mx) ex)
    bpow radix2 emax - bpow radix2 (emax - prec))%R.

Theorem bounded_lt_emax :
   mx ex,
  bounded mx ex = true
  (F2R (Float radix2 (Zpos mx) ex) < bpow radix2 emax)%R.


Theorem bounded_ge_emin :
   mx ex,
  bounded mx ex = true
  (bpow radix2 emin F2R (Float radix2 (Zpos mx) ex))%R.

Theorem abs_B2R_le_emax_minus_prec :
   x,
  (Rabs (B2R x) bpow radix2 emax - bpow radix2 (emax - prec))%R.

Theorem abs_B2R_lt_emax :
   x,
  (Rabs (B2R x) < bpow radix2 emax)%R.

Theorem abs_B2R_ge_emin :
   x,
  is_finite_strict x = true
  (bpow radix2 emin Rabs (B2R x))%R.

Theorem bounded_canonical_lt_emax :
   mx ex,
  canonical radix2 fexp (Float radix2 (Zpos mx) ex)
  (F2R (Float radix2 (Zpos mx) ex) < bpow radix2 emax)%R
  bounded mx ex = true.

Truncation

Theorem shr_m_shr_record_of_loc :
   m l,
  shr_m (shr_record_of_loc m l) = m.

Theorem loc_of_shr_record_of_loc :
   m l,
  loc_of_shr_record (shr_record_of_loc m l) = l.

Lemma inbetween_shr_1 :
   x mrs e,
  (0 shr_m mrs)%Z
  inbetween_float radix2 (shr_m mrs) e x (loc_of_shr_record mrs)
  inbetween_float radix2 (shr_m (shr_1 mrs)) (e + 1) x (loc_of_shr_record (shr_1 mrs)).

Theorem inbetween_shr :
   x m e l n,
  (0 m)%Z
  inbetween_float radix2 m e x l
  let '(mrs, e') := shr (shr_record_of_loc m l) e n in
  inbetween_float radix2 (shr_m mrs) e' x (loc_of_shr_record mrs).

Lemma le_shr1_le :
   mrs, (0 shr_m mrs)%Z
  (0 2 × shr_m (shr_1 mrs) shr_m mrs)%Z
  (shr_m mrs < 2 × (shr_m (shr_1 mrs) + 1))%Z.

Lemma le_shr_le :
   mrs e n,
  (0 shr_m mrs)%Z (0 n)%Z
  (0 2 ^ n × shr_m (fst (shr mrs e n)) shr_m mrs)%Z
  (shr_m mrs < 2 ^ n × (shr_m (fst (shr mrs e n)) + 1))%Z.

Lemma shr_limit :
   mrs e n,
  ((0 < shr_m mrs)%Z shr_m mrs = 0%Z (shr_r mrs || shr_s mrs = true)%bool)
  (shr_m mrs < radix2 ^ (n - 1))%Z
  fst (shr mrs e n) = {| shr_m := 0%Z; shr_r := false; shr_s := true |}.

Theorem shr_truncate :
   f m e l,
  Valid_exp f
  (0 m)%Z
  shr (shr_record_of_loc m l) e (f (Zdigits2 m + e) - e)%Z =
  let '(m', e', l') := truncate radix2 f (m, e, l) in (shr_record_of_loc m' l', e').

Notation shr_fexp := (shr_fexp prec emax).

Theorem shr_fexp_truncate :
   m e l,
  (0 m)%Z
  shr_fexp m e l =
  let '(m', e', l') := truncate radix2 fexp (m, e, l) in (shr_record_of_loc m' l', e').

Rounding modes

Inductive mode := mode_NE | mode_ZR | mode_DN | mode_UP | mode_NA.

Definition round_mode m :=
  match m with
  | mode_NEZnearestE
  | mode_ZRZtrunc
  | mode_DNZfloor
  | mode_UPZceil
  | mode_NAZnearestA
  end.

Definition choice_mode m sx mx lx :=
  match m with
  | mode_NEcond_incr (round_N (negb (Z.even mx)) lx) mx
  | mode_ZRmx
  | mode_DNcond_incr (round_sign_DN sx lx) mx
  | mode_UPcond_incr (round_sign_UP sx lx) mx
  | mode_NAcond_incr (round_N true lx) mx
  end.

Lemma le_choice_mode_le :
   m sx mx lx, (mx choice_mode m sx mx lx mx + 1)%Z.

Lemma round_mode_choice_mode :
   md x m l,
  inbetween_int m (Rabs x) l
  round_mode md x = cond_Zopp (Rlt_bool x 0) (choice_mode md (Rlt_bool x 0) m l).

Global Instance valid_rnd_round_mode : m, Valid_rnd (round_mode m).

Definition overflow_to_inf m s :=
  match m with
  | mode_NEtrue
  | mode_NAtrue
  | mode_ZRfalse
  | mode_UPnegb s
  | mode_DNs
  end.

Definition binary_overflow m s :=
  if overflow_to_inf m s then S754_infinity s
  else S754_finite s (Z.to_pos (Zpower 2 prec - 1)%Z) (emax - prec).

Theorem is_nan_binary_overflow :
   mode s,
  is_nan_SF (binary_overflow mode s) = false.

Theorem binary_overflow_correct :
   m s,
  valid_binary (binary_overflow m s) = true.

Definition binary_fit_aux mode sx mx ex :=
  if Zle_bool ex (emax - prec) then S754_finite sx mx ex
  else binary_overflow mode sx.

Theorem binary_fit_aux_correct :
   mode sx mx ex,
  canonical_mantissa mx ex = true
  let x := SF2R radix2 (S754_finite sx mx ex) in
  let z := binary_fit_aux mode sx mx ex in
  valid_binary z = true
  if Rlt_bool (Rabs x) (bpow radix2 emax) then
    SF2R radix2 z = x is_finite_SF z = true sign_SF z = sx
  else
    z = binary_overflow mode sx.

Definition binary_round_aux mode sx mx ex lx :=
  let '(mrs', e') := shr_fexp mx ex lx in
  let '(mrs'', e'') := shr_fexp (choice_mode mode sx (shr_m mrs') (loc_of_shr_record mrs')) e' loc_Exact in
  match shr_m mrs'' with
  | Z0S754_zero sx
  | Zpos mbinary_fit_aux mode sx m e''
  | _S754_nan
  end.

Theorem binary_round_aux_correct' :
   mode x mx ex lx,
  (x 0)%R
  inbetween_float radix2 mx ex (Rabs x) lx
  (ex cexp radix2 fexp x)%Z
  let z := binary_round_aux mode (Rlt_bool x 0) mx ex lx in
  valid_binary z = true
  if Rlt_bool (Rabs (round radix2 fexp (round_mode mode) x)) (bpow radix2 emax) then
    SF2R radix2 z = round radix2 fexp (round_mode mode) x
    is_finite_SF z = true sign_SF z = Rlt_bool x 0
  else
    z = binary_overflow mode (Rlt_bool x 0).

Theorem binary_round_aux_correct :
   mode x mx ex lx,
  inbetween_float radix2 (Zpos mx) ex (Rabs x) lx
  (ex fexp (Zdigits radix2 (Zpos mx) + ex))%Z
  let z := binary_round_aux mode (Rlt_bool x 0) (Zpos mx) ex lx in
  valid_binary z = true
  if Rlt_bool (Rabs (round radix2 fexp (round_mode mode) x)) (bpow radix2 emax) then
    SF2R radix2 z = round radix2 fexp (round_mode mode) x
    is_finite_SF z = true sign_SF z = Rlt_bool x 0
  else
    z = binary_overflow mode (Rlt_bool x 0).

Multiplication

Lemma Bmult_correct_aux :
   m sx mx ex (Hx : bounded mx ex = true) sy my ey (Hy : bounded my ey = true),
  let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
  let y := F2R (Float radix2 (cond_Zopp sy (Zpos my)) ey) in
  let z := binary_round_aux m (xorb sx sy) (Zpos (mx × my)) (ex + ey) loc_Exact in
  valid_binary z = true
  if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (x × y))) (bpow radix2 emax) then
    SF2R radix2 z = round radix2 fexp (round_mode m) (x × y)
    is_finite_SF z = true sign_SF z = xorb sx sy
  else
    z = binary_overflow m (xorb sx sy).

Definition Bmult m x y :=
  match x, y with
  | B754_nan, _ | _, B754_nanB754_nan
  | B754_infinity sx, B754_infinity syB754_infinity (xorb sx sy)
  | B754_infinity sx, B754_finite sy _ _ _B754_infinity (xorb sx sy)
  | B754_finite sx _ _ _, B754_infinity syB754_infinity (xorb sx sy)
  | B754_infinity _, B754_zero _B754_nan
  | B754_zero _, B754_infinity _B754_nan
  | B754_finite sx _ _ _, B754_zero syB754_zero (xorb sx sy)
  | B754_zero sx, B754_finite sy _ _ _B754_zero (xorb sx sy)
  | B754_zero sx, B754_zero syB754_zero (xorb sx sy)
  | B754_finite sx mx ex Hx, B754_finite sy my ey Hy
    SF2B _ (proj1 (Bmult_correct_aux m sx mx ex Hx sy my ey Hy))
  end.


Theorem Bmult_correct :
   m x y,
  if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x × B2R y))) (bpow radix2 emax) then
    B2R (Bmult m x y) = round radix2 fexp (round_mode m) (B2R x × B2R y)
    is_finite (Bmult m x y) = andb (is_finite x) (is_finite y)
    (is_nan (Bmult m x y) = false
      Bsign (Bmult m x y) = xorb (Bsign x) (Bsign y))
  else
    B2SF (Bmult m x y) = binary_overflow m (xorb (Bsign x) (Bsign y)).

Normalization and rounding

Theorem shl_align_correct':
   mx ex e,
  (e ex)%Z
  let (mx', ex') := shl_align mx ex e in
  F2R (Float radix2 (Zpos mx') e) = F2R (Float radix2 (Zpos mx) ex)
  ex' = e.

Theorem shl_align_correct :
   mx ex ex',
  let (mx', ex'') := shl_align mx ex ex' in
  F2R (Float radix2 (Zpos mx) ex) = F2R (Float radix2 (Zpos mx') ex'')
  (ex'' ex')%Z.

Theorem snd_shl_align :
   mx ex ex',
  (ex' ex)%Z
  snd (shl_align mx ex ex') = ex'.

Definition shl_align_fexp mx ex :=
  shl_align mx ex (fexp (Zpos (digits2_pos mx) + ex)).

Theorem shl_align_fexp_correct :
   mx ex,
  let (mx', ex') := shl_align_fexp mx ex in
  F2R (Float radix2 (Zpos mx) ex) = F2R (Float radix2 (Zpos mx') ex')
  (ex' fexp (Zdigits radix2 (Zpos mx') + ex'))%Z.

Definition binary_round m sx mx ex :=
  let '(mz, ez) := shl_align_fexp mx ex in binary_round_aux m sx (Zpos mz) ez loc_Exact.

Theorem binary_round_correct :
   m sx mx ex,
  let z := binary_round m sx mx ex in
  valid_binary z = true
  let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
  if Rlt_bool (Rabs (round radix2 fexp (round_mode m) x)) (bpow radix2 emax) then
    SF2R radix2 z = round radix2 fexp (round_mode m) x
    is_finite_SF z = true
    sign_SF z = sx
  else
    z = binary_overflow m sx.

Theorem is_nan_binary_round :
   mode sx mx ex,
  is_nan_SF (binary_round mode sx mx ex) = false.

Definition binary_normalize mode m e szero :=
  match m with
  | Z0B754_zero szero
  | Zpos mSF2B _ (proj1 (binary_round_correct mode false m e))
  | Zneg mSF2B _ (proj1 (binary_round_correct mode true m e))
  end.

Theorem binary_normalize_correct :
   m mx ex szero,
  let x := F2R (Float radix2 mx ex) in
  let z := binary_normalize m mx ex szero in
  if Rlt_bool (Rabs (round radix2 fexp (round_mode m) x)) (bpow radix2 emax) then
    B2R z = round radix2 fexp (round_mode m) x
    is_finite z = true
    Bsign z =
      match Rcompare x 0 with
        | Eqszero
        | Lttrue
        | Gtfalse
      end
  else
    B2SF z = binary_overflow m (Rlt_bool x 0).

Theorem is_nan_binary_normalize :
   mode m e szero,
  is_nan (binary_normalize mode m e szero) = false.

Addition

Definition Fplus_naive sx mx ex sy my ey ez :=
  (Zplus (cond_Zopp sx (Zpos (fst (shl_align mx ex ez)))) (cond_Zopp sy (Zpos (fst (shl_align my ey ez))))).

Lemma Fplus_naive_correct :
   sx mx ex sy my ey ez,
  (ez ex)%Z (ez ey)%Z
  let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
  let y := F2R (Float radix2 (cond_Zopp sy (Zpos my)) ey) in
  F2R (Float radix2 (Fplus_naive sx mx ex sy my ey ez) ez) = (x + y)%R.

Lemma sign_plus_overflow :
   m sx mx ex sy my ey,
  bounded mx ex = true
  bounded my ey = true
  let z := (F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) + F2R (Float radix2 (cond_Zopp sy (Zpos my)) ey))%R in
  (bpow radix2 emax Rabs (round radix2 fexp (round_mode m) z))%R
  sx = Rlt_bool z 0 sx = sy.

Definition Bplus m x y :=
  match x, y with
  | B754_nan, _ | _, B754_nanB754_nan
  | B754_infinity sx, B754_infinity syif Bool.eqb sx sy then x else B754_nan
  | B754_infinity _, _x
  | _, B754_infinity _y
  | B754_zero sx, B754_zero sy
    if Bool.eqb sx sy then x else
    match m with mode_DNB754_zero true | _B754_zero false end
  | B754_zero _, _y
  | _, B754_zero _x
  | B754_finite sx mx ex Hx, B754_finite sy my ey Hy
    let ez := Z.min ex ey in
    binary_normalize m (Fplus_naive sx mx ex sy my ey ez)
      ez (match m with mode_DNtrue | _false end)
  end.

Theorem Bplus_correct :
   m x y,
  is_finite x = true
  is_finite y = true
  if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x + B2R y))) (bpow radix2 emax) then
    B2R (Bplus m x y) = round radix2 fexp (round_mode m) (B2R x + B2R y)
    is_finite (Bplus m x y) = true
    Bsign (Bplus m x y) =
      match Rcompare (B2R x + B2R y) 0 with
        | Eqmatch m with mode_DNorb (Bsign x) (Bsign y)
                                 | _andb (Bsign x) (Bsign y) end
        | Lttrue
        | Gtfalse
      end
  else
    (B2SF (Bplus m x y) = binary_overflow m (Bsign x) Bsign x = Bsign y).

Subtraction

Definition Bminus m x y :=
  match x, y with
  | B754_nan, _ | _, B754_nanB754_nan
  | B754_infinity sx, B754_infinity sy
    if Bool.eqb sx (negb sy) then x else B754_nan
  | B754_infinity _, _x
  | _, B754_infinity syB754_infinity (negb sy)
  | B754_zero sx, B754_zero sy
    if Bool.eqb sx (negb sy) then x else
    match m with mode_DNB754_zero true | _B754_zero false end
  | B754_zero _, B754_finite sy my ey HyB754_finite (negb sy) my ey Hy
  | _, B754_zero _x
  | B754_finite sx mx ex Hx, B754_finite sy my ey Hy
    let ez := Z.min ex ey in
    binary_normalize m (Fplus_naive sx mx ex (negb sy) my ey ez)
      ez (match m with mode_DNtrue | _false end)
  end.

Theorem Bminus_correct :
   m x y,
  is_finite x = true
  is_finite y = true
  if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x - B2R y))) (bpow radix2 emax) then
    B2R (Bminus m x y) = round radix2 fexp (round_mode m) (B2R x - B2R y)
    is_finite (Bminus m x y) = true
    Bsign (Bminus m x y) =
      match Rcompare (B2R x - B2R y) 0 with
        | Eqmatch m with mode_DNorb (Bsign x) (negb (Bsign y))
                                 | _andb (Bsign x) (negb (Bsign y)) end
        | Lttrue
        | Gtfalse
      end
  else
    (B2SF (Bminus m x y) = binary_overflow m (Bsign x) Bsign x = negb (Bsign y)).

Fused Multiply-Add

Definition Bfma_szero m (x y z: binary_float) : bool :=
  let s_xy := xorb (Bsign x) (Bsign y) in
  if Bool.eqb s_xy (Bsign z) then s_xy
  else match m with mode_DNtrue | _false end.

Definition Bfma m (x y z: binary_float) :=
  match x, y with
  | B754_nan, _ | _, B754_nan
  | B754_infinity _, B754_zero _
  | B754_zero _, B754_infinity _
      
      B754_nan
  | B754_infinity sx, B754_infinity sy
  | B754_infinity sx, B754_finite sy _ _ _
  | B754_finite sx _ _ _, B754_infinity sy
      let s := xorb sx sy in
      
      match z with
      | B754_nanB754_nan
      | B754_infinity szif Bool.eqb s sz then z else B754_nan
      | _B754_infinity s
      end
  | B754_finite sx _ _ _, B754_zero sy
  | B754_zero sx, B754_finite sy _ _ _
  | B754_zero sx, B754_zero sy
      
      match z with
      | B754_nanB754_nan
      | B754_zero _B754_zero (Bfma_szero m x y z)
      | _z
      end
  | B754_finite sx mx ex _, B754_finite sy my ey _
      
      match z with
      | B754_nanB754_nan
      | B754_infinity szz
      | B754_zero _
         let X := Float radix2 (cond_Zopp sx (Zpos mx)) ex in
         let Y := Float radix2 (cond_Zopp sy (Zpos my)) ey in
         let '(Float _ mr er) := Fmult X Y in
         binary_normalize m mr er (Bfma_szero m x y z)
      | B754_finite sz mz ez _
         let X := Float radix2 (cond_Zopp sx (Zpos mx)) ex in
         let Y := Float radix2 (cond_Zopp sy (Zpos my)) ey in
         let Z := Float radix2 (cond_Zopp sz (Zpos mz)) ez in
         let '(Float _ mr er) := Fplus (Fmult X Y) Z in
         binary_normalize m mr er (Bfma_szero m x y z)
      end
  end.

Theorem Bfma_correct:
   m x y z,
  is_finite x = true
  is_finite y = true
  is_finite z = true
  let res := (B2R x × B2R y + B2R z)%R in
  if Rlt_bool (Rabs (round radix2 fexp (round_mode m) res)) (bpow radix2 emax) then
    B2R (Bfma m x y z) = round radix2 fexp (round_mode m) res
    is_finite (Bfma m x y z) = true
    Bsign (Bfma m x y z) =
      match Rcompare res 0 with
        | EqBfma_szero m x y z
        | Lttrue
        | Gtfalse
      end
  else
    B2SF (Bfma m x y z) = binary_overflow m (Rlt_bool res 0).

Division

Lemma Bdiv_correct_aux :
   m sx mx ex sy my ey,
  let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
  let y := F2R (Float radix2 (cond_Zopp sy (Zpos my)) ey) in
  let z :=
    let '(mz, ez, lz) := SFdiv_core_binary prec emax (Zpos mx) ex (Zpos my) ey in
    binary_round_aux m (xorb sx sy) mz ez lz in
  valid_binary z = true
  if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (x / y))) (bpow radix2 emax) then
    SF2R radix2 z = round radix2 fexp (round_mode m) (x / y)
    is_finite_SF z = true sign_SF z = xorb sx sy
  else
    z = binary_overflow m (xorb sx sy).

Definition Bdiv m x y :=
  match x, y with
  | B754_nan, _ | _, B754_nanB754_nan
  | B754_infinity sx, B754_infinity syB754_nan
  | B754_infinity sx, B754_finite sy _ _ _B754_infinity (xorb sx sy)
  | B754_finite sx _ _ _, B754_infinity syB754_zero (xorb sx sy)
  | B754_infinity sx, B754_zero syB754_infinity (xorb sx sy)
  | B754_zero sx, B754_infinity syB754_zero (xorb sx sy)
  | B754_finite sx _ _ _, B754_zero syB754_infinity (xorb sx sy)
  | B754_zero sx, B754_finite sy _ _ _B754_zero (xorb sx sy)
  | B754_zero sx, B754_zero syB754_nan
  | B754_finite sx mx ex _, B754_finite sy my ey _
    SF2B _ (proj1 (Bdiv_correct_aux m sx mx ex sy my ey))
  end.

Theorem Bdiv_correct :
   m x y,
  B2R y 0%R
  if Rlt_bool (Rabs (round radix2 fexp (round_mode m) (B2R x / B2R y))) (bpow radix2 emax) then
    B2R (Bdiv m x y) = round radix2 fexp (round_mode m) (B2R x / B2R y)
    is_finite (Bdiv m x y) = is_finite x
    (is_nan (Bdiv m x y) = false
      Bsign (Bdiv m x y) = xorb (Bsign x) (Bsign y))
  else
    B2SF (Bdiv m x y) = binary_overflow m (xorb (Bsign x) (Bsign y)).

Square root

Lemma Bsqrt_correct_aux :
   m mx ex (Hx : bounded mx ex = true),
  let x := F2R (Float radix2 (Zpos mx) ex) in
  let z :=
    let '(mz, ez, lz) := SFsqrt_core_binary prec emax (Zpos mx) ex in
    binary_round_aux m false mz ez lz in
  valid_binary z = true
  SF2R radix2 z = round radix2 fexp (round_mode m) (sqrt x)
  is_finite_SF z = true sign_SF z = false.

Definition Bsqrt m x :=
  match x with
  | B754_nanB754_nan
  | B754_infinity falsex
  | B754_infinity trueB754_nan
  | B754_finite true _ _ _B754_nan
  | B754_zero _x
  | B754_finite sx mx ex Hx
    SF2B _ (proj1 (Bsqrt_correct_aux m mx ex Hx))
  end.

Theorem Bsqrt_correct :
   m x,
  B2R (Bsqrt m x) = round radix2 fexp (round_mode m) (sqrt (B2R x))
  is_finite (Bsqrt m x) = match x with B754_zero _true | B754_finite false _ _ _true | _false end
  (is_nan (Bsqrt m x) = false Bsign (Bsqrt m x) = Bsign x).

NearbyInt and Trunc

Definition SFnearbyint_binary_aux m sx mx ex :=
  if (0 <=? ex)%Z then ((Z.pos mx) × 2 ^ ex)%Z else
  let mrs := {| shr_m := Z.pos mx; shr_r := false; shr_s := false |} in
  let mrs' := if (ex <? - prec)%Z then
    {| shr_m := Z0; shr_r := false; shr_s := true |} else
    fst (shr mrs ex (- ex)) in
  let l' := loc_of_shr_record mrs' in
  let mx' := shr_m mrs' in
  choice_mode m sx mx' l'.

Definition SFnearbyint_binary m sx mx ex :=
  if (0 <=? ex)%Z then S754_finite sx mx ex else
  let mx'' := SFnearbyint_binary_aux m sx mx ex in
  match mx'' with
  | Z.pos n
    let (mx''', ex''') := shl_align_fexp n 0 in
    S754_finite sx mx''' ex'''
  | Z.neg nS754_nan
  | Z0S754_zero sx
  end.

Lemma Bnearbyint_correct_aux :
   md sx mx ex (Hx : bounded mx ex = true),
  let x := F2R (Float radix2 (cond_Zopp sx (Zpos mx)) ex) in
  let z := SFnearbyint_binary md sx mx ex in
  valid_binary z = true
  SF2R radix2 z = (round radix2 (FIX_exp 0) (round_mode md) x)
  is_finite_SF z = true (is_nan_SF z = false sign_SF z = sx).

Definition Bnearbyint md (x : binary_float) :=
  match x with
  | B754_nanB754_nan
  | B754_zero sB754_zero s
  | B754_infinity sB754_infinity s
  | B754_finite s m e H
    SF2B _ (proj1 (Bnearbyint_correct_aux md s m e H))
  end.

Theorem Bnearbyint_correct :
   md x,
  B2R (Bnearbyint md x) = round radix2 (FIX_exp 0) (round_mode md) (B2R x)
  is_finite (Bnearbyint md x) = is_finite x
  (is_nan (Bnearbyint md x) = false Bsign (Bnearbyint md x) = Bsign x).

Definition Btrunc (x : binary_float) :=
  match x with
  | B754_finite s m e _
    cond_Zopp s (SFnearbyint_binary_aux mode_ZR s m e)
  | _ ⇒ 0%Z
  end.

Theorem Btrunc_correct :
   x,
  IZR (Btrunc x) = round radix2 (FIX_exp 0) Ztrunc (B2R x).

A few values
Extraction/modification of mantissa/exponent

Definition Bnormfr_mantissa x := SFnormfr_mantissa prec (B2SF x).

Lemma Bnormfr_mantissa_correct :
   x,
  (/ 2 Rabs (B2R x) < 1)%R
  match x with
  | B754_finite _ m e _
    Bnormfr_mantissa x = N.pos m
     Z.pos (digits2_pos m) = prec (e = - prec)%Z
  | _False
  end.

Definition Bldexp mode f e :=
  match f with
  | B754_finite sx mx ex _
    SF2B _ (proj1 (binary_round_correct mode sx mx (ex+e)))
  | _f
  end.

Theorem is_nan_Bldexp :
   mode x e,
  is_nan (Bldexp mode x e) = is_nan x.

Theorem Bldexp_correct :
   m (f : binary_float) e,
  if Rlt_bool
       (Rabs (round radix2 fexp (round_mode m) (B2R f × bpow radix2 e)))
       (bpow radix2 emax) then
    (B2R (Bldexp m f e)
     = round radix2 fexp (round_mode m) (B2R f × bpow radix2 e))%R
    is_finite (Bldexp m f e) = is_finite f
    Bsign (Bldexp m f e) = Bsign f
  else
    B2SF (Bldexp m f e) = binary_overflow m (Bsign f).

Lemma Bldexp_Bopp_NE x e : Bldexp mode_NE (Bopp x) e = Bopp (Bldexp mode_NE x e).

Definition Ffrexp_core_binary s m e :=
  if Zlt_bool (-prec) emin then
    (S754_finite s m e, 0%Z)
  else if (Z.to_pos prec <=? digits2_pos m)%positive then
    (S754_finite s m (-prec), (e + prec)%Z)
  else
    let d := (prec - Z.pos (digits2_pos m))%Z in
    (S754_finite s (shift_pos (Z.to_pos d) m) (-prec), (e + prec - d)%Z).

Lemma Bfrexp_correct_aux :
   sx mx ex (Hx : bounded mx ex = true),
  let x := F2R (Float radix2 (cond_Zopp sx (Z.pos mx)) ex) in
  let z := fst (Ffrexp_core_binary sx mx ex) in
  let e := snd (Ffrexp_core_binary sx mx ex) in
  valid_binary z = true
  ((2 < emax)%Z (/2 Rabs (SF2R radix2 z) < 1)%R)
  (x = SF2R radix2 z × bpow radix2 e)%R.

Definition Bfrexp f :=
  match f with
  | B754_finite s m e H
    let e' := snd (Ffrexp_core_binary s m e) in
    (SF2B _ (proj1 (Bfrexp_correct_aux s m e H)), e')
  | _(f, (-2×emax-prec)%Z)
  end.

Theorem is_nan_Bfrexp :
   x,
  is_nan (fst (Bfrexp x)) = is_nan x.

Theorem Bfrexp_correct :
   f,
  is_finite_strict f = true
  let (z, e) := Bfrexp f in
  (B2R f = B2R z × bpow radix2 e)%R
  ( (2 < emax)%Z (/2 Rabs (B2R z) < 1)%R e = mag radix2 (B2R f) ).

Ulp

Lemma Bulp_correct_aux :
  bounded 1 emin = true.

Definition Bulp x :=
  match x with
  | B754_zero _B754_finite false 1 emin Bulp_correct_aux
  | B754_infinity _B754_infinity false
  | B754_nanB754_nan
  | B754_finite _ _ e _binary_normalize mode_ZR 1 e false
  end.

Theorem is_nan_Bulp :
   x,
  is_nan (Bulp x) = is_nan x.

Theorem Bulp_correct :
   x,
  is_finite x = true
  B2R (Bulp x) = ulp radix2 fexp (B2R x)
  is_finite (Bulp x) = true
  Bsign (Bulp x) = false.

Theorem is_finite_strict_Bulp :
   x,
  is_finite_strict (Bulp x) = is_finite x.

Definition Bulp' x := Bldexp mode_NE Bone (fexp (snd (Bfrexp x))).

Theorem Bulp'_correct :
  (2 < emax)%Z
   x,
  is_finite x = true
  Bulp' x = Bulp x.

Successor (and predecessor)

Definition Bsucc x :=
  match x with
  | B754_zero _B754_finite false 1 emin Bulp_correct_aux
  | B754_infinity falsex
  | B754_infinity trueBopp Bmax_float
  | B754_nanB754_nan
  | B754_finite false mx ex _
    SF2B _ (proj1 (binary_round_correct mode_UP false (mx + 1) ex))
  | B754_finite true mx ex _
    SF2B _ (proj1 (binary_round_correct mode_ZR true (xO mx - 1) (ex - 1)))
  end.

Theorem is_nan_Bsucc :
   x,
  is_nan (Bsucc x) = is_nan x.

Theorem Bsucc_correct :
   x,
  is_finite x = true
  if Rlt_bool (succ radix2 fexp (B2R x)) (bpow radix2 emax) then
    B2R (Bsucc x) = succ radix2 fexp (B2R x)
    is_finite (Bsucc x) = true
    (Bsign (Bsucc x) = Bsign x && is_finite_strict x)%bool
  else
    B2SF (Bsucc x) = S754_infinity false.

Definition Bpred x := Bopp (Bsucc (Bopp x)).

Theorem is_nan_Bpred :
   x,
  is_nan (Bpred x) = is_nan x.

Theorem Bpred_correct :
   x,
  is_finite x = true
  if Rlt_bool (- bpow radix2 emax) (pred radix2 fexp (B2R x)) then
    B2R (Bpred x) = pred radix2 fexp (B2R x)
    is_finite (Bpred x) = true
    (Bsign (Bpred x) = Bsign x || negb (is_finite_strict x))%bool
  else
    B2SF (Bpred x) = S754_infinity true.

Definition Bpred_pos' x :=
  match x with
  | B754_finite _ mx _ _
    let d :=
      if (mx~0 =? shift_pos (Z.to_pos prec) 1)%positive then
        Bldexp mode_NE Bone (fexp (snd (Bfrexp x) - 1))
      else
        Bulp' x in
    Bminus mode_NE x d
  | _x
  end.

Theorem Bpred_pos'_correct :
  (2 < emax)%Z
   x,
  (0 < B2R x)%R
  Bpred_pos' x = Bpred x.

Definition Bsucc' x :=
  match x with
  | B754_zero _Bldexp mode_NE Bone emin
  | B754_infinity falsex
  | B754_infinity trueBopp Bmax_float
  | B754_nanB754_nan
  | B754_finite false _ _ _Bplus mode_NE x (Bulp x)
  | B754_finite true _ _ _Bopp (Bpred_pos' (Bopp x))
  end.

Theorem Bsucc'_correct :
  (2 < emax)%Z
   x,
  is_finite x = true
  Bsucc' x = Bsucc x.

End Binary.

Arguments B754_zero {prec} {emax}.
Arguments B754_infinity {prec} {emax}.
Arguments B754_nan {prec} {emax}.
Arguments B754_finite {prec} {emax}.

Arguments SF2B {prec} {emax}.
Arguments B2SF {prec} {emax}.
Arguments B2R {prec} {emax}.

Arguments is_finite_strict {prec} {emax}.
Arguments is_finite {prec} {emax}.
Arguments is_nan {prec} {emax}.

Arguments erase {prec} {emax}.
Arguments Bsign {prec} {emax}.
Arguments Bcompare {prec} {emax}.
Arguments Beqb {prec} {emax}.
Arguments Bltb {prec} {emax}.
Arguments Bleb {prec} {emax}.
Arguments Bopp {prec} {emax}.
Arguments Babs {prec} {emax}.
Arguments Bone {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bmax_float {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.

Arguments Bplus {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bminus {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bmult {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bfma {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bdiv {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bsqrt {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bnearbyint {prec} {emax} {prec_lt_emax_}.
Arguments Btrunc {prec} {emax}.

Arguments Bldexp {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bnormfr_mantissa {prec} {emax}.
Arguments Bfrexp {prec} {emax} {prec_gt_0_}.
Arguments Bulp {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bulp' {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bsucc {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bpred {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.
Arguments Bpred_pos' {prec} {emax} {prec_gt_0_} {prec_lt_emax_}.