Library Flocq.Calc.Round

This file is part of the Flocq formalization of floating-point arithmetic in Coq: https://flocq.gitlabpages.inria.fr/

This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the COPYING file for more details.

Helper function for computing the rounded value of a real number.

From Coq Require Import ZArith Reals Lia.

Require Import Core Digits Float_prop Bracket.

Section Fcalc_round.

Variable beta : radix.
Notation bpow e := (bpow beta e).

Section Fcalc_round_fexp.

Variable fexp : Z → Z.
Context { valid_exp : Valid_exp fexp }.
Notation format := (generic_format beta fexp).

Theorem cexp_inbetween_float :
  ∀ x m e l,
  (0 < x)%R →
  inbetween_float beta m e x l →
  (e ≤ cexp beta fexp x ∨ e ≤ fexp (Zdigits beta m + e))%Z →
  cexp beta fexp x = fexp (Zdigits beta m + e).

Theorem cexp_inbetween_float_loc_Exact :
  ∀ x m e l,
  (0 ≤ x)%R →
  inbetween_float beta m e x l →
  (e ≤ cexp beta fexp x ∨ l = loc_Exact ↔
   e ≤ fexp (Zdigits beta m + e) ∨ l = loc_Exact)%Z.

Relates location and rounding.

Theorem inbetween_float_round :
  ∀ rnd choice,
  ( ∀ x m l, inbetween_int m x l → rnd x = choice m l ) →
  ∀ x m l,
  let e := cexp beta fexp x in
  inbetween_float beta m e x l →
  round beta fexp rnd x = F2R (Float beta (choice m l) e).

Definition cond_incr (b : bool) m := if b then (m + 1)%Z else m.

Lemma le_cond_incr_le :
  ∀ b m, (m ≤ cond_incr b m ≤ m + 1)%Z.

Theorem inbetween_float_round_sign :
  ∀ rnd choice,
  ( ∀ x m l, inbetween_int m (Rabs x) l →
    rnd x = cond_Zopp (Rlt_bool x 0) (choice (Rlt_bool x 0) m l) ) →
  ∀ x m l,
  let e := cexp beta fexp x in
  inbetween_float beta m e (Rabs x) l →
  round beta fexp rnd x = F2R (Float beta (cond_Zopp (Rlt_bool x 0) (choice (Rlt_bool x 0) m l)) e).

Relates location and rounding down.

Theorem inbetween_int_DN :
  ∀ x m l,
  inbetween_int m x l →
  Zfloor x = m.

Theorem inbetween_float_DN :
  ∀ x m l,
  let e := cexp beta fexp x in
  inbetween_float beta m e x l →
  round beta fexp Zfloor x = F2R (Float beta m e).

Definition round_sign_DN s l :=
  match l with
  | loc_Exact ⇒ false
  | _ ⇒ s
  end.

Theorem inbetween_int_DN_sign :
  ∀ x m l,
  inbetween_int m (Rabs x) l →
  Zfloor x = cond_Zopp (Rlt_bool x 0) (cond_incr (round_sign_DN (Rlt_bool x 0) l) m).

Theorem inbetween_float_DN_sign :
  ∀ x m l,
  let e := cexp beta fexp x in
  inbetween_float beta m e (Rabs x) l →
  round beta fexp Zfloor x = F2R (Float beta (cond_Zopp (Rlt_bool x 0) (cond_incr (round_sign_DN (Rlt_bool x 0) l) m)) e).

Relates location and rounding up.

Definition round_UP l :=
  match l with
  | loc_Exact ⇒ false
  | _ ⇒ true
  end.

Theorem inbetween_int_UP :
  ∀ x m l,
  inbetween_int m x l →
  Zceil x = cond_incr (round_UP l) m.

Theorem inbetween_float_UP :
  ∀ x m l,
  let e := cexp beta fexp x in
  inbetween_float beta m e x l →
  round beta fexp Zceil x = F2R (Float beta (cond_incr (round_UP l) m) e).

Definition round_sign_UP s l :=
  match l with
  | loc_Exact ⇒ false
  | _ ⇒ negb s
  end.

Theorem inbetween_int_UP_sign :
  ∀ x m l,
  inbetween_int m (Rabs x) l →
  Zceil x = cond_Zopp (Rlt_bool x 0) (cond_incr (round_sign_UP (Rlt_bool x 0) l) m).

Theorem inbetween_float_UP_sign :
  ∀ x m l,
  let e := cexp beta fexp x in
  inbetween_float beta m e (Rabs x) l →
  round beta fexp Zceil x = F2R (Float beta (cond_Zopp (Rlt_bool x 0) (cond_incr (round_sign_UP (Rlt_bool x 0) l) m)) e).

Relates location and rounding toward zero.

Definition round_ZR (s : bool) l :=
  match l with
  | loc_Exact ⇒ false
  | _ ⇒ s
  end.

Theorem inbetween_int_ZR :
  ∀ x m l,
  inbetween_int m x l →
  Ztrunc x = cond_incr (round_ZR (Zlt_bool m 0) l) m.

Theorem inbetween_float_ZR :
  ∀ x m l,
  let e := cexp beta fexp x in
  inbetween_float beta m e x l →
  round beta fexp Ztrunc x = F2R (Float beta (cond_incr (round_ZR (Zlt_bool m 0) l) m) e).

Theorem inbetween_int_ZR_sign :
  ∀ x m l,
  inbetween_int m (Rabs x) l →
  Ztrunc x = cond_Zopp (Rlt_bool x 0) m.

Theorem inbetween_float_ZR_sign :
  ∀ x m l,
  let e := cexp beta fexp x in
  inbetween_float beta m e (Rabs x) l →
  round beta fexp Ztrunc x = F2R (Float beta (cond_Zopp (Rlt_bool x 0) m) e).

Relates location and rounding to nearest.

Definition round_N (p : bool) l :=
  match l with
  | loc_Exact ⇒ false
  | loc_Inexact Lt ⇒ false
  | loc_Inexact Eq ⇒ p
  | loc_Inexact Gt ⇒ true
  end.

Theorem inbetween_int_N :
  ∀ choice x m l,
  inbetween_int m x l →
  Znearest choice x = cond_incr (round_N (choice m) l) m.

Theorem inbetween_int_N_sign :
  ∀ choice x m l,
  inbetween_int m (Rabs x) l →
  Znearest choice x = cond_Zopp (Rlt_bool x 0) (cond_incr (round_N (if Rlt_bool x 0 then negb (choice (-(m + 1))%Z) else choice m) l) m).

Relates location and rounding to nearest even.

Theorem inbetween_int_NE :
  ∀ x m l,
  inbetween_int m x l →
  ZnearestE x = cond_incr (round_N (negb (Z.even m)) l) m.

Theorem inbetween_float_NE :
  ∀ x m l,
  let e := cexp beta fexp x in
  inbetween_float beta m e x l →
  round beta fexp ZnearestE x = F2R (Float beta (cond_incr (round_N (negb (Z.even m)) l) m) e).

Theorem inbetween_int_NE_sign :
  ∀ x m l,
  inbetween_int m (Rabs x) l →
  ZnearestE x = cond_Zopp (Rlt_bool x 0) (cond_incr (round_N (negb (Z.even m)) l) m).

Theorem inbetween_float_NE_sign :
  ∀ x m l,
  let e := cexp beta fexp x in
  inbetween_float beta m e (Rabs x) l →
  round beta fexp ZnearestE x = F2R (Float beta (cond_Zopp (Rlt_bool x 0) (cond_incr (round_N (negb (Z.even m)) l) m)) e).

Relates location and rounding to nearest away.

Theorem inbetween_int_NA :
  ∀ x m l,
  inbetween_int m x l →
  ZnearestA x = cond_incr (round_N (Zle_bool 0 m) l) m.

Theorem inbetween_float_NA :
  ∀ x m l,
  let e := cexp beta fexp x in
  inbetween_float beta m e x l →
  round beta fexp ZnearestA x = F2R (Float beta (cond_incr (round_N (Zle_bool 0 m) l) m) e).

Theorem inbetween_int_NA_sign :
  ∀ x m l,
  inbetween_int m (Rabs x) l →
  ZnearestA x = cond_Zopp (Rlt_bool x 0) (cond_incr (round_N true l) m).

Theorem inbetween_float_NA_sign :
  ∀ x m l,
  let e := cexp beta fexp x in
  inbetween_float beta m e (Rabs x) l →
  round beta fexp ZnearestA x = F2R (Float beta (cond_Zopp (Rlt_bool x 0) (cond_incr (round_N true l) m)) e).

Definition truncate_aux t k :=
  let '(m, e, l) := t in
  let p := Zpower beta k in
  (Z.div m p , (e + k)%Z, new_location p (Zmod m p) l).

Theorem truncate_aux_comp :
  ∀ t k1 k2,
  (0 < k1)%Z →
  (0 < k2)%Z →
  truncate_aux t (k1 + k2) = truncate_aux (truncate_aux t k1) k2.

Given a triple (mantissa, exponent, position), this function computes a triple with a canonic exponent, assuming the original triple had enough precision.

Definition truncate t :=
  let '(m, e, l) := t in
  let k := (fexp (Zdigits beta m + e) - e)%Z in
  if Zlt_bool 0 k then truncate_aux t k
  else t.

Theorem truncate_0 :
  ∀ e l,
  let '(m', e', l') := truncate (0, e , l )%Z in
  m' = Z0.

Theorem generic_format_truncate :
  ∀ m e l,
  (0 ≤ m)%Z →
  let '(m', e', l') := truncate (m , e , l ) in
  format (F2R (Float beta m' e')).

Theorem truncate_correct_format :
  ∀ m e,
  m ≠ Z0 →
  let x := F2R (Float beta m e) in
  generic_format beta fexp x →
  (e ≤ fexp (Zdigits beta m + e))%Z →
  let '(m', e', l') := truncate (m , e , loc_Exact ) in
  x = F2R (Float beta m' e') ∧ e' = cexp beta fexp x.

Theorem truncate_correct_partial' :
  ∀ x m e l,
  (0 < x)%R →
  inbetween_float beta m e x l →
  (e ≤ cexp beta fexp x)%Z →
  let '(m', e', l') := truncate (m , e , l ) in
  inbetween_float beta m' e' x l' ∧ e' = cexp beta fexp x.

Theorem truncate_correct_partial :
  ∀ x m e l,
  (0 < x)%R →
  inbetween_float beta m e x l →
  (e ≤ fexp (Zdigits beta m + e))%Z →
  let '(m', e', l') := truncate (m , e , l ) in
  inbetween_float beta m' e' x l' ∧ e' = cexp beta fexp x.

Theorem truncate_correct' :
  ∀ x m e l,
  (0 ≤ x)%R →
  inbetween_float beta m e x l →
  (e ≤ cexp beta fexp x)%Z ∨ l = loc_Exact →
  let '(m', e', l') := truncate (m , e , l ) in
  inbetween_float beta m' e' x l' ∧
  (e' = cexp beta fexp x ∨ (l' = loc_Exact ∧ format x )).

Theorem truncate_correct :
  ∀ x m e l,
  (0 ≤ x)%R →
  inbetween_float beta m e x l →
  (e ≤ fexp (Zdigits beta m + e))%Z ∨ l = loc_Exact →
  let '(m', e', l') := truncate (m , e , l ) in
  inbetween_float beta m' e' x l' ∧
  (e' = cexp beta fexp x ∨ (l' = loc_Exact ∧ format x )).

Section round_dir.

Variable rnd : R → Z.
Context { valid_rnd : Valid_rnd rnd }.

Variable choice : Z → location → Z.
Hypothesis inbetween_int_valid :
  ∀ x m l,
  inbetween_int m x l →
  rnd x = choice m l.

Theorem round_any_correct :
  ∀ x m e l,
  inbetween_float beta m e x l →
  (e = cexp beta fexp x ∨ (l = loc_Exact ∧ format x )) →
  round beta fexp rnd x = F2R (Float beta (choice m l) e).

Truncating a triple is sufficient to round a real number.

Theorem round_trunc_any_correct :
  ∀ x m e l,
  (0 ≤ x)%R →
  inbetween_float beta m e x l →
  (e ≤ fexp (Zdigits beta m + e))%Z ∨ l = loc_Exact →
  round beta fexp rnd x = let '(m', e', l') := truncate (m , e , l ) in F2R (Float beta (choice m' l') e').

Theorem round_trunc_any_correct' :
  ∀ x m e l,
  (0 ≤ x)%R →
  inbetween_float beta m e x l →
  (e ≤ cexp beta fexp x)%Z ∨ l = loc_Exact →
  round beta fexp rnd x = let '(m', e', l') := truncate (m , e , l ) in F2R (Float beta (choice m' l') e').

End round_dir.

Section round_dir_sign.

Variable rnd : R → Z.
Context { valid_rnd : Valid_rnd rnd }.

Variable choice : bool → Z → location → Z.
Hypothesis inbetween_int_valid :
  ∀ x m l,
  inbetween_int m (Rabs x) l →
  rnd x = cond_Zopp (Rlt_bool x 0) (choice (Rlt_bool x 0) m l).

Theorem round_sign_any_correct :
  ∀ x m e l,
  inbetween_float beta m e (Rabs x) l →
  (e = cexp beta fexp x ∨ (l = loc_Exact ∧ format x )) →
  round beta fexp rnd x = F2R (Float beta (cond_Zopp (Rlt_bool x 0) (choice (Rlt_bool x 0) m l)) e).

Truncating a triple is sufficient to round a real number.

Theorem round_trunc_sign_any_correct' :
  ∀ x m e l,
  inbetween_float beta m e (Rabs x) l →
  (e ≤ cexp beta fexp x)%Z ∨ l = loc_Exact →
  round beta fexp rnd x = let '(m', e', l') := truncate (m , e , l ) in F2R (Float beta (cond_Zopp (Rlt_bool x 0) (choice (Rlt_bool x 0) m' l')) e').

Theorem round_trunc_sign_any_correct :
  ∀ x m e l,
  inbetween_float beta m e (Rabs x) l →
  (e ≤ fexp (Zdigits beta m + e))%Z ∨ l = loc_Exact →
  round beta fexp rnd x = let '(m', e', l') := truncate (m , e , l ) in F2R (Float beta (cond_Zopp (Rlt_bool x 0) (choice (Rlt_bool x 0) m' l')) e').

End round_dir_sign.

Instances of the theorems above, for the usual rounding modes.

Specialization of truncate for FIX formats.

Variable emin : Z.

Definition truncate_FIX t :=
  let '(m, e, l) := t in
  let k := (emin - e)%Z in
  if Zlt_bool 0 k then
    let p := Zpower beta k in
    (Z.div m p , (e + k)%Z, new_location p (Zmod m p) l)
  else t.

Theorem truncate_FIX_correct :
  ∀ x m e l,
  inbetween_float beta m e x l →
  (e ≤ emin)%Z ∨ l = loc_Exact →
  let '(m', e', l') := truncate_FIX (m , e , l ) in
  inbetween_float beta m' e' x l' ∧
  (e' = cexp beta (FIX_exp emin) x ∨ (l' = loc_Exact ∧ generic_format beta (FIX_exp emin) x )).

End Fcalc_round.