fpu: allow flushing of output denormals to be after rounding

author Peter Maydell <peter.maydell@linaro.org>

Sat, 1 Feb 2025 16:39:08 +0000 (16:39 +0000)

committer Peter Maydell <peter.maydell@linaro.org>

Tue, 11 Feb 2025 16:22:07 +0000 (16:22 +0000)
author Peter Maydell <peter.maydell@linaro.org>
Sat, 1 Feb 2025 16:39:08 +0000 (16:39 +0000)
committer Peter Maydell <peter.maydell@linaro.org>
Tue, 11 Feb 2025 16:22:07 +0000 (16:22 +0000)
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc

index 0122b35008a901ff6664d456d9cba1b30563c1b9..1d09f066c5d83853bd4467ba5027b377df571754 100644 (file)
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -334,7 +334,8 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
              p->frac_lo &= ~round_mask;
          }
          frac_shr(p, frac_shift);
-    } else if (s->flush_to_zero) {
+    } else if (s->flush_to_zero &&
+               s->ftz_detection == float_ftz_before_rounding) {
          flags |= float_flag_output_denormal_flushed;
          p->cls = float_class_zero;
          exp = 0;
@@ -381,11 +382,19 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
          exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
          frac_shr(p, frac_shift);
  
-        if (is_tiny && (flags & float_flag_inexact)) {
-            flags |= float_flag_underflow;
-        }
-        if (exp == 0 && frac_eqz(p)) {
-            p->cls = float_class_zero;
+        if (is_tiny) {
+            if (s->flush_to_zero) {
+                assert(s->ftz_detection == float_ftz_after_rounding);
+                flags |= float_flag_output_denormal_flushed;
+                p->cls = float_class_zero;
+                exp = 0;
+                frac_clear(p);
+            } else if (flags & float_flag_inexact) {
+                flags |= float_flag_underflow;
+            }
+            if (exp == 0 && frac_eqz(p)) {
+                p->cls = float_class_zero;
+            }
          }
      }
      p->exp = exp;
diff --git a/include/fpu/softfloat-helpers.h b/include/fpu/softfloat-helpers.h

index 4cb30a48220f99d3c86b464030f38d7cb425337c..8983c2748ecd85e924597c1a1b9675487b2562c3 100644 (file)
--- a/include/fpu/softfloat-helpers.h
+++ b/include/fpu/softfloat-helpers.h
@@ -109,6 +109,12 @@ static inline void set_flush_inputs_to_zero(bool val, float_status *status)
      status->flush_inputs_to_zero = val;
  }
  
+static inline void set_float_ftz_detection(FloatFTZDetection d,
+                                           float_status *status)
+{
+    status->ftz_detection = d;
+}
+
  static inline void set_default_nan_mode(bool val, float_status *status)
  {
      status->default_nan_mode = val;
@@ -183,4 +189,9 @@ static inline bool get_default_nan_mode(const float_status *status)
      return status->default_nan_mode;
  }
  
+static inline FloatFTZDetection get_float_ftz_detection(const float_status *status)
+{
+    return status->ftz_detection;
+}
+
  #endif /* SOFTFLOAT_HELPERS_H */
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h

index bba1c397bb7ce4793dfe432f3f660d6fc5555704..53d5eb85210e2470b31139f410b12b214d8020d7 100644 (file)
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -304,6 +304,22 @@ typedef enum __attribute__((__packed__)) {
      float_infzeronan_suppress_invalid = (1 << 7),
  } FloatInfZeroNaNRule;
  
+/*
+ * When flush_to_zero is set, should we detect denormal results to
+ * be flushed before or after rounding? For most architectures this
+ * should be set to match the tininess_before_rounding setting,
+ * but a few architectures, e.g. MIPS MSA, detect FTZ before
+ * rounding but tininess after rounding.
+ *
+ * This enum is arranged so that the default if the target doesn't
+ * configure it matches the default for tininess_before_rounding
+ * (i.e. "after rounding").
+ */
+typedef enum __attribute__((__packed__)) {
+    float_ftz_after_rounding = 0,
+    float_ftz_before_rounding = 1,
+} FloatFTZDetection;
+
  /*
   * Floating Point Status. Individual architectures may maintain
   * several versions of float_status for different functions. The
@@ -321,6 +337,8 @@ typedef struct float_status {
      bool tininess_before_rounding;
      /* should denormalised results go to zero and set output_denormal_flushed? */
      bool flush_to_zero;
+    /* do we detect and flush denormal results before or after rounding? */
+    FloatFTZDetection ftz_detection;
      /* should denormalised inputs go to zero and set input_denormal_flushed? */
      bool flush_inputs_to_zero;
      bool default_nan_mode;
diff --git a/target/alpha/cpu.c b/target/alpha/cpu.c

index e1b898e57556d48102c3cfe894059d503f595ad1..f5dd744987618974d32cb36feab8957d54960b79 100644 (file)
--- a/target/alpha/cpu.c
+++ b/target/alpha/cpu.c
@@ -202,6 +202,13 @@ static void alpha_cpu_initfn(Object *obj)
      set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
      /* Default NaN: sign bit clear, msb frac bit set */
      set_float_default_nan_pattern(0b01000000, &env->fp_status);
+    /*
+     * TODO: this is incorrect. The Alpha Architecture Handbook version 4
+     * section 4.7.7.11 says that we flush to zero for underflow cases, so
+     * this should be float_ftz_after_rounding to match the
+     * tininess_after_rounding (which is specified in section 4.7.5).
+     */
+    set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
  #if defined(CONFIG_USER_ONLY)
      env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN;
      cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD
diff --git a/target/arm/cpu.c b/target/arm/cpu.c

index 32dc7c1e692e98bf7bff421926423a7f26eeedbe..80377443008098590347032e50951b02c8874b05 100644 (file)
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -185,6 +185,7 @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
  static void arm_set_default_fp_behaviours(float_status *s)
  {
      set_float_detect_tininess(float_tininess_before_rounding, s);
+    set_float_ftz_detection(float_ftz_before_rounding, s);
      set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
      set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
      set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
diff --git a/target/hppa/fpu_helper.c b/target/hppa/fpu_helper.c

index 239c027ec528bcbabbd1faea017a3168deb9d2b9..8ff4b4480490c09fdcfeef36b6c412c65cb17da6 100644 (file)
--- a/target/hppa/fpu_helper.c
+++ b/target/hppa/fpu_helper.c
@@ -67,6 +67,17 @@ void HELPER(loaded_fr0)(CPUHPPAState *env)
      set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
      /* Default NaN: sign bit clear, msb-1 frac bit set */
      set_float_default_nan_pattern(0b00100000, &env->fp_status);
+    /*
+     * "PA-RISC 2.0 Architecture" says it is IMPDEF whether the flushing
+     * enabled by FPSR.D happens before or after rounding. We pick "before"
+     * for consistency with tininess detection.
+     */
+    set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
+    /*
+     * TODO: "PA-RISC 2.0 Architecture" chapter 10 says that we should
+     * detect tininess before rounding, but we don't set that here so we
+     * get the default tininess after rounding.
+     */
  }
  
  void cpu_hppa_loaded_fr0(CPUHPPAState *env)
diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c

index de6d0b252ec831a462f7d74952217325ab47e8b0..f112c6c673768bb6619832ca7746341cc2f6cf1d 100644 (file)
--- a/target/i386/tcg/fpu_helper.c
+++ b/target/i386/tcg/fpu_helper.c
@@ -188,6 +188,14 @@ void cpu_init_fp_statuses(CPUX86State *env)
      set_float_default_nan_pattern(0b11000000, &env->fp_status);
      set_float_default_nan_pattern(0b11000000, &env->mmx_status);
      set_float_default_nan_pattern(0b11000000, &env->sse_status);
+    /*
+     * TODO: x86 does flush-to-zero detection after rounding (the SDM
+     * section 10.2.3.3 on the FTZ bit of MXCSR says that we flush
+     * when we detect underflow, which x86 does after rounding).
+     */
+    set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
+    set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status);
+    set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status);
  }
  
  static inline uint8_t save_exception_flags(CPUX86State *env)
diff --git a/target/mips/fpu_helper.h b/target/mips/fpu_helper.h

index 6ad1e466cfde0ec8d1e4ff2b1835d5b23b9f3697..08fb4093904a26f1aa6e89c3d7e3db126beef630 100644 (file)
--- a/target/mips/fpu_helper.h
+++ b/target/mips/fpu_helper.h
@@ -84,6 +84,12 @@ static inline void fp_reset(CPUMIPSState *env)
       */
      set_float_2nan_prop_rule(float_2nan_prop_s_ab,
                               &env->active_fpu.fp_status);
+    /*
+     * TODO: the spec doesn't say clearly whether FTZ happens before
+     * or after rounding for normal FPU operations.
+     */
+    set_float_ftz_detection(float_ftz_before_rounding,
+                            &env->active_fpu.fp_status);
  }
  
  /* MSA */
diff --git a/target/mips/msa.c b/target/mips/msa.c

index fc77bfc7b9a36b1af7a7293fa037bb6d4858779a..32c6acbcc56868ff561537db672822cd5796c9e7 100644 (file)
--- a/target/mips/msa.c
+++ b/target/mips/msa.c
@@ -48,6 +48,15 @@ void msa_reset(CPUMIPSState *env)
      /* tininess detected after rounding.*/
      set_float_detect_tininess(float_tininess_after_rounding,
                                &env->active_tc.msa_fp_status);
+    /*
+     * MSACSR.FS detects tiny results to flush to zero before rounding
+     * (per "MIPS Architecture for Programmers Volume IV-j: The MIPS64 SIMD
+     * Architecture Module, Revision 1.1" section 3.5.4), even though it
+     * detects tininess after rounding for underflow purposes (section 3.4.2
+     * table 3.3).
+     */
+    set_float_ftz_detection(float_ftz_before_rounding,
+                            &env->active_tc.msa_fp_status);
  
      /*
       * According to MIPS specifications, if one of the two operands is
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c

index 8e49051254bd6db0269a38d51e8e08a92585de8b..062a6e85fbaecf980dc90ffe4a5e9ac1e9a96265 100644 (file)
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -7262,6 +7262,9 @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type)
      /* tininess for underflow is detected before rounding */
      set_float_detect_tininess(float_tininess_before_rounding,
                                &env->fp_status);
+    /* Similarly for flush-to-zero */
+    set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
+
      /*
       * PowerPC propagation rules:
       *  1. A if it sNaN or qNaN
diff --git a/target/rx/cpu.c b/target/rx/cpu.c

index 8c50c7a1bc81c206d8139ba5b952a4cc25ad4dd5..37a6fdd569b37c1769fd54ee0513c24fddff01a7 100644 (file)
--- a/target/rx/cpu.c
+++ b/target/rx/cpu.c
@@ -103,6 +103,14 @@ static void rx_cpu_reset_hold(Object *obj, ResetType type)
      set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
      /* Default NaN value: sign bit clear, set frac msb */
      set_float_default_nan_pattern(0b01000000, &env->fp_status);
+    /*
+     * TODO: "RX Family RXv1 Instruction Set Architecture" is not 100% clear
+     * on whether flush-to-zero should happen before or after rounding, but
+     * section 1.3.2 says that it happens when underflow is detected, and
+     * implies that underflow is detected after rounding. So this may not
+     * be the correct setting.
+     */
+    set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
  }
  
  static ObjectClass *rx_cpu_class_by_name(const char *cpu_model)
diff --git a/target/sh4/cpu.c b/target/sh4/cpu.c

index 24a22724c617b0fe38ef48ce9de41f5b488b1d69..4ac693d99bd4560a9dcc0c48bd3d2cfb57e959ec 100644 (file)
--- a/target/sh4/cpu.c
+++ b/target/sh4/cpu.c
@@ -130,6 +130,14 @@ static void superh_cpu_reset_hold(Object *obj, ResetType type)
      set_default_nan_mode(1, &env->fp_status);
      /* sign bit clear, set all frac bits other than msb */
      set_float_default_nan_pattern(0b00111111, &env->fp_status);
+    /*
+     * TODO: "SH-4 CPU Core Architecture ADCS 7182230F" doesn't say whether
+     * it detects tininess before or after rounding. Section 6.4 is clear
+     * that flush-to-zero happens when the result underflows, though, so
+     * either this should be "detect ftz after rounding" or else we should
+     * be setting "detect tininess before rounding".
+     */
+    set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
  }
  
  static void superh_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
diff --git a/target/tricore/helper.c b/target/tricore/helper.c

index e8b0ec51611040e99cbeb2efb8babdf66a477cf6..9898752eb00eb63df513cde6d7645a0484fc58c0 100644 (file)
--- a/target/tricore/helper.c
+++ b/target/tricore/helper.c
@@ -116,6 +116,7 @@ void fpu_set_state(CPUTriCoreState *env)
      set_flush_inputs_to_zero(1, &env->fp_status);
      set_flush_to_zero(1, &env->fp_status);
      set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status);
+    set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
      set_default_nan_mode(1, &env->fp_status);
      /* Default NaN pattern: sign bit clear, frac msb set */
      set_float_default_nan_pattern(0b01000000, &env->fp_status);
diff --git a/tests/fp/fp-bench.c b/tests/fp/fp-bench.c

index eacb39b99cbc9bb27603096def2ad692778193f6..d90f542ea25778523c397e659df8284f4b392e84 100644 (file)
--- a/tests/fp/fp-bench.c
+++ b/tests/fp/fp-bench.c
@@ -496,6 +496,7 @@ static void run_bench(void)
      set_float_3nan_prop_rule(float_3nan_prop_s_cab, &soft_status);
      set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &soft_status);
      set_float_default_nan_pattern(0b01000000, &soft_status);
+    set_float_ftz_detection(float_ftz_before_rounding, &soft_status);
  
      f = bench_funcs[operation][precision];
      g_assert(f);
author	Peter Maydell <peter.maydell@linaro.org>
	Sat, 1 Feb 2025 16:39:08 +0000 (16:39 +0000)
committer	Peter Maydell <peter.maydell@linaro.org>
	Tue, 11 Feb 2025 16:22:07 +0000 (16:22 +0000)
fpu/softfloat-parts.c.inc		patch \| blob \| history
include/fpu/softfloat-helpers.h		patch \| blob \| history
include/fpu/softfloat-types.h		patch \| blob \| history
target/alpha/cpu.c		patch \| blob \| history
target/arm/cpu.c		patch \| blob \| history
target/hppa/fpu_helper.c		patch \| blob \| history
target/i386/tcg/fpu_helper.c		patch \| blob \| history
target/mips/fpu_helper.h		patch \| blob \| history
target/mips/msa.c		patch \| blob \| history
target/ppc/cpu_init.c		patch \| blob \| history
target/rx/cpu.c		patch \| blob \| history
target/sh4/cpu.c		patch \| blob \| history
target/tricore/helper.c		patch \| blob \| history
tests/fp/fp-bench.c		patch \| blob \| history