target/arm: Implement FPST_STD_F16 fpstatus

Architecturally, Neon FP16 operations use the "standard FPSCR" like all other Neon operations. However, this is defined in the Arm ARM pseudocode as "a fixed value, except that FZ16 (and AHP) follow the FPSCR bits". In QEMU, the softfloat float_status doesn't include separate flush-to-zero for FP16 operations, so we must keep separate fp_status for "Neon non-FP16" and "Neon fp16" operations, in the same way we do already for the non-Neon "fp_status" vs "fp_status_f16". Add the extra float_status field to the CPU state structure, ensure it is correctly initialized and updated on FPSCR writes, and make fpstatus_ptr(FPST_STD_F16) return a pointer to it. Backports commit aaae563bc73de0598bbc09a102e68f27fafe704a
2025-09-14 11:27:11 +00:00 · 2021-02-26 12:00:23 -05:00 · 2021-02-26 12:00:23 -05:00 · 61377ce01c
parent b1b0a41507
commit 61377ce01c
4 changed files with 18 additions and 2 deletions
--- a/qemu/target/arm/cpu.c
+++ b/qemu/target/arm/cpu.c
@@ -393,12 +393,15 @@ static void arm_cpu_reset(CPUState *s)
    set_flush_to_zero(1, &env->vfp.standard_fp_status);
    set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
    set_default_nan_mode(1, &env->vfp.standard_fp_status);
+    set_default_nan_mode(1, &env->vfp.standard_fp_status_f16);
    set_float_detect_tininess(float_tininess_before_rounding,
                              &env->vfp.fp_status);
    set_float_detect_tininess(float_tininess_before_rounding,
                              &env->vfp.standard_fp_status);
    set_float_detect_tininess(float_tininess_before_rounding,
                              &env->vfp.fp_status_f16);
+    set_float_detect_tininess(float_tininess_before_rounding,
+                              &env->vfp.standard_fp_status_f16);

    hw_breakpoint_update_all(cpu);
    hw_watchpoint_update_all(cpu);
--- a/qemu/target/arm/cpu.h
+++ b/qemu/target/arm/cpu.h
@@ -582,6 +582,8 @@ typedef struct CPUARMState {
         *  fp_status: is the "normal" fp status.
         *  fp_status_f16: used for half-precision calculations
         *  standard_fp_status : the ARM "Standard FPSCR Value"
+         *  standard_fp_status_f16 : used for half-precision
+         *       calculations with the ARM "Standard FPSCR Value"
         *
         * Half-precision operations are governed by a separate
         * flush-to-zero control bit in FPSCR:FZ16. We pass a separate
@@ -592,15 +594,20 @@ typedef struct CPUARMState {
         * Neon) which the architecture defines as controlled by the
         * standard FPSCR value rather than the FPSCR.
         *
+         * The "standard FPSCR but for fp16 ops" is needed because
+         * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
+         * using a fixed value for it.
+         *
         * To avoid having to transfer exception bits around, we simply
         * say that the FPSCR cumulative exception flags are the logical
-         * OR of the flags in the three fp statuses. This relies on the
+         * OR of the flags in the four fp statuses. This relies on the
         * only thing which needs to read the exception flags being
         * an explicit FPSCR read.
         */
        float_status fp_status;
        float_status fp_status_f16;
        float_status standard_fp_status;
+        float_status standard_fp_status_f16;

        /* ZCR_EL[1-3] */
        uint64_t zcr_el[4];
--- a/qemu/target/arm/translate.h
+++ b/qemu/target/arm/translate.h
@@ -444,7 +444,8 @@ static inline TCGv_ptr fpstatus_ptr(TCGContext *s, ARMFPStatusFlavour flavour)
        offset = offsetof(CPUARMState, vfp.standard_fp_status);
        break;
    case FPST_STD_F16:
-        /* Not yet used or implemented: fall through to assert */
+        offset = offsetof(CPUARMState, vfp.standard_fp_status_f16);
+        break;
    default:
        g_assert_not_reached();
    }
--- a/qemu/target/arm/vfp_helper.c
+++ b/qemu/target/arm/vfp_helper.c
@@ -96,6 +96,8 @@ static uint32_t vfp_get_fpscr_from_host(CPUARMState *env)
    /* FZ16 does not generate an input denormal exception.  */
    i |= (get_float_exception_flags(&env->vfp.fp_status_f16)
          & ~float_flag_input_denormal);
+    i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
+          & ~float_flag_input_denormal);
    return vfp_exceptbits_from_host(i);
 }

@@ -127,7 +129,9 @@ static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
    if (changed & FPCR_FZ16) {
        bool ftz_enabled = val & FPCR_FZ16;
        set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
+        set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16);
+        set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
    }
    if (changed & FPCR_FZ) {
        bool ftz_enabled = val & FPCR_FZ;
@@ -149,6 +153,7 @@ static void vfp_set_fpscr_to_host(CPUARMState *env, uint32_t val)
    set_float_exception_flags(i, &env->vfp.fp_status);
    set_float_exception_flags(0, &env->vfp.fp_status_f16);
    set_float_exception_flags(0, &env->vfp.standard_fp_status);
+    set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
 }

 #else