mirror of
				https://github.com/yuzu-emu/unicorn.git
				synced 2025-11-04 13:44:49 +00:00 
			
		
		
		
	target/arm: Implement VFP fp16 for fused-multiply-add
Implement VFP fp16 support for the fused multiply-add instructions VFNMA, VFNMS, VFMA, and VFMS. Backports commit 9886fe2834b064a3cf0675a4659942ed547aed42.
This commit is contained in:
		
							parent
							
								
									f86c84425b
								
							
						
					
					
						commit
						6ac2c597ab
					
				| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_aarch64
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_aarch64
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_aarch64
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_aarch64
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_aarch64
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_aarch64
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_aarch64
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_aarch64eb
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_aarch64eb
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_aarch64eb
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_aarch64eb
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_aarch64eb
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_aarch64eb
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_aarch64eb
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_arm
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_arm
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_arm
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_arm
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_arm
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_arm
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_arm
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_armeb
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_armeb
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_armeb
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_armeb
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_armeb
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_armeb
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_armeb
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1925,6 +1925,7 @@ symbols = (
 | 
			
		|||
    'helper_vfp_minnums',
 | 
			
		||||
    'helper_vfp_mins',
 | 
			
		||||
    'helper_vfp_muladdd',
 | 
			
		||||
    'helper_vfp_muladdh',
 | 
			
		||||
    'helper_vfp_muladds',
 | 
			
		||||
    'helper_vfp_muld',
 | 
			
		||||
    'helper_vfp_mulh',
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_m68k
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_m68k
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_m68k
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_m68k
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_m68k
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_m68k
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_m68k
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_mips
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_mips
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_mips
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_mips
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_mips
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_mips
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_mips
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_mips64
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_mips64
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_mips64
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_mips64
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_mips64
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_mips64
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_mips64
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_mips64el
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_mips64el
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_mips64el
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_mips64el
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_mips64el
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_mips64el
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_mips64el
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_mipsel
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_mipsel
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_mipsel
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_mipsel
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_mipsel
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_mipsel
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_mipsel
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_powerpc
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_powerpc
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_powerpc
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_powerpc
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_powerpc
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_powerpc
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_powerpc
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_riscv32
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_riscv32
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_riscv32
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_riscv32
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_riscv32
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_riscv32
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_riscv32
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_riscv64
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_riscv64
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_riscv64
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_riscv64
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_riscv64
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_riscv64
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_riscv64
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_sparc
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_sparc
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_sparc
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_sparc
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_sparc
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_sparc
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_sparc
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_sparc64
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_sparc64
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_sparc64
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_sparc64
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_sparc64
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_sparc64
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_sparc64
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -211,6 +211,7 @@ DEF_HELPER_FLAGS_3(vfp_fcvt_f64_to_f16, TCG_CALL_NO_RWG, f16, f64, ptr, i32)
 | 
			
		|||
 | 
			
		||||
DEF_HELPER_4(vfp_muladdd, f64, f64, f64, f64, ptr)
 | 
			
		||||
DEF_HELPER_4(vfp_muladds, f32, f32, f32, f32, ptr)
 | 
			
		||||
DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, ptr)
 | 
			
		||||
 | 
			
		||||
DEF_HELPER_3(recps_f32, f32, env, f32, f32)
 | 
			
		||||
DEF_HELPER_3(rsqrts_f32, f32, env, f32, f32)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1935,6 +1935,70 @@ static bool trans_VMAXNM_dp(DisasContext *s, arg_VMAXNM_dp *a)
 | 
			
		|||
                         a->vd, a->vn, a->vm, false);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
 | 
			
		||||
{
 | 
			
		||||
    /*
 | 
			
		||||
     * VFNMA : fd = muladd(-fd,  fn, fm)
 | 
			
		||||
     * VFNMS : fd = muladd(-fd, -fn, fm)
 | 
			
		||||
     * VFMA  : fd = muladd( fd,  fn, fm)
 | 
			
		||||
     * VFMS  : fd = muladd( fd, -fn, fm)
 | 
			
		||||
     *
 | 
			
		||||
     * These are fused multiply-add, and must be done as one floating
 | 
			
		||||
     * point operation with no rounding between the multiplication and
 | 
			
		||||
     * addition steps.  NB that doing the negations here as separate
 | 
			
		||||
     * steps is correct : an input NaN should come out with its sign
 | 
			
		||||
     * bit flipped if it is a negated-input.
 | 
			
		||||
     */
 | 
			
		||||
    TCGv_ptr fpst;
 | 
			
		||||
    TCGv_i32 vn, vm, vd;
 | 
			
		||||
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
 | 
			
		||||
 | 
			
		||||
    /*
 | 
			
		||||
     * Present in VFPv4 only, and only with the FP16 extension.
 | 
			
		||||
     * Note that we can't rely on the SIMDFMAC check alone, because
 | 
			
		||||
     * in a Neon-no-VFP core that ID register field will be non-zero.
 | 
			
		||||
     */
 | 
			
		||||
    if (!dc_isar_feature(aa32_fp16_arith, s) ||
 | 
			
		||||
        !dc_isar_feature(aa32_simdfmac, s) ||
 | 
			
		||||
        !dc_isar_feature(aa32_fpsp_v2, s)) {
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (s->vec_len != 0 || s->vec_stride != 0) {
 | 
			
		||||
        return false;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (!vfp_access_check(s)) {
 | 
			
		||||
        return true;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    vn = tcg_temp_new_i32(tcg_ctx);
 | 
			
		||||
    vm = tcg_temp_new_i32(tcg_ctx);
 | 
			
		||||
    vd = tcg_temp_new_i32(tcg_ctx);
 | 
			
		||||
 | 
			
		||||
    neon_load_reg32(s, vn, a->vn);
 | 
			
		||||
    neon_load_reg32(s, vm, a->vm);
 | 
			
		||||
    if (neg_n) {
 | 
			
		||||
        /* VFNMS, VFMS */
 | 
			
		||||
        gen_helper_vfp_negh(tcg_ctx, vn, vn);
 | 
			
		||||
    }
 | 
			
		||||
    neon_load_reg32(s, vd, a->vd);
 | 
			
		||||
    if (neg_d) {
 | 
			
		||||
        /* VFNMA, VFNMS */
 | 
			
		||||
        gen_helper_vfp_negh(tcg_ctx, vd, vd);
 | 
			
		||||
    }
 | 
			
		||||
    fpst = fpstatus_ptr(tcg_ctx, FPST_FPCR_F16);
 | 
			
		||||
    gen_helper_vfp_muladdh(tcg_ctx, vd, vn, vm, vd, fpst);
 | 
			
		||||
    neon_store_reg32(s, vd, a->vd);
 | 
			
		||||
 | 
			
		||||
    tcg_temp_free_ptr(tcg_ctx, fpst);
 | 
			
		||||
    tcg_temp_free_i32(tcg_ctx, vn);
 | 
			
		||||
    tcg_temp_free_i32(tcg_ctx, vm);
 | 
			
		||||
    tcg_temp_free_i32(tcg_ctx, vd);
 | 
			
		||||
 | 
			
		||||
    return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
 | 
			
		||||
{
 | 
			
		||||
    /*
 | 
			
		||||
| 
						 | 
				
			
			@ -2087,6 +2151,7 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
 | 
			
		|||
    MAKE_ONE_VFM_TRANS_FN(VFNMA, PREC, false, true) \
 | 
			
		||||
    MAKE_ONE_VFM_TRANS_FN(VFNMS, PREC, true, true)
 | 
			
		||||
 | 
			
		||||
MAKE_VFM_TRANS_FNS(hp)
 | 
			
		||||
MAKE_VFM_TRANS_FNS(sp)
 | 
			
		||||
MAKE_VFM_TRANS_FNS(dp)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -139,6 +139,11 @@ VDIV_hp      ---- 1110 1.00 .... .... 1001 .0.0 ....        @vfp_dnm_s
 | 
			
		|||
VDIV_sp      ---- 1110 1.00 .... .... 1010 .0.0 ....        @vfp_dnm_s
 | 
			
		||||
VDIV_dp      ---- 1110 1.00 .... .... 1011 .0.0 ....        @vfp_dnm_d
 | 
			
		||||
 | 
			
		||||
VFMA_hp      ---- 1110 1.10 .... .... 1001 .0. 0 ....       @vfp_dnm_s
 | 
			
		||||
VFMS_hp      ---- 1110 1.10 .... .... 1001 .1. 0 ....       @vfp_dnm_s
 | 
			
		||||
VFNMA_hp     ---- 1110 1.01 .... .... 1001 .0. 0 ....       @vfp_dnm_s
 | 
			
		||||
VFNMS_hp     ---- 1110 1.01 .... .... 1001 .1. 0 ....       @vfp_dnm_s
 | 
			
		||||
 | 
			
		||||
VFMA_sp      ---- 1110 1.10 .... .... 1010 .0. 0 ....       @vfp_dnm_s
 | 
			
		||||
VFMS_sp      ---- 1110 1.10 .... .... 1010 .1. 0 ....       @vfp_dnm_s
 | 
			
		||||
VFNMA_sp     ---- 1110 1.01 .... .... 1010 .0. 0 ....       @vfp_dnm_s
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1082,6 +1082,13 @@ uint32_t HELPER(rsqrte_u32)(uint32_t a)
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
/* VFPv4 fused multiply-accumulate */
 | 
			
		||||
dh_ctype_f16 VFP_HELPER(muladd, h)(dh_ctype_f16 a, dh_ctype_f16 b,
 | 
			
		||||
                                   dh_ctype_f16 c, void *fpstp)
 | 
			
		||||
{
 | 
			
		||||
    float_status *fpst = fpstp;
 | 
			
		||||
    return float16_muladd(a, b, c, 0, fpst);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
float32 VFP_HELPER(muladd, s)(float32 a, float32 b, float32 c, void *fpstp)
 | 
			
		||||
{
 | 
			
		||||
    float_status *fpst = fpstp;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1919,6 +1919,7 @@
 | 
			
		|||
#define helper_vfp_minnums helper_vfp_minnums_x86_64
 | 
			
		||||
#define helper_vfp_mins helper_vfp_mins_x86_64
 | 
			
		||||
#define helper_vfp_muladdd helper_vfp_muladdd_x86_64
 | 
			
		||||
#define helper_vfp_muladdh helper_vfp_muladdh_x86_64
 | 
			
		||||
#define helper_vfp_muladds helper_vfp_muladds_x86_64
 | 
			
		||||
#define helper_vfp_muld helper_vfp_muld_x86_64
 | 
			
		||||
#define helper_vfp_mulh helper_vfp_mulh_x86_64
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in a new issue