target/arm: Convert Neon VCVT f16/f32 insns to decodetree

Convert the Neon insns in the 2-reg-misc group which are
VCVT between f32 and f16 to decodetree.

Backports commit 654a517355e249435505ae5ff14a7520410cf7a4 from qemu
This commit is contained in:
Peter Maydell 2021-02-25 12:24:20 -05:00 committed by Lioncash
parent 4ca33c54a2
commit 6301f9acaa
3 changed files with 104 additions and 62 deletions

View file

@ -461,6 +461,9 @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
VQMOVN_U 1111 001 11 . 11 .. 10 .... 0 0101 1 . 0 .... @2misc_q0
VSHLL 1111 001 11 . 11 .. 10 .... 0 0110 0 . 0 .... @2misc_q0
VCVT_F16_F32 1111 001 11 . 11 .. 10 .... 0 1100 0 . 0 .... @2misc_q0
VCVT_F32_F16 1111 001 11 . 11 .. 10 .... 0 1110 0 . 0 .... @2misc_q0
]
# Subgroup for size != 0b11

View file

@ -3396,3 +3396,101 @@ static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
tcg_temp_free_i32(tcg_ctx, rm1);
return true;
}
/*
 * Decodetree translator for the Neon 2-reg-misc VCVT_F16_F32 pattern.
 *
 * Reads 32-bit pieces 0..3 of a->vm, converts each one with the
 * vfp_fcvt_f32_to_f16 helper and packs the four results pairwise into
 * 32-bit pieces 0 and 1 of a->vd (even-numbered source piece in the low
 * 16 bits, odd-numbered one in the high 16 bits).
 *
 * Returns false to make the instruction UNDEF (missing feature, a
 * D16-D31 access on a CPU without those registers, odd a->vm, or
 * a->size != 1).  Returns true once the instruction has been dealt with,
 * including the case where vfp_access_check() fails.
 */
static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* The source register number must be even and size must be 1. */
    if ((a->vm & 1) || (a->size != 1)) {
        return false;
    }

    /*
     * NOTE(review): presumably vfp_access_check() has already arranged for
     * the exception when it fails, so nothing more is emitted here.
     */
    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * NOTE(review): tcg_ctx (not s) is passed here to agree with the
     * pre-decodetree call sites in disas_neon_data_insn(); confirm against
     * the get_fpstatus_ptr() prototype.
     */
    fpst = get_fpstatus_ptr(tcg_ctx, true);
    ahp = get_ahp_flag(s);

    /* Pieces 0 and 1 of the source -> low/high halves of tmp2. */
    tmp = neon_load_reg(s, a->vm, 0);
    gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp);
    tmp2 = neon_load_reg(s, a->vm, 1);
    gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp2, tmp2, fpst, ahp);
    tcg_gen_shli_i32(tcg_ctx, tmp2, tmp2, 16);
    tcg_gen_or_i32(tcg_ctx, tmp2, tmp2, tmp);
    tcg_temp_free_i32(tcg_ctx, tmp);

    /* Pieces 2 and 3 of the source -> low/high halves of tmp3. */
    tmp = neon_load_reg(s, a->vm, 2);
    gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp);
    /*
     * Every source piece is loaded before the first store to a->vd;
     * presumably this keeps the result correct when the destination
     * overlaps the source.  Do not reorder.
     */
    tmp3 = neon_load_reg(s, a->vm, 3);
    /* tmp2/tmp3 are not freed here: presumably neon_store_reg() consumes them. */
    neon_store_reg(s, a->vd, 0, tmp2);
    gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp3, tmp3, fpst, ahp);
    tcg_gen_shli_i32(tcg_ctx, tmp3, tmp3, 16);
    tcg_gen_or_i32(tcg_ctx, tmp3, tmp3, tmp);
    neon_store_reg(s, a->vd, 1, tmp3);

    tcg_temp_free_i32(tcg_ctx, tmp);
    tcg_temp_free_i32(tcg_ctx, ahp);
    tcg_temp_free_ptr(tcg_ctx, fpst);
    return true;
}
/*
 * Decodetree translator for the Neon 2-reg-misc VCVT_F32_F16 pattern.
 *
 * Reads 32-bit pieces 0 and 1 of a->vm, splits each into its low and high
 * 16-bit halves, converts every half with the vfp_fcvt_f16_to_f32 helper
 * and writes the four results to 32-bit pieces 0..3 of a->vd.
 *
 * Returns false to make the instruction UNDEF (missing feature, a
 * D16-D31 access on a CPU without those registers, odd a->vd, or
 * a->size != 1).  Returns true once the instruction has been dealt with,
 * including the case where vfp_access_check() fails.
 */
static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_ptr fpst;
    TCGv_i32 ahp, tmp, tmp2, tmp3;

    if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
        !dc_isar_feature(aa32_fp16_spconv, s)) {
        return false;
    }

    /* UNDEF accesses to D16-D31 if they don't exist. */
    if (!dc_isar_feature(aa32_simd_r32, s) &&
        ((a->vd | a->vm) & 0x10)) {
        return false;
    }

    /* The destination register number must be even and size must be 1. */
    if ((a->vd & 1) || (a->size != 1)) {
        return false;
    }

    /*
     * NOTE(review): presumably vfp_access_check() has already arranged for
     * the exception when it fails, so nothing more is emitted here.
     */
    if (!vfp_access_check(s)) {
        return true;
    }

    /*
     * NOTE(review): tcg_ctx (not s) is passed here to agree with the
     * pre-decodetree call sites in disas_neon_data_insn(); confirm against
     * the get_fpstatus_ptr() prototype.
     */
    fpst = get_fpstatus_ptr(tcg_ctx, true);
    ahp = get_ahp_flag(s);

    /*
     * Both source pieces are loaded before anything is stored to a->vd;
     * presumably this keeps the result correct when the destination
     * overlaps the source.  Do not reorder.
     */
    tmp3 = tcg_temp_new_i32(tcg_ctx);
    tmp = neon_load_reg(s, a->vm, 0);
    tmp2 = neon_load_reg(s, a->vm, 1);

    /* Low half of source piece 0 -> destination piece 0. */
    tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp);
    gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp);
    /* Stored temps are not freed here: presumably neon_store_reg() consumes them. */
    neon_store_reg(s, a->vd, 0, tmp3);

    /* High half of source piece 0 -> destination piece 1. */
    tcg_gen_shri_i32(tcg_ctx, tmp, tmp, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp, tmp, fpst, ahp);
    neon_store_reg(s, a->vd, 1, tmp);

    /* Low half of source piece 1 -> destination piece 2 (fresh temp). */
    tmp3 = tcg_temp_new_i32(tcg_ctx);
    tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp2);
    gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp);
    neon_store_reg(s, a->vd, 2, tmp3);

    /* High half of source piece 1 -> destination piece 3. */
    tcg_gen_shri_i32(tcg_ctx, tmp2, tmp2, 16);
    gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp2, tmp2, fpst, ahp);
    neon_store_reg(s, a->vd, 3, tmp2);

    tcg_temp_free_i32(tcg_ctx, ahp);
    tcg_temp_free_ptr(tcg_ctx, fpst);
    return true;
}

View file

@ -4965,7 +4965,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
int pass;
int u;
int vec_size;
TCGv_i32 tmp, tmp2, tmp3;
TCGv_i32 tmp, tmp2;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return 1;
@ -5032,6 +5032,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case NEON_2RM_VZIP:
case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
case NEON_2RM_VSHLL:
case NEON_2RM_VCVT_F16_F32:
case NEON_2RM_VCVT_F32_F16:
/* handled by decodetree */
return 1;
case NEON_2RM_VTRN:
@ -5047,67 +5049,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
goto elementwise;
}
break;
case NEON_2RM_VCVT_F16_F32:
{
TCGv_ptr fpst;
TCGv_i32 ahp;
if (!dc_isar_feature(aa32_fp16_spconv, s) ||
q || (rm & 1)) {
return 1;
}
fpst = get_fpstatus_ptr(tcg_ctx, true);
ahp = get_ahp_flag(s);
tmp = neon_load_reg(s, rm, 0);
gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp);
tmp2 = neon_load_reg(s, rm, 1);
gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp2, tmp2, fpst, ahp);
tcg_gen_shli_i32(tcg_ctx, tmp2, tmp2, 16);
tcg_gen_or_i32(tcg_ctx, tmp2, tmp2, tmp);
tcg_temp_free_i32(tcg_ctx, tmp);
tmp = neon_load_reg(s, rm, 2);
gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp, tmp, fpst, ahp);
tmp3 = neon_load_reg(s, rm, 3);
neon_store_reg(s, rd, 0, tmp2);
gen_helper_vfp_fcvt_f32_to_f16(tcg_ctx, tmp3, tmp3, fpst, ahp);
tcg_gen_shli_i32(tcg_ctx, tmp3, tmp3, 16);
tcg_gen_or_i32(tcg_ctx, tmp3, tmp3, tmp);
neon_store_reg(s, rd, 1, tmp3);
tcg_temp_free_i32(tcg_ctx, tmp);
tcg_temp_free_i32(tcg_ctx, ahp);
tcg_temp_free_ptr(tcg_ctx, fpst);
break;
}
case NEON_2RM_VCVT_F32_F16:
{
TCGv_ptr fpst;
TCGv_i32 ahp;
if (!dc_isar_feature(aa32_fp16_spconv, s) ||
q || (rd & 1)) {
return 1;
}
fpst = get_fpstatus_ptr(tcg_ctx, true);
ahp = get_ahp_flag(s);
tmp3 = tcg_temp_new_i32(tcg_ctx);
tmp = neon_load_reg(s, rm, 0);
tmp2 = neon_load_reg(s, rm, 1);
tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp);
gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp);
neon_store_reg(s, rd, 0, tmp3);
tcg_gen_shri_i32(tcg_ctx, tmp, tmp, 16);
gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp, tmp, fpst, ahp);
neon_store_reg(s, rd, 1, tmp);
tmp3 = tcg_temp_new_i32(tcg_ctx);
tcg_gen_ext16u_i32(tcg_ctx, tmp3, tmp2);
gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp3, tmp3, fpst, ahp);
neon_store_reg(s, rd, 2, tmp3);
tcg_gen_shri_i32(tcg_ctx, tmp2, tmp2, 16);
gen_helper_vfp_fcvt_f16_to_f32(tcg_ctx, tmp2, tmp2, fpst, ahp);
neon_store_reg(s, rd, 3, tmp2);
tcg_temp_free_i32(tcg_ctx, ahp);
tcg_temp_free_ptr(tcg_ctx, fpst);
break;
}
case NEON_2RM_AESE: case NEON_2RM_AESMC:
if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
return 1;