target/arm: Convert VABS to decodetree

Convert the VFP VABS instruction to decodetree.

Unlike the 3-op versions, we don't pass fpst to the VFPGen2OpSPFn or
VFPGen2OpDPFn because none of the operations which use this format
and support short vectors will need it.

Backports commit 90287e22c987e9840704345ed33d237cbe759dd9 from qemu
This commit is contained in:
Peter Maydell 2019-06-13 18:41:41 -04:00 committed by Lioncash
parent 7a16bc6876
commit 1032d86ad3
No known key found for this signature in database
GPG key ID: 4E3C3CC1031BA9C7
3 changed files with 182 additions and 4 deletions

View file

@ -1128,6 +1128,14 @@ typedef void VFPGen3OpSPFn(TCGContext *, TCGv_i32 vd,
typedef void VFPGen3OpDPFn(TCGContext *, TCGv_i64 vd,
TCGv_i64 vn, TCGv_i64 vm, TCGv_ptr fpst);
/*
 * Types for callbacks for do_vfp_2op_sp() and do_vfp_2op_dp().
 * The callback should emit code to write a value to vd (which
 * should be written to only); vm is the single input operand.
 *
 * Unlike the 3-operand callback types, these take no fpst pointer:
 * the signature is just the TCG context, the destination and the
 * source.  The SP variant operates on 32-bit TCG values and the DP
 * variant on 64-bit TCG values.
 */
typedef void VFPGen2OpSPFn(TCGContext *, TCGv_i32 vd, TCGv_i32 vm);
typedef void VFPGen2OpDPFn(TCGContext *, TCGv_i64 vd, TCGv_i64 vm);
/*
* Perform a 3-operand VFP data processing instruction. fn is the
* callback to do the actual operation; this function deals with the
@ -1293,6 +1301,157 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
return true;
}
/*
 * Perform a 2-operand (one destination, one source) single-precision
 * VFP data processing instruction.  fn is the callback that emits the
 * actual operation; this function performs the feature and access
 * checks and iterates over the registers of a short vector.
 *
 * vd and vm are the destination and source register numbers.
 *
 * Returns false when vec_len or vec_stride is nonzero but the
 * aa32_fpshvec feature is absent.  Otherwise returns true, including
 * the case where vfp_access_check() fails and no operation is emitted.
 */
static bool do_vfp_2op_sp(DisasContext *s, VFPGen2OpSPFn *fn, int vd, int vm)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    uint32_t bank_mask = 0;
    uint32_t step_d = 0;
    uint32_t step_m = 0;
    int remaining = s->vec_len;
    TCGv_i32 src, dst;

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (remaining != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (remaining > 0) {
        bank_mask = 0x18;

        /* Classify the operation from the bank bits of vd and vm. */
        if ((vd & bank_mask) != 0) {
            step_d = s->vec_stride + 1;
            if ((vm & bank_mask) != 0) {
                /* vector: the source advances in step with the dest */
                step_m = step_d;
            }
            /* else mixed scalar/vector: step_m stays 0 */
        } else {
            /* scalar: only one element is processed */
            remaining = 0;
        }
    }

    src = tcg_temp_new_i32(tcg_ctx);
    dst = tcg_temp_new_i32(tcg_ctx);

    /* The first element is always computed and stored. */
    neon_load_reg32(s, src, vm);
    fn(tcg_ctx, dst, src);
    neon_store_reg32(s, dst, vd);

    if (remaining != 0 && step_m == 0) {
        /*
         * Single source, one-to-many: the result already in dst is
         * stored to each further destination without recomputing it.
         */
        while (remaining--) {
            vd = (vd & bank_mask) | ((vd + step_d) & (bank_mask - 1));
            neon_store_reg32(s, dst, vd);
        }
    } else {
        /* Step both registers (wrapping within the bank) and redo the op. */
        while (remaining > 0) {
            remaining--;
            vd = (vd & bank_mask) | ((vd + step_d) & (bank_mask - 1));
            vm = (vm & bank_mask) | ((vm + step_m) & (bank_mask - 1));
            neon_load_reg32(s, src, vm);
            fn(tcg_ctx, dst, src);
            neon_store_reg32(s, dst, vd);
        }
    }

    tcg_temp_free_i32(tcg_ctx, src);
    tcg_temp_free_i32(tcg_ctx, dst);

    return true;
}
/*
 * Perform a 2-operand (one destination, one source) double-precision
 * VFP data processing instruction.  fn is the callback that emits the
 * actual operation; this function performs the feature and access
 * checks and iterates over the registers of a short vector.
 *
 * vd and vm are the destination and source register numbers.
 *
 * Returns false (UNDEF) when either register number has bit 4 set and
 * the aa32_fp_d32 feature is absent, and also when vec_len or
 * vec_stride is nonzero but the aa32_fpshvec feature is absent.
 * Otherwise returns true, including the case where vfp_access_check()
 * fails and no operation is emitted.
 */
static bool do_vfp_2op_dp(DisasContext *s, VFPGen2OpDPFn *fn, int vd, int vm)
{
    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    uint32_t bank_mask = 0;
    uint32_t step_d = 0;
    uint32_t step_m = 0;
    int remaining = s->vec_len;
    TCGv_i64 src, dst;

    /* UNDEF accesses to D16-D31 if they don't exist */
    if (!dc_isar_feature(aa32_fp_d32, s) && ((vd | vm) & 0x10)) {
        return false;
    }

    if (!dc_isar_feature(aa32_fpshvec, s) &&
        (remaining != 0 || s->vec_stride != 0)) {
        return false;
    }

    if (!vfp_access_check(s)) {
        return true;
    }

    if (remaining > 0) {
        bank_mask = 0xc;

        /* Classify the operation from the bank bits of vd and vm. */
        if ((vd & bank_mask) != 0) {
            step_d = (s->vec_stride >> 1) + 1;
            if ((vm & bank_mask) != 0) {
                /* vector: the source advances in step with the dest */
                step_m = step_d;
            }
            /* else mixed scalar/vector: step_m stays 0 */
        } else {
            /* scalar: only one element is processed */
            remaining = 0;
        }
    }

    src = tcg_temp_new_i64(tcg_ctx);
    dst = tcg_temp_new_i64(tcg_ctx);

    /* The first element is always computed and stored. */
    neon_load_reg64(s, src, vm);
    fn(tcg_ctx, dst, src);
    neon_store_reg64(s, dst, vd);

    if (remaining != 0 && step_m == 0) {
        /*
         * Single source, one-to-many: the result already in dst is
         * stored to each further destination without recomputing it.
         */
        while (remaining--) {
            vd = (vd & bank_mask) | ((vd + step_d) & (bank_mask - 1));
            neon_store_reg64(s, dst, vd);
        }
    } else {
        /* Step both registers (wrapping within the bank) and redo the op. */
        while (remaining > 0) {
            remaining--;
            vd = (vd & bank_mask) | ((vd + step_d) & (bank_mask - 1));
            vm = (vm & bank_mask) | ((vm + step_m) & (bank_mask - 1));
            neon_load_reg64(s, src, vm);
            fn(tcg_ctx, dst, src);
            neon_store_reg64(s, dst, vd);
        }
    }

    tcg_temp_free_i64(tcg_ctx, src);
    tcg_temp_free_i64(tcg_ctx, dst);

    return true;
}
static void gen_VMLA_sp(TCGContext *tcg_ctx, TCGv_i32 vd, TCGv_i32 vn, TCGv_i32 vm, TCGv_ptr fpst)
{
/* Note that order of inputs to the add matters for NaNs */
@ -1754,3 +1913,13 @@ static bool trans_VMOV_imm_dp(DisasContext *s, arg_VMOV_imm_dp *a)
tcg_temp_free_i64(tcg_ctx, fd);
return true;
}
/*
 * Translate the single-precision VABS instruction.
 *
 * Forwards the decoded vd and vm fields of *a to do_vfp_2op_sp(), using
 * gen_helper_vfp_abss as the per-element operation callback, and
 * returns that function's result unchanged.
 */
static bool trans_VABS_sp(DisasContext *s, arg_VABS_sp *a)
{
return do_vfp_2op_sp(s, gen_helper_vfp_abss, a->vd, a->vm);
}
/*
 * Translate the double-precision VABS instruction.
 *
 * Forwards the decoded vd and vm fields of *a to do_vfp_2op_dp(), using
 * gen_helper_vfp_absd as the per-element operation callback, and
 * returns that function's result unchanged.
 */
static bool trans_VABS_dp(DisasContext *s, arg_VABS_dp *a)
{
return do_vfp_2op_dp(s, gen_helper_vfp_absd, a->vd, a->vm);
}

View file

@ -3197,6 +3197,14 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
case 0 ... 14:
/* Already handled by decodetree */
return 1;
case 15:
switch (rn) {
case 1:
/* Already handled by decodetree */
return 1;
default:
break;
}
default:
break;
}
@ -3205,7 +3213,6 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
/* rn is opcode, encoded as per VFP_SREG_N. */
switch (rn) {
case 0x00: /* vmov */
case 0x01: /* vabs */
case 0x02: /* vneg */
case 0x03: /* vsqrt */
break;
@ -3385,9 +3392,6 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
case 0: /* cpy */
/* no-op */
break;
case 1: /* abs */
gen_vfp_abs(s, dp);
break;
case 2: /* neg */
gen_vfp_neg(s, dp);
break;

View file

@ -156,3 +156,8 @@ VMOV_imm_sp ---- 1110 1.11 imm4h:4 .... 1010 0000 imm4l:4 \
vd=%vd_sp
VMOV_imm_dp ---- 1110 1.11 imm4h:4 .... 1011 0000 imm4l:4 \
vd=%vd_dp
# VABS, single-precision (_sp) and double-precision (_dp) patterns; each takes a vd and a vm operand
VABS_sp ---- 1110 1.11 0000 .... 1010 11.0 .... \
vd=%vd_sp vm=%vm_sp
VABS_dp ---- 1110 1.11 0000 .... 1011 11.0 .... \
vd=%vd_dp vm=%vm_dp