From 694058da944e47c8a925b629de6cbb60e0ac8eb8 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 13 Jun 2019 17:09:09 -0400 Subject: [PATCH] target/arm: Convert double-precision register moves to decodetree Convert the "double-precision" register moves to decodetree: this covers VMOV scalar-to-gpreg, VMOV gpreg-to-scalar and VDUP. Note that the conversion process has tightened up a few of the UNDEF encoding checks: we now correctly forbid: * VMOV-to-gpr with U:opc1:opc2 == 10x00 or x0x10 * VMOV-from-gpr with opc1:opc2 == 0x10 * VDUP with B:E == 11 * VDUP with Q == 1 and Vn<0> == 1 Signed-off-by: Peter Maydell --- The accesses of elements < 32 bits could be improved by doing direct ld/st of the right size rather than 32-bit read-and-shift or read-modify-write, but we leave this for later cleanup, since this series is generally trying to stick to fixing the decode. Backports commit 9851ed9269d214c0c6feba960dd14ff09e6c34b4 from qemu --- qemu/target/arm/translate-vfp.inc.c | 150 ++++++++++++++++++++++++++++ qemu/target/arm/translate.c | 83 +-------------- qemu/target/arm/vfp.decode | 36 +++++++ 3 files changed, 188 insertions(+), 81 deletions(-) diff --git a/qemu/target/arm/translate-vfp.inc.c b/qemu/target/arm/translate-vfp.inc.c index 65a16e88..5cf415d2 100644 --- a/qemu/target/arm/translate-vfp.inc.c +++ b/qemu/target/arm/translate-vfp.inc.c @@ -481,3 +481,153 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a) return true; } + +static bool trans_VMOV_to_gp(DisasContext *s, arg_VMOV_to_gp *a) +{ + /* VMOV scalar to general purpose register */ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_i32 tmp; + int pass; + uint32_t offset; + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) { + return false; + } + + offset = a->index << a->size; + pass = extract32(offset, 2, 1); + offset = extract32(offset, 0, 2) * 8; + + if (a->size != 2 && !arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tmp = neon_load_reg(s, a->vn, pass); + switch (a->size) { + case 0: + if (offset) { + tcg_gen_shri_i32(tcg_ctx, tmp, tmp, offset); + } + if (a->u) { + gen_uxtb(tmp); + } else { + gen_sxtb(tmp); + } + break; + case 1: + if (a->u) { + if (offset) { + tcg_gen_shri_i32(tcg_ctx, tmp, tmp, 16); + } else { + gen_uxth(tmp); + } + } else { + if (offset) { + tcg_gen_sari_i32(tcg_ctx, tmp, tmp, 16); + } else { + gen_sxth(tmp); + } + } + break; + case 2: + break; + } + store_reg(s, a->rt, tmp); + + return true; +} + +static bool trans_VMOV_from_gp(DisasContext *s, arg_VMOV_from_gp *a) +{ + /* VMOV general purpose register to scalar */ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_i32 tmp, tmp2; + int pass; + uint32_t offset; + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) { + return false; + } + + offset = a->index << a->size; + pass = extract32(offset, 2, 1); + offset = extract32(offset, 0, 2) * 8; + + if (a->size != 2 && !arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + if (!vfp_access_check(s)) { + return true; + } + + tmp = load_reg(s, a->rt); + switch (a->size) { + case 0: + tmp2 = neon_load_reg(s, a->vn, pass); + tcg_gen_deposit_i32(tcg_ctx, tmp, tmp2, tmp, offset, 8); + tcg_temp_free_i32(tcg_ctx, tmp2); + break; + case 1: + tmp2 = neon_load_reg(s, a->vn, pass); + tcg_gen_deposit_i32(tcg_ctx, tmp, tmp2, tmp, offset, 16); + tcg_temp_free_i32(tcg_ctx, tmp2); + break; + case 2: + break; + } + neon_store_reg(s, a->vn, pass, tmp); + + return true; +} + +static bool trans_VDUP(DisasContext *s, arg_VDUP *a) +{ + /* VDUP (general purpose register) */ + TCGContext *tcg_ctx = s->uc->tcg_ctx; + TCGv_i32 tmp; + int size, vec_size; + + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist */ + if (!dc_isar_feature(aa32_fp_d32, s) && (a->vn & 0x10)) { + return false; + } + + if (a->b && a->e) { + return false; + } + + if (a->q && (a->vn & 1)) { + return false; + } + + vec_size = a->q ? 16 : 8; + if (a->b) { + size = 0; + } else if (a->e) { + size = 1; + } else { + size = 2; + } + + if (!vfp_access_check(s)) { + return true; + } + + tmp = load_reg(s, a->rt); + tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(a->vn, 0), + vec_size, vec_size, tmp); + tcg_temp_free_i32(tcg_ctx, tmp); + + return true; +} diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c index 8488aa9a..28dc601e 100644 --- a/qemu/target/arm/translate.c +++ b/qemu/target/arm/translate.c @@ -3255,87 +3255,8 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) /* single register transfer */ rd = (insn >> 12) & 0xf; if (dp) { - int size; - int pass; - - VFP_DREG_N(rn, insn); - if (insn & 0xf) - return 1; - if (insn & 0x00c00060 - && !arm_dc_feature(s, ARM_FEATURE_NEON)) { - return 1; - } - - pass = (insn >> 21) & 1; - if (insn & (1 << 22)) { - size = 0; - offset = ((insn >> 5) & 3) * 8; - } else if (insn & (1 << 5)) { - size = 1; - offset = (insn & (1 << 6)) ? 16 : 0; - } else { - size = 2; - offset = 0; - } - if (insn & ARM_CP_RW_BIT) { - /* vfp->arm */ - tmp = neon_load_reg(s, rn, pass); - switch (size) { - case 0: - if (offset) - tcg_gen_shri_i32(tcg_ctx, tmp, tmp, offset); - if (insn & (1 << 23)) - gen_uxtb(tmp); - else - gen_sxtb(tmp); - break; - case 1: - if (insn & (1 << 23)) { - if (offset) { - tcg_gen_shri_i32(tcg_ctx, tmp, tmp, 16); - } else { - gen_uxth(tmp); - } - } else { - if (offset) { - tcg_gen_sari_i32(tcg_ctx, tmp, tmp, 16); - } else { - gen_sxth(tmp); - } - } - break; - case 2: - break; - } - store_reg(s, rd, tmp); - } else { - /* arm->vfp */ - tmp = load_reg(s, rd); - if (insn & (1 << 23)) { - /* VDUP */ - int vec_size = pass ? 16 : 8; - tcg_gen_gvec_dup_i32(tcg_ctx, size, neon_reg_offset(rn, 0), - vec_size, vec_size, tmp); - tcg_temp_free_i32(tcg_ctx, tmp); - } else { - /* VMOV */ - switch (size) { - case 0: - tmp2 = neon_load_reg(s, rn, pass); - tcg_gen_deposit_i32(tcg_ctx, tmp, tmp2, tmp, offset, 8); - tcg_temp_free_i32(tcg_ctx, tmp2); - break; - case 1: - tmp2 = neon_load_reg(s, rn, pass); - tcg_gen_deposit_i32(tcg_ctx, tmp, tmp2, tmp, offset, 16); - tcg_temp_free_i32(tcg_ctx, tmp2); - break; - case 2: - break; - } - neon_store_reg(s, rn, pass, tmp); - } - } + /* already handled by decodetree */ + return 1; } else { /* !dp */ bool is_sysreg; diff --git a/qemu/target/arm/vfp.decode b/qemu/target/arm/vfp.decode index 28ee664d..8286bdc0 100644 --- a/qemu/target/arm/vfp.decode +++ b/qemu/target/arm/vfp.decode @@ -26,3 +26,39 @@ # 1110 1110 .... .... .... 101. .... .... # (but those patterns might also cover some Neon instructions, # which do not live in this file.) + +# VFP registers have an odd encoding with a four-bit field +# and a one-bit field which are assembled in different orders +# depending on whether the register is double or single precision. +# Each individual instruction function must do the checks for +# "double register selected but CPU does not have double support" +# and "double register number has bit 4 set but CPU does not +# support D16-D31" (which should UNDEF). +%vm_dp 5:1 0:4 +%vm_sp 0:4 5:1 +%vn_dp 7:1 16:4 +%vn_sp 16:4 7:1 +%vd_dp 22:1 12:4 +%vd_sp 12:4 22:1 + +%vmov_idx_b 21:1 5:2 +%vmov_idx_h 21:1 6:1 + +# VMOV scalar to general-purpose register; note that this does +# include some Neon cases. +VMOV_to_gp ---- 1110 u:1 1. 1 .... rt:4 1011 ... 1 0000 \ + vn=%vn_dp size=0 index=%vmov_idx_b +VMOV_to_gp ---- 1110 u:1 0. 1 .... rt:4 1011 ..1 1 0000 \ + vn=%vn_dp size=1 index=%vmov_idx_h +VMOV_to_gp ---- 1110 0 0 index:1 1 .... rt:4 1011 .00 1 0000 \ + vn=%vn_dp size=2 u=0 + +VMOV_from_gp ---- 1110 0 1. 0 .... rt:4 1011 ... 1 0000 \ + vn=%vn_dp size=0 index=%vmov_idx_b +VMOV_from_gp ---- 1110 0 0. 0 .... rt:4 1011 ..1 1 0000 \ + vn=%vn_dp size=1 index=%vmov_idx_h +VMOV_from_gp ---- 1110 0 0 index:1 0 .... rt:4 1011 .00 1 0000 \ + vn=%vn_dp size=2 + +VDUP ---- 1110 1 b:1 q:1 0 .... rt:4 1011 . 0 e:1 1 0000 \ + vn=%vn_dp