diff --git a/qemu/target/arm/neon-dp.decode b/qemu/target/arm/neon-dp.decode index 86b1b9e3..0a791af4 100644 --- a/qemu/target/arm/neon-dp.decode +++ b/qemu/target/arm/neon-dp.decode @@ -445,6 +445,8 @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm &2misc vm=%vm_dp vd=%vd_dp q=1 VREV64 1111 001 11 . 11 .. 00 .... 0 0000 . . 0 .... @2misc + VREV32 1111 001 11 . 11 .. 00 .... 0 0001 . . 0 .... @2misc + VREV16 1111 001 11 . 11 .. 00 .... 0 0010 . . 0 .... @2misc VPADDL_S 1111 001 11 . 11 .. 00 .... 0 0100 . . 0 .... @2misc VPADDL_U 1111 001 11 . 11 .. 00 .... 0 0101 . . 0 .... @2misc diff --git a/qemu/target/arm/translate-neon.inc.c b/qemu/target/arm/translate-neon.inc.c index 6977895a..0543584e 100644 --- a/qemu/target/arm/translate-neon.inc.c +++ b/qemu/target/arm/translate-neon.inc.c @@ -3045,7 +3045,7 @@ static bool trans_VREV64(DisasContext *s, arg_VREV64 *a) tcg_gen_bswap32_i32(tcg_ctx, tmp[half], tmp[half]); break; case 1: - gen_swap_half(s, tmp[half], tmp[half]); + gen_swap_half(tcg_ctx, tmp[half], tmp[half]); break; case 2: break; @@ -3592,3 +3592,59 @@ DO_2M_CRYPTO(AESIMC, aa32_aes, 0) DO_2M_CRYPTO(SHA1H, aa32_sha1, 2) DO_2M_CRYPTO(SHA1SU1, aa32_sha1, 2) DO_2M_CRYPTO(SHA256SU0, aa32_sha2, 2) + +static bool do_2misc(DisasContext *s, arg_2misc *a, NeonGenOneOpFn *fn) +{ + int pass; + TCGContext *tcg_ctx = s->uc->tcg_ctx; + + /* Handle a 2-reg-misc operation by iterating 32 bits at a time */ + if (!arm_dc_feature(s, ARM_FEATURE_NEON)) { + return false; + } + + /* UNDEF accesses to D16-D31 if they don't exist. */ + if (!dc_isar_feature(aa32_simd_r32, s) && + ((a->vd | a->vm) & 0x10)) { + return false; + } + + if (!fn) { /* caller supplied no generator function */ + return false; + } + + if ((a->vd | a->vm) & a->q) { /* when q is 1 this is nonzero iff vd or vm is odd */ + return false; + } + + if (!vfp_access_check(s)) { + return true; /* NOTE(review): presumably the access check already raised the exception - confirm */ + } + + for (pass = 0; pass < (a->q ? 
4 : 2); pass++) { + TCGv_i32 tmp = neon_load_reg(s, a->vm, pass); + fn(tcg_ctx, tmp, tmp); + neon_store_reg(s, a->vd, pass, tmp); + } + + return true; +} + +static bool trans_VREV32(DisasContext *s, arg_2misc *a) +{ + static NeonGenOneOpFn * const fn[] = { /* indexed by a->size */ + tcg_gen_bswap32_i32, + gen_swap_half, + NULL, /* a NULL entry makes do_2misc return false */ + NULL, + }; + return do_2misc(s, a, fn[a->size]); +} + +static bool trans_VREV16(DisasContext *s, arg_2misc *a) +{ + if (a->size != 0) { /* only size 0 is accepted */ + return false; + } + return do_2misc(s, a, gen_rev16); +} diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c index b8316f4f..4e0b72f8 100644 --- a/qemu/target/arm/translate.c +++ b/qemu/target/arm/translate.c @@ -362,9 +362,8 @@ static void gen_smul_dual(DisasContext *s, TCGv_i32 a, TCGv_i32 b) } /* Byteswap each halfword. */ -static void gen_rev16(DisasContext *s, TCGv_i32 dest, TCGv_i32 var) +static void gen_rev16(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_i32 var) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 tmp = tcg_temp_new_i32(tcg_ctx); TCGv_i32 mask = tcg_const_i32(tcg_ctx, 0x00ff00ff); tcg_gen_shri_i32(tcg_ctx, tmp, var, 8); @@ -377,19 +376,17 @@ static void gen_rev16(DisasContext *s, TCGv_i32 dest, TCGv_i32 var) } /* Byteswap low halfword and sign extend. */ -static void gen_revsh(DisasContext *s, TCGv_i32 dest, TCGv_i32 var) +static void gen_revsh(TCGContext *tcg_ctx, TCGv_i32 dest, TCGv_i32 var) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; tcg_gen_ext16u_i32(tcg_ctx, var, var); tcg_gen_bswap16_i32(tcg_ctx, var, var); tcg_gen_ext16s_i32(tcg_ctx, dest, var); } /* Swap low and high halfwords. */ -static void gen_swap_half(DisasContext *s, TCGv_i32 dest, TCGv_i32 var) +static void gen_swap_half(TCGContext *s, TCGv_i32 dest, TCGv_i32 var) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; - tcg_gen_rotri_i32(tcg_ctx, dest, var, 16); + tcg_gen_rotri_i32(s, dest, var, 16); /* s is the TCGContext in this function, not a DisasContext */ } /* Dual 16-bit add. Result placed in t0 and t1 is marked as dead. 
@@ -5041,6 +5038,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) case NEON_2RM_AESE: case NEON_2RM_AESMC: case NEON_2RM_SHA1H: case NEON_2RM_SHA1SU1: + case NEON_2RM_VREV32: + case NEON_2RM_VREV16: /* handled by decodetree */ return 1; case NEON_2RM_VTRN: @@ -5062,16 +5061,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) for (pass = 0; pass < (q ? 4 : 2); pass++) { tmp = neon_load_reg(s, rm, pass); switch (op) { - case NEON_2RM_VREV32: - switch (size) { - case 0: tcg_gen_bswap32_i32(tcg_ctx, tmp, tmp); break; - case 1: gen_swap_half(s, tmp, tmp); break; - default: abort(); - } - break; - case NEON_2RM_VREV16: - gen_rev16(s, tmp, tmp); - break; case NEON_2RM_VCLS: switch (size) { case 0: gen_helper_neon_cls_s8(tcg_ctx, tmp, tmp); break; @@ -6087,10 +6076,9 @@ static void gen_ext16u_i32(DisasContext *s, TCGv_i32 dest, TCGv_i32 src) tcg_gen_ext16u_i32(tcg_ctx, dest, src); } -static void gen_bswap32_i32(DisasContext *s, TCGv_i32 dest, TCGv_i32 src) +static void gen_bswap32_i32(TCGContext *s, TCGv_i32 dest, TCGv_i32 src) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; - tcg_gen_bswap32_i32(tcg_ctx, dest, src); + tcg_gen_bswap32_i32(s, dest, src); /* s is the TCGContext in this function, not a DisasContext */ } static void gen_ssat_dectree(DisasContext *s, TCGv_i32 dest, TCGv_env env, TCGv_i32 a, TCGv_i32 b) @@ -6129,9 +6117,8 @@ static void gen_uxtb16_dectree(DisasContext *s, TCGv_i32 dest, TCGv_i32 src) gen_helper_uxtb16(tcg_ctx, dest, src); } -static void gen_rbit_dectree(DisasContext* s, TCGv_i32 dest, TCGv_i32 src) +static void gen_rbit_dectree(TCGContext* tcg_ctx, TCGv_i32 dest, TCGv_i32 src) { - TCGContext *tcg_ctx = s->uc->tcg_ctx; gen_helper_rbit(tcg_ctx, dest, src); } @@ -8250,12 +8237,13 @@ static bool trans_SEL(DisasContext *s, arg_rrr *a) } static bool op_rr(DisasContext *s, arg_rr *a, - void (*gen)(DisasContext* s, TCGv_i32, TCGv_i32)) + void (*gen)(TCGContext* s, TCGv_i32, TCGv_i32)) { + TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i32 tmp; tmp = load_reg(s, a->rm); - gen(s, tmp, tmp); 
+ gen(tcg_ctx, tmp, tmp); store_reg(s, a->rd, tmp); return true; } @@ -8308,7 +8296,7 @@ static bool op_smlad(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) t1 = load_reg(s, a->rn); t2 = load_reg(s, a->rm); if (m_swap) { - gen_swap_half(s, t2, t2); + gen_swap_half(tcg_ctx, t2, t2); } gen_smul_dual(s, t1, t2); @@ -8367,7 +8355,7 @@ static bool op_smlald(DisasContext *s, arg_rrrr *a, bool m_swap, bool sub) t1 = load_reg(s, a->rn); t2 = load_reg(s, a->rm); if (m_swap) { - gen_swap_half(s, t2, t2); + gen_swap_half(tcg_ctx, t2, t2); } gen_smul_dual(s, t1, t2); diff --git a/qemu/target/arm/translate.h b/qemu/target/arm/translate.h index 24e5e8c8..78e54999 100644 --- a/qemu/target/arm/translate.h +++ b/qemu/target/arm/translate.h @@ -370,6 +370,7 @@ typedef void GVecGen4Fn(TCGContext *, unsigned, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, uint32_t); /* Function prototype for gen_ functions for calling Neon helpers */ +typedef void NeonGenOneOpFn(TCGContext *t, TCGv_i32, TCGv_i32); typedef void NeonGenOneOpEnvFn(TCGContext *t, TCGv_i32, TCGv_ptr, TCGv_i32); typedef void NeonGenTwoOpFn(TCGContext *t, TCGv_i32, TCGv_i32, TCGv_i32); typedef void NeonGenTwoOpEnvFn(TCGContext *t, TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);