diff --git a/qemu/target/arm/neon-dp.decode b/qemu/target/arm/neon-dp.decode index fcce2eda..659cf139 100644 --- a/qemu/target/arm/neon-dp.decode +++ b/qemu/target/arm/neon-dp.decode @@ -208,6 +208,21 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp ###################################################################### &2reg_shift vm vd q shift size +# Right shifts are encoded as N - shift, where N is the element size in bits. +%neon_rshift_i6 16:6 !function=rsub_64 +%neon_rshift_i5 16:5 !function=rsub_32 +%neon_rshift_i4 16:4 !function=rsub_16 +%neon_rshift_i3 16:3 !function=rsub_8 + +@2reg_shr_d .... ... . . . ...... .... .... 1 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=3 shift=%neon_rshift_i6 +@2reg_shr_s .... ... . . . 1 ..... .... .... 0 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=2 shift=%neon_rshift_i5 +@2reg_shr_h .... ... . . . 01 .... .... .... 0 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=1 shift=%neon_rshift_i4 +@2reg_shr_b .... ... . . . 001 ... .... .... 0 q:1 . . .... \ + &2reg_shift vm=%vm_dp vd=%vd_dp size=0 shift=%neon_rshift_i3 + @2reg_shl_d .... ... . . . shift:6 .... .... 1 q:1 . . .... \ &2reg_shift vm=%vm_dp vd=%vd_dp size=3 @2reg_shl_s .... ... . . . 1 shift:5 .... .... 0 q:1 . . .... \ @@ -217,6 +232,51 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp @2reg_shl_b .... ... . . . 001 shift:3 .... .... 0 q:1 . . .... \ &2reg_shift vm=%vm_dp vd=%vd_dp size=0 +VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d +VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s +VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h +VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_b + +VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d +VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s +VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h +VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_b + +VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_d +VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_s +VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_h +VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_b + +VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_d +VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_s +VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_h +VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_b + +VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_d +VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_s +VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_h +VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_b + +VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_d +VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_s +VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_h +VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_b + +VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_d +VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_s +VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_h +VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_b + +VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_d +VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_s +VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_h +VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_b + +VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_d +VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_s +VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_h +VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_b + VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_d VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h diff --git a/qemu/target/arm/translate-neon.inc.c b/qemu/target/arm/translate-neon.inc.c index c4804739..e4468f27 100644 --- a/qemu/target/arm/translate-neon.inc.c +++ b/qemu/target/arm/translate-neon.inc.c @@ -31,6 +31,24 @@ static inline int plus1(DisasContext *s, int x) return x + 1; } +static inline int rsub_64(DisasContext *s, int x) +{ + return 64 - x; +} + +static inline int rsub_32(DisasContext *s, int x) +{ + return 32 - x; +} +static inline int rsub_16(DisasContext *s, int x) +{ + return 16 - x; +} +static inline int rsub_8(DisasContext *s, int x) +{ + return 8 - x; +} + /* Include the generated Neon decoder */ #include "decode-neon-dp.inc.c" #include "decode-neon-ls.inc.c" @@ -1257,3 +1275,33 @@ static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn) DO_2SH(VSHL, tcg_gen_gvec_shli) DO_2SH(VSLI, gen_gvec_sli) +DO_2SH(VSRI, gen_gvec_sri) +DO_2SH(VSRA_S, gen_gvec_ssra) +DO_2SH(VSRA_U, gen_gvec_usra) +DO_2SH(VRSHR_S, gen_gvec_srshr) +DO_2SH(VRSHR_U, gen_gvec_urshr) +DO_2SH(VRSRA_S, gen_gvec_srsra) +DO_2SH(VRSRA_U, gen_gvec_ursra) + +static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a) +{ + /* Signed shift out of range results in all-sign-bits */ + a->shift = MIN(a->shift, (8 << a->size) - 1); + return do_vector_2sh(s, a, tcg_gen_gvec_sari); +} + +static void gen_zero_rd_2sh(TCGContext *s, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs, + int64_t shift, uint32_t oprsz, uint32_t maxsz) +{ + tcg_gen_gvec_dup_imm(s, vece, rd_ofs, oprsz, maxsz, 0); +} + +static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a) +{ + /* Shift out of range is architecturally valid and results in zero. */ + if (a->shift >= (8 << a->size)) { + return do_vector_2sh(s, a, gen_zero_rd_2sh); + } else { + return do_vector_2sh(s, a, tcg_gen_gvec_shri); + } +} diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c index 35247984..e5b86096 100644 --- a/qemu/target/arm/translate.c +++ b/qemu/target/arm/translate.c @@ -5445,6 +5445,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) op = (insn >> 8) & 0xf; switch (op) { + case 0: /* VSHR */ + case 1: /* VSRA */ + case 2: /* VRSHR */ + case 3: /* VRSRA */ + case 4: /* VSRI */ case 5: /* VSHL, VSLI */ return 1; /* handled by decodetree */ default: @@ -5478,50 +5483,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) shift = shift - (1 << (size + 3)); } - switch (op) { - case 0: /* VSHR */ - /* Right shift comes here negative. */ - shift = -shift; - /* Shifts larger than the element size are architecturally - * valid. Unsigned results in all zeros; signed results - * in all sign bits. - */ - if (!u) { - tcg_gen_gvec_sari(tcg_ctx, size, rd_ofs, rm_ofs, - MIN(shift, (8 << size) - 1), - vec_size, vec_size); - } else if (shift >= 8 << size) { - tcg_gen_gvec_dup_imm(tcg_ctx, MO_8, rd_ofs, vec_size, - vec_size, 0); - } else { - tcg_gen_gvec_shri(tcg_ctx, size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; - - case 1: /* VSRA */ - /* Right shift comes here negative. */ - shift = -shift; - if (u) { - gen_gvec_usra(tcg_ctx, size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } else { - gen_gvec_ssra(tcg_ctx, size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - } - return 0; - - case 4: /* VSRI */ - if (!u) { - return 1; - } - /* Right shift comes here negative. */ - shift = -shift; - gen_gvec_sri(tcg_ctx, size, rd_ofs, rm_ofs, shift, - vec_size, vec_size); - return 0; - } - if (size == 3) { count = q + 1; } else { @@ -5538,13 +5499,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) neon_load_reg64(s, s->V0, rm + pass); tcg_gen_movi_i64(tcg_ctx, s->V1, imm); switch (op) { - case 2: /* VRSHR */ - case 3: /* VRSRA */ - if (u) - gen_helper_neon_rshl_u64(tcg_ctx, s->V0, s->V0, s->V1); - else - gen_helper_neon_rshl_s64(tcg_ctx, s->V0, s->V0, s->V1); - break; case 6: /* VQSHLU */ gen_helper_neon_qshlu_s64(tcg_ctx, s->V0, tcg_ctx->cpu_env, s->V0, s->V1); @@ -5573,10 +5527,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) tmp2 = tcg_temp_new_i32(tcg_ctx); tcg_gen_movi_i32(tcg_ctx, tmp2, imm); switch (op) { - case 2: /* VRSHR */ - case 3: /* VRSRA */ - GEN_NEON_INTEGER_OP(rshl); - break; case 6: /* VQSHLU */ switch (size) { case 0: