target/arm: Convert Neon VSHR 2-reg-shift insns to decodetree

Convert the VSHR 2-reg-shift insns to decodetree.

Note that unlike the legacy decoder, we present the right shift
amount to the trans_ function as a positive integer.

Backports commit 66432d6b8294e3508218b360acfdf7c244eea993 from qemu
This commit is contained in:
Peter Maydell 2020-06-15 12:10:45 -04:00 committed by Lioncash
parent bf18bf983d
commit 055c96f985
3 changed files with 113 additions and 55 deletions

View file

@ -208,6 +208,21 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
######################################################################
&2reg_shift vm vd q shift size
# Right shifts are encoded as N - shift, where N is the element size in bits.
%neon_rshift_i6 16:6 !function=rsub_64
%neon_rshift_i5 16:5 !function=rsub_32
%neon_rshift_i4 16:4 !function=rsub_16
%neon_rshift_i3 16:3 !function=rsub_8
@2reg_shr_d .... ... . . . ...... .... .... 1 q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=3 shift=%neon_rshift_i6
@2reg_shr_s .... ... . . . 1 ..... .... .... 0 q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=2 shift=%neon_rshift_i5
@2reg_shr_h .... ... . . . 01 .... .... .... 0 q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=1 shift=%neon_rshift_i4
@2reg_shr_b .... ... . . . 001 ... .... .... 0 q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=0 shift=%neon_rshift_i3
@2reg_shl_d .... ... . . . shift:6 .... .... 1 q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=3
@2reg_shl_s .... ... . . . 1 shift:5 .... .... 0 q:1 . . .... \
@ -217,6 +232,51 @@ VMINNM_fp_3s 1111 001 1 0 . 1 . .... .... 1111 ... 1 .... @3same_fp
@2reg_shl_b .... ... . . . 001 shift:3 .... .... 0 q:1 . . .... \
&2reg_shift vm=%vm_dp vd=%vd_dp size=0
VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d
VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s
VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h
VSHR_S_2sh 1111 001 0 1 . ...... .... 0000 . . . 1 .... @2reg_shr_b
VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_d
VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_s
VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_h
VSHR_U_2sh 1111 001 1 1 . ...... .... 0000 . . . 1 .... @2reg_shr_b
VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_d
VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_s
VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_h
VSRA_S_2sh 1111 001 0 1 . ...... .... 0001 . . . 1 .... @2reg_shr_b
VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_d
VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_s
VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_h
VSRA_U_2sh 1111 001 1 1 . ...... .... 0001 . . . 1 .... @2reg_shr_b
VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_d
VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_s
VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_h
VRSHR_S_2sh 1111 001 0 1 . ...... .... 0010 . . . 1 .... @2reg_shr_b
VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_d
VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_s
VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_h
VRSHR_U_2sh 1111 001 1 1 . ...... .... 0010 . . . 1 .... @2reg_shr_b
VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_d
VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_s
VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_h
VRSRA_S_2sh 1111 001 0 1 . ...... .... 0011 . . . 1 .... @2reg_shr_b
VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_d
VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_s
VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_h
VRSRA_U_2sh 1111 001 1 1 . ...... .... 0011 . . . 1 .... @2reg_shr_b
VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_d
VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_s
VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_h
VSRI_2sh 1111 001 1 1 . ...... .... 0100 . . . 1 .... @2reg_shr_b
VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_d
VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_s
VSHL_2sh 1111 001 0 1 . ...... .... 0101 . . . 1 .... @2reg_shl_h

View file

@ -31,6 +31,24 @@ static inline int plus1(DisasContext *s, int x)
return x + 1;
}
/*
 * Decodetree field helper (referenced as !function=rsub_64).
 * Right shifts are encoded as N - shift, where N is the element size in
 * bits; this recovers the positive shift count for 64-bit elements.
 * The DisasContext argument is not used.
 */
static inline int rsub_64(DisasContext *s, int x)
{
    const int esize = 64;

    return esize - x;
}
/*
 * Decodetree field helper (referenced as !function=rsub_32).
 * Converts the encoded immediate of a 32-bit-element right shift into
 * the positive shift count, i.e. 32 minus the encoded value.
 * The DisasContext argument is not used.
 */
static inline int rsub_32(DisasContext *s, int x)
{
    const int esize = 32;

    return esize - x;
}
/*
 * Decodetree field helper (referenced as !function=rsub_16).
 * Converts the encoded immediate of a 16-bit-element right shift into
 * the positive shift count, i.e. 16 minus the encoded value.
 * The DisasContext argument is not used.
 */
static inline int rsub_16(DisasContext *s, int x)
{
    const int esize = 16;

    return esize - x;
}
/*
 * Decodetree field helper (referenced as !function=rsub_8).
 * Converts the encoded immediate of an 8-bit-element right shift into
 * the positive shift count, i.e. 8 minus the encoded value.
 * The DisasContext argument is not used.
 */
static inline int rsub_8(DisasContext *s, int x)
{
    const int esize = 8;

    return esize - x;
}
/* Include the generated Neon decoder */
#include "decode-neon-dp.inc.c"
#include "decode-neon-ls.inc.c"
@ -1257,3 +1275,33 @@ static bool do_vector_2sh(DisasContext *s, arg_2reg_shift *a, GVecGen2iFn *fn)
DO_2SH(VSHL, tcg_gen_gvec_shli)
DO_2SH(VSLI, gen_gvec_sli)
DO_2SH(VSRI, gen_gvec_sri)
DO_2SH(VSRA_S, gen_gvec_ssra)
DO_2SH(VSRA_U, gen_gvec_usra)
DO_2SH(VRSHR_S, gen_gvec_srshr)
DO_2SH(VRSHR_U, gen_gvec_urshr)
DO_2SH(VRSRA_S, gen_gvec_srsra)
DO_2SH(VRSRA_U, gen_gvec_ursra)
/*
 * Translate signed VSHR (2-reg-and-shift form).
 *
 * a->shift arrives as a positive right-shift count. A count equal to the
 * element width (8 << a->size bits) must produce all-sign-bits, which is
 * what an arithmetic shift by (width - 1) yields, so the count is clamped
 * to that value before emitting the vector arithmetic shift.
 *
 * Returns whatever do_vector_2sh() returns.
 */
static bool trans_VSHR_S_2sh(DisasContext *s, arg_2reg_shift *a)
{
    int max_shift = (8 << a->size) - 1;

    if (a->shift > max_shift) {
        a->shift = max_shift;
    }
    return do_vector_2sh(s, a, tcg_gen_gvec_sari);
}
/*
 * Expander with the same parameter list as the shift expanders handed to
 * do_vector_2sh(), but which ignores the source operand and shift count
 * and simply fills the destination vector with zeroes.
 */
static void gen_zero_rd_2sh(TCGContext *s, unsigned vece, uint32_t rd_ofs, uint32_t rm_ofs,
                            int64_t shift, uint32_t oprsz, uint32_t maxsz)
{
    /* Present only so the signature matches; deliberately unused. */
    (void)rm_ofs;
    (void)shift;

    tcg_gen_gvec_dup_imm(s, vece, rd_ofs, oprsz, maxsz, 0);
}
/*
 * Translate unsigned VSHR (2-reg-and-shift form).
 *
 * a->shift arrives as a positive right-shift count. In-range counts are
 * emitted as a vector logical right shift. A count that reaches the
 * element width (8 << a->size bits) is architecturally valid and gives
 * an all-zero result, so the destination is zeroed instead.
 *
 * Returns whatever do_vector_2sh() returns.
 */
static bool trans_VSHR_U_2sh(DisasContext *s, arg_2reg_shift *a)
{
    int esize = 8 << a->size;

    if (a->shift < esize) {
        return do_vector_2sh(s, a, tcg_gen_gvec_shri);
    }

    /* Shift out of range: the result is zero. */
    return do_vector_2sh(s, a, gen_zero_rd_2sh);
}

View file

@ -5445,6 +5445,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
op = (insn >> 8) & 0xf;
switch (op) {
case 0: /* VSHR */
case 1: /* VSRA */
case 2: /* VRSHR */
case 3: /* VRSRA */
case 4: /* VSRI */
case 5: /* VSHL, VSLI */
return 1; /* handled by decodetree */
default:
@ -5478,50 +5483,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
shift = shift - (1 << (size + 3));
}
switch (op) {
case 0: /* VSHR */
/* Right shift comes here negative. */
shift = -shift;
/* Shifts larger than the element size are architecturally
* valid. Unsigned results in all zeros; signed results
* in all sign bits.
*/
if (!u) {
tcg_gen_gvec_sari(tcg_ctx, size, rd_ofs, rm_ofs,
MIN(shift, (8 << size) - 1),
vec_size, vec_size);
} else if (shift >= 8 << size) {
tcg_gen_gvec_dup_imm(tcg_ctx, MO_8, rd_ofs, vec_size,
vec_size, 0);
} else {
tcg_gen_gvec_shri(tcg_ctx, size, rd_ofs, rm_ofs, shift,
vec_size, vec_size);
}
return 0;
case 1: /* VSRA */
/* Right shift comes here negative. */
shift = -shift;
if (u) {
gen_gvec_usra(tcg_ctx, size, rd_ofs, rm_ofs, shift,
vec_size, vec_size);
} else {
gen_gvec_ssra(tcg_ctx, size, rd_ofs, rm_ofs, shift,
vec_size, vec_size);
}
return 0;
case 4: /* VSRI */
if (!u) {
return 1;
}
/* Right shift comes here negative. */
shift = -shift;
gen_gvec_sri(tcg_ctx, size, rd_ofs, rm_ofs, shift,
vec_size, vec_size);
return 0;
}
if (size == 3) {
count = q + 1;
} else {
@ -5538,13 +5499,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
neon_load_reg64(s, s->V0, rm + pass);
tcg_gen_movi_i64(tcg_ctx, s->V1, imm);
switch (op) {
case 2: /* VRSHR */
case 3: /* VRSRA */
if (u)
gen_helper_neon_rshl_u64(tcg_ctx, s->V0, s->V0, s->V1);
else
gen_helper_neon_rshl_s64(tcg_ctx, s->V0, s->V0, s->V1);
break;
case 6: /* VQSHLU */
gen_helper_neon_qshlu_s64(tcg_ctx, s->V0, tcg_ctx->cpu_env,
s->V0, s->V1);
@ -5573,10 +5527,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
tmp2 = tcg_temp_new_i32(tcg_ctx);
tcg_gen_movi_i32(tcg_ctx, tmp2, imm);
switch (op) {
case 2: /* VRSHR */
case 3: /* VRSRA */
GEN_NEON_INTEGER_OP(rshl);
break;
case 6: /* VQSHLU */
switch (size) {
case 0: