target/arm: Use gvec for VSRA

Author: Lioncash
Date: 2018-11-10 10:32:29 -05:00
parent b5877f1dfb
commit edb36c7505
GPG key ID: 4E3C3CC1031BA9C7
6 changed files with 136 additions and 117 deletions
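The change converts the NEON VSRA (vector shift right and accumulate) translation from per-element helper calls to the TCG generic vector (gvec) infrastructure. The ssra_op and usra_op expander tables and their helper functions move out of translate-a64.c into translate.c with external linkage, so the AArch64 and AArch32 decoders share one implementation; the first three hunks below keep the project's generated per-target symbol-rename headers (aarch64, aarch64eb) and the list that produces them in step with the new globals. Per lane, the operation is "accumulate a shifted source"; a minimal scalar reference model for one 32-bit lane (illustrative only, not code from the commit):

    static int32_t ssra32_ref(int32_t d, int32_t a, unsigned shift)
    {
        return d + (a >> shift);    /* SSRA: arithmetic shift of a signed lane */
    }

    static uint32_t usra32_ref(uint32_t d, uint32_t a, unsigned shift)
    {
        return d + (a >> shift);    /* USRA: logical shift of an unsigned lane */
    }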

@@ -4262,10 +4262,12 @@
#define raise_exception raise_exception_aarch64
#define read_cpu_reg read_cpu_reg_aarch64
#define read_cpu_reg_sp read_cpu_reg_sp_aarch64
#define ssra_op ssra_op_aarch64
#define sve_access_check sve_access_check_aarch64
#define sve_exception_el sve_exception_el_aarch64
#define sve_zcr_len_for_el sve_zcr_len_for_el_aarch64
#define unallocated_encoding unallocated_encoding_aarch64
#define usra_op usra_op_aarch64
#define vfp_expand_imm vfp_expand_imm_aarch64
#define write_fp_dreg write_fp_dreg_aarch64
#endif

@@ -4262,10 +4262,12 @@
#define raise_exception raise_exception_aarch64eb
#define read_cpu_reg read_cpu_reg_aarch64eb
#define read_cpu_reg_sp read_cpu_reg_sp_aarch64eb
#define ssra_op ssra_op_aarch64eb
#define sve_access_check sve_access_check_aarch64eb
#define sve_exception_el sve_exception_el_aarch64eb
#define sve_zcr_len_for_el sve_zcr_len_for_el_aarch64eb
#define unallocated_encoding unallocated_encoding_aarch64eb
#define usra_op usra_op_aarch64eb
#define vfp_expand_imm vfp_expand_imm_aarch64eb
#define write_fp_dreg write_fp_dreg_aarch64eb
#endif

@@ -4287,10 +4287,12 @@ aarch64_symbols = (
'raise_exception',
'read_cpu_reg',
'read_cpu_reg_sp',
'ssra_op',
'sve_access_check',
'sve_exception_el',
'sve_zcr_len_for_el',
'unallocated_encoding',
'usra_op',
'vfp_expand_imm',
'write_fp_dreg',
)
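The two header hunks above are generated output: header_gen.py emits one #define per name in this list for each target suffix, which is how the now-shared ssra_op and usra_op globals get distinct per-target symbols (ssra_op_aarch64, ssra_op_aarch64eb, and so on) and avoid collisions when several targets are linked into one binary. Adding the two names here keeps the generated headers in sync with the new externs in translate.h.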

@@ -9542,66 +9542,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
    }
}

static void gen_ssra8_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(s, a, a, shift);
    tcg_gen_vec_add8_i64(s, d, d, a);
}

static void gen_ssra16_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(s, a, a, shift);
    tcg_gen_vec_add16_i64(s, d, d, a);
}

static void gen_ssra32_i32(TCGContext *s, TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(s, a, a, shift);
    tcg_gen_add_i32(s, d, d, a);
}

static void gen_ssra64_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(s, a, a, shift);
    tcg_gen_add_i64(s, d, d, a);
}

static void gen_ssra_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(s, vece, a, a, sh);
    tcg_gen_add_vec(s, vece, d, d, a);
}

static void gen_usra8_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(s, a, a, shift);
    tcg_gen_vec_add8_i64(s, d, d, a);
}

static void gen_usra16_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(s, a, a, shift);
    tcg_gen_vec_add16_i64(s, d, d, a);
}

static void gen_usra32_i32(TCGContext *s, TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(s, a, a, shift);
    tcg_gen_add_i32(s, d, d, a);
}

static void gen_usra64_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(s, a, a, shift);
    tcg_gen_add_i64(s, d, d, a);
}

static void gen_usra_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(s, vece, a, a, sh);
    tcg_gen_add_vec(s, vece, d, d, a);
}

static void gen_shr8_ins_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    uint64_t mask = dup_const(MO_8, 0xff >> shift);
@@ -9657,52 +9597,6 @@ static void gen_shr_ins_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a
static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
                                 int immh, int immb, int opcode, int rn, int rd)
{
    static const GVecGen2i ssra_op[4] = {
        { .fni8 = gen_ssra8_i64,
          .fniv = gen_ssra_vec,
          .load_dest = true,
          .opc = INDEX_op_sari_vec,
          .vece = MO_8 },
        { .fni8 = gen_ssra16_i64,
          .fniv = gen_ssra_vec,
          .load_dest = true,
          .opc = INDEX_op_sari_vec,
          .vece = MO_16 },
        { .fni4 = gen_ssra32_i32,
          .fniv = gen_ssra_vec,
          .load_dest = true,
          .opc = INDEX_op_sari_vec,
          .vece = MO_32 },
        { .fni8 = gen_ssra64_i64,
          .fniv = gen_ssra_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opc = INDEX_op_sari_vec,
          .vece = MO_64 },
    };
    static const GVecGen2i usra_op[4] = {
        { .fni8 = gen_usra8_i64,
          .fniv = gen_usra_vec,
          .load_dest = true,
          .opc = INDEX_op_shri_vec,
          .vece = MO_8, },
        { .fni8 = gen_usra16_i64,
          .fniv = gen_usra_vec,
          .load_dest = true,
          .opc = INDEX_op_shri_vec,
          .vece = MO_16, },
        { .fni4 = gen_usra32_i32,
          .fniv = gen_usra_vec,
          .load_dest = true,
          .opc = INDEX_op_shri_vec,
          .vece = MO_32, },
        { .fni8 = gen_usra64_i64,
          .fniv = gen_usra_vec,
          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
          .load_dest = true,
          .opc = INDEX_op_shri_vec,
          .vece = MO_64, },
    };
    static const GVecGen2i sri_op[4] = {
        { .fni8 = gen_shr8_ins_i64,
          .fniv = gen_shr_ins_vec,

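Nothing is dropped by the removal above: the same helpers and GVecGen2i tables reappear below in translate.c, now with external linkage. Each table entry offers tcg_gen_gvec_2i() several expansion strategies for one element size: .fniv together with .opc is used when the host TCG backend supports the required vector shift, .fni8/.fni4 are the 64-/32-bit integer fallbacks (a 128-bit vector is handled as, for example, two calls of an .fni8 helper), and .load_dest marks the destination as an input so the accumulate reads the old value of rd.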
@@ -5933,6 +5933,113 @@ const GVecGen3 bif_op = {
    .load_dest = true
};

static void gen_ssra8_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(s, a, a, shift);
    tcg_gen_vec_add8_i64(s, d, d, a);
}

static void gen_ssra16_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(s, a, a, shift);
    tcg_gen_vec_add16_i64(s, d, d, a);
}

static void gen_ssra32_i32(TCGContext *s, TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(s, a, a, shift);
    tcg_gen_add_i32(s, d, d, a);
}

static void gen_ssra64_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(s, a, a, shift);
    tcg_gen_add_i64(s, d, d, a);
}

static void gen_ssra_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(s, vece, a, a, sh);
    tcg_gen_add_vec(s, vece, d, d, a);
}

const GVecGen2i ssra_op[4] = {
    { .fni8 = gen_ssra8_i64,
      .fniv = gen_ssra_vec,
      .load_dest = true,
      .opc = INDEX_op_sari_vec,
      .vece = MO_8 },
    { .fni8 = gen_ssra16_i64,
      .fniv = gen_ssra_vec,
      .load_dest = true,
      .opc = INDEX_op_sari_vec,
      .vece = MO_16 },
    { .fni4 = gen_ssra32_i32,
      .fniv = gen_ssra_vec,
      .load_dest = true,
      .opc = INDEX_op_sari_vec,
      .vece = MO_32 },
    { .fni8 = gen_ssra64_i64,
      .fniv = gen_ssra_vec,
      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
      .load_dest = true,
      .opc = INDEX_op_sari_vec,
      .vece = MO_64 },
};

static void gen_usra8_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(s, a, a, shift);
    tcg_gen_vec_add8_i64(s, d, d, a);
}

static void gen_usra16_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(s, a, a, shift);
    tcg_gen_vec_add16_i64(s, d, d, a);
}

static void gen_usra32_i32(TCGContext *s, TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(s, a, a, shift);
    tcg_gen_add_i32(s, d, d, a);
}

static void gen_usra64_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(s, a, a, shift);
    tcg_gen_add_i64(s, d, d, a);
}

static void gen_usra_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(s, vece, a, a, sh);
    tcg_gen_add_vec(s, vece, d, d, a);
}

const GVecGen2i usra_op[4] = {
    { .fni8 = gen_usra8_i64,
      .fniv = gen_usra_vec,
      .load_dest = true,
      .opc = INDEX_op_shri_vec,
      .vece = MO_8, },
    { .fni8 = gen_usra16_i64,
      .fniv = gen_usra_vec,
      .load_dest = true,
      .opc = INDEX_op_shri_vec,
      .vece = MO_16, },
    { .fni4 = gen_usra32_i32,
      .fniv = gen_usra_vec,
      .load_dest = true,
      .opc = INDEX_op_shri_vec,
      .vece = MO_32, },
    { .fni8 = gen_usra64_i64,
      .fniv = gen_usra_vec,
      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
      .load_dest = true,
      .opc = INDEX_op_shri_vec,
      .vece = MO_64, },
};

/* Translate a NEON data processing instruction. Return nonzero if the
instruction is invalid.
@@ -6572,6 +6679,25 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
}
return 0;
case 1: /* VSRA */
/* Right shift comes here negative. */
shift = -shift;
/* Shifts larger than the element size are architecturally
* valid. Unsigned results in all zeros; signed results
* in all sign bits.
*/
if (!u) {
tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, vec_size,
MIN(shift, (8 << size) - 1),
&ssra_op[size]);
} else if (shift >= 8 << size) {
/* rd += 0 */
} else {
tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, vec_size,
shift, &usra_op[size]);
}
return 0;
case 5: /* VSHL, VSLI */
if (!u) { /* VSHL */
/* Shifts larger than the element size are
@@ -6604,12 +6730,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
neon_load_reg64(s, s->V0, rm + pass);
tcg_gen_movi_i64(tcg_ctx, s->V1, imm);
switch (op) {
case 1: /* VSRA */
if (u)
gen_helper_neon_shl_u64(tcg_ctx, s->V0, s->V0, s->V1);
else
gen_helper_neon_shl_s64(tcg_ctx, s->V0, s->V0, s->V1);
break;
case 2: /* VRSHR */
case 3: /* VRSRA */
if (u)
@@ -6637,7 +6757,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
default:
g_assert_not_reached();
}
- if (op == 1 || op == 3) {
+ if (op == 3) {
/* Accumulate. */
neon_load_reg64(s, s->V1, rd + pass);
tcg_gen_add_i64(tcg_ctx, s->V0, s->V0, s->V1);
@@ -6664,9 +6784,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
tmp2 = tcg_temp_new_i32(tcg_ctx);
tcg_gen_movi_i32(tcg_ctx, tmp2, imm);
switch (op) {
case 1: /* VSRA */
GEN_NEON_INTEGER_OP(shl);
break;
case 2: /* VRSHR */
case 3: /* VRSRA */
GEN_NEON_INTEGER_OP(rshl);
@@ -6706,7 +6823,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
}
tcg_temp_free_i32(tcg_ctx, tmp2);
- if (op == 1 || op == 3) {
+ if (op == 3) {
/* Accumulate. */
tmp2 = neon_load_reg(s, rd, pass);
gen_neon_add(s, size, tmp, tmp2);

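The removed case 1 arms in the 64-bit and 32-bit per-element loops, and the accumulate condition narrowing from (op == 1 || op == 3) to (op == 3), follow from VSRA now being fully handled by the gvec path before those loops are reached. In that path, the signed shift is clamped to (8 << size) - 1 because an arithmetic shift by the element width or more still produces all sign bits, whereas an out-of-range unsigned shift contributes nothing, so the accumulate is skipped (rd += 0). A standalone sketch of that architectural rule for 8-bit lanes (hypothetical values; the clamp mirrors what the translator emits rather than relying on C shift semantics):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        int8_t  sa = -0x40;        /* 0xC0 viewed as a signed lane */
        uint8_t ua = 0xC0;         /* the same bits, unsigned */
        int     shift = 12;        /* wider than the 8-bit lane */

        int8_t  s_res = sa >> (shift > 7 ? 7 : shift);              /* -1: all sign bits */
        uint8_t u_res = (shift >= 8) ? 0 : (uint8_t)(ua >> shift);  /* 0 */

        printf("signed: %d, unsigned: %u\n", s_res, u_res);         /* prints -1, 0 */
        return 0;
    }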
@@ -195,6 +195,8 @@ static inline TCGv_i32 get_ahp_flag(DisasContext *s)
extern const GVecGen3 bsl_op;
extern const GVecGen3 bit_op;
extern const GVecGen3 bif_op;
extern const GVecGen2i ssra_op[4];
extern const GVecGen2i usra_op[4];
/*
* Forward to the isar_feature_* tests given a DisasContext pointer.
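Exporting ssra_op and usra_op here completes the move: translate.c now defines the tables, while handle_vec_simd_shri in translate-a64.c keeps referencing them through these externs (hence the new entries in the generated symbol headers). One call then expands the whole vector operation, as in the new VSRA arm (offsets and sizes as computed in disas_neon_data_insn):

    tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, vec_size,
                    shift, &usra_op[size]);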