From edb36c7505a22f6da2498f9652cdb37e529287c9 Mon Sep 17 00:00:00 2001
From: Lioncash
Date: Sat, 10 Nov 2018 10:32:29 -0500
Subject: [PATCH] target/arm: Use gvec for VSRA

---
 qemu/aarch64.h                  |   2 +
 qemu/aarch64eb.h                |   2 +
 qemu/header_gen.py              |   2 +
 qemu/target/arm/translate-a64.c | 106 ------------------------
 qemu/target/arm/translate.c     | 139 +++++++++++++++++++++++++++++---
 qemu/target/arm/translate.h     |   2 +
 6 files changed, 136 insertions(+), 117 deletions(-)

diff --git a/qemu/aarch64.h b/qemu/aarch64.h
index c89e5cf9..dc6de381 100644
--- a/qemu/aarch64.h
+++ b/qemu/aarch64.h
@@ -4262,10 +4262,12 @@
 #define raise_exception raise_exception_aarch64
 #define read_cpu_reg read_cpu_reg_aarch64
 #define read_cpu_reg_sp read_cpu_reg_sp_aarch64
+#define ssra_op ssra_op_aarch64
 #define sve_access_check sve_access_check_aarch64
 #define sve_exception_el sve_exception_el_aarch64
 #define sve_zcr_len_for_el sve_zcr_len_for_el_aarch64
 #define unallocated_encoding unallocated_encoding_aarch64
+#define usra_op usra_op_aarch64
 #define vfp_expand_imm vfp_expand_imm_aarch64
 #define write_fp_dreg write_fp_dreg_aarch64
 #endif
diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h
index c36870a3..8e5f21f7 100644
--- a/qemu/aarch64eb.h
+++ b/qemu/aarch64eb.h
@@ -4262,10 +4262,12 @@
 #define raise_exception raise_exception_aarch64eb
 #define read_cpu_reg read_cpu_reg_aarch64eb
 #define read_cpu_reg_sp read_cpu_reg_sp_aarch64eb
+#define ssra_op ssra_op_aarch64eb
 #define sve_access_check sve_access_check_aarch64eb
 #define sve_exception_el sve_exception_el_aarch64eb
 #define sve_zcr_len_for_el sve_zcr_len_for_el_aarch64eb
 #define unallocated_encoding unallocated_encoding_aarch64eb
+#define usra_op usra_op_aarch64eb
 #define vfp_expand_imm vfp_expand_imm_aarch64eb
 #define write_fp_dreg write_fp_dreg_aarch64eb
 #endif
diff --git a/qemu/header_gen.py b/qemu/header_gen.py
index b35cd212..daaffac6 100644
--- a/qemu/header_gen.py
+++ b/qemu/header_gen.py
@@ -4287,10 +4287,12 @@ aarch64_symbols = (
     'raise_exception',
     'read_cpu_reg',
     'read_cpu_reg_sp',
+    'ssra_op',
     'sve_access_check',
     'sve_exception_el',
     'sve_zcr_len_for_el',
     'unallocated_encoding',
+    'usra_op',
     'vfp_expand_imm',
     'write_fp_dreg',
 )
diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c
index e032bab7..a2212ffd 100644
--- a/qemu/target/arm/translate-a64.c
+++ b/qemu/target/arm/translate-a64.c
@@ -9542,66 +9542,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
     }
 }
 
-static void gen_ssra8_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_sar8i_i64(s, a, a, shift);
-    tcg_gen_vec_add8_i64(s, d, d, a);
-}
-
-static void gen_ssra16_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_sar16i_i64(s, a, a, shift);
-    tcg_gen_vec_add16_i64(s, d, d, a);
-}
-
-static void gen_ssra32_i32(TCGContext *s, TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_sari_i32(s, a, a, shift);
-    tcg_gen_add_i32(s, d, d, a);
-}
-
-static void gen_ssra64_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_sari_i64(s, a, a, shift);
-    tcg_gen_add_i64(s, d, d, a);
-}
-
-static void gen_ssra_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    tcg_gen_sari_vec(s, vece, a, a, sh);
-    tcg_gen_add_vec(s, vece, d, d, a);
-}
-
-static void gen_usra8_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_shr8i_i64(s, a, a, shift);
-    tcg_gen_vec_add8_i64(s, d, d, a);
-}
-
-static void gen_usra16_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_vec_shr16i_i64(s, a, a, shift);
-    tcg_gen_vec_add16_i64(s, d, d, a);
-}
-
-static void gen_usra32_i32(TCGContext *s, TCGv_i32 d, TCGv_i32 a, int32_t shift)
-{
-    tcg_gen_shri_i32(s, a, a, shift);
-    tcg_gen_add_i32(s, d, d, a);
-}
-
-static void gen_usra64_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
-{
-    tcg_gen_shri_i64(s, a, a, shift);
-    tcg_gen_add_i64(s, d, d, a);
-}
-
-static void gen_usra_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
-{
-    tcg_gen_shri_vec(s, vece, a, a, sh);
-    tcg_gen_add_vec(s, vece, d, d, a);
-}
-
 static void gen_shr8_ins_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
 {
     uint64_t mask = dup_const(MO_8, 0xff >> shift);
@@ -9657,52 +9597,6 @@ static void gen_shr_ins_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a
 static void handle_vec_simd_shri(DisasContext *s, bool is_q, bool is_u,
                                  int immh, int immb, int opcode, int rn, int rd)
 {
-    static const GVecGen2i ssra_op[4] = {
-        { .fni8 = gen_ssra8_i64,
-          .fniv = gen_ssra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_sari_vec,
-          .vece = MO_8 },
-        { .fni8 = gen_ssra16_i64,
-          .fniv = gen_ssra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_sari_vec,
-          .vece = MO_16 },
-        { .fni4 = gen_ssra32_i32,
-          .fniv = gen_ssra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_sari_vec,
-          .vece = MO_32 },
-        { .fni8 = gen_ssra64_i64,
-          .fniv = gen_ssra_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opc = INDEX_op_sari_vec,
-          .vece = MO_64 },
-    };
-    static const GVecGen2i usra_op[4] = {
-        { .fni8 = gen_usra8_i64,
-          .fniv = gen_usra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_8, },
-        { .fni8 = gen_usra16_i64,
-          .fniv = gen_usra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_16, },
-        { .fni4 = gen_usra32_i32,
-          .fniv = gen_usra_vec,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_32, },
-        { .fni8 = gen_usra64_i64,
-          .fniv = gen_usra_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .load_dest = true,
-          .opc = INDEX_op_shri_vec,
-          .vece = MO_64, },
-    };
     static const GVecGen2i sri_op[4] = {
         { .fni8 = gen_shr8_ins_i64,
           .fniv = gen_shr_ins_vec,
diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c
index a8f2b99b..3e219481 100644
--- a/qemu/target/arm/translate.c
+++ b/qemu/target/arm/translate.c
@@ -5933,6 +5933,113 @@ const GVecGen3 bif_op = {
     .load_dest = true
 };
 
+static void gen_ssra8_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_vec_sar8i_i64(s, a, a, shift);
+    tcg_gen_vec_add8_i64(s, d, d, a);
+}
+
+static void gen_ssra16_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_vec_sar16i_i64(s, a, a, shift);
+    tcg_gen_vec_add16_i64(s, d, d, a);
+}
+
+static void gen_ssra32_i32(TCGContext *s, TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+    tcg_gen_sari_i32(s, a, a, shift);
+    tcg_gen_add_i32(s, d, d, a);
+}
+
+static void gen_ssra64_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_sari_i64(s, a, a, shift);
+    tcg_gen_add_i64(s, d, d, a);
+}
+
+static void gen_ssra_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+    tcg_gen_sari_vec(s, vece, a, a, sh);
+    tcg_gen_add_vec(s, vece, d, d, a);
+}
+
+const GVecGen2i ssra_op[4] = {
+    { .fni8 = gen_ssra8_i64,
+      .fniv = gen_ssra_vec,
+      .load_dest = true,
+      .opc = INDEX_op_sari_vec,
+      .vece = MO_8 },
+    { .fni8 = gen_ssra16_i64,
+      .fniv = gen_ssra_vec,
+      .load_dest = true,
+      .opc = INDEX_op_sari_vec,
+      .vece = MO_16 },
+    { .fni4 = gen_ssra32_i32,
+      .fniv = gen_ssra_vec,
+      .load_dest = true,
+      .opc = INDEX_op_sari_vec,
+      .vece = MO_32 },
+    { .fni8 = gen_ssra64_i64,
+      .fniv = gen_ssra_vec,
+      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+      .load_dest = true,
+      .opc = INDEX_op_sari_vec,
+      .vece = MO_64 },
+};
+
+static void gen_usra8_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_vec_shr8i_i64(s, a, a, shift);
+    tcg_gen_vec_add8_i64(s, d, d, a);
+}
+
+static void gen_usra16_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_vec_shr16i_i64(s, a, a, shift);
+    tcg_gen_vec_add16_i64(s, d, d, a);
+}
+
+static void gen_usra32_i32(TCGContext *s, TCGv_i32 d, TCGv_i32 a, int32_t shift)
+{
+    tcg_gen_shri_i32(s, a, a, shift);
+    tcg_gen_add_i32(s, d, d, a);
+}
+
+static void gen_usra64_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t shift)
+{
+    tcg_gen_shri_i64(s, a, a, shift);
+    tcg_gen_add_i64(s, d, d, a);
+}
+
+static void gen_usra_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
+{
+    tcg_gen_shri_vec(s, vece, a, a, sh);
+    tcg_gen_add_vec(s, vece, d, d, a);
+}
+
+const GVecGen2i usra_op[4] = {
+    { .fni8 = gen_usra8_i64,
+      .fniv = gen_usra_vec,
+      .load_dest = true,
+      .opc = INDEX_op_shri_vec,
+      .vece = MO_8, },
+    { .fni8 = gen_usra16_i64,
+      .fniv = gen_usra_vec,
+      .load_dest = true,
+      .opc = INDEX_op_shri_vec,
+      .vece = MO_16, },
+    { .fni4 = gen_usra32_i32,
+      .fniv = gen_usra_vec,
+      .load_dest = true,
+      .opc = INDEX_op_shri_vec,
+      .vece = MO_32, },
+    { .fni8 = gen_usra64_i64,
+      .fniv = gen_usra_vec,
+      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+      .load_dest = true,
+      .opc = INDEX_op_shri_vec,
+      .vece = MO_64, },
+};
 
 /* Translate a NEON data processing instruction.  Return nonzero if the
    instruction is invalid.
@@ -6572,6 +6679,25 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                 }
                 return 0;
 
+            case 1: /* VSRA */
+                /* Right shift comes here negative. */
+                shift = -shift;
+                /* Shifts larger than the element size are architecturally
+                 * valid. Unsigned results in all zeros; signed results
+                 * in all sign bits.
+                 */
+                if (!u) {
+                    tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, vec_size,
+                                    MIN(shift, (8 << size) - 1),
+                                    &ssra_op[size]);
+                } else if (shift >= 8 << size) {
+                    /* rd += 0 */
+                } else {
+                    tcg_gen_gvec_2i(tcg_ctx, rd_ofs, rm_ofs, vec_size, vec_size,
+                                    shift, &usra_op[size]);
+                }
+                return 0;
+
             case 5: /* VSHL, VSLI */
                 if (!u) { /* VSHL */
                     /* Shifts larger than the element size are
@@ -6604,12 +6730,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                     neon_load_reg64(s, s->V0, rm + pass);
                     tcg_gen_movi_i64(tcg_ctx, s->V1, imm);
                     switch (op) {
-                    case 1: /* VSRA */
-                        if (u)
-                            gen_helper_neon_shl_u64(tcg_ctx, s->V0, s->V0, s->V1);
-                        else
-                            gen_helper_neon_shl_s64(tcg_ctx, s->V0, s->V0, s->V1);
-                        break;
                     case 2: /* VRSHR */
                     case 3: /* VRSRA */
                         if (u)
@@ -6637,7 +6757,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                     default:
                         g_assert_not_reached();
                     }
-                    if (op == 1 || op == 3) {
+                    if (op == 3) {
                         /* Accumulate. */
                         neon_load_reg64(s, s->V1, rd + pass);
                         tcg_gen_add_i64(tcg_ctx, s->V0, s->V0, s->V1);
@@ -6664,9 +6784,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                     tmp2 = tcg_temp_new_i32(tcg_ctx);
                     tcg_gen_movi_i32(tcg_ctx, tmp2, imm);
                     switch (op) {
-                    case 1: /* VSRA */
-                        GEN_NEON_INTEGER_OP(shl);
-                        break;
                     case 2: /* VRSHR */
                     case 3: /* VRSRA */
                         GEN_NEON_INTEGER_OP(rshl);
@@ -6706,7 +6823,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
                     }
                     tcg_temp_free_i32(tcg_ctx, tmp2);
 
-                    if (op == 1 || op == 3) {
+                    if (op == 3) {
                         /* Accumulate. */
                         tmp2 = neon_load_reg(s, rd, pass);
                         gen_neon_add(s, size, tmp, tmp2);
diff --git a/qemu/target/arm/translate.h b/qemu/target/arm/translate.h
index bfa380a9..d67af662 100644
--- a/qemu/target/arm/translate.h
+++ b/qemu/target/arm/translate.h
@@ -195,6 +195,8 @@ static inline TCGv_i32 get_ahp_flag(DisasContext *s)
 extern const GVecGen3 bsl_op;
 extern const GVecGen3 bit_op;
 extern const GVecGen3 bif_op;
+extern const GVecGen2i ssra_op[4];
+extern const GVecGen2i usra_op[4];
 
 /*
  * Forward to the isar_feature_* tests given a DisasContext pointer.
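
Note (not part of the patch): the element-wise behaviour the new gvec expansion encodes, shift each source element right by the immediate and accumulate into the destination, can be modelled by the small standalone C sketch below. It is illustrative only; the helper names ssra_element and usra_element are invented for the example. The out-of-range handling mirrors the code added to disas_neon_data_insn() above: the signed case clamps the shift via MIN(shift, (8 << size) - 1), so the addend becomes the source element's sign bits, while the unsigned case with shift >= 8 << size skips the expansion entirely and leaves rd unchanged.

/*
 * Illustrative model only (not QEMU code): per-element semantics of
 * NEON VSRA as expanded by the patch.  Assumes the compiler implements
 * >> on negative signed values as an arithmetic shift, as typical hosts do.
 */
#include <stdint.h>
#include <stdio.h>

static int64_t ssra_element(int64_t d, int64_t a, unsigned shift, unsigned esize)
{
    if (shift >= esize) {
        shift = esize - 1;      /* mirrors MIN(shift, (8 << size) - 1): addend is all sign bits */
    }
    return d + (a >> shift);
}

static uint64_t usra_element(uint64_t d, uint64_t a, unsigned shift, unsigned esize)
{
    if (shift >= esize) {
        return d;               /* addend is all zeros: rd += 0 */
    }
    return d + (a >> shift);
}

int main(void)
{
    /* 8-bit elements, shift by 3: source byte 0x90 is -112 signed, 144 unsigned. */
    printf("ssra: %d\n", (int)(int8_t)ssra_element(1, (int8_t)0x90, 3, 8));   /* 1 + (-14) = -13 */
    printf("usra: %u\n", (unsigned)(uint8_t)usra_element(1, 0x90, 3, 8));     /* 1 + 18 = 19 */
    return 0;
}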