From 985acb9cde971ced3b12a6316d681e7489159420 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Sat, 10 Nov 2018 11:02:55 -0500
Subject: [PATCH] target/arm: Use gvec for NEON_3R_VTST_VCEQ, NEON_3R_VCGT,
 NEON_3R_VCGE

Move cmtst_op expanders from translate-a64.c.

Backports commit ea580fa312674c1ba82a8b137caf42b0609ce3e3 from qemu
---
 qemu/aarch64.h                  |  2 +
 qemu/aarch64eb.h                |  2 +
 qemu/arm.h                      |  2 +
 qemu/armeb.h                    |  2 +
 qemu/header_gen.py              |  4 ++
 qemu/target/arm/translate-a64.c | 38 ----------------
 qemu/target/arm/translate.c     | 81 +++++++++++++++++++++++----------
 qemu/target/arm/translate.h     |  2 +
 8 files changed, 72 insertions(+), 61 deletions(-)

diff --git a/qemu/aarch64.h b/qemu/aarch64.h
index 12644408..da10ce74 100644
--- a/qemu/aarch64.h
+++ b/qemu/aarch64.h
@@ -3287,11 +3287,13 @@
 #define bif_op bif_op_aarch64
 #define bit_op bit_op_aarch64
 #define bsl_op bsl_op_aarch64
+#define cmtst_op cmtst_op_aarch64
 #define cpu_reg cpu_reg_aarch64
 #define cpu_reg_sp cpu_reg_sp_aarch64
 #define disas_sve disas_sve_aarch64
 #define fp_exception_el fp_exception_el_aarch64
 #define gen_a64_set_pc_im gen_a64_set_pc_im_aarch64
+#define gen_cmtst_i64 gen_cmtst_i64_aarch64
 #define helper_advsimd_acge_f16 helper_advsimd_acge_f16_aarch64
 #define helper_advsimd_acgt_f16 helper_advsimd_acgt_f16_aarch64
 #define helper_advsimd_add2h helper_advsimd_add2h_aarch64
diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h
index d4df6792..fb4022b3 100644
--- a/qemu/aarch64eb.h
+++ b/qemu/aarch64eb.h
@@ -3287,11 +3287,13 @@
 #define bif_op bif_op_aarch64eb
 #define bit_op bit_op_aarch64eb
 #define bsl_op bsl_op_aarch64eb
+#define cmtst_op cmtst_op_aarch64eb
 #define cpu_reg cpu_reg_aarch64eb
 #define cpu_reg_sp cpu_reg_sp_aarch64eb
 #define disas_sve disas_sve_aarch64eb
 #define fp_exception_el fp_exception_el_aarch64eb
 #define gen_a64_set_pc_im gen_a64_set_pc_im_aarch64eb
+#define gen_cmtst_i64 gen_cmtst_i64_aarch64eb
 #define helper_advsimd_acge_f16 helper_advsimd_acge_f16_aarch64eb
 #define helper_advsimd_acgt_f16 helper_advsimd_acgt_f16_aarch64eb
 #define helper_advsimd_add2h helper_advsimd_add2h_aarch64eb
diff --git a/qemu/arm.h b/qemu/arm.h
index 97173de2..08c749fe 100644
--- a/qemu/arm.h
+++ b/qemu/arm.h
@@ -3276,7 +3276,9 @@
 #define arm_reset_cpu arm_reset_cpu_arm
 #define arm_set_cpu_off arm_set_cpu_off_arm
 #define arm_set_cpu_on arm_set_cpu_on_arm
+#define cmtst_op cmtst_op_arm
 #define fp_exception_el fp_exception_el_arm
+#define gen_cmtst_i64 gen_cmtst_i64_arm
 #define mla_op mla_op_arm
 #define mls_op mls_op_arm
 #define raise_exception raise_exception_arm
diff --git a/qemu/armeb.h b/qemu/armeb.h
index 1e7f448d..9adc5d70 100644
--- a/qemu/armeb.h
+++ b/qemu/armeb.h
@@ -3276,7 +3276,9 @@
 #define arm_reset_cpu arm_reset_cpu_armeb
 #define arm_set_cpu_off arm_set_cpu_off_armeb
 #define arm_set_cpu_on arm_set_cpu_on_armeb
+#define cmtst_op cmtst_op_armeb
 #define fp_exception_el fp_exception_el_armeb
+#define gen_cmtst_i64 gen_cmtst_i64_armeb
 #define mla_op mla_op_armeb
 #define mls_op mls_op_armeb
 #define raise_exception raise_exception_armeb
diff --git a/qemu/header_gen.py b/qemu/header_gen.py
index b160930d..c7b9fc0f 100644
--- a/qemu/header_gen.py
+++ b/qemu/header_gen.py
@@ -3285,7 +3285,9 @@ arm_symbols = (
     'arm_reset_cpu',
     'arm_set_cpu_off',
     'arm_set_cpu_on',
+    'cmtst_op',
     'fp_exception_el',
+    'gen_cmtst_i64',
     'mla_op',
     'mls_op',
     'raise_exception',
@@ -3318,11 +3320,13 @@ aarch64_symbols = (
     'bif_op',
     'bit_op',
     'bsl_op',
+    'cmtst_op',
     'cpu_reg',
     'cpu_reg_sp',
     'disas_sve',
     'fp_exception_el',
     'gen_a64_set_pc_im',
+    'gen_cmtst_i64',
     'helper_advsimd_acge_f16',
     'helper_advsimd_acgt_f16',
     'helper_advsimd_add2h',
diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c
index 3eb747fb..e6f73691 100644
--- a/qemu/target/arm/translate-a64.c
+++ b/qemu/target/arm/translate-a64.c
@@ -8170,28 +8170,6 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
     }
 }
 
-/* CMTST : test is "if (X & Y != 0)". */
-static void gen_cmtst_i32(TCGContext *s, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
-{
-    tcg_gen_and_i32(s, d, a, b);
-    tcg_gen_setcondi_i32(s, TCG_COND_NE, d, d, 0);
-    tcg_gen_neg_i32(s, d, d);
-}
-
-static void gen_cmtst_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
-{
-    tcg_gen_and_i64(s, d, a, b);
-    tcg_gen_setcondi_i64(s, TCG_COND_NE, d, d, 0);
-    tcg_gen_neg_i64(s, d, d);
-}
-
-static void gen_cmtst_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
-{
-    tcg_gen_and_vec(s, vece, d, a, b);
-    tcg_gen_dupi_vec(s, vece, a, 0);
-    tcg_gen_cmp_vec(s, TCG_COND_NE, vece, d, d, a);
-}
-
 static void handle_3same_64(DisasContext *s, int opcode, bool u,
                             TCGv_i64 tcg_rd, TCGv_i64 tcg_rn, TCGv_i64 tcg_rm)
 {
@@ -10572,22 +10550,6 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
 /* Integer op subgroup of C3.6.16. */
 static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
 {
-    static const GVecGen3 cmtst_op[4] = {
-        { .fni4 = gen_helper_neon_tst_u8,
-          .fniv = gen_cmtst_vec,
-          .vece = MO_8 },
-        { .fni4 = gen_helper_neon_tst_u16,
-          .fniv = gen_cmtst_vec,
-          .vece = MO_16 },
-        { .fni4 = gen_cmtst_i32,
-          .fniv = gen_cmtst_vec,
-          .vece = MO_32 },
-        { .fni8 = gen_cmtst_i64,
-          .fniv = gen_cmtst_vec,
-          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
-          .vece = MO_64 },
-    };
-
     TCGContext *tcg_ctx = s->uc->tcg_ctx;
     int is_q = extract32(insn, 30, 1);
     int u = extract32(insn, 29, 1);
diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c
index 8c1c9833..5296b6e8 100644
--- a/qemu/target/arm/translate.c
+++ b/qemu/target/arm/translate.c
@@ -6303,6 +6303,44 @@ const GVecGen3 mls_op[4] = {
       .vece = MO_64 },
 };
 
+/* CMTST : test is "if (X & Y != 0)". */
+static void gen_cmtst_i32(TCGContext *s, TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
+{
+    tcg_gen_and_i32(s, d, a, b);
+    tcg_gen_setcondi_i32(s, TCG_COND_NE, d, d, 0);
+    tcg_gen_neg_i32(s, d, d);
+}
+
+void gen_cmtst_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
+{
+    tcg_gen_and_i64(s, d, a, b);
+    tcg_gen_setcondi_i64(s, TCG_COND_NE, d, d, 0);
+    tcg_gen_neg_i64(s, d, d);
+}
+
+static void gen_cmtst_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
+{
+    tcg_gen_and_vec(s, vece, d, a, b);
+    tcg_gen_dupi_vec(s, vece, a, 0);
+    tcg_gen_cmp_vec(s, TCG_COND_NE, vece, d, d, a);
+}
+
+const GVecGen3 cmtst_op[4] = {
+    { .fni4 = gen_helper_neon_tst_u8,
+      .fniv = gen_cmtst_vec,
+      .vece = MO_8 },
+    { .fni4 = gen_helper_neon_tst_u16,
+      .fniv = gen_cmtst_vec,
+      .vece = MO_16 },
+    { .fni4 = gen_cmtst_i32,
+      .fniv = gen_cmtst_vec,
+      .vece = MO_32 },
+    { .fni8 = gen_cmtst_i64,
+      .fniv = gen_cmtst_vec,
+      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+      .vece = MO_64 },
+};
+
 /* Translate a NEON data processing instruction.  Return nonzero if the
    instruction is invalid.
    We process data in a mixture of 32-bit and 64-bit chunks.
@@ -6510,6 +6548,26 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
                            u ? &mls_op[size] : &mla_op[size]);
             return 0;
+
+        case NEON_3R_VTST_VCEQ:
+            if (u) { /* VCEQ */
+                tcg_gen_gvec_cmp(tcg_ctx, TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs,
+                                 vec_size, vec_size);
+            } else { /* VTST */
+                tcg_gen_gvec_3(tcg_ctx, rd_ofs, rn_ofs, rm_ofs,
+                               vec_size, vec_size, &cmtst_op[size]);
+            }
+            return 0;
+
+        case NEON_3R_VCGT:
+            tcg_gen_gvec_cmp(tcg_ctx, u ? TCG_COND_GTU : TCG_COND_GT, size,
+                             rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
+            return 0;
+
+        case NEON_3R_VCGE:
+            tcg_gen_gvec_cmp(tcg_ctx, u ? TCG_COND_GEU : TCG_COND_GE, size,
+                             rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
+            return 0;
         }
 
         if (size == 3) {
@@ -6663,12 +6721,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
         case NEON_3R_VQSUB:
             GEN_NEON_INTEGER_OP_ENV(qsub);
             break;
-        case NEON_3R_VCGT:
-            GEN_NEON_INTEGER_OP(cgt);
-            break;
-        case NEON_3R_VCGE:
-            GEN_NEON_INTEGER_OP(cge);
-            break;
         case NEON_3R_VSHL:
             GEN_NEON_INTEGER_OP(shl);
             break;
@@ -6696,23 +6748,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
             tmp2 = neon_load_reg(s, rd, pass);
             gen_neon_add(s, size, tmp, tmp2);
             break;
-        case NEON_3R_VTST_VCEQ:
-            if (!u) { /* VTST */
-                switch (size) {
-                case 0: gen_helper_neon_tst_u8(tcg_ctx, tmp, tmp, tmp2); break;
-                case 1: gen_helper_neon_tst_u16(tcg_ctx, tmp, tmp, tmp2); break;
-                case 2: gen_helper_neon_tst_u32(tcg_ctx, tmp, tmp, tmp2); break;
-                default: abort();
-                }
-            } else { /* VCEQ */
-                switch (size) {
-                case 0: gen_helper_neon_ceq_u8(tcg_ctx, tmp, tmp, tmp2); break;
-                case 1: gen_helper_neon_ceq_u16(tcg_ctx, tmp, tmp, tmp2); break;
-                case 2: gen_helper_neon_ceq_u32(tcg_ctx, tmp, tmp, tmp2); break;
-                default: abort();
-                }
-            }
-            break;
         case NEON_3R_VMUL:
             /* VMUL.P8; other cases already eliminated. */
             gen_helper_neon_mul_p8(tcg_ctx, tmp, tmp, tmp2);
diff --git a/qemu/target/arm/translate.h b/qemu/target/arm/translate.h
index cb1cdc41..8c7022b3 100644
--- a/qemu/target/arm/translate.h
+++ b/qemu/target/arm/translate.h
@@ -195,12 +195,14 @@ static inline TCGv_i32 get_ahp_flag(DisasContext *s)
 extern const GVecGen3 bsl_op;
 extern const GVecGen3 bit_op;
 extern const GVecGen3 bif_op;
+extern const GVecGen3 cmtst_op[4];
 extern const GVecGen3 mla_op[4];
 extern const GVecGen3 mls_op[4];
 extern const GVecGen2i ssra_op[4];
 extern const GVecGen2i usra_op[4];
 extern const GVecGen2i sri_op[4];
 extern const GVecGen2i sli_op[4];
+void gen_cmtst_i64(TCGContext* tcg_ctx, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b);
 
 /*
  * Forward to the isar_feature_* tests given a DisasContext pointer.
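
Note (not part of the patch): the per-lane semantics that gen_cmtst_i32, gen_cmtst_i64
and gen_cmtst_vec all implement is d = ((a & b) != 0) ? all-ones : 0; setcond yields
0 or 1 and the negation widens that into a full lane mask. A minimal standalone C
sketch of this, using a hypothetical cmtst32 helper and an arbitrarily chosen 32-bit
lane width (the patch itself covers 8/16/32/64-bit lanes):

/* Illustration only; cmtst32 is a made-up name, not a QEMU symbol. */
#include <stdint.h>
#include <stdio.h>

static uint32_t cmtst32(uint32_t a, uint32_t b)
{
    /* (a & b) != 0 yields 0 or 1; unsigned negation turns 1 into
     * 0xffffffff, mirroring setcondi + neg in the TCG expanders. */
    return -(uint32_t)((a & b) != 0);
}

int main(void)
{
    printf("%08x\n", cmtst32(0x0f, 0xf0)); /* no common bits -> 00000000 */
    printf("%08x\n", cmtst32(0x18, 0x10)); /* common bit set -> ffffffff */
    return 0;
}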