From c54b2776f6ec906af2dba9ff2b86d206cc11d1d3 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Thu, 16 May 2019 15:03:12 -0400
Subject: [PATCH] tcg: Specify optional vector requirements with a list

Replace the single opcode in .opc with a null-terminated array
in .opt_opc.  We still require that all opcodes be used with the
same .vece.

Validate the contents of this list with CONFIG_DEBUG_TCG.
All tcg_gen_*_vec functions will check any list active during
.fniv expansion.  Swap the active list in and out as we expand
other opcodes, or take control away from the front-end function.

Convert all existing vector aware front ends.

Backports commit 53229a7703eeb2bbe101a19a33ef22aaf960c65b from qemu
---
 qemu/aarch64.h                  |   2 +
 qemu/aarch64eb.h                |   2 +
 qemu/arm.h                      |   2 +
 qemu/armeb.h                    |   2 +
 qemu/header_gen.py              |   2 +
 qemu/m68k.h                     |   2 +
 qemu/mips.h                     |   2 +
 qemu/mips64.h                   |   2 +
 qemu/mips64el.h                 |   2 +
 qemu/mipsel.h                   |   2 +
 qemu/powerpc.h                  |   2 +
 qemu/riscv32.h                  |   2 +
 qemu/riscv64.h                  |   2 +
 qemu/sparc.h                    |   2 +
 qemu/sparc64.h                  |   2 +
 qemu/target/arm/translate-sve.c |  70 ++++-----
 qemu/target/arm/translate.c     | 123 +++++++++++-----
 qemu/tcg/tcg-op-gvec.c          | 250 ++++++++++++++++++--------------
 qemu/tcg/tcg-op-gvec.h          |  12 +-
 qemu/tcg/tcg-op-vec.c           | 102 +++++++++++++
 qemu/tcg/tcg.h                  |  20 +++
 qemu/x86_64.h                   |   2 +
 22 files changed, 414 insertions(+), 195 deletions(-)

diff --git a/qemu/aarch64.h b/qemu/aarch64.h
index 47e5a909..f40a548d 100644
--- a/qemu/aarch64.h
+++ b/qemu/aarch64.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_aarch64
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_aarch64
 #define tcg_allowed tcg_allowed_aarch64
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_aarch64
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_aarch64
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_aarch64
 #define tcg_canonicalize_memop tcg_canonicalize_memop_aarch64
 #define tcg_commit tcg_commit_aarch64
 #define tcg_cond_to_jcc tcg_cond_to_jcc_aarch64
diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h
index a5a08a31..d52ae9b1 100644
--- a/qemu/aarch64eb.h
+++ b/qemu/aarch64eb.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_aarch64eb
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_aarch64eb
 #define tcg_allowed tcg_allowed_aarch64eb
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_aarch64eb
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_aarch64eb
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_aarch64eb
 #define tcg_canonicalize_memop tcg_canonicalize_memop_aarch64eb
 #define tcg_commit tcg_commit_aarch64eb
 #define tcg_cond_to_jcc tcg_cond_to_jcc_aarch64eb
diff --git a/qemu/arm.h b/qemu/arm.h
index a7a7ac28..20ff66d6 100644
--- a/qemu/arm.h
+++ b/qemu/arm.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_arm
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_arm
 #define tcg_allowed tcg_allowed_arm
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_arm
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_arm
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_arm
 #define tcg_canonicalize_memop tcg_canonicalize_memop_arm
 #define tcg_commit tcg_commit_arm
 #define tcg_cond_to_jcc tcg_cond_to_jcc_arm
diff --git a/qemu/armeb.h b/qemu/armeb.h
index f90c61fd..1141dd81 100644
--- a/qemu/armeb.h
+++ b/qemu/armeb.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_armeb
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_armeb
 #define tcg_allowed tcg_allowed_armeb
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_armeb
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_armeb
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_armeb
 #define tcg_canonicalize_memop tcg_canonicalize_memop_armeb
 #define tcg_commit tcg_commit_armeb
 #define tcg_cond_to_jcc tcg_cond_to_jcc_armeb
diff --git a/qemu/header_gen.py b/qemu/header_gen.py
index c8d39e50..5dbaabd0 100644
--- a/qemu/header_gen.py
+++ b/qemu/header_gen.py
@@ -2696,7 +2696,9 @@ symbols = (
     'tcg_add_param_i64',
     'tcg_add_target_add_op_defs',
     'tcg_allowed',
+    'tcg_assert_listed_vecop',
     'tcg_can_emit_vec_op',
+    'tcg_can_emit_vecop_list',
     'tcg_canonicalize_memop',
     'tcg_commit',
     'tcg_cond_to_jcc',
diff --git a/qemu/m68k.h b/qemu/m68k.h
index 93d6a770..2aaca8ad 100644
--- a/qemu/m68k.h
+++ b/qemu/m68k.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_m68k
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_m68k
 #define tcg_allowed tcg_allowed_m68k
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_m68k
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_m68k
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_m68k
 #define tcg_canonicalize_memop tcg_canonicalize_memop_m68k
 #define tcg_commit tcg_commit_m68k
 #define tcg_cond_to_jcc tcg_cond_to_jcc_m68k
diff --git a/qemu/mips.h b/qemu/mips.h
index 5ae41053..47e217b9 100644
--- a/qemu/mips.h
+++ b/qemu/mips.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_mips
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_mips
 #define tcg_allowed tcg_allowed_mips
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_mips
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_mips
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mips
 #define tcg_canonicalize_memop tcg_canonicalize_memop_mips
 #define tcg_commit tcg_commit_mips
 #define tcg_cond_to_jcc tcg_cond_to_jcc_mips
diff --git a/qemu/mips64.h b/qemu/mips64.h
index a9f42e1a..b3d124ca 100644
--- a/qemu/mips64.h
+++ b/qemu/mips64.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_mips64
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_mips64
 #define tcg_allowed tcg_allowed_mips64
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_mips64
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_mips64
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mips64
 #define tcg_canonicalize_memop tcg_canonicalize_memop_mips64
 #define tcg_commit tcg_commit_mips64
 #define tcg_cond_to_jcc tcg_cond_to_jcc_mips64
diff --git a/qemu/mips64el.h b/qemu/mips64el.h
index 2bdf6777..a5bb0d08 100644
--- a/qemu/mips64el.h
+++ b/qemu/mips64el.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_mips64el
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_mips64el
 #define tcg_allowed tcg_allowed_mips64el
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_mips64el
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_mips64el
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mips64el
 #define tcg_canonicalize_memop tcg_canonicalize_memop_mips64el
 #define tcg_commit tcg_commit_mips64el
 #define tcg_cond_to_jcc tcg_cond_to_jcc_mips64el
diff --git a/qemu/mipsel.h b/qemu/mipsel.h
index 08353036..f8388b93 100644
--- a/qemu/mipsel.h
+++ b/qemu/mipsel.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_mipsel
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_mipsel
 #define tcg_allowed tcg_allowed_mipsel
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_mipsel
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_mipsel
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_mipsel
 #define tcg_canonicalize_memop tcg_canonicalize_memop_mipsel
 #define tcg_commit tcg_commit_mipsel
 #define tcg_cond_to_jcc tcg_cond_to_jcc_mipsel
diff --git a/qemu/powerpc.h b/qemu/powerpc.h
index 606d8ce2..cf4ac5ee 100644
--- a/qemu/powerpc.h
+++ b/qemu/powerpc.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_powerpc
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_powerpc
 #define tcg_allowed tcg_allowed_powerpc
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_powerpc
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_powerpc
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_powerpc
 #define tcg_canonicalize_memop tcg_canonicalize_memop_powerpc
 #define tcg_commit tcg_commit_powerpc
 #define tcg_cond_to_jcc tcg_cond_to_jcc_powerpc
diff --git a/qemu/riscv32.h b/qemu/riscv32.h
index 516e7efc..d82ef037 100644
--- a/qemu/riscv32.h
+++ b/qemu/riscv32.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_riscv32
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_riscv32
 #define tcg_allowed tcg_allowed_riscv32
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_riscv32
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_riscv32
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_riscv32
 #define tcg_canonicalize_memop tcg_canonicalize_memop_riscv32
 #define tcg_commit tcg_commit_riscv32
 #define tcg_cond_to_jcc tcg_cond_to_jcc_riscv32
diff --git a/qemu/riscv64.h b/qemu/riscv64.h
index 9cf743af..4b110255 100644
--- a/qemu/riscv64.h
+++ b/qemu/riscv64.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_riscv64
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_riscv64
 #define tcg_allowed tcg_allowed_riscv64
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_riscv64
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_riscv64
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_riscv64
 #define tcg_canonicalize_memop tcg_canonicalize_memop_riscv64
 #define tcg_commit tcg_commit_riscv64
 #define tcg_cond_to_jcc tcg_cond_to_jcc_riscv64
diff --git a/qemu/sparc.h b/qemu/sparc.h
index e8cdee42..6f5ced2a 100644
--- a/qemu/sparc.h
+++ b/qemu/sparc.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_sparc
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_sparc
 #define tcg_allowed tcg_allowed_sparc
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_sparc
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_sparc
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_sparc
 #define tcg_canonicalize_memop tcg_canonicalize_memop_sparc
 #define tcg_commit tcg_commit_sparc
 #define tcg_cond_to_jcc tcg_cond_to_jcc_sparc
diff --git a/qemu/sparc64.h b/qemu/sparc64.h
index 64b31bfb..84f246a2 100644
--- a/qemu/sparc64.h
+++ b/qemu/sparc64.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_sparc64
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_sparc64
 #define tcg_allowed tcg_allowed_sparc64
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_sparc64
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_sparc64
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_sparc64
 #define tcg_canonicalize_memop tcg_canonicalize_memop_sparc64
 #define tcg_commit tcg_commit_sparc64
 #define tcg_cond_to_jcc tcg_cond_to_jcc_sparc64
diff --git a/qemu/target/arm/translate-sve.c b/qemu/target/arm/translate-sve.c
index d5ac6893..7a299810 100644
--- a/qemu/target/arm/translate-sve.c
+++ b/qemu/target/arm/translate-sve.c
@@ -3459,51 +3459,33 @@ static bool trans_SUB_zzi(DisasContext *s, arg_rri_esz *a)
 
 static bool trans_SUBR_zzi(DisasContext *s, arg_rri_esz *a)
 {
+    static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, 0 };
     static const GVecGen2s op[4] = {
-        {
-            tcg_gen_vec_sub8_i64,
-            NULL,
-            tcg_gen_sub_vec,
-            gen_helper_sve_subri_b,
-            INDEX_op_sub_vec,
-            0,
-            MO_8,
-            false,
-            true
-        },
-        {
-            tcg_gen_vec_sub16_i64,
-            NULL,
-            tcg_gen_sub_vec,
-            gen_helper_sve_subri_h,
-            INDEX_op_sub_vec,
-            0,
-            MO_16,
-            false,
-            true
-        },
-        {
-            NULL,
-            tcg_gen_sub_i32,
-            tcg_gen_sub_vec,
-            gen_helper_sve_subri_s,
-            INDEX_op_sub_vec,
-            0,
-            MO_32,
-            false,
-            true
-        },
-        {
-            tcg_gen_sub_i64,
-            NULL,
-            tcg_gen_sub_vec,
-            gen_helper_sve_subri_d,
-            INDEX_op_sub_vec,
-            0,
-            MO_64,
-            TCG_TARGET_REG_BITS == 64,
-            true
-        }
+        { .fni8 = tcg_gen_vec_sub8_i64,
+          .fniv = tcg_gen_sub_vec,
+          .fno = gen_helper_sve_subri_b,
+          .opt_opc = vecop_list,
+          .vece = MO_8,
+          .scalar_first = true },
+        { .fni8 = tcg_gen_vec_sub16_i64,
+          .fniv = tcg_gen_sub_vec,
+          .fno = gen_helper_sve_subri_h,
+          .opt_opc = vecop_list,
+          .vece = MO_16,
+          .scalar_first = true },
+        { .fni4 = tcg_gen_sub_i32,
+          .fniv = tcg_gen_sub_vec,
+          .fno = gen_helper_sve_subri_s,
+          .opt_opc = vecop_list,
+          .vece = MO_32,
+          .scalar_first = true },
+        { .fni8 = tcg_gen_sub_i64,
+          .fniv = tcg_gen_sub_vec,
+          .fno = gen_helper_sve_subri_d,
+          .opt_opc = vecop_list,
+          .prefer_i64 = TCG_TARGET_REG_BITS == 64,
+          .vece = MO_64,
+          .scalar_first = true }
     };
 
     if (a->esz == 0 && extract32(s->insn, 13, 1)) {
diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c
index ed35412d..a5570f6e 100644
--- a/qemu/target/arm/translate.c
+++ b/qemu/target/arm/translate.c
@@ -6016,27 +6016,31 @@ static void gen_ssra_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a, i
     tcg_gen_add_vec(s, vece, d, d, a);
 }
 
+static const TCGOpcode vecop_list_ssra[] = {
+    INDEX_op_sari_vec, INDEX_op_add_vec, 0
+};
+
 const GVecGen2i ssra_op[4] = {
     { .fni8 = gen_ssra8_i64,
       .fniv = gen_ssra_vec,
       .load_dest = true,
-      .opc = INDEX_op_sari_vec,
+      .opt_opc = vecop_list_ssra,
      .vece = MO_8 },
     { .fni8 = gen_ssra16_i64,
       .fniv = gen_ssra_vec,
       .load_dest = true,
-      .opc = INDEX_op_sari_vec,
+      .opt_opc = vecop_list_ssra,
       .vece = MO_16 },
     { .fni4 = gen_ssra32_i32,
       .fniv = gen_ssra_vec,
       .load_dest = true,
-      .opc = INDEX_op_sari_vec,
+      .opt_opc = vecop_list_ssra,
       .vece = MO_32 },
     { .fni8 = gen_ssra64_i64,
       .fniv = gen_ssra_vec,
       .prefer_i64 = TCG_TARGET_REG_BITS == 64,
       .load_dest = true,
-      .opc = INDEX_op_sari_vec,
+      .opt_opc = vecop_list_ssra,
       .vece = MO_64 },
 };
 
@@ -6070,27 +6074,31 @@ static void gen_usra_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a, i
     tcg_gen_add_vec(s, vece, d, d, a);
 }
 
+static const TCGOpcode vecop_list_usra[] = {
+    INDEX_op_shri_vec, INDEX_op_add_vec, 0
+};
+
 const GVecGen2i usra_op[4] = {
     { .fni8 = gen_usra8_i64,
       .fniv = gen_usra_vec,
       .load_dest = true,
-      .opc = INDEX_op_shri_vec,
+      .opt_opc = vecop_list_usra,
       .vece = MO_8, },
     { .fni8 = gen_usra16_i64,
       .fniv = gen_usra_vec,
       .load_dest = true,
-      .opc = INDEX_op_shri_vec,
+      .opt_opc = vecop_list_usra,
       .vece = MO_16, },
     { .fni4 = gen_usra32_i32,
       .fniv = gen_usra_vec,
       .load_dest = true,
-      .opc = INDEX_op_shri_vec,
+      .opt_opc = vecop_list_usra,
       .vece = MO_32, },
     { .fni8 = gen_usra64_i64,
       .fniv = gen_usra_vec,
       .prefer_i64 = TCG_TARGET_REG_BITS == 64,
       .load_dest = true,
-      .opc = INDEX_op_shri_vec,
+      .opt_opc = vecop_list_usra,
       .vece = MO_64, },
 };
 
@@ -6148,27 +6156,29 @@ static void gen_shr_ins_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a
     }
 }
 
+static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, 0 };
+
 const GVecGen2i sri_op[4] = {
     { .fni8 = gen_shr8_ins_i64,
       .fniv = gen_shr_ins_vec,
       .load_dest = true,
-      .opc = INDEX_op_shri_vec,
+      .opt_opc = vecop_list_sri,
       .vece = MO_8 },
     { .fni8 = gen_shr16_ins_i64,
       .fniv = gen_shr_ins_vec,
       .load_dest = true,
-      .opc = INDEX_op_shri_vec,
+      .opt_opc = vecop_list_sri,
       .vece = MO_16 },
     { .fni4 = gen_shr32_ins_i32,
       .fniv = gen_shr_ins_vec,
       .load_dest = true,
-      .opc = INDEX_op_shri_vec,
+      .opt_opc = vecop_list_sri,
       .vece = MO_32 },
     { .fni8 = gen_shr64_ins_i64,
       .fniv = gen_shr_ins_vec,
       .prefer_i64 = TCG_TARGET_REG_BITS == 64,
       .load_dest = true,
-      .opc = INDEX_op_shri_vec,
+      .opt_opc = vecop_list_sri,
       .vece = MO_64 },
 };
 
@@ -6224,27 +6234,29 @@ static void gen_shl_ins_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a
     }
 }
 
+static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, 0 };
+
 const GVecGen2i sli_op[4] = {
     { .fni8 = gen_shl8_ins_i64,
       .fniv = gen_shl_ins_vec,
       .load_dest = true,
-      .opc = INDEX_op_shli_vec,
+      .opt_opc = vecop_list_sli,
       .vece = MO_8 },
     { .fni8 = gen_shl16_ins_i64,
       .fniv = gen_shl_ins_vec,
       .load_dest = true,
-      .opc = INDEX_op_shli_vec,
+      .opt_opc = vecop_list_sli,
       .vece = MO_16 },
     { .fni4 = gen_shl32_ins_i32,
       .fniv = gen_shl_ins_vec,
       .load_dest = true,
-      .opc = INDEX_op_shli_vec,
+      .opt_opc = vecop_list_sli,
       .vece = MO_32 },
     { .fni8 = gen_shl64_ins_i64,
       .fniv = gen_shl_ins_vec,
       .prefer_i64 = TCG_TARGET_REG_BITS == 64,
       .load_dest = true,
-      .opc = INDEX_op_shli_vec,
+      .opt_opc = vecop_list_sli,
       .vece = MO_64 },
 };
 
@@ -6311,25 +6323,34 @@ static void gen_mls_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a, TC
 /* Note that while NEON does not support VMLA and VMLS as 64-bit ops,
  * these tables are shared with AArch64 which does support them.
  */
+
+static const TCGOpcode vecop_list_mla[] = {
+    INDEX_op_mul_vec, INDEX_op_add_vec, 0
+};
+
+static const TCGOpcode vecop_list_mls[] = {
+    INDEX_op_mul_vec, INDEX_op_sub_vec, 0
+};
+
 const GVecGen3 mla_op[4] = {
     { .fni4 = gen_mla8_i32,
       .fniv = gen_mla_vec,
-      .opc = INDEX_op_mul_vec,
+      .opt_opc = vecop_list_mla,
       .load_dest = true,
       .vece = MO_8 },
     { .fni4 = gen_mla16_i32,
       .fniv = gen_mla_vec,
-      .opc = INDEX_op_mul_vec,
+      .opt_opc = vecop_list_mla,
       .load_dest = true,
       .vece = MO_16 },
     { .fni4 = gen_mla32_i32,
       .fniv = gen_mla_vec,
-      .opc = INDEX_op_mul_vec,
+      .opt_opc = vecop_list_mla,
       .load_dest = true,
       .vece = MO_32 },
     { .fni8 = gen_mla64_i64,
       .fniv = gen_mla_vec,
-      .opc = INDEX_op_mul_vec,
+      .opt_opc = vecop_list_mla,
       .prefer_i64 = TCG_TARGET_REG_BITS == 64,
       .load_dest = true,
       .vece = MO_64 },
@@ -6338,22 +6359,22 @@ const GVecGen3 mla_op[4] = {
 const GVecGen3 mls_op[4] = {
     { .fni4 = gen_mls8_i32,
       .fniv = gen_mls_vec,
-      .opc = INDEX_op_mul_vec,
+      .opt_opc = vecop_list_mls,
       .load_dest = true,
       .vece = MO_8 },
     { .fni4 = gen_mls16_i32,
       .fniv = gen_mls_vec,
-      .opc = INDEX_op_mul_vec,
+      .opt_opc = vecop_list_mls,
       .load_dest = true,
       .vece = MO_16 },
     { .fni4 = gen_mls32_i32,
       .fniv = gen_mls_vec,
-      .opc = INDEX_op_mul_vec,
+      .opt_opc = vecop_list_mls,
       .load_dest = true,
       .vece = MO_32 },
     { .fni8 = gen_mls64_i64,
       .fniv = gen_mls_vec,
-      .opc = INDEX_op_mul_vec,
+      .opt_opc = vecop_list_mls,
       .prefer_i64 = TCG_TARGET_REG_BITS == 64,
       .load_dest = true,
       .vece = MO_64 },
@@ -6381,18 +6402,24 @@ static void gen_cmtst_vec(TCGContext *s, unsigned vece, TCGv_vec d, TCGv_vec a,
     tcg_gen_cmp_vec(s, TCG_COND_NE, vece, d, d, a);
 }
 
+static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, 0 };
+
 const GVecGen3 cmtst_op[4] = {
     { .fni4 = gen_helper_neon_tst_u8,
       .fniv = gen_cmtst_vec,
+      .opt_opc = vecop_list_cmtst,
       .vece = MO_8 },
     { .fni4 = gen_helper_neon_tst_u16,
       .fniv = gen_cmtst_vec,
+      .opt_opc = vecop_list_cmtst,
       .vece = MO_16 },
     { .fni4 = gen_cmtst_i32,
       .fniv = gen_cmtst_vec,
+      .opt_opc = vecop_list_cmtst,
       .vece = MO_32 },
     { .fni8 = gen_cmtst_i64,
       .fniv = gen_cmtst_vec,
+      .opt_opc = vecop_list_cmtst,
       .prefer_i64 = TCG_TARGET_REG_BITS == 64,
       .vece = MO_64 },
 };
@@ -6408,25 +6435,29 @@ static void gen_uqadd_vec(TCGContext *s, unsigned vece, TCGv_vec t, TCGv_vec sat
     tcg_temp_free_vec(s, x);
 }
 
+static const TCGOpcode vecop_list_uqadd[] = {
+    INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
+};
+
 const GVecGen4 uqadd_op[4] = {
     { .fniv = gen_uqadd_vec,
       .fno = gen_helper_gvec_uqadd_b,
-      .opc = INDEX_op_usadd_vec,
+      .opt_opc = vecop_list_uqadd,
       .write_aofs = true,
       .vece = MO_8 },
     { .fniv = gen_uqadd_vec,
       .fno = gen_helper_gvec_uqadd_h,
-      .opc = INDEX_op_usadd_vec,
+      .opt_opc = vecop_list_uqadd,
       .write_aofs = true,
       .vece = MO_16 },
     { .fniv = gen_uqadd_vec,
       .fno = gen_helper_gvec_uqadd_s,
-      .opc = INDEX_op_usadd_vec,
+      .opt_opc = vecop_list_uqadd,
       .write_aofs = true,
       .vece = MO_32 },
     { .fniv = gen_uqadd_vec,
       .fno = gen_helper_gvec_uqadd_d,
-      .opc = INDEX_op_usadd_vec,
+      .opt_opc = vecop_list_uqadd,
       .write_aofs = true,
       .vece = MO_64 },
 };
@@ -6442,25 +6473,29 @@ static void gen_sqadd_vec(TCGContext *s, unsigned vece, TCGv_vec t, TCGv_vec sat
     tcg_temp_free_vec(s, x);
 }
 
+static const TCGOpcode vecop_list_sqadd[] = {
+    INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, 0
+};
+
 const GVecGen4 sqadd_op[4] = {
     { .fniv = gen_sqadd_vec,
       .fno = gen_helper_gvec_sqadd_b,
-      .opc = INDEX_op_ssadd_vec,
+      .opt_opc = vecop_list_sqadd,
       .write_aofs = true,
       .vece = MO_8 },
     { .fniv = gen_sqadd_vec,
      .fno = gen_helper_gvec_sqadd_h,
-      .opc = INDEX_op_ssadd_vec,
+      .opt_opc = vecop_list_sqadd,
       .write_aofs = true,
       .vece = MO_16 },
     { .fniv = gen_sqadd_vec,
       .fno = gen_helper_gvec_sqadd_s,
-      .opc = INDEX_op_ssadd_vec,
+      .opt_opc = vecop_list_sqadd,
       .write_aofs = true,
       .vece = MO_32 },
     { .fniv = gen_sqadd_vec,
       .fno = gen_helper_gvec_sqadd_d,
-      .opc = INDEX_op_ssadd_vec,
+      .opt_opc = vecop_list_sqadd,
       .write_aofs = true,
       .vece = MO_64 },
 };
@@ -6476,25 +6511,29 @@ static void gen_uqsub_vec(TCGContext *s, unsigned vece, TCGv_vec t, TCGv_vec sat
     tcg_temp_free_vec(s, x);
 }
 
+static const TCGOpcode vecop_list_uqsub[] = {
+    INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
+};
+
 const GVecGen4 uqsub_op[4] = {
     { .fniv = gen_uqsub_vec,
       .fno = gen_helper_gvec_uqsub_b,
-      .opc = INDEX_op_ussub_vec,
+      .opt_opc = vecop_list_uqsub,
       .write_aofs = true,
       .vece = MO_8 },
     { .fniv = gen_uqsub_vec,
       .fno = gen_helper_gvec_uqsub_h,
-      .opc = INDEX_op_ussub_vec,
+      .opt_opc = vecop_list_uqsub,
       .write_aofs = true,
       .vece = MO_16 },
     { .fniv = gen_uqsub_vec,
       .fno = gen_helper_gvec_uqsub_s,
-      .opc = INDEX_op_ussub_vec,
+      .opt_opc = vecop_list_uqsub,
       .write_aofs = true,
       .vece = MO_32 },
     { .fniv = gen_uqsub_vec,
       .fno = gen_helper_gvec_uqsub_d,
-      .opc = INDEX_op_ussub_vec,
+      .opt_opc = vecop_list_uqsub,
       .write_aofs = true,
       .vece = MO_64 },
 };
@@ -6510,25 +6549,29 @@ static void gen_sqsub_vec(TCGContext *s, unsigned vece, TCGv_vec t, TCGv_vec sat
     tcg_temp_free_vec(s, x);
 }
 
+static const TCGOpcode vecop_list_sqsub[] = {
+    INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, 0
+};
+
 const GVecGen4 sqsub_op[4] = {
     { .fniv = gen_sqsub_vec,
       .fno = gen_helper_gvec_sqsub_b,
-      .opc = INDEX_op_sssub_vec,
+      .opt_opc = vecop_list_sqsub,
       .write_aofs = true,
       .vece = MO_8 },
     { .fniv = gen_sqsub_vec,
       .fno = gen_helper_gvec_sqsub_h,
-      .opc = INDEX_op_sssub_vec,
+      .opt_opc = vecop_list_sqsub,
       .write_aofs = true,
       .vece = MO_16 },
     { .fniv = gen_sqsub_vec,
       .fno = gen_helper_gvec_sqsub_s,
-      .opc = INDEX_op_sssub_vec,
+      .opt_opc = vecop_list_sqsub,
       .write_aofs = true,
       .vece = MO_32 },
     { .fniv = gen_sqsub_vec,
       .fno = gen_helper_gvec_sqsub_d,
-      .opc = INDEX_op_sssub_vec,
+      .opt_opc = vecop_list_sqsub,
       .write_aofs = true,
       .vece = MO_64 },
 };
diff --git a/qemu/tcg/tcg-op-gvec.c b/qemu/tcg/tcg-op-gvec.c
index da28d4aa..7179c3f5 100644
--- a/qemu/tcg/tcg-op-gvec.c
+++ b/qemu/tcg/tcg-op-gvec.c
@@ -26,6 +26,13 @@
 
 #define MAX_UNROLL  4
 
+#ifdef CONFIG_DEBUG_TCG
+static const TCGOpcode vecop_list_empty[1] = { 0 };
+#else
+#define vecop_list_empty NULL
+#endif
+
+
 /* Verify vector size and alignment rules.  OFS should be the OR of all
    of the operand offsets so that we can check them all at once.  */
 static void check_size_align(uint32_t oprsz, uint32_t maxsz, uint32_t ofs)
@@ -360,31 +367,30 @@ static void gen_dup_i64(TCGContext *s, unsigned vece, TCGv_i64 out, TCGv_i64 in)
  * on elements of size VECE in the selected type.  Do not select V64 if
  * PREFER_I64 is true.  Return 0 if no vector type is selected.
  */
-static TCGType choose_vector_type(TCGOpcode op, unsigned vece, uint32_t size,
-                                  bool prefer_i64)
+static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece,
+                                  uint32_t size, bool prefer_i64)
 {
     if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) {
-        if (op == 0) {
-            return TCG_TYPE_V256;
-        }
-        /* Recall that ARM SVE allows vector sizes that are not a
+
+        /*
+         * Recall that ARM SVE allows vector sizes that are not a
          * power of 2, but always a multiple of 16.  The intent is
         * that e.g. size == 80 would be expanded with 2x32 + 1x16.
         * It is hard to imagine a case in which v256 is supported
         * but v128 is not, but check anyway.
         */
-        if (tcg_can_emit_vec_op(op, TCG_TYPE_V256, vece)
+        if (tcg_can_emit_vecop_list(list, TCG_TYPE_V256, vece)
             && (size % 32 == 0
-                || tcg_can_emit_vec_op(op, TCG_TYPE_V128, vece))) {
+                || tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece))) {
             return TCG_TYPE_V256;
         }
     }
     if (TCG_TARGET_HAS_v128 && check_size_impl(size, 16)
-        && (op == 0 || tcg_can_emit_vec_op(op, TCG_TYPE_V128, vece))) {
+        && tcg_can_emit_vecop_list(list, TCG_TYPE_V128, vece)) {
         return TCG_TYPE_V128;
     }
     if (TCG_TARGET_HAS_v64 && !prefer_i64 && check_size_impl(size, 8)
-        && (op == 0 || tcg_can_emit_vec_op(op, TCG_TYPE_V64, vece))) {
+        && tcg_can_emit_vecop_list(list, TCG_TYPE_V64, vece)) {
         return TCG_TYPE_V64;
     }
     return 0;
@@ -418,7 +424,7 @@ static void do_dup(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t oprsz,
     /* Implement inline with a vector type, if possible.
      * Prefer integer when 64-bit host and no variable dup.
      */
-    type = choose_vector_type(0, vece, oprsz,
+    type = choose_vector_type(NULL, vece, oprsz,
                               (TCG_TARGET_REG_BITS == 64 && in_32 == NULL
                                && (in_64 == NULL || vece == MO_64)));
     if (type != 0) {
@@ -991,6 +997,8 @@ static void expand_4_vec(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t a
 void tcg_gen_gvec_2(TCGContext *s, uint32_t dofs, uint32_t aofs,
                     uint32_t oprsz, uint32_t maxsz, const GVecGen2 *g)
 {
+    const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
+    const TCGOpcode *hold_list = tcg_swap_vecop_list(s, this_list);
     TCGType type;
     uint32_t some;
 
@@ -999,7 +1007,7 @@ void tcg_gen_gvec_2(TCGContext *s, uint32_t dofs, uint32_t aofs,
 
     type = 0;
     if (g->fniv) {
-        type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
+        type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64);
     }
     switch (type) {
     case TCG_TYPE_V256:
@@ -1032,13 +1040,14 @@ void tcg_gen_gvec_2(TCGContext *s, uint32_t dofs, uint32_t aofs,
         } else {
             assert(g->fno != NULL);
             tcg_gen_gvec_2_ool(s, dofs, aofs, oprsz, maxsz, g->data, g->fno);
-            return;
+            oprsz = maxsz;
         }
         break;
     default:
         g_assert_not_reached();
     }
+    tcg_swap_vecop_list(s, hold_list);
 
     if (oprsz < maxsz) {
         expand_clr(s, dofs + oprsz, maxsz - oprsz);
@@ -1049,6 +1058,8 @@
 void tcg_gen_gvec_2i(TCGContext *s, uint32_t dofs, uint32_t aofs,
                      uint32_t oprsz, uint32_t maxsz, int64_t c,
                      const GVecGen2i *g)
 {
+    const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
+    const TCGOpcode *hold_list = tcg_swap_vecop_list(s, this_list);
     TCGType type;
     uint32_t some;
 
@@ -1057,7 +1068,7 @@ void tcg_gen_gvec_2i(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz
 
     type = 0;
     if (g->fniv) {
-        type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
+        type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64);
     }
     switch (type) {
     case TCG_TYPE_V256:
@@ -1099,13 +1110,14 @@ void tcg_gen_gvec_2i(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz
                                     maxsz, c, g->fnoi);
                 tcg_temp_free_i64(s, tcg_c);
             }
-            return;
+            oprsz = maxsz;
         }
         break;
     default:
         g_assert_not_reached();
     }
+    tcg_swap_vecop_list(s, hold_list);
 
     if (oprsz < maxsz) {
         expand_clr(s, dofs + oprsz, maxsz - oprsz);
@@ -1123,9 +1135,11 @@ void tcg_gen_gvec_2s(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz
 
     type = 0;
     if (g->fniv) {
-        type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
+        type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64);
     }
     if (type != 0) {
+        const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
+        const TCGOpcode *hold_list = tcg_swap_vecop_list(s, this_list);
         TCGv_vec t_vec = tcg_temp_new_vec(s, type);
         uint32_t some;
 
@@ -1163,6 +1177,7 @@ void tcg_gen_gvec_2s(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz
             g_assert_not_reached();
         }
         tcg_temp_free_vec(s, t_vec);
+        tcg_swap_vecop_list(s, hold_list);
     } else if (g->fni8 && check_size_impl(oprsz, 8)) {
         TCGv_i64 t64 = tcg_temp_new_i64(s);
 
@@ -1190,6 +1205,8 @@ void tcg_gen_gvec_2s(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t oprsz
 void tcg_gen_gvec_3(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t bofs,
                     uint32_t oprsz, uint32_t maxsz, const GVecGen3 *g)
 {
+    const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
+    const TCGOpcode *hold_list = tcg_swap_vecop_list(s, this_list);
     TCGType type;
     uint32_t some;
 
@@ -1198,7 +1215,7 @@ void tcg_gen_gvec_3(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t bofs,
 
     type = 0;
     if (g->fniv) {
-        type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
+        type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64);
     }
     switch (type) {
     case TCG_TYPE_V256:
@@ -1236,13 +1253,14 @@ void tcg_gen_gvec_3(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t bofs,
             assert(g->fno != NULL);
             tcg_gen_gvec_3_ool(s, dofs, aofs, bofs, oprsz,
                                maxsz, g->data, g->fno);
-            return;
+            oprsz = maxsz;
         }
         break;
     default:
         g_assert_not_reached();
     }
+    tcg_swap_vecop_list(s, hold_list);
 
     if (oprsz < maxsz) {
         expand_clr(s, dofs + oprsz, maxsz - oprsz);
@@ -1254,6 +1272,8 @@
 void tcg_gen_gvec_3i(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t bofs,
                      uint32_t oprsz, uint32_t maxsz, int64_t c,
                      const GVecGen3i *g)
 {
+    const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
+    const TCGOpcode *hold_list = tcg_swap_vecop_list(s, this_list);
     TCGType type;
     uint32_t some;
 
@@ -1262,7 +1282,7 @@ void tcg_gen_gvec_3i(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t bofs,
 
     type = 0;
     if (g->fniv) {
-        type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
+        type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64);
     }
     switch (type) {
     case TCG_TYPE_V256:
@@ -1300,13 +1320,14 @@ void tcg_gen_gvec_3i(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t bofs,
         } else {
             assert(g->fno != NULL);
             tcg_gen_gvec_3_ool(s, dofs, aofs, bofs, oprsz, maxsz, c, g->fno);
-            return;
+            oprsz = maxsz;
         }
         break;
     default:
         g_assert_not_reached();
     }
+    tcg_swap_vecop_list(s, hold_list);
 
     if (oprsz < maxsz) {
         expand_clr(s, dofs + oprsz, maxsz - oprsz);
@@ -1317,6 +1338,8 @@
 void tcg_gen_gvec_4(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t bofs,
                     uint32_t cofs, uint32_t oprsz, uint32_t maxsz,
                     const GVecGen4 *g)
 {
+    const TCGOpcode *this_list = g->opt_opc ? : vecop_list_empty;
+    const TCGOpcode *hold_list = tcg_swap_vecop_list(s, this_list);
     TCGType type;
     uint32_t some;
 
@@ -1325,7 +1348,7 @@ void tcg_gen_gvec_4(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t bofs,
 
     type = 0;
     if (g->fniv) {
-        type = choose_vector_type(g->opc, g->vece, oprsz, g->prefer_i64);
+        type = choose_vector_type(g->opt_opc, g->vece, oprsz, g->prefer_i64);
    }
     switch (type) {
     case TCG_TYPE_V256:
@@ -1367,13 +1390,14 @@ void tcg_gen_gvec_4(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t bofs,
             assert(g->fno != NULL);
             tcg_gen_gvec_4_ool(s, dofs, aofs, bofs, cofs, oprsz, maxsz,
                                g->data, g->fno);
-            return;
+            oprsz = maxsz;
         }
         break;
     default:
         g_assert_not_reached();
     }
+    tcg_swap_vecop_list(s, hold_list);
 
     if (oprsz < maxsz) {
         expand_clr(s, dofs + oprsz, maxsz - oprsz);
@@ -1568,6 +1592,8 @@ void tcg_gen_vec_add32_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
     tcg_temp_free_i64(s, t2);
 }
 
+static const TCGOpcode vecop_list_add[] = { INDEX_op_add_vec, 0 };
+
 void tcg_gen_gvec_add(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
 {
@@ -1575,22 +1601,22 @@ void tcg_gen_gvec_add(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs
         { .fni8 = tcg_gen_vec_add8_i64,
           .fniv = tcg_gen_add_vec,
           .fno = gen_helper_gvec_add8,
-          .opc = INDEX_op_add_vec,
+          .opt_opc = vecop_list_add,
           .vece = MO_8 },
         { .fni8 = tcg_gen_vec_add16_i64,
           .fniv = tcg_gen_add_vec,
          .fno = gen_helper_gvec_add16,
-          .opc = INDEX_op_add_vec,
+          .opt_opc = vecop_list_add,
          .vece = MO_16 },
         { .fni4 = tcg_gen_add_i32,
          .fniv = tcg_gen_add_vec,
           .fno = gen_helper_gvec_add32,
-          .opc = INDEX_op_add_vec,
+          .opt_opc = vecop_list_add,
           .vece = MO_32 },
         { .fni8 = tcg_gen_add_i64,
           .fniv = tcg_gen_add_vec,
           .fno = gen_helper_gvec_add64,
-          .opc = INDEX_op_add_vec,
+          .opt_opc = vecop_list_add,
           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
           .vece = MO_64 },
     };
@@ -1606,22 +1632,22 @@ void tcg_gen_gvec_adds(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aof
         { .fni8 = tcg_gen_vec_add8_i64,
          .fniv = tcg_gen_add_vec,
           .fno = gen_helper_gvec_adds8,
-          .opc = INDEX_op_add_vec,
+          .opt_opc = vecop_list_add,
          .vece = MO_8 },
         { .fni8 = tcg_gen_vec_add16_i64,
           .fniv = tcg_gen_add_vec,
           .fno = gen_helper_gvec_adds16,
-          .opc = INDEX_op_add_vec,
+          .opt_opc = vecop_list_add,
          .vece = MO_16 },
         { .fni4 = tcg_gen_add_i32,
           .fniv = tcg_gen_add_vec,
           .fno = gen_helper_gvec_adds32,
-          .opc = INDEX_op_add_vec,
+          .opt_opc = vecop_list_add,
           .vece = MO_32 },
         { .fni8 = tcg_gen_add_i64,
           .fniv = tcg_gen_add_vec,
           .fno = gen_helper_gvec_adds64,
-          .opc = INDEX_op_add_vec,
+          .opt_opc = vecop_list_add,
           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
           .vece = MO_64 },
     };
@@ -1638,6 +1664,8 @@ void tcg_gen_gvec_addi(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aof
     tcg_temp_free_i64(s, tmp);
 }
 
+static const TCGOpcode vecop_list_sub[] = { INDEX_op_sub_vec, 0 };
+
 void tcg_gen_gvec_subs(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                        TCGv_i64 c, uint32_t oprsz, uint32_t maxsz)
 {
@@ -1645,22 +1673,22 @@ void tcg_gen_gvec_subs(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aof
         { .fni8 = tcg_gen_vec_sub8_i64,
           .fniv = tcg_gen_sub_vec,
          .fno = gen_helper_gvec_subs8,
-          .opc = INDEX_op_sub_vec,
+          .opt_opc = vecop_list_sub,
          .vece = MO_8 },
         { .fni8 = tcg_gen_vec_sub16_i64,
           .fniv = tcg_gen_sub_vec,
           .fno = gen_helper_gvec_subs16,
-          .opc = INDEX_op_sub_vec,
+          .opt_opc = vecop_list_sub,
           .vece = MO_16 },
         { .fni4 = tcg_gen_sub_i32,
           .fniv = tcg_gen_sub_vec,
           .fno = gen_helper_gvec_subs32,
-          .opc = INDEX_op_sub_vec,
+          .opt_opc = vecop_list_sub,
          .vece = MO_32 },
         { .fni8 = tcg_gen_sub_i64,
           .fniv = tcg_gen_sub_vec,
           .fno = gen_helper_gvec_subs64,
-          .opc = INDEX_op_sub_vec,
+          .opt_opc = vecop_list_sub,
           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
           .vece = MO_64 },
     };
@@ -1724,22 +1752,22 @@ void tcg_gen_gvec_sub(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs
         { .fni8 = tcg_gen_vec_sub8_i64,
           .fniv = tcg_gen_sub_vec,
           .fno = gen_helper_gvec_sub8,
-          .opc = INDEX_op_sub_vec,
+          .opt_opc = vecop_list_sub,
           .vece = MO_8 },
         { .fni8 = tcg_gen_vec_sub16_i64,
          .fniv = tcg_gen_sub_vec,
           .fno = gen_helper_gvec_sub16,
-          .opc = INDEX_op_sub_vec,
+          .opt_opc = vecop_list_sub,
           .vece = MO_16 },
         { .fni4 = tcg_gen_sub_i32,
           .fniv = tcg_gen_sub_vec,
           .fno = gen_helper_gvec_sub32,
-          .opc = INDEX_op_sub_vec,
+          .opt_opc = vecop_list_sub,
          .vece = MO_32 },
         { .fni8 = tcg_gen_sub_i64,
           .fniv = tcg_gen_sub_vec,
           .fno = gen_helper_gvec_sub64,
-          .opc = INDEX_op_sub_vec,
+          .opt_opc = vecop_list_sub,
           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
           .vece = MO_64 },
     };
@@ -1748,27 +1776,29 @@ void tcg_gen_gvec_sub(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs
     tcg_gen_gvec_3(s, dofs, aofs, bofs, oprsz, maxsz, &g[vece]);
 }
 
+static const TCGOpcode vecop_list_mul[] = { INDEX_op_mul_vec, 0 };
+
 void tcg_gen_gvec_mul(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
 {
     static const GVecGen3 g[4] = {
         { .fniv = tcg_gen_mul_vec,
           .fno = gen_helper_gvec_mul8,
-          .opc = INDEX_op_mul_vec,
+          .opt_opc = vecop_list_mul,
          .vece = MO_8 },
         { .fniv = tcg_gen_mul_vec,
           .fno = gen_helper_gvec_mul16,
-          .opc = INDEX_op_mul_vec,
+          .opt_opc = vecop_list_mul,
           .vece = MO_16 },
         { .fni4 = tcg_gen_mul_i32,
           .fniv = tcg_gen_mul_vec,
          .fno = gen_helper_gvec_mul32,
-          .opc = INDEX_op_mul_vec,
+          .opt_opc = vecop_list_mul,
           .vece = MO_32 },
         { .fni8 = tcg_gen_mul_i64,
           .fniv = tcg_gen_mul_vec,
          .fno = gen_helper_gvec_mul64,
-          .opc = INDEX_op_mul_vec,
+          .opt_opc = vecop_list_mul,
           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
           .vece = MO_64 },
     };
@@ -1783,21 +1813,21 @@ void tcg_gen_gvec_muls(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aof
     static const GVecGen2s g[4] = {
         { .fniv = tcg_gen_mul_vec,
           .fno = gen_helper_gvec_muls8,
-          .opc = INDEX_op_mul_vec,
+          .opt_opc = vecop_list_mul,
           .vece = MO_8 },
         { .fniv = tcg_gen_mul_vec,
           .fno = gen_helper_gvec_muls16,
-          .opc = INDEX_op_mul_vec,
+          .opt_opc = vecop_list_mul,
          .vece = MO_16 },
         { .fni4 = tcg_gen_mul_i32,
          .fniv = tcg_gen_mul_vec,
           .fno = gen_helper_gvec_muls32,
-          .opc = INDEX_op_mul_vec,
+          .opt_opc = vecop_list_mul,
           .vece = MO_32 },
         { .fni8 = tcg_gen_mul_i64,
           .fniv = tcg_gen_mul_vec,
           .fno = gen_helper_gvec_muls64,
-          .opc = INDEX_op_mul_vec,
+          .opt_opc = vecop_list_mul,
           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
           .vece = MO_64 },
     };
@@ -1817,22 +1847,23 @@ void tcg_gen_gvec_muli(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aof
 void tcg_gen_gvec_ssadd(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                         uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
 {
+    static const TCGOpcode vecop_list[] = { INDEX_op_ssadd_vec, 0 };
     static const GVecGen3 g[4] = {
         { .fniv = tcg_gen_ssadd_vec,
          .fno = gen_helper_gvec_ssadd8,
-          .opc = INDEX_op_ssadd_vec,
+          .opt_opc = vecop_list,
          .vece = MO_8 },
         { .fniv = tcg_gen_ssadd_vec,
           .fno = gen_helper_gvec_ssadd16,
-          .opc = INDEX_op_ssadd_vec,
+          .opt_opc = vecop_list,
          .vece = MO_16 },
         { .fniv = tcg_gen_ssadd_vec,
           .fno = gen_helper_gvec_ssadd32,
-          .opc = INDEX_op_ssadd_vec,
+          .opt_opc = vecop_list,
           .vece = MO_32 },
         { .fniv = tcg_gen_ssadd_vec,
           .fno = gen_helper_gvec_ssadd64,
-          .opc = INDEX_op_ssadd_vec,
+          .opt_opc = vecop_list,
           .vece = MO_64 },
     };
     tcg_debug_assert(vece <= MO_64);
@@ -1842,22 +1873,23 @@ void tcg_gen_gvec_ssadd(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t ao
 void tcg_gen_gvec_sssub(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                         uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
 {
+    static const TCGOpcode vecop_list[] = { INDEX_op_sssub_vec, 0 };
     static const GVecGen3 g[4] = {
         { .fniv = tcg_gen_sssub_vec,
          .fno = gen_helper_gvec_sssub8,
-          .opc = INDEX_op_sssub_vec,
+          .opt_opc = vecop_list,
          .vece = MO_8 },
         { .fniv = tcg_gen_sssub_vec,
           .fno = gen_helper_gvec_sssub16,
-          .opc = INDEX_op_sssub_vec,
+          .opt_opc = vecop_list,
           .vece = MO_16 },
         { .fniv = tcg_gen_sssub_vec,
          .fno = gen_helper_gvec_sssub32,
-          .opc = INDEX_op_sssub_vec,
+          .opt_opc = vecop_list,
           .vece = MO_32 },
         { .fniv = tcg_gen_sssub_vec,
           .fno = gen_helper_gvec_sssub64,
-          .opc = INDEX_op_sssub_vec,
+          .opt_opc = vecop_list,
           .vece = MO_64 },
     };
     tcg_debug_assert(vece <= MO_64);
@@ -1883,24 +1915,25 @@ static void tcg_gen_usadd_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 void tcg_gen_gvec_usadd(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                         uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
 {
+    static const TCGOpcode vecop_list[] = { INDEX_op_usadd_vec, 0 };
     static const GVecGen3 g[4] = {
         { .fniv = tcg_gen_usadd_vec,
           .fno = gen_helper_gvec_usadd8,
-          .opc = INDEX_op_usadd_vec,
+          .opt_opc = vecop_list,
           .vece = MO_8 },
         { .fniv = tcg_gen_usadd_vec,
           .fno = gen_helper_gvec_usadd16,
-          .opc = INDEX_op_usadd_vec,
+          .opt_opc = vecop_list,
           .vece = MO_16 },
         { .fni4 = tcg_gen_usadd_i32,
          .fniv = tcg_gen_usadd_vec,
           .fno = gen_helper_gvec_usadd32,
-          .opc = INDEX_op_usadd_vec,
+          .opt_opc = vecop_list,
           .vece = MO_32 },
         { .fni8 = tcg_gen_usadd_i64,
          .fniv = tcg_gen_usadd_vec,
           .fno = gen_helper_gvec_usadd64,
-          .opc = INDEX_op_usadd_vec,
+          .opt_opc = vecop_list,
           .vece = MO_64 }
     };
     tcg_debug_assert(vece <= MO_64);
@@ -1926,24 +1959,25 @@ static void tcg_gen_ussub_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
 void tcg_gen_gvec_ussub(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                         uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
 {
+    static const TCGOpcode vecop_list[] = { INDEX_op_ussub_vec, 0 };
     static const GVecGen3 g[4] = {
         { .fniv = tcg_gen_ussub_vec,
           .fno = gen_helper_gvec_ussub8,
-          .opc = INDEX_op_ussub_vec,
+          .opt_opc = vecop_list,
          .vece = MO_8 },
         { .fniv = tcg_gen_ussub_vec,
           .fno = gen_helper_gvec_ussub16,
-          .opc = INDEX_op_ussub_vec,
+          .opt_opc = vecop_list,
           .vece = MO_16 },
         { .fni4 = tcg_gen_ussub_i32,
          .fniv = tcg_gen_ussub_vec,
           .fno = gen_helper_gvec_ussub32,
-          .opc = INDEX_op_ussub_vec,
+          .opt_opc = vecop_list,
           .vece = MO_32 },
         { .fni8 = tcg_gen_ussub_i64,
          .fniv = tcg_gen_ussub_vec,
           .fno = gen_helper_gvec_ussub64,
-          .opc = INDEX_op_ussub_vec,
+          .opt_opc = vecop_list,
           .vece = MO_64 }
     };
     tcg_debug_assert(vece <= MO_64);
@@ -1953,24 +1987,25 @@ void tcg_gen_gvec_ussub(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t ao
 void tcg_gen_gvec_smin(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
 {
+    static const TCGOpcode vecop_list[] = { INDEX_op_smin_vec, 0 };
     static const GVecGen3 g[4] = {
         { .fniv = tcg_gen_smin_vec,
          .fno = gen_helper_gvec_smin8,
-          .opc = INDEX_op_smin_vec,
+          .opt_opc = vecop_list,
          .vece = MO_8 },
         { .fniv = tcg_gen_smin_vec,
           .fno = gen_helper_gvec_smin16,
-          .opc = INDEX_op_smin_vec,
+          .opt_opc = vecop_list,
           .vece = MO_16 },
         { .fni4 = tcg_gen_smin_i32,
          .fniv = tcg_gen_smin_vec,
           .fno = gen_helper_gvec_smin32,
-          .opc = INDEX_op_smin_vec,
+          .opt_opc = vecop_list,
           .vece = MO_32 },
         { .fni8 = tcg_gen_smin_i64,
          .fniv = tcg_gen_smin_vec,
           .fno = gen_helper_gvec_smin64,
-          .opc = INDEX_op_smin_vec,
+          .opt_opc = vecop_list,
           .vece = MO_64 }
     };
     tcg_debug_assert(vece <= MO_64);
@@ -1980,24 +2015,25 @@ void tcg_gen_gvec_smin(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aof
 void tcg_gen_gvec_umin(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
 {
+    static const TCGOpcode vecop_list[] = { INDEX_op_umin_vec, 0 };
     static const GVecGen3 g[4] = {
         { .fniv = tcg_gen_umin_vec,
          .fno = gen_helper_gvec_umin8,
-          .opc = INDEX_op_umin_vec,
+          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_umin_vec,
           .fno = gen_helper_gvec_umin16,
-          .opc = INDEX_op_umin_vec,
+          .opt_opc = vecop_list,
           .vece = MO_16 },
         { .fni4 = tcg_gen_umin_i32,
          .fniv = tcg_gen_umin_vec,
           .fno = gen_helper_gvec_umin32,
-          .opc = INDEX_op_umin_vec,
+          .opt_opc = vecop_list,
           .vece = MO_32 },
         { .fni8 = tcg_gen_umin_i64,
          .fniv = tcg_gen_umin_vec,
           .fno = gen_helper_gvec_umin64,
-          .opc = INDEX_op_umin_vec,
+          .opt_opc = vecop_list,
           .vece = MO_64 }
     };
     tcg_debug_assert(vece <= MO_64);
@@ -2007,24 +2043,25 @@ void tcg_gen_gvec_umin(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aof
 void tcg_gen_gvec_smax(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
 {
+    static const TCGOpcode vecop_list[] = { INDEX_op_smax_vec, 0 };
     static const GVecGen3 g[4] = {
         { .fniv = tcg_gen_smax_vec,
          .fno = gen_helper_gvec_smax8,
-          .opc = INDEX_op_smax_vec,
+          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_smax_vec,
          .fno = gen_helper_gvec_smax16,
-          .opc = INDEX_op_smax_vec,
+          .opt_opc = vecop_list,
           .vece = MO_16 },
        { .fni4 = tcg_gen_smax_i32,
          .fniv = tcg_gen_smax_vec,
           .fno = gen_helper_gvec_smax32,
-          .opc = INDEX_op_smax_vec,
+          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_smax_i64,
          .fniv = tcg_gen_smax_vec,
           .fno = gen_helper_gvec_smax64,
-          .opc = INDEX_op_smax_vec,
+          .opt_opc = vecop_list,
           .vece = MO_64 }
     };
     tcg_debug_assert(vece <= MO_64);
@@ -2034,24 +2071,25 @@ void tcg_gen_gvec_smax(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aof
 void tcg_gen_gvec_umax(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                        uint32_t bofs, uint32_t oprsz, uint32_t maxsz)
 {
+    static const TCGOpcode vecop_list[] = { INDEX_op_umax_vec, 0 };
     static const GVecGen3 g[4] = {
         { .fniv = tcg_gen_umax_vec,
          .fno = gen_helper_gvec_umax8,
-          .opc = INDEX_op_umax_vec,
+          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fniv = tcg_gen_umax_vec,
          .fno = gen_helper_gvec_umax16,
-          .opc = INDEX_op_umax_vec,
+          .opt_opc = vecop_list,
           .vece = MO_16 },
        { .fni4 = tcg_gen_umax_i32,
          .fniv = tcg_gen_umax_vec,
           .fno = gen_helper_gvec_umax32,
-          .opc = INDEX_op_umax_vec,
+          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_umax_i64,
          .fniv = tcg_gen_umax_vec,
           .fno = gen_helper_gvec_umax64,
-          .opc = INDEX_op_umax_vec,
+          .opt_opc = vecop_list,
           .vece = MO_64 }
     };
     tcg_debug_assert(vece <= MO_64);
@@ -2105,26 +2143,27 @@ void tcg_gen_vec_neg32_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 b)
 void tcg_gen_gvec_neg(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                       uint32_t oprsz, uint32_t maxsz)
 {
+    static const TCGOpcode vecop_list[] = { INDEX_op_neg_vec, 0 };
     static const GVecGen2 g[4] = {
         { .fni8 = tcg_gen_vec_neg8_i64,
          .fniv = tcg_gen_neg_vec,
           .fno = gen_helper_gvec_neg8,
-          .opc = INDEX_op_neg_vec,
+          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = tcg_gen_vec_neg16_i64,
          .fniv = tcg_gen_neg_vec,
           .fno = gen_helper_gvec_neg16,
-          .opc = INDEX_op_neg_vec,
+          .opt_opc = vecop_list,
           .vece = MO_16 },
        { .fni4 = tcg_gen_neg_i32,
          .fniv = tcg_gen_neg_vec,
           .fno = gen_helper_gvec_neg32,
-          .opc = INDEX_op_neg_vec,
+          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_neg_i64,
          .fniv = tcg_gen_neg_vec,
           .fno = gen_helper_gvec_neg64,
-          .opc = INDEX_op_neg_vec,
+          .opt_opc = vecop_list,
           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
           .vece = MO_64 },
     };
@@ -2140,7 +2179,6 @@ void tcg_gen_gvec_and(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs
         .fni8 = tcg_gen_and_i64,
         .fniv = tcg_gen_and_vec,
         .fno = gen_helper_gvec_and,
-        .opc = INDEX_op_and_vec,
         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
     };
 
@@ -2158,7 +2196,6 @@ void tcg_gen_gvec_or(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
         .fni8 = tcg_gen_or_i64,
         .fniv = tcg_gen_or_vec,
         .fno = gen_helper_gvec_or,
-        .opc = INDEX_op_or_vec,
         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
     };
 
@@ -2176,7 +2213,6 @@ void tcg_gen_gvec_xor(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs
         .fni8 = tcg_gen_xor_i64,
         .fniv = tcg_gen_xor_vec,
         .fno = gen_helper_gvec_xor,
-        .opc = INDEX_op_xor_vec,
        .prefer_i64 = TCG_TARGET_REG_BITS == 64,
     };
 
@@ -2194,7 +2230,6 @@ void tcg_gen_gvec_andc(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aof
         .fni8 = tcg_gen_andc_i64,
         .fniv = tcg_gen_andc_vec,
         .fno = gen_helper_gvec_andc,
-        .opc = INDEX_op_andc_vec,
         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
     };
 
@@ -2212,7 +2247,6 @@ void tcg_gen_gvec_orc(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs
         .fni8 = tcg_gen_orc_i64,
         .fniv = tcg_gen_orc_vec,
         .fno = gen_helper_gvec_orc,
-        .opc = INDEX_op_orc_vec,
         .prefer_i64 = TCG_TARGET_REG_BITS == 64,
     };
 
@@ -2278,7 +2312,6 @@ static const GVecGen2s gop_ands = {
     .fni8 = tcg_gen_and_i64,
     .fniv = tcg_gen_and_vec,
     .fno = gen_helper_gvec_ands,
-    .opc = INDEX_op_and_vec,
     .prefer_i64 = TCG_TARGET_REG_BITS == 64,
     .vece = MO_64
 };
@@ -2304,7 +2337,6 @@ static const GVecGen2s gop_xors = {
     .fni8 = tcg_gen_xor_i64,
     .fniv = tcg_gen_xor_vec,
     .fno = gen_helper_gvec_xors,
-    .opc = INDEX_op_xor_vec,
     .prefer_i64 = TCG_TARGET_REG_BITS == 64,
     .vece = MO_64
 };
@@ -2330,7 +2362,6 @@ static const GVecGen2s gop_ors = {
     .fni8 = tcg_gen_or_i64,
     .fniv = tcg_gen_or_vec,
     .fno = gen_helper_gvec_ors,
-    .opc = INDEX_op_or_vec,
     .prefer_i64 = TCG_TARGET_REG_BITS == 64,
     .vece = MO_64
 };
@@ -2369,26 +2400,27 @@ void tcg_gen_vec_shl16i_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t c)
 void tcg_gen_gvec_shli(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
 {
+    static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 };
     static const GVecGen2i g[4] = {
         { .fni8 = tcg_gen_vec_shl8i_i64,
          .fniv = tcg_gen_shli_vec,
           .fno = gen_helper_gvec_shl8i,
-          .opc = INDEX_op_shli_vec,
+          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = tcg_gen_vec_shl16i_i64,
          .fniv = tcg_gen_shli_vec,
           .fno = gen_helper_gvec_shl16i,
-          .opc = INDEX_op_shli_vec,
+          .opt_opc = vecop_list,
           .vece = MO_16 },
        { .fni4 = tcg_gen_shli_i32,
          .fniv = tcg_gen_shli_vec,
           .fno = gen_helper_gvec_shl32i,
-          .opc = INDEX_op_shli_vec,
+          .opt_opc = vecop_list,
          .vece = MO_32 },
        { .fni8 = tcg_gen_shli_i64,
          .fniv = tcg_gen_shli_vec,
           .fno = gen_helper_gvec_shl64i,
-          .opc = INDEX_op_shli_vec,
+          .opt_opc = vecop_list,
           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
           .vece = MO_64 },
     };
@@ -2419,26 +2451,27 @@ void tcg_gen_vec_shr16i_i64(TCGContext *s, TCGv_i64 d, TCGv_i64 a, int64_t c)
 void tcg_gen_gvec_shri(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
 {
+    static const TCGOpcode vecop_list[] = { INDEX_op_shri_vec, 0 };
     static const GVecGen2i g[4] = {
         { .fni8 = tcg_gen_vec_shr8i_i64,
          .fniv = tcg_gen_shri_vec,
           .fno = gen_helper_gvec_shr8i,
-          .opc = INDEX_op_shri_vec,
+          .opt_opc = vecop_list,
          .vece = MO_8 },
        { .fni8 = tcg_gen_vec_shr16i_i64,
          .fniv = tcg_gen_shri_vec,
           .fno = gen_helper_gvec_shr16i,
-          .opc = INDEX_op_shri_vec,
+          .opt_opc = vecop_list,
           .vece = MO_16 },
       { .fni4 = tcg_gen_shri_i32,
          .fniv = tcg_gen_shri_vec,
           .fno = gen_helper_gvec_shr32i,
-          .opc = INDEX_op_shri_vec,
+          .opt_opc = vecop_list,
          .vece = MO_32 },
       { .fni8 = tcg_gen_shri_i64,
          .fniv = tcg_gen_shri_vec,
           .fno = gen_helper_gvec_shr64i,
-          .opc = INDEX_op_shri_vec,
+          .opt_opc = vecop_list,
           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
           .vece = MO_64 },
     };
@@ -2483,26 +2516,27 @@ void tcg_gen_vec_sar16i_i64(TCGContext *ctx, TCGv_i64 d, TCGv_i64 a, int64_t c)
 void tcg_gen_gvec_sari(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz)
 {
+    static const TCGOpcode vecop_list[] = { INDEX_op_sari_vec, 0 };
     static const GVecGen2i g[4] = {
         { .fni8 = tcg_gen_vec_sar8i_i64,
          .fniv = tcg_gen_sari_vec,
           .fno = gen_helper_gvec_sar8i,
-          .opc = INDEX_op_sari_vec,
+          .opt_opc = vecop_list,
          .vece = MO_8 },
       { .fni8 = tcg_gen_vec_sar16i_i64,
          .fniv = tcg_gen_sari_vec,
           .fno = gen_helper_gvec_sar16i,
-          .opc = INDEX_op_sari_vec,
+          .opt_opc = vecop_list,
           .vece = MO_16 },
      { .fni4 = tcg_gen_sari_i32,
          .fniv = tcg_gen_sari_vec,
           .fno = gen_helper_gvec_sar32i,
-          .opc = INDEX_op_sari_vec,
+          .opt_opc = vecop_list,
          .vece = MO_32 },
      { .fni8 = tcg_gen_sari_i64,
          .fniv = tcg_gen_sari_vec,
           .fno = gen_helper_gvec_sar64i,
-          .opc = INDEX_op_sari_vec,
+          .opt_opc = vecop_list,
           .prefer_i64 = TCG_TARGET_REG_BITS == 64,
           .vece = MO_64 },
     };
@@ -2575,6 +2609,7 @@ void tcg_gen_gvec_cmp(TCGContext *s, TCGCond cond, unsigned vece, uint32_t dofs,
                       uint32_t aofs, uint32_t bofs,
                       uint32_t oprsz, uint32_t maxsz)
 {
+    static const TCGOpcode cmp_list[] = { INDEX_op_cmp_vec, 0 };
     static gen_helper_gvec_3 * const eq_fn[4] = {
         gen_helper_gvec_eq8, gen_helper_gvec_eq16,
         gen_helper_gvec_eq32, gen_helper_gvec_eq64
@@ -2607,6 +2642,8 @@ void tcg_gen_gvec_cmp(TCGContext *s, TCGCond cond, unsigned vece, uint32_t dofs,
         [TCG_COND_LTU] = ltu_fn,
         [TCG_COND_LEU] = leu_fn,
     };
+
+    const TCGOpcode *hold_list;
     TCGType type;
     uint32_t some;
 
@@ -2619,10 +2656,12 @@ void tcg_gen_gvec_cmp(TCGContext *s, TCGCond cond, unsigned vece, uint32_t dofs,
         return;
     }
 
-    /* Implement inline with a vector type, if possible.
+    /*
+     * Implement inline with a vector type, if possible.
      * Prefer integer when 64-bit host and 64-bit comparison.
      */
-    type = choose_vector_type(INDEX_op_cmp_vec, vece, oprsz,
+    hold_list = tcg_swap_vecop_list(s, cmp_list);
+    type = choose_vector_type(cmp_list, vece, oprsz,
                               TCG_TARGET_REG_BITS == 64 && vece == MO_64);
     switch (type) {
     case TCG_TYPE_V256:
@@ -2664,13 +2703,14 @@ void tcg_gen_gvec_cmp(TCGContext *s, TCGCond cond, unsigned vece, uint32_t dofs,
                 assert(fn != NULL);
             }
             tcg_gen_gvec_3_ool(s, dofs, aofs, bofs, oprsz, maxsz, 0, fn[vece]);
-            return;
+            oprsz = maxsz;
        }
        break;
     default:
         g_assert_not_reached();
     }
+    tcg_swap_vecop_list(s, hold_list);
 
     if (oprsz < maxsz) {
         expand_clr(s, dofs + oprsz, maxsz - oprsz);
diff --git a/qemu/tcg/tcg-op-gvec.h b/qemu/tcg/tcg-op-gvec.h
index ec90c691..520e4187 100644
--- a/qemu/tcg/tcg-op-gvec.h
+++ b/qemu/tcg/tcg-op-gvec.h
@@ -92,7 +92,7 @@ typedef struct {
     /* Expand out-of-line helper w/descriptor.  */
     gen_helper_gvec_2 *fno;
     /* The opcode, if any, to which this corresponds.  */
-    TCGOpcode opc;
+    const TCGOpcode *opt_opc;
     /* The data argument to the out-of-line helper.  */
     int32_t data;
     /* The vector element size, if applicable.  */
@@ -113,7 +113,7 @@ typedef struct {
     /* Expand out-of-line helper w/descriptor, data as argument.  */
     gen_helper_gvec_2i *fnoi;
     /* The opcode, if any, to which this corresponds.  */
-    TCGOpcode opc;
+    const TCGOpcode *opt_opc;
     /* The vector element size, if applicable.  */
     uint8_t vece;
     /* Prefer i64 to v64.  */
@@ -132,7 +132,7 @@ typedef struct {
     /* Expand out-of-line helper w/descriptor.  */
     gen_helper_gvec_2i *fno;
     /* The opcode, if any, to which this corresponds.  */
-    TCGOpcode opc;
+    const TCGOpcode *opt_opc;
     /* The data argument to the out-of-line helper.  */
     uint32_t data;
     /* The vector element size, if applicable.  */
@@ -153,7 +153,7 @@ typedef struct {
     /* Expand out-of-line helper w/descriptor.  */
     gen_helper_gvec_3 *fno;
     /* The opcode, if any, to which this corresponds.  */
-    TCGOpcode opc;
+    const TCGOpcode *opt_opc;
     /* The data argument to the out-of-line helper.  */
     int32_t data;
     /* The vector element size, if applicable.  */
@@ -176,7 +176,7 @@ typedef struct {
     /* Expand out-of-line helper w/descriptor, data in descriptor.  */
     gen_helper_gvec_3 *fno;
     /* The opcode, if any, to which this corresponds.  */
-    TCGOpcode opc;
+    const TCGOpcode *opt_opc;
     /* The vector element size, if applicable.  */
     uint8_t vece;
     /* Prefer i64 to v64.  */
@@ -195,7 +195,7 @@ typedef struct {
     /* Expand out-of-line helper w/descriptor.  */
     gen_helper_gvec_4 *fno;
     /* The opcode, if any, to which this corresponds.  */
-    TCGOpcode opc;
+    const TCGOpcode *opt_opc;
     /* The data argument to the out-of-line helper.  */
     int32_t data;
     /* The vector element size, if applicable.  */
diff --git a/qemu/tcg/tcg-op-vec.c b/qemu/tcg/tcg-op-vec.c
index ceed601e..83565d25 100644
--- a/qemu/tcg/tcg-op-vec.c
+++ b/qemu/tcg/tcg-op-vec.c
@@ -35,6 +35,90 @@ extern TCGv_i32 TCGV_HIGH_link_error(TCGContext *, TCGv_i64);
 #define TCGV_HIGH TCGV_HIGH_link_error
 #endif
 
+/*
+ * Vector optional opcode tracking.
+ * Except for the basic logical operations (and, or, xor), and
+ * data movement (mov, ld, st, dupi), many vector opcodes are
+ * optional and may not be supported on the host.  Thank Intel
+ * for the irregularity in their instruction set.
+ *
+ * The gvec expanders allow custom vector operations to be composed,
+ * generally via the .fniv callback in the GVecGen* structures.  At
+ * the same time, in deciding whether to use this hook we need to
+ * know if the host supports the required operations.  This is
+ * presented as an array of opcodes, terminated by 0.  Each opcode
+ * is assumed to be expanded with the given VECE.
+ *
+ * For debugging, we want to validate this array.  Therefore, when
+ * tcg_ctx->vec_opt_opc is non-NULL, the tcg_gen_*_vec expanders
+ * will validate that their opcode is present in the list.
+ */
+#ifdef CONFIG_DEBUG_TCG
+void tcg_assert_listed_vecop(TCGContext *tcg_ctx, TCGOpcode op)
+{
+    const TCGOpcode *p = tcg_ctx->vecop_list;
+    if (p) {
+        for (; *p; ++p) {
+            if (*p == op) {
+                return;
+            }
+        }
+        g_assert_not_reached();
+    }
+}
+#endif
+
+bool tcg_can_emit_vecop_list(const TCGOpcode *list,
+                             TCGType type, unsigned vece)
+{
+    if (list == NULL) {
+        return true;
+    }
+
+    for (; *list; ++list) {
+        TCGOpcode opc = *list;
+
+#ifdef CONFIG_DEBUG_TCG
+        switch (opc) {
+        case INDEX_op_and_vec:
+        case INDEX_op_or_vec:
+        case INDEX_op_xor_vec:
+        case INDEX_op_mov_vec:
+        case INDEX_op_dup_vec:
+        case INDEX_op_dupi_vec:
+        case INDEX_op_dup2_vec:
+        case INDEX_op_ld_vec:
+        case INDEX_op_st_vec:
+            /* These opcodes are mandatory and should not be listed.  */
+            g_assert_not_reached();
+        default:
+            break;
+        }
+#endif
+
+        if (tcg_can_emit_vec_op(opc, type, vece)) {
+            continue;
+        }
+
+        /*
+         * The opcode list is created by front ends based on what they
+         * actually invoke.  We must mirror the logic in the routines
+         * below for generic expansions using other opcodes.
+         */
+        switch (opc) {
+        case INDEX_op_neg_vec:
+            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
+                continue;
+            }
+            break;
+        default:
+            break;
+        }
+        return false;
+    }
+    return true;
+}
+
 void vec_gen_2(TCGContext *s, TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
 {
     TCGOp *op = tcg_emit_op(s, opc);
@@ -297,11 +381,14 @@ static bool do_op2(TCGContext *s, unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpco
     int can;
 
     tcg_debug_assert(at->base_type >= type);
+    tcg_assert_listed_vecop(s, opc);
     can = tcg_can_emit_vec_op(opc, type, vece);
     if (can > 0) {
         vec_gen_2(s, opc, type, vece, ri, ai);
     } else if (can < 0) {
+        const TCGOpcode *hold_list = tcg_swap_vecop_list(s, NULL);
         tcg_expand_vec_op(s, opc, type, vece, ri, ai);
+        tcg_swap_vecop_list(s, hold_list);
     } else {
         return false;
     }
@@ -321,6 +408,11 @@ void tcg_gen_not_vec(TCGContext *s, unsigned vece, TCGv_vec r, TCGv_vec a)
 
 void tcg_gen_neg_vec(TCGContext *s, unsigned vece, TCGv_vec r, TCGv_vec a)
 {
+    const TCGOpcode *hold_list;
+
+    tcg_assert_listed_vecop(s, INDEX_op_neg_vec);
+    hold_list = tcg_swap_vecop_list(s, NULL);
+
     if (!TCG_TARGET_HAS_neg_vec || !do_op2(s, vece, r, a, INDEX_op_neg_vec)) {
         vec_gen_op2(s, INDEX_op_neg_vec, vece, r, a);
     } else {
@@ -328,6 +420,7 @@ void tcg_gen_neg_vec(TCGContext *s, unsigned vece, TCGv_vec r, TCGv_vec a)
         tcg_gen_sub_vec(s, vece, r, t, a);
         tcg_temp_free_vec(s, t);
     }
+    tcg_swap_vecop_list(s, hold_list);
 }
 
 static void do_shifti(TCGContext *s, TCGOpcode opc, unsigned vece,
@@ -342,6 +435,7 @@ static void do_shifti(TCGContext *s, TCGOpcode opc, unsigned vece,
 
     tcg_debug_assert(at->base_type == type);
     tcg_debug_assert(i >= 0 && i < (8 << vece));
+    tcg_assert_listed_vecop(s, opc);
 
     if (i == 0) {
         tcg_gen_mov_vec(s, r, a);
@@ -355,8 +449,10 @@ static void do_shifti(TCGContext *s, TCGOpcode opc, unsigned vece,
         /* We leave the choice of expansion via scalar or vector shift
            to the target.  Often, but not always, dupi can feed a vector
           shift easier than a scalar.  */
+        const TCGOpcode *hold_list = tcg_swap_vecop_list(s, NULL);
         tcg_debug_assert(can < 0);
         tcg_expand_vec_op(s, opc, type, vece, ri, ai, i);
+        tcg_swap_vecop_list(s, hold_list);
     }
 }
 
@@ -389,12 +485,15 @@ void tcg_gen_cmp_vec(TCGContext *s, TCGCond cond, unsigned vece,
     tcg_debug_assert(at->base_type >= type);
     tcg_debug_assert(bt->base_type >= type);
+    tcg_assert_listed_vecop(s, INDEX_op_cmp_vec);
     can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
     if (can > 0) {
         vec_gen_4(s, INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
     } else {
+        const TCGOpcode *hold_list = tcg_swap_vecop_list(s, NULL);
         tcg_debug_assert(can < 0);
         tcg_expand_vec_op(s, INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
+        tcg_swap_vecop_list(s, hold_list);
     }
 }
 
@@ -412,12 +511,15 @@ static void do_op3(TCGContext *s, unsigned vece, TCGv_vec r, TCGv_vec a,
     tcg_debug_assert(at->base_type >= type);
     tcg_debug_assert(bt->base_type >= type);
+    tcg_assert_listed_vecop(s, opc);
     can = tcg_can_emit_vec_op(opc, type, vece);
     if (can > 0) {
         vec_gen_3(s, opc, type, vece, ri, ai, bi);
     } else {
+        const TCGOpcode *hold_list = tcg_swap_vecop_list(s, NULL);
         tcg_debug_assert(can < 0);
         tcg_expand_vec_op(s, opc, type, vece, ri, ai, bi);
+        tcg_swap_vecop_list(s, hold_list);
     }
 }
diff --git a/qemu/tcg/tcg.h b/qemu/tcg/tcg.h
index 235d4f4c..bc0775b0 100644
--- a/qemu/tcg/tcg.h
+++ b/qemu/tcg/tcg.h
@@ -787,6 +787,7 @@ struct TCGContext {
 #ifdef CONFIG_DEBUG_TCG
     int temps_in_use;
     int goto_tb_issue_mask;
+    const TCGOpcode *vecop_list;
 #endif
 
     /* Code generation.  Note that we specifically do not use tcg_insn_unit
@@ -1620,4 +1621,23 @@ void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
 void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
                               TCGMemOpIdx oi, uintptr_t retaddr);
 
+#ifdef CONFIG_DEBUG_TCG
+void tcg_assert_listed_vecop(TCGContext *, TCGOpcode);
+#else
+static inline void tcg_assert_listed_vecop(TCGContext *tcg_ctx, TCGOpcode op) { }
+#endif
+
+static inline const TCGOpcode *tcg_swap_vecop_list(TCGContext * tcg_ctx, const TCGOpcode *n)
+{
+#ifdef CONFIG_DEBUG_TCG
+    const TCGOpcode *o = tcg_ctx->vecop_list;
+    tcg_ctx->vecop_list = n;
+    return o;
+#else
+    return NULL;
+#endif
+}
+
+bool tcg_can_emit_vecop_list(const TCGOpcode *, TCGType, unsigned);
+
 #endif /* TCG_H */
diff --git a/qemu/x86_64.h b/qemu/x86_64.h
index 7d2b0f7c..b200398b 100644
--- a/qemu/x86_64.h
+++ b/qemu/x86_64.h
@@ -2690,7 +2690,9 @@
 #define tcg_add_param_i64 tcg_add_param_i64_x86_64
 #define tcg_add_target_add_op_defs tcg_add_target_add_op_defs_x86_64
 #define tcg_allowed tcg_allowed_x86_64
+#define tcg_assert_listed_vecop tcg_assert_listed_vecop_x86_64
 #define tcg_can_emit_vec_op tcg_can_emit_vec_op_x86_64
+#define tcg_can_emit_vecop_list tcg_can_emit_vecop_list_x86_64
 #define tcg_canonicalize_memop tcg_canonicalize_memop_x86_64
 #define tcg_commit tcg_commit_x86_64
 #define tcg_cond_to_jcc tcg_cond_to_jcc_x86_64
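
A minimal sketch of the convention this patch establishes, written against
this tree's TCGContext-threaded API.  The operation (doubling each element
as x + x) and the names gen_dbl_vec, gen_gvec_dbl32 and gen_helper_gvec_dbl32
are hypothetical, chosen only to illustrate the pattern; they are not part
of this patch:

    /* Composed .fniv expander: it emits INDEX_op_add_vec, so that opcode
       must appear in the 0-terminated .opt_opc list below.  With
       CONFIG_DEBUG_TCG, tcg_gen_add_vec() asserts exactly that while
       this list is active.  */
    static void gen_dbl_vec(TCGContext *s, unsigned vece,
                            TCGv_vec d, TCGv_vec a)
    {
        tcg_gen_add_vec(s, vece, d, a, a);
    }

    static void gen_gvec_dbl32(TCGContext *s, uint32_t dofs, uint32_t aofs,
                               uint32_t oprsz, uint32_t maxsz)
    {
        /* Every optional opcode the .fniv hook may emit, terminated by 0.
           tcg_gen_gvec_2() swaps this list in around the expansion, and
           choose_vector_type() passes it to tcg_can_emit_vecop_list()
           when selecting a vector type.  */
        static const TCGOpcode vecop_list[] = { INDEX_op_add_vec, 0 };
        static const GVecGen2 g = {
            .fniv = gen_dbl_vec,
            .fno = gen_helper_gvec_dbl32,  /* hypothetical ool fallback */
            .opt_opc = vecop_list,
            .vece = MO_32,
        };
        tcg_gen_gvec_2(s, dofs, aofs, oprsz, maxsz, &g);
    }

Leaving .opt_opc NULL, as the gvec_and/or/xor cases above now do, means
"no optional opcodes required"; under CONFIG_DEBUG_TCG a present but empty
list would instead trip the assertion on any optional tcg_gen_*_vec call
made during expansion.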