diff --git a/qemu/tcg/i386/tcg-target.inc.c b/qemu/tcg/i386/tcg-target.inc.c
index cd7fc14e..7b714a75 100644
--- a/qemu/tcg/i386/tcg-target.inc.c
+++ b/qemu/tcg/i386/tcg-target.inc.c
@@ -917,6 +917,38 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
     return true;
 }

+static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
+                             TCGReg r, TCGReg base, intptr_t offset)
+{
+    if (have_avx2) {
+        int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
+        tcg_out_vex_modrm_offset(s, avx2_dup_insn[vece] + vex_l,
+                                 r, 0, base, offset);
+    } else {
+        switch (vece) {
+        case MO_64:
+            tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSD, r, 0, base, offset);
+            break;
+        case MO_32:
+            tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset);
+            break;
+        case MO_16:
+            tcg_out_vex_modrm_offset(s, OPC_VPINSRW, r, r, base, offset);
+            tcg_out8(s, 0); /* imm8 */
+            tcg_out_dup_vec(s, type, vece, r, r);
+            break;
+        case MO_8:
+            tcg_out_vex_modrm_offset(s, OPC_VPINSRB, r, r, base, offset);
+            tcg_out8(s, 0); /* imm8 */
+            tcg_out_dup_vec(s, type, vece, r, r);
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
+    return true;
+}
+
 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
                              TCGReg ret, tcg_target_long arg)
 {
@@ -950,38 +982,6 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
     }
 }

-static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
-                             TCGReg r, TCGReg base, intptr_t offset)
-{
-    if (have_avx2) {
-        int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
-        tcg_out_vex_modrm_offset(s, avx2_dup_insn[vece] + vex_l,
-                                 r, 0, base, offset);
-    } else {
-        switch (vece) {
-        case MO_64:
-            tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSD, r, 0, base, offset);
-            break;
-        case MO_32:
-            tcg_out_vex_modrm_offset(s, OPC_VBROADCASTSS, r, 0, base, offset);
-            break;
-        case MO_16:
-            tcg_out_vex_modrm_offset(s, OPC_VPINSRW, r, r, base, offset);
-            tcg_out8(s, 0); /* imm8 */
-            tcg_out_dup_vec(s, type, vece, r, r);
-            break;
-        case MO_8:
-            tcg_out_vex_modrm_offset(s, OPC_VPINSRB, r, r, base, offset);
-            tcg_out8(s, 0); /* imm8 */
-            tcg_out_dup_vec(s, type, vece, r, r);
-            break;
-        default:
-            g_assert_not_reached();
-        }
-    }
-    return true;
-}
-
 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
                          tcg_target_long arg)
 {
@@ -3111,7 +3111,6 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
     case INDEX_op_ctpop_i32:
     case INDEX_op_ctpop_i64:
         return &r_r;
-
     case INDEX_op_extract2_i32:
     case INDEX_op_extract2_i64:
         return &r_0_r;
diff --git a/qemu/tcg/tcg-op-gvec.c b/qemu/tcg/tcg-op-gvec.c
index 66ccc94f..4b9cfaa9 100644
--- a/qemu/tcg/tcg-op-gvec.c
+++ b/qemu/tcg/tcg-op-gvec.c
@@ -371,7 +371,6 @@ static TCGType choose_vector_type(const TCGOpcode *list, unsigned vece,
                                   uint32_t size, bool prefer_i64)
 {
     if (TCG_TARGET_HAS_v256 && check_size_impl(size, 32)) {
-
         /*
          * Recall that ARM SVE allows vector sizes that are not a
          * power of 2, but always a multiple of 16. The intent is
@@ -1354,7 +1353,6 @@ void tcg_gen_gvec_4(TCGContext *s, uint32_t dofs, uint32_t aofs, uint32_t bofs,
          * that e.g. size == 80 would be expanded with 2x32 + 1x16.
          */
         some = QEMU_ALIGN_DOWN(oprsz, 32);
-        uint32_t some = QEMU_ALIGN_DOWN(oprsz, 32);
         expand_4_vec(s, g->vece, dofs, aofs, bofs, cofs, some,
                      32, TCG_TYPE_V256, g->write_aofs, g->fniv);
         if (some == oprsz) {
diff --git a/qemu/tcg/tcg-op-gvec.h b/qemu/tcg/tcg-op-gvec.h
index 86977903..4085db15 100644
--- a/qemu/tcg/tcg-op-gvec.h
+++ b/qemu/tcg/tcg-op-gvec.h
@@ -91,7 +91,7 @@ typedef struct {
     void (*fniv)(TCGContext *, unsigned, TCGv_vec, TCGv_vec);
     /* Expand out-of-line helper w/descriptor. */
     gen_helper_gvec_2 *fno;
-    /* The opcode, if any, to which this corresponds. */
+    /* The optional opcodes, if any, utilized by .fniv. */
     const TCGOpcode *opt_opc;
     /* The data argument to the out-of-line helper. */
     int32_t data;
@@ -112,7 +112,7 @@ typedef struct {
     gen_helper_gvec_2 *fno;
     /* Expand out-of-line helper w/descriptor, data as argument. */
     gen_helper_gvec_2i *fnoi;
-    /* The opcode, if any, to which this corresponds. */
+    /* The optional opcodes, if any, utilized by .fniv. */
     const TCGOpcode *opt_opc;
     /* The vector element size, if applicable. */
     uint8_t vece;
@@ -131,7 +131,7 @@ typedef struct {
     void (*fniv)(TCGContext *, unsigned, TCGv_vec, TCGv_vec, TCGv_vec);
     /* Expand out-of-line helper w/descriptor. */
     gen_helper_gvec_2i *fno;
-    /* The opcode, if any, to which this corresponds. */
+    /* The optional opcodes, if any, utilized by .fniv. */
     const TCGOpcode *opt_opc;
     /* The data argument to the out-of-line helper. */
     uint32_t data;
@@ -152,7 +152,7 @@ typedef struct {
     void (*fniv)(TCGContext *, unsigned, TCGv_vec, TCGv_vec, TCGv_vec);
     /* Expand out-of-line helper w/descriptor. */
     gen_helper_gvec_3 *fno;
-    /* The opcode, if any, to which this corresponds. */
+    /* The optional opcodes, if any, utilized by .fniv. */
     const TCGOpcode *opt_opc;
     /* The data argument to the out-of-line helper. */
     int32_t data;
@@ -175,7 +175,7 @@ typedef struct {
     void (*fniv)(TCGContext *, unsigned, TCGv_vec, TCGv_vec, TCGv_vec, int64_t);
     /* Expand out-of-line helper w/descriptor, data in descriptor. */
     gen_helper_gvec_3 *fno;
-    /* The opcode, if any, to which this corresponds. */
+    /* The optional opcodes, if any, utilized by .fniv. */
     const TCGOpcode *opt_opc;
     /* The vector element size, if applicable. */
     uint8_t vece;
@@ -194,7 +194,7 @@ typedef struct {
     void (*fniv)(TCGContext *s, unsigned, TCGv_vec, TCGv_vec, TCGv_vec, TCGv_vec);
     /* Expand out-of-line helper w/descriptor. */
     gen_helper_gvec_4 *fno;
-    /* The opcode, if any, to which this corresponds. */
+    /* The optional opcodes, if any, utilized by .fniv. */
     const TCGOpcode *opt_opc;
     /* The data argument to the out-of-line helper. */
     int32_t data;
@@ -308,6 +308,11 @@ void tcg_gen_gvec_dup_i32(TCGContext *, unsigned vece, uint32_t dofs, uint32_t s
 void tcg_gen_gvec_dup_i64(TCGContext *, unsigned vece, uint32_t dofs, uint32_t s,
                           uint32_t m, TCGv_i64);

+void tcg_gen_gvec_dup8i(TCGContext *, uint32_t dofs, uint32_t s, uint32_t m, uint8_t x);
+void tcg_gen_gvec_dup16i(TCGContext *, uint32_t dofs, uint32_t s, uint32_t m, uint16_t x);
+void tcg_gen_gvec_dup32i(TCGContext *, uint32_t dofs, uint32_t s, uint32_t m, uint32_t x);
+void tcg_gen_gvec_dup64i(TCGContext *, uint32_t dofs, uint32_t s, uint32_t m, uint64_t x);
+
 void tcg_gen_gvec_shli(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
                        int64_t shift, uint32_t oprsz, uint32_t maxsz);
 void tcg_gen_gvec_shri(TCGContext *s, unsigned vece, uint32_t dofs, uint32_t aofs,
@@ -337,11 +342,6 @@ void tcg_gen_gvec_cmp(TCGContext *s, TCGCond cond, unsigned vece,
                       uint32_t dofs, uint32_t aofs,
                       uint32_t bofs, uint32_t oprsz, uint32_t maxsz);

-void tcg_gen_gvec_dup8i(TCGContext *, uint32_t dofs, uint32_t s, uint32_t m, uint8_t x);
-void tcg_gen_gvec_dup16i(TCGContext *, uint32_t dofs, uint32_t s, uint32_t m, uint16_t x);
-void tcg_gen_gvec_dup32i(TCGContext *, uint32_t dofs, uint32_t s, uint32_t m, uint32_t x);
-void tcg_gen_gvec_dup64i(TCGContext *, uint32_t dofs, uint32_t s, uint32_t m, uint64_t x);
-
 /*
  * 64-bit vector operations. Use these when the register has been allocated
  * with tcg_global_mem_new_i64, and so we cannot also address it via pointer.
diff --git a/qemu/tcg/tcg-op-vec.c b/qemu/tcg/tcg-op-vec.c
index 97b74ef7..706d6726 100644
--- a/qemu/tcg/tcg-op-vec.c
+++ b/qemu/tcg/tcg-op-vec.c
@@ -417,8 +417,6 @@ static bool do_op2(TCGContext *s, unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
 void tcg_gen_not_vec(TCGContext *s, unsigned vece, TCGv_vec r, TCGv_vec a)
 {
     if (!TCG_TARGET_HAS_not_vec || !do_op2(s, vece, r, a, INDEX_op_not_vec)) {
-        vec_gen_op2(s, INDEX_op_not_vec, 0, r, a);
-    } else {
         TCGv_vec t = tcg_const_ones_vec_matching(s, r);
         tcg_gen_xor_vec(s, 0, r, a, t);
         tcg_temp_free_vec(s, t);
@@ -433,8 +431,6 @@ void tcg_gen_neg_vec(TCGContext *s, unsigned vece, TCGv_vec r, TCGv_vec a)
     hold_list = tcg_swap_vecop_list(s, NULL);

     if (!TCG_TARGET_HAS_neg_vec || !do_op2(s, vece, r, a, INDEX_op_neg_vec)) {
-        vec_gen_op2(s, INDEX_op_neg_vec, vece, r, a);
-    } else {
         TCGv_vec t = tcg_const_zeros_vec_matching(s, r);
         tcg_gen_sub_vec(s, vece, r, t, a);
         tcg_temp_free_vec(s, t);
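
Note on the tcg/tcg-op-vec.c hunks: before this change the branches were inverted, so the native INDEX_op_not_vec/INDEX_op_neg_vec opcode was emitted exactly when the backend could not honor it, and the pure-TCG expansion ran when the native op was available. After the change, the expansion runs only when do_op2() fails. Below is a minimal standalone C sketch of the fixed control flow and of the identity the not-expansion relies on (~a == a ^ ~0, applied lane by lane). It is not QEMU API: has_native_not, emit_native_not() and gen_not_vec() are hypothetical stand-ins for TCG_TARGET_HAS_not_vec, do_op2() and tcg_gen_not_vec(), operating on a plain byte array instead of TCG vectors.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define LANES 16                    /* one 128-bit vector of byte lanes */

/* Hypothetical capability flag; stands in for TCG_TARGET_HAS_not_vec. */
static const bool has_native_not = false;

/*
 * Stand-in for do_op2(..., INDEX_op_not_vec): try to emit the native
 * opcode and report whether that succeeded.
 */
static bool emit_native_not(uint8_t r[LANES], const uint8_t a[LANES])
{
    if (!has_native_not) {
        return false;               /* backend cannot honor the opcode */
    }
    for (int i = 0; i < LANES; i++) {
        r[i] = (uint8_t)~a[i];
    }
    return true;
}

/* Mirrors the fixed tcg_gen_not_vec(): expand only if the native op fails. */
static void gen_not_vec(uint8_t r[LANES], const uint8_t a[LANES])
{
    if (!emit_native_not(r, a)) {
        uint8_t ones[LANES];        /* plays the role of tcg_const_ones_vec_matching */
        memset(ones, 0xff, sizeof(ones));
        for (int i = 0; i < LANES; i++) {
            r[i] = a[i] ^ ones[i];  /* ~a == a ^ ~0, as in the tcg_gen_xor_vec fallback */
        }
    }
}

int main(void)
{
    uint8_t a[LANES], r[LANES];
    for (int i = 0; i < LANES; i++) {
        a[i] = (uint8_t)i;
    }
    gen_not_vec(r, a);
    printf("lane 1: ~0x01 = 0x%02x\n", r[1]);   /* prints 0xfe */
    return 0;
}

tcg_gen_neg_vec() gets the same shape in the patch, with tcg_const_zeros_vec_matching() and tcg_gen_sub_vec() (0 - a) as the fallback pair.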