mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2025-01-11 10:05:33 +00:00
tcg/i386: Split subroutines out of tcg_expand_vec_op
This routine was becoming too large. Backports commit 44f1441dbe14e7174a707d7e7ecbc2c8e080bfda from QEMU.
This commit is contained in:
parent
fb684825c8
commit
24e65f60ed
|
@ -3189,22 +3189,16 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void tcg_expand_vec_op(TCGContext *s, TCGOpcode opc, TCGType type, unsigned vece,
|
static void expand_vec_shi(TCGContext *s, TCGType type, unsigned vece, bool shr,
|
||||||
TCGArg a0, ...)
|
TCGv_vec v0, TCGv_vec v1, TCGArg imm)
|
||||||
{
|
{
|
||||||
va_list va;
|
TCGv_vec t1, t2;
|
||||||
TCGArg a1, a2;
|
|
||||||
TCGv_vec v0, t1, t2, t3, t4;
|
|
||||||
|
|
||||||
va_start(va, a0);
|
|
||||||
v0 = temp_tcgv_vec(s, arg_temp(a0));
|
|
||||||
|
|
||||||
switch (opc) {
|
|
||||||
case INDEX_op_shli_vec:
|
|
||||||
case INDEX_op_shri_vec:
|
|
||||||
tcg_debug_assert(vece == MO_8);
|
tcg_debug_assert(vece == MO_8);
|
||||||
a1 = va_arg(va, TCGArg);
|
|
||||||
a2 = va_arg(va, TCGArg);
|
t1 = tcg_temp_new_vec(s, type);
|
||||||
|
t2 = tcg_temp_new_vec(s, type);
|
||||||
|
|
||||||
/* Unpack to W, shift, and repack. Tricky bits:
|
/* Unpack to W, shift, and repack. Tricky bits:
|
||||||
(1) Use punpck*bw x,x to produce DDCCBBAA,
|
(1) Use punpck*bw x,x to produce DDCCBBAA,
|
||||||
i.e. duplicate in other half of the 16-bit lane.
|
i.e. duplicate in other half of the 16-bit lane.
|
||||||
|
@ -3214,155 +3208,135 @@ void tcg_expand_vec_op(TCGContext *s, TCGOpcode opc, TCGType type, unsigned vece
|
||||||
(3) Step 2 leaves high half zero such that PACKUSWB
|
(3) Step 2 leaves high half zero such that PACKUSWB
|
||||||
(pack with unsigned saturation) does not modify
|
(pack with unsigned saturation) does not modify
|
||||||
the quantity. */
|
the quantity. */
|
||||||
t1 = tcg_temp_new_vec(s, type);
|
|
||||||
t2 = tcg_temp_new_vec(s, type);
|
|
||||||
vec_gen_3(s, INDEX_op_x86_punpckl_vec, type, MO_8,
|
vec_gen_3(s, INDEX_op_x86_punpckl_vec, type, MO_8,
|
||||||
tcgv_vec_arg(s, t1), a1, a1);
|
tcgv_vec_arg(s, t1), tcgv_vec_arg(s, v1), tcgv_vec_arg(s, v1));
|
||||||
vec_gen_3(s, INDEX_op_x86_punpckh_vec, type, MO_8,
|
vec_gen_3(s, INDEX_op_x86_punpckh_vec, type, MO_8,
|
||||||
tcgv_vec_arg(s, t2), a1, a1);
|
tcgv_vec_arg(s, t2), tcgv_vec_arg(s, v1), tcgv_vec_arg(s, v1));
|
||||||
if (opc == INDEX_op_shri_vec) {
|
|
||||||
vec_gen_3(s, INDEX_op_shri_vec, type, MO_16,
|
if (shr) {
|
||||||
tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t1), a2 + 8);
|
tcg_gen_shri_vec(s, MO_16, t1, t1, imm + 8);
|
||||||
vec_gen_3(s, INDEX_op_shri_vec, type, MO_16,
|
tcg_gen_shri_vec(s, MO_16, t2, t2, imm + 8);
|
||||||
tcgv_vec_arg(s, t2), tcgv_vec_arg(s, t2), a2 + 8);
|
|
||||||
} else {
|
} else {
|
||||||
vec_gen_3(s, INDEX_op_shli_vec, type, MO_16,
|
tcg_gen_shli_vec(s, MO_16, t1, t1, imm + 8);
|
||||||
tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t1), a2 + 8);
|
tcg_gen_shli_vec(s, MO_16, t2, t2, imm + 8);
|
||||||
vec_gen_3(s, INDEX_op_shli_vec, type, MO_16,
|
tcg_gen_shri_vec(s, MO_16, t1, t1, 8);
|
||||||
tcgv_vec_arg(s, t2), tcgv_vec_arg(s, t2), a2 + 8);
|
tcg_gen_shri_vec(s, MO_16, t2, t2, 8);
|
||||||
vec_gen_3(s, INDEX_op_shri_vec, type, MO_16,
|
|
||||||
tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t1), 8);
|
|
||||||
vec_gen_3(s, INDEX_op_shri_vec, type, MO_16,
|
|
||||||
tcgv_vec_arg(s, t2), tcgv_vec_arg(s, t2), 8);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
vec_gen_3(s, INDEX_op_x86_packus_vec, type, MO_8,
|
vec_gen_3(s, INDEX_op_x86_packus_vec, type, MO_8,
|
||||||
a0, tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t2));
|
tcgv_vec_arg(s, v0), tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t2));
|
||||||
tcg_temp_free_vec(s, t1);
|
tcg_temp_free_vec(s, t1);
|
||||||
tcg_temp_free_vec(s, t2);
|
tcg_temp_free_vec(s, t2);
|
||||||
break;
|
}
|
||||||
|
|
||||||
case INDEX_op_sari_vec:
|
static void expand_vec_sari(TCGContext *s, TCGType type, unsigned vece,
|
||||||
a1 = va_arg(va, TCGArg);
|
TCGv_vec v0, TCGv_vec v1, TCGArg imm)
|
||||||
a2 = va_arg(va, TCGArg);
|
{
|
||||||
if (vece == MO_8) {
|
TCGv_vec t1, t2;
|
||||||
/* Unpack to W, shift, and repack, as above. */
|
|
||||||
|
switch (vece) {
|
||||||
|
case MO_8:
|
||||||
|
/* Unpack to W, shift, and repack, as in expand_vec_shi. */
|
||||||
t1 = tcg_temp_new_vec(s, type);
|
t1 = tcg_temp_new_vec(s, type);
|
||||||
t2 = tcg_temp_new_vec(s, type);
|
t2 = tcg_temp_new_vec(s, type);
|
||||||
vec_gen_3(s, INDEX_op_x86_punpckl_vec, type, MO_8,
|
vec_gen_3(s, INDEX_op_x86_punpckl_vec, type, MO_8,
|
||||||
tcgv_vec_arg(s, t1), a1, a1);
|
tcgv_vec_arg(s, t1), tcgv_vec_arg(s, v1), tcgv_vec_arg(s, v1));
|
||||||
vec_gen_3(s, INDEX_op_x86_punpckh_vec, type, MO_8,
|
vec_gen_3(s, INDEX_op_x86_punpckh_vec, type, MO_8,
|
||||||
tcgv_vec_arg(s, t2), a1, a1);
|
tcgv_vec_arg(s, t2), tcgv_vec_arg(s, v1), tcgv_vec_arg(s, v1));
|
||||||
vec_gen_3(s, INDEX_op_sari_vec, type, MO_16,
|
tcg_gen_sari_vec(s, MO_16, t1, t1, imm + 8);
|
||||||
tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t1), a2 + 8);
|
tcg_gen_sari_vec(s, MO_16, t2, t2, imm + 8);
|
||||||
vec_gen_3(s, INDEX_op_sari_vec, type, MO_16,
|
|
||||||
tcgv_vec_arg(s, t2), tcgv_vec_arg(s, t2), a2 + 8);
|
|
||||||
vec_gen_3(s, INDEX_op_x86_packss_vec, type, MO_8,
|
vec_gen_3(s, INDEX_op_x86_packss_vec, type, MO_8,
|
||||||
a0, tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t2));
|
tcgv_vec_arg(s, v0), tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t2));
|
||||||
tcg_temp_free_vec(s, t1);
|
tcg_temp_free_vec(s, t1);
|
||||||
tcg_temp_free_vec(s, t2);
|
tcg_temp_free_vec(s, t2);
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
tcg_debug_assert(vece == MO_64);
|
|
||||||
/* MO_64: If the shift is <= 32, we can emulate the sign extend by
|
|
||||||
performing an arithmetic 32-bit shift and overwriting the high
|
|
||||||
half of the result (note that the ISA says shift of 32 is valid). */
|
|
||||||
if (a2 <= 32) {
|
|
||||||
t1 = tcg_temp_new_vec(s, type);
|
|
||||||
vec_gen_3(s, INDEX_op_sari_vec, type, MO_32, tcgv_vec_arg(s, t1), a1, a2);
|
|
||||||
vec_gen_3(s, INDEX_op_shri_vec, type, MO_64, a0, a1, a2);
|
|
||||||
vec_gen_4(s, INDEX_op_x86_blend_vec, type, MO_32,
|
|
||||||
a0, a0, tcgv_vec_arg(s, t1), 0xaa);
|
|
||||||
tcg_temp_free_vec(s, t1);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
/* Otherwise we will need to use a compare vs 0 to produce the
|
|
||||||
sign-extend, shift and merge. */
|
|
||||||
t1 = tcg_temp_new_vec(s, type);
|
|
||||||
t2 = tcg_const_zeros_vec(s, type);
|
|
||||||
vec_gen_4(s, INDEX_op_cmp_vec, type, MO_64,
|
|
||||||
tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t2), a1, TCG_COND_GT);
|
|
||||||
tcg_temp_free_vec(s, t2);
|
|
||||||
vec_gen_3(s, INDEX_op_shri_vec, type, MO_64, a0, a1, a2);
|
|
||||||
vec_gen_3(s, INDEX_op_shli_vec, type, MO_64,
|
|
||||||
tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t1), 64 - a2);
|
|
||||||
vec_gen_3(s, INDEX_op_or_vec, type, MO_64, a0, a0, tcgv_vec_arg(s, t1));
|
|
||||||
tcg_temp_free_vec(s, t1);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case INDEX_op_mul_vec:
|
case MO_64:
|
||||||
|
if (imm <= 32) {
|
||||||
|
/* We can emulate a small sign extend by performing an arithmetic
|
||||||
|
* 32-bit shift and overwriting the high half of a 64-bit logical
|
||||||
|
* shift (note that the ISA says shift of 32 is valid).
|
||||||
|
*/
|
||||||
|
t1 = tcg_temp_new_vec(s, type);
|
||||||
|
tcg_gen_sari_vec(s, MO_32, t1, v1, imm);
|
||||||
|
tcg_gen_shri_vec(s, MO_64, v0, v1, imm);
|
||||||
|
vec_gen_4(s, INDEX_op_x86_blend_vec, type, MO_32,
|
||||||
|
tcgv_vec_arg(s, v0), tcgv_vec_arg(s, v0),
|
||||||
|
tcgv_vec_arg(s, t1), 0xaa);
|
||||||
|
tcg_temp_free_vec(s, t1);
|
||||||
|
} else {
|
||||||
|
/* Otherwise we will need to use a compare vs 0 to produce
|
||||||
|
* the sign-extend, shift and merge.
|
||||||
|
*/
|
||||||
|
t1 = tcg_const_zeros_vec(s, type);
|
||||||
|
tcg_gen_cmp_vec(s, TCG_COND_GT, MO_64, t1, t1, v1);
|
||||||
|
tcg_gen_shri_vec(s, MO_64, v0, v1, imm);
|
||||||
|
tcg_gen_shli_vec(s, MO_64, t1, t1, 64 - imm);
|
||||||
|
tcg_gen_or_vec(s, MO_64, v0, v0, t1);
|
||||||
|
tcg_temp_free_vec(s, t1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
g_assert_not_reached();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void expand_vec_mul(TCGContext *s, TCGType type, unsigned vece,
|
||||||
|
TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
|
||||||
|
{
|
||||||
|
TCGv_vec t1, t2, t3, t4;
|
||||||
|
|
||||||
tcg_debug_assert(vece == MO_8);
|
tcg_debug_assert(vece == MO_8);
|
||||||
a1 = va_arg(va, TCGArg);
|
|
||||||
a2 = va_arg(va, TCGArg);
|
/*
|
||||||
|
* Unpack v1 bytes to words, 0 | x.
|
||||||
|
* Unpack v2 bytes to words, y | 0.
|
||||||
|
* This leaves the 8-bit result, x * y, with 8 bits of right padding.
|
||||||
|
* Shift logical right by 8 bits to clear the high 8 bits of each word before
|
||||||
|
* using an unsigned saturated pack.
|
||||||
|
*
|
||||||
|
* The difference between the V64, V128 and V256 cases is merely how
|
||||||
|
* we distribute the expansion between temporaries.
|
||||||
|
*/
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case TCG_TYPE_V64:
|
case TCG_TYPE_V64:
|
||||||
t1 = tcg_temp_new_vec(s, TCG_TYPE_V128);
|
t1 = tcg_temp_new_vec(s, TCG_TYPE_V128);
|
||||||
t2 = tcg_temp_new_vec(s, TCG_TYPE_V128);
|
t2 = tcg_temp_new_vec(s, TCG_TYPE_V128);
|
||||||
tcg_gen_dup16i_vec(s, t2, 0);
|
tcg_gen_dup16i_vec(s, t2, 0);
|
||||||
vec_gen_3(s, INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
|
vec_gen_3(s, INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
|
||||||
tcgv_vec_arg(s, t1), a1, tcgv_vec_arg(s, t2));
|
tcgv_vec_arg(s, t1), tcgv_vec_arg(s, v1), tcgv_vec_arg(s, t2));
|
||||||
vec_gen_3(s, INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
|
vec_gen_3(s, INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
|
||||||
tcgv_vec_arg(s, t2), tcgv_vec_arg(s, t2), a2);
|
tcgv_vec_arg(s, t2), tcgv_vec_arg(s, t2), tcgv_vec_arg(s, v2));
|
||||||
tcg_gen_mul_vec(s, MO_16, t1, t1, t2);
|
tcg_gen_mul_vec(s, MO_16, t1, t1, t2);
|
||||||
tcg_gen_shri_vec(s, MO_16, t1, t1, 8);
|
tcg_gen_shri_vec(s, MO_16, t1, t1, 8);
|
||||||
vec_gen_3(s, INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8,
|
vec_gen_3(s, INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8,
|
||||||
a0, tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t1));
|
tcgv_vec_arg(s, v0), tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t1));
|
||||||
tcg_temp_free_vec(s, t1);
|
tcg_temp_free_vec(s, t1);
|
||||||
tcg_temp_free_vec(s, t2);
|
tcg_temp_free_vec(s, t2);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TCG_TYPE_V128:
|
case TCG_TYPE_V128:
|
||||||
t1 = tcg_temp_new_vec(s, TCG_TYPE_V128);
|
|
||||||
t2 = tcg_temp_new_vec(s, TCG_TYPE_V128);
|
|
||||||
t3 = tcg_temp_new_vec(s, TCG_TYPE_V128);
|
|
||||||
t4 = tcg_temp_new_vec(s, TCG_TYPE_V128);
|
|
||||||
tcg_gen_dup16i_vec(s, t4, 0);
|
|
||||||
vec_gen_3(s, INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
|
|
||||||
tcgv_vec_arg(s, t1), a1, tcgv_vec_arg(s, t4));
|
|
||||||
vec_gen_3(s, INDEX_op_x86_punpckl_vec, TCG_TYPE_V128, MO_8,
|
|
||||||
tcgv_vec_arg(s, t2), tcgv_vec_arg(s, t4), a2);
|
|
||||||
vec_gen_3(s, INDEX_op_x86_punpckh_vec, TCG_TYPE_V128, MO_8,
|
|
||||||
tcgv_vec_arg(s, t3), a1, tcgv_vec_arg(s, t4));
|
|
||||||
vec_gen_3(s, INDEX_op_x86_punpckh_vec, TCG_TYPE_V128, MO_8,
|
|
||||||
tcgv_vec_arg(s, t4), tcgv_vec_arg(s, t4), a2);
|
|
||||||
tcg_gen_mul_vec(s, MO_16, t1, t1, t2);
|
|
||||||
tcg_gen_mul_vec(s, MO_16, t3, t3, t4);
|
|
||||||
tcg_gen_shri_vec(s, MO_16, t1, t1, 8);
|
|
||||||
tcg_gen_shri_vec(s, MO_16, t3, t3, 8);
|
|
||||||
vec_gen_3(s, INDEX_op_x86_packus_vec, TCG_TYPE_V128, MO_8,
|
|
||||||
a0, tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t3));
|
|
||||||
tcg_temp_free_vec(s, t1);
|
|
||||||
tcg_temp_free_vec(s, t2);
|
|
||||||
tcg_temp_free_vec(s, t3);
|
|
||||||
tcg_temp_free_vec(s, t4);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case TCG_TYPE_V256:
|
case TCG_TYPE_V256:
|
||||||
t1 = tcg_temp_new_vec(s, TCG_TYPE_V256);
|
t1 = tcg_temp_new_vec(s, type);
|
||||||
t2 = tcg_temp_new_vec(s, TCG_TYPE_V256);
|
t2 = tcg_temp_new_vec(s, type);
|
||||||
t3 = tcg_temp_new_vec(s, TCG_TYPE_V256);
|
t3 = tcg_temp_new_vec(s, type);
|
||||||
t4 = tcg_temp_new_vec(s, TCG_TYPE_V256);
|
t4 = tcg_temp_new_vec(s, type);
|
||||||
tcg_gen_dup16i_vec(s, t4, 0);
|
tcg_gen_dup16i_vec(s, t4, 0);
|
||||||
/* a1: A[0-7] ... D[0-7]; a2: W[0-7] ... Z[0-7]
|
vec_gen_3(s, INDEX_op_x86_punpckl_vec, type, MO_8,
|
||||||
t1: extends of B[0-7], D[0-7]
|
tcgv_vec_arg(s, t1), tcgv_vec_arg(s, v1), tcgv_vec_arg(s, t4));
|
||||||
t2: extends of X[0-7], Z[0-7]
|
vec_gen_3(s, INDEX_op_x86_punpckl_vec, type, MO_8,
|
||||||
t3: extends of A[0-7], C[0-7]
|
tcgv_vec_arg(s, t2), tcgv_vec_arg(s, t4), tcgv_vec_arg(s, v2));
|
||||||
t4: extends of W[0-7], Y[0-7]. */
|
vec_gen_3(s, INDEX_op_x86_punpckh_vec, type, MO_8,
|
||||||
vec_gen_3(s, INDEX_op_x86_punpckl_vec, TCG_TYPE_V256, MO_8,
|
tcgv_vec_arg(s, t3), tcgv_vec_arg(s, v1), tcgv_vec_arg(s, t4));
|
||||||
tcgv_vec_arg(s, t1), a1, tcgv_vec_arg(s, t4));
|
vec_gen_3(s, INDEX_op_x86_punpckh_vec, type, MO_8,
|
||||||
vec_gen_3(s, INDEX_op_x86_punpckl_vec, TCG_TYPE_V256, MO_8,
|
tcgv_vec_arg(s, t4), tcgv_vec_arg(s, t4), tcgv_vec_arg(s, v2));
|
||||||
tcgv_vec_arg(s, t2), tcgv_vec_arg(s, t4), a2);
|
|
||||||
vec_gen_3(s, INDEX_op_x86_punpckh_vec, TCG_TYPE_V256, MO_8,
|
|
||||||
tcgv_vec_arg(s, t3), a1, tcgv_vec_arg(s, t4));
|
|
||||||
vec_gen_3(s, INDEX_op_x86_punpckh_vec, TCG_TYPE_V256, MO_8,
|
|
||||||
tcgv_vec_arg(s, t4), tcgv_vec_arg(s, t4), a2);
|
|
||||||
/* t1: BX DZ; t2: AW CY. */
|
|
||||||
tcg_gen_mul_vec(s, MO_16, t1, t1, t2);
|
tcg_gen_mul_vec(s, MO_16, t1, t1, t2);
|
||||||
tcg_gen_mul_vec(s, MO_16, t3, t3, t4);
|
tcg_gen_mul_vec(s, MO_16, t3, t3, t4);
|
||||||
tcg_gen_shri_vec(s, MO_16, t1, t1, 8);
|
tcg_gen_shri_vec(s, MO_16, t1, t1, 8);
|
||||||
tcg_gen_shri_vec(s, MO_16, t3, t3, 8);
|
tcg_gen_shri_vec(s, MO_16, t3, t3, 8);
|
||||||
/* a0: AW BX CY DZ. */
|
vec_gen_3(s, INDEX_op_x86_packus_vec, type, MO_8,
|
||||||
vec_gen_3(s, INDEX_op_x86_packus_vec, TCG_TYPE_V256, MO_8,
|
tcgv_vec_arg(s, v0), tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t3));
|
||||||
a0, tcgv_vec_arg(s, t1), tcgv_vec_arg(s, t3));
|
|
||||||
tcg_temp_free_vec(s, t1);
|
tcg_temp_free_vec(s, t1);
|
||||||
tcg_temp_free_vec(s, t2);
|
tcg_temp_free_vec(s, t2);
|
||||||
tcg_temp_free_vec(s, t3);
|
tcg_temp_free_vec(s, t3);
|
||||||
|
@ -3372,9 +3346,10 @@ void tcg_expand_vec_op(TCGContext *s, TCGOpcode opc, TCGType type, unsigned vece
|
||||||
default:
|
default:
|
||||||
g_assert_not_reached();
|
g_assert_not_reached();
|
||||||
}
|
}
|
||||||
break;
|
}
|
||||||
|
|
||||||
case INDEX_op_cmp_vec:
|
static void expand_vec_cmp(TCGContext *s, TCGType type, unsigned vece, TCGv_vec v0,
|
||||||
|
TCGv_vec v1, TCGv_vec v2, TCGCond cond)
|
||||||
{
|
{
|
||||||
enum {
|
enum {
|
||||||
NEED_SWAP = 1,
|
NEED_SWAP = 1,
|
||||||
|
@ -3394,13 +3369,9 @@ void tcg_expand_vec_op(TCGContext *s, TCGOpcode opc, TCGType type, unsigned vece
|
||||||
[TCG_COND_LEU] = NEED_BIAS | NEED_INV,
|
[TCG_COND_LEU] = NEED_BIAS | NEED_INV,
|
||||||
[TCG_COND_GEU] = NEED_BIAS | NEED_SWAP | NEED_INV,
|
[TCG_COND_GEU] = NEED_BIAS | NEED_SWAP | NEED_INV,
|
||||||
};
|
};
|
||||||
|
TCGv_vec t1, t2;
|
||||||
TCGCond cond;
|
|
||||||
uint8_t fixup;
|
uint8_t fixup;
|
||||||
|
|
||||||
a1 = va_arg(va, TCGArg);
|
|
||||||
a2 = va_arg(va, TCGArg);
|
|
||||||
cond = va_arg(va, TCGArg);
|
|
||||||
fixup = fixups[cond & 15];
|
fixup = fixups[cond & 15];
|
||||||
tcg_debug_assert(fixup != 0xff);
|
tcg_debug_assert(fixup != 0xff);
|
||||||
|
|
||||||
|
@ -3408,8 +3379,7 @@ void tcg_expand_vec_op(TCGContext *s, TCGOpcode opc, TCGType type, unsigned vece
|
||||||
cond = tcg_invert_cond(cond);
|
cond = tcg_invert_cond(cond);
|
||||||
}
|
}
|
||||||
if (fixup & NEED_SWAP) {
|
if (fixup & NEED_SWAP) {
|
||||||
TCGArg t;
|
t1 = v1, v1 = v2, v2 = t1;
|
||||||
t = a1, a1 = a2, a2 = t;
|
|
||||||
cond = tcg_swap_cond(cond);
|
cond = tcg_swap_cond(cond);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3418,24 +3388,59 @@ void tcg_expand_vec_op(TCGContext *s, TCGOpcode opc, TCGType type, unsigned vece
|
||||||
t1 = tcg_temp_new_vec(s, type);
|
t1 = tcg_temp_new_vec(s, type);
|
||||||
t2 = tcg_temp_new_vec(s, type);
|
t2 = tcg_temp_new_vec(s, type);
|
||||||
tcg_gen_dupi_vec(s, vece, t2, 1ull << ((8 << vece) - 1));
|
tcg_gen_dupi_vec(s, vece, t2, 1ull << ((8 << vece) - 1));
|
||||||
tcg_gen_sub_vec(s, vece, t1, temp_tcgv_vec(s, arg_temp(a1)), t2);
|
tcg_gen_sub_vec(s, vece, t1, v1, t2);
|
||||||
tcg_gen_sub_vec(s, vece, t2, temp_tcgv_vec(s, arg_temp(a2)), t2);
|
tcg_gen_sub_vec(s, vece, t2, v2, t2);
|
||||||
a1 = tcgv_vec_arg(s, t1);
|
v1 = t1;
|
||||||
a2 = tcgv_vec_arg(s, t2);
|
v2 = t2;
|
||||||
cond = tcg_signed_cond(cond);
|
cond = tcg_signed_cond(cond);
|
||||||
}
|
}
|
||||||
|
|
||||||
tcg_debug_assert(cond == TCG_COND_EQ || cond == TCG_COND_GT);
|
tcg_debug_assert(cond == TCG_COND_EQ || cond == TCG_COND_GT);
|
||||||
vec_gen_4(s, INDEX_op_cmp_vec, type, vece, a0, a1, a2, cond);
|
/* Expand directly; do not recurse. */
|
||||||
|
vec_gen_4(s, INDEX_op_cmp_vec, type, vece,
|
||||||
|
tcgv_vec_arg(s, v0), tcgv_vec_arg(s, v1), tcgv_vec_arg(s, v2), cond);
|
||||||
|
|
||||||
if (fixup & NEED_BIAS) {
|
if (t1) {
|
||||||
tcg_temp_free_vec(s, t1);
|
tcg_temp_free_vec(s, t1);
|
||||||
|
if (t2) {
|
||||||
tcg_temp_free_vec(s, t2);
|
tcg_temp_free_vec(s, t2);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
if (fixup & NEED_INV) {
|
if (fixup & NEED_INV) {
|
||||||
tcg_gen_not_vec(s, vece, v0, v0);
|
tcg_gen_not_vec(s, vece, v0, v0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void tcg_expand_vec_op(TCGContext *s, TCGOpcode opc, TCGType type, unsigned vece,
|
||||||
|
TCGArg a0, ...)
|
||||||
|
{
|
||||||
|
va_list va;
|
||||||
|
TCGArg a2;
|
||||||
|
TCGv_vec v0, v1, v2;
|
||||||
|
|
||||||
|
va_start(va, a0);
|
||||||
|
v0 = temp_tcgv_vec(s, arg_temp(a0));
|
||||||
|
v1 = temp_tcgv_vec(s, arg_temp(va_arg(va, TCGArg)));
|
||||||
|
a2 = va_arg(va, TCGArg);
|
||||||
|
|
||||||
|
switch (opc) {
|
||||||
|
case INDEX_op_shli_vec:
|
||||||
|
case INDEX_op_shri_vec:
|
||||||
|
expand_vec_shi(s, type, vece, opc == INDEX_op_shri_vec, v0, v1, a2);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case INDEX_op_sari_vec:
|
||||||
|
expand_vec_sari(s, type, vece, v0, v1, a2);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case INDEX_op_mul_vec:
|
||||||
|
v2 = temp_tcgv_vec(s, arg_temp(a2));
|
||||||
|
expand_vec_mul(s, type, vece, v0, v1, v2);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case INDEX_op_cmp_vec:
|
||||||
|
v2 = temp_tcgv_vec(s, arg_temp(a2));
|
||||||
|
expand_vec_cmp(s, type, vece, v0, v1, v2, va_arg(va, TCGArg));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
Loading…
Reference in a new issue