mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2025-01-08 22:45:43 +00:00
target/arm: Implement SVE Integer Compare - Scalars Group
Backports commit caf1cefc72be98497e0907d0e07f4327fc641e96 from qemu
This commit is contained in:
parent
bc55b3e570
commit
496bb35c97
|
@ -3688,6 +3688,7 @@
|
|||
#define helper_sve_uzp_h helper_sve_uzp_h_aarch64
|
||||
#define helper_sve_uzp_s helper_sve_uzp_s_aarch64
|
||||
#define helper_sve_uzp_p helper_sve_uzp_p_aarch64
|
||||
#define helper_sve_while helper_sve_while_aarch64
|
||||
#define helper_sve_zip_b helper_sve_zip_b_aarch64
|
||||
#define helper_sve_zip_d helper_sve_zip_d_aarch64
|
||||
#define helper_sve_zip_h helper_sve_zip_h_aarch64
|
||||
|
|
|
@ -3688,6 +3688,7 @@
|
|||
#define helper_sve_uzp_h helper_sve_uzp_h_aarch64eb
|
||||
#define helper_sve_uzp_s helper_sve_uzp_s_aarch64eb
|
||||
#define helper_sve_uzp_p helper_sve_uzp_p_aarch64eb
|
||||
#define helper_sve_while helper_sve_while_aarch64eb
|
||||
#define helper_sve_zip_b helper_sve_zip_b_aarch64eb
|
||||
#define helper_sve_zip_d helper_sve_zip_d_aarch64eb
|
||||
#define helper_sve_zip_h helper_sve_zip_h_aarch64eb
|
||||
|
|
|
@ -3709,6 +3709,7 @@ aarch64_symbols = (
|
|||
'helper_sve_uzp_h',
|
||||
'helper_sve_uzp_s',
|
||||
'helper_sve_uzp_p',
|
||||
'helper_sve_while',
|
||||
'helper_sve_zip_b',
|
||||
'helper_sve_zip_d',
|
||||
'helper_sve_zip_h',
|
||||
|
|
|
@ -678,3 +678,5 @@ DEF_HELPER_FLAGS_4(sve_brkn, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
|||
DEF_HELPER_FLAGS_4(sve_brkns, TCG_CALL_NO_RWG, i32, ptr, ptr, ptr, i32)
|
||||
|
||||
DEF_HELPER_FLAGS_3(sve_cntp, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
|
||||
|
||||
/*
 * sve_while: returns an i32 and takes (ptr, i32, i32), declared with
 * TCG_CALL_NO_RWG.  The C side is
 * HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc).
 */
DEF_HELPER_FLAGS_3(sve_while, TCG_CALL_NO_RWG, i32, ptr, i32, i32)
|
||||
|
|
|
@ -606,6 +606,14 @@ SINCDECP_r_64 00100101 .. 1010 d:1 u:1 10001 10 .... ..... @incdec_pred
|
|||
# SVE saturating inc/dec vector by predicate count
|
||||
SINCDECP_z 00100101 .. 1010 d:1 u:1 10000 00 .... ..... @incdec2_pred
|
||||
|
||||
### SVE Integer Compare - Scalars Group
|
||||
|
||||
# SVE conditionally terminate scalars
|
||||
CTERM 00100101 1 sf:1 1 rm:5 001000 rn:5 ne:1 0000
|
||||
|
||||
# SVE integer compare scalar count and limit
|
||||
WHILE 00100101 esz:2 1 rm:5 000 sf:1 u:1 1 rn:5 eq:1 rd:4
|
||||
|
||||
### SVE Memory - 32-bit Gather and Unsized Contiguous Group
|
||||
|
||||
# SVE load predicate register
|
||||
|
|
|
@ -2737,3 +2737,34 @@ uint64_t HELPER(sve_cntp)(void *vn, void *vg, uint32_t pred_desc)
|
|||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
uint32_t HELPER(sve_while)(void *vd, uint32_t count, uint32_t pred_desc)
|
||||
{
|
||||
uintptr_t oprsz = extract32(pred_desc, 0, SIMD_OPRSZ_BITS) + 2;
|
||||
intptr_t esz = extract32(pred_desc, SIMD_DATA_SHIFT, 2);
|
||||
uint64_t esz_mask = pred_esz_masks[esz];
|
||||
ARMPredicateReg *d = vd;
|
||||
uint32_t flags;
|
||||
intptr_t i;
|
||||
|
||||
/* Begin with a zero predicate register. */
|
||||
flags = do_zero(d, oprsz);
|
||||
if (count == 0) {
|
||||
return flags;
|
||||
}
|
||||
|
||||
/* Scale from predicate element count to bits. */
|
||||
count <<= esz;
|
||||
/* Bound to the bits in the predicate. */
|
||||
count = MIN(count, oprsz * 8);
|
||||
|
||||
/* Set all of the requested bits. */
|
||||
for (i = 0; i < count / 64; ++i) {
|
||||
d->p[i] = esz_mask;
|
||||
}
|
||||
if (count & 63) {
|
||||
d->p[i] = MAKE_64BIT_MASK(0, count & 63) & esz_mask;
|
||||
}
|
||||
|
||||
return predtest_ones(d, oprsz, esz_mask);
|
||||
}
|
||||
|
|
|
@ -3205,6 +3205,107 @@ static bool trans_SINCDECP_z(DisasContext *s, arg_incdec2_pred *a,
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
*** SVE Integer Compare Scalars Group
|
||||
*/
|
||||
|
||||
/*
 * Translate CTERM: compare general registers rn and rm for equality
 * (a->ne clear) or inequality (a->ne set), reading them with width
 * selector a->sf, and rewrite the N and V flag variables from the result.
 * C is only read; nothing else is written here.
 *
 * Always returns true; when sve_access_check() fails no code is emitted.
 */
static bool trans_CTERM(DisasContext *s, arg_CTERM *a, uint32_t insn)
{
    TCGContext *ctx;
    TCGv_i32 nf, vf;
    TCGv_i64 lhs, rhs, outcome;
    TCGCond test;

    if (!sve_access_check(s)) {
        return true;
    }

    ctx = s->uc->tcg_ctx;
    nf = ctx->cpu_NF;
    vf = ctx->cpu_VF;

    if (a->ne) {
        test = TCG_COND_NE;
    } else {
        test = TCG_COND_EQ;
    }

    lhs = read_cpu_reg(s, a->rn, a->sf);
    rhs = read_cpu_reg(s, a->rm, a->sf);
    outcome = tcg_temp_new_i64(ctx);

    /* NF receives the truth value of the comparison. */
    tcg_gen_setcond_i64(ctx, test, outcome, lhs, rhs);
    tcg_gen_extrl_i64_i32(ctx, nf, outcome);
    tcg_temp_free_i64(ctx, outcome);

    /* VF = !NF & !CF: invert NF's low bit, then mask with the inverse of CF. */
    tcg_gen_xori_i32(ctx, vf, nf, 1);
    tcg_gen_andc_i32(ctx, vf, vf, ctx->cpu_CF);

    /*
     * Consumers of NF and VF test bit 31, so negate each value to move
     * the truth value from bit 0 up into the sign bit.
     */
    tcg_gen_neg_i32(ctx, nf, nf);
    tcg_gen_neg_i32(ctx, vf, vf);
    return true;
}
|
||||
|
||||
/*
 * Translate the SVE "integer compare scalar count and limit" (WHILE)
 * instructions.
 *
 * Fields used from @a:
 *   rn, rm - general registers holding the start value and the limit;
 *   sf     - clear: compare the low 32 bits only; set: compare 64 bits;
 *   u      - set: unsigned comparison; clear: signed comparison;
 *   eq     - set: inclusive comparison (<=); clear: strict (<);
 *   esz    - element size selector, forwarded to the helper;
 *   rd     - destination predicate register.
 *
 * The comparison loop is collapsed into a single iteration count, which
 * the sve_while helper turns into a predicate with that many leading
 * elements set.  The helper's i32 result is handed to do_pred_flags().
 *
 * Always returns true; when sve_access_check() fails no code is emitted.
 */
static bool trans_WHILE(DisasContext *s, arg_WHILE *a, uint32_t insn)
{
    if (!sve_access_check(s)) {
        return true;
    }

    TCGContext *tcg_ctx = s->uc->tcg_ctx;
    TCGv_i64 op0 = read_cpu_reg(s, a->rn, 1);
    TCGv_i64 op1 = read_cpu_reg(s, a->rm, 1);
    TCGv_i64 t0 = tcg_temp_new_i64(tcg_ctx);
    TCGv_i64 t1 = tcg_temp_new_i64(tcg_ctx);
    TCGv_i32 t2, t3;
    TCGv_ptr ptr;
    unsigned desc, vsz = vec_full_reg_size(s);
    TCGCond cond;

    /* 32-bit forms: widen both operands so 64-bit arithmetic is exact. */
    if (!a->sf) {
        if (a->u) {
            tcg_gen_ext32u_i64(tcg_ctx, op0, op0);
            tcg_gen_ext32u_i64(tcg_ctx, op1, op1);
        } else {
            tcg_gen_ext32s_i64(tcg_ctx, op0, op0);
            tcg_gen_ext32s_i64(tcg_ctx, op1, op1);
        }
    }

    /*
     * For the helper, compress the different conditions into a computation
     * of how many iterations for which the condition is true.
     *
     * Predicate registers are small, so any count >= the predicate size
     * means "all elements"; clamping to vsz before the increment below
     * also keeps that increment from overflowing.
     */
    tcg_gen_sub_i64(tcg_ctx, t0, op1, op0);

    /* t0 = MIN(op1 - op0, vsz). */
    tcg_gen_movi_i64(tcg_ctx, t1, vsz);
    tcg_gen_umin_i64(tcg_ctx, t0, t0, t1);
    if (a->eq) {
        TCGv_i64 tmax = tcg_temp_new_i64(tcg_ctx);
        uint64_t maxval;

        if (a->u) {
            maxval = a->sf ? UINT64_MAX : UINT32_MAX;
        } else {
            maxval = a->sf ? INT64_MAX : INT32_MAX;
        }

        /* Equality means one more iteration. */
        tcg_gen_addi_i64(tcg_ctx, t0, t0, 1);

        /*
         * The instruction is specified as an increment + compare loop.
         * When the limit is the largest value of the compared type, the
         * inclusive comparison can never become false (the counter wraps
         * instead), so every element is active no matter where the count
         * starts.  Force the count to the maximum in that case; t1 still
         * holds vsz here.
         */
        tcg_gen_movi_i64(tcg_ctx, tmax, (int64_t)maxval);
        tcg_gen_movcond_i64(tcg_ctx, TCG_COND_EQ, t0, op1, tmax, t1, t0);
        tcg_temp_free_i64(tcg_ctx, tmax);
    }

    /* t0 = (condition true ? t0 : 0). */
    cond = (a->u
            ? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
            : (a->eq ? TCG_COND_LE : TCG_COND_LT));
    tcg_gen_movi_i64(tcg_ctx, t1, 0);
    tcg_gen_movcond_i64(tcg_ctx, cond, t0, op0, op1, t0, t1);

    /* The count is at most vsz + 1, so it fits the helper's i32 argument. */
    t2 = tcg_temp_new_i32(tcg_ctx);
    tcg_gen_extrl_i64_i32(tcg_ctx, t2, t0);
    tcg_temp_free_i64(tcg_ctx, t0);
    tcg_temp_free_i64(tcg_ctx, t1);

    /*
     * Descriptor layout expected by the helper: predicate size in bytes
     * minus 2 in the low bits, element size selector at SIMD_DATA_SHIFT.
     */
    desc = (vsz / 8) - 2;
    desc = deposit32(desc, SIMD_DATA_SHIFT, 2, a->esz);
    t3 = tcg_const_i32(tcg_ctx, desc);

    /* Address of the destination predicate register within the CPU env. */
    ptr = tcg_temp_new_ptr(tcg_ctx);
    tcg_gen_addi_ptr(tcg_ctx, ptr, tcg_ctx->cpu_env, pred_full_reg_offset(s, a->rd));

    /* t2 is both the count argument and the destination of the result. */
    gen_helper_sve_while(tcg_ctx, t2, ptr, t2, t3);
    do_pred_flags(s, t2);

    tcg_temp_free_ptr(tcg_ctx, ptr);
    tcg_temp_free_i32(tcg_ctx, t2);
    tcg_temp_free_i32(tcg_ctx, t3);
    return true;
}
|
||||
|
||||
/*
|
||||
*** SVE Memory - 32-bit Gather and Unsized Contiguous Group
|
||||
*/
|
||||
|
|
Loading…
Reference in a new issue