target/arm: Merge helper_sve_clr_* and helper_sve_movz_*

This commit is contained in:
Lioncash 2021-02-26 14:20:18 -05:00
parent 8eb3642d96
commit 2da89a626c
6 changed files with 35 additions and 109 deletions

View file

@@ -3738,10 +3738,6 @@
#define helper_sve_brkpas helper_sve_brkpas_aarch64 #define helper_sve_brkpas helper_sve_brkpas_aarch64
#define helper_sve_brkpb helper_sve_brkpb_aarch64 #define helper_sve_brkpb helper_sve_brkpb_aarch64
#define helper_sve_brkpbs helper_sve_brkpbs_aarch64 #define helper_sve_brkpbs helper_sve_brkpbs_aarch64
#define helper_sve_clr_b helper_sve_clr_b_aarch64
#define helper_sve_clr_d helper_sve_clr_d_aarch64
#define helper_sve_clr_h helper_sve_clr_h_aarch64
#define helper_sve_clr_s helper_sve_clr_s_aarch64
#define helper_sve_cls_b helper_sve_cls_b_aarch64 #define helper_sve_cls_b helper_sve_cls_b_aarch64
#define helper_sve_cls_d helper_sve_cls_d_aarch64 #define helper_sve_cls_d helper_sve_cls_d_aarch64
#define helper_sve_cls_h helper_sve_cls_h_aarch64 #define helper_sve_cls_h helper_sve_cls_h_aarch64

View file

@@ -3738,10 +3738,6 @@
#define helper_sve_brkpas helper_sve_brkpas_aarch64eb #define helper_sve_brkpas helper_sve_brkpas_aarch64eb
#define helper_sve_brkpb helper_sve_brkpb_aarch64eb #define helper_sve_brkpb helper_sve_brkpb_aarch64eb
#define helper_sve_brkpbs helper_sve_brkpbs_aarch64eb #define helper_sve_brkpbs helper_sve_brkpbs_aarch64eb
#define helper_sve_clr_b helper_sve_clr_b_aarch64eb
#define helper_sve_clr_d helper_sve_clr_d_aarch64eb
#define helper_sve_clr_h helper_sve_clr_h_aarch64eb
#define helper_sve_clr_s helper_sve_clr_s_aarch64eb
#define helper_sve_cls_b helper_sve_cls_b_aarch64eb #define helper_sve_cls_b helper_sve_cls_b_aarch64eb
#define helper_sve_cls_d helper_sve_cls_d_aarch64eb #define helper_sve_cls_d helper_sve_cls_d_aarch64eb
#define helper_sve_cls_h helper_sve_cls_h_aarch64eb #define helper_sve_cls_h helper_sve_cls_h_aarch64eb

View file

@@ -3878,10 +3878,6 @@ aarch64_symbols = (
'helper_sve_brkpas', 'helper_sve_brkpas',
'helper_sve_brkpb', 'helper_sve_brkpb',
'helper_sve_brkpbs', 'helper_sve_brkpbs',
'helper_sve_clr_b',
'helper_sve_clr_d',
'helper_sve_clr_h',
'helper_sve_clr_s',
'helper_sve_cls_b', 'helper_sve_cls_b',
'helper_sve_cls_d', 'helper_sve_cls_d',
'helper_sve_cls_h', 'helper_sve_cls_h',

View file

@@ -269,11 +269,6 @@ DEF_HELPER_FLAGS_3(sve_uminv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_uminv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32) DEF_HELPER_FLAGS_3(sve_uminv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_uminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32) DEF_HELPER_FLAGS_3(sve_uminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_clr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_clr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_clr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve_clr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_movz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_movz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_movz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_movz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve_movz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_movz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)

View file

@@ -955,85 +955,43 @@ uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc)
return flags; return flags;
} }
/* Store zero into every active element of Zd. We will use this for two /*
* and three-operand predicated instructions for which logic dictates a * Copy Zn into Zd, and store zero into inactive elements.
* zero result. In particular, logical shift by element size, which is * If inv, store zeros into the active elements.
* otherwise undefined on the host.
*
* For element sizes smaller than uint64_t, we use tables to expand
* the N bits of the controlling predicate to a byte mask, and clear
* those bytes.
*/ */
void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t *d = vd;
uint8_t *pg = vg;
for (i = 0; i < opr_sz; i += 1) {
d[i] &= ~expand_pred_b(pg[H1(i)]);
}
}
void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t *d = vd;
uint8_t *pg = vg;
for (i = 0; i < opr_sz; i += 1) {
d[i] &= ~expand_pred_h(pg[H1(i)]);
}
}
void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t *d = vd;
uint8_t *pg = vg;
for (i = 0; i < opr_sz; i += 1) {
d[i] &= ~expand_pred_s(pg[H1(i)]);
}
}
void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc)
{
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t *d = vd;
uint8_t *pg = vg;
for (i = 0; i < opr_sz; i += 1) {
if (pg[H1(i)] & 1) {
d[i] = 0;
}
}
}
/* Copy Zn into Zd, and store zero into inactive elements. */
void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc) void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc)
{ {
intptr_t i, opr_sz = simd_oprsz(desc) / 8; intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn; uint64_t *d = vd, *n = vn;
uint8_t *pg = vg; uint8_t *pg = vg;
for (i = 0; i < opr_sz; i += 1) { for (i = 0; i < opr_sz; i += 1) {
d[i] = n[i] & expand_pred_b(pg[H1(i)]); d[i] = n[i] & (expand_pred_b(pg[H1(i)]) ^ inv);
} }
} }
void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc) void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc)
{ {
intptr_t i, opr_sz = simd_oprsz(desc) / 8; intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn; uint64_t *d = vd, *n = vn;
uint8_t *pg = vg; uint8_t *pg = vg;
for (i = 0; i < opr_sz; i += 1) { for (i = 0; i < opr_sz; i += 1) {
d[i] = n[i] & expand_pred_h(pg[H1(i)]); d[i] = n[i] & (expand_pred_h(pg[H1(i)]) ^ inv);
} }
} }
void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc) void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc)
{ {
intptr_t i, opr_sz = simd_oprsz(desc) / 8; intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
uint64_t *d = vd, *n = vn; uint64_t *d = vd, *n = vn;
uint8_t *pg = vg; uint8_t *pg = vg;
for (i = 0; i < opr_sz; i += 1) { for (i = 0; i < opr_sz; i += 1) {
d[i] = n[i] & expand_pred_s(pg[H1(i)]); d[i] = n[i] & (expand_pred_s(pg[H1(i)]) ^ inv);
} }
} }
@@ -1042,8 +1000,10 @@ void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc)
intptr_t i, opr_sz = simd_oprsz(desc) / 8; intptr_t i, opr_sz = simd_oprsz(desc) / 8;
uint64_t *d = vd, *n = vn; uint64_t *d = vd, *n = vn;
uint8_t *pg = vg; uint8_t *pg = vg;
uint8_t inv = simd_data(desc);
for (i = 0; i < opr_sz; i += 1) { for (i = 0; i < opr_sz; i += 1) {
d[i] = n[i] & -(uint64_t)(pg[H1(i)] & 1); d[i] = n[i] & -(uint64_t)((pg[H1(i)] ^ inv) & 1);
} }
} }

View file

@@ -598,39 +598,27 @@ static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
*** SVE Shift by Immediate - Predicated Group *** SVE Shift by Immediate - Predicated Group
*/ */
/* Store zero into every active element of Zd. We will use this for two /*
* and three-operand predicated instructions for which logic dictates a * Copy Zn into Zd, storing zeros into inactive elements.
* zero result. * If invert, store zeros into the active elements.
*/ */
static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz) static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
{ int esz, bool invert)
static gen_helper_gvec_2 * const fns[4] = {
gen_helper_sve_clr_b, gen_helper_sve_clr_h,
gen_helper_sve_clr_s, gen_helper_sve_clr_d,
};
if (sve_access_check(s)) {
TCGContext *tcg_ctx = s->uc->tcg_ctx;
unsigned vsz = vec_full_reg_size(s);
tcg_gen_gvec_2_ool(tcg_ctx, vec_full_reg_offset(s, rd),
pred_full_reg_offset(s, pg),
vsz, vsz, 0, fns[esz]);
}
return true;
}
/* Copy Zn into Zd, storing zeros into inactive elements. */
static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
{ {
static gen_helper_gvec_3 * const fns[4] = { static gen_helper_gvec_3 * const fns[4] = {
gen_helper_sve_movz_b, gen_helper_sve_movz_h, gen_helper_sve_movz_b, gen_helper_sve_movz_h,
gen_helper_sve_movz_s, gen_helper_sve_movz_d, gen_helper_sve_movz_s, gen_helper_sve_movz_d,
}; };
TCGContext *tcg_ctx = s->uc->tcg_ctx;
unsigned vsz = vec_full_reg_size(s); if (sve_access_check(s)) {
tcg_gen_gvec_3_ool(tcg_ctx, vec_full_reg_offset(s, rd), TCGContext *tcg_ctx = s->uc->tcg_ctx;
vec_full_reg_offset(s, rn), unsigned vsz = vec_full_reg_size(s);
pred_full_reg_offset(s, pg), tcg_gen_gvec_3_ool(tcg_ctx, vec_full_reg_offset(s, rd),
vsz, vsz, 0, fns[esz]); vec_full_reg_offset(s, rn),
pred_full_reg_offset(s, pg),
vsz, vsz, invert, fns[esz]);
}
return true;
} }
static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a, static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
@@ -675,7 +663,7 @@ static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid. /* Shift by element size is architecturally valid.
For logical shifts, it is a zeroing operation. */ For logical shifts, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) { if (a->imm >= (8 << a->esz)) {
return do_clr_zp(s, a->rd, a->pg, a->esz); return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else { } else {
return do_zpzi_ool(s, a, fns[a->esz]); return do_zpzi_ool(s, a, fns[a->esz]);
} }
@@ -693,7 +681,7 @@ static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid. /* Shift by element size is architecturally valid.
For logical shifts, it is a zeroing operation. */ For logical shifts, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) { if (a->imm >= (8 << a->esz)) {
return do_clr_zp(s, a->rd, a->pg, a->esz); return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else { } else {
return do_zpzi_ool(s, a, fns[a->esz]); return do_zpzi_ool(s, a, fns[a->esz]);
} }
@@ -711,7 +699,7 @@ static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
/* Shift by element size is architecturally valid. For arithmetic /* Shift by element size is architecturally valid. For arithmetic
right shift for division, it is a zeroing operation. */ right shift for division, it is a zeroing operation. */
if (a->imm >= (8 << a->esz)) { if (a->imm >= (8 << a->esz)) {
return do_clr_zp(s, a->rd, a->pg, a->esz); return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
} else { } else {
return do_zpzi_ool(s, a, fns[a->esz]); return do_zpzi_ool(s, a, fns[a->esz]);
} }
@@ -5170,9 +5158,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
tcg_temp_free_i64(tcg_ctx, temp); tcg_temp_free_i64(tcg_ctx, temp);
/* Zero the inactive elements. */ /* Zero the inactive elements. */
gen_set_label(tcg_ctx, over); return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
return true;
} }
static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr, static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
@@ -5962,8 +5948,5 @@ static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a) static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
{ {
if (sve_access_check(s)) { return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
}
return true;
} }