From 2da89a626cca21812033bbe0d332b69c32396ada Mon Sep 17 00:00:00 2001
From: Lioncash
Date: Fri, 26 Feb 2021 14:20:18 -0500
Subject: [PATCH] target/arm: Merge helper_sve_clr_* and helper_sve_movz_*

---
 qemu/aarch64.h                  |  4 --
 qemu/aarch64eb.h                |  4 --
 qemu/header_gen.py              |  4 --
 qemu/target/arm/helper-sve.h    |  5 ---
 qemu/target/arm/sve_helper.c    | 70 +++++++--------------------------
 qemu/target/arm/translate-sve.c | 57 ++++++++++-----------------
 6 files changed, 35 insertions(+), 109 deletions(-)

diff --git a/qemu/aarch64.h b/qemu/aarch64.h
index de8b2080..7fd3b118 100644
--- a/qemu/aarch64.h
+++ b/qemu/aarch64.h
@@ -3738,10 +3738,6 @@
 #define helper_sve_brkpas helper_sve_brkpas_aarch64
 #define helper_sve_brkpb helper_sve_brkpb_aarch64
 #define helper_sve_brkpbs helper_sve_brkpbs_aarch64
-#define helper_sve_clr_b helper_sve_clr_b_aarch64
-#define helper_sve_clr_d helper_sve_clr_d_aarch64
-#define helper_sve_clr_h helper_sve_clr_h_aarch64
-#define helper_sve_clr_s helper_sve_clr_s_aarch64
 #define helper_sve_cls_b helper_sve_cls_b_aarch64
 #define helper_sve_cls_d helper_sve_cls_d_aarch64
 #define helper_sve_cls_h helper_sve_cls_h_aarch64
diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h
index 72efcf19..6d41dcd9 100644
--- a/qemu/aarch64eb.h
+++ b/qemu/aarch64eb.h
@@ -3738,10 +3738,6 @@
 #define helper_sve_brkpas helper_sve_brkpas_aarch64eb
 #define helper_sve_brkpb helper_sve_brkpb_aarch64eb
 #define helper_sve_brkpbs helper_sve_brkpbs_aarch64eb
-#define helper_sve_clr_b helper_sve_clr_b_aarch64eb
-#define helper_sve_clr_d helper_sve_clr_d_aarch64eb
-#define helper_sve_clr_h helper_sve_clr_h_aarch64eb
-#define helper_sve_clr_s helper_sve_clr_s_aarch64eb
 #define helper_sve_cls_b helper_sve_cls_b_aarch64eb
 #define helper_sve_cls_d helper_sve_cls_d_aarch64eb
 #define helper_sve_cls_h helper_sve_cls_h_aarch64eb
diff --git a/qemu/header_gen.py b/qemu/header_gen.py
index 4b28b803..6fc6a03a 100644
--- a/qemu/header_gen.py
+++ b/qemu/header_gen.py
@@ -3878,10 +3878,6 @@ aarch64_symbols = (
     'helper_sve_brkpas',
     'helper_sve_brkpb',
     'helper_sve_brkpbs',
-    'helper_sve_clr_b',
-    'helper_sve_clr_d',
-    'helper_sve_clr_h',
-    'helper_sve_clr_s',
     'helper_sve_cls_b',
     'helper_sve_cls_d',
     'helper_sve_cls_h',
diff --git a/qemu/target/arm/helper-sve.h b/qemu/target/arm/helper-sve.h
index 63c4a087..4411c471 100644
--- a/qemu/target/arm/helper-sve.h
+++ b/qemu/target/arm/helper-sve.h
@@ -269,11 +269,6 @@ DEF_HELPER_FLAGS_3(sve_uminv_h, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
 DEF_HELPER_FLAGS_3(sve_uminv_s, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
 DEF_HELPER_FLAGS_3(sve_uminv_d, TCG_CALL_NO_RWG, i64, ptr, ptr, i32)
 
-DEF_HELPER_FLAGS_3(sve_clr_b, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-DEF_HELPER_FLAGS_3(sve_clr_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
-
 DEF_HELPER_FLAGS_4(sve_movz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve_movz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve_movz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/qemu/target/arm/sve_helper.c b/qemu/target/arm/sve_helper.c
index 5ea0d1a8..e0e3248e 100644
--- a/qemu/target/arm/sve_helper.c
+++ b/qemu/target/arm/sve_helper.c
@@ -955,85 +955,43 @@ uint32_t HELPER(sve_pnext)(void *vd, void *vg, uint32_t pred_desc)
     return flags;
 }
 
-/* Store zero into every active element of Zd.  We will use this for two
- * and three-operand predicated instructions for which logic dictates a
- * zero result.  In particular, logical shift by element size, which is
- * otherwise undefined on the host.
- *
- * For element sizes smaller than uint64_t, we use tables to expand
- * the N bits of the controlling predicate to a byte mask, and clear
- * those bytes.
+/*
+ * Copy Zn into Zd, and store zero into inactive elements.
+ * If inv, store zeros into the active elements.
  */
-void HELPER(sve_clr_b)(void *vd, void *vg, uint32_t desc)
-{
-    intptr_t i, opr_sz = simd_oprsz(desc) / 8;
-    uint64_t *d = vd;
-    uint8_t *pg = vg;
-    for (i = 0; i < opr_sz; i += 1) {
-        d[i] &= ~expand_pred_b(pg[H1(i)]);
-    }
-}
-
-void HELPER(sve_clr_h)(void *vd, void *vg, uint32_t desc)
-{
-    intptr_t i, opr_sz = simd_oprsz(desc) / 8;
-    uint64_t *d = vd;
-    uint8_t *pg = vg;
-    for (i = 0; i < opr_sz; i += 1) {
-        d[i] &= ~expand_pred_h(pg[H1(i)]);
-    }
-}
-
-void HELPER(sve_clr_s)(void *vd, void *vg, uint32_t desc)
-{
-    intptr_t i, opr_sz = simd_oprsz(desc) / 8;
-    uint64_t *d = vd;
-    uint8_t *pg = vg;
-    for (i = 0; i < opr_sz; i += 1) {
-        d[i] &= ~expand_pred_s(pg[H1(i)]);
-    }
-}
-
-void HELPER(sve_clr_d)(void *vd, void *vg, uint32_t desc)
-{
-    intptr_t i, opr_sz = simd_oprsz(desc) / 8;
-    uint64_t *d = vd;
-    uint8_t *pg = vg;
-    for (i = 0; i < opr_sz; i += 1) {
-        if (pg[H1(i)] & 1) {
-            d[i] = 0;
-        }
-    }
-}
-
-/* Copy Zn into Zd, and store zero into inactive elements.  */
 void HELPER(sve_movz_b)(void *vd, void *vn, void *vg, uint32_t desc)
 {
     intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+    uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
     uint64_t *d = vd, *n = vn;
     uint8_t *pg = vg;
+
     for (i = 0; i < opr_sz; i += 1) {
-        d[i] = n[i] & expand_pred_b(pg[H1(i)]);
+        d[i] = n[i] & (expand_pred_b(pg[H1(i)]) ^ inv);
     }
 }
 
 void HELPER(sve_movz_h)(void *vd, void *vn, void *vg, uint32_t desc)
 {
     intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+    uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
     uint64_t *d = vd, *n = vn;
     uint8_t *pg = vg;
+
     for (i = 0; i < opr_sz; i += 1) {
-        d[i] = n[i] & expand_pred_h(pg[H1(i)]);
+        d[i] = n[i] & (expand_pred_h(pg[H1(i)]) ^ inv);
     }
 }
 
 void HELPER(sve_movz_s)(void *vd, void *vn, void *vg, uint32_t desc)
 {
     intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+    uint64_t inv = -(uint64_t)(simd_data(desc) & 1);
     uint64_t *d = vd, *n = vn;
     uint8_t *pg = vg;
+
     for (i = 0; i < opr_sz; i += 1) {
-        d[i] = n[i] & expand_pred_s(pg[H1(i)]);
+        d[i] = n[i] & (expand_pred_s(pg[H1(i)]) ^ inv);
     }
 }
 
@@ -1042,8 +1000,10 @@ void HELPER(sve_movz_d)(void *vd, void *vn, void *vg, uint32_t desc)
     intptr_t i, opr_sz = simd_oprsz(desc) / 8;
     uint64_t *d = vd, *n = vn;
     uint8_t *pg = vg;
+    uint8_t inv = simd_data(desc);
+
     for (i = 0; i < opr_sz; i += 1) {
-        d[i] = n[i] & -(uint64_t)(pg[H1(i)] & 1);
+        d[i] = n[i] & -(uint64_t)((pg[H1(i)] ^ inv) & 1);
     }
 }
 
diff --git a/qemu/target/arm/translate-sve.c b/qemu/target/arm/translate-sve.c
index f3ce370f..3912eb3f 100644
--- a/qemu/target/arm/translate-sve.c
+++ b/qemu/target/arm/translate-sve.c
@@ -598,39 +598,27 @@ static bool trans_SADDV(DisasContext *s, arg_rpr_esz *a)
  *** SVE Shift by Immediate - Predicated Group
  */
 
-/* Store zero into every active element of Zd.  We will use this for two
- * and three-operand predicated instructions for which logic dictates a
- * zero result.
+/*
+ * Copy Zn into Zd, storing zeros into inactive elements.
+ * If invert, store zeros into the active elements.
  */
-static bool do_clr_zp(DisasContext *s, int rd, int pg, int esz)
-{
-    static gen_helper_gvec_2 * const fns[4] = {
-        gen_helper_sve_clr_b, gen_helper_sve_clr_h,
-        gen_helper_sve_clr_s, gen_helper_sve_clr_d,
-    };
-    if (sve_access_check(s)) {
-        TCGContext *tcg_ctx = s->uc->tcg_ctx;
-        unsigned vsz = vec_full_reg_size(s);
-        tcg_gen_gvec_2_ool(tcg_ctx, vec_full_reg_offset(s, rd),
-                           pred_full_reg_offset(s, pg),
-                           vsz, vsz, 0, fns[esz]);
-    }
-    return true;
-}
-
-/* Copy Zn into Zd, storing zeros into inactive elements.  */
-static void do_movz_zpz(DisasContext *s, int rd, int rn, int pg, int esz)
+static bool do_movz_zpz(DisasContext *s, int rd, int rn, int pg,
+                        int esz, bool invert)
 {
     static gen_helper_gvec_3 * const fns[4] = {
         gen_helper_sve_movz_b, gen_helper_sve_movz_h,
         gen_helper_sve_movz_s, gen_helper_sve_movz_d,
     };
-    TCGContext *tcg_ctx = s->uc->tcg_ctx;
-    unsigned vsz = vec_full_reg_size(s);
-    tcg_gen_gvec_3_ool(tcg_ctx, vec_full_reg_offset(s, rd),
-                       vec_full_reg_offset(s, rn),
-                       pred_full_reg_offset(s, pg),
-                       vsz, vsz, 0, fns[esz]);
+
+    if (sve_access_check(s)) {
+        TCGContext *tcg_ctx = s->uc->tcg_ctx;
+        unsigned vsz = vec_full_reg_size(s);
+        tcg_gen_gvec_3_ool(tcg_ctx, vec_full_reg_offset(s, rd),
+                           vec_full_reg_offset(s, rn),
+                           pred_full_reg_offset(s, pg),
+                           vsz, vsz, invert, fns[esz]);
+    }
+    return true;
 }
 
 static bool do_zpzi_ool(DisasContext *s, arg_rpri_esz *a,
@@ -675,7 +663,7 @@ static bool trans_LSR_zpzi(DisasContext *s, arg_rpri_esz *a)
     /* Shift by element size is architecturally valid.
        For logical shifts, it is a zeroing operation.  */
     if (a->imm >= (8 << a->esz)) {
-        return do_clr_zp(s, a->rd, a->pg, a->esz);
+        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
     } else {
         return do_zpzi_ool(s, a, fns[a->esz]);
     }
@@ -693,7 +681,7 @@ static bool trans_LSL_zpzi(DisasContext *s, arg_rpri_esz *a)
     /* Shift by element size is architecturally valid.
        For logical shifts, it is a zeroing operation.  */
     if (a->imm >= (8 << a->esz)) {
-        return do_clr_zp(s, a->rd, a->pg, a->esz);
+        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
     } else {
         return do_zpzi_ool(s, a, fns[a->esz]);
    }
@@ -711,7 +699,7 @@ static bool trans_ASRD(DisasContext *s, arg_rpri_esz *a)
     /* Shift by element size is architecturally valid.
       For arithmetic right shift for division, it is a zeroing operation.  */
     if (a->imm >= (8 << a->esz)) {
-        return do_clr_zp(s, a->rd, a->pg, a->esz);
+        return do_movz_zpz(s, a->rd, a->rd, a->pg, a->esz, true);
     } else {
         return do_zpzi_ool(s, a, fns[a->esz]);
     }
@@ -5170,9 +5158,7 @@ static bool trans_LD1R_zpri(DisasContext *s, arg_rpri_load *a)
     tcg_temp_free_i64(tcg_ctx, temp);
 
     /* Zero the inactive elements.  */
-    gen_set_label(tcg_ctx, over);
-    do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
-    return true;
+    return do_movz_zpz(s, a->rd, a->rd, a->pg, esz, false);
 }
 
 static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
@@ -5962,8 +5948,5 @@ static bool trans_MOVPRFX_m(DisasContext *s, arg_rpr_esz *a)
 
 static bool trans_MOVPRFX_z(DisasContext *s, arg_rpr_esz *a)
 {
-    if (sve_access_check(s)) {
-        do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
-    }
-    return true;
+    return do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz, false);
 }
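
Note (illustration only, not part of the diff above): the merged helpers reduce the old clr/movz pair to a single masking identity. simd_data(desc) carries the invert flag; -(uint64_t)flag turns it into an all-zeros or all-ones 64-bit mask, and XOR-ing that with the expanded predicate decides whether the final AND keeps the active or the inactive elements. The standalone sketch below demonstrates the identity for one 64-bit chunk of a byte-element vector; expand_pred_b_sketch is a simplified, hypothetical stand-in for QEMU's table-driven expand_pred_b, not the real implementation.

#include <inttypes.h>
#include <stdio.h>

/*
 * Hypothetical stand-in for QEMU's table-driven expand_pred_b():
 * expand each of the 8 low predicate bits into a full byte of mask.
 */
static uint64_t expand_pred_b_sketch(uint8_t pg)
{
    uint64_t mask = 0;
    for (int i = 0; i < 8; i++) {
        if (pg & (1u << i)) {
            mask |= UINT64_C(0xff) << (i * 8);
        }
    }
    return mask;
}

int main(void)
{
    uint64_t n = UINT64_C(0x1122334455667788); /* one 64-bit chunk of Zn */
    uint8_t pg = 0x05;                         /* bytes 0 and 2 are active */

    for (int invert = 0; invert <= 1; invert++) {
        /* -(uint64_t)flag is all-ones when the flag is set, else all-zeros. */
        uint64_t inv = -(uint64_t)(invert & 1);
        uint64_t d = n & (expand_pred_b_sketch(pg) ^ inv);
        printf("invert=%d -> d = %016" PRIX64 "\n", invert, d);
    }
    return 0;
}

With pg = 0x05, the invert=0 run keeps bytes 0 and 2 of n and zeroes the rest (the MOVPRFX_z / LD1R use of the helper), while the invert=1 run zeroes exactly those active bytes and keeps the others, which is what the translator now requests for shift-by-element-size via do_movz_zpz(..., true).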