From 7698c1634ecddf3ec73f52fc3c3f40e3b0f87699 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 15 Jun 2018 13:14:30 -0400 Subject: [PATCH] target/arm: Implement SVE vector splice (predicated) Backports commit b48ff24098c72f86e187e6abb7e9ca4de40a7fb4 from qemu --- qemu/aarch64.h | 1 + qemu/aarch64eb.h | 1 + qemu/header_gen.py | 1 + qemu/target/arm/helper-sve.h | 2 ++ qemu/target/arm/sve.decode | 3 +++ qemu/target/arm/sve_helper.c | 37 +++++++++++++++++++++++++++++++++ qemu/target/arm/translate-sve.c | 14 +++++++++++++ 7 files changed, 59 insertions(+) diff --git a/qemu/aarch64.h b/qemu/aarch64.h index 4fcbc68c..437f6af3 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -3489,6 +3489,7 @@ #define helper_sve_smulh_zpzz_d helper_sve_smulh_zpzz_d_aarch64 #define helper_sve_smulh_zpzz_h helper_sve_smulh_zpzz_h_aarch64 #define helper_sve_smulh_zpzz_s helper_sve_smulh_zpzz_s_aarch64 +#define helper_sve_splice helper_sve_splice_aarch64 #define helper_sve_sqaddi_b helper_sve_sqaddi_b_aarch64 #define helper_sve_sqaddi_d helper_sve_sqaddi_d_aarch64 #define helper_sve_sqaddi_h helper_sve_sqaddi_h_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index aefc599e..3a94f259 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -3489,6 +3489,7 @@ #define helper_sve_smulh_zpzz_d helper_sve_smulh_zpzz_d_aarch64eb #define helper_sve_smulh_zpzz_h helper_sve_smulh_zpzz_h_aarch64eb #define helper_sve_smulh_zpzz_s helper_sve_smulh_zpzz_s_aarch64eb +#define helper_sve_splice helper_sve_splice_aarch64eb #define helper_sve_sqaddi_b helper_sve_sqaddi_b_aarch64eb #define helper_sve_sqaddi_d helper_sve_sqaddi_d_aarch64eb #define helper_sve_sqaddi_h helper_sve_sqaddi_h_aarch64eb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index 7e2c4556..969f5806 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -3510,6 +3510,7 @@ aarch64_symbols = ( 'helper_sve_smulh_zpzz_d', 'helper_sve_smulh_zpzz_h', 'helper_sve_smulh_zpzz_s', + 'helper_sve_splice', 'helper_sve_sqaddi_b', 'helper_sve_sqaddi_d', 'helper_sve_sqaddi_h', diff --git a/qemu/target/arm/helper-sve.h b/qemu/target/arm/helper-sve.h index 3b7c5490..c3f8a2b5 100644 --- a/qemu/target/arm/helper-sve.h +++ b/qemu/target/arm/helper-sve.h @@ -479,6 +479,8 @@ DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_4(sve_rbit_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32) +DEF_HELPER_FLAGS_5(sve_splice, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) + DEF_HELPER_FLAGS_5(sve_and_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_bic_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sve_eor_pppp, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) diff --git a/qemu/target/arm/sve.decode b/qemu/target/arm/sve.decode index c2c78dbb..e6a9b5e3 100644 --- a/qemu/target/arm/sve.decode +++ b/qemu/target/arm/sve.decode @@ -463,6 +463,9 @@ REVH 00000101 .. 1001 01 100 ... ..... ..... @rd_pg_rn REVW 00000101 .. 1001 10 100 ... ..... ..... @rd_pg_rn RBIT 00000101 .. 1001 11 100 ... ..... ..... @rd_pg_rn +# SVE vector splice (predicated) +SPLICE 00000101 .. 101 100 100 ... ..... ..... @rdn_pg_rm + ### SVE Predicate Logical Operations Group # SVE predicate logical operations diff --git a/qemu/target/arm/sve_helper.c b/qemu/target/arm/sve_helper.c index d9de00ff..27dd8ecd 100644 --- a/qemu/target/arm/sve_helper.c +++ b/qemu/target/arm/sve_helper.c @@ -2108,3 +2108,40 @@ int32_t HELPER(sve_last_active_element)(void *vg, uint32_t pred_desc) return last_active_element(vg, DIV_ROUND_UP(oprsz, 8), esz); } + +void HELPER(sve_splice)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) +{ + intptr_t opr_sz = simd_oprsz(desc) / 8; + int esz = simd_data(desc); + uint64_t pg, first_g, last_g, len, mask = pred_esz_masks[esz]; + intptr_t i, first_i, last_i; + ARMVectorReg tmp; + + first_i = last_i = 0; + first_g = last_g = 0; + + /* Find the extent of the active elements within VG. */ + for (i = QEMU_ALIGN_UP(opr_sz, 8) - 8; i >= 0; i -= 8) { + pg = *(uint64_t *)(vg + i) & mask; + if (pg) { + if (last_g == 0) { + last_g = pg; + last_i = i; + } + first_g = pg; + first_i = i; + } + } + + len = 0; + if (first_g != 0) { + first_i = first_i * 8 + ctz64(first_g); + last_i = last_i * 8 + 63 - clz64(last_g); + len = last_i - first_i + (1 << esz); + if (vd == vm) { + vm = memcpy(&tmp, vm, opr_sz * 8); + } + swap_memmove(vd, vn + first_i, len); + } + swap_memmove(vd + len, vm, opr_sz * 8 - len); +} diff --git a/qemu/target/arm/translate-sve.c b/qemu/target/arm/translate-sve.c index 33e62490..b3a5cae8 100644 --- a/qemu/target/arm/translate-sve.c +++ b/qemu/target/arm/translate-sve.c @@ -2782,6 +2782,20 @@ static bool trans_RBIT(DisasContext *s, arg_rpr_esz *a, uint32_t insn) return do_zpz_ool(s, a, fns[a->esz]); } +static bool trans_SPLICE(DisasContext *s, arg_rprr_esz *a, uint32_t insn) +{ + if (sve_access_check(s)) { + TCGContext *tcg_ctx = s->uc->tcg_ctx; + unsigned vsz = vec_full_reg_size(s); + tcg_gen_gvec_4_ool(tcg_ctx, vec_full_reg_offset(s, a->rd), + vec_full_reg_offset(s, a->rn), + vec_full_reg_offset(s, a->rm), + pred_full_reg_offset(s, a->pg), + vsz, vsz, a->esz, gen_helper_sve_splice); + } + return true; +} + /* *** SVE Memory - 32-bit Gather and Unsized Contiguous Group */