From 49bd9a5c68f0c2f21e99194c726456c6f8d238b0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Feb 2021 17:40:39 -0500 Subject: [PATCH] target/arm: Use mte_checkN for sve unpredicated stores Backports commit bba87d0a0f480805223a6428a7942a51733c488a from qemu --- qemu/aarch64.h | 6 ++-- qemu/aarch64eb.h | 6 ++-- qemu/arm.h | 6 ++-- qemu/armeb.h | 6 ++-- qemu/header_gen.py | 12 ++++--- qemu/target/arm/internals.h | 8 ++--- qemu/target/arm/mte_helper.c | 12 +++---- qemu/target/arm/translate-a64.c | 8 ++--- qemu/target/arm/translate-a64.h | 5 +++ qemu/target/arm/translate-sve.c | 63 ++++++++++++++++++--------------- 10 files changed, 77 insertions(+), 55 deletions(-) diff --git a/qemu/aarch64.h b/qemu/aarch64.h index 75defad4..e598add4 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -3474,6 +3474,8 @@ #define gen_gvec_urshr gen_gvec_urshr_aarch64 #define gen_gvec_ushl gen_gvec_ushl_aarch64 #define gen_gvec_usra gen_gvec_usra_aarch64 +#define gen_mte_check1 gen_mte_check1_aarch64 +#define gen_mte_checkN gen_mte_checkN_aarch64 #define get_phys_addr get_phys_addr_aarch64 #define gen_sshl_i32 gen_sshl_i32_aarch64 #define gen_sshl_i64 gen_sshl_i64_aarch64 @@ -4527,8 +4529,8 @@ #define helper_xpacd helper_xpacd_aarch64 #define helper_xpaci helper_xpaci_aarch64 #define logic_imm_decode_wmask logic_imm_decode_wmask_aarch64 -#define mte_check1 mte_check1_aarch64 -#define mte_checkN mte_checkN_aarch64 +#define mte_check1_ mte_check1__aarch64 +#define mte_checkN_ mte_checkN__aarch64 #define mte_probe1 mte_probe1_aarch64 #define new_tmp_a64 new_tmp_a64_aarch64 #define new_tmp_a64_zero new_tmp_a64_zero_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index 66d58a83..a6156338 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -3474,6 +3474,8 @@ #define gen_gvec_urshr gen_gvec_urshr_aarch64eb #define gen_gvec_ushl gen_gvec_ushl_aarch64eb #define gen_gvec_usra gen_gvec_usra_aarch64eb +#define gen_mte_check1 gen_mte_check1_aarch64eb +#define gen_mte_checkN gen_mte_checkN_aarch64eb #define get_phys_addr get_phys_addr_aarch64eb #define gen_sshl_i32 gen_sshl_i32_aarch64eb #define gen_sshl_i64 gen_sshl_i64_aarch64eb @@ -4527,8 +4529,8 @@ #define helper_xpacd helper_xpacd_aarch64eb #define helper_xpaci helper_xpaci_aarch64eb #define logic_imm_decode_wmask logic_imm_decode_wmask_aarch64eb -#define mte_check1 mte_check1_aarch64eb -#define mte_checkN mte_checkN_aarch64eb +#define mte_check1_ mte_check1__aarch64eb +#define mte_checkN_ mte_checkN__aarch64eb #define mte_probe1 mte_probe1_aarch64eb #define new_tmp_a64 new_tmp_a64_aarch64eb #define new_tmp_a64_zero new_tmp_a64_zero_aarch64eb diff --git a/qemu/arm.h b/qemu/arm.h index d7ad02e6..6fd34363 100644 --- a/qemu/arm.h +++ b/qemu/arm.h @@ -3459,6 +3459,8 @@ #define gen_gvec_urshr gen_gvec_urshr_arm #define gen_gvec_ushl gen_gvec_ushl_arm #define gen_gvec_usra gen_gvec_usra_arm +#define gen_mte_check1 gen_mte_check1_arm +#define gen_mte_checkN gen_mte_checkN_arm #define get_phys_addr get_phys_addr_arm #define gen_sshl_i32 gen_sshl_i32_arm #define gen_sshl_i64 gen_sshl_i64_arm @@ -3515,8 +3517,8 @@ #define helper_gvec_usra_s helper_gvec_usra_s_arm #define helper_probe_access_armfn helper_probe_access_armfn_arm #define helper_vjcvt helper_vjcvt_arm -#define mte_check1 mte_check1_arm -#define mte_checkN mte_checkN_arm +#define mte_check1_ mte_check1__arm +#define mte_checkN_ mte_checkN__arm #define mte_probe1 mte_probe1_arm #define pmu_init pmu_init_arm #define pmsav8_mpu_lookup pmsav8_mpu_lookup_arm diff --git a/qemu/armeb.h b/qemu/armeb.h index 8744d6c4..a7ec55e2 100644 --- a/qemu/armeb.h +++ b/qemu/armeb.h @@ -3459,6 +3459,8 @@ #define gen_gvec_urshr gen_gvec_urshr_armeb #define gen_gvec_ushl gen_gvec_ushl_armeb #define gen_gvec_usra gen_gvec_usra_armeb +#define gen_mte_check1 gen_mte_check1_armeb +#define gen_mte_checkN gen_mte_checkN_armeb #define get_phys_addr get_phys_addr_armeb #define gen_sshl_i32 gen_sshl_i32_armeb #define gen_sshl_i64 gen_sshl_i64_armeb @@ -3515,8 +3517,8 @@ #define helper_gvec_usra_s helper_gvec_usra_s_armeb #define helper_probe_access_armfn helper_probe_access_armfn_armeb #define helper_vjcvt helper_vjcvt_armeb -#define mte_check1 mte_check1_armeb -#define mte_checkN mte_checkN_armeb +#define mte_check1_ mte_check1__armeb +#define mte_checkN_ mte_checkN__armeb #define mte_probe1 mte_probe1_armeb #define pmu_init pmu_init_armeb #define pmsav8_mpu_lookup pmsav8_mpu_lookup_armeb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index bc714b09..63c8a465 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -3468,6 +3468,8 @@ arm_symbols = ( 'gen_gvec_urshr', 'gen_gvec_ushl', 'gen_gvec_usra', + 'gen_mte_check1', + 'gen_mte_checkN', 'get_phys_addr', 'gen_sshl_i32', 'gen_sshl_i64', @@ -3524,8 +3526,8 @@ arm_symbols = ( 'helper_gvec_usra_s', 'helper_probe_access_armfn', 'helper_vjcvt', - 'mte_check1', - 'mte_checkN', + 'mte_check1_', + 'mte_checkN_', 'mte_probe1', 'pmu_init', 'pmsav8_mpu_lookup', @@ -3612,6 +3614,8 @@ aarch64_symbols = ( 'gen_gvec_urshr', 'gen_gvec_ushl', 'gen_gvec_usra', + 'gen_mte_check1', + 'gen_mte_checkN', 'get_phys_addr', 'gen_sshl_i32', 'gen_sshl_i64', @@ -4665,8 +4669,8 @@ aarch64_symbols = ( 'helper_xpacd', 'helper_xpaci', 'logic_imm_decode_wmask', - 'mte_check1', - 'mte_checkN', + 'mte_check1_', + 'mte_checkN_', 'mte_probe1', 'new_tmp_a64', 'new_tmp_a64_zero', diff --git a/qemu/target/arm/internals.h b/qemu/target/arm/internals.h index 24e93b49..b52e483b 100644 --- a/qemu/target/arm/internals.h +++ b/qemu/target/arm/internals.h @@ -1324,10 +1324,10 @@ FIELD(MTEDESC, ESIZE, 9, 5) FIELD(MTEDESC, TSIZE, 14, 10) /* mte_checkN only */ bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr); -uint64_t mte_check1(CPUARMState *env, uint32_t desc, - uint64_t ptr, uintptr_t ra); -uint64_t mte_checkN(CPUARMState *env, uint32_t desc, - uint64_t ptr, uintptr_t ra); +uint64_t mte_check1_(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra); +uint64_t mte_checkN_(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra); static inline int allocation_tag_from_addr(uint64_t ptr) diff --git a/qemu/target/arm/mte_helper.c b/qemu/target/arm/mte_helper.c index 743ee366..b519bbbb 100644 --- a/qemu/target/arm/mte_helper.c +++ b/qemu/target/arm/mte_helper.c @@ -474,8 +474,8 @@ bool mte_probe1(CPUARMState *env, uint32_t desc, uint64_t ptr) return mte_probe1_int(env, desc, ptr, 0, bit55); } -uint64_t mte_check1(CPUARMState *env, uint32_t desc, - uint64_t ptr, uintptr_t ra) +uint64_t mte_check1_(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra) { int bit55 = extract64(ptr, 55, 1); @@ -494,7 +494,7 @@ uint64_t mte_check1(CPUARMState *env, uint32_t desc, uint64_t HELPER(mte_check_1)(CPUARMState *env, uint32_t desc, uint64_t ptr) { - return mte_check1(env, desc, ptr, GETPC()); + return mte_check1_(env, desc, ptr, GETPC()); } /** @@ -559,8 +559,8 @@ static int checkN(uint8_t *mem, int odd, int cmp, int count) return n; } -uint64_t mte_checkN(CPUARMState *env, uint32_t desc, - uint64_t ptr, uintptr_t ra) +uint64_t mte_checkN_(CPUARMState *env, uint32_t desc, + uint64_t ptr, uintptr_t ra) { int mmu_idx, ptr_tag, bit55; uint64_t ptr_last, ptr_end, prev_page, next_page; @@ -661,7 +661,7 @@ uint64_t mte_checkN(CPUARMState *env, uint32_t desc, uint64_t HELPER(mte_check_N)(CPUARMState *env, uint32_t desc, uint64_t ptr) { - return mte_checkN(env, desc, ptr, GETPC()); + return mte_checkN_(env, desc, ptr, GETPC()); } /* diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c index bd884707..e92bd13b 100644 --- a/qemu/target/arm/translate-a64.c +++ b/qemu/target/arm/translate-a64.c @@ -415,8 +415,8 @@ static TCGv_i64 gen_mte_check1_mmuidx(DisasContext *s, TCGv_i64 addr, return clean_data_tbi(s, addr); } -static TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, - bool tag_checked, int log2_size) +TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_size) { return gen_mte_check1_mmuidx(s, addr, is_write, tag_checked, log2_size, false, get_mem_index(s)); @@ -425,8 +425,8 @@ static TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, /* * For MTE, check multiple logical sequential accesses. */ -static TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, - bool tag_checked, int log2_esize, int total_size) +TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_esize, int total_size) { if (tag_checked && s->mte_active[0] && total_size != (1 << log2_esize)) { TCGv_i32 tcg_desc; diff --git a/qemu/target/arm/translate-a64.h b/qemu/target/arm/translate-a64.h index dce6e7c1..931d387d 100644 --- a/qemu/target/arm/translate-a64.h +++ b/qemu/target/arm/translate-a64.h @@ -119,4 +119,9 @@ bool disas_sve(DisasContext *, uint32_t); void gen_gvec_rax1(TCGContext *s, unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs, uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz); +TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_size); +TCGv_i64 gen_mte_checkN(DisasContext *s, TCGv_i64 addr, bool is_write, + bool tag_checked, int log2_esize, int total_size); + #endif /* TARGET_ARM_TRANSLATE_A64_H */ diff --git a/qemu/target/arm/translate-sve.c b/qemu/target/arm/translate-sve.c index eefa0be6..3bac0281 100644 --- a/qemu/target/arm/translate-sve.c +++ b/qemu/target/arm/translate-sve.c @@ -4577,15 +4577,17 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm) /* Similarly for stores. */ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) { + TCGContext *tcg_ctx = s->uc->tcg_ctx; int len_align = QEMU_ALIGN_DOWN(len, 8); int len_remain = len % 8; int nparts = len / 8 + ctpop8(len_remain); int midx = get_mem_index(s); - TCGv_i64 addr, t0; + TCGv_i64 dirty_addr, clean_addr, t0; - TCGContext *tcg_ctx = s->uc->tcg_ctx; - addr = tcg_temp_new_i64(tcg_ctx); - t0 = tcg_temp_new_i64(tcg_ctx); + dirty_addr = tcg_temp_new_i64(tcg_ctx); + tcg_gen_addi_i64(tcg_ctx, dirty_addr, cpu_reg_sp(s, rn), imm); + clean_addr = gen_mte_checkN(s, dirty_addr, false, rn != 31, len, MO_8); + tcg_temp_free_i64(tcg_ctx, dirty_addr); /* Note that unpredicated load/store of vector/predicate registers * are defined as a stream of bytes, which equates to little-endian @@ -4598,33 +4600,35 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) if (nparts <= 4) { int i; + t0 = tcg_temp_new_i64(tcg_ctx); for (i = 0; i < len_align; i += 8) { tcg_gen_ld_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + i); - tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + i); - tcg_gen_qemu_st_i64(s->uc, t0, addr, midx, MO_LEQ); + tcg_gen_qemu_st_i64(s->uc, t0, clean_addr, midx, MO_LEQ); + tcg_gen_addi_i64(tcg_ctx, clean_addr, cpu_reg_sp(s, rn), 8); } + tcg_temp_free_i64(tcg_ctx, t0); } else { TCGLabel *loop = gen_new_label(tcg_ctx); - TCGv_ptr t2, i = tcg_const_local_ptr(tcg_ctx, 0); + TCGv_ptr tp, i = tcg_const_local_ptr(tcg_ctx, 0); + + /* Copy the clean address into a local temp, live across the loop. */ + t0 = clean_addr; + clean_addr = tcg_temp_local_new_i64(tcg_ctx); + tcg_gen_mov_i64(tcg_ctx, clean_addr, t0); + tcg_temp_free_i64(tcg_ctx, t0); gen_set_label(tcg_ctx, loop); - t2 = tcg_temp_new_ptr(tcg_ctx); - tcg_gen_add_ptr(tcg_ctx, t2, tcg_ctx->cpu_env, i); - tcg_gen_ld_i64(tcg_ctx, t0, t2, vofs); - - /* Minimize the number of local temps that must be re-read from - * the stack each iteration. Instead, re-compute values other - * than the loop counter. - */ - tcg_gen_addi_ptr(tcg_ctx, t2, i, imm); - tcg_gen_extu_ptr_i64(tcg_ctx, addr, t2); - tcg_gen_add_i64(tcg_ctx, addr, addr, cpu_reg_sp(s, rn)); - tcg_temp_free_ptr(tcg_ctx, t2); - - tcg_gen_qemu_st_i64(s->uc, t0, addr, midx, MO_LEQ); - + t0 = tcg_temp_new_i64(tcg_ctx); + tp = tcg_temp_new_ptr(tcg_ctx); + tcg_gen_add_ptr(tcg_ctx, tp, tcg_ctx->cpu_env, i); + tcg_gen_ld_i64(tcg_ctx, t0, tp, vofs); tcg_gen_addi_ptr(tcg_ctx, i, i, 8); + tcg_temp_free_ptr(tcg_ctx, tp); + + tcg_gen_qemu_st_i64(s->uc, t0, clean_addr, midx, MO_LEQ); + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 8); + tcg_temp_free_i64(tcg_ctx, t0); tcg_gen_brcondi_ptr(tcg_ctx, TCG_COND_LTU, i, len_align, loop); tcg_temp_free_ptr(tcg_ctx, i); @@ -4632,29 +4636,30 @@ static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm) /* Predicate register stores can be any multiple of 2. */ if (len_remain) { + t0 = tcg_temp_new_i64(tcg_ctx); tcg_gen_ld_i64(tcg_ctx, t0, tcg_ctx->cpu_env, vofs + len_align); - tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, rn), imm + len_align); switch (len_remain) { case 2: case 4: case 8: - tcg_gen_qemu_st_i64(s->uc, t0, addr, midx, MO_LE | ctz32(len_remain)); + tcg_gen_qemu_st_i64(s->uc, t0, clean_addr, midx, + MO_LE | ctz32(len_remain)); break; case 6: - tcg_gen_qemu_st_i64(s->uc, t0, addr, midx, MO_LEUL); - tcg_gen_addi_i64(tcg_ctx, addr, addr, 4); + tcg_gen_qemu_st_i64(s->uc, t0, clean_addr, midx, MO_LEUL); + tcg_gen_addi_i64(tcg_ctx, clean_addr, clean_addr, 4); tcg_gen_shri_i64(tcg_ctx, t0, t0, 32); - tcg_gen_qemu_st_i64(s->uc, t0, addr, midx, MO_LEUW); + tcg_gen_qemu_st_i64(s->uc, t0, clean_addr, midx, MO_LEUW); break; default: g_assert_not_reached(); } + tcg_temp_free_i64(tcg_ctx, t0); } - tcg_temp_free_i64(tcg_ctx, addr); - tcg_temp_free_i64(tcg_ctx, t0); + tcg_temp_free_i64(tcg_ctx, clean_addr); } static bool trans_LDR_zri(DisasContext *s, arg_rri *a)