From 4fdd05e1aa79d0aab57425a39ddf54605d67536a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Feb 2021 17:15:59 -0500 Subject: [PATCH] target/arm: Add helper_mte_check_zva Use a special helper for DC_ZVA, rather than the more general mte_checkN. Backports commit 46dc1bc0601554823a42ad27f236da2ad8f3bdc6 from qemu --- qemu/aarch64.h | 1 + qemu/aarch64eb.h | 1 + qemu/header_gen.py | 1 + qemu/target/arm/helper-a64.h | 1 + qemu/target/arm/mte_helper.c | 106 ++++++++++++++++++++++++++++++++ qemu/target/arm/translate-a64.c | 16 ++++- 6 files changed, 125 insertions(+), 1 deletion(-) diff --git a/qemu/aarch64.h b/qemu/aarch64.h index 1db188f7..75defad4 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -3585,6 +3585,7 @@ #define helper_msr_i_spsel helper_msr_i_spsel_aarch64 #define helper_mte_check_1 helper_mte_check_1_aarch64 #define helper_mte_check_N helper_mte_check_N_aarch64 +#define helper_mte_check_zva helper_mte_check_zva_aarch64 #define helper_neon_addlp_s16 helper_neon_addlp_s16_aarch64 #define helper_neon_addlp_s8 helper_neon_addlp_s8_aarch64 #define helper_neon_addlp_u16 helper_neon_addlp_u16_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index d8f9eb5c..66d58a83 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -3585,6 +3585,7 @@ #define helper_msr_i_spsel helper_msr_i_spsel_aarch64eb #define helper_mte_check_1 helper_mte_check_1_aarch64eb #define helper_mte_check_N helper_mte_check_N_aarch64eb +#define helper_mte_check_zva helper_mte_check_zva_aarch64eb #define helper_neon_addlp_s16 helper_neon_addlp_s16_aarch64eb #define helper_neon_addlp_s8 helper_neon_addlp_s8_aarch64eb #define helper_neon_addlp_u16 helper_neon_addlp_u16_aarch64eb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index 02182561..bc714b09 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -3723,6 +3723,7 @@ aarch64_symbols = ( 'helper_msr_i_spsel', 'helper_mte_check_1', 'helper_mte_check_N', + 'helper_mte_check_zva', 'helper_neon_addlp_s16', 'helper_neon_addlp_s8', 'helper_neon_addlp_u16', diff --git a/qemu/target/arm/helper-a64.h b/qemu/target/arm/helper-a64.h index 7bc773b2..aff1e981 100644 --- a/qemu/target/arm/helper-a64.h +++ b/qemu/target/arm/helper-a64.h @@ -108,6 +108,7 @@ DEF_HELPER_FLAGS_2(xpacd, TCG_CALL_NO_RWG_SE, i64, env, i64) DEF_HELPER_FLAGS_3(mte_check_1, TCG_CALL_NO_WG, i64, env, i32, i64) // Named "mte_checkN" in mainline qemu. Renamed to avoid header gen conflicts DEF_HELPER_FLAGS_3(mte_check_N, TCG_CALL_NO_WG, i64, env, i32, i64) +DEF_HELPER_FLAGS_3(mte_check_zva, TCG_CALL_NO_WG, i64, env, i32, i64) DEF_HELPER_FLAGS_3(irg, TCG_CALL_NO_RWG, i64, env, i64, i64) DEF_HELPER_FLAGS_4(addsubg, TCG_CALL_NO_RWG_SE, i64, env, i64, s32, i32) DEF_HELPER_FLAGS_3(ldg, TCG_CALL_NO_WG, i64, env, i64, i64) diff --git a/qemu/target/arm/mte_helper.c b/qemu/target/arm/mte_helper.c index e110b6a2..743ee366 100644 --- a/qemu/target/arm/mte_helper.c +++ b/qemu/target/arm/mte_helper.c @@ -663,3 +663,109 @@ uint64_t HELPER(mte_check_N)(CPUARMState *env, uint32_t desc, uint64_t ptr) { return mte_checkN(env, desc, ptr, GETPC()); } + +/* + * Perform an MTE checked access for DC_ZVA. + */ +uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr) +{ + uintptr_t ra = GETPC(); + int log2_dcz_bytes, log2_tag_bytes; + int mmu_idx, bit55; + intptr_t dcz_bytes, tag_bytes, i; + void *mem; + uint64_t ptr_tag, mem_tag, align_ptr; + + bit55 = extract64(ptr, 55, 1); + + /* If TBI is disabled, the access is unchecked, and ptr is not dirty. */ + if (unlikely(!tbi_check(desc, bit55))) { + return ptr; + } + + ptr_tag = allocation_tag_from_addr(ptr); + + if (tcma_check(desc, bit55, ptr_tag)) { + goto done; + } + + /* + * In arm_cpu_realizefn, we asserted that dcz > LOG2_TAG_GRANULE+1, + * i.e. 32 bytes, which is an unreasonably small dcz anyway, to make + * sure that we can access one complete tag byte here. + */ + log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2; + log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1); + dcz_bytes = (intptr_t)1 << log2_dcz_bytes; + tag_bytes = (intptr_t)1 << log2_tag_bytes; + align_ptr = ptr & -dcz_bytes; + + /* + * Trap if accessing an invalid page. DC_ZVA requires that we supply + * the original pointer for an invalid page. But watchpoints require + * that we probe the actual space. So do both. + */ + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + (void) probe_write(env, ptr, 1, mmu_idx, ra); + mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE, + dcz_bytes, MMU_DATA_LOAD, tag_bytes, ra); + if (!mem) { + goto done; + } + + /* + * Unlike the reasoning for checkN, DC_ZVA is always aligned, and thus + * it is quite easy to perform all of the comparisons at once without + * any extra masking. + * + * The most common zva block size is 64; some of the thunderx cpus use + * a block size of 128. For user-only, aarch64_max_initfn will set the + * block size to 512. Fill out the other cases for future-proofing. + * + * In order to be able to find the first miscompare later, we want the + * tag bytes to be in little-endian order. + */ + switch (log2_tag_bytes) { + case 0: /* zva_blocksize 32 */ + mem_tag = *(uint8_t *)mem; + ptr_tag *= 0x11u; + break; + case 1: /* zva_blocksize 64 */ + mem_tag = cpu_to_le16(*(uint16_t *)mem); + ptr_tag *= 0x1111u; + break; + case 2: /* zva_blocksize 128 */ + mem_tag = cpu_to_le32(*(uint32_t *)mem); + ptr_tag *= 0x11111111u; + break; + case 3: /* zva_blocksize 256 */ + mem_tag = cpu_to_le64(*(uint64_t *)mem); + ptr_tag *= 0x1111111111111111ull; + break; + + default: /* zva_blocksize 512, 1024, 2048 */ + ptr_tag *= 0x1111111111111111ull; + i = 0; + do { + mem_tag = cpu_to_le64(*(uint64_t *)(mem + i)); + if (unlikely(mem_tag != ptr_tag)) { + goto fail; + } + i += 8; + align_ptr += 16 * TAG_GRANULE; + } while (i < tag_bytes); + goto done; + } + + if (likely(mem_tag == ptr_tag)) { + goto done; + } + + fail: + /* Locate the first nibble that differs. */ + i = ctz64(mem_tag ^ ptr_tag) >> 4; + mte_check_fail(env, mmu_idx, align_ptr + i * TAG_GRANULE, ra); + + done: + return useronly_clean_ptr(ptr); +} diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c index 0c3c65af..bd884707 100644 --- a/qemu/target/arm/translate-a64.c +++ b/qemu/target/arm/translate-a64.c @@ -2046,7 +2046,21 @@ static void handle_sys(DisasContext *s, uint32_t insn, bool isread, return; case ARM_CP_DC_ZVA: /* Writes clear the aligned block of memory which rt points into. */ - tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); + if (s->mte_active[0]) { + TCGv_i32 t_desc; + int desc = 0; + + desc = FIELD_DP32(desc, MTEDESC, MIDX, get_mem_index(s)); + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); + t_desc = tcg_const_i32(tcg_ctx, desc); + + tcg_rt = new_tmp_a64(s); + gen_helper_mte_check_zva(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, t_desc, cpu_reg(s, rt)); + tcg_temp_free_i32(tcg_ctx, t_desc); + } else { + tcg_rt = clean_data_tbi(s, cpu_reg(s, rt)); + } gen_helper_dc_zva(tcg_ctx, tcg_ctx->cpu_env, tcg_rt); return; default: