From 4bb37fc3c118afce823481c0e992dccf703f1435 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 25 Feb 2021 16:00:41 -0500 Subject: [PATCH] target/arm: Implement the LDGM, STGM, STZGM instructions Backports commit 5f716a82388eb09754dd900e7dbb8ffa15897a28 from qemu --- qemu/aarch64.h | 3 ++ qemu/aarch64eb.h | 3 ++ qemu/header_gen.py | 3 ++ qemu/target/arm/helper-a64.h | 5 +- qemu/target/arm/mte_helper.c | 84 +++++++++++++++++++++++++++++++++ qemu/target/arm/translate-a64.c | 72 ++++++++++++++++++++++++---- qemu/target/arm/translate.h | 2 + 7 files changed, 163 insertions(+), 9 deletions(-) diff --git a/qemu/aarch64.h b/qemu/aarch64.h index 2bb895a8..61aa0d99 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -3579,6 +3579,7 @@ #define helper_gvec_usra_s helper_gvec_usra_s_aarch64 #define helper_irg helper_irg_aarch64 #define helper_ldg helper_ldg_aarch64 +#define helper_ldgm helper_ldgm_aarch64 #define helper_msr_i_daifclear helper_msr_i_daifclear_aarch64 #define helper_msr_i_daifset helper_msr_i_daifset_aarch64 #define helper_msr_i_spsel helper_msr_i_spsel_aarch64 @@ -3615,6 +3616,8 @@ #define helper_stg helper_stg_aarch64 #define helper_stg_parallel helper_stg_parallel_aarch64 #define helper_stg_stub helper_stg_stub_aarch64 +#define helper_stgm helper_stgm_aarch64 +#define helper_stzgm_tags helper_stzgm_tags_aarch64 #define helper_sve_abs_b helper_sve_abs_b_aarch64 #define helper_sve_abs_d helper_sve_abs_d_aarch64 #define helper_sve_abs_h helper_sve_abs_h_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index 2a423505..e056c817 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -3579,6 +3579,7 @@ #define helper_gvec_usra_s helper_gvec_usra_s_aarch64eb #define helper_irg helper_irg_aarch64eb #define helper_ldg helper_ldg_aarch64eb +#define helper_ldgm helper_ldgm_aarch64eb #define helper_msr_i_daifclear helper_msr_i_daifclear_aarch64eb #define helper_msr_i_daifset helper_msr_i_daifset_aarch64eb #define helper_msr_i_spsel helper_msr_i_spsel_aarch64eb @@ -3615,6 +3616,8 @@ #define helper_stg helper_stg_aarch64eb #define helper_stg_parallel helper_stg_parallel_aarch64eb #define helper_stg_stub helper_stg_stub_aarch64eb +#define helper_stgm helper_stgm_aarch64eb +#define helper_stzgm_tags helper_stzgm_tags_aarch64eb #define helper_sve_abs_b helper_sve_abs_b_aarch64eb #define helper_sve_abs_d helper_sve_abs_d_aarch64eb #define helper_sve_abs_h helper_sve_abs_h_aarch64eb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index c7d77472..481b4339 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -3714,6 +3714,7 @@ aarch64_symbols = ( 'helper_gvec_usra_s', 'helper_irg', 'helper_ldg', + 'helper_ldgm', 'helper_msr_i_daifclear', 'helper_msr_i_daifset', 'helper_msr_i_spsel', @@ -3750,6 +3751,8 @@ aarch64_symbols = ( 'helper_stg', 'helper_stg_parallel', 'helper_stg_stub', + 'helper_stgm', + 'helper_stzgm_tags', 'helper_sve_abs_b', 'helper_sve_abs_d', 'helper_sve_abs_h', diff --git a/qemu/target/arm/helper-a64.h b/qemu/target/arm/helper-a64.h index 13f09433..fd8c6712 100644 --- a/qemu/target/arm/helper-a64.h +++ b/qemu/target/arm/helper-a64.h @@ -112,4 +112,7 @@ DEF_HELPER_FLAGS_3(stg_parallel, TCG_CALL_NO_WG, void, env, i64, i64) DEF_HELPER_FLAGS_2(stg_stub, TCG_CALL_NO_WG, void, env, i64) DEF_HELPER_FLAGS_3(st2g, TCG_CALL_NO_WG, void, env, i64, i64) DEF_HELPER_FLAGS_3(st2g_parallel, TCG_CALL_NO_WG, void, env, i64, i64) -DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64) \ No newline at end of file +DEF_HELPER_FLAGS_2(st2g_stub, TCG_CALL_NO_WG, void, env, i64) +DEF_HELPER_FLAGS_2(ldgm, TCG_CALL_NO_WG, i64, env, i64) +DEF_HELPER_FLAGS_3(stgm, TCG_CALL_NO_WG, void, env, i64, i64) +DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64) \ No newline at end of file diff --git a/qemu/target/arm/mte_helper.c b/qemu/target/arm/mte_helper.c index 7ec7930d..27d4b453 100644 --- a/qemu/target/arm/mte_helper.c +++ b/qemu/target/arm/mte_helper.c @@ -274,3 +274,87 @@ void HELPER(st2g_stub)(CPUARMState *env, uint64_t ptr) probe_write(env, ptr + TAG_GRANULE, TAG_GRANULE, mmu_idx, ra); } } + +#define LDGM_STGM_SIZE (4 << GMID_EL1_BS) + +uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *tag_mem; + + ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + + /* Trap if accessing an invalid page. */ + tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, + LDGM_STGM_SIZE, MMU_DATA_LOAD, + LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + + /* The tag is squashed to zero if the page does not support tags. */ + if (!tag_mem) { + return 0; + } + + QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); + /* + * We are loading 64-bits worth of tags. The ordering of elements + * within the word corresponds to a 64-bit little-endian operation. + */ + return ldq_le_p(tag_mem); +} + +void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val) +{ + int mmu_idx = cpu_mmu_index(env, false); + uintptr_t ra = GETPC(); + void *tag_mem; + + ptr = QEMU_ALIGN_DOWN(ptr, LDGM_STGM_SIZE); + + /* Trap if accessing an invalid page. */ + tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, + LDGM_STGM_SIZE, MMU_DATA_LOAD, + LDGM_STGM_SIZE / (2 * TAG_GRANULE), ra); + + /* + * Tag store only happens if the page support tags, + * and if the OS has enabled access to the tags. + */ + if (!tag_mem) { + return; + } + + QEMU_BUILD_BUG_ON(GMID_EL1_BS != 6); + /* + * We are storing 64-bits worth of tags. The ordering of elements + * within the word corresponds to a 64-bit little-endian operation. + */ + stq_le_p(tag_mem, val); +} + +void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) +{ + uintptr_t ra = GETPC(); + int mmu_idx = cpu_mmu_index(env, false); + int log2_dcz_bytes, log2_tag_bytes; + intptr_t dcz_bytes, tag_bytes; + uint8_t *mem; + + /* + * In arm_cpu_realizefn, we assert that dcz > LOG2_TAG_GRANULE+1, + * i.e. 32 bytes, which is an unreasonably small dcz anyway, + * to make sure that we can access one complete tag byte here. + */ + log2_dcz_bytes = env_archcpu(env)->dcz_blocksize + 2; + log2_tag_bytes = log2_dcz_bytes - (LOG2_TAG_GRANULE + 1); + dcz_bytes = (intptr_t)1 << log2_dcz_bytes; + tag_bytes = (intptr_t)1 << log2_tag_bytes; + ptr &= -dcz_bytes; + + mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes, + MMU_DATA_STORE, tag_bytes, ra); + if (mem) { + int tag_pair = (val & 0xf) * 0x11; + memset(mem, tag_pair, tag_bytes); + } +} diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c index 62222bdc..df10c1da 100644 --- a/qemu/target/arm/translate-a64.c +++ b/qemu/target/arm/translate-a64.c @@ -3933,7 +3933,7 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) uint64_t offset = sextract64(insn, 12, 9) << LOG2_TAG_GRANULE; int op2 = extract32(insn, 10, 2); int op1 = extract32(insn, 22, 2); - bool is_load = false, is_pair = false, is_zero = false; + bool is_load = false, is_pair = false, is_zero = false, is_mult = false; int index = 0; TCGv_i64 addr, clean_addr, tcg_rt; @@ -3953,9 +3953,14 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) if (op2 != 0) { /* STG */ index = op2 - 2; - break; + } else { + /* STZGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = is_zero = true; } - goto do_unallocated; + break; case 1: if (op2 != 0) { /* STZG */ @@ -3971,17 +3976,27 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) /* ST2G */ is_pair = true; index = op2 - 2; - break; + } else { + /* STGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = true; } - goto do_unallocated; + break; case 3: if (op2 != 0) { /* STZ2G */ is_pair = is_zero = true; index = op2 - 2; - break; + } else { + /* LDGM */ + if (s->current_el == 0 || offset != 0) { + goto do_unallocated; + } + is_mult = is_load = true; } - goto do_unallocated; + break; default: do_unallocated: @@ -3989,7 +4004,9 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) return; } - if (!dc_isar_feature(aa64_mte_insn_reg, s)) { + if (is_mult + ? !dc_isar_feature(aa64_mte, s) + : !dc_isar_feature(aa64_mte_insn_reg, s)) { goto do_unallocated; } @@ -4003,6 +4020,44 @@ static void disas_ldst_tag(DisasContext *s, uint32_t insn) tcg_gen_addi_i64(tcg_ctx, addr, addr, offset); } + if (is_mult) { + tcg_rt = cpu_reg(s, rt); + + if (is_zero) { + int size = 4 << s->dcz_blocksize; + + if (s->ata) { + gen_helper_stzgm_tags(tcg_ctx, tcg_ctx->cpu_env, addr, tcg_rt); + } + /* + * The non-tags portion of STZGM is mostly like DC_ZVA, + * except the alignment happens before the access. + */ + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(tcg_ctx, clean_addr, clean_addr, -size); + gen_helper_dc_zva(tcg_ctx, tcg_ctx->cpu_env, clean_addr); + } else if (s->ata) { + if (is_load) { + gen_helper_ldgm(tcg_ctx, tcg_rt, tcg_ctx->cpu_env, addr); + } else { + gen_helper_stgm(tcg_ctx, tcg_ctx->cpu_env, addr, tcg_rt); + } + } else { + MMUAccessType acc = is_load ? MMU_DATA_LOAD : MMU_DATA_STORE; + int size = 4 << GMID_EL1_BS; + + clean_addr = clean_data_tbi(s, addr); + tcg_gen_andi_i64(tcg_ctx, clean_addr, clean_addr, -size); + gen_probe_access(s, clean_addr, acc, size); + + if (is_load) { + /* The result tags are zeros. */ + tcg_gen_movi_i64(tcg_ctx, tcg_rt, 0); + } + } + return; + } + if (is_load) { tcg_gen_andi_i64(tcg_ctx, addr, addr, -TAG_GRANULE); tcg_rt = cpu_reg(s, rt); @@ -14801,6 +14856,7 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->vec_stride = 0; dc->cp_regs = arm_cpu->cp_regs; dc->features = env->features; + dc->dcz_blocksize = arm_cpu->dcz_blocksize; /* Single step state. The code-generation logic here is: * SS_ACTIVE == 0: diff --git a/qemu/target/arm/translate.h b/qemu/target/arm/translate.h index 5ef5ae6b..b4910e38 100644 --- a/qemu/target/arm/translate.h +++ b/qemu/target/arm/translate.h @@ -92,6 +92,8 @@ typedef struct DisasContext { * < 0, set by the current instruction. */ int8_t btype; + /* A copy of cpu->dcz_blocksize. */ + uint8_t dcz_blocksize; /* True if this page is guarded. */ bool guarded_page; /* TCG op of the current insn_start. */