From f72582bb7a5a67a331b4cb745f4c1dc61ece2948 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Sun, 22 Mar 2020 00:11:32 -0400 Subject: [PATCH] target/arm: Implement v8.3-RCPC The v8.3-RCPC extension implements three new load instructions which provide slightly weaker consistency guarantees than the existing load-acquire operations. For QEMU we choose to simply implement them with a full LDAQ barrier. Backports commit 2677cf9f92a5319bb995927f9225940414ce879d from qemu --- qemu/target/arm/cpu.h | 5 +++++ qemu/target/arm/cpu64.c | 1 + qemu/target/arm/translate-a64.c | 25 +++++++++++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/qemu/target/arm/cpu.h b/qemu/target/arm/cpu.h index 2fa20966..c215ed29 100644 --- a/qemu/target/arm/cpu.h +++ b/qemu/target/arm/cpu.h @@ -3647,6 +3647,11 @@ static inline bool isar_feature_aa64_pmu_8_4(const ARMISARegisters *id) FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf; } +static inline bool isar_feature_aa64_rcpc_8_3(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) != 0; +} + /* * Feature tests for "does this exist in either 32-bit or 64-bit?" */ diff --git a/qemu/target/arm/cpu64.c b/qemu/target/arm/cpu64.c index 08d38e44..b61955b7 100644 --- a/qemu/target/arm/cpu64.c +++ b/qemu/target/arm/cpu64.c @@ -298,6 +298,7 @@ static void aarch64_max_initfn(struct uc_struct *uc, Object *obj, void *opaque) t = FIELD_DP64(t, ID_AA64ISAR1, SB, 1); t = FIELD_DP64(t, ID_AA64ISAR1, SPECRES, 1); t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 1); + t = FIELD_DP64(t, ID_AA64ISAR1, LRCPC, 1); /* ARMv8.3-RCPC */ cpu->isar.id_aa64isar1 = t; t = cpu->isar.id_aa64pfr0; diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c index 6121c1ed..d4ce1430 100644 --- a/qemu/target/arm/translate-a64.c +++ b/qemu/target/arm/translate-a64.c @@ -3335,6 +3335,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, int rs = extract32(insn, 16, 5); int rn = extract32(insn, 5, 5); int o3_opc = extract32(insn, 12, 4); + bool r = extract32(insn, 22, 1); + bool a = extract32(insn, 23, 1); TCGv_i64 tcg_rs, clean_addr; AtomicThreeOpFn *fn; @@ -3370,6 +3372,13 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, case 010: /* SWP */ fn = tcg_gen_atomic_xchg_i64; break; + case 014: /* LDAPR, LDAPRH, LDAPRB */ + if (!dc_isar_feature(aa64_rcpc_8_3, s) || + rs != 31 || a != 1 || r != 0) { + unallocated_encoding(s); + return; + } + break; default: unallocated_encoding(s); return; @@ -3379,6 +3388,22 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn, gen_check_sp_alignment(s); } clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn)); + + if (o3_opc == 014) { + /* + * LDAPR* are a special case because they are a simple load, not a + * fetch-and-do-something op. + * The architectural consistency requirements here are weaker than + * full load-acquire (we only need "load-acquire processor consistent"), + * but we choose to implement them as full LDAQ. + */ + do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false, + true, rt, disas_ldst_compute_iss_sf(size, false, 0), true); + tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_LDAQ); + return; + } + + tcg_rs = read_cpu_reg(s, rs, true); if (o3_opc == 1) { /* LDCLR */