From f72582bb7a5a67a331b4cb745f4c1dc61ece2948 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Sun, 22 Mar 2020 00:11:32 -0400
Subject: [PATCH] target/arm: Implement v8.3-RCPC

The v8.3-RCPC extension implements three new load instructions
which provide slightly weaker consistency guarantees than the
existing load-acquire operations. For QEMU we choose to simply
implement them with a full LDAQ barrier.

Backports commit 2677cf9f92a5319bb995927f9225940414ce879d from qemu
---
 qemu/target/arm/cpu.h           |  5 +++++
 qemu/target/arm/cpu64.c         |  1 +
 qemu/target/arm/translate-a64.c | 25 +++++++++++++++++++++++++
 3 files changed, 31 insertions(+)

diff --git a/qemu/target/arm/cpu.h b/qemu/target/arm/cpu.h
index 2fa20966..c215ed29 100644
--- a/qemu/target/arm/cpu.h
+++ b/qemu/target/arm/cpu.h
@@ -3647,6 +3647,11 @@ static inline bool isar_feature_aa64_pmu_8_4(const ARMISARegisters *id)
         FIELD_EX64(id->id_aa64dfr0, ID_AA64DFR0, PMUVER) != 0xf;
 }
 
+static inline bool isar_feature_aa64_rcpc_8_3(const ARMISARegisters *id)
+{
+    return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, LRCPC) != 0;
+}
+
 /*
  * Feature tests for "does this exist in either 32-bit or 64-bit?"
  */
diff --git a/qemu/target/arm/cpu64.c b/qemu/target/arm/cpu64.c
index 08d38e44..b61955b7 100644
--- a/qemu/target/arm/cpu64.c
+++ b/qemu/target/arm/cpu64.c
@@ -298,6 +298,7 @@ static void aarch64_max_initfn(struct uc_struct *uc, Object *obj, void *opaque)
     t = FIELD_DP64(t, ID_AA64ISAR1, SB, 1);
     t = FIELD_DP64(t, ID_AA64ISAR1, SPECRES, 1);
     t = FIELD_DP64(t, ID_AA64ISAR1, FRINTTS, 1);
+    t = FIELD_DP64(t, ID_AA64ISAR1, LRCPC, 1); /* ARMv8.3-RCPC */
     cpu->isar.id_aa64isar1 = t;
 
     t = cpu->isar.id_aa64pfr0;
diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c
index 6121c1ed..d4ce1430 100644
--- a/qemu/target/arm/translate-a64.c
+++ b/qemu/target/arm/translate-a64.c
@@ -3335,6 +3335,8 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
     int rs = extract32(insn, 16, 5);
     int rn = extract32(insn, 5, 5);
     int o3_opc = extract32(insn, 12, 4);
+    bool r = extract32(insn, 22, 1);
+    bool a = extract32(insn, 23, 1);
     TCGv_i64 tcg_rs, clean_addr;
     AtomicThreeOpFn *fn;
 
@@ -3370,6 +3372,13 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
     case 010: /* SWP */
         fn = tcg_gen_atomic_xchg_i64;
         break;
+    case 014: /* LDAPR, LDAPRH, LDAPRB */
+        if (!dc_isar_feature(aa64_rcpc_8_3, s) ||
+            rs != 31 || a != 1 || r != 0) {
+            unallocated_encoding(s);
+            return;
+        }
+        break;
     default:
         unallocated_encoding(s);
         return;
@@ -3379,6 +3388,22 @@ static void disas_ldst_atomic(DisasContext *s, uint32_t insn,
         gen_check_sp_alignment(s);
     }
     clean_addr = clean_data_tbi(s, cpu_reg_sp(s, rn));
+
+    if (o3_opc == 014) {
+        /*
+         * LDAPR* are a special case because they are a simple load, not a
+         * fetch-and-do-something op.
+         * The architectural consistency requirements here are weaker than
+         * full load-acquire (we only need "load-acquire processor consistent"),
+         * but we choose to implement them as full LDAQ.
+         */
+        do_gpr_ld(s, cpu_reg(s, rt), clean_addr, size, false, false,
+                  true, rt, disas_ldst_compute_iss_sf(size, false, 0), true);
+        tcg_gen_mb(tcg_ctx, TCG_MO_ALL | TCG_BAR_LDAQ);
+        return;
+    }
+
+
     tcg_rs = read_cpu_reg(s, rs, true);
 
     if (o3_opc == 1) { /* LDCLR */