From b71769fa5fa6a1a9e9e3056a6544606f0bfe3a74 Mon Sep 17 00:00:00 2001
From: "Emilio G. Cota" <cota@braap.org>
Date: Tue, 13 Mar 2018 15:00:48 -0400
Subject: [PATCH] target/arm: check CF_PARALLEL instead of parallel_cpus

Thereby decoupling the resulting translated code from the current state
of the system.

Backports commit 2399d4e7cec22ecf1c51062d2ebfd45220dbaace from qemu
---
 qemu/aarch64.h                  |  2 ++
 qemu/aarch64eb.h                |  2 ++
 qemu/header_gen.py              |  2 ++
 qemu/target/arm/helper-a64.c    | 38 +++++++++++++++++++++++++++------
 qemu/target/arm/helper-a64.h    |  4 ++++
 qemu/target/arm/op_helper.c     |  7 ------
 qemu/target/arm/translate-a64.c | 31 +++++++++++++++++++++------
 qemu/target/arm/translate.c     |  9 ++++++--
 8 files changed, 74 insertions(+), 21 deletions(-)

diff --git a/qemu/aarch64.h b/qemu/aarch64.h
index e519c462..491d4fa8 100644
--- a/qemu/aarch64.h
+++ b/qemu/aarch64.h
@@ -3113,7 +3113,9 @@
 #define helper_neon_cge_f64 helper_neon_cge_f64_aarch64
 #define helper_neon_cgt_f64 helper_neon_cgt_f64_aarch64
 #define helper_paired_cmpxchg64_be helper_paired_cmpxchg64_be_aarch64
+#define helper_paired_cmpxchg64_be_parallel helper_paired_cmpxchg64_be_parallel_aarch64
 #define helper_paired_cmpxchg64_le helper_paired_cmpxchg64_le_aarch64
+#define helper_paired_cmpxchg64_le_parallel helper_paired_cmpxchg64_le_parallel_aarch64
 #define helper_rbit64 helper_rbit64_aarch64
 #define helper_recpsf_f16 helper_recpsf_f16_aarch64
 #define helper_recpsf_f32 helper_recpsf_f32_aarch64
diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h
index cbcbab67..c9ab8358 100644
--- a/qemu/aarch64eb.h
+++ b/qemu/aarch64eb.h
@@ -3113,7 +3113,9 @@
 #define helper_neon_cge_f64 helper_neon_cge_f64_aarch64eb
 #define helper_neon_cgt_f64 helper_neon_cgt_f64_aarch64eb
 #define helper_paired_cmpxchg64_be helper_paired_cmpxchg64_be_aarch64eb
+#define helper_paired_cmpxchg64_be_parallel helper_paired_cmpxchg64_be_parallel_aarch64eb
 #define helper_paired_cmpxchg64_le helper_paired_cmpxchg64_le_aarch64eb
+#define helper_paired_cmpxchg64_le_parallel helper_paired_cmpxchg64_le_parallel_aarch64eb
 #define helper_rbit64 helper_rbit64_aarch64eb
 #define helper_recpsf_f16 helper_recpsf_f16_aarch64eb
 #define helper_recpsf_f32 helper_recpsf_f32_aarch64eb
diff --git a/qemu/header_gen.py b/qemu/header_gen.py
index c148a23c..f1e05daf 100644
--- a/qemu/header_gen.py
+++ b/qemu/header_gen.py
@@ -3133,7 +3133,9 @@ aarch64_symbols = (
     'helper_neon_cge_f64',
     'helper_neon_cgt_f64',
     'helper_paired_cmpxchg64_be',
+    'helper_paired_cmpxchg64_be_parallel',
     'helper_paired_cmpxchg64_le',
+    'helper_paired_cmpxchg64_le_parallel',
     'helper_rbit64',
     'helper_recpsf_f16',
     'helper_recpsf_f32',
diff --git a/qemu/target/arm/helper-a64.c b/qemu/target/arm/helper-a64.c
index 489b2e32..0de3f9ad 100644
--- a/qemu/target/arm/helper-a64.c
+++ b/qemu/target/arm/helper-a64.c
@@ -569,8 +569,9 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
 }
 
 /* Returns 0 on success; 1 otherwise.  */
-uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
-                                     uint64_t new_lo, uint64_t new_hi)
+static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr,
+                                       uint64_t new_lo, uint64_t new_hi,
+                                       bool parallel)
 {
     uintptr_t ra = GETPC();
     Int128 oldv, cmpv, newv;
@@ -582,7 +583,7 @@ uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
     cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
     newv = int128_make128(new_hi, new_lo);
 
-    if (env->uc->parallel_cpus) {
+    if (parallel) {
 #ifndef CONFIG_ATOMIC128
         cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
 #else
@@ -630,8 +631,21 @@ uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
     return !success;
 }
 
-uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
-                                     uint64_t new_lo, uint64_t new_hi)
+uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
+                                              uint64_t new_lo, uint64_t new_hi)
+{
+    return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false);
+}
+
+uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
+                                              uint64_t new_lo, uint64_t new_hi)
+{
+    return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true);
+}
+
+static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr,
+                                       uint64_t new_lo, uint64_t new_hi,
+                                       bool parallel)
 {
     uintptr_t ra = GETPC();
     Int128 oldv, cmpv, newv;
@@ -640,7 +654,7 @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
     cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
     newv = int128_make128(new_lo, new_hi);
 
-    if (env->uc->parallel_cpus) {
+    if (parallel) {
 #ifndef CONFIG_ATOMIC128
         cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
 #else
@@ -688,6 +702,18 @@ uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
     return !success;
 }
 
+uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
+                                     uint64_t new_lo, uint64_t new_hi)
+{
+    return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false);
+}
+
+uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
+                                     uint64_t new_lo, uint64_t new_hi)
+{
+    return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true);
+}
+
 /*
  * AdvSIMD half-precision
  */
diff --git a/qemu/target/arm/helper-a64.h b/qemu/target/arm/helper-a64.h
index 80b8f552..ef4ddfe9 100644
--- a/qemu/target/arm/helper-a64.h
+++ b/qemu/target/arm/helper-a64.h
@@ -46,7 +46,11 @@ DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
 DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
 DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
 DEF_HELPER_FLAGS_4(paired_cmpxchg64_le, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
+DEF_HELPER_FLAGS_4(paired_cmpxchg64_le_parallel, TCG_CALL_NO_WG,
+                   i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
+DEF_HELPER_FLAGS_4(paired_cmpxchg64_be_parallel, TCG_CALL_NO_WG,
+                   i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
 DEF_HELPER_FLAGS_3(advsimd_minh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
 DEF_HELPER_FLAGS_3(advsimd_maxnumh, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
diff --git a/qemu/target/arm/op_helper.c b/qemu/target/arm/op_helper.c
index d3460aa7..f82dccb8 100644
--- a/qemu/target/arm/op_helper.c
+++ b/qemu/target/arm/op_helper.c
@@ -452,13 +452,6 @@ void HELPER(yield)(CPUARMState *env)
     ARMCPU *cpu = arm_env_get_cpu(env);
     CPUState *cs = CPU(cpu);
 
-    /* When running in MTTCG we don't generate jumps to the yield and
-     * WFE helpers as it won't affect the scheduling of other vCPUs.
-     * If we wanted to more completely model WFE/SEV so we don't busy
-     * spin unnecessarily we would need to do something more involved.
-     */
-    g_assert(!cs->uc->parallel_cpus);
-
     /* This is a non-trappable hint instruction that generally indicates
      * that the guest is currently busy-looping. Yield control back to the
      * top level loop so that a more deserving VCPU has a chance to run.
diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c
index ce94c5d2..f3f0ac0d 100644
--- a/qemu/target/arm/translate-a64.c
+++ b/qemu/target/arm/translate-a64.c
@@ -1519,13 +1519,18 @@ static void handle_hint(DisasContext *s, uint32_t insn,
     case 3: /* WFI */
         s->base.is_jmp = DISAS_WFI;
         return;
+        /* When running in MTTCG we don't generate jumps to the yield and
+         * WFE helpers as it won't affect the scheduling of other vCPUs.
+         * If we wanted to more completely model WFE/SEV so we don't busy
+         * spin unnecessarily we would need to do something more involved.
+         */
     case 1: /* YIELD */
-        if (!s->uc->parallel_cpus) {
+        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
             s->base.is_jmp = DISAS_YIELD;
         }
         return;
     case 2: /* WFE */
-        if (!s->uc->parallel_cpus) {
+        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
             s->base.is_jmp = DISAS_WFE;
         }
         return;
@@ -2132,11 +2137,25 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
                                        MO_64 | MO_ALIGN | s->be_data);
             tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, tcg_ctx->cpu_exclusive_val);
         } else if (s->be_data == MO_LE) {
-            gen_helper_paired_cmpxchg64_le(tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr,
-                                           cpu_reg(s, rt), cpu_reg(s, rt2));
+            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+                gen_helper_paired_cmpxchg64_le_parallel(tcg_ctx, tmp, tcg_ctx->cpu_env,
+                                                        tcg_ctx->cpu_exclusive_addr,
+                                                        cpu_reg(s, rt),
+                                                        cpu_reg(s, rt2));
+            } else {
+                gen_helper_paired_cmpxchg64_le(tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr,
+                                               cpu_reg(s, rt), cpu_reg(s, rt2));
+            }
         } else {
-            gen_helper_paired_cmpxchg64_be(tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr,
-                                           cpu_reg(s, rt), cpu_reg(s, rt2));
+            if (tb_cflags(s->base.tb) & CF_PARALLEL) {
+                gen_helper_paired_cmpxchg64_be_parallel(tcg_ctx, tmp, tcg_ctx->cpu_env,
+                                                        tcg_ctx->cpu_exclusive_addr,
+                                                        cpu_reg(s, rt),
+                                                        cpu_reg(s, rt2));
+            } else {
+                gen_helper_paired_cmpxchg64_be(tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr,
+                                               cpu_reg(s, rt), cpu_reg(s, rt2));
+            }
         }
     } else {
         tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tmp, tcg_ctx->cpu_exclusive_addr, tcg_ctx->cpu_exclusive_val,
diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c
index 45615843..a3e91d4b 100644
--- a/qemu/target/arm/translate.c
+++ b/qemu/target/arm/translate.c
@@ -4698,8 +4698,13 @@ static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
 static void gen_nop_hint(DisasContext *s, int val)
 {
     switch (val) {
+        /* When running in MTTCG we don't generate jumps to the yield and
+         * WFE helpers as it won't affect the scheduling of other vCPUs.
+         * If we wanted to more completely model WFE/SEV so we don't busy
+         * spin unnecessarily we would need to do something more involved.
+         */
     case 1: /* yield */
-        if (!s->uc->parallel_cpus) {
+        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
             gen_set_pc_im(s, s->pc);
             s->base.is_jmp = DISAS_YIELD;
         }
@@ -4709,7 +4714,7 @@ static void gen_nop_hint(DisasContext *s, int val)
         s->base.is_jmp = DISAS_WFI;
         break;
     case 2: /* wfe */
-        if (!s->uc->parallel_cpus) {
+        if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
             gen_set_pc_im(s, s->pc);
             s->base.is_jmp = DISAS_WFE;
         }