From 1715f382b4608e055dcb4851fdae148f08e23d0a Mon Sep 17 00:00:00 2001 From: "Emilio G. Cota" Date: Sat, 4 May 2019 22:42:29 -0400 Subject: [PATCH] target/arm: check CF_PARALLEL instead of parallel_cpus Thereby decoupling the resulting translated code from the current state of the system. Backports commit 2399d4e7cec22ecf1c51062d2ebfd45220dbaace from qemu --- qemu/aarch64.h | 2 + qemu/aarch64eb.h | 2 + qemu/header_gen.py | 2 + qemu/target/arm/helper-a64.c | 198 ++++++++++++++++++++------------ qemu/target/arm/helper-a64.h | 4 + qemu/target/arm/op_helper.c | 7 -- qemu/target/arm/translate-a64.c | 26 ++++- qemu/target/arm/translate.c | 4 +- 8 files changed, 155 insertions(+), 90 deletions(-) diff --git a/qemu/aarch64.h b/qemu/aarch64.h index afc275fa..f1a7968f 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -3430,7 +3430,9 @@ #define helper_pacia helper_pacia_aarch64 #define helper_pacib helper_pacib_aarch64 #define helper_paired_cmpxchg64_be helper_paired_cmpxchg64_be_aarch64 +#define helper_paired_cmpxchg64_be_parallel helper_paired_cmpxchg64_be_parallel_aarch64 #define helper_paired_cmpxchg64_le helper_paired_cmpxchg64_le_aarch64 +#define helper_paired_cmpxchg64_le_parallel helper_paired_cmpxchg64_le_parallel_aarch64 #define helper_rbit64 helper_rbit64_aarch64 #define helper_recpsf_f16 helper_recpsf_f16_aarch64 #define helper_recpsf_f32 helper_recpsf_f32_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index 8fdb4090..b806a2f5 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -3430,7 +3430,9 @@ #define helper_pacia helper_pacia_aarch64eb #define helper_pacib helper_pacib_aarch64eb #define helper_paired_cmpxchg64_be helper_paired_cmpxchg64_be_aarch64eb +#define helper_paired_cmpxchg64_be_parallel helper_paired_cmpxchg64_be_parallel_aarch64eb #define helper_paired_cmpxchg64_le helper_paired_cmpxchg64_le_aarch64eb +#define helper_paired_cmpxchg64_le_parallel helper_paired_cmpxchg64_le_parallel_aarch64eb #define helper_rbit64 helper_rbit64_aarch64eb #define helper_recpsf_f16 helper_recpsf_f16_aarch64eb #define helper_recpsf_f32 helper_recpsf_f32_aarch64eb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index fa9073a5..ff42b6d6 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -3485,7 +3485,9 @@ aarch64_symbols = ( 'helper_pacia', 'helper_pacib', 'helper_paired_cmpxchg64_be', + 'helper_paired_cmpxchg64_be_parallel', 'helper_paired_cmpxchg64_le', + 'helper_paired_cmpxchg64_le_parallel', 'helper_rbit64', 'helper_recpsf_f16', 'helper_recpsf_f32', diff --git a/qemu/target/arm/helper-a64.c b/qemu/target/arm/helper-a64.c index 66381842..6c1b080f 100644 --- a/qemu/target/arm/helper-a64.c +++ b/qemu/target/arm/helper-a64.c @@ -538,46 +538,130 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) return crc32c(acc, buf, bytes) ^ 0xffffffff; } -uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, - uint64_t new_lo, uint64_t new_hi) +static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi, + bool parallel) { - Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high); - Int128 newv = int128_make128(new_lo, new_hi); - Int128 oldv; uintptr_t ra = GETPC(); - uint64_t o0, o1; + Int128 oldv, cmpv, newv; bool success; -#ifdef CONFIG_USER_ONLY - /* ??? Enforce alignment. */ - uint64_t *haddr = g2h(addr); + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + newv = int128_make128(new_lo, new_hi); - helper_retaddr = ra; - o0 = ldq_le_p(haddr + 0); - o1 = ldq_le_p(haddr + 1); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - stq_le_p(haddr + 0, int128_getlo(newv)); - stq_le_p(haddr + 1, int128_gethi(newv)); - } - helper_retaddr = 0; + if (parallel) { +#ifndef CONFIG_ATOMIC128 + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); #else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); - TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); - - o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); - o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); - helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); - } + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); + success = int128_eq(oldv, cmpv); #endif + } else { + uint64_t o0, o1; + +#ifdef CONFIG_USER_ONLY + /* ??? Enforce alignment. */ + uint64_t *haddr = g2h(addr); + + helper_retaddr = ra; + o0 = ldq_le_p(haddr + 0); + o1 = ldq_le_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_le_p(haddr + 0, int128_getlo(newv)); + stq_le_p(haddr + 1, int128_gethi(newv)); + } + helper_retaddr = 0; +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); + + o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); + o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); + helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); + } +#endif + } + + return !success; +} + +uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi) +{ + return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false); +} + +uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi) +{ + return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true); +} + +static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi, + bool parallel) +{ + uintptr_t ra = GETPC(); + Int128 oldv, cmpv, newv; + bool success; + + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + newv = int128_make128(new_lo, new_hi); + + if (parallel) { +#ifndef CONFIG_ATOMIC128 + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + success = int128_eq(oldv, cmpv); +#endif + } else { + uint64_t o0, o1; + +#ifdef CONFIG_USER_ONLY + /* ??? Enforce alignment. */ + uint64_t *haddr = g2h(addr); + + helper_retaddr = ra; + o1 = ldq_be_p(haddr + 0); + o0 = ldq_be_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_be_p(haddr + 0, int128_gethi(newv)); + stq_be_p(haddr + 1, int128_getlo(newv)); + } + helper_retaddr = 0; +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); + + o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); + o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); + helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); + } +#endif + } return !success; } @@ -585,49 +669,13 @@ uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, uint64_t new_lo, uint64_t new_hi) { - /* - * High and low need to be switched here because this is not actually a - * 128bit store but two doublewords stored consecutively - */ - Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val); - Int128 newv = int128_make128(new_hi, new_lo); - Int128 oldv; - uintptr_t ra = GETPC(); - uint64_t o0, o1; - bool success; + return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false); +} -#ifdef CONFIG_USER_ONLY - /* ??? Enforce alignment. */ - uint64_t *haddr = g2h(addr); - - helper_retaddr = ra; - o1 = ldq_be_p(haddr + 0); - o0 = ldq_be_p(haddr + 1); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - stq_be_p(haddr + 0, int128_gethi(newv)); - stq_be_p(haddr + 1, int128_getlo(newv)); - } - helper_retaddr = 0; -#else - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); - TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); - - o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); - o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); - oldv = int128_make128(o0, o1); - - success = int128_eq(oldv, cmpv); - if (success) { - helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); - helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); - } -#endif - - return !success; +uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi) +{ + return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true); } /* Writes back the old data into Rs. */ diff --git a/qemu/target/arm/helper-a64.h b/qemu/target/arm/helper-a64.h index 5ce920ec..419c9b6f 100644 --- a/qemu/target/arm/helper-a64.h +++ b/qemu/target/arm/helper-a64.h @@ -51,7 +51,11 @@ DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env) DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) DEF_HELPER_FLAGS_4(paired_cmpxchg64_le, TCG_CALL_NO_WG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(paired_cmpxchg64_le_parallel, TCG_CALL_NO_WG, + i64, env, i64, i64, i64) DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(paired_cmpxchg64_be_parallel, TCG_CALL_NO_WG, + i64, env, i64, i64, i64) DEF_HELPER_5(casp_le_parallel, void, env, i32, i64, i64, i64) DEF_HELPER_5(casp_be_parallel, void, env, i32, i64, i64, i64) DEF_HELPER_FLAGS_3(advsimd_maxh, TCG_CALL_NO_RWG, f16, f16, f16, ptr) diff --git a/qemu/target/arm/op_helper.c b/qemu/target/arm/op_helper.c index 5351f342..f5f62d4d 100644 --- a/qemu/target/arm/op_helper.c +++ b/qemu/target/arm/op_helper.c @@ -485,13 +485,6 @@ void HELPER(yield)(CPUARMState *env) ARMCPU *cpu = arm_env_get_cpu(env); CPUState *cs = CPU(cpu); - /* When running in MTTCG we don't generate jumps to the yield and - * WFE helpers as it won't affect the scheduling of other vCPUs. - * If we wanted to more completely model WFE/SEV so we don't busy - * spin unnecessarily we would need to do something more involved. - */ - g_assert(!cs->uc->parallel_cpus); - /* This is a non-trappable hint instruction that generally indicates * that the guest is currently busy-looping. Yield control back to the * top level loop so that a more deserving VCPU has a chance to run. diff --git a/qemu/target/arm/translate-a64.c b/qemu/target/arm/translate-a64.c index e5f7e4bc..003dce2f 100644 --- a/qemu/target/arm/translate-a64.c +++ b/qemu/target/arm/translate-a64.c @@ -1556,12 +1556,12 @@ static void handle_hint(DisasContext *s, uint32_t insn, s->base.is_jmp = DISAS_WFI; break; case 0b00001: /* YIELD */ - if (!s->uc->parallel_cpus) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { s->base.is_jmp = DISAS_YIELD; } break; case 0b00010: /* WFE */ - if (!s->uc->parallel_cpus) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { s->base.is_jmp = DISAS_WFE; } break; @@ -2465,11 +2465,25 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, MO_64 | MO_ALIGN | s->be_data); tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, tcg_ctx->cpu_exclusive_val); } else if (s->be_data == MO_LE) { - gen_helper_paired_cmpxchg64_le(tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, - cpu_reg(s, rt), cpu_reg(s, rt2)); + if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_paired_cmpxchg64_le_parallel(tcg_ctx, tmp, tcg_ctx->cpu_env, + tcg_ctx->cpu_exclusive_addr, + cpu_reg(s, rt), + cpu_reg(s, rt2)); + } else { + gen_helper_paired_cmpxchg64_le(tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); + } } else { - gen_helper_paired_cmpxchg64_be(tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, - cpu_reg(s, rt), cpu_reg(s, rt2)); + if (tb_cflags(s->base.tb) & CF_PARALLEL) { + gen_helper_paired_cmpxchg64_be_parallel(tcg_ctx, tmp, tcg_ctx->cpu_env, + tcg_ctx->cpu_exclusive_addr, + cpu_reg(s, rt), + cpu_reg(s, rt2)); + } else { + gen_helper_paired_cmpxchg64_be(tcg_ctx, tmp, tcg_ctx->cpu_env, tcg_ctx->cpu_exclusive_addr, + cpu_reg(s, rt), cpu_reg(s, rt2)); + } } } else { tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tmp, tcg_ctx->cpu_exclusive_addr, tcg_ctx->cpu_exclusive_val, diff --git a/qemu/target/arm/translate.c b/qemu/target/arm/translate.c index b3b8e5cf..ed35412d 100644 --- a/qemu/target/arm/translate.c +++ b/qemu/target/arm/translate.c @@ -4941,7 +4941,7 @@ static void gen_nop_hint(DisasContext *s, int val) * spin unnecessarily we would need to do something more involved. */ case 1: /* yield */ - if (!s->uc->parallel_cpus) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { gen_set_pc_im(s, s->pc); s->base.is_jmp = DISAS_YIELD; } @@ -4951,7 +4951,7 @@ static void gen_nop_hint(DisasContext *s, int val) s->base.is_jmp = DISAS_WFI; break; case 2: /* wfe */ - if (!s->uc->parallel_cpus) { + if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { gen_set_pc_im(s, s->pc); s->base.is_jmp = DISAS_WFE; }