diff --git a/qemu/aarch64.h b/qemu/aarch64.h index 532031ac..6526e065 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -3372,6 +3372,8 @@ #define helper_neon_ceq_f64 helper_neon_ceq_f64_aarch64 #define helper_neon_cge_f64 helper_neon_cge_f64_aarch64 #define helper_neon_cgt_f64 helper_neon_cgt_f64_aarch64 +#define helper_paired_cmpxchg64_be helper_paired_cmpxchg64_be_aarch64 +#define helper_paired_cmpxchg64_le helper_paired_cmpxchg64_le_aarch64 #define helper_rbit64 helper_rbit64_aarch64 #define helper_recpsf_f32 helper_recpsf_f32_aarch64 #define helper_recpsf_f64 helper_recpsf_f64_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index 62b1f193..79a8a920 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -3372,6 +3372,8 @@ #define helper_neon_ceq_f64 helper_neon_ceq_f64_aarch64eb #define helper_neon_cge_f64 helper_neon_cge_f64_aarch64eb #define helper_neon_cgt_f64 helper_neon_cgt_f64_aarch64eb +#define helper_paired_cmpxchg64_be helper_paired_cmpxchg64_be_aarch64eb +#define helper_paired_cmpxchg64_le helper_paired_cmpxchg64_le_aarch64eb #define helper_rbit64 helper_rbit64_aarch64eb #define helper_recpsf_f32 helper_recpsf_f32_aarch64eb #define helper_recpsf_f64 helper_recpsf_f64_aarch64eb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index 129fa5c8..0de7adef 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -3388,6 +3388,8 @@ aarch64_symbols = ( 'helper_neon_ceq_f64', 'helper_neon_cge_f64', 'helper_neon_cgt_f64', + 'helper_paired_cmpxchg64_be', + 'helper_paired_cmpxchg64_le', 'helper_rbit64', 'helper_recpsf_f32', 'helper_recpsf_f64', diff --git a/qemu/target-arm/helper-a64.c b/qemu/target-arm/helper-a64.c index f152085b..28603804 100644 --- a/qemu/target-arm/helper-a64.c +++ b/qemu/target-arm/helper-a64.c @@ -26,6 +26,10 @@ #include "qemu/bitops.h" #include "internals.h" #include "qemu/crc32c.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "qemu/int128.h" +#include "tcg.h" /* C2.4.7 Multiply and divide */ /* special cases for 0 and LLONG_MIN are mandated by the standard */ @@ -513,3 +517,113 @@ uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes) /* Linux crc32c converts the output to one's complement. */ return crc32c(acc, buf, bytes) ^ 0xffffffff; } + +/* Returns 0 on success; 1 otherwise. */ +uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi) +{ + uintptr_t ra = GETPC(); + Int128 oldv, cmpv, newv; + bool success; + + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + newv = int128_make128(new_lo, new_hi); + + if (env->uc->parallel_cpus) { +#ifndef CONFIG_ATOMIC128 + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra); + success = int128_eq(oldv, cmpv); +#endif + } else { + uint64_t o0, o1; + +#ifdef CONFIG_USER_ONLY + /* ??? Enforce alignment. */ + uint64_t *haddr = g2h(addr); + o0 = ldq_le_p(haddr + 0); + o1 = ldq_le_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_le_p(haddr + 0, int128_getlo(newv)); + stq_le_p(haddr + 1, int128_gethi(newv)); + } +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx); + + o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra); + o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra); + helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra); + } +#endif + } + + return !success; +} + +uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr, + uint64_t new_lo, uint64_t new_hi) +{ + uintptr_t ra = GETPC(); + Int128 oldv, cmpv, newv; + bool success; + + cmpv = int128_make128(env->exclusive_val, env->exclusive_high); + newv = int128_make128(new_lo, new_hi); + + if (env->uc->parallel_cpus) { +#ifndef CONFIG_ATOMIC128 + cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra); + success = int128_eq(oldv, cmpv); +#endif + } else { + uint64_t o0, o1; + +#ifdef CONFIG_USER_ONLY + /* ??? Enforce alignment. */ + uint64_t *haddr = g2h(addr); + o1 = ldq_be_p(haddr + 0); + o0 = ldq_be_p(haddr + 1); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + stq_be_p(haddr + 0, int128_gethi(newv)); + stq_be_p(haddr + 1, int128_getlo(newv)); + } +#else + int mem_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx); + TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx); + + o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra); + o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra); + oldv = int128_make128(o0, o1); + + success = int128_eq(oldv, cmpv); + if (success) { + helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra); + helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra); + } +#endif + } + + return !success; +} + diff --git a/qemu/target-arm/helper-a64.h b/qemu/target-arm/helper-a64.h index 1d3d10ff..dd32000e 100644 --- a/qemu/target-arm/helper-a64.h +++ b/qemu/target-arm/helper-a64.h @@ -46,3 +46,5 @@ DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr) DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env) DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32) +DEF_HELPER_FLAGS_4(paired_cmpxchg64_le, TCG_CALL_NO_WG, i64, env, i64, i64, i64) +DEF_HELPER_FLAGS_4(paired_cmpxchg64_be, TCG_CALL_NO_WG, i64, env, i64, i64, i64) diff --git a/qemu/target-arm/translate-a64.c b/qemu/target-arm/translate-a64.c index fea08eb5..183ed5a6 100644 --- a/qemu/target-arm/translate-a64.c +++ b/qemu/target-arm/translate-a64.c @@ -1884,38 +1884,43 @@ static void disas_b_exc_sys(DisasContext *s, uint32_t insn) } } -/* - * Load/Store exclusive instructions are implemented by remembering - * the value/address loaded, and seeing if these are the same - * when the store is performed. This is not actually the architecturally - * mandated semantics, but it works for typical guest code sequences - * and avoids having to monitor regular stores. - * - * In system emulation mode only one CPU will be running at once, so - * this sequence is effectively atomic. In user emulation mode we - * throw an exception and handle the atomic operation elsewhere. - */ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, TCGv_i64 addr, int size, bool is_pair) { TCGContext *tcg_ctx = s->uc->tcg_ctx; TCGv_i64 tmp = tcg_temp_new_i64(tcg_ctx); - TCGMemOp memop = s->be_data + size; + TCGMemOp be = s->be_data; g_assert(size <= 3); - tcg_gen_qemu_ld_i64(s->uc, tmp, addr, get_mem_index(s), memop); if (is_pair) { - TCGv_i64 addr2 = tcg_temp_new_i64(tcg_ctx); TCGv_i64 hitmp = tcg_temp_new_i64(tcg_ctx); - g_assert(size >= 2); - tcg_gen_addi_i64(tcg_ctx, addr2, addr, 1ULL << size); - tcg_gen_qemu_ld_i64(s->uc, hitmp, addr2, get_mem_index(s), memop); - tcg_temp_free_i64(tcg_ctx, addr2); + if (size == 3) { + TCGv_i64 addr2 = tcg_temp_new_i64(tcg_ctx); + + tcg_gen_qemu_ld_i64(s->uc, tmp, addr, get_mem_index(s), + MO_64 | MO_ALIGN_16 | be); + tcg_gen_addi_i64(tcg_ctx, addr2, addr, 8); + tcg_gen_qemu_ld_i64(s->uc, hitmp, addr2, get_mem_index(s), + MO_64 | MO_ALIGN | be); + tcg_temp_free_i64(tcg_ctx, addr2); + } else { + g_assert(size == 2); + tcg_gen_qemu_ld_i64(s->uc, tmp, addr, get_mem_index(s), + MO_64 | MO_ALIGN | be); + if (be == MO_LE) { + tcg_gen_extr32_i64(tcg_ctx, tmp, hitmp, tmp); + } else { + tcg_gen_extr32_i64(tcg_ctx, hitmp, tmp, tmp); + } + } + tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_high, hitmp); tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rt2), hitmp); tcg_temp_free_i64(tcg_ctx, hitmp); + } else { + tcg_gen_qemu_ld_i64(s->uc, tmp, addr, get_mem_index(s), size | MO_ALIGN | be); } tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, tmp); @@ -1925,16 +1930,6 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2, tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_addr, addr); } -#ifdef CONFIG_USER_ONLY -static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, - TCGv_i64 addr, int size, int is_pair) -{ - tcg_gen_mov_i64(tcg_ctx, cpu_exclusive_test, addr); - tcg_gen_movi_i32(tcg_ctx, cpu_exclusive_info, - size | is_pair << 2 | (rd << 4) | (rt << 9) | (rt2 << 14)); - gen_exception_internal_insn(s, 4, EXCP_STREX); -} -#else static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, TCGv_i64 inaddr, int size, int is_pair) { @@ -1963,46 +1958,43 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2, tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, addr, tcg_ctx->cpu_exclusive_addr, fail_label); tmp = tcg_temp_new_i64(tcg_ctx); - tcg_gen_qemu_ld_i64(s->uc, tmp, addr, get_mem_index(s), s->be_data + size); - tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, tmp, tcg_ctx->cpu_exclusive_val, fail_label); - tcg_temp_free_i64(tcg_ctx, tmp); if (is_pair) { - TCGv_i64 addrhi = tcg_temp_new_i64(tcg_ctx); - TCGv_i64 tmphi = tcg_temp_new_i64(tcg_ctx); - - tcg_gen_addi_i64(tcg_ctx, addrhi, addr, 1ULL << size); - tcg_gen_qemu_ld_i64(s->uc, tmphi, addrhi, get_mem_index(s), - s->be_data + size); - tcg_gen_brcond_i64(tcg_ctx, TCG_COND_NE, tmphi, tcg_ctx->cpu_exclusive_high, fail_label); - - tcg_temp_free_i64(tcg_ctx, tmphi); - tcg_temp_free_i64(tcg_ctx, addrhi); - } - - /* We seem to still have the exclusive monitor, so do the store */ - tcg_gen_qemu_st_i64(s->uc, cpu_reg(s, rt), addr, get_mem_index(s), - s->be_data + size); - if (is_pair) { - TCGv_i64 addrhi = tcg_temp_new_i64(tcg_ctx); - - tcg_gen_addi_i64(tcg_ctx, addrhi, addr, 1ULL << size); - tcg_gen_qemu_st_i64(s->uc, cpu_reg(s, rt2), addrhi, - get_mem_index(s), s->be_data + size); - tcg_temp_free_i64(tcg_ctx, addrhi); + if (size == 2) { + TCGv_i64 val = tcg_temp_new_i64(tcg_ctx); + tcg_gen_concat32_i64(tcg_ctx, tmp, cpu_reg(s, rt), cpu_reg(s, rt2)); + tcg_gen_concat32_i64(tcg_ctx, val, tcg_ctx->cpu_exclusive_val, tcg_ctx->cpu_exclusive_high); + tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tmp, addr, val, tmp, + get_mem_index(s), + size | MO_ALIGN | s->be_data); + tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, val); + tcg_temp_free_i64(tcg_ctx, val); + } else if (s->be_data == MO_LE) { + gen_helper_paired_cmpxchg64_le(tcg_ctx, tmp, tcg_ctx->cpu_env, addr, cpu_reg(s, rt), + cpu_reg(s, rt2)); + } else { + gen_helper_paired_cmpxchg64_be(tcg_ctx, tmp, tcg_ctx->cpu_env, addr, cpu_reg(s, rt), + cpu_reg(s, rt2)); + } + } else { + TCGv_i64 val = cpu_reg(s, rt); + tcg_gen_atomic_cmpxchg_i64(tcg_ctx, tmp, addr, tcg_ctx->cpu_exclusive_val, val, + get_mem_index(s), + size | MO_ALIGN | s->be_data); + tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, tmp, tmp, tcg_ctx->cpu_exclusive_val); } tcg_temp_free_i64(tcg_ctx, addr); - tcg_gen_movi_i64(tcg_ctx, cpu_reg(s, rd), 0); + tcg_gen_mov_i64(tcg_ctx, cpu_reg(s, rd), tmp); + tcg_temp_free_i64(tcg_ctx, tmp); tcg_gen_br(tcg_ctx, done_label); + gen_set_label(tcg_ctx, fail_label); tcg_gen_movi_i64(tcg_ctx, cpu_reg(s, rd), 1); gen_set_label(tcg_ctx, done_label); tcg_gen_movi_i64(tcg_ctx, tcg_ctx->cpu_exclusive_addr, -1); - } -#endif /* Update the Sixty-Four bit (SF) registersize. This logic is derived * from the ARMv8 specs for LDR (Shared decode for all encodings).