translate.c: Fix usermode big-endian AArch32 LDREXD and STREXD

For AArch32 LDREXD and STREXD, architecturally the 32-bit word at the
lowest address is always Rt and the one at addr+4 is Rt2, even if the
CPU is big-endian. Our implementation does these with a single
64-bit store, so if we're big-endian then we need to put the two
32-bit halves together in the opposite order to little-endian,
so that they end up in the right places. We were trying to do
this with the gen_aa32_frob64() function, but that is not correct
for the usermode emulator, because there we must distinguish between
"load a 64 bit value" (which does a BE 64-bit access and doesn't
need swapping) and "load two 32 bit values as one 64 bit access"
(where we still need to do the swapping, like system mode BE32).
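
To illustrate the half-ordering this describes, here is a standalone C
sketch (not QEMU code; pack_rt_rt2() is a made-up helper for this
example): the 64-bit image that a single doubleword access transfers
must carry Rt in the low half for a little-endian access but in the
high half for a big-endian one, so that Rt always lands at the lower
address and Rt2 at addr+4.

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical helper, not QEMU code: build the 64-bit image that a
     * single doubleword access of the given endianness would transfer,
     * so that Rt ends up at the lower address and Rt2 at addr+4. */
    static uint64_t pack_rt_rt2(uint32_t rt, uint32_t rt2, int big_endian)
    {
        /* LE: the word at the lower address is the low half of the image.
         * BE: the word at the lower address is the high half, so the two
         * registers swap places relative to the little-endian case. */
        return big_endian ? ((uint64_t)rt << 32) | rt2
                          : ((uint64_t)rt2 << 32) | rt;
    }

    int main(void)
    {
        printf("LE image: %016llx\n",
               (unsigned long long)pack_rt_rt2(0x11111111u, 0x22222222u, 0));
        printf("BE image: %016llx\n",
               (unsigned long long)pack_rt_rt2(0x11111111u, 0x22222222u, 1));
        return 0;
    }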

Backports commit 3448d47b3172015006b79197eb5a69826c6a7b6d from qemu
Peter Maydell 2018-03-05 11:38:19 -05:00 committed by Lioncash
parent 5250db33b5
commit 33d42df60c


@@ -8071,9 +8071,28 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
         TCGv_i32 tmp2 = tcg_temp_new_i32(tcg_ctx);
         TCGv_i64 t64 = tcg_temp_new_i64(tcg_ctx);
-        gen_aa32_ld_i64(s, t64, addr, get_mem_index(s), opc);
+        /* For AArch32, architecturally the 32-bit word at the lowest
+         * address is always Rt and the one at addr+4 is Rt2, even if
+         * the CPU is big-endian. That means we don't want to do a
+         * gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
+         * for an architecturally 64-bit access, but instead do a
+         * 64-bit access using MO_BE if appropriate and then split
+         * the two halves.
+         * This only makes a difference for BE32 user-mode, where
+         * frob64() must not flip the two halves of the 64-bit data
+         * but this code must treat BE32 user-mode like BE32 system.
+         */
+        TCGv taddr = gen_aa32_addr(s, addr, opc);
+        tcg_gen_qemu_ld_i64(s->uc, t64, taddr, get_mem_index(s), opc);
+        tcg_temp_free(tcg_ctx, taddr);
         tcg_gen_mov_i64(tcg_ctx, tcg_ctx->cpu_exclusive_val, t64);
-        tcg_gen_extr_i64_i32(tcg_ctx, tmp, tmp2, t64);
+        if (s->be_data == MO_BE) {
+            tcg_gen_extr_i64_i32(tcg_ctx, tmp2, tmp, t64);
+        } else {
+            tcg_gen_extr_i64_i32(tcg_ctx, tmp, tmp2, t64);
+        }
         tcg_temp_free_i64(tcg_ctx, t64);
         store_reg(s, rt2, tmp2);
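
The split in the load path above can be modelled the same way (again a
standalone C sketch, with unpack_rt_rt2() invented for illustration):
after the 64-bit load, the half that holds the word from the lower
address depends on the access endianness, which is why the two
tcg_gen_extr_i64_i32() orderings differ.

    #include <stdint.h>

    /* Hypothetical helper mirroring the extr order flip above: recover
     * Rt (the word at the lower address) and Rt2 (the word at addr+4)
     * from the 64-bit value a single doubleword load returned. */
    static void unpack_rt_rt2(uint64_t loaded, int big_endian,
                              uint32_t *rt, uint32_t *rt2)
    {
        if (big_endian) {
            *rt  = (uint32_t)(loaded >> 32);   /* high half came from addr */
            *rt2 = (uint32_t)loaded;           /* low half came from addr+4 */
        } else {
            *rt  = (uint32_t)loaded;
            *rt2 = (uint32_t)(loaded >> 32);
        }
    }
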
@@ -8124,15 +8143,26 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
         TCGv_i64 n64 = tcg_temp_new_i64(tcg_ctx);
         t2 = load_reg(s, rt2);
-        tcg_gen_concat_i32_i64(tcg_ctx, n64, t1, t2);
+        /* For AArch32, architecturally the 32-bit word at the lowest
+         * address is always Rt and the one at addr+4 is Rt2, even if
+         * the CPU is big-endian. Since we're going to treat this as a
+         * single 64-bit BE store, we need to put the two halves in the
+         * opposite order for BE to LE, so that they end up in the right
+         * places.
+         * We don't want gen_aa32_frob64() because that does the wrong
+         * thing for BE32 usermode.
+         */
+        if (s->be_data == MO_BE) {
+            tcg_gen_concat_i32_i64(tcg_ctx, n64, t2, t1);
+        } else {
+            tcg_gen_concat_i32_i64(tcg_ctx, n64, t1, t2);
+        }
         tcg_temp_free_i32(tcg_ctx, t2);
-        gen_aa32_frob64(s, n64);
         tcg_gen_atomic_cmpxchg_i64(tcg_ctx, o64, taddr, tcg_ctx->cpu_exclusive_val, n64,
                                    get_mem_index(s), opc);
         tcg_temp_free_i64(tcg_ctx, n64);
-        gen_aa32_frob64(s, o64);
         tcg_gen_setcond_i64(tcg_ctx, TCG_COND_NE, o64, o64, tcg_ctx->cpu_exclusive_val);
         tcg_gen_extrl_i64_i32(tcg_ctx, t0, o64);
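
For context, the compare-and-swap that this hunk feeds can be modelled
roughly as below (a standalone sketch under the assumption that the
earlier load-exclusive captured the doubleword in cpu_exclusive_val;
strexd_model() is invented for illustration): the 64-bit image built in
Rt/Rt2 order is written only if memory still holds the captured value,
and the setcond above turns a mismatch into the nonzero result that
STREXD reports on failure.

    #include <stdint.h>

    /* Rough model of the exclusive-store check: write the new doubleword
     * image only if memory still matches the value captured by the
     * earlier load-exclusive. Returns 0 on success and 1 on failure,
     * which is the result STREXD writes to Rd. */
    static uint32_t strexd_model(uint64_t *mem_dword, uint64_t exclusive_val,
                                 uint64_t new_image)
    {
        if (*mem_dword != exclusive_val) {
            return 1;       /* comparison failed, nothing stored */
        }
        *mem_dword = new_image;
        return 0;           /* store performed */
    }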