From 2542ad17d08008672bb0fb2aacc06c51dbbd196a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 8 Oct 2018 12:20:17 -0400 Subject: [PATCH] target/arm: Rewrite vector gather stores This fixes the endianness problem for softmmu, and moves the main loop out of a macro and into an inlined function. Backports commit 78cf1b886aa1b95c97fc5114641515c2892bb240 from qemu --- qemu/aarch64.h | 39 ++++++--- qemu/aarch64eb.h | 39 ++++++--- qemu/header_gen.py | 39 ++++++--- qemu/target/arm/helper-sve.h | 52 +++++++++--- qemu/target/arm/sve_helper.c | 139 ++++++++++++++++++++------------ qemu/target/arm/translate-sve.c | 73 +++++++++++------ 6 files changed, 255 insertions(+), 126 deletions(-) diff --git a/qemu/aarch64.h b/qemu/aarch64.h index 2cf7cc9b..cabc63ce 100644 --- a/qemu/aarch64.h +++ b/qemu/aarch64.h @@ -4049,19 +4049,32 @@ #define helper_sve_stbd_zsu helper_sve_stbd_zsu_aarch64 #define helper_sve_stbs_zss helper_sve_stbs_zss_aarch64 #define helper_sve_stbs_zsu helper_sve_stbs_zsu_aarch64 -#define helper_sve_stdd_zd helper_sve_stdd_zd_aarch64 -#define helper_sve_stdd_zss helper_sve_stdd_zss_aarch64 -#define helper_sve_stdd_zsu helper_sve_stdd_zsu_aarch64 -#define helper_sve_sthd_zd helper_sve_sthd_zd_aarch64 -#define helper_sve_sthd_zss helper_sve_sthd_zss_aarch64 -#define helper_sve_sthd_zsu helper_sve_sthd_zsu_aarch64 -#define helper_sve_sths_zss helper_sve_sths_zss_aarch64 -#define helper_sve_sths_zsu helper_sve_sths_zsu_aarch64 -#define helper_sve_stsd_zd helper_sve_stsd_zd_aarch64 -#define helper_sve_stsd_zss helper_sve_stsd_zss_aarch64 -#define helper_sve_stsd_zsu helper_sve_stsd_zsu_aarch64 -#define helper_sve_stss_zss helper_sve_stss_zss_aarch64 -#define helper_sve_stss_zsu helper_sve_stss_zsu_aarch64 +#define helper_sve_stdd_be_zd helper_sve_stdd_be_zd_aarch64 +#define helper_sve_stdd_le_zd helper_sve_stdd_le_zd_aarch64 +#define helper_sve_stdd_be_zss helper_sve_stdd_be_zss_aarch64 +#define helper_sve_stdd_le_zss helper_sve_stdd_le_zss_aarch64 +#define helper_sve_stdd_be_zsu helper_sve_stdd_be_zsu_aarch64 +#define helper_sve_stdd_le_zsu helper_sve_stdd_le_zsu_aarch64 +#define helper_sve_sthd_be_zd helper_sve_sthd_be_zd_aarch64 +#define helper_sve_sthd_le_zd helper_sve_sthd_le_zd_aarch64 +#define helper_sve_sthd_be_zss helper_sve_sthd_be_zss_aarch64 +#define helper_sve_sthd_le_zss helper_sve_sthd_le_zss_aarch64 +#define helper_sve_sthd_be_zsu helper_sve_sthd_be_zsu_aarch64 +#define helper_sve_sthd_le_zsu helper_sve_sthd_le_zsu_aarch64 +#define helper_sve_sths_be_zss helper_sve_sths_be_zss_aarch64 +#define helper_sve_sths_le_zss helper_sve_sths_le_zss_aarch64 +#define helper_sve_sths_be_zsu helper_sve_sths_be_zsu_aarch64 +#define helper_sve_sths_le_zsu helper_sve_sths_le_zsu_aarch64 +#define helper_sve_stsd_be_zd helper_sve_stsd_be_zd_aarch64 +#define helper_sve_stsd_le_zd helper_sve_stsd_le_zd_aarch64 +#define helper_sve_stsd_be_zss helper_sve_stsd_be_zss_aarch64 +#define helper_sve_stsd_le_zss helper_sve_stsd_le_zss_aarch64 +#define helper_sve_stsd_be_zsu helper_sve_stsd_be_zsu_aarch64 +#define helper_sve_stsd_le_zsu helper_sve_stsd_le_zsu_aarch64 +#define helper_sve_stss_be_zss helper_sve_stss_be_zss_aarch64 +#define helper_sve_stss_le_zss helper_sve_stss_le_zss_aarch64 +#define helper_sve_stss_be_zsu helper_sve_stss_be_zsu_aarch64 +#define helper_sve_stss_le_zsu helper_sve_stss_le_zsu_aarch64 #define helper_sve_sub_zpzz_b helper_sve_sub_zpzz_b_aarch64 #define helper_sve_sub_zpzz_d helper_sve_sub_zpzz_d_aarch64 #define helper_sve_sub_zpzz_h helper_sve_sub_zpzz_h_aarch64 diff --git a/qemu/aarch64eb.h b/qemu/aarch64eb.h index 818edd84..986deaaf 100644 --- a/qemu/aarch64eb.h +++ b/qemu/aarch64eb.h @@ -4049,19 +4049,32 @@ #define helper_sve_stbd_zsu helper_sve_stbd_zsu_aarch64eb #define helper_sve_stbs_zss helper_sve_stbs_zss_aarch64eb #define helper_sve_stbs_zsu helper_sve_stbs_zsu_aarch64eb -#define helper_sve_stdd_zd helper_sve_stdd_zd_aarch64eb -#define helper_sve_stdd_zss helper_sve_stdd_zss_aarch64eb -#define helper_sve_stdd_zsu helper_sve_stdd_zsu_aarch64eb -#define helper_sve_sthd_zd helper_sve_sthd_zd_aarch64eb -#define helper_sve_sthd_zss helper_sve_sthd_zss_aarch64eb -#define helper_sve_sthd_zsu helper_sve_sthd_zsu_aarch64eb -#define helper_sve_sths_zss helper_sve_sths_zss_aarch64eb -#define helper_sve_sths_zsu helper_sve_sths_zsu_aarch64eb -#define helper_sve_stsd_zd helper_sve_stsd_zd_aarch64eb -#define helper_sve_stsd_zss helper_sve_stsd_zss_aarch64eb -#define helper_sve_stsd_zsu helper_sve_stsd_zsu_aarch64eb -#define helper_sve_stss_zss helper_sve_stss_zss_aarch64eb -#define helper_sve_stss_zsu helper_sve_stss_zsu_aarch64eb +#define helper_sve_stdd_be_zd helper_sve_stdd_be_zd_aarch64eb +#define helper_sve_stdd_le_zd helper_sve_stdd_le_zd_aarch64eb +#define helper_sve_stdd_be_zss helper_sve_stdd_be_zss_aarch64eb +#define helper_sve_stdd_le_zss helper_sve_stdd_le_zss_aarch64eb +#define helper_sve_stdd_be_zsu helper_sve_stdd_be_zsu_aarch64eb +#define helper_sve_stdd_le_zsu helper_sve_stdd_le_zsu_aarch64eb +#define helper_sve_sthd_be_zd helper_sve_sthd_be_zd_aarch64eb +#define helper_sve_sthd_le_zd helper_sve_sthd_le_zd_aarch64eb +#define helper_sve_sthd_be_zss helper_sve_sthd_be_zss_aarch64eb +#define helper_sve_sthd_le_zss helper_sve_sthd_le_zss_aarch64eb +#define helper_sve_sthd_be_zsu helper_sve_sthd_be_zsu_aarch64eb +#define helper_sve_sthd_le_zsu helper_sve_sthd_le_zsu_aarch64eb +#define helper_sve_sths_be_zss helper_sve_sths_be_zss_aarch64eb +#define helper_sve_sths_le_zss helper_sve_sths_le_zss_aarch64eb +#define helper_sve_sths_be_zsu helper_sve_sths_be_zsu_aarch64eb +#define helper_sve_sths_le_zsu helper_sve_sths_le_zsu_aarch64eb +#define helper_sve_stsd_be_zd helper_sve_stsd_be_zd_aarch64eb +#define helper_sve_stsd_le_zd helper_sve_stsd_le_zd_aarch64eb +#define helper_sve_stsd_be_zss helper_sve_stsd_be_zss_aarch64eb +#define helper_sve_stsd_le_zss helper_sve_stsd_le_zss_aarch64eb +#define helper_sve_stsd_be_zsu helper_sve_stsd_be_zsu_aarch64eb +#define helper_sve_stsd_le_zsu helper_sve_stsd_le_zsu_aarch64eb +#define helper_sve_stss_be_zss helper_sve_stss_be_zss_aarch64eb +#define helper_sve_stss_le_zss helper_sve_stss_le_zss_aarch64eb +#define helper_sve_stss_be_zsu helper_sve_stss_be_zsu_aarch64eb +#define helper_sve_stss_le_zsu helper_sve_stss_le_zsu_aarch64eb #define helper_sve_sub_zpzz_b helper_sve_sub_zpzz_b_aarch64eb #define helper_sve_sub_zpzz_d helper_sve_sub_zpzz_d_aarch64eb #define helper_sve_sub_zpzz_h helper_sve_sub_zpzz_h_aarch64eb diff --git a/qemu/header_gen.py b/qemu/header_gen.py index 901cb049..8a901b95 100644 --- a/qemu/header_gen.py +++ b/qemu/header_gen.py @@ -4073,19 +4073,32 @@ aarch64_symbols = ( 'helper_sve_stbd_zsu', 'helper_sve_stbs_zss', 'helper_sve_stbs_zsu', - 'helper_sve_stdd_zd', - 'helper_sve_stdd_zss', - 'helper_sve_stdd_zsu', - 'helper_sve_sthd_zd', - 'helper_sve_sthd_zss', - 'helper_sve_sthd_zsu', - 'helper_sve_sths_zss', - 'helper_sve_sths_zsu', - 'helper_sve_stsd_zd', - 'helper_sve_stsd_zss', - 'helper_sve_stsd_zsu', - 'helper_sve_stss_zss', - 'helper_sve_stss_zsu', + 'helper_sve_stdd_be_zd', + 'helper_sve_stdd_le_zd', + 'helper_sve_stdd_be_zss', + 'helper_sve_stdd_le_zss', + 'helper_sve_stdd_be_zsu', + 'helper_sve_stdd_le_zsu', + 'helper_sve_sthd_be_zd', + 'helper_sve_sthd_le_zd', + 'helper_sve_sthd_be_zss', + 'helper_sve_sthd_le_zss', + 'helper_sve_sthd_be_zsu', + 'helper_sve_sthd_le_zsu', + 'helper_sve_sths_be_zss', + 'helper_sve_sths_le_zss', + 'helper_sve_sths_be_zsu', + 'helper_sve_sths_le_zsu', + 'helper_sve_stsd_be_zd', + 'helper_sve_stsd_le_zd', + 'helper_sve_stsd_be_zss', + 'helper_sve_stsd_le_zss', + 'helper_sve_stsd_be_zsu', + 'helper_sve_stsd_le_zsu', + 'helper_sve_stss_be_zss', + 'helper_sve_stss_le_zss', + 'helper_sve_stss_be_zsu', + 'helper_sve_stss_le_zsu', 'helper_sve_sub_zpzz_b', 'helper_sve_sub_zpzz_d', 'helper_sve_sub_zpzz_h', diff --git a/qemu/target/arm/helper-sve.h b/qemu/target/arm/helper-sve.h index 7a222a25..fce9cd1b 100644 --- a/qemu/target/arm/helper-sve.h +++ b/qemu/target/arm/helper-sve.h @@ -1405,41 +1405,67 @@ DEF_HELPER_FLAGS_6(sve_ldffsds_zd, TCG_CALL_NO_WG, DEF_HELPER_FLAGS_6(sve_stbs_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sths_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sths_le_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stss_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sths_be_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stbs_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sths_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sths_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stss_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sths_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stss_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stbd_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sthd_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sthd_le_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stsd_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sthd_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stdd_zsu, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stsd_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zsu, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zsu, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stbd_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sthd_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sthd_le_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stsd_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sthd_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stdd_zss, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stsd_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zss, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zss, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) DEF_HELPER_FLAGS_6(sve_stbd_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_sthd_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sthd_le_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stsd_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_sthd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) -DEF_HELPER_FLAGS_6(sve_stdd_zd, TCG_CALL_NO_WG, +DEF_HELPER_FLAGS_6(sve_stsd_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stsd_be_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_le_zd, TCG_CALL_NO_WG, + void, env, ptr, ptr, ptr, tl, i32) +DEF_HELPER_FLAGS_6(sve_stdd_be_zd, TCG_CALL_NO_WG, void, env, ptr, ptr, ptr, tl, i32) diff --git a/qemu/target/arm/sve_helper.c b/qemu/target/arm/sve_helper.c index f4fc7c80..b1aced8d 100644 --- a/qemu/target/arm/sve_helper.c +++ b/qemu/target/arm/sve_helper.c @@ -5121,61 +5121,100 @@ DO_LDFF1_ZPZ_D(sve_ldffsds_zd, uint64_t, int32_t, cpu_ldl_data_ra) /* Stores with a vector index. */ -#define DO_ST1_ZPZ_S(NAME, TYPEI, FN) \ -void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc); \ - unsigned scale = simd_data(desc); \ - uintptr_t ra = GETPC(); \ - for (i = 0; i < oprsz; ) { \ - uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \ - do { \ - if (likely(pg & 1)) { \ - target_ulong off = *(TYPEI *)(vm + H1_4(i)); \ - uint32_t d = *(uint32_t *)(vd + H1_4(i)); \ - FN(env, base + (off << scale), d, ra); \ - } \ - i += sizeof(uint32_t), pg >>= sizeof(uint32_t); \ - } while (i & 15); \ - } \ +static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const int mmu_idx = cpu_mmu_index(env, false); + intptr_t i, oprsz = simd_oprsz(desc); + unsigned scale = simd_data(desc); + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; ) { + uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); + do { + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i); + tlb_fn(env, vd, i, base + (off << scale), mmu_idx, ra); + } + i += 4, pg >>= 4; + } while (i & 15); + } + set_helper_retaddr(0); } -#define DO_ST1_ZPZ_D(NAME, TYPEI, FN) \ -void HELPER(NAME)(CPUARMState *env, void *vd, void *vg, void *vm, \ - target_ulong base, uint32_t desc) \ -{ \ - intptr_t i, oprsz = simd_oprsz(desc) / 8; \ - unsigned scale = simd_data(desc); \ - uintptr_t ra = GETPC(); \ - uint64_t *d = vd, *m = vm; uint8_t *pg = vg; \ - for (i = 0; i < oprsz; i++) { \ - if (likely(pg[H1(i)] & 1)) { \ - target_ulong off = (target_ulong)(TYPEI)m[i] << scale; \ - FN(env, base + off, d[i], ra); \ - } \ - } \ +static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm, + target_ulong base, uint32_t desc, uintptr_t ra, + zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn) +{ + const int mmu_idx = cpu_mmu_index(env, false); + intptr_t i, oprsz = simd_oprsz(desc) / 8; + unsigned scale = simd_data(desc); + + set_helper_retaddr(ra); + for (i = 0; i < oprsz; i++) { + uint8_t pg = *(uint8_t *)(vg + H1(i)); + if (likely(pg & 1)) { + target_ulong off = off_fn(vm, i * 8); + tlb_fn(env, vd, i * 8, base + (off << scale), mmu_idx, ra); + } + } + set_helper_retaddr(0); } -DO_ST1_ZPZ_S(sve_stbs_zsu, uint32_t, cpu_stb_data_ra) -DO_ST1_ZPZ_S(sve_sths_zsu, uint32_t, cpu_stw_data_ra) -DO_ST1_ZPZ_S(sve_stss_zsu, uint32_t, cpu_stl_data_ra) +#define DO_ST1_ZPZ_S(MEM, OFS) \ +void __attribute__((flatten)) HELPER(sve_st##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_s, sve_st1##MEM##_tlb); \ +} -DO_ST1_ZPZ_S(sve_stbs_zss, int32_t, cpu_stb_data_ra) -DO_ST1_ZPZ_S(sve_sths_zss, int32_t, cpu_stw_data_ra) -DO_ST1_ZPZ_S(sve_stss_zss, int32_t, cpu_stl_data_ra) +#define DO_ST1_ZPZ_D(MEM, OFS) \ +void __attribute__((flatten)) HELPER(sve_st##MEM##_##OFS) \ + (CPUARMState *env, void *vd, void *vg, void *vm, \ + target_ulong base, uint32_t desc) \ +{ \ + sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \ + off_##OFS##_d, sve_st1##MEM##_tlb); \ +} -DO_ST1_ZPZ_D(sve_stbd_zsu, uint32_t, cpu_stb_data_ra) -DO_ST1_ZPZ_D(sve_sthd_zsu, uint32_t, cpu_stw_data_ra) -DO_ST1_ZPZ_D(sve_stsd_zsu, uint32_t, cpu_stl_data_ra) -DO_ST1_ZPZ_D(sve_stdd_zsu, uint32_t, cpu_stq_data_ra) +DO_ST1_ZPZ_S(bs, zsu) +DO_ST1_ZPZ_S(hs_le, zsu) +DO_ST1_ZPZ_S(hs_be, zsu) +DO_ST1_ZPZ_S(ss_le, zsu) +DO_ST1_ZPZ_S(ss_be, zsu) -DO_ST1_ZPZ_D(sve_stbd_zss, int32_t, cpu_stb_data_ra) -DO_ST1_ZPZ_D(sve_sthd_zss, int32_t, cpu_stw_data_ra) -DO_ST1_ZPZ_D(sve_stsd_zss, int32_t, cpu_stl_data_ra) -DO_ST1_ZPZ_D(sve_stdd_zss, int32_t, cpu_stq_data_ra) +DO_ST1_ZPZ_S(bs, zss) +DO_ST1_ZPZ_S(hs_le, zss) +DO_ST1_ZPZ_S(hs_be, zss) +DO_ST1_ZPZ_S(ss_le, zss) +DO_ST1_ZPZ_S(ss_be, zss) -DO_ST1_ZPZ_D(sve_stbd_zd, uint64_t, cpu_stb_data_ra) -DO_ST1_ZPZ_D(sve_sthd_zd, uint64_t, cpu_stw_data_ra) -DO_ST1_ZPZ_D(sve_stsd_zd, uint64_t, cpu_stl_data_ra) -DO_ST1_ZPZ_D(sve_stdd_zd, uint64_t, cpu_stq_data_ra) +DO_ST1_ZPZ_D(bd, zsu) +DO_ST1_ZPZ_D(hd_le, zsu) +DO_ST1_ZPZ_D(hd_be, zsu) +DO_ST1_ZPZ_D(sd_le, zsu) +DO_ST1_ZPZ_D(sd_be, zsu) +DO_ST1_ZPZ_D(dd_le, zsu) +DO_ST1_ZPZ_D(dd_be, zsu) + +DO_ST1_ZPZ_D(bd, zss) +DO_ST1_ZPZ_D(hd_le, zss) +DO_ST1_ZPZ_D(hd_be, zss) +DO_ST1_ZPZ_D(sd_le, zss) +DO_ST1_ZPZ_D(sd_be, zss) +DO_ST1_ZPZ_D(dd_le, zss) +DO_ST1_ZPZ_D(dd_be, zss) + +DO_ST1_ZPZ_D(bd, zd) +DO_ST1_ZPZ_D(hd_le, zd) +DO_ST1_ZPZ_D(hd_be, zd) +DO_ST1_ZPZ_D(sd_le, zd) +DO_ST1_ZPZ_D(sd_be, zd) +DO_ST1_ZPZ_D(dd_le, zd) +DO_ST1_ZPZ_D(dd_be, zd) + +#undef DO_ST1_ZPZ_S +#undef DO_ST1_ZPZ_D diff --git a/qemu/target/arm/translate-sve.c b/qemu/target/arm/translate-sve.c index 4939cf8f..592bde69 100644 --- a/qemu/target/arm/translate-sve.c +++ b/qemu/target/arm/translate-sve.c @@ -5369,34 +5369,58 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn) } /* Indexed by [xs][msz]. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][3] = { - { gen_helper_sve_stbs_zsu, - gen_helper_sve_sths_zsu, - gen_helper_sve_stss_zsu, }, - { gen_helper_sve_stbs_zss, - gen_helper_sve_sths_zss, - gen_helper_sve_stss_zss, }, +/* Indexed by [be][xs][msz]. */ +static gen_helper_gvec_mem_scatter * const scatter_store_fn32[2][2][3] = { + /* Little-endian */ + { { gen_helper_sve_stbs_zsu, + gen_helper_sve_sths_le_zsu, + gen_helper_sve_stss_le_zsu, }, + { gen_helper_sve_stbs_zss, + gen_helper_sve_sths_le_zss, + gen_helper_sve_stss_le_zss, } }, + /* Big-endian */ + { { gen_helper_sve_stbs_zsu, + gen_helper_sve_sths_be_zsu, + gen_helper_sve_stss_be_zsu, }, + { gen_helper_sve_stbs_zss, + gen_helper_sve_sths_be_zss, + gen_helper_sve_stss_be_zss, } }, }; /* Note that we overload xs=2 to indicate 64-bit offset. */ -static gen_helper_gvec_mem_scatter * const scatter_store_fn64[3][4] = { - { gen_helper_sve_stbd_zsu, - gen_helper_sve_sthd_zsu, - gen_helper_sve_stsd_zsu, - gen_helper_sve_stdd_zsu, }, - { gen_helper_sve_stbd_zss, - gen_helper_sve_sthd_zss, - gen_helper_sve_stsd_zss, - gen_helper_sve_stdd_zss, }, - { gen_helper_sve_stbd_zd, - gen_helper_sve_sthd_zd, - gen_helper_sve_stsd_zd, - gen_helper_sve_stdd_zd, }, +static gen_helper_gvec_mem_scatter * const scatter_store_fn64[2][3][4] = { + /* Little-endian */ + { { gen_helper_sve_stbd_zsu, + gen_helper_sve_sthd_le_zsu, + gen_helper_sve_stsd_le_zsu, + gen_helper_sve_stdd_le_zsu, }, + { gen_helper_sve_stbd_zss, + gen_helper_sve_sthd_le_zss, + gen_helper_sve_stsd_le_zss, + gen_helper_sve_stdd_le_zss, }, + { gen_helper_sve_stbd_zd, + gen_helper_sve_sthd_le_zd, + gen_helper_sve_stsd_le_zd, + gen_helper_sve_stdd_le_zd, } }, + /* Big-endian */ + { { gen_helper_sve_stbd_zsu, + gen_helper_sve_sthd_be_zsu, + gen_helper_sve_stsd_be_zsu, + gen_helper_sve_stdd_be_zsu, }, + { gen_helper_sve_stbd_zss, + gen_helper_sve_sthd_be_zss, + gen_helper_sve_stsd_be_zss, + gen_helper_sve_stdd_be_zss, }, + { gen_helper_sve_stbd_zd, + gen_helper_sve_sthd_be_zd, + gen_helper_sve_stsd_be_zd, + gen_helper_sve_stdd_be_zd, } }, }; static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn) { gen_helper_gvec_mem_scatter *fn; + int be = s->be_data == MO_BE; if (a->esz < a->msz || (a->msz == 0 && a->scale)) { return false; @@ -5406,10 +5430,10 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn) } switch (a->esz) { case MO_32: - fn = scatter_store_fn32[a->xs][a->msz]; + fn = scatter_store_fn32[be][a->xs][a->msz]; break; case MO_64: - fn = scatter_store_fn64[a->xs][a->msz]; + fn = scatter_store_fn64[be][a->xs][a->msz]; break; default: g_assert_not_reached(); @@ -5422,6 +5446,7 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn) static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn) { gen_helper_gvec_mem_scatter *fn = NULL; + int be = s->be_data == MO_BE; TCGv_i64 imm; if (a->esz < a->msz) { @@ -5435,10 +5460,10 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn) switch (a->esz) { case MO_32: - fn = scatter_store_fn32[0][a->msz]; + fn = scatter_store_fn32[be][0][a->msz]; break; case MO_64: - fn = scatter_store_fn64[2][a->msz]; + fn = scatter_store_fn64[be][2][a->msz]; break; } assert(fn != NULL);