target/arm: Reuse sve_probe_page for scatter stores

Backports commit 88a660a48ef513ce9875b595e19b2a820b3f3fca from qemu
This commit is contained in:
Richard Henderson 2021-02-25 21:27:12 -05:00 committed by Lioncash
parent 3eee880c2a
commit 585da952ec

View file

@@ -5376,94 +5376,136 @@ DO_LDFF1_ZPZ_D(dd_be, zd, MO_64)
/* Stores with a vector index. */ /* Stores with a vector index. */
/*
 * NOTE: this span is a side-by-side diff rendering. Each line carries the
 * removed code (left column) followed by the added code (right column).
 *
 * Left (removed): sve_st1_zs and sve_st1_zd. Both walk the predicate and
 * call tlb_fn directly for every active element; sve_st1_zs steps through
 * the register 4 bytes at a time, sve_st1_zd 8 bytes at a time.
 *
 * Right (added): sve_st1_z, a single scatter-store routine that replaces
 * both. It makes two passes over the vector:
 *   1. Probe every active element with sve_probe_page. When the element
 *      fits within one page its host address is recorded in host[];
 *      when it crosses a page both pages are probed and host[] stays
 *      NULL. Watchpoints are checked when the probe flags request it.
 *   2. Store every element: through host_fn when a host address was
 *      recorded, otherwise through tlb_fn if the predicate bit is set.
 *
 * Parameters of sve_st1_z:
 *   env      CPU state handed to the probe, watchpoint and store calls.
 *   vd       vector register passed to host_fn/tlb_fn as the data source.
 *   vg       predicate, read as 64-bit words; the bit at position reg_off
 *            gates the element at register offset reg_off.
 *   vm       vector register from which off_fn reads each element offset.
 *   base     scalar base address added to each scaled offset.
 *   desc     descriptor; supplies the operation size and the offset scale.
 *   retaddr  host return address forwarded to probes and stores.
 *   esize    step between consecutive elements in the vector register.
 *   msize    number of bytes each element occupies in memory.
 *   off_fn   reads the offset for the element at a given register offset.
 *   host_fn  stores one element directly to a host address.
 *   tlb_fn   stores one element through the slow path.
 */
static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm, static inline
target_ulong base, uint32_t desc, uintptr_t ra, void sve_st1_z(CPUARMState *env, void *vd, uint64_t *vg, void *vm,
zreg_off_fn *off_fn, sve_ldst1_tlb_fn *tlb_fn) target_ulong base, uint32_t desc, uintptr_t retaddr,
int esize, int msize, zreg_off_fn *off_fn,
sve_ldst1_host_fn *host_fn,
sve_ldst1_tlb_fn *tlb_fn)
{ {
const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
intptr_t i, oprsz = simd_oprsz(desc); const int mmu_idx = cpu_mmu_index(env, false);
const intptr_t reg_max = simd_oprsz(desc);
/*
 * One slot per element, indexed by element number i (not by reg_off).
 * NOTE(review): the size presumably covers esize == 4 at the maximum
 * vector length of ARM_MAX_VQ * 16 bytes -- confirm against the callers.
 */
void *host[ARM_MAX_VQ * 4];
intptr_t reg_off, i;
SVEHostPage info, info2;
/*
* Probe all of the elements for host addresses and flags.
*/
i = reg_off = 0;
do {
uint64_t pg = vg[reg_off >> 6];
for (i = 0; i < oprsz; ) {
uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
do { do {
if (likely(pg & 1)) { target_ulong addr = base + (off_fn(vm, reg_off) << scale);
/*
 * in_page is compared against msize below to decide whether the element
 * fits in the page containing addr. Presumably it is the byte count from
 * addr to the end of that page (assumes TARGET_PAGE_MASK is the negated
 * page size) -- TODO confirm.
 */
target_ulong off = off_fn(vm, i); target_ulong in_page = -(addr | TARGET_PAGE_MASK);
tlb_fn(env, vd, i, base + (off << scale), ra);
host[i] = NULL;
if (likely((pg >> (reg_off & 63)) & 1)) {
if (likely(in_page >= msize)) {
sve_probe_page(&info, false, env, addr, 0, MMU_DATA_STORE,
mmu_idx, retaddr);
host[i] = info.host;
} else {
/*
* Element crosses the page boundary.
* Probe both pages, but do not record the host address,
* so that we use the slow path.
*/
sve_probe_page(&info, false, env, addr, 0,
MMU_DATA_STORE, mmu_idx, retaddr);
sve_probe_page(&info2, false, env, addr + in_page, 0,
MMU_DATA_STORE, mmu_idx, retaddr);
info.flags |= info2.flags;
}
if (unlikely(info.flags & TLB_WATCHPOINT)) {
cpu_check_watchpoint(env_cpu(env), addr, msize,
info.attrs, BP_MEM_WRITE, retaddr);
}
/* TODO: MTE check. */
} }
i += 4, pg >>= 4; i += 1;
} while (i & 15); reg_off += esize;
} } while (reg_off & 63);
} } while (reg_off < reg_max);
static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm, /*
target_ulong base, uint32_t desc, uintptr_t ra, * Now that we have recognized all exceptions except SyncExternal
zreg_off_fn *off_fn, sve_ldst1_tlb_fn *tlb_fn) * (from TLB_MMIO), which we cannot avoid, perform all of the stores.
{ *
const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2); * Note for the common case of an element in RAM, not crossing a page
intptr_t i, oprsz = simd_oprsz(desc) / 8; * boundary, we have stored the host address in host[]. This doubles
* as a first-level check against the predicate, since only enabled
for (i = 0; i < oprsz; i++) { * elements have non-null host addresses.
uint8_t pg = *(uint8_t *)(vg + H1(i)); */
if (likely(pg & 1)) { i = reg_off = 0;
target_ulong off = off_fn(vm, i * 8); do {
tlb_fn(env, vd, i * 8, base + (off << scale), ra); void *h = host[i];
/* Fast path: a recorded host address means the element is active. */
if (likely(h != NULL)) {
host_fn(vd, reg_off, h);
} else if ((vg[reg_off >> 6] >> (reg_off & 63)) & 1) {
target_ulong addr = base + (off_fn(vm, reg_off) << scale);
tlb_fn(env, vd, reg_off, addr, retaddr);
} }
}
i += 1;
reg_off += esize;
} while (reg_off < reg_max);
} }
/*
 * Side-by-side diff: old macro on the left, new macro on the right.
 *
 * DO_ST1_ZPZ_S defines HELPER(sve_st<MEM>_<OFS>) for the "_s" offset
 * readers (off_<OFS>_s).
 * Old form: forwards to sve_st1_zs with only the sve_st1<MEM>_tlb store.
 * New form: takes an extra MSZ argument and forwards to sve_st1_z with
 * esize 4, msize (1 << MSZ), and both the sve_st1<MEM>_host and
 * sve_st1<MEM>_tlb store functions. QEMU_FLATTEN is no longer applied.
 */
#define DO_ST1_ZPZ_S(MEM, OFS) \ #define DO_ST1_ZPZ_S(MEM, OFS, MSZ) \
void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
(CPUARMState *env, void *vd, void *vg, void *vm, \ void *vm, target_ulong base, uint32_t desc) \
target_ulong base, uint32_t desc) \ { \
{ \ sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 4, 1 << MSZ, \
sve_st1_zs(env, vd, vg, vm, base, desc, GETPC(), \ off_##OFS##_s, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
off_##OFS##_s, sve_st1##MEM##_tlb); \
} }
/*
 * Side-by-side diff: old macro on the left, new macro on the right.
 *
 * DO_ST1_ZPZ_D defines HELPER(sve_st<MEM>_<OFS>) for the "_d" offset
 * readers (off_<OFS>_d).
 * Old form: forwards to sve_st1_zd with only the sve_st1<MEM>_tlb store.
 * New form: takes an extra MSZ argument and forwards to sve_st1_z with
 * esize 8, msize (1 << MSZ), and both the sve_st1<MEM>_host and
 * sve_st1<MEM>_tlb store functions. QEMU_FLATTEN is no longer applied.
 */
#define DO_ST1_ZPZ_D(MEM, OFS) \ #define DO_ST1_ZPZ_D(MEM, OFS, MSZ) \
void QEMU_FLATTEN HELPER(sve_st##MEM##_##OFS) \ void HELPER(sve_st##MEM##_##OFS)(CPUARMState *env, void *vd, void *vg, \
(CPUARMState *env, void *vd, void *vg, void *vm, \ void *vm, target_ulong base, uint32_t desc) \
target_ulong base, uint32_t desc) \ { \
{ \ sve_st1_z(env, vd, vg, vm, base, desc, GETPC(), 8, 1 << MSZ, \
sve_st1_zd(env, vd, vg, vm, base, desc, GETPC(), \ off_##OFS##_d, sve_st1##MEM##_host, sve_st1##MEM##_tlb); \
off_##OFS##_d, sve_st1##MEM##_tlb); \
} }
/*
 * Helper instantiations (side-by-side diff: old two-argument form on the
 * left, new three-argument form on the right).
 *
 * The first argument selects the sve_st1<MEM>_host/_tlb store pair and the
 * second selects the off_<OFS>_s / off_<OFS>_d offset reader. The new third
 * argument is shifted into the memory access size (1 << MSZ) by the macros.
 * NOTE(review): presumably MO_8/MO_16/MO_32/MO_64 are the log2 byte sizes
 * 0..3 -- confirm against the MemOp definition.
 */
DO_ST1_ZPZ_S(bs, zsu) DO_ST1_ZPZ_S(bs, zsu, MO_8)
DO_ST1_ZPZ_S(hs_le, zsu) DO_ST1_ZPZ_S(hs_le, zsu, MO_16)
DO_ST1_ZPZ_S(hs_be, zsu) DO_ST1_ZPZ_S(hs_be, zsu, MO_16)
DO_ST1_ZPZ_S(ss_le, zsu) DO_ST1_ZPZ_S(ss_le, zsu, MO_32)
DO_ST1_ZPZ_S(ss_be, zsu) DO_ST1_ZPZ_S(ss_be, zsu, MO_32)
DO_ST1_ZPZ_S(bs, zss) DO_ST1_ZPZ_S(bs, zss, MO_8)
DO_ST1_ZPZ_S(hs_le, zss) DO_ST1_ZPZ_S(hs_le, zss, MO_16)
DO_ST1_ZPZ_S(hs_be, zss) DO_ST1_ZPZ_S(hs_be, zss, MO_16)
DO_ST1_ZPZ_S(ss_le, zss) DO_ST1_ZPZ_S(ss_le, zss, MO_32)
DO_ST1_ZPZ_S(ss_be, zss) DO_ST1_ZPZ_S(ss_be, zss, MO_32)
/* Instantiations using the 8-byte element step (DO_ST1_ZPZ_D). */
DO_ST1_ZPZ_D(bd, zsu) DO_ST1_ZPZ_D(bd, zsu, MO_8)
DO_ST1_ZPZ_D(hd_le, zsu) DO_ST1_ZPZ_D(hd_le, zsu, MO_16)
DO_ST1_ZPZ_D(hd_be, zsu) DO_ST1_ZPZ_D(hd_be, zsu, MO_16)
DO_ST1_ZPZ_D(sd_le, zsu) DO_ST1_ZPZ_D(sd_le, zsu, MO_32)
DO_ST1_ZPZ_D(sd_be, zsu) DO_ST1_ZPZ_D(sd_be, zsu, MO_32)
DO_ST1_ZPZ_D(dd_le, zsu) DO_ST1_ZPZ_D(dd_le, zsu, MO_64)
DO_ST1_ZPZ_D(dd_be, zsu) DO_ST1_ZPZ_D(dd_be, zsu, MO_64)
DO_ST1_ZPZ_D(bd, zss) DO_ST1_ZPZ_D(bd, zss, MO_8)
DO_ST1_ZPZ_D(hd_le, zss) DO_ST1_ZPZ_D(hd_le, zss, MO_16)
DO_ST1_ZPZ_D(hd_be, zss) DO_ST1_ZPZ_D(hd_be, zss, MO_16)
DO_ST1_ZPZ_D(sd_le, zss) DO_ST1_ZPZ_D(sd_le, zss, MO_32)
DO_ST1_ZPZ_D(sd_be, zss) DO_ST1_ZPZ_D(sd_be, zss, MO_32)
DO_ST1_ZPZ_D(dd_le, zss) DO_ST1_ZPZ_D(dd_le, zss, MO_64)
DO_ST1_ZPZ_D(dd_be, zss) DO_ST1_ZPZ_D(dd_be, zss, MO_64)
DO_ST1_ZPZ_D(bd, zd) DO_ST1_ZPZ_D(bd, zd, MO_8)
DO_ST1_ZPZ_D(hd_le, zd) DO_ST1_ZPZ_D(hd_le, zd, MO_16)
DO_ST1_ZPZ_D(hd_be, zd) DO_ST1_ZPZ_D(hd_be, zd, MO_16)
DO_ST1_ZPZ_D(sd_le, zd) DO_ST1_ZPZ_D(sd_le, zd, MO_32)
DO_ST1_ZPZ_D(sd_be, zd) DO_ST1_ZPZ_D(sd_be, zd, MO_32)
DO_ST1_ZPZ_D(dd_le, zd) DO_ST1_ZPZ_D(dd_le, zd, MO_64)
DO_ST1_ZPZ_D(dd_be, zd) DO_ST1_ZPZ_D(dd_be, zd, MO_64)
/* The generator macros are only needed for the instantiations above. */
#undef DO_ST1_ZPZ_S #undef DO_ST1_ZPZ_S
#undef DO_ST1_ZPZ_D #undef DO_ST1_ZPZ_D