mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2024-12-23 16:55:35 +00:00
target/arm: Pass TCGMemOpIdx to sve memory helpers
There is quite a lot of code required to compute cpu_mmu_index, or even to put together the full TCGMemOpIdx. This can easily be done at translation time. Backports commit 500d04843ba953dc4560e44f04001efec38c14a6 from qemu.
This commit is contained in:
parent
606e8cdb8c
commit
66ffb372e7
|
@ -798,4 +798,9 @@ static inline uint32_t arm_debug_exception_fsr(CPUARMState *env)
|
|||
}
|
||||
}
|
||||
|
||||
/* Note make_memop_idx reserves 4 bits for mmu_idx, and MO_BSWAP is bit 3.
|
||||
* Thus a TCGMemOpIdx, without any MO_ALIGN bits, fits in 8 bits.
|
||||
*/
|
||||
#define MEMOPIDX_SHIFT 8
|
||||
|
||||
#endif
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
#include "qemu/osdep.h"
|
||||
#include "cpu.h"
|
||||
#include "internals.h"
|
||||
#include "exec/exec-all.h"
|
||||
#include "exec/cpu_ldst.h"
|
||||
#include "exec/helper-proto.h"
|
||||
|
@ -3511,7 +3512,7 @@ typedef intptr_t sve_ld1_host_fn(void *vd, void *vg, void *host,
|
|||
* The controlling predicate is known to be true.
|
||||
*/
|
||||
typedef void sve_ld1_tlb_fn(CPUARMState *env, void *vd, intptr_t reg_off,
|
||||
target_ulong vaddr, int mmu_idx, uintptr_t ra);
|
||||
target_ulong vaddr, TCGMemOpIdx oi, uintptr_t ra);
|
||||
typedef sve_ld1_tlb_fn sve_st1_tlb_fn;
|
||||
|
||||
/*
|
||||
|
@ -3538,16 +3539,15 @@ static intptr_t sve_##NAME##_host(void *vd, void *vg, void *host, \
|
|||
#ifdef CONFIG_SOFTMMU
|
||||
#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \
|
||||
static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
|
||||
target_ulong addr, int mmu_idx, uintptr_t ra) \
|
||||
target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
|
||||
{ \
|
||||
TCGMemOpIdx oi = make_memop_idx(ctz32(sizeof(TYPEM)) | MOEND, mmu_idx); \
|
||||
TYPEM val = TLB(env, addr, oi, ra); \
|
||||
*(TYPEE *)(vd + H(reg_off)) = val; \
|
||||
}
|
||||
#else
|
||||
#define DO_LD_TLB(NAME, H, TYPEE, TYPEM, HOST, MOEND, TLB) \
|
||||
static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
|
||||
target_ulong addr, int mmu_idx, uintptr_t ra) \
|
||||
target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
|
||||
{ \
|
||||
TYPEM val = HOST(g2h(addr)); \
|
||||
*(TYPEE *)(vd + H(reg_off)) = val; \
|
||||
|
@ -3675,11 +3675,13 @@ static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
|
|||
sve_ld1_host_fn *host_fn,
|
||||
sve_ld1_tlb_fn *tlb_fn)
|
||||
{
|
||||
void *vd = &env->vfp.zregs[simd_data(desc)];
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const int mmu_idx = get_mmuidx(oi);
|
||||
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
|
||||
void *vd = &env->vfp.zregs[rd];
|
||||
const int diffsz = esz - msz;
|
||||
const intptr_t reg_max = simd_oprsz(desc);
|
||||
const intptr_t mem_max = reg_max >> diffsz;
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
ARMVectorReg scratch;
|
||||
void *host;
|
||||
intptr_t split, reg_off, mem_off;
|
||||
|
@ -3753,7 +3755,7 @@ static void sve_ld1_r(CPUARMState *env, void *vg, const target_ulong addr,
|
|||
* on I/O memory, it may succeed but not bring in the TLB entry.
|
||||
* But even then we have still made forward progress.
|
||||
*/
|
||||
tlb_fn(env, &scratch, reg_off, addr + mem_off, mmu_idx, retaddr);
|
||||
tlb_fn(env, &scratch, reg_off, addr + mem_off, oi, retaddr);
|
||||
reg_off += 1 << esz;
|
||||
}
|
||||
#endif
|
||||
|
@ -3814,9 +3816,9 @@ static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr,
|
|||
uint32_t desc, int size, uintptr_t ra,
|
||||
sve_ld1_tlb_fn *tlb_fn)
|
||||
{
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
|
||||
intptr_t i, oprsz = simd_oprsz(desc);
|
||||
unsigned rd = simd_data(desc);
|
||||
ARMVectorReg scratch[2] = { };
|
||||
|
||||
set_helper_retaddr(ra);
|
||||
|
@ -3824,8 +3826,8 @@ static void sve_ld2_r(CPUARMState *env, void *vg, target_ulong addr,
|
|||
uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
|
||||
do {
|
||||
if (pg & 1) {
|
||||
tlb_fn(env, &scratch[0], i, addr, mmu_idx, ra);
|
||||
tlb_fn(env, &scratch[1], i, addr + size, mmu_idx, ra);
|
||||
tlb_fn(env, &scratch[0], i, addr, oi, ra);
|
||||
tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
|
||||
}
|
||||
i += size, pg >>= size;
|
||||
addr += 2 * size;
|
||||
|
@ -3842,9 +3844,9 @@ static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr,
|
|||
uint32_t desc, int size, uintptr_t ra,
|
||||
sve_ld1_tlb_fn *tlb_fn)
|
||||
{
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
|
||||
intptr_t i, oprsz = simd_oprsz(desc);
|
||||
unsigned rd = simd_data(desc);
|
||||
ARMVectorReg scratch[3] = { };
|
||||
|
||||
set_helper_retaddr(ra);
|
||||
|
@ -3852,9 +3854,9 @@ static void sve_ld3_r(CPUARMState *env, void *vg, target_ulong addr,
|
|||
uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
|
||||
do {
|
||||
if (pg & 1) {
|
||||
tlb_fn(env, &scratch[0], i, addr, mmu_idx, ra);
|
||||
tlb_fn(env, &scratch[1], i, addr + size, mmu_idx, ra);
|
||||
tlb_fn(env, &scratch[2], i, addr + 2 * size, mmu_idx, ra);
|
||||
tlb_fn(env, &scratch[0], i, addr, oi, ra);
|
||||
tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
|
||||
tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra);
|
||||
}
|
||||
i += size, pg >>= size;
|
||||
addr += 3 * size;
|
||||
|
@ -3872,9 +3874,9 @@ static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr,
|
|||
uint32_t desc, int size, uintptr_t ra,
|
||||
sve_ld1_tlb_fn *tlb_fn)
|
||||
{
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
|
||||
intptr_t i, oprsz = simd_oprsz(desc);
|
||||
unsigned rd = simd_data(desc);
|
||||
ARMVectorReg scratch[4] = { };
|
||||
|
||||
set_helper_retaddr(ra);
|
||||
|
@ -3882,10 +3884,10 @@ static void sve_ld4_r(CPUARMState *env, void *vg, target_ulong addr,
|
|||
uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
|
||||
do {
|
||||
if (pg & 1) {
|
||||
tlb_fn(env, &scratch[0], i, addr, mmu_idx, ra);
|
||||
tlb_fn(env, &scratch[1], i, addr + size, mmu_idx, ra);
|
||||
tlb_fn(env, &scratch[2], i, addr + 2 * size, mmu_idx, ra);
|
||||
tlb_fn(env, &scratch[3], i, addr + 3 * size, mmu_idx, ra);
|
||||
tlb_fn(env, &scratch[0], i, addr, oi, ra);
|
||||
tlb_fn(env, &scratch[1], i, addr + size, oi, ra);
|
||||
tlb_fn(env, &scratch[2], i, addr + 2 * size, oi, ra);
|
||||
tlb_fn(env, &scratch[3], i, addr + 3 * size, oi, ra);
|
||||
}
|
||||
i += size, pg >>= size;
|
||||
addr += 4 * size;
|
||||
|
@ -3980,11 +3982,13 @@ static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
|
|||
sve_ld1_host_fn *host_fn,
|
||||
sve_ld1_tlb_fn *tlb_fn)
|
||||
{
|
||||
void *vd = &env->vfp.zregs[simd_data(desc)];
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const int mmu_idx = get_mmuidx(oi);
|
||||
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
|
||||
void *vd = &env->vfp.zregs[rd];
|
||||
const int diffsz = esz - msz;
|
||||
const intptr_t reg_max = simd_oprsz(desc);
|
||||
const intptr_t mem_max = reg_max >> diffsz;
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
intptr_t split, reg_off, mem_off;
|
||||
void *host;
|
||||
|
||||
|
@ -4036,7 +4040,7 @@ static void sve_ldff1_r(CPUARMState *env, void *vg, const target_ulong addr,
|
|||
* Perform one normal read, which will fault or not.
|
||||
* But it is likely to bring the page into the tlb.
|
||||
*/
|
||||
tlb_fn(env, vd, reg_off, addr + mem_off, mmu_idx, retaddr);
|
||||
tlb_fn(env, vd, reg_off, addr + mem_off, oi, retaddr);
|
||||
|
||||
/* After any fault, zero any leading predicated false elts. */
|
||||
swap_memzero(vd, reg_off);
|
||||
|
@ -4065,7 +4069,8 @@ static void sve_ldnf1_r(CPUARMState *env, void *vg, const target_ulong addr,
|
|||
uint32_t desc, const int esz, const int msz,
|
||||
sve_ld1_host_fn *host_fn)
|
||||
{
|
||||
void *vd = &env->vfp.zregs[simd_data(desc)];
|
||||
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
|
||||
void *vd = &env->vfp.zregs[rd];
|
||||
const int diffsz = esz - msz;
|
||||
const intptr_t reg_max = simd_oprsz(desc);
|
||||
const intptr_t mem_max = reg_max >> diffsz;
|
||||
|
@ -4200,15 +4205,14 @@ DO_LDFF1_LDNF1_2(dd, 3, 3)
|
|||
#ifdef CONFIG_SOFTMMU
|
||||
#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \
|
||||
static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
|
||||
target_ulong addr, int mmu_idx, uintptr_t ra) \
|
||||
target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
|
||||
{ \
|
||||
TCGMemOpIdx oi = make_memop_idx(ctz32(sizeof(TYPEM)) | MOEND, mmu_idx); \
|
||||
TLB(env, addr, *(TYPEM *)(vd + H(reg_off)), oi, ra); \
|
||||
}
|
||||
#else
|
||||
#define DO_ST_TLB(NAME, H, TYPEM, HOST, MOEND, TLB) \
|
||||
static void sve_##NAME##_tlb(CPUARMState *env, void *vd, intptr_t reg_off, \
|
||||
target_ulong addr, int mmu_idx, uintptr_t ra) \
|
||||
target_ulong addr, TCGMemOpIdx oi, uintptr_t ra) \
|
||||
{ \
|
||||
HOST(g2h(addr), *(TYPEM *)(vd + H(reg_off))); \
|
||||
}
|
||||
|
@ -4247,9 +4251,9 @@ static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr,
|
|||
const int esize, const int msize,
|
||||
sve_st1_tlb_fn *tlb_fn)
|
||||
{
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
|
||||
intptr_t i, oprsz = simd_oprsz(desc);
|
||||
unsigned rd = simd_data(desc);
|
||||
void *vd = &env->vfp.zregs[rd];
|
||||
|
||||
set_helper_retaddr(ra);
|
||||
|
@ -4257,7 +4261,7 @@ static void sve_st1_r(CPUARMState *env, void *vg, target_ulong addr,
|
|||
uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
|
||||
do {
|
||||
if (pg & 1) {
|
||||
tlb_fn(env, vd, i, addr, mmu_idx, ra);
|
||||
tlb_fn(env, vd, i, addr, oi, ra);
|
||||
}
|
||||
i += esize, pg >>= esize;
|
||||
addr += msize;
|
||||
|
@ -4271,9 +4275,9 @@ static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr,
|
|||
const int esize, const int msize,
|
||||
sve_st1_tlb_fn *tlb_fn)
|
||||
{
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
|
||||
intptr_t i, oprsz = simd_oprsz(desc);
|
||||
unsigned rd = simd_data(desc);
|
||||
void *d1 = &env->vfp.zregs[rd];
|
||||
void *d2 = &env->vfp.zregs[(rd + 1) & 31];
|
||||
|
||||
|
@ -4282,8 +4286,8 @@ static void sve_st2_r(CPUARMState *env, void *vg, target_ulong addr,
|
|||
uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
|
||||
do {
|
||||
if (pg & 1) {
|
||||
tlb_fn(env, d1, i, addr, mmu_idx, ra);
|
||||
tlb_fn(env, d2, i, addr + msize, mmu_idx, ra);
|
||||
tlb_fn(env, d1, i, addr, oi, ra);
|
||||
tlb_fn(env, d2, i, addr + msize, oi, ra);
|
||||
}
|
||||
i += esize, pg >>= esize;
|
||||
addr += 2 * msize;
|
||||
|
@ -4297,9 +4301,9 @@ static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr,
|
|||
const int esize, const int msize,
|
||||
sve_st1_tlb_fn *tlb_fn)
|
||||
{
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
|
||||
intptr_t i, oprsz = simd_oprsz(desc);
|
||||
unsigned rd = simd_data(desc);
|
||||
void *d1 = &env->vfp.zregs[rd];
|
||||
void *d2 = &env->vfp.zregs[(rd + 1) & 31];
|
||||
void *d3 = &env->vfp.zregs[(rd + 2) & 31];
|
||||
|
@ -4309,9 +4313,9 @@ static void sve_st3_r(CPUARMState *env, void *vg, target_ulong addr,
|
|||
uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
|
||||
do {
|
||||
if (pg & 1) {
|
||||
tlb_fn(env, d1, i, addr, mmu_idx, ra);
|
||||
tlb_fn(env, d2, i, addr + msize, mmu_idx, ra);
|
||||
tlb_fn(env, d3, i, addr + 2 * msize, mmu_idx, ra);
|
||||
tlb_fn(env, d1, i, addr, oi, ra);
|
||||
tlb_fn(env, d2, i, addr + msize, oi, ra);
|
||||
tlb_fn(env, d3, i, addr + 2 * msize, oi, ra);
|
||||
}
|
||||
i += esize, pg >>= esize;
|
||||
addr += 3 * msize;
|
||||
|
@ -4325,9 +4329,9 @@ static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr,
|
|||
const int esize, const int msize,
|
||||
sve_st1_tlb_fn *tlb_fn)
|
||||
{
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const unsigned rd = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 5);
|
||||
intptr_t i, oprsz = simd_oprsz(desc);
|
||||
unsigned rd = simd_data(desc);
|
||||
void *d1 = &env->vfp.zregs[rd];
|
||||
void *d2 = &env->vfp.zregs[(rd + 1) & 31];
|
||||
void *d3 = &env->vfp.zregs[(rd + 2) & 31];
|
||||
|
@ -4338,10 +4342,10 @@ static void sve_st4_r(CPUARMState *env, void *vg, target_ulong addr,
|
|||
uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3));
|
||||
do {
|
||||
if (pg & 1) {
|
||||
tlb_fn(env, d1, i, addr, mmu_idx, ra);
|
||||
tlb_fn(env, d2, i, addr + msize, mmu_idx, ra);
|
||||
tlb_fn(env, d3, i, addr + 2 * msize, mmu_idx, ra);
|
||||
tlb_fn(env, d4, i, addr + 3 * msize, mmu_idx, ra);
|
||||
tlb_fn(env, d1, i, addr, oi, ra);
|
||||
tlb_fn(env, d2, i, addr + msize, oi, ra);
|
||||
tlb_fn(env, d3, i, addr + 2 * msize, oi, ra);
|
||||
tlb_fn(env, d4, i, addr + 3 * msize, oi, ra);
|
||||
}
|
||||
i += esize, pg >>= esize;
|
||||
addr += 4 * msize;
|
||||
|
@ -4911,9 +4915,9 @@ static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
|
|||
target_ulong base, uint32_t desc, uintptr_t ra,
|
||||
zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
|
||||
{
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
|
||||
intptr_t i, oprsz = simd_oprsz(desc);
|
||||
unsigned scale = simd_data(desc);
|
||||
ARMVectorReg scratch = { };
|
||||
|
||||
set_helper_retaddr(ra);
|
||||
|
@ -4922,7 +4926,7 @@ static void sve_ld1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
|
|||
do {
|
||||
if (likely(pg & 1)) {
|
||||
target_ulong off = off_fn(vm, i);
|
||||
tlb_fn(env, &scratch, i, base + (off << scale), mmu_idx, ra);
|
||||
tlb_fn(env, &scratch, i, base + (off << scale), oi, ra);
|
||||
}
|
||||
i += 4, pg >>= 4;
|
||||
} while (i & 15);
|
||||
|
@ -4937,9 +4941,9 @@ static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
|
|||
target_ulong base, uint32_t desc, uintptr_t ra,
|
||||
zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
|
||||
{
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
|
||||
intptr_t i, oprsz = simd_oprsz(desc) / 8;
|
||||
unsigned scale = simd_data(desc);
|
||||
ARMVectorReg scratch = { };
|
||||
|
||||
set_helper_retaddr(ra);
|
||||
|
@ -4947,7 +4951,7 @@ static void sve_ld1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
|
|||
uint8_t pg = *(uint8_t *)(vg + H1(i));
|
||||
if (likely(pg & 1)) {
|
||||
target_ulong off = off_fn(vm, i * 8);
|
||||
tlb_fn(env, &scratch, i * 8, base + (off << scale), mmu_idx, ra);
|
||||
tlb_fn(env, &scratch, i * 8, base + (off << scale), oi, ra);
|
||||
}
|
||||
}
|
||||
set_helper_retaddr(0);
|
||||
|
@ -5053,7 +5057,7 @@ typedef bool sve_ld1_nf_fn(CPUARMState *env, void *vd, intptr_t reg_off,
|
|||
#ifdef CONFIG_SOFTMMU
|
||||
#define DO_LD_NF(NAME, H, TYPEE, TYPEM, HOST) \
|
||||
static bool sve_ld##NAME##_nf(CPUARMState *env, void *vd, intptr_t reg_off, \
|
||||
target_ulong addr, int mmu_idx) \
|
||||
target_ulong addr, int mmu_idx) \
|
||||
{ \
|
||||
target_ulong next_page = -(addr | TARGET_PAGE_MASK); \
|
||||
if (likely(next_page - addr >= sizeof(TYPEM))) { \
|
||||
|
@ -5112,9 +5116,10 @@ static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
|
|||
zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn,
|
||||
sve_ld1_nf_fn *nonfault_fn)
|
||||
{
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const int mmu_idx = get_mmuidx(oi);
|
||||
const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
|
||||
intptr_t reg_off, reg_max = simd_oprsz(desc);
|
||||
unsigned scale = simd_data(desc);
|
||||
target_ulong addr;
|
||||
|
||||
/* Skip to the first true predicate. */
|
||||
|
@ -5124,7 +5129,7 @@ static inline void sve_ldff1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
|
|||
set_helper_retaddr(ra);
|
||||
addr = off_fn(vm, reg_off);
|
||||
addr = base + (addr << scale);
|
||||
tlb_fn(env, vd, reg_off, addr, mmu_idx, ra);
|
||||
tlb_fn(env, vd, reg_off, addr, oi, ra);
|
||||
|
||||
/* The rest of the reads will be non-faulting. */
|
||||
set_helper_retaddr(0);
|
||||
|
@ -5153,9 +5158,10 @@ static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
|
|||
zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn,
|
||||
sve_ld1_nf_fn *nonfault_fn)
|
||||
{
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const int mmu_idx = get_mmuidx(oi);
|
||||
const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
|
||||
intptr_t reg_off, reg_max = simd_oprsz(desc);
|
||||
unsigned scale = simd_data(desc);
|
||||
target_ulong addr;
|
||||
|
||||
/* Skip to the first true predicate. */
|
||||
|
@ -5165,7 +5171,7 @@ static inline void sve_ldff1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
|
|||
set_helper_retaddr(ra);
|
||||
addr = off_fn(vm, reg_off);
|
||||
addr = base + (addr << scale);
|
||||
tlb_fn(env, vd, reg_off, addr, mmu_idx, ra);
|
||||
tlb_fn(env, vd, reg_off, addr, oi, ra);
|
||||
|
||||
/* The rest of the reads will be non-faulting. */
|
||||
set_helper_retaddr(0);
|
||||
|
@ -5277,9 +5283,9 @@ static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
|
|||
target_ulong base, uint32_t desc, uintptr_t ra,
|
||||
zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
|
||||
{
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
|
||||
intptr_t i, oprsz = simd_oprsz(desc);
|
||||
unsigned scale = simd_data(desc);
|
||||
|
||||
set_helper_retaddr(ra);
|
||||
for (i = 0; i < oprsz; ) {
|
||||
|
@ -5287,7 +5293,7 @@ static void sve_st1_zs(CPUARMState *env, void *vd, void *vg, void *vm,
|
|||
do {
|
||||
if (likely(pg & 1)) {
|
||||
target_ulong off = off_fn(vm, i);
|
||||
tlb_fn(env, vd, i, base + (off << scale), mmu_idx, ra);
|
||||
tlb_fn(env, vd, i, base + (off << scale), oi, ra);
|
||||
}
|
||||
i += 4, pg >>= 4;
|
||||
} while (i & 15);
|
||||
|
@ -5299,16 +5305,16 @@ static void sve_st1_zd(CPUARMState *env, void *vd, void *vg, void *vm,
|
|||
target_ulong base, uint32_t desc, uintptr_t ra,
|
||||
zreg_off_fn *off_fn, sve_ld1_tlb_fn *tlb_fn)
|
||||
{
|
||||
const int mmu_idx = cpu_mmu_index(env, false);
|
||||
const TCGMemOpIdx oi = extract32(desc, SIMD_DATA_SHIFT, MEMOPIDX_SHIFT);
|
||||
const int scale = extract32(desc, SIMD_DATA_SHIFT + MEMOPIDX_SHIFT, 2);
|
||||
intptr_t i, oprsz = simd_oprsz(desc) / 8;
|
||||
unsigned scale = simd_data(desc);
|
||||
|
||||
set_helper_retaddr(ra);
|
||||
for (i = 0; i < oprsz; i++) {
|
||||
uint8_t pg = *(uint8_t *)(vg + H1(i));
|
||||
if (likely(pg & 1)) {
|
||||
target_ulong off = off_fn(vm, i * 8);
|
||||
tlb_fn(env, vd, i * 8, base + (off << scale), mmu_idx, ra);
|
||||
tlb_fn(env, vd, i * 8, base + (off << scale), oi, ra);
|
||||
}
|
||||
}
|
||||
set_helper_retaddr(0);
|
||||
|
|
|
@ -4765,26 +4765,35 @@ static const uint8_t dtype_esz[16] = {
|
|||
3, 2, 1, 3
|
||||
};
|
||||
|
||||
/*
 * Build, at translation time, the TCGMemOpIdx for an SVE memory access:
 * the memop is s->be_data combined with dtype_mop[dtype], and the mmu
 * index is whatever get_mem_index(s) returns for this DisasContext.
 */
static TCGMemOpIdx sve_memopidx(DisasContext *s, int dtype)
|
||||
{
|
||||
return make_memop_idx(s->be_data | dtype_mop[dtype], get_mem_index(s));
|
||||
}
|
||||
|
||||
/*
 * Emit a call to the SVE memory helper FN, passing cpu_env, a pointer to
 * predicate PG, ADDR, and a constant descriptor.  Callers visible in this
 * diff are do_ld_zpa, do_st_zpa, trans_LDFF1_zprr and trans_LDNF1_zpri.
 * NOTE(review): this is a rendered diff hunk, so the removed and the added
 * lines of the commit both appear below (e.g. two declarations of desc).
 */
static void do_mem_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
|
||||
gen_helper_gvec_mem *fn)
|
||||
int dtype, gen_helper_gvec_mem *fn)
|
||||
{
|
||||
TCGContext *tcg_ctx = s->uc->tcg_ctx;
|
||||
unsigned vsz = vec_full_reg_size(s);
|
||||
TCGv_ptr t_pg;
|
||||
TCGv_i32 desc;
|
||||
TCGv_i32 t_desc;
|
||||
int desc;
|
||||
|
||||
/* For e.g. LD4, there are not enough arguments to pass all 4
|
||||
* registers as pointers, so encode the regno into the data field.
|
||||
* For consistency, do this even for LD1.
|
||||
*/
|
||||
desc = tcg_const_i32(tcg_ctx, simd_desc(vsz, vsz, zt));
|
||||
/* Data field layout: TCGMemOpIdx in the low MEMOPIDX_SHIFT bits, ZT above. */
desc = sve_memopidx(s, dtype);
|
||||
desc |= zt << MEMOPIDX_SHIFT;
|
||||
desc = simd_desc(vsz, vsz, desc);
|
||||
t_desc = tcg_const_i32(tcg_ctx, desc);
|
||||
t_pg = tcg_temp_new_ptr(tcg_ctx);
|
||||
|
||||
tcg_gen_addi_ptr(tcg_ctx, t_pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, pg));
|
||||
fn(tcg_ctx, tcg_ctx->cpu_env, t_pg, addr, desc);
|
||||
fn(tcg_ctx, tcg_ctx->cpu_env, t_pg, addr, t_desc);
|
||||
|
||||
tcg_temp_free_ptr(tcg_ctx, t_pg);
|
||||
tcg_temp_free_i32(tcg_ctx, desc);
|
||||
tcg_temp_free_i32(tcg_ctx, t_desc);
|
||||
}
|
||||
|
||||
static void do_ld_zpa(DisasContext *s, int zt, int pg,
|
||||
|
@ -4847,7 +4856,7 @@ static void do_ld_zpa(DisasContext *s, int zt, int pg,
|
|||
* accessible via the instruction encoding.
|
||||
*/
|
||||
assert(fn != NULL);
|
||||
do_mem_zpa(s, zt, pg, addr, fn);
|
||||
do_mem_zpa(s, zt, pg, addr, dtype, fn);
|
||||
}
|
||||
|
||||
static bool trans_LD_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
|
||||
|
@ -4932,7 +4941,8 @@ static bool trans_LDFF1_zprr(DisasContext *s, arg_rprr_load *a, uint32_t insn)
|
|||
TCGv_i64 addr = new_tmp_a64(s);
|
||||
tcg_gen_shli_i64(tcg_ctx, addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
|
||||
tcg_gen_add_i64(tcg_ctx, addr, addr, cpu_reg_sp(s, a->rn));
|
||||
do_mem_zpa(s, a->rd, a->pg, addr, fns[s->be_data == MO_BE][a->dtype]);
|
||||
do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
|
||||
fns[s->be_data == MO_BE][a->dtype]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -4991,7 +5001,8 @@ static bool trans_LDNF1_zpri(DisasContext *s, arg_rpri_load *a, uint32_t insn)
|
|||
TCGv_i64 addr = new_tmp_a64(s);
|
||||
|
||||
tcg_gen_addi_i64(tcg_ctx, addr, cpu_reg_sp(s, a->rn), off);
|
||||
do_mem_zpa(s, a->rd, a->pg, addr, fns[s->be_data == MO_BE][a->dtype]);
|
||||
do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
|
||||
fns[s->be_data == MO_BE][a->dtype]);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -5007,11 +5018,14 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
|
|||
};
|
||||
unsigned vsz = vec_full_reg_size(s);
|
||||
TCGv_ptr t_pg;
|
||||
TCGv_i32 desc;
|
||||
int poff;
|
||||
TCGv_i32 t_desc;
|
||||
int desc, poff;
|
||||
|
||||
/* Load the first quadword using the normal predicated load helpers. */
|
||||
desc = tcg_const_i32(tcg_ctx, simd_desc(16, 16, zt));
|
||||
desc = sve_memopidx(s, msz_dtype(msz));
|
||||
desc |= zt << MEMOPIDX_SHIFT;
|
||||
desc = simd_desc(16, 16, desc);
|
||||
t_desc = tcg_const_i32(tcg_ctx, desc);
|
||||
|
||||
poff = pred_full_reg_offset(s, pg);
|
||||
if (vsz > 16) {
|
||||
|
@ -5035,10 +5049,10 @@ static void do_ldrq(DisasContext *s, int zt, int pg, TCGv_i64 addr, int msz)
|
|||
t_pg = tcg_temp_new_ptr(tcg_ctx);
|
||||
tcg_gen_addi_ptr(tcg_ctx, t_pg, tcg_ctx->cpu_env, poff);
|
||||
|
||||
fns[s->be_data == MO_BE][msz](tcg_ctx, tcg_ctx->cpu_env, t_pg, addr, desc);
|
||||
fns[s->be_data == MO_BE][msz](tcg_ctx, tcg_ctx->cpu_env, t_pg, addr, t_desc);
|
||||
|
||||
tcg_temp_free_ptr(tcg_ctx, t_pg);
|
||||
tcg_temp_free_i32(tcg_ctx, desc);
|
||||
tcg_temp_free_i32(tcg_ctx, t_desc);
|
||||
|
||||
/* Replicate that first quadword. */
|
||||
if (vsz > 16) {
|
||||
|
@ -5193,7 +5207,7 @@ static void do_st_zpa(DisasContext *s, int zt, int pg, TCGv_i64 addr,
|
|||
fn = fn_multiple[be][nreg - 1][msz];
|
||||
}
|
||||
assert(fn != NULL);
|
||||
do_mem_zpa(s, zt, pg, addr, fn);
|
||||
do_mem_zpa(s, zt, pg, addr, msz_dtype(msz), fn);
|
||||
}
|
||||
|
||||
static bool trans_ST_zprr(DisasContext *s, arg_rprr_store *a, uint32_t insn)
|
||||
|
@ -5233,25 +5247,32 @@ static bool trans_ST_zpri(DisasContext *s, arg_rpri_store *a, uint32_t insn)
|
|||
*** SVE gather loads / scatter stores
|
||||
*/
|
||||
|
||||
/*
 * Emit a call to the gather/scatter helper FN, passing cpu_env, pointers to
 * vector ZT, predicate PG and vector ZM, SCALAR, and a constant descriptor.
 * In the added lines the descriptor data field carries the TCGMemOpIdx for
 * msz_dtype(MSZ) in its low MEMOPIDX_SHIFT bits, with SCALE placed above it.
 * NOTE(review): this is a rendered diff hunk, so the removed and the added
 * lines of the commit both appear below (two signatures, two desc decls).
 */
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm, int scale,
|
||||
TCGv_i64 scalar, gen_helper_gvec_mem_scatter *fn)
|
||||
static void do_mem_zpz(DisasContext *s, int zt, int pg, int zm,
|
||||
int scale, TCGv_i64 scalar, int msz,
|
||||
gen_helper_gvec_mem_scatter *fn)
|
||||
{
|
||||
TCGContext *tcg_ctx = s->uc->tcg_ctx;
|
||||
unsigned vsz = vec_full_reg_size(s);
|
||||
TCGv_i32 desc = tcg_const_i32(tcg_ctx, simd_desc(vsz, vsz, scale));
|
||||
TCGv_ptr t_zm = tcg_temp_new_ptr(tcg_ctx);
|
||||
TCGv_ptr t_pg = tcg_temp_new_ptr(tcg_ctx);
|
||||
TCGv_ptr t_zt = tcg_temp_new_ptr(tcg_ctx);
|
||||
TCGv_i32 t_desc;
|
||||
int desc;
|
||||
|
||||
desc = sve_memopidx(s, msz_dtype(msz));
|
||||
desc |= scale << MEMOPIDX_SHIFT;
|
||||
desc = simd_desc(vsz, vsz, desc);
|
||||
t_desc = tcg_const_i32(tcg_ctx, desc);
|
||||
|
||||
tcg_gen_addi_ptr(tcg_ctx, t_pg, tcg_ctx->cpu_env, pred_full_reg_offset(s, pg));
|
||||
tcg_gen_addi_ptr(tcg_ctx, t_zm, tcg_ctx->cpu_env, vec_full_reg_offset(s, zm));
|
||||
tcg_gen_addi_ptr(tcg_ctx, t_zt, tcg_ctx->cpu_env, vec_full_reg_offset(s, zt));
|
||||
fn(tcg_ctx, tcg_ctx->cpu_env, t_zt, t_pg, t_zm, scalar, desc);
|
||||
fn(tcg_ctx, tcg_ctx->cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
|
||||
|
||||
tcg_temp_free_ptr(tcg_ctx, t_zt);
|
||||
tcg_temp_free_ptr(tcg_ctx, t_zm);
|
||||
tcg_temp_free_ptr(tcg_ctx, t_pg);
|
||||
tcg_temp_free_i32(tcg_ctx, desc);
|
||||
tcg_temp_free_i32(tcg_ctx, t_desc);
|
||||
}
|
||||
|
||||
/* Indexed by [be][ff][xs][u][msz]. */
|
||||
|
@ -5440,7 +5461,7 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz *a, uint32_t insn)
|
|||
assert(fn != NULL);
|
||||
|
||||
do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
|
||||
cpu_reg_sp(s, a->rn), fn);
|
||||
cpu_reg_sp(s, a->rn), a->msz, fn);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -5473,7 +5494,7 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz *a, uint32_t insn)
|
|||
* by loading the immediate into the scalar parameter.
|
||||
*/
|
||||
imm = tcg_const_i64(tcg_ctx, a->imm << a->msz);
|
||||
do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
|
||||
do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
|
||||
tcg_temp_free_i64(tcg_ctx, imm);
|
||||
return true;
|
||||
}
|
||||
|
@ -5549,7 +5570,7 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz *a, uint32_t insn)
|
|||
g_assert_not_reached();
|
||||
}
|
||||
do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
|
||||
cpu_reg_sp(s, a->rn), fn);
|
||||
cpu_reg_sp(s, a->rn), a->msz, fn);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -5582,7 +5603,7 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz *a, uint32_t insn)
|
|||
* by loading the immediate into the scalar parameter.
|
||||
*/
|
||||
imm = tcg_const_i64(tcg_ctx, a->imm << a->msz);
|
||||
do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, fn);
|
||||
do_mem_zpz(s, a->rd, a->pg, a->rn, 0, imm, a->msz, fn);
|
||||
tcg_temp_free_i64(tcg_ctx, imm);
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue