target/arm: Add sve infrastructure for page lookup

For contiguous predicated memory operations, we want to
minimize the number of tlb lookups performed. We have
open-coded this for sve_ld1_r, but for correctness with
MTE we will need this for all of the memory operations.

Create a structure that holds the bounds of active elements,
and metadata for two pages. Add routines to find those
active elements, lookup the pages, and run watchpoints
for those pages.

Temporarily mark the functions unused to avoid Werror.

Backports commit b4cd95d2f4c7197b844f51b29871d888063ea3e7 from qemu
This commit is contained in:
Richard Henderson 2021-02-25 20:23:16 -05:00 committed by Lioncash
parent f430a399d4
commit 94b0876f15
19 changed files with 378 additions and 2 deletions

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_aarch64
#define print_type_str print_type_str_aarch64
#define probe_access probe_access_aarch64
#define probe_access_flags probe_access_flags_aarch64
#define probe_read probe_read_aarch64
#define probe_write probe_write_aarch64
#define propagateFloat128NaN propagateFloat128NaN_aarch64

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_aarch64eb
#define print_type_str print_type_str_aarch64eb
#define probe_access probe_access_aarch64eb
#define probe_access_flags probe_access_flags_aarch64eb
#define probe_read probe_read_aarch64eb
#define probe_write probe_write_aarch64eb
#define propagateFloat128NaN propagateFloat128NaN_aarch64eb

View file

@ -706,6 +706,85 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
}
}
/*
 * Locate the TLB entry covering @addr for @access_type in @mmu_idx,
 * consulting the victim TLB and then the CPU class tlb_fill hook on a miss.
 *
 * @fault_size, @nonfault and @retaddr are forwarded unchanged to tlb_fill.
 *
 * On return:
 *  - TLB_INVALID_MASK with *phost == NULL if tlb_fill reported failure;
 *  - TLB_MMIO with *phost == NULL if the entry carries any flag other than
 *    TLB_WATCHPOINT or TLB_NOTDIRTY;
 *  - otherwise the entry's flag bits, with *phost set to @addr plus the
 *    entry's addend.
 */
static int probe_access_internal(CPUArchState *env, target_ulong addr,
                                 int fault_size, MMUAccessType access_type,
                                 int mmu_idx, bool nonfault,
                                 void **phost, uintptr_t retaddr)
{
    const uintptr_t slot = tlb_index(env, mmu_idx, addr);
    CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
    const target_ulong page = addr & TARGET_PAGE_MASK;
    target_ulong cmp;
    size_t field_ofs;
    int tlb_flags;

    /* Select the comparator field that matches the kind of access. */
    if (access_type == MMU_DATA_LOAD) {
        field_ofs = offsetof(CPUTLBEntry, addr_read);
    } else if (access_type == MMU_DATA_STORE) {
        field_ofs = offsetof(CPUTLBEntry, addr_write);
    } else if (access_type == MMU_INST_FETCH) {
        field_ofs = offsetof(CPUTLBEntry, addr_code);
    } else {
        g_assert_not_reached();
    }

    cmp = tlb_read_ofs(tlbe, field_ofs);
    if (!tlb_hit_page(cmp, page)) {
        bool in_victim = victim_tlb_hit(env, mmu_idx, slot, field_ofs, page);

        if (!in_victim) {
            CPUState *cpu = env_cpu(env);
            CPUClass *klass = CPU_GET_CLASS(cpu->uc, cpu);
            bool filled = klass->tlb_fill(cpu, addr, fault_size, access_type,
                                          mmu_idx, nonfault, retaddr);

            if (!filled) {
                /* The non-faulting page table walk did not succeed. */
                *phost = NULL;
                return TLB_INVALID_MASK;
            }

            /* tlb_fill may have resized the TLB, so look the entry up again. */
            tlbe = tlb_entry(env, mmu_idx, addr);
        }

        /* Either path above may have changed the comparator; reload it. */
        cmp = tlb_read_ofs(tlbe, field_ofs);
    }

    tlb_flags = cmp & TLB_FLAGS_MASK;

    /* Only watchpoint / not-dirty bits set (or none): this is plain RAM. */
    if (likely(!(tlb_flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY)))) {
        *phost = (void *)((uintptr_t)addr + tlbe->addend);
        return tlb_flags;
    }

    /* Anything else is "mmio-like": collapse it to TLB_MMIO, no host ptr. */
    *phost = NULL;
    return TLB_MMIO;
}
/*
 * Public wrapper around probe_access_internal() that probes with a
 * fault size of 0 and never reports TLB_NOTDIRTY to the caller.
 *
 * Unicorn: the not-dirty bookkeeping (notdirty_write) that would normally
 * accompany a clean RAM page is intentionally not performed here; the
 * flag is simply removed from the result.
 */
int probe_access_flags(CPUArchState *env, target_ulong addr,
                       MMUAccessType access_type, int mmu_idx,
                       bool nonfault, void **phost, uintptr_t retaddr)
{
    int result = probe_access_internal(env, addr, 0, access_type, mmu_idx,
                                       nonfault, phost, retaddr);

    /* Common case: nothing to strip. */
    if (likely(!(result & TLB_NOTDIRTY))) {
        return result;
    }

    /* Clean RAM page: hide TLB_NOTDIRTY from the caller. */
    return result & ~TLB_NOTDIRTY;
}
/*
* Probe for whether the specified guest access is permitted. If it is not
* permitted then an exception will be taken in the same way as if this

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_arm
#define print_type_str print_type_str_arm
#define probe_access probe_access_arm
#define probe_access_flags probe_access_flags_arm
#define probe_read probe_read_arm
#define probe_write probe_write_arm
#define propagateFloat128NaN propagateFloat128NaN_arm

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_armeb
#define print_type_str print_type_str_armeb
#define probe_access probe_access_armeb
#define probe_access_flags probe_access_flags_armeb
#define probe_read probe_read_armeb
#define probe_write probe_write_armeb
#define propagateFloat128NaN propagateFloat128NaN_armeb

View file

@ -2331,6 +2331,7 @@ symbols = (
'print_type_size',
'print_type_str',
'probe_access',
'probe_access_flags',
'probe_read',
'probe_write',
'propagateFloat128NaN',

View file

@ -244,6 +244,28 @@ static inline void *probe_read(CPUArchState *env, target_ulong addr, int size,
return probe_access(env, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
}
/**
* probe_access_flags:
* @env: CPUArchState
* @addr: guest virtual address to look up
* @access_type: read, write or execute permission
* @mmu_idx: MMU index to use for lookup
* @nonfault: suppress the fault
* @phost: return value for host address
* @retaddr: return address for unwinding
*
* Similar to probe_access, loosely returning the TLB_FLAGS_MASK for
* the page, and storing the host address for RAM in @phost.
*
* If @nonfault is set, do not raise an exception but return TLB_INVALID_MASK.
* Do not handle watchpoints, but include TLB_WATCHPOINT in the returned flags.
* Do handle clean pages, so exclude TLB_NOTDIRTY from the returned flags.
* For simplicity, all "mmio-like" flags are folded to TLB_MMIO.
*
* Note: in this tree the implementation only masks TLB_NOTDIRTY out of the
* result; the dirty-tracking call for clean pages is commented out.
*
* Returns: the (folded) TLB flags for the page; @phost is set to NULL when
* the result is TLB_INVALID_MASK or TLB_MMIO.
*/
int probe_access_flags(CPUArchState *env, target_ulong addr,
MMUAccessType access_type, int mmu_idx,
bool nonfault, void **phost, uintptr_t retaddr);
#define CODE_GEN_ALIGN 16 /* must be >= of the size of a icache line */
/* Estimated block size for TB allocation. */

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_m68k
#define print_type_str print_type_str_m68k
#define probe_access probe_access_m68k
#define probe_access_flags probe_access_flags_m68k
#define probe_read probe_read_m68k
#define probe_write probe_write_m68k
#define propagateFloat128NaN propagateFloat128NaN_m68k

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_mips
#define print_type_str print_type_str_mips
#define probe_access probe_access_mips
#define probe_access_flags probe_access_flags_mips
#define probe_read probe_read_mips
#define probe_write probe_write_mips
#define propagateFloat128NaN propagateFloat128NaN_mips

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_mips64
#define print_type_str print_type_str_mips64
#define probe_access probe_access_mips64
#define probe_access_flags probe_access_flags_mips64
#define probe_read probe_read_mips64
#define probe_write probe_write_mips64
#define propagateFloat128NaN propagateFloat128NaN_mips64

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_mips64el
#define print_type_str print_type_str_mips64el
#define probe_access probe_access_mips64el
#define probe_access_flags probe_access_flags_mips64el
#define probe_read probe_read_mips64el
#define probe_write probe_write_mips64el
#define propagateFloat128NaN propagateFloat128NaN_mips64el

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_mipsel
#define print_type_str print_type_str_mipsel
#define probe_access probe_access_mipsel
#define probe_access_flags probe_access_flags_mipsel
#define probe_read probe_read_mipsel
#define probe_write probe_write_mipsel
#define propagateFloat128NaN propagateFloat128NaN_mipsel

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_powerpc
#define print_type_str print_type_str_powerpc
#define probe_access probe_access_powerpc
#define probe_access_flags probe_access_flags_powerpc
#define probe_read probe_read_powerpc
#define probe_write probe_write_powerpc
#define propagateFloat128NaN propagateFloat128NaN_powerpc

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_riscv32
#define print_type_str print_type_str_riscv32
#define probe_access probe_access_riscv32
#define probe_access_flags probe_access_flags_riscv32
#define probe_read probe_read_riscv32
#define probe_write probe_write_riscv32
#define propagateFloat128NaN propagateFloat128NaN_riscv32

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_riscv64
#define print_type_str print_type_str_riscv64
#define probe_access probe_access_riscv64
#define probe_access_flags probe_access_flags_riscv64
#define probe_read probe_read_riscv64
#define probe_write probe_write_riscv64
#define propagateFloat128NaN propagateFloat128NaN_riscv64

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_sparc
#define print_type_str print_type_str_sparc
#define probe_access probe_access_sparc
#define probe_access_flags probe_access_flags_sparc
#define probe_read probe_read_sparc
#define probe_write probe_write_sparc
#define propagateFloat128NaN propagateFloat128NaN_sparc

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_sparc64
#define print_type_str print_type_str_sparc64
#define probe_access probe_access_sparc64
#define probe_access_flags probe_access_flags_sparc64
#define probe_read probe_read_sparc64
#define probe_write probe_write_sparc64
#define propagateFloat128NaN propagateFloat128NaN_sparc64

View file

@ -1629,7 +1629,7 @@ void HELPER(sve_cpy_z_d)(void *vd, void *vg, uint64_t val, uint32_t desc)
}
}
/* Big-endian hosts need to frob the byte indicies. If the copy
/* Big-endian hosts need to frob the byte indices. If the copy
* happens to be 8-byte aligned, then no frobbing necessary.
*/
static void swap_memmove(void *vd, void *vs, size_t n)
@ -3936,7 +3936,7 @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
/*
* Load elements into @vd, controlled by @vg, from @host + @mem_ofs.
* Memory is valid through @host + @mem_max. The register element
* indicies are inferred from @mem_ofs, as modified by the types for
* indices are inferred from @mem_ofs, as modified by the types for
* which the helper is built. Return the @mem_ofs of the first element
* not loaded (which is @mem_max if they are all loaded).
*
@ -4095,6 +4095,265 @@ static intptr_t max_for_page(target_ulong base, intptr_t mem_off,
return MIN(split, mem_max - mem_off) + mem_off;
}
/*
* Resolve the guest virtual address to info->host and info->flags.
* If @nofault, return false if the page is invalid, otherwise
* exit via page fault exception.
*/
/*
* Result of probing one guest page for an SVE contiguous access;
* filled in by sve_probe_page().
*/
typedef struct {
/*
* Host pointer as returned by probe_access_flags(), rebased by
* sve_probe_page() so that it corresponds to the base guest address
* rather than base + mem_off.
*/
void *host;
/* Flags returned by probe_access_flags() for the page. */
int flags;
/* Transaction attributes copied from the page's IOTLB entry (zeroed for user-only). */
MemTxAttrs attrs;
} SVEHostPage;
/*
 * Probe the guest page containing @addr + @mem_off and record the outcome
 * in @info (host pointer, TLB flags, transaction attributes).
 *
 * Returns false, which is only expected when @nofault is set, if the probe
 * reports TLB_INVALID_MASK; in that case only info->flags and info->host
 * have been written.  Returns true otherwise.  With @nofault clear, an
 * invalid page is left to probe_access_flags() to report.
 */
static bool sve_probe_page(SVEHostPage *info, bool nofault,
                           CPUARMState *env, target_ulong addr,
                           int mem_off, MMUAccessType access_type,
                           int mmu_idx, uintptr_t retaddr)
{
    const target_ulong elt_addr = addr + mem_off;

    info->flags = probe_access_flags(env, elt_addr, access_type, mmu_idx,
                                     nofault, &info->host, retaddr);
    if (info->flags & TLB_INVALID_MASK) {
        g_assert(nofault);
        return false;
    }

    /* Rebase the host pointer so info->host[mem_off] is the probed byte. */
    info->host = (char *)info->host - mem_off;

#ifdef CONFIG_USER_ONLY
    memset(&info->attrs, 0, sizeof(info->attrs));
#else
    /*
     * Fetch the transaction attributes from the IOTLB slot for the probed
     * address.  The mapping was just found, so the slot must be populated.
     */
    {
        const uintptr_t slot = tlb_index(env, mmu_idx, elt_addr);

# ifdef CONFIG_DEBUG_TCG
        CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, elt_addr);
        target_ulong cmp;

        if (access_type == MMU_DATA_LOAD) {
            cmp = tlbe->addr_read;
        } else {
            cmp = tlb_addr_write(tlbe);
        }
        g_assert(tlb_hit(cmp, elt_addr));
# endif

        info->attrs = env->iotlb[mmu_idx][slot].attrs;
    }
#endif

    return true;
}
/*
* Analyse contiguous data, protected by a governing predicate.
*/
/*
* Fault policy passed to sve_cont_ldst_pages(); it selects which page
* probes are allowed to raise an exception.
* (Presumably maps to SVE no-fault / first-fault / normal accesses --
* TODO confirm against the callers.)
*/
typedef enum {
/* Every page probe is made non-faulting. */
FAULT_NO,
/* The probe of the first page may fault. */
FAULT_FIRST,
/*
* The only policy under which the probe of the first active element
* on the second page may fault.
*/
FAULT_ALL,
} SVEContFault;
/*
* Description of one contiguous predicated access: element bounds on up
* to two pages (filled by sve_cont_ldst_elements) plus the probed page
* data (filled by sve_cont_ldst_pages).
*
* Layout note: sve_cont_ldst_elements() initialises everything before
* @page with memset(-1), so all offset fields must stay ahead of @page.
*/
typedef struct {
/*
* First and last element wholly contained within the two pages.
* mem_off_first[0] and reg_off_first[0] are always set >= 0.
* reg_off_last[0] may be < 0 if the first element crosses pages.
* All of mem_off_first[1], reg_off_first[1] and reg_off_last[1]
* are set >= 0 only if there are complete elements on a second page.
*
* The reg_off_* offsets are relative to the internal vector register.
* The mem_off_first offset is relative to the memory address; the
* two offsets are different when a load operation extends, a store
* operation truncates, or for multi-register operations.
*/
int16_t mem_off_first[2];
int16_t reg_off_first[2];
int16_t reg_off_last[2];
/*
* One element that is misaligned and spans both pages,
* or -1 if there is no such active element.
*/
int16_t mem_off_split;
int16_t reg_off_split;
/*
* The byte offset at which the entire operation crosses a page boundary.
* Set >= 0 if and only if the entire operation spans two pages.
*/
int16_t page_split;
/* TLB data for the two pages. */
SVEHostPage page[2];
} SVEContLdSt;
/*
* Find first active element on each page, and a loose bound for the
* final element on each page. Identify any single element that spans
* the page boundary. Return true if there are any active elements.
*
* @info: output; fully re-initialised here (offsets to -1, page data to 0).
* @addr: guest address of the start of the access.
* @vg: governing predicate, read as 64-bit words.
* @reg_max: bound on register offsets; predicate words are scanned
* while word_index * 64 < @reg_max.
* @esz: log2 of the element size in the register (esize = 1 << esz).
* @msize: bytes occupied by each element in memory; the memory offset
* of an element is (reg_off >> esz) * msize.
*/
static bool __attribute__((unused))
sve_cont_ldst_elements(SVEContLdSt *info, target_ulong addr, uint64_t *vg,
intptr_t reg_max, int esz, int msize)
{
const int esize = 1 << esz;
/* Mask selecting the predicate bits that are significant for this esz. */
const uint64_t pg_mask = pred_esz_masks[esz];
intptr_t reg_off_first = -1, reg_off_last = -1, reg_off_split;
intptr_t mem_off_last, mem_off_split;
intptr_t page_split, elt_split;
intptr_t i;
/* Set all of the element indices to -1, and the TLB data to 0. */
memset(info, -1, offsetof(SVEContLdSt, page));
memset(info->page, 0, sizeof(info->page));
/* Gross scan over the entire predicate to find bounds. */
i = 0;
do {
uint64_t pg = vg[i] & pg_mask;
if (pg) {
/* Highest set bit seen so far; overwritten by each later non-zero word. */
reg_off_last = i * 64 + 63 - clz64(pg);
if (reg_off_first < 0) {
/* Lowest set bit of the first non-zero word. */
reg_off_first = i * 64 + ctz64(pg);
}
}
} while (++i * 64 < reg_max);
if (unlikely(reg_off_first < 0)) {
/* No active elements, no pages touched. */
return false;
}
tcg_debug_assert(reg_off_last >= 0 && reg_off_last < reg_max);
info->reg_off_first[0] = reg_off_first;
info->mem_off_first[0] = (reg_off_first >> esz) * msize;
mem_off_last = (reg_off_last >> esz) * msize;
/*
* Number of bytes from @addr up to the next page boundary
* (assuming TARGET_PAGE_MASK has all bits above the page offset set --
* TODO confirm).
*/
page_split = -(addr | TARGET_PAGE_MASK);
if (likely(mem_off_last + msize <= page_split)) {
/* The entire operation fits within a single page. */
info->reg_off_last[0] = reg_off_last;
return true;
}
info->page_split = page_split;
/* Index of the element containing (or starting at) the page boundary. */
elt_split = page_split / msize;
reg_off_split = elt_split << esz;
mem_off_split = elt_split * msize;
/*
* This is the last full element on the first page, but it is not
* necessarily active. If there is no full element, i.e. the first
* active element is the one that's split, this value remains -1.
* It is useful as iteration bounds.
*/
if (elt_split != 0) {
info->reg_off_last[0] = reg_off_split - esize;
}
/* Determine if an unaligned element spans the pages. */
if (page_split % msize != 0) {
/* It is helpful to know if the split element is active. */
if ((vg[reg_off_split >> 6] >> (reg_off_split & 63)) & 1) {
info->reg_off_split = reg_off_split;
info->mem_off_split = mem_off_split;
if (reg_off_split == reg_off_last) {
/* The page crossing element is last. */
return true;
}
}
/* Step past the straddling element to the first one wholly on page 1. */
reg_off_split += esize;
mem_off_split += msize;
}
/*
* We do want the first active element on the second page, because
* this may affect the address reported in an exception.
*/
reg_off_split = find_next_active(vg, reg_off_split, reg_max, esz);
tcg_debug_assert(reg_off_split <= reg_off_last);
info->reg_off_first[1] = reg_off_split;
info->mem_off_first[1] = (reg_off_split >> esz) * msize;
info->reg_off_last[1] = reg_off_last;
return true;
}
/*
* Resolve the guest virtual addresses to info->page[].
* Control the generation of page faults with @fault. Return false if
* there is no work to do, which can only happen with @fault == FAULT_NO.
*
* @info: element bounds as computed by sve_cont_ldst_elements();
* page[0] and, when the access spans two pages, page[1] are filled in.
* @fault: fault policy, see SVEContFault.
* @addr: guest address of the start of the access.
* @access_type: forwarded to sve_probe_page() for both probes.
* @retaddr: forwarded to sve_probe_page() for both probes.
*/
static bool __attribute__((unused))
sve_cont_ldst_pages(SVEContLdSt *info, SVEContFault fault, CPUARMState *env,
target_ulong addr, MMUAccessType access_type,
uintptr_t retaddr)
{
int mmu_idx = cpu_mmu_index(env, false);
/* The first page is probed at the first active element. */
int mem_off = info->mem_off_first[0];
bool nofault = fault == FAULT_NO;
bool have_work = true;
if (!sve_probe_page(&info->page[0], nofault, env, addr, mem_off,
access_type, mmu_idx, retaddr)) {
/* No work to be done. */
return false;
}
if (likely(info->page_split < 0)) {
/* The entire operation was on the one page. */
return true;
}
/*
* If the second page is invalid, then we want the fault address to be
* the first byte on that page which is accessed.
*/
if (info->mem_off_split >= 0) {
/*
* There is an element split across the pages. The fault address
* should be the first byte of the second page.
*/
mem_off = info->page_split;
/*
* If the split element is also the first active element
* of the vector, then: For first-fault we should continue
* to generate faults for the second page. For no-fault,
* we have work only if the second page is valid.
*/
/*
* NOTE(review): FAULT_FIRST is an SVEContFault enumerator stored into
* a bool, so the assignment below is equivalent to "nofault = true"
* whatever @fault is. The condition tests for an active element
* *before* the split one, whereas the comment above describes the
* split element *being* the first active one. With have_work cleared
* and the probe forced non-faulting, this function can return false
* for @fault != FAULT_NO, which the header comment says cannot happen.
* Confirm the intended behaviour before relying on the return value.
*/
if (info->mem_off_first[0] < info->mem_off_split) {
nofault = FAULT_FIRST;
have_work = false;
}
} else {
/*
* There is no element split across the pages. The fault address
* should be the first active element on the second page.
*/
mem_off = info->mem_off_first[1];
/*
* There must have been one active element on the first page,
* so we're out of first-fault territory.
*/
nofault = fault != FAULT_ALL;
}
/* The result of the second probe only matters if have_work was cleared above. */
have_work |= sve_probe_page(&info->page[1], nofault, env, addr, mem_off,
access_type, mmu_idx, retaddr);
return have_work;
}
/*
* The result of tlb_vaddr_to_host for user-only is just g2h(x),
* which is always non-null. Elide the useless test.

View file

@ -2325,6 +2325,7 @@
#define print_type_size print_type_size_x86_64
#define print_type_str print_type_str_x86_64
#define probe_access probe_access_x86_64
#define probe_access_flags probe_access_flags_x86_64
#define probe_read probe_read_x86_64
#define probe_write probe_write_x86_64
#define propagateFloat128NaN propagateFloat128NaN_x86_64