mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2025-01-11 19:45:29 +00:00
target/arm: Move helper_dc_zva to helper-a64.c
This is an AArch64-only function, so move it out of the shared helper file. This patch is code movement only. Backports commit 7b182eb2467af6c47c9c77c64bbbeed8ed53c330 from QEMU.
This commit is contained in:
parent
a22a2a8b71
commit
3cb68bc44e
|
@ -18,6 +18,7 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "qemu/osdep.h"
|
#include "qemu/osdep.h"
|
||||||
|
#include "qemu/units.h"
|
||||||
#include "cpu.h"
|
#include "cpu.h"
|
||||||
#include "exec/helper-proto.h"
|
#include "exec/helper-proto.h"
|
||||||
#include "qemu/host-utils.h"
|
#include "qemu/host-utils.h"
|
||||||
|
@ -1085,4 +1086,104 @@ uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
|
||||||
return float16_sqrt(a, s);
|
return float16_sqrt(a, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Implement DC ZVA, which zeroes a fixed-length block of memory.
|
||||||
|
* Note that we do not implement the (architecturally mandated)
|
||||||
|
* alignment fault for attempts to use this on Device memory
|
||||||
|
* (which matches the usual QEMU behaviour of not implementing either
|
||||||
|
* alignment faults or any memory attribute handling).
|
||||||
|
*/
|
||||||
|
|
||||||
|
ARMCPU *cpu = env_archcpu(env);
|
||||||
|
uint64_t blocklen = 4 << cpu->dcz_blocksize;
|
||||||
|
uint64_t vaddr = vaddr_in & ~(blocklen - 1);
|
||||||
|
|
||||||
|
#ifndef CONFIG_USER_ONLY
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
|
||||||
|
* the block size so we might have to do more than one TLB lookup.
|
||||||
|
* We know that in fact for any v8 CPU the page size is at least 4K
|
||||||
|
* and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
|
||||||
|
* 1K as an artefact of legacy v5 subpage support being present in the
|
||||||
|
* same QEMU executable. So in practice the hostaddr[] array has
|
||||||
|
* two entries, given the current setting of TARGET_PAGE_BITS_MIN.
|
||||||
|
*/
|
||||||
|
int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
|
||||||
|
// MSVC doesn't allow non-constant array sizes, so we work out the size it would be
|
||||||
|
// TARGET_PAGE_SIZE is 1024
|
||||||
|
// blocklen is 64
|
||||||
|
// maxidx = (blocklen+TARGET_PAGE_SIZE-1) / TARGET_PAGE_SIZE
|
||||||
|
// = (64+1024-1) / 1024
|
||||||
|
// = 1
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
void *hostaddr[1];
|
||||||
|
#else
|
||||||
|
void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
|
||||||
|
#endif
|
||||||
|
int try, i;
|
||||||
|
unsigned mmu_idx = cpu_mmu_index(env, false);
|
||||||
|
TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
|
||||||
|
|
||||||
|
assert(maxidx <= ARRAY_SIZE(hostaddr));
|
||||||
|
|
||||||
|
for (try = 0; try < 2; try++) {
|
||||||
|
|
||||||
|
for (i = 0; i < maxidx; i++) {
|
||||||
|
hostaddr[i] = tlb_vaddr_to_host(env,
|
||||||
|
vaddr + TARGET_PAGE_SIZE * i,
|
||||||
|
1, mmu_idx);
|
||||||
|
if (!hostaddr[i]) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (i == maxidx) {
|
||||||
|
/*
|
||||||
|
* If it's all in the TLB it's fair game for just writing to;
|
||||||
|
* we know we don't need to update dirty status, etc.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < maxidx - 1; i++) {
|
||||||
|
memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
|
||||||
|
}
|
||||||
|
memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* OK, try a store and see if we can populate the tlb. This
|
||||||
|
* might cause an exception if the memory isn't writable,
|
||||||
|
* in which case we will longjmp out of here. We must for
|
||||||
|
* this purpose use the actual register value passed to us
|
||||||
|
* so that we get the fault address right.
|
||||||
|
*/
|
||||||
|
helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());
|
||||||
|
/* Now we can populate the other TLB entries, if any */
|
||||||
|
for (i = 0; i < maxidx; i++) {
|
||||||
|
uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
|
||||||
|
if (va != (vaddr_in & TARGET_PAGE_MASK)) {
|
||||||
|
helper_ret_stb_mmu(env, va, 0, oi, GETPC());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Slow path (probably attempt to do this to an I/O device or
|
||||||
|
* similar, or clearing of a block of code we have translations
|
||||||
|
* cached for). Just do a series of byte writes as the architecture
|
||||||
|
* demands. It's not worth trying to use a cpu_physical_memory_map(),
|
||||||
|
* memset(), unmap() sequence here because:
|
||||||
|
* + we'd need to account for the blocksize being larger than a page
|
||||||
|
* + the direct-RAM access case is almost always going to be dealt
|
||||||
|
* with in the fastpath code above, so there's no speed benefit
|
||||||
|
* + we would have to deal with the map returning NULL because the
|
||||||
|
* bounce buffer was in use
|
||||||
|
*/
|
||||||
|
for (i = 0; i < blocklen; i++) {
|
||||||
|
helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
memset(g2h(vaddr), 0, blocklen);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
|
@ -90,6 +90,7 @@ DEF_HELPER_2(advsimd_f16touinth, i32, f16, ptr)
|
||||||
DEF_HELPER_2(sqrt_f16, f16, f16, ptr)
|
DEF_HELPER_2(sqrt_f16, f16, f16, ptr)
|
||||||
|
|
||||||
DEF_HELPER_2(exception_return, void, env, i64)
|
DEF_HELPER_2(exception_return, void, env, i64)
|
||||||
|
DEF_HELPER_2(dc_zva, void, env, i64)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_3(pacia, TCG_CALL_NO_WG, i64, env, i64, i64)
|
DEF_HELPER_FLAGS_3(pacia, TCG_CALL_NO_WG, i64, env, i64, i64)
|
||||||
DEF_HELPER_FLAGS_3(pacib, TCG_CALL_NO_WG, i64, env, i64, i64)
|
DEF_HELPER_FLAGS_3(pacib, TCG_CALL_NO_WG, i64, env, i64, i64)
|
||||||
|
|
|
@ -556,7 +556,6 @@ DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_3(crc32_arm, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
|
DEF_HELPER_FLAGS_3(crc32_arm, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
|
||||||
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
|
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
|
||||||
DEF_HELPER_2(dc_zva, void, env, i64)
|
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, i32)
|
void, ptr, ptr, ptr, ptr, i32)
|
||||||
|
|
|
@ -927,105 +927,3 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)
|
||||||
return ((uint32_t)x >> shift) | (x << (32 - shift));
|
return ((uint32_t)x >> shift) | (x << (32 - shift));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Implement DC ZVA, which zeroes a fixed-length block of memory.
|
|
||||||
* Note that we do not implement the (architecturally mandated)
|
|
||||||
* alignment fault for attempts to use this on Device memory
|
|
||||||
* (which matches the usual QEMU behaviour of not implementing either
|
|
||||||
* alignment faults or any memory attribute handling).
|
|
||||||
*/
|
|
||||||
|
|
||||||
ARMCPU *cpu = env_archcpu(env);
|
|
||||||
uint64_t blocklen = 4 << cpu->dcz_blocksize;
|
|
||||||
uint64_t vaddr = vaddr_in & ~(blocklen - 1);
|
|
||||||
|
|
||||||
#ifndef CONFIG_USER_ONLY
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
|
|
||||||
* the block size so we might have to do more than one TLB lookup.
|
|
||||||
* We know that in fact for any v8 CPU the page size is at least 4K
|
|
||||||
* and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
|
|
||||||
* 1K as an artefact of legacy v5 subpage support being present in the
|
|
||||||
* same QEMU executable. So in practice the hostaddr[] array has
|
|
||||||
* two entries, given the current setting of TARGET_PAGE_BITS_MIN.
|
|
||||||
*/
|
|
||||||
int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
|
|
||||||
// MSVC doesn't allow non-constant array sizes, so we work out the size it would be
|
|
||||||
// TARGET_PAGE_SIZE is 1024
|
|
||||||
// blocklen is 64
|
|
||||||
// maxidx = (blocklen+TARGET_PAGE_SIZE-1) / TARGET_PAGE_SIZE
|
|
||||||
// = (64+1024-1) / 1024
|
|
||||||
// = 1
|
|
||||||
#ifdef _MSC_VER
|
|
||||||
void *hostaddr[1];
|
|
||||||
#else
|
|
||||||
void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
|
|
||||||
#endif
|
|
||||||
int try, i;
|
|
||||||
unsigned mmu_idx = cpu_mmu_index(env, false);
|
|
||||||
TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
|
|
||||||
|
|
||||||
assert(maxidx <= ARRAY_SIZE(hostaddr));
|
|
||||||
|
|
||||||
for (try = 0; try < 2; try++) {
|
|
||||||
|
|
||||||
for (i = 0; i < maxidx; i++) {
|
|
||||||
hostaddr[i] = tlb_vaddr_to_host(env,
|
|
||||||
vaddr + TARGET_PAGE_SIZE * i,
|
|
||||||
1, mmu_idx);
|
|
||||||
if (!hostaddr[i]) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (i == maxidx) {
|
|
||||||
/*
|
|
||||||
* If it's all in the TLB it's fair game for just writing to;
|
|
||||||
* we know we don't need to update dirty status, etc.
|
|
||||||
*/
|
|
||||||
for (i = 0; i < maxidx - 1; i++) {
|
|
||||||
memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
|
|
||||||
}
|
|
||||||
memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* OK, try a store and see if we can populate the tlb. This
|
|
||||||
* might cause an exception if the memory isn't writable,
|
|
||||||
* in which case we will longjmp out of here. We must for
|
|
||||||
* this purpose use the actual register value passed to us
|
|
||||||
* so that we get the fault address right.
|
|
||||||
*/
|
|
||||||
helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());
|
|
||||||
/* Now we can populate the other TLB entries, if any */
|
|
||||||
for (i = 0; i < maxidx; i++) {
|
|
||||||
uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
|
|
||||||
if (va != (vaddr_in & TARGET_PAGE_MASK)) {
|
|
||||||
helper_ret_stb_mmu(env, va, 0, oi, GETPC());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Slow path (probably attempt to do this to an I/O device or
|
|
||||||
* similar, or clearing of a block of code we have translations
|
|
||||||
* cached for). Just do a series of byte writes as the architecture
|
|
||||||
* demands. It's not worth trying to use a cpu_physical_memory_map(),
|
|
||||||
* memset(), unmap() sequence here because:
|
|
||||||
* + we'd need to account for the blocksize being larger than a page
|
|
||||||
* + the direct-RAM access case is almost always going to be dealt
|
|
||||||
* with in the fastpath code above, so there's no speed benefit
|
|
||||||
* + we would have to deal with the map returning NULL because the
|
|
||||||
* bounce buffer was in use
|
|
||||||
*/
|
|
||||||
for (i = 0; i < blocklen; i++) {
|
|
||||||
helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
memset(g2h(vaddr), 0, blocklen);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in a new issue