mirror of
https://github.com/yuzu-emu/unicorn.git
synced 2024-12-23 10:25:28 +00:00
target/arm: Move the DC ZVA helper into op_helper
Those helpers are a software implementation of the ARM v8 memory zeroing op code. They should be moved to the op helper file, which is going to eventually be built only when TCG is enabled. Backports commit 6cdca173ef81a9dbcee9e142f1a5a34ad9c44b75 from qemu
This commit is contained in:
parent
9bf229ede1
commit
9296465289
|
@ -13138,108 +13138,6 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size,
|
|||
#endif
|
||||
}
|
||||
|
||||
void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
    /*
     * DC ZVA: write zeroes over the fixed-length, naturally aligned block
     * of memory that contains vaddr_in.
     *
     * The architecture mandates an alignment fault when this is attempted
     * on Device memory.  That fault is not implemented here, in keeping
     * with the usual QEMU behaviour of implementing neither alignment
     * faults nor any memory attribute handling.
     */
    ARMCPU *cpu = env_archcpu(env);
    /* Block length in bytes, and the block-aligned start address. */
    uint64_t blocklen = 4 << cpu->dcz_blocksize;
    uint64_t base = vaddr_in & ~(blocklen - 1);

#ifdef CONFIG_USER_ONLY
    /* User-mode emulation: guest memory is directly addressable. */
    memset(g2h(base), 0, blocklen);
#else
    {
        /*
         * QEMU's TARGET_PAGE_SIZE can be smaller than the block size, in
         * which case a single block needs more than one TLB lookup.  Any
         * v8 CPU really has pages of at least 4K and a block of at most
         * 2K, but TARGET_PAGE_SIZE is only 1K because legacy v5 subpage
         * support lives in the same QEMU executable.  With the current
         * TARGET_PAGE_BITS_MIN the host-address array therefore has two
         * entries in practice.
         */
        int npages = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
        /*
         * MSVC rejects non-constant array sizes, so for that compiler the
         * element count is worked out by hand: with TARGET_PAGE_SIZE = 1024
         * and blocklen = 64,
         *   (blocklen + TARGET_PAGE_SIZE - 1) / TARGET_PAGE_SIZE
         *     = (64 + 1024 - 1) / 1024 = 1.
         */
#ifdef _MSC_VER
        void *host[1];
#else
        void *host[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
#endif
        int pass, pg;
        uint64_t off;
        unsigned mmu_idx = cpu_mmu_index(env, false);
        TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

        /* The fast path below indexes host[] with 0 .. npages - 1. */
        assert(npages <= ARRAY_SIZE(host));

        for (pass = 0; pass < 2; pass++) {
            /* Look up each page of the block, stopping at the first miss. */
            pg = 0;
            while (pg < npages) {
                host[pg] = tlb_vaddr_to_host(env,
                                             base + TARGET_PAGE_SIZE * pg,
                                             1, mmu_idx);
                if (!host[pg]) {
                    break;
                }
                pg++;
            }

            if (pg == npages) {
                /*
                 * Every page is in the TLB, so writing to host memory
                 * directly is fair game; no dirty-status update or
                 * similar is needed.  All pages but the last are cleared
                 * in full, the last one only up to the end of the block.
                 */
                int last = npages - 1;

                for (pg = 0; pg < last; pg++) {
                    memset(host[pg], 0, TARGET_PAGE_SIZE);
                }
                memset(host[last], 0, blocklen - (last * TARGET_PAGE_SIZE));
                return;
            }

            /*
             * Attempt a store to see whether that populates the TLB.  If
             * the memory is not writable this may raise an exception, in
             * which case we longjmp out of here.  The store uses the
             * register value exactly as passed in, so that the reported
             * fault address is right.
             */
            helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());

            /* Then fill the TLB entries for the remaining pages, if any. */
            for (pg = 0; pg < npages; pg++) {
                uint64_t page_va = base + TARGET_PAGE_SIZE * pg;

                if (page_va == (vaddr_in & TARGET_PAGE_MASK)) {
                    continue;
                }
                helper_ret_stb_mmu(env, page_va, 0, oi, GETPC());
            }
        }

        /*
         * Slow path: probably an attempt to do this to an I/O device or
         * similar, or to clear a block of code for which translations are
         * cached.  Issue a series of byte writes, as the architecture
         * demands.  A cpu_physical_memory_map(), memset(), unmap()
         * sequence is not worth it here because:
         *  + the blocksize being larger than a page would need handling
         *  + direct-RAM accesses are almost always dealt with by the
         *    fast path above, so there is no speed benefit
         *  + the map could return NULL because the bounce buffer was
         *    in use, which would need handling too
         */
        for (off = 0; off < blocklen; off++) {
            helper_ret_stb_mmu(env, base + off, 0, oi, GETPC());
        }
    }
#endif
}
|
||||
|
||||
/*
|
||||
* Note that signed overflow is undefined in C. The following routines are
|
||||
* careful to use unsigned types where modulo arithmetic is required.
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
#include "qemu/osdep.h"
|
||||
#include "qemu/units.h"
|
||||
#include "qemu/log.h"
|
||||
#include "cpu.h"
|
||||
#include "exec/helper-proto.h"
|
||||
|
@ -1320,3 +1321,105 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)
|
|||
return ((uint32_t)x >> shift) | (x << (32 - shift));
|
||||
}
|
||||
}
|
||||
|
||||
void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
    /*
     * DC ZVA: write zeroes over the fixed-length, naturally aligned block
     * of memory that contains vaddr_in.
     *
     * The architecture mandates an alignment fault when this is attempted
     * on Device memory.  That fault is not implemented here, in keeping
     * with the usual QEMU behaviour of implementing neither alignment
     * faults nor any memory attribute handling.
     */
    ARMCPU *cpu = env_archcpu(env);
    /* Block length in bytes, and the block-aligned start address. */
    uint64_t blocklen = 4 << cpu->dcz_blocksize;
    uint64_t base = vaddr_in & ~(blocklen - 1);

#ifdef CONFIG_USER_ONLY
    /* User-mode emulation: guest memory is directly addressable. */
    memset(g2h(base), 0, blocklen);
#else
    {
        /*
         * QEMU's TARGET_PAGE_SIZE can be smaller than the block size, in
         * which case a single block needs more than one TLB lookup.  Any
         * v8 CPU really has pages of at least 4K and a block of at most
         * 2K, but TARGET_PAGE_SIZE is only 1K because legacy v5 subpage
         * support lives in the same QEMU executable.  With the current
         * TARGET_PAGE_BITS_MIN the host-address array therefore has two
         * entries in practice.
         */
        int npages = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
        /*
         * MSVC rejects non-constant array sizes, so for that compiler the
         * element count is worked out by hand: with TARGET_PAGE_SIZE = 1024
         * and blocklen = 64,
         *   (blocklen + TARGET_PAGE_SIZE - 1) / TARGET_PAGE_SIZE
         *     = (64 + 1024 - 1) / 1024 = 1.
         */
#ifdef _MSC_VER
        void *host[1];
#else
        void *host[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
#endif
        int pass, pg;
        uint64_t off;
        unsigned mmu_idx = cpu_mmu_index(env, false);
        TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

        /* The fast path below indexes host[] with 0 .. npages - 1. */
        assert(npages <= ARRAY_SIZE(host));

        for (pass = 0; pass < 2; pass++) {
            /* Look up each page of the block, stopping at the first miss. */
            pg = 0;
            while (pg < npages) {
                host[pg] = tlb_vaddr_to_host(env,
                                             base + TARGET_PAGE_SIZE * pg,
                                             1, mmu_idx);
                if (!host[pg]) {
                    break;
                }
                pg++;
            }

            if (pg == npages) {
                /*
                 * Every page is in the TLB, so writing to host memory
                 * directly is fair game; no dirty-status update or
                 * similar is needed.  All pages but the last are cleared
                 * in full, the last one only up to the end of the block.
                 */
                int last = npages - 1;

                for (pg = 0; pg < last; pg++) {
                    memset(host[pg], 0, TARGET_PAGE_SIZE);
                }
                memset(host[last], 0, blocklen - (last * TARGET_PAGE_SIZE));
                return;
            }

            /*
             * Attempt a store to see whether that populates the TLB.  If
             * the memory is not writable this may raise an exception, in
             * which case we longjmp out of here.  The store uses the
             * register value exactly as passed in, so that the reported
             * fault address is right.
             */
            helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());

            /* Then fill the TLB entries for the remaining pages, if any. */
            for (pg = 0; pg < npages; pg++) {
                uint64_t page_va = base + TARGET_PAGE_SIZE * pg;

                if (page_va == (vaddr_in & TARGET_PAGE_MASK)) {
                    continue;
                }
                helper_ret_stb_mmu(env, page_va, 0, oi, GETPC());
            }
        }

        /*
         * Slow path: probably an attempt to do this to an I/O device or
         * similar, or to clear a block of code for which translations are
         * cached.  Issue a series of byte writes, as the architecture
         * demands.  A cpu_physical_memory_map(), memset(), unmap()
         * sequence is not worth it here because:
         *  + the blocksize being larger than a page would need handling
         *  + direct-RAM accesses are almost always dealt with by the
         *    fast path above, so there is no speed benefit
         *  + the map could return NULL because the bounce buffer was
         *    in use, which would need handling too
         */
        for (off = 0; off < blocklen; off++) {
            helper_ret_stb_mmu(env, base + off, 0, oi, GETPC());
        }
    }
#endif
}
|
||||
|
|
Loading…
Reference in a new issue