From 92964652894ca4217289ae4c084be8b6f522c0c7 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 8 Aug 2019 15:10:34 -0400 Subject: [PATCH] target/arm: Move the DC ZVA helper into op_helper Those helpers are a software implementation of the ARM v8 memory zeroing op code. They should be moved to the op helper file, which is going to eventually be built only when TCG is enabled. Backports commit 6cdca173ef81a9dbcee9e142f1a5a34ad9c44b75 from qemu --- qemu/target/arm/helper.c | 102 ----------------------------------- qemu/target/arm/op_helper.c | 103 ++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 102 deletions(-) diff --git a/qemu/target/arm/helper.c b/qemu/target/arm/helper.c index 5e8b191b..a612d719 100644 --- a/qemu/target/arm/helper.c +++ b/qemu/target/arm/helper.c @@ -13138,108 +13138,6 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, #endif } -void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) -{ - /* - * Implement DC ZVA, which zeroes a fixed-length block of memory. - * Note that we do not implement the (architecturally mandated) - * alignment fault for attempts to use this on Device memory - * (which matches the usual QEMU behaviour of not implementing either - * alignment faults or any memory attribute handling). - */ - - ARMCPU *cpu = env_archcpu(env); - uint64_t blocklen = 4 << cpu->dcz_blocksize; - uint64_t vaddr = vaddr_in & ~(blocklen - 1); - -#ifndef CONFIG_USER_ONLY - { - /* - * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than - * the block size so we might have to do more than one TLB lookup. - * We know that in fact for any v8 CPU the page size is at least 4K - * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only - * 1K as an artefact of legacy v5 subpage support being present in the - * same QEMU executable. So in practice the hostaddr[] array has - * two entries, given the current setting of TARGET_PAGE_BITS_MIN. - */ - int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE); - // msvc doesnt allow non-constant array sizes, so we work out the size it would be - // TARGET_PAGE_SIZE is 1024 - // blocklen is 64 - // maxidx = (blocklen+TARGET_PAGE_SIZE-1) / TARGET_PAGE_SIZE - // = (64+1024-1) / 1024 - // = 1 -#ifdef _MSC_VER - void *hostaddr[1]; -#else - void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)]; -#endif - int try, i; - unsigned mmu_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); - - assert(maxidx <= ARRAY_SIZE(hostaddr)); - - for (try = 0; try < 2; try++) { - - for (i = 0; i < maxidx; i++) { - hostaddr[i] = tlb_vaddr_to_host(env, - vaddr + TARGET_PAGE_SIZE * i, - 1, mmu_idx); - if (!hostaddr[i]) { - break; - } - } - if (i == maxidx) { - /* - * If it's all in the TLB it's fair game for just writing to; - * we know we don't need to update dirty status, etc. - */ - for (i = 0; i < maxidx - 1; i++) { - memset(hostaddr[i], 0, TARGET_PAGE_SIZE); - } - memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE)); - return; - } - /* - * OK, try a store and see if we can populate the tlb. This - * might cause an exception if the memory isn't writable, - * in which case we will longjmp out of here. We must for - * this purpose use the actual register value passed to us - * so that we get the fault address right. - */ - helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC()); - /* Now we can populate the other TLB entries, if any */ - for (i = 0; i < maxidx; i++) { - uint64_t va = vaddr + TARGET_PAGE_SIZE * i; - if (va != (vaddr_in & TARGET_PAGE_MASK)) { - helper_ret_stb_mmu(env, va, 0, oi, GETPC()); - } - } - } - - /* - * Slow path (probably attempt to do this to an I/O device or - * similar, or clearing of a block of code we have translations - * cached for). Just do a series of byte writes as the architecture - * demands. It's not worth trying to use a cpu_physical_memory_map(), - * memset(), unmap() sequence here because: - * + we'd need to account for the blocksize being larger than a page - * + the direct-RAM access case is almost always going to be dealt - * with in the fastpath code above, so there's no speed benefit - * + we would have to deal with the map returning NULL because the - * bounce buffer was in use - */ - for (i = 0; i < blocklen; i++) { - helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC()); - } - } -#else - memset(g2h(vaddr), 0, blocklen); -#endif -} - /* * Note that signed overflow is undefined in C. The following routines are * careful to use unsigned types where modulo arithmetic is required. diff --git a/qemu/target/arm/op_helper.c b/qemu/target/arm/op_helper.c index a9c91005..cc8712ee 100644 --- a/qemu/target/arm/op_helper.c +++ b/qemu/target/arm/op_helper.c @@ -17,6 +17,7 @@ * License along with this library; if not, see . */ #include "qemu/osdep.h" +#include "qemu/units.h" #include "qemu/log.h" #include "cpu.h" #include "exec/helper-proto.h" @@ -1320,3 +1321,105 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i) return ((uint32_t)x >> shift) | (x << (32 - shift)); } } + +void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in) +{ + /* + * Implement DC ZVA, which zeroes a fixed-length block of memory. + * Note that we do not implement the (architecturally mandated) + * alignment fault for attempts to use this on Device memory + * (which matches the usual QEMU behaviour of not implementing either + * alignment faults or any memory attribute handling). + */ + + ARMCPU *cpu = env_archcpu(env); + uint64_t blocklen = 4 << cpu->dcz_blocksize; + uint64_t vaddr = vaddr_in & ~(blocklen - 1); + +#ifndef CONFIG_USER_ONLY + { + /* + * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than + * the block size so we might have to do more than one TLB lookup. + * We know that in fact for any v8 CPU the page size is at least 4K + * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only + * 1K as an artefact of legacy v5 subpage support being present in the + * same QEMU executable. So in practice the hostaddr[] array has + * two entries, given the current setting of TARGET_PAGE_BITS_MIN. + */ + int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE); + // msvc doesnt allow non-constant array sizes, so we work out the size it would be + // TARGET_PAGE_SIZE is 1024 + // blocklen is 64 + // maxidx = (blocklen+TARGET_PAGE_SIZE-1) / TARGET_PAGE_SIZE + // = (64+1024-1) / 1024 + // = 1 +#ifdef _MSC_VER + void *hostaddr[1]; +#else + void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)]; +#endif + int try, i; + unsigned mmu_idx = cpu_mmu_index(env, false); + TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx); + + assert(maxidx <= ARRAY_SIZE(hostaddr)); + + for (try = 0; try < 2; try++) { + + for (i = 0; i < maxidx; i++) { + hostaddr[i] = tlb_vaddr_to_host(env, + vaddr + TARGET_PAGE_SIZE * i, + 1, mmu_idx); + if (!hostaddr[i]) { + break; + } + } + if (i == maxidx) { + /* + * If it's all in the TLB it's fair game for just writing to; + * we know we don't need to update dirty status, etc. + */ + for (i = 0; i < maxidx - 1; i++) { + memset(hostaddr[i], 0, TARGET_PAGE_SIZE); + } + memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE)); + return; + } + /* + * OK, try a store and see if we can populate the tlb. This + * might cause an exception if the memory isn't writable, + * in which case we will longjmp out of here. We must for + * this purpose use the actual register value passed to us + * so that we get the fault address right. + */ + helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC()); + /* Now we can populate the other TLB entries, if any */ + for (i = 0; i < maxidx; i++) { + uint64_t va = vaddr + TARGET_PAGE_SIZE * i; + if (va != (vaddr_in & TARGET_PAGE_MASK)) { + helper_ret_stb_mmu(env, va, 0, oi, GETPC()); + } + } + } + + /* + * Slow path (probably attempt to do this to an I/O device or + * similar, or clearing of a block of code we have translations + * cached for). Just do a series of byte writes as the architecture + * demands. It's not worth trying to use a cpu_physical_memory_map(), + * memset(), unmap() sequence here because: + * + we'd need to account for the blocksize being larger than a page + * + the direct-RAM access case is almost always going to be dealt + * with in the fastpath code above, so there's no speed benefit + * + we would have to deal with the map returning NULL because the + * bounce buffer was in use + */ + for (i = 0; i < blocklen; i++) { + helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC()); + } + } +#else + memset(g2h(vaddr), 0, blocklen); +#endif +}