2017-01-22 13:27:17 +00:00
|
|
|
/*
|
|
|
|
* Software MMU support
|
|
|
|
*
|
|
|
|
* Generate helpers used by TCG for qemu_ld/st ops and code load
|
|
|
|
* functions.
|
|
|
|
*
|
|
|
|
* Included from target op helpers and exec.c.
|
|
|
|
*
|
|
|
|
* Copyright (c) 2003 Fabrice Bellard
|
|
|
|
*
|
|
|
|
* This library is free software; you can redistribute it and/or
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
* License as published by the Free Software Foundation; either
|
2019-02-03 22:30:53 +00:00
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
2017-01-22 13:27:17 +00:00
|
|
|
*
|
|
|
|
* This library is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
2015-08-21 07:04:50 +00:00
|
|
|
/* Modified for Unicorn Engine by Nguyen Anh Quynh, 2015 */
|
|
|
|
|
|
|
|
#if DATA_SIZE == 8
|
|
|
|
#define SUFFIX q
|
|
|
|
#define LSUFFIX q
|
|
|
|
#define SDATA_TYPE int64_t
|
|
|
|
#define DATA_TYPE uint64_t
|
|
|
|
#elif DATA_SIZE == 4
|
|
|
|
#define SUFFIX l
|
|
|
|
#define LSUFFIX l
|
|
|
|
#define SDATA_TYPE int32_t
|
|
|
|
#define DATA_TYPE uint32_t
|
|
|
|
#elif DATA_SIZE == 2
|
|
|
|
#define SUFFIX w
|
|
|
|
#define LSUFFIX uw
|
|
|
|
#define SDATA_TYPE int16_t
|
|
|
|
#define DATA_TYPE uint16_t
|
|
|
|
#elif DATA_SIZE == 1
|
|
|
|
#define SUFFIX b
|
|
|
|
#define LSUFFIX ub
|
|
|
|
#define SDATA_TYPE int8_t
|
|
|
|
#define DATA_TYPE uint8_t
|
|
|
|
#else
|
|
|
|
#error unsupported data size
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
/* For the benefit of TCG generated code, we want to avoid the complication
   of ABI-specific return type promotion and always return a value extended
   to the register size of the host.  This is tcg_target_long, except in the
   case of a 32-bit host and 64-bit data, and for that we always have
   uint64_t.  Don't bother with this widened value for SOFTMMU_CODE_ACCESS.

   SSUFFIX (signed-load name suffix) only exists in the widened case.  */
#if !defined(SOFTMMU_CODE_ACCESS) && DATA_SIZE != 8
# define WORD_TYPE tcg_target_ulong
# define USUFFIX glue(u, SUFFIX)
# define SSUFFIX glue(s, SUFFIX)
#else
# define WORD_TYPE DATA_TYPE
# define USUFFIX SUFFIX
#endif
|
|
|
|
|
|
|
|
/* Access kind reported for loads, and the CPUTLBEntry field the load
   helpers compare against: data loads use addr_read, instruction fetches
   (SOFTMMU_CODE_ACCESS) use addr_code.  */
#ifndef SOFTMMU_CODE_ACCESS
# define READ_ACCESS_TYPE MMU_DATA_LOAD
# define ADDR_READ addr_read
#else
# define READ_ACCESS_TYPE MMU_INST_FETCH
# define ADDR_READ addr_code
#endif
|
|
|
|
|
|
|
|
/* BSWAP(X): byte-swap a DATA_SIZE-wide value; the identity for any size
   that has no dedicated swap routine (i.e. single bytes).  */
#if DATA_SIZE == 2
# define BSWAP(X) bswap16(X)
#elif DATA_SIZE == 4
# define BSWAP(X) bswap32(X)
#elif DATA_SIZE == 8
# define BSWAP(X) bswap64(X)
#else
# define BSWAP(X) (X)
#endif
|
|
|
|
|
|
|
|
/* Names of the helpers generated by this template.  Multi-byte accesses get
   distinct little-endian and big-endian helpers; for single-byte accesses
   endianness is irrelevant, so one "ret" helper is emitted and the be_*
   names simply alias the le_* ones.  */
#if DATA_SIZE != 1
# define helper_le_ld_name  glue(glue(helper_le_ld, USUFFIX), MMUSUFFIX)
# define helper_be_ld_name  glue(glue(helper_be_ld, USUFFIX), MMUSUFFIX)
# define helper_le_lds_name glue(glue(helper_le_ld, SSUFFIX), MMUSUFFIX)
# define helper_be_lds_name glue(glue(helper_be_ld, SSUFFIX), MMUSUFFIX)
# define helper_le_st_name  glue(glue(helper_le_st, SUFFIX), MMUSUFFIX)
# define helper_be_st_name  glue(glue(helper_be_st, SUFFIX), MMUSUFFIX)
#else
# define helper_le_ld_name  glue(glue(helper_ret_ld, USUFFIX), MMUSUFFIX)
# define helper_be_ld_name  helper_le_ld_name
# define helper_le_lds_name glue(glue(helper_ret_ld, SSUFFIX), MMUSUFFIX)
# define helper_be_lds_name helper_le_lds_name
# define helper_le_st_name  glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)
# define helper_be_st_name  helper_le_st_name
#endif
|
|
|
|
|
|
|
|
#ifndef SOFTMMU_CODE_ACCESS
|
|
|
|
static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
|
2018-02-27 17:31:10 +00:00
|
|
|
size_t mmu_idx, size_t index,
|
2015-08-21 07:04:50 +00:00
|
|
|
target_ulong addr,
|
2018-11-17 02:34:05 +00:00
|
|
|
uintptr_t retaddr,
|
|
|
|
bool recheck)
|
2015-08-21 07:04:50 +00:00
|
|
|
{
|
2018-02-27 17:31:10 +00:00
|
|
|
CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
|
2018-11-17 02:34:05 +00:00
|
|
|
return io_readx(env, iotlbentry, mmu_idx, addr, retaddr, recheck,
|
|
|
|
DATA_SIZE);
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2018-02-11 00:29:47 +00:00
|
|
|
WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
|
|
|
|
TCGMemOpIdx oi, uintptr_t retaddr)
|
2015-08-21 07:04:50 +00:00
|
|
|
{
|
2018-10-23 19:02:42 +00:00
|
|
|
uintptr_t mmu_idx = get_mmuidx(oi);
|
|
|
|
uintptr_t index = tlb_index(env, mmu_idx, addr);
|
|
|
|
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
|
|
|
|
target_ulong tlb_addr = entry->ADDR_READ;
|
2018-02-26 07:38:39 +00:00
|
|
|
unsigned a_bits = get_alignment_bits(get_memop(oi));
|
2015-08-21 07:04:50 +00:00
|
|
|
uintptr_t haddr;
|
|
|
|
DATA_TYPE res;
|
2015-09-24 06:18:02 +00:00
|
|
|
int error_code;
|
2016-01-16 08:44:02 +00:00
|
|
|
struct hook *hook;
|
|
|
|
bool handled;
|
2017-01-21 01:28:22 +00:00
|
|
|
HOOK_FOREACH_VAR_DECLARE;
|
2015-08-21 07:04:50 +00:00
|
|
|
|
2015-08-28 10:42:25 +00:00
|
|
|
struct uc_struct *uc = env->uc;
|
|
|
|
MemoryRegion *mr = memory_mapping(uc, addr);
|
|
|
|
|
2015-12-28 07:19:30 +00:00
|
|
|
// memory might be still unmapped while reading or fetching
|
2015-09-09 07:52:15 +00:00
|
|
|
if (mr == NULL) {
|
2016-01-16 08:44:02 +00:00
|
|
|
handled = false;
|
2015-08-29 01:59:45 +00:00
|
|
|
#if defined(SOFTMMU_CODE_ACCESS)
|
2015-09-30 06:46:55 +00:00
|
|
|
error_code = UC_ERR_FETCH_UNMAPPED;
|
2016-01-16 08:44:02 +00:00
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_FETCH_UNMAPPED) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
if ((handled = ((uc_cb_eventmem_t)hook->callback)(uc, UC_MEM_FETCH_UNMAPPED, addr, DATA_SIZE, 0, hook->user_data)))
|
|
|
|
break;
|
|
|
|
}
|
2015-09-09 07:52:15 +00:00
|
|
|
#else
|
2015-09-30 06:46:55 +00:00
|
|
|
error_code = UC_ERR_READ_UNMAPPED;
|
2016-01-16 08:44:02 +00:00
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_READ_UNMAPPED) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
if ((handled = ((uc_cb_eventmem_t)hook->callback)(uc, UC_MEM_READ_UNMAPPED, addr, DATA_SIZE, 0, hook->user_data)))
|
|
|
|
break;
|
|
|
|
}
|
2015-09-09 07:52:15 +00:00
|
|
|
#endif
|
2016-01-16 08:44:02 +00:00
|
|
|
if (handled) {
|
2015-09-04 03:55:17 +00:00
|
|
|
env->invalid_error = UC_ERR_OK;
|
|
|
|
mr = memory_mapping(uc, addr); // FIXME: what if mr is still NULL at this time?
|
|
|
|
} else {
|
|
|
|
env->invalid_addr = addr;
|
2015-09-09 07:52:15 +00:00
|
|
|
env->invalid_error = error_code;
|
2015-09-04 03:55:17 +00:00
|
|
|
// printf("***** Invalid fetch (unmapped memory) at " TARGET_FMT_lx "\n", addr);
|
|
|
|
cpu_exit(uc->current_cpu);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-09-09 07:52:15 +00:00
|
|
|
#if defined(SOFTMMU_CODE_ACCESS)
|
2015-08-29 01:59:45 +00:00
|
|
|
// Unicorn: callback on fetch from NX
|
2015-09-04 03:55:17 +00:00
|
|
|
if (mr != NULL && !(mr->perms & UC_PROT_EXEC)) { // non-executable
|
2016-01-16 08:44:02 +00:00
|
|
|
handled = false;
|
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_FETCH_PROT) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
if ((handled = ((uc_cb_eventmem_t)hook->callback)(uc, UC_MEM_FETCH_PROT, addr, DATA_SIZE, 0, hook->user_data)))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (handled) {
|
2015-08-29 01:59:45 +00:00
|
|
|
env->invalid_error = UC_ERR_OK;
|
2015-09-03 17:02:38 +00:00
|
|
|
} else {
|
2015-08-29 01:59:45 +00:00
|
|
|
env->invalid_addr = addr;
|
2015-09-24 06:18:02 +00:00
|
|
|
env->invalid_error = UC_ERR_FETCH_PROT;
|
2015-09-01 02:08:48 +00:00
|
|
|
// printf("***** Invalid fetch (non-executable) at " TARGET_FMT_lx "\n", addr);
|
2015-08-29 01:59:45 +00:00
|
|
|
cpu_exit(uc->current_cpu);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2015-08-21 07:04:50 +00:00
|
|
|
// Unicorn: callback on memory read
|
2016-10-22 03:19:55 +00:00
|
|
|
// NOTE: this happens before the actual read, so we cannot tell
|
|
|
|
// the callback if read access is succesful, or not.
|
|
|
|
// See UC_HOOK_MEM_READ_AFTER & UC_MEM_READ_AFTER if you only care
|
|
|
|
// about successful read
|
2016-01-16 08:44:02 +00:00
|
|
|
if (READ_ACCESS_TYPE == MMU_DATA_LOAD) {
|
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_READ) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
((uc_cb_hookmem_t)hook->callback)(env->uc, UC_MEM_READ, addr, DATA_SIZE, 0, hook->user_data);
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-29 01:59:45 +00:00
|
|
|
// Unicorn: callback on non-readable memory
|
2015-09-04 03:55:17 +00:00
|
|
|
if (READ_ACCESS_TYPE == MMU_DATA_LOAD && mr != NULL && !(mr->perms & UC_PROT_READ)) { //non-readable
|
2016-01-16 08:44:02 +00:00
|
|
|
handled = false;
|
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_READ_PROT) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
if ((handled = ((uc_cb_eventmem_t)hook->callback)(uc, UC_MEM_READ_PROT, addr, DATA_SIZE, 0, hook->user_data)))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (handled) {
|
2015-08-28 10:42:25 +00:00
|
|
|
env->invalid_error = UC_ERR_OK;
|
2015-12-24 01:51:17 +00:00
|
|
|
} else {
|
2015-08-28 10:42:25 +00:00
|
|
|
env->invalid_addr = addr;
|
2015-09-01 19:10:09 +00:00
|
|
|
env->invalid_error = UC_ERR_READ_PROT;
|
2015-08-28 10:42:25 +00:00
|
|
|
// printf("***** Invalid memory read (non-readable) at " TARGET_FMT_lx "\n", addr);
|
|
|
|
cpu_exit(uc->current_cpu);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-26 07:38:39 +00:00
|
|
|
if (addr & ((1 << a_bits) - 1)) {
|
2018-02-25 06:51:06 +00:00
|
|
|
cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
|
|
|
|
mmu_idx, retaddr);
|
|
|
|
}
|
|
|
|
|
2015-08-21 07:04:50 +00:00
|
|
|
/* If the TLB entry is for a different page, reload and try again. */
|
2018-07-03 23:21:22 +00:00
|
|
|
if (!tlb_hit(tlb_addr, addr)) {
|
2018-02-25 08:03:27 +00:00
|
|
|
if (!VICTIM_TLB_HIT(ADDR_READ, addr)) {
|
2018-03-06 15:48:51 +00:00
|
|
|
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE,
|
2015-08-21 07:04:50 +00:00
|
|
|
mmu_idx, retaddr);
|
2019-02-12 16:48:43 +00:00
|
|
|
index = tlb_index(env, mmu_idx, addr);
|
|
|
|
entry = tlb_entry(env, mmu_idx, addr);
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
2018-10-23 19:02:42 +00:00
|
|
|
tlb_addr = entry->ADDR_READ;
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Handle an IO access. */
|
|
|
|
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
|
2018-02-12 23:31:17 +00:00
|
|
|
CPUIOTLBEntry *iotlbentry;
|
2015-08-21 07:04:50 +00:00
|
|
|
if ((addr & (DATA_SIZE - 1)) != 0) {
|
|
|
|
goto do_unaligned_access;
|
|
|
|
}
|
2018-02-12 23:31:17 +00:00
|
|
|
iotlbentry = &env->iotlb[mmu_idx][index];
|
|
|
|
if (iotlbentry->addr == 0) {
|
2015-08-21 07:04:50 +00:00
|
|
|
env->invalid_addr = addr;
|
2015-09-30 06:46:55 +00:00
|
|
|
env->invalid_error = UC_ERR_READ_UNMAPPED;
|
2015-08-21 07:04:50 +00:00
|
|
|
// printf("Invalid memory read at " TARGET_FMT_lx "\n", addr);
|
|
|
|
cpu_exit(env->uc->current_cpu);
|
|
|
|
return 0;
|
2015-08-26 08:15:38 +00:00
|
|
|
} else {
|
|
|
|
env->invalid_error = UC_ERR_OK;
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* ??? Note that the io helpers always read data in the target
|
|
|
|
byte ordering. We should push the LE/BE request down into io. */
|
2018-11-17 02:34:05 +00:00
|
|
|
res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr,
|
|
|
|
tlb_addr & TLB_RECHECK);
|
2015-08-21 07:04:50 +00:00
|
|
|
res = TGT_LE(res);
|
2016-10-22 03:19:55 +00:00
|
|
|
goto _out;
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Handle slow unaligned access (it spans two pages or IO). */
|
|
|
|
if (DATA_SIZE > 1
|
|
|
|
&& unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
|
|
|
|
>= TARGET_PAGE_SIZE)) {
|
|
|
|
target_ulong addr1, addr2;
|
|
|
|
DATA_TYPE res1, res2;
|
|
|
|
unsigned shift;
|
|
|
|
do_unaligned_access:
|
|
|
|
addr1 = addr & ~(DATA_SIZE - 1);
|
|
|
|
addr2 = addr1 + DATA_SIZE;
|
2018-02-26 07:51:50 +00:00
|
|
|
res1 = helper_le_ld_name(env, addr1, oi, retaddr);
|
|
|
|
res2 = helper_le_ld_name(env, addr2, oi, retaddr);
|
2015-08-21 07:04:50 +00:00
|
|
|
shift = (addr & (DATA_SIZE - 1)) * 8;
|
|
|
|
|
|
|
|
/* Little-endian combine. */
|
|
|
|
res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift));
|
2016-10-22 03:19:55 +00:00
|
|
|
goto _out;
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
|
2018-10-23 19:02:42 +00:00
|
|
|
haddr = (uintptr_t)(addr + entry->addend);
|
2015-08-21 07:04:50 +00:00
|
|
|
#if DATA_SIZE == 1
|
|
|
|
res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr);
|
|
|
|
#else
|
|
|
|
res = glue(glue(ld, LSUFFIX), _le_p)((uint8_t *)haddr);
|
|
|
|
#endif
|
2016-10-22 03:19:55 +00:00
|
|
|
|
|
|
|
_out:
|
|
|
|
// Unicorn: callback on successful read
|
|
|
|
if (READ_ACCESS_TYPE == MMU_DATA_LOAD) {
|
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_READ_AFTER) {
|
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
|
|
|
continue;
|
|
|
|
((uc_cb_hookmem_t)hook->callback)(env->uc, UC_MEM_READ_AFTER, addr, DATA_SIZE, res, hook->user_data);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-21 07:04:50 +00:00
|
|
|
return res;
|
|
|
|
}
|
|
|
|
|
|
|
|
#if DATA_SIZE > 1
|
2018-02-11 00:29:47 +00:00
|
|
|
WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
|
|
|
|
TCGMemOpIdx oi, uintptr_t retaddr)
|
2015-08-21 07:04:50 +00:00
|
|
|
{
|
2018-10-23 19:02:42 +00:00
|
|
|
uintptr_t mmu_idx = get_mmuidx(oi);
|
|
|
|
uintptr_t index = tlb_index(env, mmu_idx, addr);
|
|
|
|
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
|
|
|
|
target_ulong tlb_addr = entry->ADDR_READ;
|
2018-02-26 07:38:39 +00:00
|
|
|
unsigned a_bits = get_alignment_bits(get_memop(oi));
|
2015-08-21 07:04:50 +00:00
|
|
|
uintptr_t haddr;
|
|
|
|
DATA_TYPE res;
|
2015-09-24 06:18:02 +00:00
|
|
|
int error_code;
|
2016-01-16 08:44:02 +00:00
|
|
|
struct hook *hook;
|
|
|
|
bool handled;
|
2017-01-21 01:28:22 +00:00
|
|
|
HOOK_FOREACH_VAR_DECLARE;
|
2015-08-21 07:04:50 +00:00
|
|
|
|
2015-08-28 10:42:25 +00:00
|
|
|
struct uc_struct *uc = env->uc;
|
|
|
|
MemoryRegion *mr = memory_mapping(uc, addr);
|
|
|
|
|
2015-09-09 07:52:15 +00:00
|
|
|
// memory can be unmapped while reading or fetching
|
|
|
|
if (mr == NULL) {
|
2016-01-16 08:44:02 +00:00
|
|
|
handled = false;
|
2015-08-29 01:59:45 +00:00
|
|
|
#if defined(SOFTMMU_CODE_ACCESS)
|
2015-09-30 06:46:55 +00:00
|
|
|
error_code = UC_ERR_FETCH_UNMAPPED;
|
2016-01-16 08:44:02 +00:00
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_FETCH_UNMAPPED) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
if ((handled = ((uc_cb_eventmem_t)hook->callback)(uc, UC_MEM_FETCH_UNMAPPED, addr, DATA_SIZE, 0, hook->user_data)))
|
|
|
|
break;
|
|
|
|
}
|
2015-09-09 07:52:15 +00:00
|
|
|
#else
|
2015-09-30 06:46:55 +00:00
|
|
|
error_code = UC_ERR_READ_UNMAPPED;
|
2016-01-16 08:44:02 +00:00
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_READ_UNMAPPED) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
if ((handled = ((uc_cb_eventmem_t)hook->callback)(uc, UC_MEM_READ_UNMAPPED, addr, DATA_SIZE, 0, hook->user_data)))
|
|
|
|
break;
|
|
|
|
}
|
2015-09-09 07:52:15 +00:00
|
|
|
#endif
|
2016-01-16 08:44:02 +00:00
|
|
|
if (handled) {
|
2015-09-04 03:55:17 +00:00
|
|
|
env->invalid_error = UC_ERR_OK;
|
|
|
|
mr = memory_mapping(uc, addr); // FIXME: what if mr is still NULL at this time?
|
|
|
|
} else {
|
|
|
|
env->invalid_addr = addr;
|
2015-09-09 07:52:15 +00:00
|
|
|
env->invalid_error = error_code;
|
2015-09-04 03:55:17 +00:00
|
|
|
// printf("***** Invalid fetch (unmapped memory) at " TARGET_FMT_lx "\n", addr);
|
|
|
|
cpu_exit(uc->current_cpu);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-09-09 07:52:15 +00:00
|
|
|
#if defined(SOFTMMU_CODE_ACCESS)
|
2015-08-29 01:59:45 +00:00
|
|
|
// Unicorn: callback on fetch from NX
|
2015-09-04 03:55:17 +00:00
|
|
|
if (mr != NULL && !(mr->perms & UC_PROT_EXEC)) { // non-executable
|
2016-01-16 08:44:02 +00:00
|
|
|
handled = false;
|
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_FETCH_PROT) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
if ((handled = ((uc_cb_eventmem_t)hook->callback)(uc, UC_MEM_FETCH_PROT, addr, DATA_SIZE, 0, hook->user_data)))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (handled) {
|
2015-08-29 01:59:45 +00:00
|
|
|
env->invalid_error = UC_ERR_OK;
|
2015-09-03 17:02:38 +00:00
|
|
|
} else {
|
2015-08-29 01:59:45 +00:00
|
|
|
env->invalid_addr = addr;
|
2015-09-24 06:18:02 +00:00
|
|
|
env->invalid_error = UC_ERR_FETCH_PROT;
|
2015-09-01 02:08:48 +00:00
|
|
|
// printf("***** Invalid fetch (non-executable) at " TARGET_FMT_lx "\n", addr);
|
2015-08-29 01:59:45 +00:00
|
|
|
cpu_exit(uc->current_cpu);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2015-08-21 07:04:50 +00:00
|
|
|
// Unicorn: callback on memory read
|
2016-10-22 03:19:55 +00:00
|
|
|
// NOTE: this happens before the actual read, so we cannot tell
|
|
|
|
// the callback if read access is succesful, or not.
|
|
|
|
// See UC_HOOK_MEM_READ_AFTER & UC_MEM_READ_AFTER if you only care
|
|
|
|
// about successful read
|
2016-01-16 08:44:02 +00:00
|
|
|
if (READ_ACCESS_TYPE == MMU_DATA_LOAD) {
|
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_READ) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
((uc_cb_hookmem_t)hook->callback)(env->uc, UC_MEM_READ, addr, DATA_SIZE, 0, hook->user_data);
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-29 01:59:45 +00:00
|
|
|
// Unicorn: callback on non-readable memory
|
2015-09-04 03:55:17 +00:00
|
|
|
if (READ_ACCESS_TYPE == MMU_DATA_LOAD && mr != NULL && !(mr->perms & UC_PROT_READ)) { //non-readable
|
2016-01-16 08:44:02 +00:00
|
|
|
handled = false;
|
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_READ_PROT) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
if ((handled = ((uc_cb_eventmem_t)hook->callback)(uc, UC_MEM_READ_PROT, addr, DATA_SIZE, 0, hook->user_data)))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (handled) {
|
2015-08-28 10:42:25 +00:00
|
|
|
env->invalid_error = UC_ERR_OK;
|
2015-09-03 17:02:38 +00:00
|
|
|
} else {
|
2015-08-28 10:42:25 +00:00
|
|
|
env->invalid_addr = addr;
|
2015-09-01 19:10:09 +00:00
|
|
|
env->invalid_error = UC_ERR_READ_PROT;
|
2015-08-28 10:42:25 +00:00
|
|
|
// printf("***** Invalid memory read (non-readable) at " TARGET_FMT_lx "\n", addr);
|
|
|
|
cpu_exit(uc->current_cpu);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-26 07:38:39 +00:00
|
|
|
if (addr & ((1 << a_bits) - 1)) {
|
2018-02-25 06:51:06 +00:00
|
|
|
cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
|
|
|
|
mmu_idx, retaddr);
|
|
|
|
}
|
|
|
|
|
2015-08-21 07:04:50 +00:00
|
|
|
/* If the TLB entry is for a different page, reload and try again. */
|
2018-07-03 23:21:22 +00:00
|
|
|
if (!tlb_hit(tlb_addr, addr)) {
|
2018-02-25 08:03:27 +00:00
|
|
|
if (!VICTIM_TLB_HIT(ADDR_READ, addr)) {
|
2018-03-06 15:48:51 +00:00
|
|
|
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, READ_ACCESS_TYPE,
|
2015-08-21 07:04:50 +00:00
|
|
|
mmu_idx, retaddr);
|
2019-02-12 16:48:43 +00:00
|
|
|
index = tlb_index(env, mmu_idx, addr);
|
|
|
|
entry = tlb_entry(env, mmu_idx, addr);
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
2018-10-23 19:02:42 +00:00
|
|
|
tlb_addr = entry->ADDR_READ;
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Handle an IO access. */
|
|
|
|
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
|
2018-02-12 23:31:17 +00:00
|
|
|
CPUIOTLBEntry *iotlbentry;
|
2015-08-21 07:04:50 +00:00
|
|
|
if ((addr & (DATA_SIZE - 1)) != 0) {
|
|
|
|
goto do_unaligned_access;
|
|
|
|
}
|
2018-02-12 23:31:17 +00:00
|
|
|
iotlbentry = &env->iotlb[mmu_idx][index];
|
2015-08-21 07:04:50 +00:00
|
|
|
|
2018-02-12 23:31:17 +00:00
|
|
|
if (iotlbentry->addr == 0) {
|
2015-08-21 07:04:50 +00:00
|
|
|
env->invalid_addr = addr;
|
2015-09-30 06:46:55 +00:00
|
|
|
env->invalid_error = UC_ERR_READ_UNMAPPED;
|
2015-08-21 07:04:50 +00:00
|
|
|
// printf("Invalid memory read at " TARGET_FMT_lx "\n", addr);
|
|
|
|
cpu_exit(env->uc->current_cpu);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ??? Note that the io helpers always read data in the target
|
|
|
|
byte ordering. We should push the LE/BE request down into io. */
|
2018-11-17 02:34:05 +00:00
|
|
|
res = glue(io_read, SUFFIX)(env, mmu_idx, index, addr, retaddr,
|
|
|
|
tlb_addr & TLB_RECHECK);
|
2015-08-21 07:04:50 +00:00
|
|
|
res = TGT_BE(res);
|
2016-10-22 03:19:55 +00:00
|
|
|
goto _out;
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Handle slow unaligned access (it spans two pages or IO). */
|
|
|
|
if (DATA_SIZE > 1
|
|
|
|
&& unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
|
|
|
|
>= TARGET_PAGE_SIZE)) {
|
|
|
|
target_ulong addr1, addr2;
|
|
|
|
DATA_TYPE res1, res2;
|
|
|
|
unsigned shift;
|
|
|
|
do_unaligned_access:
|
|
|
|
addr1 = addr & ~(DATA_SIZE - 1);
|
|
|
|
addr2 = addr1 + DATA_SIZE;
|
2018-02-26 07:51:50 +00:00
|
|
|
res1 = helper_be_ld_name(env, addr1, oi, retaddr);
|
|
|
|
res2 = helper_be_ld_name(env, addr2, oi, retaddr);
|
2015-08-21 07:04:50 +00:00
|
|
|
shift = (addr & (DATA_SIZE - 1)) * 8;
|
|
|
|
|
|
|
|
/* Big-endian combine. */
|
|
|
|
res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift));
|
2016-10-22 03:19:55 +00:00
|
|
|
goto _out;
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
|
2018-10-23 19:02:42 +00:00
|
|
|
haddr = (uintptr_t)(addr + entry->addend);
|
2015-08-21 07:04:50 +00:00
|
|
|
res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr);
|
2016-10-22 03:19:55 +00:00
|
|
|
|
|
|
|
_out:
|
|
|
|
// Unicorn: callback on successful read
|
|
|
|
if (READ_ACCESS_TYPE == MMU_DATA_LOAD) {
|
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_READ_AFTER) {
|
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
|
|
|
continue;
|
|
|
|
((uc_cb_hookmem_t)hook->callback)(env->uc, UC_MEM_READ_AFTER, addr, DATA_SIZE, res, hook->user_data);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-21 07:04:50 +00:00
|
|
|
return res;
|
|
|
|
}
|
|
|
|
#endif /* DATA_SIZE > 1 */
|
|
|
|
|
|
|
|
#ifndef SOFTMMU_CODE_ACCESS
|
|
|
|
|
|
|
|
/* Provide signed versions of the load routines as well. We can of course
|
|
|
|
avoid this for 64-bit data, or for 32-bit data on 32-bit host. */
|
|
|
|
#if DATA_SIZE * 8 < TCG_TARGET_REG_BITS
|
|
|
|
WORD_TYPE helper_le_lds_name(CPUArchState *env, target_ulong addr,
|
2018-02-11 00:29:47 +00:00
|
|
|
TCGMemOpIdx oi, uintptr_t retaddr)
|
2015-08-21 07:04:50 +00:00
|
|
|
{
|
2018-02-11 00:29:47 +00:00
|
|
|
return (SDATA_TYPE)helper_le_ld_name(env, addr, oi, retaddr);
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
# if DATA_SIZE > 1
|
|
|
|
WORD_TYPE helper_be_lds_name(CPUArchState *env, target_ulong addr,
|
2018-02-11 00:29:47 +00:00
|
|
|
TCGMemOpIdx oi, uintptr_t retaddr)
|
2015-08-21 07:04:50 +00:00
|
|
|
{
|
2018-02-11 00:29:47 +00:00
|
|
|
return (SDATA_TYPE)helper_be_ld_name(env, addr, oi, retaddr);
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
# endif
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static inline void glue(io_write, SUFFIX)(CPUArchState *env,
|
2018-02-27 17:31:10 +00:00
|
|
|
size_t mmu_idx, size_t index,
|
2015-08-21 07:04:50 +00:00
|
|
|
DATA_TYPE val,
|
|
|
|
target_ulong addr,
|
2018-11-17 02:34:05 +00:00
|
|
|
uintptr_t retaddr,
|
|
|
|
bool recheck)
|
2015-08-21 07:04:50 +00:00
|
|
|
{
|
2018-02-27 17:31:10 +00:00
|
|
|
CPUIOTLBEntry *iotlbentry = &env->iotlb[mmu_idx][index];
|
2018-11-17 02:34:05 +00:00
|
|
|
return io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
|
|
|
|
recheck, DATA_SIZE);
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
|
2018-02-11 00:29:47 +00:00
|
|
|
TCGMemOpIdx oi, uintptr_t retaddr)
|
2015-08-21 07:04:50 +00:00
|
|
|
{
|
2018-10-23 19:02:42 +00:00
|
|
|
uintptr_t mmu_idx = get_mmuidx(oi);
|
|
|
|
uintptr_t index = tlb_index(env, mmu_idx, addr);
|
|
|
|
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
|
cputlb: read CPUTLBEntry.addr_write atomically
Updates can come from other threads, so readers that do not
take tlb_lock must use atomic_read to avoid undefined
behaviour (UB).
This completes the conversion to tlb_lock. This conversion results
on average in no performance loss, as the following experiments
(run on an Intel i7-6700K CPU @ 4.00GHz) show.
1. aarch64 bootup+shutdown test:
- Before:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% )
31,574,905,303 cycles # 4.217 GHz ( +- 0.12% )
57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% )
10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% )
173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% )
7.504481349 seconds time elapsed ( +- 0.14% )
- After:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% )
31,478,476,520 cycles # 4.218 GHz ( +- 0.07% )
57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% )
10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% )
173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% )
7.474970463 seconds time elapsed ( +- 0.07% )
2. SPEC06int:
SPEC06int (test set)
[Y axis: Speedup over master]
1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+
| |
1.1 +-+.................................+++.............................+ tlb-lock-v2 (m+++x) +-+
| +++ | +++ tlb-lock-v3 (spinl|ck) |
| +++ | | +++ +++ | | |
1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+
| ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### |
1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+
| *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # |
0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+
| * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # |
| * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # |
0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+
| * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # |
0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+
| * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
| * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+
| * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # |
0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+
400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean
png: https://imgur.com/a/BHzpPTW
Notes:
- tlb-lock-v2 corresponds to an implementation with a mutex.
- tlb-lock-v3 corresponds to the current implementation, i.e.
a spinlock and a single lock acquisition in tlb_set_page_with_attrs.
Backports commit 403f290c0603f35f2d09c982bf5549b6d0803ec1 from qemu
2018-10-23 19:37:32 +00:00
|
|
|
target_ulong tlb_addr = tlb_addr_write(entry);
|
2018-02-26 07:38:39 +00:00
|
|
|
unsigned a_bits = get_alignment_bits(get_memop(oi));
|
2015-08-21 07:04:50 +00:00
|
|
|
uintptr_t haddr;
|
2016-01-16 08:44:02 +00:00
|
|
|
struct hook *hook;
|
|
|
|
bool handled;
|
2017-01-21 01:28:22 +00:00
|
|
|
HOOK_FOREACH_VAR_DECLARE;
|
2015-08-21 07:04:50 +00:00
|
|
|
|
2015-08-28 01:03:17 +00:00
|
|
|
struct uc_struct *uc = env->uc;
|
|
|
|
MemoryRegion *mr = memory_mapping(uc, addr);
|
|
|
|
|
2015-08-21 07:04:50 +00:00
|
|
|
// Unicorn: callback on memory write
|
2016-01-16 08:44:02 +00:00
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_WRITE) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
((uc_cb_hookmem_t)hook->callback)(uc, UC_MEM_WRITE, addr, DATA_SIZE, val, hook->user_data);
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Unicorn: callback on invalid memory
|
2016-01-16 08:44:02 +00:00
|
|
|
if (mr == NULL) {
|
|
|
|
handled = false;
|
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_WRITE_UNMAPPED) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
if ((handled = ((uc_cb_eventmem_t)hook->callback)(uc, UC_MEM_WRITE_UNMAPPED, addr, DATA_SIZE, val, hook->user_data)))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!handled) {
|
2015-08-21 07:04:50 +00:00
|
|
|
// save error & quit
|
|
|
|
env->invalid_addr = addr;
|
2015-09-30 06:46:55 +00:00
|
|
|
env->invalid_error = UC_ERR_WRITE_UNMAPPED;
|
2015-08-21 07:04:50 +00:00
|
|
|
// printf("***** Invalid memory write at " TARGET_FMT_lx "\n", addr);
|
2015-08-28 01:03:17 +00:00
|
|
|
cpu_exit(uc->current_cpu);
|
2015-08-21 07:04:50 +00:00
|
|
|
return;
|
2015-08-26 08:15:38 +00:00
|
|
|
} else {
|
|
|
|
env->invalid_error = UC_ERR_OK;
|
2015-09-24 06:18:02 +00:00
|
|
|
mr = memory_mapping(uc, addr); // FIXME: what if mr is still NULL at this time?
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-29 01:59:45 +00:00
|
|
|
// Unicorn: callback on non-writable memory
|
|
|
|
if (mr != NULL && !(mr->perms & UC_PROT_WRITE)) { //non-writable
|
2016-01-16 08:44:02 +00:00
|
|
|
handled = false;
|
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_WRITE_PROT) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
if ((handled = ((uc_cb_eventmem_t)hook->callback)(uc, UC_MEM_WRITE_PROT, addr, DATA_SIZE, val, hook->user_data)))
|
|
|
|
break;
|
2015-08-28 01:03:17 +00:00
|
|
|
}
|
2016-01-16 08:44:02 +00:00
|
|
|
|
|
|
|
if (handled) {
|
|
|
|
env->invalid_error = UC_ERR_OK;
|
|
|
|
} else {
|
2015-08-28 01:03:17 +00:00
|
|
|
env->invalid_addr = addr;
|
2015-09-01 19:10:09 +00:00
|
|
|
env->invalid_error = UC_ERR_WRITE_PROT;
|
2015-08-28 01:03:17 +00:00
|
|
|
// printf("***** Invalid memory write (ro) at " TARGET_FMT_lx "\n", addr);
|
|
|
|
cpu_exit(uc->current_cpu);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
2015-08-21 07:04:50 +00:00
|
|
|
|
2018-02-26 07:38:39 +00:00
|
|
|
if (addr & ((1 << a_bits) - 1)) {
|
2018-02-25 06:51:06 +00:00
|
|
|
cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
|
|
|
|
mmu_idx, retaddr);
|
|
|
|
}
|
|
|
|
|
2015-08-21 07:04:50 +00:00
|
|
|
/* If the TLB entry is for a different page, reload and try again. */
|
2018-07-03 23:21:22 +00:00
|
|
|
if (!tlb_hit(tlb_addr, addr)) {
|
2018-02-25 08:03:27 +00:00
|
|
|
if (!VICTIM_TLB_HIT(addr_write, addr)) {
|
2018-03-06 15:48:51 +00:00
|
|
|
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
|
|
|
|
mmu_idx, retaddr);
|
2019-02-12 16:48:43 +00:00
|
|
|
index = tlb_index(env, mmu_idx, addr);
|
|
|
|
entry = tlb_entry(env, mmu_idx, addr);
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
cputlb: read CPUTLBEntry.addr_write atomically
Updates can come from other threads, so readers that do not
take tlb_lock must use atomic_read to avoid undefined
behaviour (UB).
This completes the conversion to tlb_lock. This conversion results
on average in no performance loss, as the following experiments
(run on an Intel i7-6700K CPU @ 4.00GHz) show.
1. aarch64 bootup+shutdown test:
- Before:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% )
31,574,905,303 cycles # 4.217 GHz ( +- 0.12% )
57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% )
10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% )
173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% )
7.504481349 seconds time elapsed ( +- 0.14% )
- After:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% )
31,478,476,520 cycles # 4.218 GHz ( +- 0.07% )
57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% )
10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% )
173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% )
7.474970463 seconds time elapsed ( +- 0.07% )
2. SPEC06int:
SPEC06int (test set)
[Y axis: Speedup over master]
1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+
| |
1.1 +-+.................................+++.............................+ tlb-lock-v2 (mutex) +-+
| +++ | +++ tlb-lock-v3 (spinlock) |
| +++ | | +++ +++ | | |
1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+
| ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### |
1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+
| *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # |
0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+
| * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # |
| * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # |
0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+
| * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # |
0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+
| * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
| * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+
| * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # |
0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+
400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean
png: https://imgur.com/a/BHzpPTW
Notes:
- tlb-lock-v2 corresponds to an implementation with a mutex.
- tlb-lock-v3 corresponds to the current implementation, i.e.
a spinlock and a single lock acquisition in tlb_set_page_with_attrs.
Backports commit 403f290c0603f35f2d09c982bf5549b6d0803ec1 from qemu
2018-10-23 19:37:32 +00:00
|
|
|
tlb_addr = tlb_addr_write(entry);
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Handle an IO access. */
|
|
|
|
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
|
2018-02-12 23:31:17 +00:00
|
|
|
CPUIOTLBEntry *iotlbentry;
|
2015-08-21 07:04:50 +00:00
|
|
|
if ((addr & (DATA_SIZE - 1)) != 0) {
|
|
|
|
goto do_unaligned_access;
|
|
|
|
}
|
2018-02-12 23:31:17 +00:00
|
|
|
iotlbentry = &env->iotlb[mmu_idx][index];
|
|
|
|
if (iotlbentry->addr == 0) {
|
2015-08-21 07:04:50 +00:00
|
|
|
env->invalid_addr = addr;
|
2015-09-30 06:46:55 +00:00
|
|
|
env->invalid_error = UC_ERR_WRITE_UNMAPPED;
|
2015-08-21 07:04:50 +00:00
|
|
|
// printf("***** Invalid memory write at " TARGET_FMT_lx "\n", addr);
|
|
|
|
cpu_exit(env->uc->current_cpu);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ??? Note that the io helpers always read data in the target
|
|
|
|
byte ordering. We should push the LE/BE request down into io. */
|
|
|
|
val = TGT_LE(val);
|
2018-11-17 02:34:05 +00:00
|
|
|
glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr,
|
|
|
|
retaddr, tlb_addr & TLB_RECHECK);
|
2015-08-21 07:04:50 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Handle slow unaligned access (it spans two pages or IO). */
|
|
|
|
if (DATA_SIZE > 1
|
|
|
|
&& unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
|
|
|
|
>= TARGET_PAGE_SIZE)) {
|
2018-10-23 19:02:42 +00:00
|
|
|
int i;
|
|
|
|
target_ulong page2;
|
|
|
|
CPUTLBEntry *entry2;
|
2015-08-21 07:04:50 +00:00
|
|
|
do_unaligned_access:
|
cputlb: Fix for self-modifying writes across page boundaries
As it currently stands, QEMU does not properly handle self-modifying code
when the write is unaligned and crosses a page boundary. The procedure
for handling a write to the current translation block is to write-protect
the current translation block, catch the write, split up the translation
block into the current instruction (which remains write-protected so that
the current instruction is not modified) and the remaining instructions
in the translation block, and then restore the CPU state to before the
write occurred so the write will be retried and successfully executed.
However, since unaligned writes across pages are split into one-byte
writes for simplicity, writes to the second page (which is not the
current TB) may succeed before a write to the current TB is attempted,
and since these writes are not invalidated before resuming state after
splitting the TB, these writes will be performed a second time, thus
corrupting the second page. Credit goes to Patrick Hulin for
discovering this.
In recent 64-bit versions of Windows running in emulated mode, this
results in either being very unstable (a BSOD after a couple minutes of
uptime), or being entirely unable to boot. Windows performs one or more
8-byte unaligned self-modifying writes (xors) which intersect the end
of the current TB and the beginning of the next TB, which runs into the
aforementioned issue. This commit fixes that issue by making the
unaligned write loop perform the writes in forwards order, instead of
reverse order. This way, QEMU immediately tries to write to the current
TB, and splits the TB before any write to the second page is executed.
The write then proceeds as intended. With this patch applied, I am able
to boot and use Windows 7 64-bit and Windows 10 64-bit in QEMU without
KVM.
Per Richard Henderson's input, this patch also ensures the second page
is in the TLB before executing the write loop, to ensure the second
page is mapped.
The original discussion of the issue is located at
http://lists.nongnu.org/archive/html/qemu-devel/2014-08/msg02161.html.
Backports commit 81daabaf7a572f138a8b88ba6eea556bdb0cce46 from qemu
2018-02-25 08:12:09 +00:00
|
|
|
/* Ensure the second page is in the TLB. Note that the first page
|
|
|
|
is already guaranteed to be filled, and that the second page
|
|
|
|
cannot evict the first. */
|
|
|
|
page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
|
2018-10-23 19:02:42 +00:00
|
|
|
entry2 = tlb_entry(env, mmu_idx, page2);
|
cputlb: read CPUTLBEntry.addr_write atomically
Updates can come from other threads, so readers that do not
take tlb_lock must use atomic_read to avoid undefined
behaviour (UB).
This completes the conversion to tlb_lock. This conversion results
on average in no performance loss, as the following experiments
(run on an Intel i7-6700K CPU @ 4.00GHz) show.
1. aarch64 bootup+shutdown test:
- Before:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% )
31,574,905,303 cycles # 4.217 GHz ( +- 0.12% )
57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% )
10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% )
173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% )
7.504481349 seconds time elapsed ( +- 0.14% )
- After:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% )
31,478,476,520 cycles # 4.218 GHz ( +- 0.07% )
57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% )
10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% )
173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% )
7.474970463 seconds time elapsed ( +- 0.07% )
2. SPEC06int:
SPEC06int (test set)
[Y axis: Speedup over master]
1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+
| |
1.1 +-+.................................+++.............................+ tlb-lock-v2 (mutex) +-+
| +++ | +++ tlb-lock-v3 (spinlock) |
| +++ | | +++ +++ | | |
1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+
| ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### |
1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+
| *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # |
0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+
| * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # |
| * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # |
0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+
| * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # |
0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+
| * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
| * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+
| * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # |
0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+
400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean
png: https://imgur.com/a/BHzpPTW
Notes:
- tlb-lock-v2 corresponds to an implementation with a mutex.
- tlb-lock-v3 corresponds to the current implementation, i.e.
a spinlock and a single lock acquisition in tlb_set_page_with_attrs.
Backports commit 403f290c0603f35f2d09c982bf5549b6d0803ec1 from qemu
2018-10-23 19:37:32 +00:00
|
|
|
if (!tlb_hit_page(tlb_addr_write(entry2), page2)
|
cputlb: Fix for self-modifying writes across page boundaries
As it currently stands, QEMU does not properly handle self-modifying code
when the write is unaligned and crosses a page boundary. The procedure
for handling a write to the current translation block is to write-protect
the current translation block, catch the write, split up the translation
block into the current instruction (which remains write-protected so that
the current instruction is not modified) and the remaining instructions
in the translation block, and then restore the CPU state to before the
write occurred so the write will be retried and successfully executed.
However, since unaligned writes across pages are split into one-byte
writes for simplicity, writes to the second page (which is not the
current TB) may succeed before a write to the current TB is attempted,
and since these writes are not invalidated before resuming state after
splitting the TB, these writes will be performed a second time, thus
corrupting the second page. Credit goes to Patrick Hulin for
discovering this.
In recent 64-bit versions of Windows running in emulated mode, this
results in either being very unstable (a BSOD after a couple minutes of
uptime), or being entirely unable to boot. Windows performs one or more
8-byte unaligned self-modifying writes (xors) which intersect the end
of the current TB and the beginning of the next TB, which runs into the
aforementioned issue. This commit fixes that issue by making the
unaligned write loop perform the writes in forwards order, instead of
reverse order. This way, QEMU immediately tries to write to the current
TB, and splits the TB before any write to the second page is executed.
The write then proceeds as intended. With this patch applied, I am able
to boot and use Windows 7 64-bit and Windows 10 64-bit in QEMU without
KVM.
Per Richard Henderson's input, this patch also ensures the second page
is in the TLB before executing the write loop, to ensure the second
page is mapped.
The original discussion of the issue is located at
http://lists.nongnu.org/archive/html/qemu-devel/2014-08/msg02161.html.
Backports commit 81daabaf7a572f138a8b88ba6eea556bdb0cce46 from qemu
2018-02-25 08:12:09 +00:00
|
|
|
&& !VICTIM_TLB_HIT(addr_write, page2)) {
|
2018-03-06 15:48:51 +00:00
|
|
|
tlb_fill(ENV_GET_CPU(env), page2, DATA_SIZE, MMU_DATA_STORE,
|
cputlb: Fix for self-modifying writes across page boundaries
As it currently stands, QEMU does not properly handle self-modifying code
when the write is unaligned and crosses a page boundary. The procedure
for handling a write to the current translation block is to write-protect
the current translation block, catch the write, split up the translation
block into the current instruction (which remains write-protected so that
the current instruction is not modified) and the remaining instructions
in the translation block, and then restore the CPU state to before the
write occurred so the write will be retried and successfully executed.
However, since unaligned writes across pages are split into one-byte
writes for simplicity, writes to the second page (which is not the
current TB) may succeed before a write to the current TB is attempted,
and since these writes are not invalidated before resuming state after
splitting the TB, these writes will be performed a second time, thus
corrupting the second page. Credit goes to Patrick Hulin for
discovering this.
In recent 64-bit versions of Windows running in emulated mode, this
results in either being very unstable (a BSOD after a couple minutes of
uptime), or being entirely unable to boot. Windows performs one or more
8-byte unaligned self-modifying writes (xors) which intersect the end
of the current TB and the beginning of the next TB, which runs into the
aforementioned issue. This commit fixes that issue by making the
unaligned write loop perform the writes in forwards order, instead of
reverse order. This way, QEMU immediately tries to write to the current
TB, and splits the TB before any write to the second page is executed.
The write then proceeds as intended. With this patch applied, I am able
to boot and use Windows 7 64-bit and Windows 10 64-bit in QEMU without
KVM.
Per Richard Henderson's input, this patch also ensures the second page
is in the TLB before executing the write loop, to ensure the second
page is mapped.
The original discussion of the issue is located at
http://lists.nongnu.org/archive/html/qemu-devel/2014-08/msg02161.html.
Backports commit 81daabaf7a572f138a8b88ba6eea556bdb0cce46 from qemu
2018-02-25 08:12:09 +00:00
|
|
|
mmu_idx, retaddr);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* XXX: not efficient, but simple. */
|
|
|
|
/* This loop must go in the forward direction to avoid issues
|
|
|
|
with self-modifying code in Windows 64-bit. */
|
|
|
|
for (i = 0; i < DATA_SIZE; ++i) {
|
2015-08-21 07:04:50 +00:00
|
|
|
/* Little-endian extract. */
|
2017-01-19 11:50:28 +00:00
|
|
|
uint8_t val8 = (uint8_t)(val >> (i * 8));
|
2015-08-21 07:04:50 +00:00
|
|
|
glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
|
2018-02-26 07:51:50 +00:00
|
|
|
oi, retaddr);
|
2015-08-28 01:03:17 +00:00
|
|
|
if (env->invalid_error != UC_ERR_OK)
|
|
|
|
break;
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-10-23 19:02:42 +00:00
|
|
|
haddr = (uintptr_t)(addr + entry->addend);
|
2015-08-21 07:04:50 +00:00
|
|
|
#if DATA_SIZE == 1
|
|
|
|
glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val);
|
|
|
|
#else
|
|
|
|
glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
#if DATA_SIZE > 1
|
|
|
|
void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
|
2018-02-11 00:29:47 +00:00
|
|
|
TCGMemOpIdx oi, uintptr_t retaddr)
|
2015-08-21 07:04:50 +00:00
|
|
|
{
|
2018-10-23 19:02:42 +00:00
|
|
|
uintptr_t mmu_idx = get_mmuidx(oi);
|
|
|
|
uintptr_t index = tlb_index(env, mmu_idx, addr);
|
|
|
|
CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
|
cputlb: read CPUTLBEntry.addr_write atomically
Updates can come from other threads, so readers that do not
take tlb_lock must use atomic_read to avoid undefined
behaviour (UB).
This completes the conversion to tlb_lock. This conversion results
on average in no performance loss, as the following experiments
(run on an Intel i7-6700K CPU @ 4.00GHz) show.
1. aarch64 bootup+shutdown test:
- Before:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% )
31,574,905,303 cycles # 4.217 GHz ( +- 0.12% )
57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% )
10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% )
173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% )
7.504481349 seconds time elapsed ( +- 0.14% )
- After:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% )
31,478,476,520 cycles # 4.218 GHz ( +- 0.07% )
57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% )
10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% )
173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% )
7.474970463 seconds time elapsed ( +- 0.07% )
2. SPEC06int:
SPEC06int (test set)
[Y axis: Speedup over master]
1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+
| |
1.1 +-+.................................+++.............................+ tlb-lock-v2 (mutex) +-+
| +++ | +++ tlb-lock-v3 (spinlock) |
| +++ | | +++ +++ | | |
1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+
| ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### |
1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+
| *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # |
0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+
| * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # |
| * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # |
0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+
| * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # |
0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+
| * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
| * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+
| * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # |
0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+
400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean
png: https://imgur.com/a/BHzpPTW
Notes:
- tlb-lock-v2 corresponds to an implementation with a mutex.
- tlb-lock-v3 corresponds to the current implementation, i.e.
a spinlock and a single lock acquisition in tlb_set_page_with_attrs.
Backports commit 403f290c0603f35f2d09c982bf5549b6d0803ec1 from qemu
2018-10-23 19:37:32 +00:00
|
|
|
target_ulong tlb_addr = tlb_addr_write(entry);
|
2018-02-26 07:38:39 +00:00
|
|
|
unsigned a_bits = get_alignment_bits(get_memop(oi));
|
2015-08-21 07:04:50 +00:00
|
|
|
uintptr_t haddr;
|
2016-01-16 08:44:02 +00:00
|
|
|
struct hook *hook;
|
|
|
|
bool handled;
|
2017-01-21 01:28:22 +00:00
|
|
|
HOOK_FOREACH_VAR_DECLARE;
|
2015-08-21 07:04:50 +00:00
|
|
|
|
2015-08-28 01:03:17 +00:00
|
|
|
struct uc_struct *uc = env->uc;
|
|
|
|
MemoryRegion *mr = memory_mapping(uc, addr);
|
|
|
|
|
2015-08-21 07:04:50 +00:00
|
|
|
// Unicorn: callback on memory write
|
2016-01-16 08:44:02 +00:00
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_WRITE) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
((uc_cb_hookmem_t)hook->callback)(uc, UC_MEM_WRITE, addr, DATA_SIZE, val, hook->user_data);
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Unicorn: callback on invalid memory
|
2016-01-16 08:44:02 +00:00
|
|
|
if (mr == NULL) {
|
|
|
|
handled = false;
|
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_WRITE_UNMAPPED) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
if ((handled = ((uc_cb_eventmem_t)hook->callback)(uc, UC_MEM_WRITE_UNMAPPED, addr, DATA_SIZE, val, hook->user_data)))
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!handled) {
|
2015-08-21 07:04:50 +00:00
|
|
|
// save error & quit
|
|
|
|
env->invalid_addr = addr;
|
2015-09-30 06:46:55 +00:00
|
|
|
env->invalid_error = UC_ERR_WRITE_UNMAPPED;
|
2015-08-21 07:04:50 +00:00
|
|
|
// printf("***** Invalid memory write at " TARGET_FMT_lx "\n", addr);
|
2015-08-28 01:03:17 +00:00
|
|
|
cpu_exit(uc->current_cpu);
|
2015-08-21 07:04:50 +00:00
|
|
|
return;
|
2015-08-26 08:15:38 +00:00
|
|
|
} else {
|
|
|
|
env->invalid_error = UC_ERR_OK;
|
2015-09-24 06:18:02 +00:00
|
|
|
mr = memory_mapping(uc, addr); // FIXME: what if mr is still NULL at this time?
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-08-29 01:59:45 +00:00
|
|
|
// Unicorn: callback on non-writable memory
|
|
|
|
if (mr != NULL && !(mr->perms & UC_PROT_WRITE)) { //non-writable
|
2016-01-16 08:44:02 +00:00
|
|
|
handled = false;
|
|
|
|
HOOK_FOREACH(uc, hook, UC_HOOK_MEM_WRITE_PROT) {
|
2016-01-26 09:37:48 +00:00
|
|
|
if (!HOOK_BOUND_CHECK(hook, addr))
|
2016-01-16 08:44:02 +00:00
|
|
|
continue;
|
|
|
|
if ((handled = ((uc_cb_eventmem_t)hook->callback)(uc, UC_MEM_WRITE_PROT, addr, DATA_SIZE, val, hook->user_data)))
|
|
|
|
break;
|
2015-08-28 01:03:17 +00:00
|
|
|
}
|
2016-01-16 08:44:02 +00:00
|
|
|
|
|
|
|
if (handled) {
|
|
|
|
env->invalid_error = UC_ERR_OK;
|
|
|
|
} else {
|
2015-08-28 01:03:17 +00:00
|
|
|
env->invalid_addr = addr;
|
2015-09-01 19:10:09 +00:00
|
|
|
env->invalid_error = UC_ERR_WRITE_PROT;
|
2015-08-28 01:03:17 +00:00
|
|
|
// printf("***** Invalid memory write (ro) at " TARGET_FMT_lx "\n", addr);
|
|
|
|
cpu_exit(uc->current_cpu);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-26 07:38:39 +00:00
|
|
|
if (addr & ((1 << a_bits) - 1)) {
|
2018-02-25 06:51:06 +00:00
|
|
|
cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
|
|
|
|
mmu_idx, retaddr);
|
|
|
|
}
|
|
|
|
|
2015-08-21 07:04:50 +00:00
|
|
|
/* If the TLB entry is for a different page, reload and try again. */
|
2018-07-03 23:21:22 +00:00
|
|
|
if (!tlb_hit(tlb_addr, addr)) {
|
2018-02-25 08:03:27 +00:00
|
|
|
if (!VICTIM_TLB_HIT(addr_write, addr)) {
|
2018-03-06 15:48:51 +00:00
|
|
|
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
|
|
|
|
mmu_idx, retaddr);
|
2019-02-12 16:48:43 +00:00
|
|
|
index = tlb_index(env, mmu_idx, addr);
|
|
|
|
entry = tlb_entry(env, mmu_idx, addr);
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
cputlb: read CPUTLBEntry.addr_write atomically
Updates can come from other threads, so readers that do not
take tlb_lock must use atomic_read to avoid undefined
behaviour (UB).
This completes the conversion to tlb_lock. This conversion results
on average in no performance loss, as the following experiments
(run on an Intel i7-6700K CPU @ 4.00GHz) show.
1. aarch64 bootup+shutdown test:
- Before:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% )
31,574,905,303 cycles # 4.217 GHz ( +- 0.12% )
57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% )
10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% )
173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% )
7.504481349 seconds time elapsed ( +- 0.14% )
- After:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% )
31,478,476,520 cycles # 4.218 GHz ( +- 0.07% )
57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% )
10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% )
173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% )
7.474970463 seconds time elapsed ( +- 0.07% )
2. SPEC06int:
SPEC06int (test set)
[Y axis: Speedup over master]
1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+
| |
1.1 +-+.................................+++.............................+ tlb-lock-v2 (mutex) +-+
| +++ | +++ tlb-lock-v3 (spinlock) |
| +++ | | +++ +++ | | |
1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+
| ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### |
1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+
| *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # |
0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+
| * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # |
| * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # |
0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+
| * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # |
0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+
| * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
| * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+
| * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # |
0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+
400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean
png: https://imgur.com/a/BHzpPTW
Notes:
- tlb-lock-v2 corresponds to an implementation with a mutex.
- tlb-lock-v3 corresponds to the current implementation, i.e.
a spinlock and a single lock acquisition in tlb_set_page_with_attrs.
Backports commit 403f290c0603f35f2d09c982bf5549b6d0803ec1 from qemu
2018-10-23 19:37:32 +00:00
|
|
|
tlb_addr = tlb_addr_write(entry);
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Handle an IO access. */
|
|
|
|
if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
|
2018-02-12 23:31:17 +00:00
|
|
|
CPUIOTLBEntry *iotlbentry;
|
2015-08-21 07:04:50 +00:00
|
|
|
if ((addr & (DATA_SIZE - 1)) != 0) {
|
|
|
|
goto do_unaligned_access;
|
|
|
|
}
|
2018-02-12 23:31:17 +00:00
|
|
|
iotlbentry = &env->iotlb[mmu_idx][index];
|
|
|
|
if (iotlbentry->addr == 0) {
|
2015-08-21 07:04:50 +00:00
|
|
|
env->invalid_addr = addr;
|
2015-09-30 06:46:55 +00:00
|
|
|
env->invalid_error = UC_ERR_WRITE_UNMAPPED;
|
2015-08-21 07:04:50 +00:00
|
|
|
// printf("***** Invalid memory write at " TARGET_FMT_lx "\n", addr);
|
|
|
|
cpu_exit(env->uc->current_cpu);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* ??? Note that the io helpers always read data in the target
|
|
|
|
byte ordering. We should push the LE/BE request down into io. */
|
|
|
|
val = TGT_BE(val);
|
2018-11-17 02:34:05 +00:00
|
|
|
glue(io_write, SUFFIX)(env, mmu_idx, index, val, addr, retaddr,
|
|
|
|
tlb_addr & TLB_RECHECK);
|
2015-08-21 07:04:50 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Handle slow unaligned access (it spans two pages or IO). */
|
|
|
|
if (DATA_SIZE > 1
|
|
|
|
&& unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1
|
|
|
|
>= TARGET_PAGE_SIZE)) {
|
2018-10-23 19:02:42 +00:00
|
|
|
int i;
|
|
|
|
target_ulong page2;
|
|
|
|
CPUTLBEntry *entry2;
|
2015-08-21 07:04:50 +00:00
|
|
|
do_unaligned_access:
|
cputlb: Fix for self-modifying writes across page boundaries
As it currently stands, QEMU does not properly handle self-modifying code
when the write is unaligned and crosses a page boundary. The procedure
for handling a write to the current translation block is to write-protect
the current translation block, catch the write, split up the translation
block into the current instruction (which remains write-protected so that
the current instruction is not modified) and the remaining instructions
in the translation block, and then restore the CPU state to before the
write occurred so the write will be retried and successfully executed.
However, since unaligned writes across pages are split into one-byte
writes for simplicity, writes to the second page (which is not the
current TB) may succeed before a write to the current TB is attempted,
and since these writes are not invalidated before resuming state after
splitting the TB, these writes will be performed a second time, thus
corrupting the second page. Credit goes to Patrick Hulin for
discovering this.
In recent 64-bit versions of Windows running in emulated mode, this
results in either being very unstable (a BSOD after a couple minutes of
uptime), or being entirely unable to boot. Windows performs one or more
8-byte unaligned self-modifying writes (xors) which intersect the end
of the current TB and the beginning of the next TB, which runs into the
aforementioned issue. This commit fixes that issue by making the
unaligned write loop perform the writes in forwards order, instead of
reverse order. This way, QEMU immediately tries to write to the current
TB, and splits the TB before any write to the second page is executed.
The write then proceeds as intended. With this patch applied, I am able
to boot and use Windows 7 64-bit and Windows 10 64-bit in QEMU without
KVM.
Per Richard Henderson's input, this patch also ensures the second page
is in the TLB before executing the write loop, to ensure the second
page is mapped.
The original discussion of the issue is located at
http://lists.nongnu.org/archive/html/qemu-devel/2014-08/msg02161.html.
Backports commit 81daabaf7a572f138a8b88ba6eea556bdb0cce46 from qemu
2018-02-25 08:12:09 +00:00
|
|
|
/* Ensure the second page is in the TLB. Note that the first page
|
|
|
|
is already guaranteed to be filled, and that the second page
|
|
|
|
cannot evict the first. */
|
|
|
|
page2 = (addr + DATA_SIZE) & TARGET_PAGE_MASK;
|
2018-10-23 19:02:42 +00:00
|
|
|
entry2 = tlb_entry(env, mmu_idx, page2);
|
cputlb: read CPUTLBEntry.addr_write atomically
Updates can come from other threads, so readers that do not
take tlb_lock must use atomic_read to avoid undefined
behaviour (UB).
This completes the conversion to tlb_lock. This conversion results
on average in no performance loss, as the following experiments
(run on an Intel i7-6700K CPU @ 4.00GHz) show.
1. aarch64 bootup+shutdown test:
- Before:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7487.087786 task-clock (msec) # 0.998 CPUs utilized ( +- 0.12% )
31,574,905,303 cycles # 4.217 GHz ( +- 0.12% )
57,097,908,812 instructions # 1.81 insns per cycle ( +- 0.08% )
10,255,415,367 branches # 1369.747 M/sec ( +- 0.08% )
173,278,962 branch-misses # 1.69% of all branches ( +- 0.18% )
7.504481349 seconds time elapsed ( +- 0.14% )
- After:
Performance counter stats for 'taskset -c 0 ../img/aarch64/die.sh' (10 runs):
7462.441328 task-clock (msec) # 0.998 CPUs utilized ( +- 0.07% )
31,478,476,520 cycles # 4.218 GHz ( +- 0.07% )
57,017,330,084 instructions # 1.81 insns per cycle ( +- 0.05% )
10,251,929,667 branches # 1373.804 M/sec ( +- 0.05% )
173,023,787 branch-misses # 1.69% of all branches ( +- 0.11% )
7.474970463 seconds time elapsed ( +- 0.07% )
2. SPEC06int:
SPEC06int (test set)
[Y axis: Speedup over master]
1.15 +-+----+------+------+------+------+------+-------+------+------+------+------+------+------+----+-+
| |
1.1 +-+.................................+++.............................+ tlb-lock-v2 (m+++x) +-+
| +++ | +++ tlb-lock-v3 (spinl|ck) |
| +++ | | +++ +++ | | |
1.05 +-+....+++...........####.........|####.+++.|......|.....###....+++...........+++....###.........+-+
| ### ++#| # |# |# ***### +++### +++#+# | +++ | #|# ### |
1 +-+++***+#++++####+++#++#++++++++++#++#+*+*++#++++#+#+****+#++++###++++###++++###++++#+#++++#+#+++-+
| *+* # #++# *** # #### *** # * *++# ****+# *| * # ****|# |# # #|# #+# # # |
0.95 +-+..*.*.#....#..#.*|*..#...#..#.*|*..#.*.*..#.*|.*.#.*++*.#.*++*+#.****.#....#+#....#.#..++#.#..+-+
| * * # # # *|* # # # *|* # * * # *++* # * * # * * # * |* # ++# # # # *** # |
| * * # ++# # *+* # # # *|* # * * # * * # * * # * * # *++* # **** # ++# # * * # |
0.9 +-+..*.*.#...|#..#.*.*..#.++#..#.*|*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*.|*.#...|#.#..*.*.#..+-+
| * * # *** # * * # |# # *+* # * * # * * # * * # * * # * * # *++* # |# # * * # |
0.85 +-+..*.*.#..*|*..#.*.*..#.***..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.****.#..*.*.#..+-+
| * * # *+* # * * # *|* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
| * * # * * # * * # *+* # * * # * * # * * # * * # * * # * * # * * # * |* # * * # |
0.8 +-+..*.*.#..*.*..#.*.*..#.*.*..#.*.*..#.*.*..#.*..*.#.*..*.#.*..*.#.*..*.#.*..*.#.*++*.#..*.*.#..+-+
| * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # * * # |
0.75 +-+--***##--***###-***###-***###-***###-***###-****##-****##-****##-****##-****##-****##--***##--+-+
400.perlben401.bzip2403.gcc429.m445.gob456.hmme45462.libqua464.h26471.omnet473483.xalancbmkgeomean
png: https://imgur.com/a/BHzpPTW
Notes:
- tlb-lock-v2 corresponds to an implementation with a mutex.
- tlb-lock-v3 corresponds to the current implementation, i.e.
a spinlock and a single lock acquisition in tlb_set_page_with_attrs.
Backports commit 403f290c0603f35f2d09c982bf5549b6d0803ec1 from qemu
2018-10-23 19:37:32 +00:00
|
|
|
if (!tlb_hit_page(tlb_addr_write(entry2), page2)
|
cputlb: Fix for self-modifying writes across page boundaries
As it currently stands, QEMU does not properly handle self-modifying code
when the write is unaligned and crosses a page boundary. The procedure
for handling a write to the current translation block is to write-protect
the current translation block, catch the write, split up the translation
block into the current instruction (which remains write-protected so that
the current instruction is not modified) and the remaining instructions
in the translation block, and then restore the CPU state to before the
write occurred so the write will be retried and successfully executed.
However, since unaligned writes across pages are split into one-byte
writes for simplicity, writes to the second page (which is not the
current TB) may succeed before a write to the current TB is attempted,
and since these writes are not invalidated before resuming state after
splitting the TB, these writes will be performed a second time, thus
corrupting the second page. Credit goes to Patrick Hulin for
discovering this.
In recent 64-bit versions of Windows running in emulated mode, this
results in either being very unstable (a BSOD after a couple minutes of
uptime), or being entirely unable to boot. Windows performs one or more
8-byte unaligned self-modifying writes (xors) which intersect the end
of the current TB and the beginning of the next TB, which runs into the
aforementioned issue. This commit fixes that issue by making the
unaligned write loop perform the writes in forwards order, instead of
reverse order. This way, QEMU immediately tries to write to the current
TB, and splits the TB before any write to the second page is executed.
The write then proceeds as intended. With this patch applied, I am able
to boot and use Windows 7 64-bit and Windows 10 64-bit in QEMU without
KVM.
Per Richard Henderson's input, this patch also ensures the second page
is in the TLB before executing the write loop, to ensure the second
page is mapped.
The original discussion of the issue is located at
http://lists.nongnu.org/archive/html/qemu-devel/2014-08/msg02161.html.
Backports commit 81daabaf7a572f138a8b88ba6eea556bdb0cce46 from qemu
2018-02-25 08:12:09 +00:00
|
|
|
&& !VICTIM_TLB_HIT(addr_write, page2)) {
|
2018-03-06 15:48:51 +00:00
|
|
|
tlb_fill(ENV_GET_CPU(env), addr, DATA_SIZE, MMU_DATA_STORE,
|
cputlb: Fix for self-modifying writes across page boundaries
As it currently stands, QEMU does not properly handle self-modifying code
when the write is unaligned and crosses a page boundary. The procedure
for handling a write to the current translation block is to write-protect
the current translation block, catch the write, split up the translation
block into the current instruction (which remains write-protected so that
the current instruction is not modified) and the remaining instructions
in the translation block, and then restore the CPU state to before the
write occurred so the write will be retried and successfully executed.
However, since unaligned writes across pages are split into one-byte
writes for simplicity, writes to the second page (which is not the
current TB) may succeed before a write to the current TB is attempted,
and since these writes are not invalidated before resuming state after
splitting the TB, these writes will be performed a second time, thus
corrupting the second page. Credit goes to Patrick Hulin for
discovering this.
In recent 64-bit versions of Windows running in emulated mode, this
results in either being very unstable (a BSOD after a couple minutes of
uptime), or being entirely unable to boot. Windows performs one or more
8-byte unaligned self-modifying writes (xors) which intersect the end
of the current TB and the beginning of the next TB, which runs into the
aforementioned issue. This commit fixes that issue by making the
unaligned write loop perform the writes in forwards order, instead of
reverse order. This way, QEMU immediately tries to write to the current
TB, and splits the TB before any write to the second page is executed.
The write then proceeds as intended. With this patch applied, I am able
to boot and use Windows 7 64-bit and Windows 10 64-bit in QEMU without
KVM.
Per Richard Henderson's input, this patch also ensures the second page
is in the TLB before executing the write loop, to ensure the second
page is mapped.
The original discussion of the issue is located at
http://lists.nongnu.org/archive/html/qemu-devel/2014-08/msg02161.html.
Backports commit 81daabaf7a572f138a8b88ba6eea556bdb0cce46 from qemu
2018-02-25 08:12:09 +00:00
|
|
|
mmu_idx, retaddr);
|
|
|
|
}
|
|
|
|
|
2015-08-21 07:04:50 +00:00
|
|
|
/* XXX: not efficient, but simple */
|
cputlb: Fix for self-modifying writes across page boundaries
As it currently stands, QEMU does not properly handle self-modifying code
when the write is unaligned and crosses a page boundary. The procedure
for handling a write to the current translation block is to write-protect
the current translation block, catch the write, split up the translation
block into the current instruction (which remains write-protected so that
the current instruction is not modified) and the remaining instructions
in the translation block, and then restore the CPU state to before the
write occurred so the write will be retried and successfully executed.
However, since unaligned writes across pages are split into one-byte
writes for simplicity, writes to the second page (which is not the
current TB) may succeed before a write to the current TB is attempted,
and since these writes are not invalidated before resuming state after
splitting the TB, these writes will be performed a second time, thus
corrupting the second page. Credit goes to Patrick Hulin for
discovering this.
In recent 64-bit versions of Windows running in emulated mode, this
results in either being very unstable (a BSOD after a couple minutes of
uptime), or being entirely unable to boot. Windows performs one or more
8-byte unaligned self-modifying writes (xors) which intersect the end
of the current TB and the beginning of the next TB, which runs into the
aforementioned issue. This commit fixes that issue by making the
unaligned write loop perform the writes in forwards order, instead of
reverse order. This way, QEMU immediately tries to write to the current
TB, and splits the TB before any write to the second page is executed.
The write then proceeds as intended. With this patch applied, I am able
to boot and use Windows 7 64-bit and Windows 10 64-bit in QEMU without
KVM.
Per Richard Henderson's input, this patch also ensures the second page
is in the TLB before executing the write loop, to ensure the second
page is mapped.
The original discussion of the issue is located at
http://lists.nongnu.org/archive/html/qemu-devel/2014-08/msg02161.html.
Backports commit 81daabaf7a572f138a8b88ba6eea556bdb0cce46 from qemu
2018-02-25 08:12:09 +00:00
|
|
|
/* This loop must go in the forward direction to avoid issues
|
|
|
|
with self-modifying code. */
|
|
|
|
for (i = 0; i < DATA_SIZE; ++i) {
|
2015-08-21 07:04:50 +00:00
|
|
|
/* Big-endian extract. */
|
2017-01-19 11:50:28 +00:00
|
|
|
uint8_t val8 = (uint8_t)(val >> (((DATA_SIZE - 1) * 8) - (i * 8)));
|
2015-08-21 07:04:50 +00:00
|
|
|
glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8,
|
2018-02-26 07:51:50 +00:00
|
|
|
oi, retaddr);
|
2015-08-28 01:03:17 +00:00
|
|
|
if (env->invalid_error != UC_ERR_OK)
|
|
|
|
break;
|
2015-08-21 07:04:50 +00:00
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2018-10-23 19:02:42 +00:00
|
|
|
haddr = (uintptr_t)(addr + entry->addend);
|
2015-08-21 07:04:50 +00:00
|
|
|
glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val);
|
|
|
|
}
|
|
|
|
#endif /* DATA_SIZE > 1 */
|
|
|
|
#endif /* !defined(SOFTMMU_CODE_ACCESS) */
|
|
|
|
|
|
|
|
/* Template cleanup: drop the macros associated with this expansion of the
 * template, including DATA_SIZE itself (tested at the top of the file to
 * select SUFFIX, LSUFFIX, SDATA_TYPE and DATA_TYPE).
 * NOTE(review): presumably this lets the includer define a new DATA_SIZE
 * and include the template again without macro-redefinition conflicts --
 * confirm against the include sites. */
#undef READ_ACCESS_TYPE
|
|
|
|
#undef DATA_TYPE
|
|
|
|
#undef SUFFIX
|
|
|
|
#undef LSUFFIX
|
|
|
|
#undef DATA_SIZE
|
|
|
|
#undef ADDR_READ
|
|
|
|
#undef WORD_TYPE
|
|
|
|
#undef SDATA_TYPE
|
|
|
|
#undef USUFFIX
|
|
|
|
#undef SSUFFIX
|
|
|
|
#undef BSWAP
|
|
|
|
#undef helper_le_ld_name
|
|
|
|
#undef helper_be_ld_name
|
|
|
|
#undef helper_le_lds_name
|
|
|
|
#undef helper_be_lds_name
|
|
|
|
#undef helper_le_st_name
|
|
|
|
#undef helper_be_st_name
|