From 022f8d82d1e93480c47c7c3b38d3aebd7adee838 Mon Sep 17 00:00:00 2001
From: Nguyen Anh Quynh <aquynh@gmail.com>
Date: Fri, 4 Sep 2015 11:55:17 +0800
Subject: [PATCH] handle memory fetch as invalid memory access. now we can also
 report error if exec memory is unmapped (UC_ERR_MEM_FETCH)

---
 bindings/go/unicorn/unicorn_const.go     | 24 ++++++------
 bindings/python/unicorn/unicorn_const.py | 24 ++++++------
 include/unicorn/unicorn.h                |  2 +
 qemu/softmmu_template.h                  | 48 ++++++++++++++++++++----
 regress/reg_write_sign_extension.py      |  2 +-
 uc.c                                     |  6 ++-
 6 files changed, 73 insertions(+), 33 deletions(-)

diff --git a/bindings/go/unicorn/unicorn_const.go b/bindings/go/unicorn/unicorn_const.go
index 04c52639..d8b42215 100644
--- a/bindings/go/unicorn/unicorn_const.go
+++ b/bindings/go/unicorn/unicorn_const.go
@@ -41,20 +41,22 @@ const (
 	UC_ERR_VERSION = 5
 	UC_ERR_MEM_READ = 6
 	UC_ERR_MEM_WRITE = 7
-	UC_ERR_CODE_INVALID = 8
-	UC_ERR_HOOK = 9
-	UC_ERR_INSN_INVALID = 10
-	UC_ERR_MAP = 11
-	UC_ERR_WRITE_PROT = 12
-	UC_ERR_READ_PROT = 13
-	UC_ERR_EXEC_PROT = 14
-	UC_ERR_INVAL = 15
+	UC_ERR_MEM_FETCH = 8
+	UC_ERR_CODE_INVALID = 9
+	UC_ERR_HOOK = 10
+	UC_ERR_INSN_INVALID = 11
+	UC_ERR_MAP = 12
+	UC_ERR_WRITE_PROT = 13
+	UC_ERR_READ_PROT = 14
+	UC_ERR_EXEC_PROT = 15
+	UC_ERR_INVAL = 16
 	UC_MEM_READ = 16
 	UC_MEM_WRITE = 17
 	UC_MEM_READ_WRITE = 18
-	UC_MEM_WRITE_PROT = 19
-	UC_MEM_READ_PROT = 20
-	UC_MEM_EXEC_PROT = 21
+	UC_MEM_EXEC = 19
+	UC_MEM_WRITE_PROT = 20
+	UC_MEM_READ_PROT = 21
+	UC_MEM_EXEC_PROT = 22
 	UC_HOOK_INTR = 32
 	UC_HOOK_INSN = 33
 	UC_HOOK_CODE = 34
diff --git a/bindings/python/unicorn/unicorn_const.py b/bindings/python/unicorn/unicorn_const.py
index 0d667784..b63b4a27 100644
--- a/bindings/python/unicorn/unicorn_const.py
+++ b/bindings/python/unicorn/unicorn_const.py
@@ -39,20 +39,22 @@ UC_ERR_MODE = 4
 UC_ERR_VERSION = 5
 UC_ERR_MEM_READ = 6
 UC_ERR_MEM_WRITE = 7
-UC_ERR_CODE_INVALID = 8
-UC_ERR_HOOK = 9
-UC_ERR_INSN_INVALID = 10
-UC_ERR_MAP = 11
-UC_ERR_WRITE_PROT = 12
-UC_ERR_READ_PROT = 13
-UC_ERR_EXEC_PROT = 14
-UC_ERR_INVAL = 15
+UC_ERR_MEM_FETCH = 8
+UC_ERR_CODE_INVALID = 9
+UC_ERR_HOOK = 10
+UC_ERR_INSN_INVALID = 11
+UC_ERR_MAP = 12
+UC_ERR_WRITE_PROT = 13
+UC_ERR_READ_PROT = 14
+UC_ERR_EXEC_PROT = 15
+UC_ERR_INVAL = 16
 UC_MEM_READ = 16
 UC_MEM_WRITE = 17
 UC_MEM_READ_WRITE = 18
-UC_MEM_WRITE_PROT = 19
-UC_MEM_READ_PROT = 20
-UC_MEM_EXEC_PROT = 21
+UC_MEM_EXEC = 19
+UC_MEM_WRITE_PROT = 20
+UC_MEM_READ_PROT = 21
+UC_MEM_EXEC_PROT = 22
 UC_HOOK_INTR = 32
 UC_HOOK_INSN = 33
 UC_HOOK_CODE = 34
diff --git a/include/unicorn/unicorn.h b/include/unicorn/unicorn.h
index 1aae5c11..855b34d8 100644
--- a/include/unicorn/unicorn.h
+++ b/include/unicorn/unicorn.h
@@ -113,6 +113,7 @@ typedef enum uc_err {
     UC_ERR_VERSION,  // Unsupported version (bindings)
     UC_ERR_MEM_READ, // Quit emulation due to invalid memory READ: uc_emu_start()
     UC_ERR_MEM_WRITE, // Quit emulation due to invalid memory WRITE: uc_emu_start()
+    UC_ERR_MEM_FETCH, // Quit emulation due to invalid memory FETCH: uc_emu_start()
     UC_ERR_CODE_INVALID, // Quit emulation due to invalid code address: uc_emu_start()
     UC_ERR_HOOK,    // Invalid hook type: uc_hook_add()
     UC_ERR_INSN_INVALID, // Quit emulation due to invalid instruction: uc_emu_start()
@@ -152,6 +153,7 @@ typedef enum uc_mem_type {
     UC_MEM_READ = 16,   // Memory is read from
     UC_MEM_WRITE,       // Memory is written to
     UC_MEM_READ_WRITE,  // Memory is accessed (either READ or WRITE)
+    UC_MEM_EXEC,        // Memory is fetched, but unmapped (invalid memory access)
     UC_MEM_WRITE_PROT,  // write to write protected memory
     UC_MEM_READ_PROT,   // read from read protected memory
     UC_MEM_EXEC_PROT,   // fetch from non-executable memory
diff --git a/qemu/softmmu_template.h b/qemu/softmmu_template.h
index 3c851686..07af297d 100644
--- a/qemu/softmmu_template.h
+++ b/qemu/softmmu_template.h
@@ -182,8 +182,24 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
     MemoryRegion *mr = memory_mapping(uc, addr);
 
 #if defined(SOFTMMU_CODE_ACCESS)
+    // Unicorn: callback on fetch from unmapped memory
+    if (mr == NULL) {  // memory is not mapped
+        if (uc->hook_mem_idx != 0 && ((uc_cb_eventmem_t)uc->hook_callbacks[uc->hook_mem_idx].callback)(
+                                       uc, UC_MEM_EXEC, addr, DATA_SIZE, 0,
+                                       uc->hook_callbacks[uc->hook_mem_idx].user_data)) {
+            env->invalid_error = UC_ERR_OK;
+            mr = memory_mapping(uc, addr);  // FIXME: what if mr is still NULL at this time?
+        } else {
+            env->invalid_addr = addr;
+            env->invalid_error = UC_ERR_MEM_FETCH;
+            // printf("***** Invalid fetch (unmapped memory) at " TARGET_FMT_lx "\n", addr);
+            cpu_exit(uc->current_cpu);
+            return 0;
+        }
+    }
+
     // Unicorn: callback on fetch from NX
-    if (mr != NULL && !(mr->perms & UC_PROT_EXEC)) {  //non-executable
+    if (mr != NULL && !(mr->perms & UC_PROT_EXEC)) {  // non-executable
         if (uc->hook_mem_idx != 0 && ((uc_cb_eventmem_t)uc->hook_callbacks[uc->hook_mem_idx].callback)(
                                        uc, UC_MEM_EXEC_PROT, addr, DATA_SIZE, 0,
                                        uc->hook_callbacks[uc->hook_mem_idx].user_data)) {
@@ -199,7 +215,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
 #endif
 
     // Unicorn: callback on memory read
-    if (env->uc->hook_mem_read && READ_ACCESS_TYPE == MMU_DATA_LOAD) {
+    if (READ_ACCESS_TYPE == MMU_DATA_LOAD && env->uc->hook_mem_read) {
         struct hook_struct *trace = hook_find(env->uc, UC_HOOK_MEM_READ, addr);
         if (trace) {
             ((uc_cb_hookmem_t)trace->callback)(env->uc, UC_MEM_READ,
@@ -208,7 +224,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
     }
 
     // Unicorn: callback on invalid memory
-    if (env->uc->hook_mem_idx && mr == NULL) {
+    if (READ_ACCESS_TYPE == MMU_DATA_LOAD && env->uc->hook_mem_idx && mr == NULL) {
         if (!((uc_cb_eventmem_t)env->uc->hook_callbacks[env->uc->hook_mem_idx].callback)(
                     env->uc, UC_MEM_READ, addr, DATA_SIZE, 0,
                     env->uc->hook_callbacks[env->uc->hook_mem_idx].user_data)) {
@@ -224,7 +240,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
     }
 
     // Unicorn: callback on non-readable memory
-    if (mr != NULL && !(mr->perms & UC_PROT_READ)) {  //non-readable
+    if (READ_ACCESS_TYPE == MMU_DATA_LOAD && mr != NULL && !(mr->perms & UC_PROT_READ)) {  //non-readable
         if (uc->hook_mem_idx != 0 && ((uc_cb_eventmem_t)uc->hook_callbacks[uc->hook_mem_idx].callback)(
                                        uc, UC_MEM_READ_PROT, addr, DATA_SIZE, 0,
                                        uc->hook_callbacks[uc->hook_mem_idx].user_data)) {
@@ -340,8 +356,24 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
     MemoryRegion *mr = memory_mapping(uc, addr);
 
 #if defined(SOFTMMU_CODE_ACCESS)
+    // Unicorn: callback on fetch from unmapped memory
+    if (mr == NULL) {  // memory is not mapped
+        if (uc->hook_mem_idx != 0 && ((uc_cb_eventmem_t)uc->hook_callbacks[uc->hook_mem_idx].callback)(
+                                       uc, UC_MEM_EXEC, addr, DATA_SIZE, 0,
+                                       uc->hook_callbacks[uc->hook_mem_idx].user_data)) {
+            env->invalid_error = UC_ERR_OK;
+            mr = memory_mapping(uc, addr);  // FIXME: what if mr is still NULL at this time?
+        } else {
+            env->invalid_addr = addr;
+            env->invalid_error = UC_ERR_MEM_FETCH;
+            // printf("***** Invalid fetch (unmapped memory) at " TARGET_FMT_lx "\n", addr);
+            cpu_exit(uc->current_cpu);
+            return 0;
+        }
+    }
+
     // Unicorn: callback on fetch from NX
-    if (mr != NULL && !(mr->perms & UC_PROT_EXEC)) {  //non-executable
+    if (mr != NULL && !(mr->perms & UC_PROT_EXEC)) {  // non-executable
         if (uc->hook_mem_idx != 0 && ((uc_cb_eventmem_t)uc->hook_callbacks[uc->hook_mem_idx].callback)(
                                        uc, UC_MEM_EXEC_PROT, addr, DATA_SIZE, 0,
                                        uc->hook_callbacks[uc->hook_mem_idx].user_data)) {
@@ -357,7 +389,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
 #endif
 
     // Unicorn: callback on memory read
-    if (env->uc->hook_mem_read && READ_ACCESS_TYPE == MMU_DATA_LOAD) {
+    if (READ_ACCESS_TYPE == MMU_DATA_LOAD && env->uc->hook_mem_read) {
         struct hook_struct *trace = hook_find(env->uc, UC_HOOK_MEM_READ, addr);
         if (trace) {
             ((uc_cb_hookmem_t)trace->callback)(env->uc, UC_MEM_READ,
@@ -366,7 +398,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
     }
 
     // Unicorn: callback on invalid memory
-    if (env->uc->hook_mem_idx && mr == NULL) {
+    if (READ_ACCESS_TYPE == MMU_DATA_LOAD && env->uc->hook_mem_idx && mr == NULL) {
         if (!((uc_cb_eventmem_t)env->uc->hook_callbacks[env->uc->hook_mem_idx].callback)(
                     env->uc, UC_MEM_READ, addr, DATA_SIZE, 0,
                     env->uc->hook_callbacks[env->uc->hook_mem_idx].user_data)) {
@@ -382,7 +414,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, int mmu_idx,
     }
 
     // Unicorn: callback on non-readable memory
-    if (mr != NULL && !(mr->perms & UC_PROT_READ)) {  //non-readable
+    if (READ_ACCESS_TYPE == MMU_DATA_LOAD && mr != NULL && !(mr->perms & UC_PROT_READ)) {  //non-readable
         if (uc->hook_mem_idx != 0 && ((uc_cb_eventmem_t)uc->hook_callbacks[uc->hook_mem_idx].callback)(
                                        uc, UC_MEM_READ_PROT, addr, DATA_SIZE, 0,
                                        uc->hook_callbacks[uc->hook_mem_idx].user_data)) {
diff --git a/regress/reg_write_sign_extension.py b/regress/reg_write_sign_extension.py
index 69347c0f..1997864c 100755
--- a/regress/reg_write_sign_extension.py
+++ b/regress/reg_write_sign_extension.py
@@ -6,7 +6,7 @@ import unicorn
 ADDR = 0xffaabbcc
 
 def hook_mem_invalid(mu, access, address, size, value, user_data):
-    print ">>> Expected value: 0x%x, actual value: 0x%x" % (ADDR, address)
+    print ">>> Access type: %u, Expected value: 0x%x, actual value: 0x%x" % (access, ADDR, address)
     assert(address == ADDR)
     mu.mem_map(address & 0xfffff000, 4 * 1024)
     mu.mem_write(address, b'\xcc')
diff --git a/uc.c b/uc.c
index 210616bf..14fcabb1 100644
--- a/uc.c
+++ b/uc.c
@@ -73,12 +73,14 @@ const char *uc_strerror(uc_err code)
             return "Invalid memory read (UC_ERR_MEM_READ)";
         case UC_ERR_MEM_WRITE:
             return "Invalid memory write (UC_ERR_MEM_WRITE)";
+        case UC_ERR_MEM_FETCH:
+            return "Invalid memory fetch (UC_ERR_MEM_FETCH)";
         case UC_ERR_CODE_INVALID:
             return "Invalid code address (UC_ERR_CODE_INVALID)";
-        case UC_ERR_INSN_INVALID:
-            return "Invalid instruction (UC_ERR_INSN_INVALID)";
         case UC_ERR_HOOK:
             return "Invalid hook type (UC_ERR_HOOK)";
+        case UC_ERR_INSN_INVALID:
+            return "Invalid instruction (UC_ERR_INSN_INVALID)";
         case UC_ERR_MAP:
             return "Invalid memory mapping (UC_ERR_MAP)";
         case UC_ERR_WRITE_PROT: