From 9cb64915c7aeb25beca4535101b2316b4aa6b477 Mon Sep 17 00:00:00 2001 From: misson20000 Date: Mon, 22 May 2017 05:46:30 -0700 Subject: [PATCH] fix Ruby bindings (#830) * fix mem_unmap and query for Ruby bindings * ruby bindings: fix issues with GC freeing callbacks while we still have references to them * ruby bindings: add test for garbage collection of hooks * ruby bindings: let the VM garbage collect hooks properly this time * ruby bindings: update garbage collection test to make sure Proc is garbage collected after Uc is collected * ruby bindings: fix m_uc_hook_add to return the ruby VALUE with proper memory management instead of making another one with bad memory management * ruby bindings: fix cb_hook_intr signature * add architecture query * ruby bindings: only treat certain x86 registers specially if we're actually on x86 * only treat certain x86 registers specially if we're actually on x86 (uc_reg_read and uc_reg_write) * ruby bindings: read and write ARM64's 128-bit NEON registers --- bindings/ruby/test_hook_gc.rb | 60 ++++++ bindings/ruby/unicorn_gem/ext/unicorn.c | 262 +++++++++++++++--------- 2 files changed, 225 insertions(+), 97 deletions(-) create mode 100644 bindings/ruby/test_hook_gc.rb diff --git a/bindings/ruby/test_hook_gc.rb b/bindings/ruby/test_hook_gc.rb new file mode 100644 index 00000000..670809ef --- /dev/null +++ b/bindings/ruby/test_hook_gc.rb @@ -0,0 +1,60 @@ +#!/usr/bin/env ruby +require 'unicorn' +require 'unicorn/x86_const' +require 'weakref' + +include Unicorn + +X86_CODE32 = "\x41" # INC ecx; DEC edx + +# memory address where emulation starts +ADDRESS = 0x1000000 + +# callback for tracing instructions +hook_code = Proc.new do |uc, address, size, user_data| + puts("proc was run") +end + +hook_code_weak = WeakRef.new hook_code + +begin + # Initialize emulator in X86-32bit mode + mu = Uc.new UC_ARCH_X86, UC_MODE_32 + # map 2MB memory for this emulation + mu.mem_map(ADDRESS, 2 * 1024 * 1024) + + # write machine code to be emulated to 
memory + mu.mem_write(ADDRESS, X86_CODE32) + + # initialize machine registers + mu.reg_write(UC_X86_REG_ECX, 0x1234) + mu.reg_write(UC_X86_REG_EDX, 0x7890) + + # tracing all instructions with customized callback + mu.hook_add(UC_HOOK_CODE, hook_code) + + hook_code = nil # erase reference to proc + + GC.start() # force garbage collection to test if proc is garbage collected + + # emulate machine code in infinite time + mu.emu_start(ADDRESS, ADDRESS + X86_CODE32.bytesize) + + mu = nil # erase reference to Uc because apparently it doesn't go out of scope after this? +rescue UcError => e + puts("ERROR: %s" % e) + exit 1 +rescue NoMethodError => e + puts("proc was garbage collected and we tried to invoke `call` on something strange") + exit 1 +end + +GC.start() + +if hook_code_weak.weakref_alive?() then + puts("proc was not garbage collected") + exit 1 +end + +puts "test passed" +exit 0 diff --git a/bindings/ruby/unicorn_gem/ext/unicorn.c b/bindings/ruby/unicorn_gem/ext/unicorn.c index ea20f10b..33dfbb20 100644 --- a/bindings/ruby/unicorn_gem/ext/unicorn.c +++ b/bindings/ruby/unicorn_gem/ext/unicorn.c @@ -27,14 +27,22 @@ VALUE UnicornModule = Qnil; VALUE UcClass = Qnil; VALUE UcError = Qnil; VALUE SavedContext = Qnil; +VALUE Hook = Qnil; +struct hook { + uc_hook trace; + VALUE cb; + VALUE ud; + VALUE rUc; +}; void Init_unicorn() { rb_require("unicorn/unicorn_const"); UnicornModule = rb_define_module("Unicorn"); UcError = rb_define_class_under(UnicornModule, "UcError", rb_eStandardError); SavedContext = rb_define_class_under(UnicornModule, "SavedContext", rb_cObject); - + Hook = rb_define_class_under(UnicornModule, "Hook", rb_cObject); + UcClass = rb_define_class_under(UnicornModule, "Uc", rb_cObject); rb_define_method(UcClass, "initialize", m_uc_initialize, 2); rb_define_method(UcClass, "emu_start", m_uc_emu_start, -1); @@ -48,7 +56,7 @@ void Init_unicorn() { rb_define_method(UcClass, "mem_protect", m_uc_mem_protect, 3); rb_define_method(UcClass, "hook_add", 
m_uc_hook_add, -1); rb_define_method(UcClass, "hook_del", m_uc_hook_del, 1); - rb_define_method(UcClass, "query", m_uc_hook_del, 1); + rb_define_method(UcClass, "query", m_uc_query, 1); rb_define_method(UcClass, "context_save", m_uc_context_save, 0); rb_define_method(UcClass, "context_update", m_uc_context_update, 1); rb_define_method(UcClass, "contest_restore", m_uc_context_restore, 1); @@ -64,7 +72,8 @@ VALUE m_uc_initialize(VALUE self, VALUE arch, VALUE mode) { VALUE uc = Data_Wrap_Struct(UcClass, 0, uc_close, _uc); rb_iv_set(self, "@uch", uc); - + rb_iv_set(self, "@hooks", rb_ary_new()); + return self; } @@ -112,35 +121,56 @@ VALUE m_uc_reg_read(VALUE self, VALUE reg_id){ uc_engine *_uc; Data_Get_Struct(rb_iv_get(self,"@uch"), uc_engine, _uc); - switch(tmp_reg){ - case UC_X86_REG_GDTR: - case UC_X86_REG_IDTR: - case UC_X86_REG_LDTR: - case UC_X86_REG_TR: - mmr.selector = 0; - mmr.base = 0; - mmr.limit = 0; - mmr.flags = 0; - err = uc_reg_read(_uc, tmp_reg, &mmr); - if (err != UC_ERR_OK) { - rb_raise(UcError, "%s", uc_strerror(err)); - } - VALUE mmr_ary = rb_ary_new(); - reg_value = mmr.selector; - rb_ary_store(mmr_ary, 0, UINT2NUM(reg_value)); - rb_ary_store(mmr_ary, 1, ULL2NUM(mmr.base)); - rb_ary_store(mmr_ary, 2, UINT2NUM(mmr.limit)); - rb_ary_store(mmr_ary, 3, UINT2NUM(mmr.flags)); - return mmr_ary; - default: - err = uc_reg_read(_uc, tmp_reg, &reg_value); - if (err != UC_ERR_OK) { - rb_raise(UcError, "%s", uc_strerror(err)); - } - return ULL2NUM(reg_value); + uc_arch arch; + uc_query(_uc, UC_QUERY_ARCH, &arch); + + if(arch == UC_ARCH_X86) { + switch(tmp_reg){ + case UC_X86_REG_GDTR: + case UC_X86_REG_IDTR: + case UC_X86_REG_LDTR: + case UC_X86_REG_TR: + mmr.selector = 0; + mmr.base = 0; + mmr.limit = 0; + mmr.flags = 0; + err = uc_reg_read(_uc, tmp_reg, &mmr); + + if (err != UC_ERR_OK) { + rb_raise(UcError, "%s", uc_strerror(err)); + } + VALUE mmr_ary = rb_ary_new(); + reg_value = mmr.selector; + rb_ary_store(mmr_ary, 0, UINT2NUM(reg_value)); + 
rb_ary_store(mmr_ary, 1, ULL2NUM(mmr.base)); + rb_ary_store(mmr_ary, 2, UINT2NUM(mmr.limit)); + rb_ary_store(mmr_ary, 3, UINT2NUM(mmr.flags)); + return mmr_ary; + } } - + if(arch == UC_ARCH_ARM64) { + // V & Q registers are the same + if(tmp_reg >= UC_ARM64_REG_V0 && tmp_reg <= UC_ARM64_REG_V31) { + tmp_reg += UC_ARM64_REG_Q0 - UC_ARM64_REG_V0; + } + if(tmp_reg >= UC_ARM64_REG_Q0 && tmp_reg <= UC_ARM64_REG_Q31) { + uint64_t neon128_value[2]; + err = uc_reg_read(_uc, tmp_reg, &neon128_value); + if (err != UC_ERR_OK) { + rb_raise(UcError, "%s", uc_strerror(err)); + } + VALUE float128_ary = rb_ary_new(); + rb_ary_store(float128_ary, 0, ULL2NUM(neon128_value[0])); + rb_ary_store(float128_ary, 1, ULL2NUM(neon128_value[1])); + return float128_ary; + } + } + err = uc_reg_read(_uc, tmp_reg, &reg_value); + if (err != UC_ERR_OK) { + rb_raise(UcError, "%s", uc_strerror(err)); + } + return ULL2NUM(reg_value); } VALUE m_uc_reg_write(VALUE self, VALUE reg_id, VALUE reg_value){ @@ -150,27 +180,50 @@ VALUE m_uc_reg_write(VALUE self, VALUE reg_id, VALUE reg_value){ int64_t tmp; uc_engine *_uc; Data_Get_Struct(rb_iv_get(self,"@uch"), uc_engine, _uc); + + uc_arch arch; + uc_query(_uc, UC_QUERY_ARCH, &arch); - switch(tmp_reg){ - case UC_X86_REG_GDTR: - case UC_X86_REG_IDTR: - case UC_X86_REG_LDTR: - case UC_X86_REG_TR: + if(arch == UC_ARCH_X86) { + switch(tmp_reg){ + case UC_X86_REG_GDTR: + case UC_X86_REG_IDTR: + case UC_X86_REG_LDTR: + case UC_X86_REG_TR: + Check_Type(reg_value, T_ARRAY); + + mmr.selector = NUM2USHORT(rb_ary_entry(reg_value,0)); + mmr.base = NUM2ULL(rb_ary_entry(reg_value,1)); + mmr.limit = NUM2UINT(rb_ary_entry(reg_value,2)); + mmr.flags = NUM2UINT(rb_ary_entry(reg_value,3)); + err = uc_reg_write(_uc, tmp_reg, &mmr); + if (err != UC_ERR_OK) { + rb_raise(UcError, "%s", uc_strerror(err)); + } + return Qnil; + } + } + if(arch == UC_ARCH_ARM64) { + // V & Q registers are the same + if(tmp_reg >= UC_ARM64_REG_V0 && tmp_reg <= UC_ARM64_REG_V31) { + tmp_reg += 
UC_ARM64_REG_Q0 - UC_ARM64_REG_V0; + } + if(tmp_reg >= UC_ARM64_REG_Q0 && tmp_reg <= UC_ARM64_REG_Q31) { Check_Type(reg_value, T_ARRAY); - mmr.selector = NUM2USHORT(rb_ary_entry(reg_value,0)); - mmr.base = NUM2ULL(rb_ary_entry(reg_value,1)); - mmr.limit = NUM2UINT(rb_ary_entry(reg_value,2)); - mmr.flags = NUM2UINT(rb_ary_entry(reg_value,3)); - err = uc_reg_write(_uc, tmp_reg, &mmr); - break; - default: - tmp = NUM2ULL(reg_value); - - err = uc_reg_write(_uc, NUM2INT(reg_id), &tmp); - break; + uint64_t neon128_value[2]; + neon128_value[0] = NUM2ULL(rb_ary_entry(reg_value, 0)); + neon128_value[1] = NUM2ULL(rb_ary_entry(reg_value, 1)); + err = uc_reg_write(_uc, NUM2INT(reg_id), &neon128_value); + if (err != UC_ERR_OK) { + rb_raise(UcError, "%s", uc_strerror(err)); + } + return Qnil; + } } - + + tmp = NUM2ULL(reg_value); + err = uc_reg_write(_uc, NUM2INT(reg_id), &tmp); if (err != UC_ERR_OK) { rb_raise(UcError, "%s", uc_strerror(err)); } @@ -223,7 +276,7 @@ VALUE m_uc_mem_map(int argc, VALUE* argv, VALUE self){ VALUE m_uc_mem_unmap(VALUE self, VALUE address, VALUE size){ uc_err err; uc_engine *_uc; - _uc = (uc_engine*) NUM2ULL(rb_iv_get(self, "@uch")); + Data_Get_Struct(rb_iv_get(self, "@uch"), uc_engine, _uc); err = uc_mem_unmap(_uc, NUM2ULL(address), NUM2UINT(size)); if (err != UC_ERR_OK) { rb_raise(UcError, "%s", uc_strerror(err)); @@ -243,88 +296,95 @@ VALUE m_uc_mem_protect(VALUE self, VALUE address, VALUE size, VALUE perms){ } static void cb_hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data){ - VALUE passthrough = (VALUE)user_data; + struct hook *hook = (struct hook *)user_data; VALUE cb; VALUE ud; VALUE rUc; - cb = rb_ary_entry(passthrough, 0); - ud = rb_ary_entry(passthrough, 1); - rUc = rb_ary_entry(passthrough, 2); + cb = hook->cb; + ud = hook->ud; + rUc = hook->rUc; rb_funcall(cb, rb_intern("call"), 4, rUc, ULL2NUM(address), UINT2NUM(size), ud); } static void cb_hook_mem_access(uc_engine *uc, uint32_t access, uint64_t address, uint32_t 
size, int64_t value, void *user_data){ - VALUE passthrough = (VALUE)user_data; + struct hook *hook = (struct hook *)user_data; VALUE cb; VALUE ud; VALUE rUc; - cb = rb_ary_entry(passthrough, 0); - ud = rb_ary_entry(passthrough, 1); - rUc = rb_ary_entry(passthrough, 2); - + cb = hook->cb; + ud = hook->ud; + rUc = hook->rUc; rb_funcall(cb, rb_intern("call"), 6, rUc, UINT2NUM(access), ULL2NUM(address), UINT2NUM(size), LL2NUM(value), ud); } static bool cb_hook_mem_invalid(uc_engine *uc, uint32_t access, uint64_t address, uint32_t size, int64_t value, void *user_data){ - VALUE passthrough = (VALUE)user_data; + struct hook *hook = (struct hook *)user_data; VALUE cb; VALUE ud; VALUE rUc; - cb = rb_ary_entry(passthrough, 0); - ud = rb_ary_entry(passthrough, 1); - rUc = rb_ary_entry(passthrough, 2); + cb = hook->cb; + ud = hook->ud; + rUc = hook->rUc; + return RTEST(rb_funcall(cb, rb_intern("call"), 6, rUc, UINT2NUM(access), ULL2NUM(address), UINT2NUM(size), LL2NUM(value), ud)); } static uint32_t cb_hook_insn_in(uc_engine *uc, uint32_t port, int size, void *user_data){ - VALUE passthrough = (VALUE)user_data; + struct hook *hook = (struct hook *)user_data; VALUE cb; VALUE ud; VALUE rUc; - cb = rb_ary_entry(passthrough, 0); - ud = rb_ary_entry(passthrough, 1); - rUc = rb_ary_entry(passthrough, 2); + cb = hook->cb; + ud = hook->ud; + rUc = hook->rUc; return NUM2UINT(rb_funcall(cb, rb_intern("call"), 4, rUc, UINT2NUM(port), INT2NUM(size), ud)); } static void cb_hook_insn_out(uc_engine *uc, uint32_t port, int size, uint32_t value, void *user_data){ - VALUE passthrough = (VALUE)user_data; + struct hook *hook = (struct hook *)user_data; VALUE cb; VALUE ud; VALUE rUc; - cb = rb_ary_entry(passthrough, 0); - ud = rb_ary_entry(passthrough, 1); - rUc = rb_ary_entry(passthrough, 2); + cb = hook->cb; + ud = hook->ud; + rUc = hook->rUc; rb_funcall(cb, rb_intern("call"), 5, rUc, UINT2NUM(port), INT2NUM(size), UINT2NUM(value), ud); } static void cb_hook_insn_syscall(uc_engine *uc, void 
*user_data){ - VALUE passthrough = (VALUE)user_data; + struct hook *hook = (struct hook *)user_data; VALUE cb; VALUE ud; VALUE rUc; - cb = rb_ary_entry(passthrough, 0); - ud = rb_ary_entry(passthrough, 1); - rUc = rb_ary_entry(passthrough, 2); + cb = hook->cb; + ud = hook->ud; + rUc = hook->rUc; rb_funcall(cb, rb_intern("call"), 2, rUc, ud); } -static void cb_hook_intr(uc_engine *uc, uint64_t address, uint32_t size, int64_t value, void *user_data){ - VALUE passthrough = (VALUE)user_data; +static void cb_hook_intr(uc_engine *uc, uint32_t intno, void *user_data){ + struct hook *hook = (struct hook *)user_data; VALUE cb; VALUE ud; VALUE rUc; - cb = rb_ary_entry(passthrough, 0); - ud = rb_ary_entry(passthrough, 1); - rUc = rb_ary_entry(passthrough, 2); - rb_funcall(cb, rb_intern("call"), 5, rUc, ULL2NUM(address), UINT2NUM(size), LL2NUM(value), ud); + cb = hook->cb; + ud = hook->ud; + rUc = hook->rUc; + rb_funcall(cb, rb_intern("call"), 3, rUc, ULL2NUM(intno), ud); +} + +static void mark_hook(void *p){ + struct hook *hook = (struct hook *)p; + rb_gc_mark(hook->cb); + rb_gc_mark(hook->ud); + rb_gc_mark(hook->rUc); // just for completeness sake even though this should already be marked } VALUE m_uc_hook_add(int argc, VALUE* argv, VALUE self){ @@ -335,7 +395,8 @@ VALUE m_uc_hook_add(int argc, VALUE* argv, VALUE self){ VALUE end; VALUE arg1; uc_engine *_uc; - Data_Get_Struct(rb_iv_get(self,"@uch"), uc_engine, _uc); + Data_Get_Struct(rb_iv_get(self, "@uch"), uc_engine, _uc); + rb_scan_args(argc, argv, "24",&hook_type, &callback, &user_data, &begin, &end, &arg1); if (NIL_P(begin)) begin = ULL2NUM(1); @@ -346,38 +407,41 @@ VALUE m_uc_hook_add(int argc, VALUE* argv, VALUE self){ if (NIL_P(arg1)) arg1 = INT2NUM(0); - VALUE passthrough; - uc_hook trace; uc_err err; if (rb_class_of(callback) != rb_cProc) rb_raise(UcError, "Expected Proc callback"); - passthrough = rb_ary_new(); - rb_ary_store(passthrough, 0, callback); - rb_ary_store(passthrough, 1, user_data); - 
rb_ary_store(passthrough, 2, self); - + struct hook *hook = (struct hook *)malloc(sizeof(struct hook)); + hook->cb = callback; + hook->ud = user_data; + hook->rUc = self; + VALUE r_hook; + VALUE hooks_list; + r_hook = Data_Wrap_Struct(Hook, mark_hook, free, hook); + hooks_list = rb_iv_get(self, "@hooks"); + rb_ary_push(hooks_list, r_hook); + uint32_t htype = NUM2UINT(hook_type); if(htype == UC_HOOK_INSN){ switch(NUM2INT(arg1)){ case UC_X86_INS_IN: - err = uc_hook_add(_uc, &trace, htype, cb_hook_insn_in,(void *)passthrough, NUM2ULL(begin), NUM2ULL(end), NUM2INT(arg1)); + err = uc_hook_add(_uc, &hook->trace, htype, cb_hook_insn_in,(void *)hook, NUM2ULL(begin), NUM2ULL(end), NUM2INT(arg1)); break; case UC_X86_INS_OUT: - err = uc_hook_add(_uc, &trace, htype, cb_hook_insn_out,(void *)passthrough, NUM2ULL(begin), NUM2ULL(end), NUM2INT(arg1)); + err = uc_hook_add(_uc, &hook->trace, htype, cb_hook_insn_out,(void *)hook, NUM2ULL(begin), NUM2ULL(end), NUM2INT(arg1)); break; case UC_X86_INS_SYSCALL: case UC_X86_INS_SYSENTER: - err = uc_hook_add(_uc, &trace, htype, cb_hook_insn_syscall,(void *)passthrough, NUM2ULL(begin), NUM2ULL(end), NUM2INT(arg1)); + err = uc_hook_add(_uc, &hook->trace, htype, cb_hook_insn_syscall,(void *)hook, NUM2ULL(begin), NUM2ULL(end), NUM2INT(arg1)); break; } } else if(htype == UC_HOOK_INTR){ - err = uc_hook_add(_uc, &trace, htype, cb_hook_intr,(void *)passthrough, NUM2ULL(begin), NUM2ULL(end)); + err = uc_hook_add(_uc, &hook->trace, htype, cb_hook_intr,(void *)hook, NUM2ULL(begin), NUM2ULL(end)); } else if(htype == UC_HOOK_CODE || htype == UC_HOOK_BLOCK){ - err = uc_hook_add(_uc, &trace, htype, cb_hook_code,(void *)passthrough, NUM2ULL(begin), NUM2ULL(end)); + err = uc_hook_add(_uc, &hook->trace, htype, cb_hook_code,(void *)hook, NUM2ULL(begin), NUM2ULL(end)); } else if (htype & UC_HOOK_MEM_READ_UNMAPPED || htype & UC_HOOK_MEM_WRITE_UNMAPPED @@ -391,24 +455,28 @@ VALUE m_uc_hook_add(int argc, VALUE* argv, VALUE self){ || htype & UC_HOOK_MEM_UNMAPPED 
|| htype & UC_HOOK_MEM_PROT || htype & UC_HOOK_MEM_INVALID) { - err = uc_hook_add(_uc, &trace, htype, cb_hook_mem_invalid,(void *)passthrough, NUM2ULL(begin), NUM2ULL(end)); + err = uc_hook_add(_uc, &hook->trace, htype, cb_hook_mem_invalid,(void *)hook, NUM2ULL(begin), NUM2ULL(end)); } else{ - err = uc_hook_add(_uc, &trace, htype, cb_hook_mem_access,(void *)passthrough, NUM2ULL(begin), NUM2ULL(end)); + err = uc_hook_add(_uc, &hook->trace, htype, cb_hook_mem_access,(void *)hook, NUM2ULL(begin), NUM2ULL(end)); } if (err != UC_ERR_OK) { rb_raise(UcError, "%s", uc_strerror(err)); } - return INT2NUM(trace); + return r_hook; } VALUE m_uc_hook_del(VALUE self, VALUE hook){ - int h = NUM2INT(hook); uc_err err; uc_engine *_uc; Data_Get_Struct(rb_iv_get(self,"@uch"), uc_engine, _uc); - err = uc_hook_del(_uc, h); + struct hook *h; + Data_Get_Struct(hook, struct hook, h); + err = uc_hook_del(_uc, h->trace); + + rb_ary_delete(rb_iv_get(self, "@hooks"), hook); + if (err != UC_ERR_OK) { rb_raise(UcError, "%s", uc_strerror(err)); }