fix Ruby bindings (#830)

* fix mem_unmap and query for Ruby bindings

* ruby bindings: fix issues with GC freeing callbacks while we still have references to them

* ruby bindings: add test for garbage collection of hooks

* ruby bindings: let the VM garbage collect hooks properly this time

* ruby bindings: update garbage collection test to make sure Proc is garbage collected after Uc is collected

* ruby bindings: fix m_uc_hook_add to return the ruby VALUE with proper memory management instead of making another one with bad memory management

* ruby bindings: fix cb_hook_intr signature

* add architecture query

* ruby bindings: only treat certain x86 registers specially if we're actually on x86

* only treat certain x86 registers specially if we're actually on x86 (uc_reg_read and uc_reg_write)

* ruby bindings: read and write ARM64's 128-bit NEON registers
This commit is contained in:
misson20000 2017-05-22 05:46:30 -07:00 committed by Nguyen Anh Quynh
parent 3fdb2d2442
commit 9cb64915c7
2 changed files with 225 additions and 97 deletions

View file

@ -0,0 +1,60 @@
#!/usr/bin/env ruby
# Regression test for garbage collection of hooks in the Ruby bindings.
#
# It checks two things:
#   1. A Proc registered through Uc#hook_add stays usable while the Uc object
#      is alive, even after the script drops its own reference and forces a GC.
#   2. Once the Uc object is released as well, the Proc becomes collectable.
#
# Exits 0 and prints "test passed" on success; exits 1 with a message otherwise.
require 'unicorn'
require 'unicorn/x86_const'
require 'weakref'
include Unicorn

X86_CODE32 = "\x41" # INC ecx
# memory address where emulation starts
ADDRESS = 0x1000000

# Set by the hook so the script can tell that the callback really fired.
hook_ran = false

# callback for tracing instructions
hook_code = Proc.new do |_uc, _address, _size, _user_data|
  hook_ran = true
  puts("proc was run")
end
# Weak reference: lets us observe the Proc's lifetime without extending it.
hook_code_weak = WeakRef.new(hook_code)

begin
  # Initialize emulator in X86-32bit mode
  mu = Uc.new UC_ARCH_X86, UC_MODE_32
  # map 2MB memory for this emulation
  mu.mem_map(ADDRESS, 2 * 1024 * 1024)
  # write machine code to be emulated to memory
  mu.mem_write(ADDRESS, X86_CODE32)
  # initialize machine registers
  mu.reg_write(UC_X86_REG_ECX, 0x1234)
  mu.reg_write(UC_X86_REG_EDX, 0x7890)
  # tracing all instructions with customized callback
  mu.hook_add(UC_HOOK_CODE, hook_code)
  hook_code = nil # erase our reference; only the Uc object may keep the proc alive now
  GC.start # force a collection before emulating, so a wrongly freed proc is hit below
  # emulate machine code in infinite time
  mu.emu_start(ADDRESS, ADDRESS + X86_CODE32.bytesize)
  # Without this check the test would pass even if the hook was silently dropped.
  unless hook_ran
    puts("proc was never run")
    exit 1
  end
  # Locals assigned inside begin/end stay in scope afterwards, so the reference
  # to the Uc object has to be cleared explicitly before the final GC.
  mu = nil
rescue UcError => e
  puts("ERROR: %s" % e)
  exit 1
rescue NoMethodError => e
  puts("proc was garbage collected and we tried to invoke `call` on something strange")
  exit 1
end

GC.start
if hook_code_weak.weakref_alive?
  puts("proc was not garbage collected")
  exit 1
end

puts "test passed"
exit 0

View file

@ -27,13 +27,21 @@ VALUE UnicornModule = Qnil;
VALUE UcClass = Qnil;
VALUE UcError = Qnil;
VALUE SavedContext = Qnil;
VALUE Hook = Qnil;
/*
 * Per-hook state shared between the Ruby-side Hook wrapper object and the
 * native hook callbacks. Allocated in m_uc_hook_add, wrapped with
 * Data_Wrap_Struct (mark_hook / free) and passed to unicorn as user_data.
 */
struct hook {
uc_hook trace; /* unicorn hook handle: written by uc_hook_add, passed to uc_hook_del */
VALUE cb; /* Ruby callback object; invoked through its "call" method */
VALUE ud; /* user_data value forwarded as the last callback argument */
VALUE rUc; /* the Ruby Uc object the hook was added to; passed as the first callback argument */
};
void Init_unicorn() {
rb_require("unicorn/unicorn_const");
UnicornModule = rb_define_module("Unicorn");
UcError = rb_define_class_under(UnicornModule, "UcError", rb_eStandardError);
SavedContext = rb_define_class_under(UnicornModule, "SavedContext", rb_cObject);
Hook = rb_define_class_under(UnicornModule, "Hook", rb_cObject);
UcClass = rb_define_class_under(UnicornModule, "Uc", rb_cObject);
rb_define_method(UcClass, "initialize", m_uc_initialize, 2);
@ -48,7 +56,7 @@ void Init_unicorn() {
rb_define_method(UcClass, "mem_protect", m_uc_mem_protect, 3);
rb_define_method(UcClass, "hook_add", m_uc_hook_add, -1);
rb_define_method(UcClass, "hook_del", m_uc_hook_del, 1);
rb_define_method(UcClass, "query", m_uc_hook_del, 1);
rb_define_method(UcClass, "query", m_uc_query, 1);
rb_define_method(UcClass, "context_save", m_uc_context_save, 0);
rb_define_method(UcClass, "context_update", m_uc_context_update, 1);
rb_define_method(UcClass, "contest_restore", m_uc_context_restore, 1);
@ -64,6 +72,7 @@ VALUE m_uc_initialize(VALUE self, VALUE arch, VALUE mode) {
VALUE uc = Data_Wrap_Struct(UcClass, 0, uc_close, _uc);
rb_iv_set(self, "@uch", uc);
rb_iv_set(self, "@hooks", rb_ary_new());
return self;
}
@ -112,35 +121,56 @@ VALUE m_uc_reg_read(VALUE self, VALUE reg_id){
uc_engine *_uc;
Data_Get_Struct(rb_iv_get(self,"@uch"), uc_engine, _uc);
switch(tmp_reg){
case UC_X86_REG_GDTR:
case UC_X86_REG_IDTR:
case UC_X86_REG_LDTR:
case UC_X86_REG_TR:
mmr.selector = 0;
mmr.base = 0;
mmr.limit = 0;
mmr.flags = 0;
err = uc_reg_read(_uc, tmp_reg, &mmr);
if (err != UC_ERR_OK) {
rb_raise(UcError, "%s", uc_strerror(err));
}
VALUE mmr_ary = rb_ary_new();
reg_value = mmr.selector;
rb_ary_store(mmr_ary, 0, UINT2NUM(reg_value));
rb_ary_store(mmr_ary, 1, ULL2NUM(mmr.base));
rb_ary_store(mmr_ary, 2, UINT2NUM(mmr.limit));
rb_ary_store(mmr_ary, 3, UINT2NUM(mmr.flags));
return mmr_ary;
default:
err = uc_reg_read(_uc, tmp_reg, &reg_value);
if (err != UC_ERR_OK) {
rb_raise(UcError, "%s", uc_strerror(err));
}
return ULL2NUM(reg_value);
uc_arch arch;
uc_query(_uc, UC_QUERY_ARCH, &arch);
if(arch == UC_ARCH_X86) {
switch(tmp_reg){
case UC_X86_REG_GDTR:
case UC_X86_REG_IDTR:
case UC_X86_REG_LDTR:
case UC_X86_REG_TR:
mmr.selector = 0;
mmr.base = 0;
mmr.limit = 0;
mmr.flags = 0;
err = uc_reg_read(_uc, tmp_reg, &mmr);
if (err != UC_ERR_OK) {
rb_raise(UcError, "%s", uc_strerror(err));
}
VALUE mmr_ary = rb_ary_new();
reg_value = mmr.selector;
rb_ary_store(mmr_ary, 0, UINT2NUM(reg_value));
rb_ary_store(mmr_ary, 1, ULL2NUM(mmr.base));
rb_ary_store(mmr_ary, 2, UINT2NUM(mmr.limit));
rb_ary_store(mmr_ary, 3, UINT2NUM(mmr.flags));
return mmr_ary;
}
}
if(arch == UC_ARCH_ARM64) {
// V & Q registers are the same
if(tmp_reg >= UC_ARM64_REG_V0 && tmp_reg <= UC_ARM64_REG_V31) {
tmp_reg += UC_ARM64_REG_Q0 - UC_ARM64_REG_V0;
}
if(tmp_reg >= UC_ARM64_REG_Q0 && tmp_reg <= UC_ARM64_REG_Q31) {
uint64_t neon128_value[2];
err = uc_reg_read(_uc, tmp_reg, &neon128_value);
if (err != UC_ERR_OK) {
rb_raise(UcError, "%s", uc_strerror(err));
}
VALUE float128_ary = rb_ary_new();
rb_ary_store(float128_ary, 0, ULL2NUM(neon128_value[0]));
rb_ary_store(float128_ary, 1, ULL2NUM(neon128_value[1]));
return float128_ary;
}
}
err = uc_reg_read(_uc, tmp_reg, &reg_value);
if (err != UC_ERR_OK) {
rb_raise(UcError, "%s", uc_strerror(err));
}
return ULL2NUM(reg_value);
}
VALUE m_uc_reg_write(VALUE self, VALUE reg_id, VALUE reg_value){
@ -151,26 +181,49 @@ VALUE m_uc_reg_write(VALUE self, VALUE reg_id, VALUE reg_value){
uc_engine *_uc;
Data_Get_Struct(rb_iv_get(self,"@uch"), uc_engine, _uc);
switch(tmp_reg){
case UC_X86_REG_GDTR:
case UC_X86_REG_IDTR:
case UC_X86_REG_LDTR:
case UC_X86_REG_TR:
uc_arch arch;
uc_query(_uc, UC_QUERY_ARCH, &arch);
if(arch == UC_ARCH_X86) {
switch(tmp_reg){
case UC_X86_REG_GDTR:
case UC_X86_REG_IDTR:
case UC_X86_REG_LDTR:
case UC_X86_REG_TR:
Check_Type(reg_value, T_ARRAY);
mmr.selector = NUM2USHORT(rb_ary_entry(reg_value,0));
mmr.base = NUM2ULL(rb_ary_entry(reg_value,1));
mmr.limit = NUM2UINT(rb_ary_entry(reg_value,2));
mmr.flags = NUM2UINT(rb_ary_entry(reg_value,3));
err = uc_reg_write(_uc, tmp_reg, &mmr);
if (err != UC_ERR_OK) {
rb_raise(UcError, "%s", uc_strerror(err));
}
return Qnil;
}
}
if(arch == UC_ARCH_ARM64) {
// V & Q registers are the same
if(tmp_reg >= UC_ARM64_REG_V0 && tmp_reg <= UC_ARM64_REG_V31) {
tmp_reg += UC_ARM64_REG_Q0 - UC_ARM64_REG_V0;
}
if(tmp_reg >= UC_ARM64_REG_Q0 && tmp_reg <= UC_ARM64_REG_Q31) {
Check_Type(reg_value, T_ARRAY);
mmr.selector = NUM2USHORT(rb_ary_entry(reg_value,0));
mmr.base = NUM2ULL(rb_ary_entry(reg_value,1));
mmr.limit = NUM2UINT(rb_ary_entry(reg_value,2));
mmr.flags = NUM2UINT(rb_ary_entry(reg_value,3));
err = uc_reg_write(_uc, tmp_reg, &mmr);
break;
default:
tmp = NUM2ULL(reg_value);
err = uc_reg_write(_uc, NUM2INT(reg_id), &tmp);
break;
uint64_t neon128_value[2];
neon128_value[0] = NUM2ULL(rb_ary_entry(reg_value, 0));
neon128_value[1] = NUM2ULL(rb_ary_entry(reg_value, 1));
err = uc_reg_write(_uc, NUM2INT(reg_id), &neon128_value);
if (err != UC_ERR_OK) {
rb_raise(UcError, "%s", uc_strerror(err));
}
return Qnil;
}
}
tmp = NUM2ULL(reg_value);
err = uc_reg_write(_uc, NUM2INT(reg_id), &tmp);
if (err != UC_ERR_OK) {
rb_raise(UcError, "%s", uc_strerror(err));
}
@ -223,7 +276,7 @@ VALUE m_uc_mem_map(int argc, VALUE* argv, VALUE self){
VALUE m_uc_mem_unmap(VALUE self, VALUE address, VALUE size){
uc_err err;
uc_engine *_uc;
_uc = (uc_engine*) NUM2ULL(rb_iv_get(self, "@uch"));
Data_Get_Struct(rb_iv_get(self, "@uch"), uc_engine, _uc);
err = uc_mem_unmap(_uc, NUM2ULL(address), NUM2UINT(size));
if (err != UC_ERR_OK) {
rb_raise(UcError, "%s", uc_strerror(err));
@ -243,88 +296,95 @@ VALUE m_uc_mem_protect(VALUE self, VALUE address, VALUE size, VALUE perms){
}
/*
 * Native callback that m_uc_hook_add registers for UC_HOOK_CODE and
 * UC_HOOK_BLOCK hooks. Forwards the event to the Ruby callback as
 * cb.call(uc, address, size, user_data), where uc is the Ruby Uc object
 * (the native uc_engine pointer is not used here).
 */
static void cb_hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data){
VALUE passthrough = (VALUE)user_data;
struct hook *hook = (struct hook *)user_data;
VALUE cb;
VALUE ud;
VALUE rUc;
cb = rb_ary_entry(passthrough, 0);
ud = rb_ary_entry(passthrough, 1);
rUc = rb_ary_entry(passthrough, 2);
cb = hook->cb;
ud = hook->ud;
rUc = hook->rUc;
rb_funcall(cb, rb_intern("call"), 4, rUc, ULL2NUM(address), UINT2NUM(size), ud);
}
/*
 * Native callback that m_uc_hook_add registers for hook types not matched by
 * any of its earlier branches (its final else case). Forwards the event to
 * the Ruby callback as cb.call(uc, access, address, size, value, user_data);
 * the callback's return value is ignored.
 */
static void cb_hook_mem_access(uc_engine *uc, uint32_t access, uint64_t address, uint32_t size, int64_t value, void *user_data){
VALUE passthrough = (VALUE)user_data;
struct hook *hook = (struct hook *)user_data;
VALUE cb;
VALUE ud;
VALUE rUc;
cb = rb_ary_entry(passthrough, 0);
ud = rb_ary_entry(passthrough, 1);
rUc = rb_ary_entry(passthrough, 2);
cb = hook->cb;
ud = hook->ud;
rUc = hook->rUc;
rb_funcall(cb, rb_intern("call"), 6, rUc, UINT2NUM(access), ULL2NUM(address), UINT2NUM(size), LL2NUM(value), ud);
}
/*
 * Native callback that m_uc_hook_add registers for the unmapped / protected /
 * invalid memory hook types it tests for (UC_HOOK_MEM_*_UNMAPPED,
 * UC_HOOK_MEM_PROT, UC_HOOK_MEM_INVALID, ...). Calls the Ruby callback as
 * cb.call(uc, access, address, size, value, user_data) and returns true when
 * the callback's result is truthy in Ruby terms (anything but nil/false).
 */
static bool cb_hook_mem_invalid(uc_engine *uc, uint32_t access, uint64_t address, uint32_t size, int64_t value, void *user_data){
VALUE passthrough = (VALUE)user_data;
struct hook *hook = (struct hook *)user_data;
VALUE cb;
VALUE ud;
VALUE rUc;
cb = rb_ary_entry(passthrough, 0);
ud = rb_ary_entry(passthrough, 1);
rUc = rb_ary_entry(passthrough, 2);
cb = hook->cb;
ud = hook->ud;
rUc = hook->rUc;
return RTEST(rb_funcall(cb, rb_intern("call"), 6, rUc, UINT2NUM(access), ULL2NUM(address), UINT2NUM(size), LL2NUM(value), ud));
}
/*
 * Native callback that m_uc_hook_add registers for UC_HOOK_INSN hooks on
 * UC_X86_INS_IN. Calls the Ruby callback as cb.call(uc, port, size, user_data)
 * and returns its result converted with NUM2UINT, so the callback has to
 * return a value that converts to an unsigned integer.
 */
static uint32_t cb_hook_insn_in(uc_engine *uc, uint32_t port, int size, void *user_data){
VALUE passthrough = (VALUE)user_data;
struct hook *hook = (struct hook *)user_data;
VALUE cb;
VALUE ud;
VALUE rUc;
cb = rb_ary_entry(passthrough, 0);
ud = rb_ary_entry(passthrough, 1);
rUc = rb_ary_entry(passthrough, 2);
cb = hook->cb;
ud = hook->ud;
rUc = hook->rUc;
return NUM2UINT(rb_funcall(cb, rb_intern("call"), 4, rUc, UINT2NUM(port), INT2NUM(size), ud));
}
/*
 * Native callback that m_uc_hook_add registers for UC_HOOK_INSN hooks on
 * UC_X86_INS_OUT. Forwards the event to the Ruby callback as
 * cb.call(uc, port, size, value, user_data); the result is ignored.
 */
static void cb_hook_insn_out(uc_engine *uc, uint32_t port, int size, uint32_t value, void *user_data){
VALUE passthrough = (VALUE)user_data;
struct hook *hook = (struct hook *)user_data;
VALUE cb;
VALUE ud;
VALUE rUc;
cb = rb_ary_entry(passthrough, 0);
ud = rb_ary_entry(passthrough, 1);
rUc = rb_ary_entry(passthrough, 2);
cb = hook->cb;
ud = hook->ud;
rUc = hook->rUc;
rb_funcall(cb, rb_intern("call"), 5, rUc, UINT2NUM(port), INT2NUM(size), UINT2NUM(value), ud);
}
/*
 * Native callback that m_uc_hook_add registers for UC_HOOK_INSN hooks on
 * UC_X86_INS_SYSCALL and UC_X86_INS_SYSENTER. Forwards the event to the Ruby
 * callback as cb.call(uc, user_data); the result is ignored.
 */
static void cb_hook_insn_syscall(uc_engine *uc, void *user_data){
VALUE passthrough = (VALUE)user_data;
struct hook *hook = (struct hook *)user_data;
VALUE cb;
VALUE ud;
VALUE rUc;
cb = rb_ary_entry(passthrough, 0);
ud = rb_ary_entry(passthrough, 1);
rUc = rb_ary_entry(passthrough, 2);
cb = hook->cb;
ud = hook->ud;
rUc = hook->rUc;
rb_funcall(cb, rb_intern("call"), 2, rUc, ud);
}
static void cb_hook_intr(uc_engine *uc, uint64_t address, uint32_t size, int64_t value, void *user_data){
VALUE passthrough = (VALUE)user_data;
static void cb_hook_intr(uc_engine *uc, uint32_t intno, void *user_data){
struct hook *hook = (struct hook *)user_data;
VALUE cb;
VALUE ud;
VALUE rUc;
cb = rb_ary_entry(passthrough, 0);
ud = rb_ary_entry(passthrough, 1);
rUc = rb_ary_entry(passthrough, 2);
rb_funcall(cb, rb_intern("call"), 5, rUc, ULL2NUM(address), UINT2NUM(size), LL2NUM(value), ud);
cb = hook->cb;
ud = hook->ud;
rUc = hook->rUc;
rb_funcall(cb, rb_intern("call"), 3, rUc, ULL2NUM(intno), ud);
}
/*
 * GC mark function for wrapped struct hook objects (handed to
 * Data_Wrap_Struct in m_uc_hook_add). Marks every Ruby VALUE stored in the
 * C struct so the collector keeps them alive as long as the wrapper lives.
 */
static void mark_hook(void *p){
    struct hook *entry = (struct hook *)p;

    rb_gc_mark(entry->cb);   /* the callback Proc */
    rb_gc_mark(entry->ud);   /* the user data handed to the callback */
    rb_gc_mark(entry->rUc);  /* owning Uc object; expected to be marked elsewhere already, kept for completeness */
}
VALUE m_uc_hook_add(int argc, VALUE* argv, VALUE self){
@ -335,7 +395,8 @@ VALUE m_uc_hook_add(int argc, VALUE* argv, VALUE self){
VALUE end;
VALUE arg1;
uc_engine *_uc;
Data_Get_Struct(rb_iv_get(self,"@uch"), uc_engine, _uc);
Data_Get_Struct(rb_iv_get(self, "@uch"), uc_engine, _uc);
rb_scan_args(argc, argv, "24",&hook_type, &callback, &user_data, &begin, &end, &arg1);
if (NIL_P(begin))
begin = ULL2NUM(1);
@ -346,38 +407,41 @@ VALUE m_uc_hook_add(int argc, VALUE* argv, VALUE self){
if (NIL_P(arg1))
arg1 = INT2NUM(0);
VALUE passthrough;
uc_hook trace;
uc_err err;
if (rb_class_of(callback) != rb_cProc)
rb_raise(UcError, "Expected Proc callback");
passthrough = rb_ary_new();
rb_ary_store(passthrough, 0, callback);
rb_ary_store(passthrough, 1, user_data);
rb_ary_store(passthrough, 2, self);
struct hook *hook = (struct hook *)malloc(sizeof(struct hook));
hook->cb = callback;
hook->ud = user_data;
hook->rUc = self;
VALUE r_hook;
VALUE hooks_list;
r_hook = Data_Wrap_Struct(Hook, mark_hook, free, hook);
hooks_list = rb_iv_get(self, "@hooks");
rb_ary_push(hooks_list, r_hook);
uint32_t htype = NUM2UINT(hook_type);
if(htype == UC_HOOK_INSN){
switch(NUM2INT(arg1)){
case UC_X86_INS_IN:
err = uc_hook_add(_uc, &trace, htype, cb_hook_insn_in,(void *)passthrough, NUM2ULL(begin), NUM2ULL(end), NUM2INT(arg1));
err = uc_hook_add(_uc, &hook->trace, htype, cb_hook_insn_in,(void *)hook, NUM2ULL(begin), NUM2ULL(end), NUM2INT(arg1));
break;
case UC_X86_INS_OUT:
err = uc_hook_add(_uc, &trace, htype, cb_hook_insn_out,(void *)passthrough, NUM2ULL(begin), NUM2ULL(end), NUM2INT(arg1));
err = uc_hook_add(_uc, &hook->trace, htype, cb_hook_insn_out,(void *)hook, NUM2ULL(begin), NUM2ULL(end), NUM2INT(arg1));
break;
case UC_X86_INS_SYSCALL:
case UC_X86_INS_SYSENTER:
err = uc_hook_add(_uc, &trace, htype, cb_hook_insn_syscall,(void *)passthrough, NUM2ULL(begin), NUM2ULL(end), NUM2INT(arg1));
err = uc_hook_add(_uc, &hook->trace, htype, cb_hook_insn_syscall,(void *)hook, NUM2ULL(begin), NUM2ULL(end), NUM2INT(arg1));
break;
}
}
else if(htype == UC_HOOK_INTR){
err = uc_hook_add(_uc, &trace, htype, cb_hook_intr,(void *)passthrough, NUM2ULL(begin), NUM2ULL(end));
err = uc_hook_add(_uc, &hook->trace, htype, cb_hook_intr,(void *)hook, NUM2ULL(begin), NUM2ULL(end));
}
else if(htype == UC_HOOK_CODE || htype == UC_HOOK_BLOCK){
err = uc_hook_add(_uc, &trace, htype, cb_hook_code,(void *)passthrough, NUM2ULL(begin), NUM2ULL(end));
err = uc_hook_add(_uc, &hook->trace, htype, cb_hook_code,(void *)hook, NUM2ULL(begin), NUM2ULL(end));
}
else if (htype & UC_HOOK_MEM_READ_UNMAPPED
|| htype & UC_HOOK_MEM_WRITE_UNMAPPED
@ -391,24 +455,28 @@ VALUE m_uc_hook_add(int argc, VALUE* argv, VALUE self){
|| htype & UC_HOOK_MEM_UNMAPPED
|| htype & UC_HOOK_MEM_PROT
|| htype & UC_HOOK_MEM_INVALID) {
err = uc_hook_add(_uc, &trace, htype, cb_hook_mem_invalid,(void *)passthrough, NUM2ULL(begin), NUM2ULL(end));
err = uc_hook_add(_uc, &hook->trace, htype, cb_hook_mem_invalid,(void *)hook, NUM2ULL(begin), NUM2ULL(end));
}
else{
err = uc_hook_add(_uc, &trace, htype, cb_hook_mem_access,(void *)passthrough, NUM2ULL(begin), NUM2ULL(end));
err = uc_hook_add(_uc, &hook->trace, htype, cb_hook_mem_access,(void *)hook, NUM2ULL(begin), NUM2ULL(end));
}
if (err != UC_ERR_OK) {
rb_raise(UcError, "%s", uc_strerror(err));
}
return INT2NUM(trace);
return r_hook;
}
VALUE m_uc_hook_del(VALUE self, VALUE hook){
int h = NUM2INT(hook);
uc_err err;
uc_engine *_uc;
Data_Get_Struct(rb_iv_get(self,"@uch"), uc_engine, _uc);
err = uc_hook_del(_uc, h);
struct hook *h;
Data_Get_Struct(hook, struct hook, h);
err = uc_hook_del(_uc, h->trace);
rb_ary_delete(rb_iv_get(self, "@hooks"), hook);
if (err != UC_ERR_OK) {
rb_raise(UcError, "%s", uc_strerror(err));
}