From 74aaf3b321487be1e18c8de6f5dc26c3a39c12a6 Mon Sep 17 00:00:00 2001 From: Adrian Herrera Date: Wed, 6 Apr 2016 09:21:36 +1000 Subject: [PATCH] Haskell bindings These Haskell bindings make large use of c2hs to generate much of the code, so Unicorn's const_generator is not used. The emulator is based on the Either monad transformer. The IO monad is used to run the underlying Unicorn library, while the Either monad is used to handle errors. Instructions on how to build the bindings are located in bindings/haskell/README.TXT. The same samples found in samples/ can be found in bindings/haskell/samples. They should produce the same output, with slight differences in their error handling and messaging. --- CREDITS.TXT | 1 + README.md | 2 +- bindings/README | 1 + bindings/haskell/.gitignore | 16 + bindings/haskell/README.TXT | 21 + bindings/haskell/Setup.hs | 2 + bindings/haskell/samples/SampleArm.hs | 133 ++++ bindings/haskell/samples/SampleArm64.hs | 85 +++ bindings/haskell/samples/SampleM68k.hs | 142 ++++ bindings/haskell/samples/SampleMips.hs | 129 ++++ bindings/haskell/samples/SampleSparc.hs | 85 +++ bindings/haskell/samples/SampleX86.hs | 675 ++++++++++++++++++ bindings/haskell/samples/Shellcode.hs | 153 ++++ bindings/haskell/src/Unicorn.hs | 284 ++++++++ bindings/haskell/src/Unicorn/CPU/Arm.chs | 29 + bindings/haskell/src/Unicorn/CPU/Arm64.chs | 29 + bindings/haskell/src/Unicorn/CPU/M68k.chs | 29 + bindings/haskell/src/Unicorn/CPU/Mips.chs | 61 ++ bindings/haskell/src/Unicorn/CPU/Sparc.chs | 29 + bindings/haskell/src/Unicorn/CPU/X86.chs | 65 ++ bindings/haskell/src/Unicorn/Hook.hs | 224 ++++++ .../haskell/src/Unicorn/Internal/Core.chs | 52 ++ .../haskell/src/Unicorn/Internal/Hook.chs | 415 +++++++++++ .../haskell/src/Unicorn/Internal/Unicorn.chs | 242 +++++++ bindings/haskell/src/Unicorn/Internal/Util.hs | 25 + bindings/haskell/src/cbits/unicorn_wrapper.c | 8 + .../haskell/src/include/unicorn_wrapper.h | 16 + bindings/haskell/unicorn.cabal | 42 ++ 28 files changed, 2994 insertions(+), 1 deletion(-) create mode 100644 bindings/haskell/.gitignore create mode 100644 bindings/haskell/README.TXT create mode 100644 bindings/haskell/Setup.hs create mode 100644 bindings/haskell/samples/SampleArm.hs create mode 100644 bindings/haskell/samples/SampleArm64.hs create mode 100644 bindings/haskell/samples/SampleM68k.hs create mode 100644 bindings/haskell/samples/SampleMips.hs create mode 100644 bindings/haskell/samples/SampleSparc.hs create mode 100644 bindings/haskell/samples/SampleX86.hs create mode 100644 bindings/haskell/samples/Shellcode.hs create mode 100644 bindings/haskell/src/Unicorn.hs create mode 100644 bindings/haskell/src/Unicorn/CPU/Arm.chs create mode 100644 bindings/haskell/src/Unicorn/CPU/Arm64.chs create mode 100644 bindings/haskell/src/Unicorn/CPU/M68k.chs create mode 100644 bindings/haskell/src/Unicorn/CPU/Mips.chs create mode 100644 bindings/haskell/src/Unicorn/CPU/Sparc.chs create mode 100644 bindings/haskell/src/Unicorn/CPU/X86.chs create mode 100644 bindings/haskell/src/Unicorn/Hook.hs create mode 100644 bindings/haskell/src/Unicorn/Internal/Core.chs create mode 100644 bindings/haskell/src/Unicorn/Internal/Hook.chs create mode 100644 bindings/haskell/src/Unicorn/Internal/Unicorn.chs create mode 100644 bindings/haskell/src/Unicorn/Internal/Util.hs create mode 100644 bindings/haskell/src/cbits/unicorn_wrapper.c create mode 100644 bindings/haskell/src/include/unicorn_wrapper.h create mode 100644 bindings/haskell/unicorn.cabal diff --git a/CREDITS.TXT b/CREDITS.TXT index 8156f74c..72a5ff4c 100644 --- a/CREDITS.TXT +++ b/CREDITS.TXT @@ -60,3 +60,4 @@ Ryan Hileman: Go binding Antonio Parata: .NET binding Jonathon Reinhart: C unit test Sascha Schirra: Ruby binding +Adrian Herrera: Haskell binding diff --git a/README.md b/README.md index 0f909c7d..8a5bbde8 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Unicorn offers some unparalleled features: - Multi-architecture: ARM, ARM64 (ARMv8), M68K, MIPS, SPARC, and X86 (16, 32, 64-bit) - Clean/simple/lightweight/intuitive architecture-neutral API -- Implemented in pure C language, with bindings for Ruby, Python, Java, MSVC, .NET, Go and Delphi/Free Pascal. +- Implemented in pure C language, with bindings for Ruby, Python, Java, MSVC, .NET, Go, Delphi/Free Pascal and Haskell. - Native support for Windows & *nix (with Mac OSX, Linux, *BSD & Solaris confirmed) - High performance via Just-In-Time compilation - Support for fine-grained instrumentation at various levels diff --git a/bindings/README b/bindings/README index 4c4a2430..a588e24c 100644 --- a/bindings/README +++ b/bindings/README @@ -8,6 +8,7 @@ The following bindings are contributed by community. - .NET binding: by Antonio Parata. - MSVC binding: by Zak Escano - Ruby binding: by Sascha Schirra +- Haskell binding: by Adrian Herrera. More bindings created & maintained externally by community are available as follows. diff --git a/bindings/haskell/.gitignore b/bindings/haskell/.gitignore new file mode 100644 index 00000000..3f4aa15b --- /dev/null +++ b/bindings/haskell/.gitignore @@ -0,0 +1,16 @@ +dist +cabal-dev +*.o +*.hi +*.chi +*.chs.h +*.dyn_o +*.dyn_hi +.virtualenv +.hpc +.hsenv +.cabal-sandbox/ +cabal.sandbox.config +*.prof +*.aux +*.hp diff --git a/bindings/haskell/README.TXT b/bindings/haskell/README.TXT new file mode 100644 index 00000000..dc629c95 --- /dev/null +++ b/bindings/haskell/README.TXT @@ -0,0 +1,21 @@ +This documentation explains how to install Haskell binding for Unicorn +from source. + + +0. Install the core engine as dependency + + Follow README in the root directory to compile & install the core. + + On *nix, this can simply be done by (project root directory): + + $ sudo ./make.sh install + + +1. Change directories into the Haskell bindings, build and install + + $ cd bindings/haskell + $ cabal build + $ cabal install + +If the build fails, try installing c2hs manually (cabal install c2hs) and make +sure that $HOME/.cabal/bin is on your PATH. diff --git a/bindings/haskell/Setup.hs b/bindings/haskell/Setup.hs new file mode 100644 index 00000000..9a994af6 --- /dev/null +++ b/bindings/haskell/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/bindings/haskell/samples/SampleArm.hs b/bindings/haskell/samples/SampleArm.hs new file mode 100644 index 00000000..3de51f36 --- /dev/null +++ b/bindings/haskell/samples/SampleArm.hs @@ -0,0 +1,133 @@ +-- Sample code to demonstrate how to emulate ARM code + +import Unicorn +import Unicorn.Hook +import qualified Unicorn.CPU.Arm as Arm + +import qualified Data.ByteString as BS +import Data.Word +import qualified Numeric as N (showHex) + +-- Code to be emulated +-- +-- mov r0, #0x37; sub r1, r2, r3 +armCode :: BS.ByteString +armCode = BS.pack [0x37, 0x00, 0xa0, 0xe3, 0x03, 0x10, 0x42, 0xe0] + +-- sub sp, #0xc +thumbCode :: BS.ByteString +thumbCode = BS.pack [0x83, 0xb0] + +-- Memory address where emulation starts +address :: Word64 +address = 0x10000 + +-- Pretty-print integral as hex +showHex :: (Integral a, Show a) => a -> String +showHex = + flip N.showHex "" + +-- Calculate code length +codeLength :: Num a => BS.ByteString -> a +codeLength = + fromIntegral . BS.length + +hookBlock :: BlockHook () +hookBlock _ addr size _ = + putStrLn $ ">>> Tracing basic block at 0x" ++ showHex addr ++ + ", block size = 0x" ++ (maybe "0" showHex size) + +hookCode :: CodeHook () +hookCode _ addr size _ = + putStrLn $ ">>> Tracing instruction at 0x" ++ showHex addr ++ + ", instruction size = 0x" ++ (maybe "0" showHex size) + +testArm :: IO () +testArm = do + putStrLn "Emulate ARM code" + + result <- runEmulator $ do + -- Initialize emulator in ARM mode + uc <- open ArchArm [ModeArm] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address armCode + + -- Initialize machine registers + regWrite uc Arm.R0 0x1234 + regWrite uc Arm.R2 0x6789 + regWrite uc Arm.R3 0x3333 + + -- Tracing all basic blocks with customized callback + blockHookAdd uc hookBlock () 1 0 + + -- Tracing one instruction at address with customized callback + codeHookAdd uc hookCode () address address + + -- Emulate machine code in infinite time (last param = Nothing), or + -- when finishing all the code + let codeLen = codeLength armCode + start uc address (address + codeLen) Nothing Nothing + + -- Return the results + r0 <- regRead uc Arm.R0 + r1 <- regRead uc Arm.R1 + + return (r0, r1) + case result of + Right (r0, r1) -> do + -- Now print out some registers + putStrLn ">>> Emulation done. Below is the CPU context" + putStrLn $ ">>> R0 = 0x" ++ showHex r0 + putStrLn $ ">>> R1 = 0x" ++ showHex r1 + Left err -> putStrLn $ "Failed with error: " ++ show err ++ " (" ++ + strerror err ++ ")" + +testThumb :: IO () +testThumb = do + putStrLn "Emulate THUMB code" + + result <- runEmulator $ do + -- Initialize emulator in ARM mode + uc <- open ArchArm [ModeThumb] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address thumbCode + + -- Initialize machine registers + regWrite uc Arm.Sp 0x1234 + + -- Tracing all basic blocks with customized callback + blockHookAdd uc hookBlock () 1 0 + + -- Tracing one instruction at address with customized callback + codeHookAdd uc hookCode () address address + + -- Emulate machine code in infinite time (last param = Nothing), or + -- when finishing all the code + let codeLen = codeLength thumbCode + start uc address (address + codeLen) Nothing Nothing + + -- Return the results + sp <- regRead uc Arm.Sp + + return sp + case result of + Right sp -> do + -- Now print out some registers + putStrLn ">>> Emulation done. Below is the CPU context" + putStrLn $ ">>> SP = 0x" ++ showHex sp + Left err -> putStrLn $ "Failed with error: " ++ show err ++ " (" ++ + strerror err ++ ")" + +main :: IO () +main = do + testArm + putStrLn "==========================" + testThumb diff --git a/bindings/haskell/samples/SampleArm64.hs b/bindings/haskell/samples/SampleArm64.hs new file mode 100644 index 00000000..fbed5659 --- /dev/null +++ b/bindings/haskell/samples/SampleArm64.hs @@ -0,0 +1,85 @@ +-- Sample code to demonstrate how to emulate ARM64 code + +import Unicorn +import Unicorn.Hook +import qualified Unicorn.CPU.Arm64 as Arm64 + +import qualified Data.ByteString as BS +import Data.Word +import qualified Numeric as N (showHex) + +-- Code to be emulated +-- +-- add x11, x13, x15 +armCode :: BS.ByteString +armCode = BS.pack [0xab, 0x01, 0x0f, 0x8b] + +-- Memory address where emulation starts +address :: Word64 +address = 0x10000 + +-- Pretty-print integral as hex +showHex :: (Integral a, Show a) => a -> String +showHex = + flip N.showHex "" + +-- Calculate code length +codeLength :: Num a => BS.ByteString -> a +codeLength = + fromIntegral . BS.length + +hookBlock :: BlockHook () +hookBlock _ addr size _ = + putStrLn $ ">>> Tracing basic block at 0x" ++ showHex addr ++ + ", block size = 0x" ++ (maybe "0" showHex size) + +hookCode :: CodeHook () +hookCode _ addr size _ = + putStrLn $ ">>> Tracing instruction at 0x" ++ showHex addr ++ + ", instruction size = 0x" ++ (maybe "0" showHex size) + +testArm64 :: IO () +testArm64 = do + putStrLn "Emulate ARM64 code" + + result <- runEmulator $ do + -- Initialize emulator in ARM mode + uc <- open ArchArm64 [ModeArm] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address armCode + + -- Initialize machine registers + regWrite uc Arm64.X11 0x1234 + regWrite uc Arm64.X13 0x6789 + regWrite uc Arm64.X15 0x3333 + + -- Tracing all basic blocks with customized callback + blockHookAdd uc hookBlock () 1 0 + + -- Tracing one instruction at address with customized callback + codeHookAdd uc hookCode ()address address + + -- Emulate machine code in infinite time (last param = Nothing), or + -- when finishing all the code + let codeLen = codeLength armCode + start uc address (address + codeLen) Nothing Nothing + + -- Return the results + x11 <- regRead uc Arm64.X11 + + return x11 + case result of + Right x11 -> do + -- Now print out some registers + putStrLn $ ">>> Emulation done. Below is the CPU context" + putStrLn $ ">>> X11 = 0x" ++ showHex x11 + Left err -> putStrLn $ "Failed with error: " ++ show err ++ " (" ++ + strerror err ++ ")" + +main :: IO () +main = + testArm64 diff --git a/bindings/haskell/samples/SampleM68k.hs b/bindings/haskell/samples/SampleM68k.hs new file mode 100644 index 00000000..d77f4cfd --- /dev/null +++ b/bindings/haskell/samples/SampleM68k.hs @@ -0,0 +1,142 @@ +-- Sample code to demonstrate how to emulate m68k code + +import Unicorn +import Unicorn.Hook +import qualified Unicorn.CPU.M68k as M68k + +import qualified Data.ByteString as BS +import Data.Word +import qualified Numeric as N (showHex) + +-- Code to be emulated +-- +-- movq #-19, %d3 +m68kCode :: BS.ByteString +m68kCode = BS.pack [0x76, 0xed] + +-- Memory address where emulation starts +address :: Word64 +address = 0x10000 + +-- Pretty-print integral as hex +showHex :: (Integral a, Show a) => a -> String +showHex = + flip N.showHex "" + +-- Calculate code length +codeLength :: Num a => BS.ByteString -> a +codeLength = + fromIntegral . BS.length + +hookBlock :: BlockHook () +hookBlock _ addr size _ = + putStrLn $ ">>> Tracing basic block at 0x" ++ showHex addr ++ + ", block size = 0x" ++ (maybe "0" showHex size) + +hookCode :: CodeHook () +hookCode _ addr size _ = + putStrLn $ ">>> Tracing instruction at 0x" ++ showHex addr ++ + ", instruction size = 0x" ++ (maybe "0" showHex size) + +testM68k :: IO () +testM68k = do + putStrLn "Emulate M68K code" + + result <- runEmulator $ do + -- Initialize emulator in M68K mode + uc <- open ArchM68k [ModeBigEndian] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address m68kCode + + -- Initialize machine registers + regWrite uc M68k.D0 0x0000 + regWrite uc M68k.D1 0x0000 + regWrite uc M68k.D2 0x0000 + regWrite uc M68k.D3 0x0000 + regWrite uc M68k.D4 0x0000 + regWrite uc M68k.D5 0x0000 + regWrite uc M68k.D6 0x0000 + regWrite uc M68k.D7 0x0000 + + regWrite uc M68k.A0 0x0000 + regWrite uc M68k.A1 0x0000 + regWrite uc M68k.A2 0x0000 + regWrite uc M68k.A3 0x0000 + regWrite uc M68k.A4 0x0000 + regWrite uc M68k.A5 0x0000 + regWrite uc M68k.A6 0x0000 + regWrite uc M68k.A7 0x0000 + + regWrite uc M68k.Pc 0x0000 + regWrite uc M68k.Sr 0x0000 + + -- Tracing all basic blocks with customized callback + blockHookAdd uc hookBlock () 1 0 + + -- Tracing all instruction + codeHookAdd uc hookCode () 1 0 + + -- Emulate machine code in infinite time (last param = Nothing), or + -- when finishing all the code + let codeLen = codeLength m68kCode + start uc address (address + codeLen) Nothing Nothing + + -- Return the results + d0 <- regRead uc M68k.D0 + d1 <- regRead uc M68k.D1 + d2 <- regRead uc M68k.D2 + d3 <- regRead uc M68k.D3 + d4 <- regRead uc M68k.D4 + d5 <- regRead uc M68k.D5 + d6 <- regRead uc M68k.D6 + d7 <- regRead uc M68k.D7 + + a0 <- regRead uc M68k.A0 + a1 <- regRead uc M68k.A1 + a2 <- regRead uc M68k.A2 + a3 <- regRead uc M68k.A3 + a4 <- regRead uc M68k.A4 + a5 <- regRead uc M68k.A5 + a6 <- regRead uc M68k.A6 + a7 <- regRead uc M68k.A7 + + pc <- regRead uc M68k.Pc + sr <- regRead uc M68k.Sr + + return (d0, d1, d2, d3, d4, d5, d6, d7, + a0, a1, a2, a3, a4, a5, a6, a7, + pc, sr) + case result of + Right (d0, d1, d2, d3, d4, d5, d6, d7, + a0, a1, a2, a3, a4, a5, a6, a7, + pc, sr) -> do + -- Now print out some registers + putStrLn ">>> Emulation done. Below is the CPU context" + putStrLn $ ">>> A0 = 0x" ++ showHex a0 ++ + "\t\t>>> D0 = 0x" ++ showHex d0 + putStrLn $ ">>> A1 = 0x" ++ showHex a1 ++ + "\t\t>>> D1 = 0x" ++ showHex d1 + putStrLn $ ">>> A2 = 0x" ++ showHex a2 ++ + "\t\t>>> D2 = 0x" ++ showHex d2 + putStrLn $ ">>> A3 = 0x" ++ showHex a3 ++ + "\t\t>>> D3 = 0x" ++ showHex d3 + putStrLn $ ">>> A4 = 0x" ++ showHex a4 ++ + "\t\t>>> D4 = 0x" ++ showHex d4 + putStrLn $ ">>> A5 = 0x" ++ showHex a5 ++ + "\t\t>>> D5 = 0x" ++ showHex d5 + putStrLn $ ">>> A6 = 0x" ++ showHex a6 ++ + "\t\t>>> D6 = 0x" ++ showHex d6 + putStrLn $ ">>> A7 = 0x" ++ showHex a7 ++ + "\t\t>>> D7 = 0x" ++ showHex d7 + putStrLn $ ">>> PC = 0x" ++ showHex pc + putStrLn $ ">>> SR = 0x" ++ showHex sr + Left err -> putStrLn $ "Failed with error: " ++ show err ++ " (" ++ + strerror err ++ ")" + +main :: IO () +main = + testM68k diff --git a/bindings/haskell/samples/SampleMips.hs b/bindings/haskell/samples/SampleMips.hs new file mode 100644 index 00000000..83efdbd3 --- /dev/null +++ b/bindings/haskell/samples/SampleMips.hs @@ -0,0 +1,129 @@ +-- Sample code to demonstrate how to emulate Mips code (big endian) + +import Unicorn +import Unicorn.Hook +import qualified Unicorn.CPU.Mips as Mips + +import qualified Data.ByteString as BS +import Data.Word +import qualified Numeric as N (showHex) + +-- Code to be emulated +-- +-- ori $at, $at, 0x3456 +mipsCodeEb :: BS.ByteString +mipsCodeEb = BS.pack [0x34, 0x21, 0x34, 0x56] + +-- ori $at, $at, 0x3456 +mipsCodeEl :: BS.ByteString +mipsCodeEl = BS.pack [0x56, 0x34, 0x21, 0x34] + +-- Memory address where emulation starts +address :: Word64 +address = 0x10000 + +-- Pretty-print integral as hex +showHex :: (Integral a, Show a) => a -> String +showHex = + flip N.showHex "" + +-- Calculate code length +codeLength :: Num a => BS.ByteString -> a +codeLength = + fromIntegral . BS.length + +hookBlock :: BlockHook () +hookBlock _ addr size _ = + putStrLn $ ">>> Tracing basic block at 0x" ++ showHex addr ++ + ", block size = 0x" ++ (maybe "0" showHex size) + +hookCode :: CodeHook () +hookCode _ addr size _ = + putStrLn $ ">>> Tracing instruction at 0x" ++ showHex addr ++ + ", instruction size = 0x" ++ (maybe "0" showHex size) + +testMipsEb :: IO () +testMipsEb = do + putStrLn "Emulate MIPS code (big-endian)" + + result <- runEmulator $ do + -- Initialize emulator in MIPS mode + uc <- open ArchMips [ModeMips32, ModeBigEndian] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address mipsCodeEb + + -- Initialise machine registers + regWrite uc Mips.Reg1 0x6789 + + -- Tracing all basic blocks with customized callback + blockHookAdd uc hookBlock () 1 0 + + -- Tracing one instruction at address with customized callback + codeHookAdd uc hookCode () address address + + -- Emulate machine code in infinite time (last param = Nothing), or + -- when finishing all the code + let codeLen = codeLength mipsCodeEb + start uc address (address + codeLen) Nothing Nothing + + -- Return the results + r1 <- regRead uc Mips.Reg1 + + return r1 + case result of + Right r1 -> do + -- Now print out some registers + putStrLn ">>> Emulation done. Below is the CPU context" + putStrLn $ ">>> R1 = 0x" ++ showHex r1 + Left err -> putStrLn $ "Failed with error: " ++ show err ++ " (" ++ + strerror err ++ ")" + +testMipsEl :: IO () +testMipsEl = do + putStrLn "===========================" + putStrLn "Emulate MIPS code (little-endian)" + + result <- runEmulator $ do + -- Initialize emulator in MIPS mode + uc <- open ArchMips [ModeMips32, ModeLittleEndian] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address mipsCodeEl + + -- Initialize machine registers + regWrite uc Mips.Reg1 0x6789 + + -- Tracing all basic blocks with customized callback + blockHookAdd uc hookBlock () 1 0 + + -- Tracing one instruction at address with customized callback + codeHookAdd uc hookCode () address address + + -- Emulate machine code in infinite time (last param = Nothing), or + -- when finishing all the code + let codeLen = codeLength mipsCodeEl + start uc address (address + codeLen) Nothing Nothing + + -- Return the results + r1 <- regRead uc Mips.Reg1 + + return r1 + case result of + Right r1 -> do + -- Now print out some registers + putStrLn ">>> Emulation done. Below is the CPU context" + putStrLn $ ">>> R1 = 0x" ++ showHex r1 + Left err -> putStrLn $ "Failed with error: " ++ show err ++ " (" ++ + strerror err ++ ")" + +main :: IO () +main = do + testMipsEb + testMipsEl diff --git a/bindings/haskell/samples/SampleSparc.hs b/bindings/haskell/samples/SampleSparc.hs new file mode 100644 index 00000000..02a0984b --- /dev/null +++ b/bindings/haskell/samples/SampleSparc.hs @@ -0,0 +1,85 @@ +-- Sample code to demonstrate how to emulate Sparc code + +import Unicorn +import Unicorn.Hook +import qualified Unicorn.CPU.Sparc as Sparc + +import qualified Data.ByteString as BS +import Data.Word +import qualified Numeric as N (showHex) + +-- Code to be emulated +-- +-- add %g1, %g2, %g3 +sparcCode :: BS.ByteString +sparcCode = BS.pack [0x86, 0x00, 0x40, 0x02] + +-- Memory address where emulation starts +address :: Word64 +address = 0x10000 + +-- Pretty-print integral as hex +showHex :: (Integral a, Show a) => a -> String +showHex = + flip N.showHex "" + +-- Calculate code length +codeLength :: Num a => BS.ByteString -> a +codeLength = + fromIntegral . BS.length + +hookBlock :: BlockHook () +hookBlock _ addr size _ = + putStrLn $ ">>> Tracing basic block at 0x" ++ showHex addr ++ + ", block size = 0x" ++ (maybe "0" showHex size) + +hookCode :: CodeHook () +hookCode _ addr size _ = + putStrLn $ ">>> Tracing instruction at 0x" ++ showHex addr ++ + ", instruction size = 0x" ++ (maybe "0" showHex size) + +testSparc :: IO () +testSparc = do + putStrLn "Emulate SPARC code" + + result <- runEmulator $ do + -- Initialize emulator in Sparc mode + uc <- open ArchSparc [ModeSparc32, ModeBigEndian] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address sparcCode + + -- Initialize machine registers + regWrite uc Sparc.G1 0x1230 + regWrite uc Sparc.G2 0x6789 + regWrite uc Sparc.G3 0x5555 + + -- Tracing all basic blocks with customized callback + blockHookAdd uc hookBlock () 1 0 + + -- Tracing all instructions with customized callback + codeHookAdd uc hookCode () 1 0 + + -- Emulate machine code in infinite time (last param = Nothing), or + -- when finishing all the code + let codeLen = codeLength sparcCode + start uc address (address + codeLen) Nothing Nothing + + -- Return results + g3 <- regRead uc Sparc.G3 + + return g3 + case result of + Right g3 -> do + -- Now print out some registers + putStrLn ">>> Emulation done. Below is the CPU context" + putStrLn $ ">>> G3 = 0x" ++ showHex g3 + Left err -> putStrLn $ "Failed with error: " ++ show err ++ " (" ++ + strerror err ++ ")" + +main :: IO () +main = + testSparc diff --git a/bindings/haskell/samples/SampleX86.hs b/bindings/haskell/samples/SampleX86.hs new file mode 100644 index 00000000..fad686af --- /dev/null +++ b/bindings/haskell/samples/SampleX86.hs @@ -0,0 +1,675 @@ +-- Sample code to demonstrate how to emulate X86 code + +import Unicorn +import Unicorn.Hook +import qualified Unicorn.CPU.X86 as X86 + +import Control.Monad.Trans.Class (lift) +import qualified Data.ByteString as BS +import Data.Word +import qualified Numeric as N (showHex) +import System.Environment + +-- Code to be emulated +-- +-- inc ecx; dec edx +x86Code32 :: BS.ByteString +x86Code32 = BS.pack [0x41, 0x4a] + +-- jmp 4; nop; nop; nop; nop; nop; nop +x86Code32Jump :: BS.ByteString +x86Code32Jump = BS.pack [0xeb, 0x02, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90] + +-- inc ecx; dec edx; jmp self-loop +x86Code32Loop :: BS.ByteString +x86Code32Loop = BS.pack [0x41, 0x4a, 0xeb, 0xfe] + +-- mov [0xaaaaaaaa], ecx; inc ecx; dec edx +x86Code32MemWrite :: BS.ByteString +x86Code32MemWrite = BS.pack [0x89, 0x0d, 0xaa, 0xaa, 0xaa, 0xaa, 0x41, 0x4a] + +-- mov ecx, [0xaaaaaaaa]; inc ecx; dec edx +x86Code32MemRead :: BS.ByteString +x86Code32MemRead = BS.pack [0x8b, 0x0d, 0xaa, 0xaa, 0xaa, 0xaa, 0x41, 0x4a] + +-- jmp ouside; inc ecx; dec edx +x86Code32JmpInvalid :: BS.ByteString +x86Code32JmpInvalid = BS.pack [0xe9, 0xe9, 0xee, 0xee, 0xee, 0x41, 0x4a] + +-- inc ecx; in al, 0x3f; dec edx; out 0x46, al; inc ebx +x86Code32InOut :: BS.ByteString +x86Code32InOut = BS.pack [0x41, 0xe4, 0x3f, 0x4a, 0xe6, 0x46, 0x43] + +x86Code64 :: BS.ByteString +x86Code64 = BS.pack [0x41, 0xbc, 0x3b, 0xb0, 0x28, 0x2a, 0x49, 0x0f, 0xc9, + 0x90, 0x4d, 0x0f, 0xad, 0xcf, 0x49, 0x87, 0xfd, 0x90, + 0x48, 0x81, 0xd2, 0x8a, 0xce, 0x77, 0x35, 0x48, 0xf7, + 0xd9, 0x4d, 0x29, 0xf4, 0x49, 0x81, 0xc9, 0xf6, 0x8a, + 0xc6, 0x53, 0x4d, 0x87, 0xed, 0x48, 0x0f, 0xad, 0xd2, + 0x49, 0xf7, 0xd4, 0x48, 0xf7, 0xe1, 0x4d, 0x19, 0xc5, + 0x4d, 0x89, 0xc5, 0x48, 0xf7, 0xd6, 0x41, 0xb8, 0x4f, + 0x8d, 0x6b, 0x59, 0x4d, 0x87, 0xd0, 0x68, 0x6a, 0x1e, + 0x09, 0x3c, 0x59] + +-- add byte ptr [bx + si], al +x86Code16 :: BS.ByteString +x86Code16 = BS.pack [0x00, 0x00] + +-- SYSCALL +x86Code64Syscall :: BS.ByteString +x86Code64Syscall = BS.pack [0x0f, 0x05] + +-- Memory address where emulation starts +address :: Word64 +address = 0x1000000 + +-- Pretty-print integral as hex +showHex :: (Integral a, Show a) => a -> String +showHex i = + N.showHex (fromIntegral i :: Word64) "" + +-- Pretty-print byte string as hex +showHexBS :: BS.ByteString -> String +showHexBS = + concatMap (flip N.showHex "") . reverse . BS.unpack + +-- Write a string (with a newline character) to standard output in the emulator +emuPutStrLn :: String -> Emulator () +emuPutStrLn = + lift . putStrLn + +-- Calculate code length +codeLength :: Num a => BS.ByteString -> a +codeLength = + fromIntegral . BS.length + +-- Callback for tracing basic blocks +hookBlock :: BlockHook () +hookBlock _ addr size _ = + putStrLn $ ">>> Tracing basic block at 0x" ++ showHex addr ++ + ", block size = 0x" ++ (maybe "0" showHex size) + +-- Callback for tracing instruction +hookCode :: CodeHook () +hookCode uc addr size _ = do + runEmulator $ do + emuPutStrLn $ ">>> Tracing instruction at 0x" ++ showHex addr ++ + ", instruction size = 0x" ++ (maybe "0" showHex size) + + eflags <- regRead uc X86.Eflags + emuPutStrLn $ ">>> --- EFLAGS is 0x" ++ showHex eflags + return () + +-- Callback for tracing instruction +hookCode64 :: CodeHook () +hookCode64 uc addr size _ = do + runEmulator $ do + rip <- regRead uc X86.Rip + emuPutStrLn $ ">>> Tracing instruction at 0x" ++ showHex addr ++ + ", instruction size = 0x" ++ (maybe "0" showHex size) + emuPutStrLn $ ">>> RIP is 0x" ++ showHex rip + return () + +-- Callback for tracing memory access (READ or WRITE) +hookMemInvalid :: MemoryEventHook () +hookMemInvalid uc MemWriteUnmapped addr size (Just value) _ = do + runEmulator $ do + emuPutStrLn $ ">>> Missing memory is being WRITE at 0x" ++ + showHex addr ++ ", data size = " ++ show size ++ + ", data value = 0x" ++ showHex value + memMap uc 0xaaaa0000 (2 * 1024 * 1024) [ProtAll] + return True +hookMemInvalid _ _ _ _ _ _ = + return False + +hookMem64 :: MemoryHook () +hookMem64 _ MemRead addr size _ _ = + putStrLn $ ">>> Memory is being READ at 0x" ++ showHex addr ++ + ", data size = " ++ show size +hookMem64 _ MemWrite addr size (Just value) _ = + putStrLn $ ">>> Memory is being WRITE at 0x" ++ showHex addr ++ + ", data size = " ++ show size ++ ", data value = 0x" ++ + showHex value + +-- Callback for IN instruction (X86) +-- This returns the data read from the port +hookIn :: InHook () +hookIn uc port size _ = do + result <- runEmulator $ do + eip <- regRead uc X86.Eip + + emuPutStrLn $ "--- reading from port 0x" ++ showHex port ++ + ", size: " ++ show size ++ ", address: 0x" ++ showHex eip + + case size of + -- Read 1 byte to AL + 1 -> return 0xf1 + -- Read 2 byte to AX + 2 -> return 0xf2 + -- Read 4 byte to EAX + 4 -> return 0xf4 + -- Should never reach this + _ -> return 0 + case result of + Right r -> return r + Left _ -> return 0 + +-- Callback for OUT instruction (X86) +hookOut :: OutHook () +hookOut uc port size value _ = do + runEmulator $ do + eip <- regRead uc X86.Eip + + emuPutStrLn $ "--- writing to port 0x" ++ showHex port ++ ", size: " ++ + show size ++ ", value: 0x" ++ showHex value ++ + ", address: 0x" ++ showHex eip + + -- Confirm that value is indeed the value of AL/AX/EAX + case size of + 1 -> do + tmp <- regRead uc X86.Al + emuPutStrLn $ "--- register value = 0x" ++ showHex tmp + 2 -> do + tmp <- regRead uc X86.Ax + emuPutStrLn $ "--- register value = 0x" ++ showHex tmp + 4 -> do + tmp <- regRead uc X86.Eax + emuPutStrLn $ "--- register value = 0x" ++ showHex tmp + -- Should never reach this + _ -> return () + return () + +-- Callback for SYSCALL instruction (X86) +hookSyscall :: SyscallHook () +hookSyscall uc _ = do + runEmulator $ do + rax <- regRead uc X86.Rax + if rax == 0x100 then + regWrite uc X86.Rax 0x200 + else + emuPutStrLn $ "ERROR: was not expecting rax=0x" ++ showHex rax ++ + " in syscall" + return () + +testI386 :: IO () +testI386 = do + putStrLn "Emulate i386 code" + + result <- runEmulator $ do + -- Initialize emulator in X86-32bit mode + uc <- open ArchX86 [Mode32] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address x86Code32 + + -- Initialize machine registers + regWrite uc X86.Ecx 0x1234 + regWrite uc X86.Edx 0x7890 + + -- Tracing all basic blocks with customized callback + blockHookAdd uc hookBlock () 1 0 + + -- Tracing all instruction by having @begin > @end + codeHookAdd uc hookCode () 1 0 + + -- Emulate machine code in infinite time + let codeLen = codeLength x86Code32 + start uc address (address + codeLen) Nothing Nothing + + -- Now print out some registers + emuPutStrLn ">>> Emulation done. Below is the CPU context" + + ecx <- regRead uc X86.Ecx + edx <- regRead uc X86.Edx + emuPutStrLn $ ">>> ECX = 0x" ++ showHex ecx + emuPutStrLn $ ">>> EDX = 0x" ++ showHex edx + + -- Read from memory + tmp <- memRead uc address 4 + emuPutStrLn $ ">>> Read 4 bytes from [0x" ++ showHex address ++ + "] = 0x" ++ showHexBS tmp + case result of + Right _ -> return () + Left err -> putStrLn $ "Failed with error " ++ show err ++ ": " ++ + strerror err + +testI386Jump :: IO () +testI386Jump = do + putStrLn "===================================" + putStrLn "Emulate i386 code with jump" + + result <- runEmulator $ do + -- Initialize emulator in X86-32bit mode + uc <- open ArchX86 [Mode32] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address x86Code32Jump + + -- Tracing 1 basic block with customized callback + blockHookAdd uc hookBlock () address address + + -- Tracing 1 instruction at address + codeHookAdd uc hookCode () address address + + -- Emulate machine code ininfinite time + let codeLen = codeLength x86Code32Jump + start uc address (address + codeLen) Nothing Nothing + + emuPutStrLn ">>> Emulation done. Below is the CPU context" + case result of + Right _ -> return () + Left err -> putStrLn $ "Failed with error " ++ show err ++ ": " ++ + strerror err + +-- Emulate code that loop forever +testI386Loop :: IO () +testI386Loop = do + putStrLn "===================================" + putStrLn "Emulate i386 code that loop forever" + + result <- runEmulator $ do + -- Initialize emulator in X86-32bit mode + uc <- open ArchX86 [Mode32] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated in memory + memWrite uc address x86Code32Loop + + -- Initialize machine registers + regWrite uc X86.Ecx 0x1234 + regWrite uc X86.Edx 0x7890 + + -- Emulate machine code in 2 seconds, so we can quit even if the code + -- loops + let codeLen = codeLength x86Code32Loop + start uc address (address + codeLen) (Just $ 2 * 1000000) Nothing + + -- Now print out some registers + emuPutStrLn ">>> Emulation done. Below is the CPU context" + + ecx <- regRead uc X86.Ecx + edx <- regRead uc X86.Edx + + emuPutStrLn $ ">>> ECX = 0x" ++ showHex ecx + emuPutStrLn $ ">>> EDX = 0x" ++ showHex edx + case result of + Right _ -> return () + Left err -> putStrLn $ "Failed with error " ++ show err ++ ": " ++ + strerror err + +-- Emulate code that read invalid memory +testI386InvalidMemRead :: IO () +testI386InvalidMemRead = do + putStrLn "===================================" + putStrLn "Emulate i386 code that read from invalid memory" + + result <- runEmulator $ do + -- Initialize emulator in X86-32bit mode + uc <- open ArchX86 [Mode32] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address x86Code32MemRead + + -- Initialize machine registers + regWrite uc X86.Ecx 0x1234 + regWrite uc X86.Edx 0x7890 + + -- Tracing all basic block with customized callback + blockHookAdd uc hookBlock () 1 0 + + -- Tracing all instructions by having @beegin > @end + codeHookAdd uc hookCode () 1 0 + + -- Emulate machine code in infinite time + let codeLen = codeLength x86Code32MemRead + start uc address (address + codeLen) Nothing Nothing + + -- Now print out some registers + emuPutStrLn ">>> Emulation done. Below is the CPU context" + + ecx <- regRead uc X86.Ecx + edx <- regRead uc X86.Edx + + emuPutStrLn $ ">>> ECX = 0x" ++ showHex ecx + emuPutStrLn $ ">>> EDX = 0x" ++ showHex edx + case result of + Right _ -> return () + Left err -> putStrLn $ "Failed with error " ++ show err ++ ": " ++ + strerror err + +-- Emulate code that write invalid memory +testI386InvalidMemWrite :: IO () +testI386InvalidMemWrite = do + putStrLn "===================================" + putStrLn "Emulate i386 code that write to invalid memory" + + result <- runEmulator $ do + -- Initialize emulator in X86-32bit mode + uc <- open ArchX86 [Mode32] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address x86Code32MemWrite + + -- Initialize machine registers + regWrite uc X86.Ecx 0x1234 + regWrite uc X86.Edx 0x7890 + + -- Tracing all basic blocks with customized callback + blockHookAdd uc hookBlock () 1 0 + + -- Tracing all instruction by having @begin > @end + codeHookAdd uc hookCode () 1 0 + + -- Intercept invalid memory events + memoryEventHookAdd uc HookMemReadUnmapped hookMemInvalid () 1 0 + memoryEventHookAdd uc HookMemWriteUnmapped hookMemInvalid () 1 0 + + -- Emulate machine code in infinite time + let codeLen = codeLength x86Code32MemWrite + start uc address (address + codeLen) Nothing Nothing + + -- Now print out some registers + emuPutStrLn ">>> Emulation done. Below is the CPU context" + + ecx <- regRead uc X86.Ecx + edx <- regRead uc X86.Edx + emuPutStrLn $ ">>> ECX = 0x" ++ showHex ecx + emuPutStrLn $ ">>> EDX = 0x" ++ showHex edx + + -- Read from memory + tmp <- memRead uc 0xaaaaaaaa 4 + emuPutStrLn $ ">>> Read 4 bytes from [0x" ++ showHex 0xaaaaaaaa ++ + "] = 0x" ++ showHexBS tmp + + tmp <- memRead uc 0xffffffaa 4 + emuPutStrLn $ ">>> Read 4 bytes from [0x" ++ showHex 0xffffffaa ++ + "] = 0x" ++ showHexBS tmp + case result of + Right _ -> return () + Left err -> putStrLn $ "Failed with error " ++ show err ++ ": " ++ + strerror err + +-- Emulate code that jump to invalid memory +testI386JumpInvalid :: IO () +testI386JumpInvalid = do + putStrLn "===================================" + putStrLn "Emulate i386 code that jumps to invalid memory" + + result <- runEmulator $ do + -- Initialize emulator in X86-32bit mode + uc <- open ArchX86 [Mode32] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address x86Code32JmpInvalid + + -- Initialize machine registers + regWrite uc X86.Ecx 0x1234 + regWrite uc X86.Edx 0x7890 + + -- Tracing all basic blocks with customized callback + blockHookAdd uc hookBlock () 1 0 + + -- Tracing all instructions by having @begin > @end + codeHookAdd uc hookCode () 1 0 + + -- Emulate machine code in infinite time + let codeLen = codeLength x86Code32JmpInvalid + start uc address (address + codeLen) Nothing Nothing + + -- Now print out some registers + emuPutStrLn ">>> Emulation done. Below is the CPU context" + + ecx <- regRead uc X86.Ecx + edx <- regRead uc X86.Edx + + emuPutStrLn $ ">>> ECX = 0x" ++ showHex ecx + emuPutStrLn $ ">>> EDX = 0x" ++ showHex edx + case result of + Right _ -> return () + Left err -> putStrLn $ "Failed with error " ++ show err ++ ": " ++ + strerror err + +testI386InOut :: IO () +testI386InOut = do + putStrLn "===================================" + putStrLn "Emulate i386 code with IN/OUT instructions" + + result <- runEmulator $ do + -- Initialize emulator in X86-32bit mode + uc <- open ArchX86 [Mode32] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address x86Code32InOut + + -- Initialize machine registers + regWrite uc X86.Eax 0x1234 + regWrite uc X86.Ecx 0x6789 + + -- Tracing all basic blocks with customized callback + blockHookAdd uc hookBlock () 1 0 + + -- Tracing all instructions + codeHookAdd uc hookCode () 1 0 + + -- uc IN instruction + inHookAdd uc hookIn () 1 0 + + -- uc OUT instruction + outHookAdd uc hookOut () 1 0 + + -- Emulate machine code in infinite time + let codeLen = codeLength x86Code32InOut + start uc address (address + codeLen) Nothing Nothing + + -- Now print out some registers + emuPutStrLn ">>> Emulation done. Below is the CPU context" + + eax <- regRead uc X86.Eax + ecx <- regRead uc X86.Ecx + + emuPutStrLn $ ">>> EAX = 0x" ++ showHex eax + emuPutStrLn $ ">>> ECX = 0x" ++ showHex ecx + case result of + Right _ -> return () + Left err -> putStrLn $ "Failed with error " ++ show err ++ ": " ++ + strerror err + +testX8664 :: IO () +testX8664 = do + putStrLn "Emulate x86_64 code" + + result <- runEmulator $ do + -- Initialize emualator in X86-64bit mode + uc <- open ArchX86 [Mode64] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address x86Code64 + + -- Initialize machine registers + regWrite uc X86.Rsp (fromIntegral address + 0x200000) + + regWrite uc X86.Rax 0x71f3029efd49d41d + regWrite uc X86.Rbx 0xd87b45277f133ddb + regWrite uc X86.Rcx 0xab40d1ffd8afc461 + regWrite uc X86.Rdx 0x919317b4a733f01 + regWrite uc X86.Rsi 0x4c24e753a17ea358 + regWrite uc X86.Rdi 0xe509a57d2571ce96 + regWrite uc X86.R8 0xea5b108cc2b9ab1f + regWrite uc X86.R9 0x19ec097c8eb618c1 + regWrite uc X86.R10 0xec45774f00c5f682 + regWrite uc X86.R11 0xe17e9dbec8c074aa + regWrite uc X86.R12 0x80f86a8dc0f6d457 + regWrite uc X86.R13 0x48288ca5671c5492 + regWrite uc X86.R14 0x595f72f6e4017f6e + regWrite uc X86.R15 0x1efd97aea331cccc + + -- Tracing all basic blocks with customized callback + blockHookAdd uc hookBlock () 1 0 + + -- Tracing all instructions in the range [address, address+20] + codeHookAdd uc hookCode64 () address (address + 20) + + -- Tracing all memory WRITE access (with @begin > @end) + memoryHookAdd uc HookMemWrite hookMem64 () 1 0 + + -- Tracing all memory READ access (with @begin > @end) + memoryHookAdd uc HookMemRead hookMem64 () 1 0 + + -- Emulate machine code in infinite time (last param = Nothing), or + -- when finishing all the code + let codeLen = codeLength x86Code64 + start uc address (address + codeLen) Nothing Nothing + + -- Now print out some registers + emuPutStrLn ">>> Emulation done. Below is the CPU context" + + rax <- regRead uc X86.Rax + rbx <- regRead uc X86.Rbx + rcx <- regRead uc X86.Rcx + rdx <- regRead uc X86.Rdx + rsi <- regRead uc X86.Rsi + rdi <- regRead uc X86.Rdi + r8 <- regRead uc X86.R8 + r9 <- regRead uc X86.R9 + r10 <- regRead uc X86.R10 + r11 <- regRead uc X86.R11 + r12 <- regRead uc X86.R12 + r13 <- regRead uc X86.R13 + r14 <- regRead uc X86.R14 + r15 <- regRead uc X86.R15 + + emuPutStrLn $ ">>> RAX = 0x" ++ showHex rax + emuPutStrLn $ ">>> RBX = 0x" ++ showHex rbx + emuPutStrLn $ ">>> RCX = 0x" ++ showHex rcx + emuPutStrLn $ ">>> RDX = 0x" ++ showHex rdx + emuPutStrLn $ ">>> RSI = 0x" ++ showHex rsi + emuPutStrLn $ ">>> RDI = 0x" ++ showHex rdi + emuPutStrLn $ ">>> R8 = 0x" ++ showHex r8 + emuPutStrLn $ ">>> R9 = 0x" ++ showHex r9 + emuPutStrLn $ ">>> R10 = 0x" ++ showHex r10 + emuPutStrLn $ ">>> R11 = 0x" ++ showHex r11 + emuPutStrLn $ ">>> R12 = 0x" ++ showHex r12 + emuPutStrLn $ ">>> R13 = 0x" ++ showHex r13 + emuPutStrLn $ ">>> R14 = 0x" ++ showHex r14 + emuPutStrLn $ ">>> R15 = 0x" ++ showHex r15 + case result of + Right _ -> return () + Left err -> putStrLn $ "Failed with error " ++ show err ++ ": " ++ + strerror err + +testX8664Syscall :: IO () +testX8664Syscall = do + putStrLn "===================================" + putStrLn "Emulate x86_64 code with 'syscall' instruction" + + result <- runEmulator $ do + -- Initialize emulator in X86-64bit mode + uc <- open ArchX86 [Mode64] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address x86Code64Syscall + + -- Hook interrupts for syscall + syscallHookAdd uc hookSyscall () 1 0 + + -- Initialize machine registers + regWrite uc X86.Rax 0x100 + + -- Emulate machine code in infinite time (last param = Nothing), or + -- when finishing all code + let codeLen = codeLength x86Code64Syscall + start uc address (address + codeLen) Nothing Nothing + + -- Now print out some registers + emuPutStrLn ">>> Emulation done. Below is the CPU context" + + rax <- regRead uc X86.Rax + emuPutStrLn $ ">>> RAX = 0x" ++ showHex rax + case result of + Right _ -> return () + Left err -> putStrLn $ "Failed with error " ++ show err ++ ": " ++ + strerror err + +testX8616 :: IO () +testX8616 = do + putStrLn "Emulate x86 16-bit code" + + result <- runEmulator $ do + -- Initialize emulator in X86-16bit mode + uc <- open ArchX86 [Mode16] + + -- Map 8KB memory for this emulation + memMap uc 0 (8 * 1024) [ProtAll] + + -- Write machine code to be emulated in memory + memWrite uc 0 x86Code16 + + -- Initialize machine registers + regWrite uc X86.Eax 7 + regWrite uc X86.Ebx 5 + regWrite uc X86.Esi 6 + + -- Emulate machine code in infinite time (last param = Nothing), or + -- when finishing all the code + let codeLen = codeLength x86Code16 + start uc 0 codeLen Nothing Nothing + + -- Now print out some registers + emuPutStrLn ">>> Emulation done. Below is the CPU context" + + -- Read from memory + tmp <- memRead uc 11 1 + emuPutStrLn $ ">>> Read 1 bytes from [0x" ++ showHex 11 ++ + "] = 0x" ++ showHexBS tmp + case result of + Right _ -> return () + Left err -> putStrLn $ "Failed with error " ++ show err ++ ": " ++ + strerror err + +main :: IO () +main = do + progName <- getProgName + args <- getArgs + case args of + ["-32"] -> do + testI386 + testI386InOut + testI386Jump + testI386Loop + testI386InvalidMemRead + testI386InvalidMemWrite + testI386JumpInvalid + ["-64"] -> do + testX8664 + testX8664Syscall + ["-16"] -> testX8616 + -- Test memleak + ["-0"] -> testI386 + _ -> putStrLn $ "Syntax: " ++ progName ++ " <-16|-32|-64>" + diff --git a/bindings/haskell/samples/Shellcode.hs b/bindings/haskell/samples/Shellcode.hs new file mode 100644 index 00000000..65ad979c --- /dev/null +++ b/bindings/haskell/samples/Shellcode.hs @@ -0,0 +1,153 @@ +-- Sample code to trace code with Linux code with syscall + +import Unicorn +import Unicorn.Hook +import qualified Unicorn.CPU.X86 as X86 + +import Control.Monad.Trans.Class (lift) +import qualified Data.ByteString as BS +import Data.Word +import qualified Numeric as N (showHex) +import System.Environment + +-- Code to be emulated +x86Code32 :: BS.ByteString +x86Code32 = BS.pack [0xeb, 0x19, 0x31, 0xc0, 0x31, 0xdb, 0x31, 0xd2, 0x31, + 0xc9, 0xb0, 0x04, 0xb3, 0x01, 0x59, 0xb2, 0x05, 0xcd, + 0x80, 0x31, 0xc0, 0xb0, 0x01, 0x31, 0xdb, 0xcd, 0x80, + 0xe8, 0xe2, 0xff, 0xff, 0xff, 0x68, 0x65, 0x6c, 0x6c, + 0x6f] + +x86Code32Self :: BS.ByteString +x86Code32Self = BS.pack [0xeb, 0x1c, 0x5a, 0x89, 0xd6, 0x8b, 0x02, 0x66, 0x3d, + 0xca, 0x7d, 0x75, 0x06, 0x66, 0x05, 0x03, 0x03, 0x89, + 0x02, 0xfe, 0xc2, 0x3d, 0x41, 0x41, 0x41, 0x41, 0x75, + 0xe9, 0xff, 0xe6, 0xe8, 0xdf, 0xff, 0xff, 0xff, 0x31, + 0xd2, 0x6a, 0x0b, 0x58, 0x99, 0x52, 0x68, 0x2f, 0x2f, + 0x73, 0x68, 0x68, 0x2f, 0x62, 0x69, 0x6e, 0x89, 0xe3, + 0x52, 0x53, 0x89, 0xe1, 0xca, 0x7d, 0x41, 0x41, 0x41, + 0x41, 0x41, 0x41, 0x41, 0x41] + +-- Memory address where emulation starts +address :: Word64 +address = 0x1000000 + +-- Pretty-print integral as hex +showHex :: (Integral a, Show a) => a -> String +showHex = + flip N.showHex "" + +-- Pretty-print byte string as hex +showHexBS :: BS.ByteString -> String +showHexBS = + concatMap (flip N.showHex " ") . BS.unpack + +-- Write a string (with a newline character) to standard output in the emulator +emuPutStrLn :: String -> Emulator () +emuPutStrLn = + lift . putStrLn + +-- Calculate code length +codeLength :: Num a => BS.ByteString -> a +codeLength = + fromIntegral . BS.length + +-- Callback for tracing instructions +hookCode :: CodeHook () +hookCode uc addr size _ = do + runEmulator $ do + emuPutStrLn $ "Tracing instruction at 0x" ++ showHex addr ++ + ", instruction size = 0x" ++ (maybe "0" showHex size) + + eip <- regRead uc X86.Eip + tmp <- memRead uc addr (maybe 0 id size) + + emuPutStrLn $ "*** EIP = " ++ showHex eip ++ " ***: " ++ showHexBS tmp + return () + +-- Callback for handling interrupts +-- ref: http://syscalls.kernelgrok.com +hookIntr :: InterruptHook () +hookIntr uc intno _ + | intno == 0x80 = do + runEmulator $ do + eax <- regRead uc X86.Eax + eip <- regRead uc X86.Eip + + case eax of + -- sys_exit + 1 -> do + emuPutStrLn $ ">>> 0x" ++ showHex eip ++ + ": interrupt 0x" ++ showHex intno ++ + ", SYS_EXIT. quit!\n" + stop uc + -- sys_write + 4 -> do + -- ECX = buffer address + ecx <- regRead uc X86.Ecx + + -- EDX = buffer size + edx <- regRead uc X86.Edx + + -- Read the buffer in + buffer <- memRead uc (fromIntegral ecx) (fromIntegral edx) + err <- errno uc + if err == ErrOk then + emuPutStrLn $ ">>> 0x" ++ showHex eip ++ + ": interrupt 0x" ++ showHex intno ++ + ", SYS_WRITE. buffer = 0x" ++ + showHex ecx ++ ", size = " ++ + show edx ++ ", content = " ++ + showHexBS buffer + else + emuPutStrLn $ ">>> 0x" ++ showHex eip ++ + ": interrupt 0x" ++ showHex intno ++ + ", SYS_WRITE. buffer = 0x" ++ + showHex ecx ++ ", size = " ++ show edx ++ + " (cannot get content)" + _ -> emuPutStrLn $ ">>> 0x" ++ showHex eip ++ + ": interrupt 0x" ++ showHex intno ++ + ", EAX = 0x" ++ showHex eax + return () + | otherwise = return () + +testI386 :: IO () +testI386 = do + result <- runEmulator $ do + emuPutStrLn "Emulate i386 code" + + -- Initialize emulator in X86-32bit mode + uc <- open ArchX86 [Mode32] + + -- Map 2MB memory for this emulation + memMap uc address (2 * 1024 * 1024) [ProtAll] + + -- Write machine code to be emulated to memory + memWrite uc address x86Code32Self + + -- Initialize machine registers + regWrite uc X86.Esp (fromIntegral address + 0x200000) + + -- Tracing all instructions by having @begin > @end + codeHookAdd uc hookCode () 1 0 + + -- Handle interrupt ourself + interruptHookAdd uc hookIntr () 1 0 + + emuPutStrLn "\n>>> Start tracing this Linux code" + + -- Emulate machine code in infinite time + let codeLen = codeLength x86Code32Self + start uc address (address + codeLen) Nothing Nothing + case result of + Right _ -> putStrLn "\n>>> Emulation done." + Left err -> putStrLn $ "Failed with error " ++ show err ++ ": " ++ + strerror err + +main :: IO () +main = do + progName <- getProgName + args <- getArgs + case args of + ["-32"] -> testI386 + _ -> putStrLn $ "Syntax: " ++ progName ++ " <-32|-64>" diff --git a/bindings/haskell/src/Unicorn.hs b/bindings/haskell/src/Unicorn.hs new file mode 100644 index 00000000..856133ff --- /dev/null +++ b/bindings/haskell/src/Unicorn.hs @@ -0,0 +1,284 @@ +{-| +Module : Unicorn +Description : The Unicorn CPU emulator. +Copyright : (c) Adrian Herrera, 2016 +License : GPL-2 + +Unicorn is a lightweight, multi-platform, multi-architecture CPU emulator +framework based on QEMU. + +Further information is available at . +-} +module Unicorn ( + -- * Emulator control + Emulator, + Engine, + Architecture(..), + Mode(..), + QueryType(..), + runEmulator, + open, + query, + start, + stop, + + -- * Register operations + regWrite, + regRead, + + -- * Memory operations + MemoryPermission(..), + MemoryRegion(..), + memWrite, + memRead, + memMap, + memUnmap, + memProtect, + memRegions, + + -- * Error handling + Error(..), + errno, + strerror, + + -- * Misc. + version, +) where + +import Control.Monad (liftM) +import Control.Monad.Trans.Class (lift) +import Control.Monad.Trans.Either (hoistEither, left, right, runEitherT) +import Foreign + +import Prelude hiding (until) +import Data.ByteString (ByteString, pack) + +import Unicorn.Internal.Core +import Unicorn.Internal.Unicorn + +------------------------------------------------------------------------------- +-- Emulator control +------------------------------------------------------------------------------- + +-- | Run the Unicorn emulator and return a result on success, or an 'Error' on +-- failure. +runEmulator :: Emulator a -- ^ The emulation code to execute + -> IO (Either Error a) -- ^ A result on success, or an 'Error' on + -- failure +runEmulator = + runEitherT + +-- | Create a new instance of the Unicorn engine. +open :: Architecture -- ^ CPU architecture + -> [Mode] -- ^ CPU hardware mode + -> Emulator Engine -- ^ A 'Unicorn' engine on success, or an 'Error' on + -- failure +open arch mode = do + (err, ucPtr) <- lift $ ucOpen arch mode + if err == ErrOk then + -- Return a pointer to the unicorn engine if ucOpen completed + -- successfully + lift $ mkEngine ucPtr + else + -- Otherwise return the error + left err + +-- | Query internal status of the Unicorn engine. +query :: Engine -- ^ 'Unicorn' engine handle + -> QueryType -- ^ Query type + -> Emulator Int -- ^ The result of the query +query uc queryType = do + (err, result) <- lift $ ucQuery uc queryType + if err == ErrOk then + right result + else + left err + +-- | Emulate machine code for a specific duration of time. +start :: Engine -- ^ 'Unicorn' engine handle + -> Word64 -- ^ Address where emulation starts + -> Word64 -- ^ Address where emulation stops (i.e. when this + -- address is hit) + -> Maybe Int -- ^ Optional duration to emulate code (in + -- microseconds). + -- If 'Nothing' is provided, continue to emulate + -- until the code is finished + -> Maybe Int -- ^ Optional number of instructions to emulate. If + -- 'Nothing' is provided, emulate all the code + -- available, until the code is finished + -> Emulator () -- ^ An 'Error' on failure +start uc begin until timeout count = do + err <- lift $ ucEmuStart uc begin until (maybeZ timeout) (maybeZ count) + if err == ErrOk then + right () + else + left err + where maybeZ = maybe 0 id + +-- | Stop emulation (which was started by 'start'). +-- This is typically called from callback functions registered by tracing APIs. +-- +-- NOTE: For now, this will stop execution only after the current block. +stop :: Engine -- ^ 'Unicorn' engine handle + -> Emulator () -- ^ An 'Error' on failure +stop uc = do + err <- lift $ ucEmuStop uc + if err == ErrOk then + right () + else + left err + +------------------------------------------------------------------------------- +-- Register operations +------------------------------------------------------------------------------- + +-- | Write to register. +regWrite :: Reg r => + Engine -- ^ 'Unicorn' engine handle + -> r -- ^ Register ID to write to + -> Int64 -- ^ Value to write to register + -> Emulator () -- ^ An 'Error' on failure +regWrite uc regId value = do + err <- lift . alloca $ \ptr -> do + poke ptr value + ucRegWrite uc regId ptr + if err == ErrOk then + right () + else + left err + +-- | Read register value. +regRead :: Reg r => + Engine -- ^ 'Unicorn' engine handle + -> r -- ^ Register ID to read from + -> Emulator Int64 -- ^ The value read from the register on success, + -- or an 'Error' on failure +regRead uc regId = do + (err, val) <- lift $ ucRegRead uc regId + if err == ErrOk then + right val + else + left err + +------------------------------------------------------------------------------- +-- Memory operations +------------------------------------------------------------------------------- + +-- | Write to memory. +memWrite :: Engine -- ^ 'Unicorn' engine handle + -> Word64 -- ^ Starting memory address of bytes to write + -> ByteString -- ^ The data to write + -> Emulator () -- ^ An 'Error' on failure +memWrite uc address bytes = do + err <- lift $ ucMemWrite uc address bytes + if err == ErrOk then + right () + else + left err + +-- | Read memory contents. +memRead :: Engine -- ^ 'Unicorn' engine handle + -> Word64 -- ^ Starting memory address to read + -- from + -> Int -- ^ Size of memory to read (in bytes) + -> Emulator ByteString -- ^ The memory contents on success, or + -- an 'Error' on failure +memRead uc address size = do + -- Allocate an array of the given size + result <- lift . allocaArray size $ \ptr -> do + err <- ucMemRead uc address ptr size + if err == ErrOk then + -- If ucMemRead completed successfully, pack the contents of the + -- array into a ByteString and return it + liftM (Right . pack) (peekArray size ptr) + else + -- Otherwise return the error + return $ Left err + hoistEither result + +-- | Map a range of memory. +memMap :: Engine -- ^ 'Unicorn' engine handle + -> Word64 -- ^ Start address of the new memory region to + -- be mapped in. This address must be + -- aligned to 4KB, or this will return with + -- 'ErrArg' error + -> Int -- ^ Size of the new memory region to be mapped + -- in. This size must be a multiple of 4KB, or + -- this will return with an 'ErrArg' error + -> [MemoryPermission] -- ^ Permissions for the newly mapped region + -> Emulator () -- ^ An 'Error' on failure +memMap uc address size perms = do + err <- lift $ ucMemMap uc address size perms + if err == ErrOk then + right () + else + left err + +-- | Unmap a range of memory. +memUnmap :: Engine -- ^ 'Unicorn' engine handle + -> Word64 -- ^ Start addres of the memory region to be unmapped. + -- This address must be aligned to 4KB or this will + -- return with an 'ErrArg' error + -> Int -- ^ Size of the memory region to be modified. This + -- must be a multiple of 4KB, or this will return with + -- an 'ErrArg' error + -> Emulator () -- ^ An 'Error' on failure +memUnmap uc address size = do + err <- lift $ ucMemUnmap uc address size + if err == ErrOk then + right () + else + left err + +-- | Change permissions on a range of memory. +memProtect :: Engine -- ^ 'Unicorn' engine handle + -> Word64 -- ^ Start address of the memory region to + -- modify. This address must be aligned to + -- 4KB, or this will return with an + -- 'ErrArg' error + -> Int -- ^ Size of the memory region to be + -- modified. This size must be a multiple + -- of 4KB, or this will return with an + -- 'ErrArg' error + -> [MemoryPermission] -- ^ New permissions for the mapped region + -> Emulator () -- ^ An 'Error' on failure +memProtect uc address size perms = do + err <- lift $ ucMemProtect uc address size perms + if err == ErrOk then + right () + else + left err + +-- | Retrieve all memory regions mapped by 'memMap'. +memRegions :: Engine -- ^ 'Unicorn' engine handle + -> Emulator [MemoryRegion] -- ^ A list of memory regions +memRegions uc = do + (err, regionPtr, count) <- lift $ ucMemRegions uc + if err == ErrOk then do + regions <- lift $ peekArray count regionPtr + right regions + else + left err + +------------------------------------------------------------------------------- +-- Misc. +------------------------------------------------------------------------------- + +-- | Combined API version & major and minor version numbers. Returns a +-- hexadecimal number as (major << 8 | minor), which encodes both major and +-- minor versions. +version :: Int +version = + ucVersion nullPtr nullPtr + +-- | Report the 'Error' of the last failed API call. +errno :: Engine -- ^ 'Unicorn' engine handle + -> Emulator Error -- ^ The last 'Error' code +errno = + lift . ucErrno + +-- | Return a string describing the given 'Error'. +strerror :: Error -- ^ The 'Error' code + -> String -- ^ Description of the error code +strerror = + ucStrerror diff --git a/bindings/haskell/src/Unicorn/CPU/Arm.chs b/bindings/haskell/src/Unicorn/CPU/Arm.chs new file mode 100644 index 00000000..789b7e3e --- /dev/null +++ b/bindings/haskell/src/Unicorn/CPU/Arm.chs @@ -0,0 +1,29 @@ +{-# LANGUAGE ForeignFunctionInterface #-} + +{-| +Module : Unicorn.CPU.Arm +Description : Definitions for the ARM architecture. +Copyright : (c) Adrian Herrera, 2016 +License : GPL-2 + +Definitions for the ARM architecture. +-} +module Unicorn.CPU.Arm ( + Register(..), +) where + +import Unicorn.Internal.Core (Reg) + +{# context lib="unicorn" #} + +#include + +-- | ARM registers. +{# enum uc_arm_reg as Register + {underscoreToCase} + omit (UC_ARM_REG_INVALID, + UC_ARM_REG_ENDING) + with prefix="UC_ARM_REG_" + deriving (Show, Eq) #} + +instance Reg Register diff --git a/bindings/haskell/src/Unicorn/CPU/Arm64.chs b/bindings/haskell/src/Unicorn/CPU/Arm64.chs new file mode 100644 index 00000000..4c67c43e --- /dev/null +++ b/bindings/haskell/src/Unicorn/CPU/Arm64.chs @@ -0,0 +1,29 @@ +{-# LANGUAGE ForeignFunctionInterface #-} + +{-| +Module : Unicorn.CPU.Arm64 +Description : Definitions for the ARM64 (ARMv8) architecture. +Copyright : (c) Adrian Herrera, 2016 +License : GPL-2 + +Definitions for the ARM64 (ARMv8) architecture. +-} +module Unicorn.CPU.Arm64 ( + Register(..), +) where + +import Unicorn.Internal.Core (Reg) + +{# context lib="unicorn" #} + +#include + +-- | ARM64 registers. +{# enum uc_arm64_reg as Register + {underscoreToCase} + omit (UC_ARM64_REG_INVALID, + UC_ARM64_REG_ENDING) + with prefix="UC_ARM64_REG_" + deriving (Show, Eq) #} + +instance Reg Register diff --git a/bindings/haskell/src/Unicorn/CPU/M68k.chs b/bindings/haskell/src/Unicorn/CPU/M68k.chs new file mode 100644 index 00000000..457ced79 --- /dev/null +++ b/bindings/haskell/src/Unicorn/CPU/M68k.chs @@ -0,0 +1,29 @@ +{-# LANGUAGE ForeignFunctionInterface #-} + +{-| +Module : Unicorn.CPU.Mk68k +Description : Definitions for the MK68K architecture. +Copyright : (c) Adrian Herrera, 2016 +License : GPL-2 + +Definitions for the MK68K architecture. +-} +module Unicorn.CPU.M68k ( + Register(..), +) where + +import Unicorn.Internal.Core (Reg) + +{# context lib="unicorn" #} + +#include + +-- | M68K registers. +{# enum uc_m68k_reg as Register + {underscoreToCase} + omit (UC_M68K_REG_INVALID, + UC_M68K_REG_ENDING) + with prefix="UC_M68K_REG_" + deriving (Show, Eq) #} + +instance Reg Register diff --git a/bindings/haskell/src/Unicorn/CPU/Mips.chs b/bindings/haskell/src/Unicorn/CPU/Mips.chs new file mode 100644 index 00000000..fa14bc4c --- /dev/null +++ b/bindings/haskell/src/Unicorn/CPU/Mips.chs @@ -0,0 +1,61 @@ +{-# LANGUAGE ForeignFunctionInterface #-} + +{-| +Module : Unicorn.CPU.Mips +Description : Definitions for the MIPS architecture. +Copyright : (c) Adrian Herrera, 2016 +License : GPL-2 + +Definitions for the MIPS architecture. +-} +module Unicorn.CPU.Mips ( + Register(..), +) where + +import Unicorn.Internal.Core (Reg) + +{# context lib="unicorn" #} + +#include + +-- | MIPS registers. +{# enum UC_MIPS_REG as Register + {underscoreToCase, + UC_MIPS_REG_0 as Reg0, + UC_MIPS_REG_1 as Reg1, + UC_MIPS_REG_2 as Reg2, + UC_MIPS_REG_3 as Reg3, + UC_MIPS_REG_4 as Reg4, + UC_MIPS_REG_5 as Reg5, + UC_MIPS_REG_6 as Reg6, + UC_MIPS_REG_7 as Reg7, + UC_MIPS_REG_8 as Reg8, + UC_MIPS_REG_9 as Reg9, + UC_MIPS_REG_10 as Reg10, + UC_MIPS_REG_11 as Reg11, + UC_MIPS_REG_12 as Reg12, + UC_MIPS_REG_13 as Reg13, + UC_MIPS_REG_14 as Reg14, + UC_MIPS_REG_15 as Reg15, + UC_MIPS_REG_16 as Reg16, + UC_MIPS_REG_17 as Reg17, + UC_MIPS_REG_18 as Reg18, + UC_MIPS_REG_19 as Reg19, + UC_MIPS_REG_20 as Reg20, + UC_MIPS_REG_21 as Reg21, + UC_MIPS_REG_22 as Reg22, + UC_MIPS_REG_23 as Reg23, + UC_MIPS_REG_24 as Reg24, + UC_MIPS_REG_25 as Reg25, + UC_MIPS_REG_26 as Reg26, + UC_MIPS_REG_27 as Reg27, + UC_MIPS_REG_28 as Reg28, + UC_MIPS_REG_29 as Reg29, + UC_MIPS_REG_30 as Reg30, + UC_MIPS_REG_31 as Reg31} + omit (UC_MIPS_REG_INVALID, + UC_MIPS_REG_ENDING) + with prefix="UC_MIPS_REG_" + deriving (Show, Eq) #} + +instance Reg Register diff --git a/bindings/haskell/src/Unicorn/CPU/Sparc.chs b/bindings/haskell/src/Unicorn/CPU/Sparc.chs new file mode 100644 index 00000000..28d63702 --- /dev/null +++ b/bindings/haskell/src/Unicorn/CPU/Sparc.chs @@ -0,0 +1,29 @@ +{-# LANGUAGE ForeignFunctionInterface #-} + +{-| +Module : Unicorn.CPU.Sparc +Description : Definitions for the SPARC architecture. +Copyright : (c) Adrian Herrera, 2016 +License : GPL-2 + +Definitions for the SPARC architecture. +-} +module Unicorn.CPU.Sparc ( + Register(..), +) where + +import Unicorn.Internal.Core (Reg) + +{# context lib="unicorn" #} + +#include + +-- | SPARC registers. +{# enum uc_sparc_reg as Register + {underscoreToCase} + omit (UC_SPARC_REG_INVALID, + UC_SPARC_REG_ENDING) + with prefix="UC_SPARC_REG_" + deriving (Show, Eq) #} + +instance Reg Register diff --git a/bindings/haskell/src/Unicorn/CPU/X86.chs b/bindings/haskell/src/Unicorn/CPU/X86.chs new file mode 100644 index 00000000..eccb8b35 --- /dev/null +++ b/bindings/haskell/src/Unicorn/CPU/X86.chs @@ -0,0 +1,65 @@ +{-# LANGUAGE ForeignFunctionInterface #-} + +{-| +Module : Unicorn.CPU.X86 +Description : Definitions for the X86 architecture. +Copyright : (c) Adrian Herrera, 2016 +License : GPL-2 + +Definitions for the X86 architecture. +-} +module Unicorn.CPU.X86 ( + Mmr(..), + Register(..), + Instruction(..), +) where + +import Control.Applicative +import Data.Word +import Foreign + +import Unicorn.Internal.Core (Reg) + +{# context lib="unicorn" #} + +#include + +-- | Memory-managemen Register for instructions IDTR, GDTR, LDTR, TR. +-- Borrow from SegmentCache in qemu/target-i386/cpu.h +data Mmr = Mmr { + selector :: Word16, -- ^ Not used by GDTR and IDTR + base :: Word64, -- ^ Handle 32 or 64 bit CPUs + limit :: Word32, + flags :: Word32 -- ^ Not used by GDTR and IDTR +} + +instance Storable Mmr where + sizeOf _ = {# sizeof uc_x86_mmr #} + alignment _ = {# alignof uc_x86_mmr #} + peek p = Mmr <$> liftA fromIntegral ({# get uc_x86_mmr->selector #} p) + <*> liftA fromIntegral ({# get uc_x86_mmr->base #} p) + <*> liftA fromIntegral ({# get uc_x86_mmr->limit #} p) + <*> liftA fromIntegral ({# get uc_x86_mmr->flags #} p) + poke p mmr = do + {# set uc_x86_mmr.selector #} p (fromIntegral $ selector mmr) + {# set uc_x86_mmr.base #} p (fromIntegral $ base mmr) + {# set uc_x86_mmr.limit #} p (fromIntegral $ limit mmr) + {# set uc_x86_mmr.flags #} p (fromIntegral $ flags mmr) + +-- | X86 registers. +{# enum uc_x86_reg as Register + {underscoreToCase} + omit (UC_X86_REG_INVALID, + UC_X86_REG_ENDING) + with prefix="UC_X86_REG_" + deriving (Show, Eq) #} + +instance Reg Register + +-- | X86 instructions. +{# enum uc_x86_insn as Instruction + {underscoreToCase} + omit (UC_X86_INS_INVALID, + UC_X86_INS_ENDING) + with prefix="UC_X86_INS_" + deriving (Show, Eq) #} diff --git a/bindings/haskell/src/Unicorn/Hook.hs b/bindings/haskell/src/Unicorn/Hook.hs new file mode 100644 index 00000000..d716a9ba --- /dev/null +++ b/bindings/haskell/src/Unicorn/Hook.hs @@ -0,0 +1,224 @@ +{-| +Module : Unicorn.Hook +Description : Unicorn hooks. +Copyright : (c) Adrian Herrera, 2016 +License : GPL-2 + +Insert hook points into the Unicorn emulator engine. +-} +module Unicorn.Hook ( + -- * Hook types + Hook, + MemoryHookType(..), + MemoryEventHookType(..), + MemoryAccess(..), + + -- * Hook callbacks + CodeHook, + InterruptHook, + BlockHook, + InHook, + OutHook, + SyscallHook, + MemoryHook, + MemoryReadHook, + MemoryWriteHook, + MemoryEventHook, + + -- * Hook callback management + codeHookAdd, + interruptHookAdd, + blockHookAdd, + inHookAdd, + outHookAdd, + syscallHookAdd, + memoryHookAdd, + memoryEventHookAdd, + hookDel, +) where + +import Control.Monad +import Control.Monad.Trans.Class +import Control.Monad.Trans.Either (hoistEither, left, right) +import Foreign + +import Unicorn.Internal.Core +import Unicorn.Internal.Hook +import Unicorn.Internal.Util +import qualified Unicorn.CPU.X86 as X86 + +------------------------------------------------------------------------------- +-- Hook callback management (registration and deletion) +------------------------------------------------------------------------------- + +-- | Register a callback for a code hook event. +codeHookAdd :: Storable a + => Engine -- ^ 'Unicorn' engine handle + -> CodeHook a -- ^ Code hook callback + -> a -- ^ User-defined data. This will be passed to + -- the callback function + -> Word64 -- ^ Start address + -> Word64 -- ^ End address + -> Emulator Hook -- ^ The hook handle on success, or an 'Error' + -- on failure +codeHookAdd uc callback userData begin end = do + result <- lift . alloca $ \userDataPtr -> do + poke userDataPtr userData + funPtr <- marshalCodeHook callback + getResult $ ucHookAdd uc HookCode funPtr userDataPtr begin end + hoistEither result + +-- | Register a callback for an interrupt hook event. +interruptHookAdd :: Storable a + => Engine -- ^ 'Unicorn' engine handle + -> InterruptHook a -- ^ Interrupt callback + -> a -- ^ User-defined data. This will be passed + -- to the callback function + -> Word64 -- ^ Start address + -> Word64 -- ^ End address + -> Emulator Hook -- ^ The hook handle on success, or 'Error' + -- on failure +interruptHookAdd uc callback userData begin end = do + result <- lift . alloca $ \userDataPtr -> do + poke userDataPtr userData + funPtr <- marshalInterruptHook callback + getResult $ ucHookAdd uc HookIntr funPtr userDataPtr begin end + hoistEither result + +-- | Register a callback for a block hook event. +blockHookAdd :: Storable a + => Engine -- ^ 'Unicorn' engine handle + -> BlockHook a -- ^ Block callback + -> a -- ^ User-defined data. This will be passed to + -- the callback function + -> Word64 -- ^ Start address + -> Word64 -- ^ End address + -> Emulator Hook -- ^ The hook handle on success, or an 'Error' + -- on failure +blockHookAdd uc callback userData begin end = do + result <- lift . alloca $ \userDataPtr -> do + poke userDataPtr userData + funPtr <- marshalBlockHook callback + getResult $ ucHookAdd uc HookBlock funPtr userDataPtr begin end + hoistEither result + +-- | Register a callback for an IN instruction hook event (X86). +inHookAdd :: Storable a + => Engine -- ^ 'Unicorn' engine handle + -> InHook a -- ^ IN instruction callback + -> a -- ^ User-defined data. This will be passed to the + -- callback function + -> Word64 -- ^ Start address + -> Word64 -- ^ End address + -> Emulator Hook -- ^ The hook handle on success, or an 'Error' on + -- failure +inHookAdd uc callback userData begin end = do + result <- lift . alloca $ \userDataPtr -> do + poke userDataPtr userData + funPtr <- marshalInHook callback + getResult $ ucInsnHookAdd uc HookInsn funPtr userDataPtr begin end + X86.In + hoistEither result + +-- | Register a callback for an OUT instruction hook event (X86). +outHookAdd :: Storable a + => Engine -- ^ 'Unicorn' engine handle + -> OutHook a -- ^ OUT instruction callback + -> a -- ^ User-defined data. This will be passed to the + -- callback function + -> Word64 -- ^ Start address + -> Word64 -- ^ End address + -> Emulator Hook -- ^ The hook handle on success, or an 'Error' on + -- failure +outHookAdd uc callback userData begin end = do + result <- lift . alloca $ \userDataPtr -> do + poke userDataPtr userData + funPtr <- marshalOutHook callback + getResult $ ucInsnHookAdd uc HookInsn funPtr userDataPtr begin end + X86.Out + hoistEither result + +-- | Register a callback for a SYSCALL instruction hook event (X86). +syscallHookAdd :: Storable a + => Engine -- ^ 'Unicorn' engine handle + -> SyscallHook a -- ^ SYSCALL instruction callback + -> a -- ^ User-defined data. This will be passed to + -- the callback function + -> Word64 -- ^ Start address + -> Word64 -- ^ End address + -> Emulator Hook -- ^ The hook handle on success, or an 'Error' + -- on failure +syscallHookAdd uc callback userData begin end = do + result <- lift . alloca $ \userDataPtr -> do + poke userDataPtr userData + funPtr <- marshalSyscallHook callback + getResult $ ucInsnHookAdd uc HookInsn funPtr userDataPtr begin end + X86.Syscall + hoistEither result + +-- | Register a callback for a valid memory access event. +memoryHookAdd :: Storable a + => Engine -- ^ 'Unicorn' engine handle + -> MemoryHookType -- ^ A valid memory access (e.g. read, write, + -- etc.) to trigger the callback on + -> MemoryHook a -- ^ Memory access callback + -> a -- ^ User-defined data. This will be passed to + -- the callback function + -> Word64 -- ^ Start address + -> Word64 -- ^ End address + -> Emulator Hook -- ^ The hook handle on success, or an 'Error' + -- on failure +memoryHookAdd uc memHookType callback userData begin end = do + result <- lift . alloca $ \userDataPtr -> do + poke userDataPtr userData + funPtr <- marshalMemoryHook callback + getResult $ ucHookAdd uc memHookType funPtr userDataPtr begin end + hoistEither result + +-- | Register a callback for an invalid memory access event. +memoryEventHookAdd :: Storable a + => Engine -- ^ 'Unicorn' engine handle + -> MemoryEventHookType -- ^ An invalid memory access (e.g. + -- read, write, etc.) to trigger + -- the callback on + -> MemoryEventHook a -- ^ Invalid memory access callback + -> a -- ^ User-defined data. This will + -- be passed to the callback + -- function + -> Word64 -- ^ Start address + -> Word64 -- ^ End address + -> Emulator Hook -- ^ The hook handle on success, or + -- an 'Error' on failure +memoryEventHookAdd uc memEventHookType callback userData begin end = do + result <- lift . alloca $ \userDataPtr -> do + poke userDataPtr userData + funPtr <- marshalMemoryEventHook callback + getResult $ ucHookAdd uc memEventHookType funPtr userDataPtr begin end + hoistEither result + +-- | Unregister (remove) a hook callback. +hookDel :: Engine -- ^ 'Unicorn' engine handle + -> Hook -- ^ 'Hook' handle + -> Emulator () -- ^ 'ErrOk' on success, or other value on failure +hookDel uc hook = do + err <- lift $ ucHookDel uc hook + if err == ErrOk then + right () + else + left err + +------------------------------------------------------------------------------- +-- Helper functions +------------------------------------------------------------------------------- + +-- Takes the tuple returned by `ucHookAdd`, an IO (Error, Hook), and +-- returns either a `Right Hook` if no error occurred or a `Left Error` if an +-- error occurred +getResult :: IO (Error, Hook) -> IO (Either Error Hook) +getResult = + liftM (uncurry checkResult) + where checkResult err hook = + if err == ErrOk then + Right hook + else + Left err diff --git a/bindings/haskell/src/Unicorn/Internal/Core.chs b/bindings/haskell/src/Unicorn/Internal/Core.chs new file mode 100644 index 00000000..f3e08390 --- /dev/null +++ b/bindings/haskell/src/Unicorn/Internal/Core.chs @@ -0,0 +1,52 @@ +{-# LANGUAGE ForeignFunctionInterface #-} + +{-| +Module : Unicorn.Internal.Core +Description : Core Unicorn components. +Copyright : (c) Adrian Herrera, 2016 +License : GPL-2 + +Defines core Unicorn components. + +This module should not be directly imported; it is only exposed because of the +way cabal handles ordering of chs files. +-} +module Unicorn.Internal.Core where + +import Control.Monad +import Control.Monad.Trans.Either (EitherT) +import Foreign + +{# context lib="unicorn" #} + +#include +#include "unicorn_wrapper.h" + +-- | The Unicorn engine. +{# pointer *uc_engine as Engine + foreign finalizer uc_close_wrapper as close + newtype #} + +-- | A pointer to a Unicorn engine. +{# pointer *uc_engine as EnginePtr -> Engine #} + +-- | Make a new Unicorn engine out of an engine pointer. The returned Unicorn +-- engine will automatically close 'uc_close_wrapper' when it goes out of +-- scope. +mkEngine :: EnginePtr -> IO Engine +mkEngine ptr = + liftM Engine (newForeignPtr close ptr) + +-- | Errors encountered by the Unicorn API. These values are returned by +-- 'errno'. +{# enum uc_err as Error + {underscoreToCase} + with prefix = "UC_" + deriving (Show, Eq) #} + +-- | The emulator runs in the IO monad and allows for the handling of errors +-- "under the hood". +type Emulator a = EitherT Error IO a + +-- | An architecture-dependent register. +class Enum a => Reg a diff --git a/bindings/haskell/src/Unicorn/Internal/Hook.chs b/bindings/haskell/src/Unicorn/Internal/Hook.chs new file mode 100644 index 00000000..6af7a9ff --- /dev/null +++ b/bindings/haskell/src/Unicorn/Internal/Hook.chs @@ -0,0 +1,415 @@ +{-# LANGUAGE ForeignFunctionInterface #-} + +{-| +Module : Unicorn.Internal.Hook +Description : Unicorn hooks. +Copyright : (c) Adrian Herrera, 2016 +License : GPL-2 + +Low-level bindings for inserting hook points into the Unicorn emulator engine. + +This module should not be directly imported; it is only exposed because of the +way cabal handles ordering of chs files. +-} +module Unicorn.Internal.Hook ( + -- * Types + Hook, + HookType(..), + MemoryHookType(..), + MemoryEventHookType(..), + MemoryAccess(..), + + -- * Hook callback bindings + CodeHook, + InterruptHook, + BlockHook, + InHook, + OutHook, + SyscallHook, + MemoryHook, + MemoryReadHook, + MemoryWriteHook, + MemoryEventHook, + + -- * Hook marshalling + marshalCodeHook, + marshalInterruptHook, + marshalBlockHook, + marshalInHook, + marshalOutHook, + marshalSyscallHook, + marshalMemoryHook, + marshalMemoryReadHook, + marshalMemoryWriteHook, + marshalMemoryEventHook, + + -- * Hook registration and deletion bindings + ucHookAdd, + ucInsnHookAdd, + ucHookDel, +) where + +import Control.Monad +import Foreign + +import Unicorn.Internal.Util + +{# context lib="unicorn" #} + +{# import Unicorn.Internal.Core #} +{# import Unicorn.CPU.X86 #} + +#include +#include "unicorn_wrapper.h" + +------------------------------------------------------------------------------- +-- Types +------------------------------------------------------------------------------- + +-- When we pass a Unicorn engine to a hook callback, we do not want this engine +-- object to be freed automatically when the callback returns (which is what +-- would typically occur when using a ForeignPtr), because we want to continue +-- using the Unicorn engine outside the callback. To avoid this, +-- unicorn_wrapper.h provides a dummy "close" function that does nothing. When +-- we go to create a Unicorn engine to pass to a callback, we use a pointer to +-- this dummy close function as the finalizer pointer. When the callback +-- returns, the Unicorn engine remains untouched! +-- +-- XX Is there a better way to do this? +foreign import ccall "&uc_close_dummy" + closeDummy :: FunPtr (EnginePtr -> IO ()) + +mkEngineNC :: EnginePtr -> IO Engine +mkEngineNC ptr = + liftM Engine (newForeignPtr closeDummy ptr) + +-- | A Unicorn hook. +type Hook = {# type uc_hook #} + +-- Hook types. These are used internally within this module by the callback +-- registration functions and are not exposed to the user. +-- +-- Note that the both valid and invalid memory access hooks are omitted from +-- this enum (and are exposed to the user). +{# enum uc_hook_type as HookType + {underscoreToCase} + omit (UC_HOOK_MEM_READ_UNMAPPED, + UC_HOOK_MEM_WRITE_UNMAPPED, + UC_HOOK_MEM_FETCH_UNMAPPED, + UC_HOOK_MEM_READ_PROT, + UC_HOOK_MEM_WRITE_PROT, + UC_HOOK_MEM_FETCH_PROT, + UC_HOOK_MEM_READ, + UC_HOOK_MEM_WRITE, + UC_HOOK_MEM_FETCH) + with prefix="UC_" + deriving (Show, Eq) #} + +-- | Memory hook types (for valid memory accesses). +{# enum uc_hook_type as MemoryHookType + {underscoreToCase} + omit (UC_HOOK_INTR, + UC_HOOK_INSN, + UC_HOOK_CODE, + UC_HOOK_BLOCK, + UC_HOOK_MEM_READ_UNMAPPED, + UC_HOOK_MEM_WRITE_UNMAPPED, + UC_HOOK_MEM_FETCH_UNMAPPED, + UC_HOOK_MEM_READ_PROT, + UC_HOOK_MEM_WRITE_PROT, + UC_HOOK_MEM_FETCH_PROT) + with prefix="UC_" + deriving (Show, Eq) #} + +-- | Memory event hook types (for invalid memory accesses). +{# enum uc_hook_type as MemoryEventHookType + {underscoreToCase} + omit (UC_HOOK_INTR, + UC_HOOK_INSN, + UC_HOOK_CODE, + UC_HOOK_BLOCK, + UC_HOOK_MEM_READ, + UC_HOOK_MEM_WRITE, + UC_HOOK_MEM_FETCH) + with prefix="UC_" + deriving (Show, Eq) #} + +-- | Unify the hook types with a type class +class Enum a => HookTypeC a + +instance HookTypeC HookType +instance HookTypeC MemoryHookType +instance HookTypeC MemoryEventHookType + +-- | Memory access. +{# enum uc_mem_type as MemoryAccess + {underscoreToCase} + with prefix="UC_" + deriving (Show, Eq) #} + +------------------------------------------------------------------------------- +-- Hook callbacks +------------------------------------------------------------------------------- + +-- | Callback function for tracing code. +type CodeHook a = Engine -- ^ 'Unicorn' engine handle + -> Word64 -- ^ Addres where the code is being executed + -> Maybe Int -- ^ Size of machine instruction(s) being + -- executed, or 'Nothing' when size is unknown + -> a -- ^ User data passed to tracing APIs + -> IO () + +type CCodeHook = EnginePtr -> Word64 -> Word32 -> Ptr () -> IO () + +foreign import ccall "wrapper" + mkCodeHook :: CCodeHook -> IO {# type uc_cb_hookcode_t #} + +marshalCodeHook :: Storable a + => CodeHook a -> IO {# type uc_cb_hookcode_t #} +marshalCodeHook codeHook = + mkCodeHook $ \ucPtr address size userDataPtr -> do + uc <- mkEngineNC ucPtr + userData <- castPtrAndPeek userDataPtr + let maybeSize = if size == 0 then Nothing + else Just $ fromIntegral size + codeHook uc address maybeSize userData + +-- | Callback function for tracing interrupts. +type InterruptHook a = Engine -- ^ 'Unicorn' engine handle + -> Int -- ^ Interrupt number + -> a -- ^ User data passed to tracing APIs + -> IO () + +type CInterruptHook = EnginePtr -> Word32 -> Ptr () -> IO () + +foreign import ccall "wrapper" + mkInterruptHook :: CInterruptHook -> IO {# type uc_cb_hookintr_t #} + +marshalInterruptHook :: Storable a + => InterruptHook a -> IO {# type uc_cb_hookintr_t #} +marshalInterruptHook interruptHook = + mkInterruptHook $ \ucPtr intNo userDataPtr -> do + uc <- mkEngineNC ucPtr + userData <- castPtrAndPeek userDataPtr + interruptHook uc (fromIntegral intNo) userData + +-- | Callback function for tracing blocks. +type BlockHook a = CodeHook a + +marshalBlockHook :: Storable a + => BlockHook a -> IO {# type uc_cb_hookcode_t #} +marshalBlockHook = + marshalCodeHook + +-- | Callback function for tracing IN instructions (X86). +type InHook a = Engine -- ^ 'Unicorn' engine handle + -> Int -- ^ Port number + -> Int -- ^ Data size (1/2/4) to be read from this port + -> a -- ^ User data passed to tracing APIs + -> IO Word32 -- ^ The data read from the port + +type CInHook = EnginePtr -> Word32 -> Int32 -> Ptr () -> IO Word32 + +foreign import ccall "wrapper" + mkInHook :: CInHook -> IO {# type uc_cb_insn_in_t #} + +marshalInHook :: Storable a + => InHook a -> IO {# type uc_cb_insn_in_t #} +marshalInHook inHook = + mkInHook $ \ucPtr port size userDataPtr -> do + uc <- mkEngineNC ucPtr + userData <- castPtrAndPeek userDataPtr + inHook uc (fromIntegral port) (fromIntegral size) userData + +-- | Callback function for tracing OUT instructions (X86). +type OutHook a = Engine -- ^ 'Unicorn' engine handle + -> Int -- ^ Port number + -> Int -- ^ Data size (1/2/4) to be written to this port + -> Int -- ^ Data value to be written to this port + -> a -- ^ User data passed to tracing APIs + -> IO () + +type COutHook = EnginePtr -> Word32 -> Int32 -> Word32 -> Ptr () -> IO () + +foreign import ccall "wrapper" + mkOutHook :: COutHook -> IO {# type uc_cb_insn_out_t #} + +marshalOutHook :: Storable a + => OutHook a -> IO {# type uc_cb_insn_out_t #} +marshalOutHook outHook = + mkOutHook $ \ucPtr port size value userDataPtr -> do + uc <- mkEngineNC ucPtr + userData <- castPtrAndPeek userDataPtr + outHook uc (fromIntegral port) (fromIntegral size) (fromIntegral value) + userData + +-- | Callback function for tracing SYSCALL instructions (X86). +type SyscallHook a = Engine -- ^ 'Unicorn' engine handle + -> a -- ^ User data passed to tracing APIs + -> IO () + +type CSyscallHook = Ptr () -> Ptr () -> IO () + +foreign import ccall "wrapper" + mkSyscallHook :: CSyscallHook -> IO {# type uc_cb_insn_syscall_t #} + +marshalSyscallHook :: Storable a + => SyscallHook a -> IO {# type uc_cb_insn_syscall_t #} +marshalSyscallHook syscallHook = + mkSyscallHook $ \ucPtr userDataPtr -> do + uc <- mkEngineNC $ castPtr ucPtr + userData <- castPtrAndPeek userDataPtr + syscallHook uc userData + +-- | Callback function for hooking memory operations. +type MemoryHook a = Engine -- ^ 'Unicorn' engine handle + -> MemoryAccess -- ^ Memory access; read or write + -> Word64 -- ^ Address where the code is being + -- executed + -> Int -- ^ Size of data being read or written + -> Maybe Int -- ^ Value of data being wrriten, or + -- 'Nothing' if read + -> a -- ^ User data passed to tracing APIs + -> IO () + +type CMemoryHook = EnginePtr + -> Int32 + -> Word64 + -> Int32 + -> Int64 + -> Ptr () + -> IO () + +foreign import ccall "wrapper" + mkMemoryHook :: CMemoryHook -> IO {# type uc_cb_hookmem_t #} + +marshalMemoryHook :: Storable a + => MemoryHook a -> IO {# type uc_cb_hookmem_t #} +marshalMemoryHook memoryHook = + mkMemoryHook $ \ucPtr memAccessI address size value userDataPtr -> do + uc <- mkEngineNC ucPtr + userData <- castPtrAndPeek userDataPtr + let memAccess = toMemAccess memAccessI + maybeValue = case memAccess of + MemRead -> Nothing + MemWrite -> Just $ fromIntegral value + _ -> undefined -- XX Handle this? + memoryHook uc memAccess address (fromIntegral size) maybeValue userData + +-- | Callback function for hooking memory reads. +type MemoryReadHook a = Engine -- ^ 'Unicorn' engine handle + -> Word64 -- ^ Address where the code is being executed + -> Int -- ^ Size of data being read + -> a -- ^ User data passed to tracing APIs + -> IO () + +marshalMemoryReadHook :: Storable a + => MemoryReadHook a -> IO {# type uc_cb_hookmem_t #} +marshalMemoryReadHook memoryReadHook = + mkMemoryHook $ \ucPtr _ address size _ userDataPtr -> do + uc <- mkEngineNC ucPtr + userData <- castPtrAndPeek userDataPtr + memoryReadHook uc address (fromIntegral size) userData + +-- | Callback function for hooking memory writes. +type MemoryWriteHook a = Engine -- ^ 'Unicorn' engine handle + -> Word64 -- ^ Address where the code is being + -- executed + -> Int -- ^ Size of data being written + -> Int -- ^ Value of data being written + -> a -- ^ User data passed to tracing APIs + -> IO () + +marshalMemoryWriteHook :: Storable a + => MemoryWriteHook a -> IO {# type uc_cb_hookmem_t #} +marshalMemoryWriteHook memoryWriteHook = + mkMemoryHook $ \ucPtr _ address size value userDataPtr -> do + uc <- mkEngineNC ucPtr + userData <- castPtrAndPeek userDataPtr + memoryWriteHook uc address (fromIntegral size) (fromIntegral value) + userData + +-- | Callback function for handling invalid memory access events. +type MemoryEventHook a = Engine -- ^ 'Unicorn' engine handle + -> MemoryAccess -- ^ Memory access; read or write + -> Word64 -- ^ Address where the code is being + -- executed + -> Int -- ^ Size of data being read or written + -> Maybe Int -- ^ Value of data being written, or + -- 'Nothing' if read + -> a -- ^ User data passed to tracing APIs + -> IO Bool -- ^ Return 'True' to continue, or + -- 'False' to stop the program (due to + -- invalid memory) + +type CMemoryEventHook = EnginePtr + -> Int32 + -> Word64 + -> Int32 + -> Int64 + -> Ptr () + -> IO Int32 + +foreign import ccall "wrapper" + mkMemoryEventHook :: CMemoryEventHook -> IO {# type uc_cb_eventmem_t #} + +marshalMemoryEventHook :: Storable a + => MemoryEventHook a -> IO {# type uc_cb_eventmem_t #} +marshalMemoryEventHook eventMemoryHook = + mkMemoryEventHook $ \ucPtr memAccessI address size value userDataPtr -> do + uc <- mkEngineNC ucPtr + userData <- castPtrAndPeek userDataPtr + let memAccess = toMemAccess memAccessI + maybeValue = case memAccess of + MemReadUnmapped -> Nothing + MemReadProt -> Nothing + MemWriteUnmapped -> Just $ fromIntegral value + MemWriteProt -> Just $ fromIntegral value + _ -> undefined -- XX Handle this? + res <- eventMemoryHook uc memAccess address (fromIntegral size) + maybeValue userData + return $ boolToInt res + where boolToInt True = 1 + boolToInt False = 0 + + +------------------------------------------------------------------------------- +-- Hook callback registration (and deletion) +------------------------------------------------------------------------------- + +{# fun variadic uc_hook_add as ucHookAdd + `(Storable a, HookTypeC h)' => + {`Engine', + alloca- `Hook' peek*, + enumToNum `h', + castFunPtrToPtr `FunPtr b', + castPtr `Ptr a', + `Word64', + `Word64'} + -> `Error' #} + +{# fun variadic uc_hook_add[int] as ucInsnHookAdd + `(Storable a, HookTypeC h)' => + {`Engine', + alloca- `Hook' peek*, + enumToNum `h', + castFunPtrToPtr `FunPtr b', + castPtr `Ptr a', + `Word64', + `Word64', + enumToNum `Instruction'} + -> `Error' #} + +-- | Unregister (remove) a hook callback. +{# fun uc_hook_del as ^ + {`Engine', + fromIntegral `Hook'} + -> `Error' #} + +------------------------------------------------------------------------------- +-- Helper functions +------------------------------------------------------------------------------- + +toMemAccess :: Integral a => a -> MemoryAccess +toMemAccess = + toEnum . fromIntegral diff --git a/bindings/haskell/src/Unicorn/Internal/Unicorn.chs b/bindings/haskell/src/Unicorn/Internal/Unicorn.chs new file mode 100644 index 00000000..3e80b7bd --- /dev/null +++ b/bindings/haskell/src/Unicorn/Internal/Unicorn.chs @@ -0,0 +1,242 @@ +{-# LANGUAGE ForeignFunctionInterface #-} +{-# LANGUAGE ScopedTypeVariables #-} + +{-| +Module : Unicorn.Internal.Unicorn +Description : The Unicorn CPU emulator. +Copyright : (c) Adrian Herrera, 2016 +License : GPL-2 + +Low-level bindings for the Unicorn CPU emulator framework. + +This module should not be directly imported; it is only exposed because of the +way cabal handles ordering of chs files. +-} +module Unicorn.Internal.Unicorn ( + -- * Types + Architecture(..), + Mode(..), + MemoryPermission(..), + MemoryRegion(..), + QueryType(..), + + -- * Function bindings + ucOpen, + ucQuery, + ucEmuStart, + ucEmuStop, + ucRegWrite, + ucRegRead, + ucMemWrite, + ucMemRead, + ucMemMap, + ucMemUnmap, + ucMemProtect, + ucMemRegions, + ucVersion, + ucErrno, + ucStrerror, +) where + +import Foreign +import Foreign.C + +import Control.Applicative +import Data.ByteString (ByteString, useAsCStringLen) +import Prelude hiding (until) + +import Unicorn.Internal.Util + +{# context lib="unicorn" #} + +{# import Unicorn.Internal.Core #} + +#include + +------------------------------------------------------------------------------- +-- Types +------------------------------------------------------------------------------- + +-- | CPU architecture. +{# enum uc_arch as Architecture + {underscoreToCase} + with prefix = "UC_" + deriving (Show, Eq) #} + +-- | CPU hardware mode. +{# enum uc_mode as Mode + {underscoreToCase} + with prefix="UC_" + deriving (Show, Eq) #} + +-- | Memory permissions. +{# enum uc_prot as MemoryPermission + {underscoreToCase} + with prefix="UC_" + deriving (Show, Eq) #} + +-- | Memory region mapped by 'memMap'. Retrieve the list of memory regions with +-- 'memRegions'. +data MemoryRegion = MemoryRegion { + begin :: Word64, -- ^ Begin address of the region (inclusive) + end :: Word64, -- ^ End address of the region (inclusive) + perms :: [MemoryPermission] -- ^ Memory permissions of the region +} + +instance Storable MemoryRegion where + sizeOf _ = {# sizeof uc_mem_region #} + alignment _ = {# alignof uc_mem_region #} + peek p = MemoryRegion + <$> liftA fromIntegral ({# get uc_mem_region->begin #} p) + <*> liftA fromIntegral ({# get uc_mem_region->end #} p) + <*> liftA expandMemPerms ({# get uc_mem_region->perms #} p) + poke p mr = do + {# set uc_mem_region.begin #} p (fromIntegral $ begin mr) + {# set uc_mem_region.end #} p (fromIntegral $ end mr) + {# set uc_mem_region.perms #} p (combineEnums $ perms mr) + +-- | A pointer to a memory region. +{# pointer *uc_mem_region as MemoryRegionPtr -> MemoryRegion #} + +-- | Query types for the 'query' API. +{# enum uc_query_type as QueryType + {underscoreToCase} + with prefix="UC_" + deriving (Show, Eq) #} + +------------------------------------------------------------------------------- +-- Emulator control +------------------------------------------------------------------------------- + +{# fun uc_open as ^ + {`Architecture', + combineEnums `[Mode]', + alloca- `EnginePtr' peek*} + -> `Error' #} + +{# fun uc_query as ^ + {`Engine', + `QueryType', + alloca- `Int' castPtrAndPeek*} + -> `Error' #} + +{# fun uc_emu_start as ^ + {`Engine', + `Word64', + `Word64', + `Int', + `Int'} + -> `Error' #} + +{# fun uc_emu_stop as ^ + {`Engine'} + -> `Error' #} + +------------------------------------------------------------------------------- +-- Register operations +------------------------------------------------------------------------------- + +{# fun uc_reg_write as ^ + `Reg r' => + {`Engine', + enumToNum `r', + castPtr `Ptr Int64'} + -> `Error' #} + +{# fun uc_reg_read as ^ + `Reg r' => + {`Engine', + enumToNum `r', + allocaInt64ToVoid- `Int64' castPtrAndPeek*} + -> `Error' #} + +------------------------------------------------------------------------------- +-- Memory operations +------------------------------------------------------------------------------- + +{# fun uc_mem_write as ^ + {`Engine', + `Word64', + withByteStringLen* `ByteString'&} + -> `Error' #} + +{# fun uc_mem_read as ^ + {`Engine', + `Word64', + castPtr `Ptr Word8', + `Int'} + -> `Error' #} + +{# fun uc_mem_map as ^ + {`Engine', + `Word64', + `Int', + combineEnums `[MemoryPermission]'} + -> `Error' #} + +{# fun uc_mem_unmap as ^ + {`Engine', + `Word64', + `Int'} + -> `Error' #} + +{# fun uc_mem_protect as ^ + {`Engine', + `Word64', + `Int', + combineEnums `[MemoryPermission]'} + -> `Error' #} + +{# fun uc_mem_regions as ^ + {`Engine', + alloca- `MemoryRegionPtr' peek*, + alloca- `Int' castPtrAndPeek*} + -> `Error' #} + +------------------------------------------------------------------------------- +-- Misc. +------------------------------------------------------------------------------- + +{# fun pure unsafe uc_version as ^ + {id `Ptr CUInt', + id `Ptr CUInt'} + -> `Int' #} + +{# fun unsafe uc_errno as ^ + {`Engine'} + -> `Error' #} + +{# fun pure unsafe uc_strerror as ^ + {`Error'} + -> `String' #} + +------------------------------------------------------------------------------- +-- Helper functions +------------------------------------------------------------------------------- + +expandMemPerms :: (Integral a, Bits a) => a -> [MemoryPermission] +expandMemPerms perms = + -- Only interested in the 3 least-significant bits + let maskedPerms = fromIntegral $ perms .&. 0x7 in + if maskedPerms == 0x0 then + [ProtNone] + else if maskedPerms == 0x7 then + [ProtAll] + else + checkRWE maskedPerms [ProtRead, ProtWrite, ProtExec] + where + checkRWE perms (prot:prots) = + if perms .&. (fromEnum prot) /= 0 then + prot : checkRWE perms prots + else + checkRWE perms prots + checkRWE _ [] = + [] + +allocaInt64ToVoid :: (Ptr () -> IO b) -> IO b +allocaInt64ToVoid f = + alloca $ \(ptr :: Ptr Int64) -> poke ptr 0 >> f (castPtr ptr) + +withByteStringLen :: ByteString -> ((Ptr (), CULong) -> IO a) -> IO a +withByteStringLen bs f = + useAsCStringLen bs $ \(ptr, len) -> f (castPtr ptr, fromIntegral len) diff --git a/bindings/haskell/src/Unicorn/Internal/Util.hs b/bindings/haskell/src/Unicorn/Internal/Util.hs new file mode 100644 index 00000000..ecd70aab --- /dev/null +++ b/bindings/haskell/src/Unicorn/Internal/Util.hs @@ -0,0 +1,25 @@ +{-| +Module : Unicorn.Internal.Util +Description : Utility (aka helper) functions for the Unicorn emulator. +Copyright : (c) Adrian Herrera, 2016 +License : GPL-2 +-} +module Unicorn.Internal.Util where + +import Data.Bits +import Foreign + +-- | Combine a list of Enums by performing a bitwise-OR. +combineEnums :: (Enum a, Num b, Bits b) => [a] -> b +combineEnums = + foldr (\p -> (.|.) (enumToNum p)) 0 + +-- | Cast a pointer and then peek inside it. +castPtrAndPeek :: Storable a => Ptr b -> IO a +castPtrAndPeek = + peek . castPtr + +-- | Convert an 'Eum' to a 'Num'. +enumToNum :: (Enum a, Num b) => a -> b +enumToNum = + fromIntegral . fromEnum diff --git a/bindings/haskell/src/cbits/unicorn_wrapper.c b/bindings/haskell/src/cbits/unicorn_wrapper.c new file mode 100644 index 00000000..1ee60706 --- /dev/null +++ b/bindings/haskell/src/cbits/unicorn_wrapper.c @@ -0,0 +1,8 @@ +#include "unicorn_wrapper.h" + +void uc_close_wrapper(uc_engine *uc) { + uc_close(uc); +} + +void uc_close_dummy(uc_engine *uc) { +} diff --git a/bindings/haskell/src/include/unicorn_wrapper.h b/bindings/haskell/src/include/unicorn_wrapper.h new file mode 100644 index 00000000..76d414aa --- /dev/null +++ b/bindings/haskell/src/include/unicorn_wrapper.h @@ -0,0 +1,16 @@ +#ifndef UNICORN_WRAPPER_H +#define UNICORN_WRAPPER_H + +#include + +/* + * Wrap Unicorn's uc_close function and ignore the returned error code. + */ +void uc_close_wrapper(uc_engine *uc); + +/* + * Doesn't actually do anything. + */ +void uc_close_dummy(uc_engine *uc); + +#endif diff --git a/bindings/haskell/unicorn.cabal b/bindings/haskell/unicorn.cabal new file mode 100644 index 00000000..77af442e --- /dev/null +++ b/bindings/haskell/unicorn.cabal @@ -0,0 +1,42 @@ +-- Initial unicorn.cabal generated by cabal init. For further +-- documentation, see http://haskell.org/cabal/users-guide/ + +name: unicorn +version: 0.1.0.0 +category: FFI, Emulation +synopsis: Unicorn CPU emulator engine +description: Haskell bindings for the Unicorn CPU emulator engine. +homepage: https://github.com/unicorn-engine/unicorn +author: Adrian Herrera +license: GPL +copyright: (c) 2016, Adrian Herrera +category: System +build-type: Simple +stability: experimental +cabal-version: >=1.10 +extra-source-files: cbits/, include/ + +library + exposed-modules: Unicorn.Internal.Core + Unicorn.Internal.Unicorn + Unicorn.CPU.Arm64 + Unicorn.CPU.Arm + Unicorn.CPU.M68k + Unicorn.CPU.Mips + Unicorn.CPU.Sparc + Unicorn.CPU.X86 + Unicorn.Internal.Hook + Unicorn.Hook + Unicorn + other-modules: Unicorn.Internal.Util + build-depends: base >=4 && <5, + bytestring >= 0.9.1, + transformers <= 0.5, + either >= 4.4 + hs-source-dirs: src + c-sources: src/cbits/unicorn_wrapper.c + include-dirs: src/include + build-tools: c2hs + pkgconfig-depends: unicorn + default-language: Haskell2010 + ghc-options: -Wall