Unicorn是一個輕量級、多平台、多架構的CPU模擬器框架,基於QEMU開發。它可以模擬CPU來執行代碼,常用於惡意代碼分析、Fuzz等。該項目被用於Radare2逆向分析框架、GEF(gdb的pwn分析插件)、Pwndbg、angr符號執行框架等多個著名項目。
開發準備
官網
編譯安裝
安裝提供了兩種方式:
-
從軟件源(包管理器)安裝
-
OSX(homebrew)
brew install unicorn # homebrew安裝好后需要設置library的全局變量 export DYLD_LIBRARY_PATH=/usr/local/opt/unicorn/lib/:$DYLD_LIBRARY_PATH # 然后安裝python庫 pip install unicorn
-
Linux參考官方安裝手冊
-
Windows參考官方安裝手冊
-
-
從源碼構建
-
下載源碼包:
# 構建 ./make.sh sudo ./make.sh install # 安裝python 庫 pip install unicorn # 升級unicorn庫 pip install unicorn --upgrade
其他交叉編譯(如iOS、ARM、Android)參考官方文檔
-
快速入門
安裝好unicorn後,可以用下面的測試用例來檢測unicorn的功能是否可用
Python
-
以下為Python調用unicorn框架測試代碼
# Minimal smoke test for the Unicorn Python binding: emulate two x86-32
# instructions and print the registers they modify.
from __future__ import print_function
from unicorn import *
from unicorn.x86_const import *

# Machine code to emulate: INC ecx; DEC edx
X86_CODE32 = b"\x41\x4a"

# Address at which the code is mapped and where emulation starts
ADDRESS = 0x1000000

# Size of the mapped region: 2MB
MAP_SIZE = 2 * 1024 * 1024

# Register values written before emulation, so the effect of the code is visible
INITIAL_REGISTERS = (
    (UC_X86_REG_ECX, 0x1234),
    (UC_X86_REG_EDX, 0x7890),
)

print("Emulate i386 code")
try:
    # Create an emulator instance in x86 32-bit mode
    emulator = Uc(UC_ARCH_X86, UC_MODE_32)

    # Map the memory region and copy the machine code into it
    emulator.mem_map(ADDRESS, MAP_SIZE)
    emulator.mem_write(ADDRESS, X86_CODE32)

    # Seed the registers
    for register, value in INITIAL_REGISTERS:
        emulator.reg_write(register, value)

    # Run from ADDRESS up to the end of the code; no timeout or
    # instruction-count arguments are passed
    end_address = ADDRESS + len(X86_CODE32)
    emulator.emu_start(ADDRESS, end_address)

    # Report the register values after emulation
    print("Emulation done. Below is the CPU context")
    ecx_after = emulator.reg_read(UC_X86_REG_ECX)
    edx_after = emulator.reg_read(UC_X86_REG_EDX)
    print(">>> ECX = 0x%x" % ecx_after)
    print(">>> EDX = 0x%x" % edx_after)

except UcError as e:
    print("ERROR: %s" % e)
最終輸出結果:
Emulate i386 code Emulation done. Below is the CPU context >>> ECX = 0x1235 >>> EDX = 0x788f
看到結果,ECX被加了1,並且EDX被減去1,表示python可以成功調用unicorn
C
-
以下為C調用unicorn框架測試代碼
#include<stdio.h> #include<unicorn/unicorn.h> #include<string.h> #define ADDRESS 0x1000000 #define X86_CODE32 "\x41\x4a\x66\x0f\xef\xc1" // INC ecx; DEC edx; PXOR xmm0, xmm1 // 在終端中輸出起始地址和硬編碼大小 static void hook_block(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { printf(">>> Tracing basic block at 0x%"PRIx64 ", block size = 0x%x\n", address, size); } // hook 回調函數,用於監視程序運行時的變化 static void hook_code(uc_engine *uc, uint64_t address, uint32_t size, void *user_data) { int eflags; printf(">>> Tracing instruction at 0x%"PRIx64 ", instruction size = 0x%x\n", address, size); uc_reg_read(uc, UC_X86_REG_EFLAGS, &eflags); //獲取寄存器值放入eflags變量中 printf(">>> --- EFLAGS is 0x%x\n", eflags); // Uncomment below code to stop the emulation using uc_emu_stop() // if (address == 0x1000009) // uc_emu_stop(uc); } static void test_i386(void) { uc_engine *uc; uc_err err; uint32_t tmp; uc_hook trace1, trace2; int r_ecx = 0x1234; // ECX 寄存器 int r_edx = 0x7890; // EDX 寄存器 // XMM0 、 XMM1 寄存器, 數組分別為低64位和高64位 uint64_t r_xmm0[2] = {0x08090a0b0c0d0e0f, 0x0001020304050607}; uint64_t r_xmm1[2] = {0x8090a0b0c0d0e0f0, 0x0010203040506070}; printf("Emulate i386 code\n"); // 初始化x86環境 err = uc_open(UC_ARCH_X86, UC_MODE_32, &uc); if (err) { printf("Failed on uc_open() with error returned: %u\n", err); return; } // 為模擬執行代碼申請 2MB 內存 uc_mem_map(uc, ADDRESS, 2 * 1024 * 1024, UC_PROT_ALL); // 向目標地址寫入opcode if (uc_mem_write(uc, ADDRESS, X86_CODE32, sizeof(X86_CODE32) - 1)) { printf("Failed to write emulation code to memory, quit!\n"); return; } // 初始化寄存器ECX、EDX、XMM0、XMM1 uc_reg_write(uc, UC_X86_REG_ECX, &r_ecx); uc_reg_write(uc, UC_X86_REG_EDX, &r_edx); uc_reg_write(uc, UC_X86_REG_XMM0, &r_xmm0); uc_reg_write(uc, UC_X86_REG_XMM1, &r_xmm1); // 在函數內插樁,成功時會調用回調函數 uc_hook_add(uc, &trace1, UC_HOOK_BLOCK, hook_block, NULL, 1, 0); // 每當代碼執行時調用回調函數 uc_hook_add(uc, &trace2, UC_HOOK_CODE, hook_code, NULL, 1, 0); // 模擬執行 err = uc_emu_start(uc, ADDRESS, ADDRESS + sizeof(X86_CODE32) - 1, 0, 0); if (err) { 
printf("Failed on uc_emu_start() with error returned %u: %s\n", err, uc_strerror(err)); } // 最后輸出一些模擬執行完成后寄存器的值 printf(">>> Emulation done. Below is the CPU context\n"); uc_reg_read(uc, UC_X86_REG_ECX, &r_ecx); uc_reg_read(uc, UC_X86_REG_EDX, &r_edx); uc_reg_read(uc, UC_X86_REG_XMM0, &r_xmm0); printf(">>> ECX = 0x%x\n", r_ecx); printf(">>> EDX = 0x%x\n", r_edx); printf(">>> XMM0 = 0x%.16"PRIx64"%.16"PRIx64"\n", r_xmm0[1], r_xmm0[0]); // 讀取內存中的內容 if (!uc_mem_read(uc, ADDRESS, &tmp, sizeof(tmp))) printf(">>> Read 4 bytes from [0x%x] = 0x%x\n", ADDRESS, tmp); else printf(">>> Failed to read 4 bytes from [0x%x]\n", ADDRESS); // 最后需要關閉,否則會導致內存泄露 uc_close(uc); } int main(){ test_i386(); return 0; }
編輯Makefile進行編譯:
# Linker flags: glib-2.0 flags reported by pkg-config, plus pthread, libm and unicorn
LDFLAGS += $(shell pkg-config --libs glib-2.0) -lpthread -lm -lunicorn

# Default target: build the test2 executable
all: test2

# Pattern rule: build an executable from the .c file of the same name
%: %.c
	$(CC) $(CFLAGS) $^ $(LDFLAGS) -o $@
上面的Makefile等同於命令:
cc test2.c -L/usr/local/Cellar/glib/2.70.1/lib -L/usr/local/opt/gettext/lib -lglib-2.0 -lintl -lpthread -lm -lunicorn -o test2
運行結果如下:
Emulate i386 code >>> Tracing basic block at 0x1000000, block size = 0x6 >>> Tracing instruction at 0x1000000, instruction size = 0x1 >>> --- EFLAGS is 0x0 >>> Tracing instruction at 0x1000001, instruction size = 0x1 >>> --- EFLAGS is 0x4 >>> Tracing instruction at 0x1000002, instruction size = 0x4 >>> --- EFLAGS is 0x10 >>> Emulation done. Below is the CPU context >>> ECX = 0x1235 >>> EDX = 0x788f >>> XMM0 = 0x00112233445566778899aabbccddeeff >>> Read 4 bytes from [0x1000000] = 0xf664a41
Go
-
Go語言需要安裝package
go get github.com/unicorn-engine/unicorn/bindings/go/unicorn
-
示例代碼如下
package main

import (
	"fmt"

	"github.com/unicorn-engine/unicorn/bindings/go/unicorn"
)

// main emulates a single x86-32 instruction (mov eax, 1234) at address
// 0x1000 and prints the resulting EAX value. Any error returned by the
// unicorn binding aborts the program via panic.
func main() {
	un, err := unicorn.NewUnicorn(unicorn.ARCH_X86, unicorn.MODE_32)
	if err != nil {
		panic(err)
	}

	code := []byte{184, 210, 4, 0, 0} // mov eax,1234

	// Map 0x1000 bytes at address 0x1000 and copy the code there.
	if err := un.MemMap(0x1000, 0x1000); err != nil {
		panic(err)
	}
	if err := un.MemWrite(0x1000, code); err != nil {
		panic(err)
	}

	// Run from the start of the code to the address just past its last byte.
	if err := un.Start(0x1000, 0x1000+uint64(len(code))); err != nil {
		panic(err)
	}

	eax, err := un.RegRead(unicorn.X86_REG_EAX)
	if err != nil {
		panic(err)
	}
	// Println formats the value in decimal.
	fmt.Println(eax)
}
最終輸出結果:1234。注意這裡是十進制
其他unicorn示例
還有更多的Python示例,以及Go、Java、Ruby、Rust、Pascal等其他編程語言的示例代碼,可以參考鏈接:
unicorn/bindings at master · unicorn-engine/unicorn
API參考
C語言函數定義在unicorn.h頭文件中,Python函數定義在unicorn_const.py和unicorn.py中,函數和定義簡短,用時再看也來得及。
總結
簡單來說,可以把unicorn理解成一個CPU,把需要執行的代碼片段和內存空間布局好,unicorn會執行代碼片段,並返回結果。unicorn可以模擬執行多種架構的指令,比如ARM、x86、MIPS等,並且有多種語言的API接口,其中我比較喜歡用的是Python、C和Go,可以根據自己喜歡的語言基於unicorn進行開發,寫出自己的一些工具,比如fuzzer、惡意代碼分析工具、二進制插樁、加密算法分析等。