最近受同學所託,將5個內嵌了MASM語法格式的匯編代碼之C函數翻譯成純C函數,以支持多種CPU指令集(比如x86/x64, arm, sparc, ...)。整個過程充滿了艱辛,但也充滿了樂趣。作為一個既喜歡C又喜歡匯編的程序員,在廢寢忘食之餘深深地體會到,“反匯編(disassemble)容易,反編譯(decompile)難”。逆向工程實在是太不容易啦!下面給出一個簡單點兒的例子予以說明。[P.S. 一肚子的Asm, C和gdb,終於派上了用場:-)]
o void add16(word *, word *, word n)
1 typedef unsigned char byte; /* 1 byte */ 2 typedef unsigned short word; /* 2 bytes */ 3 typedef unsigned int dword; /* 4 bytes */ 4 typedef unsigned long long qword; /* 8 bytes */ 5 6 void add16(word *a, word *b, word n) 7 { 8 word *dstp = a; 9 word *srcp = b; 10 word count = n; 11 12 __asm { 13 xor ecx, ecx 14 mov cx, count 15 mov esi, srcp 16 mov edi, dstp 17 xor ebx, ebx 18 19 LOOP01: 20 xor eax, eax 21 mov ax, [esi] 22 add eax, ebx 23 xor edx, edx 24 mov dx, [edi] 25 add eax, edx 26 27 mov [edi], ax 28 shr eax, 16 29 mov ebx, eax 30 31 add esi, 2 32 add edi, 2 33 loop LOOP01 34 35 add [edi], bx 36 } 37 }
作為一個對x86匯編熟悉N(>=12) 年的程序員,翻譯上面的內嵌代碼,花了足足5個小時。 5小時的艱苦歷程如下:
第1步: 將匯編代碼摘取出來,用NASM實現 (本人不喜歡Windows編程,所以在Linux上用NASM)
第2步: 給每一行匯編代碼加註釋,加完註釋後的匯編源文件foo.asm如下:
; foo.asm - stand-alone NASM version of the add16() inline assembly.
;
; Adds the `count` 16-bit words at srcp into the words at dstp in place,
; least significant word first, carrying from each word into the next.
; The carry left after the last word is added into the word that follows
; it in dstp, which is why both tables hold one more word than `count`.
; The program then exits through the exit system call (int 0x80).

BITS 32

SECTION .data

count:  equ 0x5                                             ; words to add (assemble-time constant)
dstp:   dw  0x1234, 0x3456, 0xffff, 0x789a, 0x9abc, 0x0000  ; a: five words + one spare word for the carry
srcp:   dw  0xabcd, 0xffff, 0xffff, 0x0123, 0x2345, 0x0000  ; b: five words (the sixth is never read)

SECTION .text

global _start

_start:
        xor     ecx, ecx        ; ecx = 0, so the upper half of the counter is clear
        mov     cx, count       ; ecx = count (loop counter)
        mov     esi, srcp       ; esi = address of the source words
        mov     edi, dstp       ; edi = address of the destination words
        xor     ebx, ebx        ; ebx = 0 (carry into the first word)

LOOP01:
        xor     eax, eax        ; eax = 0
        mov     ax, [esi]       ; eax = *srcp, zero-extended to 32 bits
        add     eax, ebx        ; eax += carry from the previous word
        xor     edx, edx        ; edx = 0
        mov     dx, [edi]       ; edx = *dstp, zero-extended to 32 bits
        add     eax, edx        ; eax += *dstp; the sum fits in 17 bits

        mov     [edi], ax       ; *dstp = low 16 bits of the sum
        shr     eax, 16         ; eax = what is left above bit 15
        mov     ebx, eax        ; ebx = carry into the next word

        add     esi, 2          ; srcp++ (one 16-bit word)
        add     edi, 2          ; dstp++ (one 16-bit word)
        loop    LOOP01          ; --ecx; jump back to LOOP01 while ecx != 0

        add     [edi], bx       ; add the final carry into the word after the last one summed

_exit:
        mov     eax, 1          ; system call number of exit
        mov     ebx, 0          ; exit status 0
        int     0x80            ; enter the kernel
第3步: 編譯foo.asm,用gdb調試
$ nasm -f elf32 -g -F stabs foo.asm $ ld -o foo foo.o
用gdb單步調試的過程比較冗長,這裡貼出最簡版調試過程:
$ gdb foo GNU gdb (Ubuntu 7.7.1-0ubuntu5~14.04.2) 7.7.1 ...<snip>.................................... (gdb) set disassembly-flavor intel (gdb) # (gdb) (gdb) disas _start Dump of assembler code for function _start: 0x08048080 <+0>: xor ecx,ecx 0x08048082 <+2>: mov cx,0x5 0x08048086 <+6>: mov esi,0x80490cc 0x0804808b <+11>: mov edi,0x80490c0 0x08048090 <+16>: xor ebx,ebx End of assembler dump. (gdb) # (gdb) (gdb) disas LOOP01 Dump of assembler code for function LOOP01: 0x08048092 <+0>: xor eax,eax 0x08048094 <+2>: mov ax,WORD PTR [esi] 0x08048097 <+5>: add eax,ebx 0x08048099 <+7>: xor edx,edx 0x0804809b <+9>: mov dx,WORD PTR [edi] 0x0804809e <+12>: add eax,edx 0x080480a0 <+14>: mov WORD PTR [edi],ax 0x080480a3 <+17>: shr eax,0x10 0x080480a6 <+20>: mov ebx,eax 0x080480a8 <+22>: add esi,0x2 0x080480ab <+25>: add edi,0x2 0x080480ae <+28>: loop 0x8048092 <LOOP01> 0x080480b0 <+30>: add WORD PTR [edi],bx End of assembler dump. (gdb) # (gdb) (gdb) disas _exit Dump of assembler code for function _exit: 0x080480b3 <+0>: mov eax,0x1 0x080480b8 <+5>: mov ebx,0x0 0x080480bd <+10>: int 0x80 End of assembler dump. 
(gdb) # (gdb) (gdb) (gdb) (gdb) (gdb) (gdb) set disassembly-flavor intel (gdb) display /i $eip (gdb) # (gdb) (gdb) b _start Breakpoint 1 at 0x8048080 (gdb) b _exit Breakpoint 2 at 0x80480b3 (gdb) info b Num Type Disp Enb Address What 1 breakpoint keep y 0x08048080 <_start> 2 breakpoint keep y 0x080480b3 <_exit> (gdb) # (gdb) (gdb) r Starting program: /var/tmp/sandbox/fanli/raw/05/cnblog/foo Breakpoint 1, 0x08048080 in _start () 1: x/i $eip => 0x8048080 <_start>: xor ecx,ecx (gdb) # (gdb) (gdb) ni 0x08048082 in _start () 1: x/i $eip => 0x8048082 <_start+2>: mov cx,0x5 (gdb) 0x08048086 in _start () 1: x/i $eip => 0x8048086 <_start+6>: mov esi,0x80490cc (gdb) 0x0804808b in _start () 1: x/i $eip => 0x804808b <_start+11>: mov edi,0x80490c0 (gdb) 0x08048090 in _start () 1: x/i $eip => 0x8048090 <_start+16>: xor ebx,ebx (gdb) info r ecx edi esi ecx 0x5 5 edi 0x80490c0 134516928 esi 0x80490cc 134516940 (gdb) x /5hx 0x80490c0 0x80490c0 <dstp>: 0x1234 0x3456 0xffff 0x789a 0x9abc (gdb) x /5hx 0x80490cc 0x80490cc <srcp>: 0xabcd 0xffff 0xffff 0x0123 0x2345 (gdb) # (gdb) (gdb) c Continuing. Breakpoint 2, 0x080480b3 in _exit () 1: x/i $eip => 0x80480b3 <_exit>: mov eax,0x1 (gdb) # (gdb) (gdb) x /5hx 0x80490c0 0x80490c0 <dstp>: 0xbe01 0x3455 0xffff 0x79be 0xbe01 (gdb) x /5hx 0x80490cc 0x80490cc <srcp>: 0xabcd 0xffff 0xffff 0x0123 0x2345 (gdb) # (gdb) (gdb) ni 0x080480b8 in _exit () 1: x/i $eip => 0x80480b8 <_exit+5>: mov ebx,0x0 (gdb) 0x080480bd in _exit () 1: x/i $eip => 0x80480bd <_exit+10>: int 0x80 (gdb) [Inferior 1 (process 21920) exited normally] (gdb)
從上面的調試過程可以看出,
a: 0x1234 0x3456 0xffff 0x789a 0x9abc b: 0xabcd 0xffff 0xffff 0x0123 0x2345 After a = a + b is done, a: 0xbe01 0x3455 0xffff 0x79be 0xbe01 That is, (1) a = 0x9abc789affff34561234 (2) b = 0x23450123ffffffffabcd (3) a += b (4) a = 0xbe0179beffff3455be01
第4步:用Python驗證一下上面的計算結果,
$ python Python 2.7.6 (default, Oct 26 2016, 20:32:47) ...<snip>.................................... >>> a = 0x9abc789affff34561234 >>> b = 0x23450123ffffffffabcd >>> a += b >>> print " a = 0x%x" % a a = 0xbe0179beffff3455be01
第5步: 將foo.asm翻譯成foo2.c
#include <stdio.h>

typedef unsigned char      byte;   /* 1 byte */
typedef unsigned short     word;   /* 2 bytes */
typedef unsigned int       dword;  /* 4 bytes */
typedef unsigned long long qword;  /* 8 bytes */

/*
 * add16 - add the n-word number b into the number a, in place.
 *
 * Both numbers are arrays of 16-bit words, least significant word first.
 * Each pair of words is summed in 32 bits together with the carry from
 * the previous pair; the low 16 bits are stored back and the rest becomes
 * the next carry.  The carry left after the last pair is added into a[n],
 * so `a` must provide at least n + 1 writable words.  `b` is only read
 * (n words).  With n == 0 only a zero carry is added to a[0].
 */
void add16(word *a, word *b, word n)
{
    word *dstp = a;
    word *srcp = b;
    word carry = 0;

    for (word i = 0; i < n; i++) {
        /* Widen before adding: 0xFFFF + 0xFFFF + 1 needs 17 bits. */
        dword sum = (dword)*dstp + (dword)*srcp + (dword)carry;

        *dstp = (word)(sum & 0xFFFF);  /* low 16 bits of the sum  */
        carry = (word)(sum >> 16);     /* high 16 bits of the sum */

        srcp++;
        dstp++;
    }

    *dstp += carry;                    /* final carry goes into a[n] */
}

/*
 * dump - print the address of a[] followed by its n words in hex,
 * all on one line.
 */
static void dump(word a[], word n)
{
    /* %p requires a void * argument. */
    printf("\t%p: ", (void *)a);
    for (word i = 0; i < n; i++) {
        /* %x requires unsigned int; a bare word would be promoted to int. */
        printf("0x%04x ", (unsigned int)a[i]);
    }
    printf("\n");
}

/*
 * Demo: adds the five-word number src into dst and prints dst before and
 * after.  Both arrays carry a sixth, zero word; add16() writes its final
 * carry into dst[5].
 */
int main(int argc, char *argv[])
{
    (void)argc;  /* command-line arguments are not used */
    (void)argv;

    word src[] = {0xabcd, 0xffff, 0xffff, 0x0123, 0x2345, 0x0000};
    word dst[] = {0x1234, 0x3456, 0xffff, 0x789a, 0x9abc, 0x0000};
    word n = 0x5;

    dump(dst, sizeof(dst) / sizeof(dst[0]));
    printf("+\n");
    dump(src, sizeof(src) / sizeof(src[0]));

    add16(dst, src, n);

    printf("=\n");
    dump(dst, sizeof(dst) / sizeof(dst[0]));

    return 0;
}
o add16()截圖
第6步: 編譯foo2.c並運行
$ gcc -g -Wall -m32 -std=c99 -o foo2 foo2.c $ ./foo2 0xbf9c1bc4: 0x1234 0x3456 0xffff 0x789a 0x9abc 0x0000 + 0xbf9c1bb8: 0xabcd 0xffff 0xffff 0x0123 0x2345 0x0000 = 0xbf9c1bc4: 0xbe01 0x3455 0xffff 0x79be 0xbe01 0x0000
與foo.asm對應的運算結果做比較,
0xbf9c1bc4: 0xbe01 0x3455 0xffff 0x79be 0xbe01 0x0000
二者運算的結果完全一致,由此可見,add16()其實就是做大數加法。
- a = {a[0], a[1], ..., a[N]}, a[i] 為一個word, 占兩個字節
- b = {b[0], b[1], ..., b[N]}, b[i]為一個word, 占兩個字節
- a + b = {a[0]+b[0], a[1]+b[1], ..., a[N]+b[N]}
- a[i] + b[i] 可能發生進位(carry), 將carry加到a[i+1]的位置上即可
P.S. 在翻譯過程中,我參考了下面兩個與轉移指令有關的文檔。 (如果有興趣,請閱讀)
結束語: 上面給出的例子foo.asm只有一重循環,所以翻譯成C代碼相對簡單。但是,如果有多重循環和多次跳轉, 那麼翻譯起來就困難許多。例如:
; Multi-word multiplication: cdp[] += adp[] * bdp[], on 32-bit words.
;
; For every non-zero word adp[i] (outer loop, byte offset edi) and every
; non-zero word bdp[j] (inner loop, byte offset esi) the 64-bit product
; edx:eax is accumulated into cdp at byte offset edi + esi:
;   - eax (low half)  is added to that word,     then the carry is rippled upward (LOOP3);
;   - edx (high half) is added to the next word, then the carry is rippled upward (LOOP4).
; Carry rippling stops at ecx, the address one past the end of cdp.
; The current adp word is kept on the stack for the whole inner loop.
; The 3-digit numbers in the comments are step labels that other comments refer to.

BITS 32

SECTION .data

adp:            dd 0x12345678, 0x9abcdef0, 0xffffffff, 0x9abcdefa, \
                   0x00000000
adp_size:       equ $ - adp             ; size of adp in bytes
bdp:            dd 0xbfffc061, 0xfedcba99, 0x76543211, 0xfedcba99, \
                   0x00000000
bdp_size:       equ $ - bdp             ; size of bdp in bytes
cdp:            dd 0x00000000, 0x00000000, 0x00000000, 0x00000000, \
                   0x00000000, 0x00000000, 0x00000000, 0x00000000, \
                   0x00000000, 0x00000000
cdp_size:       equ $ - cdp             ; size of cdp in bytes

SECTION .text

global _start

_start:
        mov     ecx, cdp                ; 001 ecx = cdp, cdp[] = adp[] * bdp[]
        add     ecx, cdp_size           ; 002 ecx += cdp_size, i.e. one past the end of cdp

        xor     edi, edi                ; 003 edi = 0 (byte offset into adp)

LOOP1:                                  ; 004 outer loop: one pass per word of adp
        mov     ebx, adp                ; 005 ebx = adp
        add     ebx, edi                ; 006 ebx += edi
        mov     eax, [ebx]              ; 007 eax = *ebx = adp word at offset edi
        cmp     eax, 0                  ; 008 if (eax == 0)
        je      NEXT3                   ; 009     goto NEXT3 (a zero word adds nothing)
        xor     esi, esi                ; 010 esi = 0 (byte offset into bdp)
        push    eax                     ; 011 save the adp word on the stack

LOOP2:                                  ; 012 inner loop: one pass per word of bdp
        mov     ebx, bdp                ; 013 ebx = bdp
        add     ebx, esi                ; 014 ebx += esi
        mov     eax, [ebx]              ; 015 eax = *ebx = bdp word at offset esi
        cmp     eax, 0                  ; 016 if (eax == 0)
        je      NEXT2                   ; 017     goto NEXT2 (a zero word adds nothing)
        pop     edx                     ; 018 edx = adp word pushed at 011
        push    edx                     ; 019 put it back for the next pass
        mul     edx                     ; 020 edx:eax = eax * edx
        mov     ebx, cdp                ; 021 ebx = cdp
        add     ebx, esi                ; 022 ebx += esi
        add     ebx, edi                ; 023 ebx += edi
        push    ebx                     ; 024 save the address of the target word
        add     [ebx], eax              ; 025 *ebx += eax (low half); CF = carry out

LOOP3:                                  ; 026 ripple the carry of the low-half add
        jnc     NEXT1                   ; 027 no carry left: done (jnc is the same test as jae)
        add     ebx, 4                  ; 028 ebx += 4 (next word of cdp)
        cmp     ebx, ecx                ; 029 if (ebx >= ecx), ecx = cdp + cdp_size
        jae     NEXT1                   ; 030     goto NEXT1 (ran off the end of cdp)
        add     dword [ebx], 1          ; 031 *ebx += 1; CF = carry out
        jmp     LOOP3                   ; 032 go back to LOOP3

NEXT1:                                  ; 033 only reached from LOOP3
        pop     ebx                     ; 034 ebx = target address pushed at 024
        add     ebx, 4                  ; 035 ebx += 4 (the word above the target)
        add     [ebx], edx              ; 036 *ebx += edx (high half); CF = carry out

LOOP4:                                  ; 037 ripple the carry of the high-half add
        jnc     NEXT2                   ; 038 no carry left: done (jnc is the same test as jae)
        add     ebx, 4                  ; 039 ebx += 4 (next word of cdp)
        cmp     ebx, ecx                ; 040 if (ebx >= ecx)
        jae     NEXT2                   ; 041     goto NEXT2 (ran off the end of cdp)
        add     dword [ebx], 1          ; 042 *ebx += 1; CF = carry out
        jmp     LOOP4                   ; 043 go back to LOOP4

NEXT2:                                  ; 044 advance to the next word of bdp
        add     esi, 4                  ; 045 esi += 4
        cmp     esi, bdp_size           ; 046 if (esi < bdp_size)
        jb      LOOP2                   ; 047     go back to LOOP2
        pop     eax                     ; 048 drop the adp word pushed at 011

NEXT3:                                  ; 049 advance to the next word of adp
        add     edi, 4                  ; 050 edi += 4
        cmp     edi, adp_size           ; 051 if (edi < adp_size)
        jb      LOOP1                   ; 052     go back to LOOP1

_exit:
        mov     eax, 1                  ; system call number of exit
        mov     ebx, 0                  ; exit status 0
        int     0x80                    ; enter the kernel
上面的代碼一共包括4個LOOP,3個NEXT和10個跳轉指令(e.g. jmp, jb, jae, jnc, je),翻譯成C代碼難度非常大。究其本質,實為做大數乘法。有關其翻譯實現後的C代碼,請參考前文。 反匯編(將機器碼翻譯成匯編代碼)有現成的工具可用(e.g. gdb, objdump), 所以很容易。反編譯(將匯編代碼翻譯成C代碼),國外有收費的軟件可以用(e.g. Hex-Rays Decompiler),但是也不能保證100%的正確性。所以,反匯編容易,反編譯難,逆向工程很不容易。 (p.s. 有一個學信息工程專業的(但從來沒做過程序員的)同學在微信群裡用略帶不屑的語氣說“匯編是一種很古老的語言”,我真的很無語Orz...再高級的語言寫的代碼,也要變成機器碼才能運行不是,似乎匯編是無法繞過的...)