有符號整數比較v.s.無符號整數比較


本文嘗試從匯編的角度給出有符號整數比較與無符號整數比較的區別所在。 在《深入理解計算機系統》(英文版第二版)一書中的Page#77,有下面一個練習題:


將上述示例代碼寫入foo1.c文件,運行並分析bug產生的代碼行。
1. foo1.c

 1 #include <stdio.h>
 2 
 3 float sum_elements(float a[], unsigned length)
 4 {
 5         int i;
 6         float result = 0;
 7         for (i = 0; i <= length-1; i++)
 8                 result += a[i];
 9         return result;
10 }
11 
12 int main(int argc, char *argv[])
13 {
14         float a[] = {1.0, 2.0, 3.0};
15         float m = sum_elements(a, 0);
16         printf("%.1f\n", m);
17         return 0;
18 }

編譯並運行,發現存在着非法內存訪問,

$ ulimit -c unlimited
$ gcc -g -Wall -std=c99 -o foo1 foo1.c
$ ./foo1
Segmentation fault (core dumped)

用gdb查看一下core文件,

$ gdb foo1 core
GNU gdb (Ubuntu 7.7.1-0ubuntu5~14.04.2) 7.7.1
...<snip>....................................
Reading symbols from foo1...done.
[New LWP 3403]
Core was generated by `./foo1'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  0x08048446 in sum_elements (a=0xbfdd50a4, length=0) at foo1.c:8
8                       result += a[i];
(gdb) bt
#0  0x08048446 in sum_elements (a=0xbfdd50a4, length=0) at foo1.c:8
#1  0x080484a1 in main (argc=1, argv=0xbfdd5154) at foo1.c:15
(gdb) l 6,8
6               float result = 0;
7               for (i = 0; i <= length-1; i++)
8                       result += a[i];
(gdb)

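我們可以看出,core的位置在第8行,但有bug的代碼則是第7行。 (第6行不可能有bug) 注意length是一個無符號整數,而i則是一個有符號整數。我們期望的結果是,當length等於0的時候,length-1為-1,其實則不然:length-1按無符號整數運算,結果回繞為UINT_MAX(32位下即0xFFFFFFFF);並且在比較i <= length-1時,i也會先被隱式轉換為無符號整數。於是實際運行的時候,i <= length-1的條件恆成立,代碼運行到第8行,當i>=3的時候數組訪問已經越界,i繼續增大直到訪問到未映射的內存時,必然出現非法的內存訪問錯誤。 從C語言編程的角度,修復這一行很簡單,有兩種方法: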

  • for (i = 0; i < length; i++)
  • for (i = 0; i <= (int)length - 1; i++)

但這還不足以說明問題的本質。下面使用第二種修復方法給出foo2.c,然後通過反匯編比較foo1.c和foo2.c,從而給出有符號整數比較與無符號整數比較的區別所在。

2. foo2.c

 1 #include <stdio.h>
 2 
 3 float sum_elements(float a[], unsigned length)
 4 {
 5         int i;
 6         float result = 0;
 7         for (i = 0; i <= (int)length-1; i++)
 8                 result += a[i];
 9         return result;
10 }
11 
12 int main(int argc, char *argv[])
13 {
14         float a[] = {1.0, 2.0, 3.0};
15         float m = sum_elements(a, 0);
16         printf("%.1f\n", m);
17         return 0;
18 }

編譯並運行

$ rm -f core
$ ulimit -c unlimited
$ gcc -g -Wall -std=c99 -o foo2 foo2.c
$ ./foo2
0.0

將foo1裡的函數sum_elements反匯編存入foo1.gdb.out,

 1 (gdb) disas /m sum_elements
 2 Dump of assembler code for function sum_elements:
 3 4       {
 4    0x0804841d <+0>:     push   ebp
 5    0x0804841e <+1>:     mov    ebp,esp
 6    0x08048420 <+3>:     sub    esp,0x18
 7 
 8 5               int i;
 9 6               float result = 0;
10    0x08048423 <+6>:     mov    eax,ds:0x8048558
11    0x08048428 <+11>:    mov    DWORD PTR [ebp-0x4],eax
12 
13 7               for (i = 0; i <= length-1; i++)
14    0x0804842b <+14>:    mov    DWORD PTR [ebp-0x8],0x0
15    0x08048432 <+21>:    jmp    0x8048451 <sum_elements+52>
16    0x0804844d <+48>:    add    DWORD PTR [ebp-0x8],0x1
17    0x08048451 <+52>:    mov    eax,DWORD PTR [ebp-0x8]
18    0x08048454 <+55>:    mov    edx,DWORD PTR [ebp+0xc]
19    0x08048457 <+58>:    sub    edx,0x1
20    0x0804845a <+61>:    cmp    eax,edx
21    0x0804845c <+63>:    jbe    0x8048434 <sum_elements+23>
22 
23 8                       result += a[i];
24    0x08048434 <+23>:    fld    DWORD PTR [ebp-0x4]
25    0x08048437 <+26>:    mov    eax,DWORD PTR [ebp-0x8]
26    0x0804843a <+29>:    lea    edx,[eax*4+0x0]
27    0x08048441 <+36>:    mov    eax,DWORD PTR [ebp+0x8]
28    0x08048444 <+39>:    add    eax,edx
29    0x08048446 <+41>:    fld    DWORD PTR [eax]
30    0x08048448 <+43>:    faddp  st(1),st
31    0x0804844a <+45>:    fstp   DWORD PTR [ebp-0x4]
32 
33 9               return result;
34    0x0804845e <+65>:    mov    eax,DWORD PTR [ebp-0x4]
35    0x08048461 <+68>:    mov    DWORD PTR [ebp-0x18],eax
36    0x08048464 <+71>:    fld    DWORD PTR [ebp-0x18]
37 
38 10      }
39    0x08048467 <+74>:    leave
40    0x08048468 <+75>:    ret
41 
42 End of assembler dump.

將foo2裡的函數sum_elements反匯編存入foo2.gdb.out,

 1 (gdb) disas /m sum_elements
 2 Dump of assembler code for function sum_elements:
 3 4       {
 4    0x0804841d <+0>:     push   ebp
 5    0x0804841e <+1>:     mov    ebp,esp
 6    0x08048420 <+3>:     sub    esp,0x18
 7 
 8 5               int i;
 9 6               float result = 0;
10    0x08048423 <+6>:     mov    eax,ds:0x8048558
11    0x08048428 <+11>:    mov    DWORD PTR [ebp-0x4],eax
12 
13 7               for (i = 0; i <= (int)length-1; i++)
14    0x0804842b <+14>:    mov    DWORD PTR [ebp-0x8],0x0
15    0x08048432 <+21>:    jmp    0x8048451 <sum_elements+52>
16    0x0804844d <+48>:    add    DWORD PTR [ebp-0x8],0x1
17    0x08048451 <+52>:    mov    eax,DWORD PTR [ebp+0xc]
18    0x08048454 <+55>:    sub    eax,0x1
19    0x08048457 <+58>:    cmp    eax,DWORD PTR [ebp-0x8]
20    0x0804845a <+61>:    jge    0x8048434 <sum_elements+23>
21 
22 8                       result += a[i];
23    0x08048434 <+23>:    fld    DWORD PTR [ebp-0x4]
24    0x08048437 <+26>:    mov    eax,DWORD PTR [ebp-0x8]
25    0x0804843a <+29>:    lea    edx,[eax*4+0x0]
26    0x08048441 <+36>:    mov    eax,DWORD PTR [ebp+0x8]
27    0x08048444 <+39>:    add    eax,edx
28    0x08048446 <+41>:    fld    DWORD PTR [eax]
29    0x08048448 <+43>:    faddp  st(1),st
30    0x0804844a <+45>:    fstp   DWORD PTR [ebp-0x4]
31 
32 9               return result;
33    0x0804845c <+63>:    mov    eax,DWORD PTR [ebp-0x4]
34    0x0804845f <+66>:    mov    DWORD PTR [ebp-0x18],eax
35    0x08048462 <+69>:    fld    DWORD PTR [ebp-0x18]
36 
37 10      }
38    0x08048465 <+72>:    leave
39    0x08048466 <+73>:    ret
40 
41 End of assembler dump.

使用meld對比如下,

o foo1.gdb.out核心匯編代碼解讀

...<snip>.......................................................................
                                                  ; i      is saved in [ebp-0x8]
                                                  ; length is saved in [ebp+0xc]
7               for (i = 0; i <= length-1; i++)
   0x0804842b <+14>:    mov    DWORD PTR [ebp-0x8],0x0    ; i = 0
   0x08048432 <+21>:    jmp    0x8048451 <sum_elements+52>
   0x0804844d <+48>:    add    DWORD PTR [ebp-0x8],0x1    ; i++
   0x08048451 <+52>:    mov    eax,DWORD PTR [ebp-0x8]    ; save i        to eax
   0x08048454 <+55>:    mov    edx,DWORD PTR [ebp+0xc]    ; save length   to edx
   0x08048457 <+58>:    sub    edx,0x1                    ; save length-1 to edx
   0x0804845a <+61>:    cmp    eax,edx                    ; exec i - (length-1)
   0x0804845c <+63>:    jbe    0x8048434 <sum_elements+23>; if below or equal
                                                          ; (i.e. <=) jump to
                                                          ;    result += a[i]

8                       result += a[i];
   0x08048434 <+23>:    fld    DWORD PTR [ebp-0x4]
   0x08048437 <+26>:    mov    eax,DWORD PTR [ebp-0x8]
...<snip>.......................................................................

o foo2.gdb.out核心匯編代碼解讀

...<snip>.......................................................................
                                                  ; i      is saved in [ebp-0x8]
                                                  ; length is saved in [ebp+0xc]
7               for (i = 0; i <= (int)length-1; i++)
   0x0804842b <+14>:    mov    DWORD PTR [ebp-0x8],0x0    ; i = 0
   0x08048432 <+21>:    jmp    0x8048451 <sum_elements+52>
   0x0804844d <+48>:    add    DWORD PTR [ebp-0x8],0x1    ; i++
   0x08048451 <+52>:    mov    eax,DWORD PTR [ebp+0xc]    ; save length   to eax
   0x08048454 <+55>:    sub    eax,0x1                    ; save length-1 to eax
   0x08048457 <+58>:    cmp    eax,DWORD PTR [ebp-0x8]    ; exec (length-1) - i
   0x0804845a <+61>:    jge    0x8048434 <sum_elements+23>; if greater or equal
                                                          ; (i.e. >=) jump to
                                                          ;    result += a[i]

8                       result += a[i];
   0x08048434 <+23>:    fld    DWORD PTR [ebp-0x4]
   0x08048437 <+26>:    mov    eax,DWORD PTR [ebp-0x8]
...<snip>.......................................................................

注意: 在foo1.gdb.out中,跳轉指令是jbe, 而在foo2.gdb.out中,跳轉指令是jge。 也就是說,

  • for (i = 0; i <= length-1; i++) :      <= 使用的是jbe
  • for (i = 0; i <= (int)length-1; i++): <= 使用的是jle (>=為jge)

到此為止,我們發現了隱藏在編譯器(gcc)後面的秘密:原來在執行有符號整數比較與無符號整數比較的時候,使用的匯編指令有所不同。 對應匯編指令總結如下:

指令 含義 運算符號
jbe unsigned below or equal (lower or same) <=
jae unsigned above or equal (higher or same) >=
jb unsigned below (lower) <
ja unsigned above (higher) >
jle signed less or equal <=
jge signed greater or equal >=
jl signed less than <
jg signed greater than >

從上面的表中可以看出,

  • 對於無符號(unsigned)整數比較,使用的單詞是above或below;
  • 對於有符號(signed)整數比較,則使用的單詞是greater或less。為了方便記憶,不妨記做sgl。對於有過InfiniBand編程經驗的人來說,sgl再熟悉不過了,那就是分散聚合表(scatter/gather list)。

於是,很好地詮釋了這兩行代碼的區別:

  • for (i = 0; i <= length-1; i++) :      <= 使用的是jbe, 因為length是無符號整數
  • for (i = 0; i <= (int)length-1; i++): <= 使用的是jle, 因為(int)length是有符號整數

小結: 有符號整數比較使用的匯編指令為jg(>), jl(<), jge(>=), jle(<=); 無符號整數比較使用的匯編指令為ja(>), jb(<), jae(>=), jbe(<=)。 記憶的方法也很簡單,那就是sgl

sgl : signed greater less : scatter/gather list

補充說明: test和cmp都是比較指令, test用於邏輯比較,cmp則用於算術比較。

  • The test instruction is identical to the and instruction except it does not affect operands.
  • The cmp instruction is identical to the sub instruction except it does not affect operands.


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM