1. 程式人生 > >有符號整數比較v.s.無符號整數比較

有符號整數比較v.s.無符號整數比較

內存 read symbols span 技術分享 unsigned -1 core dump assembler

本文嘗試從匯編的角度給出有符號整數比較與無符號整數比較的區別所在。 在《深入理解計算機系統》(英文版第二版)一書中的Page#77,有下面一個練習題:

技術分享
將上述示例代碼寫入foo1.c文件,運行並分析bug產生的代碼行。


1. foo1.c

 1 #include <stdio.h>
 2 
 3 float sum_elements(float a[], unsigned length)
 4 {
 5         int i;
 6         float result = 0;
 7         for (i = 0; i <= length-1; i++)
 8                 result += a[i];
 9         return result;
10 }
11 
12 int main(int argc, char *argv[])
13 {
14         float a[] = {1.0, 2.0, 3.0};
15         float m = sum_elements(a, 0);
16         printf("%.1f\n", m);
17         return 0;
18 }

編譯並運行,發現存在著非法內存訪問,

$ ulimit -c unlimited
$ gcc -g -Wall -std=c99 -o foo1 foo1.c
$ ./foo1
Segmentation fault (core dumped)

用gdb查看一下core文件,

$ gdb foo1 core
GNU gdb (Ubuntu 7.7.1-0ubuntu5~14.04.2) 7.7.1
...<snip>....................................
Reading symbols from foo1...done.
[New LWP 3403]
Core was generated by `./foo1'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  0x08048446 in sum_elements (a=0xbfdd50a4, length=0) at foo1.c:8
8                       result += a[i];
(gdb) bt
#0  0x08048446 in sum_elements (a=0xbfdd50a4, length=0) at foo1.c:8
#1  0x080484a1 in main (argc=1, argv=0xbfdd5154) at foo1.c:15
(gdb) l 6,8
6               float result = 0;
7               for (i = 0; i <= length-1; i++)
8                       result += a[i];
(gdb)

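我們可以看出,core的位置在第8行,但有bug的代碼則是第7行。 (第6行不可能有bug) 注意length是一個無符號整數,而i則是一個有符號整數。我們期望的結果是,當length等於0的時候,length-1為-1,其實則不然:length-1按無符號數運算,0-1會回繞成UINT_MAX(32位下為0xFFFFFFFF);而且在比較i <= length-1時,i也會先被隱式轉換為無符號整數。於是實際運行的時候,i <= length-1的條件始終滿足,代碼運行到第8行;當i>=3的時候,a[i]已經越界(屬於未定義行為),隨著i繼續增大,最終會訪問到未映射的內存,出現非法的內存訪問錯誤(Segmentation fault)。 從C語言編程的角度,修復這一行很簡單,有兩種方法: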

  • for (i = 0; i < length; i++)
  • for (i = 0; i <= (int)length - 1; i++)

但這還不足以說明問題的本質。下面使用第二種修復方法給出foo2.c,然後通過反匯編比較foo1.c和foo2.c,從而給出有符號整數比較與無符號整數比較的區別所在。

2. foo2.c

 1 #include <stdio.h>
 2 
 3 float sum_elements(float a[], unsigned length)
 4 {
 5         int i;
 6         float result = 0;
 7         for (i = 0; i <= (int)length-1; i++)
 8                 result += a[i];
 9         return result;
10 }
11 
12 int main(int argc, char *argv[])
13 {
14         float a[] = {1.0, 2.0, 3.0};
15         float m = sum_elements(a, 0);
16         printf("%.1f\n", m);
17         return 0;
18 }

編譯並運行

$ rm -f core
$ ulimit -c unlimited
$ gcc -g -Wall -std=c99 -o foo2 foo2.c
$ ./foo2
0.0

將foo1裏的函數sum_elements反匯編存入foo1.gdb.out,

 1 (gdb) disas /m sum_elements
 2 Dump of assembler code for function sum_elements:
 3 4       {
 4    0x0804841d <+0>:     push   ebp
 5    0x0804841e <+1>:     mov    ebp,esp
 6    0x08048420 <+3>:     sub    esp,0x18
 7 
 8 5               int i;
 9 6               float result = 0;
10    0x08048423 <+6>:     mov    eax,ds:0x8048558
11    0x08048428 <+11>:    mov    DWORD PTR [ebp-0x4],eax
12 
13 7               for (i = 0; i <= length-1; i++)
14    0x0804842b <+14>:    mov    DWORD PTR [ebp-0x8],0x0
15    0x08048432 <+21>:    jmp    0x8048451 <sum_elements+52>
16    0x0804844d <+48>:    add    DWORD PTR [ebp-0x8],0x1
17    0x08048451 <+52>:    mov    eax,DWORD PTR [ebp-0x8]
18    0x08048454 <+55>:    mov    edx,DWORD PTR [ebp+0xc]
19    0x08048457 <+58>:    sub    edx,0x1
20    0x0804845a <+61>:    cmp    eax,edx
21    0x0804845c <+63>:    jbe    0x8048434 <sum_elements+23>
22 
23 8                       result += a[i];
24    0x08048434 <+23>:    fld    DWORD PTR [ebp-0x4]
25    0x08048437 <+26>:    mov    eax,DWORD PTR [ebp-0x8]
26    0x0804843a <+29>:    lea    edx,[eax*4+0x0]
27    0x08048441 <+36>:    mov    eax,DWORD PTR [ebp+0x8]
28    0x08048444 <+39>:    add    eax,edx
29    0x08048446 <+41>:    fld    DWORD PTR [eax]
30    0x08048448 <+43>:    faddp  st(1),st
31    0x0804844a <+45>:    fstp   DWORD PTR [ebp-0x4]
32 
33 9               return result;
34    0x0804845e <+65>:    mov    eax,DWORD PTR [ebp-0x4]
35    0x08048461 <+68>:    mov    DWORD PTR [ebp-0x18],eax
36    0x08048464 <+71>:    fld    DWORD PTR [ebp-0x18]
37 
38 10      }
39    0x08048467 <+74>:    leave
40    0x08048468 <+75>:    ret
41 
42 End of assembler dump.

將foo2裏的函數sum_elements反匯編存入foo2.gdb.out,

 1 (gdb) disas /m sum_elements
 2 Dump of assembler code for function sum_elements:
 3 4       {
 4    0x0804841d <+0>:     push   ebp
 5    0x0804841e <+1>:     mov    ebp,esp
 6    0x08048420 <+3>:     sub    esp,0x18
 7 
 8 5               int i;
 9 6               float result = 0;
10    0x08048423 <+6>:     mov    eax,ds:0x8048558
11    0x08048428 <+11>:    mov    DWORD PTR [ebp-0x4],eax
12 
13 7               for (i = 0; i <= (int)length-1; i++)
14    0x0804842b <+14>:    mov    DWORD PTR [ebp-0x8],0x0
15    0x08048432 <+21>:    jmp    0x8048451 <sum_elements+52>
16    0x0804844d <+48>:    add    DWORD PTR [ebp-0x8],0x1
17    0x08048451 <+52>:    mov    eax,DWORD PTR [ebp+0xc]
18    0x08048454 <+55>:    sub    eax,0x1
19    0x08048457 <+58>:    cmp    eax,DWORD PTR [ebp-0x8]
20    0x0804845a <+61>:    jge    0x8048434 <sum_elements+23>
21 
22 8                       result += a[i];
23    0x08048434 <+23>:    fld    DWORD PTR [ebp-0x4]
24    0x08048437 <+26>:    mov    eax,DWORD PTR [ebp-0x8]
25    0x0804843a <+29>:    lea    edx,[eax*4+0x0]
26    0x08048441 <+36>:    mov    eax,DWORD PTR [ebp+0x8]
27    0x08048444 <+39>:    add    eax,edx
28    0x08048446 <+41>:    fld    DWORD PTR [eax]
29    0x08048448 <+43>:    faddp  st(1),st
30    0x0804844a <+45>:    fstp   DWORD PTR [ebp-0x4]
31 
32 9               return result;
33    0x0804845c <+63>:    mov    eax,DWORD PTR [ebp-0x4]
34    0x0804845f <+66>:    mov    DWORD PTR [ebp-0x18],eax
35    0x08048462 <+69>:    fld    DWORD PTR [ebp-0x18]
36 
37 10      }
38    0x08048465 <+72>:    leave
39    0x08048466 <+73>:    ret
40 
41 End of assembler dump.

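使用meld對比如下,關鍵差異只在循環條件的比較與跳轉指令:foo1中是cmp eax,edx(eax為i,edx為length-1)後接jbe,即按無符號數判斷「小於等於」(CF=1或ZF=1時跳轉);foo2中是cmp eax,DWORD PTR [ebp-0x8](eax為(int)length-1,與i比較)後接jge,即按有符號數判斷「大於等於」(SF等於OF時跳轉)。cmp指令本身並不區分有符號與無符號,區別完全在於其後的條件跳轉指令檢查哪些標誌位。當length為0時,length-1的位模式是0xFFFFFFFF:jbe把它當作4294967295,條件恆成立,循環不斷越界訪問;jge把它當作-1,條件不成立,循環一次也不執行。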

技術分享

有符號整數比較v.s.無符號整數比較