計組複習題集_第三章
首先說一點我自己容易搞混的知識點
leal (%edx), %eax
movl %edx, %eax
效果雖然一樣,就是%eax中儲存的內容變成了%edx中儲存的內容
但是實現過程有點不一樣:
書上寫的是:
movl 從指定的位置讀入資料
leal 將有效地址寫入目的運算元
說通俗點就是:
movl 類似於 *p = a;
leal 類似於 (*x = a;) p = x;
3.18
Starting with C code of the form
int test(int x, int y) { int val = ____; if (____) { if (____) val = ____; else val = ____; } else if (____) val = ____; return val; }
GCC generates the following assembly code:
; x at %ebp+8, y at %ebp+12 movl 8(%ebp), %eax movl 12(%ebp), %edx cmpl $-3, %eax jge .L2 cmpl %edx, %eax jle .L3 imull %edx, %eax jmp .L4 .L3: leal (%edx,%eax), %eax jmp .L4 .L2: cmpl $2, %eax jg .L5 xorl %edx, %eax jmp .L4 .L5: subl %edx, %eax .L4:
Fill in the missing expressions in the C code. To make the code fit into the
C code template, you will need to undo some of the reordering of computations
done by GCC.
ANSWER
/**
 * CS:APP problem 3.18 -- C code reconstructed from the GCC assembly.
 *
 * GCC reordered the computation: the initializer val = x ^ y from the first
 * line of the C code shows up near the bottom of the assembly and is only
 * evaluated on the path where it is actually the returned value (the amount
 * of reordering is otherwise small).
 *
 * For an if statement the compiler prefers to fall through and only jumps
 * when it has to, i.e. it jumps to the else part. The conditional jumps
 * ("jge", "jle", ...) therefore usually encode the condition of the else
 * branch, which is the negation of the C condition.
 */
int test(int x, int y)
{
    int val = x ^ y;

    if (x < -3) {           /* "cmpl $-3,%eax; jge .L2": jump away when x >= -3 */
        if (y < x) {        /* "cmpl %edx,%eax; jle .L3": jump away when x <= y */
            val = x * y;
        } else {            /* .L3 */
            val = x + y;    /* leal (%edx,%eax), %eax */
        }
    } else if (x > 2) {     /* .L2 */
        /*
         * "cmpl $2,%eax; jg .L5" looks like the test should be x <= 2, but
         * here the jump goes TO the code for this branch (.L5), so the C
         * condition really is x > 2; the fall-through path is the one that
         * keeps val = x ^ y.
         */
        val = x - y;        /* .L5 */
    }
    return val;
}
3.22
A function, fun_a, has the following overall structure:
int fun_a(unsigned x) { int val = 0; while (_____) { _____; } return _____; }
The GCC C compiler generates the following assembly code:
;x at %ebp+8 movl 8(%ebp), %edx movl $0, %eax testl %edx, %edx je .L7 .L10: xorl %edx, %eax shrl %edx ;Shift right by 1 jne .L10 .L7: andl $1, %eax
Reverse engineer the operation of this code and then do the following:
A. Use the assembly-code version to fill in the missing parts of the C code.
B. Describe in English what this function computes
ANSWER
/*A*/
/*
 * CS:APP problem 3.22, part A.
 *
 * XORs x with every right-shifted copy of itself until the shifted value
 * becomes zero, then keeps only the lowest bit of the accumulated value.
 * That lowest bit is the XOR of all bits of x, so the function returns 1
 * when x has an odd number of 1 bits and 0 when it has an even number.
 */
int fun_a(unsigned x) {
    int acc = 0;

    for (unsigned rest = x; rest != 0; rest >>= 1) {
        acc ^= rest;        /* xorl %edx, %eax */
    }
    return acc & 1;         /* andl $1, %eax */
}
/*B*/
/**
* 由於返回值 val & 1 從意義上來說是隻取了val最低位的資訊,於是我們只討論val最低位表達的資訊即可
* 迴圈的作用就是計算val = (x >> 0) ^ (x >> 1) ^ (x >> 2) ^ ... ^ (x >> k-1) [假設x有k位有效數字]
* 比如令 x = 1011
* 那麼:
* 1011
* 0101
* 0010
* ^ 0001
* ------
* 1101
* 可以看出迴圈結束後val的最低位就是x的從最低位到最高位的每一位取異或,舉個例子,假設x=10111,那麼val的最低位=1^0^1^1^1。
* 當有偶數個1時,先把所有的1兩兩取異或轉成0,式子中就只剩下0了,式子的結果是0,例如:1^0^0^1 = (1^1)^0^0 = 0^0^0 = 0;
* 當有奇數個1時,儘可能地把1兩兩取異或轉成0,最後式子中只剩下一個1,表示式的值就是1,例如:1^0^1^1 = (1^1)^0^1 = 0^0^1 = 1;
* 由此val的最低位的最終值反映了x中1的數量的奇偶性,這也是這個函式的作用:計算引數x中1的數量的奇偶性,當x中有奇數個1,返回1;當x中有偶數個1,返回0。
*/
3.29
For a C function
switcher
with the general structure int switcher(int a, int b, int c) { int answer; switch(a) { case _____: /* Case A */ c = _____; /* Fall through */ case _____: /* Case B */ answer = _____; break; case _____: /* Case C */ case _____: /* Case D */ answer = _____; break; case _____: /* Case E */ answer = _____; break; default: answer = _____; } return answer; }
GCC generates the assembly code and jump table shown in Figure 3.20.
Fill in the missing parts of the C code. Except for the ordering of case labels C and D, there is only one way to fit the different cases into the template.
;a at %ebp+8, b at %ebp+12, c at %ebp+16 movl 8(%ebp), %eax cmpl $7, %eax ja .L2 jmp *.L7(,%eax,4) .L2: movl 12(%ebp), %eax jmp .L8 .L5: movl $4, %eax jmp .L8 .L6: movl 12(%ebp), %eax xorl $15, %eax movl %eax, 16(%ebp) .L3: movl 16(%ebp), %eax addl $112, %eax jmp .L8 .L4: movl 16(%ebp), %eax addl 12(%ebp), %eax sall $2, %eax .L8:
.L7: .long .L3 .long .L2 .long .L4 .long .L2 .long .L5 .long .L6 .long .L2 .long .L4
ANSWER
/**
 * CS:APP problem 3.29 -- switch statement rebuilt from the jump table.
 *
 * The label targeted by the "ja" instruction (.L2) is the default case;
 * every jump-table slot that points at .L2 (a = 1, 3, 6) is a value with
 * no case label of its own.
 */
int switcher(int a, int b, int c)
{
    switch (a) {
    case 5:                     /* Case A (.L6) */
        c = b ^ 15;
        /* fall through */
    case 0:                     /* Case B (.L3) */
        return c + 112;

    case 2:                     /* Case C (.L4) */
    case 7:                     /* Case D (.L4) */
        return (b + c) << 2;

    case 4:                     /* Case E (.L5) */
        /*
         * a is known to be 4 on this path, so returning the constant is the
         * same as returning a; the assembly loads the constant directly
         * (movl $4, %eax).
         */
        return 4;

    default:                    /* .L2 */
        return b;
    }
}
3.34
For a C function having the general structure
int rfun(unsigned x) { if (_____) return _____; unsigned nx = _____; int rv = rfun(nx); return _____; }
GCC generates the following assembly code (with the setup and completion code omitted):
movl 8(%ebp), %ebx movl $0, %eax testl %ebx, %ebx je .L3 movl %ebx, %eax shrl %eax ;Shift right by 1 movl %eax, (%esp) call rfun movl %ebx, %edx andl $1, %edx leal (%edx,%eax), %eax .L3:
A. What value does
rfun
store in the callee-save register%ebx
?B. Fill in the missing expressions in the C code shown above.
C. Describe in English what function this code computes.
ANSWER
/*A*/
/**
* %ebx 儲存的是x的值
*/
/*B*/
/*
 * CS:APP problem 3.34, part B.
 *
 * Recursively adds up the bits of x: the call on x >> 1 yields the sum of
 * all bits above the lowest one, and the lowest bit is added on the way
 * back. The recursion stops when x is 0, for which the result is 0.
 */
int rfun(unsigned x) {
    if (x == 0) {
        return 0;                       /* movl $0, %eax; je .L3 */
    }

    int upper_sum = rfun(x >> 1);       /* shrl %eax; call rfun */
    return upper_sum + (int)(x & 1u);   /* andl $1, %edx; leal (%edx,%eax), %eax */
}
/*C*/
/**
* 計算x中位的和:遞迴地計算除了最低位之外的所有其他位的和,然後加上最低位得到結果
*/
3.37
Consider the following source code, where M and N are constants declared with
#define
:int mat1[M][N]; int mat2[N][M]; int sum_element(int i, int j) { return mat1[i][j] + mat2[j][i]; }
In compiling this program, GCC generates the following assembly code:
;i at %ebp+8, j at %ebp+12 movl 8(%ebp), %ecx movl 12(%ebp), %edx leal 0(,%ecx,8), %eax subl %ecx, %eax addl %edx, %eax ;%eax = 7 * i + j leal (%edx,%edx,4), %edx addl %ecx, %edx ;%edx = 5 * j + i movl mat1(,%eax,4), %eax addl mat2(,%edx,4), %eax
Use your reverse engineering skills to determine the values of M and N based on this assembly code.
ANSWER
%eax = 7 * i + j
%edx = 5 * j + i
而 &mat1[i][j] = mat1 + (i * N + j) * 4
mat1(,%eax,4) = mat1(,7 * i + j,4) = mat1 + (i * 7 + j) * 4
所以 N = 7
同理 &mat2[j][i] = mat2 + (j * M + i) * 4,而 mat2(,%edx,4) = mat2 + (5 * j + i) * 4,所以 M = 5
3.39
Consider the following structure declaration:
struct prob { int *p; struct { int x; int y; } s; struct prob *next; };
This declaration illustrates that one structure can be embedded within another, just as arrays can be embedded within structures, and arrays can be embedded within arrays.
The following procedure (with some expressions omitted) operates on this structure:void sp_init(struct prob *sp) { sp->s.x = _____; sp->p = _____; sp->next = _____; }
A. What are the offsets (in bytes) of the following fields?
p: _____ s.x: _____ s.y: _____ next: _____
B. How many total bytes does the structure require?
C.The compiler generates the following assembly code for the body of
sp_init
:;sp at %ebp+8 movl 8(%ebp), %eax ;%eax = sp (type of sp: pointer) movl 8(%eax), %edx ;%edx = sp->s.y movl %edx, 4(%eax) ;sp->s.x = sp->s.y leal 4(%eax), %edx ;%edx = &sp->s.x movl %edx, (%eax) ;sp->p = &sp->s.x movl %eax, 12(%eax) ;sp->next = sp
On the basis of this information, fill in the missing expressions in the code for
sp_init
.
ANSWER
/*A*/
p: 0~3
s.x: 4~7
s.y: 8~11
next: 12~15
(這是 IA32 程式碼,指標與 int 皆為 4 位元組,與彙編中使用的偏移 4、8、12 一致)
/*B*/
16 bytes in total.
/*C*/
/*
 * CS:APP problem 3.39, part C -- blanks filled in from the assembly.
 *
 * The code is IA32, where pointers and ints are 4 bytes, so the fields of
 * struct prob sit at offsets p = 0, s.x = 4, s.y = 8, next = 12. Each
 * statement below is annotated with the instructions it was derived from
 * (%eax holds sp).
 */
void sp_init(struct prob *sp)
{
    sp->s.x = sp->s.y;      /* movl 8(%eax), %edx ; movl %edx, 4(%eax) */
    sp->p = &(sp->s.x);     /* leal 4(%eax), %edx ; movl %edx, (%eax)  */
    sp->next = sp;          /* movl %eax, 12(%eax)                     */
}
3.56
Consider the following assembly code:
;x at %ebp+8, n at %ebp+12
movl 8(%ebp), %esi
movl 12(%ebp), %ebx
movl $-1, %edi
movl $1, %edx
.L2:
movl %edx, %eax
andl %esi, %eax
xorl %eax, %edi
movl %ebx, %ecx
sall %cl, %edx
testl %edx, %edx
jne .L2
movl %edi, %eax
The preceding code was generated by compiling C code that had the following overall form:
int loop(int x, int n) { int result = _____; int mask; for (mask = _____; mask _____; mask = _____) { result ^= _____; } return result; }
Your task is to fill in the missing parts of the C code to get a program equivalent to the generated assembly code. Recall that the result of the function is returned in register
%eax
. You will find it helpful to examine the assembly code before, during, and after the loop to form a consistent mapping between the registers and the program variables.A. Which registers hold program values
x
,n
,result
, andmask
?B. What are the initial values of result and mask?
C. What is the test condition for mask?
D. How does mask get updated?
E. How does result get updated?
F. Fill in all the missing parts of the C code.
ANSWER
/*A*/
x : %esi
n : %ebx
mask : %edx
result : %edi
/*B*/
mask : 1
result : -1
/*C*/
mask != 0;
/*D*/
mask <<= n;
/*E*/
result ^= x & mask;
/*F*/
/*
 * CS:APP problem 3.56, part F.
 *
 * Starting from result = -1, toggles in result every bit of x selected by
 * mask, where mask starts at 1 and is shifted left by n positions each
 * iteration until it has been shifted out to 0. For n == 1 every bit is
 * visited and the result equals ~x.
 *
 * The shift count is plain n ("movl %ebx, %ecx; sall %cl, %edx"); no
 * narrowing cast belongs here. The textbook form of the update is
 * mask << n; the shift is carried out on the unsigned representation so
 * that moving a bit into or past the sign position stays well-defined in C
 * while producing the same bit pattern as the sall instruction.
 *
 * n must be in the range 1..31: n == 0 never terminates, and a negative or
 * >= 32 shift count is undefined in C.
 */
int loop(int x, int n)
{
    int result = -1;                                    /* movl $-1, %edi */
    int mask;

    for (mask = 1; mask != 0; mask = (int)((unsigned)mask << n)) {
        result ^= x & mask;                             /* andl %esi,%eax ; xorl %eax,%edi */
    }
    return result;
}
3.59
This problem will give you a chance to reverse engineer a
switch
statement from machine code. In the following procedure, the body of theswitch
statement has been removed:int switch_prob(int x, int n) { int result = x; switch(n) { /* Fill in code here */ } return result; }
Figure 3.44 shows the disassembled machine code for the procedure. We can see in lines 4 and 5 that parameters
x
andn
are loaded into registers%eax
and%edx
, respectively.The jump table resides in a different area of memory. We can see from the indirect jump on line 9 that the jump table begins at address
0x80485d0
. Using theGDB
debugger, we can examine the six 4-byte words of memory comprising the jump table with the commandx/6w 0x80485d0
. GDB prints the following:(GDB) x/6w 0x80485d0 0x80485d0: 0x08048438 0x08048448 0x08048438 > 0x0804843d 0x80485e0: 0x08048442 0x08048445
Fill in the body of the switch statement with C code that will have the same behavior as the machine code.
08048420 <switch_prob>: 8048420: 55 push %ebp 8048421: 89 e5 mov %esp,%ebp 8048423: 8b 45 08 mov 0x8(%ebp),%eax 8048426: 8b 55 0c mov 0xc(%ebp),%edx 8048429: 83 ea 32 sub $0x32,%edx 804842c: 83 fa 05 cmp $0x5,%edx 804842f: 77 17 ja 8048448 <switch_prob+0x28> 8048431: ff 24 95 d0 85 04 08 jmp *0x80485d0(,%edx,4) 8048438: c1 e0 02 shl $0x2,%eax 804843b: eb 0e jmp 804844b <switch_prob+0x2b> 804843d: c1 f8 02 sar $0x2,%eax 8048440: eb 09 jmp 804844b <switch_prob+0x2b> 8048442: 8d 04 40 lea (%eax,%eax,2),%eax 8048445: 0f af c0 imul %eax,%eax 8048448: 83 c0 0a add $0xa,%eax 804844b: 5d pop %ebp 804844c: c3 ret
ANSWER
/**
 * CS:APP problem 3.59 -- switch body rebuilt from the disassembly.
 *
 * Lines 8048429 and 804842c ("sub $0x32,%edx" / "cmp $0x5,%edx; ja") bias n
 * by 0x32, which is decimal 50 (not 32), and send every value outside
 * 50..55 to the default code at 0x8048448. Jump-table slot i therefore
 * corresponds to n == 50 + i:
 *
 *   slot 0 (n = 50) -> 0x8048438  shl $2
 *   slot 1 (n = 51) -> 0x8048448  default
 *   slot 2 (n = 52) -> 0x8048438  shl $2
 *   slot 3 (n = 53) -> 0x804843d  sar $2
 *   slot 4 (n = 54) -> 0x8048442  lea (%eax,%eax,2)  -- no jmp, falls through
 *   slot 5 (n = 55) -> 0x8048445  imul %eax,%eax     -- no jmp, falls through
 */
int switch_prob(int x, int n)
{
    int result = x;

    switch (n) {
    case 50:
    case 52:
        result <<= 2;
        break;
    case 53:
        result >>= 2;
        break;
    case 54:
        result *= 3;
        /* fall through */
    case 55:
        result *= result;
        /* fall through */
    default:
        result += 10;       /* add $0xa, %eax */
    }
    return result;
}
3.64
For this exercise, we will examine the code generated by GCC for functions that have structures as arguments and return values, and from this see how these language features are typically implemented.
The following C code has a function word_sum having structures as argument and return values, and a function prod that callsword_sum
:typedef struct { int a; int *p; } str1; typedef struct { int sum; int diff; } str2; str2 word_sum(str1 s1) { str2 result; result.sum = s1.a + *s1.p; result.diff = s1.a - *s1.p; return result; } int prod(int x, int y) { str1 s1; str2 s2; s1.a = x; s1.p = &y; s2 = word_sum(s1); return s2.sum * s2.diff; }
GCC generates the following code for these two functions:
word_sum: pushl %ebp movl %esp, %ebp pushl %ebx movl 8(%ebp), %eax movl 12(%ebp), %ebx movl 16(%ebp), %edx movl (%edx), %edx movl %ebx, %ecx subl %edx, %ecx movl %ecx, 4(%eax) addl %ebx, %edx movl %edx, (%eax) popl %ebx popl %ebp ret $4
prod: pushl %ebp movl %esp, %ebp subl $20, %esp leal 12(%ebp), %edx leal -8(%ebp), %ecx movl 8(%ebp), %eax movl %eax, 4(%esp) movl %edx, 8(%esp) movl %ecx, (%esp) call word_sum subl $4, %esp movl -4(%ebp), %eax imull -8(%ebp), %eax leave ret
The instruction ret $4 is like a normal return instruction, but it increments the stack pointer by 8 (4 for the return address plus 4 additional), rather than 4.
A. We can see in lines 5–7 of the code for word_sum that it appears as if three
values are being retrieved from the stack, even though the function has only a single argument. Describe what these three values are.B. We can see in line 4 of the code for prod that 20 bytes are allocated in the stack frame. These get used as five fields of 4 bytes each. Describe how each of these fields gets used.
C. How would you describe the general strategy for passing structures as arguments to a function?
D. How would you describe the general strategy for handling a structure as a return value from a function?
ANSWER
/* A */
/**
* movl (%edx), %edx ; 解引用操作,對應 *s1.p。%edx 的值來自 16(%ebp),可知 16(%ebp) 對應 s1.p;根據 str1 的成員順序,12(%ebp) 對應 s1.a
* movl %ebx, %ecx ;
* subl %edx, %ecx ; - 操作,對應 s1.a - *s1.p;
* movl %ecx, 4(%eax) ;
* addl %ebx, %edx ; + 操作,對應 s1.a + *s1.p;
* movl %edx, (%eax) ; 根據這幾個賦值操作,可以推斷出 %eax, 也就是 8(%ebp) 存的是 result 的地址, 4(%eax) = result.diff, (%eax) = result.sum
*/
line 5 ~ line 7 的幾個值:
8(%ebp) = %eax : &result(呼叫者提供的、用來存放返回結構體的地址)
12(%ebp) = %ebx : s1.a
16(%ebp) = %edx : s1.p
/* B */
/* 不嚴謹地說,movl就是揭示這個地址對應的意義的操作 */
/**
* prod:
* pushl %ebp
* movl %esp, %ebp
* subl $20, %esp
* leal 12(%ebp), %edx ; 12(%ebp)是第二個引數 y,leal 取的是它的地址並存入暫存器 %edx,可推斷出是 &y
* leal -8(%ebp), %ecx ;
* movl 8(%ebp), %eax ; 8(%ebp)是第一個引數 x,movl 把 8(%ebp) 處記憶體中的值讀入暫存器 %eax,可推斷出是 x
* movl %eax, 4(%esp) ; 賦值操作,將x賦值給某個變數,可知是賦給s1.a,故得知 s1.a 存在 %esp+4 = %ebp-16
* movl %edx, 8(%esp) ; 同理,s1.p(= &y)存在 -12(%ebp)
* movl %ecx, (%esp) ; 取了 %ebp-8 這個地址存到了(%esp)裡,這個地址目前還不知道什麼意義。但是當我們看完了程式碼之後,能知道%ebp-8是指向s2的,那麼(%esp),也就是 %ebp-20,存的就是&s2
* call word_sum ;
* subl $4, %esp ;
* movl -4(%ebp), %eax ;
* imull -8(%ebp), %eax ; 由上行和這行可以推出s2在-8(%ebp), 且s2.diff 在 %ebp-4, s2.sum 在 %ebp-8
* leave
* ret
*/
-4(%ebp) : s2.diff
-8(%ebp) : s2.sum
-12(%ebp) : s1.p
-16(%ebp) : s1.a
-20(%ebp) : &s2
/* C */
傳入結構體引數時的傳入方法:
從word_sum的彙編程式碼我們可以看到,向函式傳入結構體引數的通用策略是:將結構體引數的成員的值分別作為引數傳入函式。
通俗地說,就是將結構體引數拆開再傳入。
/* D */
函式怎麼將一個結構體作為返回值返回:
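首先,函式的返回值放在暫存器 %eax 中(ret 指令本身只負責彈出返回地址並跳回呼叫者)。
然後,看word_sum的彙編程式碼,%eax儲存的是結構體result的地址,這個地址是呼叫者分配好空間後,作為隱藏的第一個引數(8(%ebp))傳進來的。(賦值操作是movl %edx,(%eax), 也就是寫入%eax儲存的值指向的地址)
所以,通用策略是:呼叫者負責分配結構體的空間並傳入其地址,被呼叫函式把結果寫入該地址,並在 %eax 中返回這個地址。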
3.65
In the following code, A and B are constants defined with
#define
:typedef struct { short x[A][B]; /* Unknown constants A and B */ int y; } str1; typedef struct { char array[B]; int t; short s[B]; int u; } str2; void setVal(str1 *p, str2 *q) { int v1 = q->t; int v2 = q->u; p->y = v1+v2; }
GCC generates the following code for the body of setVal:
movl 12(%ebp), %eax movl 36(%eax), %edx addl 12(%eax), %edx movl 8(%ebp), %eax movl %edx, 92(%eax)
What are the values of A and B? (The solution is unique.)
ANSWER
movl 12(%ebp), %eax ; gets q
movl 36(%eax), %edx ; 結合下一行,由於資料對齊的原因,只能推出 16+2*B 在 [33,36]範圍內都是可能的, 不能直接說 16+2*B = 36。 從此句得知 B ∈ [8.5, 10]
addl 12(%eax), %edx ; B ∈ [9,12], 結合上一句的推理結果,B = 9 或 10。
movl 8(%ebp), %eax ; gets p
movl %edx, 92(%eax) ; A*B*2 ∈ [89 , 92], A * B ∈ [44.5 , 46]
; 所以 B = 9 或 10, 且需滿足 A * B ∈ [44.5 , 46] ∧ A ∈ N+(正整數集)
; 設 B = 9, 則 A ∈ [44.5/9, 46/9]。由於 [44.5/9, 46/9] ∩ N+ = 5,所以 A = 5 是有效的數字。
; 設 B = 10, 則 A ∈ [4.45, 4.6], 這個區間內並沒有整數,[4.45, 4.6] ∩ N+ = φ,沒有能滿足條件的值,因此 B 不能取10。
; 綜上所述,B = 9 且 A = 5
A = 5, B = 9
; 這道題中文版和英文版資料不一樣,所以結果也不一樣
; 中文版的按照上面的流程走一遍,得到答案是: A = 3, B = 7