1. 程式人生 > >計組複習題集_第三章

計組複習題集_第三章

首先說一點我自己容易搞混的知識點

leal (%edx), %eax
movl %edx, %eax

效果雖然一樣,就是%eax中儲存的內容變成了%edx中儲存的內容

但是實現過程有點不一樣:

書上寫的是:

movl 從指定的位置讀入資料

leal 將有效地址寫入目的運算元

說通俗點就是:

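movl 類似於 a = *p;(來源是記憶體運算元時,例如 movl (%edx), %eax,會訪問記憶體讀出資料;來源是暫存器時,例如 movl %edx, %eax,則直接複製暫存器的值)

leal 類似於 a = &(*p); 也就是 a = p;(只計算有效地址並寫入目的暫存器,不訪問記憶體)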

3.18

Starting with C code of the form

int test(int x, int y) {
    int val = ____;
    if (____) {
        if (____)
            val = ____;
        else
            val = ____;
    } else if (____)
        val = ____;
    return val;
}

GCC generates the following assembly code:

; x at %ebp+8, y at %ebp+12
  movl    8(%ebp), %eax
  movl    12(%ebp), %edx
  cmpl    $-3, %eax
  jge     .L2
  cmpl    %edx, %eax
  jle     .L3
  imull   %edx, %eax
  jmp     .L4
.L3:
  leal    (%edx,%eax), %eax
  jmp     .L4
.L2:
  cmpl    $2, %eax
  jg      .L5
  xorl    %edx, %eax
  jmp     .L4
.L5:
  subl    %edx, %eax
.L4:

Fill in the missing expressions in the C code. To make the code fit into the
C code template, you will need to undo some of the reordering of computations
done by GCC.

ANSWER

/**
 * Answer to 3.18.
 * GCC reordered the computation: the initializer val = x ^ y from the first
 * line of the C code shows up near the bottom of the assembly (the xorl after
 * .L2) and is only evaluated on the path that actually returns that value.
 * (There is not much reordering beyond that.)
 * For an if statement the assembly falls through when the condition holds and
 * jumps otherwise, so "jge", "jle", etc. usually encode the else condition.
 */
int test(int x, int y) {
    int val = x ^ y;
    if (x < -3) {   // jge .L2 jumps when x >= -3; otherwise execution falls through
        if (y < x)  // likewise: jle .L3 jumps when x <= y
            val = x * y;
        else        // .L3
            val = x + y; // leal (%edx,%eax), %eax computes x + y
    } else if (x > 2) // .L2
        // "cmpl $2, %eax" / "jg .L5" looks at first like the condition x <= 2,
        // but .L5 holds the body of this branch, so the C condition is x > 2;
        // the fall-through path (x <= 2) keeps val = x ^ y.
        val = x - y;  
    return val;
}

3.22

A function, fun_a, has the following overall structure:

int fun_a(unsigned x) {
  int val = 0;
  while (_____) {
    _____;
  }
  return _____;
}

The GCC C compiler generates the following assembly code:

;x at %ebp+8
  movl    8(%ebp), %edx
  movl    $0, %eax
  testl   %edx, %edx
  je      .L7
.L10:
  xorl    %edx, %eax
  shrl    %edx ;Shift right by 1
  jne     .L10
.L7:
  andl    $1, %eax

Reverse engineer the operation of this code and then do the following:

A. Use the assembly-code version to fill in the missing parts of the C code.

B. Describe in English what this function computes

ANSWER

/*A*/
/*
 * Answer to 3.22 A: returns the lowest bit of the XOR of x, x >> 1, x >> 2, ...
 * i.e. the XOR of all bits of x (1 if x has an odd number of 1 bits, else 0).
 */
int fun_a(unsigned x) {
  int val = 0;       // movl $0, %eax
  while (x != 0) {   // testl/je .L7 guards entry; jne .L10 repeats while x is nonzero
    val ^= x;        // xorl %edx, %eax
    x >>= 1;         // shrl %edx: logical shift because x is unsigned
  }
  return val & 1;    // andl $1, %eax keeps only the lowest bit
}

/*B*/
/**
 * 由於返回值 val & 1 從意義上來說是隻取了val最低位的資訊,於是我們只討論val最低位表達的資訊即可
 * 迴圈的作用就是計算val = (x >> 0) ^ (x >> 1) ^ (x >> 2) ^ ... ^ (x >> k-1) [假設x有k位有效數字]
 * 比如令 x = 1011
 * 那麼:
 *   1011
 *   0101
 *   0010
 * ^ 0001
 * ------
 *   1101
 * 可以看出迴圈結束後val的最低位就是x的從最低位到最高位的每一位取異或,舉個例子,假設x=10111,那麼val的最低位=1^0^1^1^1。
 * 當有偶數個1時,先把所有的1兩兩取異或轉成0,式子中就只剩下0了,式子的結果是0,例如:1^0^0^1 = (1^1)^0^0 = 0^0^0 = 0;
 * 當有奇數個1時,儘可能地把1兩兩取異或轉成0,最後式子中只剩下一個1,表示式的值就是1,例如:1^0^1^1 = (1^1)^0^1 = 0^0^1 = 1;
 * 由此val的最低位的最終值反映了x中1的數量的奇偶性,這也是這個函式的作用:計算引數x中1的數量的奇偶性,當x中有奇數個1,返回1;當x中有偶數個1,返回0。
 */

3.29

For a C function switcher with the general structure

int switcher(int a, int b, int c)
{
  int answer;
  switch(a) {
  case _____:         /* Case A */
    c = _____;
    /* Fall through */
  case _____:         /* Case B */
    answer = _____;
    break;
  case _____:         /* Case C */
  case _____:         /* Case D */
    answer = _____;
    break;
  case _____:         /* Case E */
    answer = _____;
    break;
  default:
    answer = _____;
  }
  return answer;
}

GCC generates the assembly code and jump table shown in Figure 3.20.

Fill in the missing parts of the C code. Except for the ordering of case labels C and D, there is only one way to fit the different cases into the template.

;a at %ebp+8, b at %ebp+12, c at %ebp+16
  movl    8(%ebp), %eax
  cmpl    $7, %eax
  ja      .L2
  jmp     *.L7(,%eax,4)
.L2:
  movl    12(%ebp), %eax
  jmp     .L8
.L5:
  movl    $4, %eax
  jmp     .L8
.L6:
  movl    12(%ebp), %eax
  xorl    $15, %eax
  movl    %eax, 16(%ebp)
.L3:
  movl    16(%ebp), %eax
  addl    $112, %eax
  jmp     .L8
.L4:
  movl    16(%ebp), %eax
  addl    12(%ebp), %eax
  sall    $2, %eax
.L8:
.L7:
  .long   .L3
  .long   .L2
  .long   .L4
  .long   .L2
  .long   .L5
  .long   .L6
  .long   .L2
  .long   .L4

ANSWER

/**
 * Answer to 3.29.
 * The default label is the target of the "ja" instruction (.L2). Jump-table
 * entries 1, 3 and 6 also point to .L2, so those values of a take the default
 * branch as well.
 */
int switcher(int a, int b, int c)
{
  int answer;
  switch(a) {
  case 5:         /* Case A */  /* table entry 5 -> .L6 */
    c = b ^ 15;
    /* Fall through */
  case 0:              /* Case B */  /* table entry 0 -> .L3 */
    answer = c + 112;
    break;
  case 2:         /* Case C */  /* table entries 2 and 7 -> .L4 */
  case 7:         /* Case D */
    answer = (b + c) << 2;
    break;
  case 4:         /* Case E */  /* table entry 4 -> .L5 */
    answer = a;   // Equivalent to answer = 4 because a == 4 in this case; the assembly at .L5 loads the constant (movl $4, %eax). Whether the original source wrote a or 4 cannot be determined from the listing.
    break;
  default:
    answer = b;   /* .L2: movl 12(%ebp), %eax */
  }
  return answer;
}

3.34

For a C function having the general structure

int rfun(unsigned x) {
  if (_____)
    return _____;
  unsigned nx = _____;
  int rv = rfun(nx);
  return _____;
}

GCC generates the following assembly code (with the setup and completion code omitted):

  movl    8(%ebp), %ebx
  movl    $0, %eax
  testl   %ebx, %ebx
  je      .L3
  movl    %ebx, %eax
  shrl    %eax            ;Shift right by 1
  movl    %eax, (%esp)
  call    rfun
  movl    %ebx, %edx
  andl    $1, %edx
  leal    (%edx,%eax), %eax
.L3:

A. What value does rfun store in the callee-save register %ebx?

B. Fill in the missing expressions in the C code shown above.

C. Describe in English what function this code computes.

ANSWER

/*A*/
/**
 * %ebx 儲存的是x的值
 */

/*B*/
/*
 * Answer to 3.34 B: recursively counts the 1 bits of x.
 * Each call adds the lowest bit of x to the count obtained for x >> 1;
 * the recursion stops when x is 0.
 */
int rfun(unsigned x) {
  if (x == 0)
    return 0;               /* movl $0, %eax is set before je .L3 */
  unsigned nx = x >> 1;     /* shrl %eax: logical shift of the unsigned value */
  int rv = rfun(nx);
  return (x & 0x1) + rv;    /* andl $1, %edx; leal (%edx,%eax), %eax */
}

/*C*/
/**
 * 計算x中位的和:遞迴地計算除了最低位之外的所有其他位的和,然後加上最低位得到結果
 */

3.37

Consider the following source code, where M and N are constants declared with #define:

int mat1[M][N];
int mat2[N][M];

int sum_element(int i, int j) {
  return mat1[i][j] + mat2[j][i];
}

In compiling this program, GCC generates the following assembly code:

;i at %ebp+8, j at %ebp+12
movl    8(%ebp), %ecx
movl    12(%ebp), %edx
leal    0(,%ecx,8), %eax
subl    %ecx, %eax
addl    %edx, %eax          ;%eax = 7 * i + j
leal    (%edx,%edx,4), %edx
addl    %ecx, %edx          ;%edx = 5 * j + i
movl    mat1(,%eax,4), %eax
addl    mat2(,%edx,4), %eax

Use your reverse engineering skills to determine the values of M and N based on this assembly code.

ANSWER

 
%eax = 7 * i + j
%edx = 5 * j + i
而 &mat1[i][j] = mat1 + (i * N + j) * 4
mat1(,%eax,4) = mat1(,7 * i + j,4) = mat1 + (i * 7 + j) * 4
所以 N = 7
同理 &mat2[j][i] = mat2 + (j * M + i) * 4, 而 mat2(,%edx,4) = mat2 + (j * 5 + i) * 4, 所以 M = 5

3.39

Consider the following structure declaration:

struct prob {
  int *p;
  struct {
    int x;
    int y;
  } s;
  struct prob *next;
};

This declaration illustrates that one structure can be embedded within another, just as arrays can be embedded within structures, and arrays can be embedded within arrays.
The following procedure (with some expressions omitted) operates on this structure:

void sp_init(struct prob *sp)
{
  sp->s.x = _____;
  sp->p = _____;
  sp->next = _____;
}

A. What are the offsets (in bytes) of the following fields?

     p: _____
   s.x: _____
   s.y: _____
  next: _____

B. How many total bytes does the structure require?

C.The compiler generates the following assembly code for the body of sp_init:

;sp at %ebp+8
  movl 8(%ebp), %eax  ;%eax = sp (type of sp: pointer)
  movl 8(%eax), %edx  ;%edx = sp->s.y (offset 8)
  movl %edx, 4(%eax)  ;sp->s.x = sp->s.y (offset 4)
  leal 4(%eax), %edx  ;%edx = &sp->s.x
  movl %edx, (%eax)   ;sp->p = &sp->s.x (offset 0)
  movl %eax, 12(%eax) ;sp->next = sp (offset 12)

On the basis of this information, fill in the missing expressions in the code for sp_init.

ANSWER

/*A*/
     p: 0~3
   s.x: 4~7
   s.y: 8~11
  next: 12~15
  (IA32 下指標與 int 都是 4 位元組;彙編程式碼中使用的位移 (%eax)、4(%eax)、8(%eax)、12(%eax) 也與此一致)

/*B*/
  16 bytes in total.

/*C*/
/*
 * Answer to 3.39 C, read off the assembly (sp is held in %eax):
 *   p is at offset 0, s.x at 4, s.y at 8 and next at 12.
 */
void sp_init(struct prob *sp)
{
  sp->s.x = sp->s.y;     /* movl 8(%eax), %edx ; movl %edx, 4(%eax) */
  sp->p = &(sp->s.x);    /* leal 4(%eax), %edx ; movl %edx, (%eax)  */
  sp->next = sp;         /* movl %eax, 12(%eax)                     */
}

3.56

Consider the following assembly code:

;x at %ebp+8, n at %ebp+12
  movl    8(%ebp), %esi
  movl    12(%ebp), %ebx
  movl    $-1, %edi
  movl    $1, %edx
.L2:
  movl    %edx, %eax
  andl    %esi, %eax
  xorl    %eax, %edi
  movl    %ebx, %ecx
  sall    %cl, %edx
  testl   %edx, %edx
  jne     .L2
  movl    %edi, %eax

The preceding code was generated by compiling C code that had the following overall form:

int loop(int x, int n)
{
  int result = _____;
  int mask;
  for (mask = _____; mask _____; mask = _____) {
    result ^= _____;
  }
  return result;
}

Your task is to fill in the missing parts of the C code to get a program equivalent to the generated assembly code. Recall that the result of the function is returned in register %eax. You will find it helpful to examine the assembly code before, during, and after the loop to form a consistent mapping between the registers and the program variables.

A. Which registers hold program values x, n, result, and mask?

B. What are the initial values of result and mask?

C. What is the test condition for mask?

D. How does mask get updated?

E. How does result get updated?

F. Fill in all the missing parts of the C code.

ANSWER

/*A*/
       x : %esi
       n : %ebx
    mask : %edx
  result : %edi

/*B*/
    mask : 1
  result : -1

/*C*/
  mask != 0;

/*D*/
  mask <<= n;

/*E*/
  result ^= x & mask;

/*F*/
/*
 * Answer to 3.56 F: C source reconstructed from the assembly loop.
 *
 * Starting from -1, XORs into result every bit of x selected by mask, where
 * mask takes the values 1, 1 << n, 1 << 2n, ... until the single set bit is
 * shifted out of the 32-bit register and mask becomes 0.
 *
 * The shift count is plain n, matching answer D (mask <<= n); no narrowing
 * cast is applied to it.
 *
 * NOTE(review): termination relies on the set bit being shifted out as sall
 * does in hardware; in ISO C a signed left shift that overflows is formally
 * undefined. With n == 0 the mask never changes, so the loop does not end.
 */
int loop(int x, int n)
{
  int result = -1;                  /* movl $-1, %edi */
  int mask;
  /* mask = 1: movl $1, %edx; test: testl %edx, %edx / jne .L2;
     update: movl %ebx, %ecx / sall %cl, %edx */
  for (mask = 1; mask != 0; mask = mask << n) {
    result ^= x & mask;             /* andl %esi, %eax ; xorl %eax, %edi */
  }
  return result;
}

3.59

This problem will give you a chance to reverse engineer a switch statement from machine code. In the following procedure, the body of the switch statement has been removed:

int switch_prob(int x, int n)
{
  int result = x;
  switch(n) {
    /* Fill in code here */
  }
  return result;
}

Figure 3.44 shows the disassembled machine code for the procedure. We can see in lines 4 and 5 that parameters x and n are loaded into registers %eax and %edx, respectively.

The jump table resides in a different area of memory. We can see from the indirect jump on line 9 that the jump table begins at address 0x80485d0. Using the GDB debugger, we can examine the six 4-byte words of memory comprising the jump table with the command x/6w 0x80485d0. GDB prints the following:

(GDB) x/6w 0x80485d0
0x80485d0: 0x08048438 0x08048448 0x08048438 0x0804843d
0x80485e0: 0x08048442 0x08048445

Fill in the body of the switch statement with C code that will have the same behavior as the machine code.

08048420 <switch_prob>:
8048420: 55                     push %ebp
8048421: 89 e5                  mov %esp,%ebp
8048423: 8b 45 08               mov 0x8(%ebp),%eax
8048426: 8b 55 0c               mov 0xc(%ebp),%edx
8048429: 83 ea 32               sub $0x32,%edx
804842c: 83 fa 05               cmp $0x5,%edx
804842f: 77 17                  ja 8048448 <switch_prob+0x28>
8048431: ff 24 95 d0 85 04 08   jmp *0x80485d0(,%edx,4)
8048438: c1 e0 02               shl $0x2,%eax
804843b: eb 0e                  jmp 804844b <switch_prob+0x2b>
804843d: c1 f8 02               sar $0x2,%eax
8048440: eb 09                  jmp 804844b <switch_prob+0x2b>
8048442: 8d 04 40               lea (%eax,%eax,2),%eax
8048445: 0f af c0               imul %eax,%eax
8048448: 83 c0 0a               add $0xa,%eax
804844b: 5d                     pop %ebp
804844c: c3                     ret

ANSWER

/**
 * Answer to 3.59.
 * The instructions at 8048429 and 804842c (sub $0x32,%edx / cmp $0x5,%edx)
 * show that the jump-table index is n - 0x32, so the case value is
 * index + 0x32, i.e. index + 50 in decimal (0x32 is hexadecimal, not 32).
 *
 * Jump table (index -> target):
 *   0 -> 8048438 (shl 2, then jmp to the return)   n = 50
 *   1 -> 8048448 (same target as ja: default)      n = 51
 *   2 -> 8048438                                   n = 52
 *   3 -> 804843d (sar 2, then jmp to the return)   n = 53
 *   4 -> 8048442 (lea: x * 3, falls into imul)     n = 54
 *   5 -> 8048445 (imul, falls into add $0xa)       n = 55
 */
int switch_prob(int x, int n)
{
  int result = x;
  switch(n) {
    case 50:
    case 52:
      result <<= 2;
      break;
    case 53:
      result >>= 2;   /* sar: arithmetic right shift */
      break;
    case 54:
      result *= 3;
      /* Fall through */
    case 55:
      result *= result;
      /* Fall through */
    default:          /* also reached for n = 51 and for n outside 50..55 */
      result += 0xa;
  }
  return result;
}

3.64

For this exercise, we will examine the code generated by GCC for functions that have structures as arguments and return values, and from this see how these language features are typically implemented.
The following C code has a function word_sum having structures as argument and return values, and a function prod that calls word_sum:

typedef struct {
  int a;
  int *p;
} str1;

typedef struct {
  int sum;
  int diff;
} str2;

str2 word_sum(str1 s1) {
  str2 result;
  result.sum = s1.a + *s1.p;
  result.diff = s1.a - *s1.p;
  return result;
}

int prod(int x, int y)
{
  str1 s1;
  str2 s2;
  s1.a = x;
  s1.p = &y;
  s2 = word_sum(s1);
  return s2.sum * s2.diff;
}

GCC generates the following code for these two functions:

word_sum:
  pushl %ebp
  movl %esp, %ebp
  pushl %ebx
  movl 8(%ebp), %eax
  movl 12(%ebp), %ebx
  movl 16(%ebp), %edx
  movl (%edx), %edx
  movl %ebx, %ecx
  subl %edx, %ecx
  movl %ecx, 4(%eax)
  addl %ebx, %edx
  movl %edx, (%eax)
  popl %ebx
  popl %ebp
  ret $4
prod:
  pushl %ebp
  movl %esp, %ebp
  subl $20, %esp
  leal 12(%ebp), %edx
  leal -8(%ebp), %ecx
  movl 8(%ebp), %eax
  movl %eax, 4(%esp)
  movl %edx, 8(%esp)
  movl %ecx, (%esp)
  call word_sum
  subl $4, %esp
  movl -4(%ebp), %eax
  imull -8(%ebp), %eax
  leave
  ret

The instruction ret $4 is like a normal return instruction, but it increments the stack pointer by 8 (4 for the return address plus 4 additional), rather than 4.

A. We can see in lines 5–7 of the code for word_sum that it appears as if three
values are being retrieved from the stack, even though the function has only a single argument. Describe what these three values are.

B. We can see in line 4 of the code for prod that 20 bytes are allocated in the stack frame. These get used as five fields of 4 bytes each. Describe how each of these fields gets used.

C. How would you describe the general strategy for passing structures as arguments to a function?

D. How would you describe the general strategy for handling a structure as a return value from a function?

ANSWER

/* A */
/**
 * movl (%edx), %edx  ; 解引用操作,對應*s1.p;此時 %edx 的值來自 16(%ebp),可知 16(%ebp) 對應 s1.p, 根據 str1 的結構關係,我們可以知道 12(%ebp) 對應 s1.a
 * movl %ebx, %ecx    ; 
 * subl %edx, %ecx    ; - 操作,對應 s1.a - *s1.p;
 * movl %ecx, 4(%eax) ; 
 * addl %ebx, %edx    ; + 操作,對應 s1.a + *s1.p;
 * movl %edx, (%eax)  ; 根據這幾個賦值操作,可以推斷出 %eax, 也就是 8(%ebp) 存的是 result 的地址, 4(%eax) = result.diff, (%eax) = result.sum
 */
  line 5 ~ line 7 的幾個值:
  8(%ebp)  = %eax : &result(呼叫者傳入的、用來存放返回結構體的地址)
  12(%ebp) = %ebx : s1.a
  16(%ebp) = %edx : s1.p

/* B */
/* 不嚴謹地說,movl就是揭示這個地址對應的意義的操作 */
/**
 * prod:
 *   pushl %ebp
 *   movl %esp, %ebp
 *   subl $20, %esp
 *   leal 12(%ebp), %edx  ; 12(%ebp)是傳入的引數y,leal說明是取地址並將地址存入暫存器%edx,可推斷出是 &y
 *   leal -8(%ebp), %ecx  ; 
 *   movl 8(%ebp), %eax   ; 8(%ebp)是傳入的引數x,movl 說明是取8(%ebp)指向的記憶體中的值存入暫存器%eax,可推斷出是 x
 *   movl %eax, 4(%esp)   ; 賦值操作,將x賦值給某個變數,可知是賦給s1.a,故得知 s1.a 存在 %esp+4 = %ebp-16
 *   movl %edx, 8(%esp)   ; 同理,s1.p (= &y) 存在 %esp+8 = -12(%ebp)
 *   movl %ecx, (%esp)    ; 取了 %ebp-8 這個地址存到了(%esp)裡,這個地址目前還不知道什麼意義。但是當我們看完了程式碼之後,能知道%ebp-8是指向s2的,那麼(%esp) = -20(%ebp) 存的就是&s2
 *   call word_sum        ;
 *   subl $4, %esp        ;
 *   movl -4(%ebp), %eax  ;
 *   imull -8(%ebp), %eax ; 由上行和這行可以推出s2在-8(%ebp);結合 word_sum 中 (%eax) 存 sum、4(%eax) 存 diff,可知 s2.diff 在 %ebp-4, s2.sum 在 %ebp-8
 *   leave
 *   ret
 */
   -4(%ebp) : s2.diff
   -8(%ebp) : s2.sum
  -12(%ebp) : s1.p
  -16(%ebp) : s1.a
  -20(%ebp) : &s2

/* C */
傳入結構體引數時的傳入方法:
從word_sum的彙編程式碼我們可以看到,向函式傳入結構體引數的通用策略是:將結構體引數的成員的值分別作為引數傳入函式。
通俗地說,就是將結構體引數拆開再傳入。

/* D */
函式怎麼將一個結構體作為返回值返回:
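首先,呼叫者(prod)在自己的棧幀中為返回的結構體預留空間(s2,位於 -8(%ebp)),並把這塊空間的地址當作一個隱藏的引數放在 (%esp),也就是 word_sum 中的 8(%ebp)。
然後,看word_sum的彙編程式碼,%eax儲存的是結構體result的地址。(賦值操作是movl %ecx, 4(%eax) 和 movl %edx,(%eax), 也就是把成員直接寫入%eax儲存的值指向的地址)
返回值按約定放在暫存器 %eax 中;ret 指令本身只負責彈出返回地址(這裡的 ret $4 還會額外彈出那個隱藏引數)。
所以,函式返回的是結構體變數的地址。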

3.65

In the following code, A and B are constants defined with #define:

typedef struct {
  short x[A][B]; /* Unknown constants A and B */
  int y;
} str1;

typedef struct {
  char array[B];
  int t;
  short s[B];
  int u;
} str2;

void setVal(str1 *p, str2 *q) {
  int v1 = q->t;
  int v2 = q->u;
  p->y = v1+v2;
}

GCC generates the following code for the body of setVal:

movl    12(%ebp), %eax
movl    36(%eax), %edx
addl    12(%eax), %edx
movl    8(%ebp), %eax
movl    %edx, 92(%eax)

What are the values of A and B? (The solution is unique.)

ANSWER

movl    12(%ebp), %eax  ; gets q
movl    36(%eax), %edx  ; 結合下一行,由於資料對齊的原因,只能推出 16+2*B 在 [33,36]範圍內都是可能的, 不能直接說 16+2*B = 36。 從此句得知 B ∈ [8.5, 10]
addl    12(%eax), %edx  ; B ∈ [9,12], 結合上一句的推理結果,B = 9 或 10。
movl    8(%ebp), %eax   ; gets p
movl    %edx, 92(%eax)  ; A*B*2 ∈ [89 , 92], A * B ∈ [44.5 , 46]

; 所以 B = 9 或 10, 且需滿足 A * B ∈ [44.5 , 46] ∧ A ∈ N+(正整數集)
; 設 B = 9, 則 A ∈ [44.5/9, 46/9]。由於 [44.5/9, 46/9] ∩ N+ = 5,所以 A = 5 是有效的數字。
; 設 B = 10, 則 A ∈ [4.45, 4.6], 這個區間內並沒有整數,[4.45, 4.6] ∩ N+ = φ,沒有能滿足條件的值,因此 B 不能取10。
; 綜上所述,B = 9 且 A = 5

A = 5, B = 9
; 這道題中文版和英文版資料不一樣,所以結果也不一樣
; 中文版的按照上面的流程走一遍,得到答案是: A = 3, B = 7