深入理解計算機系統家庭作業第五章
阿新 • • 發佈:2019-02-02
/*
***5.15
*/
A. 畫圖略
B. 3
C. 1
D. 乘法不在關鍵路徑上,故乘法可以按流水線執行
/*
***5.16
*/
A. 每次迭代要載入兩個資料,故至少需要兩個週期
B. 迴圈展開並沒有改變關鍵路徑的長度
/*
***5.17
*/
A. 載入資料的時間
B. IA32沒有足夠的暫存器來儲存臨時變數
/*
***5.18
*/
/*
 * inner4 - inner product of two vectors, 3-way unrolled with reassociation.
 *
 * x, y : input vectors. The element count is taken from vec_length(x);
 *        assumes y holds at least that many elements -- TODO confirm
 *        against callers.
 * dest : receives the sum of x[i] * y[i] over all elements.
 */
void inner4(vec_ptr x, vec_ptr y, data_t *dest)
{
    long int i;
    int length = vec_length(x);
    data_t *xdata = get_vec_start(x);
    data_t *ydata = get_vec_start(y);
    data_t sum = (data_t)0;

    /*
     * Three elements per iteration. The three products are added together
     * first and only then folded into sum, so a single addition per
     * iteration depends on the previous value of sum.
     */
    for (i = 0; i < length - 2; i += 3) {
        sum = sum + (xdata[i] * ydata[i]
                     + xdata[i + 1] * ydata[i + 1]
                     + xdata[i + 2] * ydata[i + 2]);
    }

    /* Finish the zero to two elements the unrolled loop did not reach. */
    for (; i < length; i++) {
        sum = sum + xdata[i] * ydata[i];
    }

    *dest = sum;
}
/*
***5.19
*/
/*
 * word_memeset - fill the first n bytes at s with the value (unsigned char)c,
 * using unsigned long sized stores for the aligned middle of the region.
 *
 * s : start of the region to fill.
 * c : fill value; only its low-order byte is used.
 * n : number of bytes to fill.
 *
 * Returns s.
 *
 * Exactly n bytes are written: both the alignment prologue and the word loop
 * are bounded by the remaining byte count.
 */
void *word_memeset(void *s, int c, size_t n)
{
    const size_t k = sizeof(unsigned long);
    const unsigned char byte = (unsigned char)c;
    unsigned char *schar = (unsigned char *)s;
    unsigned long word;
    unsigned char *wbytes = (unsigned char *)&word;

    /* Build a word in which every byte equals the fill byte. */
    for (size_t i = 0; i < k; i++) {
        wbytes[i] = byte;
    }

    /* Byte stores until the address is a multiple of k, or n runs out. */
    while (n > 0 && (size_t)schar % k != 0) {
        *schar++ = byte;
        n--;
    }

    /*
     * Word stores while at least one full word remains. schar is k-aligned
     * here. NOTE(review): storing through unsigned long * into memory of
     * another declared type is the technique this exercise asks for, but it
     * is outside the C effective-type rules; build with -fno-strict-aliasing
     * if that is a concern.
     */
    while (n >= k) {
        *(unsigned long *)schar = word;
        schar += k;
        n -= k;
    }

    /* Fewer than k bytes are left; finish them one at a time. */
    while (n > 0) {
        *schar++ = byte;
        n--;
    }

    return s;
}
/*
***5.20
*/
/*
 * poly - evaluate a[0] + a[1]*x + ... + a[degree]*x^degree by direct
 * summation, 5-way unrolled with five independent accumulators.
 *
 * a      : coefficients, a[i] multiplies x^i; a[0..degree] are read.
 * x      : evaluation point.
 * degree : highest exponent; must be >= 0 (a[0] is read unconditionally).
 *
 * Returns the value of the polynomial at x.
 */
double poly(double a[], double x, int degree)
{
    long int i;
    double result = a[0];
    double result1 = 0, result2 = 0, result3 = 0, result4 = 0, result5 = 0;

    /* xpwrK holds the power of x that pairs with a[i + K - 1]. */
    double xpwr1 = x;
    double xpwr2 = x * xpwr1;
    double xpwr3 = x * xpwr2;
    double xpwr4 = x * xpwr3;
    double xpwr5 = x * xpwr4;
    double step = xpwr5;            /* x^5: advances every power by five terms */

    for (i = 1; i <= degree - 4; i += 5) {
        result1 += a[i] * xpwr1;
        result2 += a[i + 1] * xpwr2;
        result3 += a[i + 2] * xpwr3;
        result4 += a[i + 3] * xpwr4;
        result5 += a[i + 4] * xpwr5;

        xpwr1 *= step;
        xpwr2 *= step;
        xpwr3 *= step;
        xpwr4 *= step;
        xpwr5 *= step;
    }

    /* Remaining terms: xpwr1 already equals x^i at this point. */
    for (; i <= degree; i++) {
        result += a[i] * xpwr1;
        xpwr1 *= x;
    }

    return result + result1 + result2 + result3 + result4 + result5;
}

/*
 * polyh - evaluate the same polynomial with Horner's method split into five
 * interleaved strands, each advanced by x^5 per step.
 *
 * a      : coefficients, a[i] multiplies x^i; a[0..degree] are read.
 * x      : evaluation point.
 * degree : highest exponent; must be >= 0 (a[degree] is read).
 *
 * Returns the value of the polynomial at x.
 */
double polyh(double a[], double x, int degree)
{
    long int i;
    double result;

    /* Too few coefficients to seed five strands: plain Horner. */
    if (degree < 5) {
        result = a[degree];
        for (i = degree - 1; i >= 0; i--) {
            result = a[i] + x * result;
        }
        return result;
    }

    /* Strand K starts at a[degree - K + 1] and takes every fifth coefficient. */
    double result1 = a[degree];
    double result2 = a[degree - 1];
    double result3 = a[degree - 2];
    double result4 = a[degree - 3];
    double result5 = a[degree - 4];
    double step = x * x * x * x * x;

    /* Each strand only depends on its own previous value. */
    for (i = degree - 5; i >= 4; i -= 5) {
        result1 = a[i] + result1 * step;
        result2 = a[i - 1] + result2 * step;
        result3 = a[i - 2] + result3 * step;
        result4 = a[i - 3] + result4 * step;
        result5 = a[i - 4] + result5 * step;
    }

    /*
     * The lowest coefficient folded into strand K has index i + 6 - K, so
     * consecutive strands are one power of x apart. Merge them with a short
     * Horner chain; the result is the polynomial's upper part divided by
     * x^(i + 1).
     */
    result = result5 + x * (result4 + x * (result3 + x * (result2 + x * result1)));

    /* Fold in the leftover low coefficients a[i] .. a[0]. */
    for (; i >= 0; i--) {
        result = a[i] + x * result;
    }

    return result;
}
/*
***5.21
*/
/*
 * psum1 - prefix sum: p[i] = a[0] + a[1] + ... + a[i] for 0 <= i < n,
 * computed three elements per iteration.
 *
 * a : input values, a[0..n-1] are read.
 * p : output, p[0..n-1] are written.
 * n : element count; nothing is read or written when n <= 0.
 */
void psum1(float a[], float p[], long int n)
{
    long int i;
    /* Partial sums must be float: an integer type would truncate them. */
    float last_val, val1, val2, val3;

    if (n <= 0) {
        return;
    }

    p[0] = last_val = a[0];

    for (i = 1; i < n - 2; i += 3) {
        /*
         * Each of the three sums depends on last_val and on input elements
         * only, not on one another, so just one addition per iteration is
         * chained to the previous iteration.
         */
        val1 = last_val + a[i];
        val2 = last_val + (a[i] + a[i + 1]);
        val3 = last_val + ((a[i] + a[i + 1]) + a[i + 2]);

        p[i] = val1;
        p[i + 1] = val2;
        p[i + 2] = val3;
        last_val = val3;
    }

    /* Remaining elements, one at a time. */
    for (; i < n; i++) {
        val1 = last_val + a[i];
        p[i] = val1;
        last_val = val1;
    }
}
/*
***5.22
*/
代入公式 S = 1/((1-α) + α/k) 可得:
方案1加速比為 1.25
方案2加速比為 1.2
故第一種方案比較好