當沒有浮點數加法運算時,使用整數完成double型別的加法運算
阿新 • • 發佈:2018-12-16
加法部分程式碼:
#include "stdio.h" #include "project06.support.h" struct node { unsigned long long int sign; unsigned long long int exp; unsigned long long int frac; unsigned long long int sig; }; double add(double a, double b) { struct node nodea, nodeb; union double_precision dpa; union double_precision dpb; dpa.drep = a; dpb.drep = b; //得到符號位 nodea.sign = ((dpa.irep >> 63) & 0x1); nodeb.sign = ((dpb.irep >> 63) & 0x1); //得到指數 nodea.exp = ((dpa.irep >> 52) & 0x7ff); nodeb.exp = ((dpb.irep >> 52) & 0x7ff); //得到小數 nodea.frac = (dpa.irep & 0x000fffffffffffff); nodeb.frac = (dpb.irep & 0x000fffffffffffff); //得到有效數 nodea.sig = ((dpa.irep & 0x000fffffffffffff) | 0x0010000000000000); nodeb.sig = ((dpb.irep & 0x000fffffffffffff) | 0x0010000000000000); //判斷a是否為NAN 或者 INFINITY if (nodea.exp == (unsigned long long int)0x7ff) { if (nodea.frac != (unsigned long long int)0) { return NAN; } else { return INFINITY; } } //判斷b是否為NAN 或者 INFINITY if (nodeb.exp == (unsigned long long int)0x7ff) { if (nodeb.frac != (unsigned long long int)0) { return NAN; } else { return INFINITY; } } struct node node_res; long long int siga, sigb, sig_res; //將有效小數變成有符號數,方便將減法運算變成加法運算 siga = (long long int)nodea.sig; sigb = (long long int)nodeb.sig; if (nodea.sign == 1) { siga *= -1; } if (nodeb.sign == 1) { sigb *= -1; } //指數小的進行右移 if (nodea.exp > nodeb.exp) { sig_res = siga + (sigb >> (nodea.exp - nodeb.exp)); node_res.exp = nodea.exp; } else { sig_res = (siga >> (nodeb.exp - nodea.exp)) + sigb; node_res.exp = nodeb.exp; } //判斷結果是否為0 if (sig_res == 0) return 0; //將帶符號的有效小數轉成不帶符號的 node_res.sign = 0; if (sig_res < 0) { sig_res *= -1; node_res.sign = 1; } unsigned long long int temp = (unsigned long long int)sig_res; //判斷是否溢位,如果溢位則右移 if ((temp & 0x0020000000000000) == 0x0020000000000000) { temp = temp >> 1; node_res.exp += 1; node_res.frac = (temp & 0x000fffffffffffff); node_res.sig = ((temp & 0x000fffffffffffff) | 0x0010000000000000); } //迴圈左移 while ((temp & 0x0010000000000000) != 0x0010000000000000) { temp = temp << 1; node_res.exp -= 1; } //pack node_res.sig = temp; node_res.frac = (temp & 0x000fffffffffffff); union double_precision dp_res; dp_res.irep = (node_res.sign << 63) + (node_res.exp << 52) + node_res.frac; //日誌 printf("value a: %f\n", a); printf("value b: %f\n", b); printf("sign= %016llx exp= %016llx frac= %016llx sig= %016llx \n", nodea.sign, nodea.exp, nodea.frac, nodea.sig); printf("sign= %016llx exp= %016llx frac= %016llx sig= %016llx \n", nodeb.sign, nodeb.exp, nodeb.frac, nodeb.sig); printf("sign= %016llx exp= %016llx frac= %016llx sig= %016llx \n", node_res.sign, node_res.exp, node_res.frac, node_res.sig); printf("result = %lf\n\n", dp_res.drep); return dp_res.drep; }
驗算部分程式碼:
#include "stdio.h" #include "project06.support.h" int main(){ double a = 8.75; double b = 0.5; add(a,b); /* example 10. +8.75 (4021800000000000) sign bit: 0 biased exponent: 402 fraction: 1800000000000 significand: 11800000000000 +0.50 (3fe0000000000000) sign bit: 0 biased exponent: 3fe fraction: 0000000000000 significand: 10000000000000 9.25 */ a = 6.25; b = 4.75; add(a,b); /* example 11. +6.25 (4019000000000000) sign bit: 0 biased exponent: 401 fraction: 9000000000000 significand: 19000000000000 +4.75 (4013000000000000) sign bit: 0 biased exponent: 401 fraction: 3000000000000 significand: 13000000000000 +11.00 */ a = -8.75; b = 0.5; add(a,b); /* example 12. -8.75 (c021800000000000) sign bit: 1 biased exponent: 402 fraction: 1800000000000 significand: 11800000000000 +0.50 (3fe0000000000000) sign bit: 0 biased exponent: 3fe fraction: 0000000000000 significand: 10000000000000 -8.25 */ return 0; }