lcc原始碼解析之expr.c
阿新 • • 發佈:2019-02-12
又憋了一個周天,終於大概搞明白了表示式解析這一編譯器中我個人認為也許最迷人的部分。
之所以程式碼讀起來費勁,主要還是在於理論上沒有搞清楚,確實很繞,需要反覆理解。
所以,打算在本篇之後寫一篇理論的科普文,儘管因為我文字表達能力太爛,這是我一直竭力避免的。
-------------update-------------
相關理論知識已經補上,閱讀程式碼請同時參考這裡
---------update end -----------
其實細節上還是有不少不明白的地方,但是下週估計單位工作會比較忙,所以先把程式碼貼上來,然後慢慢完善,不然不知道又要拖到什麼時候。
/*
 * expr.c - expression parsing: turns the token stream into expression
 * trees by recursive descent (expr -> expr1 -> expr2 -> expr3 -> unary ->
 * postfix -> primary), plus the tree helpers (idtree, rvalue, lvalue,
 * retype, cast, field, ...) those parsers rely on.
 *
 * Every preprocessor directive and every comment below sits on its own
 * line; fusing them with code changes which tokens the compiler sees.
 */
#include "c.h"

static char rcsid[] = "$Id: expr.c,v 1.1 2002/08/28 23:12:43 drh Exp $";

/* prec[tok]: operator precedence, taken from column c of each xx()/yy()
 * row in token.h. */
static char prec[] = {
#define xx(a,b,c,d,e,f,g) c,
#define yy(a,b,c,d,e,f,g) c,
#include "token.h"
};

/* oper[tok]: generic operator for the token, taken from column d of each
 * xx()/yy() row in token.h. */
static int oper[] = {
#define xx(a,b,c,d,e,f,g) d,
#define yy(a,b,c,d,e,f,g) d,
#include "token.h"
};

/* Amount added to a symbol's ref counter each time idtree() references it. */
float refinc = 1.0;

static Tree expr2(void);
static Tree expr3(int);
static Tree nullcheck(Tree);
static Tree postfix(Tree);
static Tree unary(void);
static Tree primary(void);
static Type super(Type ty);

/*
 * super - return the "super type" of ty:
 *   INT narrower than inttype           -> inttype
 *   UNSIGNED narrower than unsignedtype -> unsignedtype
 *   POINTER                             -> unsignedptr
 *   anything else                       -> ty itself
 */
static Type super(Type ty) {
    switch (ty->op) {
    case INT:
        if (ty->size < inttype->size)
            return inttype;
        break;
    case UNSIGNED:
        if (ty->size < unsignedtype->size)
            return unsignedtype;
        break;
    case POINTER:
        return unsignedptr;
    }
    return ty;
}

/*
 * expr - parse a (possibly comma-separated) expression and return its tree.
 *
 * Each ',' builds a RIGHT tree whose left kid is root(value(previous)) and
 * whose right kid and type come from the newly parsed operand.
 * If tok is nonzero, test(tok, stop) is called afterwards, mirroring the
 * tail of expr1().
 * NOTE(review): test() is presumably the "expect tok, else skip to a token
 * in stop" helper - confirm against its definition.
 */
Tree expr(int tok) {
    static char stop[] = { IF, ID, '}', 0 };
    Tree p = expr1(0);

    while (t == ',') {
        Tree q;
        t = gettok();
        q = pointer(expr1(0));
        p = tree(RIGHT, q->type, root(value(p)), q);
    }
    if (tok)
        test(tok, stop);
    return p;
}

/* expr0 - parse an expression with expr(tok) and pass the result through
 * root(). */
Tree expr0(int tok) {
    return root(expr(tok));
}

/*
 * expr1 - parse an assignment expression and return its tree.
 *
 * After the left side is parsed by expr2(), an assignment is recognised
 * when the current token is '=' or an operator whose precedence lies in
 * 6..8 or 11..13 (per the original annotation: | ^ & and << >>, + -,
 * * / % - the actual values live in token.h).
 * If tok is nonzero, test(tok, stop) is called before returning.
 */
Tree expr1(int tok) {
    static char stop[] = { IF, ID, 0 };
    Tree p = expr2();

    if (t == '='
    || (prec[t] >=  6 && prec[t] <=  8)
    || (prec[t] >= 11 && prec[t] <= 13)) {
        int op = t;
        t = gettok();
        if (oper[op] == ASGN)
            /* plain assignment: p is the left side, recurse for the right */
            p = asgntree(ASGN, p, value(expr1(0)));
        else {
            /* A binary operator reached here was not consumed by expr3(),
             * which stops when '=' follows; so '=' is required now. */
            expect('=');
            p = incr(op, p, expr1(0));
        }
    }
    if (tok)
        test(tok, stop);
    return p;
}

/* incr - build the tree for the compound assignment "v op= e": an ASGN of
 * (v op e) to v, where the operator tree is built by optree[op]. */
Tree incr(int op, Tree v, Tree e) {
    return asgntree(ASGN, v, (*optree[op])(oper[op], v, e));
}

/*
 * expr2 - parse a conditional expression "e ? l : r".
 *
 * The condition is parsed by expr3(4). The source coordinates recorded
 * just before each arm are handed to the events.points hooks, unless the
 * condition is a constant or no hooks are installed.
 */
static Tree expr2(void) {
    Tree p = expr3(4);

    if (t == '?') {
        Tree l, r;
        Coordinate pts[2];

        /* warn when a function designator is used as the condition */
        if (Aflag > 1 && isfunc(p->type))
            warning("%s used in a conditional expression\n",
                funcname(p));
        p = pointer(p);
        t = gettok();
        pts[0] = src;               /* start of the "then" arm */
        l = pointer(expr(':'));
        pts[1] = src;               /* start of the "else" arm */
        r = pointer(expr2());
        if (generic(p->op) != CNST && events.points) {
            apply(events.points, &pts[0], &l);
            apply(events.points, &pts[1], &r);
        }
        p = condtree(p, l, r);
    }
    return p;
}

/*
 * value - turn a relational/logical tree into a value.
 *
 * If p is not void and its rightmost non-RIGHT operand is one of
 * AND OR NOT EQ NE LE LT GE GT, wrap it as condtree(p, 1, 0); otherwise
 * return p unchanged.
 */
Tree value(Tree p) {
    int op = generic(rightkid(p)->op);

    if (p->type != voidtype
    && (op==AND || op==OR || op==NOT || op==EQ || op==NE
    ||  op== LE || op==LT || op== GE || op==GT))
        p = condtree(p, consttree(1, inttype),
            consttree(0, inttype));
    return p;
}

/*
 * expr3 - parse binary expressions whose operators have precedence >= k.
 *
 * Starting from the precedence of the current token and walking down to k,
 * operators of the current level are consumed in a loop. The loop stops
 * when the next input character is '=', leaving compound assignment
 * operators for expr1().
 * NOTE(review): cp is declared elsewhere; it is presumably the scanner's
 * pointer to the next unread character - confirm.
 */
static Tree expr3(int k) {
    int k1;
    Tree p = unary();

    for (k1 = prec[t]; k1 >= k; k1--)
        while (prec[t] == k1 && *cp != '=') {
            Tree r;
            Coordinate pt;
            int op = t;

            t = gettok();
            pt = src;
            p = pointer(p);
            if (op == ANDAND || op == OROR) {   /* && || */
                /* right operand is parsed at the same level (k1, not
                 * k1 + 1) and offered to the events.points hooks */
                r = pointer(expr3(k1));
                if (events.points)
                    apply(events.points, &pt, &r);
            } else
                /* right operand may only hold higher-precedence operators */
                r = pointer(expr3(k1 + 1));
            /* build the node through the per-token constructor table */
            p = (*optree[op])(oper[op], p, r);
        }
    return p;
}

/*
 * unary - parse a prefix unary expression:
 *   * & + - ~ ! ++ -- sizeof TYPECODE, a cast "(type) e", or a
 *   parenthesised expression; anything else goes to postfix(primary()).
 */
static Tree unary(void) {
    Tree p;

    switch (t) {
    case '*':
        t = gettok();
        p = unary();
        p = pointer(p);
        /* pointer to function/array: only retype, no INDIR is added */
        if (isptr(p->type)
        && (isfunc(p->type->type) || isarray(p->type->type)))
            p = retype(p, p->type->type);
        else {
            if (YYnull)
                p = nullcheck(p);
            p = rvalue(p);
        }
        break;
    case '&':
        t = gettok();
        p = unary();
        if (isarray(p->type) || isfunc(p->type))
            p = retype(p, ptr(p->type));
        else
            p = lvalue(p);
        if (isaddrop(p->op) && p->u.sym->sclass == REGISTER)
            error("invalid operand of unary &; `%s' is declared register\n",
                p->u.sym->name);
        else if (isaddrop(p->op))
            p->u.sym->addressed = 1;
        break;
    case '+':
        t = gettok();
        p = unary();
        p = pointer(p);
        if (isarith(p->type))
            p = cast(p, promote(p->type));
        else
            typeerror(ADD, p, NULL);
        break;
    case '-':
        t = gettok();
        p = unary();
        p = pointer(p);
        if (isarith(p->type)) {
            Type ty = promote(p->type);
            p = cast(p, ty);
            if (isunsigned(ty)) {
                warning("unsigned operand of unary -\n");
                /* unsigned negate is built as (~p) + 1 */
                p = simplify(ADD, ty, simplify(BCOM, ty, p, NULL),
                    cnsttree(ty, 1UL));
            } else
                p = simplify(NEG, ty, p, NULL);
        } else
            typeerror(SUB, p, NULL);
        break;
    case '~':
        t = gettok();
        p = unary();
        p = pointer(p);
        if (isint(p->type)) {
            Type ty = promote(p->type);
            p = simplify(BCOM, ty, cast(p, ty), NULL);
        } else
            typeerror(BCOM, p, NULL);
        break;
    case '!':
        t = gettok();
        p = unary();
        p = pointer(p);
        if (isscalar(p->type))
            p = simplify(NOT, inttype, cond(p), NULL);
        else
            typeerror(NOT, p, NULL);
        break;
    /* prefix ++ / -- are built as compound assignments with constant 1 */
    case INCR:
        t = gettok();
        p = unary();
        p = incr(INCR, pointer(p), consttree(1, inttype));
        break;
    case DECR:
        t = gettok();
        p = unary();
        p = incr(DECR, pointer(p), consttree(1, inttype));
        break;
    case TYPECODE:
    case SIZEOF: {
        int op = t;
        Type ty;

        p = NULL;
        t = gettok();
        if (t == '(') {
            t = gettok();
            if (istypename(t, tsym)) {
                ty = typename();
                expect(')');
            } else {
                p = postfix(expr(')'));
                ty = p->type;
            }
        } else {
            p = unary();
            ty = p->type;
        }
        assert(ty);
        if (op == TYPECODE)
            p = cnsttree(inttype, (long)ty->op);
        else {
            /* sizeof rejects function types and zero-sized types ... */
            if (isfunc(ty) || ty->size == 0)
                error("invalid type argument `%t' to `sizeof'\n", ty);
            /* ... and operands that are bit fields */
            else if (p && rightkid(p)->op == FIELD)
                error("`sizeof' applied to a bit field\n");
            p = cnsttree(unsignedlong, (unsigned long)ty->size);
        }
        }
        break;
    case '(':
        t = gettok();
        if (istypename(t, tsym)) {
            /* cast: "(type-name) unary-expression" */
            Type ty, ty1 = typename(), pty;

            expect(')');
            ty = unqual(ty1);
            if (isenum(ty)) {
                /* cast to an enum: use its underlying type, carrying over
                 * const/volatile from the written type */
                Type ty2 = ty->type;
                if (isconst(ty1))
                    ty2 = qual(CONST, ty2);
                if (isvolatile(ty1))
                    ty2 = qual(VOLATILE, ty2);
                ty1 = ty2;
                ty = ty->type;
            }
            p = pointer(unary());
            pty = p->type;
            if (isenum(pty))
                pty = pty->type;
            if (isarith(pty) && isarith(ty)
            ||  isptr(pty)   && isptr(ty)) {
                explicitCast++;
                p = cast(p, ty);
                explicitCast--;
            } else if (isptr(pty) && isint(ty)
            ||         isint(pty) && isptr(ty)) {
                if (Aflag >= 1 && ty->size < pty->size)
                    warning("conversion from `%t' to `%t' is compiler dependent\n",
                        p->type, ty);
                p = cast(p, ty);
            } else if (ty != voidtype) {
                error("cast from `%t' to `%t' is illegal\n",
                    p->type, ty1);
                ty1 = inttype;
            }
            if (generic(p->op) == INDIR || ty->size == 0)
                p = tree(RIGHT, ty1, NULL, p);
            else
                p = retype(p, ty1);
        } else
            p = postfix(expr(')'));
        break;
    default:
        p = postfix(primary());
    }
    return p;
}

/*
 * postfix - parse the postfix operators that may follow p:
 *   ++  --  [index]  (call)  .field  ->field
 * Loops until the current token is none of these, then returns the tree.
 */
static Tree postfix(Tree p) {
    for (;;)
        switch (t) {
        /* postfix ++/--: RIGHT(RIGHT(p, p op= 1), p) */
        case INCR:
            p = tree(RIGHT, p->type,
                tree(RIGHT, p->type,
                    p,
                    incr(t, p, consttree(1, inttype))),
                p);
            t = gettok();
            break;
        case DECR:
            p = tree(RIGHT, p->type,
                tree(RIGHT, p->type,
                    p,
                    incr(t, p, consttree(1, inttype))),
                p);
            t = gettok();
            break;
        case '[': {
            Tree q;

            t = gettok();
            q = expr(']');
            /* the else below pairs with the inner if (isptr(p->type)) */
            if (YYnull)
                if (isptr(p->type))
                    p = nullcheck(p);
                else if (isptr(q->type))
                    q = nullcheck(q);
            /* p[q] is built as an ADD of p and q, then dereferenced */
            p = (*optree['+'])(ADD, pointer(p), pointer(q));
            if (isptr(p->type) && isarray(p->type->type))
                p = retype(p, p->type->type);
            else
                p = rvalue(p);
            }
            break;
        case '(': {
            Type ty;
            Coordinate pt;

            p = pointer(p);
            if (isptr(p->type) && isfunc(p->type->type))
                ty = p->type->type;
            else {
                error("found `%t' expected a function\n", p->type);
                /* recover by treating p as a pointer to a void function */
                ty = func(voidtype, NULL, 1);
                p = retype(p, ptr(ty));
            }
            pt = src;
            t = gettok();
            p = call(p, ty, pt);
            }
            break;
        case '.':
            t = gettok();
            if (t == ID) {
                if (isstruct(p->type)) {
                    Tree q = addrof(p);
                    p = field(q, token);
                    q = rightkid(q);
                    if (isaddrop(q->op) && q->u.sym->temporary)
                        p = tree(RIGHT, p->type, p, NULL);
                } else
                    error("left operand of . has incompatible type `%t'\n",
                        p->type);
                t = gettok();
            } else
                error("field name expected\n");
            break;
        case DEREF:
            t = gettok();
            p = pointer(p);
            if (t == ID) {
                if (isptr(p->type) && isstruct(p->type->type)) {
                    if (YYnull)
                        p = nullcheck(p);
                    p = field(p, token);
                } else
                    error("left operand of -> has incompatible type `%t'\n",
                        p->type);
                t = gettok();
            } else
                error("field name expected\n");
            break;
        default:
            return p;
        }
}

/*
 * primary - parse a primary expression: an integer/floating constant, a
 * string constant, an identifier, or FIRSTARG. Anything else reports
 * "illegal expression" and yields the int constant 0.
 * The caller must already have handled '(' (asserted on entry).
 */
static Tree primary(void) {
    Tree p;

    assert(t != '(');
    switch (t) {
    /* integer and floating constants */
    case ICON:
    case FCON:
        p = tree(mkop(CNST,tsym->type), tsym->type, NULL, NULL);
        p->u.v = tsym->u.c.v;
        break;
    /* string constants */
    case SCON:
        if (ischar(tsym->type->type))
            tsym->u.c.v.p = stringn(tsym->u.c.v.p, tsym->type->size);
        else
            tsym->u.c.v.p = memcpy(allocate((tsym->type->size/widechar->size)*sizeof (int), PERM),
                tsym->u.c.v.p, (tsym->type->size/widechar->size)*sizeof (int));
        tsym = constant(tsym->type, tsym->u.c.v);
        if (tsym->u.c.loc == NULL)
            tsym->u.c.loc = genident(STATIC, tsym->type, GLOBAL);
        p = idtree(tsym->u.c.loc);
        break;
    /* identifiers */
    case ID:
        if (tsym == NULL) {
            /* undeclared identifier; this local p is a Symbol and
             * shadows the outer Tree p */
            Symbol p = install(token, &identifiers, level, PERM);

            p->src = src;
            if (getchr() == '(') {
                /* followed by '(': implicitly declare as extern int f() */
                Symbol q = lookup(token, externals);

                p->type = func(inttype, NULL, 1);
                p->sclass = EXTERN;
                if (Aflag >= 1)
                    warning("missing prototype\n");
                if (q && !eqtype(q->type, p->type, 1))
                    warning("implicit declaration of `%s' does not match previous declaration at %w\n",
                        q->name, &q->src);
                if (q == NULL) {
                    q = install(p->name, &externals, GLOBAL, PERM);
                    q->type = p->type;
                    q->sclass = EXTERN;
                    q->src = src;
                    (*IR->defsymbol)(q);
                }
                p->u.alias = q;
            } else {
                /* otherwise report it and recover as an auto int */
                error("undeclared identifier `%s'\n", p->name);
                p->sclass = AUTO;
                p->type = inttype;
                if (p->scope == GLOBAL)
                    (*IR->defsymbol)(p);
                else
                    addlocal(p);
            }
            t = gettok();
            if (xref)
                use(p, src);
            return idtree(p);
        }
        if (xref)
            use(tsym, src);
        if (tsym->sclass == ENUM)
            p = consttree(tsym->u.value, inttype);
        else {
            if (tsym->sclass == TYPEDEF)
                error("illegal use of type name `%s'\n", tsym->name);
            p = idtree(tsym);
        }
        break;
    case FIRSTARG:
        if (level > PARAM && cfunc && cfunc->u.f.callee[0])
            p = idtree(cfunc->u.f.callee[0]);
        else {
            error("illegal use of `%k'\n", FIRSTARG);
            p = cnsttree(inttype, 0L);
        }
        break;
    default:
        error("illegal expression\n");
        p = cnsttree(inttype, 0L);
    }
    t = gettok();
    return p;
}

/*
 * idtree - build a tree that accesses the identifier described by symbol p.
 *
 * The address operator is chosen from p's scope and storage class:
 *   global scope or STATIC   -> ADDRG
 *   PARAM scope              -> ADDRF
 *   EXTERN (via p->u.alias)  -> ADDRG
 *   otherwise (local)        -> ADDRL
 * The tree's u.sym points at the symbol, and the symbol's ref counter is
 * bumped by refinc. Arrays and functions yield the bare address node;
 * other types yield an rvalue (INDIR) of the address.
 */
Tree idtree(Symbol p) {
    int op;
    Tree e;
    Type ty = p->type ? unqual(p->type) : voidptype;

    if (p->scope == GLOBAL || p->sclass == STATIC)
        op = ADDRG;
    else if (p->scope == PARAM) {
        op = ADDRF;
        /* struct parameter when the back end does not want argument
         * blocks: two levels of indirection are applied */
        if (isstruct(p->type) && !IR->wants_argb) {
            e = tree(mkop(op,voidptype), ptr(ptr(p->type)), NULL, NULL);
            e->u.sym = p;
            return rvalue(rvalue(e));
        }
    } else if (p->sclass == EXTERN) {
        assert(p->u.alias);
        p = p->u.alias;
        op = ADDRG;
    } else
        op = ADDRL;
    p->ref += refinc;
    if (isarray(ty))
        e = tree(mkop(op,voidptype), p->type,      NULL, NULL);
    else if (isfunc(ty))
        e = tree(mkop(op,funcptype), p->type,      NULL, NULL);
    else
        e = tree(mkop(op,voidptype), ptr(p->type), NULL, NULL);
    e->u.sym = p;
    if (isptr(e->type))
        e = rvalue(e);
    return e;
}

/* rvalue - wrap p in an INDIR node whose type is the unqualified type
 * that p's type dereferences to. */
Tree rvalue(Tree p) {
    Type ty = deref(p->type);

    ty = unqual(ty);
    return tree(mkop(INDIR,ty), ty, p, NULL);
}

/*
 * lvalue - return the address tree beneath an INDIR node.
 * Reports "lvalue required" (and returns value(p)) when p is not an
 * INDIR; warns when the object's unqualified type is void.
 */
Tree lvalue(Tree p) {
    if (generic(p->op) != INDIR) {
        error("lvalue required\n");
        return value(p);
    } else if (unqual(p->type) == voidtype)
        warning("`%t' used as an lvalue\n", p->type);
    return p->kids[0];
}

/* retype - return p itself if it already has type ty; otherwise return a
 * new tree with the same op, kids, node and u fields but type ty. */
Tree retype(Tree p, Type ty) {
    Tree q;

    if (p->type == ty)
        return p;
    q = tree(p->op, ty, p->kids[0], p->kids[1]);
    q->node = p->node;
    q->u = p->u;
    return q;
}

/* rightkid - descend through nested RIGHT trees (preferring kids[1], else
 * kids[0]) and return the first non-RIGHT operand reached. */
Tree rightkid(Tree p) {
    while (p && p->op == RIGHT)
        if (p->kids[1])
            p = p->kids[1];
        else if (p->kids[0])
            p = p->kids[0];
        else
            assert(0);
    assert(p);
    return p;
}

/*
 * hascall - return 1 if the tree rooted at p contains a CALL anywhere, or
 * (when IR->mulops_calls is set) an integer/unsigned DIV, MOD or MUL;
 * return 0 otherwise, including for a null tree.
 */
int hascall(Tree p) {
    if (p == 0)
        return 0;
    if (generic(p->op) == CALL || (IR->mulops_calls &&
      (p->op == DIV+I || p->op == MOD+I || p->op == MUL+I
    || p->op == DIV+U || p->op == MOD+U || p->op == MUL+U)))
        return 1;
    return hascall(p->kids[0]) || hascall(p->kids[1]);
}

/*
 * binary - return the result type of a binary arithmetic operation on
 * operands of types xty and yty.
 *
 * Types are tried from highest rank down; the first one matching either
 * operand wins. long vs. unsigned gives long only if long is wider than
 * unsigned, else unsigned long. The fallback is int.
 */
Type binary(Type xty, Type yty) {
#define xx(t) if (xty == t || yty == t) return t
    xx(longdouble);
    xx(doubletype);
    xx(floattype);
    xx(unsignedlonglong);
    xx(longlong);
    xx(unsignedlong);
    if (xty == longtype     && yty == unsignedtype
    ||  xty == unsignedtype && yty == longtype)
        if (longtype->size > unsignedtype->size)
            return longtype;
        else
            return unsignedlong;
    xx(longtype);
    xx(unsignedtype);
    return inttype;
#undef xx
}

/* pointer - retype an array tree via atop() and a function tree via
 * ptr(); return any other tree unchanged. */
Tree pointer(Tree p) {
    if (isarray(p->type))
        /* assert(p->op != RIGHT || p->u.sym == NULL), */
        p = retype(p, atop(p->type));
    else if (isfunc(p->type))
        p = retype(p, ptr(p->type));
    return p;
}

/*
 * cond - turn a value tree into a condition tree.
 * Trees whose rightmost operand is already a relational/logical operator
 * are returned unchanged; anything else becomes "p != 0".
 */
Tree cond(Tree p) {
    int op = generic(rightkid(p)->op);

    if (op == AND || op == OR || op == NOT
    ||  op == EQ  || op == NE
    ||  op == LE  || op == LT || op == GE || op == GT)
        return p;
    p = pointer(p);
    return (*optree[NEQ])(NE, p, consttree(0, inttype));
}

/*
 * cast - convert tree p to type `type` and return the converted tree.
 *
 * When source and destination differ in op or size the conversion runs in
 * three steps:
 *   1. widen the source to its super type (see super());
 *   2. convert between the super types of source and destination, with
 *      special handling for unsigned<->floating conversions;
 *   3. narrow from the destination's super type to the destination.
 * The result is finally retyped to `type` (qualifiers included).
 */
Tree cast(Tree p, Type type) {
    Type src, dst;

    p = value(p);
    if (p->type == type)
        return p;
    /* NOTE(review): unqual() presumably strips const/volatile - confirm */
    dst = unqual(type);
    src = unqual(p->type);
    if (src->op != dst->op || src->size != dst->size) {
        /* step 1: source -> super(source) */
        switch (src->op) {
        case INT:
            if (src->size < inttype->size)
                p = simplify(CVI, inttype, p, NULL);
            break;
        case UNSIGNED:
            if (src->size < inttype->size)
                p = simplify(CVU, inttype, p, NULL);
            else if (src->size < unsignedtype->size)
                p = simplify(CVU, unsignedtype, p, NULL);
            break;
        case ENUM:
            p = retype(p, inttype);
            break;
        case POINTER:
            if (isint(dst) && src->size > dst->size)
                warning("conversion from `%t' to `%t' is undefined\n", p->type, type);
            p = simplify(CVP, super(src), p, NULL);
            break;
        case FLOAT:
            break;
        default:
            assert(0);
        }
        /* step 2: super(source) -> super(destination) */
        {
            src = unqual(p->type);
            dst = super(dst);
            if (src->op != dst->op)
                switch (src->op) {
                case INT:
                    p = simplify(CVI, dst, p, NULL);
                    break;
                case UNSIGNED:
                    if (isfloat(dst)) {
                        /* unsigned -> floating is built as
                         * 2.0 * (signed)(p >> 1) + (signed)(p & 1) */
                        Type ssrc = signedint(src);
                        Tree two = cnsttree(longdouble, (long double)2.0);
                        p = (*optree['+'])(ADD,
                            (*optree['*'])(MUL,
                                two,
                                simplify(CVU, ssrc,
                                    simplify(RSH, src,
                                        p, consttree(1, inttype)), NULL)),
                            simplify(CVU, ssrc,
                                simplify(BAND, src,
                                    p, consttree(1, unsignedtype)), NULL));
                    } else
                        p = simplify(CVU, dst, p, NULL);
                    break;
                case FLOAT:
                    if (isunsigned(dst)) {
                        /* floating -> unsigned: with c = max signed + 1,
                         * p >= c ? (dst)(sdst)(p - c) + c : (sdst)p */
                        Type sdst = signedint(dst);
                        Tree c = cast(cnsttree(longdouble, (long double)sdst->u.sym->u.limits.max.i + 1), src);
                        p = condtree(
                            simplify(GE, src, p, c),
                            (*optree['+'])(ADD,
                                cast(cast(simplify(SUB, src, p, c), sdst), dst),
                                cast(cnsttree(unsignedlong, (unsigned long)sdst->u.sym->u.limits.max.i + 1), dst)),
                            simplify(CVF, sdst, p, NULL));
                    } else
                        p = simplify(CVF, dst, p, NULL);
                    break;
                default:
                    assert(0);
                }
            dst = unqual(type);
        }
    }
    /* step 3: super(destination) -> destination */
    src = unqual(p->type);
    switch (src->op) {
    case INT:
        if (src->op != dst->op || src->size != dst->size)
            p = simplify(CVI, dst, p, NULL);
        break;
    case UNSIGNED:
        if (src->op != dst->op || src->size != dst->size)
            p = simplify(CVU, dst, p, NULL);
        break;
    case FLOAT:
        if (src->op != dst->op || src->size != dst->size)
            p = simplify(CVF, dst, p, NULL);
        break;
    case POINTER:
        if (src->op != dst->op)
            p = simplify(CVP, dst, p, NULL);
        else {
            if (isfunc(src->type) && !isfunc(dst->type)
            || !isfunc(src->type) &&  isfunc(dst->type))
                warning("conversion from `%t' to `%t' is compiler dependent\n", p->type, type);
            if (src->size != dst->size)
                p = simplify(CVP, dst, p, NULL);
        }
        break;
    default:
        assert(0);
    }
    return retype(p, type);
}

/*
 * field - build the tree that accesses member `name` of the struct/union
 * that p addresses.
 *
 * const/volatile qualifiers on the containing type are propagated to the
 * member type. Array members yield their address; bit fields (q->lsb
 * nonzero) yield a FIELD tree; other members yield an rvalue. An unknown
 * member is reported and treated as an int.
 */
Tree field(Tree p, const char *name) {
    Field q;
    Type ty1, ty = p->type;

    if (isptr(ty))
        ty = deref(ty);
    ty1 = ty;
    ty = unqual(ty);
    if ((q = fieldref(name, ty)) != NULL) {
        if (isarray(q->type)) {
            ty = q->type->type;
            if (isconst(ty1) && !isconst(ty))
                ty = qual(CONST, ty);
            if (isvolatile(ty1) && !isvolatile(ty))
                ty = qual(VOLATILE, ty);
            ty = array(ty, q->type->size/ty->size, q->type->align);
        } else {
            ty = q->type;
            if (isconst(ty1) && !isconst(ty))
                ty = qual(CONST, ty);
            if (isvolatile(ty1) && !isvolatile(ty))
                ty = qual(VOLATILE, ty);
            ty = ptr(ty);
        }
        if (YYcheck && !isaddrop(p->op) && q->offset > 0)   /* omit */
            p = nullcall(ty, YYcheck, p, consttree(q->offset, inttype));    /* omit */
        else                    /* omit */
            p = simplify(ADD+P, ty, p, consttree(q->offset, signedptr));

        if (q->lsb) {
            p = tree(FIELD, ty->type, rvalue(p), NULL);
            p->u.field = q;
        } else if (!isarray(q->type))
            p = rvalue(p);
    } else {
        error("unknown field `%s' of `%t'\n", name, ty);
        p = rvalue(retype(p, ptr(inttype)));
    }
    return p;
}

/* funcname - return the quoted name of function f when f is an address
 * node, otherwise the string "a function" */
char *funcname(Tree f) {
    if (isaddrop(f->op))
        return stringf("`%s'", f->u.sym->name);
    return "a function";
}

/*
 * nullcheck - wrap pointer tree p in a run-time null check.
 *
 * Does nothing when needconst is set, YYnull is unset, or p is not a
 * pointer. If the check routine is named "_YYnull", p is stored in a
 * register temporary t1 and the result is RIGHT(t1 || YYnull(file, line),
 * t1); otherwise the check is delegated to nullcall().
 */
static Tree nullcheck(Tree p) {
    if (!needconst && YYnull && isptr(p->type)) {
        p = value(p);
        if (strcmp(YYnull->name, "_YYnull") == 0) {
            Symbol t1 = temporary(REGISTER, voidptype);
            p = tree(RIGHT, p->type,
                tree(OR, voidtype,
                    cond(asgn(t1, cast(p, voidptype))),
                    vcall(YYnull, voidtype, (file && *file ? pointer(idtree(mkstr(file)->u.c.loc)) : cnsttree(voidptype, NULL)), cnsttree(inttype, (long)lineno) , NULL)),
                idtree(t1));
        }
        else
            p = nullcall(p->type, YYnull, p, cnsttree(inttype, 0L));
    }
    return p;
}

/*
 * nullcall - build a call to checking routine f with result type pty.
 *
 * The arguments are p, e, the size and alignment of the type p points to,
 * the file name (or a null pointer constant) and the line number. For an
 * array pty the call is built with atop(pty) and then retyped to pty.
 */
Tree nullcall(Type pty, Symbol f, Tree p, Tree e) {
    Type ty;

    if (isarray(pty))
        return retype(nullcall(atop(pty), f, p, e), pty);
    ty = unqual(unqual(p->type)->type);
    return vcall(f, pty,
        p, e,
        cnsttree(inttype, (long)ty->size),
        cnsttree(inttype, (long)ty->align),
        (file && *file ? pointer(idtree(mkstr(file)->u.c.loc)) : cnsttree(voidptype, NULL)), cnsttree(inttype, (long)lineno) , NULL);
}