編譯原理學習:TINY語言詞法掃描程式實現
阿新 • • 發佈:2019-02-06
最近對解釋型程式(類似python或者是linux裡的bc計算器)非常感興趣,就開始學習一下編譯原理。今天自己實現了TINY語言的詞法掃描程式。大部分參考《編譯原理及實踐》一書。但是我做了一些小小的改進。
先說一下TINY語言:
1、註釋:放在一對大括號內。書上的註釋不能巢狀,我做了一點改進,允許巢狀。
2、關鍵字:read write if then else end repeat until
3、型別:只支援整型和布林型。
4、計算:+ - * / ( ) < = :=,其中 := 為賦值運算,= 為相等判斷。沒有 >、<= 和 >=。
一個示例的TINY語言程式:
test.tiny: (選自《編譯原理及實踐》)
{ Sample program
in TINY language -
computes factorial
}
read x; { input an integer }
if 0 < x then { don't compute if x <= 0 }
fact := 1;
repeat
fact := fact * x;
x := x - 1;
until x = 0;
write fact { output factorial of x }
end
在globals.h中,涉及到一些型別的宣告:
#ifndef GLOBALS_H
#define GLOBALS_H

#include <stdio.h>

/*
 * Token categories handed from the scanner to its callers.
 *
 * ENDFILE is deliberately the first enumerator (value 0): the scanner has no
 * explicit end-of-file rule, so it relies on yylex() returning 0 when the
 * input is exhausted.
 */
typedef enum
{
    /* book-keeping tokens */
    ENDFILE, ERROR,
    /* reserved words */
    IF, THEN, ELSE, END, REPEAT, UNTIL, READ, WRITE,
    /* multi-character tokens */
    ID, NUM,
    /* special symbols */
    ASSIGN, EQ, LT, PLUS, MINUS, TIMES, OVER, LPAREN, RPAREN, SEMI
} TokenType;

/*
 * Current source line number. Declared with an explicit type: the old
 * "extern lineno;" form depended on implicit int, which C99 removed.
 */
extern int lineno;

/* The max size of an identifier or reserved word */
#define MAXTOKENLEN 50

#endif /* GLOBALS_H */
用於生成詞法掃描的flex輸入,這是程式的核心部分:
tiny.l
%{
/*
 * Flex specification for the TINY scanner.
 *
 * Recognises the reserved words, operators, numbers and identifiers of TINY,
 * counts lines in the global lineno, and skips brace-delimited comments,
 * which are allowed to nest.
 */
#include <stdio.h>
#include <string.h>
#include "globals.h"
#include "util.h"

/* Text of the most recently scanned token, truncated to MAXTOKENLEN chars. */
char tokenString[MAXTOKENLEN + 1];
%}

digit       [0-9]
number      {digit}+
letter      [a-zA-Z]
identifier  {letter}[a-zA-Z0-9]*
newline     \n
whitespace  [ \t]

%%

"if"            {return IF;}
"then"          {return THEN;}
"else"          {return ELSE;}
"end"           {return END;}
"repeat"        {return REPEAT;}
"until"         {return UNTIL;}
"read"          {return READ;}
"write"         {return WRITE;}
":="            {return ASSIGN;}
"="             {return EQ;}
"<"             {return LT;}
"+"             {return PLUS;}
"-"             {return MINUS;}
"*"             {return TIMES;}
"/"             {return OVER;}
"("             {return LPAREN;}
")"             {return RPAREN;}
";"             {return SEMI;}
{number}        {return NUM;}
{identifier}    {return ID;}
{newline}       {lineno++;}
{whitespace}    { /* Do nothing */ }
"{"             {
                    /* Skip a comment; depth tracks nesting so that inner
                       comments do not end the outer one early. */
                    int c;      /* int, not char: must be able to hold EOF */
                    int depth = 1;
                    do
                    {
                        c = input();
                        /* End of input inside a comment: stop scanning it.
                           Depending on the flex release, input() reports end
                           of input as EOF or as 0, so both are checked. */
                        if (c == EOF || c == 0)
                            break;
                        else if (c == '\n')
                            lineno++;
                        else if (c == '{')
                            depth++;
                        else if (c == '}')
                            depth--;
                    } while (depth != 0);
                }
.               {return ERROR;}

%%

/*
 * Scan the next token, echo it as "<line>: <description>" on stdout and
 * return its category.
 *
 * The lexeme is copied into the global tokenString, truncated to
 * MAXTOKENLEN characters. At end of input yylex() returns 0, which is
 * ENDFILE.
 */
TokenType getToken(void)
{
    TokenType currentToken;

    currentToken = yylex();
    /* snprintf always NUL-terminates and never writes past the buffer. */
    snprintf(tokenString, sizeof tokenString, "%s", yytext);
    printf("%d: ", lineno);
    printToken(currentToken, tokenString);
    return currentToken;
}
printToken函式在util.c中實現:
util.h:
#ifndef UTIL_H
#define UTIL_H

#include "globals.h"

/*
 * Scans the next token, prints it prefixed by the current line number, and
 * returns its category. The definition lives in the flex source (tiny.l).
 */
TokenType getToken(void);

/*
 * Prints a description of `token` to stdout; `tokenString` is the text of
 * the token as it appeared in the input.
 */
void printToken(TokenType token, char *tokenString);

#endif /* UTIL_H */
util.c:
#include "util.h"
#include <stdio.h>
#include "globals.h"
/*
 * Print a tab-indented, one-line description of a token to stdout.
 *
 * token       - category of the token being reported
 * tokenString - the token's text; printed for every category except ENDFILE
 *
 * Every category ends its line with '\n', including ENDFILE and ERROR, so a
 * caller that has already printed a "<line>: " prefix never leaves a
 * dangling, unterminated line.
 */
void printToken(TokenType token, char* tokenString)
{
    switch (token)
    {
    case IF:
    case THEN:
    case ELSE:
    case END:
    case REPEAT:
    case UNTIL:
    case READ:
    case WRITE:
        printf("\treserved word: %s\n", tokenString);
        break;
    case ID:
        printf("\tidentifier: %s\n", tokenString);
        break;
    case NUM:
        printf("\tnumber: %s\n", tokenString);
        break;
    case ASSIGN:
    case EQ:
    case LT:
    case PLUS:
    case MINUS:
    case TIMES:
    case OVER:
    case LPAREN:
    case RPAREN:
    case SEMI:
        printf("\toperator: %s\n", tokenString);
        break;
    case ENDFILE:
        printf("\tEOF\n");
        break;
    case ERROR:
        printf("\tERROR: %s\n", tokenString);
        break;
    default:
        /* Should not happen; report the raw value to aid debugging. */
        printf("\tunknown token: %d\n", (int)token);
        break;
    }
}
main.c:
#include "globals.h"
#include "util.h"
#include <stdio.h>
#include <stdlib.h>
/* Current source line number, starting at 1; the scanner increments it for
   every newline it reads. */
int lineno = 1;
/*
 * Program entry point: keeps requesting tokens from getToken() until it
 * reports ENDFILE, then exits with status 0.
 *
 * argc and argv are accepted but not used.
 */
int main(int argc, char* argv[])
{
    TokenType current;

    for (;;)
    {
        current = getToken();
        if (current == ENDFILE)
            break;
    }

    return 0;
}
這就是所有的檔案了!最後,是makefile檔案:
# Build the TINY scanner. Recipe lines must start with a TAB character.
scanner.exe: main.o lex.yy.o util.o
	gcc main.o lex.yy.o util.o -o scanner.exe -lfl

main.o: main.c globals.h util.h
	gcc main.c -c

util.o: util.c util.h globals.h
	gcc util.c -c

# The generated scanner includes globals.h and util.h, so it has to be
# rebuilt when either header changes, not only when tiny.l does.
lex.yy.o: tiny.l globals.h util.h
	flex tiny.l
	gcc lex.yy.c -c
於是,一個簡單的詞法掃描程式就完成了。
由於使用的是預設的輸入,所以這個程式直接支援從鍵盤輸入,執行效果如下:
當然,也可以使用重定向操作,使用效果如下: