【編譯原理實驗】詞法分析器程式設計

阿新 • • 發佈：2020-11-05

1. 實驗內容

設計、編制並除錯一個簡單語言CP(Compiler Principle)的詞法分析程式，加深對詞法分析原理的理解。

CP語言的詞法：

(1) 關鍵詞： begin end if then else for while do and or not

注意：所有關鍵詞都是小寫的。

(2) 識別符號ID，與標準C語言一致，即：以下劃線或字母開頭的字母數字下劃線組成的符號串。

(3)無符號整數NUM：數字串

(4)運算子和分界符： +、-、*、/、>、<、=、:=、>=、<=、<>、++、--、(、)、; 、 #

注意：:= 表示賦值運算子，# 表示註釋開始的部分，; 表示語句結束，<> 表示不等關係。

(5)空白符包括空格、製表符和換行符，用於分割ID、NUM、運算子、分界符和關鍵詞，詞法分析階段要忽略空白符。

2. 實驗要求

(1) 給出各類單詞符號的分類編碼。

(2) 詞法分析程式應該能發現輸入串中的錯誤。

(3) 詞法分析作為單獨一遍，將詞法分析程式輸出的二元式序列儲存為中間檔案形式。

3. 設計思路

先給各類單詞符號編碼（見下圖），以文件的形式輸入將要識別的符號串，並將其存放在一個數組中，從前往後，每次根據識別到的第一個字元X做出如下操作：

(1)X為空白符：跳過X繼續往下判斷。

(2)X為註釋符：輸出註釋符#的二元式，然後跳過其後的所有字元，直到換行符為止（換行符本身也一併跳過）。

(3)X為數字符：記錄此字元及其後面的字元直到遇到一個非數字符，若該非數字符為空白符、運算子或分界符，則將記錄的這一段識別為數字串；否則，繼續往後記錄直到遇到空白符、運算子或分界符，並將記錄的這一段識別為錯誤單詞。

(4)X為運算子：判斷X和其後的一個字元是否能組成運算子，若能則將X和其後的字元組合識別成運算子，否則直接識別X為運算子。

(5)X為字母符或下劃線：記錄此字元及其後面的字元直到遇到空白符、運算子或分界符，若記錄的符號串能匹配到關鍵詞，則將其識別為關鍵詞，否則將其識別為識別符號。

(6)X為其他符：識別為錯誤輸入。

重複上述操作直到識別完整個輸入串。

4. 程式結構

(1)主要資料結構：

字元陣列：用於記錄輸入串。

map容器：記錄各類單詞符號及其編碼，方便後續查詢。

string型別變數：用於記錄中間單詞，方便處理。

(2)函式定義：

void map_init()：初始化map容器，存入各類單詞及其編碼。

int check_num(char a)：檢測字元a是否是數字符。

int check_letter(char a)：檢測字元a是否是字母符或下劃線。

int check_devide(char a)：檢測字元a是否是空白符、運算子、分界符或註釋符。

int main()：主體程式。

(3)演算法流程：

5. 實驗程式碼

#include<bits/stdc++.h>
using namespace std;
// Input buffer: main() stores the characters read from the source file here.
// Its capacity is fixed at 200 characters.
char ch[200];
// Lexeme table: maps the text of each keyword, operator and delimiter to its
// numeric class code. It is filled by map_init() and queried by main().
map<string,int> mp;
               
// Populates the global lexeme table `mp` with every keyword, operator and
// delimiter of the CP language. A lexeme's class code is simply its position
// in the ordered list below (begin = 0 ... # = 27).
void map_init()
{
    static const char *const lexemes[] = {
        "begin", "end", "if", "then", "else", "for",   // codes 0-5
        "while", "do", "and", "or", "not",             // codes 6-10
        "+", "-", "*", "/", ">", "<", "=",             // codes 11-17
        ":=", ">=", "<=", "<>", "++", "--",            // codes 18-23
        "(", ")", ";", "#"                             // codes 24-27
    };
    const int count = sizeof(lexemes) / sizeof(lexemes[0]);

    for (int code = 0; code < count; ++code)
    {
        mp[lexemes[code]] = code;
    }
}

// Returns 1 when `a` is an ASCII decimal digit ('0'..'9'), otherwise 0.
int check_num(char a)
{
    return ('0' <= a && a <= '9') ? 1 : 0;
}

// Returns 1 when `a` may start an identifier: an ASCII letter of either case
// or an underscore. Returns 0 for every other character.
int check_letter(char a)
{
    if (a == '_') return 1;
    if ('a' <= a && a <= 'z') return 1;
    if ('A' <= a && a <= 'Z') return 1;
    return 0;
}

// Returns 1 when `a` ends the current word: a whitespace character
// (newline, tab, space), a character that can appear in an operator or
// delimiter, or the comment marker '#'. Returns 0 otherwise.
int check_devide(char a)
{
    switch (a)
    {
        // whitespace
        case '\n': case '\t': case ' ':
        // operator characters
        case '+': case '-': case '*': case '/':
        case '>': case '<': case '=': case ':':
        // delimiters and the comment marker
        case '(': case ')': case ';': case '#':
            return 1;
        default:
            return 0;
    }
}

int main()
{
    FILE *fp;
    fp=fopen("test8.txt","r");
    if(fp==NULL)
    {
        cout<<"Open Error";
        exit(1);
    }
    ofstream ofs;
    ofs.open("result8.txt",ios::out);
    int len=0;
    while(fscanf(fp,"%c",&ch[len])!=EOF) len++;
    fclose(fp);
    string temp="";
    map_init();
    
    for(int i=0;i<len;i++)
    {
        temp="";
        if(ch[i]=='\n'||ch[i]=='\t'||ch[i]==' ') continue;   //跳過換行符、製表符和空格 
        else if(ch[i]=='#')  //跳過註釋 
        {
            cout<<"<#,27>"<<endl;
            ofs<<"<#,27>"<<endl;
            while(ch[i]!='\n') i++;
        }
        else if(check_num(ch[i]))        //檢測數字串 
        {
            while(check_num(ch[i]))
            {
                temp+=ch[i];
                i++;
            }
            if(check_devide(ch[i])||i>=len)//處理到分隔符或結尾 
            {
                cout<<'<'<<temp<<','<<"29"<<'>'<<endl;
                ofs<<'<'<<temp<<','<<"29"<<'>'<<endl;
                i--;//不跳過當前字元 
            }
            else
            {
                while(!check_devide(ch[i]))//跳到下一個開始位置 
                {
                    temp+=ch[i];
                    i++;
                }
                cout<<"Error:"<<temp<<endl; 
                ofs<<"Error:"<<temp<<endl; 
                i--;
            }
        }
        else if(check_devide(ch[i]))//檢測運算子 
        {
            temp+=ch[i];
            map<string,int>::iterator iter=mp.find(temp);
            if(check_devide(ch[i+1]))
            {
                string temp1=temp+ch[i+1];
                map<string,int>::iterator iter1=mp.find(temp1);
                if(iter1!=mp.end())
                {
                    cout<<'<'<<temp1<<','<<iter1->second<<'>'<<endl;
                    ofs<<'<'<<temp1<<','<<iter1->second<<'>'<<endl;
                    i++;
                }
                else
                {
                    cout<<'<'<<temp<<','<<iter->second<<'>'<<endl;
                    ofs<<'<'<<temp<<','<<iter->second<<'>'<<endl;
                }
            }
            else
            {
               ofs<<'<'<<temp<<','<<iter->second<<'>'<<endl;
               cout<<'<'<<temp<<','<<iter->second<<'>'<<endl;
            }
        }
        else if(check_letter(ch[i]))//檢測識別符號或關鍵字 
        {
            while(!check_devide(ch[i])&&i<len)
            {
                temp+=ch[i];
                i++;
            }
            i--;
            map<string,int>::iterator iter=mp.find(temp);
            if(iter!=mp.end())
            {
                cout<<'<'<<temp<<','<<iter->second<<'>'<<endl;
                ofs<<'<'<<temp<<','<<iter->second<<'>'<<endl;
            }
            else
            {
                cout<<'<'<<temp<<','<<"28"<<'>'<<endl;
                ofs<<'<'<<temp<<','<<"28"<<'>'<<endl;
            }
        }
        else
        {
            cout<<"Error:undefined"<<endl;
            ofs<<"Error:undefined"<<endl;
        }
    }
    ofs.close();
    return 0;
}