編譯原理(九) LR(0)文法分析法(演算法描述和C++程式碼實現)
後期DEBUG發現make_set函式和make_go存在問題,於2015年12月4日更新了程式碼,見諒
概念梳理
最左推導:每一步替換最左邊的非終結符
最右推導:每一步替換最右邊的非終結符,最右推導稱為規範推導
短語:令G是一個文法,S是文法的開始符號,假定αβδ是文法G的一個句型,如果有
直接短語:令G是一個文法,S是文法的開始符號,假定αβδ是文法G的一個句型,如果有
注意:短語和直接短語的區別在於第二個條件, 直接短語中的第二個條件表示有文法規則 A
控制代碼:一個句型的最左直接短語稱為該句型的控制代碼。
- 控制代碼特徵:
- (1) 它是直接短語,即某規則右部。
- (2) 它具有最左性。
規範歸約:文法的最右推導為規範推導,自底向上分析是自頂向下最右推導的逆過程,叫規範歸約
活字首:指規範句型的一個字首,這種字首不含控制代碼之後任何符號。之所以稱為活字首,是因為在右邊新增一些符號之首,就可以使它稱為一個規範句型。
專案:對於一個文法G,我們首先要構造一個NFA,它能識別G的所有活字首。這個NFA的每一個狀態是下面定義的一個“專案”。
專案分類:
- 歸約專案
凡圓點在最右的專案,如A→α•稱為一個“歸約專案” - 接受專案
對文法的開始符號S’的歸約專案,如S’→α•稱為“接受”專案。 - 移進專案
形如A→α•aβ的專案,其中a為終結符,稱為“移進”專案。 - 待約專案
形如A→α•Bβ的專案,其中B為非終結符,稱為“待約”專案。
專案規範族:假定I是文法G’的任一專案集,定義和構造I的閉包CLOSURE(I)的辦法是:
- I的任何專案都屬於CLOSURE(I);
- 若A→α•Bβ屬於CLOSURE(I),那麼,對任何關於B的產生式B→γ,專案B→•γ也屬於CLOSURE(I);
LR(0)文法:假如一個文法G的拓廣文法G’的活字首識別自動機的每個狀態(專案集)不存在下述情況:
- 既含移進專案又含歸約專案。
- 含多個歸約專案。
則稱G是一個LR(0)文法。換言之,LR(0)文法規範族的每個專案集不包含任何衝突專案。
拓廣文法:假定文法G是一個以S為開始符號的文法,我們構造一個文法G’,它包含整個G,但它引進了一個不出現在G中的非終結符S’,並加進一個新產生式S’→S,而這個S’是G’的開始符號。那麼我們稱G’是G的拓廣文法。
函式GO(I,X):函式GO(I,X)是一個狀態轉換函式。
- 第一個變元I是一個專案集,
- 第二個變元X是一個文法符號。
- 函式值GO(I,X)定義為GO(I,X)=CLOSURE(J),其中J={任何形如A→αX•β的專案 | A→α•Xβ屬於I}
演算法描述
專案集構造演算法
列舉每個規範句型,然後列舉”
專案集規範族構造演算法
假定I是文法G’的任一專案集,定義和構造I的閉包CLOSURE(I)的辦法是:
I的任何專案都屬於CLOSURE(I);
若A→α•Bβ屬於CLOSURE(I),那麼,對任何關於B的產生式B→γ,專案B→•γ也屬於CLOSURE(I);
重複執行上述兩步驟直至CLOSURE(I)不再增大為止。
Go(I,a)函式構造演算法
遍歷所有的專案,如果任意兩個專案之間存在邊(有向),那麼這兩個專案所在的專案規範族之間連上對應的有向邊。
LR(0)分析表構造演算法
假定專案集規範族C={I0,I1,…,In}。令每一個專案集Ik的下標k作為分析器的狀態。分析表的ACTION子表和GOTO子表可按如下方法構造
- 令那個包含專案S’→•S的集合Ik的下標k為分析器的初態。
- 若專案A→α•aβ屬於Ik且GO(Ik , a)= Ij,a為終結符,置ACTION[k,a]為“把(j,a)移進棧”,簡記為“sj”。
- 若專案A→α•屬於Ik,對任何終結符a(或結束符#),置ACTION[k,a]為“用產生式A→α進行歸約”,簡記為“rj”(假定產生式A→α是文法G’的第j個產生式)。
- 若專案S’→S•屬於Ik,則置ACTION[k,#]為“接受”,簡記為“acc”。
- 若GO(Ik , A)= Ij,A為非終結符,則置GOTO[k,A]=j。
- 分析表中凡不能用規則1至4填入資訊的空白格均填上“報錯標誌”。
LR(0)分析法的分析過程
- 遍歷輸入字串,對於每一個字元,獲取當前狀態棧的頂部的狀態值,通過查詢action表獲取的當前的操作是移進、規約還是接受
- 如果當前操作是移進,將新的狀態放入狀態棧當中,當移入的字元放入符號棧中。
- 如果當前操作是規約,那麼將需要規約的部分的狀態從狀態棧中彈出,將規約後的狀態放入狀態棧,將規約後的左部放入符號棧,當前的字元不向下推進
- 如果接收,則結束
程式碼實現
#include <iostream>
#include <cstdio>
#include <algorithm>
#include <cstring>
#include <cctype>
#include <vector>
#include <string>
#include <queue>
#include <map>
#include <sstream>
#define MAX 507
#define DEBUG
using namespace std;
class WF
{
public:
string left,right;
int back;
int id;
WF ( char s1[] , char s2[] , int x , int y )
{
left = s1;
right = s2;
back = x;
id = y;
}
WF ( const string& s1 , const string& s2 , int x , int y )
{
left = s1;
right = s2;
back = x;
id = y;
}
bool operator < ( const WF& a ) const
{
if ( left == a.left )
return right < a.right;
return left < a.left;
}
bool operator == ( const WF& a ) const
{
return ( left == a.left )&& ( right == a.right );
}
void print ( )
{
printf ( "%s->%s\n" , left.c_str() , right.c_str() );
}
};
class Closure
{
public:
vector<WF> element;
void print ( string str )
{
printf ( "%-15s%-15s\n" , "" , str.c_str());
for ( int i = 0 ; i < element.size() ; i++ )
element[i].print();
}
bool operator == ( const Closure& a ) const
{
if ( a.element.size() != element.size() ) return false;
for ( int i = 0 ; i < a.element.size() ; i++ )
if ( element[i] == a.element[i] ) continue;
else return false;
return true;
}
};
struct Content
{
int type;
int num;
string out;
Content(){ type = -1; }
Content ( int a , int b )
:type(a),num(b){}
};
vector<WF> wf;
map<string,vector<int> > dic;
string start = "S";
vector<Closure> collection;
vector<WF> items;
char CH = '$';
int go[MAX][MAX];
int to[MAX];
vector<char> V;
bool used[MAX];
Content action[MAX][MAX];
int Goto[MAX][MAX];
void make_item ( )
{
memset ( to , -1 , sizeof ( -1 ) );
for ( int i = 0 ; i < wf.size() ; i++ )
for ( int j = 0 ; j <= wf[i].right.length() ; j++ )
{
string temp = wf[i].right;
temp.insert ( temp.begin()+j , CH );
dic[wf[i].left].push_back ( items.size() );
if ( j )
to[items.size()-1] = items.size();
items.push_back ( WF ( wf[i].left , temp , i , items.size()) );
}
#ifdef DEBUG
puts("-------------------------專案表-------------------------");
for ( int i = 0 ; i < items.size() ; i++ )
printf ( "%s->%s\n" , items[i].left.c_str() , items[i].right.c_str() );
puts("--------------------------------------------------------");
#endif
}
void make_set ( )
{
bool has[MAX];
for ( int i = 0 ; i < items.size() ; i++ )
if ( items[i].left[0] == 'S' && items[i].right[0] == CH )
{
Closure temp;
string& str = items[i].right;
vector<WF>& element = temp.element;
element.push_back ( items[i] );
int x = 0;
for ( x = 0 ; x < str.length() ; x++ )
if ( str[x] == CH )
break;
/*if ( x != str.length()-1 )
{
string tt = str.substr(x+1,1);
vector<int>& id = dic[tt];
for ( int j = 0 ; j < id.size() ; j++ )
{
int tx = id[j];
//items[tx].print();
if ( items[tx].right[0] == CH )
element.push_back ( items[tx] );
}
}*/
memset ( has , 0 , sizeof ( has ) );
has[i] = 1;
if ( x != str.length()-1 )
{
queue<string> q;
q.push( str.substr(x+1,1) );
while ( !q.empty() )
{
string u = q.front();
q.pop();
vector<int>& id = dic[u];
for( int j = 0 ; j < id.size() ; j++ )
{
int tx = id[j];
if ( items[tx].right[0] == CH )
{
if ( has[tx] ) continue;
has[tx] = 1;
if ( isupper(items[tx].right[1] ) )
q.push ( items[tx].right.substr(1,1));
element.push_back ( items[tx] );
}
}
}
}
collection.push_back ( temp );
}
for ( int i = 0 ; i < collection.size() ; i++ )
{
map<int,Closure> temp;
for ( int j = 0 ; j < collection[i].element.size() ; j++ )
{
string str = collection[i].element[j].right;
int x = 0;
for ( ; x < str.length() ; x++ )
if ( str[x] == CH ) break;
if ( x == str.length()-1 )
continue;
int y = str[x+1];
int ii;
//cout << i << "previous: " << str << endl;
str.erase ( str.begin()+x);
str.insert ( str.begin()+x+1 , CH );
//cout << i <<"after: " << str << endl;
WF cmp = WF ( collection[i].element[j].left , str , -1 , -1 );
for ( int k = 0 ; k< items.size() ; k++ )
if ( items[k] == cmp )
{
ii = k;
break;
}
//string& str1 = items[ii].right;
memset ( has , 0 , sizeof ( has ) );
vector<WF>& element = temp[y].element;
element.push_back ( items[ii] );
has[ii] = 1;
x++;
/*if ( x != str.length()-1 )
{
string tt = str.substr(x+1,1);
vector<int>& id = dic[tt];
for ( int j = 0 ; j < id.size() ; j++ )
{
int tx = id[j];
//items[tx].print();
if ( items[tx].right[0] == CH )
element.push_back ( items[tx] );
}
}*/
if ( x != str.length()-1 )
{
queue<string> q;
q.push( str.substr(x+1,1) );
while ( !q.empty() )
{
string u = q.front();
q.pop();
vector<int>& id = dic[u];
for( int j = 0 ; j < id.size() ; j++ )
{
int tx = id[j];
if ( items[tx].right[0] == CH )
{
if ( has[tx] ) continue;
has[tx] = 1;
if ( isupper(items[tx].right[1] ) )
q.push ( items[tx].right.substr(1,1));
element.push_back ( items[tx] );
}
}
}
}
}
map<int,Closure>::iterator it = temp.begin();
for ( ; it != temp.end() ; it++ )
collection.push_back ( it->second );
for ( int i = 0 ; i < collection.size() ; i++ )
sort ( collection[i].element.begin() , collection[i].element.end() );
for ( int i = 0 ; i < collection.size() ; i++ )
for ( int j = i+1 ; j < collection.size() ; j++ )
if ( collection[i] == collection[j] )
collection.erase ( collection.begin()+j );
}
#ifdef DEBUG
puts ("-------------CLOSURE---------------------");
stringstream sin;
for ( int i = 0 ; i < collection.size() ; i++ )
{
sin.clear();
string out;
sin <<"closure-I" << i;
sin >> out;
collection[i].print ( out );
}
puts("");
#endif
}
void make_V ( )
{
memset ( used , 0 , sizeof ( used ) );
for ( int i = 0 ; i < wf.size() ; i++ )
{
string& str = wf[i].left;
for ( int j = 0 ; j < str.length() ; j++ )
{
if ( used[str[j]] ) continue;
used[str[j]] = 1;
V.push_back ( str[j] );
}
string& str1 = wf[i].right;
for ( int j = 0 ; j < str1.length() ; j++ )
{
if ( used[str1[j]] ) continue;
used[str1[j]] = 1;
V.push_back ( str1[j] );
}
}
sort ( V.begin() , V.end() );
V.push_back ( '#' );
}
void make_cmp ( vector<WF>& cmp1 , int i , char ch )
{
for ( int j = 0 ; j < collection[i].element.size() ; j++ )
{
string str = collection[i].element[j].right;
int k;
for ( k = 0 ; k < str.length() ; k++ )
if ( str[k] == CH )
break;
if ( k != str.length() - 1 && str[k+1] == ch )
{
str.erase ( str.begin()+k);
str.insert ( str.begin()+k+1 , CH );
cmp1.push_back ( WF ( collection[i].element[j].left , str , -1 , -1 ) );
}
}
sort ( cmp1.begin() , cmp1.end() );
}
void make_go ( )
{
memset ( go , -1 , sizeof ( go ) );
int m = collection.size();
/*for ( int i = 0 ; i < m ; i++ )
for ( int j = 0 ; j < collection[i].element.size() ; j++ )
{
string left = collection[i].element[j].left;
string str = collection[i].element[j].right;
int x = 0;
for ( ; x < str.length() ; x++ )
if ( str[x] == CH ) break;
if ( x == str.length()-1 )
continue;
int y = str[x+1];
//cout << "before : " << str << endl;
str.erase ( str.begin()+x);
str.insert ( str.begin()+x+1 , CH );
//cout << "after : " << str << endl;
WF cmp = WF ( collection[i].element[j].left , str , -1 , -1 );
for ( int k = 0 ; k < m ; k++ )
{
bool flag = false;
for ( int t = 0 ; t < collection[k].element.size() ; t++ )
{
if ( cmp == collection[k].element[t] )
{
flag = true;
break;
}
}
if ( flag )
{
go[i][y] = k;
}
}
}*/
for ( int t = 0 ; t < V.size() ; t++ )
{
char ch = V[t];
for ( int i = 0 ; i < m ; i++ )
{
vector<WF> cmp1;
make_cmp ( cmp1 , i , ch );
cout << cmp1.size() << endl;
if ( cmp1.size() == 0 ) continue;
for ( int j = 0 ; j < m ; j++ )
{
vector<WF> cmp2;
for ( int k = 0 ; k < collection[j].element.size() ; k++ )
{
string& str = collection[j].element[k].right;
int x;
for ( x = 0 ; x < str.length() ; x++ )
if ( str[x] == CH )
break;
if ( x && str[x-1] == ch )
cmp2.push_back ( WF( collection[j].element[k].left , str , -1 , -1 ) );
}
sort ( cmp2.begin() , cmp2.end() );
cout << cmp2.size() << endl;
bool flag = true;
if ( cmp2.size() != cmp1.size() ) continue;
cout << cmp1.size() << endl;
for ( int k = 0 ; k < cmp1.size() ; k++ )
if ( cmp1[k] == cmp2[k] ) continue;
else flag = false;
cout << "out " << endl;
if ( flag )
go[i][ch] = j;
}
//cout << "YES" << endl;
}
}
#ifdef DEBUG
puts ("---------------EDGE----------------------");
stringstream sin;
string out;
for ( int i = 0 ; i < m ; i++ )
for ( int j = 0 ; j < m ; j++ )
for ( int k = 0 ; k < MAX ; k++ )
if ( go[i][k] == j )
{
sin.clear();
sin << "I" << i << "--" <<(char)(k)<<"--I"<<j;
sin >> out;
printf ( "%s\n" , out.c_str() );
}
#endif
}
void make_table ( )
{
memset ( Goto , -1 , sizeof ( Goto ) );
/*memset ( used , 0 , sizeof ( used ) );
for ( int i = 0 ; i < wf.size() ; i++ )
{
string& str = wf[i].left;
for ( int j = 0 ; j < str.length() ; j++ )
{
if ( used[str[j]] ) continue;
used[str[j]] = 1;
V.push_back ( str[j] );
}
string& str1 = wf[i].right;
for ( int j = 0 ; j < str1.length() ; j++ )
{
if ( used[str1[j]] ) continue;
used[str1[j]] = 1;
V.push_back ( str1[j] );
}
}
sort ( V.begin() , V.end() );
V.push_back ( '#' );*/
//write s to the table
for( int i = 0 ; i < collection.size() ; i++ )
for ( int j = 0 ; j < V.size() ; j++ )
{
char ch = V[j];
int x = go[i][ch];
if ( x == -1 ) continue;
if ( !isupper(ch) )
action[i][ch] = Content ( 0 , x );
else
Goto[i][ch] = x;
}
//write r and acc to the table
for ( int i = 0 ; i < collection.size() ; i++ )
for ( int j = 0 ; j < collection[i].element.size() ; j++ )
{
WF& tt = collection[i].element[j];
if ( tt.right[tt.right.length()-1] == CH )
{
if ( tt.left[0] == 'S' )
action[i]['#'] = Content ( 2 , -1 );
else
for ( int k = 0 ; k < V.size() ; k++ )
{
int y = V[k];
//cout << "YES " << endl;
action[i][y] = Content ( 1, tt.back );
}
}
}
#ifdef DEBUG
puts ( "------------------------------------------LR(0)分析表--------------------------------------------------------" );
printf ( "%10s%5c%5s" , "|" , V[0] , "|");
for ( int i = 1 ; i < V.size() ; i++ )
printf ( "%5c%5s" , V[i] , "|" );
puts ("");
for ( int i = 0 ; i < (V.size()+1)*10 ; i++ )
printf ( "-" );
puts("");
stringstream sin;
for ( int i = 0 ; i < collection.size() ; i++ )
{
printf ( "%5d%5s" , i , "|" );
for ( int j = 0 ; j < V.size() ; j++ )
{
char ch = V[j];
if ( isupper(ch) )
{
if ( Goto[i][ch] == -1 )
printf ( "%10s" , "|" );
else
printf ( "%5d%5s" , Goto[i][ch] , "|" );
}
else
{
sin.clear();
if ( action[i][ch].type == -1 )
printf ( "%10s" , "|" );
else
{
Content& temp = action[i][ch];
if ( temp.type == 0 )
sin << "S";
if ( temp.type == 1 )
sin << "R";
if ( temp.type == 2 )
sin << "acc";
if ( temp.num != -1 )
sin << temp.num;
sin >> temp.out;
printf ( "%7s%3s" , temp.out.c_str() , "|" );
}
}
}
puts ("");
}
for ( int i = 0 ; i < (V.size()+1)*10 ; i++ )
printf ( "-" );
puts("");
#endif
}
void print ( string s1 , string s2 , string s3 , string s4 , string s5 , string s6 , string s7 )
{
printf ( "%-15s|%-15s%-15s%-20s|%-15s%-15s%-15s\n" , s1.c_str() , s2.c_str() , s3.c_str() ,s4.c_str(),s5.c_str(),
s6.c_str() , s7.c_str() );
}
string get_steps ( int x )
{
stringstream sin;
sin << x;
string ret;
sin >> ret;
return ret;
}
template<class T>
string get_stk ( vector<T> stk )
{
stringstream sin;
for ( int i = 0 ; i < stk.size() ; i++ )
sin << stk[i];
string ret;
sin >> ret;
return ret;
}
string get_shift ( WF& temp )
{
stringstream sin;
sin << "reduce(" << temp.left << "->" << temp.right <<")";
string out;
sin >> out;
return out;
}
void analyse ( string src )
{
print ( "steps","op-stack" ,"input","operation","state-stack" , "ACTION" , "GOTO" );
vector<char> op_stack;
vector<int> st_stack;
src+= "#";
op_stack.push_back ( '#' );
st_stack.push_back ( 0 );
int steps= 1;
for ( int i = 0 ; i < src.length() ; i++ )
{
char u = src[i];
int top = st_stack[st_stack.size()-1];
Content& act = action[top][u];
//cout << "YES : " << i << " " << u << " " << top << " " << act.type << endl;
if ( act.type == 0 )
{
print ( get_steps ( steps++ ) , get_stk ( op_stack ) , src.substr(i), "shift", get_stk( st_stack ) , act.out , "" );
op_stack.push_back ( u );
st_stack.push_back ( act.num );
}
else if ( act.type == 1 )
{
WF& tt = wf[act.num];
int y = st_stack[st_stack.size()-tt.right.length()-1];
int x = Goto[y][tt.left[0]];
//cout << y << " " << tt.left[0] << " " << x << endl;
print ( get_steps ( steps++ ) , get_stk ( op_stack ) , src.substr(i) , get_shift(tt) ,get_stk( st_stack),act.out,get_steps(x));
for ( int j = 0 ; j < tt.right.length() ; j++ )
{
st_stack.pop_back();
op_stack.pop_back();
}
op_stack.push_back ( tt.left[0] );
st_stack.push_back ( x );
i--;
}
else if ( act.type == 2 )
{
print ( get_steps( steps++ ), get_stk( op_stack ) , src.substr(i) , "Accept" , get_stk(st_stack) , act.out , "" );
//i--;
}
else continue;
}
}
int main ( )
{
int n;
char s[MAX];
while ( ~scanf ( "%d" , &n ) )
{
for ( int i = 0 ; i < n ; i++ )
{
scanf ( "%s" , s );
int len = strlen(s),j;
for ( j = 0 ; j < len ; j++ )
if ( s[j] == '-' ) break;
s[j] = 0;
wf.push_back ( WF ( s , s+j+2 ,-1 , -1 ) );
#ifdef DEBUG
wf[wf.size()-1].print();
#endif
}
make_item();
make_set();
make_V();
make_go();
make_table();
analyse ( "abbcde" );
}
}
Input:
Output:
後期除錯
關於12月4日對於make_set函式和make_go函式的更正:
- 錯誤主要源自於自己對於概念的模糊,make_set過程中如果加入的新的產生式的右側”
- make_go是在對規範族之間建邊時,一定是通過一個字元的移進,所有與其相關的式子都符合才能建邊,而不是隻要有兩個規範族中有符合要求的規範式就能建邊