1. 程式人生 > >DFA,NFA,GNFA轉化為RG有窮自動機轉正則表示式

DFA,NFA,GNFA轉化為RG有窮自動機轉正則表示式

一個NFA轉RE(正則表示式)的簡單Java實現:

package hwk2;

//import com.alibaba.fastjson.JSON;

/**
 * Converts an NFA into a regular expression by state elimination on a GNFA.
 *
 * <p>The automaton is described by the public static fields {@link #states},
 * {@link #tokens} and {@link #thegma}. The first entry of {@code states} is the
 * added start state and the last entry is the added accept state; every state in
 * between is eliminated by {@link #convert()}. In the produced expression
 * {@code U} denotes union and {@code *} denotes Kleene star; transition labels
 * (including the label used for empty moves, {@code "e"} in the bundled data)
 * are copied into the result verbatim.
 *
 * <p>Typical use: set the fields, call {@link #initrelation()}, then
 * {@link #convert()}. All state is static and mutable, so the class is not
 * safe for concurrent use.
 */
public class Nfa2Re {
    // Step 1: the extra start state "S" and the extra accept state "A" are
    // already added around the states of the original NFA.
    public static String[] states = new String[] {"S", "q1", "q2", "q3", "q4", "A"};

    // Transition labels; column j of every row in thegma belongs to tokens[j].
    public static String[] tokens = new String[] {"0", "1", "e"};

    // Transition table: thegma[i][j] lists the states reachable from states[i]
    // on label tokens[j].
    public static String[][][] thegma = new String[][][] {
        { {}, {}, {"q1"} },
        { {"q1"}, {"q1", "q2"}, {} },
        { {"q3"}, {}, {"q3"} },
        { {}, {"q4"}, {} },
        { {"q4"}, {"q4"}, {"A"} },
        { {}, {}, {} }
    };

    // Start state and accepting states of the original NFA. Neither field is
    // read by the conversion in this class; the S/A wiring in thegma is what
    // the algorithm uses.
    public static String q1 = "q1";
    public static String[] accept = new String[] {"q4"};

    // Matrix R: relation2[i][j] is the expression labelling the GNFA edge from
    // states[i] to states[j], or null when there is no such edge.
    public static String[][] relation2 = null;

    /**
     * Converts the built-in NFA, then loads the second sample NFA and converts
     * that one too, printing each resulting expression on its own line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        initrelation();                 // build matrix R for the default NFA
        System.out.println(convert());  // print the conversion result
        testdata();                     // load another NFA to test the program
        initrelation();                 // rebuild matrix R for the new NFA
        System.out.println(convert());
    }

    /**
     * Builds {@link #relation2} from {@link #states}, {@link #tokens} and
     * {@link #thegma}. Parallel edges between the same pair of states are
     * merged into a parenthesised union, e.g. {@code (0U1)}.
     *
     * @throws IllegalArgumentException if {@code thegma} has fewer rows than
     *     there are states, a row has fewer columns than there are tokens, or
     *     a transition targets a state that is not listed in {@code states}
     */
    public static void initrelation() {
        int stateCount = states.length;
        if (thegma.length < stateCount) {
            throw new IllegalArgumentException(
                    "thegma has " + thegma.length + " rows but there are " + stateCount + " states");
        }
        String[][] matrix = new String[stateCount][stateCount];
        for (int from = 0; from < stateCount; from++) {
            if (thegma[from].length < tokens.length) {
                throw new IllegalArgumentException(
                        "thegma row for state " + states[from] + " has fewer columns than tokens");
            }
            for (int t = 0; t < tokens.length; t++) {
                String label = tokens[t];
                for (String target : thegma[from][t]) {
                    int to = indexOf(states, target);
                    if (to < 0) {
                        // Without this check the -1 index surfaces as an
                        // ArrayIndexOutOfBoundsException with no hint of the cause.
                        throw new IllegalArgumentException(
                                "transition from " + states[from] + " on " + label
                                        + " targets unknown state " + target);
                    }
                    if (matrix[from][to] == null) {
                        matrix[from][to] = label;
                    } else {
                        matrix[from][to] = "(" + matrix[from][to] + "U" + label + ")";
                    }
                }
            }
        }
        relation2 = matrix;
    }

    /**
     * Eliminates every state except the first and the last one and returns the
     * expression left on the edge between them. {@link #relation2} is consumed:
     * rows and columns of eliminated states are set to {@code null}.
     *
     * @return the resulting expression, or {@code null} when no path leads from
     *     the first state to the last state
     * @throws IllegalStateException if {@link #initrelation()} has not been
     *     called for the current {@code states}
     */
    public static String convert() {
        int stateCount = states.length;
        if (relation2 == null || relation2.length != stateCount) {
            throw new IllegalStateException("call initrelation() before convert()");
        }
        for (int removed = 1; removed < stateCount - 1; removed++) {
            // A self-loop on the removed state becomes a starred sub-expression.
            String loop = relation2[removed][removed];
            if (loop != null) {
                if (!isClosed(loop)) {
                    loop = "(" + loop + ")";
                }
                loop += "*";
                relation2[removed][removed] = loop;
            }

            // Reroute every path from -> removed -> to as a direct edge from -> to.
            for (int from = 0; from < stateCount; from++) {
                if (from == removed) {
                    continue;
                }
                String incoming = relation2[from][removed];
                if (incoming == null) {
                    continue;
                }
                for (int to = 0; to < stateCount; to++) {
                    if (to == removed) {
                        continue;
                    }
                    String outgoing = relation2[removed][to];
                    if (outgoing == null) {
                        continue;
                    }
                    String path = (loop == null) ? incoming + outgoing : incoming + loop + outgoing;
                    String existing = relation2[from][to];
                    if (existing == null || existing.equals("")) {
                        relation2[from][to] = path;
                    } else {
                        // Both operands must be atomic before they are joined with U.
                        if (!isClosed(path)) {
                            path = "(" + path + ")";
                        }
                        if (!isClosed(existing)) {
                            existing = "(" + existing + ")";
                        }
                        relation2[from][to] = "(" + existing + "U" + path + ")";
                    }
                }
            }

            // The removed state no longer has any edges.
            for (int other = 0; other < stateCount; other++) {
                relation2[removed][other] = null;
                relation2[other][removed] = null;
            }
        }
        return relation2[0][stateCount - 1];
    }

    /**
     * Finds the first position of an element in an array using {@code equals}.
     *
     * @param a the array to search
     * @param b the element to look for; may be {@code null}
     * @return the index of the first match, or {@code -1} if there is none
     */
    public static int indexOf(Object[] a, Object b) {
        for (int i = 0; i < a.length; i++) {
            boolean same = (b == null) ? a[i] == null : b.equals(a[i]);
            if (same) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Tells whether an expression can be used as an operand without adding
     * parentheses: it is either a single token from {@link #tokens}, or it is
     * wrapped in one pair of parentheses that spans the whole string.
     *
     * @param str the expression to inspect
     * @return {@code true} if no extra parentheses are needed
     */
    public static boolean isClosed(String str) {
        if (indexOf(tokens, str) >= 0) {
            return true;
        }
        if (!str.startsWith("(") || !str.endsWith(")")) {
            return false;
        }
        int depth = 0;
        for (int pos = 0; pos < str.length(); pos++) {
            char ch = str.charAt(pos);
            if (ch == '(') {
                depth++;
            } else if (ch == ')') {
                depth--;
            }
            if (depth == 0) {
                // The opening parenthesis closes here; the expression is closed
                // only when that happens on the very last character.
                return pos == str.length() - 1;
            }
        }
        // The first parenthesis is never matched: not a closed expression.
        return false;
    }

    /**
     * Tells whether an operand of a concatenation contains a top-level style
     * union such as {@code aUb} that is not already wrapped in parentheses.
     *
     * @param str the expression to inspect
     * @return {@code true} if the expression contains {@code U} and is not closed
     */
    public static boolean needCup(String str) {
        return str.indexOf('U') >= 0 && !isClosed(str);
    }

    /**
     * Replaces {@link #states}, {@link #tokens} and {@link #thegma} with a
     * second sample NFA. {@link #initrelation()} must be called afterwards.
     */
    public static void testdata() {
        states = new String[] {"S", "q0", "q1", "q2", "q3", "A"};
        tokens = new String[] {"a", "b", "e"};
        thegma = new String[][][] {
            { {}, {}, {"q0"} },
            { {"q1"}, {}, {} },
            { {"q3"}, {"q2"}, {} },
            { {"q3"}, {"q0"}, {} },
            { {}, {}, {"A"} },
            { {}, {}, {} }
        };
    }
}

思路為:NFA的形式化作為程式輸入,以NFA的狀態間的鄰接矩陣(存放兩點間的轉移字串)為迭代基礎,按照NFA轉化為GNFA的過程,進行狀態刪去,同時更新被刪除節點(狀態)的前驅後繼之間的字串。更新考慮的內容依次為:

  • 被刪節點若有環(自己到達自己的字串不為空),則取出環,改寫為 (str)*
  • 兩兩拼接被刪節點前後的字串,如果有環,則應把環拼接到兩串之間
  • 拼接好的串使用U併入前後兩點之間的鄰接矩陣的對應位置

執行效果如下:
在這裡插入圖片描述
在這裡插入圖片描述
在這裡插入圖片描述

有待改進的地方為:

  1. 鄰接矩陣是稀疏矩陣,可以優化儲存方式,以降低儲存空間
  2. 加括號可以優化,以減少重複的括號
  3. ε 可以設法刪除優化
  4. 得到的正則表示式也許還可以進一步簡化
  5. 程式更完善的輸入輸出