Simplified Regular Expression Parser
阿新 • • 發佈:2019-01-26
一、題目如下:
--------------------------
Write a parser for a simplified regular expression
On an alphabet set [a-z], a simplified regular expression is much simpler than the normal regular expression.
It has only two meta characters: '.' and '*'.
'.' -- exact one arbitrary character match.
'*' -- zero or more arbitrary character match.
--------------------------
具個例子,表示式 ab.*d 能匹配 'abcd', 'abccd', 'abad'。 不能匹配'abc', ' '(空字串), 'abd'。
二、解法:
解析給定的表示式
生成對應的 NFA(Nondeterministic Finite Automation)
NFA轉化為DFA(Deterministic Finite Automation)
利用DFA判斷輸入字串
不懂正則表示式? 參考http://deerchao.net/tutorials/regex/regex.htm
不懂NFA? 參考http://planning.cs.uiuc.edu/node558.html
不懂DFA?參考http://lambda.uta.edu/cse5317/notes/node8.html
三、相關程式碼:
FiniteAutomata.java 和State.java,隨手寫寫,請選擇性參考。
FiniteAutomata.java
State.java
--------------------------
Write a parser for a simplified regular expression
On an alphabet set [a-z], a simplified regular expression is much simpler than the normal regular expression.
It has only two meta characters: '.' and '*'.
'.' -- exact one arbitrary character match.
'*' -- zero or more arbitrary character match.
--------------------------
具個例子,表示式 ab.*d 能匹配 'abcd', 'abccd', 'abad'。 不能匹配'abc', ' '(空字串), 'abd'。
二、解法:
解析給定的表示式
生成對應的 NFA(Nondeterministic Finite Automation)
NFA轉化為DFA(Deterministic Finite Automation)
利用DFA判斷輸入字串
不懂正則表示式? 參考http://deerchao.net/tutorials/regex/regex.htm
不懂NFA? 參考http://planning.cs.uiuc.edu/node558.html
不懂DFA?參考http://lambda.uta.edu/cse5317/notes/node8.html
三、相關程式碼:
FiniteAutomata.java 和State.java,隨手寫寫,請選擇性參考。
FiniteAutomata.java
package parser;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;
public class FiniteAutomata {
private State start;
private final static char[] inputs;
static {
inputs = new char[26];
for (char i = 0; i < 26; i++) {
inputs[i] = (char) ('a' + i);
}
}
private final char episilon = 0;
private char stateId;
public void compile(String pattern) {
this.stateId = 'A';
State NFA = toNFA(pattern);
this.start = convertToDFA(NFA);
}
private State convertToDFA(State NFA) {
Map<Set<State>, State> cache = new HashMap<Set<State>, State>();
Queue<Set<State>> queue = new LinkedList<Set<State>>();
// start state of DFA
Set<State> start = episilon(NFA);
State starter = nextState();
cache.put(start, starter);
queue.offer(start);
while (!queue.isEmpty()) {
Set<State> currentEpisilon = queue.poll();
State current = cache.get(currentEpisilon);
// create new State
for (char input : inputs) {
Set<State> nextEpisilon = move(currentEpisilon, input);
if (nextEpisilon.isEmpty()) {
continue;
}
State next;
if (!cache.containsKey(nextEpisilon)) {
next = nextState();
cache.put(nextEpisilon, next);
queue.offer(nextEpisilon);
} else {
next = cache.get(nextEpisilon);
}
boolean isAccept = containsAcceptState(nextEpisilon);
next.setAccept(isAccept);
current.setTransition(input, next);
}
}
return starter;
}
private Set<State> move(Set<State> currentEpisilon, char input) {
Set<State> result = new HashSet<State>();
for (State s : currentEpisilon) {
State next = s.transit(input);
if (next != null) {
result.add(next);
}
}
return episilon(result);
}
private boolean containsAcceptState(Set<State> nextEpisilon) {
for (State state : nextEpisilon) {
if (state.isAccept()) {
return true;
}
}
return false;
}
private Set<State> episilon(State s) {
Set<State> cache = new HashSet<State>();
cache.add(s);
return episilon(s, cache);
}
private Set<State> episilon(Set<State> states) {
Set<State> cache = new HashSet<State>();
for (State s : states) {
cache.add(s);
cache.addAll(episilon(s, cache));
}
return cache;
}
private Set<State> episilon(State s, Set<State> cache) {
State next = s.transit(episilon);
if (next != null && !cache.contains(next)) {
cache.add(next);
cache.addAll(episilon(s, cache));
}
return cache;
}
private State toNFA(String pattern) {
char[] expr = pattern.toCharArray();
State currentState = nextState();
State starter = currentState;
for (char c : expr) {
if (c == '.') {
State nextState = nextState();
// add each transition for all inputs
for (char i : inputs) {
currentState.setTransition(i, nextState);
}
currentState = nextState;
} else if (c == '*') {
State nextState = nextState();
// add each transition for all inputs
for (char i : inputs) {
currentState.setTransition(i, nextState);
}
currentState.setTransition(episilon, nextState);
nextState.setTransition(episilon, currentState);
currentState = nextState;
} else if (c >= 'a' && c <= 'z') {
State nextState = nextState();
currentState.setTransition(c, nextState);
currentState = nextState;
} else {
throw new RuntimeException("Unrecognized expression");
}
}
currentState.setAccept(true);
return starter;
}
private State nextState() {
return new State(stateId++);
}
public void constructDFA(Map<State, Map<Character, State>> mapping) {
Iterator<Map.Entry<State, Map<Character, State>>> it = mapping
.entrySet().iterator();
while (it.hasNext()) {
Entry<State, Map<Character, State>> entry = it.next();
State state = entry.getKey();
Map<Character, State> transition = entry.getValue();
state.setTransition(transition);
}
}
public boolean match(String c) {
char[] candidate = c.toCharArray();
for (char d : candidate) {
start = start.transit(d);
if (start == null) {
return false;
}
}
return start.isAccept();
}
public static String[] patterns = { "abc", "*", "*abc", "*abc", "a*bc",
"a*bc", "a*", "a*", "a*", "a*", "*abc*", "*****", "...", ".*",
".bc*", ".b*c*a", "*", "abc", "*a", "a", ".a*c", "a.*b", "..", "",
"" };
public static String[] candidates = { "abc", "abc", "abc", "aaabbbabc",
"aaabbbabc", "abc", "abc", "a", "aa", "abcdef", "abc", "abc",
"abc", "abc", "abc", "abca", "", "abcd", "abcd", "", "abc", "abc",
"abc", "", "abc" };
public static void main(String[] args) {
FiniteAutomata fa = new FiniteAutomata();
for (int i = 0; i < patterns.length; i++) {
fa.compile(patterns[i]);
System.out.println(fa.match(candidates[i]));
}
}
}
State.java
package parser;
import java.util.HashMap;
import java.util.Map;
public class State {
private boolean accept = false;
private char id;
private Map<Character, State> mapping = new HashMap<Character, State>();
public State(char c) {
this.id = c;
}
public State(char c, boolean isAccept) {
this.id = c;
this.accept = isAccept;
}
public State transit(char input) {
return mapping.get(input);
}
public void setTransition(char input, State next) {
this.mapping.put(input, next);
}
public void setTransition(Map<Character, State> mapping) {
this.mapping = mapping;
}
public boolean isAccept() {
return this.accept;
}
public void setAccept(boolean b) {
this.accept = b;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + id;
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
State other = (State) obj;
if (id != other.id)
return false;
return true;
}
@Override
public String toString() {
return "State [id=" + id + "]";
}
}