利用AC自動機進行關鍵字的提取和過濾
阿新 • • 發佈:2019-02-20
Test.javapackage com.AC.domain; import java.util.*; import java.io.*; import java.math.*; public class Patterns { private final Node root = new Node(); private List<Node> tree; public Patterns(List<Keyword> keywords){ tree = new ArrayList<Node> (); root.failureNode=root; tree.add(root); for(Keyword keyword : keywords){ addKeyword(keyword); } setFailNode(); } private void setFailNode() { // TODO Auto-generated method stub Queue<Node> queue = new LinkedList<Node>(); Node node =root; for (Node d1 : node.childrenList){ queue.offer(d1); } while (!queue.isEmpty()){ node = queue.poll(); if (node.childrenList!=null){ for (Node curNode : node.childrenList) { queue.offer(curNode); Node failNode = node.failureNode; while(!failNode.containsChild(curNode.character)){ failNode = failNode.failureNode; if(failNode==null||failNode.state==0) break; } if(failNode!=null&&failNode.containsChild(curNode.character)) { curNode.failureNode = failNode.getChild(curNode.character); curNode.addKeywords(curNode.failureNode.keywords); } } } } } private void addKeyword(Keyword keyword) { // TODO Auto-generated method stub char [] wordCharArr = keyword.getWord().toCharArray(); Node current = root; for(char currentChar : wordCharArr){ if(current.containsChild(currentChar)){ current = current.getChild(currentChar); } else{ Node node = new Node (currentChar,root); current.addChild(node); current=node; tree.add(node); } } current.addKeyword(keyword); } public List<Keyword> searchKeyword(String data,Integer category) { List<Keyword> matchResult = new ArrayList<Keyword>(); Node node = root; char[] chs = data.toCharArray(); for (int i=0;i<chs.length;i++){ while(node!=null&&!node.containsChild(chs[i])){ // if(node.state==0) break; node = node.failureNode; if(node==null||node.state==0) break; } if(node!=null&&node.containsChild(chs[i])) { node = node.getChild(chs[i]); if(node.keywords!=null){ for(Keyword pattern : node.keywords){ if(category == null){ // System.out.println(pattern.getWord()); matchResult.add(new Keyword(pattern.getWord())); } else{ if(pattern.getCategories().contains(category)){ matchResult.add(pattern); } } } } } } return matchResult; } }
附美團文章連結:http://tech.meituan.com/ac.htmlpackage com.AC.domain; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; public class Test { public static void main(String []args){ // abcd abc abe ae bc be bce cm kcabcmgh List<Keyword> keywords = new ArrayList<Keyword>(); List<Keyword> result = new ArrayList<Keyword> (); /* List<Keyword> re= new ArrayList<Keyword> (); re.clear(); Keyword a= new Keyword("abcd"); re.add(a); Keyword b= new Keyword("abc"); re.add(b); System.out.println(re.size());*/ Keyword a1= new Keyword(); a1.setWord("abcd"); keywords.add(a1); Keyword a2= new Keyword(); a2.setWord("abc"); keywords.add(a2); Keyword a3= new Keyword(); a3.setWord("abe"); keywords.add(a3); Keyword a5= new Keyword(); a5.setWord("ae"); keywords.add(a5); Keyword a6= new Keyword(); a6.setWord("bc"); keywords.add(a6); Keyword a7= new Keyword(); a7.setWord("be"); keywords.add(a7); Keyword a8= new Keyword(); a8.setWord("bce"); keywords.add(a8); Keyword a9= new Keyword(); a9.setWord("cm"); keywords.add(a9); Patterns patterns=new Patterns(keywords); result=patterns.searchKeyword("kcabcmgha", null); // System.out.println(result.size()); System.out.println("keys: "); for(Keyword key:result){ System.out.println(key.getWord()); } // System.out.println(result); } }