資料結構——Trie 字典樹 字首樹
阿新 • • 發佈:2018-12-18
一、什麼是Trie
Trie不同於二分搜尋樹、堆、線段樹等二叉樹結構,Trie是一棵多叉樹,是專為處理字串而設計的資料結構。典型使用場景:通訊錄的高效搜尋。
比如字典中有n條資料,如果使用平衡的二分搜尋樹,查詢的時間複雜度是O(logn):如果有100萬條資料的話,logn大約是20;如果有1億條資料的話,logn大約是27(以2為底,可參考2的N次方計算器)
如果使用Trie這種資料結構,查詢每條資料的時間複雜度和字典中一共有多少條資料沒有關係!是不是屌炸天呢?
Trie查詢的時間複雜度只與所查詢字串的長度有關,時間複雜度為:O(w),w為單詞的長度。
二、構建一個Trie
Trie的基本結構與新增方法:
/**
 * A trie (prefix tree) that stores a set of words one character per level.
 *
 * <p>Each node keeps its children in a {@link TreeMap} keyed by the next
 * character, plus a flag telling whether the path from the root to that node
 * spells a stored word.
 */
public class Trie {

    /** A single trie node. */
    private static class Node {
        /** True when the path from the root to this node is a stored word. */
        public boolean isWord;
        /** Child nodes, keyed by the character on the edge leading to them. */
        public TreeMap<Character, Node> next;

        public Node(boolean isWord) {
            this.isWord = isWord;
            next = new TreeMap<>();
        }

        public Node() {
            this(false);
        }
    }

    private Node root;
    private int size;

    /** Creates an empty trie consisting of the root node only. */
    public Trie() {
        root = new Node();
        size = 0;
    }

    /**
     * Returns the number of distinct words stored in the trie.
     *
     * @return the stored word count
     */
    public int getSize() {
        return this.size;
    }

    /**
     * Adds a word to the trie, walking it character by character and creating
     * any missing nodes along the way. Adding a word that is already stored
     * leaves the size unchanged.
     *
     * @param word the word to add
     * @throws NullPointerException if {@code word} is null
     */
    public void add(String word) {
        Node cur = root;
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            // Look the child up once; create it only when no edge for c exists yet.
            Node child = cur.next.get(c);
            if (child == null) {
                child = new Node();
                cur.next.put(c, child);
            }
            cur = child;
        }
        // After the loop cur is the node of the last character. It is not
        // necessarily a leaf (e.g. "pan" when "panda" is already stored), so the
        // word is recorded through the flag, and counted only the first time.
        if (!cur.isWord) {
            cur.isWord = true;
            size++;
        }
    }
}
判斷某個單詞在Trie中是否存在
public boolean contains(String word){ Node cur = root; for(int i = 0 ; i < word.length() ; i++){ char c = word.charAt(i); if(cur.next.get(c) == null) return false; cur = cur.next.get(c); } //迴圈結束後則表示到達了單詞結尾的字元 return cur.isWord; }
三、Trie字典樹的字首查詢
//Trie字典樹的字首查詢 public boolean isPrefix(String prefix){ Node cur = root; for(int i = 0 ; i < prefix.length() ; i++){ char c = prefix.charAt(i); if(cur.next.get(c) == null) return false; cur = cur.next.get(c); } return true; }
四、Trie字典樹搜尋和正則匹配
參考模型:
import java.util.TreeMap;
/**
* Your WordDictionary object will be instantiated and called as such:
* WordDictionary obj = new WordDictionary();
* obj.addWord(word);
* boolean param_2 = obj.search(word);
*/
// Trie-backed word store. Lookups may use '.' as a placeholder that matches
// any single character.
class WordDictionary {

    /** Trie node: child links keyed by character, plus an end-of-word flag. */
    private static final class Node {
        boolean isWord;
        final TreeMap<Character, Node> next = new TreeMap<>();
    }

    private Node root;

    /** Creates an empty dictionary that holds only the root node. */
    public WordDictionary() {
        root = new Node();
    }

    /**
     * Stores a word, creating whichever nodes along its path do not exist yet
     * and flagging the final node as the end of a word.
     */
    public void addWord(String word) {
        Node walker = root;
        for (char letter : word.toCharArray()) {
            Node child = walker.next.get(letter);
            if (child == null) {
                child = new Node();
                walker.next.put(letter, child);
            }
            walker = child;
        }
        walker.isWord = true;
    }

    /**
     * Tells whether the dictionary holds a word matching the given pattern,
     * where each '.' in the pattern stands for any one character.
     */
    public boolean search(String word) {
        return match(root, word, 0);
    }

    /**
     * Recursively matches the pattern from position {@code index} against the
     * subtree rooted at {@code node}.
     */
    private boolean match(Node node, String word, int index) {
        if (index == word.length()) {
            // Pattern consumed: a hit only if a word ends exactly here.
            return node.isWord;
        }

        char symbol = word.charAt(index);
        if (symbol == '.') {
            // Wildcard: try every child and succeed on the first that matches.
            for (Node child : node.next.values()) {
                if (match(child, word, index + 1)) {
                    return true;
                }
            }
            return false;
        }

        Node child = node.next.get(symbol);
        return child != null && match(child, word, index + 1);
    }
}
五、LeetCode鍵值對映——以指定字首開頭的鍵的值的總和
對應LeetCode 677題 鍵值對映
程式碼實現:
/**
 * Key/value store built on a trie that can total the values of all keys
 * beginning with a given prefix.
 */
class MapSum {

    /** Trie node: the value stored at this position (0 by default) and its child links. */
    private static final class Node {
        int value;
        final TreeMap<Character, Node> next = new TreeMap<>();
    }

    private Node root;

    /** Creates an empty map that holds only the root node. */
    public MapSum() {
        root = new Node();
    }

    /**
     * Stores {@code val} under the key {@code word}, creating missing nodes on
     * the way. Inserting an existing key overwrites its previous value.
     */
    public void insert(String word, int val) {
        Node walker = root;
        for (char letter : word.toCharArray()) {
            Node child = walker.next.get(letter);
            if (child == null) {
                child = new Node();
                walker.next.put(letter, child);
            }
            walker = child;
        }
        walker.value = val;
    }

    /**
     * Returns the total of the values of every key that starts with
     * {@code prefix}, or 0 when no path for the prefix exists.
     */
    public int sum(String prefix) {
        Node walker = root;
        for (char letter : prefix.toCharArray()) {
            walker = walker.next.get(letter);
            if (walker == null) {
                return 0;
            }
        }
        return subtreeTotal(walker);
    }

    /** Adds up the value of {@code node} and of every node beneath it. */
    private int subtreeTotal(Node node) {
        int total = node.value;
        for (Node child : node.next.values()) {
            total += subtreeTotal(child);
        }
        return total;
    }
}