// 字串多模匹配---最左優先 (multi-pattern string matching, leftmost-first)
// 阿新 • 發佈:2021-12-30
#pragma once
#include <queue>
#include <string>
#include <unordered_map>
#include <vector>
/// One state of the pattern automaton used by ChangeWordTrie.
struct Node {
  /// Index of the failure-link target inside the owning node table.
  /// Written by ChangeWordTrie::Build(); defaults to 0 so that an unbuilt
  /// automaton never follows an indeterminate index.
  int fail = 0;
  /// The pattern (variant word) whose last character ends at this node.
  std::string change_word;
  /// Replacement text for `change_word`. A non-empty value is what marks
  /// this node as a match, so an empty replacement is never applied.
  std::string word;
  using TransMap = std::unordered_map<char, int>;
  /// Outgoing edges: next character -> child node index.
  TransMap trans;
};

/// Multi-pattern "find and replace" automaton (trie plus failure links).
///
/// Usage: Init() (optional), Insert() every pattern, Build(), then Search().
///
/// Known limitation (kept from the original implementation): Search() only
/// tests the node the automaton is currently in, so a pattern that is a
/// proper suffix of the current state but not the state itself is skipped.
class ChangeWordTrie {
 public:
  /// Creates an automaton that already owns its root node, so Insert(),
  /// Build() and Search() are safe even when Init() is never called.
  ChangeWordTrie() { rt_ = NewNode(); }

  /// Resets the automaton to a single root node.
  /// @param capacity Number of nodes to pre-allocate; values <= 0 skip the
  ///                 reservation.
  inline void Init(int capacity = 100000) {
    // Drop nodes from any previous use so repeated Init() calls do not
    // stack a fresh root on top of stale patterns.
    trie_.clear();
    if (capacity > 0) {
      trie_.reserve(static_cast<std::vector<Node>::size_type>(capacity));
    }
    rt_ = NewNode();
  }

  /// Registers a pattern and its replacement.
  /// @param text Pattern to look for in the searched text.
  /// @param word Text that replaces each recognized occurrence of `text`.
  void Insert(const std::string& text, const std::string& word) {
    int u = rt_;
    for (const char c : text) {
      const auto it = trie_[u].trans.find(c);
      if (it != trie_[u].trans.end()) {
        u = it->second;
        continue;
      }
      // Create the child BEFORE touching trie_[u] again: NewNode() may
      // reallocate trie_, which invalidates references into it.
      const int child = NewNode();
      trie_[u].trans.emplace(c, child);
      u = child;
    }
    trie_[u].word = word;
    trie_[u].change_word = text;
  }

  /// Computes the failure link of every node. Call after the last Insert().
  void Build() {
    std::queue<int> Q;
    trie_[rt_].fail = rt_;
    // Direct children of the root always fall back to the root.
    for (const auto& p : trie_[rt_].trans) {
      trie_[p.second].fail = rt_;
      Q.push(p.second);
    }

    // Breadth-first traversal: a parent's failure link is final before its
    // children are processed.
    while (!Q.empty()) {
      const int u = Q.front();
      Q.pop();
      for (const auto& p : trie_[u].trans) {
        const char c = p.first;
        const int nxt = p.second;
        int u_fail = trie_[u].fail;
        while (u_fail != rt_ && !trie_[u_fail].trans.count(c)) {
          u_fail = trie_[u_fail].fail;
        }
        const auto it = trie_[u_fail].trans.find(c);
        trie_[nxt].fail = (it != trie_[u_fail].trans.end()) ? it->second : rt_;
        Q.push(nxt);
      }
    }
  }

  /// Returns a copy of `text` in which every recognized pattern has been
  /// replaced by its replacement word. After a replacement, matching
  /// restarts from the root, so replaced regions never overlap.
  /// @param text Input to scan.
  /// @return The rewritten string.
  std::string Search(const std::string& text) const {
    /* TODO: speed up the walk over word nodes by merging them through a
       "last" array so the complexity bound is guaranteed. */
    std::string result;
    result.reserve(text.size());
    int u = rt_;
    for (const char c : text) {
      result.push_back(c);
      while (u != rt_ && !trie_[u].trans.count(c)) {
        u = trie_[u].fail;
      }
      const auto it = trie_[u].trans.find(c);
      if (it != trie_[u].trans.end()) {
        u = it->second;
      }
      const Node& node = trie_[u];
      if (!node.word.empty()) {
        // The matched pattern is exactly the tail of `result`: swap that
        // tail for the replacement instead of re-indexing into a copy.
        result.resize(result.size() - node.change_word.length());
        result += node.word;
        u = rt_;
      }
    }
    return result;
  }

 private:
  int rt_ = 0;  // Root of trie
  std::vector<Node> trie_;

  /// Appends a default-constructed node and returns its index.
  int NewNode() {
    trie_.emplace_back();
    return static_cast<int>(trie_.size()) - 1;
  }
};
#include <string> #include <unordered_map> #include <vector>
// NOTE(review): Node and ChangeWordTrie are also defined earlier in this
// file with the same members; a translation unit can hold only one
// definition, so one of the two copies should be removed.

/// One state of the pattern automaton used by ChangeWordTrie.
struct Node {
  /// Index of the failure-link target inside the owning node table.
  /// Written by ChangeWordTrie::Build(); defaults to 0 so that an unbuilt
  /// automaton never follows an indeterminate index.
  int fail = 0;
  /// The pattern (variant word) whose last character ends at this node.
  std::string change_word;
  /// Replacement text for `change_word`. A non-empty value is what marks
  /// this node as a match, so an empty replacement is never applied.
  std::string word;
  using TransMap = std::unordered_map<char, int>;
  /// Outgoing edges: next character -> child node index.
  TransMap trans;
};

/// Multi-pattern "find and replace" automaton (trie plus failure links).
///
/// Usage: Init() (optional), Insert() every pattern, Build(), then Search().
///
/// Known limitation (kept from the original implementation): Search() only
/// tests the node the automaton is currently in, so a pattern that is a
/// proper suffix of the current state but not the state itself is skipped.
class ChangeWordTrie {
 public:
  /// Creates an automaton that already owns its root node, so Insert(),
  /// Build() and Search() are safe even when Init() is never called.
  ChangeWordTrie() { rt_ = NewNode(); }

  /// Resets the automaton to a single root node.
  /// @param capacity Number of nodes to pre-allocate; values <= 0 skip the
  ///                 reservation.
  inline void Init(int capacity = 100000) {
    // Drop nodes from any previous use so repeated Init() calls do not
    // stack a fresh root on top of stale patterns.
    trie_.clear();
    if (capacity > 0) {
      trie_.reserve(static_cast<std::vector<Node>::size_type>(capacity));
    }
    rt_ = NewNode();
  }

  /// Registers a pattern and its replacement.
  /// @param text Pattern to look for in the searched text.
  /// @param word Text that replaces each recognized occurrence of `text`.
  void Insert(const std::string& text, const std::string& word) {
    int u = rt_;
    for (const char c : text) {
      const auto it = trie_[u].trans.find(c);
      if (it != trie_[u].trans.end()) {
        u = it->second;
        continue;
      }
      // Create the child BEFORE touching trie_[u] again: NewNode() may
      // reallocate trie_, which invalidates references into it.
      const int child = NewNode();
      trie_[u].trans.emplace(c, child);
      u = child;
    }
    trie_[u].word = word;
    trie_[u].change_word = text;
  }

  /// Computes the failure link of every node. Call after the last Insert().
  void Build() {
    std::queue<int> Q;
    trie_[rt_].fail = rt_;
    // Direct children of the root always fall back to the root.
    for (const auto& p : trie_[rt_].trans) {
      trie_[p.second].fail = rt_;
      Q.push(p.second);
    }

    // Breadth-first traversal: a parent's failure link is final before its
    // children are processed.
    while (!Q.empty()) {
      const int u = Q.front();
      Q.pop();
      for (const auto& p : trie_[u].trans) {
        const char c = p.first;
        const int nxt = p.second;
        int u_fail = trie_[u].fail;
        while (u_fail != rt_ && !trie_[u_fail].trans.count(c)) {
          u_fail = trie_[u_fail].fail;
        }
        const auto it = trie_[u_fail].trans.find(c);
        trie_[nxt].fail = (it != trie_[u_fail].trans.end()) ? it->second : rt_;
        Q.push(nxt);
      }
    }
  }

  /// Returns a copy of `text` in which every recognized pattern has been
  /// replaced by its replacement word. After a replacement, matching
  /// restarts from the root, so replaced regions never overlap.
  /// @param text Input to scan.
  /// @return The rewritten string.
  std::string Search(const std::string& text) const {
    /* TODO: speed up the walk over word nodes by merging them through a
       "last" array so the complexity bound is guaranteed. */
    std::string result;
    result.reserve(text.size());
    int u = rt_;
    for (const char c : text) {
      result.push_back(c);
      while (u != rt_ && !trie_[u].trans.count(c)) {
        u = trie_[u].fail;
      }
      const auto it = trie_[u].trans.find(c);
      if (it != trie_[u].trans.end()) {
        u = it->second;
      }
      const Node& node = trie_[u];
      if (!node.word.empty()) {
        // The matched pattern is exactly the tail of `result`: swap that
        // tail for the replacement instead of re-indexing into a copy.
        result.resize(result.size() - node.change_word.length());
        result += node.word;
        u = rt_;
      }
    }
    return result;
  }

 private:
  int rt_ = 0;  // Root of trie
  std::vector<Node> trie_;

  /// Appends a default-constructed node and returns its index.
  int NewNode() {
    trie_.emplace_back();
    return static_cast<int>(trie_.size()) - 1;
  }
};