資料結構備忘錄:Trie樹基本操作
阿新 • • 發佈:2019-01-04
Trie樹是一種可以實現字串多模式匹配的資料結構,在字串處理中有很重要的作用。本文的Trie樹實現參考了殷人昆《資料結構與演算法C++語言描述》第二版中的內容;不同之處在於,分支節點的分支結構改用C++標準庫的map容器實現。原因是map基於紅黑樹,查詢時間為O(log n),速度快;另外它只為實際出現的分支字元分配空間,節省記憶體,避免浪費。
C++實現如下:
1 #include "pch.h" 2 #include <map> 3 #include <stack> 4 #include <vector> 5 #include <string> 6 #include <iostream> 7using namespace std; 8 9 struct TrieTreeNode //Trie樹節點型別 10 { 11 enum NodeType { DATANODE, BRANCHNODE } type_flag; //節點型別標誌,分支節點或存放關鍵字的葉節點 12 union 13 { 14 string key_in_trie; //葉節點關鍵字 15 map<string, TrieTreeNode *> sub_ptr; //分支節點的分支字元和對應的指向分支字元對應的子節點的指標之間的對映關係16 }; 17 18 TrieTreeNode(const string &k) :type_flag(NodeType::DATANODE), key_in_trie(k) {} 19 TrieTreeNode() :type_flag(NodeType::BRANCHNODE), sub_ptr() {} 20 21 TrieTreeNode(TrieTreeNode &be_copied) 22 { 23 switch (be_copied.type_flag) 24 { 25case NodeType::DATANODE: new (&key_in_trie) string(be_copied.key_in_trie); break; 26 case NodeType::BRANCHNODE: 27 { 28 new (&sub_ptr) map<string, TrieTreeNode *>(); 29 for (map<string, TrieTreeNode *>::iterator p = be_copied.sub_ptr.begin(); p != be_copied.sub_ptr.end(); ++p) 30 { 31 sub_ptr.insert(make_pair(p->first, nullptr)); 32 } 33 } 34 break; 35 } 36 } 37 38 ~TrieTreeNode() 39 { 40 switch (type_flag) 41 { 42 case NodeType::DATANODE : key_in_trie.~string(); break; 43 case NodeType::BRANCHNODE: break; 44 } 45 } 46 }; 47 48 class TrieTree 49 { 50 public: 51 bool insert(const string &be_inserted) const; //Trie樹中插入關鍵字,true成功false失敗 52 bool deleteElem(const string &be_deleted) const; //Trie樹中刪除指定關鍵字,true成功false失敗 53 TrieTreeNode *copy(); //拷貝Trie樹,返回指向副本Trie樹的指標 54 TrieTree() { root = new TrieTreeNode(); } 55 TrieTree(TrieTree &be_copied) { root = be_copied.copy(); } 56 ~TrieTree(); 57 private: 58 bool static strCompare(const string &left, const string &right, const size_t &i); 59 TrieTreeNode *root; //Trie樹根節點 60 }; 61 62 bool TrieTree::strCompare(const string &left, const string &right, const size_t &i) 63 { 64 for (size_t j = i; ; ++j) 65 { 66 if (j >= left.size() && j >= right.size()) 67 return true; 68 else if (j >= left.size() || j >= right.size()) 69 return false; 70 else if (left[j] != right[j]) 71 return false; 72 } 73 } 74 75 bool 
TrieTree::deleteElem(const string &be_deleted) const 76 { 77 TrieTreeNode *run = root; 78 stack<pair<TrieTreeNode *, map<string, TrieTreeNode *>::iterator>> work_stack; 79 string::size_type i = 0; 80 while (run->type_flag != TrieTreeNode::NodeType::DATANODE) 81 { 82 if (i < be_deleted.size()) 83 { 84 string temp = be_deleted.substr(i, 1); 85 ++i; 86 map<string, TrieTreeNode *>::iterator it = run->sub_ptr.find(temp); 87 if (it == run->sub_ptr.end()) 88 { 89 return false; 90 } 91 else 92 { 93 work_stack.push(make_pair(run, it)); 94 run = it->second; 95 } 96 } 97 else 98 { 99 map<string, TrieTreeNode *>::iterator it = run->sub_ptr.find(""); 100 if (it != run->sub_ptr.end()) 101 { 102 work_stack.push(make_pair(run, it)); 103 run = it->second; 104 break; 105 } 106 else 107 { 108 return false; 109 } 110 } 111 } 112 113 if (work_stack.top().second->first != "" && strCompare(be_deleted, run->key_in_trie, i) == false) 114 { 115 return false; 116 } 117 118 bool delete_or_not = true; 119 while (work_stack.top().first != root) 120 { 121 if (delete_or_not == true) 122 { 123 delete work_stack.top().second->second; 124 if (work_stack.top().second->second->type_flag == TrieTreeNode::NodeType::DATANODE) 125 { 126 run = nullptr; 127 } 128 129 work_stack.top().first->sub_ptr.erase(work_stack.top().second); 130 131 if (work_stack.top().first->sub_ptr.size() >= 2) 132 { 133 return true; 134 } 135 else if (work_stack.top().first->sub_ptr.size() == 1) 136 { 137 if (work_stack.top().first->sub_ptr.begin()->second->type_flag != TrieTreeNode::NodeType::DATANODE) 138 { 139 return true; 140 } 141 else 142 { 143 run = work_stack.top().first->sub_ptr.begin()->second; 144 delete work_stack.top().first; 145 delete_or_not = false; 146 } 147 } 148 work_stack.pop(); 149 } 150 else 151 { 152 if (work_stack.top().first->sub_ptr.size() >= 2) 153 { 154 work_stack.top().second->second = run; 155 return true; 156 } 157 else 158 { 159 delete work_stack.top().first; 160 work_stack.pop(); 161 } 162 } 163 } 
164 165 if (delete_or_not == true) 166 { 167 delete work_stack.top().second->second; 168 root->sub_ptr.erase(work_stack.top().second); 169 } 170 else 171 { 172 work_stack.top().second->second = run; 173 } 174 return true; 175 } 176 177 bool TrieTree::insert(const string &be_inserted) const 178 { 179 TrieTreeNode *run = root; 180 map<string, TrieTreeNode *>::iterator father; 181 string::size_type i = 0; 182 while (run->type_flag != TrieTreeNode::NodeType::DATANODE) 183 { 184 if (i < be_inserted.size()) 185 { 186 string temp = be_inserted.substr(i, 1); 187 ++i; 188 map<string, TrieTreeNode *>::iterator it = run->sub_ptr.find(temp); 189 if (it == run->sub_ptr.end()) 190 { 191 run->sub_ptr.insert(make_pair(temp, new TrieTreeNode(be_inserted))); 192 return true; 193 } 194 else 195 { 196 father = it; 197 run = it->second; 198 } 199 } 200 else 201 { 202 if (run->sub_ptr.find("") != run->sub_ptr.end()) 203 { 204 return false; 205 } 206 else 207 { 208 run->sub_ptr.insert(make_pair("", new TrieTreeNode(be_inserted))); 209 return true; 210 } 211 } 212 } 213 214 if (strCompare(be_inserted, run->key_in_trie, i) == true) 215 { 216 return false; 217 } 218 else 219 { 220 while (true) 221 { 222 father->second = new TrieTreeNode(); 223 if (i >= be_inserted.size()) 224 { 225 father->second->sub_ptr.insert(make_pair("", new TrieTreeNode(be_inserted))); 226 father->second->sub_ptr.insert(make_pair(run->key_in_trie.substr(i, 1), run)); 227 } 228 else if (i >= run->key_in_trie.size()) 229 { 230 father->second->sub_ptr.insert(make_pair("", run)); 231 father->second->sub_ptr.insert(make_pair(be_inserted.substr(i, 1), new TrieTreeNode(be_inserted))); 232 } 233 else if (be_inserted[i] != run->key_in_trie[i]) 234 { 235 father->second->sub_ptr.insert(make_pair(run->key_in_trie.substr(i, 1), run)); 236 father->second->sub_ptr.insert(make_pair(be_inserted.substr(i, 1), new TrieTreeNode(be_inserted))); 237 } 238 else 239 { 240 father = 
father->second->sub_ptr.insert(make_pair(be_inserted.substr(i, 1), new TrieTreeNode())).first; 241 ++i; 242 continue; 243 } 244 return true; 245 } 246 } 247 } 248 249 TrieTree::~TrieTree() 250 { 251 TrieTreeNode *run = root; 252 stack<pair<TrieTreeNode *, map<string, TrieTreeNode *>::iterator>> work_stack; 253 254 bool trace_back_flag = true; 255 while (true) 256 { 257 if (trace_back_flag == true) 258 { 259 if (run == root) 260 { 261 if (run->sub_ptr.begin() == run->sub_ptr.end()) 262 { 263 delete root; 264 return; 265 } 266 } 267 else 268 { 269 if (run->type_flag == TrieTreeNode::DATANODE) 270 { 271 delete run; 272 run = work_stack.top().first; 273 work_stack.top().second = run->sub_ptr.erase(work_stack.top().second); 274 trace_back_flag = false; 275 continue; 276 } 277 } 278 279 work_stack.push(make_pair(run, run->sub_ptr.begin())); 280 run = run->sub_ptr.begin()->second; 281 } 282 else 283 { 284 if (run == root) 285 { 286 if (work_stack.top().second == root->sub_ptr.end()) 287 { 288 delete root; 289 return; 290 } 291 292 run = work_stack.top().second->second; 293 trace_back_flag = true; 294 } 295 else 296 { 297 if (work_stack.top().second != run->sub_ptr.end()) 298 { 299 run = work_stack.top().second->second; 300 trace_back_flag = true; 301 } 302 else 303 { 304 delete run; 305 work_stack.pop(); 306 run = work_stack.top().first; 307 work_stack.top().second = run->sub_ptr.erase(work_stack.top().second); 308 } 309 } 310 } 311 } 312 } 313 314 TrieTreeNode *TrieTree::copy() 315 { 316 TrieTreeNode *be_copied = root; 317 stack<pair<TrieTreeNode *, map<string, TrieTreeNode *>::iterator>> work_stack; 318 stack<pair<TrieTreeNode *, map<string, TrieTreeNode *>::iterator>> copy_trace_stack; 319 TrieTreeNode *root_of_copy = nullptr; 320 321 bool trace_back_flag = true; 322 while (true) 323 { 324 if (trace_back_flag == true) 325 { 326 if (be_copied == root) 327 { 328 root_of_copy = new TrieTreeNode(*be_copied); 329 if (be_copied->sub_ptr.begin() == be_copied->sub_ptr.end()) 
330 { 331 break; 332 } 333 copy_trace_stack.push(make_pair(root_of_copy, root_of_copy->sub_ptr.begin())); 334 } 335 else 336 { 337 if (work_stack.top().second == work_stack.top().first->sub_ptr.begin()) 338 { 339 copy_trace_stack.top().second->second = new TrieTreeNode(*be_copied); 340 } 341 else 342 { 343 ++copy_trace_stack.top().second; 344 copy_trace_stack.top().second->second = new TrieTreeNode(*be_copied); 345 } 346 if (be_copied->type_flag != TrieTreeNode::DATANODE) 347 copy_trace_stack.push(make_pair(copy_trace_stack.top().second->second, copy_trace_stack.top().second->second->sub_ptr.begin())); 348 else 349 { 350 be_copied = work_stack.top().first; 351 trace_back_flag = false; 352 continue; 353 } 354 } 355 356 work_stack.push(make_pair(be_copied, be_copied->sub_ptr.begin())); 357 be_copied = be_copied->sub_ptr.begin()->second; 358 } 359 else 360 { 361 map<string, TrieTreeNode *>::iterator tempit = work_stack.top().second; 362 if (tempit->second->type_flag != TrieTreeNode::DATANODE) 363 { 364 copy_trace_stack.pop(); 365 } 366 367 if (be_copied == root) 368 { 369 if (++(work_stack.top().second) == root->sub_ptr.end()) 370 break; 371 372 be_copied = work_stack.top().second->second; 373 trace_back_flag = true; 374 } 375 else 376 { 377 if (++(work_stack.top().second) != be_copied->sub_ptr.end()) 378 { 379 be_copied = work_stack.top().second->second; 380 trace_back_flag = true; 381 } 382 else 383 { 384 work_stack.pop(); 385 be_copied = work_stack.top().first; 386 } 387 } 388 } 389 } 390 return root_of_copy; 391 } 392 393 int main() 394 { 395 vector<string> test = {"abcd", "abydb", "ary", "AFD", "abyc", "AFDGH", "AFMGB", "AFMGRQ", "cdfg", "cdgkn", "cdgkmq"}; 396 TrieTree test_obj; 397 for (vector<string>::iterator p = test.begin(); p != test.end(); ++p) 398 { 399 cout << "插入字串" << *p << endl; 400 test_obj.insert(*p); 401 } 402 403 cout << endl; 404 // TrieTreeNode *copy = test_ptr.copy(); 405 for (vector<string>::iterator p = test.begin(); p != test.end(); ++p) 
406 { 407 cout << "刪除字串" << *p << endl; 408 test_obj.deleteElem(*p); 409 } 410 cout << endl; 411 }