unity c#非法字元(髒詞)檢測
阿新 • • 發佈:2019-01-12
專案中非法字元檢測是必須的,否則聊天系統就無法遮蔽各種不文明用語
先說說我的原理吧
1.讀取非法字元表,把相同的首字元歸類到字典,類似新華字典那樣
2.然後對輸入的字串逐一字元查詢對應的首字元字典;遍歷該字典中的每個詞,從當前字元起擷取與該詞等長的子字串進行比較,如果相同則認為含有非法字元
下面是測試結果
下面為完整程式碼,有註釋應該比較容易看懂
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using UnityEngine;

/// <summary>
/// Dirty-word filter.
/// The word list is indexed by first character; each position of the source
/// text is then compared only against the words sharing that first character.
/// Both the word list and the source text are normalized (full-width to
/// half-width, lower-cased) before comparison. Matching is otherwise exact:
/// characters inserted between the characters of a word are NOT skipped.
/// </summary>
public class FilterWord
{
    /// <summary>Code point of the full-width (ideographic) space.</summary>
    private const int FullWidthSpace = 12288;

    /// <summary>Code point of the half-width (ASCII) space.</summary>
    private const int HalfWidthSpace = 32;

    /// <summary>Exclusive lower bound of the full-width forms that have a half-width twin.</summary>
    private const int FullWidthLowerBound = 65280;

    /// <summary>Exclusive upper bound of the full-width forms that have a half-width twin.</summary>
    private const int FullWidthUpperBound = 65375;

    /// <summary>Distance between a full-width form and its half-width twin.</summary>
    private const int FullWidthOffset = 65248;

    private string m_AllFilterWord = string.Empty;

    /// <summary>
    /// In-memory lexicon indexed by the first character of each word.
    /// Sized char.MaxValue + 1 so that every possible char is a valid index.
    /// </summary>
    private readonly WordGroup[] MEMORYLEXICON = new WordGroup[char.MaxValue + 1];

    private string sourctText = string.Empty;

    private bool m_IsInitalize = false;

    private readonly List<string> illegalWords = new List<string>();

    /// <summary>
    /// Creates the filter and reads the word list from the "dirtywords"
    /// TextAsset via Resources.Load. If the asset cannot be loaded an error is
    /// logged and the filter starts with an empty word list instead of
    /// throwing a NullReferenceException.
    /// </summary>
    public FilterWord()
    {
        TextAsset asset = Resources.Load("dirtywords") as TextAsset;
        if (asset == null)
        {
            Debug.LogError("FilterWord: TextAsset 'dirtywords' could not be loaded from Resources; no words will be filtered.");
            m_AllFilterWord = string.Empty;
            return;
        }

        m_AllFilterWord = asset.text ?? string.Empty;
    }

    /// <summary>
    /// Raw word-list text, one word per line.
    /// Assigning a new value invalidates the in-memory lexicon so that it is
    /// rebuilt on the next <see cref="LoadDictionary"/> / <see cref="Filter"/> call.
    /// </summary>
    public string AllFilterWord
    {
        get { return m_AllFilterWord; }
        set
        {
            m_AllFilterWord = value ?? string.Empty;
            m_IsInitalize = false;
        }
    }

    /// <summary>
    /// Text to be checked by the next <see cref="Filter"/> call.
    /// </summary>
    public string SourceText
    {
        get { return sourctText; }
        set { sourctText = value; }
    }

    /// <summary>
    /// Illegal words found by the most recent <see cref="Filter"/> call, in
    /// order of appearance and spelled as they appear in the source text.
    /// </summary>
    public List<string> IllegalWords
    {
        get { return illegalWords; }
    }

    /// <summary>
    /// Tells whether the character is an ASCII letter (a-z or A-Z).
    /// </summary>
    /// <param name="character">Character to test.</param>
    /// <returns>True for ASCII letters, false otherwise.</returns>
    private static bool IsAlphabet(char character)
    {
        return (character >= 'a' && character <= 'z')
            || (character >= 'A' && character <= 'Z');
    }

    /// <summary>
    /// Normalizes a string: full-width characters become half-width and every
    /// character is lower-cased.
    /// </summary>
    /// <param name="input">Any non-null string.</param>
    /// <returns>The normalized string, same length as <paramref name="input"/>.</returns>
    /// <remarks>
    /// The full-width space is 12288 and the half-width space is 32. The other
    /// half-width characters (33-126) and their full-width forms (65281-65374)
    /// differ by exactly 65248.
    /// Lower-casing is done per character with the invariant culture, so the
    /// result keeps the input length (callers rely on index alignment) and
    /// does not depend on the device locale.
    /// </remarks>
    private static string ToDBC(string input)
    {
        char[] chars = input.ToCharArray();
        for (int i = 0; i < chars.Length; i++)
        {
            char current = chars[i];
            if (current == FullWidthSpace)
            {
                current = (char)HalfWidthSpace;
            }
            else if (current > FullWidthLowerBound && current < FullWidthUpperBound)
            {
                current = (char)(current - FullWidthOffset);
            }

            chars[i] = char.ToLowerInvariant(current);
        }

        return new string(chars);
    }

    /// <summary>
    /// Builds the in-memory lexicon from <see cref="AllFilterWord"/>.
    /// Does nothing when the lexicon is already up to date.
    /// </summary>
    public void LoadDictionary()
    {
        if (m_IsInitalize)
        {
            return;
        }

        m_IsInitalize = true;
        Array.Clear(MEMORYLEXICON, 0, MEMORYLEXICON.Length);

        // Ordinal set: drops duplicate entries without a culture-sensitive sort.
        HashSet<string> seen = new HashSet<string>();
        string[] lines = m_AllFilterWord.Split('\n');
        foreach (string line in lines)
        {
            string word = ToDBC(line.Replace("\r", ""));
            if (word.Length == 0 || !seen.Add(word))
            {
                continue;
            }

            WordGroup group = MEMORYLEXICON[word[0]];
            if (group == null)
            {
                group = new WordGroup();
                MEMORYLEXICON[word[0]] = group;
            }

            // The first character is implied by the group; store only the tail.
            group.Add(word.Substring(1));
        }
    }

    /// <summary>
    /// Tests whether the dictionary word formed by text[start] followed by
    /// <paramref name="tail"/> occurs in <paramref name="text"/> at <paramref name="start"/>.
    /// </summary>
    /// <param name="text">Normalized source text.</param>
    /// <param name="start">Index of the word's first character, which the caller has already matched.</param>
    /// <param name="tail">Dictionary word without its first character.</param>
    /// <returns>The tail length on a match (0 for a single-character word), or -1 when there is no match.</returns>
    private static int MatchTail(string text, int start, string tail)
    {
        // A single-character word matches as soon as its first character does.
        if (tail.Length == 0)
        {
            return 0;
        }

        int last = start + tail.Length;
        if (last >= text.Length)
        {
            return -1;
        }

        for (int i = 0; i < tail.Length; i++)
        {
            if (tail[i] != text[start + 1 + i])
            {
                return -1;
            }
        }

        // A word ending in a letter must not be directly followed by another
        // letter, so that it is not reported inside a longer alphabetic word.
        if (IsAlphabet(text[last]) && last + 1 < text.Length && IsAlphabet(text[last + 1]))
        {
            return -1;
        }

        return tail.Length;
    }

    /// <summary>
    /// Finds the longest dictionary word starting at <paramref name="start"/>.
    /// </summary>
    /// <param name="text">Normalized source text.</param>
    /// <param name="start">Index at which a word must begin.</param>
    /// <returns>Length of the matched tail (word length minus one), or -1 when no word matches.</returns>
    private int LongestMatch(string text, int start)
    {
        WordGroup group = MEMORYLEXICON[text[start]];
        if (group == null)
        {
            return -1;
        }

        int best = -1;
        int count = group.Count();
        for (int z = 0; z < count; z++)
        {
            int matched = MatchTail(text, start, group.GetWord(z));
            if (matched > best)
            {
                best = matched;
            }
        }

        return best;
    }

    /// <summary>
    /// Scans <see cref="SourceText"/> and replaces every character of each
    /// illegal word with <paramref name="replaceChar"/>. The words found are
    /// available through <see cref="IllegalWords"/> afterwards.
    /// </summary>
    /// <param name="replaceChar">Character written over illegal words.</param>
    /// <returns>The masked text, or an empty string when the source text is null or empty.</returns>
    public string Filter(char replaceChar)
    {
        // Report only the words of this call; the list would otherwise grow
        // without bound on a long-lived instance.
        illegalWords.Clear();
        LoadDictionary();

        if (string.IsNullOrEmpty(sourctText))
        {
            return string.Empty;
        }

        // Normalize once; the normalized text has the same length as the
        // source, so indexes are interchangeable between the two.
        string normalized = ToDBC(sourctText);
        char[] masked = sourctText.ToCharArray();

        int index = 0;
        while (index < normalized.Length)
        {
            int tailLength = LongestMatch(normalized, index);
            if (tailLength < 0)
            {
                index++;
                continue;
            }

            int wordLength = tailLength + 1;
            illegalWords.Add(sourctText.Substring(index, wordLength));
            for (int pos = index; pos < index + wordLength; pos++)
            {
                masked[pos] = replaceChar;
            }

            // Resume right after the masked word.
            index += wordLength;
        }

        return new string(masked);
    }
}

/// <summary>
/// Collection of dictionary words that share the same first character.
/// Each entry is stored without that first character.
/// </summary>
class WordGroup
{
    /// <summary>
    /// Stored word tails.
    /// </summary>
    private readonly List<string> groupList;

    public WordGroup()
    {
        groupList = new List<string>();
    }

    /// <summary>
    /// Adds a word tail to the group.
    /// </summary>
    /// <param name="word">Word without its first character.</param>
    public void Add(string word)
    {
        groupList.Add(word);
    }

    /// <summary>
    /// Gets the number of entries in the group.
    /// </summary>
    /// <returns>Entry count.</returns>
    public int Count()
    {
        return groupList.Count;
    }

    /// <summary>
    /// Gets the entry at the given index.
    /// </summary>
    /// <param name="index">Zero-based index.</param>
    /// <returns>The stored word tail.</returns>
    public string GetWord(int index)
    {
        return groupList[index];
    }
}
下面是抽出一個統一方法來呼叫檢測
主要兩個方法
1.檢測是否有非法字元,返回bool
2.把非法字元轉成*號,返回string
using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// Static entry points for dirty-word detection, backed by one lazily
/// created <see cref="FilterWord"/> instance.
/// </summary>
public class SystemUtil
{
    private static FilterWord m_FilterWord;

    /// <summary>
    /// Shared filter instance, created on first access.
    /// </summary>
    public static FilterWord filterWord
    {
        get
        {
            if (null == m_FilterWord)
            {
                m_FilterWord = new FilterWord();
            }

            return m_FilterWord;
        }
    }

    /// <summary>
    /// Tells whether the text contains at least one illegal word.
    /// </summary>
    /// <param name="str">Text to check; null and empty are treated as clean.</param>
    /// <returns>True when filtering the text changes it, false otherwise.</returns>
    public static bool IsInvaild(string str)
    {
        // Nothing to check; also avoids handing null to the filter.
        if (string.IsNullOrEmpty(str))
        {
            return false;
        }

        return Filter(str) != str;
    }

    /// <summary>
    /// Replaces every character of each illegal word with '*'.
    /// </summary>
    /// <param name="str">Text to filter.</param>
    /// <returns>The masked text, or an empty string when <paramref name="str"/> is null or empty.</returns>
    public static string Filter(string str)
    {
        // Guard null here so the filter is never asked to scan a null source.
        if (str == null)
        {
            return string.Empty;
        }

        FilterWord filter = filterWord;
        filter.SourceText = str;
        return filter.Filter('*');
    }
}
下面是工程下載地址
連結:https://pan.baidu.com/s/1x1RyEugV6N4D_Sj2_JgkUQ
提取碼:lvc3