淘寶h5 頁面 sign加密演算法
阿新 • • 發佈:2020-12-10
JAVA基於DFA實現敏感詞過濾
1、用途
提升大量敏感詞過濾/字串替換效率
2、原理
將敏感詞庫構建為字典樹(Trie,以巢狀 Map 表示的多叉樹),再將文字逐字元與該樹進行比對,從而快速定位敏感詞
3、實現
import java.util.*;
/**
* DFA工具
*
* @author rye
* @date 2020/12/02
*/
public class DfaTool {

    /**
     * One state of the matching automaton (a trie node).
     */
    private static final class Node {
        /** Transitions to the next state, keyed by the next character of a word. */
        private final Map<Character, Node> children = new HashMap<>();
        /** Number of characters consumed to reach this node, i.e. the word length if terminal. */
        private final int depth;
        /** True when the path from the root to this node spells a complete sensitive word. */
        private boolean end;
        /** Text substituted for the word ending here; only meaningful when {@link #end} is true. */
        private String replacement = "";

        private Node(int depth) {
            this.depth = depth;
        }
    }

    /**
     * Root of the trie; it has no children when no (non-empty) sensitive word was supplied.
     * The trie is only written to by the constructors.
     */
    private final Node root = new Node(0);

    /**
     * Small demo: filters a sample sentence with two differently configured tools.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        DfaTool tool1 = new DfaTool(new String[]{"明月", "地上", "低頭"});
        DfaTool tool2 = new DfaTool(new String[][]{{"明月", "*"}, {"地上", "*"}, {"低頭", ""}});
        System.out.println(tool1.replace("床前明月光,疑是地上霜。舉頭望明月,低頭思故鄉。"));
        System.out.println(tool2.replace("床前明月光,疑是地上霜。舉頭望明月,低頭思故鄉。"));
    }

    /**
     * Creates a tool from a word-to-replacement mapping.
     *
     * <p>The map is read once during construction; {@code null} or empty keys are skipped and a
     * {@code null} value is treated as the empty string (the word is removed).
     *
     * @param sensitiveMap sensitive word (key) to replacement text (value); may be {@code null}
     *                     or empty, in which case {@link #replace(String)} returns its input as-is
     */
    public DfaTool(Map<String, String> sensitiveMap) {
        if (sensitiveMap == null) {
            return;
        }
        for (Map.Entry<String, String> entry : sensitiveMap.entrySet()) {
            addWord(entry.getKey(), entry.getValue());
        }
    }

    /**
     * Creates a tool from {@code {word, replacement}} pairs.
     *
     * <p>When the same word appears more than once the last pair wins. A row that contains only
     * the word (no replacement element) removes the word; {@code null} or empty rows are skipped.
     *
     * @param sensitiveArr array of {@code {word, replacement}} rows; may be {@code null}
     */
    public DfaTool(String[][] sensitiveArr) {
        if (sensitiveArr == null) {
            return;
        }
        for (String[] pair : sensitiveArr) {
            if (pair == null || pair.length == 0) {
                continue;
            }
            addWord(pair[0], pair.length > 1 ? pair[1] : "");
        }
    }

    /**
     * Creates a tool that removes every listed word (replacement is the empty string).
     *
     * @param sensitiveArr sensitive words; may be {@code null}; {@code null} or empty elements
     *                     are skipped
     */
    public DfaTool(String[] sensitiveArr) {
        if (sensitiveArr == null) {
            return;
        }
        for (String word : sensitiveArr) {
            addWord(word, "");
        }
    }

    /**
     * Inserts one word into the trie, creating intermediate nodes as needed.
     *
     * @param word        sensitive word; ignored when {@code null} or empty
     * @param replacement text to substitute for the word; {@code null} means the empty string
     */
    private void addWord(String word, String replacement) {
        if (word == null || word.isEmpty()) {
            return;
        }
        Node node = root;
        for (int i = 0; i < word.length(); i++) {
            char c = word.charAt(i);
            Node next = node.children.get(c);
            if (next == null) {
                next = new Node(i + 1);
                node.children.put(c, next);
            }
            node = next;
        }
        node.end = true;
        node.replacement = replacement == null ? "" : replacement;
    }

    /**
     * Replaces sensitive words using {@link MatchEnum#MAX_MATCH}.
     *
     * @param input text to process
     * @return the text with sensitive words replaced; {@code input} itself when it is
     *         {@code null}, empty, or no words are configured
     */
    public String replace(String input) {
        return replace(input, MatchEnum.MAX_MATCH);
    }

    /**
     * Replaces sensitive words in a single left-to-right pass.
     *
     * <p>At each position the leftmost match is taken; among the words starting there, the
     * shortest is used for {@link MatchEnum#MIN_MATCH} and the longest otherwise. Scanning resumes
     * right after the matched word, so replacement text is never re-examined. This guarantees
     * termination even when a replacement contains a sensitive word, and keeps replacement text
     * from combining with neighbouring characters into new matches.
     *
     * @param input text to process
     * @param type  matching rule; {@code null} is treated as {@link MatchEnum#MAX_MATCH}
     * @return the text with sensitive words replaced; {@code input} itself when it is
     *         {@code null}, empty, or no words are configured
     */
    public String replace(String input, MatchEnum type) {
        if (input == null || input.isEmpty() || root.children.isEmpty()) {
            return input;
        }
        boolean shortest = type == MatchEnum.MIN_MATCH;
        int length = input.length();
        StringBuilder out = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            Node match = findMatch(input, pos, shortest);
            if (match == null) {
                // No word starts here: keep the character and try the next position.
                out.append(input.charAt(pos));
                pos++;
            } else {
                out.append(match.replacement);
                // Terminal depth equals the matched word's length.
                pos += match.depth;
            }
        }
        return out.toString();
    }

    /**
     * Walks the trie from {@code start} and returns the terminal node of the matched word.
     *
     * @param input    text being scanned
     * @param start    index of the first character to match
     * @param shortest {@code true} to stop at the first complete word, {@code false} to keep
     *                 going and return the longest complete word
     * @return the terminal node of the chosen word, or {@code null} when no word starts at
     *         {@code start}
     */
    private Node findMatch(String input, int start, boolean shortest) {
        Node best = null;
        Node node = root;
        for (int i = start; i < input.length(); i++) {
            node = node.children.get(input.charAt(i));
            if (node == null) {
                break;
            }
            if (node.end) {
                best = node;
                if (shortest) {
                    break;
                }
            }
        }
        return best;
    }

    /**
     * Rule for choosing between sensitive words that start at the same position.
     */
    public enum MatchEnum {
        /**
         * Shortest match: stop at the first complete word.
         */
        MIN_MATCH,
        /**
         * Longest match: prefer the longest complete word.
         */
        MAX_MATCH
    }
}
輸出結果: