简易敏感词工具
package com.benwunet.bks.util;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.Map;
/**
 * Finds and masks sensitive words in text.
 *
 * <p>The dictionary is the nested-map trie produced by
 * {@code SensitiveWordInit.initKeyWord()}: each node maps a {@code Character}
 * to its child node, and the {@code "isEnd"} entry of a node is {@code "1"}
 * when the path from the root to that node spells a complete dictionary word.
 *
 * <p>Obtain the shared instance through {@link #getInstance()}.
 */
public class SensitiveFilterService {

    /** Node entry telling whether a dictionary word ends at that node. */
    private static final String END_KEY = "isEnd";

    /** Value of {@link #END_KEY} on nodes that terminate a word. */
    private static final String END_VALUE = "1";

    /** Root of the sensitive-word trie; built once in the constructor. */
    private final Map<?, ?> sensitiveWordMap;

    /** Match type: report the shortest dictionary word starting at a position. */
    public static final int minMatchType = 1;

    /** Match type: report the longest dictionary word starting at a position. */
    public static final int maxMatchType = 2;

    private static SensitiveFilterService instance = null;

    private SensitiveFilterService() {
        sensitiveWordMap = new SensitiveWordInit().initKeyWord();
    }

    /**
     * Returns the shared instance, creating it on first use.
     *
     * <p>Synchronized so that concurrent first calls cannot build (and load the
     * dictionary for) more than one instance.
     *
     * @return the singleton service
     */
    public static synchronized SensitiveFilterService getInstance() {
        if (null == instance) {
            instance = new SensitiveFilterService();
        }
        return instance;
    }

    /**
     * Collects the distinct sensitive words that occur in {@code txt}.
     *
     * <p>The text is scanned left to right; after a hit the scan resumes right
     * behind the matched word, so matches never overlap.
     *
     * @param txt       text to scan; {@code null} yields an empty set
     * @param matchType {@link #minMatchType} or {@link #maxMatchType}
     * @return the matched words, never {@code null}
     */
    public Set<String> getSensitiveWord(String txt, int matchType) {
        Set<String> found = new HashSet<String>();
        if (txt == null) {
            return found;
        }
        int pos = 0;
        while (pos < txt.length()) {
            int length = CheckSensitiveWord(txt, pos, matchType);
            if (length > 0) {
                found.add(txt.substring(pos, pos + length));
                pos += length;
            } else {
                pos++;
            }
        }
        return found;
    }

    /**
     * Returns {@code txt} with every sensitive word masked: each character of a
     * matched word is replaced by one copy of {@code replaceChar}.
     *
     * <p>The replacement is done in a single left-to-right pass over the matched
     * positions. Words and the mask are therefore treated literally (no regular
     * expression or {@code $}-group interpretation), and the result does not
     * depend on the iteration order of a set of words.
     *
     * @param txt         text to mask; {@code null} or empty text is returned as is
     * @param matchType   {@link #minMatchType} or {@link #maxMatchType}
     * @param replaceChar mask emitted once per masked character
     * @return the masked text
     */
    public String replaceSensitiveWord(String txt, int matchType,
            String replaceChar) {
        if (txt == null || txt.isEmpty()) {
            return txt;
        }
        StringBuilder masked = new StringBuilder(txt.length());
        int pos = 0;
        while (pos < txt.length()) {
            int length = CheckSensitiveWord(txt, pos, matchType);
            if (length > 0) {
                masked.append(getReplaceChars(replaceChar, length));
                pos += length;
            } else {
                masked.append(txt.charAt(pos));
                pos++;
            }
        }
        return masked.toString();
    }

    /**
     * Builds the mask for a word of {@code length} characters.
     *
     * @param replaceChar mask unit
     * @param length      number of repetitions (callers pass a positive value)
     * @return {@code replaceChar} repeated {@code length} times
     */
    private String getReplaceChars(String replaceChar, int length) {
        StringBuilder mask = new StringBuilder();
        for (int i = 0; i < length; i++) {
            mask.append(replaceChar);
        }
        return mask.toString();
    }

    /**
     * Checks whether a sensitive word starts at {@code beginIndex}.
     *
     * <p>The returned length always ends on a complete dictionary word: a
     * prefix that merely shares leading characters with a word is not a match,
     * and characters walked past the last complete word are not counted.
     *
     * <ul>
     *   <li>{@link #minMatchType}: length of the shortest word found.</li>
     *   <li>{@link #maxMatchType}: length of the longest word found; as in the
     *       previous implementation, matches shorter than two characters are
     *       not reported in this mode.</li>
     *   <li>any other value: length of the longest word found.</li>
     * </ul>
     *
     * @param txt        text to inspect; {@code null} yields 0
     * @param beginIndex index of the first character to test
     * @param matchType  {@link #minMatchType} or {@link #maxMatchType}
     * @return length of the matched word, or 0 when none starts at {@code beginIndex}
     */
    public int CheckSensitiveWord(String txt, int beginIndex, int matchType) {
        if (txt == null) {
            return 0;
        }
        int wordLength = 0; // depth of the last node that completed a word
        int depth = 0;      // characters consumed while walking the trie
        Map<?, ?> node = sensitiveWordMap;
        for (int i = beginIndex; i < txt.length(); i++) {
            Object child = node.get(txt.charAt(i));
            if (!(child instanceof Map)) {
                break;
            }
            node = (Map<?, ?>) child;
            depth++;
            if (END_VALUE.equals(node.get(END_KEY))) {
                wordLength = depth;
                if (minMatchType == matchType) {
                    break;
                }
            }
        }
        if (maxMatchType == matchType && wordLength < 2) {
            return 0;
        }
        return wordLength;
    }
}
package com.benwunet.bks.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.*;
/**
 * Loads the sensitive-word list and turns it into the nested-map trie used for
 * matching.
 *
 * <p>Trie layout: every node is a map from a {@code Character} to its child
 * node, plus an {@code "isEnd"} entry that is {@code "1"} when a word ends at
 * that node and {@code "0"} otherwise.
 */
public class SensitiveWordInit {

    /** Character encoding of the word-list file. */
    private static final String ENCODING = "UTF-8";

    /** Location of the word list; one word per line. */
    private static final String WORD_LIST_URL = "https://***********/phic.txt";

    /**
     * Downloads the word list and builds the trie.
     *
     * @return the trie root; an empty map when the list could not be loaded
     */
    public Map initKeyWord() {
        Set<String> wordSet = readSensitiveWordFile();
        return addSensitiveWordToHashMap(wordSet);
    }

    /**
     * Reads the word list, one word per line. Surrounding whitespace is
     * stripped and blank lines are skipped.
     *
     * <p>Loading is best effort: failures are printed and whatever was read so
     * far (possibly nothing) is returned, so the result is never {@code null}.
     * The downloaded temporary file is removed afterwards.
     *
     * @return the words read, never {@code null}
     */
    private Set<String> readSensitiveWordFile() {
        Set<String> wordSet = new HashSet<String>();
        File file = null;
        try {
            file = UrlToFileUtil.urlToFile(WORD_LIST_URL);
            if (file == null || !file.isFile()) {
                return wordSet;
            }
            try (FileInputStream in = new FileInputStream(file);
                    BufferedReader br = new BufferedReader(new InputStreamReader(in, ENCODING))) {
                boolean firstLine = true;
                String line;
                while ((line = br.readLine()) != null) {
                    // A UTF-8 byte-order mark would otherwise become part of the first word.
                    if (firstLine && line.startsWith("\uFEFF")) {
                        line = line.substring(1);
                    }
                    firstLine = false;
                    String word = line.trim();
                    if (!word.isEmpty()) {
                        wordSet.add(word);
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // The temp file is only an intermediate copy; do not leave it behind.
            if (file != null && !file.delete()) {
                file.deleteOnExit();
            }
        }
        return wordSet;
    }

    /**
     * Builds the trie from the given words.
     *
     * @param wordSet words to insert
     * @return the trie root
     */
    @SuppressWarnings("unchecked") // every value stored under a Character key is a node map created here
    private Map addSensitiveWordToHashMap(Set<String> wordSet) {
        Map<Object, Object> root = new HashMap<Object, Object>(wordSet.size());
        for (String word : wordSet) {
            Map<Object, Object> node = root;
            for (int i = 0; i < word.length(); i++) {
                char keyChar = word.charAt(i);
                Object child = node.get(keyChar);
                if (child == null) {
                    Map<Object, Object> created = new HashMap<Object, Object>();
                    created.put("isEnd", "0");
                    node.put(keyChar, created);
                    node = created;
                } else {
                    node = (Map<Object, Object>) child;
                }
                // Mark the node reached by the last character as a word end.
                if (i == word.length() - 1) {
                    node.put("isEnd", "1");
                }
            }
        }
        return root;
    }
}
UrlToFileUtil 工具类
package com.benwunet.bks.util;
import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
/**
 * Downloads the content behind a URL into a temporary file.
 */
public class UrlToFileUtil {

    /** Suffix used when the URL's last path segment has no usable extension. */
    private static final String DEFAULT_SUFFIX = ".tmp";

    private static final int BUFFER_SIZE = 8192;

    /**
     * Copies the resource at {@code url} into a newly created temporary file.
     *
     * <p>The temp file name starts with {@code net_url} and ends with the
     * extension of the URL's last path segment (for example {@code .txt}), or
     * {@code .tmp} when there is none. The caller owns the returned file and is
     * responsible for deleting it.
     *
     * <p>Failures are propagated instead of being swallowed, so a returned file
     * always holds the complete content. On failure the partially written temp
     * file is removed.
     *
     * @param url location of the resource, in a form accepted by {@link URL}
     * @return the temporary file holding the downloaded content, never {@code null}
     * @throws IllegalArgumentException if {@code url} is {@code null}
     * @throws Exception if the temp file cannot be created, the URL is
     *         malformed, or the transfer fails
     */
    public static File urlToFile(String url) throws Exception {
        if (url == null) {
            throw new IllegalArgumentException("url must not be null");
        }
        File file = File.createTempFile("net_url", suffixOf(url));
        boolean complete = false;
        try (InputStream inStream = new URL(url).openStream();
                OutputStream os = new FileOutputStream(file)) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int bytesRead;
            while ((bytesRead = inStream.read(buffer, 0, BUFFER_SIZE)) != -1) {
                os.write(buffer, 0, bytesRead);
            }
            complete = true;
        } finally {
            // Never leave a truncated download behind.
            if (!complete && !file.delete()) {
                file.deleteOnExit();
            }
        }
        return file;
    }

    /**
     * Extracts the file extension (including the dot) from the last path
     * segment of {@code url}, ignoring any query string or fragment.
     */
    private static String suffixOf(String url) {
        String path = url;
        int query = path.indexOf('?');
        if (query >= 0) {
            path = path.substring(0, query);
        }
        int fragment = path.indexOf('#');
        if (fragment >= 0) {
            path = path.substring(0, fragment);
        }
        String lastSegment = path.substring(path.lastIndexOf('/') + 1);
        int dot = lastSegment.lastIndexOf('.');
        if (dot < 0 || dot == lastSegment.length() - 1) {
            return DEFAULT_SUFFIX;
        }
        return lastSegment.substring(dot);
    }
}
测试用例
package com.benwunet.bks;
import com.benwunet.bks.util.SensitiveFilterService;
/**
 * Manual smoke test: masks the sensitive words of a sample sentence and prints
 * the text before and after replacement.
 */
public class SensitiveWordTest {

    /**
     * Runs the sample through the filter using shortest-match mode and
     * {@code "*"} as the mask.
     *
     * @param args unused
     * @throws Exception declared for compatibility; nothing is thrown explicitly here
     */
    public static void main(String[] args) throws Exception {
        SensitiveFilterService filter = SensitiveFilterService.getInstance();
        String txt = "xx需要进行检测的字符串_____________________狗杂种";
        // Use the named match type rather than the bare literal 1.
        String masked = filter.replaceSensitiveWord(txt, SensitiveFilterService.minMatchType, "*");
        System.out.println("替换前的文字为:" + txt);
        System.out.println("替换后的文字为:" + masked);
    }
}
测试结果 敏感词库
|