/// <summary>
/// Receives a posted message, masks any sensitive words it contains and returns the
/// masked text as a JSON result.
/// </summary>
/// <param name="content">Message text taken from the POST request.</param>
/// <returns>A JSON result wrapping the filtered text.</returns>
[HttpPost]
public ActionResult SendMsg(string content)
{
    // SensitiveTextFilter takes the text as its only argument.
    content = SensitiveWordsHelper.SensitiveTextFilter(content);
    return Json(content);
}
/// <summary>
/// Sensitive-word filter. The word list is read from an Excel workbook the first time it is
/// needed and cached for later calls; matches found in the input text are replaced by asterisks.
/// </summary>
public class SensitiveWordsHelper
{
    /// <summary>Logger used to record which sensitive words were found in filtered text.</summary>
    protected static readonly Logger _logger = LogManager.GetCurrentClassLogger();

    /// <summary>Serialises the one-time construction of the word dictionary.</summary>
    private static readonly object _loadLock = new object();

    /// <summary>Matches anything shaped like an HTML tag; built once instead of per call.</summary>
    private static readonly Regex _htmlTagRegex = new Regex("<[^>]*>");

    /// <summary>The two characters that delimit an HTML tag.</summary>
    private static readonly char[] _tagDelimiters = { '<', '>' };

    /// <summary>
    /// Initial capacity of the list the workbook is read into.
    /// NOTE(review): presumably the approximate size of the shipped word list - confirm.
    /// </summary>
    private const int InitialWordCapacity = 17200;

    /// <summary>
    /// Cached dictionary: first character of a word -> all words starting with that character,
    /// shortest first. Null until the first successful load; never mutated after publication.
    /// </summary>
    private static volatile Dictionary<char, List<string>> _sensitiveWordsDictionary;

    /// <summary>
    /// Returns the sensitive-word dictionary, loading it on first use.
    /// </summary>
    /// <returns>Words grouped by their first character, each group ordered by ascending length.</returns>
    private static Dictionary<char, List<string>> GetSensitiveWordsDictionary()
    {
        var cached = _sensitiveWordsDictionary;
        if (cached != null)
        {
            return cached;
        }

        lock (_loadLock)
        {
            if (_sensitiveWordsDictionary == null)
            {
                // The dictionary is built completely before it is assigned to the shared field,
                // so concurrent callers never observe (or modify) a half-filled instance.
                // If loading throws, the field stays null and the next call retries.
                _sensitiveWordsDictionary = BuildDictionary(LoadSensitiveWords());
            }

            return _sensitiveWordsDictionary;
        }
    }

    /// <summary>
    /// Reads the first column of the "敏感词" sheet of ~/Data/SensitiveWord.xlsx.
    /// </summary>
    /// <returns>Every non-blank value found, in row order.</returns>
    /// <exception cref="System.InvalidOperationException">The workbook has no sheet with the expected name.</exception>
    private static List<string> LoadSensitiveWords()
    {
        var words = new List<string>(InitialWordCapacity);
        // Location of the Excel workbook that holds the words to compare against.
        var path = HttpContext.Current.Server.MapPath("~/Data/SensitiveWord.xlsx");
        var workbook = new XSSFWorkbook(path);
        try
        {
            var sheet = workbook.GetSheet("敏感词");
            if (sheet == null)
            {
                throw new System.InvalidOperationException(
                    "Sheet \"敏感词\" was not found in workbook " + path);
            }

            var lastRow = sheet.LastRowNum;
            for (var i = sheet.FirstRowNum; i <= lastRow; i++)
            {
                var row = sheet.GetRow(i);
                if (row == null)
                {
                    continue;
                }

                // A row can exist without a first cell; skip it instead of dereferencing null.
                var cell = row.GetCell(0);
                if (cell == null)
                {
                    continue;
                }

                string word;
                try
                {
                    word = cell.StringCellValue;
                }
                catch
                {
                    // Reading the string value failed, so fall back to the numeric value
                    // (e.g. a word that consists only of digits). Invariant culture keeps the
                    // text independent of the server's regional settings.
                    word = cell.NumericCellValue.ToString(System.Globalization.CultureInfo.InvariantCulture);
                }

                if (!string.IsNullOrWhiteSpace(word))
                {
                    words.Add(word);
                }
            }
        }
        finally
        {
            // Close the workbook even when reading fails part-way through.
            workbook.Close();
        }

        return words;
    }

    /// <summary>
    /// Groups words by their first character.
    /// </summary>
    /// <param name="words">Words to index; blank entries are ignored.</param>
    /// <returns>Dictionary whose lists are ordered by ascending word length.</returns>
    private static Dictionary<char, List<string>> BuildDictionary(IEnumerable<string> words)
    {
        var byFirstChar = new Dictionary<char, List<string>>();

        // OrderBy is a stable sort, so filling the buckets from the length-ordered sequence
        // leaves every bucket shortest-first. Doing it once here replaces a sort that used to
        // run for every matching character of every filtered text.
        foreach (var word in words.Where(w => !string.IsNullOrWhiteSpace(w)).OrderBy(w => w.Length))
        {
            List<string> bucket;
            if (!byFirstChar.TryGetValue(word[0], out bucket))
            {
                bucket = new List<string>();
                byFirstChar.Add(word[0], bucket);
            }

            bucket.Add(word);
        }

        return byFirstChar;
    }

    /// <summary>
    /// Replaces every sensitive word in <paramref name="text"/> with asterisks of the same length.
    /// Words that sit between a '&lt;' and the following '&gt;' (i.e. inside an HTML tag) are left
    /// untouched. At each position the shortest matching word wins.
    /// </summary>
    /// <param name="text">Text to filter; may contain HTML.</param>
    /// <returns>The filtered text, or null when <paramref name="text"/> is null.</returns>
    public static string SensitiveTextFilter(string text)
    {
        if (text == null)
        {
            return null;
        }

        var hasHtmlTag = _htmlTagRegex.IsMatch(text);
        var dictionary = GetSensitiveWordsDictionary();
        var textLength = text.Length;
        var output = new StringBuilder(textLength);
        var matchedLog = new StringBuilder();

        for (var i = 0; i < textLength; i++)
        {
            var current = text[i];
            var matchedLength = 0;

            List<string> candidates;
            if (dictionary.TryGetValue(current, out candidates))
            {
                foreach (var word in candidates)
                {
                    var wordLength = word.Length;
                    if (i + wordLength > textLength)
                    {
                        // Candidates are ordered by length, so no later one fits either.
                        break;
                    }

                    // Ordinal comparison in place; avoids allocating a substring per candidate.
                    if (string.CompareOrdinal(text, i, word, 0, wordLength) != 0)
                    {
                        continue;
                    }

                    // A hit inside an HTML tag is not masked; a longer candidate may still
                    // match, so keep looking.
                    if (hasHtmlTag && IsTextInsideAHtmlTag(text, word, i))
                    {
                        continue;
                    }

                    matchedLength = wordLength;
                    matchedLog.Append(" ");
                    matchedLog.Append(word);
                    break;
                }
            }

            if (matchedLength > 0)
            {
                output.Append(GetStarwordString(matchedLength));
                // Jump over the masked word; the loop increment moves to the next character.
                i += matchedLength - 1;
            }
            else
            {
                output.Append(current);
            }
        }

        if (matchedLog.Length > 0)
        {
            var filterWords = matchedLog.ToString();
            _logger.Info($"敏感词过滤:{filterWords}, 上下文:{text}");
        }

        return output.ToString();
    }

    /// <summary>
    /// Determines whether the word found at position <paramref name="i"/> lies inside an HTML tag,
    /// i.e. the nearest tag delimiter before it is '&lt;' and the nearest one after it is '&gt;'.
    /// </summary>
    /// <param name="text">The whole rich text.</param>
    /// <param name="sensitiveWord">The sensitive word that was matched.</param>
    /// <param name="i">Index of the sensitive word within <paramref name="text"/>.</param>
    /// <returns>True when the word is enclosed as "&lt;...word...&gt;"; otherwise false.</returns>
    private static bool IsTextInsideAHtmlTag(string text, string sensitiveWord, int i)
    {
        if (i <= 0)
        {
            // Nothing precedes the word, so no '<' can open a tag around it.
            return false;
        }

        // Nearest delimiter to the left must be '<'; a '>' means the word follows a closed tag.
        var left = text.LastIndexOfAny(_tagDelimiters, i - 1);
        if (left < 0 || text[left] != '<')
        {
            return false;
        }

        var afterWord = i + sensitiveWord.Length;
        if (afterWord >= text.Length)
        {
            return false;
        }

        // Nearest delimiter to the right must be '>'; a '<' means a new tag starts first.
        var right = text.IndexOfAny(_tagDelimiters, afterWord);
        return right >= 0 && text[right] == '>';
    }

    /// <summary>
    /// Builds the mask that replaces a sensitive word.
    /// </summary>
    /// <param name="length">Number of asterisks to produce.</param>
    /// <returns>A string of <paramref name="length"/> asterisks; empty when the length is not positive.</returns>
    private static string GetStarwordString(int length)
    {
        return length <= 0 ? string.Empty : new string('*', length);
    }
}
|