关键词匹配类
<?php
namespace App\Library;
use App\Library\Redis;
/**
 * Trie-based keyword matcher whose dictionary is cached in Redis as JSON.
 *
 * Each trie node is an associative array keyed by a single UTF-8 character.
 * The reserved key 'end' (value 1) on a node marks that the path from the
 * root down to that node spells a complete keyword. Because every other key
 * is exactly one character long, 'end' can never collide with a real edge.
 */
class SensitiveWordFilter
{
    /** @var array Root node of the trie (see the class comment for the layout). */
    protected $dict;

    /** @var string Redis key under which the JSON-encoded trie is stored. */
    protected $key;

    /**
     * @param string $key Redis key used to store and fetch the dictionary.
     */
    public function __construct($key)
    {
        $this->dict = [];
        $this->key = $key;
    }

    /**
     * Adds the given keywords to the in-memory trie and stores the trie in Redis.
     *
     * Keywords are added on top of whatever this instance already holds, so the
     * method may be called repeatedly with successive batches.
     *
     * @param iterable $data Keywords; empty values and whitespace-only strings are skipped.
     * @param int      $time Lifetime of the Redis entry in seconds.
     *
     * @throws \RuntimeException When the trie cannot be encoded as JSON.
     */
    public function loadData($data, $time = 7200)
    {
        // Building a large trie is memory- and time-hungry; lift the limits for this request.
        ini_set("memory_limit", "2048M");
        set_time_limit(0);

        foreach ($data as $v) {
            if (empty($v)) {
                continue;
            }
            // Trim before testing for emptiness: a whitespace-only keyword would
            // otherwise be inserted as a zero-length word and flag the root node.
            $word = trim($v);
            if ($word === '') {
                continue;
            }
            $this->addWords($word);
        }

        $payload = json_encode($this->dict, JSON_UNESCAPED_UNICODE);
        if ($payload === false) {
            // Caching a broken payload would silently disable filtering until it expires.
            throw new \RuntimeException('Unable to encode keyword dictionary: ' . json_last_error_msg());
        }

        // SETEX replaces any previous value in one step, so no prior HKEYS/DEL
        // is needed (HKEYS would fail on the string value this method writes).
        Redis::connection()->setex($this->key, $time, $payload);
    }

    /**
     * Reports whether the dictionary key currently exists in Redis.
     *
     * @return mixed Whatever the Redis client returns for EXISTS (truthy when present).
     */
    public function checkLKey()
    {
        return Redis::connection()->exists($this->key);
    }

    /**
     * Splits a UTF-8 string into a list of single characters.
     *
     * @param string $str
     *
     * @return string[] The characters, or an empty list when $str is empty or not valid UTF-8.
     */
    protected function splitStr($str)
    {
        $chars = preg_split("//u", (string) $str, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() yields false when the subject is not valid UTF-8.
        return $chars === false ? [] : $chars;
    }

    /**
     * Inserts one keyword into the in-memory trie.
     *
     * @param string $words The keyword to insert.
     */
    protected function addWords($words)
    {
        $chars = $this->splitStr($words);
        if (!$chars) {
            // A zero-length word must not mark the root as a word end.
            return;
        }

        $node = &$this->dict;
        foreach ($chars as $char) {
            if (!isset($node[$char])) {
                $node[$char] = [];
            }
            $node = &$node[$char];
        }
        $node['end'] = 1;
        unset($node); // drop the reference so later writes cannot touch the trie
    }

    /**
     * Finds dictionary keywords occurring in a text.
     *
     * The dictionary is re-read from Redis on every call and replaces the
     * in-memory trie; a missing or undecodable entry yields an empty trie.
     *
     * @param string $str          Text to scan.
     * @param string $replace      Kept for backward compatibility. The method returns the
     *                             matched keywords, not a masked copy of the text, so this
     *                             value has no observable effect.
     * @param int    $skipDistance How many non-matching characters may be skipped, in total,
     *                             while following one keyword (0 = keyword must be contiguous).
     *
     * @return string[] Matched keywords in order of appearance (skipped characters excluded).
     */
    public function filter($str, $replace = '*', $skipDistance = 0)
    {
        $this->dict = $this->fetchDict();
        $maxDistance = max($skipDistance, 0) + 1;

        return $this->collectMatches($this->splitStr($str), $maxDistance);
    }

    /**
     * Reads and decodes the trie stored in Redis.
     *
     * @return array The decoded trie, or an empty array when nothing usable is stored.
     */
    protected function fetchDict()
    {
        $raw = Redis::connection()->get($this->key);
        if (!is_string($raw) || $raw === '') {
            return [];
        }

        $decoded = json_decode($raw, true);

        return is_array($decoded) ? $decoded : [];
    }

    /**
     * Scans a character list against the in-memory trie.
     *
     * From each start position the trie is followed as far as possible, and the
     * longest complete keyword seen on the way is reported. Remembering the last
     * word end means a short keyword is still found when a longer keyword with
     * the same prefix only matches partially.
     *
     * @param string[] $chars       Characters of the text.
     * @param int      $maxDistance Scanning of one candidate stops once this many
     *                              non-matching characters have been seen.
     *
     * @return string[] Matched keywords in order of appearance.
     */
    protected function collectMatches(array $chars, $maxDistance)
    {
        $found = [];
        $length = count($chars);

        for ($i = 0; $i < $length; $i++) {
            if (!isset($this->dict[$chars[$i]])) {
                continue;
            }

            $node = $this->dict[$chars[$i]];
            $matched = $chars[$i];
            $lastWord = isset($node['end']) ? $matched : null;
            $lastIndex = $i;
            $dist = 0;

            for ($j = $i + 1; $j < $length && $dist < $maxDistance; $j++) {
                if (!isset($node[$chars[$j]])) {
                    $dist++;
                    continue;
                }
                $node = $node[$chars[$j]];
                $matched .= $chars[$j];
                if (isset($node['end'])) {
                    $lastWord = $matched;
                    $lastIndex = $j;
                }
            }

            if ($lastWord !== null) {
                $found[] = $lastWord;
                // Resume after the matched keyword so matches never overlap.
                $i = $lastIndex;
            }
        }

        return $found;
    }

    /**
     * Tests whether the input is exactly one keyword of the in-memory trie.
     *
     * Works on the trie currently held by this instance (filled by loadData()
     * or by the most recent filter() call); Redis is not consulted.
     *
     * @param string|string[] $strArr A string, or a list of single characters.
     *
     * @return int|false 1 when the input is a complete keyword, false otherwise.
     */
    public function isMatch($strArr)
    {
        $chars = is_array($strArr) ? $strArr : $this->splitStr($strArr);

        $node = $this->dict;
        foreach ($chars as $char) {
            if (!isset($node[$char])) {
                return false;
            }
            // Descend one level; without this step every character would be
            // compared against the root node only.
            $node = $node[$char];
        }

        return isset($node['end']) ? $node['end'] : false;
    }
}
调用示例
// Build the dictionary only when it is not already cached in Redis.
$wordFilter = new SensitiveWordFilter('keywords_dict');
if (!$wordFilter->checkLKey()) {
    // Convert to a plain array before the emptiness check: the pluck() result
    // is an object, and empty() on an object is always false.
    $keywordData = Keyword::query()->pluck('keyword')->toArray();
    if (!empty($keywordData)) {
        $wordFilter->loadData($keywordData);
    }
    unset($keywordData);
}
// filter() returns the list of dictionary keywords found in the text.
// The literal must use plain ASCII quotes; typographic quotes are not string delimiters.
$keywords = $wordFilter->filter('努力读书,报效祖国');
var_dump($keywords);
|