2020-06-17 23:02:12 +08:00
|
|
|
<?php
|
2020-11-29 09:29:14 +08:00
|
|
|
declare(strict_types=1);
|
|
|
|
|
2020-06-17 23:02:52 +08:00
|
|
|
namespace catcher\library;
|
|
|
|
|
|
|
|
use catcher\CatchCacheKeys;
|
|
|
|
use think\facade\Cache;
|
|
|
|
|
|
|
|
class Trie
{
    /**
     * The trie itself, as nested arrays: every key is a single character and
     * every value is the child node for that character. A node that completes
     * a word additionally carries the end-marker key (see $end).
     *
     * @var array
     */
    protected $tree = [];

    /**
     * Key that marks "a word ends at this node". It is longer than one
     * character, so it cannot collide with a character key.
     *
     * @var string
     */
    protected $end = 'end';

    /**
     * First sensitive word found by the most recent getSensitiveWords() call
     * made with $all = false ('' when nothing was found).
     *
     * @var string
     */
    protected $sensitiveWord = '';

    /**
     * Sensitive words found by the most recent getSensitiveWords() call made
     * with $all = true.
     *
     * @var string[]
     */
    protected $sensitiveWords = [];

    /**
     * Insert a word into the trie.
     *
     * The word is split into characters (multibyte aware) and the tree is
     * walked / extended in place. An empty word is ignored. Adding the same
     * word twice is a no-op.
     *
     * @time 2020-06-17
     * @param string $word
     * @return $this
     */
    public function add(string $word)
    {
        if ($word === '') {
            // Nothing to index; without this guard the root itself would be
            // flagged as a word end.
            return $this;
        }

        // Walk by reference instead of array_merge_recursive(): that function
        // renumbers integer keys (PHP turns the key "1" into int 1), which
        // corrupted every word containing a digit, and it turned the end
        // marker into a list when a word was added twice.
        $node = &$this->tree;

        foreach (mb_str_split($word) as $char) {
            $node[$char] ??= [];
            $node = &$node[$char];
        }

        $node[$this->end] = true;
        unset($node); // drop the reference so later writes cannot touch the tree

        return $this;
    }

    /**
     * Get the trie.
     *
     * Returns the in-memory tree when words have been added to this instance;
     * otherwise returns whatever the redis cache store holds under
     * CatchCacheKeys::TRIE_TREE (the result of the cache lookup is returned
     * as-is, so callers should verify it is an array before using it).
     *
     * @time 2020-06-17
     * @throws \Psr\SimpleCache\InvalidArgumentException
     * @return array|bool
     */
    public function getTries()
    {
        if (!empty($this->tree)) {
            return $this->tree;
        }

        return Cache::store('redis')->get(CatchCacheKeys::TRIE_TREE);
    }

    /**
     * Find the sensitive words contained in a piece of content.
     *
     * Every character position of the content is tried as a start position,
     * so overlapping words and words sharing a prefix are all reported. A word
     * is only reported when its terminal node is reached, so content that is
     * merely a prefix of a sensitive word (word "傻子", content "傻") does not
     * match.
     *
     * Results never carry over from a previous call.
     *
     * @time 2020-06-17
     * @param array $trieTree tree as produced by add() / getTries()
     * @param string $content text to scan
     * @param bool $all true: return every occurrence found, in scan order;
     *                  false: stop at the first word found
     * @return array|string list of words when $all is truthy, otherwise the
     *                      first word found or '' when there is none
     */
    public function getSensitiveWords(array $trieTree, string $content, $all = true)
    {
        $chars = mb_str_split($content);

        $found = [];
        foreach ($this->findMatches($trieTree, $chars, !$all) as [$offset, $length]) {
            $found[] = implode('', array_slice($chars, $offset, $length));
        }

        // Reset both result holders so state from earlier calls cannot leak.
        $this->sensitiveWords = $all ? $found : [];
        $this->sensitiveWord = $all ? '' : ($found[0] ?? '');

        return $all ? $this->sensitiveWords : $this->sensitiveWord;
    }

    /**
     * Mask every sensitive word in the content with '*', one star per
     * character.
     *
     * Masking is done by character position rather than by string
     * replacement, so overlapping words and words that share a prefix are
     * fully covered and text outside a match is never altered.
     *
     * @time 2020-06-17
     * @param array $tree tree as produced by add() / getTries()
     * @param string $content
     * @return string
     */
    public function replace($tree, string $content)
    {
        $chars = mb_str_split($content);

        // Matches are computed on the untouched characters before any masking.
        foreach ($this->findMatches($tree, $chars, false) as [$offset, $length]) {
            for ($i = $offset; $i < $offset + $length; $i++) {
                $chars[$i] = '*';
            }
        }

        return implode('', $chars);
    }

    /**
     * Store the in-memory tree in the redis cache store under
     * CatchCacheKeys::TRIE_TREE and return the result of that call.
     *
     * @time 2020-06-17
     */
    public function cached()
    {
        return Cache::store('redis')->set(CatchCacheKeys::TRIE_TREE, $this->tree);
    }

    /**
     * Locate every sensitive word occurrence in a list of characters.
     *
     * @param array $trieTree tree as produced by add()
     * @param string[] $chars content split into characters
     * @param bool $firstOnly stop as soon as one occurrence is found
     * @return array<int, array{0: int, 1: int}> list of [offset, length] pairs
     *                                           measured in characters
     */
    private function findMatches(array $trieTree, array $chars, bool $firstOnly): array
    {
        $matches = [];
        $total = count($chars);

        for ($start = 0; $start < $total; $start++) {
            $node = $trieTree;

            for ($i = $start; $i < $total; $i++) {
                $node = $node[$chars[$i]] ?? null;

                if (!is_array($node)) {
                    // No word continues with this character from $start.
                    break;
                }

                if (isset($node[$this->end])) {
                    $matches[] = [$start, $i - $start + 1];

                    if ($firstOnly) {
                        return $matches;
                    }
                }
            }
        }

        return $matches;
    }
}
|