全站違禁詞掃描類文件下載地址:
https://files.cnblogs.com/files/kingchou/%E8%BF%9D%E7%A6%81%E8%AF%8D%E7%B1%BB%E6%96%87%E4%BB%B6.rar
全站掃描違禁詞代碼
// Load the scanner class, turn on page output and log writing through the
// class's magic setter, then start the file scan.
require_once ROOT_PATH . "/Banned.php";

$banned = new Banned();
$banned->write_html = true;
$banned->write_log = true;
$banned->check_type = "file";
$banned->checkFileAll();
類文件(Banned.php):
<?php
date_default_timezone_set('Asia/Shanghai');

/**
 * Scans text and files for "sensitive" and "banned" words.
 *
 * Word lists are read once, in the constructor, from
 * <document root>/words/forbidden.txt (sensitive words) and
 * <document root>/words/banned.txt (banned words), one word per line.
 *
 * Options live in $_config and can be passed to the constructor or assigned
 * as properties afterwards (handled by __set):
 *  - path        : directory to scan; empty means the document root
 *  - check_type  : only 'file' is handled by check() / checkAll()
 *  - fornum      : at most fornum + 1 distinct words are reported per list
 *  - write_html  : echo every finding as HTML
 *  - write_log   : append every finding to a CSV file under <document root>/logs
 *  - bannedword  : whether banned.txt is loaded (read in the constructor only,
 *                  so changing it after construction has no effect)
 *  - check_sub_dir, banned_from, clean : stored but not read by this class
 *
 * Created by PhpStorm.
 * Author: zhouzj
 * Date: 2017/11/17 15:52
 */
class Banned
{
    private $data;                   // unused
    private $match_banned;           // banned words: lower-cased word => preg-quoted fragment
    private $match_mingan;           // sensitive words: lower-cased word => preg-quoted fragment
    private $match_field;            // unused
    private $finish_path = array();  // paths treated as already checked
    private $finish_table = array(); // unused
    private $logtime;                // timestamp used in the log directory name

    // Declared so that outside assignments such as $banned->write_log = true
    // are routed through __set() into $_config.
    private $path;
    private $check_sub_dir;
    private $check_type;
    private $banned_from;
    private $clean;
    private $write_html;
    private $write_log;
    private $fornum;
    private $bannedword;

    public $document_root;
    public $_config = array(
        "path" => '',
        "check_sub_dir" => array('web'),
        'check_type' => 'file',
        'banned_from' => 'table',
        'clean' => false,
        'fornum' => 20,
        'write_html' => false,
        'write_log' => false,
        'bannedword' => true,
    );

    /**
     * @param array $config overrides merged over the defaults in $_config
     */
    public function __construct($config = array())
    {
        if (!empty($config)) {
            $this->_config = array_merge($this->_config, $config);
        }
        $this->BannedInit();
    }

    /**
     * Store a configuration value.
     *
     * @param string $key
     * @param mixed  $value
     */
    public function __set($key, $value)
    {
        $this->_config[$key] = $value;
    }

    /**
     * Read a configuration value.
     *
     * @param string $key
     * @return mixed the stored value, or null when the key is not set
     */
    public function __get($key)
    {
        return isset($this->_config[$key]) ? $this->_config[$key] : null;
    }

    /**
     * Record the log timestamp and document root, then load both word lists.
     */
    private function BannedInit()
    {
        $this->logtime = date("YmdHis");
        $this->document_root = isset($_SERVER['DOCUMENT_ROOT']) ? $_SERVER['DOCUMENT_ROOT'] : '';

        $this->match_mingan = $this->buildWordMap($this->getMinganWords());
        $this->match_banned = $this->buildWordMap($this->getBannedWords());
    }

    /**
     * Run the scan selected by the 'check_type' option.
     *
     * @return bool|null true after a file scan, null for any other check_type
     */
    public function checkAll()
    {
        if ($this->_config['check_type'] == "file") {
            return $this->checkFileAll();
        }
    }

    /**
     * Recursively scan every sub-directory of the configured path.
     *
     * NOTE(review): files that sit directly in the root path are not scanned,
     * only files inside its sub-directories. That matches the original
     * behaviour; confirm whether it is intended.
     *
     * @return bool always true
     */
    public function checkFileAll()
    {
        $path = $this->_config['path'];
        if (!$path) {
            $path = $this->document_root;
        }
        if (substr($path, -1) != "/") {
            $path .= "/";
        }

        foreach ($this->getDir($path) as $dirname) {
            $this->check_sub_dir($path . $dirname . '/');
        }

        return true;
    }

    /**
     * Check a single item according to the 'check_type' option.
     *
     * @param string $content a file path when check_type is 'file'
     * @return array|null see check_file(); null for any other check_type
     */
    public function check($content)
    {
        if ($this->_config['check_type'] == "file") {
            return $this->check_file($content);
        }
    }

    /**
     * Check a piece of text.
     *
     * @param string $content
     * @return array array('mingan' => string[], 'banned' => string[])
     */
    public function check_text($content)
    {
        $res_mingan = array();
        $res_banned = array();
        $this->__check_content($content, $res_mingan, $res_banned);

        $data = array();
        $data['mingan'] = $res_mingan;
        $data['banned'] = $res_banned;
        return $data;
    }

    /**
     * Check one file and append its path to the "finished" list file.
     *
     * @param string $content path of the file to check
     * @return array|null see __check_file()
     */
    public function check_file($content)
    {
        $res = $this->__check_file($content);
        $this->set_finish_path($content);
        return $res;
    }

    /**
     * Check every matching file in $path, then recurse into its sub-directories.
     *
     * @param string $path directory path ending with "/"
     */
    public function check_sub_dir($path)
    {
        foreach ($this->getFile($path) as $file) {
            $this->__check_file($file);
        }
        foreach ($this->getDir($path) as $dirname) {
            $this->check_sub_dir($path . $dirname . '/');
        }
    }

    /**
     * Check the content of one file and report findings via write_log() and
     * write_html().
     *
     * @param string $filepath
     * @return array|null null when the file is skipped; an empty array when
     *                    nothing was found; otherwise
     *                    array('mingan' => string[], 'banned' => string[])
     */
    public function __check_file($filepath)
    {
        if (!$filepath) {
            return;
        }
        // NOTE(review): nothing in this class adds entries to $finish_path,
        // so this guard currently never triggers.
        if (in_array($filepath, $this->finish_path)) {
            return;
        }
        if (!file_exists($filepath)) {
            return;
        }
        // Never scan word-list files themselves.
        if (stripos($filepath, "mingan_words.txt") !== false
            || stripos($filepath, "banned_words.txt") !== false) {
            return;
        }

        $data = array();
        $content = file_get_contents($filepath);
        if ($content === false) {
            // Unreadable (for example a directory): nothing to report.
            return $data;
        }

        $res_mingan = array();
        $res_banned = array();
        $this->__check_content($content, $res_mingan, $res_banned);

        if ($res_mingan || $res_banned) {
            $this->write_log($filepath, $res_mingan, $res_banned);
            $this->write_html($filepath, $res_mingan, $res_banned);
            $data['mingan'] = $res_mingan;
            $data['banned'] = $res_banned;
        }

        return $data;
    }

    /**
     * Run both word lists against $content.
     *
     * @param string $content
     * @param array  $res_mingan receives the sensitive words found
     * @param array  $res_banned receives the banned words found
     */
    private function __check_content($content, &$res_mingan, &$res_banned)
    {
        $res_mingan = $this->check_words($this->match_mingan, $content);
        $res_banned = $this->check_words($this->match_banned, $content);
    }

    /**
     * Find the distinct listed words that occur in $string.
     *
     * Each round searches for the leftmost occurrence of any remaining word,
     * records it as it appears in the text, and removes that word from the
     * candidates. Hits are therefore unique and ordered by first occurrence.
     * At most fornum + 1 rounds are run.
     *
     * @param array  $banned word map produced by buildWordMap()
     * @param string $string text to search
     * @return array list of matched words (possibly empty)
     */
    private function check_words($banned, $string)
    {
        $found = array();
        if (empty($banned) || !is_scalar($string)) {
            return $found;
        }
        $string = (string) $string;
        if ($string === '') {
            return $found;
        }

        $remaining = $banned;
        $max_round = max(0, (int) $this->_config['fornum']);

        for ($round = 0; $round <= $max_round && $remaining; $round++) {
            $matches = array();
            $pattern = $this->generateRegularExpression($remaining);
            if (preg_match($pattern, $string, $matches) !== 1 || $matches[0] === '') {
                break;
            }

            $found[] = $matches[0];

            $key = strtolower($matches[0]);
            if (!isset($remaining[$key])) {
                // The regex folded case differently than strtolower(); the word
                // cannot be removed, so stop instead of reporting it again.
                break;
            }
            unset($remaining[$key]);
        }

        return $found;
    }

    /**
     * Normalise a word list into a map of lower-cased word => regex fragment.
     *
     * Words are trimmed, empty entries are dropped (an empty alternative would
     * make the pattern match the empty string and hide every real hit), and
     * each word is quoted including the "/" delimiter.
     *
     * @param array $words raw words
     * @return array
     */
    private function buildWordMap($words)
    {
        $map = array();
        foreach ((array) $words as $word) {
            $word = strtolower(trim((string) $word));
            if ($word === '') {
                continue;
            }
            $map[$word] = preg_quote($word, '/');
        }
        return $map;
    }

    /**
     * Join already-quoted fragments into one case-insensitive alternation.
     *
     * @param array $words preg-quoted fragments
     * @return string the pattern, or '' when there are no fragments
     */
    private function generateRegularExpression($words)
    {
        if (empty($words)) {
            return '';
        }
        return '/' . implode('|', $words) . '/i';
    }

    /**
     * Append one finding to the CSV log when 'write_log' is enabled.
     *
     * @param string $location    path of the file the words were found in
     * @param array  $contentarr  sensitive words found
     * @param array  $weijinciarr banned words found
     */
    private function write_log($location, $contentarr, $weijinciarr)
    {
        if (empty($this->_config['write_log'])) {
            return;
        }
        if (!$contentarr && !$weijinciarr) {
            return;
        }

        // Fields are converted one by one because the path and the matched
        // words may not share an encoding.
        $content = $this->toGbk($location)
            . $this->toGbk($this->csvColumns($contentarr))
            . $this->toGbk($this->csvColumns($weijinciarr))
            . "\r\n";

        $file = $this->document_root . "/logs/file" . $this->logtime . "/file_bannwords.csv";
        if (!file_exists($file)) {
            $pathdir = dirname($file);
            if (!is_dir($pathdir)) {
                mkdir($pathdir, 0775, true);
            }
            $content_title = "位置,敏感詞數量,敏感詞,違禁詞數量,違禁詞" . "\r\n";
            file_put_contents($file, $this->toGbk($content_title), FILE_APPEND);
        }
        file_put_contents($file, $content, FILE_APPEND);
    }

    /**
     * Format one word list as the two CSV columns ",<count>,<w1|w2|...>".
     *
     * @param array $words
     * @return string ",," when the list is empty
     */
    private function csvColumns($words)
    {
        if (empty($words)) {
            return ",,";
        }
        return "," . count($words) . "," . implode('|', $words);
    }

    /**
     * Echo one finding as HTML when 'write_html' is enabled.
     *
     * @param string $location   path of the file the words were found in
     * @param array  $res_mingan sensitive words found
     * @param array  $res_banned banned words found
     */
    private function write_html($location, $res_mingan, $res_banned)
    {
        if (empty($this->_config['write_html'])) {
            return;
        }

        echo $this->toUtf8($location);
        if ($res_mingan) {
            echo " <font color='red'>敏感詞(" . count($res_mingan) . "):</font>" . implode('|', $res_mingan);
        }
        if ($res_banned) {
            echo " <font color='red'>違禁詞(" . count($res_banned) . "):</font>" . implode('|', $res_banned);
        }
        echo "<br>";
    }

    /**
     * Append a path to the "finished" list file of the current run.
     *
     * @param string $path
     */
    private function set_finish_path($path)
    {
        if (!$path) {
            return;
        }

        $file = $this->document_root . "/logs/file" . $this->logtime . "/banned_finish_path.txt";
        $pathdir = dirname($file);
        // Test the directory, not the file: write_log() may already have
        // created the directory.
        if (!is_dir($pathdir)) {
            mkdir($pathdir, 0775, true);
        }
        file_put_contents($file, $this->toUtf8($path . "\r\n"), FILE_APPEND);
    }

    /**
     * Return $text as UTF-8.
     *
     * Text that is already valid UTF-8 is returned untouched; anything else is
     * assumed to be GBK (a superset of GB2312). When the conversion fails the
     * original bytes are returned rather than losing the text.
     *
     * @param string $text
     * @return string
     */
    private function toUtf8($text)
    {
        $text = (string) $text;
        if ($text === '' || preg_match('//u', $text) === 1) {
            return $text;
        }
        // iconv() raises a notice on illegal input; the fallback handles it.
        $converted = @iconv('GBK', 'UTF-8', $text);
        return ($converted === false) ? $text : $converted;
    }

    /**
     * Return $text as GBK, falling back to the original bytes when it cannot
     * be converted.
     *
     * @param string $text
     * @return string
     */
    private function toGbk($text)
    {
        $text = (string) $text;
        if ($text === '') {
            return $text;
        }
        // iconv() raises a notice on illegal input; the fallback handles it.
        $converted = @iconv('UTF-8', 'GBK', $this->toUtf8($text));
        return ($converted === false) ? $text : $converted;
    }

    /**
     * List the names of the sub-directories of $dir.
     *
     * Hidden entries (names starting with ".") are skipped, which also covers
     * "." and "..".
     *
     * @param string $dir
     * @return array directory names without path
     */
    private function getDir($dir)
    {
        $dirs = array();
        if (!is_dir($dir)) {
            return $dirs;
        }
        // Unreadable directories are expected during a site scan; skip quietly.
        $handle = @opendir($dir);
        if ($handle === false) {
            return $dirs;
        }

        $base = rtrim($dir, '/') . '/';
        while (false !== ($entry = readdir($handle))) {
            if ($entry === '' || $entry[0] === '.') {
                continue;
            }
            if (is_dir($base . $entry)) {
                $dirs[] = $entry;
            }
        }
        closedir($handle);

        return $dirs;
    }

    /**
     * List the files in $dir whose name contains ".php" or ".html".
     *
     * Collection stops after 1001 files from one directory.
     *
     * @param string $dir directory path ending with "/"
     * @return array file paths ($dir . name)
     */
    private function getFile($dir)
    {
        $files = array();
        // Unreadable directories are expected during a site scan; skip quietly.
        $handle = @opendir($dir);
        if ($handle === false) {
            return $files;
        }

        while (false !== ($entry = readdir($handle))) {
            if ($entry === '.' || $entry === '..') {
                continue;
            }
            if (strpos($entry, ".php") === false && strpos($entry, ".html") === false) {
                continue;
            }
            if (!is_file($dir . $entry)) {
                continue;
            }
            $files[] = $dir . $entry;
            if (count($files) > 1000) {
                break;
            }
        }
        closedir($handle);

        return $files;
    }

    /**
     * Read a word list: one word per line, any line-ending style.
     *
     * A leading UTF-8 BOM is removed, words are trimmed and blank lines are
     * dropped.
     *
     * @param string $file
     * @return array list of words; empty when the file cannot be read
     */
    private function loadWords($file)
    {
        if (!is_file($file) || !is_readable($file)) {
            trigger_error("Banned: word list not readable: " . $file, E_USER_WARNING);
            return array();
        }
        $raw = file_get_contents($file);
        if ($raw === false || $raw === '') {
            return array();
        }
        if (strncmp($raw, "\xEF\xBB\xBF", 3) === 0) {
            $raw = (string) substr($raw, 3);
        }

        $words = array();
        foreach (preg_split('/\r\n|\r|\n/', $raw) as $line) {
            $line = trim($line);
            if ($line !== '') {
                $words[] = $line;
            }
        }
        return $words;
    }

    /**
     * Load the sensitive-word list.
     *
     * @return array
     */
    private function getMinganWords()
    {
        return $this->loadWords($this->document_root . "/words/forbidden.txt");
    }

    /**
     * Load the banned-word list, or nothing when 'bannedword' is disabled.
     *
     * @return array
     */
    private function getBannedWords()
    {
        if (empty($this->_config['bannedword'])) {
            return array();
        }
        return $this->loadWords($this->document_root . "/words/banned.txt");
    }
}