說到敏感詞,小伙伴們可能就要吐槽了吧!
剛好最近做一個電商評論模塊,簡單的總結一下!
由於天朝的規則比較復雜,在評論的時候要求稍微會高一些,所以評論區都要進行后台敏感詞過濾。
而對於敏感詞的大致分為:禁用詞(BANNED)、審查詞(MOD)、替換詞(REPLACE)三種。
而對於這些詞最令人印象深刻就是:Java/JavaScript了,這里可以猜猜為什么會把這兩個詞語列入敏感詞的行列里呢?
恐怕也不是這么容易就能想到的吧,理由很簡單-那就是因為艾微了(自行腦補吧),這不多說,說多就犯規了(畢竟我們天朝規則復雜);
下面就進入正題了:
首先接手這個項目:我先去down了網上現有的敏感詞庫.txt;然后寫了個html界面用異步實現來向數據庫添加敏感詞(當然數據庫是先前就建好的),寫html界面的原因也是為了后續方便添加產生新的敏感詞;
一、異步添加敏感詞的代碼:
1、DAL層(數據庫層):
/// <summary>
/// Inserts one sensitive-word row into the Sensitive_words table.
/// </summary>
/// <param name="model">
/// Word to store; its wordPattern, IsForbid, IsMod and Replace values are written
/// to the columns of the same name.
/// </param>
/// <returns>Whatever DbSqlHelper.ExecuteSql returns for the INSERT statement.</returns>
public int add(Sensitive_words model)
{
    string sql = "insert into Sensitive_words(wordPattern,IsForbid,IsMod,Replace) values(@wordpattern,@isforbid,@ismod,@replace)";

    // Each placeholder in the statement needs its own, correctly named parameter;
    // values come from the "model" argument (C# is case-sensitive, "Model" is not it).
    SqlParameter[] parameter =
    {
        new SqlParameter("@wordpattern", model.wordPattern),
        new SqlParameter("@isforbid", model.IsForbid),
        new SqlParameter("@ismod", model.IsMod),
        // A null CLR value is treated as "parameter not supplied" by ADO.NET,
        // so an absent replacement has to be sent explicitly as DBNull.
        new SqlParameter("@replace", (object)model.Replace ?? System.DBNull.Value),
    };

    return DbSqlHelper.ExecuteSql(sql, parameter);
}
2、BLL層(邏輯層)
/// <summary>
/// Adds a sensitive word through the data-access layer.
/// </summary>
/// <param name="model">The sensitive word to store.</param>
/// <returns>
/// <c>true</c> when the data-access layer reports a positive result
/// (presumably the number of inserted rows); otherwise <c>false</c>.
/// </returns>
public bool Add(Sensitive_words model)
{
    // The DAL insert returns an int, which cannot be returned from a bool method
    // as-is; translate it into a success flag.
    return dal.Add(model) > 0;
}
3、Web層(一般處理程序)
/// <summary>
/// Imports sensitive words posted in the "Msg" request value.
/// Each non-empty line is expected to look like <c>WORD={ACTION}</c>, where the
/// action is <c>{BANNED}</c>, <c>{MOD}</c> or <c>{REPLACE}</c>; one row is added
/// through the BLL for every well-formed line.
/// </summary>
/// <param name="context">Current HTTP context; only its Request is read.</param>
public void ProcessRequest(HttpContext context)
{
    string msg = context.Request["Msg"]; // posted sensitive-word list
    if (string.IsNullOrEmpty(msg))
    {
        // Nothing was posted: there is nothing to import (the original crashed on Trim()).
        return;
    }

    // One entry per line; blank lines are dropped.
    string[] words = msg.Trim().Split(new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

    BLL.Sensitive_Word bll = new BLL.Sensitive_Word();
    foreach (string item in words)
    {
        string[] word = item.Split('=');
        if (word.Length < 2)
        {
            // Line without '=' has no action part; skip it instead of throwing on word[1].
            continue;
        }

        // Tolerate "WORD = {XXX}" written with spaces around '='.
        string pattern = word[0].Trim();
        string action = word[1].Trim();
        if (pattern.Length == 0)
        {
            continue;
        }

        Model.Sensitive_Word model = new Model.Sensitive_Word();
        model.WordPattern = pattern;

        if (action == "{BANNED}") // forbidden word
        {
            model.IsForbid = true;
        }
        else if (action == "{MOD}") // word that requires moderation
        {
            model.IsMod = true;
        }
        else if (action == "{REPLACE}") // word that gets replaced
        {
            // NOTE(review): this stores the literal "{REPLACE}" marker as the replacement
            // text, exactly as before - confirm whether the real replacement string
            // should be taken from the input instead.
            model.Replace = action;
        }

        bll.Add(model);
    }
}
到此完成了敏感詞的數據庫添加
二、對於用戶的輸入進行敏感詞過濾
1、獲取數據庫相關敏感詞
/// <summary>
/// Data access for the Sensitive_words table: inserting words and reading the
/// banned, moderated and replacement word lists.
/// </summary>
public class Sensitive_Word
{
    /// <summary>
    /// Inserts one sensitive word.
    /// </summary>
    /// <param name="model">Word to insert (pattern, flags and replacement text).</param>
    /// <returns>Whatever DbHelperSQL.ExecuteSql returns for the INSERT statement.</returns>
    public int Add(Model.Sensitive_Word model)
    {
        string sql = "insert into Sensitive_words(wordPattern,IsForbid,IsMod,ReplaceWord) values(@wordpattern,@isforbid,@ismod,@replace)";

        // One distinctly named parameter per placeholder in the statement.
        SqlParameter[] parameter =
        {
            new SqlParameter("@wordpattern", model.WordPattern),
            new SqlParameter("@isforbid", model.IsForbid),
            new SqlParameter("@ismod", model.IsMod),
            // Banned / moderated words carry no replacement; a null CLR value would be
            // treated as a missing parameter, so send DBNull explicitly.
            new SqlParameter("@replace", (object)model.ReplaceWord ?? System.DBNull.Value),
        };

        return DbHelperSQL.ExecuteSql(sql, parameter);
    }

    /// <summary>
    /// Gets the patterns of all banned words (IsForbid = 1).
    /// </summary>
    /// <returns>The patterns found; an empty list when there are none.</returns>
    public List<string> GetBanned()
    {
        return ReadPatterns("select wordPattern from Sensitive_words where IsForbid=1");
    }

    /// <summary>
    /// Gets the patterns of all words that require moderation (IsMod = 1).
    /// </summary>
    /// <returns>The patterns found; an empty list when there are none.</returns>
    public List<string> GetMod()
    {
        return ReadPatterns("select wordPattern from Sensitive_words where IsMod=1");
    }

    /// <summary>
    /// Gets all replacement words, i.e. rows that are neither banned nor moderated.
    /// </summary>
    /// <returns>
    /// Models with WordPattern and ReplaceWord filled in; an empty list when there are none.
    /// </returns>
    public List<Model.Sensitive_Word> GetReplace()
    {
        string sql = "select wordPattern,ReplaceWord from Sensitive_words where IsForbid=0 and IsMod=0";
        List<Model.Sensitive_Word> list = new List<Model.Sensitive_Word>();

        using (SqlDataReader reader = DbHelperSQL.ExecuteReader(sql))
        {
            // Loop over every row; a single Read() would only return the first one.
            while (reader.Read())
            {
                if (reader.IsDBNull(0) || reader.IsDBNull(1))
                {
                    // A row without pattern or replacement text cannot be applied;
                    // GetString would throw on NULL, so skip it.
                    continue;
                }

                Model.Sensitive_Word model = new Model.Sensitive_Word();
                model.WordPattern = reader.GetString(0);
                model.ReplaceWord = reader.GetString(1);
                list.Add(model);
            }
        }

        return list;
    }

    /// <summary>
    /// Runs a query whose first column is a word pattern and collects every non-NULL value.
    /// </summary>
    /// <param name="sql">SELECT statement returning the pattern in column 0.</param>
    /// <returns>All patterns returned by the query; never null.</returns>
    private static List<string> ReadPatterns(string sql)
    {
        List<string> list = new List<string>();

        using (SqlDataReader reader = DbHelperSQL.ExecuteReader(sql))
        {
            while (reader.Read())
            {
                if (!reader.IsDBNull(0))
                {
                    list.Add(reader.GetString(0));
                }
            }
        }

        return list;
    }
}
2、校驗敏感詞方法
我這里把對敏感詞的操作封裝到一個類中,當然首先你得實現添加評論的功能(代碼實現類似插入敏感詞操作)
Sensitive_WordManager.cs
/// <summary>
/// Checks user text against the banned, moderated and replacement word lists
/// provided by the data-access layer.
/// </summary>
public class Sensitive_WordManager
{
    DAL.Sensitive_Word dal = new DAL.Sensitive_Word();

    /// <summary>
    /// Checks whether the text contains a banned word.
    /// </summary>
    /// <param name="msg">Text to check.</param>
    /// <returns>
    /// <c>true</c> when any banned pattern matches; <c>false</c> otherwise, including
    /// when the text is null/empty or no banned words are defined.
    /// </returns>
    public bool CheckBanned(string msg)
    {
        if (string.IsNullOrEmpty(msg))
        {
            return false;
        }

        string regex = BuildAlternation(dal.GetBanned());
        if (regex == null)
        {
            return false;
        }

        return Regex.IsMatch(msg, regex);
    }

    /// <summary>
    /// Checks whether the text contains a word that requires moderation.
    /// </summary>
    /// <param name="msg">Text to check.</param>
    /// <returns>
    /// <c>true</c> when any moderated pattern matches; <c>false</c> otherwise, including
    /// when the text is null/empty or no moderated words are defined.
    /// </returns>
    public bool CheckMod(string msg)
    {
        if (string.IsNullOrEmpty(msg))
        {
            return false;
        }

        string regex = BuildAlternation(dal.GetMod());
        if (regex == null)
        {
            return false;
        }

        // Strings are immutable: the rewritten pattern must be assigned back, otherwise
        // the escaping below has no effect.
        // NOTE(review): "{2}" is rewritten to "{0,2}" exactly as written in the original
        // call - confirm this is the intended regex for the stored word format.
        regex = regex.Replace(@"\", @"\\").Replace("{2}", "{0,2}");

        return Regex.IsMatch(msg, regex);
    }

    /// <summary>
    /// Replaces every occurrence of each replacement word in the text.
    /// </summary>
    /// <param name="msg">Text to process.</param>
    /// <returns>The text with all replacement words substituted; the input itself when there is nothing to do.</returns>
    public string CheckReplace(string msg)
    {
        if (string.IsNullOrEmpty(msg))
        {
            return msg;
        }

        List<Model.Sensitive_Word> list = dal.GetReplace();
        if (list == null)
        {
            return msg;
        }

        foreach (Model.Sensitive_Word model in list)
        {
            if (string.IsNullOrEmpty(model.WordPattern))
            {
                // string.Replace throws on an empty search string.
                continue;
            }

            // Replace returns a new string; keep the result.
            msg = msg.Replace(model.WordPattern, model.ReplaceWord);
        }

        return msg;
    }

    /// <summary>
    /// Joins the patterns into a single "a|b|c" regular expression.
    /// </summary>
    /// <param name="patterns">Word patterns; may be null.</param>
    /// <returns>
    /// The combined pattern, or null when there is no usable pattern. An empty
    /// pattern (or an empty alternative) would match every input, so those are dropped.
    /// </returns>
    private static string BuildAlternation(List<string> patterns)
    {
        if (patterns == null)
        {
            return null;
        }

        List<string> usable = new List<string>();
        foreach (string pattern in patterns)
        {
            if (!string.IsNullOrEmpty(pattern))
            {
                usable.Add(pattern);
            }
        }

        if (usable.Count == 0)
        {
            return null;
        }

        return string.Join("|", usable.ToArray());
    }
}
3、校驗用戶名評論
/// <summary>
/// HTTP handler for product comments. Comment text is run through the
/// sensitive-word checks before it is stored.
/// </summary>
public class Comment : IHttpHandler
{
    BLL.Sensitive_WordManager sensitive = new BLL.Sensitive_WordManager();

    /// <summary>
    /// Handler entry point.
    /// </summary>
    /// <param name="context">Current HTTP context.</param>
    public void ProcessRequest(HttpContext context)
    {
        // NOTE(review): this still writes the template response and never calls
        // AddComment - confirm how requests are meant to reach AddComment.
        context.Response.ContentType = "text/plain";
        context.Response.Write("Hello World");
    }

    /// <summary>
    /// Validates the "msg" request value against the sensitive-word lists and stores
    /// it when allowed: banned words reject the comment, moderated words are reported
    /// but still stored, anything else is stored after word replacement.
    /// </summary>
    /// <param name="context">Current HTTP context; reads "msg" and writes the result text.</param>
    public void AddComment(HttpContext context)
    {
        string msg = context.Request["msg"];
        if (string.IsNullOrEmpty(msg))
        {
            // No text posted: reject instead of failing inside the word checks.
            context.Response.Write("no:評論內容不能為空!");
            return;
        }

        if (sensitive.CheckBanned(msg)) // banned-word filter
        {
            context.Response.Write("no:評論中含有禁用詞!");
        }
        else if (sensitive.CheckMod(msg)) // moderated-word filter
        {
            // Moderated comments are reported to the client but still stored, so the
            // response is this message followed by the result of the insert.
            context.Response.Write("no:評論中含有審查詞!");
            AddComment(context, msg);
        }
        else // replacement-word filter
        {
            msg = sensitive.CheckReplace(msg);
            AddComment(context, msg); // store the text after replacement
        }
    }

    /// <summary>
    /// Stores a comment for the product identified by the "productId" request value.
    /// </summary>
    /// <param name="context">Current HTTP context; reads "productId" and writes the result text.</param>
    /// <param name="msg">Comment text entered by the user.</param>
    public void AddComment(HttpContext context, string msg)
    {
        int productId;
        if (!int.TryParse(context.Request["productId"], out productId))
        {
            // Missing or non-numeric id: report it rather than throwing FormatException
            // or silently storing the comment under product 0.
            context.Response.Write("no:商品編號無效!");
            return;
        }

        Model.Comment comment = new Model.Comment();
        BLL.CommentManager commentManager = new BLL.CommentManager();
        comment.commentContent = msg;
        comment.productId = productId;
        comment.commentTime = DateTime.Now;

        if (commentManager.Add(comment))
        {
            context.Response.Write("ok");
        }
        else
        {
            // Tell the client when the insert did not succeed instead of sending nothing.
            context.Response.Write("no:評論添加失敗!");
        }
    }

    /// <summary>
    /// Indicates that a handler instance is not reused across requests.
    /// </summary>
    public bool IsReusable
    {
        get
        {
            return false;
        }
    }
}