1 import java.util.HashSet;
2 import java.util.Iterator;
3 import java.util.Map;
4 import java.util.Set;
5
6 /**
7 * 敏感詞過濾
8 */
9 public class SensitivewordFilter {
10 private Map sensitiveWordMap = null;
11 public static int minMatchTYpe = 1; //最小匹配規則
12 public static int maxMatchType = 2; //最大匹配規則
13 private static String replaceString = null;
14 /**例如:敏感詞中含有中國人、中國
15 * 最小匹配規則minMatchTYpe為1時,會匹配出**人,為2時,會匹配出***
16 * */
17 public static void main(String[] args) throws Exception{
18 SensitivewordFilter filter = new SensitivewordFilter();
19 System.out.println("敏感詞的數量:" + filter.sensitiveWordMap.size());
20 String string = "dfa是面向三級裝配的設計(Design for assembly)的英文簡稱,是指在產品設計階段設計產品使得產品具有良好" +
21 "的可裝配性,確保裝配工序簡單、裝配效率高、裝配質量高、裝配不良率低和裝配成本低。面向裝配的設計通過一系" +
22 "列有利於裝配的設計指南例如簡化產品設計、減少零件數量等,女女並同裝配工程師一起合作,被逼簡化產品結構,近親使其便於" +
23 "裝配,為提高產品質量、縮短產品開發周期和降低產品成本奠定基礎";
24 // ------獲取敏感詞---------
25 Set<String> set = filter.getSensitiveWord(string, 1);
26 System.out.println("含敏感詞的個數為:" + set.size() + "。包含:" + set);
27 // ------------------------替換敏感字begin----------------------
28 Iterator<String> iterator = set.iterator();
29 String word = null;
30 while (iterator.hasNext()) {
31 word = iterator.next();
32 /**
33 * 得到word中敏感關鍵詞被替換后的字符串,例如:***
34 * */
35 getReplaceCharsS("*", word.length());
36 /**
37 * 將原字符串中的敏感關鍵詞替換成帶有replaceChar
38 * 或全部為replaceChar的關鍵詞
39 * */
40 string = string.replaceAll(word, replaceString);
41 }
42 // ------------------------替換敏感字end----------------------
43 System.out.println(string);
44 }
45
46 /**
47 * 構造函數,初始化敏感詞庫
48 */
49 public SensitivewordFilter(){
50 sensitiveWordMap = new SensitiveWordInit().initKeyWord();
51 }
52
53 /**
54 * 判斷文字是否包含敏感字符
55 * @param matchType 匹配規則 1:最小匹配規則,2:最大匹配規則
56 */
57 public boolean isContaintSensitiveWord(String txt,int matchType){
58 boolean flag = false;
59 for(int i = 0 ; i < txt.length() ; i++){
60 int matchFlag = this.CheckSensitiveWord(txt, i, matchType); //判斷是否包含敏感字符
61 if(matchFlag > 0){ //大於0存在,返回true
62 flag = true;
63 }
64 }
65 return flag;
66 }
67
68 /**
69 * 獲取文字中的敏感詞
70 * @param matchType 匹配規則 1:最小匹配規則,2:最大匹配規則
71 */
72 public Set<String> getSensitiveWord(String txt , int matchType){
73 Set<String> sensitiveWordList = new HashSet<String>();
74
75 for(int i = 0 ; i < txt.length() ; i++){
76 int length = CheckSensitiveWord(txt, i, matchType); //判斷是否包含敏感字符
77 if(length > 0){ //存在,加入list中
78 sensitiveWordList.add(txt.substring(i, i+length));
79 i = i + length - 1; //減1的原因,是因為for會自增
80 }
81 }
82
83 return sensitiveWordList;
84 }
85
86 /**
87 * 替換敏感字字符,默認*
88 */
89 public String replaceSensitiveWord(String txt,int matchType,String replaceChar){
90 String resultTxt = txt;
91 Set<String> set = getSensitiveWord(txt, matchType); //獲取所有的敏感詞
92 Iterator<String> iterator = set.iterator();
93 String word = null;
94 String replaceString = null;
95 while (iterator.hasNext()) {
96 word = iterator.next();
97 replaceString = getReplaceChars(replaceChar, word.length());
98 resultTxt = resultTxt.replaceAll(word, replaceString);
99 }
100
101 return resultTxt;
102 }
103
104 /**
105 * 獲取替換字符串
106 */
107 private String getReplaceChars(String replaceChar,int length){
108 String resultReplace = replaceChar;
109 for(int i = 1 ; i < length ; i++){
110 resultReplace += replaceChar;
111 }
112
113 return resultReplace;
114 }
115
116 /**
117 * 獲取替換字符串,無返回值
118 */
119 private static void getReplaceCharsS(String replaceChar,int length){
120 replaceString = "";
121 String resultReplace = replaceChar;
122 for(int i = 1 ; i < length ; i++){
123 resultReplace += replaceChar;
124 }
125 replaceString = resultReplace;
126 }
127
128 /**
129 * 檢查文字中是否包含敏感字符,檢查規則如下:<br>
130 */
131 @SuppressWarnings({ "rawtypes"})
132 public int CheckSensitiveWord(String txt,int beginIndex,int matchType){
133 boolean flag = false; //敏感詞結束標識位:用於敏感詞只有1位的情況
134 int matchFlag = 0; //匹配標識數默認為0
135 char word = 0;
136 Map nowMap = sensitiveWordMap;
137 for(int i = beginIndex; i < txt.length() ; i++){
138 word = txt.charAt(i);
139 nowMap = (Map) nowMap.get(word); //獲取指定key
140 if(nowMap != null){ //存在,則判斷是否為最后一個
141 matchFlag++; //找到相應key,匹配標識+1
142 if("1".equals(nowMap.get("isEnd"))){ //如果為最后一個匹配規則,結束循環,返回匹配標識數
143 flag = true; //結束標志位為true
144 if(SensitivewordFilter.minMatchTYpe == matchType){ //最小規則,直接返回,最大規則還需繼續查找
145 break;
146 }
147 }
148 }
149 else{ //不存在,直接返回
150 break;
151 }
152 }
153 if(matchFlag < 2 || !flag){ //長度必須大於等於1,為詞
154 matchFlag = 0;
155 }
156 return matchFlag;
157 }
158
159 }