OWASP HTML Sanitizer 是一個簡單快捷的Java類庫,主要用於防止XSS攻擊
優點如下:
1.使用簡單。不需要繁瑣的xml配置,只需在代碼中進行少量編碼
2.由Mike Samuel(谷歌工程師)維護
3.通過了AntiSamy 95%以上的單元測試(UT)
4.高性能,低內存消耗
5.速度是AntiSamy DOM模式的4倍
1.POM中增加
<!--html標簽過濾-->
<dependency>
<groupId>com.googlecode.owasp-java-html-sanitizer</groupId>
<artifactId>owasp-java-html-sanitizer</artifactId>
<version>r136</version>
</dependency>
2.工具類
import org.owasp.html.ElementPolicy;
import org.owasp.html.HtmlPolicyBuilder;
import org.owasp.html.PolicyFactory;
import java.util.List;
/**
 * HTML sanitizing helper built on the OWASP Java HTML Sanitizer.
 *
 * <p>The sanitizing policy keeps a fixed whitelist of tags, rewrites a set of
 * HTML5 semantic container tags to {@code div}, keeps the {@code src},
 * {@code href} and {@code target} attributes on link-carrying tags, and only
 * accepts URLs that use the {@code https} protocol.
 *
 * @author : RandySun
 * @date : 2018-10-08 10:32
 */
public class HtmlUtils {

    // Tags that are allowed through as-is.
    private static final String[] ALLOWED_TAGS = {
        "h1", "h2", "h3", "h4", "h5", "h6",
        "span", "strong",
        "img", "video", "source",
        "blockquote", "p", "div",
        "ul", "ol", "li",
        "table", "thead", "caption", "tbody", "tr", "th", "td", "br",
        "a"
    };

    // Tags that are accepted but renamed to "div" in the output.
    private static final String[] NEED_TRANSFORM_TAGS = {
        "article", "aside", "command", "datalist", "details", "figcaption", "figure",
        "footer", "header", "hgroup", "section", "summary"
    };

    // Tags that carry a link and therefore keep src / href / target.
    private static final String[] LINK_TAGS = {"img", "video", "source", "a"};

    // The policy never changes, so it is built once instead of on every call.
    // The library documents PolicyFactory as immutable and thread-safe, which
    // makes sharing a single instance safe. This field must stay below the tag
    // arrays: static initializers run in textual order.
    private static final PolicyFactory POLICY = buildPolicy();

    /**
     * Assembles the sanitizing policy from the tag and attribute whitelists.
     *
     * @return the policy factory used by {@link #sanitizeHtml(String)}
     */
    private static PolicyFactory buildPolicy() {
        return new HtmlPolicyBuilder()
                // Every whitelisted tag.
                .allowElements(ALLOWED_TAGS)
                // Semantic container tags are converted to div.
                .allowElements(new ElementPolicy() {
                    @Override
                    public String apply(String elementName, List<String> attributes) {
                        return "div";
                    }
                }, NEED_TRANSFORM_TAGS)
                .allowAttributes("src", "href", "target").onElements(LINK_TAGS)
                // Only https URLs are accepted (plain http is NOT allowed).
                .allowUrlProtocols("https")
                .toFactory();
    }

    /**
     * Sanitizes an HTML fragment against the shared policy.
     *
     * @param htmlContent the untrusted HTML to clean
     * @return the sanitized HTML produced by the policy
     */
    public static String sanitizeHtml(String htmlContent) {
        return POLICY.sanitize(htmlContent);
    }

    /**
     * Small manual demo: sanitizes a sample {@code img} tag and prints the result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String inputHtml = "<img src=\"https://a.jpb\"/>";
        System.out.println(sanitizeHtml(inputHtml));
    }
}
其中.allowElements(allowedTags)是添加所有允許的html標簽,
以下是需要轉化的標簽,把needTransformTags中的內容全部轉化為div
//內容標簽轉化為div
.allowElements( new ElementPolicy() {
@Override
public String apply(String elementName, List<String> attributes){
return "div";
}
},needTransformTags)
.allowAttributes("src","href","target").onElements(linkTags)是在特定的標簽上允許的屬性
.allowUrlProtocols("https")表示href或者src鏈接中只允許https協議
