Java對html標籤的過濾和清洗
阿新 • • 發佈:2018-10-31
OWASP HTML Sanitizer 是一個簡單快捷的java類庫,主要用於防止XSS
優點如下:
1.使用簡單。不需要繁瑣的xml配置,只需在程式碼中進行少量編碼
2.由Mike Samuel(谷歌工程師)維護
3.通過了AntiSamy超過95%的單元測試(UT)
4.高效能,低記憶體消耗
5.速度是 AntiSamy DOM模式的4倍
1.POM中增加
<!-- HTML tag filtering: OWASP Java HTML Sanitizer --> <dependency> <groupId>com.googlecode.owasp-java-html-sanitizer</groupId> <artifactId>owasp-java-html-sanitizer</artifactId> <version>r136</version> </dependency>
2.工具類
import org.owasp.html.ElementPolicy; import org.owasp.html.HtmlPolicyBuilder; import org.owasp.html.PolicyFactory; import java.util.List; /** * @author : RandySun * @date : 2018-10-08 10:32 * Comment : */ public class HtmlUtils { //允許的標籤 private static final String[] allowedTags = {"h1", "h2", "h3", "h4", "h5", "h6", "span", "strong", "img", "video", "source", "blockquote", "p", "div", "ul", "ol", "li", "table", "thead", "caption", "tbody", "tr", "th", "td", "br", "a" }; //需要轉化的標籤 private static final String[] needTransformTags = {"article", "aside", "command","datalist","details","figcaption", "figure", "footer","header", "hgroup","section","summary"}; //帶有超連結的標籤 private static final String[] linkTags = {"img","video","source","a"}; public static String sanitizeHtml(String htmlContent){ PolicyFactory policy = new HtmlPolicyBuilder() //所有允許的標籤 .allowElements(allowedTags) //內容標籤轉化為div .allowElements( new ElementPolicy() { @Override public String apply(String elementName, List<String> attributes){ return "div"; } },needTransformTags) .allowAttributes("src","href","target").onElements(linkTags) //校驗連結中的是否為http .allowUrlProtocols("https") .toFactory(); String safeHTML = policy.sanitize(htmlContent); return safeHTML; } public static void main(String[] args){ String inputHtml = "<img src=\"https://a.jpb\"/>"; System.out.println(sanitizeHtml(inputHtml)); } }
其中.allowElements(allowedTags)是新增所有允許的html標籤,
以下是需要轉化的標籤,把needTransformTags中的內容全部轉化為div
// Convert content tags to div: every tag in needTransformTags is emitted as a div
.allowElements( new ElementPolicy() {
@Override
public String apply(String elementName, List<String> attributes){
// The returned name replaces the original element name, whatever it was
return "div";
}
},needTransformTags)
.allowAttributes("src","href","target").onElements(linkTags)是在特定的標籤上允許的屬性
.allowUrlProtocols("https")表示href或者src連結中只允許https協議