去除String中的html標籤,並將<td>替換為空格、<tr>替換為換行
阿新 • • 發佈:2018-11-04
import java.util.regex.Matcher; import java.util.regex.Pattern; private String cleanHtmlTags(String htmlText) { if (StringUtils.isEmpty(htmlText)) { return ""; } htmlText = htmlText.replaceAll(" ", " "); // 過濾html標籤 String regEx_html = "<[^>]+>"; String regEx_td = "<[td]+>"; String regEx_tr = "<[tr]+>"; Pattern p_html = Pattern.compile(regEx_td, Pattern.CASE_INSENSITIVE); Matcher m_html = p_html.matcher(htmlText); htmlText = m_html.replaceAll(" "); // td替換成空格 p_html = Pattern.compile(regEx_tr, Pattern.CASE_INSENSITIVE); m_html = p_html.matcher(htmlText); htmlText = m_html.replaceAll("\n"); // tr替換成換行 p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE); m_html = p_html.matcher(htmlText); htmlText = m_html.replaceAll(""); // 過濾html標籤 return htmlText; }