1. 程式人生 > >去除String中的html標籤,並對<td><tr>進行替換

去除String中的html標籤,並將<td>替換為空格、<tr>替換為換行

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Converts an HTML fragment to plain text.
 *
 * <p>Processing order:
 * <ol>
 *   <li>every {@code &nbsp;} entity becomes a single space;</li>
 *   <li>every opening {@code <td>} tag, with or without attributes, becomes a single space;</li>
 *   <li>every opening {@code <tr>} tag, with or without attributes, becomes a line feed;</li>
 *   <li>every remaining tag, including the closing {@code </td>} and {@code </tr>}, is removed.</li>
 * </ol>
 * Tag matching is case-insensitive. No entity other than {@code &nbsp;} is decoded.
 *
 * @param htmlText the HTML text to clean
 * @return the cleaned text, or {@code ""} whenever {@code StringUtils.isEmpty(htmlText)} is true
 */
private String cleanHtmlTags(String htmlText)
    {
        if (StringUtils.isEmpty(htmlText))
        {
            return "";
        }

        // The tag name must be followed either by '>' directly, or by whitespace or '/'
        // and then anything up to '>'. A character class such as "<[td]+>" would instead
        // match unrelated tags (<tt>, <dt>, <d>) and miss tags that carry attributes.
        final String tdOpenTag = "<td(?:[\\s/][^>]*)?>";
        final String trOpenTag = "<tr(?:[\\s/][^>]*)?>";
        final String anyTag = "<[^>]+>";

        // Turn non-breaking-space entities into ordinary spaces.
        String text = htmlText.replaceAll("&nbsp;", " ");

        // Cell boundary -> space. Must run before the generic strip below.
        text = Pattern.compile(tdOpenTag, Pattern.CASE_INSENSITIVE).matcher(text).replaceAll(" ");

        // Row boundary -> line feed. Must run before the generic strip below.
        text = Pattern.compile(trOpenTag, Pattern.CASE_INSENSITIVE).matcher(text).replaceAll("\n");

        // Drop every remaining tag.
        text = Pattern.compile(anyTag, Pattern.CASE_INSENSITIVE).matcher(text).replaceAll("");

        return text;
    }