html轉換text-分段落,實現富文字匯入word的格式轉換,標籤過濾
阿新 • • 發佈:2021-10-23
html轉換text-分段落,實現富文字匯入word的格式轉換,標籤過濾
一、工具類 Html2Text
import javax.swing.text.html.HTMLEditorKit; import javax.swing.text.html.parser.ParserDelegator; import java.io.*; public class Html2Text extends HTMLEditorKit.ParserCallback { private static Html2Text html2Text = new Html2Text(); StringBuffer s; public Html2Text() { } public void parse(String str) throws IOException { InputStream iin = new ByteArrayInputStream(str.getBytes()); Reader in = new InputStreamReader(iin); s = new StringBuffer(); ParserDelegator delegator = new ParserDelegator(); // the third parameter is TRUE to ignore charset directive delegator.parse(in, this, Boolean.TRUE); iin.close(); in.close(); } public void handleEndOfLineString(String eol) { } /** *按標籤分割過濾後執行 */ public void handleText(char[] text, int pos) { s.append(text); } public String getText() { return s.toString(); } public static String getContent(String str) { try { html2Text.parse(str); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return html2Text.getText(); } }
二、分段實現
/**
 * Converts rich-text HTML into a list of plain-text paragraphs.
 *
 * <p>The input is cut at every opening {@code <p>} or {@code <div>} tag
 * (case-insensitive, with or without attributes). Each piece is stripped of
 * its remaining markup with {@code Html2Text.getContent}, and pieces that
 * contain no visible text are dropped.
 *
 * @author gaogushenling
 * @param xmlStr the rich-text HTML; may be {@code null} or empty
 * @return the paragraphs in document order; if there are none, a list holding
 *         a single placeholder message
 */
private List<String> getTextContentP(String xmlStr) {
    List<String> textList = new ArrayList<>();
    if (xmlStr != null) {
        // Match only real opening tags. A blanket replace of "div" with "p"
        // would also rewrite the letters "div" inside ordinary text, and a
        // literal "<p>" split would miss tags that carry attributes.
        // \b keeps tags such as <pre> from matching.
        String[] segments = xmlStr.split("(?i)<(?:p|div)\\b[^>]*>");
        for (String segment : segments) {
            String text = Html2Text.getContent(segment);
            // Skip the empty piece in front of the first tag and any
            // paragraph that has no visible text.
            if (!text.trim().isEmpty()) {
                textList.add(text);
            }
        }
    }
    if (textList.isEmpty()) {
        textList.add("富文字是空的呦");
    }
    return textList;
}
呼叫
// Example call: pass the rich-text HTML string; the returned list holds the extracted paragraph texts.
List<String> textList = getTextContentP("富文字(html格式)");