獲取html網頁中的body部分內容
1.匯入htmlparser.jar包
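2./**
* Extracts the content of the BODY tag from an HTML page.
* <p>
* The HTML text is parsed with the HTML Parser library, and the HTML of each
* node collected for the BODY tag is concatenated in iteration order.
* If a ParserException occurs, an error is logged and whatever was gathered
* up to that point (possibly an empty string) is returned.
*
* @param inputHTML the HTML source text to parse
* @return the concatenated HTML of the nodes inside the BODY tag
* @since NLP V100R001C01
*/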
public static String getHtmlBody(String inputHTML)
{
    // Nothing to parse: treat a null or empty document as having no BODY
    // content instead of handing it to the parser.
    if (inputHTML == null || inputHTML.length() == 0)
    {
        return "";
    }

    StringBuilder bodyHtml = new StringBuilder();
    try
    {
        Parser parser = new Parser();
        parser.setInputHTML(inputHTML);
        // NOTE(review): this passes the parser's URL where a character-set name
        // is expected. Kept as in the original to avoid changing parsing
        // behavior; presumably a mistake -- confirm and replace with the real
        // charset (or drop the call).
        parser.setEncoding(parser.getURL());

        // HtmlPage is a visitor; after the traversal it exposes the nodes it
        // collected for the BODY tag.
        HtmlPage page = new HtmlPage(parser);
        parser.visitAllNodesWith(page);

        NodeList bodyNodes = page.getBody();
        // Guard against a null list (presumably possible when the BODY tag has
        // no children -- confirm against HtmlPage) to avoid a NullPointerException.
        if (bodyNodes != null)
        {
            NodeIterator it = bodyNodes.elements();
            while (it.hasMoreNodes())
            {
                bodyHtml.append(it.nextNode().toHtml());
            }
        }
    }
    catch (ParserException e)
    {
        // Pass the exception to the logger so the cause and stack trace are kept.
        logger.error("getHtmlBody() excute ParserException....", e);
    }
    // On failure this is whatever was appended before the exception.
    return bodyHtml.toString();
}