【垂直搜尋引擎搭建14】HtmlParser中Filter方法(URL網路地址)
阿新 • • 發佈:2019-01-06
1、TagNameFilter
import java.io.IOException;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
/**
 * Demo of {@code TagNameFilter}: prints every node of a page that is accepted
 * by a tag-name filter constructed with {@code "div"}.
 */
public class TagNameFilterDemo {

    /**
     * Creates a {@code Parser} for {@code url}, extracts all nodes matching a
     * {@code TagNameFilter("div")}, and writes each node's {@code toString()}
     * to standard output, one node per line.
     *
     * @param url resource string handed to the {@code Parser} constructor
     * @throws IOException declared by the signature and kept for caller compatibility
     * @throws ParserException propagated from the parser calls
     */
    public static void getContent(String url) throws IOException, ParserException {
        Parser pageParser = new Parser(url);
        NodeFilter divFilter = new TagNameFilter("div");
        NodeList matches = pageParser.extractAllNodesThatMatch(divFilter);

        // Nothing to print when the parser hands back no list at all.
        if (matches == null) {
            return;
        }

        int index = 0;
        while (index < matches.size()) {
            Node current = matches.elementAt(index);
            System.out.println(current.toString());
            index++;
        }
    }

    /**
     * Runs the demo against a fixed article URL.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) throws ParserException, IOException {
        getContent("http://yemacaijing.baijia.baidu.com/article/598342");
    }
}
2、HasChildFilter
import java.io.IOException;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasChildFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
/**
 * Demo of {@code HasChildFilter}: prints every node of a page that is accepted
 * by a {@code HasChildFilter} wrapping a {@code TagNameFilter("div")}.
 */
public class HasChildFilterDemo {

    /**
     * Creates a {@code Parser} for {@code url}, builds a {@code HasChildFilter}
     * around a "div" tag-name filter, extracts all nodes the combined filter
     * accepts, and writes each node's {@code toString()} to standard output.
     *
     * @param url resource string handed to the {@code Parser} constructor
     * @throws ParserException propagated from the parser calls
     * @throws IOException declared by the signature and kept for caller compatibility
     */
    public static void getContent(String url) throws ParserException, IOException {
        Parser pageParser = new Parser(url);

        // The child filter is the inner criterion; HasChildFilter wraps it.
        NodeFilter childIsDiv = new TagNameFilter("div");
        NodeFilter parentFilter = new HasChildFilter(childIsDiv);

        NodeList matches = pageParser.extractAllNodesThatMatch(parentFilter);
        if (matches != null) {
            for (int pos = 0; pos < matches.size(); pos++) {
                System.out.println(matches.elementAt(pos).toString());
            }
        }
    }

    /**
     * Runs the demo against a fixed article URL.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) throws ParserException, IOException {
        getContent("http://yemacaijing.baijia.baidu.com/article/598342");
    }
}
3、HasAttributeFilter
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
/**
 * Demo of {@code HasAttributeFilter}: prints every node of a page that is
 * accepted by an attribute filter constructed with {@code "href"}.
 */
public class HasAttributeFilterDemo {

    /**
     * Creates a {@code Parser} for {@code url}, extracts all nodes matching a
     * {@code HasAttributeFilter("href")}, and writes each node's
     * {@code toString()} to standard output, one node per line.
     *
     * @param url resource string handed to the {@code Parser} constructor
     * @throws ParserException propagated from the parser calls
     */
    public static void getContent(String url) throws ParserException {
        Parser pageParser = new Parser(url);
        NodeFilter hrefFilter = new HasAttributeFilter("href");
        NodeList matches = pageParser.extractAllNodesThatMatch(hrefFilter);

        // Nothing to print when the parser hands back no list at all.
        if (matches == null) {
            return;
        }

        final int total = matches.size();
        for (int pos = 0; pos < total; pos++) {
            Node current = matches.elementAt(pos);
            String rendered = current.toString();
            System.out.println(rendered);
        }
    }

    /**
     * Runs the demo against a fixed article URL.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) throws ParserException {
        getContent("http://yemacaijing.baijia.baidu.com/article/598342");
    }
}