Web Crawler Example
阿新 • Published: 2018-12-18
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class WebSpider1 {

    // Download the page at urlStr and return its HTML as one string,
    // decoding the byte stream with the given character set (e.g. "gbk").
    public static String getContent(String urlStr, String charset) {
        StringBuilder sb = new StringBuilder();
        try {
            URL url = new URL(urlStr);
            // try-with-resources closes the reader even if reading fails
            try (BufferedReader bf = new BufferedReader(
                    new InputStreamReader(url.openStream(), charset))) {
                String temp;
                while ((temp = bf.readLine()) != null) {
                    sb.append(temp);
                }
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return sb.toString();
    }

    // Run the regular expression over the page source and collect the text
    // captured by group 1 of every match.
    public static List<String> getStr(String des, String regexStr) {
        Pattern p = Pattern.compile(regexStr);
        Matcher m = p.matcher(des);
        List<String> list = new ArrayList<String>();
        while (m.find()) {
            list.add(m.group(1));
        }
        return list;
    }

    public static void main(String[] args) {
        // Fetch the 163.com home page (served as GBK) and print the value
        // of every href="..." attribute found in it.
        String des = getContent("http://www.163.com", "gbk");
        List<String> links = getStr(des, "href=\"([\\w\\s./:]*?)\"");
        for (String temp : links) {
            System.out.println(temp);
        }
    }
}
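A side note (my own sketch, not part of the original post): the character class [\w\s./:] in the regex above will skip any link containing characters such as ?, =, & or -. A minimal, self-contained variant that simply captures everything up to the closing quote is shown below; it runs against a made-up HTML fragment so it needs no network access.

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class HrefRegexDemo {
    public static void main(String[] args) {
        // Hypothetical HTML fragment, invented for illustration only.
        String html = "<a href=\"http://news.163.com/index.html\">news</a>"
                    + "<a href=\"/sports/list?page=2&tab=nba\">sports</a>";

        // Capture everything between href=" and the next quote, so links
        // with query strings or hyphens are also matched.
        Pattern p = Pattern.compile("href=\"([^\"]*?)\"");
        Matcher m = p.matcher(html);
        List<String> links = new ArrayList<String>();
        while (m.find()) {
            links.add(m.group(1));
        }
        for (String link : links) {
            System.out.println(link);
        }
    }
}

With the broader pattern the second, query-string link is captured as well; whether that is desirable depends on which links the crawler is meant to follow.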