正則表示式中的多行和非貪婪模式
阿新 • • 發佈:2020-08-21
多行(跨行匹配,即 DOTALL 模式,讓 . 也能匹配換行符)就是在前邊加"(?s)";注意 "(?m)" 才是 MULTILINE 模式,它只改變 ^ 和 $ 的匹配行為
非貪婪模式就是在*、?、+ 後邊再加?
例子:
// Example: download a page, decode it as GBK, and print every table row that
// matches a DOTALL + reluctant-quantifier regular expression.
try {
    URL url = new URL("http://money.finance.sina.com.cn/corp/go.php/vMS_MarketHistory/stockid/600000.phtml");
    URLConnection conn = url.openConnection();

    // Buffer the whole response body in memory before decoding it.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (InputStream is = conn.getInputStream()) {
        byte[] arr = new byte[1024];
        int len;
        // read() returns -1 at end of stream; only the first `len` bytes of
        // the buffer are valid after each call, so write exactly that many.
        while ((len = is.read(arr)) != -1) {
            out.write(arr, 0, len);
        }
    }

    String content = out.toString("GBK");
    System.out.println(content);

    // (?s) enables DOTALL so that '.' also matches line terminators, letting
    // one match span several lines of HTML. Each ".*?" is reluctant: it stops
    // at the first following "</td>" instead of the last one.
    // Group 1 captures the yyyy-MM-dd text inside the link; groups 2-7 each
    // capture the text of one of the six <td> cells that follow, starting at
    // the first digit in the cell and ending before the next '<'.
    String regstr = "(?s)<a target='_blank'\\s+href='http://vip.stock.finance.sina.com.cn/quotes_service/view/vMS_tradehistory.php\\?symbol=\\w{2}\\d{6}&date=\\d{4}-\\d{2}-\\d{2}'>"
            + "\\s+(\\d{4}-\\d{2}-\\d{2})\\s+.*?</td>\\s+"
            + "<td[^\\d]*([^<]*).*?</td>\\s+"
            + "<td[^\\d]*([^<]*).*?</td>\\s+"
            + "<td[^\\d]*([^<]*).*?</td>\\s+"
            + "<td[^\\d]*([^<]*).*?</td>\\s+"
            + "<td[^\\d]*([^<]*).*?</td>\\s+"
            + "<td[^\\d]*([^<]*).*?</td>\\s+";
    Pattern p = Pattern.compile(regstr);
    Matcher m = p.matcher(content);

    while (m.find()) {
        // Whole match first, then every capture group in order.
        System.out.println(m.group());
        for (int i = 1; i <= m.groupCount(); i++) {
            System.out.println(m.group(i));
        }
    }
} catch (IOException e) {
    // Covers a malformed URL, connection failures, read errors and an
    // unsupported charset name; the example simply reports the failure.
    e.printStackTrace();
}