java編寫網頁爬蟲(分頁——插入資料庫——匯出資料)
阿新 • • 發佈:2019-02-17
)(.*?)()(.*?)()(.*)");
MatchertdMatcher = tdPattern.matcher(innerTR);
//彈出層的
PatterntdPatternPop = Pattern.compile("(.*)()(.*)");
Matcher m1 =p1.matcher(content);
if(m1.matches()){
vo.setMobile(getStringNotNullValue(m1.group(5)));
}
//email
Pattern p2 =Pattern.compile("(.*)(電郵)(.*?)()(.*?)( )(.*)");
Matcher m2 =p2.matcher(content);
if(m2.matches()){
vo.setEmail(getStringNotNullValue(m2.group(5)));
}
//公司URL
Pattern p3 = Pattern.compile("(.*)(網址)(.*?)(<a href=\")(.*?)(\" target=\"_blank\")(.*)");
Matcher m3 = p3.matcher(content);
if(m3.matches()){
vo.setUrl(getStringNotNullValue(m3.group(5)));
}
//標籤
Pattern p4 = Pattern.compile("(.*)(產品分類)(.*?)(-)(.*?)(</td>)(.*)");
Matcher m4 =p4.matcher(content);
if(m4.matches()){
vo.setTags(getStringNotNullValue(m4.group(5)));
}
list.add(vo);
}
i++;
System.out.println("解析第"+i+"條!!!");
}
System.out.println("------------------解析結束-------------------");
returnlist;
}
privatestatic void insertData2DataBase(Listlist, Connection conn)throwsException{
System.out.println("待插入的資料條數:" + list.size());
String sql ="insert into test_enterprise_info(enterprise_name,url,email,mobile,tags) values (?,?,?,?,?)";
PreparedStatement stmt = conn.prepareStatement(sql);
for(EnterprisInfoVo vo:list){
stmt.setString(1, vo.getName());
stmt.setString(2, vo.getUrl());
stmt.setString(3, vo.getEmail());
stmt.setString(4, vo.getMobile());
stmt.setString(5, vo.getTags());
stmt.addBatch();
}
stmt.executeBatch();
System.out.println("資料插入完畢!!!");
}
publicstatic Connection connectDataBase() throws SQLException{
Connectionconn = null;
String url = "jdbc:mysql://localhost:3306/smartpr?"
+"user=root&password=root&useUnicode=true&characterEncoding=UTF8";
try {
Class.forName("com.mysql.jdbc.Driver");// 動態載入mysql驅動
System.out.println("成功載入MySQL驅動程式");
conn = DriverManager.getConnection(url);
} catch (SQLException e) {
System.out.println("MySQL操作錯誤");
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
return conn;
}
publicstatic String getStringNotNullValue(Object object) {
if (object== null) {
return"";
} else{
returnobject.toString().trim();
}
}
}
以上是把網頁上的資料插入到資料庫的過程。這裡需要注意 Java 正則表示式中 find() 方法和 matches() 方法的區別:find() 按部分匹配,只要輸入中有一段子字串符合正則即返回 true,而且每次呼叫都會自動移動到下一個符合條件的位置;matches() 則要求整個輸入字串完全符合正則。因此我的程式碼中搭配 matches() 使用的正則,開頭和結尾都加了 (.*),而搭配 find() 使用的正則則不需要。
下面把資料庫中的資料匯出成 Excel 檔案,存放到桌面上:
package com;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFFont;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.util.Region;
import com.mysql.jdbc.PreparedStatement;
public class ExcelTest {
publicstatic void main(String[] args) throws Exception{
//1、連線資料庫
Connectionconn = connectDataBase();
//2、從資料庫中取資料
Listlist =getDataFromDataBase(conn);
//3、把資料插入excel表中
insertData2Excel(list);
}
privatestatic ListgetDataFromDataBase(Connection conn) throwsException{
String sql ="select * from test_enterprise_info";
PreparedStatement stmt = (PreparedStatement)conn.prepareStatement(sql);
ResultSet rs= stmt.executeQuery();
Listlist =new ArrayList();
while(rs.next()){
EnterprisInfoVo vo = new EnterprisInfoVo();
String name= rs.getString(2);
String url =rs.getString(3);
String email= rs.getString(4);
Stringmobile = rs.getString(5);
String tags= rs.getString(6);
vo.setEmail(email);
vo.setMobile(mobile);
vo.setName(name);
vo.setTags(getStringNotNullValue(tags).replace("
", ""));
vo.setUrl(url);
list.add(vo);
}
returnlist;
}
@SuppressWarnings("deprecation")
publicstatic void insertData2Excel(Listlist){
//宣告一個工作薄
HSSFWorkbook wb = new HSSFWorkbook();
//宣告一個單子並命名
HSSFSheet sheet = wb.createSheet("企業資訊表");
//給單子名稱一個長度
sheet.setDefaultColumnWidth((short)15);
// 生成一個樣式
HSSFCellStyle style = wb.createCellStyle();
//建立第一行(也可以稱為表頭)
sheet.addMergedRegion(new Region(0, (short)0, 0, (short)4));
HSSFRow row0 = sheet.createRow(0);
HSSFCell cell0 = row0.createCell(0);
row0.setHeight((short)500);
HSSFCellStyle style0 = wb.createCellStyle();
row0.setRowStyle(style0);
//設定字型
HSSFFont font = wb.createFont();
style0.setFont(font);
font.setFontHeightInPoints((short)14);
font.setColor(HSSFFont.BOLDWEIGHT_BOLD);
//單元格內容
cell0.setCellValue("企業資訊表");
HSSFRow row = sheet.createRow(1);
//樣式字型居中
style.setAlignment(HSSFCellStyle.ALIGN_CENTER);
//給表頭第一行一次建立單元格
HSSFCell cell = row.createCell((short) 0);
cell.setCellValue("企業名稱");
cell.setCellStyle(style);
cell = row.createCell( (short) 1);
cell.setCellValue("企業網站URL");
cell.setCellStyle(style);
cell = row.createCell((short) 2);
cell.setCellValue("企業email");
cell.setCellStyle(style);
cell = row.createCell((short) 3);
cell.setCellValue("企業聯絡電話");
cell.setCellStyle(style);
cell = row.createCell((short) 4);
cell.setCellValue("企業標籤");
cell.setCellStyle(style);
//向單元格里填充資料
for (int i = 0; i < list.size(); i++) {
row = sheet.createRow(i + 2);
EnterprisInfoVo vo = list.get(i);
row.createCell(0).setCellValue(vo.getName());
row.createCell(1).setCellValue(vo.getUrl());
row.createCell(2).setCellValue(vo.getEmail());
row.createCell(3).setCellValue(vo.getMobile());
row.createCell(4).setCellValue(vo.getTags());
}
try {
//預設匯出到E盤下
FileOutputStream out = newFileOutputStream("C://Users//Administrator//Desktop/EnterpriseInfo.xls");
wb.write(out);
out.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
publicstatic Connection connectDataBase() throws SQLException{
Connectionconn = null;
String url = "jdbc:mysql://localhost:3306/smartpr?"
+"user=root&password=root&useUnicode=true&characterEncoding=UTF8";
try {
Class.forName("com.mysql.jdbc.Driver");// 動態載入mysql驅動
System.out.println("成功載入MySQL驅動程式");
conn = DriverManager.getConnection(url);
} catch (SQLException e) {
System.out.println("MySQL操作錯誤");
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
return conn;
}
publicstatic String getStringNotNullValue(Object object) {
if (object== null) {
return"";
} else{
returnobject.toString().trim();
}
}
}
MatchertdMatcher = tdPattern.matcher(innerTR);
//彈出層的
PatterntdPatternPop = Pattern.compile("(.*)()(.*)");
Matcher m1 =p1.matcher(content);
if(m1.matches()){
vo.setMobile(getStringNotNullValue(m1.group(5)));
}
Pattern p2 =Pattern.compile("(.*)(電郵)(.*?)()(.*?)(
Matcher m2 =p2.matcher(content);
if(m2.matches()){
vo.setEmail(getStringNotNullValue(m2.group(5)));
}
//公司URL
Pattern p3 = Pattern.compile("(.*)(網址)(.*?)(<a href=\")(.*?)(\" target=\"_blank\")(.*)");
Matcher m3 = p3.matcher(content);
if(m3.matches()){
vo.setUrl(getStringNotNullValue(m3.group(5)));
}
//標籤
Pattern p4 = Pattern.compile("(.*)(產品分類)(.*?)(-)(.*?)(</td>)(.*)");
Matcher m4 =p4.matcher(content);
if(m4.matches()){
vo.setTags(getStringNotNullValue(m4.group(5)));
}
list.add(vo);
}
i++;
System.out.println("解析第"+i+"條!!!");
}
System.out.println("------------------解析結束-------------------");
returnlist;
}
privatestatic void insertData2DataBase(Listlist, Connection conn)throwsException{
System.out.println("待插入的資料條數:" + list.size());
String sql ="insert into test_enterprise_info(enterprise_name,url,email,mobile,tags) values (?,?,?,?,?)";
PreparedStatement stmt = conn.prepareStatement(sql);
for(EnterprisInfoVo vo:list){
stmt.setString(1, vo.getName());
stmt.setString(2, vo.getUrl());
stmt.setString(3, vo.getEmail());
stmt.setString(4, vo.getMobile());
stmt.setString(5, vo.getTags());
stmt.addBatch();
}
stmt.executeBatch();
System.out.println("資料插入完畢!!!");
}
publicstatic Connection connectDataBase() throws SQLException{
Connectionconn = null;
String url = "jdbc:mysql://localhost:3306/smartpr?"
+"user=root&password=root&useUnicode=true&characterEncoding=UTF8";
try {
Class.forName("com.mysql.jdbc.Driver");// 動態載入mysql驅動
System.out.println("成功載入MySQL驅動程式");
conn = DriverManager.getConnection(url);
} catch (SQLException e) {
System.out.println("MySQL操作錯誤");
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
return conn;
}
publicstatic String getStringNotNullValue(Object object) {
if (object== null) {
return"";
} else{
returnobject.toString().trim();
}
}
}
以上是把網頁上的資料插入到資料庫的過程。這裡需要注意 Java 正則表示式中 find() 方法和 matches() 方法的區別:find() 按部分匹配,只要輸入中有一段子字串符合正則即返回 true,而且每次呼叫都會自動移動到下一個符合條件的位置;matches() 則要求整個輸入字串完全符合正則。因此我的程式碼中搭配 matches() 使用的正則,開頭和結尾都加了 (.*),而搭配 find() 使用的正則則不需要。
下面把資料庫中的資料匯出成 Excel 檔案,存放到桌面上:
package com;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFFont;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.util.Region;
import com.mysql.jdbc.PreparedStatement;
public class ExcelTest {
publicstatic void main(String[] args) throws Exception{
//1、連線資料庫
Connectionconn = connectDataBase();
//2、從資料庫中取資料
Listlist =getDataFromDataBase(conn);
//3、把資料插入excel表中
insertData2Excel(list);
}
privatestatic ListgetDataFromDataBase(Connection conn) throwsException{
String sql ="select * from test_enterprise_info";
PreparedStatement stmt = (PreparedStatement)conn.prepareStatement(sql);
ResultSet rs= stmt.executeQuery();
Listlist =new ArrayList();
while(rs.next()){
EnterprisInfoVo vo = new EnterprisInfoVo();
String name= rs.getString(2);
String url =rs.getString(3);
String email= rs.getString(4);
Stringmobile = rs.getString(5);
String tags= rs.getString(6);
vo.setEmail(email);
vo.setMobile(mobile);
vo.setName(name);
vo.setTags(getStringNotNullValue(tags).replace("
", ""));
vo.setUrl(url);
list.add(vo);
}
returnlist;
}
@SuppressWarnings("deprecation")
publicstatic void insertData2Excel(Listlist){
//宣告一個工作薄
HSSFWorkbook wb = new HSSFWorkbook();
//宣告一個單子並命名
HSSFSheet sheet = wb.createSheet("企業資訊表");
//給單子名稱一個長度
sheet.setDefaultColumnWidth((short)15);
// 生成一個樣式
HSSFCellStyle style = wb.createCellStyle();
//建立第一行(也可以稱為表頭)
sheet.addMergedRegion(new Region(0, (short)0, 0, (short)4));
HSSFRow row0 = sheet.createRow(0);
HSSFCell cell0 = row0.createCell(0);
row0.setHeight((short)500);
HSSFCellStyle style0 = wb.createCellStyle();
row0.setRowStyle(style0);
//設定字型
HSSFFont font = wb.createFont();
style0.setFont(font);
font.setFontHeightInPoints((short)14);
font.setColor(HSSFFont.BOLDWEIGHT_BOLD);
//單元格內容
cell0.setCellValue("企業資訊表");
HSSFRow row = sheet.createRow(1);
//樣式字型居中
style.setAlignment(HSSFCellStyle.ALIGN_CENTER);
//給表頭第一行一次建立單元格
HSSFCell cell = row.createCell((short) 0);
cell.setCellValue("企業名稱");
cell.setCellStyle(style);
cell = row.createCell( (short) 1);
cell.setCellValue("企業網站URL");
cell.setCellStyle(style);
cell = row.createCell((short) 2);
cell.setCellValue("企業email");
cell.setCellStyle(style);
cell = row.createCell((short) 3);
cell.setCellValue("企業聯絡電話");
cell.setCellStyle(style);
cell = row.createCell((short) 4);
cell.setCellValue("企業標籤");
cell.setCellStyle(style);
//向單元格里填充資料
for (int i = 0; i < list.size(); i++) {
row = sheet.createRow(i + 2);
EnterprisInfoVo vo = list.get(i);
row.createCell(0).setCellValue(vo.getName());
row.createCell(1).setCellValue(vo.getUrl());
row.createCell(2).setCellValue(vo.getEmail());
row.createCell(3).setCellValue(vo.getMobile());
row.createCell(4).setCellValue(vo.getTags());
}
try {
//預設匯出到E盤下
FileOutputStream out = newFileOutputStream("C://Users//Administrator//Desktop/EnterpriseInfo.xls");
wb.write(out);
out.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
publicstatic Connection connectDataBase() throws SQLException{
Connectionconn = null;
String url = "jdbc:mysql://localhost:3306/smartpr?"
+"user=root&password=root&useUnicode=true&characterEncoding=UTF8";
try {
Class.forName("com.mysql.jdbc.Driver");// 動態載入mysql驅動
System.out.println("成功載入MySQL驅動程式");
conn = DriverManager.getConnection(url);
} catch (SQLException e) {
System.out.println("MySQL操作錯誤");
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
return conn;
}
publicstatic String getStringNotNullValue(Object object) {
if (object== null) {
return"";
} else{
returnobject.toString().trim();
}
}
}