java爬蟲實現百度地圖資料爬取
本次專案主要實現百度地圖地點檢索功能的資料爬取,可以獲得檢索結果的相關資訊。資料透過百度地圖API介面取得,後端採用servlet,資料庫採用MySQL,並以mybatis作為持久層框架。話不多說,上程式碼。
1.DAO層資料
package dao;
import java.util.List;
import dto.CityCodeDTO;
import dto.PageInfoDTO;
/**
 * MyBatis mapper interface for the crawled place records and the
 * area-code lookup table. The SQL for each method lives in the mapper XML
 * whose namespace is {@code dao.PageInfoDAO}.
 *
 * @author Administrator
 */
public interface PageInfoDAO {

    /**
     * Stores one crawled place record.
     *
     * @param pageInfo the record to insert
     */
    void save(PageInfoDTO pageInfo);

    /**
     * Fetches one page of stored records.
     *
     * @param param carries the optional {@code wordKey} filter together with
     *              {@code startIndex} and {@code pageSize} for paging
     * @return the records of the requested page
     */
    List<PageInfoDTO> findPageData(PageInfoDTO param);

    /**
     * Counts the stored records.
     *
     * @return the number of rows
     */
    Integer getPageCount();

    /**
     * Looks up the Baidu city code for an area.
     *
     * @param cityCode carries either the area code or the city name to match
     * @return the matching city code
     */
    Integer getCityCode(CityCodeDTO cityCode);
}
2.百度地圖的地區編碼與區號不同,因此封裝地區編碼與百度對應關係
package dto;
import java.io.Serializable;
/**
 * Lookup key for the area-code table: holds an area code and/or a city name.
 */
public class CityCodeDTO implements Serializable {

    private static final long serialVersionUID = -7429099700161706593L;

    /** Area code of the city. */
    private String code;

    /** Name of the city. */
    private String city;

    /** @return the area code, or {@code null} when not set */
    public String getCode() {
        return this.code;
    }

    /** @param code the area code */
    public void setCode(String code) {
        this.code = code;
    }

    /** @return the city name, or {@code null} when not set */
    public String getCity() {
        return this.city;
    }

    /** @param city the city name */
    public void setCity(String city) {
        this.city = city;
    }

    /** @return a debug representation listing both fields */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("CityCodeDTO [code=");
        text.append(code);
        text.append(", city=").append(city);
        text.append("]");
        return text.toString();
    }
}
3.實體類檔案(內部含有資料庫調取引數)
package dto;
import java.io.Serializable;
import java.util.Date;
/**
 * Entity holding one place record, plus the paging and filter values that are
 * passed to the mapper when querying.
 *
 * @author Administrator
 */
public class PageInfoDTO implements Serializable {

    private static final long serialVersionUID = -8667380964768477281L;

    /* record id */
    private int id;
    /* place name */
    private String name;
    /* place address */
    private String address;
    /* place telephone */
    private String telephone;
    /* street-view image id */
    private String street_id;
    /* latitude / longitude */
    private Double lat;
    private Double lng;

    /* bookkeeping columns */
    private Integer valid;
    private Date createdTime;
    private Date modifiedTime;
    private String createdName;
    private String modifiedName;

    /* query parameters: paging window and search key */
    private Integer startIndex;
    private Integer pageSize;
    private String wordKey;

    /** Creates an empty record. */
    public PageInfoDTO() {
    }

    /**
     * Creates a record with its basic descriptive fields.
     *
     * @param id        record id
     * @param name      place name
     * @param address   place address
     * @param telephone place telephone
     */
    public PageInfoDTO(int id, String name, String address, String telephone) {
        this.id = id;
        this.name = name;
        this.address = address;
        this.telephone = telephone;
    }

    public int getId() {
        return this.id;
    }

    public void setId(int id) {
        this.id = id;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getAddress() {
        return this.address;
    }

    public void setAddress(String address) {
        this.address = address;
    }

    public String getTelephone() {
        return this.telephone;
    }

    public void setTelephone(String telephone) {
        this.telephone = telephone;
    }

    public String getStreet_id() {
        return this.street_id;
    }

    public void setStreet_id(String street_id) {
        this.street_id = street_id;
    }

    public Double getLat() {
        return this.lat;
    }

    public void setLat(Double lat) {
        this.lat = lat;
    }

    public Double getLng() {
        return this.lng;
    }

    public void setLng(Double lng) {
        this.lng = lng;
    }

    public Integer getValid() {
        return this.valid;
    }

    public void setValid(Integer valid) {
        this.valid = valid;
    }

    public Date getCreatedTime() {
        return this.createdTime;
    }

    public void setCreatedTime(Date createdTime) {
        this.createdTime = createdTime;
    }

    public Date getModifiedTime() {
        return this.modifiedTime;
    }

    public void setModifiedTime(Date modifiedTime) {
        this.modifiedTime = modifiedTime;
    }

    public String getCreatedName() {
        return this.createdName;
    }

    public void setCreatedName(String createdName) {
        this.createdName = createdName;
    }

    public String getModifiedName() {
        return this.modifiedName;
    }

    public void setModifiedName(String modifiedName) {
        this.modifiedName = modifiedName;
    }

    public Integer getStartIndex() {
        return this.startIndex;
    }

    public void setStartIndex(Integer startIndex) {
        this.startIndex = startIndex;
    }

    public Integer getPageSize() {
        return this.pageSize;
    }

    public void setPageSize(Integer pageSize) {
        this.pageSize = pageSize;
    }

    public String getWordKey() {
        return this.wordKey;
    }

    public void setWordKey(String wordKey) {
        this.wordKey = wordKey;
    }

    /** @return a debug representation listing every field */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("PageInfoDTO [");
        text.append("id=").append(id);
        text.append(", name=").append(name);
        text.append(", address=").append(address);
        text.append(", telephone=").append(telephone);
        text.append(", street_id=").append(street_id);
        text.append(", lat=").append(lat);
        text.append(", lng=").append(lng);
        text.append(", valid=").append(valid);
        text.append(", createdTime=").append(createdTime);
        text.append(", modifiedTime=").append(modifiedTime);
        text.append(", createdName=").append(createdName);
        text.append(", startIndex=").append(startIndex);
        text.append(", pageSize=").append(pageSize);
        text.append(", wordKey=").append(wordKey);
        text.append(", modifiedName=").append(modifiedName);
        text.append("]");
        return text.toString();
    }
}
4.mapper檔案
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//ibatis.apache.org//DTD Mapper 3.0//EN"
"http://ibatis.apache.org/dtd/ibatis-3-mapper.dtd">
<mapper namespace="dao.PageInfoDAO">
    <!--
        Inserts one crawled place. "insert ignore" silently skips rows that
        violate a unique key. The table name is written in lower case to match
        the DDL (`pageinf`); MySQL table names are case-sensitive on
        case-sensitive file systems.
    -->
    <insert id="save" parameterType="dto.PageInfoDTO">
        insert ignore into pageinf
        (
            wordKey,
            name,
            address,
            telephone,
            street_id,
            lat,
            lng,
            valid,
            createdTime,
            createdName,
            modifiedTime,
            modifiedName
        )
        values
        (
            #{wordKey},
            #{name},#{address},#{telephone},#{street_id},#{lat},#{lng},
            0,
            now(),
            'zt',
            now(),
            'zt'
        )
    </insert>

    <!--
        Returns one page of rows, optionally filtered by wordKey.
        The explicit "order by id" keeps the limit/offset paging stable;
        without it the row order of successive pages is not guaranteed.
    -->
    <select id="findPageData" parameterType="dto.PageInfoDTO" resultType="dto.PageInfoDTO">
        select *
        from pageinf
        <where>
            <if test='wordKey!=null and wordKey!=""'>
                and wordKey = #{wordKey}
            </if>
        </where>
        order by id
        limit #{startIndex},#{pageSize}
    </select>

    <!--
        Counts every row of the table.
        NOTE(review): this count ignores the wordKey filter used by
        findPageData, so it can exceed the number of rows a filtered listing
        can page through. Confirm whether a filtered count is wanted.
    -->
    <select id="getPageCount" resultType="int">
        select count(id) from pageinf
    </select>

    <!--
        Looks up city_code by city name (substring match) and/or exact code.
        String literals use single quotes so the statement also works when the
        server runs with ANSI_QUOTES enabled.
    -->
    <select id="getCityCode" parameterType="dto.CityCodeDTO" resultType="Integer">
        select city_code from area_code
        <where>
            <if test='city!=null and city!=""'>
                and city like concat('%',#{city},'%')
            </if>
            <if test='code!=null and code!=""'>
                and code = #{code}
            </if>
        </where>
    </select>
</mapper>
5.servlet檔案
package servlet;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.ibatis.session.SqlSession;
import org.apache.ibatis.session.SqlSessionFactory;
import org.apache.ibatis.session.SqlSessionFactoryBuilder;
import dao.PageInfoDAO;
import dto.CityCodeDTO;
import dto.PageInfoDTO;
import net.sf.json.JSONObject;
import util.LngAndLatUtil;
/**
* Servlet implementation class MapServlet
*/
public class MapServlet extends HttpServlet {
private static final long serialVersionUID = 1L;
protected void service(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
// 設定編碼格式
request.setCharacterEncoding("UTF-8");
response.setContentType("text/html;charset=UTF-8");
PrintWriter out = response.getWriter();
// 請求地址解析
String uri = request.getRequestURI();
String action = uri.substring(uri.lastIndexOf("/"), uri.lastIndexOf("."));
if ("/query".equals(action)) {
// 處理query.do請求實現資料入庫
String area = request.getParameter("area");
String word = request.getParameter("word");
LngAndLatUtil util = new LngAndLatUtil();
JSONObject obj = util.getLngAndLat(area, word);
out.println(obj);
} else if ("/list".equals(action)) {
// 處理list請求實現頁面分頁顯示資料
// 獲取dao物件
SqlSessionFactoryBuilder ssfd = new SqlSessionFactoryBuilder();
SqlSessionFactory ssf = ssfd
.build(LngAndLatUtil.class.getClassLoader().getResourceAsStream("SqlMapConfig.xml"));
SqlSession session = ssf.openSession();
PageInfoDAO dao = session.getMapper(PageInfoDAO.class);
// 獲取頁面引數
String area = request.getParameter("area");
String word = request.getParameter("word");
System.out.println("area"+area);
Integer pageCurrent = Integer.valueOf(request.getParameter("pageCurrent"));
if (pageCurrent == null) {
pageCurrent = 1;
}
int pageSize = 10;
int startIndex = (pageCurrent - 1) * 10;
PageInfoDTO param = new PageInfoDTO();
param.setPageSize(pageSize);
param.setStartIndex(startIndex);
//判斷area是區號,還是名稱
CityCodeDTO cityCode =new CityCodeDTO();
Pattern p = Pattern.compile("[0-9]*");
Matcher m = p.matcher(area);
if (m.matches()) {
cityCode.setCode(area);
System.out.println(666);
}else{
cityCode.setCity(area);
}
Integer code = getCityCode(cityCode);
param.setWordKey(code+word);
System.out.println(code+word);
// 獲取查詢資料
List<PageInfoDTO> data = dao.findPageData(param);
Integer count = dao.getPageCount();
// 封裝傳輸資料
Map<String, Object> map = new HashMap<String, Object>();
map.put("count", count);
map.put("data", data);
JSONObject obj = JSONObject.fromObject(map);
// 將資料傳給頁面
out.println(obj);
}
}
public static Integer getCityCode(CityCodeDTO cityCode){
SqlSessionFactoryBuilder ssfd = new SqlSessionFactoryBuilder();
SqlSessionFactory ssf = ssfd.build(LngAndLatUtil.class.
getClassLoader().getResourceAsStream("SqlMapConfig.xml"));
SqlSession session = ssf.openSession();
PageInfoDAO dao = session.getMapper(PageInfoDAO.class);
Integer code = dao.getCityCode(cityCode);
return code;
}
}
6.從頁面上獲取資料的工具類
package util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.ibatis.session.SqlSession;
import org.apache.ibatis.session.SqlSessionFactory;
import org.apache.ibatis.session.SqlSessionFactoryBuilder;
import dao.PageInfoDAO;
import dto.CityCodeDTO;
import dto.PageInfoDTO;
import net.sf.json.JSONObject;
/**
 * Crawls the Baidu Map place-search API and stores the results through
 * MyBatis.
 */
public class LngAndLatUtil{
    /** Place-search endpoint of the Baidu Map web API. */
    private static final String SEARCH_URL = "http://api.map.baidu.com/place/v2/search";

    /** Placeholder for the access key; replace it with a real Baidu Map AK. */
    private static final String AK = "申請百度地圖獲取AK";

    /** Number of result pages requested per search. */
    private static final int MAX_PAGES = 10;

    /** Number of results requested per page. */
    private static final int PAGE_SIZE = 10;

    /** Network timeouts so a stalled API call cannot block the caller forever. */
    private static final int CONNECT_TIMEOUT_MS = 5000;
    private static final int READ_TIMEOUT_MS = 10000;

    /** Matches an area that was entered as a purely numeric area code. */
    private static final Pattern AREA_CODE = Pattern.compile("[0-9]+");

    /**
     * Holds the single SqlSessionFactory used by this class. Building a
     * factory per call also created a new pooled data source each time.
     */
    private static final class FactoryHolder {
        private static final SqlSessionFactory FACTORY = new SqlSessionFactoryBuilder()
                .build(LngAndLatUtil.class.getClassLoader().getResourceAsStream("SqlMapConfig.xml"));
    }

    /**
     * Crawls up to {@code MAX_PAGES} result pages for the keyword inside the
     * area and saves every returned place.
     * Crawling stops early when a request fails, the API reports a non-zero
     * status, or a page comes back empty.
     *
     * @param area area to search, either a numeric area code or a city name
     * @param word search keyword
     * @return the last API response that was parsed, or {@code null} when no
     *         response could be fetched
     */
    @SuppressWarnings("unchecked")
    public static JSONObject getLngAndLat(String area, String word) {
        // Resolve the city code once; it is the same for every result.
        Integer code = getCode(area);
        String wordKey = code + word;
        JSONObject obj = null;
        for (int page = 0; page < MAX_PAGES; page++) {
            String url = SEARCH_URL + "?query=" + encode(word) + "&tag=&scope=2&region=" + code
                    + "&output=json&page_size=" + PAGE_SIZE + "&page_num=" + page
                    + "&ret_coordtype=gcj02ll&ak=" + AK;
            String str = loadJSON(url);
            if (str.isEmpty()) {
                // Request failed; loadJSON already reported why.
                break;
            }
            obj = JSONObject.fromObject(str);
            if (!"0".equals(String.valueOf(obj.get("status")))) {
                break;
            }
            List<JSONObject> list = obj.getJSONArray("results");
            if (list.isEmpty()) {
                // No more results, so later pages would be empty as well.
                break;
            }
            for (JSONObject jx : list) {
                JSONObject location = jx.getJSONObject("location");
                PageInfoDTO pi = new PageInfoDTO();
                pi.setName((String) jx.get("name"));
                pi.setAddress((String) jx.get("address"));
                pi.setStreet_id((String) jx.get("street_id"));
                pi.setTelephone((String) jx.get("telephone"));
                // getDouble also accepts coordinates parsed as whole numbers,
                // where a (Double) cast would fail.
                pi.setLat(location.getDouble("lat"));
                pi.setLng(location.getDouble("lng"));
                pi.setWordKey(wordKey);
                getDao(pi);
            }
        }
        return obj;
    }

    /**
     * Resolves the area (numeric area code or city name) to a city code.
     * A missing or blank area yields {@code null} without querying.
     */
    private static Integer getCode(String area) {
        if (area == null || area.trim().isEmpty()) {
            return null;
        }
        CityCodeDTO cityCode = new CityCodeDTO();
        if (AREA_CODE.matcher(area).matches()) {
            cityCode.setCode(area);
        } else {
            cityCode.setCity(area);
        }
        return getCityCode(cityCode);
    }

    /**
     * Looks up the city code for the given key.
     *
     * @param cityCode carries either the area code or the city name
     * @return the city code returned by the mapper
     */
    public static Integer getCityCode(CityCodeDTO cityCode){
        SqlSession session = FactoryHolder.FACTORY.openSession();
        try {
            PageInfoDAO dao = session.getMapper(PageInfoDAO.class);
            return dao.getCityCode(cityCode);
        } finally {
            // Always hand the connection back to the pool.
            session.close();
        }
    }

    /**
     * Saves one record and commits it.
     *
     * @param pi the record to save
     */
    public static void getDao(PageInfoDTO pi){
        SqlSession session = FactoryHolder.FACTORY.openSession();
        try {
            PageInfoDAO dao = session.getMapper(PageInfoDAO.class);
            dao.save(pi);
            session.commit();
        } finally {
            session.close();
        }
    }

    /**
     * Fetches the body of the given URL as UTF-8 text, with line breaks
     * removed.
     *
     * @param uri the address to fetch
     * @return the response body, or an empty string when the address is
     *         malformed or the request fails
     */
    public static String loadJSON (String uri) {
        StringBuilder json = new StringBuilder();
        BufferedReader in = null;
        try {
            URLConnection yc = new URL(uri).openConnection();
            yc.setConnectTimeout(CONNECT_TIMEOUT_MS);
            yc.setReadTimeout(READ_TIMEOUT_MS);
            in = new BufferedReader(new InputStreamReader(yc.getInputStream(),"utf-8"));
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                json.append(inputLine);
            }
        } catch (MalformedURLException e) {
            System.err.println("loadJSON: malformed url " + uri + ": " + e);
            return "";
        } catch (IOException e) {
            // A partially read body is not usable JSON, so report nothing.
            System.err.println("loadJSON: request failed for " + uri + ": " + e);
            return "";
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
        return json.toString();
    }

    /** URL-encodes a query value as UTF-8; {@code null} becomes an empty value. */
    private static String encode(String value) {
        try {
            return URLEncoder.encode(value == null ? "" : value, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }
}
7.mybatis的配置檔案
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE configuration PUBLIC "-//ibatis.apache.org//DTD Config 3.0//EN"
"http://ibatis.apache.org/dtd/ibatis-3-config.dtd">
<configuration>
    <environments default="environment">
        <environment id="environment">
            <transactionManager type="JDBC" />
            <!-- Connection pool settings -->
            <dataSource type="POOLED">
                <property name="driver" value="com.mysql.jdbc.Driver" />
                <!--
                    The connection encoding is set explicitly so the stored
                    Chinese text does not depend on the server's default
                    character set.
                -->
                <property name="url" value="jdbc:mysql:///baiduMap?useUnicode=true&amp;characterEncoding=UTF-8"/>
                <!-- NOTE(review): sample credentials; do not ship these in a real deployment. -->
                <property name="username" value="root" />
                <property name="password" value="123456" />
            </dataSource>
        </environment>
    </environments>
    <!-- Location of the mapper files -->
    <mappers>
        <mapper resource="dto/PageInfoMapper.xml" />
    </mappers>
</configuration>
8.web.xml檔案
<?xml version="1.0" encoding="UTF-8"?>
<web-app xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://java.sun.com/xml/ns/javaee" xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_2_5.xsd" version="2.5">
<display-name>baiduMap</display-name>
<!-- Registers servlet.MapServlet under the name MapServlet. -->
<servlet>
<description></description>
<servlet-name>MapServlet</servlet-name>
<servlet-class>servlet.MapServlet</servlet-class>
<!-- NOTE(review): the "character" init-param is not read by the servlet code shown in this article. -->
<init-param>
<param-name>character</param-name>
<param-value>utf-8</param-value>
</init-param>
</servlet>
<!-- Every request whose path ends in .do is routed to MapServlet. -->
<servlet-mapping>
<servlet-name>MapServlet</servlet-name>
<url-pattern>*.do</url-pattern>
</servlet-mapping>
</web-app>
9.頁面js
// Once the DOM is ready, bind the pager buttons (delegated through #body)
// and the search button.
$(function(){
    var pagerButtons = '#nextPage,#prePage,#firstPage,#lastPage';
    $("#body").on('click', pagerButtons, changePageData);
    $(".query").on('click', getData);
});
/**
 * Click handler of the four pager buttons. Works out the new page number
 * from the clicked button's label, keeps it between 1 and the last page,
 * stores it on #body and reloads the table.
 */
function changePageData(){
    // pageCount is the row count stored by setTableRows; it is missing until
    // the first listing has been loaded.
    var pageCount = Number($("#body").data("pageCount")) || 0;
    // 10 rows per page; always at least one page.
    var lastPage = Math.max(1, Math.ceil(pageCount / 10));
    var pageCurrent = parseInt($("#body").data("pageCurrent"), 10) || 1;

    switch ($(this).val()) {
    case "首頁":
        pageCurrent = 1;
        break;
    case "上一頁":
        pageCurrent--;
        break;
    case "下一頁":
        pageCurrent++;
        break;
    case "尾頁":
        pageCurrent = lastPage;
        break;
    }

    // Do not step before the first page or past the last one.
    pageCurrent = Math.min(Math.max(pageCurrent, 1), lastPage);
    $("#body").data("pageCurrent", pageCurrent);
    getPageData();
}
/**
 * Click handler of the search button. Asks the server to crawl and store the
 * results for the entered area and keyword, then shows the first page.
 */
function getData(){
    var param = {
        "area": $(".area").val(),
        "word": $(".word").val()
    };
    $.getJSON("query.do", param, function(){
        // A new search starts again at page 1 instead of keeping the page
        // number of the previous search.
        $("#body").data("pageCurrent", 1);
        getPageData();
    });
}
/**
 * Requests the current page (default 1) of stored rows for the entered area
 * and keyword from list.do and renders the answer into the table.
 */
function getPageData(){
    var endpoint = "list.do";
    console.log(endpoint);
    var query = {
        "pageCurrent": $("#body").data("pageCurrent") || 1,
        "area": $(".area").val(),
        "word": $(".word").val()
    };
    console.log(345);
    $.getJSON(endpoint, query, function(response){
        setTableRows(response);
        console.log(666);
    });
}
/**
 * Replaces the table body with the rows of one listing answer and remembers
 * the row count on #body for the pager.
 *
 * @param map answer of list.do: {count: number, data: array of rows}
 */
function setTableRows(map){
    // Builds a cell whose content is inserted as plain text, so markup inside
    // crawled values cannot be injected into the page.
    function textCell(value){
        return $("<td></td>").text(value == null ? "" : String(value));
    }

    var tbody = $(".tBody");
    $("#body").data("pageCount", map.count);
    tbody.empty();

    var rows = map.data || [];
    for(var i = 0; i < rows.length; i++){
        var row = rows[i];
        // 經度 is the longitude (lng) and 緯度 the latitude (lat).
        var coords = $("<td></td>").append(
            document.createTextNode("經度:" + row.lng),
            $("<br/>"),
            document.createTextNode("緯度:" + row.lat)
        );
        var tr = $("<tr class='tr'></tr>");
        tr.append(
            textCell(row.name),
            textCell(row.address),
            textCell(row.telephone),
            textCell(row.street_id),
            coords,
            textCell(row.valid),
            $("<td><input type='button' value='修改'></td>")
        );
        tbody.append(tr);
    }
}
10.頁面
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Insert title here</title>
<!-- jQuery must load before map.js, which uses $ -->
<script type="text/javascript" src="js/jquery-3.2.1.min.js"></script>
<link rel="stylesheet" type="text/css" href="css/map.css">
<script type="text/javascript" src="js/map.js"></script>
</head>
<body>
<!-- #body: map.js keeps pageCount/pageCurrent on this element and delegates the pager clicks through it -->
<div id="body">
<!-- Search form: area, keyword and the search button (.query) -->
<div class="getData">
<input class="area" type="text" placeholder="請輸入區域" autocomplete="on">
<input class="word" type="text" placeholder="請輸入關鍵詞" autocomplete="on">
<input class="query" type="button" value="搜尋">
</div>
<!-- Result table; map.js fills tbody.tBody -->
<div class="tableBody">
<table id="table" border='1' cellpadding="10" cellspacing="0">
<thead>
<tr id="tr">
<th class="th">名稱</th>
<th class="th">地址</th>
<th class="th">電話</th>
<th class="th">街景圖</th>
<th class="th">地址經緯度</th>
<th class="th">有效性</th>
<th class="th">操作</th>
</tr>
</thead>
<tbody class="tBody">
</tbody>
</table>
</div>
<!-- Pager: map.js decides the action from each button's value text -->
<div class="foot">
<input id="firstPage" class="page" type="button" value="首頁">
<input id="prePage" class="page" type="button" value="上一頁">
<input id="nextPage" class="page" type="button" value="下一頁">
<input id="lastPage" class="page" type="button" value="尾頁">
</div>
</div>
</body>
</html>
11.頁面css
@charset "utf-8";
/* Reset the default spacing of every element. */
*{
    margin:0;
    padding:0;
}
/* Search form, centred above the table. */
.getData{
    width:500px;
    margin:20px auto 10px auto;
}
.tableBody{
    width:1000px !important;
}
/* Result table. The earlier "width:auto" was overridden by the later
   "width:1000px", and "padding:0 auto" is not a valid padding value, so both
   declarations had no effect and are dropped. */
#table{
    margin:20px auto 10px auto;
    width:1000px;
}
/* Page wrapper, centred. */
#body{
    width:1000px;
    margin:0 auto;
}
.th{
    width: 80px;
}
.tr{
    width:300px;
}
.tr td{
    width:150px;
    text-align:center;
}
/* Pager row. */
.foot{
    width:300px;
    margin:0 auto;
}
.page{
    margin-right:30px;
}
12.專案所涉及的jar包
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.3b</groupId>
    <artifactId>baiduMap</artifactId>
    <version>0.0.1-SNAPSHOT</version>
    <packaging>war</packaging>
    <dependencies>
        <!-- json-lib is published with a JDK classifier only; without it the artifact cannot be resolved. -->
        <dependency>
            <groupId>net.sf.json-lib</groupId>
            <artifactId>json-lib</artifactId>
            <version>2.2.3</version>
            <classifier>jdk15</classifier>
        </dependency>
        <!-- Supplied by the servlet container at runtime, so it must not be packaged into the WAR. -->
        <dependency>
            <groupId>javax.servlet</groupId>
            <artifactId>javax.servlet-api</artifactId>
            <version>3.1.0</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.mybatis</groupId>
            <artifactId>mybatis</artifactId>
            <version>3.2.8</version>
        </dependency>
        <!-- Only needed for tests. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
        <!--
            The former com.oracle:ojdbc14 entry was removed: its type
            "pom.lastUpdated" is a leftover of a failed download rather than a
            real artifact type, and the project connects to MySQL.
        -->
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.40</version>
        </dependency>
    </dependencies>
</project>
13.sql語句
SET FOREIGN_KEY_CHECKS=0;
-- ----------------------------
-- Table structure for pageinf
-- ----------------------------
DROP TABLE IF EXISTS `pageinf`;
CREATE TABLE `pageinf` (
  -- The mapper's insert never supplies an id, so the key has to be generated.
  -- Without AUTO_INCREMENT every "insert ignore" after the first one collides
  -- on the same default id and is silently dropped.
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `wordKey` varchar(255) DEFAULT NULL,
  `name` varchar(255) DEFAULT NULL,
  `address` varchar(500) DEFAULT NULL,
  `telephone` varchar(255) DEFAULT NULL,
  `street_id` varchar(500) DEFAULT NULL,
  `lat` varchar(255) DEFAULT NULL,
  `lng` varchar(255) DEFAULT NULL,
  `valid` int(11) DEFAULT NULL,
  `createdTime` datetime DEFAULT NULL,
  `createdName` varchar(255) DEFAULT NULL,
  `modifiedTime` datetime DEFAULT NULL,
  `modifiedName` varchar(255) DEFAULT NULL,
  PRIMARY KEY (`id`),
  -- A place name can be stored only once; "insert ignore" skips repeats.
  UNIQUE KEY `bdname` (`name`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8;