R-指定城市天氣爬取
業務需求:爬取指定城市天氣
操作:基於R語言和sql server資料庫儲存,目前仍未使用網頁展示
爬取結果示例:
R原始碼:
# install.packages("rvest")
library(rvest)
# 第一步:定義幾個函式
# Forecast page URLs to crawl, one per city.
# Every URL shares the same prefix and suffix and differs only in the numeric
# code embedded in the path, so the vector is assembled from the codes.
city.urls <- local({
  codes <- c(
    "101190101", "101190601", "101190401", "101190301", "101190201",
    "101191101", "101191201", "101190501", "101190701", "101190901",
    "101191301", "101190801", "101191001",
    "101220101", "101221101", "101220601", "101220201", "101220401",
    "101221201", "101221501", "101220501", "101220301", "101221301",
    "101221401",
    "101210101", "101210401", "101211101", "101210507", "101210901",
    "101210201",
    "101180101", "101181501", "101181101",
    "101270101",
    "101290101"
  )
  paste0("http://www.weather.com.cn/weather/", codes, ".shtml")
})
# Scrape the forecast shown on a single city page.
#
# Args:
#   city.url: URL of one forecast page (for example an element of city.urls).
#
# Returns:
#   A data.frame with one row per forecast entry found on the page and the
#   columns city, date, weather, temperature and wind. The scraped fields are
#   kept as text exactly as they appear on the page.
#
# Raises:
#   An error when the page has no forecast entries, or when an entry does not
#   split into the five expected newline-separated fields.
read.weather <- function(city.url) {
  web <- read_html(city.url, encoding = "utf8")

  crumbs <- web %>%
    html_nodes("div") %>%
    html_nodes("div") %>%
    html_nodes("div") %>%
    html_nodes("div.crumbs.fl") %>%
    html_nodes("a") %>%
    html_text()
  # The city label is the first two breadcrumb links glued together. head()
  # keeps a literal "NA" out of the label when fewer than two links exist.
  city <- paste(head(crumbs, 2L), collapse = "")

  # Each <li> of the forecast list becomes one character vector of fields.
  entries <- web %>%
    html_nodes("div") %>%
    html_nodes("ul.t.clearfix") %>%
    html_nodes("li") %>%
    html_text() %>%
    strsplit(split = "[\n]+")

  # The text of an entry starts with a newline, so the first field is an
  # empty placeholder; it is named here only so that it can be dropped below.
  field.names <- c("無", "date", "weather", "temperature", "wind")
  if (length(entries) == 0L) {
    stop("No forecast entries found at ", city.url, call. = FALSE)
  }
  if (any(lengths(entries) != length(field.names))) {
    stop(
      "Unexpected forecast layout at ", city.url, ": every entry must have ",
      length(field.names), " newline-separated fields",
      call. = FALSE
    )
  }

  weather <- do.call(rbind, entries)
  colnames(weather) <- field.names
  # Drop the placeholder and the page's own date text. drop = FALSE keeps a
  # matrix even when the page lists a single entry.
  weather <- weather[, c(-1, -2), drop = FALSE]

  # Calendar dates are derived from the entry count instead of a fixed seven.
  # Assumes the first entry is today and the rest are consecutive days.
  date <- seq(Sys.Date(), by = "day", length.out = nrow(weather))

  data.frame(city, date, weather)
}
# Scrape every city page in turn and stack the results.
#
# Args:
#   city.urls: character vector of forecast page URLs.
#
# Returns:
#   A single data.frame holding the rows returned by read.weather() for each
#   URL, in input order. NULL when city.urls is empty.
rbind.weather <- function(city.urls) {
  # Collect the per-city results first and bind once. The accumulator is never
  # looked up by name, so a `weathers` object left in the workspace by an
  # earlier run cannot leak into the result.
  per.city <- lapply(city.urls, read.weather)
  # unname() stops names on city.urls from being passed to rbind() as
  # argument names.
  do.call(rbind, unname(per.city))
}
# Step 2: crawl the live forecasts and store them in the database.
# Fetch the forecast for every configured city.
weathers <- rbind.weather(city.urls)
# write.csv(weathers, "allcity_weathers_7days.csv", row.names = FALSE)
# Save the scraped result in the database.
# Connect to the ODBC data source.
library(RODBC)
# odbcDataSources()  # lists the available ODBC data sources
# NOTE(review): the login and password are hard-coded in the script; consider
# reading them from the environment or a DSN that stores the credentials.
channel <- odbcConnect("yyy", uid = "sa", pwd = "Passw0rd") # open connection
# odbcConnect() signals failure by returning -1 rather than raising an error,
# so stop here instead of failing later inside sqlSave().
if (!inherits(channel, "RODBC")) {
  stop("Could not open ODBC data source \"yyy\"", call. = FALSE)
}
# da <- sqlQuery(channel, "select top 2 * from dbo.Persons")  # a simple SQL
# query; the result would be stored in the data.frame variable da
# Append the result to the weather table; the connection is closed even when
# the save fails.
tryCatch(
  sqlSave(
    channel, weathers,
    tablename = "weather", append = TRUE,
    rownames = FALSE, colnames = FALSE, safer = TRUE
  ),
  finally = close(channel)
)