獲取豆瓣電影
阿新 • • 發佈:2019-01-02
// This program downloads pages of the Douban chart selected by type=24
// (the original author's notes call it the drama chart) and appends every
// entry to a GBK-encoded CSV file in the current working directory.
package main

import (
	"encoding/csv"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"os"
	"strconv"
	"strings"
	"time"

	"golang.org/x/text/encoding/simplifiedchinese"
	"golang.org/x/text/transform"
)

const (
	// chartBaseURL is the chart endpoint up to, and including, the "start="
	// query parameter; the item offset and "&limit=" are appended per page.
	chartBaseURL = `https://movie.douban.com/j/chart/top_list?type=24&interval_id=100%3A90&action=&start=`
	// pageSize is the number of entries requested per page.
	pageSize = 20
	// csvFileName is the output file, created relative to the working directory.
	csvFileName = "douBan.csv"
	// requestTimeout bounds a single HTTP request so a stalled connection
	// cannot hang the program forever.
	requestTimeout = 30 * time.Second
	// requestInterval is the pause between two consecutive page requests.
	requestInterval = 5 * time.Second
)

// httpClient is shared by all requests; unlike http.DefaultClient it has a timeout.
var httpClient = &http.Client{Timeout: requestTimeout}

// csvHeader lists the column names in the same order movieRecord emits values.
var csvHeader = []string{
	"rating", "rank", "cover_url", "is_playable", "id", "types", "regions", "title",
	"url", "release_date", "actor_count", "vote_count", "score", "actors", "is_watched",
}

// A is one chart entry as decoded from the JSON array returned by the endpoint.
type A struct {
	Rating      []string `json:"rating"`
	Rank        int      `json:"rank"`
	CoverUrl    string   `json:"cover_url"`
	IsPlayable  bool     `json:"is_playable"`
	Id          string   `json:"id"`
	Types       []string `json:"types"`
	Regions     []string `json:"regions"`
	Title       string   `json:"title"`
	Url         string   `json:"url"`
	ReleaseDate string   `json:"release_date"`
	ActorCount  int      `json:"actor_count"`
	VoteCount   int      `json:"vote_count"`
	Score       string   `json:"score"`
	Actors      []string `json:"actors"`
	IsWatched   bool     `json:"is_watched"`
}

// main asks for a page range, then fetches every page in that range and
// appends its entries to the CSV file.
func main() {
	// First and last page to download.
	startPage, endPage := getPage()
	fmt.Printf("起始頁=%v,終止頁=%v\n", startPage, endPage)

	// One URL per page.
	urlList := getUrlList(startPage, endPage)
	fmt.Println("urlList:", urlList)

	for i, url := range urlList {
		// Pause between requests only; there is nothing to wait for after
		// the last page.
		if i > 0 {
			time.Sleep(requestInterval)
		}
		var data []A
		handleUrl(url, &data)
		saveCSV(data)
	}
}

// getPage prompts on stdout and reads the first and last page numbers from
// stdin. A first page below 1 becomes 1; a last page below the first page
// becomes the first page.
func getPage() (int, int) {
	var startPage, endPage int

	fmt.Println("請輸入起始頁(小於1預設為1):")
	// Scan errors are ignored on purpose: when nothing could be parsed the
	// variable stays 0 and the range checks below substitute the default.
	_, _ = fmt.Scanln(&startPage)
	if startPage < 1 {
		startPage = 1
	}

	fmt.Println("請輸入終止頁(小於起始頁預設為起始頁):")
	_, _ = fmt.Scanln(&endPage)
	if endPage < startPage {
		endPage = startPage
	}
	return startPage, endPage
}

// getUrlList returns one request URL per page from start to end inclusive.
// Page p maps to item offset (p-1)*pageSize. It returns nil when end < start.
func getUrlList(start, end int) []string {
	if end < start {
		return nil
	}
	urlList := make([]string, 0, end-start+1)
	for page := start; page <= end; page++ {
		offset := strconv.Itoa((page - 1) * pageSize)
		urlList = append(urlList, chartBaseURL+offset+"&limit="+strconv.Itoa(pageSize))
	}
	return urlList
}

// handleUrl downloads url, decodes the JSON array it returns, stores the
// result in *data (when data is non-nil) and also returns it.
//
// As in the original program, any failure terminates the process with exit
// status 1; the reason is now printed to stderr first.
func handleUrl(url string, data *[]A) []A {
	movies, err := fetchMovies(url)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	if data != nil {
		*data = movies
	}
	return movies
}

// fetchMovies performs the GET request and decodes the response body.
func fetchMovies(url string) ([]A, error) {
	resp, err := httpClient.Get(url)
	if err != nil {
		return nil, fmt.Errorf("GET %s: %v", url, err)
	}
	// Deferred right after the error check so the body is closed on every
	// return path below.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("GET %s: unexpected status %s", url, resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("reading response of %s: %v", url, err)
	}

	var movies []A
	if err := json.Unmarshal(body, &movies); err != nil {
		return nil, fmt.Errorf("反序列化失敗: %v", err)
	}
	return movies, nil
}

// saveCSV appends data to csvFileName as GBK-encoded CSV rows. The header row
// is written only when the file did not exist before this call, so repeated
// calls (one per page) produce a single header.
//
// Rows that cannot be encoded or written are reported on stderr and skipped.
func saveCSV(data []A) {
	// Must be evaluated before OpenFile, which creates the file.
	needHeader := !dealTitle(csvFileName)

	// 0644: a data file needs neither execute nor world-write permission.
	fp, err := os.OpenFile(csvFileName, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		fmt.Println("開啟檔案失敗")
		return
	}
	defer func() {
		if err := fp.Close(); err != nil {
			fmt.Fprintf(os.Stderr, "closing %s: %v\n", csvFileName, err)
		}
	}()

	if needHeader {
		if err := writeRow(fp, csvHeader); err != nil {
			fmt.Fprintf(os.Stderr, "writing header to %s: %v\n", csvFileName, err)
		}
	}
	for i := range data {
		if err := writeRow(fp, movieRecord(data[i])); err != nil {
			fmt.Fprintf(os.Stderr, "skipping entry %q: %v\n", data[i].Title, err)
		}
	}
}

// movieRecord flattens m into CSV fields in csvHeader order. Slice fields use
// fmt's default formatting, e.g. [a b].
func movieRecord(m A) []string {
	return []string{
		fmt.Sprint(m.Rating),
		strconv.Itoa(m.Rank),
		m.CoverUrl,
		strconv.FormatBool(m.IsPlayable),
		m.Id,
		fmt.Sprint(m.Types),
		fmt.Sprint(m.Regions),
		m.Title,
		m.Url,
		m.ReleaseDate,
		strconv.Itoa(m.ActorCount),
		strconv.Itoa(m.VoteCount),
		m.Score,
		fmt.Sprint(m.Actors),
		strconv.FormatBool(m.IsWatched),
	}
}

// csvLine renders fields as a single newline-terminated CSV line, quoting
// fields that contain commas, quotes or line breaks so they stay in one column.
func csvLine(fields []string) (string, error) {
	var sb strings.Builder
	w := csv.NewWriter(&sb)
	if err := w.Write(fields); err != nil {
		return "", err
	}
	w.Flush()
	if err := w.Error(); err != nil {
		return "", err
	}
	return sb.String(), nil
}

// writeRow encodes fields as one GBK CSV line and writes it to w.
func writeRow(w io.Writer, fields []string) error {
	line, err := csvLine(fields)
	if err != nil {
		return err
	}
	encoded, err := utf82GBK(line)
	if err != nil {
		return err
	}
	_, err = io.WriteString(w, encoded)
	return err
}

// utf82GBK converts the UTF-8 string src to GBK. It returns an error when the
// encoder rejects the input.
func utf82GBK(src string) (string, error) {
	reader := transform.NewReader(strings.NewReader(src), simplifiedchinese.GBK.NewEncoder())
	buf, err := ioutil.ReadAll(reader)
	if err != nil {
		return "", err
	}
	return string(buf), nil
}

// dealTitle reports whether name already exists, resolved relative to the
// current working directory. That is the same directory saveCSV opens the
// file in, so the check no longer depends on a machine-specific absolute path.
func dealTitle(name string) bool {
	_, err := os.Stat(name)
	return err == nil
}