Go 語言爬取椎名真白
阿新 • • 發佈:2018-07-08
regexp highlight defer reg write rul png span link
單任務版:
// Single-task version: collects image URLs from a Baidu Tieba thread page and
// downloads them one after another.
package main

import (
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"os"
	"regexp"
	"strconv"
	"time"
)

// saveDir is the path prefix every downloaded image is written under.
const saveDir = `F:\project\黑馬go\mashiro\`

// httpClient is shared by all requests. The timeout keeps a stalled server
// from hanging the program indefinitely.
var httpClient = &http.Client{Timeout: 30 * time.Second}

// get_mashiro fetches the page at link, applies the regular expression rule
// to its body, and returns the first capture group of every match whose byte
// length equals len(target). target therefore acts as a sample URL: only
// candidates of exactly the same length are kept.
//
// Any failure (invalid rule, request error, non-200 status, read error) is
// reported on stderr and yields a nil result instead of a panic.
func get_mashiro(link, rule, target string) []string {
	// Compile first so that a bad rule is rejected before any network traffic.
	re, err := regexp.Compile(rule)
	if err != nil {
		fmt.Fprintf(os.Stderr, "compiling rule %q: %v\n", rule, err)
		return nil
	}

	res, err := httpClient.Get(link)
	if err != nil {
		fmt.Fprintf(os.Stderr, "fetching %q: %v\n", link, err)
		return nil
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		fmt.Fprintf(os.Stderr, "fetching %q: unexpected status %s\n", link, res.Status)
		return nil
	}

	page, err := ioutil.ReadAll(res.Body)
	if err != nil {
		fmt.Fprintf(os.Stderr, "reading %q: %v\n", link, err)
		return nil
	}

	matches := re.FindAllStringSubmatch(string(page), -1)
	urls := make([]string, 0, len(matches))
	for _, m := range matches {
		// m[1] only exists when rule has a capture group; skip otherwise.
		if len(m) < 2 || len(m[1]) != len(target) {
			continue
		}
		urls = append(urls, m[1])
	}
	return urls
}

// saveImage downloads url and streams it to saveDir + "<index>.jpg".
// It lives in its own function so the response body and the file are closed
// as soon as this single download finishes.
func saveImage(index int, url string) error {
	res, err := httpClient.Get(url)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status %s", res.Status)
	}

	// The ".jpg" extension is used for every file, whatever the URL ends in.
	f, err := os.Create(saveDir + strconv.Itoa(index) + ".jpg")
	if err != nil {
		return err
	}
	if _, err := io.Copy(f, res.Body); err != nil {
		f.Close()
		return err
	}
	// Close reports deferred write errors, so its result is returned.
	return f.Close()
}

// download_mashiro downloads every URL in urls sequentially, naming each file
// after the URL's index in the slice. A failed download is reported on stderr
// and does not stop the remaining ones.
func download_mashiro(urls []string) {
	for i, u := range urls {
		if err := saveImage(i, u); err != nil {
			fmt.Fprintf(os.Stderr, "downloading %q: %v\n", u, err)
		}
	}
}

// main scrapes the thread, downloads the images, and prints the elapsed time
// in whole seconds.
func main() {
	startTime := time.Now().Unix()

	link := "https://tieba.baidu.com/p/5290405550?red_tag=0872096237"
	rule := `src="(http.+?(?:jpg|png))"`
	// Sample image URL; only matches of the same length are downloaded.
	target := "https://imgsa.baidu.com/forum/w%3D580/sign=5a28bf191fce36d3a20483380af13a24/5f572ae93901213fb9930d1f5ee736d12e2e951c.jpg"

	picURLs := get_mashiro(link, rule, target)
	download_mashiro(picURLs)

	endTime := time.Now().Unix()
	fmt.Println("總用時:", endTime-startTime)
}
多任務版:
// Multi-task version: collects image URLs from a Baidu Tieba thread page and
// downloads them concurrently, one goroutine per image.
package main

import (
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"os"
	"regexp"
	"runtime"
	"strconv"
	"sync"
	"time"
)

// saveDir is the path prefix every downloaded image is written under.
const saveDir = `F:\project\黑馬go\mashiro\`

// httpClient is shared by all requests. The timeout keeps a stalled server
// from hanging the program indefinitely.
var httpClient = &http.Client{Timeout: 30 * time.Second}

// get_mashiro fetches the page at link, applies the regular expression rule
// to its body, and returns the first capture group of every match whose byte
// length equals len(target). target therefore acts as a sample URL: only
// candidates of exactly the same length are kept.
//
// Any failure (invalid rule, request error, non-200 status, read error) is
// reported on stderr and yields a nil result instead of a panic.
func get_mashiro(link, rule, target string) []string {
	// Compile first so that a bad rule is rejected before any network traffic.
	re, err := regexp.Compile(rule)
	if err != nil {
		fmt.Fprintf(os.Stderr, "compiling rule %q: %v\n", rule, err)
		return nil
	}

	res, err := httpClient.Get(link)
	if err != nil {
		fmt.Fprintf(os.Stderr, "fetching %q: %v\n", link, err)
		return nil
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		fmt.Fprintf(os.Stderr, "fetching %q: unexpected status %s\n", link, res.Status)
		return nil
	}

	page, err := ioutil.ReadAll(res.Body)
	if err != nil {
		fmt.Fprintf(os.Stderr, "reading %q: %v\n", link, err)
		return nil
	}

	matches := re.FindAllStringSubmatch(string(page), -1)
	urls := make([]string, 0, len(matches))
	for _, m := range matches {
		// m[1] only exists when rule has a capture group; skip otherwise.
		if len(m) < 2 || len(m[1]) != len(target) {
			continue
		}
		urls = append(urls, m[1])
	}
	return urls
}

// saveImage downloads url and streams it to saveDir + "<index>.jpg".
// The response body and the file are both closed before it returns.
func saveImage(index int, url string) error {
	res, err := httpClient.Get(url)
	if err != nil {
		return err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return fmt.Errorf("unexpected status %s", res.Status)
	}

	// The ".jpg" extension is used for every file, whatever the URL ends in.
	f, err := os.Create(saveDir + strconv.Itoa(index) + ".jpg")
	if err != nil {
		return err
	}
	if _, err := io.Copy(f, res.Body); err != nil {
		f.Close()
		return err
	}
	// Close reports deferred write errors, so its result is returned.
	return f.Close()
}

// download_mashiro starts one goroutine per URL, prints each index as its
// download begins, and returns once every goroutine has finished. A failed
// download is reported on stderr and does not affect the others.
func download_mashiro(urls []string) {
	runtime.GOMAXPROCS(4)

	var wg sync.WaitGroup
	for i, u := range urls {
		wg.Add(1)
		// i and u are passed as arguments so each goroutine has its own copy.
		go func(i int, u string) {
			// Done is deferred so the waiter is released on every exit path.
			defer wg.Done()
			fmt.Println(i)
			if err := saveImage(i, u); err != nil {
				fmt.Fprintf(os.Stderr, "downloading %q: %v\n", u, err)
			}
		}(i, u)
	}
	wg.Wait()
}

// main scrapes the thread, downloads the images, and prints the elapsed time
// in whole seconds.
func main() {
	startTime := time.Now().Unix()

	link := "https://tieba.baidu.com/p/5290405550?red_tag=0872096237"
	rule := `src="(http.+?(?:jpg|png))"`
	// Sample image URL; only matches of the same length are downloaded.
	target := "https://imgsa.baidu.com/forum/w%3D580/sign=5a28bf191fce36d3a20483380af13a24/5f572ae93901213fb9930d1f5ee736d12e2e951c.jpg"

	picURLs := get_mashiro(link, rule, target)
	download_mashiro(picURLs)

	endTime := time.Now().Unix()
	fmt.Println("總用時:", endTime-startTime)
}
Go 語言爬取椎名真白