1. 程式人生 > 其它 >golang 非同步併發http輪詢(爬蟲)

golang 非同步併發http輪詢(爬蟲)

工作中常會遇到需要密集發送 HTTP 請求的場景。這裡採用「非同步 + 併發」的策略,來看看 Golang 是怎麼實現的吧。

一 同步http

// Http_curl sends a single HTTP request synchronously and returns the raw
// response body.
//
// url is the request target, payload_str is sent as the request body, and
// method is the HTTP verb (for example "GET" or "POST"). The request is
// labelled as form-encoded and asks intermediaries not to cache it.
//
// It returns nil when the request cannot be built, the round trip fails, or
// the response body cannot be read in full.
func Http_curl(url string, payload_str string, method string) []byte {
	payload := strings.NewReader(payload_str)

	req, err := http.NewRequest(method, url, payload)
	if err != nil {
		// req is nil on error; touching req.Header would panic.
		return nil
	}
	req.Header.Add("content-type", "application/x-www-form-urlencoded")
	req.Header.Add("cache-control", "no-cache")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so there is no body to close.
		return nil
	}
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return nil
	}
	return body
}

二 非同步http

// Http_curl_async starts the request described by url, payload_str and method
// on a new goroutine and returns immediately. The response body is discarded
// and the caller receives no completion signal.
func Http_curl_async(url string, payload_str string, method string) {
	go Http_curl(url, payload_str, method)
}

三 控制非同步併發速度

// chSem is a buffered channel used as a counting semaphore: a send takes one
// of its 5 slots and a receive gives the slot back.
var chSem = make(chan int, 5)

// Http_curl_async starts the request on a new goroutine and returns
// immediately. The goroutine takes a slot in chSem before calling Http_curl
// and gives it back afterwards, so the number of requests running at once is
// bounded by the capacity of chSem. The response body is discarded.
func Http_curl_async(url string, payload_str string, method string) {
	limited := func() {
		chSem <- 1 // blocks while every slot is taken
		_ = Http_curl(url, payload_str, method)
		<-chSem // hand the slot back
	}
	go limited()
}

四 非同步併發實現

package utils

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"strings"
	"sync"
)

var (
	// chSem is a buffered channel used as a counting semaphore: a send takes
	// one of its 5 slots and a receive gives the slot back.
	chSem = make(chan int, 5)
	// chSemWg counts the goroutines started by Http_curl_async_loop that
	// have not finished yet.
	chSemWg sync.WaitGroup
)

// Http_curl sends a single HTTP request synchronously and returns the raw
// response body.
//
// url is the request target, payload_str is sent as the request body, and
// method is the HTTP verb (for example "GET" or "POST"). The request is
// labelled as form-encoded and asks intermediaries not to cache it.
//
// It returns nil when the request cannot be built, the round trip fails, or
// the response body cannot be read in full.
func Http_curl(url string, payload_str string, method string) []byte {
	payload := strings.NewReader(payload_str)

	req, err := http.NewRequest(method, url, payload)
	if err != nil {
		// req is nil on error; touching req.Header would panic.
		return nil
	}
	req.Header.Add("content-type", "application/x-www-form-urlencoded")
	req.Header.Add("cache-control", "no-cache")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		// res is nil on error, so there is no body to close.
		return nil
	}
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		return nil
	}
	return body
}

// Http_curl_async_loop issues the same request looptimes times, each on its
// own goroutine, and blocks until all of them have finished.
//
// Every goroutine takes a slot in chSem before calling Http_curl, so the
// number of requests running at once is bounded by the capacity of chSem.
// Each goroutine prints its iteration index before sending; response bodies
// are discarded. A looptimes of zero or less is a no-op.
func Http_curl_async_loop(looptimes int, url string, payload_str string, method string) {
	if looptimes <= 0 {
		// WaitGroup.Add panics when the counter would go negative.
		return
	}

	chSemWg.Add(looptimes)
	for i := 0; i < looptimes; i++ {
		cur_i := i // per-iteration copy for the closure (required before Go 1.22)
		go func() {
			// Deferred so the wait group and the semaphore slot are settled
			// on every exit path; the slot is released before Done runs.
			defer chSemWg.Done()
			chSem <- 1
			defer func() { <-chSem }()

			fmt.Println(cur_i)
			Http_curl(url, payload_str, method)
		}()
	}
	chSemWg.Wait()
}
// Example call from another package: send 100 POST requests with body "s=s01" to the given URL.
utils.Http_curl_async_loop(100,"http://www.baidu.com", "s=s01","POST")

五 說明

  • 如果不用 wg(sync.WaitGroup)等待所有協程結束,輪詢還沒跑完,主程式就會先退出
  • 要控制併發速度,不然協程多了,開啟和維護的資源也不少;這裡用容量為 5 的 chSem 通道當作號誌,限制同時進行的請求數