c#爬需要登陸的網頁的資料
用 C# 寫爬蟲時,若遇到需要使用者登入才能訪問的網頁,爬取資料時就必須在請求中設定 cookie。程式碼如下:
/// <summary>
/// Sends an HTTP GET request to <paramref name="getUrl"/> with the caller's cookie
/// attached and returns the response body decoded as UTF-8.
/// </summary>
/// <param name="getUrl">Address of the page to fetch.</param>
/// <param name="cookie">
/// Cookie data for the logged-in session. It is passed to <c>getCookie</c>, which builds
/// the <see cref="CookieContainer"/> for the request (expected format is defined by that
/// helper, which is not shown here).
/// </param>
/// <returns>The response body with leading and trailing whitespace removed.</returns>
/// <remarks>
/// Exceptions raised while creating the request or reading the response are not handled
/// here; they propagate to the caller with their original stack trace.
/// </remarks>
public static string GetHtmlByCookie(string getUrl, string cookie)
{
    HttpWebRequest request = null;
    try
    {
        request = (HttpWebRequest)WebRequest.Create(getUrl);
        request.Accept = "*/*";
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)";
        request.AllowAutoRedirect = true;

        // Use the caller-supplied cookie value. The previous code passed the string
        // literal "cookie", so the real session cookie was never sent.
        request.CookieContainer = getCookie(cookie);

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("utf-8")))
        {
            return reader.ReadToEnd().Trim();
        }
    }
    finally
    {
        // Always release the underlying request, whether the call succeeded or threw.
        if (request != null)
        {
            request.Abort();
        }
    }
}