1. 程式人生 > >c#爬需要登陸的網頁的資料

c#爬需要登陸的網頁的資料

用 C# 寫爬蟲時,若遇到需要使用者登陸才能訪問的網頁,爬資料時需要設定 cookie。程式碼如下:

  /// <summary>
        /// Get提交-UTF-8編碼
        /// </summary>
        /// <param name="postUrl">地址</param>
        /// <param name="postData">引數</param>
        /// <param name="cookie">cookie</param>
        /// <param name="headDict">headDict</param>
        /// <returns></returns>
        public static string GetHtmlByCookie(string getUrl,string cookie)
        {
            HttpWebRequest request = null;
            try
            {

                request = (HttpWebRequest)WebRequest.Create(getUrl);
                CookieContainer cookieContainer = getCookie("cookie");
                request.Accept = "*/*";
                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)";
                request.AllowAutoRedirect = true;
                request.CookieContainer = cookieContainer;

                using (HttpWebResponse res = (HttpWebResponse)request.GetResponse())
                {
                    using (StreamReader sr = new StreamReader(res.GetResponseStream(), Encoding.GetEncoding("utf-8")))
                    {
                        string line = sr.ReadToEnd().Trim();

                        return line;
                    }
                }
            }
            catch (Exception e)
            {
                throw e;
            }
            finally
            {
                if (request != null)
                    request.Abort();
            }
        }