爬蟲系列 一次採集.NET WebForm網站的坎坷歷程
阿新 • • 發佈:2020-07-23
/// <summary>
/// Sends an HTTP GET request to <paramref name="url"/> and returns the response body.
/// Shares the static cookie container so the scraping session stays authenticated.
/// </summary>
/// <param name="url">Target URL.</param>
/// <param name="SuccessCallback">Invoked with the response body after a successful read; may be null.</param>
/// <param name="FailCallback">Kept for signature symmetry with Post; GET has no failure heuristic, so it is never invoked here.</param>
/// <returns>The full response body as a string.</returns>
public static string Get(string url, Action<string> SuccessCallback, Action<string> FailCallback)
{
    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
    req.Method = "GET";
    req.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36";
    req.Accept = "*/*";
    req.KeepAlive = true;
    req.ServicePoint.ConnectionLimit = int.MaxValue;
    // WebForms servers can choke on "Expect: 100-continue"; disable it.
    req.ServicePoint.Expect100Continue = false;
    req.CookieContainer = sznyCookie; // static shared cookie container (fix: original used '#' which is not a C# comment)
    req.Credentials = System.Net.CredentialCache.DefaultCredentials;

    string msg = "";
    using (HttpWebResponse rsp = (HttpWebResponse)req.GetResponse())
    using (StreamReader reader = new StreamReader(rsp.GetResponseStream()))
    {
        msg = reader.ReadToEnd();
    }
    // Fix: the callback parameter was declared but never invoked; fire it on
    // success for consistency with Post. Null-safe, so existing callers that
    // pass null are unaffected.
    SuccessCallback?.Invoke(msg);
    return msg;
}

/// <summary>
/// Sends an application/x-www-form-urlencoded POST (optionally as an ASP.NET
/// AJAX partial postback) and returns the response body. Detects a failed
/// login by inspecting the response content.
/// </summary>
/// <param name="url">Target URL; also used as the Referer header.</param>
/// <param name="dicParms">Form fields. Keys starting with "header" are treated as control entries and excluded from the body.</param>
/// <param name="SuccessCallback">Invoked with the response body when the login heuristic passes; may be null.</param>
/// <param name="FailCallback">Invoked with the response body when the response indicates a redirect to the login page; may be null.</param>
/// <returns>The full response body as a string.</returns>
public static string Post(string url, Dictionary<string, string> dicParms, Action<string> SuccessCallback, Action<string> FailCallback)
{
    // Build the URL-encoded form body; "header"-prefixed keys are not form fields.
    StringBuilder data = new StringBuilder();
    foreach (var kv in dicParms)
    {
        if (kv.Key.StartsWith("header")) continue;
        data.Append($"&{Common.UrlEncode(kv.Key, Encoding.UTF8)}={Common.UrlEncode(kv.Value, Encoding.UTF8)}");
    }
    if (data.Length > 0) data.Remove(0, 1); // drop the leading '&'

    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
    req.Method = "POST";
    req.KeepAlive = true; // fix: original assigned this twice
    req.CookieContainer = sznyCookie; // static shared cookie container
    req.Connection = "KeepAlive";
    req.ContentType = "application/x-www-form-urlencoded";
    req.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9";
    req.Referer = url;
    // ASP.NET AJAX (UpdatePanel) partial postback: these headers make the
    // server return a delta response instead of a full page.
    if (dicParms.ContainsKey("ScriptManager1"))
    {
        req.Headers.Add("X-MicrosoftAjax", "Delta=true");
        req.Headers.Add("X-Requested-With", "XMLHttpRequest");
        req.ContentType = "application/x-www-form-urlencoded; charset=UTF-8";
        req.Accept = "*/*";
    }
    req.Headers.Add("Cache-Control", "no-cache");
    req.UserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36";
    req.ServicePoint.ConnectionLimit = int.MaxValue;
    req.ServicePoint.Expect100Continue = false;
    req.AllowAutoRedirect = true;
    req.Credentials = System.Net.CredentialCache.DefaultCredentials;

    byte[] buffer = Encoding.UTF8.GetBytes(data.ToString());
    using (Stream reqStream = req.GetRequestStream())
    {
        reqStream.Write(buffer, 0, buffer.Length);
    }

    string msg = "";
    using (HttpWebResponse rsp = (HttpWebResponse)req.GetResponse())
    using (StreamReader reader = new StreamReader(rsp.GetResponseStream()))
    {
        msg = reader.ReadToEnd();
        // Login heuristic: the login-page image or an AJAX redirect back to
        // login.aspx means the session is not authenticated.
        if (msg.Contains("images/dl.jpg") || msg.Contains("pageRedirect||%2flogin.aspx"))
        {
            // login failed
            FailCallback?.Invoke(msg);
        }
        else
        {
            SuccessCallback?.Invoke(msg);
        }
    }
    return msg;
}
整個過程分為登入、使用者資訊列表、使用者資訊詳情,分三步走來完成這個專案。
登入
根據Chrome抓包結果編寫Login,帳號密碼沒有任何加密,直接以明文顯示,所以直接使用。根據是否跳轉頁面判斷是否登入成功。除錯檢視結果,登入成功了。