1. 程式人生 > >抓取http網頁的全部連結

抓取http網頁的全部連結

{
    // Downloads the page whose address is typed into TextBox1 and writes every
    // distinct http:// link found in its markup into TextBox2, one per line.
    this.TextBox2.Text = "";
    string web_url = this.TextBox1.Text;

    // Fetch the raw markup. Both the response and the reader are wrapped in
    // using-statements so the connection is released even if reading throws.
    string all_code;
    HttpWebRequest all_codeRequest = (HttpWebRequest)WebRequest.Create(web_url);
    using (WebResponse all_codeResponse = all_codeRequest.GetResponse())
    using (StreamReader sr = new StreamReader(all_codeResponse.GetResponseStream()))
    {
        all_code = sr.ReadToEnd();
    }

    // host = one or more dot-terminated labels followed by a final label;
    // optional path/query made of word characters and . / ? % & = -
    // The backslashes (\w, \.) are required: without them "[w-.]" is a reversed
    // character range and the Regex constructor throws ArgumentException.
    // The hyphen is placed last in the path class so it is taken literally.
    string p = @"http://([\w-]+\.)+[\w-]+(/[\w./?%&=-]*)?";
    Regex re = new Regex(p, RegexOptions.IgnoreCase);
    MatchCollection mc = re.Matches(all_code);

    // Filter duplicates: HashSet.Add returns false for a value that is already
    // present, so each link is emitted only the first time it is seen.
    // (Fully qualified names are used so no extra using-directive is needed.)
    System.Collections.Generic.HashSet<string> seen =
        new System.Collections.Generic.HashSet<string>();
    System.Text.StringBuilder links = new System.Text.StringBuilder();

    foreach (Match m in mc)
    {
        string name = m.Value;
        if (seen.Add(name))
        {
            // One link per line; the original appended an empty string, which
            // ran all links together with no delimiter.
            links.Append(name).Append(System.Environment.NewLine);
        }
    }

    // Assign once instead of growing the control's Text on every iteration.
    this.TextBox2.Text = links.ToString();
}