c#資訊抓取二：HTMLParser.net使用詳解

阿新 • • 發佈：2019-02-08

第一步還是新增引用，在上文已經說過，不再贅述。

程式碼：

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using Winista.Text.HtmlParser;
using Winista.Text.HtmlParser.Lex;
using Winista.Text.HtmlParser.Nodes;
using Winista.Text.HtmlParser.Util;
using Winista.Text.HtmlParser.Visitors;
using Winista.Text.HtmlParser.Filters;
using Winista.Text.HtmlParser.Tags;
using Winista.Text.HtmlParser.Http;
using System.Threading;
using System.IO;
using System.Net;

namespace parsertitle
{
    /// <summary>
    /// Sample form: downloads the page whose URL is typed into textBox1 and
    /// shows the text of its TITLE tag in textBox2.
    /// </summary>
    public partial class Form1 : Form
    {
        /// <summary>
        /// HTML source of the most recently requested page. Reset at the start of
        /// every download; holds "失敗！" when the download produced no content.
        /// </summary>
        string htmlText = "";

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Downloads the page at the URL in textBox1 and writes its title into textBox2.
        /// The download is synchronous, so the UI waits until it finishes.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            // Use the returned value rather than reading the field back.
            string html = downhtml_1(textBox1.Text);
            textBox2.Text = tohtml(html);
        }

        /// <summary>
        /// Downloads the HTML source of a page (method one: WebClient).
        /// </summary>
        /// <param name="WebUrl">Address of the page to download.</param>
        /// <returns>
        /// The downloaded source, or "失敗！" when the download failed or the
        /// result was empty/whitespace. The same value is stored in <c>htmlText</c>.
        /// </returns>
        private string downhtml_1(string WebUrl)
        {
            // Clear the previous result first; otherwise a failed download would
            // leave the last page's HTML in the field and its title would be shown.
            htmlText = "";

            try
            {
                // WebClient is disposable; release it as soon as the download ends.
                using (WebClient myWebClient = new WebClient())
                {
                    // Encoding used to decode the downloaded string.
                    // NOTE(review): Encoding.Default may mis-decode pages served in a
                    // different charset (e.g. UTF-8) - confirm against target sites.
                    myWebClient.Encoding = System.Text.Encoding.Default;
                    htmlText = myWebClient.DownloadString(WebUrl);
                }
            }
            catch (Exception ex)
            {
                // Best effort: report the problem and fall through to the failure marker.
                MessageBox.Show(ex.Message);
            }

            if (string.IsNullOrEmpty(htmlText) || htmlText.Trim() == "")
            {
                htmlText = "失敗！";
            }

            return htmlText;
        }

        /// <summary>
        /// Extracts the plain text of the first TITLE tag found in an HTML string.
        /// </summary>
        /// <param name="str">HTML source to search.</param>
        /// <returns>The title text, or an empty string when no TITLE tag is found.</returns>
        private string tohtml(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return "";
            }

            Lexer lexer1 = new Lexer(str);
            Parser parser1 = new Parser(lexer1);

            // Keep only tags named TITLE.
            NodeFilter filter_title1 = new TagNameFilter("TITLE");
            NodeList nodelistoftitle = parser1.ExtractAllNodesThatMatch(filter_title1);

            // Check the count explicitly instead of relying on what ElementAt(0)
            // does for an empty list.
            if (nodelistoftitle == null || nodelistoftitle.Count == 0)
            {
                return "";
            }

            INode node_title1 = nodelistoftitle.ElementAt(0);
            if (node_title1 == null)
            {
                return "";
            }

            // HTML of the title node itself.
            string title1 = node_title1.ToHtml();

            // Re-parse just the title fragment and let the visitor collect its text.
            Lexer lexer2 = new Lexer(title1);
            Parser parser_title1 = new Parser(lexer2);
            TextExtractingVisitor title_visitor1 = new TextExtractingVisitor();
            parser_title1.VisitAllNodesWith(title_visitor1);

            return title_visitor1.ExtractedText.ToString();
        }

        /// <summary>
        /// Pre-fills the URL box with a sample address when the form loads.
        /// </summary>
        /// <param name="sender">The form that raised the event.</param>
        /// <param name="e">Event data (unused).</param>
        private void Form1_Load(object sender, EventArgs e)
        {
            textBox1.Text = "http://www.sina.com";
        }
    }
}

c#資訊抓取二：HTMLParser.net使用詳解

c#資訊抓取二：HTMLParser.net使用詳解

python爬蟲學習筆記二：Requests庫詳解及HTTP協議

hihoCoder題二：A + B詳解

mybatis學習二：mybatis配置詳解

Spark 入門之二：Spark RDD詳解

grep -A -B -C 顯示抓取的前後幾行參數

Python爬蟲抓取煎蛋(jandan.net)無聊圖

c#實現抓取高清美女妹紙圖片

Python開發簡單爬蟲之靜態網頁抓取篇：爬取“豆瓣電影 Top 250”電影數據

C# webrequest 抓取數據時，多個域Cookie的問題

使用Puppeteer進行數據抓取(二)——Page對象

python網絡數據抓取二（bing圖片抓取）

base標簽抓取錯誤：我不再用discuz程序建設門戶網站[圖]

知乎內容抓取二（內含百度知道、百度熱點和代理ip抓取）

基於.NET的CAD二次開發學習筆記二：AutoCAD .NET中的物件

基於類的Python多求職網站資訊抓取！

天眼查pc端公司資訊抓取

爬蟲原理與資料抓取----- urllib2：GET請求和POST請求

Python-Requests-瓜子二手車資訊抓取

快遞100資訊抓取！Python就是這麼神奇！

c#資訊抓取二：HTMLParser.net使用詳解

相關推薦