簡單地學習Java爬蟲->使用Jsoup
阿新 • • 發佈:2019-02-17
簡單地學習Java爬蟲->使用Jsoup
一、Gradle 環境搭建(在 app 模組 build.gradle 的 dependencies 區塊中加入以下依賴)
implementation 'org.jsoup:jsoup:1.11.3'
二、Activity
package com.example.testforjsoup; import android.support.v7.app.AppCompatActivity; import android.os.Bundle; import android.util.Log; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.io.IOException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; public class MainActivity extends AppCompatActivity { private String url = "https://en.wikipedia.org/wiki/Main_Page"; @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_main); ExecutorService executorService= Executors.newSingleThreadExecutor(); executorService.execute(new Runnable() { @Override public void run() { try { //獲取Jsoup訪問url連結的文件物件 Document document = Jsoup.connect(url).timeout(10000).get(); Log.d("zbv", "document of title=" + document.title()); Elements newsHeadlines = document.select("#mp-itn b a"); for (Element element : newsHeadlines) { String title = element.attr("title"); String text = element.text(); //補全作為可使用的URL // String absUrl = element.absUrl("href"); //一般的String文字 要達到absUrl的效果可以這樣:"abs:href" String absUrl=element.attr("href"); Log.d("zbv", "title=" + title + ";text=" + text + ";absUrl=" + absUrl); } } catch (IOException e) { e.printStackTrace(); Log.e("zbv", "IOException", e); } } }); } }
後續會持續更新:接下來將寫一個使用爬取資料的簡單 App……