程式人生 > 簡單地學習Java爬蟲->使用Jsoup

簡單地學習Java爬蟲->使用Jsoup

簡單地學習Java爬蟲->使用Jsoup

一、Gradle 環境搭建

// Declares the Jsoup HTML parser (version 1.11.3) as a dependency;
// this line goes inside the `dependencies { ... }` block of the Gradle build script.
implementation 'org.jsoup:jsoup:1.11.3'

二、Activity

package com.example.testforjsoup;

import android.support.v7.app.AppCompatActivity;
import android.os.Bundle;
import android.util.Log;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Demo activity that downloads a web page with Jsoup on a background thread
 * and writes the page title and the selected headline links to Logcat.
 *
 * <p>The network request must not run on the Android main thread, so the work
 * is handed to a single-thread executor that is shut down as soon as the task
 * has been submitted.
 *
 * <p>NOTE: network access on Android requires the
 * {@code android.permission.INTERNET} permission in the manifest; the manifest
 * is not part of this file, so verify it is declared.
 */
public class MainActivity extends AppCompatActivity {

    /** Logcat tag used for every message emitted by this activity. */
    private static final String TAG = "zbv";

    /** Connect/read timeout handed to Jsoup, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 10000;

    /** CSS selector for the headline anchors inside the element with id {@code mp-itn}. */
    private static final String HEADLINE_SELECTOR = "#mp-itn b a";

    /** Address of the page that is fetched and parsed. */
    private final String url = "https://en.wikipedia.org/wiki/Main_Page";

    /**
     * Sets the content view and starts the background fetch.
     *
     * @param savedInstanceState state bundle forwarded to the superclass
     */
    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        ExecutorService executorService = Executors.newSingleThreadExecutor();
        executorService.execute(new Runnable() {
            @Override
            public void run() {
                fetchAndLogHeadlines();
            }
        });
        // shutdown() lets the already-submitted task finish and then releases
        // the worker thread; without it the thread would stay alive forever.
        executorService.shutdown();
    }

    /**
     * Fetches {@link #url}, then logs the document title and, for every
     * headline anchor, its {@code title} attribute, visible text and absolute
     * link target. An {@link IOException} from the request is logged, not rethrown.
     */
    private void fetchAndLogHeadlines() {
        try {
            // Fetch the page and parse it into a Jsoup document.
            Document document = Jsoup.connect(url).timeout(TIMEOUT_MILLIS).get();
            Log.d(TAG, "document of title=" + document.title());

            Elements newsHeadlines = document.select(HEADLINE_SELECTOR);
            for (Element element : newsHeadlines) {
                String title = element.attr("title");
                String text = element.text();
                // Resolve the href against the document's base URI so the logged
                // value is a usable absolute URL (equivalent to attr("abs:href")).
                String absUrl = element.absUrl("href");

                Log.d(TAG, "title=" + title + ";text=" + text + ";absUrl=" + absUrl);
            }
        } catch (IOException e) {
            // Log.e already records the stack trace; no separate printStackTrace needed.
            Log.e(TAG, "IOException", e);
        }
    }
}

後續更新中,之後會寫一個使用爬取資料的簡單 App……