Java之請求傳送工具類(HttpClientUtils,爬蟲)-yellowcong
阿新 • • 發佈:2019-02-02
Java 傳送請求:之前做過一段時間的爬蟲,所以寫了這個請求傳送的工具。這個工具偽裝成百度,然後去爬取推酷的資料。當時是由於推酷有 IP 訪問限制,如果你是爬蟲,就不讓訪問了,所以我偽裝成了百度,然後就可以隨便爬取推酷的資料了。當時爬了 1GB 多的文字資料,圖片資料大概有 15GB 左右;然而,我卻根本沒有用這些資料,只是爬下來了而已……
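pom.xml依賴(注意:下面的程式碼除了 commons-httpclient 3.1 之外,還 import 了 org.apache.http 的 HttpClient 4.x、httpmime 以及 Lucene 的類別,編譯時需另外加入對應的依賴)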
<dependency>
<groupId>commons-httpclient</groupId>
<artifactId>commons-httpclient</artifactId>
<version>3.1</version>
</dependency>
請求傳送工具
package com.yellowcong.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Map;
import org.apache.commons.httpclient.Credentials;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.UsernamePasswordCredentials;
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpOptions;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.entity.mime.MultipartEntity;
import org.apache.http.entity.mime.content.FileBody;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
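/**
 * HTTP request helper originally written for crawling data through proxy servers
 * (the author notes that much of the proxy data turned out to be unavailable).
 * 2015-10: used to crawl Tuicool data for a service.
 * Proxies can be added through this utility when issuing requests.
 * @author yellowcong
 *
 *
 * ---------------------------------------------------
 * 2016-8-9 update: sendGet() gained an encoding setting to fix garbled text in fetched pages.
 *
 */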
public class HttpClientUtils {
private static int timeout = 50000;
/**
 * Builds a GetMethod for the given URL with this class's default request settings.
 *
 * The method is configured with the class-level socket timeout (in milliseconds),
 * a DefaultHttpMethodRetryHandler and a fixed Firefox User-Agent string.
 *
 * @param url the URL to request
 * @return the configured GET method; never null
 * @throws RuntimeException if the method cannot be created or configured; the
 *         underlying failure is attached as the cause
 */
public static GetMethod setGetMethod(String url) {
    try {
        GetMethod getMethod = new GetMethod(url);
        // Socket read timeout, taken from the class-level timeout field (milliseconds).
        getMethod.getParams().setParameter(HttpMethodParams.SO_TIMEOUT, timeout);
        // Retry handling for failed requests.
        getMethod.getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());
        // Alternative User-Agent strings kept for reference:
        //Mozilla/5.0 (Windows; U; Windows NT 5.2) Gecko/2008070208 Firefox/3.0.1
        //Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070309 Firefox/2.0.0.3
        //Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; WOW64; Trident/4.0; SLCC1)
        //Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)
        //Mozilla/5.0 (Windows; U; Windows NT 5.2) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13
        getMethod.getParams().setParameter(HttpMethodParams.USER_AGENT,"Mozilla/5.0 (Windows; U; Windows NT 5.1) Gecko/20070803 Firefox/1.5.0.12");
        return getMethod;
    } catch (Exception e) {
        // Keep the original failure as the cause so the reason the URL was rejected is not lost.
        throw new RuntimeException("-------------------------請求協議存在問題-----------------------", e);
    }
}
/**
 * Sends a GET request through the given proxy and reports the resulting HTTP status.
 *
 * This is a best-effort probe: any failure (invalid URL, unreachable proxy, I/O error)
 * is swallowed and reported as 0 instead of being thrown.
 *
 * @param host the target to request; it is used both as the client's host setting
 *             and as the URL of the GET method
 * @param proxyIP proxy host
 * @param proxyPort proxy port
 * @return the status code returned by the request, or 0 if the request failed
 */
public static int testProxy(String host,String proxyIP,int proxyPort){
    int code = 0;
    GetMethod method = null;
    try {
        HttpClient httpClient = new HttpClient();
        httpClient.getHostConfiguration().setHost(host);
        // Connection timeout, taken from the class-level timeout field (milliseconds).
        httpClient.getHttpConnectionManager().getParams().setConnectionTimeout(timeout);
        // Browser-compatible cookie handling.
        httpClient.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
        // Route the request through the proxy.
        httpClient.getHostConfiguration().setProxy(proxyIP, proxyPort);
        // Proxy credentials: empty user name and password.
        Credentials defaultcreds = new UsernamePasswordCredentials("", "");
        httpClient.getState().setProxyCredentials(new AuthScope(proxyIP, proxyPort, null), defaultcreds);
        method = setGetMethod(host);
        if(method != null){
            code = httpClient.executeMethod(method);
        }
    } catch (Exception ignored) {
        // Deliberately ignored: a failing proxy is an expected outcome and is reported as code 0.
    } finally {
        // Always hand the connection back, otherwise every probe leaks one.
        if(method != null){
            method.releaseConnection();
        }
    }
    return code;
}
/**
 * Creates an HttpClient whose host configuration is set from the given URL.
 *
 * @param url value passed to the client's host configuration
 * @return a new client with its connection timeout configured
 */
private static HttpClient getHttpClient(String url){
    final HttpClient client = new HttpClient();
    client.getHostConfiguration().setHost(url);
    // Connection timeout in milliseconds.
    final int connectTimeoutMillis = 50000;
    client.getHttpConnectionManager().getParams().setConnectionTimeout(connectTimeoutMillis);
    return client;
}
/**
 * Creates an HttpClient for the given URL that sends its requests through a proxy.
 *
 * @param url value passed on to getHttpClient
 * @param proxyIP proxy host
 * @param proxyPort proxy port
 * @return a client configured with the proxy, empty proxy credentials and a
 *         browser-compatible cookie policy
 */
private static HttpClient setHttpClientProxy(String url,String proxyIP,int proxyPort){
    final HttpClient proxied = getHttpClient(url);
    // Browser-compatible cookie handling.
    proxied.getParams().setCookiePolicy(CookiePolicy.BROWSER_COMPATIBILITY);
    // Proxy host and port.
    proxied.getHostConfiguration().setProxy(proxyIP, proxyPort);
    // Proxy credentials: empty user name and password, scoped to the proxy itself.
    final Credentials emptyCredentials = new UsernamePasswordCredentials("", "");
    final AuthScope proxyScope = new AuthScope(proxyIP, proxyPort, null);
    proxied.getState().setProxyCredentials(proxyScope, emptyCredentials);
    return proxied;
}
/**
 * Sends a GET request without a proxy.
 *
 * @param url the URL to request
 * @return whatever the (url, isProxy) overload returns for a non-proxied request
 */
public static String sendGet(String url){
    final boolean useProxy = false;
    return HttpClientUtils.sendGet(url, useProxy);
}
/**
 * Sends a GET request, optionally through a proxy, and returns the page content.
 *
 * Any exception raised while building the client or the request is printed to
 * standard error and results in a null return value.
 *
 * @param url the URL to request
 * @param isProxy true to send the request through the hard-coded proxy
 * @param encoding encoding handed to dealHtml for reading the response body
 * @return the value returned by dealHtml, or null if the request could not be made
 */
public static String sendGet(String url,boolean isProxy,String encoding){
    String html = null;
    try {
        // The proxied branch uses a fixed proxy address; the commented-out alternative in
        // earlier revisions picked one via ProxyUtils.getRandomPropertisProxy().
        final HttpClient client = isProxy
                ? setHttpClientProxy(url,"121.14.138.56",81)
                : getHttpClient(url);
        final GetMethod request = HttpClientUtils.setGetMethod(url);
        if(request != null){
            html = dealHtml(client, request, encoding);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return html;
}
/**
 * Sends a GET request, optionally through a proxy, reading the response as UTF-8.
 *
 * @param url the URL to request
 * @param isProxy true to send the request through the proxy
 * @return whatever the (url, isProxy, encoding) overload returns for "UTF-8"
 */
public static String sendGet(String url,boolean isProxy){
    final String defaultEncoding = "UTF-8";
    return sendGet(url, isProxy, defaultEncoding);
}
/**
 * Fetches a page without a proxy using the given encoding.
 *
 * @param url address of the page
 * @param encoding encoding used to read the page content
 * @return whatever the (url, isProxy, encoding) overload returns for a non-proxied request
 */
public static String sendGet(String url,String encoding){
    final boolean useProxy = false;
    return sendGet(url, useProxy, encoding);
}
/**
 * Executes the GET method and returns the response body for HTML or JSON responses.
 *
 * The body is read only when the status is 200 and the Content-Type header contains
 * "html" or "json". Every other outcome (redirect statuses, 403, other content types,
 * missing Content-Type, I/O failures) yields null; I/O failures are also printed to
 * standard error. The method's connection is released before this call returns, so
 * the response body must not be read from the method afterwards.
 *
 * @param client the client used to execute the request
 * @param method the GET method to execute
 * @param encoding encoding passed to FileUtils.copyInput2String when reading the body
 * @return the response body as text, or null if it was not read
 */
public static String dealHtml(HttpClient client,GetMethod method,String encoding){
    String content = null;
    try {
        int code = client.executeMethod(method);
        if(code == HttpStatus.SC_OK){
            Header header = method.getResponseHeader("Content-Type");
            String contentType = (header != null) ? header.getValue() : null;
            // Only textual page/API responses are of interest.
            if(contentType != null
                    && (contentType.indexOf("html") != -1 || contentType.indexOf("json") != -1)){
                content = FileUtils.copyInput2String(method.getResponseBodyAsStream(),encoding);
            }
        }
        // Redirect statuses (301/302/303/307), 403 (e.g. the IP is blocked) and any other
        // status intentionally fall through and leave content as null.
    } catch (HttpException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release the connection on every path; without this each request leaks one.
        if(method != null){
            method.releaseConnection();
        }
    }
    return content;
}
/**
 * Posts a JSON document to the given URL and returns the response body.
 *
 * The body is sent as "application/json" with charset utf-8. The response body is
 * read only for 2xx statuses that carry an entity; otherwise null is returned.
 * Exceptions are printed to standard error and also result in null.
 *
 * @param url the URL to post to
 * @param json the JSON text to send
 * @return the response body as converted by FileUtils.copyInput2String, or null
 */
public static String postJson(String url,String json){
    String str = null;
    DefaultHttpClient client = new DefaultHttpClient();
    try {
        HttpPost post = new HttpPost(url);
        StringEntity entity = new StringEntity(json,ContentType.create("application/json", "utf-8"));
        post.setEntity(entity);
        HttpResponse response = client.execute(post);
        int code = response.getStatusLine().getStatusCode();
        // A 2xx response may legitimately have no entity (e.g. 204), so check before reading.
        if(code >=200 && code <300 && response.getEntity() != null){
            InputStream in = response.getEntity().getContent();
            str = FileUtils.copyInput2String(in);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // The client is single-use; shut its connection manager down so sockets are not leaked.
        client.getConnectionManager().shutdown();
    }
    return str;
}
/**
 * Downloads a resource with a GET request and returns its content stream.
 *
 * A stream is returned only for 2xx statuses that carry an entity; the caller owns
 * that stream and must close it. In every other case (other status, no entity, or an
 * exception, which is printed to standard error) null is returned and the client's
 * connections are shut down here.
 *
 * @param url the URL to download from
 * @return the response content stream, or null if the download failed
 */
public static InputStream downLoad(String url){
    InputStream in = null;
    DefaultHttpClient client = new DefaultHttpClient();
    try {
        HttpGet get = new HttpGet(url);
        HttpResponse response = client.execute(get);
        int code = response.getStatusLine().getStatusCode();
        if(code >=200 && code<300 && response.getEntity() != null){
            in = response.getEntity().getContent();
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // No stream is handed to the caller, so nobody else can release the connection.
        if(in == null){
            client.getConnectionManager().shutdown();
        }
    }
    return in;
}
/**
 * Downloads a resource with a body-less POST request and returns its content stream.
 *
 * A stream is returned only for 2xx statuses that carry an entity; the caller owns
 * that stream and must close it. In every other case (other status, no entity, or an
 * exception, which is printed to standard error) null is returned and the client's
 * connections are shut down here.
 *
 * @param url the URL to download from
 * @return the response content stream, or null if the download failed
 */
public static InputStream downLoadByPost(String url){
    InputStream in = null;
    DefaultHttpClient client = new DefaultHttpClient();
    try {
        HttpPost post = new HttpPost(url);
        HttpResponse response = client.execute(post);
        int code = response.getStatusLine().getStatusCode();
        if(code >=200 && code<300 && response.getEntity() != null){
            in = response.getEntity().getContent();
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // No stream is handed to the caller, so nobody else can release the connection.
        if(in == null){
            client.getConnectionManager().shutdown();
        }
    }
    return in;
}
/**
 * Uploads a file to the given URL as a multipart POST and returns the response body.
 *
 * The response body is read only for 2xx statuses that carry an entity; otherwise
 * null is returned. Exceptions are printed to standard error and also result in null.
 *
 * @param url the URL to upload to
 * @param field name of the multipart field that carries the file
 * @param file the file to upload
 * @return the response body as converted by FileUtils.copyInput2String, or null
 */
public static String upload(String url,String field,File file){
    String result = null;
    DefaultHttpClient client = new DefaultHttpClient();
    try {
        HttpPost post = new HttpPost(url);
        // MultipartEntity comes from the httpmime module.
        MultipartEntity entity = new MultipartEntity();
        FileBody fileBody = new FileBody(file);
        entity.addPart(field, fileBody);
        post.setEntity(entity);
        HttpResponse response = client.execute(post);
        int code = response.getStatusLine().getStatusCode();
        // A 2xx response may legitimately have no entity, so check before reading.
        if(code >=200 && code<300 && response.getEntity() != null){
            InputStream in = response.getEntity().getContent();
            result = FileUtils.copyInput2String(in);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // The client is single-use; shut its connection manager down so sockets are not leaked.
        client.getConnectionManager().shutdown();
    }
    return result;
}
/**
 * Submits a POST request without any form parameters.
 *
 * @param url the URL to post to
 * @return whatever the (url, params) overload returns when params is null
 */
public static String post(String url){
    final Map<String,String> noParams = null;
    return HttpClientUtils.post(url, noParams);
}
/**
 * Submits a form-encoded POST request with the given parameters.
 *
 * The request is sent with Content-Type
 * "application/x-www-form-urlencoded;charset=utf-8". The response body is read only
 * for 2xx statuses; otherwise null is returned. Exceptions are printed to standard
 * error and also result in null.
 *
 * @param url the URL to post to
 * @param params form parameters; may be null or empty
 * @return the response body as converted by FileUtils.copyInput2String, or null
 */
public static String post(String url,Map<String,String> params){
    String str = null;
    PostMethod method = null;
    try {
        HttpClient client = new HttpClient();
        method = new PostMethod(url);
        method.setRequestHeader("Content-Type","application/x-www-form-urlencoded;charset=utf-8");
        if(params != null){
            for(Map.Entry<String,String> entry:params.entrySet()){
                method.setParameter(entry.getKey(),entry.getValue());
            }
        }
        int code = client.executeMethod(method);
        if(code >=200 && code <300){
            InputStream in = method.getResponseBodyAsStream();
            str = FileUtils.copyInput2String(in);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Release the connection on every path; without this each request leaks one.
        if(method != null){
            method.releaseConnection();
        }
    }
    return str;
}
}