程式人生 > HttpClient中頭部Header的使用

HttpClient中頭部Header的使用

// Usage sketch: execute a request and inspect the entity carried by the response.
// The construction of the request itself is elided ("...") in this snippet.
HttpUriRequest request = ...;  
  
HttpResponse response =httpClient.execute(request);  
  
// Take the HttpEntity object out of the response
  
HttpEntity entity =response.getEntity();  
  
// Inspect the entity's metadata: content type, content length and charset
  
System.out.println(entity.getContentType());  
  
System.out.println(entity.getContentLength());  
  
System.out.println(EntityUtils.getContentCharSet(entity));  
  
// Obtain the data stream returned by the server
  
InputStream stream =entity.getContent();  

或者採用 httpClient.execute(request, responseHandler) 這一介面方式進行呼叫(其中 responseHandler 為 ResponseHandler<T> 的實作),它的返回值型別即為 T,也就是使用者自己想獲取的資料的型別及值。

具體例項解析,通過下述方法,即可獲取到指定url的頁面內容。

/**
 * Fetches {@code url} with an HTTP GET and returns the response body as a string.
 *
 * <p>The body is decoded and returned only when the response carries an entity and the
 * status code is 200 ({@code HttpStatus.SC_OK}). In every other case an empty string is
 * returned. Any exception raised while executing the request is printed and swallowed,
 * so the caller also receives an empty string on failure.
 *
 * <p>The client created for the call is always shut down before returning, so its
 * connection manager does not outlive the call.
 *
 * @param url     the address to request
 * @param charset source of the charset name (via {@code getValue()}) used to decode the body;
 *                NOTE(review): java.nio.charset.Charset has no getValue(), so this is
 *                presumably a project-specific type - confirm against its declaration
 * @return the decoded page content, or {@code ""} if the request failed, returned a
 *         non-200 status, or had no entity
 */
public static String executeStringByGet(String url, final Charset charset) {
    String result = "";
    HttpClient client = new DefaultHttpClient();
    HttpGet get = new HttpGet(url);

    try {
        result = client.execute(get, new ResponseHandler<String>() {
            @Override
            public String handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
                HttpEntity entity = response.getEntity();
                boolean ok = response.getStatusLine().getStatusCode() == HttpStatus.SC_OK;
                if (entity != null && ok) {
                    return new String(EntityUtils.toByteArray(entity), charset.getValue());
                }
                return "";
            }
        });
    } catch (Exception e) {
        // Deliberate best-effort: report the failure and fall through to return "".
        e.printStackTrace();
    } finally {
        // Release the connection manager's resources; the client is private to this call.
        client.getConnectionManager().shutdown();
    }

    return result;
}



import java.io.IOException;  
  
import java.util.regex.Matcher;  
  
import java.util.regex.Pattern;  
  
   
  
import org.apache.http.Header;  
  
import org.apache.http.HttpEntity;  
  
import org.apache.http.HttpResponse;  
  
import org.apache.http.HttpStatus;  
  
import org.apache.http.client.ClientProtocolException;  
  
import org.apache.http.client.HttpClient;  
  
import org.apache.http.client.methods.HttpGet;  
  
import org.apache.http.impl.client.DefaultHttpClient;  
  
import org.apache.http.util.EntityUtils;  
  
   
  
/**
 * Demonstrates how to set request headers on an HttpClient GET request, print the
 * response headers, and decode the response body using the charset announced by the
 * server or, failing that, by a {@code <meta>} tag in the page itself.
 *
 * @author zheng.zhaoz 2012-2-9 07:33:18 PM
 */
public class HttpClientTest {

    /** Charset used to decode the body when neither the header nor the page names one. */
    private static final String FALLBACK_CHARSET = "UTF-8";

    /**
     * Matches a {@code charset=...} attribute inside a {@code <meta>} tag and captures the
     * charset name in group 1. Optional quotes around the value are skipped, so both
     * {@code <meta charset="utf-8">} and {@code content="text/html; charset=utf-8"} match.
     */
    private static final Pattern META_CHARSET = Pattern.compile(
            "<meta[^>]*?charset\\s*=\\s*[\"']?\\s*([a-z0-9._:\\-]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Runs the demo: sends the GET request, prints all response headers and, for a 200
     * response, prints the detected charset and the decoded body.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        HttpClient httpClient = new DefaultHttpClient();
        try {
            // Create the GET request
            HttpGet httpGet = new HttpGet("http://www.cnblogs.com/loveyakamoz/archive/2011/07/21/2113252.html");

            // Set the request headers
            httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            httpGet.setHeader("Accept-Charset", "GB2312,utf-8;q=0.7,*;q=0.7");
            // No "Accept-Encoding: gzip, deflate" here: DefaultHttpClient does not register
            // content-decoding interceptors by default, so a compressed body would reach the
            // charset detection and decoding below as raw compressed bytes.
            httpGet.setHeader("Accept-Language", "zh-cn,zh;q=0.5");
            httpGet.setHeader("Connection", "keep-alive");
            httpGet.setHeader("Cookie", "__utma=226521935.73826752.1323672782.1325068020.1328770420.6;");
            httpGet.setHeader("Host", "www.cnblogs.com");
            // The standard header name is "Referer" (historical misspelling of "referrer").
            httpGet.setHeader("Referer", "http://www.baidu.com/s?tn=monline_5_dg&bs=httpclient4+MultiThreadedHttpConnectionManager");
            httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2");

            // Print only the value; printing the Header object would repeat the header name.
            System.out.println("Accept-Charset: " + httpGet.getFirstHeader("Accept-Charset").getValue());
            System.out.println("Execute request: " + httpGet.getURI());

            HttpResponse response;
            try {
                response = httpClient.execute(httpGet);
            } catch (IOException e) {
                // ClientProtocolException is an IOException, so this covers both failures.
                e.printStackTrace();
                return;
            }

            // Print every response header
            for (Header header : response.getAllHeaders()) {
                System.out.println(header.getName() + ":  " + header.getValue());
            }

            if (response.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
                return;
            }
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                return;
            }

            try {
                // Buffer the body in a byte array because it may be read twice:
                // once to sniff the charset and once to decode with the final charset.
                byte[] bytes = EntityUtils.toByteArray(entity);

                // If the Content-Type header carries a charset, use it directly.
                String charSet = EntityUtils.getContentCharSet(entity);
                System.out.println("In header: " + charSet);

                // Otherwise look in the page source. This is only a heuristic, since some
                // pages do not declare their encoding in the markup either.
                if (charSet == null || charSet.isEmpty()) {
                    // ISO-8859-1 maps every byte to a char, so the ASCII markup we search
                    // for survives regardless of the page's real encoding.
                    charSet = detectMetaCharset(new String(bytes, "ISO-8859-1"));
                }
                System.out.println("Last get: " + charSet);

                // Decode the buffered bytes with the detected charset, or with the
                // fallback when none was found.
                if (charSet.isEmpty()) {
                    charSet = FALLBACK_CHARSET;
                }
                System.out.println("Encoding string is: " + new String(bytes, charSet));
            } catch (IOException e) {
                // Also reached for an unsupported charset name (UnsupportedEncodingException).
                e.printStackTrace();
            }
        } finally {
            // Close the connection, even if something above threw unexpectedly.
            httpClient.getConnectionManager().shutdown();
        }
    }

    /** Returns the charset named in the first matching meta tag of {@code html}, or "" if none. */
    private static String detectMetaCharset(String html) {
        Matcher matcher = META_CHARSET.matcher(html);
        // group(1) is only valid after a successful find().
        return matcher.find() ? matcher.group(1) : "";
    }

}