1. 程式人生 > Java網路爬蟲crawler4j學習筆記 AuthInfo類

Java網路爬蟲crawler4j學習筆記 AuthInfo類

原始碼

package edu.uci.ics.crawler4j.crawler.authentication;

import javax.swing.text.html.FormSubmitEvent.MethodType;
import java.net.MalformedURLException;
import java.net.URL;

/**
 * Created by Avi Hayun on 11/23/2014.
 *
 * Abstract class containing the authentication information needed to log in to a user/password protected site<br>
 * This class should be extended by specific authentication types like form authentication and basic authentication etc<br>
 * <br>
 * This class contains the authentication data that is common to all authentication types
 */
// Abstract base class for login authentication; subclasses cover form and basic authentication.
public abstract class AuthInfo {

    /** Authentication mechanisms an {@code AuthInfo} can describe. */
    public enum AuthenticationType {
        BASIC_AUTHENTICATION, FORM_AUTHENTICATION
    }

    /** Authentication type (basic or form). */
    protected AuthenticationType authenticationType;

    /** HTTP method used for the login request: {@code MethodType.GET} or {@code MethodType.POST}. */
    protected MethodType httpMethod;

    /** Protocol part of the login URL (eg: http, https). */
    protected String protocol;

    /** Host / domain name part of the login URL. */
    protected String host;

    /** File/path part of the login URL, i.e. everything after the host and port (eg: /login.php). */
    protected String loginTarget;

    /** Port the login request is sent to. */
    protected int port;

    /** Username used for authentication. */
    protected String username;

    /** Password used for authentication. */
    protected String password;

    /** Constructs a new AuthInfo with every field left at its Java default value. */
    public AuthInfo() {
    }

    /**
     * This constructor should only be used by extending classes.
     *
     * <p>The login URL is split into protocol, host, port and login target. An explicit port in
     * the URL (eg: {@code http://host:8080/login}) is honoured; the protocol's default port is
     * used only when the URL does not specify one.
     *
     * @param authenticationType Pick the one which matches your authentication
     * @param httpMethod Choose POST / GET
     * @param loginUrl Full URL of the login page
     * @param username Username for Authentication
     * @param password Password for Authentication
     *
     * @throws MalformedURLException Make sure your URL is valid
     */
    protected AuthInfo(AuthenticationType authenticationType, MethodType httpMethod, String loginUrl,
                       String username, String password) throws MalformedURLException {
        this.authenticationType = authenticationType;
        this.httpMethod = httpMethod;

        URL url = new URL(loginUrl);
        this.protocol = url.getProtocol();
        this.host = url.getHost();
        // URL.getPort() returns -1 when the URL carries no explicit port; only in that case
        // fall back to the protocol default. Using getDefaultPort() unconditionally would
        // silently drop a custom port such as :8080.
        int explicitPort = url.getPort();
        this.port = (explicitPort == -1) ? url.getDefaultPort() : explicitPort;
        this.loginTarget = url.getFile();

        this.username = username;
        this.password = password;
    }

    /**
     * @return Authentication type (BASIC, FORM)
     */
    public AuthenticationType getAuthenticationType() {
        return authenticationType;
    }

    /**
     * @param authenticationType Should be set only by extending classes (BASICAuthInfo, FORMAuthInfo)
     */
    public void setAuthenticationType(AuthenticationType authenticationType) {
        this.authenticationType = authenticationType;
    }

    /**
     * @return httpMethod (POST, GET)
     */
    public MethodType getHttpMethod() {
        return httpMethod;
    }

    /**
     * @param httpMethod Should be set by extending classes (POST, GET)
     */
    public void setHttpMethod(MethodType httpMethod) {
        this.httpMethod = httpMethod;
    }

    /**
     * @return protocol type (http, https)
     */
    public String getProtocol() {
        return protocol;
    }

    /**
     * @param protocol Don't set this one unless you know what you are doing (protocol: http, https)
     */
    public void setProtocol(String protocol) {
        this.protocol = protocol;
    }

    /**
     * @return host (www.sitename.com)
     */
    public String getHost() {
        return host;
    }

    /**
     * @param host Don't set this one unless you know what you are doing (sets the domain name)
     */
    public void setHost(String host) {
        this.host = host;
    }

    /**
     * @return file/path which is the rest of the url after the domain name (eg: /login.php)
     */
    public String getLoginTarget() {
        return loginTarget;
    }

    /**
     * @param loginTarget Don't set this one unless you know what you are doing (eg: /login.php)
     */
    public void setLoginTarget(String loginTarget) {
        this.loginTarget = loginTarget;
    }

    /**
     * @return port number (eg: 80, 443)
     */
    public int getPort() {
        return port;
    }

    /**
     * @param port Don't set this one unless you know what you are doing (eg: 80, 443)
     */
    public void setPort(int port) {
        this.port = port;
    }

    /**
     * @return username used for Authentication
     */
    public String getUsername() {
        return username;
    }

    /**
     * @param username username used for Authentication
     */
    public void setUsername(String username) {
        this.username = username;
    }

    /**
     * @return password used for Authentication
     */
    public String getPassword() {
        return password;
    }

    /**
     * @param password password used for Authentication
     */
    public void setPassword(String password) {
        this.password = password;
    }
}

分析

如果需要登入驗證,可以繼承AuthInfo類來實現自定義的爬蟲登入功能。