1. 程式人生 > Java網路爬蟲crawler4j學習筆記 RobotstxtConfig類

Java網路爬蟲crawler4j學習筆記 RobotstxtConfig類

原始碼

package edu.uci.ics.crawler4j.robotstxt;

/**
 * Configuration holder for robots.txt handling.
 *
 * <p>A plain mutable bean with three settings, each exposed through a getter
 * and a setter. The setters store whatever value they are given; no
 * validation is performed here.
 */
public class RobotstxtConfig {

  /** Initial value of the user-agent name. */
  private static final String DEFAULT_USER_AGENT_NAME = "crawler4j";

  /** Initial value of the cache size. */
  private static final int DEFAULT_CACHE_SIZE = 500;

  // ---------------------------------------------------------------- enabled

  /**
   * Whether the crawler should obey the Robots.txt protocol. Starts out as
   * {@code true}. See http://www.robotstxt.org/ for background on the
   * protocol.
   */
  private boolean enabled = true;

  /**
   * Reports the current value of the "enabled" flag.
   *
   * @return the stored flag
   */
  public boolean isEnabled() {
    return this.enabled;
  }

  /**
   * Replaces the "enabled" flag.
   *
   * @param enabled the new flag value
   */
  public void setEnabled(boolean enabled) {
    this.enabled = enabled;
  }

  // ---------------------------------------------------------- userAgentName

  /**
   * User-agent name used to determine whether a server has rules that are
   * specific to this agent.
   */
  private String userAgentName = DEFAULT_USER_AGENT_NAME;

  /**
   * Reports the configured user-agent name.
   *
   * @return the stored name, exactly as it was last set
   */
  public String getUserAgentName() {
    return this.userAgentName;
  }

  /**
   * Replaces the user-agent name. The value is stored as-is.
   *
   * @param userAgentName the new user-agent name
   */
  public void setUserAgentName(String userAgentName) {
    this.userAgentName = userAgentName;
  }

  // -------------------------------------------------------------- cacheSize

  /** Maximum number of hosts for which their robots.txt is cached. */
  private int cacheSize = DEFAULT_CACHE_SIZE;

  /**
   * Reports the configured cache size.
   *
   * @return the stored cache size
   */
  public int getCacheSize() {
    return this.cacheSize;
  }

  /**
   * Replaces the cache size. The value is stored as-is.
   *
   * @param cacheSize the new cache size
   */
  public void setCacheSize(int cacheSize) {
    this.cacheSize = cacheSize;
  }
}