1. 程式人生 > >Java網路爬蟲crawler4j學習筆記 exceptions

Java網路爬蟲crawler4j學習筆記 exceptions





package edu.uci.ics.crawler4j.crawler.exceptions;

 * Created by Avi Hayun on 12/8/2014.
 * Thrown when there is a problem with the content fetching - this is a tagging exception
public class ContentFetchException extends Exception { }


package edu.uci.ics.crawler4j.crawler.exceptions;

 * Created by Avi Hayun on 12/8/2014.
 * Thrown when trying to fetch a page which is bigger than allowed size
public class PageBiggerThanMaxSizeException
extends Exception {
long pageSize; public PageBiggerThanMaxSizeException(long pageSize) { super("Aborted fetching of this URL as it's size ( " + pageSize + " ) exceeds the maximum size"); this.pageSize = pageSize; } public long getPageSize() { return pageSize; } }


package edu.uci.ics.crawler4j.crawler.exceptions;

/**
 * Created by Avi Hayun on 12/8/2014.
 * Thrown when there is a problem with the parsing of the content - this is a tagging exception
 */
public class ParseException extends Exception {



package edu.uci.ics.crawler4j.crawler.exceptions;

import uk.org.lidalia.slf4jext.Level;

/**
 * Created by Avi Hayun on 12/8/2014.
 * Occurs when the crawler encounters a redirect problem, such as redirecting to an already-visited page or redirecting to nothing
 */
public class RedirectException extends Exception {
  public Level level;

  public RedirectException(Level level, String msg) {
    this.level = level;


package edu.uci.ics.crawler4j.parser;

/**
 * Created by Avi on 8/19/2014.
 * This exception is thrown whenever the parser tries to parse content that is not allowed.<br>
 * For example, when the parser tries to parse binary content although the user configured it not to do so.
 */
public class NotAllowedContentException extends Exception {
    public NotAllowedContentException() {
      super("Not allowed to parse this type of content");