1. 程式人生 > >Java網路爬蟲crawler4j學習筆記 exceptions

Java網路爬蟲crawler4j學習筆記 exceptions

簡介

edu.uci.ics.crawler4j.crawler.exceptions包比較簡單,裡面都是一些自定義的異常類。另外,edu.uci.ics.crawler4j.parser包裡面也有一個異常類(NotAllowedContentException)。

原始碼

ContentFetchException

package edu.uci.ics.crawler4j.crawler.exceptions;

/**
 * Created by Avi Hayun on 12/8/2014.
 *
 * Marker ("tagging") exception signalling that something went wrong while
 * fetching content. It declares no members of its own; only its type
 * carries information.
 */
public class ContentFetchException extends Exception {
}

PageBiggerThanMaxSizeException

package edu.uci.ics.crawler4j.crawler.exceptions;

/**
 * Created by Avi Hayun on 12/8/2014.
 *
 * Thrown when trying to fetch a page which is bigger than the allowed size.
 * The offending size is embedded in the exception message and is also
 * available through {@link #getPageSize()}.
 */
public class PageBiggerThanMaxSizeException extends Exception {

  /** Size of the rejected page as supplied to the constructor; never changes afterwards. */
  private final long pageSize;

  /**
   * Builds the exception and its message from the size of the oversized page.
   *
   * @param pageSize size of the page whose fetch was aborted
   *                 (unit is not stated here - presumably bytes, confirm with the caller)
   */
  public PageBiggerThanMaxSizeException(long pageSize) {
    // Message text is kept exactly as-is so code or logs matching on it keep working.
    super("Aborted fetching of this URL as it's size ( " + pageSize + " ) exceeds the maximum size");
    this.pageSize = pageSize;
  }

  /**
   * @return the page size that was passed to the constructor
   */
  public long getPageSize() {
    return pageSize;
  }
}

ParseException

package edu.uci.ics.crawler4j.crawler.exceptions;

/**
 * Created by Avi Hayun on 12/8/2014.
 *
 * Marker ("tagging") exception signalling that the content could not be
 * parsed. It adds no state or behaviour beyond {@link Exception}.
 */
public class ParseException extends Exception { }

RedirectException

package edu.uci.ics.crawler4j.crawler.exceptions;

import uk.org.lidalia.slf4jext.Level;

/**
 * Created by Avi Hayun on 12/8/2014.
 *
 * Occurs when the crawler encounters a redirect problem, such as a redirect
 * to an already-visited page or a redirect to nothing.
 */
public class RedirectException extends Exception {

  /**
   * Level supplied by whoever raised the exception. Kept as a public field so
   * existing readers of {@code e.level} keep working, but made final so it
   * cannot be reassigned after construction.
   */
  public final Level level;

  /**
   * @param level level to associate with this redirect problem
   *              (presumably the log level to report it at - confirm with the caller)
   * @param msg   detail message, passed unchanged to {@link Exception#Exception(String)}
   */
  public RedirectException(Level level, String msg) {
    super(msg);
    this.level = level;
  }
}

parser.NotAllowedContentException

package edu.uci.ics.crawler4j.parser;

/**
 * Created by Avi on 8/19/2014.
 *
 * Thrown whenever the parser tries to parse content it is not allowed to
 * parse.<br>
 * For example, when the parser tries to parse binary content although the
 * user configured it not to do so.
 */
public class NotAllowedContentException extends Exception {

  /** Fixed detail message carried by every instance. */
  private static final String MESSAGE = "Not allowed to parse this type of content";

  /** Creates the exception with the fixed detail message. */
  public NotAllowedContentException() {
    super(MESSAGE);
  }
}