Java網路爬蟲crawler4j學習筆記 exceptions
阿新 • • 發佈:2018-12-26
簡介
edu.uci.ics.crawler4j.crawler.exceptions包比較簡單,裡面都是一些自定義的異常類。edu.uci.ics.crawler4j.parser包裡面也有一個異常類(NotAllowedContentException),一併列在下面。
原始碼
ContentFetchException
package edu.uci.ics.crawler4j.crawler.exceptions;
/**
 * Thrown when there is a problem with the content fetching.
 *
 * <p>This is a tagging exception: it declares no state of its own and exists so
 * that callers can catch content-fetch failures by type. The optional detail
 * message and cause are stored by {@link Exception}, so the original failure
 * can be preserved instead of being dropped when it is wrapped.
 *
 * <p>Created by Avi Hayun on 12/8/2014.
 */
public class ContentFetchException extends Exception {

    private static final long serialVersionUID = 1L;

    /** Creates an exception with no detail message and no cause. */
    public ContentFetchException() {
        super();
    }

    /**
     * Creates an exception with the given detail message.
     *
     * @param message the detail message, retrievable through {@link #getMessage()}
     */
    public ContentFetchException(String message) {
        super(message);
    }

    /**
     * Creates an exception that wraps the given cause.
     *
     * @param cause the underlying failure, retrievable through {@link #getCause()}
     */
    public ContentFetchException(Throwable cause) {
        super(cause);
    }

    /**
     * Creates an exception with the given detail message and cause.
     *
     * @param message the detail message, retrievable through {@link #getMessage()}
     * @param cause the underlying failure, retrievable through {@link #getCause()}
     */
    public ContentFetchException(String message, Throwable cause) {
        super(message, cause);
    }
}
PageBiggerThanMaxSizeException
package edu.uci.ics.crawler4j.crawler.exceptions;
/**
* Created by Avi Hayun on 12/8/2014.
* Thrown when trying to fetch a page which is bigger than allowed size
*/
public class PageBiggerThanMaxSizeException extends Exception {
long pageSize;
public PageBiggerThanMaxSizeException(long pageSize) {
super("Aborted fetching of this URL as it's size ( " + pageSize + " ) exceeds the maximum size");
this.pageSize = pageSize;
}
public long getPageSize() {
return pageSize;
}
}
ParseException
package edu.uci.ics.crawler4j.crawler.exceptions;
/**
 * Thrown when there is a problem with the parsing of the content.
 *
 * <p>This is a tagging exception: it declares no state of its own and exists so
 * that callers can catch parsing failures by type. The optional detail message
 * and cause are stored by {@link Exception}, so the original failure can be
 * preserved instead of being dropped when it is wrapped.
 *
 * <p>Created by Avi Hayun on 12/8/2014.
 */
public class ParseException extends Exception {

    private static final long serialVersionUID = 1L;

    /** Creates an exception with no detail message and no cause. */
    public ParseException() {
        super();
    }

    /**
     * Creates an exception with the given detail message.
     *
     * @param message the detail message, retrievable through {@link #getMessage()}
     */
    public ParseException(String message) {
        super(message);
    }

    /**
     * Creates an exception that wraps the given cause.
     *
     * @param cause the underlying failure, retrievable through {@link #getCause()}
     */
    public ParseException(Throwable cause) {
        super(cause);
    }

    /**
     * Creates an exception with the given detail message and cause.
     *
     * @param message the detail message, retrievable through {@link #getMessage()}
     * @param cause the underlying failure, retrievable through {@link #getCause()}
     */
    public ParseException(String message, Throwable cause) {
        super(message, cause);
    }
}
RedirectException
package edu.uci.ics.crawler4j.crawler.exceptions;
import uk.org.lidalia.slf4jext.Level;
/**
 * Occurs when the crawler encounters a redirect problem, such as redirecting to
 * an already-visited page or redirecting to nothing.
 *
 * <p>Each instance carries a {@link Level} alongside its detail message.
 * NOTE(review): presumably this is the severity at which the caller should log
 * the problem — confirm against the code that catches this exception.
 *
 * <p>Created by Avi Hayun on 12/8/2014.
 */
public class RedirectException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * Level supplied at construction time.
     *
     * <p>Kept as a public, non-final field so that existing code reading or
     * writing it directly keeps compiling; new code should use
     * {@link #getLevel()}.
     */
    public Level level;

    /**
     * Creates a redirect exception.
     *
     * @param level the level to associate with this problem
     * @param msg the detail message, retrievable through {@link #getMessage()}
     */
    public RedirectException(Level level, String msg) {
        super(msg);
        this.level = level;
    }

    /**
     * Returns the level associated with this exception.
     *
     * @return the current value of {@link #level}
     */
    public Level getLevel() {
        return level;
    }
}
parser.NotAllowedContentException
package edu.uci.ics.crawler4j.parser;
/**
 * Signals that the parser was asked to parse content it is not allowed to parse,
 * for example binary content when the user configured the crawler not to
 * process it.
 *
 * <p>Created by Avi on 8/19/2014.
 */
public class NotAllowedContentException extends Exception {

    /** Fixed detail message carried by every instance. */
    private static final String MESSAGE = "Not allowed to parse this type of content";

    /** Creates the exception with its fixed detail message and no cause. */
    public NotAllowedContentException() {
        super(MESSAGE);
    }
}