public class WebCrawlingContext extends CrawlingContext
| Constructor and Description |
|---|
WebCrawlingContext(TaskContext taskContext)
Creates a crawling context from the given TaskContext.
|
| Modifier and Type | Method and Description |
|---|---|
java.lang.String |
getCurrentInputBulkId() |
WebCrawlerConstants.ErrorHandling |
getErrorHandling() |
java.util.Set<java.lang.String> |
getExtractedUrls() |
FilterConfiguration |
getFilterConfiguration() |
int |
getLinksPerBulk() |
RobotsTxt |
getRobotsTxt(java.lang.String hostAndPort,
JobRunDataProvider jobRunDataProvider) |
TaskLog |
getTaskLog() |
AnyMap |
getTaskParameters() |
java.util.Set<java.lang.String> |
getVisitedUrls() |
void |
putRobotsTxt(java.lang.String hostAndPort,
RobotsTxt robotsTxt,
JobRunDataProvider jobRunDataProvider)
Stores the robots.txt in the context and in the job run data, if available.
|
void |
setCurrentInputBulkId(java.lang.String inputBulkId) |
Inherited methods: getDataSource, getJobName, getJobRunId, getMapper, getNonNegativeParameter, getTaskContext
public WebCrawlingContext(TaskContext taskContext)
public FilterConfiguration getFilterConfiguration()
public TaskLog getTaskLog()
public AnyMap getTaskParameters()
public java.lang.String getCurrentInputBulkId()
public void setCurrentInputBulkId(java.lang.String inputBulkId)
inputBulkId -
public java.util.Set<java.lang.String> getVisitedUrls()
public int getLinksPerBulk()
public WebCrawlerConstants.ErrorHandling getErrorHandling()
public java.util.Set<java.lang.String> getExtractedUrls()
public RobotsTxt getRobotsTxt(java.lang.String hostAndPort, JobRunDataProvider jobRunDataProvider)
public void putRobotsTxt(java.lang.String hostAndPort,
RobotsTxt robotsTxt,
JobRunDataProvider jobRunDataProvider)