public interface VisitedLinksService
Modifier and Type | Method and Description |
---|---|
boolean |
checkAndMarkVisited(java.lang.String sourceId,
java.lang.String link,
java.lang.String jobRunId,
java.lang.String inputBulkId)
Determines if the link was already visited for this sourceId.
|
void |
clearAll()
Deletes all state information in the service about all data sources.
|
void |
clearSource(java.lang.String sourceId)
Deletes all state information in the service about the given data source.
|
long |
countEntries(java.lang.String sourceId,
boolean countExact) |
java.util.Collection<java.lang.String> |
getSourceIds()
Gets the IDs of all sources that currently have entries in the VisitedLinksService.
|
boolean |
isVisited(java.lang.String sourceId,
java.lang.String link,
java.lang.String jobRunId)
Determines if the link was already visited for this sourceId in the same job run or not.
|
void |
markAsVisited(java.lang.String sourceId,
java.lang.String link,
java.lang.String jobRunId,
java.lang.String inputBulkId)
Mark the link as visited in the current crawl job run.
|
boolean checkAndMarkVisited(java.lang.String sourceId, java.lang.String link, java.lang.String jobRunId, java.lang.String inputBulkId) throws VisitedLinksException
sourceId - the name of the data source that contains the link.
link - the link to check, e.g. a URL.
jobRunId - the current job run id in which the crawler is running.
inputBulkId - the id of the inputBulk where the URL to check originates from.
VisitedLinksException
boolean isVisited(java.lang.String sourceId, java.lang.String link, java.lang.String jobRunId) throws VisitedLinksException
sourceId - the name of the data source that contains the link.
link - the link to check, e.g. a URL.
jobRunId - the current job run id in which the crawler is running.
VisitedLinksException
void markAsVisited(java.lang.String sourceId, java.lang.String link, java.lang.String jobRunId, java.lang.String inputBulkId) throws VisitedLinksException
sourceId - the name of the data source that contains the link.
link - the link to mark, e.g. a URL.
jobRunId - the current job run id in which the crawler is running.
inputBulkId - the id of the inputBulk where the URL to mark originates from.
VisitedLinksException
void clearSource(java.lang.String sourceId) throws VisitedLinksException
sourceId - data source name.
VisitedLinksException
void clearAll() throws VisitedLinksException
VisitedLinksException
java.util.Collection<java.lang.String> getSourceIds() throws VisitedLinksException
VisitedLinksException
long countEntries(java.lang.String sourceId, boolean countExact) throws DeltaException
countExact - set to true to get an exact result, but this may take some time. Otherwise the service may return only an estimated value.
DeltaException