/*******************************************************************************
 * Copyright (c) 2008, 2011 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
 * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: Daniel Stucky (Attensity Europe GmbH) - initial API and implementation
 *******************************************************************************/
package org.eclipse.smila.importing;

import java.util.Collection;

/**
 * Service that lets a crawler check whether a link was already crawled before, and record links as visited.
 * Entries are kept per data source id.
 */
public interface VisitedLinksService {

  /**
   * Checks whether the given link has already been visited for the given source.
   *
   * @param sourceId
   *          name of the data source that contains the link.
   * @param link
   *          link to look up, e.g. a URL.
   * @param jobRunId
   *          id of the current job run in which the crawler is running.
   * @param inputBulkId
   *          id of the input bulk from which the link to check originates.
   * @return {@code true} if the link was already visited for this source id, {@code false} otherwise.
   * @throws VisitedLinksException
   *           declared failure type of this service; the exact conditions depend on the implementation.
   */
  boolean isVisited(String sourceId, String link, String jobRunId, String inputBulkId) throws VisitedLinksException;

  /**
   * Records the given link as visited in the current crawl job run.
   *
   * @param sourceId
   *          name of the data source that contains the link.
   * @param link
   *          link to record, e.g. a URL.
   * @param jobRunId
   *          id of the current job run in which the crawler is running.
   * @param inputBulkId
   *          id of the input bulk from which the link to mark originates.
   * @throws VisitedLinksException
   *           declared failure type of this service; the exact conditions depend on the implementation.
   */
  void markAsVisited(String sourceId, String link, String jobRunId, String inputBulkId) throws VisitedLinksException;

  /**
   * Deletes all state information the service holds about the given data source.
   *
   * @param sourceId
   *          name of the data source to clear.
   * @throws VisitedLinksException
   *           declared failure type of this service; the exact conditions depend on the implementation.
   */
  void clearSource(String sourceId) throws VisitedLinksException;

  /**
   * Deletes all state information the service holds about all data sources.
   *
   * @throws VisitedLinksException
   *           declared failure type of this service; the exact conditions depend on the implementation.
   */
  void clearAll() throws VisitedLinksException;

  /**
   * Lists the sources that currently have entries in the VisitedLinksService.
   *
   * @return ids of all sources that currently have entries.
   * @throws VisitedLinksException
   *           declared failure type of this service; the exact conditions depend on the implementation.
   */
  Collection<String> getSourceIds() throws VisitedLinksException;

  // NOTE(review): this method declares DeltaException, whereas every other method of this interface declares
  // VisitedLinksException. Possibly a copy-paste leftover -- confirm before changing, because implementers and
  // callers depend on the declared exception type.
  /**
   * Counts the entries stored for the given source.
   *
   * @param sourceId
   *          id of the source whose entries are counted.
   * @param countExact
   *          set to {@code true} to get an exact result, but this may take some time. Otherwise the service may
   *          return only an estimated value.
   * @return number of entries for the given source id.
   * @throws DeltaException
   *           declared failure type of this method; the exact conditions depend on the implementation.
   */
  long countEntries(String sourceId, boolean countExact) throws DeltaException;
}
