/*******************************************************************************
 * Copyright (c) 2008, 2011 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
 * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: Juergen Schumacher (Attensity Europe GmbH) - initial API and implementation
 *******************************************************************************/

package org.eclipse.smila.importing.crawler.web;

import java.util.Collection;

import org.eclipse.smila.datamodel.Record;

/**
 * Contract for LinkFilter services. A LinkFilter is applied to the output of the {@link LinkExtractor} and narrows it
 * down to the links that should actually be followed in follow-up tasks.
 */
public interface LinkFilter {

  /**
   * Filters a collection of extracted links.
   * 
   * @param extractedLinks
   *          the links produced by the {@link LinkExtractor} service.
   * @param context
   *          the {@link WebCrawlingContext}.
   * @return the links that are to be followed in follow-up tasks.
   * @throws WebCrawlerException
   *           if an error occurs while processing the links.
   */
  Collection<Record> filterLinks(
    Collection<Record> extractedLinks,
    WebCrawlingContext context) throws WebCrawlerException;

  /**
   * Decides whether a single link is allowed to be followed.
   * 
   * @param link
   *          the link to check, given as a String.
   * @param context
   *          the {@link WebCrawlingContext}.
   * @return {@code true} if following the link is allowed, {@code false} otherwise.
   * @throws WebCrawlerException
   *           if an error occurs while processing the link.
   */
  boolean allowLink(
    String link,
    WebCrawlingContext context) throws WebCrawlerException;
}
