package org.eclipse.smila.importing.crawler.web.filter;

import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.importing.ImportingConstants;
import org.eclipse.smila.taskworker.TaskLog;

/**
 * Settings used by {@link SimpleLinkFilter}. Currently it carries two values read from the task parameters: the host
 * derived from the crawl start URL, and an optional URL prefix taken from the "filter" parameter map.
 */
public class FilterConfiguration {
  /** key of the task parameter that holds the filter settings map. */
  private static final String FILTER_PARAM = "filter";

  /** key of the URL prefix entry inside the filter settings map. */
  private static final String URL_PREFIX_PARAM = "urlPrefix";

  /** host derived from the start URL, or null if no start URL parameter was given. */
  private final String _startHost;

  /** value of the URL prefix filter setting, or null if it could not be read. */
  private final String _urlPrefix;

  /**
   * Reads the filter configuration from the given task parameters.
   *
   * @param parameters
   *          task parameters to read the start URL and the filter settings from.
   * @param log
   *          task log, handed on to {@link SimpleLinkFilter} and used to report a malformed "filter" parameter.
   */
  public FilterConfiguration(final AnyMap parameters, final TaskLog log) {
    _startHost = readStartHost(parameters, log);
    _urlPrefix = readFilterString(parameters, URL_PREFIX_PARAM, log);
  }

  /** @return host determined from the start URL, null if no start URL was set. */
  public String getStartHost() {
    return _startHost;
  }

  /** @return configured URL prefix, null if none is available. */
  public String getUrlPrefix() {
    return _urlPrefix;
  }

  /** @return result of {@link SimpleLinkFilter#getHost} for the start URL parameter, null if that parameter is unset. */
  private static String readStartHost(final AnyMap parameters, final TaskLog log) {
    final String startUrl = parameters.getStringValue(ImportingConstants.TASK_PARAM_START_URL);
    return startUrl == null ? null : SimpleLinkFilter.getHost(startUrl, log);
  }

  /** @return string value stored under the given key in the filter settings, null if there are no usable settings. */
  private static String readFilterString(final AnyMap parameters, final String key, final TaskLog log) {
    final AnyMap filterSettings = readFilterSettings(parameters, log);
    return filterSettings == null ? null : filterSettings.getStringValue(key);
  }

  /** @return the "filter" parameter as a map; null if it is missing or (after logging an info message) not a map. */
  private static AnyMap readFilterSettings(final AnyMap parameters, final TaskLog log) {
    final Any candidate = parameters.get(FILTER_PARAM);
    if (candidate == null) {
      return null;
    }
    if (!candidate.isMap()) {
      log.info("Parameter 'filter' is not a map");
      return null;
    }
    return candidate.asMap();
  }
}
