/*********************************************************************************************************************
 * Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved.
 * This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 *********************************************************************************************************************/
package org.eclipse.smila.importing.crawler.file.filter;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;

import org.apache.commons.lang.StringUtils;
import org.eclipse.smila.importing.util.FilePathNormalizer;

/**
 * Applies configured filters to given files and folders.
 *
 * <p>
 * As long as no filter configuration is set (see {@link #setFilterConfiguration(FilterConfiguration)}) filtering is
 * disabled and the filter methods return a copy of their input.
 * </p>
 */
public class FilterEvaluator {

  /** the filter configuration to use, 'null' disables filtering. */
  private FilterConfiguration _filterConfig;

  /**
   * filter out files that should not be crawled. (folder include patterns, file patterns, symbolic links, max size)
   *
   * @param fileDir
   *          the folder containing the given files; it is checked against the folder include patterns.
   * @param files
   *          the files to check.
   * @return a new collection containing the files that passed all filters, in the iteration order of the input.
   * @throws IOException
   *           if normalizing the folder path or reading a file's size fails.
   */
  public Collection<Path> applyFiltersForCrawledFiles(final Path fileDir, final Collection<Path> files)
    throws IOException {
    // read the field once so that the whole call works on one configuration.
    final FilterConfiguration config = _filterConfig;
    if (config == null) {
      return new ArrayList<Path>(files);
    }
    final Collection<Path> result = new ArrayList<Path>(files.size());
    // the folder is the same for all files, so the include check is done once instead of per file: if the folder is
    // not included no file can pass, and no file has to be touched at all.
    if (!config.getFolderPatternMatcher().isIncluded(normalize(fileDir))) {
      return result;
    }
    for (final Path file : files) {
      if (isFileAccepted(config, file)) {
        result.add(file);
      }
    }
    return result;
  }

  /**
   * checks the per-file filters: file patterns, symbolic links, max size.
   *
   * @return 'false' if file is filtered out, otherwise 'true'.
   */
  private static boolean isFileAccepted(final FilterConfiguration config, final Path file) throws IOException {
    if (!config.getFilePatternMatcher().matches(file.getFileName().toString())) {
      // filter out non-matching file patterns
      return false;
    }
    // must be checked before the size: Files.size() follows links and throws for a dangling link, which would abort
    // the filtering although the link is to be skipped anyway.
    if (!config.followSymbolicLinks() && Files.isSymbolicLink(file)) {
      // filter out symbolic links
      return false;
    }
    // filter out > max size
    return !(Files.size(file) > config.getMaxSize());
  }

  /**
   * convenience variant of {@link #applyFiltersForCrawledFiles(Path, Collection)} for a single file.
   *
   * @param fileDir
   *          the folder containing the file.
   * @param file
   *          the file to check.
   * @return 'false' if file is filtered out, otherwise 'true'.
   * @throws IOException
   *           see {@link #applyFiltersForCrawledFiles(Path, Collection)}.
   */
  public boolean applyFiltersForCrawledFile(final Path fileDir, final Path file) throws IOException {
    return !applyFiltersForCrawledFiles(fileDir, Arrays.asList(file)).isEmpty();
  }

  /**
   * filter out folders whose subfolders and files should not be crawled. (max depth, folder exclude patterns,
   * symbolic links)
   *
   * <p>
   * The depth of a folder is the number of "/" in its normalized path minus the number of "/" in the normalized root
   * folder.
   * </p>
   *
   * @param folders
   *          the folders to check.
   * @param rootFolder
   *          the root folder, used as reference for the depth calculation.
   * @return a new collection containing the folders that passed all filters, in the iteration order of the input.
   * @throws IOException
   *           if normalizing a folder path fails.
   */
  public Collection<Path> applyFiltersForCrawledFolders(final Collection<Path> folders, final String rootFolder)
    throws IOException {
    final Collection<Path> result = new ArrayList<Path>(folders);
    // read the field once so that the whole call works on one configuration.
    final FilterConfiguration config = _filterConfig;
    if (config != null) {
      final String normalizedRoot = normalize(rootFolder);
      final int rootDepth = StringUtils.countMatches(normalizedRoot, "/");
      final Iterator<Path> it = result.iterator();
      while (it.hasNext()) {
        final Path dir = it.next();
        final String normalizedPath = normalize(dir);
        final int depth = StringUtils.countMatches(normalizedPath, "/") - rootDepth;
        if (depth > config.getMaxDepth()) {
          // filter out folder depth
          it.remove();
        } else if (config.getFolderPatternMatcher().isExcluded(normalizedPath)) {
          // filter out folder exclude patterns
          it.remove();
        } else if (Files.isSymbolicLink(dir) && !config.followSymbolicLinks()) {
          // filter out symbolic links
          it.remove();
        }
      }
    }
    return result;
  }

  /**
   * convenience variant of {@link #applyFiltersForCrawledFolders(Collection, String)} for a single folder.
   *
   * @param folder
   *          the folder to check.
   * @param rootFolder
   *          the root folder, used as reference for the depth calculation.
   * @return 'false' if folder is filtered out, otherwise 'true'.
   * @throws IOException
   *           see {@link #applyFiltersForCrawledFolders(Collection, String)}.
   */
  public boolean applyFiltersForCrawledFolder(final Path folder, final String rootFolder) throws IOException {
    return !applyFiltersForCrawledFolders(Arrays.asList(folder), rootFolder).isEmpty();
  }

  /**
   * @return normalized input (file) path, as produced by {@link FilePathNormalizer#getNormalizedPath}.
   * @throws IOException
   *           if the normalizer fails.
   */
  private String normalize(final Path input) throws IOException {
    return FilePathNormalizer.getNormalizedPath(input);
  }

  /** @return normalized input (file) path, as produced by {@link FilePathNormalizer#getNormalizedPath}. */
  private String normalize(final String input) {
    return FilePathNormalizer.getNormalizedPath(input);
  }

  /**
   * set 'null' to disable filtering.
   *
   * @param filterConfig
   *          the filter configuration to apply from now on, or 'null'.
   */
  public void setFilterConfiguration(final FilterConfiguration filterConfig) {
    _filterConfig = filterConfig;
  }

}
