package org.eclipse.smila.importing.crawler.file.filter;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.eclipse.smila.importing.util.FilePathNormalizer;

/**
 * Applies configured filters to given files and folders.
 *
 * <p>
 * As long as no filter configuration is set (see {@link #setFilterConfiguration(FilterConfiguration)}), nothing is
 * filtered and the filter methods just return a copy of their input.
 * </p>
 */
public class FilterEvaluator {

  /** the filter configuration to use, 'null' means that filtering is disabled. */
  private FilterConfiguration _filterConfig;

  /**
   * filter out files that should not be crawled. (file patterns, max size, folder include patterns, symbolic links)
   *
   * @param fileDir
   *          the folder containing the given files; its normalized path is checked against the folder include
   *          patterns.
   * @param files
   *          the files to check, the collection itself is not modified.
   * @return a new collection containing only those files that passed all filters.
   * @throws IOException
   *           if checking a file for being a symbolic link fails. This check is only done if symbolic links are
   *           configured not to be followed.
   */
  public Collection<File> applyFiltersForCrawledFiles(final File fileDir, final Collection<File> files)
    throws IOException {
    final Collection<File> result = new ArrayList<File>(files);
    // read the field once so that the whole call works on the same configuration instance.
    final FilterConfiguration config = _filterConfig;
    if (config == null) {
      return result;
    }
    // the folder include check does not depend on the single file, so it is evaluated once instead of per file:
    // if the folder is not included, none of its files can pass.
    if (!config.getFolderPatternMatcher().isIncluded(normalize(fileDir))) {
      result.clear();
      return result;
    }
    final Iterator<File> it = result.iterator();
    while (it.hasNext()) {
      final File f = it.next();
      final boolean filteredOut = !config.getFilePatternMatcher().matches(f.getName()) // non-matching file pattern
        || f.length() > config.getMaxSize() // > max size
        || isUnwantedSymlink(config, f); // symbolic link that should not be followed
      if (filteredOut) {
        it.remove();
      }
    }
    return result;
  }

  /**
   * @param fileDir
   *          the folder containing the file.
   * @param file
   *          the file to check.
   * @return 'false' if file is filtered out, otherwise 'true'.
   * @throws IOException
   *           see {@link #applyFiltersForCrawledFiles(File, Collection)}.
   */
  public boolean applyFiltersForCrawledFile(final File fileDir, final File file) throws IOException {
    return !applyFiltersForCrawledFiles(fileDir, Arrays.asList(file)).isEmpty();
  }

  /**
   * filter out folders whose subfolders and files should not be crawled. (max depth, folder exclude patterns,
   * symbolic links)
   *
   * @param folders
   *          the folders to check, the collection itself is not modified.
   * @param rootFolder
   *          path of the crawl root; the depth of a folder is the number of "/" in its normalized path minus the
   *          number of "/" in the normalized root path.
   * @return a new collection containing only those folders that passed all filters.
   * @throws IOException
   *           if checking a folder for being a symbolic link fails. This check is only done if symbolic links are
   *           configured not to be followed.
   */
  public Collection<File> applyFiltersForCrawledFolders(final Collection<File> folders, final String rootFolder)
    throws IOException {
    final Collection<File> result = new ArrayList<File>(folders);
    // read the field once so that the whole call works on the same configuration instance.
    final FilterConfiguration config = _filterConfig;
    if (config == null) {
      return result;
    }
    final int rootDepth = StringUtils.countMatches(normalize(rootFolder), "/");
    final Iterator<File> it = result.iterator();
    while (it.hasNext()) {
      final File dir = it.next();
      final String normalizedPath = normalize(dir);
      final int depth = StringUtils.countMatches(normalizedPath, "/") - rootDepth;
      final boolean filteredOut = depth > config.getMaxDepth() // folder too deep
        || config.getFolderPatternMatcher().isExcluded(normalizedPath) // folder exclude pattern
        || isUnwantedSymlink(config, dir); // symbolic link that should not be followed
      if (filteredOut) {
        it.remove();
      }
    }
    return result;
  }

  /**
   * @param folder
   *          the folder to check.
   * @param rootFolder
   *          path of the crawl root.
   * @return 'false' if folder is filtered out, otherwise 'true'.
   * @throws IOException
   *           see {@link #applyFiltersForCrawledFolders(Collection, String)}.
   */
  public boolean applyFiltersForCrawledFolder(final File folder, final String rootFolder) throws IOException {
    return !applyFiltersForCrawledFolders(Arrays.asList(folder), rootFolder).isEmpty();
  }

  /**
   * The configuration flag is tested first, so the symbolic link check on the file itself (which is what can fail
   * with an IOException) is skipped completely if symbolic links should be followed anyway.
   *
   * @return 'true' if the given file or folder is a symbolic link and symbolic links should not be followed.
   */
  private boolean isUnwantedSymlink(final FilterConfiguration config, final File fileOrDir) throws IOException {
    return !config.followSymbolicLinks() && FileUtils.isSymlink(fileOrDir);
  }

  /** @return normalized input (file) path. */
  private String normalize(final File input) {
    return FilePathNormalizer.getNormalizedPath(input);
  }

  /** @return normalized input (file) path. */
  private String normalize(final String input) {
    return FilePathNormalizer.getNormalizedPath(input);
  }

  /**
   * set 'null' to disable filtering.
   *
   * @param filterConfig
   *          the filter configuration to apply in subsequent filter calls, or 'null'.
   */
  public void setFilterConfiguration(FilterConfiguration filterConfig) {
    _filterConfig = filterConfig;
  }

}
