package org.eclipse.smila.importing.crawler.file.filter.test;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;

import junit.framework.TestCase;

import org.apache.commons.io.FilenameUtils;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.AnySeq;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.importing.crawler.file.filter.FilterConfiguration;
import org.eclipse.smila.importing.crawler.file.filter.FilterEvaluator;
import org.eclipse.smila.utils.config.ConfigUtils;

/**
 * Test class for {@link FilterEvaluator} class.
 *
 * Every test builds a {@link FilterConfiguration} from an {@link AnyMap}, sets it on the evaluator and then checks
 * which of the given files or folders pass the filter methods.
 */
public class TestFilterEvaluator extends TestCase {

  /** the evaluator under test, (re-)configured at the start of each test method. */
  private final FilterEvaluator _filter = new FilterEvaluator();

  /**
   * tests that files with size in bytes > max file size are filtered out.
   *
   * @throws IOException
   *           declared by the original test, propagated from the called test utilities.
   */
  public void testMaxFileSize() throws IOException {
    // create config
    // sometimes .svn will mess up our test here, so exclude everything that looks like it.
    final AnySeq excludePatterns = DataFactory.DEFAULT.createAnySeq();
    excludePatterns.add(".*\\.svn.*");
    final AnyMap filePatterns = DataFactory.DEFAULT.createAnyMap();
    filePatterns.put(FilterConfiguration.EXCLUDE_PATTERNS, excludePatterns);
    final AnyMap configAny = DataFactory.DEFAULT.createAnyMap();
    configAny.put(FilterConfiguration.FILE_PATTERNS, filePatterns);
    // limit size, that's what we really want to test here...
    configAny.put(FilterConfiguration.MAX_SIZE, 100);
    _filter.setFilterConfiguration(new FilterConfiguration(configAny));

    // test
    final File testDir = ConfigUtils.getConfigFile("filter", "maxSize");
    // File.listFiles() yields null if the path is not a readable directory: fail with a clear message instead of
    // running into a NullPointerException below.
    final File[] dirContent = testDir.listFiles();
    assertNotNull("cannot list content of test directory " + testDir, dirContent);
    final Collection<File> result = _filter.applyFiltersForCrawledFiles(testDir, asCollection(dirContent));
    assertEquals("1000.txt should have been filtered out", 2, result.size());
    for (final File f : result) {
      final String name = f.getName();
      // report the offending path in the failure message instead of printing every accepted file to stdout.
      assertTrue("unexpected file passed the size filter: " + FilenameUtils.normalize(f.getAbsolutePath(), true),
        "1.txt".equals(name) || "10.txt".equals(name));
    }
  }

  /**
   * tests that (sub-)folders with depth > max folder depth are filtered out.
   *
   * @throws Exception
   *           any error fails the test.
   */
  public void testMaxFolderDepth() throws Exception {
    // create config
    final AnyMap configAny = DataFactory.DEFAULT.createAnyMap();
    configAny.put(FilterConfiguration.MAX_DEPTH, 2);
    _filter.setFilterConfiguration(new FilterConfiguration(configAny));

    // test
    final File rootDir = new File("/temp");
    final File test1 = new File("/temp/dir1/");
    final File test2 = new File("/temp/dir1/dir2");
    final File test3 = new File("/temp/dir1/dir2/dir3");
    final Collection<File> result =
      _filter.applyFiltersForCrawledFolders(asCollection(test1, test2, test3), rootDir.getAbsolutePath());
    assertEquals(test3 + " should have been filtered out", 2, result.size());
    for (final File f : result) {
      final String path = f.getPath();
      assertTrue("unexpected folder passed the depth filter: " + path,
        path.equals(test1.getPath()) || path.equals(test2.getPath()));
    }
  }

  /**
   * tests in- and exclude patterns on files.
   *
   * @throws Exception
   *           any error fails the test.
   */
  public void testFilePatterns() throws Exception {
    // create config
    final AnySeq includePatterns = DataFactory.DEFAULT.createAnySeq();
    includePatterns.add(".*\\.txt");
    includePatterns.add("valid.txt");
    final AnySeq excludePatterns = DataFactory.DEFAULT.createAnySeq();
    excludePatterns.add("invalid.txt");
    excludePatterns.add(".*\\.pdf");
    final AnyMap filePatterns = DataFactory.DEFAULT.createAnyMap();
    filePatterns.put(FilterConfiguration.INCLUDE_PATTERNS, includePatterns);
    filePatterns.put(FilterConfiguration.EXCLUDE_PATTERNS, excludePatterns);
    final AnyMap configAny = DataFactory.DEFAULT.createAnyMap();
    configAny.put(FilterConfiguration.FILE_PATTERNS, filePatterns);
    _filter.setFilterConfiguration(new FilterConfiguration(configAny));

    // test
    final File dummyDir = new File("dummy-dir");
    final File test1 = new File("/dir1/test1.txt");
    final File test2 = new File("/dir2/invalid.txt");
    final File test3 = new File("test3.txt");
    final File test4 = new File("test4.pdf");
    // test applyFiltersForCrawledFiles()
    final Collection<File> result =
      _filter.applyFiltersForCrawledFiles(dummyDir, asCollection(test1, test2, test3, test4));
    assertEquals(test2 + " and " + test4 + " should have been filtered out", 2, result.size());
    for (final File f : result) {
      final String path = f.getPath();
      assertTrue("unexpected file passed the file patterns: " + path,
        path.equals(test1.getPath()) || path.equals(test3.getPath()));
    }
    // test applyFiltersForCrawledFile()
    assertTrue(test1 + " should have been accepted", _filter.applyFiltersForCrawledFile(dummyDir, test1));
    assertFalse(test2 + " should have been filtered out", _filter.applyFiltersForCrawledFile(dummyDir, test2));
    assertTrue(test3 + " should have been accepted", _filter.applyFiltersForCrawledFile(dummyDir, test3));
    assertFalse(test4 + " should have been filtered out", _filter.applyFiltersForCrawledFile(dummyDir, test4));
  }

  /**
   * tests exclude patterns on folders. Folders not to crawl into should be filtered out.
   *
   * @throws Exception
   *           any error fails the test.
   */
  public void testFolderPatternsOnFolders() throws Exception {
    final File rootFolder = ConfigUtils.getConfigFile("filter", "patterns");
    final String rootPath = rootFolder.getAbsolutePath();
    // create config
    // note: the patterns are put as plain string values here, not wrapped in a sequence as in the other tests.
    final AnyMap folderPatterns = DataFactory.DEFAULT.createAnyMap();
    folderPatterns.put(FilterConfiguration.INCLUDE_PATTERNS, ".*/dir1/dir2");
    folderPatterns.put(FilterConfiguration.EXCLUDE_PATTERNS, ".*/dir1/dir2/dir3");
    final AnyMap configAny = DataFactory.DEFAULT.createAnyMap();
    configAny.put(FilterConfiguration.FOLDER_PATTERNS, folderPatterns);
    _filter.setFilterConfiguration(new FilterConfiguration(configAny));

    // test
    final File test1 = new File(rootFolder, "/dir1/dir2/");
    final File test2 = new File(rootFolder, "/dir1/dir2/dir3/"); // excluded
    final File test3 = new File(rootFolder, "/dir1/dir2/dir3/dir4/"); // not excluded!
    // NOTE(review): the trailing blank in this path is kept from the original test - confirm that it is intended.
    final File test4 = new File(rootFolder, "/dir1/ "); // not included, but should not be filtered out!
    // test applyFiltersForCrawledFolders()
    final Collection<File> result =
      _filter.applyFiltersForCrawledFolders(asCollection(test1, test2, test3, test4), rootPath);
    assertEquals(test2 + " should have been filtered out, was: " + result, 3, result.size());
    // test applyFiltersForCrawledFolder()
    assertTrue(test1 + " should have been accepted", _filter.applyFiltersForCrawledFolder(test1, rootPath));
    assertFalse(test2 + " should have been filtered out", _filter.applyFiltersForCrawledFolder(test2, rootPath));
    assertTrue(test3 + " should have been accepted", _filter.applyFiltersForCrawledFolder(test3, rootPath));
    assertTrue(test4 + " should have been accepted", _filter.applyFiltersForCrawledFolder(test4, rootPath));
  }

  /**
   * tests include patterns on files. Only files from included folders should be imported.
   *
   * @throws Exception
   *           any error fails the test.
   */
  public void testFolderPatternsOnFiles() throws Exception {
    final File rootFolder = ConfigUtils.getConfigFile("filter", "patterns");
    // create config
    final AnySeq includePatterns = DataFactory.DEFAULT.createAnySeq();
    includePatterns.add(".*/dir1/dir2");
    includePatterns.add(".*/dir1/dir2/dir3/dir4");
    final AnySeq excludePatterns = DataFactory.DEFAULT.createAnySeq();
    excludePatterns.add(".*/dir1/dir2/dir3");
    final AnyMap folderPatterns = DataFactory.DEFAULT.createAnyMap();
    folderPatterns.put(FilterConfiguration.INCLUDE_PATTERNS, includePatterns);
    folderPatterns.put(FilterConfiguration.EXCLUDE_PATTERNS, excludePatterns);
    final AnyMap configAny = DataFactory.DEFAULT.createAnyMap();
    configAny.put(FilterConfiguration.FOLDER_PATTERNS, folderPatterns);
    _filter.setFilterConfiguration(new FilterConfiguration(configAny));

    // tests
    assertFolderFileAccepted(false, new File(rootFolder, "dir1"), "test1.txt"); // not included
    assertFolderFileAccepted(true, new File(rootFolder, "dir1/dir2"), "test2.txt"); // included
    assertFolderFileAccepted(false, new File(rootFolder, "dir1/dir2/dir3"), "test3.txt"); // excluded
    assertFolderFileAccepted(true, new File(rootFolder, "dir1/dir2/dir3/dir4"), "test4.txt"); // included
  }

  /**
   * runs a single file located in the given folder through both applyFiltersForCrawledFiles() and
   * applyFiltersForCrawledFile() and asserts that both agree with the expectation.
   *
   * @param expectAccepted
   *          true if the file is expected to pass the filters, false if it is expected to be filtered out.
   * @param folder
   *          the folder handed to the filter methods, also used as parent of the tested file.
   * @param fileName
   *          name of the file to test, relative to the folder.
   * @throws Exception
   *           any error fails the calling test.
   */
  private void assertFolderFileAccepted(final boolean expectAccepted, final File folder, final String fileName)
    throws Exception {
    final File file = new File(folder, fileName);
    final String message = file + (expectAccepted ? " should have been accepted" : " should have been filtered out");
    final Collection<File> result = _filter.applyFiltersForCrawledFiles(folder, asCollection(file));
    assertEquals(message + ", was: " + result, expectAccepted ? 1 : 0, result.size());
    final boolean accepted = _filter.applyFiltersForCrawledFile(folder, file);
    assertEquals(message, expectAccepted, accepted);
  }

  /**
   * @param files
   *          the files to collect, must not be null.
   * @return a new modifiable collection containing the given files in the given order.
   */
  private static Collection<File> asCollection(final File... files) {
    final Collection<File> collection = new ArrayList<File>();
    Collections.addAll(collection, files);
    return collection;
  }
}
