/*********************************************************************************************************************
 * Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved.
 * This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 *********************************************************************************************************************/
package org.eclipse.smila.importing.crawler.file.filter.test;

import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;

import junit.framework.TestCase;

import org.apache.commons.io.FilenameUtils;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.AnySeq;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.importing.crawler.file.filter.FilterConfiguration;
import org.eclipse.smila.importing.crawler.file.filter.FilterEvaluator;
import org.eclipse.smila.importing.crawler.file.test.FilesystemHelper;
import org.eclipse.smila.utils.config.ConfigUtils;

/**
 * Test class for {@link FilterEvaluator} class. Covers the max-size, max-depth and include/exclude pattern filters
 * as applied to crawled files and crawled folders.
 */
public class TestFilterEvaluator extends TestCase {

  /** evaluator under test; each test installs its own {@link FilterConfiguration} before using it. */
  private final FilterEvaluator _filter = new FilterEvaluator();

  /** tests that files with size in bytes > max file size are filtered out. */
  public void testMaxFileSize() throws IOException {
    // create config
    final AnyMap configAny = DataFactory.DEFAULT.createAnyMap();
    // sometimes .svn will mess up our test here, so exclude it explicitly
    final AnySeq excludePatterns = DataFactory.DEFAULT.createAnySeq();
    excludePatterns.add(".*\\.svn.*");
    final AnyMap filePatterns = DataFactory.DEFAULT.createAnyMap();
    filePatterns.put(FilterConfiguration.EXCLUDE_PATTERNS, excludePatterns);
    configAny.put(FilterConfiguration.FILE_PATTERNS, filePatterns);
    // limit size, that's what we really want to test here...
    configAny.put(FilterConfiguration.MAX_SIZE, 100);
    _filter.setFilterConfiguration(new FilterConfiguration(configAny));

    // collect all entries of the fixture directory
    final Path testDir = ConfigUtils.getConfigFile("filter", "maxSize").toPath();
    final Collection<Path> testFiles = new ArrayList<Path>();
    try (DirectoryStream<Path> entries = Files.newDirectoryStream(testDir)) {
      for (final Path entry : entries) {
        testFiles.add(entry);
      }
    }

    // test
    final Collection<Path> result = _filter.applyFiltersForCrawledFiles(testDir, testFiles);
    assertEquals("1000.txt should have been filtered out", 2, result.size());
    for (final Path accepted : result) {
      final String name = accepted.getFileName().toString();
      assertTrue("unexpected file passed the size filter: " + accepted,
        name.equals("1.txt") || name.equals("10.txt"));
    }
  }

  /** tests that (sub-)folders with depth > max folder depth are filtered out. */
  public void testMaxFolderDepth() throws Exception {
    // create config
    final AnyMap configAny = DataFactory.DEFAULT.createAnyMap();
    configAny.put(FilterConfiguration.MAX_DEPTH, 2);
    _filter.setFilterConfiguration(new FilterConfiguration(configAny));

    // test
    final Path rootDir = Files.createTempDirectory("test-filter");
    try {
      // nested chain rootDir/dir1/dir2/dir3
      final Path test1 = rootDir.resolve("dir1");
      final Path test2 = test1.resolve("dir2");
      final Path test3 = test2.resolve("dir3");
      Files.createDirectories(test3);
      final Collection<Path> testFolders = new ArrayList<Path>();
      Collections.addAll(testFolders, test1, test2, test3);
      final Collection<Path> result =
        _filter.applyFiltersForCrawledFolders(testFolders, rootDir.toRealPath().toString());
      assertEquals(test3 + " should have been filtered out", 2, result.size());
      for (final Path accepted : result) {
        assertTrue("unexpected folder passed the depth filter: " + accepted,
          accepted.equals(test1) || accepted.equals(test2));
      }
    } finally {
      FilesystemHelper.deleteDirectory(rootDir);
    }
  }

  /** tests in- and exclude patterns on files. */
  public void testFilePatterns() throws Exception {
    // create config
    final AnySeq includePatterns = DataFactory.DEFAULT.createAnySeq();
    includePatterns.add(".*\\.txt");
    includePatterns.add("valid.txt");
    final AnySeq excludePatterns = DataFactory.DEFAULT.createAnySeq();
    excludePatterns.add("invalid.txt");
    excludePatterns.add(".*\\.pdf");
    final AnyMap filePatterns = DataFactory.DEFAULT.createAnyMap();
    filePatterns.put(FilterConfiguration.INCLUDE_PATTERNS, includePatterns);
    filePatterns.put(FilterConfiguration.EXCLUDE_PATTERNS, excludePatterns);
    final AnyMap configAny = DataFactory.DEFAULT.createAnyMap();
    configAny.put(FilterConfiguration.FILE_PATTERNS, filePatterns);
    _filter.setFilterConfiguration(new FilterConfiguration(configAny));

    // test
    final Path tmpDir = Files.createTempDirectory("test-filter");
    try {
      final Path test1 = tmpDir.resolve("dir1/test1.txt");
      final Path test2 = tmpDir.resolve("dir2/invalid.txt");
      Files.createDirectories(test1.getParent());
      Files.createDirectories(test2.getParent());
      Files.createFile(test1);
      Files.createFile(test2);
      final Path test3 = Files.createFile(tmpDir.resolve("test3.txt"));
      final Path test4 = Files.createFile(tmpDir.resolve("test4.pdf"));

      // test applyFiltersForCrawledFiles()
      final Collection<Path> testFiles = new ArrayList<Path>();
      Collections.addAll(testFiles, test1, test2, test3, test4);
      final Collection<Path> result = _filter.applyFiltersForCrawledFiles(tmpDir, testFiles);
      assertEquals(test2 + " and " + test4 + " should have been filtered out", 2, result.size());
      for (final Path accepted : result) {
        assertTrue("unexpected file passed the pattern filter: " + accepted,
          accepted.equals(test1) || accepted.equals(test3));
      }

      // test applyFiltersForCrawledFile()
      assertTrue(test1 + " should be accepted", _filter.applyFiltersForCrawledFile(tmpDir, test1));
      assertFalse(test2 + " should be rejected", _filter.applyFiltersForCrawledFile(tmpDir, test2));
      assertTrue(test3 + " should be accepted", _filter.applyFiltersForCrawledFile(tmpDir, test3));
      assertFalse(test4 + " should be rejected", _filter.applyFiltersForCrawledFile(tmpDir, test4));
    } finally {
      FilesystemHelper.deleteDirectory(tmpDir);
    }
  }

  /** tests exclude patterns on folders. Folders not to crawl into should be filtered out. */
  public void testFolderPatternsOnFolders() throws Exception {
    final Path rootFolder = ConfigUtils.getConfigFile("filter", "patterns").toPath();
    // create config
    final AnyMap configAny = DataFactory.DEFAULT.createAnyMap();
    final AnyMap folderPatterns = DataFactory.DEFAULT.createAnyMap();
    configAny.put(FilterConfiguration.FOLDER_PATTERNS, folderPatterns);
    // NOTE(review): patterns are given as single values here, not as sequences like in the other tests;
    // presumably this exercises the single-value form of the configuration - confirm.
    folderPatterns.put(FilterConfiguration.INCLUDE_PATTERNS, ".*/dir1/dir2");
    folderPatterns.put(FilterConfiguration.EXCLUDE_PATTERNS, ".*/dir1/dir2/dir3");
    _filter.setFilterConfiguration(new FilterConfiguration(configAny));

    // resolve the root once instead of hitting the file system for every call below
    final String rootPath = rootFolder.toRealPath().toString();

    // test
    final Path test1 = rootFolder.resolve("dir1/dir2/");
    final Path test2 = rootFolder.resolve("dir1/dir2/dir3/"); // excluded
    final Path test3 = rootFolder.resolve("dir1/dir2/dir3/dir4/"); // not excluded!
    final Path test4 = rootFolder.resolve("dir1"); // not included, but should not be filtered out!

    // test applyFiltersForCrawledFolders()
    final Collection<Path> testFolders = new ArrayList<Path>();
    Collections.addAll(testFolders, test1, test2, test3, test4);
    final Collection<Path> result = _filter.applyFiltersForCrawledFolders(testFolders, rootPath);
    assertEquals(test2 + " should have been filtered out, was: " + result, 3, result.size());

    // test applyFiltersForCrawledFolder()
    assertTrue(test1 + " should be accepted", _filter.applyFiltersForCrawledFolder(test1, rootPath));
    assertFalse(test2 + " should be rejected", _filter.applyFiltersForCrawledFolder(test2, rootPath));
    assertTrue(test3 + " should be accepted", _filter.applyFiltersForCrawledFolder(test3, rootPath));
    assertTrue(test4 + " should be accepted", _filter.applyFiltersForCrawledFolder(test4, rootPath));
  }

  /** tests include patterns on files. Only files from included folders should be imported. */
  public void testFolderPatternsOnFiles() throws Exception {
    final Path rootFolder = ConfigUtils.getConfigFile("filter", "patterns").toPath();
    // create config
    final AnySeq includePatterns = DataFactory.DEFAULT.createAnySeq();
    includePatterns.add(".*/dir1/dir2");
    includePatterns.add(".*/dir1/dir2/dir3/dir4");
    final AnySeq excludePatterns = DataFactory.DEFAULT.createAnySeq();
    excludePatterns.add(".*/dir1/dir2/dir3");
    final AnyMap folderPatterns = DataFactory.DEFAULT.createAnyMap();
    folderPatterns.put(FilterConfiguration.INCLUDE_PATTERNS, includePatterns);
    folderPatterns.put(FilterConfiguration.EXCLUDE_PATTERNS, excludePatterns);
    final AnyMap configAny = DataFactory.DEFAULT.createAnyMap();
    configAny.put(FilterConfiguration.FOLDER_PATTERNS, folderPatterns);
    _filter.setFilterConfiguration(new FilterConfiguration(configAny));

    // tests
    assertFileAccepted(false, rootFolder.resolve("dir1"), "test1.txt"); // not included
    assertFileAccepted(true, rootFolder.resolve("dir1/dir2"), "test2.txt"); // included
    assertFileAccepted(false, rootFolder.resolve("dir1/dir2/dir3"), "test3.txt"); // excluded
    assertFileAccepted(true, rootFolder.resolve("dir1/dir2/dir3/dir4"), "test4.txt"); // included
  }

  /**
   * checks that both file filter entry points agree on whether the given file is accepted: the collection variant
   * must return exactly one element (accepted) or none (rejected), and the single-file variant must return the
   * expected flag.
   *
   * @param expected
   *          true if the file is expected to pass the filter, false if it should be filtered out.
   * @param folder
   *          folder that is passed to the filter as the file's directory.
   * @param fileName
   *          name of the file, resolved against folder.
   */
  private void assertFileAccepted(final boolean expected, final Path folder, final String fileName)
    throws Exception {
    final Path file = folder.resolve(fileName);
    final Collection<Path> candidates = new ArrayList<Path>();
    candidates.add(file);
    final Collection<Path> accepted = _filter.applyFiltersForCrawledFiles(folder, candidates);
    assertEquals("number of accepted files for " + file, expected ? 1 : 0, accepted.size());
    assertEquals("single file filter result for " + file, expected,
      _filter.applyFiltersForCrawledFile(folder, file));
  }
}
