/*******************************************************************************
 * Copyright (c) 2008, 2011 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
 * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: Juergen Schumacher (Attensity Europe GmbH) - initial API and implementation
 *******************************************************************************/
package org.eclipse.smila.importing.test;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.datamodel.ipc.BinaryObjectStreamIterator;
import org.eclipse.smila.importing.DeltaImportStrategy;
import org.eclipse.smila.importing.ImportingConstants;
import org.eclipse.smila.importing.crawler.file.FileCrawlerService;
import org.eclipse.smila.jobmanager.definitions.JobDefinition;
import org.eclipse.smila.jobmanager.definitions.JobRunMode;
import org.eclipse.smila.objectstore.ObjectStoreException;
import org.eclipse.smila.objectstore.StoreObject;
import org.eclipse.smila.utils.config.ConfigUtils;
import org.eclipse.smila.utils.workspace.WorkspaceHelper;

/**
 * Integration tests for the file crawler job: each test copies a directory from the bundle configuration to a
 * workspace directory, runs the "crawlFiles" job on it and then inspects the bulks written to the "added" and
 * "deleted" buckets of the bulk store as well as the state of the delta service.
 */
public class TestFileCrawling extends AImportingIntegrationTest {

  /** name of the crawl job created for each run; the job is derived from the definition named "crawlFilesTemplate". */
  private static final String JOBNAME_CRAWLFILES = "crawlFiles";

  /** timeout passed to waitForJobRunCompleted() for a single crawl run (presumably milliseconds - confirm). */
  private static final int JOB_COMPLETION_TIMEOUT = 30000;

  /** first crawl of a flat directory: all 10 files are added, nothing is deleted, source "files" is known to delta. */
  public void testInitialCrawl() throws Exception {
    crawlFilesFromConfig("files10", "testInitialCrawl");
    assertTrue(_deltaService.getSourceIds().contains("files"));
    final int expectedRecordCount = 10;
    checkAddedBulks(expectedRecordCount);
    checkDeletedBulks(0);
  }

  /** first crawl of the "filesMultiLevel" configuration directory: 31 records are expected. */
  public void testCrawlMultipleLevels() throws Exception {
    crawlFilesFromConfig("filesMultiLevel", "testCrawlMultipleLevels");
    assertTrue(_deltaService.getSourceIds().contains("files"));
    final int expectedRecordCount = 31;
    checkAddedBulks(expectedRecordCount);
    checkDeletedBulks(0);
  }

  /** crawling an unchanged directory a second time must produce neither added nor deleted bulks. */
  public void testNoUpdatesCrawl() throws Exception {
    final File crawlDir = crawlFilesFromConfig("files10", "testNoUpdatesCrawl");
    checkAddedBulks(10);
    checkDeletedBulks(0);
    _objectStore.clearStore(STORENAME_BULKS);
    crawlFiles(crawlDir);
    checkAddedBulks(0);
    checkDeletedBulks(0);
  }

  /** copying 10 more files into an already crawled directory yields exactly those 10 as added records. */
  public void testCrawlAdditionalFiles() throws Exception {
    crawlFilesFromConfig("files59", "testCrawlAdditionalFiles");
    checkAddedBulks(59);
    checkDeletedBulks(0);
    _objectStore.clearStore(STORENAME_BULKS);
    crawlFilesFromConfig("files10", "testCrawlAdditionalFiles");
    checkAddedBulks(10);
    checkDeletedBulks(0);
  }

  /** touching every second file of a crawled directory yields 5 records flagged as updates. */
  public void testCrawlUpdates() throws Exception {
    final File workspaceDir = copyConfigFilesToWorkspace("files10", "testCrawlUpdates");
    crawlFiles(workspaceDir);
    checkAddedBulks(10);
    checkDeletedBulks(0);
    _objectStore.clearStore(STORENAME_BULKS);
    // modify half of the files (those at even positions of the directory listing)
    final File[] files = workspaceDir.listFiles();
    assertNotNull("could not list files of " + workspaceDir, files);
    final long now = System.currentTimeMillis();
    for (int i = 0; i < files.length; i += 2) {
      // fail right here if the timestamp could not be changed, instead of with a wrong update count later.
      assertTrue("could not set modification date of " + files[i], files[i].setLastModified(now));
    }
    crawlFiles(workspaceDir);
    checkUpdateBulks(5);
    checkDeletedBulks(0);
  }

  /** crawling the emptied directory again yields delete records for all 10 previously crawled files. */
  public void testDeleteRemovedFiles() throws Exception {
    crawlFilesFromConfig("files10", "testDeleteRemovedFiles");
    checkAddedBulks(10);
    checkDeletedBulks(0);
    _objectStore.clearStore(STORENAME_BULKS);
    final File cleanDir = cleanWorkspaceDirectory("testDeleteRemovedFiles");
    crawlFiles(cleanDir);
    checkAddedBulks(0);
    checkDeletedBulks(10);
  }

  /** replacing the 59 crawled files by 10 other files yields 10 added and 59 deleted records. */
  public void testCrawlOtherFiles() throws Exception {
    crawlFilesFromConfig("files59", "testCrawlOtherFiles");
    checkAddedBulks(59);
    checkDeletedBulks(0);
    _objectStore.clearStore(STORENAME_BULKS);
    cleanWorkspaceDirectory("testCrawlOtherFiles");
    crawlFilesFromConfig("files10", "testCrawlOtherFiles");
    checkAddedBulks(10);
    checkDeletedBulks(59);
  }

  /** test import with delta strategy disabled. */
  public void testDeltaDisabled() throws Exception {
    crawlFilesFromConfig("files59", "testDeltaDisabled", DeltaImportStrategy.DISABLED);
    checkAddedBulks(59);
    checkDeletedBulks(0);
    assertFalse(_deltaService.getSourceIds().contains("files"));
    _objectStore.clearStore(STORENAME_BULKS);
    // the directory is not cleaned, so the second run sees all 69 files and adds them all again.
    crawlFilesFromConfig("files10", "testDeltaDisabled", DeltaImportStrategy.DISABLED);
    checkAddedBulks(69);
    checkDeletedBulks(0);
    assertFalse(_deltaService.getSourceIds().contains("files"));
  }

  /** test that no records are deleted with delta strategy disabled. */
  public void testDeltaDisabledNoDelete() throws Exception {
    crawlFilesFromConfig("files59", "testDeltaDisabledNoDelete", DeltaImportStrategy.DISABLED);
    checkAddedBulks(59);
    checkDeletedBulks(0);
    assertFalse(_deltaService.getSourceIds().contains("files"));
    _objectStore.clearStore(STORENAME_BULKS);
    cleanWorkspaceDirectory("testDeltaDisabledNoDelete");
    crawlFilesFromConfig("files10", "testDeltaDisabledNoDelete", DeltaImportStrategy.DISABLED);
    checkAddedBulks(10);
    checkDeletedBulks(0);
    assertFalse(_deltaService.getSourceIds().contains("files"));
  }

  /**
   * test import with delta strategy for initial import. NOTE: the workspace directory name is misspelled ("Intial"),
   * but it is used for both crawls so they operate on the same directory.
   */
  public void testDeltaInitial() throws Exception {
    crawlFilesFromConfig("files59", "testDeltaIntial", DeltaImportStrategy.INITIAL);
    checkAddedBulks(59);
    checkDeletedBulks(0);
    assertEquals(59, _deltaService.countEntries("files", true));
    _objectStore.clearStore(STORENAME_BULKS);
    // the directory is not cleaned, so the second run sees all 69 files and adds them all again.
    crawlFilesFromConfig("files10", "testDeltaIntial", DeltaImportStrategy.INITIAL);
    checkAddedBulks(69);
    checkDeletedBulks(0);
    assertEquals(69, _deltaService.countEntries("files", true));
  }

  /** test that no records are deleted with delta strategy for initial import. */
  public void testDeltaInitialNoDelete() throws Exception {
    crawlFilesFromConfig("files59", "testDeltaInitialNoDelete", DeltaImportStrategy.INITIAL);
    checkAddedBulks(59);
    checkDeletedBulks(0);
    assertEquals(59, _deltaService.countEntries("files", true));
    cleanWorkspaceDirectory("testDeltaInitialNoDelete");
    _objectStore.clearStore(STORENAME_BULKS);
    crawlFilesFromConfig("files10", "testDeltaInitialNoDelete", DeltaImportStrategy.INITIAL);
    checkAddedBulks(10);
    checkDeletedBulks(0);
    assertEquals(69, _deltaService.countEntries("files", true));
  }

  /** test import with delta strategy for additive import. */
  public void testDeltaAdditive() throws Exception {
    crawlFilesFromConfig("files59", "testDeltaAdditive", DeltaImportStrategy.ADDITIVE);
    checkAddedBulks(59);
    checkDeletedBulks(0);
    assertEquals(59, _deltaService.countEntries("files", true));
    _objectStore.clearStore(STORENAME_BULKS);
    crawlFilesFromConfig("files10", "testDeltaAdditive", DeltaImportStrategy.ADDITIVE);
    checkAddedBulks(10);
    checkDeletedBulks(0);
    assertEquals(69, _deltaService.countEntries("files", true));
  }

  /** test that no records are deleted with delta strategy for additive import. */
  public void testDeltaAdditiveNoDelete() throws Exception {
    crawlFilesFromConfig("files59", "testDeltaAdditiveNoDelete", DeltaImportStrategy.ADDITIVE);
    checkAddedBulks(59);
    checkDeletedBulks(0);
    assertEquals(59, _deltaService.countEntries("files", true));
    cleanWorkspaceDirectory("testDeltaAdditiveNoDelete");
    _objectStore.clearStore(STORENAME_BULKS);
    crawlFilesFromConfig("files10", "testDeltaAdditiveNoDelete", DeltaImportStrategy.ADDITIVE);
    checkAddedBulks(10);
    checkDeletedBulks(0);
    assertEquals(69, _deltaService.countEntries("files", true));
  }

  /** test import with delta strategy for full import. */
  public void testDeltaFull() throws Exception {
    crawlFilesFromConfig("files59", "testDeltaFull", DeltaImportStrategy.FULL);
    checkAddedBulks(59);
    checkDeletedBulks(0);
    assertEquals(59, _deltaService.countEntries("files", true));
    _objectStore.clearStore(STORENAME_BULKS);
    crawlFilesFromConfig("files10", "testDeltaFull", DeltaImportStrategy.FULL);
    checkAddedBulks(10);
    checkDeletedBulks(0);
    assertEquals(69, _deltaService.countEntries("files", true));
  }

  /** test that records are deleted with delta strategy for full import. */
  public void testDeltaFullDelete() throws Exception {
    crawlFilesFromConfig("files59", "testDeltaFullDelete", DeltaImportStrategy.FULL);
    checkAddedBulks(59);
    checkDeletedBulks(0);
    assertEquals(59, _deltaService.countEntries("files", true));
    cleanWorkspaceDirectory("testDeltaFullDelete");
    _objectStore.clearStore(STORENAME_BULKS);
    crawlFilesFromConfig("files10", "testDeltaFullDelete", DeltaImportStrategy.FULL);
    checkAddedBulks(10);
    checkDeletedBulks(59);
    assertEquals(10, _deltaService.countEntries("files", true));
  }

  /** test crawling of compounds with delta strategy for initial import. */
  public void testCompoundCrawlingDeltaInitial() throws Exception {
    crawlFilesFromConfig("compounds", "testCompoundCrawlingDeltaInitial", DeltaImportStrategy.INITIAL);
    checkAddedBulks(21); // 10 files in dir, 1 compound with 10 files.
    checkDeletedBulks(0);
    assertEquals(21, _deltaService.countEntries("files", true));
  }

  /** test that crawling unchanged compounds a second time with full delta strategy produces no updates. */
  public void testCompoundCrawlingDeltaFull() throws Exception {
    final File crawlDir =
      crawlFilesFromConfig("compounds", "testCompoundCrawlingDeltaFull", DeltaImportStrategy.FULL);
    checkAddedBulks(21); // 10 files in dir, 1 compound with 10 files.
    checkDeletedBulks(0);
    assertEquals(21, _deltaService.countEntries("files", true));
    _objectStore.clearStore(STORENAME_BULKS);
    crawlFiles(crawlDir, DeltaImportStrategy.FULL);
    checkUpdateBulks(0); // nothing changed
    checkDeletedBulks(0);
    assertEquals(21, _deltaService.countEntries("files", true));
  }

  /** test crawling of updated compounds. */
  public void testCompoundCrawlingUpdateDeltaFull() throws Exception {
    // use a workspace directory of its own: this test replaces the compound file, so it must not share the
    // directory of testCompoundCrawlingDeltaFull.
    final File crawlDir =
      crawlFilesFromConfig("compounds", "testCompoundCrawlingUpdateDeltaFull", DeltaImportStrategy.FULL);
    checkAddedBulks(21); // 10 files in dir, 1 compound with 10 files.
    checkDeletedBulks(0);
    assertEquals(21, _deltaService.countEntries("files", true));
    _objectStore.clearStore(STORENAME_BULKS);
    // replace the crawled compound by the modified version and give it a new modification date.
    final File crawlCompound = new File(crawlDir, "files10.zip");
    final File updateCompound = ConfigUtils.getConfigFile(AllTests.BUNDLE_ID, "compounds-update/files10.zip");
    FileUtils.deleteQuietly(crawlCompound);
    FileUtils.copyFile(updateCompound, crawlCompound);
    FileUtils.touch(crawlCompound);
    crawlFiles(crawlDir, DeltaImportStrategy.FULL);
    checkAddOrUpdateBulks(6); // compound changed, 2 elements touched, 3 renamed -> 6 updates
    checkDeletedBulks(3); // 3 deletes.
    assertEquals(21, _deltaService.countEntries("files", true));
  }

  /** test that deleting a compound file yields delete records for the compound and all of its elements. */
  public void testCompoundCrawlingDeleteDeltaFull() throws Exception {
    final File crawlDir =
      crawlFilesFromConfig("compounds", "testCompoundCrawlingDeleteDeltaFull", DeltaImportStrategy.FULL);
    checkAddedBulks(21); // 10 files in dir, 1 compound with 10 files.
    checkDeletedBulks(0);
    assertEquals(21, _deltaService.countEntries("files", true));
    _objectStore.clearStore(STORENAME_BULKS);
    FileUtils.deleteQuietly(new File(crawlDir, "files10.zip"));
    crawlFiles(crawlDir, DeltaImportStrategy.FULL);
    checkUpdateBulks(0);
    checkDeletedBulks(11); // compound and 10 elements deleted.
    assertEquals(10, _deltaService.countEntries("files", true));
  }

  /**
   * remove all content from a workspace directory; the directory itself is kept.
   * 
   * @return the emptied workspace directory.
   */
  private File cleanWorkspaceDirectory(final String workspaceDirName) throws IOException {
    final File workspaceDir = WorkspaceHelper.createWorkingDir(AllTests.BUNDLE_ID, workspaceDirName);
    FileUtils.cleanDirectory(workspaceDir);
    return workspaceDir;
  }

  /**
   * copy a directory from configuration to workspace, skip .svn directories.
   * 
   * @return the workspace directory the files were copied to.
   */
  private File copyConfigFilesToWorkspace(final String configDirName, final String workspaceDirName)
    throws IOException {
    final File configDir = ConfigUtils.getConfigFile(AllTests.BUNDLE_ID, configDirName);
    final File workspaceDir = WorkspaceHelper.createWorkingDir(AllTests.BUNDLE_ID, workspaceDirName);
    FileUtils
      .copyDirectory(configDir, workspaceDir, FileFilterUtils.makeSVNAware(FileFilterUtils.trueFileFilter()));
    return workspaceDir;
  }

  /** copy a configuration directory to the workspace and crawl it without setting a delta strategy parameter. */
  private File crawlFilesFromConfig(final String configDirName, final String workspaceDirName) throws Exception {
    return crawlFilesFromConfig(configDirName, workspaceDirName, null);
  }

  /**
   * copy a configuration directory to the workspace and crawl the workspace directory.
   * 
   * @param deltaUsage
   *          delta strategy to set in the job parameters, or null to leave the parameter unset.
   * @return the crawled workspace directory.
   */
  private File crawlFilesFromConfig(final String configDirName, final String workspaceDirName,
    final DeltaImportStrategy deltaUsage) throws Exception {
    final File workspaceDir = copyConfigFilesToWorkspace(configDirName, workspaceDirName);
    crawlFiles(workspaceDir, deltaUsage);
    return workspaceDir;
  }

  /** crawl a directory without setting a delta strategy parameter. */
  private void crawlFiles(final File dirToCrawl) throws Exception {
    crawlFiles(dirToCrawl, null);
  }

  /**
   * start a crawl job on the directory, wait for the run to complete and commit the bulkbuilder job afterwards. If
   * waiting fails for any reason, the crawl job is canceled and the failure is propagated.
   */
  private void crawlFiles(final File dirToCrawl, final DeltaImportStrategy deltaUsage) throws Exception {
    final String crawlJobId = startFileCrawlerJob(dirToCrawl, deltaUsage);
    boolean completed = false;
    try {
      waitForJobRunCompleted(JOBNAME_CRAWLFILES, crawlJobId, JOB_COMPLETION_TIMEOUT);
      completed = true;
    } finally {
      if (!completed) {
        // covers failed assertions (errors) as well as exceptions: do not leave the job running.
        _jobRunEngine.cancelJob(JOBNAME_CRAWLFILES, crawlJobId);
      }
    }
    _bulkbuilder.commitJob(JOBNAME_BUILDBULKS);
  }

  /** check the number of records in the "added" bucket, no matter if they are flagged as update or not. */
  private void checkAddOrUpdateBulks(final int expectedRecordCount) throws Exception {
    assertAddedBucketRecords(expectedRecordCount, null, true);
  }

  /** check the number of records in the "added" bucket, none of them may have the update attribute. */
  private void checkAddedBulks(final int expectedRecordCount) throws Exception {
    assertAddedBucketRecords(expectedRecordCount, Boolean.FALSE, true);
  }

  /** check the number of records in the "added" bucket, all of them must be flagged as update. */
  private void checkUpdateBulks(final int expectedRecordCount) throws ObjectStoreException, Exception {
    assertAddedBucketRecords(expectedRecordCount, Boolean.TRUE, false);
  }

  /**
   * common implementation of the checks on the "added" bucket.
   * 
   * @param expectedRecordCount
   *          expected total number of records in all bulks of the bucket.
   * @param update
   *          expected state of the update flag, see {@link #checkAddedRecords(Collection, Boolean)}.
   * @param noBulksIfZero
   *          if true and no records are expected, the bucket must not contain any bulk object at all. If false, the
   *          records of existing bulks are counted even if none are expected.
   */
  private void assertAddedBucketRecords(final int expectedRecordCount, final Boolean update,
    final boolean noBulksIfZero) throws Exception {
    final Collection<StoreObject> bulks = _objectStore.getStoreObjectInfos(STORENAME_BULKS, BUCKET_ADDED);
    assertNotNull(bulks);
    if (noBulksIfZero && expectedRecordCount == 0) {
      assertTrue(bulks.isEmpty());
    } else {
      assertEquals(expectedRecordCount, checkAddedRecords(bulks, update));
    }
  }

  /**
   * read all records from the given bulks and check that each has the attributes expected from the file crawler.
   * 
   * @param update
   *          true: each record must have the update attribute set to true. false: no record may contain the update
   *          attribute. null: the update attribute is not checked.
   * @return total number of records found in the bulks.
   */
  private int checkAddedRecords(final Collection<StoreObject> bulks, final Boolean update) throws Exception {
    int recordCount = 0;
    for (final StoreObject bulk : bulks) {
      final InputStream bulkStream = _objectStore.readObject(STORENAME_BULKS, bulk.getId());
      try {
        final BinaryObjectStreamIterator records = new BinaryObjectStreamIterator(bulkStream);
        while (records.hasNext()) {
          final Record record = records.next();
          assertNotNull(record);
          recordCount++;
          assertNotNull(record.getId());
          assertEquals("files", record.getSource());
          final AnyMap metadata = record.getMetadata();
          if (update != null) {
            if (update) {
              // compare against Boolean.TRUE so that a missing flag is reported as failed assertion.
              assertTrue(Boolean.TRUE.equals(metadata.getBooleanValue(ImportingConstants.ATTRIBUTE_UPDATE)));
            } else {
              assertFalse(metadata.containsKey(ImportingConstants.ATTRIBUTE_UPDATE));
            }
          }
          assertTrue(metadata.containsKey(ImportingConstants.ATTRIBUTE_DELTA_HASH));
          assertTrue(metadata.containsKey(FileCrawlerService.PROPERTY_FILE_NAME));
          assertTrue(metadata.containsKey(FileCrawlerService.PROPERTY_FILE_PATH));
          assertTrue(metadata.containsKey(FileCrawlerService.PROPERTY_FILE_FOLDER));
          assertTrue(metadata.get(FileCrawlerService.PROPERTY_FILE_LAST_MODIFIED).isDateTime());
          assertTrue(metadata.get(FileCrawlerService.PROPERTY_FILE_SIZE).isLong());
          // records carrying the compound flag must not have the content attachment, all others must.
          if (metadata.containsKey(ImportingConstants.ATTRIBUTE_COMPOUNDFLAG)) {
            assertFalse(record.hasAttachment(FileCrawlerService.ATTACHMENT_FILE_CONTENT));
          } else {
            assertTrue(record.hasAttachment(FileCrawlerService.ATTACHMENT_FILE_CONTENT));
          }
        }
      } finally {
        IOUtils.closeQuietly(bulkStream);
      }
    }
    return recordCount;
  }

  /**
   * check the number of records in the "deleted" bucket. If no records are expected, the bucket must not contain any
   * bulk object at all.
   */
  private void checkDeletedBulks(final int expectedRecordCount) throws Exception {
    final Collection<StoreObject> bulks = _objectStore.getStoreObjectInfos(STORENAME_BULKS, BUCKET_DELETED);
    assertNotNull(bulks);
    if (expectedRecordCount == 0) {
      assertTrue(bulks.isEmpty());
    } else {
      assertEquals(expectedRecordCount, checkDeletedRecords(bulks));
    }
  }

  /**
   * read all records from the given bulks and check that each has an ID, the source "files" and exactly two metadata
   * entries.
   * 
   * @return total number of records found in the bulks.
   */
  private int checkDeletedRecords(final Collection<StoreObject> bulks) throws Exception {
    int recordCount = 0;
    for (final StoreObject bulk : bulks) {
      final InputStream bulkStream = _objectStore.readObject(STORENAME_BULKS, bulk.getId());
      try {
        final BinaryObjectStreamIterator records = new BinaryObjectStreamIterator(bulkStream);
        while (records.hasNext()) {
          final Record record = records.next();
          assertNotNull(record);
          recordCount++;
          assertNotNull(record.getId());
          assertEquals("files", record.getSource());
          assertEquals(2, record.getMetadata().size());
        }
      } finally {
        IOUtils.closeQuietly(bulkStream);
      }
    }
    return recordCount;
  }

  /**
   * create the crawl job from the job template with the given directory as root folder and start it in runOnce mode.
   * 
   * @param deltaUsage
   *          delta strategy to set as job parameter, or null to keep whatever the template defines.
   * @return the ID of the started job run.
   */
  private String startFileCrawlerJob(final File dirToCrawl, final DeltaImportStrategy deltaUsage) throws Exception {
    final JobDefinition jobTemplate = _defPersistence.getJob(JOBNAME_CRAWLFILES + "Template");
    final AnyMap jobAny = jobTemplate.toAny(false);
    jobAny.put("name", JOBNAME_CRAWLFILES);
    jobAny.getMap("parameters").put("rootFolder", dirToCrawl.getAbsolutePath());
    if (deltaUsage != null) {
      jobAny.getMap("parameters").put("deltaImportStrategy", deltaUsage.getExternalName());
    }
    final JobDefinition job = new JobDefinition(jobAny);
    _defPersistence.addJob(job);
    return _jobRunEngine.startJob(JOBNAME_CRAWLFILES, JobRunMode.RUNONCE);
  }
}
