/***********************************************************************************************************************
 * Copyright (c) 2008,2012 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
 * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: Andreas Schank (Attensity Europe GmbH) - initial API and implementation
 **********************************************************************************************************************/
package org.eclipse.smila.importing.compounds.compress.test;

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;

import org.apache.commons.io.FileUtils;
import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnySeq;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.importing.compounds.CompoundExtractor;
import org.eclipse.smila.importing.compounds.CompoundExtractorException;
import org.eclipse.smila.importing.compounds.compress.CommonsCompressCompoundExtractorService;
import org.eclipse.smila.test.DeclarativeServiceTestCase;
import org.eclipse.smila.utils.config.ConfigUtils;

/**
 * Tests for {@link CommonsCompressCompoundExtractorService}.
 * 
 * The test archives are read with {@code ConfigUtils.getConfigStream(BUNDLE_NAME, <file name>)} and handed to the
 * {@link CompoundExtractor} service. The resulting records are then compared against hard-coded expectations (record
 * ids, entry file names, compound flags, attachment presence and, where given, entry sizes).
 */
public class TestCompressCompoundExtractorService extends DeclarativeServiceTestCase {

  /** bundle name passed to {@code ConfigUtils.getConfigStream} to locate the test archives. */
  private static final String BUNDLE_NAME = "org.eclipse.smila.importing.compounds.compress.test";

  /**
   * attachment name passed to the extractor; the assertions expect non-compound records to carry an attachment with
   * exactly this name.
   */
  private static final String CONTENT_ATTACHMENT = "test-content";

  /** the compound extractor under test. */
  private CompoundExtractor _compoundExtractor;

  /** {@inheritDoc} */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    _compoundExtractor = getService(CompoundExtractor.class);
    assertNotNull(_compoundExtractor);
    assertTrue(_compoundExtractor instanceof CommonsCompressCompoundExtractorService);
  }

  /** {@inheritDoc} */
  @Override
  protected void tearDown() throws Exception {
    super.tearDown();
  }

  /**
   * Test method for {@link CompoundExtractor#canExtract(File)}: zip and gz extensions are accepted regardless of case,
   * <code>null</code> and files without a known extension are rejected.
   */
  public void testCanExtractFile() {
    assertFalse(_compoundExtractor.canExtract(null));
    assertFalse(_compoundExtractor.canExtract(new File("null")));
    assertTrue(_compoundExtractor.canExtract(new File("a.zip")));
    assertTrue(_compoundExtractor.canExtract(new File("a.ZIP")));
    assertTrue(_compoundExtractor.canExtract(new File("a.zIp")));
    assertTrue(_compoundExtractor.canExtract(new File("a.gz")));
    assertTrue(_compoundExtractor.canExtract(new File("a.GZ")));
    assertTrue(_compoundExtractor.canExtract(new File("a.gZ")));
  }

  /**
   * Test method for {@link CompoundExtractor#canExtract(URL, String)} and
   * {@link CompoundExtractor#canExtract(String, String)}. The expectations cover combinations of URLs with and without
   * an archive extension and of missing, empty, archive, generic and non-archive mime types.
   * 
   * @throws MalformedURLException
   *           if one of the hard-coded test URLs is malformed.
   */
  public void testCanExtractURLString() throws MalformedURLException {
    assertFalse(_compoundExtractor.canExtract((URL) null, null));
    assertFalse(_compoundExtractor.canExtract((String) null, null));
    assertFalse(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), null));
    assertFalse(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), ""));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), "application/x-gunzip"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), "application/x-gzip"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), "application/zip"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.gz"), "application/x-gunzip"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.GZ"), "application/x-gzip"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.zip"), "application/zip"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.ZIP"), "application/zip"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.ZIP"), "application/octet-stream"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.gz"), "application/octet-stream"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.gz"), ""));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.zip"), null));
    assertFalse(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.gz"), "application/pdf"));
    assertFalse(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.zip"), "text/plain"));
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} for a zip created by 7Zip.
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testExtractGzipInZipWith7Zip() throws CompoundExtractorException {
    doTestTestZipEntries("test-7zip.zip");
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} for a zip created by the
   * Windows 7 built-in zip functionality (going by the test and file names).
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testExtractGzipInZipWindows7NativeZip() throws CompoundExtractorException {
    doTestTestZipEntries("test-windows.zip");
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} using a zip file from linux
   * (internal ZIP has method STORED not DEFLATE).
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testLinuxStoredZipInZip() throws CompoundExtractorException {
    doTestTestZipEntries("test-linux.zip");
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} using a zip created by
   * WinZip.
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testExtractGzipInWinzipZip() throws CompoundExtractorException {
    doTestTestZipEntries("test-winzip.zip");
  }

  /**
   * Extracts one of the test.zip variants and checks the resulting records. All variants are expected to yield the
   * same entries: a nested zip, a gzip, and plain text files with non-ASCII names.
   * 
   * @param baseZipName
   *          file name of the zip variant; it is also used as the id of the root record.
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  private void doTestTestZipEntries(final String baseZipName) throws CompoundExtractorException {
    final Collection<String> compounds =
      Arrays.asList(baseZipName, baseZipName + "/\u00e4/\u00b3zip.zip", baseZipName + "/doc.txt.gz");
    final Collection<String> files =
      Arrays.asList(baseZipName, baseZipName + "/\u00e4/\u00b3zip.zip", baseZipName + "/doc.txt.gz", baseZipName
        + "/doc.txt.gz/doc.txt", baseZipName + "/doc2.txt", baseZipName + "/\u00e4/\u00fc.txt", baseZipName
        + "/\u00e4/\u00b3zip.zip/\u00b3zip/\u00fc.txt");
    final Collection<String> internalFileNames =
      Arrays.asList(baseZipName, "\u00e4/\u00b3zip.zip", "doc.txt.gz", "doc.txt", "doc2.txt", "\u00e4/\u00fc.txt",
        "\u00b3zip/\u00fc.txt");
    final Iterator<Record> iter = extractTestFile(baseZipName);
    final Collection<String> foundFiles = new HashSet<String>();
    while (iter.hasNext()) {
      final Record record = iter.next();
      final String fileName = record.getId();
      foundFiles.add(fileName);
      assertTrue(fileName + " not expected.", files.contains(fileName));
      final String internalFileName = record.getMetadata().getStringValue(CompoundExtractor.KEY_FILE_NAME);
      assertTrue(fileName + " not correct.", fileName.endsWith(internalFileName));
      assertTrue(internalFileName + " is no correct entry file name", internalFileNames.contains(internalFileName));
      // only the record of the archive itself may be marked as root compound record.
      if (fileName.equals(baseZipName)) {
        assertTrue(record.getMetadata().containsKey(CompoundExtractor.KEY_IS_ROOT_COMPOUND_RECORD));
        assertTrue(record.getMetadata().getBooleanValue(CompoundExtractor.KEY_IS_ROOT_COMPOUND_RECORD));
      } else {
        assertFalse(record.getMetadata().containsKey(CompoundExtractor.KEY_IS_ROOT_COMPOUND_RECORD));
      }
      // compounds are flagged and carry no content, everything else carries the content attachment.
      if (compounds.contains(fileName)) {
        assertTrue(fileName + " record has no compound flag set.",
          record.getMetadata().containsKey(CompoundExtractor.KEY_IS_COMPOUND));
        assertTrue(record.getMetadata().getBooleanValue(CompoundExtractor.KEY_IS_COMPOUND));
        assertFalse(record.hasAttachments());
      } else {
        assertFalse(record.getMetadata().containsKey(CompoundExtractor.KEY_IS_COMPOUND));
        assertTrue(record.hasAttachment(CONTENT_ATTACHMENT));
      }
      assertIdMatchesCompoundPath(record);
    }
    assertEquals("unexpected number of extracted records, found: " + foundFiles, files.size(), foundFiles.size());
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} using a gzip file that
   * contains a zip which in turn contains a gzip.
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testExtractGzipInZipInGzip() throws CompoundExtractorException {
    final Collection<String> compounds =
      Arrays.asList("1.zip.gz", "1.zip.gz/1.zip", "1.zip.gz/1.zip/1/2/3/4.txt.gz");
    final Collection<String> files =
      Arrays.asList("1.zip.gz", "1.zip.gz/1.zip", "1.zip.gz/1.zip/1/2/3/4.txt.gz", "1.zip.gz/1.zip/1/2/3/4.txt",
        "1.zip.gz/1.zip/1/2/3/4.txt.gz/4.txt");
    final Collection<String> internalFileNames =
      Arrays.asList("1.zip.gz", "1.zip", "1/2/3/4.txt.gz", "1/2/3/4.txt", "4.txt");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("1.zip.gz/1.zip/1/2/3/4.txt.gz", 68L);
    sizes.put("1.zip.gz/1.zip/1/2/3/4.txt.gz/4.txt", 1018L);
    sizes.put("1.zip.gz/1.zip/1/2/3/4.txt", 1018L);
    sizes.put("1.zip.gz/1.zip", 768L);
    final Iterator<Record> iter = extractTestFile("1.zip.gz");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} using a zip file from 7zip.
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testZipInZip() throws CompoundExtractorException {
    final Collection<String> compounds = Arrays.asList("alldocs.zip", "alldocs.zip/docs.zip");
    final Collection<String> files =
      Arrays.asList("alldocs.zip", "alldocs.zip/doc3.txt", "alldocs.zip/docs.zip", "alldocs.zip/docs.zip/doc1.txt",
        "alldocs.zip/docs.zip/doc2.txt");
    final Collection<String> internalFileNames =
      Arrays.asList("alldocs.zip", "docs.zip", "doc1.txt", "doc2.txt", "doc3.txt");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("alldocs.zip/docs.zip", 310L);
    sizes.put("alldocs.zip/docs.zip/doc1.txt", 16L);
    sizes.put("alldocs.zip/docs.zip/doc2.txt", 16L);
    sizes.put("alldocs.zip/doc3.txt", 16L);
    final Iterator<Record> iter = extractTestFile("alldocs.zip");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} using a zip file created by
   * windows 7 built-in zipping functionality.
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testWindows7NativeZip() throws CompoundExtractorException {
    final Collection<String> compounds = Arrays.asList("\u00e4.zip");
    final Collection<String> files =
      Arrays.asList("\u00e4.zip", "\u00e4.zip/\u00e4/\u00b3.txt", "\u00e4.zip/\u00e4/\u00e4.txt");
    final Collection<String> internalFileNames =
      Arrays.asList("\u00e4.zip", "\u00e4/\u00b3.txt", "\u00e4/\u00e4.txt");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("\u00e4.zip/\u00e4/\u00b3.txt", 9L);
    sizes.put("\u00e4.zip/\u00e4/\u00e4.txt", 9L);
    final Iterator<Record> iter = extractTestFile("\u00e4.zip");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} using a zip file created by
   * 7Zip zipping functionality.
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void test7ZipZip() throws CompoundExtractorException {
    final Collection<String> compounds = Arrays.asList("\u00e47.zip");
    final Collection<String> files =
      Arrays.asList("\u00e47.zip", "\u00e47.zip/\u00e4/\u00b3.txt", "\u00e47.zip/\u00e4/\u00e4.txt");
    final Collection<String> internalFileNames =
      Arrays.asList("\u00e47.zip", "\u00e4/\u00b3.txt", "\u00e4/\u00e4.txt");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("\u00e47.zip/\u00e4/\u00b3.txt", 9L);
    sizes.put("\u00e47.zip/\u00e4/\u00e4.txt", 9L);
    final Iterator<Record> iter = extractTestFile("\u00e47.zip");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} using a zip file created
   * with linux.
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testLinuxZip() throws CompoundExtractorException {
    final Collection<String> compounds = Arrays.asList("\u00e4_linux.zip");
    final Collection<String> files =
      Arrays.asList("\u00e4_linux.zip", "\u00e4_linux.zip/\u00e4/\u00b3.txt", "\u00e4_linux.zip/\u00e4/\u00e4.txt");
    final Collection<String> internalFileNames =
      Arrays.asList("\u00e4_linux.zip", "\u00e4/\u00b3.txt", "\u00e4/\u00e4.txt");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("\u00e4_linux.zip/\u00e4/\u00b3.txt", 11L);
    sizes.put("\u00e4_linux.zip/\u00e4/\u00e4.txt", 11L);
    final Iterator<Record> iter = extractTestFile("\u00e4_linux.zip");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} extracting a tar.
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testTar() throws CompoundExtractorException {
    final Collection<String> compounds = Arrays.asList("docs.tar");
    final Collection<String> files = Arrays.asList("docs.tar", "docs.tar/doc1.txt", "docs.tar/doc2.txt");
    final Collection<String> internalFileNames = Arrays.asList("docs.tar", "doc1.txt", "doc2.txt");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("docs.tar/doc1.txt", 16L);
    sizes.put("docs.tar/doc2.txt", 16L);
    final Iterator<Record> iter = extractTestFile("docs.tar");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} extracting a tgz.
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testTgz() throws CompoundExtractorException {
    final Collection<String> compounds = Arrays.asList("docs.tgz", "docs.tgz/docs.tar");
    final Collection<String> files =
      Arrays.asList("docs.tgz", "docs.tgz/docs.tar", "docs.tgz/docs.tar/doc1.txt", "docs.tgz/docs.tar/doc2.txt");
    final Collection<String> internalFileNames = Arrays.asList("docs.tgz", "docs.tar", "doc1.txt", "doc2.txt");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("docs.tgz/docs.tar", 3072L);
    sizes.put("docs.tgz/docs.tar/doc1.txt", 16L);
    sizes.put("docs.tgz/docs.tar/doc2.txt", 16L);
    final Iterator<Record> iter = extractTestFile("docs.tgz");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String, String)} extracting a cpio
   * archive (created on ubuntu). The mime type is passed explicitly.
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testCpio() throws CompoundExtractorException {
    final Collection<String> compounds = Arrays.asList("docs.cpio");
    final Collection<String> files = Arrays.asList("docs.cpio", "docs.cpio/doc1.txt", "docs.cpio/doc2.txt");
    final Collection<String> internalFileNames = Arrays.asList("docs.cpio", "doc1.txt", "doc2.txt");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("docs.cpio/doc1.txt", 16L);
    sizes.put("docs.cpio/doc2.txt", 16L);
    final Iterator<Record> iter = extractTestFile("docs.cpio", "application/x-cpio");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} extracting a tar.gz.
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testTarGz() throws CompoundExtractorException {
    final Collection<String> compounds = Arrays.asList("docs.tar.gz", "docs.tar.gz/docs.tar");
    final Collection<String> files =
      Arrays.asList("docs.tar.gz", "docs.tar.gz/docs.tar", "docs.tar.gz/docs.tar/doc1.txt",
        "docs.tar.gz/docs.tar/doc2.txt");
    final Collection<String> internalFileNames = Arrays.asList("docs.tar.gz", "docs.tar", "doc1.txt", "doc2.txt");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("docs.tar.gz/docs.tar", 3072L);
    sizes.put("docs.tar.gz/docs.tar/doc1.txt", 16L);
    sizes.put("docs.tar.gz/docs.tar/doc2.txt", 16L);
    final Iterator<Record> iter = extractTestFile("docs.tar.gz");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} extracting a bzip2 file.
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testBzip2() throws CompoundExtractorException {
    final Collection<String> compounds = Arrays.asList("doc.txt.bz2");
    final Collection<String> files = Arrays.asList("doc.txt.bz2", "doc.txt.bz2/doc.txt");
    final Collection<String> internalFileNames = Arrays.asList("doc.txt.bz2", "doc.txt");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("doc.txt.bz2/doc.txt", 16L);
    final Iterator<Record> iter = extractTestFile("doc.txt.bz2");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} extracting a gz file.
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testGz() throws CompoundExtractorException {
    final Collection<String> compounds = Arrays.asList("doc.txt.gz");
    final Collection<String> files = Arrays.asList("doc.txt.gz", "doc.txt.gz/doc.txt");
    final Collection<String> internalFileNames = Arrays.asList("doc.txt.gz", "doc.txt");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("doc.txt.gz/doc.txt", 16L);
    final Iterator<Record> iter = extractTestFile("doc.txt.gz");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String, String)} extracting a jar
   * file. The mime type is passed explicitly.
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  public void testJar() throws CompoundExtractorException {
    final Collection<String> compounds = Arrays.asList("test.jar");
    final Collection<String> files =
      Arrays.asList("test.jar", "test.jar/org/eclipse/smila/importing/compounds/compress/test/test.java");
    final Collection<String> internalFileNames =
      Arrays.asList("test.jar", "org/eclipse/smila/importing/compounds/compress/test/test.java");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("test.jar/org/eclipse/smila/importing/compounds/compress/test/test.java", 857L);
    final Iterator<Record> iter = extractTestFile("test.jar", "application/java-archive");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /**
   * Tests that unprocessed files get deleted by the finalizer. Please test manually because we don't have too much
   * confidence in the GC's expected behavior... (The method name deliberately does not start with "test", so it is
   * not picked up as a test automatically.)
   * 
   * @throws CompoundExtractorException
   *           if the extraction fails.
   * @throws InterruptedException
   *           if the thread is interrupted while waiting for the GC.
   */
  public void manual_testFinalize() throws CompoundExtractorException, InterruptedException {
    Iterator<Record> iter = extractTestFile("\u00e4_linux.zip");
    // so, we don't process the iterator, but check first, if we can detect the temporary files.
    // be sure, this test does not set the tmp.dir property...
    final File tmpDir = new File(FileUtils.getTempDirectory(), "org.eclipse.smila.importing.compounds.compress");
    String[] fileNames = tmpDir.list();
    // File.list() returns null if the directory does not exist or cannot be read: fail with a message, not an NPE.
    assertNotNull("temporary directory " + tmpDir + " does not exist or cannot be listed.", fileNames);
    assertTrue(fileNames.length > 0);
    // perform GC and check again, if the files have been deleted.
    iter = null;
    System.gc();
    Thread.sleep(1000);
    fileNames = tmpDir.list();
    // a null listing means the directory itself is gone, so no temporary files are left either.
    assertTrue("temporary files in " + tmpDir + " have not been deleted.", fileNames == null
      || fileNames.length == 0);
  }

  /**
   * Checks the records delivered by the extractor against the expectations.
   * 
   * @param compounds
   *          ids of the records that must be flagged as compounds and must not have attachments. All other records
   *          must not have the compound flag and must have the content attachment.
   * @param files
   *          ids of all expected records; the number of distinct ids found must match its size.
   * @param internalFileNames
   *          all allowed values of {@link CompoundExtractor#KEY_FILE_NAME}.
   * @param sizes
   *          expected value of {@link CompoundExtractor#KEY_SIZE} by record id. For ids without an entry the record is
   *          expected to deliver <code>null</code> as size.
   * @param iter
   *          the records to check.
   */
  private void checkResultingRecords(final Collection<String> compounds, final Collection<String> files,
    final Collection<String> internalFileNames, final Map<String, Long> sizes, final Iterator<Record> iter) {
    final Collection<String> foundFiles = new HashSet<String>();
    while (iter.hasNext()) {
      final Record record = iter.next();
      final String fileName = record.getId();
      foundFiles.add(fileName);
      assertTrue(fileName + " not expected.", files.contains(fileName));
      assertEquals("Uncompressed size does not match for " + fileName, sizes.get(fileName), record.getMetadata()
        .getLongValue(CompoundExtractor.KEY_SIZE));
      final String internalFileName = record.getMetadata().getStringValue(CompoundExtractor.KEY_FILE_NAME);
      assertTrue(internalFileName + " is no correct entry file name", internalFileNames.contains(internalFileName));
      if (compounds.contains(fileName)) {
        assertTrue("CompoundExtractor.KEY_IS_COMPOUND not present",
          record.getMetadata().containsKey(CompoundExtractor.KEY_IS_COMPOUND));
        assertTrue("CompoundExtractor.KEY_IS_COMPOUND not set to true",
          record.getMetadata().getBooleanValue(CompoundExtractor.KEY_IS_COMPOUND));
        assertFalse(record.hasAttachments());
      } else {
        assertFalse(record.getId() + " has compound flag",
          record.getMetadata().containsKey(CompoundExtractor.KEY_IS_COMPOUND));
        assertTrue(record.hasAttachment(CONTENT_ATTACHMENT));
      }
      assertIdMatchesCompoundPath(record);
    }
    assertEquals("unexpected number of extracted records, found: " + foundFiles, files.size(), foundFiles.size());
  }

  /**
   * Asserts that the record id equals the values of {@link CompoundExtractor#KEY_COMPOUNDS} (if present), each
   * followed by "/", followed by the value of {@link CompoundExtractor#KEY_FILE_NAME}.
   * 
   * @param record
   *          the record to check.
   */
  private void assertIdMatchesCompoundPath(final Record record) {
    AnySeq compoundsSeq = DataFactory.DEFAULT.createAnySeq();
    if (record.getMetadata().containsKey(CompoundExtractor.KEY_COMPOUNDS)) {
      compoundsSeq = record.getMetadata().getSeq(CompoundExtractor.KEY_COMPOUNDS);
    }
    final StringBuilder computedId = new StringBuilder();
    for (final Any compound : compoundsSeq) {
      computedId.append(compound.asValue().asString()).append("/");
    }
    computedId.append(record.getMetadata().getStringValue(CompoundExtractor.KEY_FILE_NAME));
    assertEquals("ids do not match.", computedId.toString(), record.getId());
  }

  /**
   * Extracts a test archive without giving a mime type.
   * 
   * @param fileName
   *          name of the archive; used to load the stream and passed to the extractor as file name.
   * @return the iterator delivered by the extractor.
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  private Iterator<Record> extractTestFile(final String fileName) throws CompoundExtractorException {
    return _compoundExtractor.extract(ConfigUtils.getConfigStream(BUNDLE_NAME, fileName), fileName,
      CONTENT_ATTACHMENT);
  }

  /**
   * Extracts a test archive with an explicit mime type.
   * 
   * @param fileName
   *          name of the archive; used to load the stream and passed to the extractor as file name.
   * @param mimeType
   *          mime type passed to the extractor.
   * @return the iterator delivered by the extractor.
   * @throws CompoundExtractorException
   *           if the extraction fails.
   */
  private Iterator<Record> extractTestFile(final String fileName, final String mimeType)
    throws CompoundExtractorException {
    return _compoundExtractor.extract(ConfigUtils.getConfigStream(BUNDLE_NAME, fileName), fileName, mimeType,
      CONTENT_ATTACHMENT);
  }

}
