/***********************************************************************************************************************
 * Copyright (c) 2008,2012 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
 * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: Andreas Schank (Attensity Europe GmbH) - initial API and implementation
 **********************************************************************************************************************/
package org.eclipse.smila.importing.compounds.simple.test;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;

import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnySeq;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.importing.compounds.CompoundExtractor;
import org.eclipse.smila.importing.compounds.CompoundExtractorException;
import org.eclipse.smila.importing.compounds.simple.SimpleCompoundExtractorService;
import org.eclipse.smila.test.DeclarativeServiceTestCase;
import org.eclipse.smila.utils.config.ConfigUtils;

/**
 * @author scank01
 * 
 */
public class TestSimpleCompoundExtractorService extends DeclarativeServiceTestCase {

  private static final String BUNDLE_NAME = "org.eclipse.smila.importing.compounds.simple.test";

  /** the compound extractor under test. */
  private CompoundExtractor _compoundExtractor;

  /** {@inheritDoc} */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    _compoundExtractor = getService(CompoundExtractor.class);
    assertNotNull(_compoundExtractor);
    assertTrue(_compoundExtractor instanceof SimpleCompoundExtractorService);
  }

  /** {@inheritDoc} */
  @Override
  protected void tearDown() throws Exception {
    super.tearDown();
  }

  /**
   * Test method for
   * {@link org.eclipse.smila.importing.compounds.simple.SimpleCompoundExtractorService#canExtract(java.io.File)}.
   */
  public void testCanExtractFile() {
    assertFalse(_compoundExtractor.canExtract(null));
    assertFalse(_compoundExtractor.canExtract(new File("null")));
    assertTrue(_compoundExtractor.canExtract(new File("a.zip")));
    assertTrue(_compoundExtractor.canExtract(new File("a.ZIP")));
    assertTrue(_compoundExtractor.canExtract(new File("a.zIp")));
    assertTrue(_compoundExtractor.canExtract(new File("a.gz")));
    assertTrue(_compoundExtractor.canExtract(new File("a.GZ")));
    assertTrue(_compoundExtractor.canExtract(new File("a.gZ")));
  }

  /**
   * Test method for
   * {@link org.eclipse.smila.importing.compounds.simple.SimpleCompoundExtractorService#canExtract(java.net.URL, java.lang.String)}
   * .
   * 
   * @throws MalformedURLException
   */
  public void testCanExtractURLString() throws MalformedURLException {
    assertFalse(_compoundExtractor.canExtract((URL) null, null));
    assertFalse(_compoundExtractor.canExtract((String) null, null));
    assertFalse(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), null));
    assertFalse(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), ""));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), "application/x-gunzip"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), "application/x-gzip"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d"), "application/zip"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.gz"), "application/x-gunzip"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.GZ"), "application/x-gzip"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.zip"), "application/zip"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.ZIP"), "application/zip"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.ZIP"), "application/octet-stream"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.gz"), "application/octet-stream"));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.gz"), ""));
    assertTrue(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.zip"), null));
    assertFalse(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.gz"), "application/pdf"));
    assertFalse(_compoundExtractor.canExtract(new URL("http://x.y.z:8080/a/b/c/d.zip"), "text/plain"));
  }

  /**
   * Test method for
   * {@link org.eclipse.smila.importing.compounds.simple.SimpleCompoundExtractorService#extract(java.io.InputStream)}.
   * 
   * @throws CompoundExtractorException
   */
  public void testExtractGzipInZip() throws CompoundExtractorException {
    doTestTestZipEntries("test.zip");
  }

  /**
   * Test method for
   * {@link org.eclipse.smila.importing.compounds.simple.SimpleCompoundExtractorService#extract(java.io.InputStream)}
   * using a zip created by WinZip.
   * 
   * @throws CompoundExtractorException
   */
  public void testExtractGzipInWinzipZip() throws CompoundExtractorException {
    doTestTestZipEntries("test-winzip.zip");
  }

  /** test the test.zip variants. */
  private void doTestTestZipEntries(final String baseZipName) throws CompoundExtractorException {
    final Collection<String> compounds =
      Arrays.asList(baseZipName, baseZipName + "/\u00e4/\u00b3zip.zip", baseZipName + "/doc.txt.gz");
    final Collection<String> files =
      Arrays.asList(baseZipName, baseZipName + "/\u00e4/\u00b3zip.zip", baseZipName + "/doc.txt.gz", baseZipName
        + "/doc.txt.gz/doc.txt", baseZipName + "/doc2.txt", baseZipName + "/\u00e4/\u00fc.txt", baseZipName
        + "/\u00e4/\u00b3zip.zip/\u00b3zip/\u00fc.txt");
    final Collection<String> internalFileNames =
      Arrays.asList(baseZipName, "\u00e4/\u00b3zip.zip", "doc.txt.gz", "doc.txt", "doc2.txt", "\u00e4/\u00fc.txt",
        "\u00b3zip/\u00fc.txt");
    final Iterator<Record> iter =
      _compoundExtractor
        .extract(ConfigUtils.getConfigStream(BUNDLE_NAME, baseZipName), baseZipName, "test-content");
    final Collection<String> foundFiles = new HashSet<String>();
    while (iter.hasNext()) {
      final Record record = iter.next();
      final String fileName = record.getId();
      foundFiles.add(fileName);
      assertTrue(fileName + " not expected.", files.contains(fileName));
      assertTrue(fileName + " not correct.",
        fileName.endsWith(record.getMetadata().getStringValue(CompoundExtractor.KEY_FILE_NAME)));
      final String internalFileName = record.getMetadata().getStringValue(CompoundExtractor.KEY_FILE_NAME);
      assertTrue(internalFileName + " is no correct entry file name", internalFileNames.contains(internalFileName));
      if (fileName.equals(baseZipName)) {
        assertTrue(record.getMetadata().containsKey(CompoundExtractor.KEY_IS_ROOT_COMPOUND_RECORD));
        assertTrue(record.getMetadata().getBooleanValue(CompoundExtractor.KEY_IS_ROOT_COMPOUND_RECORD));
      } else {
        assertFalse(record.getMetadata().containsKey(CompoundExtractor.KEY_IS_ROOT_COMPOUND_RECORD));
      }
      if (compounds.contains(fileName)) {
        assertTrue(record.getMetadata().getBooleanValue(CompoundExtractor.KEY_IS_COMPOUND));
        assertFalse(record.hasAttachments());
      } else {
        assertFalse(record.getMetadata().containsKey(CompoundExtractor.KEY_IS_COMPOUND));
        assertTrue(record.hasAttachment("test-content"));
      }
      AnySeq compoundsSeq = DataFactory.DEFAULT.createAnySeq();
      if (record.getMetadata().containsKey(CompoundExtractor.KEY_COMPOUNDS)) {
        compoundsSeq = record.getMetadata().getSeq(CompoundExtractor.KEY_COMPOUNDS);
      }
      String computedId = "";
      for (final Any compound : compoundsSeq) {
        computedId += compound.asValue().asString() + "/";
      }
      computedId += record.getMetadata().getStringValue(CompoundExtractor.KEY_FILE_NAME);
      assertEquals("ids do not match.", computedId, record.getId());
    }
    assertEquals(files.size(), foundFiles.size());
  }

  /**
   * Test method for
   * {@link org.eclipse.smila.importing.compounds.simple.SimpleCompoundExtractorService#extract(java.io.InputStream)}.
   * 
   * @throws CompoundExtractorException
   */
  public void testExtractGzipInZipInGzip() throws CompoundExtractorException {
    final Collection<String> compounds =
      Arrays.asList("1.zip.gz", "1.zip.gz/1.zip", "1.zip.gz/1.zip/1/2/3/4.txt.gz");
    final Collection<String> files =
      Arrays.asList("1.zip.gz", "1.zip.gz/1.zip", "1.zip.gz/1.zip/1/2/3/4.txt.gz", "1.zip.gz/1.zip/1/2/3/4.txt",
        "1.zip.gz/1.zip/1/2/3/4.txt.gz/4.txt");
    final Collection<String> internalFileNames =
      Arrays.asList("1.zip.gz", "1.zip", "1/2/3/4.txt.gz", "1/2/3/4.txt", "4.txt");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("1.zip.gz/1.zip/1/2/3/4.txt.gz", 68L);
    sizes.put("1.zip.gz/1.zip/1/2/3/4.txt.gz/4.txt", 1018L);
    sizes.put("1.zip.gz/1.zip/1/2/3/4.txt", 1018L);
    sizes.put("1.zip.gz/1.zip", 768L);
    final Iterator<Record> iter =
      _compoundExtractor.extract(ConfigUtils.getConfigStream(BUNDLE_NAME, "1.zip.gz"), "1.zip.gz", "test-content");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} using a zip file created
   * with linux.
   * 
   * @throws CompoundExtractorException
   */
  public void testLinuxZip() throws CompoundExtractorException {
    final Collection<String> compounds = Arrays.asList("\u00e4_linux.zip");
    final Collection<String> files =
      Arrays.asList("\u00e4_linux.zip", "\u00e4_linux.zip/\u00e4/\u00b3.txt", "\u00e4_linux.zip/\u00e4/\u00e4.txt");
    final Collection<String> internalFileNames =
      Arrays.asList("\u00e4_linux.zip", "\u00e4/\u00b3.txt", "\u00e4/\u00e4.txt");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("\u00e4_linux.zip/\u00e4/\u00b3.txt", 11L);
    sizes.put("\u00e4_linux.zip/\u00e4/\u00e4.txt", 11L);
    final Iterator<Record> iter =
      _compoundExtractor.extract(ConfigUtils.getConfigStream(BUNDLE_NAME, "\u00e4_linux.zip"), "\u00e4_linux.zip",
        "test-content");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /**
   * Test method for {@link CompoundExtractor#extract(java.io.InputStream, String, String)} extracting a gz file.
   * 
   * @throws CompoundExtractorException
   */
  public void testGz() throws CompoundExtractorException {
    final Collection<String> compounds = Arrays.asList("doc.txt.gz");
    final Collection<String> files = Arrays.asList("doc.txt.gz", "doc.txt.gz/doc.txt");
    final Collection<String> internalFileNames = Arrays.asList("doc.txt.gz", "doc.txt");
    final Map<String, Long> sizes = new HashMap<String, Long>();
    sizes.put("doc.txt.gz/doc.txt", 16L);
    final Iterator<Record> iter =
      _compoundExtractor.extract(ConfigUtils.getConfigStream(BUNDLE_NAME, "doc.txt.gz"), "doc.txt.gz",
        "test-content");
    checkResultingRecords(compounds, files, internalFileNames, sizes, iter);
  }

  /** check the records. */
  private void checkResultingRecords(final Collection<String> compounds, final Collection<String> files,
    final Collection<String> internalFileNames, final Map<String, Long> sizes, final Iterator<Record> iter) {
    final Collection<String> foundFiles = new HashSet<String>();
    while (iter.hasNext()) {
      final Record record = iter.next();
      final String fileName = record.getId();
      foundFiles.add(fileName);
      assertTrue(fileName + " not expected.", files.contains(fileName));
      assertEquals("Uncompressed size does not match for " + fileName, sizes.get(fileName), record.getMetadata()
        .getLongValue(CompoundExtractor.KEY_SIZE));
      final String internalFileName = record.getMetadata().getStringValue(CompoundExtractor.KEY_FILE_NAME);
      assertTrue(internalFileName + " is no correct entry file name", internalFileNames.contains(internalFileName));
      if (compounds.contains(fileName)) {
        assertTrue("CompoundExtractor.KEY_IS_COMPOUND not present",
          record.getMetadata().containsKey(CompoundExtractor.KEY_IS_COMPOUND));
        assertTrue("CompoundExtractor.KEY_IS_COMPOUND not set to true",
          record.getMetadata().getBooleanValue(CompoundExtractor.KEY_IS_COMPOUND));
        assertFalse(record.hasAttachments());
      } else {
        assertFalse(record.getId() + " has compound flag",
          record.getMetadata().containsKey(CompoundExtractor.KEY_IS_COMPOUND));
        assertTrue(record.hasAttachment("test-content"));
      }
      AnySeq compoundsSeq = DataFactory.DEFAULT.createAnySeq();
      if (record.getMetadata().containsKey(CompoundExtractor.KEY_COMPOUNDS)) {
        compoundsSeq = record.getMetadata().getSeq(CompoundExtractor.KEY_COMPOUNDS);
      }
      String computedId = "";
      for (final Any compound : compoundsSeq) {
        computedId += compound.asValue().asString() + "/";
      }
      computedId += record.getMetadata().getStringValue(CompoundExtractor.KEY_FILE_NAME);
      assertEquals("ids do not match.", computedId, record.getId());
    }
    assertEquals(files.size(), foundFiles.size());
  }

}
