/*******************************************************************************
 * Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. This
 * program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 which
 * accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: Andreas Weber (Empolis Information Management GmbH) - initial API and implementation
 *******************************************************************************/
package org.eclipse.smila.tika.test;

import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.tika.TikaPipelet;

/**
 * Tests for {@link TikaPipelet}: each test converts one sample document in several parameter configurations
 * (explicit content type, file name hint, no hints, HTML export) and checks the converted content.
 * <p>
 * The two-argument {@code executeTest}, the assertion methods and the constants {@code CONTENT_TYPE_ATTRIBUTE},
 * {@code CONTENT_TYPE_PARAM} and {@code FILENAME_ATTRIBUTE} are not declared in this class; presumably they are
 * inherited from {@link ConverterPipelineTestBase}.
 */
public class TestTikaPipelet extends ConverterPipelineTestBase {

  /**
   * Extracts HTML or text content from the given file without any additional record parameters.
   * 
   * @param fileName
   *          name of the sample file to convert
   * @return the converted content as returned by the two-argument {@code executeTest}
   * @throws Exception
   *           if the conversion fails
   */
  protected String executeTest(final String fileName) throws Exception {
    final AnyMap additionalRecordParams = DataFactory.DEFAULT.createAnyMap();
    return executeTest(fileName, additionalRecordParams);
  }

  /**
   * Tests HTML extraction: sets {@link TikaPipelet#PROP_EXPORT_AS_HTML} to {@code true} and expects HTML output.
   * 
   * @param fileName
   *          name of the sample file to convert
   * @throws Exception
   *           if the conversion fails
   */
  protected void doHtmlExtraction(final String fileName) throws Exception {
    final AnyMap additionalParams = DataFactory.DEFAULT.createAnyMap();
    additionalParams.put(TikaPipelet.PROP_EXPORT_AS_HTML, true);
    checkHtmlResult(executeTest(fileName, additionalParams));
  }

  /**
   * Tests text extraction while providing the content type of the document.
   * 
   * @param fileName
   *          name of the sample file to convert
   * @param contentType
   *          MIME type passed along with the document
   * @throws Exception
   *           if the conversion fails
   */
  protected void doTextExtractionWithContentType(final String fileName, final String contentType) throws Exception {
    final AnyMap additionalParams = DataFactory.DEFAULT.createAnyMap();
    additionalParams.put(TikaPipelet.PROP_ATTACHMENT_CONTENT_TYPE_ATTRIBUTE, CONTENT_TYPE_ATTRIBUTE);
    additionalParams.put(CONTENT_TYPE_PARAM, contentType);
    checkTextResult(executeTest(fileName, additionalParams));
  }

  /**
   * Tests text extraction while configuring the file name attribute.
   * 
   * @param fileName
   *          name of the sample file to convert
   * @throws Exception
   *           if the conversion fails
   */
  protected void doTextExtractionWithFileName(final String fileName) throws Exception {
    final AnyMap additionalParams = DataFactory.DEFAULT.createAnyMap();
    additionalParams.put(TikaPipelet.PROP_FILE_NAME_ATTRIBUTE, FILENAME_ATTRIBUTE);
    checkTextResult(executeTest(fileName, additionalParams));
  }

  /**
   * Tests simple text extraction without additional parameters.
   * 
   * @param fileName
   *          name of the sample file to convert
   * @throws Exception
   *           if the conversion fails
   */
  protected void doTextExtraction(final String fileName) throws Exception {
    checkTextResult(executeTest(fileName));
  }

  /**
   * Asserts that a plain text result contains the marker word "SMILA" and no '&lt;' character.
   * 
   * @param result
   *          converted content to check
   */
  protected void checkTextResult(final String result) {
    assertTrue("SMILA not contained in text content of converted content: " + result, result.contains("SMILA"));
    assertFalse("Unexpected HTML tags in converted content: " + result, result.contains("<"));
  }

  /**
   * Asserts that an HTML result contains the marker word "SMILA" and a {@code <body>} tag.
   * 
   * @param result
   *          converted content to check
   */
  protected void checkHtmlResult(final String result) {
    assertTrue("SMILA not contained in content of converted content: " + result, result.contains("SMILA"));
    // the message names the tag that is actually checked
    assertTrue("<body> not contained in content of converted content: " + result, result.contains("<body>"));
  }

  /**
   * Runs all four extraction variants for one sample file: with content type, with file name, without parameters,
   * and as HTML - in exactly this order.
   * 
   * @param fileName
   *          name of the sample file to convert
   * @param contentType
   *          MIME type used for the content type variant
   * @throws Exception
   *           if any conversion fails
   */
  private void checkAllExtractionModes(final String fileName, final String contentType) throws Exception {
    doTextExtractionWithContentType(fileName, contentType);
    doTextExtractionWithFileName(fileName);
    doTextExtraction(fileName);
    doHtmlExtraction(fileName);
  }

  /** Tests OpenOffice 2.4 presentation (odp). */
  public void testOPENOFFICE24ODP() throws Exception {
    checkAllExtractionModes("OpenOffice.2.4.odp", "application/vnd.oasis.opendocument.presentation");
  }

  /** Tests OpenOffice 2.4 spreadsheet (ods). */
  public void testOPENOFFICE24ODS() throws Exception {
    checkAllExtractionModes("OpenOffice.2.4.ods", "application/vnd.oasis.opendocument.spreadsheet");
  }

  /** Tests OpenOffice 2.4 text document (odt). */
  public void testOPENOFFICE24ODT() throws Exception {
    checkAllExtractionModes("OpenOffice.2.4.odt", "application/vnd.oasis.opendocument.text");
  }

  /** Tests OpenOffice 3.2 presentation (odp). */
  public void testOPENOFFICE32ODP() throws Exception {
    checkAllExtractionModes("OpenOffice.3.2.odp", "application/vnd.oasis.opendocument.presentation");
  }

  /** Tests OpenOffice 3.2 spreadsheet (ods). */
  public void testOPENOFFICE32ODS() throws Exception {
    checkAllExtractionModes("OpenOffice.3.2.ods", "application/vnd.oasis.opendocument.spreadsheet");
  }

  /** Tests OpenOffice 3.2 text document (odt). */
  public void testOPENOFFICE32ODT() throws Exception {
    checkAllExtractionModes("OpenOffice.3.2.odt", "application/vnd.oasis.opendocument.text");
  }

  /** Tests MS Office 97-2003 Word document (doc). */
  public void testMSOFFICE2003DOC() throws Exception {
    checkAllExtractionModes("MSWORD_97_2003.doc", "application/msword");
  }

  /** Tests MS Office 2010 presentation (pptx). */
  public void testMSOFFICE2010PPTX() throws Exception {
    checkAllExtractionModes("SMILA_PPTX_2010.pptx",
      "application/vnd.openxmlformats-officedocument.presentationml.presentation");
  }

  /** Tests a more complex MS Office 2010 presentation (pptx); per its file name this one needs jempbox. */
  public void testMSOFFICE2010PPTX_needs_jempbox() throws Exception {
    checkAllExtractionModes("SMILA_PPTX_2010_needs-jempbox.pptx",
      "application/vnd.openxmlformats-officedocument.presentationml.presentation");
  }

  /** Tests MS Office 2010 spreadsheet (xlsx). */
  public void testMSOFFICE2010XLSX() throws Exception {
    checkAllExtractionModes("SMILA_XLS_2010.xlsx",
      "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
  }

  /** Tests MS Office 2007 Word document (docx). */
  public void testMSOFFICE2007DOCX() throws Exception {
    checkAllExtractionModes("MSWORD_2007.docx",
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
  }

  /** Tests MS Office 2010 Word document (docx). */
  public void testMSOFFICE2010DOCX() throws Exception {
    checkAllExtractionModes("SMILA_DOCX_2010.docx",
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
  }

  /** Tests MS Office 2010 Word document (docx) stored with a wrong file extension (ppt). */
  public void testMSOFFICE2010DOCXWithWrongExtension() throws Exception {
    checkAllExtractionModes("SMILA_DOCX_2010.ppt",
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
  }

  /** Tests MS Office 2010 Word document (docx) stored without a file extension. */
  public void testMSOFFICE2010DOCXWithoutExtension() throws Exception {
    checkAllExtractionModes("SMILA_DOCX_2010",
      "application/vnd.openxmlformats-officedocument.wordprocessingml.document");
  }

  /** Tests RTF document. */
  public void testRTF() throws Exception {
    checkAllExtractionModes("test.rtf", "application/rtf");
  }

  /** Tests XML document. */
  public void testXML() throws Exception {
    checkAllExtractionModes("test.xml", "text/xml");
  }

  /** Tests MS Office 97-2003 Excel spreadsheet (xls). */
  public void testMSOFFICE2003XLS() throws Exception {
    checkAllExtractionModes("MSEXCEL_97_2003.xls", "application/vnd.ms-excel");
  }

  /** Tests MS Office 97-2003 PowerPoint presentation (ppt). */
  public void testMSOFFICE2003PPT() throws Exception {
    checkAllExtractionModes("MSPPT_97_2000_XP.ppt", "application/vnd.ms-powerpoint");
  }

  /** Tests RSS feed, passed with content type text/xml. */
  public void testRssFeed() throws Exception {
    checkAllExtractionModes("test-feed.rss", "text/xml");
  }
}
