/*******************************************************************************
 * Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. This
 * program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 which
 * accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: Andreas Weber (Empolis Information Management GmbH) - initial API and implementation
 *******************************************************************************/
package org.eclipse.smila.tika.test.manual;

import java.io.BufferedInputStream;

import org.apache.commons.io.IOUtils;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.tika.TikaPipelet;
import org.eclipse.smila.tika.internal.PageBreakWriteOutContentHandler;
import org.eclipse.smila.tika.test.AllTests;
import org.eclipse.smila.tika.test.ConverterPipelineTestBase;
import org.eclipse.smila.utils.config.ConfigUtils;

/** test for page breaks. */
public class TestPageBreaks extends ConverterPipelineTestBase {

  public void testPageBreaks() throws Exception {
    final String fileName = "PDF_FROM_MSWORD_2010.pdf";
    final AnyMap additionalRecordParams = DataFactory.DEFAULT.createAnyMap();
    additionalRecordParams.put(TikaPipelet.PROP_EXPORT_AS_HTML, false);
    additionalRecordParams.put(TikaPipelet.PROP_PAGE_BREAK, true);
    additionalRecordParams.put(TikaPipelet.PROP_PAGE_NUMBER_ATTRIBUTE, "page");
    BufferedInputStream input = null;
    try {
      input = new BufferedInputStream(ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, fileName));
      final String[] result = callPipeline(fileName, input, additionalRecordParams);
      assertEquals(2, result.length);
      for (int i = 0; i < result.length; i++) {
        final int page = i + 1;
        final String id = "key:" + fileName + "###" + page;
        assertEquals(id, result[i]);
        final AnyMap metadata = _blackboard.getMetadata(id);
        final String text = metadata.getStringValue("Text");
        assertNotNull(text);
        assertFalse(text.contains(PageBreakWriteOutContentHandler.PAGE_START_TAG));
        assertFalse(text.contains(PageBreakWriteOutContentHandler.PAGE_END_TAG));
        if (i == 0) {
          assertTrue(text.contains("Test"));
        } else {
          assertTrue(text.contains("Datenreihe"));
        }
        assertEquals(page, metadata.getLongValue("page").intValue());
        assertEquals(metadata.getStringValue(Record.RECORD_ID), id);
        assertEquals(metadata.getStringValue(Record.SOURCE), "source");
        assertEquals(metadata.getStringValue(ConverterPipelineTestBase.FILENAME_ATTRIBUTE), fileName);
        assertEquals(metadata.getMap("_parameters"), additionalRecordParams);
      }
    } finally {
      IOUtils.closeQuietly(input);
    }
  }

  @Override
  protected void setUp() throws Exception {
    super.setUp();
  }
}
