/***********************************************************************************************************************
 * Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. This
 * program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 which
 * accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: Juergen Schumacher (Empolis Information Management GmbH) - initial implementation, based on
 * XmlSplitterPipelet
 **********************************************************************************************************************/

package org.eclipse.smila.processing.pipelets.xmlprocessing;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

import javax.xml.namespace.QName;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.eclipse.smila.blackboard.Blackboard;
import org.eclipse.smila.blackboard.Blackboard.Get;
import org.eclipse.smila.blackboard.BlackboardAccessException;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.processing.ProcessingException;
import org.eclipse.smila.processing.parameters.MissingParameterException;
import org.eclipse.smila.processing.parameters.ParameterAccessor;
import org.eclipse.smila.processing.pipelets.ATransformationPipelet;
import org.eclipse.smila.processing.pipelets.DocumentSplitterPipelet;
import org.eclipse.smila.processing.util.ProcessingConstants;
import org.eclipse.smila.processing.util.ResultCollector;
import org.eclipse.smila.utils.xml.stax.XmlSnippetHandler;
import org.eclipse.smila.utils.xml.stax.XmlSnippetSplitter;

/**
 * The possible properties are:
 * <ul>
 * <li>beginTagName: the name of the tag to start the xml snippet</li>
 * <li>beginTagNamespace: the (optional) namespace of the tag to start the xml snippet</li>
 * <li>endTagName: the name of the tag to end the xml snippet</li>
 * <li>endTagNamespace: the (optional) namespace of the tag to end the xml snippet</li>
 * <li>inputName: name of the Attribute/Attachment to read the XML Document from.</li>
 * <li>outputName: name of the Attribute/Attachment to store the extracted value in</li>
 * <li>inputType: the type (Attribute or Attachment of the inputName). An input Attribute is not interpreted as content
 * but as a file path or an URL to the XML document</li>
 * <li>outputType: the type (Attribute or Attachment of the outputName)</li>
 * </ul>
 */
public class XmlDocumentSplitterPipelet extends ATransformationPipelet {

  /** Constant for the property beginTagName. */
  public static final String PROP_BEGIN_TAG_NAME = "beginTagName";

  /** Constant for the property beginTagNamespace. */
  public static final String PROP_BEGIN_TAG_NAMESPACE = "beginTagNamespace";

  /** Constant for the property endTagName. */
  public static final String PROP_END_TAG_NAME = "endTagName";

  /** Constant for the property endTagNamespace. */
  public static final String PROP_END_TAG_NAMESPACE = "endTagNamespace";

  /**
   * {@inheritDoc}
   */
  @Override
  public String[] process(final Blackboard blackboard, final String[] recordIds) throws ProcessingException {
    if (recordIds == null) {
      return recordIds;
    }
    final ParameterAccessor paramAccessor = new ParameterAccessor(blackboard, _config);
    final ResultCollector resultCollector =
      new ResultCollector(paramAccessor, _log, ProcessingConstants.DROP_ON_ERROR_DEFAULT);
    for (final String id : recordIds) {
      try {
        paramAccessor.setCurrentRecord(id);
        final String beginTagName = paramAccessor.getRequiredParameter(PROP_BEGIN_TAG_NAME);
        final String beginTagNamespace = paramAccessor.getParameter(PROP_BEGIN_TAG_NAMESPACE, "");
        final String endTagName = paramAccessor.getParameter(PROP_END_TAG_NAME, beginTagName);
        final String endTagNamespace = paramAccessor.getParameter(PROP_END_TAG_NAMESPACE, beginTagNamespace);
        final QName beginTag = new QName(beginTagNamespace, beginTagName);
        final QName endTag = new QName(endTagNamespace, endTagName);
        final String outputName = getOutputName(paramAccessor);
        final boolean storeInAttribute = isStoreInAttribute(getOutputType(paramAccessor));
        final InternalHandler snippetHandler =
          new InternalHandler(blackboard, id, outputName, storeInAttribute, resultCollector);
        final XmlSnippetSplitter splitter = new XmlSnippetSplitter(snippetHandler, beginTag, endTag);
        final InputStream inputStream = getXmlInputStream(blackboard, id, paramAccessor);
        splitter.read(inputStream);
        if (_log.isInfoEnabled()) {
          _log.info("Created " + snippetHandler.getRecordCount() + " records from processing record " + id);
        }
      } catch (final Exception e) {
        resultCollector.addFailedResult(id, e);
      }
    }
    return resultCollector.getResultIds();
  }

  /** get XML input Stream. */
  private InputStream getXmlInputStream(final Blackboard blackboard, final String id,
    final ParameterAccessor paramAccessor) throws IOException, BlackboardAccessException, MissingParameterException {
    InputStream inputStream = null;
    if (isReadFromAttribute(getInputType(paramAccessor))) {
      inputStream = loadExternalInputStream(readStringInput(blackboard, id, paramAccessor));
    } else {
      inputStream = blackboard.getAttachmentAsStream(id, getInputName(paramAccessor));
    }
    return inputStream;
  }

  /**
   * Get the external InputStream to the given url or file path.
   * 
   * @param attrtibuteValue
   *          the attrtibuteValue denoting an URL or file path
   * @return a InputStream or null
   * @throws IOException
   *           if any error occurs
   */
  private InputStream loadExternalInputStream(final String attrtibuteValue) throws IOException {
    InputStream stream = null;
    if (attrtibuteValue != null && attrtibuteValue.trim().length() > 0) {
      if (attrtibuteValue.startsWith("file")) {
        final URL url = new URL(attrtibuteValue);
        stream = new FileInputStream(url.getAuthority() + url.getPath());
      } else if (attrtibuteValue.startsWith("http")) {
        final URL url = new URL(attrtibuteValue);
        final HttpClient httpClient = new HttpClient();
        final GetMethod getMethod = new GetMethod(url.toString());
        httpClient.executeMethod(getMethod);
        stream = getMethod.getResponseBodyAsStream();
      } else {
        stream = new FileInputStream(attrtibuteValue);
      }
    } // if
    return stream;
  }

  class InternalHandler implements XmlSnippetHandler {

    private final Blackboard _blackboard;

    private final String _currentId;

    private final ResultCollector _resultCollector;

    private final String _outputName;

    private final boolean _storeInAttribute;

    private final AnyMap _cloneMetadata = DataFactory.DEFAULT.createAnyMap();

    private int _recordCount;

    private InternalHandler(final Blackboard blackboard, final String currentId, final String outputName,
      final boolean storeInAttribute, final ResultCollector resultCollector) throws BlackboardAccessException {
      _blackboard = blackboard;
      _currentId = currentId;
      _outputName = outputName;
      _storeInAttribute = storeInAttribute;
      _resultCollector = resultCollector;
      _cloneMetadata.putAll(_blackboard.getMetadata(_currentId));
      _cloneMetadata.remove(Record.RECORD_ID);
    }

    public int getRecordCount() {
      return _recordCount;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void handleSnippet(final byte[] snippet) {
      final String snippetId = _currentId + DocumentSplitterPipelet.SPLIT_ID_SEPARATOR + _recordCount++;
      try {
        final Record snippetRecord = _blackboard.getRecord(snippetId, Get.NEW);
        snippetRecord.getMetadata().put(DocumentSplitterPipelet.DOCUMENT_ID, _currentId);
        snippetRecord.getMetadata().putAll(_cloneMetadata);
        if (_storeInAttribute) {
          snippetRecord.getMetadata().put(_outputName, new String(snippet, ENCODING_CHARSET));
        } else {
          _blackboard.setAttachment(snippetId, _outputName, snippet);
        }
        _resultCollector.addResult(snippetId);
      } catch (final Exception ex) {
        _log.warn("Error creating XML-snippet record", ex);
      }
    }
  }
}
