/*********************************************************************************************************************
 * Copyright (c) 2008, 2012 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
 * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 **********************************************************************************************************************/
package org.eclipse.smila.importing.compounds;

import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnySeq;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.importing.ContentFetcher;
import org.eclipse.smila.importing.ImportingConstants;
import org.eclipse.smila.importing.ImportingException;
import org.eclipse.smila.objectstore.ObjectStoreException;
import org.eclipse.smila.taskworker.TaskContext;
import org.eclipse.smila.taskworker.Worker;
import org.eclipse.smila.taskworker.input.RecordInput;
import org.eclipse.smila.taskworker.output.RecordOutput;
import org.eclipse.smila.utils.MaybeRecoverableException;

/**
 * base implementation for workers doing compound extraction. Subclasses must provide a {@link ContentFetcher}
 * implementation and a method that converts the records produced by the {@link CompoundExtractor} to records that are
 * compatible with the associated crawler worker.
 */
public abstract class ExtractorWorkerBase implements Worker {

  /** name of the input slot containing bulks with records to extract. */
  private static final String INPUT_SLOT = "compounds";

  /** name of the output slot for bulks with extracted records. */
  private static final String OUTPUT_SLOT = "files";

  /** reference to the extractor service. */
  private CompoundExtractor _extractor;

  /** local log. */
  private final Log _log = LogFactory.getLog(getClass());

  @Override
  public void perform(final TaskContext taskContext) throws Exception {
    final RecordInput recordInput = taskContext.getInputs().getAsRecordInput(INPUT_SLOT);
    final RecordOutput recordOutput = taskContext.getOutputs().getAsRecordOutput(OUTPUT_SLOT);
    Record compoundRecord;
    do {
      compoundRecord = recordInput.getRecord();
      if (compoundRecord != null) {
        try {
          extractCompound(compoundRecord, recordOutput, taskContext);
        } catch (final MaybeRecoverableException ex) {
          if (ex.isRecoverable()) {
            throw ex;
          }
          taskContext.getLog().error("Failed to extract compound " + compoundRecord.getId(), ex);
        } catch (final RuntimeException ex) {
          taskContext.getLog().error("Failed to extract compound " + compoundRecord.getId(), ex);
        }
      }
    } while (compoundRecord != null && !taskContext.isCanceled());

  }

  /** extract the compound and write the extracted records to the output bulk. */
  private void extractCompound(final Record compoundRecord, final RecordOutput recordOutput,
    final TaskContext taskContext) throws MaybeRecoverableException, ObjectStoreException, IOException {
    if (_log.isDebugEnabled()) {
      _log.debug("extracting compound " + compoundRecord.getId());
    }
    final InputStream compoundContent = getCompoundContentTimed(compoundRecord, taskContext);
    if (compoundContent != null) {
      try {
        final Iterator<Record> entryRecords = invokeExtractorTimed(compoundRecord, compoundContent, taskContext);
        extractEntries(compoundRecord, entryRecords, recordOutput, taskContext);
      } finally {
        IOUtils.closeQuietly(compoundContent);
      }
    }
  }

  /** invoke content fetcher and measure time as "fetchCompoundContent". */
  private InputStream getCompoundContentTimed(final Record compoundRecord, final TaskContext taskContext)
    throws ImportingException {
    final long startTime = taskContext.getTimestamp();
    try {
      return getContentFetcher().getContent(compoundRecord, taskContext);
    } finally {
      taskContext.measureTime("fetchCompoundContent", startTime);
    }
  }

  /** invoke extractor and measure time as "extractCompound". */
  private Iterator<Record> invokeExtractorTimed(final Record compoundRecord, final InputStream compoundContent,
    final TaskContext taskContext) throws CompoundExtractorException {
    final long startTime = taskContext.getTimestamp();
    try {
      return invokeExtractor(_extractor, compoundRecord, compoundContent, taskContext);
    } finally {
      taskContext.measureTime("extractCompound", startTime);
    }
  }

  /**
   * get extracted records, convert them according to data source and write them to the output bulk. Add time for
   * getting extracted records to "extractCompound" timer.
   */
  private void extractEntries(final Record compoundRecord, final Iterator<Record> entryRecords,
    final RecordOutput recordOutput, final TaskContext taskContext) throws MaybeRecoverableException,
    ObjectStoreException, IOException {
    long startTime = taskContext.getTimestamp();
    while (entryRecords.hasNext() && !taskContext.isCanceled()) {
      final Record entryRecord = entryRecords.next();
      taskContext.measureTime("extractCompound", startTime);
      final Record convertedRecord = convertRecordTimed(compoundRecord, entryRecord, taskContext);
      if (convertedRecord != null) {
        recordOutput.writeRecord(convertedRecord);
        if (_log.isDebugEnabled()) {
          _log.debug("added record " + entryRecord.getId());
        }
      }
      startTime = taskContext.getTimestamp();
    }
  }

  /** convert extracted record to data source conforming record and measure time as "convertRecord". */
  private Record convertRecordTimed(final Record compoundRecord, final Record extractedRecord,
    final TaskContext taskContext) {
    final long startTime = taskContext.getTimestamp();
    try {
      final Record convertedRecord = convertRecord(compoundRecord, extractedRecord, taskContext);
      if (convertedRecord != null) {
        copyCompoundAttributes(compoundRecord, extractedRecord, convertedRecord);
      }
      if (!filterRecord(convertedRecord, taskContext)) {
        return null;
      }
      mapRecord(convertedRecord, taskContext);
      return convertedRecord;
    } finally {
      taskContext.measureTime("convertRecord", startTime);
    }

  }

  /**
   * Hook for subclasses to support mapping of the converted record according to mapping rules.
   * 
   * @param record
   *          the {@link Record}
   * @param taskContext
   *          the {@link TaskContext}
   */
  protected void mapRecord(final Record record, final TaskContext taskContext) {
    ; // if no mapping is supported or required, just don't do anything here
  }

  /**
   * Filter extracted records.
   * 
   * @param record
   *          the record to check
   * @param taskContext
   *          the task context containing the task parameters
   * @return <code>true</code> if the record passes the filter(s), <code>false</code> if not.
   */
  protected boolean filterRecord(final Record record, final TaskContext taskContext) {
    return true;
  }

  /** invoke extractor with data from the crawled record. */
  protected abstract Iterator<Record> invokeExtractor(CompoundExtractor extractor, Record compoundRecord,
    InputStream compoundContent, TaskContext taskContext) throws CompoundExtractorException;

  /** create a record from the extracted record that conforms to the records produced by the matching crawler. */
  protected abstract Record convertRecord(final Record compoundRecord, Record extractedRecord,
    TaskContext taskContext);

  /** get a content fetcher for the data source type. */
  protected abstract ContentFetcher getContentFetcher();

  /** utility method for subclasses: copy attachment from sourceRecord to targetRecord, if it exists. */
  protected void copyAttachment(final Record sourceRecord, final Record targetRecord, final String attachmentName) {
    if (sourceRecord.hasAttachment(attachmentName)) {
      targetRecord.setAttachment(sourceRecord.getAttachment(attachmentName));
    }
  }

  /** utility method for subclasses: copy an attribute if it exists. */
  protected void copyAttribute(final Record sourceRecord, final String sourceAttribute, final Record targetRecord,
    final String targetAttribute) {
    final Any sourceAttributeValue = sourceRecord.getMetadata().get(sourceAttribute);
    if (sourceAttributeValue != null) {
      targetRecord.getMetadata().put(targetAttribute, sourceAttributeValue);
    }
  }

  /** utility method for subclasses: copy a set attribute to a plain string attribute. */
  protected void copySetToStringAttribute(final Record sourceRecord, final String sourceAttribute,
    final Record targetRecord, final String targetAttribute, final String separator) {
    final Any sourceAttributeValue = sourceRecord.getMetadata().get(sourceAttribute);
    if (sourceAttributeValue != null) {
      if (sourceAttributeValue.isSeq()) {
        final AnySeq seq = sourceAttributeValue.asSeq();
        final StringBuilder newValueBuilder = new StringBuilder();
        for (final Any value : seq) {
          newValueBuilder.append(value.asValue().asString()).append(separator);
        }
        String newValue = newValueBuilder.toString();
        newValue = newValue.substring(0, newValue.lastIndexOf(separator));
        targetRecord.getMetadata().put(targetAttribute, newValue);
      } else if (sourceAttributeValue.isValue()) {
        // source attribute is already a plain value so we just copy it
        targetRecord.getMetadata().put(targetAttribute, sourceAttributeValue);
      }
    }
  }

  /** utility method for subclasses: concat a source attribute value to a target attribute string value. */
  protected void concatAttributeValues(final Record sourceRecord, final String sourceAttribute,
    final Record targetRecord, final String targetAttribute, final String separator) {
    final Any sourceAttributeValue = sourceRecord.getMetadata().get(sourceAttribute);
    final Any targetAttributeValue = targetRecord.getMetadata().get(targetAttribute);
    if (sourceAttributeValue != null && sourceAttributeValue.isValue()) {
      if (targetAttributeValue == null) {
        // target attribute value doesn't exist, so we just copy the source value
        targetRecord.getMetadata().put(targetAttribute, sourceAttributeValue);
      } else if (targetAttributeValue.isString()) {
        final String concatenatedValue =
          targetAttributeValue.asValue().asString() + separator + sourceAttributeValue.asValue().asString();
        targetRecord.getMetadata().put(targetAttribute, concatenatedValue);
      }
    }
  }

  /** add compound related system attributes to the converted record. */
  protected void copyCompoundAttributes(final Record compoundRecord, final Record extractedRecord,
    final Record convertedRecord) {
    copyAttribute(extractedRecord, CompoundExtractor.KEY_IS_COMPOUND, convertedRecord,
      ImportingConstants.ATTRIBUTE_COMPOUNDFLAG);
    copyAttribute(extractedRecord, CompoundExtractor.KEY_COMPOUNDS, convertedRecord,
      ImportingConstants.ATTRIBUTE_COMPOUNDPATH);
    if (!extractedRecord.getMetadata().containsKey(CompoundExtractor.KEY_IS_ROOT_COMPOUND_RECORD)) {
      copyAttribute(compoundRecord, Record.RECORD_ID, convertedRecord, ImportingConstants.ATTRIBUTE_COMPOUNDID);
    }
  }

  /** DS service reference bind method. */
  public void setCompoundExtractor(final CompoundExtractor extractor) {
    _extractor = extractor;
  }

  /** DS service reference unbind method. */
  public void unsetCompoundExtractor(final CompoundExtractor extractor) {
    if (_extractor == extractor) {
      _extractor = null;
    }
  }
}
