/***********************************************************************************************************************
 * Copyright (c) 2008,2012 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
 * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: Andreas Schank (Attensity Europe GmbH) - initial API
 **********************************************************************************************************************/
package org.eclipse.smila.importing.compounds;

import java.io.File;
import java.io.InputStream;
import java.net.URL;
import java.util.Iterator;

import org.eclipse.smila.datamodel.Record;

/**
 * Service interface for extracting compounds and for determining whether a file is a compound that can be handled by
 * this service.
 */
public interface CompoundExtractor {
  /** Key for the size of an entry. */
  String KEY_SIZE = "size";

  /** Key for the time of an entry. */
  String KEY_TIME = "time";

  /** Key for the compressed size of an entry. */
  String KEY_COMPRESSED_SIZE = "compressedSize";

  /** Key for the comment of an entry. */
  String KEY_COMMENT = "comment";

  /** Key for the name of an entry. */
  String KEY_FILE_NAME = "fileName";

  /** Key used to mark compounds. */
  String KEY_IS_COMPOUND = "isCompound";

  /** Key for the compound list. */
  String KEY_COMPOUNDS = "compounds";

  /**
   * Key for the root compound record flag. The record for the initial compound (and only this one) will be flagged
   * with this flag.
   */
  String KEY_IS_ROOT_COMPOUND_RECORD = "isRootCompound";

  /**
   * Can the file be extracted by the CompoundExtractor service? The service may or may not inspect the file more
   * closely, or it may simply guess by the file extension. So a <code>true</code> result does not guarantee that the
   * file can be extracted without any exceptions.
   * 
   * @param file
   *          the file in question.
   * @return <code>true</code> if the given file can be extracted, <code>false</code> if not.
   */
  boolean canExtract(File file);

  /**
   * Can the resource at the given URL be extracted by the CompoundExtractor service? The service may or may not
   * inspect the resource more closely, or it may simply guess by the given mime type and the file extension. So a
   * <code>true</code> result does not guarantee that the resource can be extracted without any exceptions.
   * 
   * @param url
   *          the URL in question.
   * @param mimeType
   *          the mime type (if any could be determined).
   * @return <code>true</code> if the resource at the given URL can be extracted, <code>false</code> if not.
   */
  boolean canExtract(URL url, String mimeType);

  /**
   * Can the file with the given name be extracted by the CompoundExtractor service? The service may simply guess by
   * the given mime type and the file extension. So a <code>true</code> result does not guarantee that the file can be
   * extracted without any exceptions.
   * 
   * @param fileName
   *          the name of the file in question.
   * @param mimeType
   *          the mime type (if any could be determined).
   * @return <code>true</code> if the file with the given name can be extracted, <code>false</code> if not.
   */
  boolean canExtract(String fileName, String mimeType);

  /**
   * Extract the compounds (recursively) and return an iterator over the records that have been created from the
   * extracted compound. The extractor should also return a record for the compound itself, even if the content of
   * that record might be empty. This variant takes no mime type; use the overload with a <code>mimeType</code>
   * parameter if one could be determined.
   * 
   * @param compoundInputStream
   *          the input stream of the compound object.
   * @param fileName
   *          the name of the file in question.
   * @param contentAttachmentName
   *          name of the attachment in which to store the content of extracted elements.
   * @return an iterator over the records that resulted from the entries included in the compound, along with their
   *         content. The iterator must never be <code>null</code>; if there are no records to be extracted, it must
   *         be empty.
   * @throws CompoundExtractorException
   *           on errors while extracting the compound.
   */
  Iterator<Record> extract(InputStream compoundInputStream, String fileName, String contentAttachmentName)
    throws CompoundExtractorException;

  /**
   * Extract the compounds (recursively) and return an iterator over the records that have been created from the
   * extracted compound. The extractor should also return a record for the compound itself, even if the content of
   * that record might be empty. This variant additionally accepts the mime type of the compound.
   * 
   * @param compoundInputStream
   *          the input stream of the compound object.
   * @param fileName
   *          the name of the file in question.
   * @param mimeType
   *          the mime type (if any could be determined).
   * @param contentAttachmentName
   *          name of the attachment in which to store the content of extracted elements.
   * @return an iterator over the records that resulted from the entries included in the compound, along with their
   *         content. The iterator must never be <code>null</code>; if there are no records to be extracted, it must
   *         be empty.
   * @throws CompoundExtractorException
   *           on errors while extracting the compound.
   */
  Iterator<Record> extract(InputStream compoundInputStream, String fileName, String mimeType,
    String contentAttachmentName) throws CompoundExtractorException;
}
