/*********************************************************************************************************************
 * Copyright (c) 2008, 2012 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
 * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 **********************************************************************************************************************/
package org.eclipse.smila.importing.crawler.file;

import java.io.InputStream;
import java.util.Date;
import java.util.Iterator;

import org.apache.commons.io.FilenameUtils;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.importing.ContentFetcher;
import org.eclipse.smila.importing.ImportingConstants;
import org.eclipse.smila.importing.compounds.CompoundExtractor;
import org.eclipse.smila.importing.compounds.CompoundExtractorException;
import org.eclipse.smila.importing.compounds.ExtractorWorkerBase;
import org.eclipse.smila.importing.crawler.file.filter.FilterConfiguration;
import org.eclipse.smila.importing.util.FilePathNormalizer;
import org.eclipse.smila.importing.util.PropertyNameMapper;
import org.eclipse.smila.taskworker.TaskContext;

/**
 * Compound extractor worker to use in file crawling workflows.
 *
 * It hands compound content to a {@link CompoundExtractor}, converts the extracted records to records carrying the
 * file crawler's properties, and filters them according to the filter configuration of the task.
 */
public class FileExtractorWorker extends ExtractorWorkerBase {
  /** name of worker. */
  public static final String NAME = "fileExtractor";

  /** separator used when joining compound names and file name to the file path of an extracted record. */
  private static final String PATH_SEPARATOR = "/";

  /** reference to the file crawler service. */
  private FileCrawlerService _fileCrawler;

  /** @return the worker name, see {@link #NAME}. */
  @Override
  public String getName() {
    return NAME;
  }

  /**
   * Reads the file path of the compound record and invokes the extractor with it.
   *
   * @param extractor
   *          extractor to invoke.
   * @param compoundRecord
   *          record describing the compound; its file path is read from the mapped attribute.
   * @param compoundContent
   *          content stream of the compound.
   * @param taskContext
   *          task context, used to create the property name mapper.
   * @return iterator on the records produced by the extractor.
   * @throws CompoundExtractorException
   *           thrown by the extractor.
   */
  @Override
  protected Iterator<Record> invokeExtractor(final CompoundExtractor extractor, final Record compoundRecord,
    final InputStream compoundContent, final TaskContext taskContext) throws CompoundExtractorException {
    final PropertyNameMapper nameMapper = PropertyNameMapper.createFrom(taskContext);
    // the compound record has already been mapped, so look up the path under the (first) mapped attribute name.
    final String pathAttribute = nameMapper.get(FileCrawlerService.PROPERTY_FILE_PATH).get(0);
    final String compoundPath = compoundRecord.getMetadata().getStringValue(pathAttribute);
    return extractor.extract(compoundContent, compoundPath, FileCrawlerService.ATTACHMENT_FILE_CONTENT);
  }

  /**
   * Converts a record produced by the extractor to a record with file crawler properties.
   *
   * If the extracted record is marked as the root compound record, the compound record itself is updated and
   * returned, else a new record is created with an ID built from the data source and the extracted record's ID.
   *
   * @param compoundRecord
   *          record of the compound that was extracted.
   * @param extractedRecord
   *          record produced by the extractor.
   * @param taskContext
   *          task context, used to create the property name mapper.
   * @return the converted record, with property names mapped.
   */
  @Override
  protected Record convertRecord(final Record compoundRecord, final Record extractedRecord,
    final TaskContext taskContext) {
    final PropertyNameMapper nameMapper = PropertyNameMapper.createFrom(taskContext);
    final String sourceId = compoundRecord.getSource();
    final boolean isRootCompound =
      extractedRecord.getMetadata().containsKey(CompoundExtractor.KEY_IS_ROOT_COMPOUND_RECORD);
    // the root compound's record is reused, every other extracted element gets a record of its own.
    final Record result =
      isRootCompound ? compoundRecord : extractedRecord.getFactory().createRecord(
        sourceId + ":" + extractedRecord.getId(), sourceId);

    copyAttachment(extractedRecord, result, FileCrawlerService.ATTACHMENT_FILE_CONTENT);
    // the compounds form the prefix for the URL of the extracted record ...
    copySetToStringAttribute(extractedRecord, CompoundExtractor.KEY_COMPOUNDS, result,
      FileCrawlerService.PROPERTY_FILE_PATH, PATH_SEPARATOR);
    // ... and the file name is appended to it.
    concatAttributeValues(extractedRecord, CompoundExtractor.KEY_FILE_NAME, result,
      FileCrawlerService.PROPERTY_FILE_PATH, PATH_SEPARATOR);

    // normalize the file path and derive folder, name and extension from it.
    final AnyMap resultMetadata = result.getMetadata();
    final String rawPath = resultMetadata.getStringValue(FileCrawlerService.PROPERTY_FILE_PATH);
    final String normalizedPath = FilePathNormalizer.getNormalizedPath(rawPath);
    resultMetadata.put(FileCrawlerService.PROPERTY_FILE_PATH, normalizedPath);
    resultMetadata.put(FileCrawlerService.PROPERTY_FILE_FOLDER,
      FilenameUtils.getPathNoEndSeparator(normalizedPath));
    resultMetadata.put(FileCrawlerService.PROPERTY_FILE_NAME, FilenameUtils.getName(normalizedPath));
    resultMetadata.put(FileCrawlerService.PROPERTY_FILE_EXTENSION, FilenameUtils.getExtension(normalizedPath));

    copyAttribute(extractedRecord, CompoundExtractor.KEY_SIZE, result, FileCrawlerService.PROPERTY_FILE_SIZE);

    // last modified: first copy the compound record's date as fallback, then copy the extracted record's own
    // time on top of it.
    final String lastModifiedAttribute = FileCrawlerService.PROPERTY_FILE_LAST_MODIFIED;
    copyAttribute(compoundRecord, lastModifiedAttribute, result, lastModifiedAttribute);
    copyAttribute(extractedRecord, CompoundExtractor.KEY_TIME, result, lastModifiedAttribute);

    // the delta hash is the modification timestamp in milliseconds, if a date is available.
    final Date modificationDate = resultMetadata.getDateTimeValue(lastModifiedAttribute);
    if (modificationDate != null) {
      final String deltaHash = Long.toString(modificationDate.getTime());
      resultMetadata.put(ImportingConstants.ATTRIBUTE_DELTA_HASH, deltaHash);
    }

    // finally apply the property name mapping to the converted record.
    nameMapper.mapNames(result, _fileCrawler.getFilePropertyNames());
    return result;
  }

  /**
   * {@inheritDoc}
   * 
   * Filters applied to extracted records, if the task parameters contain a filter configuration:
   * <ul>
   * <li>file patterns (applied to the name part of the record's file path),</li>
   * <li>folder patterns (applied to the complete file path value of the record),</li>
   * <li>maximum size (compared to the record's file size attribute, if it is mapped and present).</li>
   * </ul>
   *
   * @return false if one of the filters rejects the record, else true.
   */
  @Override
  protected boolean filterRecord(final Record record, final TaskContext taskContext) {
    final AnyMap filterParams = taskContext.getTaskParameters().getMap(ImportingConstants.TASK_PARAM_FILTERS);
    final PropertyNameMapper nameMapper = PropertyNameMapper.createFrom(taskContext);
    if (filterParams == null) {
      // nothing configured, nothing to filter.
      return true;
    }
    final FilterConfiguration filters = new FilterConfiguration(filterParams);

    // the record is already mapped: use the first attribute name if the path is mapped multiple times.
    final String pathAttribute = nameMapper.get(FileCrawlerService.PROPERTY_FILE_PATH).get(0);
    final AnyMap metadata = record.getMetadata();
    if (metadata.containsKey(pathAttribute)) {
      final String path = metadata.getStringValue(pathAttribute);
      if (!filters.getFilePatternMatcher().matches(FilenameUtils.getName(path))
        || !filters.getFolderPatternMatcher().matches(path)) {
        return false;
      }
    }

    // the size filter can only be checked if the size property is part of the mapping.
    if (nameMapper.containsMapping(FileCrawlerService.PROPERTY_FILE_SIZE)) {
      final String sizeAttribute = nameMapper.get(FileCrawlerService.PROPERTY_FILE_SIZE).get(0);
      if (metadata.containsKey(sizeAttribute) && filters.getMaxSize() < metadata.getLongValue(sizeAttribute)) {
        return false;
      }
    }
    return true;
  }

  /** @return the bound file crawler service, which is used to fetch content. */
  @Override
  protected ContentFetcher getContentFetcher() {
    return _fileCrawler;
  }

  /** DS service reference bind method. */
  public void setFileCrawlerService(final FileCrawlerService fileCrawler) {
    _fileCrawler = fileCrawler;
  }

  /** DS service reference unbind method: releases the reference only if it is the currently bound service. */
  public void unsetFileCrawlerService(final FileCrawlerService fileCrawler) {
    if (fileCrawler == _fileCrawler) {
      _fileCrawler = null;
    }
  }
}
