/*******************************************************************************
 * Copyright (c) 2013, Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. This program
 * and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 which
 * accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: Daniel Stucky (Empolis Information Management GmbH) - initial implementation
 *******************************************************************************/
package org.eclipse.smila.processing.pipelets;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.smila.blackboard.Blackboard;
import org.eclipse.smila.blackboard.BlackboardAccessException;
import org.eclipse.smila.common.language.Language;
import org.eclipse.smila.common.language.LanguageIdentifyService;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.processing.Pipelet;
import org.eclipse.smila.processing.ProcessingException;
import org.eclipse.smila.processing.parameters.ParameterAccessor;
import org.eclipse.smila.processing.util.ProcessingConstants;
import org.eclipse.smila.processing.util.ResultCollector;
import org.eclipse.smila.utils.service.ServiceUtils;

/**
 * Language Identify Pipelet.
 * 
 * <p>
 * For each record the pipelet reads the text stored in the metadata attribute named by {@link #CONTENT_ATTRIBUTE},
 * asks a {@link LanguageIdentifyService} to identify its language and writes the result back into the record
 * metadata: the ISO language into the attribute named by {@link #LANGUAGE_ATTRIBUTE} and/or the alternative name into
 * the attribute named by {@link #ALTERNATIVE_NAME_ATTRIBUTE}. At least one of these two target attributes must be
 * configured.
 * </p>
 * 
 * <p>
 * The configured defaults ({@link #DEFAULT_LANGUAGE}, {@link #DEFAULT_ALTERNATIVE_NAME}) are used instead when the
 * content attribute is missing, when no language could be identified, or when the identified language is not certain
 * and {@link #USE_CERTAIN_LANGUAGES_ONLY} is set. A default is only written if the matching target attribute is
 * configured.
 * </p>
 * 
 * @author Daniel Stucky
 */
public class LanguageIdentifyPipelet implements Pipelet {

  /**
   * Constant for the configuration property ContentAttribute.
   */
  public static final String CONTENT_ATTRIBUTE = "ContentAttribute";

  /**
   * Constant for the configuration property LanguageAttribute.
   */
  public static final String LANGUAGE_ATTRIBUTE = "LanguageAttribute";

  /**
   * Constant for the configuration property DefaultLanguage.
   */
  public static final String DEFAULT_LANGUAGE = "DefaultLanguage";

  /**
   * Constant for the configuration property AlternativeNameAttribute.
   */
  public static final String ALTERNATIVE_NAME_ATTRIBUTE = "AlternativeNameAttribute";

  /**
   * Constant for the configuration property DefaultAlternativeName.
   */
  public static final String DEFAULT_ALTERNATIVE_NAME = "DefaultAlternativeName";

  /**
   * Constant for the configuration property UseCertainLanguagesOnly.
   */
  public static final String USE_CERTAIN_LANGUAGES_ONLY = "UseCertainLanguagesOnly";

  /**
   * Immutable snapshot of the pipelet parameters as read from a {@link ParameterAccessor}. A new instance is created
   * for every record in {@link LanguageIdentifyPipelet#process(Blackboard, String[])}.
   */
  private static class Parameters {
    /** name of the attribute holding the text to analyze (required). */
    private final String _contentAttributeName;

    /** name of the attribute receiving the ISO language, or null if not configured. */
    private final String _languageAttributeName;

    /** fallback ISO language, or null if not configured. */
    private final String _defaultLanguage;

    /** name of the attribute receiving the alternative language name, or null if not configured. */
    private final String _alternativeNameAttributeName;

    /** fallback alternative language name, or null if not configured. */
    private final String _defaultAlternativeName;

    /** if true, identified languages that are not certain are discarded in favor of the defaults. */
    private final boolean _useCertainLanguagesOnly;

    /**
     * Reads all parameters from the accessor.
     * 
     * @param paramAccessor
     *          accessor to read the parameters from
     * @throws ProcessingException
     *           if neither {@link #LANGUAGE_ATTRIBUTE} nor {@link #ALTERNATIVE_NAME_ATTRIBUTE} is set, or if reading a
     *           parameter fails
     */
    Parameters(final ParameterAccessor paramAccessor) throws ProcessingException {
      _contentAttributeName = paramAccessor.getRequiredParameter(CONTENT_ATTRIBUTE);
      _languageAttributeName = paramAccessor.getParameter(LANGUAGE_ATTRIBUTE, null);
      _defaultLanguage = paramAccessor.getParameter(DEFAULT_LANGUAGE, null);
      _alternativeNameAttributeName = paramAccessor.getParameter(ALTERNATIVE_NAME_ATTRIBUTE, null);
      _defaultAlternativeName = paramAccessor.getParameter(DEFAULT_ALTERNATIVE_NAME, null);
      _useCertainLanguagesOnly = paramAccessor.getBooleanParameter(USE_CERTAIN_LANGUAGES_ONLY, false);
      // only ONE target attribute is required, so either name may still be null afterwards.
      if (_languageAttributeName == null && _alternativeNameAttributeName == null) {
        throw new ProcessingException("One of the config properties " + LANGUAGE_ATTRIBUTE + " or "
          + ALTERNATIVE_NAME_ATTRIBUTE + " have to be specified!");
      }
    }
  }

  /**
   * local logger.
   */
  private final Log _log = LogFactory.getLog(getClass());

  /**
   * The configuration.
   */
  private AnyMap _configuration;

  /**
   * Language Identifier Service, looked up on first use and then reused.
   */
  private LanguageIdentifyService _languageIdentifier;

  /**
   * Stores the configuration. The individual parameters are not read here but per record in
   * {@link #process(Blackboard, String[])}.
   * 
   * {@inheritDoc}
   */
  @Override
  public void configure(final AnyMap configuration) throws ProcessingException {
    _configuration = configuration;
  }

  /**
   * Identifies the language of each given record. An exception raised while handling a single record is reported to
   * the {@link ResultCollector} as failed result for that record; the remaining records are still processed.
   * 
   * {@inheritDoc}
   */
  @Override
  public String[] process(final Blackboard blackboard, final String[] recordIds) throws ProcessingException {
    // fails for the whole call if no service is available: nothing could be identified anyway.
    final LanguageIdentifyService identifier = getLanguageIdentifier();
    final ParameterAccessor paramAccessor = new ParameterAccessor(blackboard, _configuration);
    final ResultCollector resultCollector =
      new ResultCollector(paramAccessor, _log, ProcessingConstants.DROP_ON_ERROR_DEFAULT);
    for (final String id : recordIds) {
      try {
        // parameters are resolved anew for every record after the accessor was pointed at it.
        paramAccessor.setCurrentRecord(id);
        final Parameters parameters = new Parameters(paramAccessor);
        identifyAndStore(blackboard, id, identifier, parameters);
        resultCollector.addResult(id);
      } catch (final Exception ex) {
        resultCollector.addFailedResult(id, ex);
      }
    } // for
    return resultCollector.getResultIds();
  }

  /**
   * Returns the LanguageIdentifier service, looking it up via {@link ServiceUtils} on first use.
   * 
   * @return a LanguageIdentifier service.
   * 
   * @throws ProcessingException
   *           could not find a service
   */
  private synchronized LanguageIdentifyService getLanguageIdentifier() throws ProcessingException {
    if (_languageIdentifier == null) {
      try {
        _languageIdentifier = ServiceUtils.getService(LanguageIdentifyService.class);
      } catch (final Exception ex) {
        _log.warn("Error while waiting for LanguageIdentifier service to come up.", ex);
      }
      // covers both a failed lookup (logged above) and a lookup that returned null.
      if (_languageIdentifier == null) {
        throw new ProcessingException("No LanguageIdentifier service available, giving up");
      }
    }
    return _languageIdentifier;
  }

  /**
   * Identifies the Language and stores it in the BlackboardService. Falls back to the configured defaults if the
   * content attribute is missing, no language was identified, or the identified language is not certain while only
   * certain languages are accepted.
   * 
   * @param blackboard
   *          the BlackboardService
   * @param id
   *          the Id
   * @param identifier
   *          LanguageIdentifier service to use.
   * @param p
   *          the parameters valid for this record
   * @throws BlackboardAccessException
   *           if any error occurs
   */
  private void identifyAndStore(final Blackboard blackboard, final String id,
    final LanguageIdentifyService identifier, final Parameters p) throws BlackboardAccessException {
    final AnyMap metaData = blackboard.getMetadata(id);
    if (metaData.containsKey(p._contentAttributeName)) {
      final String text = metaData.getStringValue(p._contentAttributeName);
      final Language identifiedLanguage = identifier.identify(text);
      if (identifiedLanguage != null) {
        if (identifiedLanguage.isCertain() || !p._useCertainLanguagesOnly) {
          storeIdentifiedLanguage(id, identifiedLanguage, metaData, p);
        } else {
          if (_log.isInfoEnabled()) {
            _log.info("Detected language '" + identifiedLanguage.getIsoLanguage() + "' for Id '" + id
              + "' is not certain. Detected language is not used.");
          }
          setDefaultLanguage(id, metaData, p);
        }
      } else {
        if (_log.isWarnEnabled()) {
          _log.warn("Unable to identify Language for Id '" + id + "'.");
        }
        setDefaultLanguage(id, metaData, p);
      }
    } else {
      if (_log.isWarnEnabled()) {
        _log.warn("Unable to identify Language for Id '" + id + "'. No input value found for '" + CONTENT_ATTRIBUTE
          + "'");
      }
      setDefaultLanguage(id, metaData, p);
    }
  }

  /**
   * set language attributes from identifiedLanguage. Each value is only written if the corresponding target attribute
   * is configured; the alternative name additionally only if the identified language provides one.
   * 
   * @param id
   *          record Id, used for logging only
   * @param identifiedLanguage
   *          the identified language, must not be null
   * @param metaData
   *          record metadata to write to
   * @param p
   *          the parameters valid for this record
   */
  private void storeIdentifiedLanguage(final String id, final Language identifiedLanguage, final AnyMap metaData,
    final Parameters p) {
    final String language = identifiedLanguage.getIsoLanguage();
    final String alternativeName = identifiedLanguage.getAlternativeName();
    if (_log.isTraceEnabled()) {
      _log.trace("Detected language '" + language + "' for Id '" + id + "'.");
    }

    if (p._languageAttributeName != null) {
      metaData.put(p._languageAttributeName, language);
    }
    if (alternativeName != null && p._alternativeNameAttributeName != null) {
      metaData.put(p._alternativeNameAttributeName, alternativeName);
    }
  }

  /**
   * Set default language. A default value is only written if it is configured AND the attribute it should be written
   * to is configured, too: only one of the two target attributes is mandatory, so the other name may be null and must
   * not be used as a metadata key.
   * 
   * @param id
   *          record Id
   * @param metaData
   *          record metadata
   * @param p
   *          the parameters valid for this record
   */
  private void setDefaultLanguage(final String id, final AnyMap metaData, final Parameters p) {
    if (p._defaultLanguage != null && p._languageAttributeName != null) {
      if (_log.isInfoEnabled()) {
        _log.info("Using default language '" + p._defaultLanguage + "' for Id '" + id + "'.");
      }
      metaData.put(p._languageAttributeName, p._defaultLanguage);
    }
    if (p._defaultAlternativeName != null && p._alternativeNameAttributeName != null) {
      if (_log.isInfoEnabled()) {
        _log.info("Using default alternativeName '" + p._defaultAlternativeName + "' for Id '" + id + "'.");
      }
      metaData.put(p._alternativeNameAttributeName, p._defaultAlternativeName);
    }
  }
}
