/*******************************************************************************
 * Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. 
 * This program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 
 * which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: Andreas Weber (Empolis Information Management GmbH) - initial API and implementation
 *******************************************************************************/
package org.eclipse.smila.tika.test;

import java.util.Set;

import org.eclipse.smila.common.language.Language;
import org.eclipse.smila.common.language.LanguageIdentifyService;
import org.eclipse.smila.test.DeclarativeServiceTestCase;

/**
 * Tests the {@link LanguageIdentifyService} that is looked up via {@link #getService(Class)} in {@link #setUp()}:
 * the set of supported languages, the detection result for short sample texts, the handling of empty input and the
 * mapping from ISO codes to alternative language names.
 * 
 * The sample texts contain non-ASCII characters, so this file must be read and compiled as UTF-8.
 * 
 * NOTE(review): the sample texts were repaired from mis-decoded UTF-8 (e.g. "Ã¨" instead of "è"). Re-run the suite
 * to confirm that the expected {@code isCertain} flags still hold for the repaired texts.
 */
public class TestTikaLanguageIdentifier extends DeclarativeServiceTestCase {

  /** service under test, fetched anew for each test in {@link #setUp()}. */
  private LanguageIdentifyService _identifier;

  /** looks up the service under test. */
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    _identifier = getService(LanguageIdentifyService.class);
  }

  /** the service must report exactly the expected ISO codes as supported. */
  public void testSupportedLanguages() {
    final String[] expectedLanguages =
      { "ro", "ca", "no", "hu", "lt", "th", "de", "fi", "sv", "fr", "be", "sl", "sk", "uk", "da", "is", "it", "gl",
        "el", "pl", "pt", "eo", "en", "ru", "et", "es", "nl" };
    final Set<String> supportedLanguages = _identifier.getSupportedLanguages();
    assertNotNull("Set of supported languages must not be null.", supportedLanguages);
    // include the actual set in the message so that a size mismatch shows which codes are missing or unexpected.
    assertEquals("Unexpected number of supported languages: " + supportedLanguages, expectedLanguages.length,
      supportedLanguages.size());
    for (final String lang : expectedLanguages) {
      assertTrue("Language " + lang + " should be supported.", supportedLanguages.contains(lang));
    }
  }

  public void testGermanText() throws Exception {
    assertLanguage("de", "german", false, "Sein oder nicht sein, das ist hier die Frage.");
  }

  public void testEnglishText() throws Exception {
    assertLanguage("en", "english", false, "To be or not to be that is the question.");
  }

  public void testFrenchText() throws Exception {
    assertLanguage("fr", "french", false, "Être, ou ne pas être, telle est la question.");
  }

  public void testItalianText() throws Exception {
    assertLanguage("it", "italian", false, "Essere, o non essere, questo è il dilemma");
  }

  public void testSpanishText() throws Exception {
    assertLanguage("es", "spanish", false, "Ser o no ser, ésa es la pregunta. "
      + "¿Qué es más noble para el espíritu?"
      + "¿Sufrir los dardos y golpes del destino o tomar las armas contra un "
      + "mar de angustias y terminar con ellas combatiéndolas?");
  }

  public void testPortugueseText() throws Exception {
    assertLanguage("pt", "portuguese", false, "Ser ou não ser, eis a questão");
  }

  public void testDutchText() throws Exception {
    assertLanguage("nl", "dutch", false, "Te zijn of niet te zijn, dat is de kwestie");
  }

  /** currently disabled, see TODO: the method body is intentionally empty. */
  public void testRussianText() throws Exception {
    // TODO this is reported as "is", maybe we can find a better text.
    // NOTE(review): the sample below used to be stored as mis-decoded UTF-8 (Cyrillic shown as "Ð"/"Ñ" sequences),
    // which possibly explains the "is" result - re-check with the repaired text before enabling the assertion.
    // assertLanguage("ru", "russian", false, "Быть или не быть. Вопрос в том, что благородней");
  }

  public void testSlovakText() throws Exception {
    assertLanguage("sk", "slovak", false,
      "Hamlet, celý názov Tragédia o Hamletovi, Princovi dánskom je nájznámejšou a "
        + "najčastejšie citovanou divadelnou hrou anglického dramatika Williama Shakespearea.");
  }

  public void testSlovenianText() throws Exception {
    assertLanguage("sl", "slovenian", false, "Hamlet je tragedija angleškega dramatika Williama Shakespeara. "
      + "Igra je nastala v njegovem tako imenovanem »drugem obdobju« njegovega ustvarjanja v letih med "
      + "1601 in 1608, ki štejejo za dramatikovo najzrelejše obdobje.");
  }

  public void testDanishText() throws Exception {
    assertLanguage("da", "danish", false, "At være eller ikke at være: det er spørgsmålet");
  }

  public void testNynorskText() throws Exception {
    assertLanguage("no", "nynorsk", false, "å vere eller å ikkje vere, det er spørsmålet");
  }

  public void testSwedish() throws Exception {
    assertLanguage("sv", "swedish", false, "Att vara eller icke vara, det är frågan");
  }

  public void testFinnishText() throws Exception {
    assertLanguage("fi", "finnish", false, "ollako vai eikö olla");
  }

  /** Esperanto is expected to be detected, but without an alternative name. */
  public void testEsperantoText() {
    assertLanguage("eo", null, false, "Ĉu esti aŭ ne esti, tio estas la demando");
  }

  /** empty and null input must yield no language at all. */
  public void testEmptyString() {
    assertNull(_identifier.identify(""));
    assertNull(_identifier.identify(null));
  }

  /** checks the ISO code to alternative name mapping, including an unknown code. */
  public void testAlternativeNames() throws Exception {
    assertEquals("german", _identifier.getAlternativeName("de"));
    assertEquals("english", _identifier.getAlternativeName("en"));
    assertEquals("french", _identifier.getAlternativeName("fr"));
    assertEquals("spanish", _identifier.getAlternativeName("es"));
    assertEquals("dutch", _identifier.getAlternativeName("nl"));
    assertEquals("russian", _identifier.getAlternativeName("ru"));
    assertEquals("italian", _identifier.getAlternativeName("it"));
    assertNull(_identifier.getAlternativeName("tlh"));
  }

  /**
   * identifies the language of the given text and compares the result with the expected values.
   * 
   * @param expectedLanguage
   *          expected ISO language code.
   * @param expectedName
   *          expected alternative language name, may be null if no alternative name is expected.
   * @param expectedIsCertain
   *          expected value of the "certain" flag of the detection result.
   * @param text
   *          text to identify.
   */
  private void assertLanguage(final String expectedLanguage, final String expectedName,
    final boolean expectedIsCertain, final String text) {
    final Language detectedLanguage = _identifier.identify(text);
    // identify() may return null (see testEmptyString): fail with a clear message instead of a NullPointerException.
    assertNotNull("no language detected, expected " + expectedLanguage, detectedLanguage);
    assertEquals("wrong ISO code", expectedLanguage, detectedLanguage.getIsoLanguage());
    assertEquals("wrong language name", expectedName, detectedLanguage.getAlternativeName());
    assertEquals("wrong isCertain flag", expectedIsCertain, detectedLanguage.isCertain());
  }

}
