/*******************************************************************************
 * Copyright (c) 2008, 2011 Attensity Europe GmbH and brox IT Solutions GmbH. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse Public License v1.0 which accompanies this
 * distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: Juergen Schumacher (Attensity Europe GmbH) - initial API and implementation
 *******************************************************************************/

package org.eclipse.smila.importing.crawler.web.test;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Arrays;

import org.apache.commons.io.IOUtils;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.http.server.HttpService;
import org.eclipse.smila.importing.crawler.web.Fetcher;
import org.eclipse.smila.importing.crawler.web.WebCrawlerException;
import org.eclipse.smila.importing.crawler.web.fetcher.SimpleFetcher;
import org.eclipse.smila.taskworker.DefaultTaskLogFactory;
import org.eclipse.smila.taskworker.TaskLog;
import org.eclipse.smila.test.DeclarativeServiceTestCase;
import org.eclipse.smila.utils.config.ConfigUtils;

/**
 * Test class for SimpleFetcher.
 *
 * All tests talk to the URLs below {@code http://localhost:8765/files/} and compare the results against the test files
 * read via {@link #getFileContent(String)}.
 */
public class TestSimpleFetcher extends DeclarativeServiceTestCase {
  /** dummy logger. */
  private final TaskLog _taskLog = new DefaultTaskLogFactory().getTaskLog(null);

  /** the fetcher service under test, looked up in {@link #setUp()}. */
  private Fetcher _fetcher;

  /** look up the Fetcher service to test. */
  @Override
  protected void setUp() throws Exception {
    _fetcher = getService(Fetcher.class);
  }

  /** assert that we are testing the right implementation. */
  public void testService() throws Exception {
    assertTrue(_fetcher instanceof SimpleFetcher);
  }

  /** just check if the test web server has been started correctly and delivers the expected index page. */
  public void testWebServerAvailable() throws Exception {
    final HttpService server = getService(HttpService.class);
    assertNotNull(server);
    final URL testUrl = new URL("http://localhost:8765/files/");
    final InputStream testStream = testUrl.openStream();
    try {
      final byte[] content = IOUtils.toByteArray(testStream);
      assertContentEqualsFile("index.html", content);
    } finally {
      IOUtils.closeQuietly(testStream);
    }
  }

  /** crawl a simple HTML page, fetch content for extraction. */
  public void testCrawlHtml() throws Exception {
    final Record link = DataFactory.DEFAULT.createRecord("testCrawlHtml", getClass().getName());
    link.getMetadata().put("http.url", "http://localhost:8765/files/");
    _fetcher.crawl(link, null, _taskLog);
    assertTrue(link.hasAttachment("http.content"));
    final byte[] content = link.getAttachmentAsBytes("http.content");
    assertEquals(83, content.length);
    assertContentEqualsFile("index.html", content);
    assertEquals("text/html", link.getMetadata().getStringValue("http.contenttype"));
    assertEquals("text/html", link.getMetadata().getStringValue("http.mimetype"));
    assertFalse(link.getMetadata().containsKey("http.charset"));
    assertEquals(83, link.getMetadata().getLongValue("http.size").intValue());
    assertTrue(link.getMetadata().containsKey("http.lastModified"));
    assertTrue(link.getMetadata().get("http.lastModified").isDateTime());
  }

  /** crawl plain text page, don't fetch content yet. */
  public void testCrawlPlainText() throws Exception {
    final Record link = DataFactory.DEFAULT.createRecord("testCrawlPlainText", getClass().getName());
    link.getMetadata().put("http.url", "http://localhost:8765/files/plain.txt");
    _fetcher.crawl(link, null, _taskLog);
    assertFalse(link.hasAttachment("http.content"));
    assertEquals("text/plain", link.getMetadata().getStringValue("http.contenttype"));
    assertEquals("text/plain", link.getMetadata().getStringValue("http.mimetype"));
    assertFalse(link.getMetadata().containsKey("http.charset"));
    assertEquals(18, link.getMetadata().getLongValue("http.size").intValue());
    assertTrue(link.getMetadata().containsKey("http.lastModified"));
    assertTrue(link.getMetadata().get("http.lastModified").isDateTime());
  }

  /** crawl binary, don't fetch content yet. */
  public void testCrawlBinary() throws Exception {
    final Record link = DataFactory.DEFAULT.createRecord("testCrawlBinary", getClass().getName());
    link.getMetadata().put("http.url", "http://localhost:8765/files/icon.png");
    _fetcher.crawl(link, null, _taskLog);
    assertFalse(link.hasAttachment("http.content"));
    assertEquals("image/png", link.getMetadata().getStringValue("http.contenttype"));
    assertEquals("image/png", link.getMetadata().getStringValue("http.mimetype"));
    assertFalse(link.getMetadata().containsKey("http.charset"));
    assertEquals(1157, link.getMetadata().getLongValue("http.size").intValue());
    assertTrue(link.getMetadata().containsKey("http.lastModified"));
    assertTrue(link.getMetadata().get("http.lastModified").isDateTime());
  }

  /** throw non-recoverable exception when crawling a resource that does not exist. */
  public void testCrawlMissingLink() throws Exception {
    final Record link = DataFactory.DEFAULT.createRecord("testCrawlMissingLink", getClass().getName());
    link.getMetadata().put("http.url", "http://localhost:8765/files/no.such.resource");
    try {
      _fetcher.crawl(link, null, _taskLog);
      fail("should not work");
    } catch (final WebCrawlerException ex) {
      assertFalse(ex.isRecoverable());
    }
  }

  /** fetch a single HTML page. no other attributes are set. */
  public void testFetchHtml() throws Exception {
    final Record link = DataFactory.DEFAULT.createRecord("testFetchHtml", getClass().getName());
    link.getMetadata().put("http.url", "http://localhost:8765/files/");
    _fetcher.fetch(link, null, _taskLog);
    assertTrue(link.hasAttachment("http.content"));
    final byte[] content = link.getAttachmentAsBytes("http.content");
    assertEquals(83, content.length);
    assertContentEqualsFile("index.html", content);
    assertEquals(4, link.getMetadata().size()); // only source, id, size and url attribute set.
  }

  /** fetch a single plain text page. no other attributes are set. */
  public void testFetchPlainText() throws Exception {
    final Record link = DataFactory.DEFAULT.createRecord("testFetchPlainText", getClass().getName());
    link.getMetadata().put("http.url", "http://localhost:8765/files/plain.txt");
    _fetcher.fetch(link, null, _taskLog);
    assertTrue(link.hasAttachment("http.content"));
    final byte[] content = link.getAttachmentAsBytes("http.content");
    assertEquals(18, content.length);
    assertContentEqualsFile("plain.txt", content);
    assertEquals(4, link.getMetadata().size()); // only source, id, size and url attribute set.
  }

  /** fetch a single binary resource. no other attributes are set. */
  public void testFetchBinary() throws Exception {
    final Record link = DataFactory.DEFAULT.createRecord("testFetchBinary", getClass().getName());
    link.getMetadata().put("http.url", "http://localhost:8765/files/icon.png");
    _fetcher.fetch(link, null, _taskLog);
    assertTrue(link.hasAttachment("http.content"));
    final byte[] content = link.getAttachmentAsBytes("http.content");
    assertEquals(1157, content.length);
    assertContentEqualsFile("icon.png", content);
    assertEquals(4, link.getMetadata().size()); // only source, id, size and url attribute set.
  }

  /** throw non-recoverable exception when fetching a resource that does not exist. */
  public void testFetchMissingLink() throws Exception {
    final Record link = DataFactory.DEFAULT.createRecord("testFetchMissingLink", getClass().getName());
    link.getMetadata().put("http.url", "http://localhost:8765/files/no.such.resource");
    try {
      _fetcher.fetch(link, null, _taskLog);
      fail("should not work");
    } catch (final WebCrawlerException ex) {
      assertFalse(ex.isRecoverable());
    }
  }

  /** throw non-recoverable exception when crawling a link that is answered with a redirect. */
  public void testCrawlNoRedirect() throws Exception {
    final Record link = DataFactory.DEFAULT.createRecord("testCrawlNoRedirect", getClass().getName());
    // this link is redirected to "http://localhost:8765/files/"
    link.getMetadata().put("http.url", "http://localhost:8765/files");
    try {
      _fetcher.crawl(link, null, _taskLog);
      fail("should not work");
    } catch (final WebCrawlerException ex) {
      assertFalse(ex.isRecoverable());
    }
  }

  /**
   * assert that the given bytes are identical to the content of a test file.
   *
   * The result of {@link Arrays#equals(byte[], byte[])} must be asserted explicitly, calling it as a plain statement
   * does not check anything.
   *
   * @param fileName
   *          name of the test file, see {@link #getFileContent(String)}.
   * @param actualContent
   *          the bytes received from the web server or the fetcher.
   * @throws IOException
   *           error reading the test file.
   */
  private void assertContentEqualsFile(final String fileName, final byte[] actualContent) throws IOException {
    assertTrue("content differs from test file " + fileName,
      Arrays.equals(getFileContent(fileName), actualContent));
  }

  /**
   * get content of a test file from file immediately.
   *
   * @param fileName
   *          name of the file, relative to the "files/" directory in the configuration of the test bundle.
   * @return the complete file content.
   * @throws IOException
   *           error reading the file.
   */
  private byte[] getFileContent(final String fileName) throws IOException {
    return IOUtils.toByteArray(ConfigUtils.getConfigStream(AllTests.BUNDLE_ID, "files/" + fileName));
  }

}
