/*********************************************************************************************************************
 * Copyright (c) 2008, 2013 Empolis Information Management GmbH and brox IT Solutions GmbH. All rights reserved. This
 * program and the accompanying materials are made available under the terms of the Eclipse Public License v1.0 which
 * accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html
 *********************************************************************************************************************/
package org.eclipse.smila.importing.crawler.jdbc.test;

import java.util.Arrays;
import java.util.Collection;

import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.importing.ImportingConstants;
import org.eclipse.smila.importing.crawler.jdbc.JdbcCrawlerWorker;
import org.eclipse.smila.jobmanager.JobState;

/**
 * Tests for the splitting functionality of the JDBC crawler worker: a "split limits" SQL statement yields a "min" and
 * a "max" value, and the crawl SQL statement is then executed per split with a lower and an upper bound parameter.
 * <p>
 * All tests define a job, start a job run, wait for it to reach the expected state and (for the successful cases)
 * check the crawled records and the number of successful {@link JdbcCrawlerWorker} tasks reported in the job run data.
 */
public class TestSplitting extends JdbcCrawlerJobTestBase {

  /** name of the bucket the crawled records are checked in. */
  private static final String BUCKET = "crawledRows";

  /** workflow used by the tests that expect splitting to work. */
  private static final String WORKFLOWNAME = "crawlDatabaseWithSplitting";

  /** number of rows created in the test table, see {@link #getNumberOfRowsToCreate()}. */
  private static final int NO_OF_DB_ROWS = 1000;

  /** default value for the split increment parameter. */
  private static final int SPLIT_INC = 100;

  /** name of the attachment that the "varbinary_val" column is mapped to, see {@link #initMapping()}. */
  private static final String ATTACHMENT_NAME = "my-binary-attachment";

  /** crawl statement with the two bound parameters (lower and upper limit) used for splitting. */
  private static final String DEFAULT_CRAWL_SQL = "SELECT * FROM " + DB_TABLE_NAME
    + " WHERE int_val >= ? AND int_val <= ?";

  /** split limits statement delivering the "min" and "max" columns. */
  private static final String DEFAULT_SPLIT_SQL = "SELECT min(int_val) AS \"min\", max(int_val) AS \"max\" FROM "
    + DB_TABLE_NAME;

  /** should not influence the number of created tasks. */
  private static final int MAX_RECORDS_PER_BULK = Integer.MAX_VALUE;

  /** simple standard test for splitting functionality. */
  public void testSimpleSplitting() throws Exception {
    final String jobName = "testSimpleSplitting";
    final AnyMap jobDefinition =
      createJob(jobName, WORKFLOWNAME, getConnectUrl(), DEFAULT_CRAWL_SQL, null, MAX_RECORDS_PER_BULK);
    addSplitParams(jobDefinition, DEFAULT_SPLIT_SQL, SPLIT_INC);
    defineJob(jobDefinition);
    final String jobRunId = _jobRunEngine.startJob(jobName);
    waitForJobRunCompleted(jobName, jobRunId, DEFAULT_WAIT_TIME);
    checkResult();

    // check splitting was done -> check that more than one task was created for jdbcCrawler worker
    final int expectedTaskCount = NO_OF_DB_ROWS / SPLIT_INC + 1; // +1 due to initial task
    assertEquals(expectedTaskCount, getSuccessfulCrawlerTaskCount(jobName, jobRunId));
  }

  /** test for splitting functionality where min = max. */
  public void testMinEqualsMax() throws Exception {
    final String jobName = "testMinEqualsMax";
    final String crawlSql =
      "SELECT * FROM " + DB_TABLE_NAME + " WHERE repeating_int_val >= ? AND repeating_int_val <= ?";
    final String splitLimitsSql = "SELECT 1 AS \"min\", 1 AS \"max\" FROM " + DB_TABLE_NAME; // min = max

    final AnyMap jobDefinition =
      createJob(jobName, WORKFLOWNAME, getConnectUrl(), crawlSql, null, MAX_RECORDS_PER_BULK);
    addSplitParams(jobDefinition, splitLimitsSql, SPLIT_INC);
    defineJob(jobDefinition);
    final String jobRunId = _jobRunEngine.startJob(jobName);
    waitForJobRunCompleted(jobName, jobRunId, DEFAULT_WAIT_TIME);

    final int expectedRecordCount = NO_OF_DB_ROWS / 5; // why 5? see repeating_int_val in JdbcCrawlerTestBase
    checkCrawledRecords(expectedRecordCount, BUCKET, null, null, Arrays.asList(ATTACHMENT_NAME));

    final int expectedTaskCount = 2; // 1 initial + 1 created
    assertEquals(expectedTaskCount, getSuccessfulCrawlerTaskCount(jobName, jobRunId));
  }

  /** test with split increment = 1. */
  public void testWithMinimalSplitInc() throws Exception {
    final int splitIncrement = 1;
    // The createJob overload used here takes no job name and the run is started under getName(), so the same name
    // is used for waiting and for the run data lookup (assumes getName() is the test method name - TODO confirm).
    final String jobName = getName();
    final String splitLimitsSql = "SELECT 1 AS \"min\", 20 AS \"max\" FROM " + DB_TABLE_NAME;

    final AnyMap jobDefinition = createJob(getConnectUrl(), DEFAULT_CRAWL_SQL, null, Integer.MAX_VALUE);
    addSplitParams(jobDefinition, splitLimitsSql, splitIncrement);
    defineJob(jobDefinition);
    final String jobRunId = _jobRunEngine.startJob(jobName);
    waitForJobRunCompleted(jobName, jobRunId, DEFAULT_WAIT_TIME);

    final int expectedRecordCount = 20;
    checkCrawledRecords(expectedRecordCount, BUCKET, null, null, Arrays.asList(ATTACHMENT_NAME));

    final int expectedTaskCount = 20 + 1; // +1 due to initial task
    assertEquals(expectedTaskCount, getSuccessfulCrawlerTaskCount(jobName, jobRunId));
  }

  /** tests that job run fails if we set the splitting parameters but didn't set the split input/output slots. */
  public void testErrorNoSplitSlots() throws Exception {
    final String jobName = "testErrorNoSplitSlots";
    // workflow exists, but has no 'splitsToCrawl' input/output slot for jdbcCrawler
    final String workflow = "crawlDatabase";
    final AnyMap jobDefinition =
      createJob(jobName, workflow, getConnectUrl(), DEFAULT_CRAWL_SQL, null, MAX_RECORDS_PER_BULK);
    addSplitParams(jobDefinition, DEFAULT_SPLIT_SQL, SPLIT_INC);
    defineJob(jobDefinition);
    final String jobRunId = _jobRunEngine.startJob(jobName);
    waitForJobRun(jobName, jobRunId, DEFAULT_WAIT_TIME, JobState.FAILED);
  }

  /** tests that job run fails if only one of the two splitting parameters is set. */
  public void testErrorMissingParameter() throws Exception {
    final String jobName = "testErrorMissingParameter";

    // missing "splitInc" param
    AnyMap jobDefinition =
      createJob(jobName, WORKFLOWNAME, getConnectUrl(), DEFAULT_CRAWL_SQL, null, MAX_RECORDS_PER_BULK);
    addSplitParams(jobDefinition, DEFAULT_SPLIT_SQL, null);
    defineJob(jobDefinition);
    String jobRunId = _jobRunEngine.startJob(jobName);
    waitForJobRun(jobName, jobRunId, DEFAULT_WAIT_TIME, JobState.FAILED);

    // missing "splitLimitsSql" param
    jobDefinition =
      createJob(jobName, WORKFLOWNAME, getConnectUrl(), DEFAULT_CRAWL_SQL, null, MAX_RECORDS_PER_BULK);
    addSplitParams(jobDefinition, null, SPLIT_INC);
    defineJob(jobDefinition);
    jobRunId = _jobRunEngine.startJob(jobName);
    waitForJobRun(jobName, jobRunId, DEFAULT_WAIT_TIME, JobState.FAILED);
  }

  /** tests that job run fails if one of the parameters involved in splitting has an invalid value. */
  public void testErrorInvalidParameterValue() throws Exception {
    final String jobName = "testErrorInvalidParameterValue";
    AnyMap jobDefinition =
      createJob(jobName, WORKFLOWNAME, getConnectUrl(), DEFAULT_CRAWL_SQL, null, MAX_RECORDS_PER_BULK);

    // invalid "splitInc" param value: negative
    addSplitParams(jobDefinition, DEFAULT_SPLIT_SQL, -10);
    defineJob(jobDefinition);
    String jobRunId = _jobRunEngine.startJob(jobName);
    waitForJobRun(jobName, jobRunId, DEFAULT_WAIT_TIME, JobState.FAILED);

    // invalid "splitInc" param value: zero (overwrites the value set above in the same job definition)
    addSplitParams(jobDefinition, DEFAULT_SPLIT_SQL, 0);
    defineJob(jobDefinition);
    jobRunId = _jobRunEngine.startJob(jobName);
    waitForJobRun(jobName, jobRunId, DEFAULT_WAIT_TIME, JobState.FAILED);

    // invalid "splitLimitsSql" param value: result has no "min" column; split increment is reset to a valid value
    addSplitParams(jobDefinition, "SELECT min(int_val) AS \"max\", max(int_val) AS \"moritz\" FROM "
      + DB_TABLE_NAME, SPLIT_INC);
    defineJob(jobDefinition);
    jobRunId = _jobRunEngine.startJob(jobName);
    waitForJobRun(jobName, jobRunId, DEFAULT_WAIT_TIME, JobState.FAILED);

    // invalid "crawlSql" param value: statement has no bound parameters for the split limits
    // NOTE(review): this createJob overload takes no job name; presumably it uses getName(), which must then be
    // equal to jobName for the startJob call below - confirm against the base class.
    jobDefinition = createJob(getConnectUrl(), "SELECT * FROM " + DB_TABLE_NAME, null, MAX_RECORDS_PER_BULK);
    addSplitParams(jobDefinition, DEFAULT_SPLIT_SQL, SPLIT_INC);
    defineJob(jobDefinition);
    jobRunId = _jobRunEngine.startJob(jobName);
    waitForJobRun(jobName, jobRunId, DEFAULT_WAIT_TIME, JobState.FAILED);
  }

  /**
   * tests splitting with a crawl statement that additionally excludes the int_val ranges [250, 450) and [650, 850).
   * All split tasks are still expected to be successful, but 400 records and two bulks less than in the simple case
   * are expected (presumably because two of the splits do not deliver any rows at all).
   */
  public void testEmptySplits() throws Exception {
    final String jobName = "testEmptySplits";
    final String crawlSql =
      "SELECT * FROM " + DB_TABLE_NAME + " WHERE int_val >= ? AND int_val <= ?"
        + " AND (int_val < 250 OR int_val >= 450)" + " AND (int_val < 650 OR int_val >= 850)";
    final AnyMap jobDefinition =
      createJob(jobName, WORKFLOWNAME, getConnectUrl(), crawlSql, null, MAX_RECORDS_PER_BULK);
    addSplitParams(jobDefinition, DEFAULT_SPLIT_SQL, SPLIT_INC);
    defineJob(jobDefinition);
    final String jobRunId = _jobRunEngine.startJob(jobName);
    waitForJobRunCompleted(jobName, jobRunId, DEFAULT_WAIT_TIME);

    // check splitting was done -> check that more than one task was created for jdbcCrawler worker
    final int expectedSplitCount = NO_OF_DB_ROWS / SPLIT_INC;
    assertEquals(expectedSplitCount + 1, getSuccessfulCrawlerTaskCount(jobName, jobRunId)); // +1 due to initial task
    checkResult(getNumberOfRowsToCreate() - 400, expectedSplitCount - 2);
  }

  /** helper method check the result records, expecting all created rows and the default number of bulks. */
  protected void checkResult() throws Exception {
    checkResult(getNumberOfRowsToCreate(),
      getExpectedNumberOfBulks(getNumberOfRowsToCreate(), MAX_RECORDS_PER_BULK));
  }

  /**
   * helper method check the result records.
   *
   * @param expectedNumberOfRecords
   *          number of records expected in the bucket.
   * @param expectedNumberOfBulks
   *          number of bulks expected in the bucket.
   */
  protected void checkResult(final int expectedNumberOfRecords, final int expectedNumberOfBulks) throws Exception {
    checkBulks(expectedNumberOfBulks, BUCKET);
    final Collection<String> expectedAttributes =
      Arrays.asList(ImportingConstants.ATTRIBUTE_DELTA_HASH, "my-int-value", "my-big-int-value", "my-double-value",
        "my-varchar-value");
    // "float_val" is not part of the mapping, see initMapping()
    final Collection<String> unexpectedAttributes = Arrays.asList("float_val");
    final Collection<String> expectedAttachments = Arrays.asList(ATTACHMENT_NAME);
    checkCrawledRecords(expectedNumberOfRecords, BUCKET, expectedAttributes, unexpectedAttributes,
      expectedAttachments);
  }

  /**
   * read the number of successful tasks of the jdbcCrawler worker from the job run data.
   *
   * @param jobName
   *          name of the job.
   * @param jobRunId
   *          id of the job run to get the data for.
   * @return value of "successfulTaskCount" in the "0_" + {@link JdbcCrawlerWorker#NAME} entry of the "worker" map.
   */
  private int getSuccessfulCrawlerTaskCount(final String jobName, final String jobRunId) throws Exception {
    final AnyMap runData = _jobRunDataProvider.getJobRunData(jobName, jobRunId);
    final AnyMap crawlerData = runData.getMap("worker").getMap("0_" + JdbcCrawlerWorker.NAME);
    return crawlerData.getLongValue("successfulTaskCount").intValue();
  }

  /**
   * add splitting params to job def.
   *
   * @param jobDefinition
   *          job definition whose "parameters" map is extended.
   * @param splitSql
   *          split limits SQL statement, not set if null.
   * @param splitInc
   *          split increment, not set if null.
   */
  private void addSplitParams(final AnyMap jobDefinition, final String splitSql, final Integer splitInc)
    throws Exception {
    final AnyMap parameters = jobDefinition.getMap("parameters");
    if (splitSql != null) {
      parameters.put(JdbcCrawlerWorker.TASK_PARAM_SPLIT_LIMITS_SQL, splitSql);
    }
    if (splitInc != null) {
      parameters.put(JdbcCrawlerWorker.TASK_PARAM_SPLIT_INCREMENT, splitInc);
    }
  }

  /** @return the workflow with splitting support. */
  @Override
  protected String getWorkflow() {
    return WORKFLOWNAME;
  }

  /** @return mapping of database column names to record attribute/attachment names. */
  @Override
  protected AnyMap initMapping() {
    final AnyMap map = DataFactory.DEFAULT.createAnyMap();
    map.put("int_val", "my-int-value");
    map.put("bigint_val", "my-big-int-value");
    map.put("double_val", "my-double-value");
    // "float_val" is intentionally not mapped to check that the value is excluded in the result record
    map.put("varchar_val", "my-varchar-value");
    map.put("varbinary_val", ATTACHMENT_NAME);
    return map;
  }

  /** @return number of rows divided by the split increment; both arguments are ignored. */
  @Override
  protected int getExpectedNumberOfBulks(final int numberOfRecords, final int maxRecordsPerBulk) {
    return NO_OF_DB_ROWS / SPLIT_INC;
  }

  /** @return number of rows to create in the test table. */
  @Override
  protected int getNumberOfRowsToCreate() {
    return NO_OF_DB_ROWS;
  }

}
