/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.importing.crawler.web;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.importing.crawler.web.WebCrawlerConstants;
import org.eclipse.smila.importing.crawler.web.filter.FilterConfiguration;
import org.eclipse.smila.importing.crawler.web.utils.RobotsTxt;
import org.eclipse.smila.importing.util.CrawlingContext;
import org.eclipse.smila.jobmanager.JobRunDataProvider;
import org.eclipse.smila.jobmanager.exceptions.JobManagerException;
import org.eclipse.smila.taskworker.TaskContext;
import org.eclipse.smila.taskworker.TaskLog;

/**
 * Crawling context used by the web crawler while processing one task.
 *
 * <p>Holds the filter configuration, the number of links per bulk and the link error handling mode
 * (all read from the task parameters), the id of the input bulk currently being processed, the sets
 * of visited and extracted URLs, and a per-host cache of parsed {@link RobotsTxt} objects.
 *
 * <p>The URL sets and the robots.txt cache are plain {@link HashSet}/{@link HashMap} instances and
 * this class performs no synchronization of its own.
 */
public class WebCrawlingContext
extends CrawlingContext {
    /** Task parameter holding the filter configuration map. */
    private static final String PARAM_FILTERS = "filters";

    /** Task parameter holding the number of links per bulk. */
    private static final String PARAM_LINKS_PER_BULK = "linksPerBulk";

    /** Task parameter holding the name of the {@link WebCrawlerConstants.ErrorHandling} mode. */
    private static final String PARAM_LINK_ERROR_HANDLING = "linkErrorHandling";

    /** Identifier passed to the job run data provider for all custom data written by this class. */
    private static final String CUSTOM_DATA_ID = "webCrawler";

    /** First path element under which robots.txt data is kept; the second one is the host and port. */
    private static final String CUSTOM_DATA_ROBOTS_TXT = "robots.txt";

    /** Links per bulk used when the task parameters do not specify a value. */
    private static final int DEFAULT_LINKS_PER_BULK = 10;

    private final FilterConfiguration _filterConfiguration;
    private String _currentInputBulkId;
    private final Set<String> _visitedUrls = new HashSet<String>();
    private final Set<String> _extractedUrls = new HashSet<String>();
    private final Map<String, RobotsTxt> _robotsTxts = new HashMap<String, RobotsTxt>();
    private int _linksPerBulk = DEFAULT_LINKS_PER_BULK;
    private WebCrawlerConstants.ErrorHandling _errorHandling = WebCrawlerConstants.ErrorHandling.DROP;

    /**
     * Creates the context and reads the crawler settings from the task parameters.
     *
     * <p>Missing parameters fall back to defaults: an empty filter configuration, 10 links per bulk
     * and {@link WebCrawlerConstants.ErrorHandling#DROP}.
     *
     * @param taskContext context of the task to process
     * @throws IllegalArgumentException if the {@code linkErrorHandling} parameter does not name an
     *         {@link WebCrawlerConstants.ErrorHandling} constant (compared case-insensitively)
     */
    public WebCrawlingContext(TaskContext taskContext) {
        super(taskContext, false);
        // Read the parameters straight from the task context instead of going through the
        // overridable getTaskParameters() while the object is still under construction.
        final AnyMap parameters = taskContext.getTaskParameters();
        if (parameters.containsKey(PARAM_FILTERS)) {
            this._filterConfiguration = new FilterConfiguration(parameters.getMap(PARAM_FILTERS));
        } else {
            this._filterConfiguration = new FilterConfiguration(DataFactory.DEFAULT.createAnyMap());
        }
        if (parameters.containsKey(PARAM_LINKS_PER_BULK)) {
            this._linksPerBulk = parameters.getLongValue(PARAM_LINKS_PER_BULK).intValue();
        }
        if (parameters.containsKey(PARAM_LINK_ERROR_HANDLING)) {
            // Locale.ROOT keeps the upper-casing independent of the JVM default locale; with e.g. a
            // Turkish default locale 'i' would become a dotted capital I and never match a constant.
            final String modeName = parameters.getStringValue(PARAM_LINK_ERROR_HANDLING).toUpperCase(Locale.ROOT);
            this._errorHandling = WebCrawlerConstants.ErrorHandling.valueOf(modeName);
        }
    }

    /**
     * @return the filter configuration built from the {@code filters} task parameter, or one built
     *         from an empty map if that parameter was not set
     */
    public FilterConfiguration getFilterConfiguration() {
        return this._filterConfiguration;
    }

    /** @return the log of the underlying task context */
    public TaskLog getTaskLog() {
        return this.getTaskContext().getLog();
    }

    /** @return the parameters of the underlying task context */
    public AnyMap getTaskParameters() {
        return this.getTaskContext().getTaskParameters();
    }

    /** @return the input bulk id last set via {@link #setCurrentInputBulkId(String)}, {@code null} if none was set */
    public String getCurrentInputBulkId() {
        return this._currentInputBulkId;
    }

    /** @param inputBulkId id of the input bulk to remember as the current one */
    public void setCurrentInputBulkId(String inputBulkId) {
        this._currentInputBulkId = inputBulkId;
    }

    /** @return the internal, mutable set of visited URLs (not a copy) */
    public Set<String> getVisitedUrls() {
        return this._visitedUrls;
    }

    /** @return the configured number of links per bulk (10 unless overridden by the task parameters) */
    public int getLinksPerBulk() {
        return this._linksPerBulk;
    }

    /** @return the configured link error handling mode ({@code DROP} unless overridden by the task parameters) */
    public WebCrawlerConstants.ErrorHandling getErrorHandling() {
        return this._errorHandling;
    }

    /** @return the internal, mutable set of extracted URLs (not a copy) */
    public Set<String> getExtractedUrls() {
        return this._extractedUrls;
    }

    /**
     * Looks up the robots.txt for a host, first in the local cache and then in the job run data.
     * A robots.txt found in the job run data is parsed and added to the local cache.
     *
     * @param hostAndPort key identifying the host
     * @param jobRunDataProvider provider used to read the job run data, may be {@code null}
     * @return the robots.txt for the host, or {@code null} if none is cached or stored
     */
    public RobotsTxt getRobotsTxt(String hostAndPort, JobRunDataProvider jobRunDataProvider) {
        final RobotsTxt cached = this._robotsTxts.get(hostAndPort);
        if (cached != null) {
            return cached;
        }
        final byte[] binaryRobotsTxt = this.getStoredRobotsTxt(hostAndPort, jobRunDataProvider);
        if (binaryRobotsTxt == null) {
            return null;
        }
        final RobotsTxt loaded = new RobotsTxt(binaryRobotsTxt);
        this._robotsTxts.put(hostAndPort, loaded);
        return loaded;
    }

    /**
     * Reads the binary robots.txt for a host from the job run data.
     *
     * @return the stored bytes, or {@code null} if the job run data cannot be accessed, nothing is
     *         returned by the provider, or reading failed (the failure is logged as a warning)
     */
    private byte[] getStoredRobotsTxt(String hostAndPort, JobRunDataProvider jobRunDataProvider) {
        if (!this.canAccessJobRunData(jobRunDataProvider)) {
            return null;
        }
        try {
            return jobRunDataProvider.getCustomData(this.getJobName(), this.getJobRunId(), CUSTOM_DATA_ID, new String[]{CUSTOM_DATA_ROBOTS_TXT, hostAndPort});
        }
        catch (JobManagerException ex) {
            // Best effort: a failed read is reported but treated like "nothing stored".
            this.getTaskLog().warn("Failed to read robots.txt for " + hostAndPort + " from job run data: " + ex);
            return null;
        }
    }

    /**
     * Caches the robots.txt for a host locally and, if the job run data is accessible, also stores
     * its binary form there. A failure to store is logged as a warning and otherwise ignored.
     *
     * @param hostAndPort key identifying the host
     * @param robotsTxt robots.txt to cache and store
     * @param jobRunDataProvider provider used to write the job run data, may be {@code null}
     */
    public void putRobotsTxt(String hostAndPort, RobotsTxt robotsTxt, JobRunDataProvider jobRunDataProvider) {
        this._robotsTxts.put(hostAndPort, robotsTxt);
        if (!this.canAccessJobRunData(jobRunDataProvider)) {
            return;
        }
        try {
            jobRunDataProvider.setCustomData(this.getJobName(), this.getJobRunId(), CUSTOM_DATA_ID, robotsTxt.asBinary(), new String[]{CUSTOM_DATA_ROBOTS_TXT, hostAndPort});
        }
        catch (JobManagerException ex) {
            // Best effort: the local cache entry above is kept even if persisting fails.
            this.getTaskLog().warn("Failed to store robots.txt for " + hostAndPort + " in job run data: " + ex);
        }
    }

    /** Tells whether a provider is given and both job name and job run id are available. */
    private boolean canAccessJobRunData(JobRunDataProvider jobRunDataProvider) {
        return jobRunDataProvider != null && this.getJobName() != null && this.getJobRunId() != null;
    }
}

