/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.importing.crawler.feed;

import java.util.Collection;
import java.util.Iterator;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.datamodel.util.RecordMerger;
import org.eclipse.smila.importing.ImportingException;
import org.eclipse.smila.importing.crawler.feed.FeedCrawlingContext;
import org.eclipse.smila.importing.crawler.feed.FeedProperties;
import org.eclipse.smila.importing.crawler.feed.RomeFeedReader;
import org.eclipse.smila.importing.util.RecordOutputHandler;
import org.eclipse.smila.taskworker.TaskContext;
import org.eclipse.smila.taskworker.Worker;
import org.eclipse.smila.taskworker.input.Inputs;
import org.eclipse.smila.taskworker.input.RecordInput;
import org.eclipse.smila.utils.digest.DigestHelper;

public class FeedCrawlerWorker
implements Worker {
    public static final String NAME = "feedCrawler";
    public static final String INPUT_SLOT_LINKS_TO_CRAWL = "linksToCrawl";
    public static final String OUTPUT_SLOT_CRAWLED_RECORDS = "crawledRecords";
    public static final String TASK_PARAM_FEED_URL = "feedUrls";
    public static final String TASK_PARAM_MAX_RECORDS_PER_BULK = "maxRecordsPerBulk";
    public static final String TASK_PARAM_DELTA_PROPERTIES = "deltaProperties";
    public static final String INPUT_ATTRIBUTE_FEED_URL = "httpUrl";
    public static final Long MAX_RECORDS_PER_BULK_DEFAULT = 1000L;
    private final Log _log = LogFactory.getLog(this.getClass());

    public String getName() {
        return NAME;
    }

    public void perform(TaskContext taskContext) throws Exception {
        FeedCrawlingContext crawlContext = new FeedCrawlingContext(taskContext);
        Inputs inputs = taskContext.getInputs();
        RecordOutputHandler recordOutput = new RecordOutputHandler(taskContext.getOutputs(), crawlContext.getMaxRecordsPerBulk(), OUTPUT_SLOT_CRAWLED_RECORDS);
        StringBuilder errorMessage = new StringBuilder();
        boolean success = false;
        if (inputs.getDataObjectCount(INPUT_SLOT_LINKS_TO_CRAWL) == 0) {
            success = this.crawlConfiguredFeeds(crawlContext, recordOutput, errorMessage);
        } else {
            RecordInput linksToCrawl = inputs.getAsRecordInput(INPUT_SLOT_LINKS_TO_CRAWL);
            success = this.crawlInputRecordFeeds(crawlContext, linksToCrawl, recordOutput, errorMessage);
        }
        if (!success) {
            throw new ImportingException(errorMessage.toString());
        }
    }

    private boolean crawlInputRecordFeeds(FeedCrawlingContext crawlContext, RecordInput linksToCrawl, RecordOutputHandler recordOutput, StringBuilder errorMessage) throws Exception {
        boolean success = false;
        Record inputRecord = linksToCrawl.getRecord();
        while (inputRecord != null && !crawlContext.getTaskContext().isCanceled()) {
            String feedUrl = inputRecord.getMetadata().getStringValue(INPUT_ATTRIBUTE_FEED_URL);
            if (feedUrl != null) {
                success = this.crawl(crawlContext, feedUrl, recordOutput, inputRecord, errorMessage) || success;
            } else {
                this._log.warn((Object)("FeedCrawler input record contains no attribute 'httpUrl' which is needed for feed crawling. Record was: " + inputRecord));
            }
            inputRecord = linksToCrawl.getRecord();
        }
        return success;
    }

    private boolean crawlConfiguredFeeds(FeedCrawlingContext crawlContext, RecordOutputHandler recordOutput, StringBuilder errorMessage) throws Exception {
        boolean success = false;
        Iterator it = crawlContext.getFeedUrls().iterator();
        while (it.hasNext() && !crawlContext.getTaskContext().isCanceled()) {
            Record baseRecord;
            Any url = (Any)it.next();
            String feedUrl = url.toString();
            boolean bl = success = this.crawl(crawlContext, feedUrl, recordOutput, baseRecord = DataFactory.DEFAULT.createRecord(), errorMessage) || success;
        }
        return success;
    }

    private boolean crawl(FeedCrawlingContext crawlContext, String feedUrl, RecordOutputHandler recordOutput, Record baseRecord, StringBuilder errorMessage) throws Exception {
        if (this._log.isInfoEnabled()) {
            this._log.info((Object)("Crawling feed " + feedUrl));
        }
        try {
            RomeFeedReader feedReader = new RomeFeedReader();
            Collection<Record> results = feedReader.readFeed(feedUrl);
            Iterator<Record> it = results.iterator();
            while (it.hasNext() && !crawlContext.getTaskContext().isCanceled()) {
                Record feedRecord = it.next();
                this.augmentRecord(crawlContext, feedUrl, feedRecord);
                crawlContext.getMapper().mapNames(feedRecord, FeedProperties.ALL_PROPS);
                Record resultRecord = DataFactory.DEFAULT.cloneRecord(baseRecord, feedRecord.getId());
                RecordMerger.mergeRecords((Record)resultRecord, (Record)feedRecord, (boolean)false, (boolean)false);
                recordOutput.writeRecord(resultRecord);
            }
        }
        catch (Exception e) {
            String m = "Error while crawling feed '" + feedUrl + "': " + e.getMessage() + ". ";
            this._log.warn((Object)m);
            errorMessage.append(m);
            return false;
        }
        return true;
    }

    private Record augmentRecord(FeedCrawlingContext crawlContext, String feedUrl, Record record) throws ImportingException {
        record.setId(this.createId(crawlContext, record));
        record.setSource(crawlContext.getDataSource());
        record.getMetadata().put("feedSourceUrl", feedUrl);
        record.getMetadata().put("_deltaHash", this.createDeltaHash(crawlContext, record));
        return record;
    }

    private String createId(FeedCrawlingContext crawlContext, Record record) throws ImportingException {
        String feedEntryUri = record.getMetadata().getStringValue("itemUri");
        return String.valueOf(crawlContext.getDataSource()) + ":" + feedEntryUri;
    }

    private String createDeltaHash(FeedCrawlingContext crawlContext, Record record) throws ImportingException {
        AnyMap deltaValues;
        Any deltaProps = crawlContext.getDeltaProperties();
        if (deltaProps == null) {
            deltaProps = crawlContext.getMapper().getMappingKeys();
        }
        if ((deltaValues = this.collectDeltaPropertyValues(record, deltaProps)).isEmpty()) {
            deltaValues = this.collectDeltaPropertyValues(record, (Any)crawlContext.getMapper().getMappingKeys());
        }
        String deltaHash = DigestHelper.calculateDigest((String)deltaValues.toString());
        if (this._log.isDebugEnabled()) {
            this._log.debug((Object)("Delta Hash for record " + record.getId() + ": " + deltaHash));
        }
        return deltaHash;
    }

    private AnyMap collectDeltaPropertyValues(Record record, Any deltaProps) {
        AnyMap deltaValues = record.getFactory().createAnyMap();
        for (Any p : deltaProps) {
            String deltaProp = p.toString();
            Any deltaValue = (Any)record.getMetadata().get((Object)deltaProp);
            if (deltaValue == null) continue;
            deltaValues.put(deltaProp, deltaValue);
        }
        return deltaValues;
    }
}

