/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.importing.crawler.web;

import java.io.InputStream;
import java.util.Iterator;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.importing.ContentFetcher;
import org.eclipse.smila.importing.compounds.CompoundExtractor;
import org.eclipse.smila.importing.compounds.CompoundExtractorException;
import org.eclipse.smila.importing.compounds.ExtractorWorkerBase;
import org.eclipse.smila.importing.crawler.web.Fetcher;
import org.eclipse.smila.importing.crawler.web.WebCrawlerConstants;
import org.eclipse.smila.importing.crawler.web.filter.FilterConfiguration;
import org.eclipse.smila.importing.crawler.web.utils.DeltaHash;
import org.eclipse.smila.importing.util.PropertyNameMapper;
import org.eclipse.smila.taskworker.TaskContext;

/**
 * Extractor worker for compound documents that carry web-crawler attributes.
 *
 * <p>Builds on {@link ExtractorWorkerBase} and supplies the web-specific pieces: how the
 * extractor is invoked for a compound record, how an extracted element is turned into a
 * record with {@code http*} attributes, how elements are filtered by URL pattern, and how
 * attribute names are finally mapped. When and in which order these hooks are called is
 * decided by the base class, which is not visible here.
 */
public class WebExtractorWorker extends ExtractorWorkerBase {
    /** Worker name returned by {@link #getName()}. */
    public static final String NAME = "webExtractor";

    /** Fetcher handed out by {@link #getContentFetcher()}; {@code null} while none is set. */
    private Fetcher _fetcher;

    /**
     * @return the constant {@link #NAME}.
     */
    public String getName() {
        return NAME;
    }

    /**
     * Runs the extractor on the compound content, passing the compound's URL and MIME type.
     *
     * <p>Both values are read from the compound record's metadata under the first name the
     * task's {@link PropertyNameMapper} returns for {@code "httpUrl"} and
     * {@code "httpMimetype"} respectively. Extracted content is requested under the
     * attachment name {@code "httpContent"}.
     *
     * @param extractor       extractor to delegate to
     * @param compoundRecord  record describing the compound
     * @param compoundContent stream with the compound's content
     * @param taskContext     current task context, used to create the name mapper
     * @return the iterator produced by the extractor
     * @throws CompoundExtractorException propagated from the extractor
     */
    protected Iterator<Record> invokeExtractor(CompoundExtractor extractor, Record compoundRecord, InputStream compoundContent, TaskContext taskContext) throws CompoundExtractorException {
        final PropertyNameMapper nameMapper = PropertyNameMapper.createFrom(taskContext);
        final String compoundUrl = this.mappedMetadataValue(compoundRecord, nameMapper, "httpUrl");
        final String compoundMimeType = this.mappedMetadataValue(compoundRecord, nameMapper, "httpMimetype");
        return extractor.extract(compoundContent, compoundUrl, compoundMimeType, "httpContent");
    }

    /**
     * Reads a string from the record's metadata, using the first name the mapper returns
     * for {@code propertyKey} as the metadata key.
     */
    private String mappedMetadataValue(Record record, PropertyNameMapper nameMapper, String propertyKey) {
        return record.getMetadata().getStringValue((String) nameMapper.get(propertyKey).get(0));
    }

    /**
     * Converts an extracted element into a record carrying web-crawler attributes.
     *
     * <p>If the extracted record's metadata contains {@code "isRootCompound"}, the compound
     * record itself is reused (and modified) as the result; otherwise a new record is
     * created. In both cases the content attachment, size and last-modified values are
     * copied onto the result and {@link DeltaHash#calculate} is applied to it.
     *
     * @param compoundRecord  record describing the compound
     * @param extractedRecord record produced by the extractor for one element
     * @param taskContext     current task context (not used by this implementation)
     * @return the converted record
     */
    protected Record convertRecord(Record compoundRecord, Record extractedRecord, TaskContext taskContext) {
        final String source = compoundRecord.getSource();
        final boolean isRootCompound = extractedRecord.getMetadata().containsKey("isRootCompound");
        final Record result = isRootCompound ? compoundRecord : this.newElementRecord(source, extractedRecord);

        this.copyAttachment(extractedRecord, result, "httpContent");
        this.copyAttribute(extractedRecord, "size", result, "httpSize");
        // The compound's value is copied first; the copy of the element's "time" follows
        // and targets the same attribute.
        this.copyAttribute(compoundRecord, "httpLastModified", result, "httpLastModified");
        this.copyAttribute(extractedRecord, "time", result, "httpLastModified");
        DeltaHash.calculate(result);
        return result;
    }

    /**
     * Creates the record for a non-root element: its id is {@code <source>:<extracted id>},
     * and its {@code "httpUrl"} attribute is filled from the extracted record's
     * {@code "compounds"} and {@code "fileName"} values using {@code "/"} as separator.
     */
    private Record newElementRecord(String source, Record extractedRecord) {
        final Record element = extractedRecord.getFactory().createRecord(source + ":" + extractedRecord.getId(), source);
        this.copySetToStringAttribute(extractedRecord, "compounds", element, "httpUrl", "/");
        this.concatAttributeValues(extractedRecord, "fileName", element, "httpUrl", "/");
        return element;
    }

    /**
     * Decides whether a record passes the URL filter configured for the task.
     *
     * <p>Without a {@code "filters"} task parameter, or without an {@code "httpUrl"} entry
     * in the record's metadata, the record is accepted. Otherwise the result is whatever
     * the configured URL pattern matcher says about the record's {@code "httpUrl"} value.
     *
     * @param record      record to check
     * @param taskContext current task context providing the task parameters
     * @return {@code true} if the record is accepted, {@code false} if it is filtered out
     */
    protected boolean filterRecord(Record record, TaskContext taskContext) {
        final AnyMap filters = taskContext.getTaskParameters().getMap("filters");
        if (filters == null) {
            return true;
        }
        // Built before the metadata lookup, as a FilterConfiguration is always created
        // once filter parameters are present.
        final FilterConfiguration configuration = new FilterConfiguration(filters);
        if (!record.getMetadata().containsKey("httpUrl")) {
            return true;
        }
        return configuration.getUrlPatternMatcher().matches(record.getMetadata().getStringValue("httpUrl"));
    }

    /**
     * Applies the task's property name mapping to the record for all names listed in
     * {@link WebCrawlerConstants#PROPERTY_NAMES}.
     *
     * @param record      record whose attribute names are mapped
     * @param taskContext current task context, used to create the name mapper
     */
    protected void mapRecord(Record record, TaskContext taskContext) {
        PropertyNameMapper.createFrom(taskContext).mapNames(record, WebCrawlerConstants.PROPERTY_NAMES);
    }

    /**
     * @return the fetcher currently set, or {@code null} if none is set.
     */
    protected ContentFetcher getContentFetcher() {
        return this._fetcher;
    }

    /**
     * Sets the fetcher returned by {@link #getContentFetcher()}.
     *
     * @param fetcher fetcher to use
     */
    public void setFetcher(Fetcher fetcher) {
        this._fetcher = fetcher;
    }

    /**
     * Clears the fetcher, but only if the given instance is the one currently set.
     *
     * @param fetcher fetcher being removed
     */
    public void unsetFetcher(Fetcher fetcher) {
        if (this._fetcher != fetcher) {
            return;
        }
        this._fetcher = null;
    }
}

