/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.importing.crawler.web.extractor;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import org.apache.commons.httpclient.URI;
import org.apache.commons.httpclient.URIException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.importing.crawler.web.LinkExtractor;
import org.eclipse.smila.importing.crawler.web.WebCrawlerException;
import org.eclipse.smila.importing.crawler.web.extractor.LinkExtractorHtml;
import org.eclipse.smila.importing.crawler.web.extractor.LinkExtractorHtmlSoup;
import org.eclipse.smila.taskworker.TaskLog;

public class SimpleLinkExtractor
implements LinkExtractor {
    private LinkExtractorHtml _extractorHtml = new LinkExtractorHtmlSoup();
    private final Log _log = LogFactory.getLog(this.getClass());

    @Override
    public Collection<Record> extractLinks(Record inputRecord, AnyMap parameters, TaskLog taskLog) throws WebCrawlerException {
        ArrayList<Record> arrayList;
        ArrayList<Record> result = new ArrayList<Record>();
        ByteArrayInputStream contentStream = null;
        String baseUri = inputRecord.getMetadata().getStringValue("http.url");
        try {
            byte[] htmlContent = inputRecord.getAttachmentAsBytes("http.content");
            contentStream = new ByteArrayInputStream(htmlContent);
            Collection<String> links = this._extractorHtml.extractLinks(contentStream, parameters);
            for (String link : links) {
                String absLink = this.getAbsoluteUri(baseUri, link);
                if (absLink == null) continue;
                Record r = DataFactory.DEFAULT.createRecord();
                r.getMetadata().put("http.url", absLink);
                result.add(r);
            }
            arrayList = result;
        }
        catch (Exception exception) {
            try {
                throw new WebCrawlerException("Error while extracting links from record with base URI " + baseUri);
            }
            catch (Throwable throwable) {
                IOUtils.closeQuietly(contentStream);
                throw throwable;
            }
        }
        IOUtils.closeQuietly((InputStream)contentStream);
        return arrayList;
    }

    /*
     * Enabled force condition propagation
     * Lifted jumps to return sites
     */
    public String getAbsoluteUri(String baseUri, String uri) throws URIException {
        URI linkUri;
        block3: {
            try {
                linkUri = new URI(uri.trim(), false);
                if (!linkUri.isRelativeURI()) return linkUri.getEscapedURI();
                if (baseUri != null) break block3;
                return null;
            }
            catch (Exception exception) {
                this._log.warn((Object)("Error while creating result uri from link '" + uri + "' and baseURI '" + baseUri + "'"));
                return null;
            }
        }
        linkUri = new URI(new URI(baseUri, true), linkUri);
        return linkUri.getEscapedURI();
    }

    public void setLinkExtractorHtml(LinkExtractorHtml linkExtractorHtml) {
        this._extractorHtml = linkExtractorHtml;
    }
}

