/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.importing.crawler.web.extractor;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.smila.datamodel.DataFactory;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.importing.crawler.web.LinkExtractor;
import org.eclipse.smila.importing.crawler.web.WebCrawlerException;
import org.eclipse.smila.importing.crawler.web.WebCrawlingContext;
import org.eclipse.smila.importing.crawler.web.extractor.LinkExtractorHtml;
import org.eclipse.smila.importing.crawler.web.extractor.LinkExtractorHtmlSoup;
import org.eclipse.smila.importing.crawler.web.utils.UriHelper;
import org.eclipse.smila.importing.util.MessageCollector;

/**
 * {@link LinkExtractor} implementation that hands the content attachment of a crawled record to a
 * pluggable {@link LinkExtractorHtml} and turns each link it reports into a new record.
 *
 * <p>Each reported link is normalized via {@link UriHelper#normalizeUrl} and then resolved against
 * the URL of the input record via {@link UriHelper#makeAbsolute}. Links that fail either step are
 * logged and skipped; they do not abort the extraction.
 */
public class DefaultLinkExtractor
implements LinkExtractor {
    /** Metadata key under which the URL is read from the input record and written to link records. */
    private static final String KEY_URL = "httpUrl";

    /** Name of the attachment that holds the content to scan for links. */
    private static final String ATTACHMENT_CONTENT = "httpContent";

    /** Strategy that finds the raw links in the content; replaceable via {@link #setLinkExtractorHtml}. */
    private LinkExtractorHtml _extractorHtml = new LinkExtractorHtmlSoup();

    /** Logger named after the runtime class of this instance. */
    private final Log _log = LogFactory.getLog(this.getClass());

    /**
     * Extracts the links contained in the content attachment of {@code inputRecord}.
     *
     * @param inputRecord record providing the base URL (metadata value {@code "httpUrl"}) and the
     *        content to scan (attachment {@code "httpContent"})
     * @param context crawling context; supplies the task parameters handed to the HTML extractor
     *        and the task log that receives the extractor's warnings
     * @return one new record per link that could be normalized and made absolute, each carrying
     *         the absolute link as metadata value {@code "httpUrl"}; possibly empty, never
     *         {@code null}
     * @throws WebCrawlerException if reading the attachment, running the HTML extractor or creating
     *         a link record throws any {@link Exception}; the original exception is the cause
     */
    @Override
    public Collection<Record> extractLinks(Record inputRecord, WebCrawlingContext context) throws WebCrawlerException {
        Collection<Record> result = new ArrayList<Record>();
        String baseUri = inputRecord.getMetadata().getStringValue(KEY_URL);
        InputStream contentStream = null;
        try {
            byte[] htmlContent = inputRecord.getAttachmentAsBytes(ATTACHMENT_CONTENT);
            // NOTE(review): if the attachment can be absent (null bytes), the stream constructor
            // throws and the record is reported as an extraction error below - confirm intended.
            contentStream = new ByteArrayInputStream(htmlContent);
            Collection<String> links = this._extractorHtml.extractLinks(contentStream, context.getTaskParameters(), new MessageCollector.TaskLogWarn(context.getTaskLog()));
            for (String link : links) {
                String normalizedLink = this.normalizeLink(link);
                if (normalizedLink == null) {
                    this._log.info("Couldn't normalize link: " + link);
                    continue;
                }
                String absLink = UriHelper.makeAbsolute(baseUri, normalizedLink);
                if (absLink == null) {
                    this._log.warn("Couldn't create absolute link from baseUri " + baseUri + " and link " + normalizedLink);
                    continue;
                }
                Record linkRecord = DataFactory.DEFAULT.createRecord();
                linkRecord.getMetadata().put(KEY_URL, absLink);
                result.add(linkRecord);
            }
            return result;
        }
        catch (Exception e) {
            throw new WebCrawlerException("Error while extracting links from record with base URI " + baseUri, e);
        }
        finally {
            // Single cleanup point: runs on normal return, on the wrapped exception above and
            // also when an Error escapes the try body.
            IOUtils.closeQuietly(contentStream);
        }
    }

    /**
     * Normalizes a single link without letting a failure propagate.
     *
     * @param url link as reported by the HTML extractor
     * @return the result of {@link UriHelper#normalizeUrl}, or {@code null} if that call threw an
     *         exception (which is logged as a warning)
     */
    private String normalizeLink(String url) {
        try {
            return UriHelper.normalizeUrl(url);
        }
        catch (Exception e) {
            this._log.warn("Error while normalizing link '" + url + "': " + e.getLocalizedMessage());
            return null;
        }
    }

    /**
     * Replaces the HTML link extraction strategy used by {@link #extractLinks}.
     *
     * @param linkExtractorHtml extractor to use from now on
     */
    public void setLinkExtractorHtml(LinkExtractorHtml linkExtractorHtml) {
        this._extractorHtml = linkExtractorHtml;
    }
}

