/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.importing.crawler.web.fetcher;

import java.io.IOException;
import java.io.InputStream;
import java.net.ProxySelector;
import java.net.URISyntaxException;
import java.util.Date;
import org.apache.commons.io.IOUtils;
import org.apache.http.Header;
import org.apache.http.HeaderElement;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.HttpClient;
import org.apache.http.client.RedirectException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.params.HttpClientParams;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.routing.HttpRoutePlanner;
import org.apache.http.impl.client.AbstractHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.ProxySelectorRoutePlanner;
import org.apache.http.impl.cookie.DateParseException;
import org.apache.http.impl.cookie.DateUtils;
import org.apache.http.params.HttpParams;
import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.http.client.util.HttpClientUtil;
import org.eclipse.smila.importing.ImportingException;
import org.eclipse.smila.importing.VisitedLinksException;
import org.eclipse.smila.importing.VisitedLinksService;
import org.eclipse.smila.importing.crawler.web.Fetcher;
import org.eclipse.smila.importing.crawler.web.LinkFilter;
import org.eclipse.smila.importing.crawler.web.WebCrawlerException;
import org.eclipse.smila.importing.crawler.web.WebCrawlingContext;
import org.eclipse.smila.importing.crawler.web.fetcher.HttpResponseInputStream;
import org.eclipse.smila.importing.crawler.web.filter.FilterConfiguration;
import org.eclipse.smila.importing.crawler.web.utils.UriHelper;
import org.eclipse.smila.importing.crawler.web.utils.WebCrawlerConfiguration;
import org.eclipse.smila.importing.util.PropertyNameMapper;
import org.eclipse.smila.taskworker.TaskContext;

/**
 * {@link Fetcher} implementation that retrieves web resources with a shared Apache {@link HttpClient}.
 *
 * <p>Automatic redirect handling of the HTTP client is switched off; redirects are followed manually in
 * {@link #handleRedirects} so that every redirect target can be checked against the link filter and the
 * visited-links service before it is requested.</p>
 *
 * <p>The {@link VisitedLinksService} and {@link LinkFilter} collaborators are injected through the
 * {@code set...}/{@code unset...} methods and are required as soon as a redirect has to be followed.</p>
 */
public class DefaultFetcher implements Fetcher {
    private static final String HEADER_LASTMODIFIED = "Last-Modified";
    private static final String HEADER_CONTENTTYPE = "Content-Type";
    private static final String HEADER_PARAM_CHARSET = "charset";
    private static final int DEFAULT_MAX_CONNECTIONS_PER_HOST = 32;
    private static final int DEFAULT_MAX_TOTAL_CONNECTIONS = 128;
    private VisitedLinksService _visitedLinks;
    private LinkFilter _linkFilter;
    private final HttpClient _httpClient = createAndConfigureClient();

    /**
     * Requests {@code url}, copies response metadata into {@code linkRecord} and, if the resource is HTML,
     * attaches its content as well. The response stream is always closed before returning.
     *
     * @param url the URL to request
     * @param linkRecord record that receives metadata (and possibly the content attachment)
     * @param context crawling context providing filter configuration and property mapper
     * @throws WebCrawlerException if the request fails, the server answers with a non-200/non-redirect
     *         status, or a redirect cannot be followed
     */
    @Override
    public void crawl(String url, Record linkRecord, WebCrawlingContext context) throws WebCrawlerException {
        HttpResponseInputStream response = null;
        try {
            response = getResource(url, context);
            resetUrlAttributeOnRedirect(linkRecord, response, context.getMapper());
            readMetadata(linkRecord, response);
            readHtmlContent(linkRecord, response);
        } catch (RedirectException ex) {
            throw new WebCrawlerException("Error while handling redirects for web resource " + url + ": " + ex.getMessage(), ex, false);
        } catch (VisitedLinksException ex) {
            throw new WebCrawlerException("Error while handling redirects for web resource " + url + ": " + ex.getMessage(), ex, true);
        } catch (IOException ex) {
            throw new WebCrawlerException("IO error while getting web resource " + url + ": " + ex.getMessage(), ex, true);
        } finally {
            // closeQuietly tolerates null, so this is safe even if getResource() itself failed.
            IOUtils.closeQuietly(response);
        }
    }

    /**
     * Requests {@code url} and attaches the complete response body to {@code crawledRecord}, regardless of
     * its media type. The response stream is always closed before returning.
     *
     * @param url the URL to request
     * @param crawledRecord record that receives the content attachment
     * @param context crawling context providing filter configuration and property mapper
     * @throws WebCrawlerException if the request fails, the server answers with a non-200/non-redirect
     *         status, or a redirect cannot be followed
     */
    @Override
    public void fetch(String url, Record crawledRecord, WebCrawlingContext context) throws WebCrawlerException {
        HttpResponseInputStream response = null;
        try {
            response = getResource(url, context);
            resetUrlAttributeOnRedirect(crawledRecord, response, context.getMapper());
            readContent(crawledRecord, response);
        } catch (RedirectException ex) {
            throw new WebCrawlerException("Error while handling redirects for web resource " + url + ": " + ex.getMessage(), ex, false);
        } catch (VisitedLinksException ex) {
            throw new WebCrawlerException("Error while handling redirects for web resource " + url + ": " + ex.getMessage(), ex, true);
        } catch (IOException ex) {
            throw new WebCrawlerException("IO error while getting web resource " + url + ": " + ex.getMessage(), ex, true);
        } finally {
            IOUtils.closeQuietly(response);
        }
    }

    /**
     * Opens a stream on the content of the resource referenced by {@code crawledRecord}.
     *
     * <p>The URL is read from the first attribute mapped to {@code "httpUrl"}; if that attribute has no
     * value, the unmapped {@code "httpUrl"} attribute is used instead. On success the caller owns the
     * returned stream and must close it. On any failure the stream (if already opened) is closed here.</p>
     *
     * @param crawledRecord record holding the URL of the resource
     * @param taskContext task context used to build the property mapper and the crawling context
     * @return the open response stream
     * @throws ImportingException if the resource cannot be retrieved
     */
    public InputStream getContent(Record crawledRecord, TaskContext taskContext) throws ImportingException {
        PropertyNameMapper mapper = PropertyNameMapper.createFrom(taskContext);
        String url = crawledRecord.getMetadata().getStringValue(mapper.get("httpUrl").get(0));
        if (url == null) {
            url = crawledRecord.getMetadata().getStringValue("httpUrl");
        }
        HttpResponseInputStream response = null;
        boolean handedOver = false;
        try {
            response = getResource(url, new WebCrawlingContext(taskContext));
            resetUrlAttributeOnRedirect(crawledRecord, response, mapper);
            handedOver = true;
            return response;
        } catch (RedirectException ex) {
            throw new ImportingException("Error while handling redirects for web resource " + url + ": " + ex.getMessage(), ex, false);
        } catch (VisitedLinksException ex) {
            throw new ImportingException("Error while handling redirects for web resource " + url + ": " + ex.getMessage(), ex, true);
        } catch (IOException ex) {
            throw new ImportingException("IO error while getting web resource " + url + ": " + ex.getMessage(), ex, true);
        } catch (Exception ex) {
            throw new ImportingException("Http error while getting web resource " + url + ": " + ex.getMessage(), ex, false);
        } finally {
            // Only the caller may keep the stream; on every failure path release the connection here.
            if (!handedOver) {
                IOUtils.closeQuietly(response);
            }
        }
    }

    /** Requests {@code url} as the first hop of a (possible) redirect chain. */
    private HttpResponseInputStream getResource(String url, WebCrawlingContext context) throws WebCrawlerException, VisitedLinksException, RedirectException, IOException {
        return getResource(url, context, 0);
    }

    /**
     * Executes a GET request for {@code url} and returns the response stream for a 200 response.
     * Redirect responses are followed if the filter configuration allows it; any other status is an error.
     *
     * @param url the URL to request
     * @param context crawling context providing the filter configuration
     * @param redirectLevel number of redirects already followed to reach {@code url}
     * @return the open response stream of the final (status 200) response
     * @throws RedirectException if a redirect is received but must not or cannot be followed
     * @throws WebCrawlerException if the server answers with a status that is neither 200 nor a redirect
     */
    private HttpResponseInputStream getResource(String url, WebCrawlingContext context, int redirectLevel) throws WebCrawlerException, VisitedLinksException, RedirectException, IOException {
        FilterConfiguration filterConfig = context.getFilterConfiguration();
        HttpResponse response = _httpClient.execute(new HttpGet(url));
        // The stream remembers whether it was reached through a redirect (redirectLevel > 0).
        HttpResponseInputStream responseStream = new HttpResponseInputStream(url, response, redirectLevel > 0);
        int responseCode = response.getStatusLine().getStatusCode();
        if (responseCode == 200) {
            return responseStream;
        }
        if (isRedirect(responseCode)) {
            if (filterConfig != null && filterConfig.followRedirects()) {
                // handleRedirects() closes responseStream itself.
                return handleRedirects(responseStream, context, redirectLevel);
            }
            IOUtils.closeQuietly(responseStream);
            throw new RedirectException("Follow redirects not configured, skipping link " + url);
        }
        IOUtils.closeQuietly(responseStream);
        throw new WebCrawlerException("GET " + url + ": server responded with " + responseCode + ".");
    }

    /**
     * Copies size, content type (including mimetype and charset) and last-modified date from the response
     * headers into the record metadata. A last-modified value that cannot be parsed as a date is stored
     * as the raw header string.
     */
    private void readMetadata(Record record, HttpResponseInputStream response) {
        AnyMap metadata = record.getMetadata();
        HttpEntity entity = response.getResponseEntity();
        if (entity != null) {
            metadata.put("httpSize", (Number)entity.getContentLength());
        }
        Header contentType = response.getResponse().getFirstHeader(HEADER_CONTENTTYPE);
        if (contentType != null) {
            metadata.put("httpContenttype", contentType.getValue());
            HeaderElement[] elements = contentType.getElements();
            if (elements.length > 0) {
                String mimetype = elements[0].getName();
                if (mimetype != null) {
                    metadata.put("httpMimetype", mimetype);
                }
                NameValuePair charset = elements[0].getParameterByName(HEADER_PARAM_CHARSET);
                if (charset != null) {
                    metadata.put("httpCharset", charset.getValue());
                }
            }
        }
        Header lastModified = response.getResponse().getFirstHeader(HEADER_LASTMODIFIED);
        if (lastModified != null) {
            try {
                Date parsedDate = DateUtils.parseDate(lastModified.getValue());
                metadata.put("httpLastModified", (Any)metadata.getFactory().createDateTimeValue(parsedDate));
            } catch (DateParseException ignored) {
                // Unparseable date: keep the raw header value rather than losing the information.
                metadata.put("httpLastModified", lastModified.getValue());
            }
        }
    }

    /**
     * Attaches the content to the record only if the previously stored mimetype is {@code text/html}.
     * The comparison ignores case because media types are case-insensitive.
     */
    private void readHtmlContent(Record record, InputStream contentStream) throws IOException {
        String mimetype = record.getMetadata().getStringValue("httpMimetype");
        if ("text/html".equalsIgnoreCase(mimetype)) {
            readContent(record, contentStream);
        }
    }

    /**
     * Reads the whole stream into memory and stores it as the {@code "httpContent"} attachment.
     * If no non-negative {@code "httpSize"} is present yet, the actual content length is stored.
     */
    private void readContent(Record record, InputStream contentStream) throws IOException {
        byte[] content = IOUtils.toByteArray(contentStream);
        if (content != null) {
            record.setAttachment("httpContent", content);
            Long size = record.getMetadata().getLongValue("httpSize");
            if (size == null || size < 0L) {
                record.getMetadata().put("httpSize", (Number)content.length);
            }
        }
    }

    /**
     * @return {@code true} for the status codes that are followed as redirects:
     *         301, 302, 303, 307 and 308.
     */
    private boolean isRedirect(int statusCode) {
        switch (statusCode) {
            case 301:
            case 302:
            case 303:
            case 307:
            case 308:
                return true;
            default:
                return false;
        }
    }

    /**
     * Follows the redirect announced by {@code responseStream}.
     *
     * <p>The redirect target is made absolute, normalized, checked against the link filter and against the
     * already visited URLs (both the context-local set and the visited-links service). Only if all checks
     * pass is the target requested. The redirect response stream passed in is always closed.</p>
     *
     * @param responseStream the redirect response
     * @param context crawling context
     * @param redirectLevel number of redirects already followed
     * @return the response stream of the redirect target
     * @throws RedirectException if the redirect limit is reached, the location header is missing or
     *         invalid, or the target is filtered out or was already visited
     */
    private HttpResponseInputStream handleRedirects(HttpResponseInputStream responseStream, WebCrawlingContext context, int redirectLevel) throws WebCrawlerException, VisitedLinksException, IOException, RedirectException {
        try {
            if (redirectLevel >= context.getFilterConfiguration().getMaxRedirects()) {
                throw new RedirectException("Reached maximum number of redirects");
            }
            HttpResponse response = responseStream.getResponse();
            Header locationHeader = response.getFirstHeader("location");
            if (locationHeader == null) {
                throw new RedirectException("Received redirect response " + response.getStatusLine() + " but no location header");
            }
            String location = locationHeader.getValue();
            try {
                String redirectUrl = UriHelper.makeAbsolute(responseStream.getUrl(), location);
                if (redirectUrl == null) {
                    throw new RedirectException("Couldn't create absolute link from baseUri " + responseStream.getUrl() + " and link " + location);
                }
                String normalizedRedirectUrl = UriHelper.normalizeUrl(redirectUrl);
                if (!_linkFilter.allowLink(normalizedRedirectUrl, context)) {
                    throw new RedirectException("Redirect to URL '" + normalizedRedirectUrl + "' is not allowed by filter configuration");
                }
                // The cheap local check comes first; the service call also marks the URL as visited.
                boolean alreadyVisited = context.getVisitedUrls().contains(normalizedRedirectUrl)
                    || _visitedLinks.checkAndMarkVisited(context.getDataSource(), normalizedRedirectUrl, context.getJobRunId(), context.getCurrentInputBulkId());
                if (alreadyVisited) {
                    throw new RedirectException("Redirect to URL '" + normalizedRedirectUrl + "' is not allowed. URL was already visited");
                }
                context.getVisitedUrls().add(normalizedRedirectUrl);
                return getResource(normalizedRedirectUrl, context, redirectLevel + 1);
            } catch (URISyntaxException ex) {
                throw new RedirectException("Invalid Redirect location '" + location + "'", ex);
            }
        } finally {
            IOUtils.closeQuietly(responseStream);
        }
    }

    /**
     * If the response was reached through a redirect, overwrites the URL attributes already present in the
     * record (all attributes mapped to {@code "httpUrl"} plus the unmapped {@code "httpUrl"} itself) with
     * the URL of the response. Attributes that are not present are not created.
     */
    private void resetUrlAttributeOnRedirect(Record record, HttpResponseInputStream response, PropertyNameMapper mapper) {
        if (!response.isRedirect()) {
            return;
        }
        AnyMap metadata = record.getMetadata();
        for (String attributeName : mapper.get("httpUrl")) {
            if (metadata.containsKey(attributeName)) {
                metadata.put(attributeName, response.getUrl());
            }
        }
        if (metadata.containsKey("httpUrl")) {
            metadata.put("httpUrl", response.getUrl());
        }
    }

    /**
     * Creates the HTTP client used for all requests: pooled connections, automatic redirects disabled,
     * proxy taken from the crawler configuration (falling back to the JVM default {@link ProxySelector}),
     * and an optional socket timeout.
     */
    private HttpClient createAndConfigureClient() {
        ClientConnectionManager connectionManager =
            HttpClientUtil.createThreadSafeConnectionManager(DEFAULT_MAX_TOTAL_CONNECTIONS, DEFAULT_MAX_CONNECTIONS_PER_HOST);
        DefaultHttpClient httpClient = new DefaultHttpClient(connectionManager);
        // Redirects are followed manually so each target can be filtered and de-duplicated.
        HttpClientParams.setRedirecting(httpClient.getParams(), false);
        WebCrawlerConfiguration config = new WebCrawlerConfiguration();
        HttpHost proxyHost = config.getProxyHost();
        if (proxyHost != null) {
            httpClient.getParams().setParameter("http.route.default-proxy", proxyHost);
        } else {
            httpClient.setRoutePlanner(new ProxySelectorRoutePlanner(httpClient.getConnectionManager().getSchemeRegistry(), ProxySelector.getDefault()));
        }
        Integer socketTimeout = config.getSocketTimeout();
        // Guard against a missing setting before unboxing; only positive values are applied.
        if (socketTimeout != null && socketTimeout > 0) {
            httpClient.getParams().setParameter("http.socket.timeout", socketTimeout);
        }
        return httpClient;
    }

    /** Sets the visited-links service used to de-duplicate redirect targets. */
    public void setVisitedLinks(VisitedLinksService visitedLinks) {
        _visitedLinks = visitedLinks;
    }

    /** Clears the visited-links service, but only if {@code visitedLinks} is the instance currently set. */
    public void unsetVisitedLinks(VisitedLinksService visitedLinks) {
        if (_visitedLinks == visitedLinks) {
            _visitedLinks = null;
        }
    }

    /** Sets the link filter used to validate redirect targets. */
    public void setLinkFilter(LinkFilter linkFilter) {
        _linkFilter = linkFilter;
    }

    /** Clears the link filter, but only if {@code linkFilter} is the instance currently set. */
    public void unsetLinkFilter(LinkFilter linkFilter) {
        if (_linkFilter == linkFilter) {
            _linkFilter = null;
        }
    }
}

