/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.importing.crawler.web.fetcher;

import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HeaderElement;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpConnectionManager;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpClientParams;
import org.apache.commons.httpclient.util.DateParseException;
import org.apache.commons.httpclient.util.DateUtil;
import org.apache.commons.io.IOUtils;
import org.eclipse.smila.datamodel.Any;
import org.eclipse.smila.datamodel.AnyMap;
import org.eclipse.smila.datamodel.Record;
import org.eclipse.smila.importing.crawler.web.Fetcher;
import org.eclipse.smila.importing.crawler.web.WebCrawlerException;
import org.eclipse.smila.taskworker.TaskLog;

public class SimpleFetcher
implements Fetcher {
    private static final String HEADER_LASTMODIFIED = "Last-Modified";
    private static final String HEADER_CONTENTTYPE = "Content-Type";
    private static final String HEADER_PARAM_CHARSET = "charset";
    private static final int DEFAULT_MAX_CONNECTIONS_PER_HOST = 32;
    private static final int DEFAULT_MAX_TOTAL_CONNECTIONS = 128;
    private final HttpClient _httpClient;

    public SimpleFetcher() {
        HttpClientParams params = new HttpClientParams();
        params.setIntParameter("http.protocol.max-redirects", 0);
        MultiThreadedHttpConnectionManager connectionManager = new MultiThreadedHttpConnectionManager();
        connectionManager.getParams().setDefaultMaxConnectionsPerHost(32);
        connectionManager.getParams().setMaxTotalConnections(128);
        this._httpClient = new HttpClient(params, (HttpConnectionManager)connectionManager);
    }

    @Override
    public void crawl(Record linkRecord, AnyMap parameters, TaskLog taskLog) throws WebCrawlerException {
        AnyMap metadata = linkRecord.getMetadata();
        String url = metadata.getStringValue("http.url");
        GetMethod request = null;
        try {
            try {
                request = this.getResource(url, taskLog);
                this.readMetadata(metadata, request);
                this.readHtmlContent(linkRecord, request);
            }
            catch (HttpException ex) {
                throw new WebCrawlerException("Http error while getting web resource " + url + ": " + ex.getMessage(), ex, false);
            }
            catch (IOException ex) {
                throw new WebCrawlerException("IO error while getting web resource " + url + ": " + ex.getMessage(), ex, true);
            }
        }
        finally {
            this.releaseQuietly((HttpMethod)request, taskLog);
        }
    }

    @Override
    public void fetch(Record crawledRecord, AnyMap parameters, TaskLog taskLog) throws WebCrawlerException {
        AnyMap metadata = crawledRecord.getMetadata();
        String url = metadata.getStringValue("http.url");
        GetMethod request = null;
        try {
            try {
                request = this.getResource(url, taskLog);
                this.readContent(crawledRecord, request);
            }
            catch (HttpException ex) {
                throw new WebCrawlerException("Http error while getting web resource " + url + ": " + ex.getMessage(), ex, false);
            }
            catch (IOException ex) {
                throw new WebCrawlerException("IO error while getting web resource " + url + ": " + ex.getMessage(), ex, true);
            }
        }
        finally {
            this.releaseQuietly((HttpMethod)request, taskLog);
        }
    }

    private GetMethod getResource(String url, TaskLog log) throws WebCrawlerException, IOException {
        GetMethod request = new GetMethod(url);
        int responseCode = this._httpClient.executeMethod((HttpMethod)request);
        if (responseCode != 200) {
            this.releaseQuietly((HttpMethod)request, log);
            throw new WebCrawlerException("GET " + url + ": server responded with " + responseCode + ".");
        }
        return request;
    }

    private void readMetadata(AnyMap metadata, GetMethod request) {
        Header date;
        metadata.put("http.size", (Number)request.getResponseContentLength());
        Header contentType = request.getResponseHeader(HEADER_CONTENTTYPE);
        if (contentType != null) {
            metadata.put("http.contenttype", contentType.getValue());
            HeaderElement[] elements = contentType.getElements();
            if (elements.length > 0) {
                NameValuePair charset;
                String mimetype = elements[0].getName();
                if (mimetype != null) {
                    metadata.put("http.mimetype", mimetype);
                }
                if ((charset = elements[0].getParameterByName(HEADER_PARAM_CHARSET)) != null) {
                    metadata.put("http.charset", charset.getValue());
                }
            }
        }
        if ((date = request.getResponseHeader(HEADER_LASTMODIFIED)) != null) {
            try {
                Date parsedDate = DateUtil.parseDate((String)date.getValue());
                metadata.put("http.lastModified", (Any)metadata.getFactory().createDateTimeValue(parsedDate));
            }
            catch (DateParseException dateParseException) {
                metadata.put("http.lastModified", date.getValue());
            }
        }
    }

    private void readHtmlContent(Record record, GetMethod request) throws IOException {
        String mimetype = record.getMetadata().getStringValue("http.mimetype");
        if (mimetype != null && mimetype.equals("text/html")) {
            this.readContent(record, request);
        }
    }

    private void readContent(Record record, GetMethod request) throws IOException {
        InputStream contentStream = request.getResponseBodyAsStream();
        byte[] content = IOUtils.toByteArray((InputStream)contentStream);
        if (content != null) {
            record.setAttachment("http.content", content);
            Long size = record.getMetadata().getLongValue("http.size");
            if (size == null || size < 0L) {
                record.getMetadata().put("http.size", (Number)content.length);
            }
        }
    }

    private void releaseQuietly(HttpMethod request, TaskLog log) {
        if (request != null) {
            try {
                this.consumeContent(request, log);
                request.releaseConnection();
            }
            catch (RuntimeException ex) {
                log.info("Error releasing connection after Http request done.", (Throwable)ex);
            }
        }
    }

    private void consumeContent(HttpMethod request, TaskLog log) {
        InputStream content = null;
        try {
            try {
                content = request.getResponseBodyAsStream();
                if (content != null) {
                    IOUtils.skip((InputStream)content, (long)Long.MAX_VALUE);
                }
            }
            catch (Exception ex) {
                log.info("Error consuming the resource stream", (Throwable)ex);
                IOUtils.closeQuietly((InputStream)content);
            }
        }
        finally {
            IOUtils.closeQuietly((InputStream)content);
        }
    }
}

