/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.connectivity.framework.crawler.web.fetcher;

import java.net.MalformedURLException;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.smila.connectivity.framework.crawler.web.WebCrawlerPerformanceAgent;
import org.eclipse.smila.connectivity.framework.crawler.web.configuration.Configuration;
import org.eclipse.smila.connectivity.framework.crawler.web.configuration.Configured;
import org.eclipse.smila.connectivity.framework.crawler.web.crawl.CrawlMode;
import org.eclipse.smila.connectivity.framework.crawler.web.fetcher.FetcherOutput;
import org.eclipse.smila.connectivity.framework.crawler.web.filter.FilterProcessor;
import org.eclipse.smila.connectivity.framework.crawler.web.http.Http;
import org.eclipse.smila.connectivity.framework.crawler.web.http.HttpOutput;
import org.eclipse.smila.connectivity.framework.crawler.web.http.HttpStatus;
import org.eclipse.smila.connectivity.framework.crawler.web.http.SitemapParser;
import org.eclipse.smila.connectivity.framework.crawler.web.metadata.Metadata;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.Content;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.Outlink;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.Parse;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.ParseImpl;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.ParseStatus;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.Parser;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.ParserManager;
import org.eclipse.smila.connectivity.framework.performancecounters.CrawlerPerformanceCounterHelper;

/**
 * Fetches one link over HTTP, parses the returned content and records the
 * result as a {@link FetcherOutput}.
 *
 * <p>A single {@link #fetch} call may issue several HTTP requests: redirects are
 * followed until {@code fetcher.max.redirects} is reached and retryable
 * failures are repeated until {@code fetcher.max.retries} is reached.
 *
 * <p>NOTE(review): only the page/byte counters are updated under the instance
 * lock; the error counter, the accessors and the last-output field are not
 * guarded, so sharing one instance between threads looks unsafe — confirm how
 * callers use it.
 */
public class Fetcher
extends Configured {
    /** Factor used to convert between seconds and milliseconds. */
    private static final int MILLISECS_IN_SEC = 1000;

    private final Log _log = LogFactory.getLog(Fetcher.class);

    /** Total number of content bytes counted by {@link #updateStatus(int)}. */
    private long _bytes;

    /** Number of pages counted by {@link #updateStatus(int)}. */
    private int _pages;

    /** Number of errors counted by {@link #logError(String, String)}. */
    private int _errors;

    /** Value of {@code fetcher.max.redirects}. */
    private final int _maxRedirect;

    /** Value of {@code fetcher.max.retries}. */
    private final int _maxRetries;

    /** Pause before a retry, in milliseconds ({@code fetcher.wait.retry} * 1000). */
    private final int _waitRetry;

    /** Result recorded by the most recent call to {@code output(...)}. */
    private FetcherOutput _output;

    /** Value of {@code fetcher.use.sitemaps}. */
    private final boolean _sitemapsEnabled;

    /** Sitemap parser; {@code null} when sitemaps are disabled. */
    private final SitemapParser _sitemap;

    private final CrawlerPerformanceCounterHelper<WebCrawlerPerformanceAgent> _performanceCounters;

    /** Source of content-type specific parsers; may be {@code null}. */
    private final ParserManager _parserManager;

    /**
     * Creates a fetcher and reads its limits from the configuration.
     *
     * @param configuration supplies {@code fetcher.max.redirects},
     *        {@code fetcher.max.retries}, {@code fetcher.wait.retry} and
     *        {@code fetcher.use.sitemaps}
     * @param parserManager used to look up a parser by content type; when
     *        {@code null} fetched content is not parsed
     * @param performanceCounters counters updated for pages, bytes and fetch time
     */
    public Fetcher(Configuration configuration, ParserManager parserManager, CrawlerPerformanceCounterHelper<WebCrawlerPerformanceAgent> performanceCounters) {
        super(configuration);
        this._performanceCounters = performanceCounters;
        this._maxRedirect = configuration.getInt("fetcher.max.redirects");
        this._maxRetries = configuration.getInt("fetcher.max.retries");
        // presumably configured in seconds; Thread.sleep needs milliseconds
        this._waitRetry = configuration.getInt("fetcher.wait.retry") * MILLISECS_IN_SEC;
        this._sitemapsEnabled = configuration.getBoolean("fetcher.use.sitemaps");
        this._sitemap = this._sitemapsEnabled ? new SitemapParser(configuration) : null;
        this._parserManager = parserManager;
    }

    /**
     * Counts one fetched page and its size, both locally and in the
     * performance counters.
     *
     * @param bytesInPage size of the fetched content in bytes
     */
    private synchronized void updateStatus(int bytesInPage) {
        ++this._pages;
        this._performanceCounters.increment("pages");
        this._bytes += bytesInPage;
        this._performanceCounters.incrementBy("bytes", (long) bytesInPage);
    }

    /**
     * Returns the number of content bytes, treating missing content as empty.
     *
     * @param content fetched content, may be {@code null}
     * @return length of the content's byte array, or 0 when there is none
     */
    private static int byteCount(Content content) {
        if (content == null || content.getContent() == null) {
            return 0;
        }
        return content.getContent().length;
    }

    /**
     * Fetches the given link, following redirects and retrying as configured.
     *
     * <p>NOTE(review): the returned value is the shared last-output field. When
     * a redirect is refused, or the thread is interrupted while waiting for a
     * retry, no output is recorded for this call, so the result is whatever an
     * earlier call stored (or {@code null} on first use) — confirm callers
     * expect this.
     *
     * @param link the link to fetch
     * @param filterProcessor filters passed to the HTTP layer and used to
     *        evaluate redirect targets
     * @param linksDone links already processed; redirects to these are refused
     * @return the most recently recorded fetch result, possibly {@code null}
     */
    public FetcherOutput fetch(Outlink link, FilterProcessor filterProcessor, Set<Outlink> linksDone) {
        try {
            int redirectCount = 0;
            int retriesCount = 0;
            // Kept for the whole call: outputs and log messages refer to the
            // originally requested URL even after a redirect.
            final String url = link.toString();
            final Http http = new Http();
            http.setConf(this.getConf());
            Outlink[] sitemapLinks = new Outlink[]{};
            if (this._sitemapsEnabled) {
                if (this._log.isDebugEnabled()) {
                    this._log.debug("Trying to download sitemap.xml");
                }
                sitemapLinks = this._sitemap.getSitemapLinks(http, link.getUrl());
            }
            if (this._log.isDebugEnabled()) {
                this._log.debug("processing link: " + url);
            }
            final long start = System.currentTimeMillis();
            boolean redirecting;
            boolean retrying;
            do {
                if (this._log.isDebugEnabled()) {
                    this._log.debug("redirectCount=" + redirectCount);
                    this._log.debug("retriesCount=" + retriesCount);
                }
                redirecting = false;
                retrying = false;
                final HttpOutput httpOutput = http.getHttpOutput(link, filterProcessor);
                final HttpStatus status = httpOutput.getStatus();
                final Content content = httpOutput.getContent();
                // The numeric codes are HttpStatus values inlined by the decompiler;
                // presumably they map to named HttpStatus constants — confirm.
                switch (status.getCode()) {
                    case 1: {
                        // Content was delivered: parse it and count the page.
                        final ParseStatus pstatus = this.output(url, content, 1, sitemapLinks);
                        this.updateStatus(byteCount(content));
                        // Minor code 100 on a successful parse carries a redirect
                        // target in the parse status message.
                        if (pstatus != null && pstatus.isSuccess() && pstatus.getMinorCode() == 100) {
                            link = this.getRedirectLink(link, pstatus.getMessage(), filterProcessor, linksDone);
                            if (link != null) {
                                redirecting = true;
                                ++redirectCount;
                            }
                        }
                        break;
                    }
                    case 12:
                    case 13: {
                        // Redirect reported by the HTTP layer; the status message is the target.
                        link = this.getRedirectLink(link, status.getMessage(), filterProcessor, linksDone);
                        if (link != null) {
                            redirecting = true;
                            ++redirectCount;
                        }
                        break;
                    }
                    case 16: {
                        this.logError(url, status.getMessage());
                        // intentional fall-through: wait and retry after logging
                    }
                    case 15: {
                        Thread.sleep(this._waitRetry);
                        ++retriesCount;
                        retrying = true;
                        break;
                    }
                    case 11:
                    case 14:
                    case 17:
                    case 18:
                    case 21: {
                        this.output(url, null, 11, sitemapLinks);
                        break;
                    }
                    case 20: {
                        if (this._log.isDebugEnabled()) {
                            this._log.debug("Won't fetch url " + url);
                        }
                        this.output(url, null, 20, sitemapLinks);
                        break;
                    }
                    default: {
                        if (this._log.isWarnEnabled()) {
                            this._log.warn("Unknown HttpStatus: " + status.getCode());
                        }
                        this.output(url, null, 11, sitemapLinks);
                    }
                }
                if (redirecting && redirectCount >= this._maxRedirect) {
                    if (this._log.isInfoEnabled()) {
                        this._log.info(" - redirect count exceeded " + url);
                    }
                    this.output(url, null, 11, sitemapLinks);
                }
                if (retrying && retriesCount >= this._maxRetries) {
                    if (this._log.isInfoEnabled()) {
                        this._log.info(" - retries count exceeded " + url);
                    }
                    this.output(url, null, 11, sitemapLinks);
                }
            } while ((redirecting && redirectCount < this._maxRedirect) || (retrying && retriesCount < this._maxRetries));
            // Integer division: the counter receives whole seconds only.
            this._performanceCounters.incrementBy("averageHttpFetchTime", (System.currentTimeMillis() - start) / MILLISECS_IN_SEC);
        }
        catch (InterruptedException exception) {
            this.logError(link.getUrlString(), exception.getMessage());
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
        }
        return this._output;
    }

    /**
     * Logs a failed fetch and increments the error counter.
     *
     * @param url the URL whose fetch failed
     * @param message description of the failure
     */
    private void logError(String url, String message) {
        this._log.error("fetch of " + url + " failed with " + message);
        ++this._errors;
    }

    /**
     * Records the result of a fetch attempt as the current output.
     *
     * <p>Content is parsed only when {@code status} is 1 and a parser manager
     * is available; otherwise the output carries no parse.
     *
     * @param url the originally requested URL
     * @param content fetched content; {@code null} is replaced by empty content for {@code url}
     * @param status status code of the fetch attempt
     * @param sitemapLinks links taken from the sitemap, passed through to the output
     * @return the status of the stored parse, or {@code null} when nothing was parsed
     */
    private ParseStatus output(String url, Content content, int status, Outlink[] sitemapLinks) {
        Content effectiveContent = content;
        if (effectiveContent == null) {
            effectiveContent = new Content(url, url, new byte[0], "", new Metadata());
        }
        Parse parse = null;
        if (status == 1) {
            if (this._parserManager == null) {
                if (this._log.isErrorEnabled()) {
                    this._log.error("Parser manager is not set! Unable to get any parsers.");
                }
            } else {
                parse = this.parseContent(url, effectiveContent);
            }
        }
        if (parse == null) {
            this._output = new FetcherOutput(effectiveContent, null, sitemapLinks);
            return null;
        }
        this._output = new FetcherOutput(effectiveContent, new ParseImpl(parse), sitemapLinks);
        return parse.getData().getStatus();
    }

    /**
     * Parses content with the parser registered for its content type.
     *
     * @param url the URL the content belongs to, used in log messages
     * @param content the content to parse, never {@code null}
     * @return the parse result, the empty parse supplied by the parse status when
     *         parsing failed, or {@code null} when no parser matches the content type
     */
    private Parse parseContent(String url, Content content) {
        final Parser parser = this._parserManager.getParser(content.getContentType());
        if (parser == null) {
            if (this._log.isWarnEnabled()) {
                this._log.warn("Parser for content-type: " + content.getContentType() + " not found");
            }
            return null;
        }
        if (this._log.isDebugEnabled()) {
            this._log.debug("Using webcrawler parser: " + parser.getClass().getName() + " for content-type " + content.getContentType());
        }
        parser.setConf(this.getConf());
        final Parse parse = parser.getParse(content);
        final ParseStatus parseStatus = parse.getData().getStatus();
        if (!parseStatus.isSuccess()) {
            this._log.error("Error parsing: " + url + ": " + parseStatus);
            return parseStatus.getEmptyParse(this.getConf());
        }
        return parse;
    }

    /**
     * Builds the link for a redirect target and decides whether to follow it.
     *
     * <p>A redirect is refused when the URL filters yield {@code CrawlMode.Skip},
     * when the target equals the source link, or when the target is already in
     * {@code linksDone}.
     *
     * @param fromLink the link that produced the redirect
     * @param toUrlString the redirect target
     * @param filterProcessor evaluates the URL filters for the target
     * @param linksDone links already processed
     * @return the link to follow, or {@code null} when the redirect is refused or
     *         the target could not be turned into a link
     */
    private Outlink getRedirectLink(Outlink fromLink, String toUrlString, FilterProcessor filterProcessor, Set<Outlink> linksDone) {
        Outlink newLink = null;
        try {
            newLink = new Outlink(toUrlString, fromLink.getAnchor(), this.getConf());
            final CrawlMode crawlMode = filterProcessor.evaluateUrlFilters(newLink);
            final boolean refused = crawlMode.equals(CrawlMode.Skip) || newLink.equals(fromLink) || linksDone.contains(newLink);
            if (refused) {
                if (this._log.isDebugEnabled()) {
                    if (crawlMode.equals(CrawlMode.Skip)) {
                        this._log.debug("Won't redirect: CrawlMode=Skip, url = " + newLink.getUrlString());
                    } else if (linksDone.contains(newLink)) {
                        this._log.debug("Won't redirect: url already crawled, url = " + newLink.getUrlString());
                    } else if (newLink.equals(fromLink)) {
                        this._log.debug("Won't redirect: redirect to the same url, url = " + newLink.getUrlString());
                    }
                }
                return null;
            }
            if (this._log.isDebugEnabled()) {
                this._log.debug("redirect to " + newLink.getUrlString());
            }
        }
        catch (MalformedURLException malformedURLException) {
            // Keep the cause in the log instead of dropping it.
            this._log.error("Malformed redirect url: " + toUrlString, malformedURLException);
        }
        return newLink;
    }

    /**
     * @return the number of content bytes counted so far
     */
    public long getBytes() {
        return this._bytes;
    }

    /**
     * @param bytes new value for the byte counter
     */
    public void setBytes(long bytes) {
        this._bytes = bytes;
    }

    /**
     * @return the number of errors counted so far
     */
    public int getErrors() {
        return this._errors;
    }

    /**
     * @param errors new value for the error counter
     */
    public void setErrors(int errors) {
        this._errors = errors;
    }

    /**
     * @return the number of pages counted so far
     */
    public int getPages() {
        return this._pages;
    }

    /**
     * @param pages new value for the page counter
     */
    public void setPages(int pages) {
        this._pages = pages;
    }
}

