/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.connectivity.framework.crawler.web;

import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.smila.connectivity.framework.CrawlerCriticalException;
import org.eclipse.smila.connectivity.framework.crawler.web.IndexDocument;
import org.eclipse.smila.connectivity.framework.crawler.web.WebCrawlerPerformanceAgent;
import org.eclipse.smila.connectivity.framework.crawler.web.configuration.Configuration;
import org.eclipse.smila.connectivity.framework.crawler.web.crawl.CrawlMode;
import org.eclipse.smila.connectivity.framework.crawler.web.fetcher.Fetcher;
import org.eclipse.smila.connectivity.framework.crawler.web.fetcher.FetcherOutput;
import org.eclipse.smila.connectivity.framework.crawler.web.filter.FilterProcessor;
import org.eclipse.smila.connectivity.framework.crawler.web.filter.impl.FilterProcessorImpl;
import org.eclipse.smila.connectivity.framework.crawler.web.messages.ModelType;
import org.eclipse.smila.connectivity.framework.crawler.web.messages.WebSite;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.Outlink;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.ParserManager;
import org.eclipse.smila.connectivity.framework.performancecounters.CrawlerPerformanceCounterHelper;

public class WebSiteIterator
implements Iterator<IndexDocument> {
    private final Log _log = LogFactory.getLog(WebSiteIterator.class);
    private final Set<Outlink> _linksDone = new HashSet<Outlink>();
    private Set<Outlink> _linksToDo = new HashSet<Outlink>();
    private Set<Outlink> _linksToDoNextLevel = new HashSet<Outlink>();
    private int _iterationsDone;
    private int _currentDepth;
    private Configuration _configuration;
    private Fetcher _fetcher;
    private int _wait;
    private boolean _randomWait;
    private FilterProcessor _filterProcessor;
    private long _startTime;
    private IndexDocument _currentIndexDocument;
    private final CrawlerPerformanceCounterHelper<WebCrawlerPerformanceAgent> _performanceCounters;

    public WebSiteIterator(WebSite webSite, ParserManager parserManager, CrawlerPerformanceCounterHelper<WebCrawlerPerformanceAgent> performanceCounters) throws CrawlerCriticalException {
        try {
            this._performanceCounters = performanceCounters;
            this._configuration = new Configuration();
            this._configuration.loadConfiguration(webSite);
            this._fetcher = new Fetcher(this._configuration, parserManager, performanceCounters);
            this._wait = this._configuration.getInt("crawl.wait");
            this._randomWait = this._configuration.getBoolean("crawl.random.wait");
            if (this._log.isDebugEnabled()) {
                this._log.debug((Object)("Starting new project: " + this._configuration.get("crawl.project.name")));
            }
            this._linksToDo = this._configuration.getSeeds();
            this._filterProcessor = new FilterProcessorImpl(this._configuration);
            this._startTime = System.currentTimeMillis();
        }
        catch (IllegalAccessException exception) {
            throw new CrawlerCriticalException("Error loading configuration", (Throwable)exception);
        }
        catch (InvocationTargetException exception) {
            throw new CrawlerCriticalException("Error loading configuration", (Throwable)exception);
        }
        catch (IOException exception) {
            throw new CrawlerCriticalException("Error loading configuration", (Throwable)exception);
        }
    }

    @Override
    public boolean hasNext() {
        while (this._linksToDo.size() > 0 && this._currentIndexDocument == null && !this.limitsExceeded()) {
            ++this._iterationsDone;
            Outlink link = this._linksToDo.iterator().next();
            this._linksToDo.remove(link);
            if (!this._linksDone.contains(link)) {
                this._linksDone.add(link);
                CrawlMode crawlMode = this._filterProcessor.evaluateUrlFilters(link);
                if (!crawlMode.equals((Object)CrawlMode.Skip)) {
                    try {
                        if (this._log.isDebugEnabled()) {
                            this._log.debug((Object)("Link = " + link.getUrlString() + " crawled"));
                        }
                        this._currentIndexDocument = this.indexDocs(link, this._configuration, crawlMode);
                    }
                    catch (InterruptedException interruptedException) {
                        this._log.error((Object)("Error fetching link " + link.getUrlString()));
                    }
                } else if (this._log.isDebugEnabled()) {
                    this._log.debug((Object)("Link = " + link.getUrlString() + " not included (cause: SKIP, Filter)"));
                }
            } else if (this._log.isDebugEnabled()) {
                this._log.debug((Object)("Link = " + link.getUrlString() + " already crawled"));
            }
            if (this._linksToDo.size() != 0 || this._linksToDoNextLevel.size() <= 0) continue;
            this._log.debug((Object)("Number of next level links: " + this._linksToDoNextLevel.size()));
            this._linksToDo = this._linksToDoNextLevel;
            this._linksToDoNextLevel = new HashSet<Outlink>();
            ++this._currentDepth;
            this._log.debug((Object)("Current depth is: " + this._currentDepth));
        }
        return this._currentIndexDocument != null;
    }

    @Override
    public IndexDocument next() {
        if (this._currentIndexDocument == null) {
            this.hasNext();
        }
        IndexDocument result = this._currentIndexDocument;
        this._currentIndexDocument = null;
        return result;
    }

    private IndexDocument indexDocs(Outlink outlink, Configuration conf, CrawlMode crawlMode) throws InterruptedException {
        IndexDocument document = null;
        int delay = 0;
        if (this._randomWait) {
            delay = (int)(Math.random() * (double)this._wait * 2.0);
        } else if (this._wait > 0) {
            delay = this._wait;
        }
        this._log.debug((Object)("Wait before next retrieval, seconds: " + delay));
        Thread.sleep(delay * 1000);
        FetcherOutput fetcherOutput = this._fetcher.fetch(outlink, this._filterProcessor, this._linksDone);
        if (fetcherOutput.getParse() != null) {
            if (crawlMode.equals((Object)CrawlMode.Index) && (crawlMode = this._filterProcessor.evaluateHtmlMetaTagFilters(fetcherOutput.getParse().getData().getHtmlMetaTags())).equals((Object)CrawlMode.Index)) {
                document = this.createDocument(fetcherOutput);
            }
            if (!crawlMode.equals((Object)CrawlMode.Skip)) {
                this.updateTodoLinks(fetcherOutput);
            }
        }
        return document;
    }

    private boolean limitsExceeded() {
        if (this.limitExceeded(this._fetcher.getBytes(), "fetcher.max.bytes")) {
            this._log.info((Object)"Max bytes limit exceeded");
            return true;
        }
        if (this.limitExceeded(this._fetcher.getPages(), "fetcher.max.documents")) {
            this._log.info((Object)"Max pages limit exceeded");
            return true;
        }
        float elapsedTime = (float)(System.currentTimeMillis() - this._startTime) / 1000.0f;
        if (this.limitExceeded((long)elapsedTime, "crawl.max.time")) {
            this._log.info((Object)"Max time exceeded");
            return true;
        }
        if (ModelType.MAX_ITERATIONS.value().equals(this._configuration.get("crawl.model.type")) && this.limitExceeded(this._iterationsDone, "crawl.model.value")) {
            this._log.info((Object)"Maximum number of iterations exceeded");
            return true;
        }
        if (ModelType.MAX_DEPTH.value().equals(this._configuration.get("crawl.model.type")) && this.limitExceeded(this._currentDepth, "crawl.model.value")) {
            this._log.info((Object)"Maximum depth exceeded!");
            return true;
        }
        return false;
    }

    private boolean limitExceeded(long test, String propertyName) {
        return this._configuration.getInt(propertyName) > 0 && test >= (long)this._configuration.getInt(propertyName);
    }

    private void updateTodoLinks(FetcherOutput fetcherOutput) {
        Outlink[] sitemapOutlinks;
        int n;
        Outlink[] outlinks = fetcherOutput.getParse().getData().getOutlinks();
        if (outlinks != null && outlinks.length > 0) {
            Outlink[] outlinkArray = outlinks;
            n = outlinks.length;
            int n2 = 0;
            while (n2 < n) {
                Outlink link = outlinkArray[n2];
                this._linksToDoNextLevel.add(link);
                if (this._log.isDebugEnabled()) {
                    this._log.debug((Object)("added new link to do:" + link.toString()));
                }
                ++n2;
            }
        }
        if ((sitemapOutlinks = fetcherOutput.getSitemapLinks()) != null && sitemapOutlinks.length > 0) {
            Outlink[] outlinkArray = sitemapOutlinks;
            int n3 = sitemapOutlinks.length;
            n = 0;
            while (n < n3) {
                Outlink link = outlinkArray[n];
                this._linksToDo.add(link);
                if (this._log.isDebugEnabled()) {
                    this._log.debug((Object)("added new link from sitemap file:" + link.toString()));
                }
                ++n;
            }
        }
    }

    private IndexDocument createDocument(FetcherOutput fetcherOutput) {
        String url = fetcherOutput.getContent().getUrl();
        String title = fetcherOutput.getParse().getData().getTitle();
        byte[] content = fetcherOutput.getContent().getContent();
        List<String> responseHeaders = fetcherOutput.getParse().getData().getContentMeta().toArrayList();
        List<String> htmlMetaData = fetcherOutput.getParse().getData().getHtmlMetaTags().toArrayList();
        ArrayList<String> metaDataWithResponseHeaderFallBack = new ArrayList<String>();
        metaDataWithResponseHeaderFallBack.addAll(responseHeaders);
        metaDataWithResponseHeaderFallBack.addAll(htmlMetaData);
        IndexDocument document = new IndexDocument(url, title, content, responseHeaders, htmlMetaData, metaDataWithResponseHeaderFallBack);
        return document;
    }

    @Override
    public void remove() {
    }
}

