/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.connectivity.framework.crawler.web.parse.js;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.smila.connectivity.framework.crawler.web.configuration.Configured;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.Content;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.ExtractUtils;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.Outlink;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.Parse;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.ParseData;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.ParseImpl;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.ParseStatus;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.Parser;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.js.JavascriptParser;

/**
 * Parser for JavaScript content that collects outlinks from quoted string literals in script code.
 *
 * <p>Every single- or double-quoted, whitespace-free literal matched by {@link #TEXT_PATTERN} is
 * offered to {@code ExtractUtils.isUrl}; accepted candidates are turned into absolute URLs and
 * returned as {@link Outlink}s.
 */
public class JavascriptParserImpl extends Configured implements Parser, JavascriptParser {

    /** Content types this parser declares via {@link #getContentTypes()}. */
    private static final String[] CONTENT_TYPES =
        new String[]{"application/x-javascript", "application/javascript", "text/javascript"};

    private static final String AMPERSAND = "&";
    private static final String ESCAPED_AMPERSAND = "&amp;";
    private static final String WWW_PREFIX = "www.";
    private static final String HTTP_PREFIX = "http://";

    /**
     * Matches a quoted literal without whitespace or quotes inside. Group 1 is the opening delimiter
     * (optional backslashes followed by {@code "} or {@code '}), group 2 is the literal's content, and
     * the closing delimiter must repeat group 1.
     */
    private static final Pattern TEXT_PATTERN = Pattern.compile(
        "(\\\\*(?:\"|'))([^\\s\"']+?)(?:\\1)", Pattern.CASE_INSENSITIVE | Pattern.MULTILINE);

    private final Log _log = LogFactory.getLog(JavascriptParserImpl.class);

    /**
     * Returns the content types handled by this parser.
     *
     * @return a fresh copy of the content type list, so callers cannot modify the shared constant
     */
    @Override
    public String[] getContentTypes() {
        return CONTENT_TYPES.clone();
    }

    /**
     * Parses the given content as JavaScript and wraps the extracted outlinks in a {@link Parse}.
     * The content's URL is used both as the anchor and as the base URL of the extracted outlinks;
     * the parse text is empty and the status is {@code ParseStatus.STATUS_SUCCESS}.
     *
     * @param content the content to parse
     * @return parse result holding the extracted outlinks and the content's metadata
     */
    @Override
    public Parse getParse(Content content) {
        // NOTE(review): if getContent() returns byte[], this decodes with the platform default
        // charset - confirm whether Content exposes an encoding that should be used instead.
        String scriptCode = new String(content.getContent());
        String contentUrl = content.getUrl();
        Outlink[] outlinks = this.getOutlinks(scriptCode, contentUrl, contentUrl);
        ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, "", outlinks, content.getMetadata());
        parseData.setConf(this._configuration);
        return new ParseImpl(parseData);
    }

    /**
     * Extracts outlinks from JavaScript code.
     *
     * <p>Candidates starting with {@code www.} are prefixed with {@code http://}; all others are
     * resolved against {@code base}. If {@code base} is malformed, an error is logged and only
     * candidates that are already absolute URLs can be resolved. Candidates that cannot be turned
     * into a URL are logged at debug level and skipped.
     *
     * @param scriptCode the JavaScript source; {@code null} yields an empty result
     * @param anchor the anchor assigned to every extracted outlink
     * @param base the base URL used to resolve extracted links
     * @return the extracted outlinks in order of appearance, never {@code null}
     */
    @Override
    public Outlink[] getOutlinks(String scriptCode, String anchor, String base) {
        if (scriptCode == null) {
            // Nothing to scan; Pattern.matcher(null) would otherwise throw a NullPointerException.
            return new Outlink[0];
        }
        ArrayList<Outlink> outlinks = new ArrayList<Outlink>();
        URL baseUrl = this.parseBaseUrl(base);
        Matcher textMatcher = TEXT_PATTERN.matcher(scriptCode);
        // Declared outside the try block so the catch clause can report the offending candidate.
        String url = null;
        while (textMatcher.find()) {
            try {
                url = textMatcher.group(2);
                if (!ExtractUtils.isUrl(url)) {
                    continue;
                }
                if (url.startsWith(WWW_PREFIX)) {
                    url = HTTP_PREFIX + url;
                } else {
                    // With a null baseUrl this only succeeds for absolute URLs.
                    url = new URL(baseUrl, url).toString();
                }
                // Literal replacement; no regular expression is needed here.
                url = url.replace(ESCAPED_AMPERSAND, AMPERSAND);
                if (this._log.isDebugEnabled()) {
                    this._log.debug("Extracted url from javascript code: " + url);
                }
                outlinks.add(new Outlink(url, anchor, this.getConf()));
            } catch (MalformedURLException exception) {
                if (this._log.isDebugEnabled()) {
                    this._log.debug(
                        "JavaScript Parser: Malformed extracted url: " + url + ", base url: " + base, exception);
                }
            }
        }
        return outlinks.toArray(new Outlink[0]);
    }

    /** Parses the base URL, logging an error and returning {@code null} when it is malformed. */
    private URL parseBaseUrl(String base) {
        try {
            return new URL(base);
        } catch (MalformedURLException exception) {
            if (this._log.isErrorEnabled()) {
                this._log.error("Malformed base url: " + base, exception);
            }
            return null;
        }
    }
}

