/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.smila.connectivity.framework.crawler.web.parse.html;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.eclipse.smila.connectivity.framework.crawler.web.configuration.Configuration;
import org.eclipse.smila.connectivity.framework.crawler.web.http.HttpResponse;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.Outlink;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.Parser;
import org.eclipse.smila.connectivity.framework.crawler.web.parse.js.JavascriptParser;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Helpers for walking a parsed HTML DOM tree and extracting text, the page
 * title, the {@code <base>} URL, and outgoing links (both plain HTML links and
 * links discovered by a JavaScript parser).
 *
 * <p>Which elements are treated as link carriers is decided by the table built
 * in {@link #setConf(Configuration)}.</p>
 */
public class DOMContentUtils {
    private static final String JAVASCRIPT_PREFIX = "javascript:";
    private static final String ON = "on";
    private static final String HREF = "href";
    private static final String SCRIPT = "script";
    private static final String NO_FOLLOW = "nofollow";

    /** Compiled once; used to collapse whitespace runs and to split {@code rel} tokens. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Logger for this class (previously registered under an unrelated class by mistake). */
    private final Log _log = LogFactory.getLog(DOMContentUtils.class);

    /** Parser used by {@link #getJavascriptOutlinks}; {@code null} until a setter call supplies one. */
    private JavascriptParser _javascriptParser;

    /** Link-carrying elements keyed by lower-case element name. */
    private final Map<String, LinkParams> _linkParams = new HashMap<String, LinkParams>();

    private Configuration _conf;

    /**
     * Creates the utility and initialises the link table from {@code conf}.
     *
     * @param conf crawler configuration; must not be {@code null}
     */
    public DOMContentUtils(Configuration conf) {
        this.setConf(conf);
    }

    /**
     * Stores the configuration and rebuilds the table of link-carrying
     * elements. {@code form/@action} is only registered when the boolean
     * option {@code parser.html.form.use_action} is set (default {@code false}).
     *
     * @param conf crawler configuration; must not be {@code null}
     */
    public void setConf(Configuration conf) {
        this._conf = conf;
        this._linkParams.clear();
        this.register("a", HREF, 1);
        this.register("area", HREF, 0);
        if (conf.getBoolean("parser.html.form.use_action", false)) {
            this.register("form", "action", 1);
        }
        this.register("frame", "src", 0);
        this.register("iframe", "src", 0);
        this.register(SCRIPT, "src", 0);
        this.register("link", HREF, 0);
    }

    /** Adds one entry to the link table, keyed by the element name. */
    private void register(String elName, String attrName, int childLen) {
        this._linkParams.put(elName, new LinkParams(elName, attrName, childLen));
    }

    /**
     * Appends the text found below {@code node} to {@code sb}, skipping
     * {@code script} and {@code style} subtrees and comment nodes. Whitespace
     * runs are collapsed to single spaces and text fragments are separated by
     * one space.
     *
     * @param sb                   buffer receiving the text
     * @param node                 root of the subtree to read
     * @param abortOnNestedAnchors when {@code true}, stop as soon as an
     *                             {@code a} element nested inside another
     *                             {@code a} element is reached
     * @return {@code true} if the walk was aborted because of a nested anchor
     */
    public boolean getText(StringBuffer sb, Node node, boolean abortOnNestedAnchors) {
        return this.getTextHelper(sb, node, abortOnNestedAnchors, 0);
    }

    /**
     * Same as {@link #getText(StringBuffer, Node, boolean)} without aborting
     * on nested anchors.
     *
     * @param sb   buffer receiving the text
     * @param node root of the subtree to read
     */
    public void getText(StringBuffer sb, Node node) {
        this.getText(sb, node, false);
    }

    /**
     * Recursive worker behind {@code getText}.
     *
     * @param anchorDepth number of {@code a} elements already entered on the
     *                    path from the root to {@code node}
     * @return {@code true} if a nested anchor aborted the walk
     */
    private boolean getTextHelper(StringBuffer sb, Node node, boolean abortOnNestedAnchors, int anchorDepth) {
        String name = node.getNodeName();
        if (SCRIPT.equalsIgnoreCase(name) || "style".equalsIgnoreCase(name)) {
            return false;
        }
        if (abortOnNestedAnchors && "a".equalsIgnoreCase(name)) {
            anchorDepth++;
            if (anchorDepth > 1) {
                return true;
            }
        }
        short type = node.getNodeType();
        if (type == Node.COMMENT_NODE) {
            return false;
        }
        if (type == Node.TEXT_NODE) {
            String raw = node.getNodeValue();
            if (raw != null) {
                String text = WHITESPACE.matcher(raw).replaceAll(" ").trim();
                if (text.length() > 0) {
                    if (sb.length() > 0) {
                        sb.append(' ');
                    }
                    sb.append(text);
                }
            }
        }
        NodeList children = node.getChildNodes();
        if (children != null) {
            int len = children.getLength();
            for (int i = 0; i < len; i++) {
                if (this.getTextHelper(sb, children.item(i), abortOnNestedAnchors, anchorDepth)) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Searches the subtree below {@code node} (depth first) for the first
     * {@code title} element and appends its text to {@code sb}. The search
     * never descends into a {@code body} node.
     *
     * @param sb   buffer receiving the title text
     * @param node root of the subtree to search
     * @return {@code true} if a {@code title} element was found
     */
    public boolean getTitle(StringBuffer sb, Node node) {
        String name = node.getNodeName();
        if ("body".equalsIgnoreCase(name)) {
            return false;
        }
        if (node.getNodeType() == Node.ELEMENT_NODE && "title".equalsIgnoreCase(name)) {
            this.getText(sb, node);
            return true;
        }
        NodeList children = node.getChildNodes();
        if (children != null) {
            int len = children.getLength();
            for (int i = 0; i < len; i++) {
                if (this.getTitle(sb, children.item(i))) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Searches the subtree below {@code node} (depth first) for a
     * {@code base} element carrying a parseable {@code href} and returns it as
     * a URL. The search never descends into a {@code body} element. A
     * malformed {@code href} is logged at debug level and the search goes on.
     *
     * @param node root of the subtree to search
     * @return the base URL, or {@code null} if none was found
     */
    public URL getBase(Node node) {
        if (node.getNodeType() == Node.ELEMENT_NODE) {
            String name = node.getNodeName();
            if ("body".equalsIgnoreCase(name)) {
                return null;
            }
            if ("base".equalsIgnoreCase(name)) {
                URL declared = this.parseBaseHref(node.getAttributes());
                if (declared != null) {
                    return declared;
                }
            }
        }
        NodeList children = node.getChildNodes();
        if (children != null) {
            int len = children.getLength();
            for (int i = 0; i < len; i++) {
                URL base = this.getBase(children.item(i));
                if (base != null) {
                    return base;
                }
            }
        }
        return null;
    }

    /** Returns the first {@code href} attribute in {@code attrs} that parses as a URL, else {@code null}. */
    private URL parseBaseHref(NamedNodeMap attrs) {
        if (attrs == null) {
            return null;
        }
        for (int i = 0; i < attrs.getLength(); i++) {
            Node attr = attrs.item(i);
            if (HREF.equalsIgnoreCase(attr.getNodeName())) {
                try {
                    return new URL(attr.getNodeValue());
                }
                catch (MalformedURLException exception) {
                    this.logError(attr.getNodeValue(), exception);
                }
            }
        }
        return null;
    }

    /** Returns {@code true} if the node value is {@code null}, empty, or consists of whitespace only. */
    private boolean hasOnlyWhiteSpace(Node node) {
        String val = node.getNodeValue();
        if (val == null) {
            return true;
        }
        for (int i = 0; i < val.length(); i++) {
            if (!Character.isWhitespace(val.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** Returns {@code true} if {@code node} is an element with the same name as the link element in {@code params}. */
    private boolean isSameElement(Node node, LinkParams params) {
        return node.getNodeType() == Node.ELEMENT_NODE && params.elName.equalsIgnoreCase(node.getNodeName());
    }

    /** Returns {@code true} if {@code node} is a text node holding only whitespace. */
    private boolean isBlankText(Node node) {
        return node.getNodeType() == Node.TEXT_NODE && this.hasOnlyWhiteSpace(node);
    }

    /**
     * Decides whether a link element should be ignored. A link is thrown away
     * when it has no children although its kind expects some, or when its
     * only content is a single element of the same kind, optionally
     * surrounded by whitespace-only text nodes.
     */
    private boolean shouldThrowAwayLink(NodeList children, int childLen, LinkParams params) {
        switch (childLen) {
            case 0:
                return params.childLen != 0;
            case 1:
                return this.isSameElement(children.item(0), params);
            case 2: {
                Node c0 = children.item(0);
                Node c1 = children.item(1);
                return (this.isSameElement(c0, params) && this.isBlankText(c1))
                    || (this.isSameElement(c1, params) && this.isBlankText(c0));
            }
            case 3: {
                Node c0 = children.item(0);
                Node c1 = children.item(1);
                Node c2 = children.item(2);
                return this.isSameElement(c1, params)
                    && c0.getNodeType() == Node.TEXT_NODE
                    && c2.getNodeType() == Node.TEXT_NODE
                    && this.hasOnlyWhiteSpace(c0)
                    && this.hasOnlyWhiteSpace(c2);
            }
            default:
                return false;
        }
    }

    /** Returns {@code true} if the space-separated {@code rel} value contains the {@code nofollow} token. */
    private static boolean hasNoFollow(String rel) {
        if (rel == null) {
            return false;
        }
        // rel holds a list of tokens, so "nofollow noopener" must also match.
        String[] tokens = WHITESPACE.split(rel.trim());
        for (int i = 0; i < tokens.length; i++) {
            if (NO_FOLLOW.equalsIgnoreCase(tokens[i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Collects the outgoing links found in the subtree below {@code node}.
     * Links marked {@code rel="nofollow"} and forms with
     * {@code method="post"} are skipped; targets that cannot be resolved
     * against {@code base} are logged at debug level and skipped.
     *
     * @param base     URL against which link targets are resolved
     * @param outlinks list receiving the links found
     * @param node     root of the subtree to scan
     */
    public void getOutlinks(URL base, List<Outlink> outlinks, Node node) {
        NodeList children = node.getChildNodes();
        int childLen = children != null ? children.getLength() : 0;
        if (node.getNodeType() == Node.ELEMENT_NODE) {
            // Fixed locale: the default one may map 'I' to a dotless 'i' and miss "iframe"/"link".
            LinkParams params = this._linkParams.get(node.getNodeName().toLowerCase(Locale.ENGLISH));
            if (params != null) {
                if (!this.shouldThrowAwayLink(children, childLen, params)) {
                    this.addOutlink(base, outlinks, node, params);
                }
                if (params.childLen == 0) {
                    // This kind of element is not expected to contain further links.
                    return;
                }
            }
        }
        for (int i = 0; i < childLen; i++) {
            this.getOutlinks(base, outlinks, children.item(i));
        }
    }

    /** Reads the link attributes of one element and, if it is followable, appends the resulting outlink. */
    private void addOutlink(URL base, List<Outlink> outlinks, Node node, LinkParams params) {
        String target = null;
        boolean noFollow = false;
        boolean post = false;
        NamedNodeMap attrs = node.getAttributes();
        if (attrs != null) {
            for (int i = 0; i < attrs.getLength(); i++) {
                Node attr = attrs.item(i);
                String attrName = attr.getNodeName();
                if (params.attrName.equalsIgnoreCase(attrName)) {
                    target = attr.getNodeValue();
                } else if ("rel".equalsIgnoreCase(attrName) && hasNoFollow(attr.getNodeValue())) {
                    noFollow = true;
                } else if ("method".equalsIgnoreCase(attrName) && "post".equalsIgnoreCase(attr.getNodeValue())) {
                    post = true;
                }
            }
        }
        if (target == null || noFollow || post) {
            return;
        }
        try {
            URL url = new URL(base, target);
            StringBuffer linkText = new StringBuffer();
            this.getText(linkText, node, true);
            outlinks.add(new Outlink(url.toString(), linkText.toString().trim(), this._conf));
        }
        catch (MalformedURLException exception) {
            this.logError(target, exception);
        }
    }

    /**
     * Collects links that the JavaScript parser finds in inline scripts,
     * {@code on*} event-handler attributes and {@code href} attributes
     * containing {@code javascript:}. Does nothing when no JavaScript parser
     * has been set.
     *
     * @param base     base URL string handed to the JavaScript parser
     * @param outlinks list receiving the links found
     * @param node     root of the subtree to scan
     */
    public void getJavascriptOutlinks(String base, List<Outlink> outlinks, Node node) {
        if (this._javascriptParser == null) {
            if (this._log.isDebugEnabled()) {
                this._log.debug("No JavaScript parser set; skipping JavaScript outlink extraction");
            }
            return;
        }
        NodeList children = node.getChildNodes();
        int childLen = children != null ? children.getLength() : 0;
        if (node.getNodeType() == Node.ELEMENT_NODE) {
            if (SCRIPT.equalsIgnoreCase(node.getNodeName())) {
                if (childLen > 0) {
                    StringBuffer script = new StringBuffer();
                    for (int i = 0; i < childLen; i++) {
                        if (i > 0) {
                            script.append('\n');
                        }
                        String part = children.item(i).getNodeValue();
                        if (part != null) {
                            // A null value would otherwise be appended as the text "null".
                            script.append(part);
                        }
                    }
                    this.addLinks(outlinks, this._javascriptParser.getOutlinks(script.toString(), base, base));
                    // Script content has been consumed; do not descend into it again.
                    return;
                }
            } else {
                this.addAttributeScriptLinks(base, outlinks, node.getAttributes());
            }
        }
        for (int i = 0; i < childLen; i++) {
            this.getJavascriptOutlinks(base, outlinks, children.item(i));
        }
    }

    /** Runs the JavaScript parser over event-handler and {@code javascript:} href attributes. */
    private void addAttributeScriptLinks(String base, List<Outlink> outlinks, NamedNodeMap attributes) {
        if (attributes == null) {
            return;
        }
        int attributesLength = attributes.getLength();
        for (int i = 0; i < attributesLength; i++) {
            Node attributeNode = attributes.item(i);
            String name = attributeNode.getNodeName();
            Outlink[] links = null;
            // Case-insensitive prefix test, matching how every other attribute name is compared here.
            if (name.regionMatches(true, 0, ON, 0, ON.length())) {
                links = this._javascriptParser.getOutlinks(attributeNode.getNodeValue(), base, base);
            } else if (HREF.equalsIgnoreCase(name)) {
                String value = attributeNode.getNodeValue();
                if (value != null && value.toLowerCase(Locale.ENGLISH).contains(JAVASCRIPT_PREFIX)) {
                    links = this._javascriptParser.getOutlinks(value, base, base);
                }
            }
            this.addLinks(outlinks, links);
        }
    }

    /** Appends {@code links} to {@code outlinks}; a {@code null} or empty array is ignored. */
    private void addLinks(List<Outlink> outlinks, Outlink[] links) {
        if (links != null && links.length > 0) {
            outlinks.addAll(Arrays.asList(links));
        }
    }

    /**
     * Sets the parser used by {@link #getJavascriptOutlinks}.
     *
     * @param javascriptParser parser instance; it is cast to
     *                         {@code JavascriptParser}, so any other
     *                         implementation causes a {@link ClassCastException}
     */
    public void setJavascriptParser(Parser javascriptParser) {
        this._javascriptParser = (JavascriptParser)((Object)javascriptParser);
    }

    /** Logs a link that could not be turned into a URL, at debug level only. */
    private void logError(String target, Throwable exception) {
        if (this._log.isDebugEnabled()) {
            this._log.debug("Error extracting the link from DOM tree: [" + target + "], Exception was: [" + exception.getMessage() + "]");
        }
    }

    /**
     * Describes one kind of link-carrying element: its name, the attribute
     * holding the link target, and a child-count marker.
     */
    public static class LinkParams {
        /** Element name, e.g. {@code a}. */
        public String elName;
        /** Name of the attribute that holds the link target, e.g. {@code href}. */
        public String attrName;
        /**
         * Child-count marker: when {@code 0}, an element of this kind is kept
         * even without children and its subtree is not scanned for further
         * links; otherwise an element without children is discarded.
         */
        public int childLen;

        /**
         * @param elName   element name
         * @param attrName attribute holding the link target
         * @param childLen child-count marker
         */
        public LinkParams(String elName, String attrName, int childLen) {
            this.elName = elName;
            this.attrName = attrName;
            this.childLen = childLen;
        }

        @Override
        public String toString() {
            return "LP[el=" + this.elName + ",attr=" + this.attrName + ",len=" + this.childLen + "]";
        }
    }
}

