/*
 * Decompiled with CFR 0.152.
 */
package org.eclipse.epf.common.html;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Properties;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;

/**
 * Extracts plain text and a few pieces of metadata from an HTML document.
 *
 * <p>The HTML is first run through JTidy to obtain a DOM, which is then walked
 * recursively. Results of the most recent successful parse are available through
 * {@link #getTitle()}, {@link #getSummary()}, {@link #getText()} and
 * {@link #getMetaTags()}. Until a parse has run, those getters return
 * {@code null} because the fields are only assigned in {@link #parse(String)}.
 *
 * <p>Instances keep mutable per-parse state in fields and perform no
 * synchronization of their own.
 */
public class HTMLParser {
    /** Size, in chars, of the read buffer and the initial text buffer capacity. */
    private static final int BUFFER_SIZE = 4096;
    /** Character encoding used for reading files and feeding JTidy. */
    private static final String ENCODING = "UTF-8";
    private static final String HTML_SCRIPT_TAG = "script";
    private static final String HTML_TITLE_TAG = "title";
    private static final String HTML_META_TAG = "meta";

    /** The configured JTidy instance, or {@code null} if configuration failed. */
    private Tidy tidy;
    private String title;
    private String summary;
    private String text;
    private Properties metaTags;
    /** Accumulates text node values while a document is being walked. */
    private StringBuffer htmlText;

    /**
     * Creates a parser and configures the underlying JTidy instance.
     *
     * <p>If creating or configuring JTidy throws, the failure is absorbed and
     * the parser is left in a disabled state: {@link #parse(File)} then returns
     * without doing anything.
     */
    public HTMLParser() {
        try {
            this.tidy = new Tidy();
            this.tidy.setXHTML(true);
            this.tidy.setDropEmptyParas(true);
            this.tidy.setDropFontTags(true);
            this.tidy.setQuiet(true);
            this.tidy.setShowWarnings(false);
            this.tidy.setSmartIndent(false);
            this.tidy.setTidyMark(false);
            this.tidy.setWraplen(132);
            this.tidy.setIndentAttributes(false);
            this.tidy.setIndentContent(false);
            this.tidy.setSpaces(2);
            this.tidy.setInputEncoding(ENCODING);
            this.tidy.setOutputEncoding(ENCODING);
        }
        catch (Exception exception) {
            // Deliberate best effort: a parser without JTidy simply parses nothing.
            this.tidy = null;
        }
    }

    /**
     * Reads the given file as UTF-8 and parses its content.
     *
     * <p>Returns without touching any previously parsed results when JTidy is
     * unavailable or the file does not exist or cannot be read.
     *
     * @param file the HTML file to parse
     * @throws Exception if reading the file or parsing its content fails
     */
    public void parse(File file) throws Exception {
        if (this.tidy == null || !file.exists() || !file.canRead()) {
            return;
        }
        StringBuffer textBuffer = new StringBuffer(BUFFER_SIZE);
        InputStream fis = new FileInputStream(file);
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(fis, ENCODING));
            char[] buffer = new char[BUFFER_SIZE];
            int charsRead;
            while ((charsRead = br.read(buffer, 0, BUFFER_SIZE)) > 0) {
                textBuffer.append(buffer, 0, charsRead);
            }
        }
        finally {
            // Always release the file handle, even when reading fails.
            try {
                fis.close();
            }
            catch (IOException ignored) {
                // Nothing useful can be done if closing fails; the content is already read.
            }
        }
        this.parse(textBuffer.toString());
    }

    /**
     * Parses the given HTML source and stores the extracted results.
     *
     * <p>Title, summary and text are reset to the empty string and the meta tag
     * table is replaced with an empty one before parsing. The summary is not
     * assigned anywhere else in this class, so it stays empty.
     *
     * @param htmlSource the HTML markup to parse; {@code null} or empty input
     *        yields empty results
     * @throws Exception if JTidy fails to build a document
     */
    protected void parse(String htmlSource) throws Exception {
        this.title = "";
        this.summary = "";
        this.text = "";
        this.metaTags = new Properties();
        Document doc = this.getDocument(htmlSource);
        if (doc != null) {
            this.htmlText = new StringBuffer(1024);
            this.extract(doc.getChildNodes());
            this.text = this.htmlText.toString();
        }
    }

    /**
     * @return the title found by the last parse, {@code ""} if none was found,
     *         or {@code null} if nothing has been parsed yet
     */
    public String getTitle() {
        return this.title;
    }

    /**
     * @return the table filled by the last parse, or {@code null} if nothing
     *         has been parsed yet; this is the internal instance, not a copy
     */
    public Properties getMetaTags() {
        return this.metaTags;
    }

    /**
     * @return the summary of the last parse (this class only ever sets it to
     *         {@code ""}), or {@code null} if nothing has been parsed yet
     */
    public String getSummary() {
        return this.summary;
    }

    /**
     * @return the concatenated text node values of the last parse, each followed
     *         by a single space, or {@code null} if nothing has been parsed yet
     */
    public String getText() {
        return this.text;
    }

    /**
     * Builds a DOM for the given HTML using JTidy.
     *
     * @param html the HTML markup
     * @return the parsed document, or {@code null} if {@code html} is
     *         {@code null} or empty, or JTidy is unavailable
     * @throws Exception if the markup cannot be encoded or JTidy fails
     */
    protected Document getDocument(String html) throws Exception {
        if (this.tidy == null || html == null || html.length() == 0) {
            return null;
        }
        InputStream input = new ByteArrayInputStream(html.getBytes(ENCODING));
        // JTidy's tidied markup and diagnostics go to in-memory sinks that are discarded;
        // only the DOM is of interest here.
        OutputStream output = new ByteArrayOutputStream();
        this.tidy.setErrout(new PrintWriter(new StringWriter()));
        return this.tidy.parseDOM(input, output);
    }

    /**
     * Recursively walks the given nodes, collecting text and metadata.
     *
     * <p>Element nodes named {@code script} are skipped together with their
     * children. For every other element, its attributes are inspected and its
     * children are visited. The value of every text node is appended to the
     * text buffer followed by a space. Other node types are ignored.
     *
     * @param nodes the nodes to visit; {@code null} is treated as empty
     */
    protected void extract(NodeList nodes) {
        if (nodes == null) {
            return;
        }
        for (int i = 0; i < nodes.getLength(); i++) {
            Node node = nodes.item(i);
            switch (node.getNodeType()) {
                case Node.ELEMENT_NODE: {
                    if (HTML_SCRIPT_TAG.equals(node.getNodeName())) {
                        break;
                    }
                    this.extractAttributes(node.getAttributes());
                    NodeList childNodes = node.getChildNodes();
                    if (childNodes != null && childNodes.getLength() > 0) {
                        this.extract(childNodes);
                    }
                    break;
                }
                case Node.TEXT_NODE: {
                    this.htmlText.append(node.getNodeValue()).append(' ');
                    break;
                }
                default: {
                    break;
                }
            }
        }
    }

    /**
     * Records the title and meta information carried by an element's attributes.
     *
     * <p>An attribute named {@code title} overwrites the current title with its
     * value; an attribute named {@code meta} is stored in the meta tag table
     * under its own name.
     *
     * <p>NOTE(review): matching is done on attribute names, not element names,
     * so the content of a {@code <title>} element and {@code <meta>} elements
     * are not captured here. This may not be what was intended; confirm against
     * callers before changing it.
     */
    private void extractAttributes(NamedNodeMap attrs) {
        if (attrs == null) {
            return;
        }
        for (int j = 0; j < attrs.getLength(); j++) {
            Node attrNode = attrs.item(j);
            String attrNodeName = attrNode.getNodeName();
            String attrNodeValue = attrNode.getNodeValue();
            if (attrNodeName.equals(HTML_TITLE_TAG)) {
                this.title = attrNodeValue;
            } else if (attrNodeName.equals(HTML_META_TAG)) {
                this.metaTags.put(attrNodeName, attrNodeValue);
            }
        }
    }
}

