package org.eclipse.hyades.logging.core;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.UnsupportedEncodingException;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.eclipse.hyades.internal.logging.core.Constants;
import org.w3c.dom.Document;

/**********************************************************************
 * Copyright (c) 2003 Hyades project.
 * All rights reserved.   This program and the accompanying materials
 * are made available under the terms of the Common Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/cpl-v10.html
 * 
 * Contributors: 
 * IBM - Initial API and implementation
 **********************************************************************/

/**
 * Utility class for working with XML data.
 * <p>
 * Provides static helpers for serializing a DOM <code>Document</code> to a
 * file, a string or an array of bytes, and for normalizing and denormalizing
 * strings that are embedded in XML documents.
 * <p>
 * 
 * @author Paul Slauenwhite
 * @version May 17, 2004
 * @since April 15, 2004
 */
public class XmlUtility implements Constants {

    /**
     * Serializes a DOM to an XML document and writes the XML document to an 
     * output file on the local file system.
     * <p>
     * The parameter DOM is serialized to an XML document, which is formatted 
     * (e.g. line breaks, indentation, etc.) and written to an output file on 
     * the local file system.
     * <p>
     * The encoding for the serialized XML document is explicitly set to "UTF-8"
     * for all platforms excluding z/OS and OS/390 platforms. The encoding for the
     * serialized XML document is explicitly set to "IBM-1047" for z/OS and OS/390
     * platforms only.
     * <p>
     * 
     * @param document The DOM to be serialized to a formatted XML document and written to the output file.
     * @param outputFile The file on the local file system where the formatted XML document is written.
     * @throws TransformerException If the <code>TransformerFactory</code> cannot create a new XML <code>Transformer</code> instance or an error occurs during serialization.
     */
    public static void serialize(Document document, File outputFile) throws TransformerException {
        serialize(document, outputFile, true);
    }

    /**
     * Serializes a DOM to an XML document and writes the XML document to an 
     * output file on the local file system.
     * <p>
     * The serialized XML document is formatted (e.g. line breaks, indentation,
     * etc.) if the parameter <code>format</code> flag is true.
     * <p>
     * The encoding for the serialized XML document is explicitly set to "UTF-8"
     * for all platforms excluding z/OS and OS/390 platforms. The encoding for the
     * serialized XML document is explicitly set to "IBM-1047" for z/OS and OS/390
     * platforms only.
     * <p>
     * 
     * @param document The DOM to be serialized to a potentially formatted XML document and written to the output file.
     * @param outputFile The file on the local file system where the potentially formatted XML document is written.
     * @param format If the serialized XML document is formatted (e.g. line breaks, indentation, etc.).
     * @throws TransformerException If the <code>TransformerFactory</code> cannot create a new XML <code>Transformer</code> instance or an error occurs during serialization.
     */
    public static void serialize(Document document, File outputFile, boolean format) throws TransformerException {

        Transformer transformer = createXMLTransformer(format);

        transformer.transform(new DOMSource(document), new StreamResult(outputFile));
    }

    /**
     * Serializes a DOM to an XML document string.
     * <p>
     * The parameter DOM is serialized to the returned XML document string, which is
     * formatted (e.g. line breaks, indentation, etc.).
     * <p>
     * The encoding declared by the serialized XML document string is "UTF-8"
     * for all platforms excluding z/OS and OS/390 platforms, and "IBM-1047" 
     * for z/OS and OS/390 platforms only.
     * <p>
     * 
     * @param document The DOM to be serialized to a formatted XML document string.
     * @return The DOM serialized as a formatted XML document string.
     * @throws TransformerException If the <code>TransformerFactory</code> cannot create a new XML <code>Transformer</code> instance or an error occurs during serialization.
     */
    public static String serialize(Document document) throws TransformerException {
        return serialize(document, true);
    }

    /**
     * Serializes a DOM to an XML document string.
     * <p>
     * The returned XML document string is formatted (e.g. line breaks, indentation,
     * etc.) if the parameter <code>format</code> flag is true.
     * <p>
     * The DOM is first serialized to bytes (see 
     * {@link #serializeAsByteArray(Document, boolean)}) and the bytes are then 
     * decoded with the same encoding that was used to produce them ("UTF-8", or 
     * "IBM-1047" on z/OS and OS/390), so the result does not depend on the 
     * default character set of the running JVM.
     * <p>
     * 
     * @param document The DOM to be serialized to a potentially formatted XML document string.
     * @param format If the serialized XML document string is formatted (e.g. line breaks, indentation, etc.).
     * @return The DOM serialized as a potentially formatted XML document string.
     * @throws TransformerException If the <code>TransformerFactory</code> cannot create a new XML <code>Transformer</code> instance, an error occurs during serialization or the serialized bytes cannot be decoded.
     */
    public static String serialize(Document document, boolean format) throws TransformerException {

        byte[] serializedDocument = serializeAsByteArray(document, format);
        String encoding = getOutputEncoding();

        try {

            //Decode with the encoding the bytes were written in, NOT the
            //platform default, otherwise non-ASCII characters are corrupted
            //whenever the two differ:
            return new String(serializedDocument, encoding);
        } 
        catch (UnsupportedEncodingException unsupportedEncodingException) {
            throw new TransformerException("Unsupported encoding: " + encoding, unsupportedEncodingException);
        }
    }

    /**
     * Serializes a DOM to an XML document array of bytes.
     * <p>
     * The parameter DOM is serialized to the returned XML document array of bytes, 
     * which is formatted (e.g. line breaks, indentation, etc.).
     * <p>
     * The encoding for the serialized XML document array of bytes is explicitly set to "UTF-8"
     * for all platforms excluding z/OS and OS/390 platforms. The encoding for the
     * serialized XML document array of bytes is explicitly set to "IBM-1047" for z/OS and OS/390
     * platforms only.
     * <p>
     * 
     * @param document The DOM to be serialized to a formatted XML document array of bytes.
     * @return The DOM serialized as a formatted XML document array of bytes.
     * @throws TransformerException If the <code>TransformerFactory</code> cannot create a new XML <code>Transformer</code> instance or an error occurs during serialization.
     */
    public static byte[] serializeAsByteArray(Document document) throws TransformerException {
        return serializeAsByteArray(document, true);
    }

    /**
     * Serializes a DOM to an XML document array of bytes.
     * <p>
     * The returned XML document array of bytes is formatted (e.g. line breaks, indentation,
     * etc.) if the parameter <code>format</code> flag is true.
     * <p>
     * The encoding for the serialized XML document array of bytes is explicitly set to "UTF-8"
     * for all platforms excluding z/OS and OS/390 platforms. The encoding for the
     * serialized XML document array of bytes is explicitly set to "IBM-1047" for z/OS and OS/390
     * platforms only.
     * <p>
     * 
     * @param document The DOM to be serialized to a potentially formatted XML document array of bytes.
     * @param format If the serialized XML document array of bytes is formatted (e.g. line breaks, indentation, etc.).
     * @return The DOM serialized as a potentially formatted XML document array of bytes.
     * @throws TransformerException If the <code>TransformerFactory</code> cannot create a new XML <code>Transformer</code> instance or an error occurs during serialization.
     */
    public static byte[] serializeAsByteArray(Document document, boolean format) throws TransformerException {

        ByteArrayOutputStream transformerOutput = new ByteArrayOutputStream();
        Transformer transformer = createXMLTransformer(format);

        transformer.transform(new DOMSource(document), new StreamResult(transformerOutput));

        return transformerOutput.toByteArray();
    }

    /**
     * Creates an XML <code>Transformer</code>.
     * <p>
     * The newly created <code>Transformer</code> uses the "xml" output method 
     * for transforming DOMs to XML documents.
     * <p>
     * The returned XML <code>Transformer</code> will transform DOMs to formatted 
     * (e.g. line breaks, indentation, etc.) XML documents if the parameter 
     * <code>format</code> flag is true.
     * <p>
     * The encoding for the returned XML <code>Transformer</code> is explicitly 
     * set to "UTF-8" for all platforms excluding z/OS and OS/390 platforms. The encoding 
     * for the returned XML <code>Transformer</code> is explicitly set to "IBM-1047" 
     * for z/OS and OS/390 platforms only.
     * <p>
     * 
     * @param format If the returned XML <code>Transformer</code> is to transform DOMs to formatted (e.g. line breaks, indentation, etc.) XML documents.
     * @return The newly created XML <code>Transformer</code>.
     * @throws TransformerException If the <code>TransformerFactory</code> cannot create a new XML <code>Transformer</code> instance.
     */
    private static Transformer createXMLTransformer(boolean format) throws TransformerException {

        Transformer transformer = TransformerFactory.newInstance().newTransformer();

        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
        transformer.setOutputProperty(OutputKeys.ENCODING, getOutputEncoding());

        if (format) {

            transformer.setOutputProperty(OutputKeys.INDENT, "yes");

            //Unless a width is set, there will be only line breaks but no
            //indentation.
            //NOTE: The IBM and Sun JDK do not agree on the property name so
            //both are set.
            transformer.setOutputProperty("{http://xml.apache.org/xalan}indent-amount", "2");
            transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
        }

        return transformer;
    }

    /**
     * Resolves the name of the encoding used for serialized XML documents.
     * <p>
     * The platform is identified by the <code>OS_NAME</code> constant inherited 
     * from <code>Constants</code>. The comparison is written constant-first so 
     * that a null <code>OS_NAME</code> selects "UTF-8" instead of failing.
     * <p>
     * 
     * @return "IBM-1047" if <code>OS_NAME</code> is "z/OS" or "OS/390", otherwise "UTF-8".
     */
    private static String getOutputEncoding() {

        if (("z/OS".equals(OS_NAME)) || ("OS/390".equals(OS_NAME))) {
            return "IBM-1047";
        }

        return "UTF-8";
    }

    /**
     * Normalizes the parameter string according to the XML specification for
     * attribute-value normalization ( <a
     * href="http://www.w3.org/TR/REC-xml">http://www.w3.org/TR/REC-xml </a>)
     * and valid characters ( <a
     * href="http://www.w3.org/TR/REC-xml#charsets">http://www.w3.org/TR/REC-xml#charsets
     * </a>).
     * <p>
     * The characters <code>&lt; &gt; &amp; " '</code> are replaced by their 
     * predefined entity references, and tab, line feed and carriage return are 
     * replaced by the character references <code>&amp;#x9;</code>, 
     * <code>&amp;#xA;</code> and <code>&amp;#xD;</code>.
     * <p>
     * Valid characters, according to the XML specification, are all Unicode
     * characters, excluding the surrogate blocks, 0xFFFE, and 0xFFFF. A 
     * supplementary character (0x10000 - 0x10FFFF), which a Java string 
     * holds as a well-formed high/low surrogate pair, is valid and is copied 
     * unchanged. Every other non-valid character, including an unpaired 
     * surrogate, is replaced by a question mark.
     * <p>
     * 
     * @param string The string to be normalized.
     * @return The normalized string, the string "null" if the parameter string is null, or an empty string if the parameter string is empty.
     */
    public static String normalize(String string) {

        //Return 'null' if the string is null:
        if (string == null) {
            return "null";
        }

        int length = string.length();

        //Return an empty string if the string is empty:
        if (length == 0) {
            return "";
        }

        StringBuffer normalizedString = new StringBuffer(length);

        for (int index = 0; index < length; index++) {

            char character = string.charAt(index);

            switch (character) {

                //0x003C:
                case '<':
                    normalizedString.append("&lt;");
                    break;

                //0x003E:
                case '>':
                    normalizedString.append("&gt;");
                    break;

                //0x0026:
                case '&':
                    normalizedString.append("&amp;");
                    break;

                //0x0022:
                case '"':
                    normalizedString.append("&quot;");
                    break;

                //0x0027:
                case '\'':
                    normalizedString.append("&apos;");
                    break;

                //0x0009:
                case '\t':
                    normalizedString.append("&#x9;");
                    break;

                //0x000A:
                case '\n':
                    normalizedString.append("&#xA;");
                    break;

                //0x000D:
                case '\r':
                    normalizedString.append("&#xD;");
                    break;

                default:

                    //Valid character in the Basic Multilingual Plane:
                    if (isValidBmpCharacter(character)) {
                        normalizedString.append(character);
                    }

                    //Valid supplementary character (0x10000 - 0x10FFFF), which
                    //is only ever seen as a high surrogate immediately followed
                    //by a low surrogate, so both halves are copied together:
                    else if ((isHighSurrogate(character)) && ((index + 1) < length) && (isLowSurrogate(string.charAt(index + 1)))) {
                        normalizedString.append(character);
                        normalizedString.append(string.charAt(++index));
                    }

                    //Non-valid character (control character, unpaired
                    //surrogate, 0xFFFE or 0xFFFF):
                    else {
                        normalizedString.append('?');
                    }

                    break;
            }
        }

        return normalizedString.toString();
    }

    /**
     * Checks if the parameter character is a valid XML character in the ranges 
     * 0x0020 - 0xD7FF or 0xE000 - 0xFFFD.
     * <p>
     * 
     * @param character The character to be checked.
     * @return True if the character is in one of the two ranges, otherwise false.
     */
    private static boolean isValidBmpCharacter(char character) {
        return (((character >= 0x0020) && (character <= 0xD7FF)) || ((character >= 0xE000) && (character <= 0xFFFD)));
    }

    /**
     * Checks if the parameter character is a high (leading) surrogate 
     * (0xD800 - 0xDBFF).
     * <p>
     * 
     * @param character The character to be checked.
     * @return True if the character is a high surrogate, otherwise false.
     */
    private static boolean isHighSurrogate(char character) {
        return ((character >= 0xD800) && (character <= 0xDBFF));
    }

    /**
     * Checks if the parameter character is a low (trailing) surrogate 
     * (0xDC00 - 0xDFFF).
     * <p>
     * 
     * @param character The character to be checked.
     * @return True if the character is a low surrogate, otherwise false.
     */
    private static boolean isLowSurrogate(char character) {
        return ((character >= 0xDC00) && (character <= 0xDFFF));
    }

    /**
     * Denormalizes the parameter string.
     * <p>
     * The supported references <code>&amp;lt;</code>, <code>&amp;gt;</code>, 
     * <code>&amp;amp;</code>, <code>&amp;quot;</code>, <code>&amp;apos;</code>, 
     * <code>&amp;#x9;</code>, <code>&amp;#xA;</code> and <code>&amp;#xD;</code> 
     * are replaced by the character they represent. White space surrounding 
     * the name between the ampersand and the semicolon is ignored.
     * <p>
     * An ampersand that does not start a supported reference is copied 
     * unchanged and scanning resumes with the character after it, so the text 
     * following it is neither altered nor skipped.
     * <p>
     * 
     * @param string The String to be denormalized.
     * @return The denormalized String, or the string "null" if the parameter String is null.
     */
    public static String denormalize(String string) {

        if (string == null) {
            return "null";
        }

        int length = string.length();
        StringBuffer denormalizedString = new StringBuffer(length);

        //Locate and denormalize all supported references:
        for (int index = 0; index < length; index++) {

            char character = string.charAt(index);

            //Check if this character is the start of a possible reference
            //(e.g. ampersand in &<name>;):
            if (character == '&') {

                int semiColonIndex = findReferenceEnd(string, (index + 1));

                if (semiColonIndex != -1) {

                    int replacement = resolveReference(string.substring((index + 1), semiColonIndex).trim());

                    if (replacement != -1) {

                        denormalizedString.append((char) (replacement));

                        //Skip the remainder of the consumed reference:
                        index = semiColonIndex;

                        continue;
                    }
                }
            }

            //Ordinary character, or an ampersand that does not start a
            //supported reference:
            denormalizedString.append(character);
        }

        return denormalizedString.toString();
    }

    /**
     * Finds the semicolon that terminates a possible reference.
     * <p>
     * The search stops at the next ampersand since none of the supported 
     * reference names contains one. This also bounds the work done for each 
     * ampersand to the text up to the next ampersand or semicolon.
     * <p>
     * 
     * @param string The string being denormalized.
     * @param fromIndex The index of the first character after the ampersand that starts the possible reference.
     * @return The index of the terminating semicolon, or -1 if an ampersand or the end of the string is reached first.
     */
    private static int findReferenceEnd(String string, int fromIndex) {

        for (int index = fromIndex; index < string.length(); index++) {

            char character = string.charAt(index);

            if (character == ';') {
                return index;
            }

            if (character == '&') {
                return -1;
            }
        }

        return -1;
    }

    /**
     * Resolves the name of a supported reference to the character it represents.
     * <p>
     * 
     * @param name The text between the ampersand and the semicolon of the reference, without surrounding white space.
     * @return The character represented by the reference, or -1 if the name is not supported.
     */
    private static int resolveReference(String name) {

        if (name.equals("lt")) {
            return '<';
        }

        if (name.equals("gt")) {
            return '>';
        }

        if (name.equals("amp")) {
            return '&';
        }

        if (name.equals("quot")) {
            return '"';
        }

        if (name.equals("apos")) {
            return '\'';
        }

        if (name.equals("#x9")) {
            return '\t';
        }

        if (name.equals("#xA")) {
            return '\n';
        }

        if (name.equals("#xD")) {
            return '\r';
        }

        //Not a supported reference:
        return -1;
    }
}