package org.eclipse.hyades.logging.core;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.util.StringTokenizer;

import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.eclipse.hyades.internal.logging.core.Constants;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Node;

/**********************************************************************
 * Copyright (c) 2004 Hyades project.
 * All rights reserved.   This program and the accompanying materials
 * are made available under the terms of the Common Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/cpl-v10.html
 * 
 * Contributors: 
 * IBM - Initial API and implementation
 **********************************************************************/

/**
 * Utility class for working with XML data.
 * <p>
 * <b>Notes:</b> 
 * <p>
 * <ul>
 * <li>
 * Serialization of a Document Object Model (DOM) to an XML document is dependent 
 * on the Java API for XML Processing (JAXP) (e.g. Java v1.4.x and above run-time 
 * environments).  In Java v1.3.x and below run-time environments, Apache's Xerces 
 * <b>MUST</b> be supplied by the user on the classpath.  All instantiations of 
 * Xerces classes/APIs are resolved reflectively at run-time so as to eliminate the 
 * Xerces dependency at compilation time.  Due to the different behavior of JAXP 
 * and Xerces XML serialization APIs, the following inconsistencies exist in the 
 * resultant XML document when serializing the same DOM:
 * </li>
 * <ul>
 * <li>
 * JAXP uses the decimal representation of entity reference values (e.g. &#10; for 
 * the Web line separator character) in attribute values (e.g. CDATA) whereas 
 * Xerces uses the hexadecimal representation of entity reference values (e.g. 
 * &#xa; for the Web line separator character).
 * </li>
 * <li>
 * Xerces uses the Web line separator character (e.g. \n) in CDATA elements whereas
 * JAXP uses the platform dependent line separator character(s) (e.g. \r\n on Windows).
 * </li>
 * <li>
 * Xerces does NOT format CDATA elements relative to their parent element whereas JAXP
 * does format CDATA elements relative to their parent element.
 * </li>
 * <li>
 * Xerces resolves entity references whereas JAXP persists reference values.
 * </li>
 * </ul>
 * <li>
 * Serialization of a DOM to an XML document is only canonical when the appropriate 
 * formatting flag is passed to the serialization API relative to the content of the 
 * DOM. For example, when the DOM contains ignorable white space, the resultant 
 * serialized XML document must be formatted (e.g. setting <code>format</code> to 
 * <code>true</code>) to achieve canonical XML serialization, and vice versa.  
 * </li>
 * <li>
 * Structural formatting for an in-line Document Type Definition (DTD) is not 
 * guaranteed when serializing a DOM to an XML document, since in-line 
 * structural formatting meta-information is not persisted in the DOM.
 * </li>
 * <li>
 * When serializing a DOM to an XML document, all comments after the XML 
 * declaration and before the internal or external DTD in the DOM are moved
 * to after the internal or external DTD in the serialized XML document.
 * </li>
 * <li>
 * Some JAXP implementations (e.g. Sun Java v.1.4.x with Crimson) do not support 
 * a DTD in the DOM.  As such, the resultant XML document does not contain an
 * internal or external DTD.
 * </li>
 * </ul>
 * <p>
 * 
 * 
 * @author Paul E. Slauenwhite
 * @version November 24, 2004
 * @since April 15, 2004
 * @see org.eclipse.hyades.logging.core.SerializationException
 */
public class XmlUtility implements Constants {
   
    /**
     * Static flag for quickly determining if the
     * JAXP classes are available on the classpath 
     * (e.g. Java v1.4.x and above).
     * <p>
     * By default, a Java v1.4.x and above run-time environment is assumed.
     */
    private static boolean isJAXPAvailable = true; //Set to false by serialize(Document, Writer, boolean) when the JAXP serializer cannot be loaded or configured.

    /**
     * Static flag for quickly determining the default encoding for 
     * serialized XML document strings.
     * <p>
     * The encoding for the serialized XML document string is explicitly set to
     * "UTF-8" for all platforms excluding z/OS and OS/390 platforms. The
     * encoding for the serialized XML document string is explicitly set to
     * "IBM-1047" for z/OS and OS/390 platforms only.
     */
    private static String encoding = null;
    
    static {

        //z/OS and OS/390 are the only platforms that use "IBM-1047";
        //every other platform uses "UTF-8":
        final boolean usesIbm1047 = OS_NAME.equals("z/OS") || OS_NAME.equals("OS/390");

        encoding = (usesIbm1047 ? "IBM-1047" : "UTF-8");
    }
    
    /**
     * Serializes a DOM to an XML document string.
     * <p>
     * The parameter DOM is serialized to the returned
     * XML document string, which is formatted (e.g. line breaks and
     * indentation).
     * <p>
     * The encoding for the serialized XML document string is explicitly set to
     * "UTF-8" for all platforms excluding z/OS and OS/390 platforms. The
     * encoding for the serialized XML document string is explicitly set to
     * "IBM-1047" for z/OS and OS/390 platforms only.
     * <p>
	 * NOTE: Serialization of a DOM to an XML document is
	 * only canonical when the appropriate formatting flag is passed to the
	 * serialization API relative to the content of the DOM. For example, when the
	 * DOM contains ignorable white space, the resultant serialized XML document
	 * must be formatted (e.g. setting <code>format</code> to <code>true</code>)
	 * to achieve canonical XML serialization, and vise-versa.  As such, this API 
	 * is only canonical when the DOM contains ignorable white space.  Furthermore, 
	 * structural formatting for in-line DTD is not guaranteed 
	 * since in-line structural formatting meta-information is not persisted in the 
	 * DOM.  Finally, all comments after the XML declaration 
	 * and before the internal or external DTD in the DOM are moved to after the internal or external DTD in the serialized XML document.
     * <p>
     * 
     * @param document
     *            The DOM to be serialized to a
     *            formatted XML document string.
     * @return The DOM serialized as a formatted XML
     *         document string, otherwise null.
     * @throws SerializationException
     *             If an error occurs during serialization.
     */
    public static String serialize(Document document) throws SerializationException {

        //Formatting (e.g. line breaks and indentation) is always on for this overload:
        final boolean format = true;

        return serialize(document, format);
    }

    /**
     * Serializes a DOM to an XML document string.
     * <p>
     * The parameter DOM is serialized to the returned
     * XML document string, which may be potentially formatted.
     * <p>
     * The returned XML document string is formatted (e.g. line breaks and
     * indentation) if the parameter <code>format</code> flag is true. When
     * formatting is turned off, all ignorable white space is not serialized.
     * <p>
     * The encoding for the serialized XML document string is explicitly set to
     * "UTF-8" for all platforms excluding z/OS and OS/390 platforms. The
     * encoding for the serialized XML document string is explicitly set to
     * "IBM-1047" for z/OS and OS/390 platforms only.
     * <p>
	 * NOTE: Serialization of a DOM to an XML document is
	 * only canonical when the appropriate formatting flag is passed to the
	 * serialization API relative to the content of the DOM. For example, when the
	 * DOM contains ignorable white space, the resultant serialized XML document
	 * must be formatted (e.g. setting <code>format</code> to <code>true</code>)
	 * to achieve canonical XML serialization, and vise-versa.  Furthermore, 
	 * structural formatting for in-line DTD is not guaranteed 
	 * since in-line structural formatting meta-information is not persisted in the 
	 * DOM.  Finally, all comments after the XML declaration 
	 * and before the internal or external DTD in the DOM are moved to after the internal or external DTD in the serialized XML document.
     * <p>
     * 
     * @param document
     *            The DOM to be serialized to a
     *            potentially formatted XML document string.
     * @param format
     *            If the serialized XML document string is formatted (e.g. line
     *            breaks and indentation).
     * @return The DOM serialized as a potentially
     *         formatted XML document string, otherwise null.
     * @throws SerializationException
     *             If an error occurs during serialization.
     */
    public static String serialize(Document document, boolean format) throws SerializationException {

        //Serialize to encoded bytes first, then decode them with the same encoding:
        final byte[] xmlDocumentBytes = serializeAsByteArray(document, format);

        try {
            return new String(xmlDocumentBytes, encoding);
        } 
        catch (UnsupportedEncodingException unsupportedEncoding) {

            //The run-time environment does not support the platform's XML encoding:
            throw new SerializationException(unsupportedEncoding.getMessage());
        }
    }
    
    /**
     * Serializes a DOM to an XML document and writes
     * the XML document to an output file on the local file system.
     * <p>
     * The parameter DOM is serialized to an XML
     * document, formatted (e.g. line breaks and indentation) and written to an
     * output file on the local file system.
     * <p>
     * The encoding for the serialized XML document is explicitly set to "UTF-8"
     * for all platforms excluding z/OS and OS/390 platforms. The encoding for
     * the serialized XML document is explicitly set to "IBM-1047" for z/OS and
     * OS/390 platforms only.
     * <p>
	 * NOTE: Serialization of a DOM to an XML document is
	 * only canonical when the appropriate formatting flag is passed to the
	 * serialization API relative to the content of the DOM. For example, when the
	 * DOM contains ignorable white space, the resultant serialized XML document
	 * must be formatted (e.g. setting <code>format</code> to <code>true</code>)
	 * to achieve canonical XML serialization, and vise-versa.  As such, this API 
	 * is only canonical when the DOM contains ignorable white space.  Furthermore, 
	 * structural formatting for in-line DTD is not guaranteed 
	 * since in-line structural formatting meta-information is not persisted in the 
	 * DOM.  Finally, all comments after the XML declaration 
	 * and before the internal or external DTD in the DOM are moved to after the internal or external DTD in the serialized XML document.
     * <p>
     * 
     * @param document
     *            The DOM to be serialized to a
     *            formatted XML document and written to the output file.
     * @param outputFile
     *            The file on the local file system where the formatted XML
     *            document is written.
     * @throws SerializationException
     *             If an error occurs during serialization.
     */
    public static void serialize(Document document, File outputFile) throws SerializationException {

        //Formatting (e.g. line breaks and indentation) is always on for this overload:
        final boolean format = true;

        serialize(document, outputFile, format);
    }

    /**
     * Serializes a DOM to an XML document and writes the XML document to an
     * output file on the local file system.
     * <p>
     * The parameter DOM is serialized to an XML document, which may be
     * potentially formatted and written to an output file on the local file
     * system.
     * <p>
     * The serialized XML document is formatted (e.g. line breaks and
     * indentation) if the parameter <code>format</code> flag is true. When
     * formatting is turned off, all ignorable white space is not serialized.
     * <p>
     * The encoding for the serialized XML document is explicitly set to "UTF-8"
     * for all platforms excluding z/OS and OS/390 platforms. The encoding for
     * the serialized XML document is explicitly set to "IBM-1047" for z/OS and
     * OS/390 platforms only.
     * <p>
     * NOTE: Serialization of a DOM to an XML document is only canonical when
     * the appropriate formatting flag is passed to the serialization API
     * relative to the content of the DOM. For example, when the DOM contains
     * ignorable white space, the resultant serialized XML document must be
     * formatted (e.g. setting <code>format</code> to <code>true</code>) to
     * achieve canonical XML serialization, and vise-versa. Furthermore,
     * structural formatting for in-line DTD is not guaranteed since in-line
     * structural formatting meta-information is not persisted in the DOM.
     * Finally, all comments after the XML declaration and before the internal
     * or external DTD in the DOM are moved to after the internal or external
     * DTD in the serialized XML document.
     * <p>
     * 
     * @param document
     *            The DOM to be serialized to a potentially formatted XML
     *            document and written to the output file.
     * @param outputFile
     *            The file on the local file system where the potentially
     *            formatted XML document is written.
     * @param format
     *            If the serialized XML document is formatted (e.g. line breaks
     *            and indentation).
     * @throws SerializationException
     *             If an error occurs during serialization.
     */
    public static void serialize(Document document, File outputFile, boolean format) throws SerializationException {        
    
        FileOutputStream fileOutputStream = null;

        try {

            //Opening the stream creates (or truncates) the output file:
            fileOutputStream = new FileOutputStream(outputFile);

            serialize(document, fileOutputStream, format);                        
        } 
        catch (FileNotFoundException f) {
            throw (new SerializationException(f.getMessage()));
        }
        finally {

            //Always release the file handle.  The stream-based serialize(...) 
            //only closes the stream once it has wrapped it in a writer, so a 
            //failure before that point (e.g. an unsupported encoding) would 
            //otherwise leak the open file.  Closing an already closed 
            //FileOutputStream has no effect.
            if (fileOutputStream != null) {

                try {
                    fileOutputStream.close();
                } 
                catch (IOException i) {
                    //Ignore since only attempting to close the output stream.
                }
            }
        }
    }
    
    /**
     * Serializes a DOM to an XML document and writes
     * the XML document to an output stream.
     * <p>
     * The parameter DOM is serialized to an XML
     * document, formatted (e.g. line breaks and indentation) and written to an
     * output stream.
     * <p>
     * The encoding for the serialized XML document is explicitly set to "UTF-8"
     * for all platforms excluding z/OS and OS/390 platforms. The encoding for
     * the serialized XML document is explicitly set to "IBM-1047" for z/OS and
     * OS/390 platforms only.
     * <p>
	 * NOTE: Serialization of a DOM to an XML document is
	 * only canonical when the appropriate formatting flag is passed to the
	 * serialization API relative to the content of the DOM. For example, when the
	 * DOM contains ignorable white space, the resultant serialized XML document
	 * must be formatted (e.g. setting <code>format</code> to <code>true</code>)
	 * to achieve canonical XML serialization, and vise-versa.  As such, this API 
	 * is only canonical when the DOM contains ignorable white space.  Furthermore, 
	 * structural formatting for in-line DTD is not guaranteed 
	 * since in-line structural formatting meta-information is not persisted in the 
	 * DOM.  Finally, all comments after the XML declaration 
	 * and before the internal or external DTD in the DOM are moved to after the internal or external DTD in the serialized XML document.
     * <p>
     * 
     * @param document
     *            The DOM to be serialized to a
     *            formatted XML document and written to the output stream.
     * @param outputStream
     *            The output stream where the formatted XML document is written.
     * @throws SerializationException
     *             If an error occurs during serialization.
     */
    public static void serialize(Document document, OutputStream outputStream) throws SerializationException {

        //Formatting (e.g. line breaks and indentation) is always on for this overload:
        final boolean format = true;

        serialize(document, outputStream, format);
    }

    /**
     * Serializes a DOM to an XML document and writes
     * the XML document to an output stream.
     * <p>
     * The parameter DOM is serialized to an XML
     * document, which may be potentially formatted and written to an output
     * stream.
     * <p>
     * The serialized XML document is formatted (e.g. line breaks and
     * indentation) if the parameter <code>format</code> flag is true. When
     * formatting is turned off, all ignorable white space is not serialized.
     * <p>
     * The encoding for the serialized XML document is explicitly set to "UTF-8"
     * for all platforms excluding z/OS and OS/390 platforms. The encoding for
     * the serialized XML document is explicitly set to "IBM-1047" for z/OS and
     * OS/390 platforms only.
     * <p>
	 * NOTE: Serialization of a DOM to an XML document is
	 * only canonical when the appropriate formatting flag is passed to the
	 * serialization API relative to the content of the DOM. For example, when the
	 * DOM contains ignorable white space, the resultant serialized XML document
	 * must be formatted (e.g. setting <code>format</code> to <code>true</code>)
	 * to achieve canonical XML serialization, and vise-versa.  Furthermore, 
	 * structural formatting for in-line DTD is not guaranteed 
	 * since in-line structural formatting meta-information is not persisted in the 
	 * DOM.  Finally, all comments after the XML declaration 
	 * and before the internal or external DTD in the DOM are moved to after the internal or external DTD in the serialized XML document.
     * <p>
     * 
     * @param document
     *            The DOM to be serialized to a
     *            potentially formatted XML document and written to the output
     *            stream.
     * @param outputStream
     *            The stream where the potentially formatted XML document is written.
     * @param format
     *            If the serialized XML document is formatted (e.g. line breaks
     *            and indentation).
     * @throws SerializationException
     *             If an error occurs during serialization.
     */
    public static void serialize(Document document, OutputStream outputStream, boolean format) throws SerializationException {

        Writer writer = null;

        try {
            
            //Encode the serialized characters using the platform's XML encoding:
            writer = new OutputStreamWriter(outputStream,encoding);
            
            serialize(document, writer, format);

            //Flush explicitly so that a failure to write the characters still 
            //buffered in the writer is reported as a SerializationException 
            //instead of being silently ignored when the writer is closed below:
            writer.flush();
        } 
        catch (Exception e) {

            //Any failure (including a null or unwritable output stream) is 
            //reported to the caller as a SerializationException:
            throw (new SerializationException(e.getMessage()));
        } 
        finally {

            //NOTE: Closing the writer also closes the parameter output stream.
            if (writer != null) {

                try {
                    writer.close();
                } 
                catch (IOException i) {
                    //Ignore since only attempting to close the writer.
                }
            }
        }
    }
    
    /**
     * Serializes a DOM to an XML document array of
     * bytes.
     * <p>
     * The parameter DOM is serialized to the returned
     * XML document array of bytes, which is formatted (e.g. line breaks and
     * indentation).
     * <p>
     * The encoding for the serialized XML document array of bytes is explicitly
     * set to "UTF-8" for all platforms excluding z/OS and OS/390 platforms. The
     * encoding for the serialized XML document array of bytes is explicitly set
     * to "IBM-1047" for z/OS and OS/390 platforms only.
     * <p>
     * NOTE: Serialization of a DOM to an XML document
     * is only canonical when the appropriate formatting flag is passed to the
     * serialization API relative to the content of the DOM. For example, when
     * the DOM contains ignorable white space, the resultant serialized XML
     * document must be formatted (e.g. setting <code>format</code> to
     * <code>true</code>) to achieve canonical XML serialization, and
     * vise-versa. As such, this API is only canonical when the DOM contains
     * ignorable white space. Furthermore, structural formatting for in-line
     * DTD is not guaranteed since in-line structural
     * formatting meta-information is not persisted in the DOM. Finally, all comments after the XML declaration and before the
     * internal or external DTD in the DOM are moved to after the internal or external DTD in the serialized XML document.
     * <p>
     * 
     * @param document
     *            The DOM to be serialized to a
     *            formatted XML document array of bytes.
     * @return The DOM serialized as a formatted XML
     *         document array of bytes, otherwise null.
     * @throws SerializationException
     *             If an error occurs during serialization.
     */
    public static byte[] serializeAsByteArray(Document document) throws SerializationException {

        //Formatting (e.g. line breaks and indentation) is always on for this overload:
        final boolean format = true;

        return serializeAsByteArray(document, format);
    }

    /**
     * Serializes a DOM to an XML document array of
     * bytes.
     * <p>
     * The parameter DOM is serialized to the returned
     * XML document array of bytes, which may be potentially formatted.
     * <p>
     * The returned XML document array of bytes is formatted (e.g. line breaks
     * and indentation) if the parameter <code>format</code> flag is true.
     * When formatting is turned off, all ignorable white space is not
     * serialized.
     * <p>
     * The encoding for the serialized XML document array of bytes is explicitly
     * set to "UTF-8" for all platforms excluding z/OS and OS/390 platforms. The
     * encoding for the serialized XML document array of bytes is explicitly set
     * to "IBM-1047" for z/OS and OS/390 platforms only.
     * <p>
	 * NOTE: Serialization of a DOM to an XML document is
	 * only canonical when the appropriate formatting flag is passed to the
	 * serialization API relative to the content of the DOM. For example, when the
	 * DOM contains ignorable white space, the resultant serialized XML document
	 * must be formatted (e.g. setting <code>format</code> to <code>true</code>)
	 * to achieve canonical XML serialization, and vise-versa.  Furthermore, 
	 * structural formatting for in-line DTD is not guaranteed 
	 * since in-line structural formatting meta-information is not persisted in the 
	 * DOM.  Finally, all comments after the XML declaration 
	 * and before the internal or external DTD in the DOM are moved to after the internal or external DTD in the serialized XML document.
     * <p>
     * 
     * @param document
     *            The DOM to be serialized to a
     *            potentially formatted XML document array of bytes.
     * @param format
     *            If the serialized XML document array of bytes is formatted
     *            (e.g. line breaks and indentation).
     * @return The DOM serialized as a potentially
     *         formatted XML document array of bytes, otherwise null.
     * @throws SerializationException
     *             If an error occurs during serialization.
     */
    public static byte[] serializeAsByteArray(Document document, boolean format) throws SerializationException {

        //Collect the encoded XML document in an in-memory buffer:
        final ByteArrayOutputStream xmlDocumentBuffer = new ByteArrayOutputStream();

        serialize(document, xmlDocumentBuffer, format);

        //NOTE: Closing the ByteArrayOutputStream is not necessary since it has no effect.
        final byte[] xmlDocumentBytes = xmlDocumentBuffer.toByteArray();

        return xmlDocumentBytes;
    }

    /**
     * Serializes a DOM to an XML document and writes
     * the XML document to a writer.
     * <p>
     * The parameter DOM is serialized to an XML
     * document, formatted (e.g. line breaks and indentation) and written to a
     * writer.
     * <p>
     * The encoding for the serialized XML document is explicitly set to "UTF-8"
     * for all platforms excluding z/OS and OS/390 platforms. The encoding for
     * the serialized XML document is explicitly set to "IBM-1047" for z/OS and
     * OS/390 platforms only.
     * <p>
     * NOTE:  The encoding used by parameter writers that write character 
     * streams to byte streams MUST be explicitly set to "UTF-8" for all platforms 
     * excluding z/OS and OS/390 platforms. The encoding used by parameter writers 
     * that write character streams to byte streams MUST be explicitly set to 
     * "IBM-1047" for z/OS and OS/390 platforms only.
     * <p>
	 * NOTE: Serialization of a DOM to an XML document is
	 * only canonical when the appropriate formatting flag is passed to the
	 * serialization API relative to the content of the DOM. For example, when the
	 * DOM contains ignorable white space, the resultant serialized XML document
	 * must be formatted (e.g. setting <code>format</code> to <code>true</code>)
	 * to achieve canonical XML serialization, and vise-versa.  As such, this API 
	 * is only canonical when the DOM contains ignorable white space.  Furthermore, 
	 * structural formatting for in-line DTD is not guaranteed 
	 * since in-line structural formatting meta-information is not persisted in the 
	 * DOM.  Finally, all comments after the XML declaration 
	 * and before the internal or external DTD in the DOM are moved to after the internal or external DTD in the serialized XML document.
     * <p>
     * 
     * @param document
     *            The DOM to be serialized to a
     *            potentially formatted XML document and written to the writer.
     * @param writer
     *            The writer where the potentially formatted XML document is
     *            written.
     * @throws SerializationException
     *             If an error occurs during serialization.
     */
    public static void serialize(Document document, Writer writer) throws SerializationException {

        //Formatting (e.g. line breaks and indentation) is always on for this overload:
        final boolean format = true;

        serialize(document, writer, format);
    }

    /**
     * Serializes a DOM to an XML document and writes
     * the XML document to a writer.
     * <p>
     * The parameter DOM is serialized to an XML
     * document, which may be potentially formatted and written to a writer.
     * <p>
     * The serialized XML document is formatted (e.g. line breaks and
     * indentation) if the parameter <code>format</code> flag is true. When
     * formatting is turned off, all ignorable white space is not serialized.
     * <p>
     * The encoding for the serialized XML document is explicitly set to "UTF-8"
     * for all platforms excluding z/OS and OS/390 platforms. The encoding for
     * the serialized XML document is explicitly set to "IBM-1047" for z/OS and
     * OS/390 platforms only.
     * <p>
     * NOTE:  The encoding used by parameter writers that write character 
     * streams to byte streams MUST be explicitly set to "UTF-8" for all platforms 
     * excluding z/OS and OS/390 platforms. The encoding used by parameter writers 
     * that write character streams to byte streams MUST be explicitly set to 
     * "IBM-1047" for z/OS and OS/390 platforms only.
     * <p>
	 * NOTE: Serialization of a DOM to an XML document is
	 * only canonical when the appropriate formatting flag is passed to the
	 * serialization API relative to the content of the DOM. For example, when the
	 * DOM contains ignorable white space, the resultant serialized XML document
	 * must be formatted (e.g. setting <code>format</code> to <code>true</code>)
	 * to achieve canonical XML serialization, and vise-versa.  Furthermore, 
	 * structural formatting for in-line DTD is not guaranteed 
	 * since in-line structural formatting meta-information is not persisted in the 
	 * DOM.  Finally, all comments after the XML declaration 
	 * and before the internal or external DTD in the DOM are moved to after the internal or external DTD in the serialized XML document.
     * <p>
     * 
     * @param document
     *            The DOM to be serialized to a
     *            potentially formatted XML document and written to the writer.
     * @param writer
     *            The writer where the potentially formatted XML document is
     *            written.
     * @param format
     *            If the serialized XML document is formatted (e.g. line breaks,
     *            indentation, etc.).
     * @throws SerializationException
     *             If an error occurs during serialization.
     */    
    public static void serialize(Document document, Writer writer, boolean format) throws SerializationException {

        //Records whether the JAXP serializer completed the serialization:
        boolean serializedByJAXP = false;

        //Try JAXP first, unless an earlier call already found it unusable:
        if (isJAXPAvailable) {

            try {

                JAXPXMLSerializer.serialize(document, writer, format);

                serializedByJAXP = true;
            } 
            catch (NoClassDefFoundError missingClass) {

                //The JAXP classes cannot be loaded, so fall back to Xerces:
                isJAXPAvailable = false;
            } 
            catch (TransformerConfigurationException unusableTransformer) {

                //JAXP cannot supply a usable transformer, so fall back to Xerces:
                isJAXPAvailable = false;
            } 
            catch (TransformerFactoryConfigurationError unusableFactory) {

                //JAXP cannot supply a usable transformer factory, so fall back to Xerces:
                isJAXPAvailable = false;
            } 
            catch (Exception failure) {

                //Any other failure is a genuine serialization error:
                throw new SerializationException(failure.toString());
            }
        }

        //Fall back to the Xerces serializer when JAXP did not do the work:
        if (!serializedByJAXP) {

            try {
                XercesXMLSerializer.serialize(document, writer, format);
            } 
            catch (Exception failure) {
                throw new SerializationException(failure.toString());
            }
        }
    }

    /**
     * Normalizes the parameter string according to the XML specification for
     * attribute-value normalization ( <a
     * href="http://www.w3.org/TR/REC-xml">http://www.w3.org/TR/REC-xml </a>)
     * and valid characters ( <a
     * href="http://www.w3.org/TR/REC-xml#charsets">http://www.w3.org/TR/REC-xml#charsets
     * </a>).
     * <p>
     * Valid characters, according to the XML specification, include any Unicode
     * character, excluding the surrogate blocks, 0xFFFE, and 0xFFFF.
     * <p>
     * Invalid characters are replaced by the <code><b>?</b></code>
     * character.
     * <p>
     * A <code>null</code> parameter string results in a "null" return string.  
     * Likewise, an empty parameter string results in a "" return string.
     * <p>
     * Entity reference values are represented in the hexadecimal 
     * (e.g. &#xa; for the Web line separator character) form.
     * <p>
     * 
     * @param string
     *            The string to be normalized.
     * @return The normalized string, "null" or an empty string.
     */
    public static String normalize(String string) {

        //Return 'null' if the string is null:
        if (string == null) { 
            return ("null"); 
        }

        int stringLength = string.length();

        //Return an empty string if the string is empty:
        if (stringLength == 0) { 
            return (""); 
        }

        StringBuffer normalizedString = new StringBuffer(stringLength);
        char character;

        //Escape, copy or replace each character in turn:
        for (int counter = 0; counter < stringLength; counter++) {

            character = string.charAt(counter);

            switch (character) {

                //0x003C:
                case '<':
                    normalizedString.append("&lt;");
                    break;

                //0x003E:
                case '>':
                    normalizedString.append("&gt;");
                    break;

                //0x0026:
                case '&':
                    normalizedString.append("&amp;");
                    break;

                //0x0022:
                case '"':
                    normalizedString.append("&quot;");
                    break;

                //0x0027:
                case '\'':
                    normalizedString.append("&apos;");
                    break;

                //0x0009:
                case '\t':
                    normalizedString.append("&#x9;");
                    break;

                //0x000A:
                case '\n':
                    normalizedString.append("&#xA;");
                    break;

                //0x000D:
                case '\r':
                    normalizedString.append("&#xD;");
                    break;

                //NOTE: The space character (0x0020) is intentionally not escaped.
                default:

                    //Valid characters in the Basic Multilingual Plane 
                    //(0x0020-0xD7FF and 0xE000-0xFFFD) are copied as is:
                    if (((character >= 0x0020) && (character <= 0xD7FF)) || ((character >= 0xE000) && (character <= 0xFFFD))) {
                        normalizedString.append(character);
                    }

                    //Valid supplementary characters (0x10000-0x10FFFF) are 
                    //represented in a Java string as a high surrogate 
                    //(0xD800-0xDBFF) immediately followed by a low surrogate 
                    //(0xDC00-0xDFFF); both halves of the pair are copied as is:
                    else if ((character >= 0xD800) && (character <= 0xDBFF) && ((counter + 1) < stringLength) && (string.charAt(counter + 1) >= 0xDC00) && (string.charAt(counter + 1) <= 0xDFFF)) {

                        normalizedString.append(character);

                        normalizedString.append(string.charAt(++counter));
                    }

                    //Everything else (e.g. control characters, unpaired 
                    //surrogates, 0xFFFE and 0xFFFF) is invalid in XML and is 
                    //replaced.  The following character is NOT consumed, so it 
                    //is still escaped on the next iteration:
                    else {
                        normalizedString.append('?');
                    }

                    break;
            }
        }

        return (normalizedString.toString());
    }

    /**
     * De-normalizes the parameter string.
     * <p>
     * The predefined entity references (&amp;lt;, &amp;gt;, &amp;amp;,
     * &amp;quot; and &amp;apos;) and the character references for the tab,
     * line feed and carriage return characters are replaced by the character
     * they represent.
     * <p>
     * Character references may be represented in either the hexadecimal
     * (e.g. &#xa; for the Web line separator character) or decimal (e.g. &#10;
     * for the Web line separator character) form.  Hexadecimal digits are
     * accepted in either case and leading zeros are permitted.
     * <p>
     * White space surrounding the name between the ampersand and the
     * semi-colon is ignored when matching a supported reference.  Any other
     * text, including unsupported entity references and stray ampersands, is
     * copied to the result unchanged.
     * 
     * @param string
     *            The String to be de-normalized.
     * @return The de-normalized String, or "null" if the parameter string is
     *         <code>null</code>.
     */
    public static String denormalize(String string) {

        if (string == null) {
            return "null";
        }

        int stringLength = string.length();
        StringBuffer denormalizedString = new StringBuffer(stringLength);

        //Index of the closest semi-colon after the current ampersand. Zero
        //forces the first look-up and -1 records that no semi-colon remains
        //so the string is not searched again:
        int semiColonIndex = 0;

        //Locate and de-normalize all supported entity references:
        for (int counter = 0; counter < stringLength; counter++) {

            char character = string.charAt(counter);

            //Check if this character is the start of a possible entity
            //reference (e.g. ampersand in &<name>;):
            if (character == '&') {

                //Find a possible end to the possible entity reference (e.g.
                //semi-colon in &<name>;), reusing the last result while it
                //is still ahead of the current position:
                if ((semiColonIndex != -1) && (semiColonIndex <= counter)) {
                    semiColonIndex = string.indexOf(';', (counter + 1));
                }

                if (semiColonIndex != -1) {

                    int replacement = decodeSupportedEntityReference(string, (counter + 1), semiColonIndex);

                    if (replacement != -1) {

                        denormalizedString.append((char) replacement);

                        //Resume after the semi-colon:
                        counter = semiColonIndex;

                        continue;
                    }
                }
            }

            //Not a supported entity reference. Only this character is
            //copied so that a supported reference located between an
            //unrelated ampersand and the next semi-colon is still found:
            denormalizedString.append(character);
        }

        return (denormalizedString.toString());
    }

    /**
     * Resolves the name of an entity reference, located between two indices
     * of a string, to the character it represents.
     * 
     * @param string
     *            The string containing the entity reference.
     * @param start
     *            The index of the first character after the ampersand.
     * @param end
     *            The index of the terminating semi-colon.
     * @return The represented character, or -1 if the entity reference is
     *         not supported.
     */
    private static int decodeSupportedEntityReference(String string, int start, int end) {

        //Ignore surrounding white space using the same definition as
        //String.trim() (any character less than or equal to the space):
        while ((start < end) && (string.charAt(start) <= ' ')) {
            start++;
        }

        while ((end > start) && (string.charAt(end - 1) <= ' ')) {
            end--;
        }

        int length = (end - start);

        if (length == 0) {
            return -1;
        }

        //Predefined entity references:
        if (string.charAt(start) != '#') {

            if ((length == 2) && (string.startsWith("lt", start))) {
                return '<';
            }
            if ((length == 2) && (string.startsWith("gt", start))) {
                return '>';
            }
            if ((length == 3) && (string.startsWith("amp", start))) {
                return '&';
            }
            if ((length == 4) && (string.startsWith("quot", start))) {
                return '"';
            }
            if ((length == 4) && (string.startsWith("apos", start))) {
                return '\'';
            }

            return -1;
        }

        //Character references (e.g. #<decimal digits> or #x<hex digits>):
        int index = (start + 1);
        int radix = 10;

        if ((index < end) && (string.charAt(index) == 'x')) {
            radix = 16;
            index++;
        }

        //At least one digit is required:
        if (index == end) {
            return -1;
        }

        int value = 0;

        for (; index < end; index++) {

            char digitCharacter = string.charAt(index);
            int digit;

            if ((digitCharacter >= '0') && (digitCharacter <= '9')) {
                digit = (digitCharacter - '0');
            }
            else if ((radix == 16) && (digitCharacter >= 'a') && (digitCharacter <= 'f')) {
                digit = ((digitCharacter - 'a') + 10);
            }
            else if ((radix == 16) && (digitCharacter >= 'A') && (digitCharacter <= 'F')) {
                digit = ((digitCharacter - 'A') + 10);
            }
            else {
                return -1;
            }

            value = ((value * radix) + digit);

            //Only the tab (0x9), line feed (0xA) and carriage return (0xD)
            //characters are de-normalized, so stop as soon as the value is
            //out of range. This also guards against integer overflow:
            if (value > 0xD) {
                return -1;
            }
        }

        if ((value == 0x9) || (value == 0xA) || (value == 0xD)) {
            return value;
        }

        return -1;
    }


    /**
     * A specialized implementation that uses a
     * <code>javax.xml.transform.Transformer</code> to serialize a 
     * DOM to an XML document.
     * <p>
     * This specialized implementation uses the JAXP classes which are 
     * available on the classpath (e.g. Java v1.4.x and above).
     * <p>
     * The transformer and the XML declaration are created on the first
     * serialization and cached in static fields.  The cached transformer is 
     * re-configured on every serialization and no synchronization is 
     * performed by this class.
     * <p>
     * 
     * 
     * @author Paul E. Slauenwhite
     * @version September 21, 2004
     * @since June 2, 2004
     */
    private static class JAXPXMLSerializer implements Constants {

        /**
         * Static reference to the platform-specific (e.g. encoding) XML declaration 
         * used in the serialized XML document.
         */
        private static String xmlDeclaration = null;

        /**
         * Static instance of the <code>javax.xml.transform.Transformer</code>
         * to serialize a DOM to an XML document.
         */
        private static Transformer transformer = null;

        /**
         * Serializes a DOM to an XML document using a
         * <code>javax.xml.transform.Transformer</code> and writes the XML
         * document to a writer.
         * <p>
         * The XML declaration and the document type (if any) are written 
         * directly to the writer, followed by the content of the DOM which is 
         * written by the transformer.
         * <p>
         * The serialized XML document is formatted (e.g. line breaks and
         * indentation) if the parameter <code>format</code> flag is true.
         * When formatting is turned off, all ignorable white space is not
         * serialized.  The parameter DOM is never modified since the 
         * ignorable white space is removed from a deep clone.
         * <p>
         * The encoding named in the XML declaration and set on the 
         * transformer is the value of <code>encoding</code>.
         * <p>
         * 
         * @param document
         *            The DOM to be serialized to a
         *            potentially formatted XML document and written to the
         *            writer.
         * @param writer
         *            The writer where the potentially formatted XML document is
         *            written.
         * @param format
         *            If the serialized XML document is formatted (e.g. line
         *            breaks and indentation).
         * @throws Exception
         *             If an error occurs during serialization.
         */
        private static void serialize(final Document document, Writer writer, boolean format) throws Exception {

            //Initialize the transformer only once:
            if (transformer == null) {

                //Configure a local instance and publish it last so a failure
                //during configuration never leaves a partially configured
                //transformer (or a missing XML declaration) behind for the
                //next serialization:
                Transformer newTransformer = TransformerFactory.newInstance().newTransformer();

                newTransformer.setOutputProperty(OutputKeys.METHOD, "xml");

                //Omit the XML declaration to circumvent the problem of
                //the transformer omitting the internal or in-line document
                //type:
                newTransformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");

                //Explicitly set the encoding of the XML document:
                newTransformer.setOutputProperty(OutputKeys.ENCODING, encoding);

                xmlDeclaration = "<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>";
                transformer = newTransformer;
            }

            //Write the XML declaration to the XML document:
            writer.write(xmlDeclaration);
            writer.flush();

            //If formatting, write a new line character(s):
            if (format) {

                writer.write(LINE_SEPARATOR);
                writer.flush();
            }

            //Write the document type (if any) to the XML document:
            String documentTypeDeclaration = buildDocumentTypeDeclaration(document.getDoctype(), format);

            if (documentTypeDeclaration != null) {

                writer.write(documentTypeDeclaration);
                writer.flush();
            }

            //Configure the transformer for formatting or not formatting.
            //Unless a width is set, there will be only line breaks but no
            //indentation.
            //NOTE: The IBM and Sun JDK do not agree on the property name
            //so both are set.
            String indentAmount = (format ? String.valueOf(INDENT.length()) : "0");

            transformer.setOutputProperty(OutputKeys.INDENT, (format ? "yes" : "no"));
            transformer.setOutputProperty("{http://xml.apache.org/xalan}indent-amount", indentAmount);
            transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", indentAmount);

            Node source = document;

            //If not formatting, clone the DOM to remove any ignorable white
            //space characters from any text nodes without changing the
            //caller's DOM:
            if (!format) {

                source = document.cloneNode(true);

                removeIgnorableWhitespace(source);
            }

            //Transform the (cloned) DOM to the XML document:
            transformer.transform(new DOMSource(source), new StreamResult(writer));
        }

        /**
         * Builds the document type declaration (e.g. &lt;!DOCTYPE ...&gt;) 
         * for a document type.
         * <p>
         * 
         * @param documentType
         *            The document type of the DOM, possibly <code>null</code>.
         * @param format
         *            If the declaration is formatted (e.g. line breaks and
         *            indentation).
         * @return The document type declaration, or <code>null</code> if the 
         *         document type is <code>null</code> or has neither a system 
         *         ID nor an internal subset.
         */
        private static String buildDocumentTypeDeclaration(DocumentType documentType, boolean format) {

            if (documentType == null) {
                return null;
            }

            String systemId = documentType.getSystemId();
            String internalSubset = documentType.getInternalSubset();

            //Confirm either an internal or external document type exists
            //in the DOM:
            if ((systemId == null) && (internalSubset == null)) {
                return null;
            }

            StringBuffer documentTypeBuffer = new StringBuffer();

            //Write the name of the document type:
            documentTypeBuffer.append("<!DOCTYPE ");
            documentTypeBuffer.append(documentType.getName());

            //If an external document type exists in the DOM, write
            //the public and/or system ID.
            //NOTE(review): when a system ID is present, an internal subset
            //(if any) is not written - confirm this is intended.
            if (systemId != null) {

                String publicId = documentType.getPublicId();

                if (publicId != null) {

                    documentTypeBuffer.append(" PUBLIC \"");
                    documentTypeBuffer.append(publicId);
                    documentTypeBuffer.append("\" \"");
                } 
                else {
                    documentTypeBuffer.append(" SYSTEM \"");
                }

                documentTypeBuffer.append(systemId);
                documentTypeBuffer.append("\">");
            }

            //If only an internal document type exists in the DOM, write
            //the internal document type of the document type with the
            //formatting included or omitted:
            else {

                documentTypeBuffer.append(" [");

                StringTokenizer tokens = new StringTokenizer(internalSubset.trim(), "\n\r\f");

                //If formatting, write each line of the internal
                //document type on a new line and indented:
                if (format) {

                    documentTypeBuffer.append(LINE_SEPARATOR);

                    while (tokens.hasMoreTokens()) {

                        //Indent by the same amount as the value of the
                        //'indent-amount' output property:
                        documentTypeBuffer.append(INDENT);
                        documentTypeBuffer.append(tokens.nextToken().trim());
                        documentTypeBuffer.append(LINE_SEPARATOR);
                    }
                }

                //If not formatting, remove all new line character(s):
                else {

                    while (tokens.hasMoreTokens()) {
                        documentTypeBuffer.append(tokens.nextToken().trim());
                    }
                }

                documentTypeBuffer.append("]>");
            }

            //If formatting, write a new line character(s):
            if (format) {
                documentTypeBuffer.append(LINE_SEPARATOR);
            }

            return (documentTypeBuffer.toString());
        }
    }

    /**
     * A specialized implementation that uses a
     * <code>org.apache.xml.serialize.XMLSerializer</code> to serialize a
     * DOM to an XML document.
     * <p>
     * This specialized implementation uses the Xerces
     * <code>org.apache.xml.serialize.*</code> classes which <b>MUST</b> be
     * supplied by the user on the classpath.
     * <p>
     * The <code>org.apache.xml.serialize.*</code> classes required to
     * serialize a DOM to an XML document are loaded
     * reflectively at run-time so as to eliminate the Xerces dependency at
     * compilation time.
     * <p>
     * The formatter and the XML declaration are created on the first
     * serialization and cached in static fields.  The cached formatter is 
     * re-configured on every serialization and no synchronization is 
     * performed by this class.
     * <p>
     * 
     * @author Paul E. Slauenwhite
     * @version September 21, 2004
     * @since June 2, 2004
     */
    private static class XercesXMLSerializer implements Constants {

        /**
         * Static instance of the
         * <code>org.apache.xml.serialize.OutputFormat</code> formatter used
         * to serialize a DOM to an XML document.
         */
        private static Object outputFormat = null;

        /**
         * Static reference to the platform-specific (e.g. encoding) XML declaration 
         * used in the serialized XML document.
         */
        private static String xmlDeclaration = null;

        /**
         * Serializes a DOM to an XML document using a
         * <code>org.apache.xml.serialize.XMLSerializer</code> and writes the
         * XML document to a writer.
         * <p>
         * The XML declaration and the document type (if any) are written 
         * directly to the writer, followed by the content of the DOM which is 
         * written by the serializer.
         * <p>
         * The serialized XML document is formatted (e.g. line breaks and
         * indentation) if the parameter <code>format</code> flag is true.
         * When formatting is turned off, all ignorable white space is not
         * serialized.  The parameter DOM is never modified since the 
         * ignorable white space is removed from a deep clone.
         * <p>
         * The encoding named in the XML declaration and set on the 
         * formatter is the value of <code>encoding</code>.
         * <p>
         * The <code>org.apache.xml.serialize.*</code> classes required to
         * serialize a DOM to an XML document are loaded
         * reflectively at run-time so as to eliminate the Xerces dependency at
         * compilation time.
         * <p>
         * 
         * @param document
         *            The DOM to be serialized to a
         *            potentially formatted XML document and written to the
         *            writer.
         * @param writer
         *            The writer where the potentially formatted XML document is
         *            written.
         * @param format
         *            If the serialized XML document is formatted (e.g. line
         *            breaks and indentation).
         * @throws Exception
         *             If an error occurs during serialization, including when 
         *             the Xerces classes cannot be resolved reflectively.
         */
        private static void serialize(final Document document, Writer writer, boolean format) throws Exception {

            //Initialize the formatter only once:
            if (outputFormat == null) {

                Class outputFormatClass = Class.forName("org.apache.xml.serialize.OutputFormat");

                //Configure a local instance and publish it last so a failure
                //during configuration never leaves a partially configured
                //formatter (or a missing XML declaration) behind for the
                //next serialization:
                Object newOutputFormat = outputFormatClass.newInstance();

                outputFormatClass.getMethod("setMethod", new Class[] { String.class}).invoke(newOutputFormat, new Object[] { "xml"});

                //WORKAROUND: Xerces inserts a trailing new line character at
                //the end of the XML declaration irrespective of the formatting
                //configuration of the serializer. As such, omit the XML
                //declaration generated by the serializer and insert a
                //generated XML declaration with the associated formatting.
                outputFormatClass.getMethod("setOmitXMLDeclaration", new Class[] { boolean.class}).invoke(newOutputFormat, new Object[] { Boolean.TRUE});

                //Required to emulate the behaviour of the
                //<code>JAXPXMLSerializer</code> that formats the
                //document type:
                outputFormatClass.getMethod("setOmitDocumentType", new Class[] { boolean.class}).invoke(newOutputFormat, new Object[] { Boolean.TRUE});

                //Required to align attributes from the same element on the
                //same line:
                //NOTE: Setting the line width to zero will result in no
                //line wrapping.
                outputFormatClass.getMethod("setLineWidth", new Class[] { int.class}).invoke(newOutputFormat, new Object[] { new Integer(0)});

                //Ensure the platform-dependant line separator character(s):
                outputFormatClass.getMethod("setLineSeparator", new Class[] { String.class}).invoke(newOutputFormat, new Object[] { LINE_SEPARATOR});

                //Explicitly set the encoding of the XML document:
                outputFormatClass.getMethod("setEncoding", new Class[] { String.class}).invoke(newOutputFormat, new Object[] { encoding});

                xmlDeclaration = "<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>";
                outputFormat = newOutputFormat;
            }

            //Write the XML declaration to the XML document:
            writer.write(xmlDeclaration);
            writer.flush();

            //If formatting, write a new line character(s):
            if (format) {

                writer.write(LINE_SEPARATOR);
                writer.flush();
            }

            //Write the document type (if any) to the XML document:
            String documentTypeDeclaration = buildDocumentTypeDeclaration(document.getDoctype(), format);

            if (documentTypeDeclaration != null) {

                writer.write(documentTypeDeclaration);
                writer.flush();
            }

            //Configure the formatter for formatting (indent by the width of
            //the indent used for the document type, as done by the
            //<code>JAXPXMLSerializer</code>) or not formatting (no indent):
            outputFormat.getClass().getMethod("setIndent", new Class[] { int.class}).invoke(outputFormat, new Object[] { new Integer(format ? INDENT.length() : 0)});

            Class xmlSerializerClass = Class.forName("org.apache.xml.serialize.XMLSerializer");

            Object xmlSerializer = xmlSerializerClass.getConstructor(new Class[] { Writer.class, outputFormat.getClass()}).newInstance(new Object[] { writer, outputFormat});

            Node source = document;

            //If not formatting, clone the DOM to remove any ignorable white
            //space characters from any text nodes without changing the
            //caller's DOM:
            if (!format) {

                source = document.cloneNode(true);

                removeIgnorableWhitespace(source);
            }

            //Serialize the (cloned) DOM to the XML document:
            xmlSerializerClass.getMethod("serialize", new Class[] { Document.class}).invoke(xmlSerializer, new Object[] { source});
        }

        /**
         * Builds the document type declaration (e.g. &lt;!DOCTYPE ...&gt;) 
         * for a document type.
         * <p>
         * 
         * @param documentType
         *            The document type of the DOM, possibly <code>null</code>.
         * @param format
         *            If the declaration is formatted (e.g. line breaks and
         *            indentation).
         * @return The document type declaration, or <code>null</code> if the 
         *         document type is <code>null</code> or has neither a system 
         *         ID nor an internal subset.
         */
        private static String buildDocumentTypeDeclaration(DocumentType documentType, boolean format) {

            if (documentType == null) {
                return null;
            }

            String systemId = documentType.getSystemId();
            String internalSubset = documentType.getInternalSubset();

            //Confirm either an internal or external document type exists
            //in the DOM:
            if ((systemId == null) && (internalSubset == null)) {
                return null;
            }

            StringBuffer documentTypeBuffer = new StringBuffer();

            //Write the name of the document type:
            documentTypeBuffer.append("<!DOCTYPE ");
            documentTypeBuffer.append(documentType.getName());

            //If an external document type exists in the DOM, write
            //the public and/or system ID.
            //NOTE(review): when a system ID is present, an internal subset
            //(if any) is not written - confirm this is intended.
            if (systemId != null) {

                String publicId = documentType.getPublicId();

                if (publicId != null) {

                    documentTypeBuffer.append(" PUBLIC \"");
                    documentTypeBuffer.append(publicId);
                    documentTypeBuffer.append("\" \"");
                } 
                else {
                    documentTypeBuffer.append(" SYSTEM \"");
                }

                documentTypeBuffer.append(systemId);
                documentTypeBuffer.append("\">");
            }

            //If only an internal document type exists in the DOM, write
            //the internal document type of the document type with the
            //formatting included or omitted:
            else {

                documentTypeBuffer.append(" [");

                StringTokenizer tokens = new StringTokenizer(internalSubset.trim(), "\n\r\f");

                //If formatting, write each line of the internal
                //document type on a new line and indented:
                if (format) {

                    documentTypeBuffer.append(LINE_SEPARATOR);

                    while (tokens.hasMoreTokens()) {

                        //Indent by the same amount as the indent configured
                        //on the formatter:
                        documentTypeBuffer.append(INDENT);
                        documentTypeBuffer.append(tokens.nextToken().trim());
                        documentTypeBuffer.append(LINE_SEPARATOR);
                    }
                }

                //If not formatting, remove all new line character(s):
                else {

                    while (tokens.hasMoreTokens()) {
                        documentTypeBuffer.append(tokens.nextToken().trim());
                    }
                }

                documentTypeBuffer.append("]>");
            }

            //If formatting, write a new line character(s):
            if (format) {
                documentTypeBuffer.append(LINE_SEPARATOR);
            }

            return (documentTypeBuffer.toString());
        }
    }

    /**
     * Strips ignorable white space from the parameter node and, recursively,
     * from every node below it.
     * <p>
     * A <code>Text</code> node is considered ignorable white space when its
     * value consists <b>only</b> of white space characters. The value of such
     * a node is replaced by an empty string; the node itself is not removed
     * from the tree. All other nodes are left unchanged.
     * <p>
     * A <code>null</code> parameter node is ignored.
     * <p>
     * 
     * @param node
     *            The root of the sub-tree from which all ignorable white
     *            space is removed.
     */
    private static void removeIgnorableWhitespace(Node node) {

        //Nothing to strip:
        if (node == null) {
            return;
        }

        //Blank out the text node if it holds nothing but white space:
        if (node.getNodeType() == Node.TEXT_NODE) {

            String text = node.getNodeValue();

            if (text != null) {

                String trimmedText = text.trim();

                if (trimmedText.length() == 0) {
                    node.setNodeValue(trimmedText);
                }
            }
        }

        //Visit each child in document order:
        for (Node current = node.getFirstChild(); current != null; current = current.getNextSibling()) {
            removeIgnorableWhitespace(current);
        }
    }
}