/**********************************************************************
 * Copyright (c) 2003,2004 Scapa Technologies Limited and others
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Common Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/cpl-v10.html
 * 
 * Contributors: 
 * Scapa Technologies Limited - Initial API and implementation
 **********************************************************************/

package org.eclipse.hyades.perfmon.xml;

import java.io.*;
import java.util.*;

/**
 * A small, non-validating, event driven XML parser.
 * <p>
 * The parser reads its input one byte at a time and treats every byte as one
 * character. While parsing it reports <code>startDocument</code>,
 * <code>text</code>, <code>startElement</code>, <code>endElement</code> and
 * <code>endDocument</code> events to an {@link XMLParserListener}.
 * Comments, CDATA sections, <code>&lt;!...&gt;</code> declarations and
 * <code>&lt;?...?&gt;</code> tags are consumed without generating events.
 * <p>
 * Only the character references <code>&amp;lt; &amp;gt; &amp;amp; &amp;quot;
 * &amp;apos;</code> and numeric references (<code>&amp;#65;</code>,
 * <code>&amp;#x41;</code>) are decoded, and only inside attribute values.
 */
public class XMLParser {
	private final static boolean DEBUG = false;

	/** Stream being parsed; always the buffered wrapper created by {@link #setInput(InputStream)}. */
	InputStream input;
	
	// States of the state machine in parseDocument().
	private final static byte ROOT = 0;
	private final static byte TEXT_NODE = 1;
	private final static byte OPEN_TAG = 2;
	private final static byte TAG_NAME = 3;
	private final static byte TAG_ATTRIBUTE = 4;
	private final static byte CLOSE_TAG = 5;
	private final static byte ID_OR_ATTRIBUTE = 6;
	
	/** The most recently consumed character, after line ending normalisation. */
	int pch = 0;
	/** One character look-ahead: the next unconsumed byte, or -1 at end of input. */
	int ch = 0;
	/** Number of line breaks consumed so far (zero based line number). */
	int line = 0;
	/** Number of characters consumed since the last line break. */
	int column = 0;
	
	/**
	 * Creates a parser without input. Input must be supplied through one of
	 * the <code>setInput</code> methods or {@link #parse(XMLParserListener, InputStream)}.
	 */
	public XMLParser() {
	}

	/**
	 * Creates a parser reading from the given string.
	 * 
	 * @param in the XML text to parse
	 * @throws IOException if the first character cannot be read
	 */
	public XMLParser(String in) throws IOException {
		setInput(in);
	}

	/**
	 * Creates a parser reading from the contents of the given buffer.
	 * 
	 * @param in buffer holding the XML text to parse
	 * @throws IOException if the first character cannot be read
	 */
	public XMLParser(FastStringBuffer in) throws IOException {
		setInput(in);
	}

	/**
	 * Creates a parser reading from the given stream.
	 * 
	 * @param in the stream to parse
	 * @throws IOException if the first character cannot be read
	 */
	public XMLParser(InputStream in) throws IOException {
		setInput(in);
	}

	/**
	 * Sets the stream to parse, reads the first look-ahead character from it
	 * and resets the line and column counters. The stream is wrapped in a
	 * {@link BufferedInputStream}; it is not closed by the parser.
	 * 
	 * @param in the stream to parse
	 * @throws IOException if the first character cannot be read
	 */
	public void setInput(InputStream in) throws IOException {
		input = new BufferedInputStream(in);
		ch = input.read();
		line = 0;
		column = 0;
	}	

	/**
	 * Sets the text to parse.
	 * 
	 * @param in the XML text to parse
	 * @throws IOException if the first character cannot be read
	 */
	public void setInput(String in) throws IOException {
		// NOTE(review): getBytes() uses the platform default charset while the
		// parser maps each byte to one char, so non-ASCII text may not survive
		// the round trip - confirm whether callers rely on this.
		setInput(new ByteArrayInputStream(in.getBytes()));
	}	

	/**
	 * Sets the text to parse from the contents of the given buffer.
	 * 
	 * @param in buffer holding the XML text to parse
	 * @throws IOException if the first character cannot be read
	 */
	public void setInput(FastStringBuffer in) throws IOException {
		setInput(new ByteArrayInputStream(in.toString().getBytes()));
	}	

	/**
	 * Parses the input previously supplied through a constructor or one of the
	 * <code>setInput</code> methods.
	 * 
	 * @param listener receiver of the parse events
	 * @throws Exception if no input was set, on read failure, on malformed
	 *                   input, or if thrown by the listener
	 */
	public void parse(XMLParserListener listener) throws Exception {
		// Do NOT route this through setInput(): the stream has already been
		// wrapped and its first character is already held in the look-ahead,
		// so setting it again would silently drop that first character.
		parseDocument(listener);
	}

	/**
	 * Parses the given stream.
	 * 
	 * @param listener receiver of the parse events
	 * @param in the stream to parse
	 * @throws Exception on read failure, on malformed input, or if thrown by
	 *                   the listener
	 */
	public void parse(XMLParserListener listener, InputStream in) throws Exception {
		setInput(in);
		parseDocument(listener);
	}

	/**
	 * Runs the parser state machine over the current input until end of input
	 * is reached, reporting events to the listener.
	 */
	private void parseDocument(XMLParserListener listener) throws Exception {
		
		if (input == null) {
			throw new IOException("no input has been set on this parser");
		}
		
		listener.startDocument();
		
		// true while inside a tag that must not generate events (<!...>, <?...?>)
		boolean ignored_tag = false;
		// true for <tag ...>, false for </tag>
		boolean start_tag = true;
		String tag_name = "ERROR";
		
		String key;
		String value;
		
		FastStringBuffer tmp = new FastStringBuffer();
		HashMap attributes = new HashMap();
		
		byte mode = TEXT_NODE;
		
		while (true) {
			if (DEBUG) System.out.println("ch = '"+(char)peekChar()+"'");		
			switch(mode) {
				
				//
				// Text between tags
				// <tag>[ blah ]</tag>
				//
				case TEXT_NODE:
					tmp.setLength(0);
					parseText(tmp);
					if (DEBUG) System.out.println("TEXT NODE:'"+tmp+"'");					
					if (peekChar() == -1) {
						// end of input; any text after the last tag is not reported
						listener.endDocument();
						return;
					}
					if (tmp.length() > 0) {
						listener.text(tmp.toString());
					}
					mode = OPEN_TAG;
					break;
				
				//
				// Tag open bracket and others
				// [<]tag ...			//start tag
				// [<!-- ... -->]		//comment
				// [<![CDATA[ ... ]]>]	//CDATA section (skipped like a comment)
				// [<!]DOCTYPE ...		//DOCTYPE thing
				// [</]tag ...			//end tag			
				// [<?]tag ...			//xml spec thing
				//
				case OPEN_TAG:
					if (nextChar() != '<') {
						error("expected open tag < not "+prevChar());
					}
					
					if (peekChar() == '!') {
						nextChar();
						
						if (peekChar() == '-') {
							//parse a comment, then return to textnode mode
							if (nextChar() != '-') {
								error("expected comment -");
							}
							if (nextChar() != '-') {
								error("expected comment --");
							}
							parseComment(null);
							mode = TEXT_NODE;

						} else if (peekChar() == '[') {
							nextChar();
							String marker = "CDATA[";
							for (int i = 0; i < marker.length(); i++) {
								if (nextChar() != marker.charAt(i)) {
									error("expected ![CDATA[ comment "+marker.charAt(i));
								}
							}
							parseCDATA(null);
							mode = TEXT_NODE;
						
						} else {
							//some other declaration such as <!DOCTYPE ...>
							start_tag = true;
							ignored_tag = true;	
							mode = ID_OR_ATTRIBUTE;
							parseWhitespace(null);
						}
						
					} else {
					
						if (peekChar() == '?') {
							nextChar();
							start_tag = true;
							ignored_tag = true;
							// use a scratch map so the attributes of this tag do
							// not leak into a map already given to the listener
							attributes = new HashMap();
						} else if (peekChar() == '/') {
							nextChar();
							if (DEBUG) System.out.println("OPEN END TAG:'"+tmp+"'");
							start_tag = false;
							ignored_tag = false;
						} else {
							if (DEBUG) System.out.println("OPEN START TAG:'"+tmp+"'");					
							attributes = new HashMap();
							start_tag = true;
							ignored_tag = false;
						}

						mode = TAG_NAME;
						parseWhitespace(null);
					
					}
					break;
				
				//
				// Tag name, <[tag] blah="blah">
				//
				case TAG_NAME:
					tmp.setLength(0);
					parseIdentifier(tmp);
					
					if (tmp.length() == 0) {
						error("expected tag name (or some other valid open tag like \"<!--\", \"<!DOCTYP\" or \"<![CDATA[\" )");	
					}
					
					tag_name = tmp.toString();
					if (DEBUG) System.out.println("TAG NAME:'"+tmp+"'");					
					parseWhitespace(null);
					mode = TAG_ATTRIBUTE;
					break;

				//
				// basically an identifier or an attribute
				// <![DOCTYPE] ...
				// <!DOCTYPE [SYSTEM]
				// <!DOCTYPE SYSTEM ["C:\mydtd.dtd"]
				//				
				case ID_OR_ATTRIBUTE:
					
					if (peekChar() == '/' || peekChar() == '?' || peekChar() == '>') {
						mode = CLOSE_TAG;
					} else {
						
						if (DEBUG) System.out.println("ID OR ATTRIBUTE:");					

						if (peekChar() == '"' || peekChar() == '\'') {

							parseQuoted(null);
							
						} else {
						
							int idLength = parseIdentifier(null);
	
							parseWhitespace(null);
	
							if (peekChar() == '=') {
								nextChar();
								parseWhitespace(null);
								parseQuoted(null);
							} else if (idLength == 0) {
								// Nothing was consumed (end of input or an
								// unsupported character); without this check
								// the state machine would loop forever.
								error("unexpected character or end of input inside \"<!...>\" declaration");
							}

						}

						parseWhitespace(null);
					}
					
					break;
					
				//
				// Tag attribute
				// <mytag [key=value] ...
				//
				case TAG_ATTRIBUTE:
				
					if (peekChar() == '/' || peekChar() == '?' || peekChar() == '>') {
						mode = CLOSE_TAG;
					} else {
						
						if (!start_tag) {
							error("attributes found in end tag");	
						}
						
						//[key]=value
						tmp.setLength(0);
						parseIdentifier(tmp);
						key = tmp.toString();
						
						if (key.length() == 0) {
							error("expected tag attribute key (blah=\"...\")");
						}
						
						parseWhitespace(null);
						
						//key[=]value
						if (nextChar() != '=') {
							error("expected attribute equals = (blah=\"...\")");
						}

						parseWhitespace(null);
						
						if (peekChar() != '"'
							&& peekChar() != '\'') {
							error("expected attribute value in quotes (blah=\"value\")");		
						}
						
						//key=[value]
						tmp.setLength(0);
						parseQuoted(tmp);
						convertValue(tmp);
						value = tmp.toString();

						attributes.put(key,value);

						if (DEBUG) System.out.println("TAG ATTRIBUTE:'"+key+"'='"+value+"'");					
						
						parseWhitespace(null);
					}		
					break;
				
				//
				// Any kind of close tag
				// ... [?>]		//end spec tag
				// ... [/>]		//end start tag (generate start and end tags)
				// ... [>]		//end tag (generate start or end tag)
				//
				case CLOSE_TAG: 

					if (peekChar() == '?') {
						//do nothing, its the end of a spec tag 
						nextChar();
						if (nextChar() != '>') {
							error("expected close tag \"?>\" at end of xml spec tag");	
						}
						ignored_tag = false;
						
					} else if (peekChar() == '/') {
						if (!start_tag) {
							error("invalid close tag \"/>\" on end tag, close tag should be \">\" on end tag");	
						}
						nextChar();

						if (nextChar() != '>') {
							error("expected close tag > (e.g. \"<mytag />\")");	
						}

						if (!ignored_tag) {
							listener.startElement(tag_name,attributes);	
							listener.endElement(tag_name);
						}
						ignored_tag = false;

					} else {

						if (nextChar() != '>') {
							error("expected close tag >");	
						}

						if (!ignored_tag) {
							if (start_tag) {
								listener.startElement(tag_name,attributes);	
							} else {
								listener.endElement(tag_name);
							}
						}
						ignored_tag = false;

					}
					
					if (DEBUG) System.out.println("CLOSE TAG:'"+tmp+"'");					
					mode = TEXT_NODE;
					break;				
					
			}//end switch
		}//end while
	
	}

	/** Returns the look-ahead character without consuming it (-1 at end of input). */
	private int peekChar() throws IOException {
		return ch;
	}
	
	/**
	 * Consumes the look-ahead character and reads the next one from the stream.
	 * 
	 * @return the consumed character; a CR or CR LF pair is returned as a single '\n'
	 */
	private int nextChar() throws IOException {
		int consumed = ch;
		ch = input.read();
		pch = fixChar(consumed);
		return pch;
	}
	
	/** Returns the most recently consumed character. */
	private char prevChar() throws IOException {
		return (char)pch;	
	}
	
	/**
	 * Normalises the line ending represented by a just consumed character and
	 * updates the line/column counters.
	 * 
	 * @param c the character that was just consumed
	 * @return '\n' for CR, CR LF and LF, otherwise <code>c</code> unchanged
	 */
	private int fixChar(int c) throws IOException {
		if (c == '\r') {
			if (peekChar() == '\n') {
				// Swallow the LF of a CR LF pair directly. Going through
				// nextChar() here would count the line break twice.
				ch = input.read();
			}
			c = '\n';
		}
		
		if (c == '\n') {
			line++;
			column = 0;
		} else {
			column++;
		}
		
		return c;
	}
	
	/**
	 * Reports malformed input.
	 * 
	 * @param s description of the problem
	 * @throws Exception always; the message carries the current position and
	 *                   the previous and next characters
	 */
	private void error(String s) throws Exception {
		throw new Exception(s+", line "+line+", column "+column+" pch="+prevChar()+" peek="+(char)peekChar());
	}

	/** True for the printable byte values accepted inside quoted values. */
	private static boolean isPrintable(int c) {
		return (c > 31 && c < 127) || (c > 127 && c < 255);
	}

	/** True for printable characters plus tab and line ending characters. */
	private static boolean isTextChar(int c) {
		return isPrintable(c) || c == '\t' || c == '\n' || c == '\r';
	}

	/**
	 * Decodes, in place, the character references contained in an attribute
	 * value: <code>&amp;lt; &amp;gt; &amp;amp; &amp;quot; &amp;apos;</code> and
	 * decimal or hexadecimal numeric references. Unknown or malformed
	 * references are left untouched. Decoded text is never decoded again, so
	 * <code>&amp;amp;lt;</code> yields <code>&amp;lt;</code>.
	 * 
	 * @param sb the raw attribute value; modified in place
	 */
	private void convertValue(FastStringBuffer sb) throws IOException {
		for (int i = 0; i < sb.length(); i++) {
			if (sb.charAt(i) != '&') {
				continue;
			}
			
			// the shortest supported reference is four characters, e.g. "&lt;"
			if (sb.length() - i < 4) {
				return;
			}
			
			int end = sb.indexOf(';',i+1);
			if (end == -1) {
				// no terminator left, so no later reference can be complete either
				return;
			}
			
			String replacement = resolveReference(sb.substring(i+1,end));
			if (replacement != null) {
				// the loop increment then steps over the decoded text
				sb.replace(i,end+1,replacement);
			}
		}
	}

	/**
	 * Maps the text between '&amp;' and ';' to the text it stands for.
	 * 
	 * @return the replacement text, or null if the reference is not recognised
	 */
	private static String resolveReference(String name) {
		if (name.equals("lt")) return "<";
		if (name.equals("gt")) return ">";
		if (name.equals("amp")) return "&";
		if (name.equals("quot")) return "\"";
		if (name.equals("apos")) return "\'";
		if (name.length() > 1 && name.charAt(0) == '#') {
			return resolveCharacterReference(name);
		}
		return null;
	}

	/**
	 * Decodes a numeric character reference body such as "#65" or "#x41".
	 * 
	 * @param ref the reference body, starting with '#' and at least two characters long
	 * @return the referenced character(s), or null if the number is malformed
	 *         or outside the Unicode range
	 */
	private static String resolveCharacterReference(String ref) {
		int code;
		try {
			if (ref.charAt(1) == 'x') {
				code = Integer.parseInt(ref.substring(2),16);
			} else {
				code = Integer.parseInt(ref.substring(1),10);
			}
		} catch (NumberFormatException e) {
			// not a number: treat like any other unknown reference
			return null;
		}
		
		if (code < 0 || code > 0x10FFFF) {
			return null;
		}
		if (code <= 0xFFFF) {
			return String.valueOf((char)code);
		}
		
		// supplementary character: encode as a surrogate pair
		int offset = code - 0x10000;
		char[] pair = new char[] {
			(char)(0xD800 + (offset >> 10)),
			(char)(0xDC00 + (offset & 0x3FF))
		};
		return new String(pair);
	}

	/**
	 * Consumes a CDATA section body up to and including the closing "]]&gt;".
	 * 
	 * @param sb receives the body without the terminator, or null to discard it
	 * @return number of characters in the body
	 */
	private int parseCDATA(FastStringBuffer sb) throws IOException {
		return parseUntilTerminator(sb,']');
	}
	
	/**
	 * Consumes a comment body up to and including the closing "--&gt;".
	 * 
	 * @param sb receives the body without the terminator, or null to discard it
	 * @return number of characters in the body
	 */
	private int parseComment(FastStringBuffer sb) throws IOException {
		return parseUntilTerminator(sb,'-');
	}

	/**
	 * Consumes characters up to and including a terminator made of two
	 * <code>marker</code> characters followed by '&gt;'. Scanning also stops,
	 * without error, at end of input or at a character that is not accepted
	 * as text.
	 * 
	 * @param sb receives the consumed text without the terminator, or null
	 * @param marker the character that appears twice before the closing '&gt;'
	 * @return number of characters consumed, excluding the terminator
	 */
	private int parseUntilTerminator(FastStringBuffer sb, char marker) throws IOException {
		int len = 0;
		// number of consecutive marker characters directly before the look-ahead
		int run = 0;
		int c = peekChar();

		while (isTextChar(c)) {
			
			if (c == '>' && run >= 2) {
				nextChar();
				// drop the two marker characters belonging to the terminator
				if (sb != null) sb.setLength(sb.length()-2);
				return len-2;	
			}
			
			if (c == marker) {
				run++;
			} else {
				run = 0;
			}
			
			int consumed = nextChar();
			if (sb != null) sb.append((char)consumed);
			len++;
			c = peekChar();		
		}
		
		return len;
	}
	
	/**
	 * Consumes a tag or attribute name. Names may contain letters, digits,
	 * ':' and '_', plus '-' and '.' anywhere but in the first position.
	 * 
	 * @param sb receives the name, or null to discard it
	 * @return length of the name; 0 if the look-ahead does not start a name
	 */
	private int parseIdentifier(FastStringBuffer sb) throws IOException {
		int len = 0;
		int c = peekChar();
		
		while (	   (c > 47 && c < 59)  // 0-9 + :
				|| (c > 64 && c < 91)  // A-Z
				|| (c > 96 && c < 123) // a-z
				|| (c == 95)			 // _
				|| (len > 0 && (c == '-' || c == '.'))
				) {
					
			if (sb != null) sb.append((char)c);
			len++;	
			nextChar();
			c = peekChar();			
		}
		
		return len;
	}

	/**
	 * Consumes a quoted value. The first character is taken as the quote
	 * character; the value ends at the next occurrence of that character.
	 * Scanning also stops, without consuming further, at end of input or at a
	 * non printable character (including tab and line endings).
	 * 
	 * @param sb receives the value without the quotes, or null to discard it
	 * @return length of the value
	 */
	private int parseQuoted(FastStringBuffer sb) throws IOException {
		int len = 0;
		int quote = nextChar();
		int c = peekChar();
		
		while (isPrintable(c)) {
			
			if (c == quote) {
				nextChar();
				return len;
			}
			
			if (sb != null) sb.append((char)c);
			len++;
			nextChar();
			c = peekChar();			
		}
		
		return len;
	}

	/**
	 * Consumes text up to, but not including, the next '&lt;'. Scanning also
	 * stops at end of input or at a character that is not accepted as text.
	 * Line endings are stored as '\n'.
	 * 
	 * @param sb receives the text, or null to discard it
	 * @return number of characters stored
	 */
	private int parseText(FastStringBuffer sb) throws IOException {
		int len = 0;
		int c = peekChar();
		
		while (isTextChar(c) && c != '<') {
			// append what nextChar() reports rather than the raw look-ahead so
			// that CR and CR LF end up as a single '\n'
			int consumed = nextChar();
			if (sb != null) sb.append((char)consumed);
			len++;
			c = peekChar();			
		}
		
		return len;
	}

	/**
	 * Consumes a run of spaces, tabs and line endings.
	 * 
	 * @param sb receives the whitespace, or null to discard it
	 * @return number of characters stored
	 */
	private int parseWhitespace(FastStringBuffer sb) throws IOException {
		int len = 0;
		int c = peekChar();
		
		while (	c == ' '
				|| c == '\t'
				|| c == '\n'
				|| c == '\r'
			  ) {
			
			int consumed = nextChar();
			if (sb != null) sb.append((char)consumed);
			len++;
			c = peekChar();
		}
		
		return len;
	}	

}
