/* ***********************************************************
 * Copyright (c) 2005, 2008 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 * $Id: SimpleXmlParser.java,v 1.5 2008/05/23 14:11:50 jcayne Exp $
 *
 * Contributors:
 * IBM - Initial API and implementation
 ************************************************************/

package org.eclipse.tptp.platform.report.sxp.internal;

import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;

import org.eclipse.tptp.platform.report.drivers.xml.internal.IXmlParser;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;



/**
 * UNDER TEST ... to avoid dependency on XSd, EMF ...
 *
 * @deprecated As of TPTP 4.5.0, use the TPTP Business Intelligence and Reporting Tools (BIRT) reporting infrastructure (<code>org.eclipse.tptp.platform.report.birt</code>).
 *
 */
public class SimpleXmlParser implements IXmlParser
{
  /** Byte stream being parsed; assigned by parse() and reset to null when parse() finishes. */
  protected InputStream input_;
  /** Current line and column in the input; used to prefix ParseError messages. */
  protected int curr_line_, curr_col_;
  /** Document built by parse(); set to null when a ParseError is caught. */
  protected SXPDocument doc_;
  /** Set once the prolog declares encoding "UTF-8"; readInputStream() then decodes multi-byte sequences. */
  protected boolean utf8_encoding_;
  
  /**
   * Creates a parser with no input attached.
   * All fields keep their declared initial values until parse() is called.
   */
  public SimpleXmlParser() {
    // nothing to set up here
  }
  
  /**
   * Returns the document built by parse().
   *
   * @return null while an input stream is still attached (parse in progress);
   *         otherwise the current document, which is null if no parse ran or
   *         the last parse failed
   */
  public Document getDocument()
  {
    if( input_ != null )
    {
      return null;
    }
    return doc_;
  }
  
  /**
   * Parses the byte stream of the given source into a new document, which is
   * then available through getDocument().
   * Only the InputStream kind is supported for InputSource.
   * On a ParseError the error is printed and the document is set to null.
   *
   * @param is source whose byte stream (getByteStream()) holds the XML text
   */
  public void parse( InputSource is )
  {
    input_=is.getByteStream();
    curr_line_ = 1; curr_col_=0;
    doc_=new SXPDocument();

    //reset the whole tokenizer state: a previous parse (especially a failed one)
    //may have left pushed-back characters or flags behind.
    state_=S_DATA;
    curr_token_=-1;
    s_token_="";
    next_input_len_=0;
    utf8_encoding_=false;
    escaped_=false;
    end_comment_parsed_=false;

    boolean parsed=false;
    try {
      parseProlog();

      if( curr_token_ == TK_END_PROLOG ) getNextToken();

      parseDataOrElement( doc_, false );
      parsed=true;
    }
    catch (ParseError e)
    {
//TODO: for debug only
      e.printStackTrace();
    }
    finally
    {
      input_=null;
      //never expose a partially built document, whatever the failure was
      if( !parsed ) doc_=null;
    }
  }
  
  /**
   * Consumes tokens, starting with the current one, and appends text and child
   * elements to the given node until the end of input or, when allowed, a
   * "&lt;/" token is reached. That token is left as current so the caller can
   * check it.
   *
   * @param node node receiving the parsed children
   * @param allow_end_element true if a "&lt;/" token may end the sequence
   * @throws ParseError on any token that cannot appear here
   */
  private void parseDataOrElement( SXPNode node, boolean allow_end_element ) throws ParseError
  {
    while( curr_token_ != TK_END )
    {
      if( curr_token_ == TK_DATA )
      {
        node.appendChild( new SXPText( s_token_ ));
      }
      else if( curr_token_ == TK_START_ELEMENT )
      {
        parseElement( node );
      }
      else if( curr_token_ == TK_START_END_ELEMENT )
      {
        //returned to caller so it can check the end element name
        if( allow_end_element )
        {
          return;
        }
        unexpectedInput(); //always throws
      }
      else if( curr_token_ == TK_START_COMMENT )
      {
        parseComment( node );
      }
      else
      {
        unexpectedInput();
      }
      getNextToken();
    }
  }
  
  /**
   * Skips the tokens of a comment up to and including its "--&gt;" terminator.
   * The comment is not (yet?) stored in the parsed node (would require an
   * SXPComment class).
   *
   * @param node node containing the comment (currently unused)
   * @throws ParseError if the input ends before the comment is closed
   */
  private void parseComment( SXPNode node ) throws ParseError
  {
    for(;;)
    {
      switch( getNextToken() )
      {
      case TK_END_COMMENT:
        return;
      case TK_END:
        //the tokenizer keeps returning TK_END at end of input: stop here
        //instead of looping forever on an unterminated comment.
        throw new ParseError("Unexpected end of input stream");
      default:
        //others are ignored silently
      }
    }
  }
  
  /**
   * Reports the current token text as unexpected.
   *
   * @throws ParseError always
   */
  private void unexpectedInput() throws ParseError
  {
    String message = "Unexpected input '" + s_token_ + "'";
    throw new ParseError( message );
  }
  
  /**
   * Parses one element, the "&lt;" token having just been read: its name, its
   * attributes, and (unless it is an empty element) its content and matching
   * end tag. The new element is appended to the given node.
   *
   * @param node parent receiving the new element
   * @throws ParseError on missing name, malformed attribute, unexpected token,
   *         end of input, or missing/mismatched end tag
   */
  private void parseElement( SXPNode node ) throws ParseError
  {
    //must have name
    if( getNextToken() != TK_IDENT )
    {
      unexpectedInput();
    }
    String name = s_token_;
    SXPElement element = new SXPElement( name );
    node.appendChild( element );

    //attribute or end (empty) element
    lp:for(;;)
    {
      switch( getNextToken() )
      {
      case TK_END_EMPTY_ELEMENT:
        //element node already created
        return;
      case TK_END_ELEMENT:  break lp;
      case TK_IDENT: //attribute = "value"
      {
        String attr_name = s_token_;
        if( getNextToken()!=TK_EQ) throw new ParseError("Missing '=' in attribute '"+attr_name+"'");
        if( getNextToken()!=TK_STRING) throw new ParseError("Missing value for attribute '"+attr_name+"'");
        element.setAttribute( attr_name, s_token_ );
        break; //continue
      }
      case TK_END:
        //end of input is returned again and again by the tokenizer:
        //fail now rather than spin forever on a truncated start tag.
        throw new ParseError("Unexpected end of input stream");
      default:
        //anything else ('=', a string without attribute name, "?>" ...) is malformed
        unexpectedInput();
      }
    }

    //continue for element inside element
    getNextToken(); //as parseDataOrElement() expect a new token
    parseDataOrElement( element, true );

    if( curr_token_ != TK_START_END_ELEMENT )
    {
      throw new ParseError("Missing end element for '"+name+"'");
    }

    //get name of end element
    if( getNextToken() != TK_IDENT )
    {
      unexpectedInput();
    }
    if( !name.equals( s_token_ ) )
    {
      //more precise error message: the end tag closes the parent, so ours is missing.
      //s_token_ is never null here while the parent's node name might be.
      if( s_token_.equals( node.getNodeName() ) )
      {
        throw new ParseError("Missing end element for '"+name+"'");
      }
      throw new ParseError("'"+s_token_+"' can't close element '"+name+"'");
    }

    //check '>'
    if( getNextToken() != TK_END_ELEMENT )
    {
      unexpectedInput();
    }
  }
  
  /**
   * Error raised while tokenizing or parsing. It is an inner (non-static)
   * class because it captures the enclosing parser's current line and column,
   * both in its fields and as a "line:column: " prefix of its message.
   */
  private class ParseError extends Exception
  {
    private static final long serialVersionUID = 1L;

    /** Position of the parser when the error was created. */
    public int line, column;

    /**
     * @param msg description of the problem, appended after the position prefix
     */
    public ParseError(String msg)
    {
      super( curr_line_ + ":" + curr_col_ + ": " + msg );
      this.line = curr_line_;
      this.column = curr_col_;
    }
  }
  
  /**
   * Parses the optional XML prolog ("&lt;?" ... "?&gt;").
   * Text before the prolog is skipped. If the input ends or an element starts
   * before any prolog, the method returns with that token as current token.
   * Inside the prolog only the encoding is checked: "UTF-8" (any case) turns
   * UTF-8 decoding on, any other declared encoding is an error.
   *
   * @throws ParseError if something other than a prolog or element comes first,
   *         if the prolog is not closed, or if the encoding is not UTF-8
   */
  private void parseProlog() throws ParseError
  {
    lp: for(;;)
    {
      switch( getNextToken() )
      {
      case TK_END : return ;
      case TK_START_PROLOG : break lp;
      case TK_DATA : break;
      //I take this without error but prolog must exist...
      case TK_START_ELEMENT : return ;
      //other are unexpected
      default : throw new ParseError("Missing XML prolog");
      }
    }
    boolean was_encoding=false;
    //skip token until "?>"
    for(;;)
    {
      switch( getNextToken() )
      {
      case TK_END_PROLOG: return;
      case TK_END:
      case TK_START_ELEMENT:
      case TK_END_ELEMENT:
      case TK_END_EMPTY_ELEMENT: throw new ParseError("Missing end of XML prolog, got '"+s_token_+"'");
      case TK_IDENT:
        //version="1.0" should be checked too !
        was_encoding = "encoding".equals(s_token_);
        break;
      case TK_EQ :
        //keeps was_encoding so the string after encoding= is recognized
        break;
      case TK_STRING:
        if( was_encoding ) {
          //equalsIgnoreCase is null-safe and does not depend on the default locale
          if( "UTF-8".equalsIgnoreCase( s_token_ ) )
          {
            utf8_encoding_ = true;
          }
          else throw new ParseError("Unknown xml encoding '"+s_token_+"'");
        }
        was_encoding=false;
        break;
      default:
        was_encoding=false;
      }
    }
  }
  
  // Token kinds returned by getNextToken(); the text of the token is left in s_token_.
  private static final int TK_END=0; // end of input
  private static final int TK_START_PROLOG=1; // "<?"
  private static final int TK_END_PROLOG=2; // "?>"
  private static final int TK_EQ=3; // "="
  private static final int TK_START_ELEMENT=4;// "<"
  private static final int TK_END_ELEMENT=5;// ">"
  private static final int TK_END_EMPTY_ELEMENT=6;// "/>"
  private static final int TK_START_END_ELEMENT=7;// "</"
  private static final int TK_IDENT=8; // identifier
  private static final int TK_STRING=9; // quoted string, s_token_ holds the text without the surrounding quotes
  private static final int TK_DATA=10; // text outside markup (also used for the text of a comment)
  private static final int TK_START_COMMENT=11; // "<!--"
  private static final int TK_END_COMMENT=12; // "-->"

  /** Token names indexed by token kind (TK_* value); for debug output only. */
  private static final String S_TOKEN[] = new String[] { //for debug
   "TK_END",
   "TK_START_XML_PROLOG",
   "TK_END_XML_PROLOG",
   "TK_EQ",
   "TK_START_ELEMENT",
   "TK_END_ELEMENT",
   "TK_END_EMPTY_ELEMENT",
   "TK_START_END_ELEMENT",
   "TK_IDENT",
   "TK_STRING",
   "TK_DATA",
   "TK_START_COMMENT", //entries for tokens 11 and 12 keep the table in step with the TK_* constants
   "TK_END_COMMENT",
  };
  
  // Tokenizer states (values of state_). Each state has its own value; the
  // tokenizer tests S_ELEMENT and S_PROLOG together, so both are scanned the same way.
  private static final int S_DATA=0; // outside element
  private static final int S_ELEMENT=1; // inside element
  private static final int S_ERROR=2; // error during parse
  private static final int S_COMMENT=3; // inside comment
  private static final int S_PROLOG=4; // inside prolog "<?" ... "?>"
  
  /** Text of the current token; getNextToken0() resets it to "" before reading each token. */
  private String s_token_;
  /** Tokenizer state, one of the S_* constants. */
  private int state_ = S_DATA;
  /** Push-back queue: readNextInput() returns these characters before reading the input stream again. */
  private int next_input_[]; //use this characters instead of read input stream.
  /** Line and column recorded for each queued character; restored into curr_line_/curr_col_ when it is re-read. */
  private int next_line_[], next_col_[];
  /** Number of characters currently held in the push-back queue. */
  private int next_input_len_;
  /** Kind (TK_*) of the last token returned by getNextToken(); -1 before any token is read. */
  private int curr_token_=-1;
  
  /**
   * Appends a character, with the position it was read at, to the push-back
   * queue consumed by readNextInput(). The three parallel arrays are created on
   * first use and enlarged by ten slots whenever they are full.
   *
   * @param c character to queue (may be -1 for end of input)
   * @param line line recorded for the character
   * @param col column recorded for the character
   */
  private void addToNextInput( int c, int line, int col )
  {
    final int used = next_input_len_;
    if( next_input_ == null )
    {
      next_input_ = new int[10];
      next_line_  = new int[10];
      next_col_   = new int[10];
    }
    else if( used >= next_input_.length )
    {
      final int capacity = used + 10;
      next_input_ = enlargedCopy( next_input_, used, capacity );
      next_line_  = enlargedCopy( next_line_,  used, capacity );
      next_col_   = enlargedCopy( next_col_,   used, capacity );
    }
    next_input_[ used ] = c;
    next_line_ [ used ] = line;
    next_col_  [ used ] = col;
    next_input_len_ = used + 1;
  }

  /** Returns a new array of the given capacity holding the first count values of source. */
  private static int[] enlargedCopy( int[] source, int count, int capacity )
  {
    int[] target = new int[ capacity ];
    System.arraycopy( source, 0, target, 0, count );
    return target;
  }
  
  /**
   * Reads the next character directly from the input stream (the push-back
   * queue is NOT consulted) and updates the current line/column.
   * When UTF-8 decoding is on, 2-byte and 3-byte sequences are decoded into a
   * single character value; any other lead byte &gt;= 128 (including 4-byte
   * sequences) is rejected.
   *
   * @return the character read, or -1 at end of input
   * @throws IOException if the underlying stream fails
   * @throws ParseError on malformed or unsupported UTF-8 input
   */
  private int readInputStream() throws IOException, ParseError
  {
    int r = input_.read();
    curr_col_++;
    if( r=='\n') {
      curr_line_++;
      curr_col_=1;
    }
    if( !utf8_encoding_ || r < 128 )
    {
      return r; //plain byte, or -1 at end of input
    }
    //2 bytes following r ?
    if( (r&0xF0)==0xE0 )
    {
      int r1 = readUtf8Continuation("second of 3-byte form");
      int r2 = readUtf8Continuation("third of 3-byte form");
      return ((r&0x0F)<<12) | (r1<<6) | r2;
    }
    //1 byte only following r ?
    if( (r&0xE0)==0xC0 )
    {
      int r1 = readUtf8Continuation("second of 2-byte form");
      return ((r&0x1F)<<6) | r1;
    }
    //unknown UTF-8 ...
    throw new ParseError("Malformed UTF-8 input, unknown bit header="+Integer.toString(r,16));
  }

  /**
   * Reads one UTF-8 continuation byte (10xx xxxx) and returns its 6 payload bits.
   *
   * @param position which byte is being read, used in the error message
   * @throws ParseError if the byte read (or end of input) is not a continuation byte
   */
  private int readUtf8Continuation( String position ) throws IOException, ParseError
  {
    int b = input_.read();
    curr_col_ ++; //'\n' can't be there.
    if( (b&0xC0)!=0x80) throw new ParseError("Malformed UTF-8 "+position+" haven't 10xx xxxx bit header");
    return b & 0x3F;
  }
  /**
   * Returns the next character to tokenize: the oldest character of the
   * push-back queue if there is one (restoring the line/column recorded with
   * it), otherwise a fresh character from the input stream.
   *
   * @return the character, or -1 at end of input
   */
  private int readNextInput() throws IOException, ParseError
  {
    if( next_input_len_ != 0 )
    {
      final int head = next_input_[0];
      curr_line_ = next_line_[0];
      curr_col_  = next_col_[0];
      //drop the head: shift the remaining entries one slot down
      final int remaining = next_input_len_ - 1;
      System.arraycopy( next_input_, 1, next_input_, 0, remaining );
      System.arraycopy( next_line_,  1, next_line_,  0, remaining );
      System.arraycopy( next_col_,   1, next_col_,   0, remaining );
      next_input_len_ = remaining;
      return head;
    }
    return readInputStream();
  }
  
  /**
   * Reads the next token and remembers its kind in curr_token_.
   *
   * @return the kind (TK_*) of the token read
   */
  private int getNextToken()  throws ParseError
  {
    final int token = getNextToken0();
    curr_token_ = token;
    return token;
  }
  
  /** True when the character last returned by readInput() came from a '&amp;...;' reference instead of literally from the stream. */
  private boolean escaped_ ;

  /**
   * Reads a character from the input stream, resolving '&amp;'..';' references:
   * named entities (&amp;lt; &amp;gt; &amp;amp; &amp;apos; &amp;quot;), decimal (&amp;#DDD;) and
   * hexadecimal (&amp;#xHH;) character references. escaped_ tells the caller
   * whether the returned character was written as a reference.
   *
   * @return the character, or -1 at end of input
   * @throws ParseError on a truncated or malformed reference, an unknown
   *         entity name, or an I/O failure
   */
  private int readInput() throws ParseError
  {
    try {
      escaped_=false;
      int r = readInputStream();
      if( r!='&' ) return r; //literal character, or -1 at end of input

//TODO: xml can have '%name;' also ....!
      r = readInputStream();
      escaped_=true;
      if( r==-1 ) throw new ParseError("Unexpected end of input while parsing '&' character");
      if( r!='#' )
      {
        //named entity, e.g. &lt;
        return characterEntity( readEntityName( r ) );
      }

      r = readInputStream();
      if( r==-1 ) throw new ParseError("Unexpected end of input while parsing '&' character");
      if( r=='x' )
      {
        //&#xHH; hexadecimal form
        int h=0;
        //read up to ';' character, no overflow detection is done.
        for(;;)
        {
          r=readInputStream();
          if( r==-1) throw new ParseError("Unexpected end of input while parsing '&' character");
          char c = Character.toUpperCase( (char)r );
          if( c>='0' && c<='9' ) h = 16*h + c-'0';
          else if ( c>='A' && c<='F' ) h = 16*h + c-'A'+10;
          else if ( c==';' ) return h;
          else throw new ParseError("Unexpected character '"+(char)r+"' while parsing &#x..;");
        }
      }
      if( r>='0' && r<='9' )
      {
        //&#DDD; decimal form
        int d = r-'0'; //no overflow detection is made
        for(;;)
        {
          r = readInputStream();
          if( r==-1) throw new ParseError("Unexpected end of input while parsing '&' character");
          if( r>='0' && r<='9' ) d = 10*d + r-'0';
          else if( r==';' ) return d;
          else throw new ParseError("Unexpected character '"+(char)r+"' while parsing &#..;");
        }
      }
      //"&#name;" is not XML, but this parser always resolved it as a named
      //entity: kept so that input accepted before is still accepted.
      return characterEntity( readEntityName( r ) );
    }
    catch ( IOException e )
    {
      ParseError error = new ParseError("IOException during parsing");
      error.initCause( e ); //keep the I/O failure attached to the parse error
      throw error;
    }
  }

  /**
   * Reads an entity name up to (and consuming) its terminating ';'.
   *
   * @param first first character of the name, already read
   * @return the name without '&amp;' and ';' (empty if first is ';')
   */
  private String readEntityName( int first ) throws IOException, ParseError
  {
    StringBuffer name = new StringBuffer();
    int r = first;
    while( r!=';' )
    {
      name.append( (char)r );
      r = readInputStream();
      if( r==-1) throw new ParseError("Unexpected end of input while parsing '&' character");
    }
    return name.toString();
  }
  
  /**
   * Resolves one of the five predefined XML entities.
   *
   * @param name entity name without the leading '&amp;' and trailing ';'
   * @return the character the entity stands for
   * @throws ParseError if the name is not lt, gt, amp, apos or quot
   */
  private int characterEntity( String name ) throws ParseError
  {
    //is a predefined entity (with or without definition)
    if( "lt".equals(name) ) return '<';
    if( "gt".equals(name) ) return '>';
    if( "amp".equals(name) ) return '&';
    if( "apos".equals(name) ) return '\'';
    if( "quot".equals(name) ) return '"';
    throw new ParseError("Unhandled character '&"+name+";'");
  }
  
  private boolean end_comment_parsed_;
  
  /**
   * Tokenizer entry point: reads the next token according to the current
   * state and leaves its text in s_token_.
   *
   * @return the kind (TK_*) of the token read, TK_END at end of input
   * @throws ParseError on malformed input, unexpected end of input, or I/O failure
   */
  private int getNextToken0() throws ParseError
  {
    //in other word, must not occur
    if( state_==S_ERROR ) throw new ParseError("Error occur but parse continue to parse.");
    s_token_="";
    try {
      int r = readNextInput();
      if( r<0 ) return TK_END;
      if( state_==S_ELEMENT || state_==S_PROLOG ) //token which could occur in element only '<'....'>'
      {
        return readMarkupToken( r );
      }
      if ( state_ == S_COMMENT )
      {
        return readComment( r );
      }
      //token occuring outside element
      return readDataToken( r );
    } catch ( IOException e ) {
      ParseError error = new ParseError("IOException during parsing");
      error.initCause( e ); //keep the I/O failure attached to the parse error
      throw error;
    }
  }

  /**
   * Reads one token inside "&lt;" ... "&gt;" or "&lt;?" ... "?&gt;".
   * Every character is taken through readNextInput() so that characters
   * pushed back by a previous token are never skipped.
   *
   * @param r first character of the token, already read
   */
  private int readMarkupToken( int r ) throws IOException, ParseError
  {
    while( r==' ' || r=='\n' || r=='\t' || r=='\r' )
    {
      r = readNextInput();
      if( r==-1 ) throw new ParseError("Unexpected end of input stream");
    }
    switch( r )
    {
    case '=':
      s_token_="="; return TK_EQ;
    case '/':
    {
      int rr = readNextInput();
      if( rr=='>' )
      {
        s_token_="/>"; state_=S_DATA; return TK_END_EMPTY_ELEMENT;
      }
      if( rr==-1 ) throw new ParseError("Unexpected end of input stream");
      throw new ParseError("Unexpected input '/"+(char)rr+"'");
    }
    case '>':
      state_=S_DATA;
      s_token_=">";
      return TK_END_ELEMENT;
    case '"':
    case '\'':
      return readQuotedString( r );
    case '?':
      r = readInput();
      if( r==-1 ) throw new ParseError("Unexpected end of input stream");
      if( r=='>' && !escaped_ )
      {
        s_token_="?>"; state_=S_DATA; return TK_END_PROLOG;
      }
      s_token_="?"+(char)r; throw new ParseError("Unexpected input '"+s_token_+"'");
    default:
      break; //an identifier starts here
    }

    //identifier
    StringBuffer buf=new StringBuffer(256);
    buf.append( (char)r );
    for(;;)
    {
      switch( r=readNextInput() )
      {
      case -1: throw new ParseError("Unexpected end of input stream");
      case '=': case '"': case '\'': case '/' : case '>' : case ' ' : case '\n': case '\t':  case '\r':
        //the delimiter belongs to the next token
        addToNextInput( r, curr_line_, curr_col_ );
        s_token_ = buf.toString();
        return TK_IDENT;
      default: buf.append( (char)r );
      }
    }
  }

  /**
   * Reads a quoted string, the opening quote having been read. Only a literal
   * occurrence of the same quote character closes the string; a quote written
   * as a reference (&amp;quot; &amp;#34; ...) is part of the value.
   *
   * @param quote the opening quote character, '"' or '\''
   * @return TK_STRING, with the value (without quotes) in s_token_
   */
  private int readQuotedString( int quote ) throws ParseError
  {
    StringBuffer buf = new StringBuffer(512);
    for(;;)
    {
      int c = readInput();
      if( c==-1 ) throw new ParseError("Unexpected end of input stream");
      if( c==quote && !escaped_ )
      {
        s_token_=buf.toString();
        return TK_STRING;
      }
      buf.append( (char)c );
    }
  }

  /**
   * Reads one token outside markup: either the text up to the next literal
   * '&lt;' (or end of input), or, when no text precedes it, the markup opener.
   *
   * @param r first character, already read (never written as a reference)
   */
  private int readDataToken( int r ) throws IOException, ParseError
  {
    //use string buffer to accelerate reading, s_token_ is updated when end of text is reached.
    StringBuffer buf = new StringBuffer(1024);
    boolean escaped = false;
    for(;;)
    {
      if( r == '<' && !escaped )
      {
        if( buf.length()!=0 )
        {
          //text first; '<' is pushed back and starts the next token
          addToNextInput( '<', curr_line_, curr_col_ );
          s_token_ = buf.toString();
          return TK_DATA;
        }
        return readMarkupStart();
      }
      if( r==-1 )
      {
        if( buf.length()==0 )
        {
          return TK_END;
        }
        addToNextInput( r,curr_line_,curr_col_ );
        s_token_ = buf.toString();
        return TK_DATA;
      }
      buf.append( (char)r );
      r = readInput();
      escaped = escaped_;
    }
  }

  /**
   * Reads what follows a '&lt;': one of "&lt;", "&lt;?", "&lt;/" or "&lt;!--",
   * and switches the tokenizer state accordingly.
   */
  private int readMarkupStart() throws IOException, ParseError
  {
    // <, <? </
    int rr =readInputStream();
    int cl1=curr_line_;
    int cc1=curr_col_;
    state_=S_ELEMENT;
    switch( rr )
    {
    case -1 : throw new ParseError("Unexpected end of input stream");
    case '?' : s_token_="<?"; state_=S_PROLOG; return TK_START_PROLOG;
    case '/' : s_token_="</"; return TK_START_END_ELEMENT;
    case ' ' : case '\n' : case '\t': case '\r': s_token_="<"; return TK_START_ELEMENT;
    case '!' :
    {
      rr = readInputStream();
      int cl2 = curr_line_;
      int cc2 = curr_col_;
      if( rr==-1 ) throw new ParseError("Unexpected end of input stream");
      if( rr=='-' )
      {
        rr = readInputStream();
        if( rr=='-' )
        {
          s_token_="<!--"; state_=S_COMMENT; return TK_START_COMMENT;
        }
        if( rr==-1 ) throw new ParseError("Unexpected end of input stream");
        throw new ParseError("Unexpected char '"+(char)rr+"' while starting comment");
      }
      //other <!XXXX part: handed over as an element whose name starts with '!'
//TODO: parse entity  <!ENTITY ...
//TODO: parse CDATA   <![CDATA[ ...]]>
//TODO: parse conditional sections <![...[ ... ]]>
//TODO: parse ATTLIST <!ATTLIST
//TODO: parse element type declarations  <!ELEMENT ... >
//TODO: parse <!DOCTYPE ...
      addToNextInput('!', cl1,cc1 );
      addToNextInput(rr, cl2, cc2 ); s_token_="<"; return TK_START_ELEMENT;
    }
    default: addToNextInput( rr, cl1,cc1 ); s_token_="<"; return TK_START_ELEMENT;
    }
  }
  
  /**
   * Reads one token inside a comment. The comment text up to "--&gt;" is
   * returned as TK_DATA and the terminator is pushed back, so that the next
   * call returns TK_END_COMMENT without losing any character that follows
   * the comment. When no text precedes the terminator, TK_END_COMMENT is
   * returned at once and the tokenizer goes back to the data state.
   *
   * @param r first character, already read
   * @return TK_DATA (comment text in s_token_) or TK_END_COMMENT
   * @throws ParseError if the input ends before "--&gt;", or on I/O failure
   */
  private int readComment( int r ) throws ParseError
  {
    try {
      StringBuffer buf = new StringBuffer(1024);
      int dashes = 0; //number of consecutive '-' read just before r
      for(;;)
      {
        if( r==-1 ) throw new ParseError("Unexpected end of input stream");
        if( r=='>' && dashes>=2 )
        {
          //"-->" reached: the last two '-' in buf belong to the terminator
          buf.setLength( buf.length()-2 );
          if( buf.length()==0 )
          {
            s_token_ = "-->";
            state_ = S_DATA;
            return TK_END_COMMENT;
          }
          //'-' '-' '>' are on the same line, columns are derived from the one of '>'
          addToNextInput( '-', curr_line_, curr_col_-2 );
          addToNextInput( '-', curr_line_, curr_col_-1 );
          addToNextInput( '>', curr_line_, curr_col_ );
          s_token_ = buf.toString();
          return TK_DATA;
        }
        dashes = ( r=='-' ) ? dashes+1 : 0;
        buf.append( (char)r );
        r = readNextInput();
      }
    } catch ( IOException e ) {
      ParseError error = new ParseError("IOException during parsing");
      error.initCause( e ); //keep the I/O failure attached to the parse error
      throw error;
    }
  }

/**
 * Diagnostics are not collected by this parser.
 *
 * @return always null
 */
public Collection getDiagnostics()
{
  // TODO Auto-generated method stub
  return null;
}
}
