-----------------------------------------------------------------------------------
-- Copyright (c) 2006, 2007 IBM Corporation and others.
-- All rights reserved. This program and the accompanying materials
-- are made available under the terms of the Eclipse Public License v1.0
-- which accompanies this distribution, and is available at
-- http://www.eclipse.org/legal/epl-v10.html
--
-- Contributors:
--     IBM Corporation - initial API and implementation
-----------------------------------------------------------------------------------

-----------------------------------------------------------------------------------
-- Lexer for C99
--
-- Does not use a separate keyword lexer.
-----------------------------------------------------------------------------------

%Options la=1
%options package=org.eclipse.cdt.internal.core.dom.parser.c99
%options template=LexerTemplateD.g
%options export_terminals=("C99LexerBaseexp.java", "TK_")
%options verbose
%Options list
%options single_productions

$Notice
/./*******************************************************************************
 * Copyright (c) 2006, 2007 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/
 
  // This file was generated by LPG
./
$End

$Export
	
	-- identifiers will be converted into keywords by the preprocessor
	identifier
	integer
	floating
	charconst
	stringlit
	
	RightBracket
	LeftBracket
	RightParen
	LeftParen
	RightBrace
	LeftBrace
	Dot
	Arrow
	PlusPlus
	MinusMinus
	And
	Star
	Plus
	Minus
	Tilde
	Bang
	Slash
	Percent
	RightShift
	LeftShift
	LT
	GT
	LE
	GE
	EQ
	NE
	Caret
	Or
	AndAnd
	OrOr
	Question
	Colon
	SemiColon
	DotDotDot
	Assign
	StarAssign
	SlashAssign
	PercentAssign
	PlusAssign
	MinusAssign
	RightShiftAssign
	LeftShiftAssign
	AndAssign
	CaretAssign
	OrAssign
	Comma
	Hash
	HashHash
	NewLine
	
	-- Invalid tokens are the result of:
	--  1) a syntax error at the token level (created in the reportError() method)
	--  2) improper use of the ## operator, caught by the preprocessor
	Invalid 
	
	-- The completion token represents the location of the cursor in the working copy
	Completion
	-- After the Completion token a sequence of EndOfCompletion tokens will allow the 
	-- parser to terminate successfully without actually parsing the rest of the input
	EndOfCompletion
	
	SingleLineComment
	MultiLineComment
$End

$Globals
/.
	import org.eclipse.cdt.core.parser.CodeReader;
	import org.eclipse.cdt.internal.core.dom.parser.c99.C99LexerKind;
    import org.eclipse.cdt.internal.core.dom.parser.c99.preprocessor.TokenList;
    import org.eclipse.cdt.internal.core.dom.parser.c99.preprocessor.Token;
    import org.eclipse.cdt.core.dom.c99.ILexer;
./
$End

$Define
	-- Short aliases for the template's BeginAction and EndAction macros; they wrap every rule action in this file.
	$ba /.$BeginAction./
	$ea /.$EndAction./
	-- Extra interface text contributed to the generated lexer class so that it also implements ILexer
	-- (presumably spliced into the class header by the template - TODO confirm against LexerTemplateD.g)
	$additional_interfaces /. , ILexer ./
$End


$Headers
/.
	// Receives the tokens created while lex() is running; null before and after a run.
	private TokenList tokenList = null;
	// When false, makeToken() discards comment tokens; lex() turns it on when
	// OPTION_GENERATE_COMMENT_TOKENS is present in its options argument.
	private boolean returnCommentTokens = false;
	// Set from getInputChars() at the start of lex() and handed to every Token created by makeToken().
	private char[] input = null; // the input character buffer
       
    /**
     * Creates a lexer for the contents of the given reader.
     * The reader's buffer and its filename (converted to a String) are passed
     * straight to the superclass constructor.
     *
     * @param reader supplies the character buffer to lex and the name of its file
     */
    public $action_type(CodeReader reader) {
    	super(reader.buffer, new String(reader.filename));
    }
    
    /**
     * Lexes the entire input buffer and returns the tokens that were recognized.
     * Defined in interface ILexer.
     * <p>
     * The comment-token setting is recomputed from {@code options} on every call,
     * so a request for comment tokens in one call does not leak into later calls
     * on the same lexer instance. The per-run state (token list and input buffer
     * reference) is cleared even when the lex parser throws.
     *
     * @param options bit mask of lexer options; when OPTION_GENERATE_COMMENT_TOKENS
     *                is set, comment tokens are included in the result
     * @return the list of tokens produced for the input
     */
    public synchronized TokenList lex(int options) {
        // Assign the flag instead of only ever switching it on; otherwise it would
        // stay true for every subsequent call once comments had been requested.
        returnCommentTokens = (OPTION_GENERATE_COMMENT_TOKENS & options) != 0;

        TokenList result = new TokenList();
        tokenList = result;
        input = super.getInputChars();

        try {
            lexParser.parseCharacters(null);  // Lex the input characters
        }
        finally {
            // Drop the references so the buffer and list are not retained between runs.
            tokenList = null;
            input = null;
        }
        return result;
    }
    
    /**
     * Appends a token of the given kind to the token list that is being built.
     * The token covers the range reported by the lex parser's getFirstToken()
     * and getLastToken(). Comment tokens are dropped unless comment generation
     * was requested.
     *
     * @param kind the TK_ constant identifying the kind of token to create
     */
    protected void makeToken(int kind) {
        boolean isComment = (kind == TK_MultiLineComment || kind == TK_SingleLineComment);
        if(isComment && !returnCommentTokens)
            return; // ignore comments if desired

        // Note: a trigraph adjustment used to live here and is disabled for
        // optimization purposes. When a one-character token starts with '?' but
        // its kind is not C99Parsersym.TK_Question, it must be a trigraph, and the
        // end offset would be increased by 2 so that the token's toString()
        // covers the whole trigraph sequence.

        tokenList.add(new Token(lexParser.getFirstToken(), lexParser.getLastToken(), kind, input));
    }
	
	/**
	 * Records a token-level syntax error by appending an Invalid token
	 * spanning the given range of the input to the token list.
	 *
	 * @param leftOffset  left boundary of the erroneous range
	 * @param rightOffset right boundary of the erroneous range
	 */
	public void reportError(int leftOffset, int rightOffset) {
		tokenList.add(new Token(leftOffset, rightOffset, TK_Invalid, getInputChars()));
	}
	
	/**
	 * Returns the terminal kind for input position i by delegating
	 * to the C99LexerKind helper.
	 *
	 * @param i the input position to classify
	 * @return the kind computed by C99LexerKind for this lexer and position
	 */
	public int getKind(int i) {
		final int kind = C99LexerKind.getKind(this, i);
		return kind;
	}
	
./
$End

$Terminals
	a    b    c    d    e    f    g    h    i    j    k    l    m
    n    o    p    q    r    s    t    u    v    w    x    y    z
    
    A    B    C    D    E    F    G    H    I    J    K    L    M
    N    O    P    Q    R    S    T    U    V    W    X    Y    Z
	_

    0    1    2    3    4    5    6    7    8    9
    
    EOF Space Unused
    
    LF   CR   HT   FF
    
    LF           ::= NewLine
    CR           ::= Return
    HT           ::= HorizontalTab
    FF           ::= FormFeed
    
    Space        ::= ' '
    Dot          ::= '.'
    LessThan     ::= '<'
    GreaterThan  ::= '>'
    Plus         ::= '+'
    Minus        ::= '-'
    Slash        ::= '/'
    BackSlash    ::= '\'
    Star         ::= '*'
    LeftParen    ::= '('
    RightParen   ::= ')'
    Equal        ::= '='
    LeftBracket  ::= '['
    RightBracket ::= ']'
    LeftBrace    ::= '{'
    RightBrace   ::= '}'
    Ampersand    ::= '&'
    Tilde        ::= '~'
    Bang         ::= '!'
    Percent      ::= '%'
    Caret        ::= '^'
    Bar          ::= '|'
    Question     ::= '?'
    Colon        ::= ':'
    SemiColon    ::= ';'
    Comma        ::= ','
    Hash         ::= '#'
    SingleQuote  ::= "'"
    DoubleQuote  ::= '"'
    DollarSign   ::= '$'
    
$End


$Start
	Token
$End

-----------------------------------------------------------------------------------
-- Deviation From Spec
--
-- The grammar below is slightly more lenient than the C99 spec in the following
-- ways.
--
-- 1) There is no differentiation between octal and decimal integer constants.
--    Normally an octal constant starts with a 0 and only contains octal digits.
--    This grammar allows an "integer" constant to start with any number of zeros
--    and contain any decimal digit. Therefore constants like "09" are allowed.
-- 2) Dollar signs ($) are allowed in identifiers.
-----------------------------------------------------------------------------------


$Rules

    -----------------------------------------------------------------------------------
	-- Tokens that the parser will see
	-----------------------------------------------------------------------------------
	
	-- Each rule below emits one token whose kind is the exported terminal named in
	-- the makeToken() call. Keywords are not recognized here: they are lexed as
	-- identifiers (this grammar does not use a separate keyword lexer).
	Token ::= identifier 
	          /.$ba  makeToken($_identifier); $ea./
	
	Token ::= integer-constant  
	          /.$ba  makeToken($_integer);   $ea./
	
	Token ::= floating-constant 
	          /.$ba  makeToken($_floating);  $ea./
	
	Token ::= character-constant 
	          /.$ba  makeToken($_charconst); $ea./
	
	Token ::= string-literal 
	          /.$ba  makeToken($_stringlit); $ea./
	          
	-----------------------------------------------------------------------------------
	-- Punctuation
	--
    -- [ ] ( ) { } . ->
	-- ++ -- & * + - ~ !
	-- / % << >> < > <= >= == != ^ | && ||
	-- ? : ; ...
	-- = *= /= %= += -= <<= >>= &= ^= |=
	-- , # ##
	-- <: :> <% %> %: %:%:
	-----------------------------------------------------------------------------------
	
	-- Each rule maps one punctuator spelling to the exported token kind passed to makeToken().
	Token ::= '[' 				/.$ba  makeToken($_LeftBracket);       $ea./
	Token ::= ']'   			/.$ba  makeToken($_RightBracket);      $ea./
	Token ::=  '('  			/.$ba  makeToken($_LeftParen);         $ea./
	Token ::=  ')'  			/.$ba  makeToken($_RightParen);        $ea./
	Token ::=  '{'  			/.$ba  makeToken($_LeftBrace);         $ea./
	Token ::=  '}'  			/.$ba  makeToken($_RightBrace);        $ea./
	Token ::=  '.'  			/.$ba  makeToken($_Dot);               $ea./
	Token ::=  '-' '>'  		/.$ba  makeToken($_Arrow);             $ea./
	Token ::=  '+' '+'  		/.$ba  makeToken($_PlusPlus);          $ea./
	Token ::=  '-' '-'  		/.$ba  makeToken($_MinusMinus);        $ea./
	Token ::=  '&'  			/.$ba  makeToken($_And);               $ea./
	Token ::=  '*'  			/.$ba  makeToken($_Star);              $ea./
	Token ::=  '+'  			/.$ba  makeToken($_Plus);              $ea./
	Token ::=  '-'  			/.$ba  makeToken($_Minus);             $ea./
	Token ::=  '~'  			/.$ba  makeToken($_Tilde);             $ea./
	Token ::=  '!'  			/.$ba  makeToken($_Bang);              $ea./
	Token ::=  '/'  			/.$ba  makeToken($_Slash);             $ea./
	Token ::=  '%'  			/.$ba  makeToken($_Percent);           $ea./
	Token ::=  '<' '<'  		/.$ba  makeToken($_LeftShift);         $ea./
	Token ::=  '>' '>'  		/.$ba  makeToken($_RightShift);        $ea./
	Token ::=  '<'  			/.$ba  makeToken($_LT);                $ea./
	Token ::=  '>'  			/.$ba  makeToken($_GT);                $ea./
	Token ::=  '<' '=' 			/.$ba  makeToken($_LE);                $ea./
	Token ::=  '>' '=' 			/.$ba  makeToken($_GE);                $ea./
	Token ::=  '=' '='  		/.$ba  makeToken($_EQ);                $ea./
	Token ::=  '!' '='  		/.$ba  makeToken($_NE);                $ea./
	Token ::=  '^'  			/.$ba  makeToken($_Caret);             $ea./
	Token ::=  '|'  			/.$ba  makeToken($_Or);                $ea./
	Token ::=  '&' '&'  		/.$ba  makeToken($_AndAnd);            $ea./
	Token ::=  '|' '|'  		/.$ba  makeToken($_OrOr);              $ea./
	Token ::=  '?'  			/.$ba  makeToken($_Question);          $ea./
	Token ::=  ':'  			/.$ba  makeToken($_Colon);             $ea./
	Token ::=  ';' 				/.$ba  makeToken($_SemiColon);         $ea./
	Token ::=  '.' '.' '.'  	/.$ba  makeToken($_DotDotDot);         $ea./
	Token ::=  '='  			/.$ba  makeToken($_Assign);            $ea./
	Token ::=  '*' '='  		/.$ba  makeToken($_StarAssign);        $ea./
	Token ::=  '/' '='  		/.$ba  makeToken($_SlashAssign);       $ea./
	Token ::=  '%' '='  		/.$ba  makeToken($_PercentAssign);     $ea./
	Token ::=  '+' '='  		/.$ba  makeToken($_PlusAssign);        $ea./
	Token ::=  '-' '='  		/.$ba  makeToken($_MinusAssign);       $ea./
	Token ::=  '<' '<' '='  	/.$ba  makeToken($_LeftShiftAssign);   $ea./
	Token ::=  '>' '>' '='  	/.$ba  makeToken($_RightShiftAssign);  $ea./
	Token ::=  '&' '='  		/.$ba  makeToken($_AndAssign);         $ea./
	Token ::=  '^' '='  		/.$ba  makeToken($_CaretAssign);       $ea./
	Token ::=  '|' '='  		/.$ba  makeToken($_OrAssign);          $ea./
	Token ::=  ','  			/.$ba  makeToken($_Comma);             $ea./
	Token ::=  '#'  			/.$ba  makeToken($_Hash);              $ea./
	Token ::=  '#' '#'  		/.$ba  makeToken($_HashHash);          $ea./
	
	-- digraph sequences: alternative spellings that are given the same token kinds
	-- as the [ ] { } # and ## rules above, so later stages cannot tell them apart by kind
	Token ::=  '<' ':' 			/.$ba  makeToken($_LeftBracket);       $ea./
	Token ::=  ':' '>' 			/.$ba  makeToken($_RightBracket);      $ea./
	Token ::=  '<' '%' 			/.$ba  makeToken($_LeftBrace);         $ea./
	Token ::=  '%' '>' 			/.$ba  makeToken($_RightBrace);        $ea./
	Token ::=  '%' ':' 			/.$ba  makeToken($_Hash);              $ea./
	Token ::=  '%' ':' '%' ':' 	/.$ba  makeToken($_HashHash);          $ea./
          
	-----------------------------------------------------------------------------------
  	-- Character Sets
  	--
  	-- ! " # % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ { | } ~
  	--
  	-----------------------------------------------------------------------------------
   	
  	-- the 52 unaccented Latin letters, lower case then upper case
  	letter ::= 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 
  	           'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 
  	           'w' | 'x' | 'y' | 'z' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 
  	           'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 
  	           'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z'
    		
    -- the dollar sign is accepted here as an extension (see "Deviation From Spec" above)
    nondigit ::= letter | '_' | '$'
    
	digit       ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9'  
	
	-- used only by octal-escape-sequence; octal integer constants go through digit-sequence
	octal-digit ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' 
	
	hexadecimal-digit ::= digit |
	                      'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 
	                      'A' | 'B' | 'C' | 'D' | 'E' | 'F'
	                      
    -- does not include "'", '"', '\', '/', '*' or whitespace
	common-graphical-character ::= '!' | '#' | '%' | '&' | '(' | ')' |
                                   '+' | ',' | '-' | '.' | ':' | ';' |
                                   '<' | '=' | '>' | '?' | '[' | ']' |  
                                   '^' | '_' | '{' | '|' | '}' | '~' | '$'
	   
                            
    -- every graphical character; in this file it is referenced only by not-eol
    graphical-character ::= common-graphical-character | -- still does not include whitespace
                            "'" | '"' | '\' | '/' | '*'  -- these are the special chars 
                            
    -- characters that may appear unescaped inside a string literal:
    -- the double quote, the backslash, LF and CR are excluded
    string-literal-char ::= digit | letter | common-graphical-character | Unused |
                            "'" | '/' | '*' | HT | FF | ' '  
    
    
    -- characters that may appear unescaped inside a character constant:
    -- the single quote, the backslash, LF and CR are excluded
    character-literal-char ::= digit | letter | common-graphical-character | Unused |
                               '"' | '/' | '*' | HT | FF | ' '  
    
    -- newlines are significant to the preprocessor
    -- (LF is therefore not whitespace here; it is lexed as its own NewLine token)
    ws-char ::= ' ' | CR | HT | FF 
    
    -- anything that may follow // on the same line; LF and CR are excluded
    not-eol ::= digit | letter | Unused |                  -- not end of line
                graphical-character | HT | FF | ' '
    
    -- includes LF and CR (via ws-char), so multi-line comments may span lines
    not-slash-or-star ::= digit | letter | common-graphical-character | ws-char | Unused |
                          "'" | '"' | '\' | LF -- not these: '/', '*'
    
 
    -----------------------------------------------------------------------------------
    -- Comments and whitespace
    -----------------------------------------------------------------------------------

    -- newlines are significant to the preprocessor
    -- backslashes followed by newlines are removed in C99LexerKind.getKind()
	Token ::= NewLine          
	            /.$ba  makeToken($_NewLine); $ea./
    
    -- no action: a run of whitespace produces no token at all
    Token ::= WS
    
    
    -- Create comment nodes for the AST; the parser and preprocessor will never see comments.
    -- makeToken() discards these tokens unless comment tokens were requested in lex().
    Token ::= SLC
    	/.$ba  makeToken($_SingleLineComment); $ea./
    	
    Token ::= MLC
    	/.$ba  makeToken($_MultiLineComment); $ea./
    	
    
    -- one or more whitespace characters (never LF, see ws-char)
    WS -> ws-char
	    | WS ws-char
	     
	-- two slashes followed by any number of not-eol characters;
	-- the line terminator itself is not part of the comment
	SLC -> '/' '/'
         |  SLC not-eol
         
    -- the closing delimiter is one or more stars followed by a slash
    MLC ::= '/' '*' inside-mlc stars '/'
    
    stars -> '*'
           | stars '*' 
                
	-- Body of a multi-line comment. A run of stars stays inside the body only when it is
	-- followed by a character that is neither '/' nor '*'; a lone '/' is always allowed.
	inside-mlc ::= inside-mlc stars not-slash-or-star
	             | inside-mlc '/'
	             | inside-mlc not-slash-or-star
	             | $empty
	             
  	-----------------------------------------------------------------------------------
  	-- Identifiers
  	-----------------------------------------------------------------------------------

	-- an identifier starts with an identifier-nondigit and continues with
	-- identifier-nondigits or digits; it can never start with a digit
	identifier ::= identifier-nondigit
				 | identifier identifier-nondigit
				 | identifier digit

	-- nondigit covers letters, '_' and (as an extension) the dollar sign
	identifier-nondigit ::= nondigit 
	                      | universal-character-name
	                   -- other implementation defined characters

	-----------------------------------------------------------------------------------
  	-- Universal character names
  	-----------------------------------------------------------------------------------
	
	-- backslash-u takes exactly four hexadecimal digits, backslash-U exactly eight
	universal-character-name ::= '\' 'u' hex-quad 
	                           | '\' 'U' hex-quad hex-quad

	-- exactly four hexadecimal digits
	hex-quad ::= hexadecimal-digit 
	             hexadecimal-digit 
	             hexadecimal-digit 
	             hexadecimal-digit
	
	
	-----------------------------------------------------------------------------------
  	-- Constants
  	-----------------------------------------------------------------------------------

	
	
	-- Token ::= enumeration-constant
	
	 
	-- more lenient than spec grammar, no special rules for octal
	-- a decimal/octal or hexadecimal digit string, optionally followed by a suffix
	integer-constant ::= decimal-octal-constant
					   | decimal-octal-constant integer-suffix
					   | hexadecimal-constant
					   | hexadecimal-constant integer-suffix
	
	
	-- both decimal and octal constants are covered by one rule
	-- (so leading zeros and digits 8 and 9 are not checked, e.g. "09" is accepted)
	decimal-octal-constant ::= digit-sequence 
		
	-- slight deviation from spec grammar	
	hexadecimal-constant ::= hexadecimal-prefix hexadecimal-digit-sequence

    hexadecimal-prefix ::= '0' 'x' | '0' 'X'
    
    
	                   
	-- an unsigned suffix and a long or long-long suffix, alone or combined in either order
	integer-suffix ::= unsigned-suffix
	                 | unsigned-suffix long-suffix
                     | unsigned-suffix long-long-suffix
                     | long-suffix
                     | long-suffix unsigned-suffix
                     | long-long-suffix
                     | long-long-suffix unsigned-suffix

	unsigned-suffix ::= 'u' | 'U'
	
	long-suffix ::= 'l' | 'L'

	-- both letters must have the same case; mixed-case spellings are not accepted
	long-long-suffix ::= 'l' 'l' | 'L' 'L'

	floating-constant ::= decimal-floating-constant
					    | hexadecimal-floating-constant

	-- a constant containing a '.' may omit the exponent; a plain digit-sequence
	-- needs an exponent-part to be a floating constant
	decimal-floating-constant 
	   ::= fractional-constant
		 | fractional-constant exponent-part 
		 | fractional-constant exponent-part floating-suffix
		 | fractional-constant floating-suffix
		 | digit-sequence exponent-part    
		 | digit-sequence exponent-part floating-suffix
		 
		 
	-- the binary exponent is mandatory in every alternative
	hexadecimal-floating-constant 
	    ::= hexadecimal-prefix hexadecimal-fractional-constant binary-exponent-part
		  | hexadecimal-prefix hexadecimal-fractional-constant binary-exponent-part floating-suffix
		  | hexadecimal-prefix hexadecimal-digit-sequence binary-exponent-part
		  | hexadecimal-prefix hexadecimal-digit-sequence binary-exponent-part floating-suffix

	-- one or more decimal digits
	digit-sequence -> digit | digit-sequence digit
	
	-- digits are required on at least one side of the '.'
	fractional-constant ::= '.' digit-sequence
						  | digit-sequence '.'
						  | digit-sequence '.' digit-sequence

	-- 'e' or 'E', an optional sign, then decimal digits
	exponent-part ::= 'e' sign digit-sequence
				    | 'e' digit-sequence
				    | 'E' sign digit-sequence
				    | 'E' digit-sequence
 
	sign ::= '+' | '-'

	-- hexadecimal digits are required on at least one side of the '.'
	hexadecimal-fractional-constant
		::= '.' hexadecimal-digit-sequence
		  | hexadecimal-digit-sequence '.'
		  | hexadecimal-digit-sequence '.' hexadecimal-digit-sequence

	-- 'p' or 'P', an optional sign, then decimal (not hexadecimal) digits
	binary-exponent-part ::= 'p' sign digit-sequence
	                       | 'p' digit-sequence
    					   | 'P' sign digit-sequence
						   | 'P' digit-sequence

	-- one or more hexadecimal digits
	hexadecimal-digit-sequence ::= hexadecimal-digit
                                 | hexadecimal-digit-sequence hexadecimal-digit

	floating-suffix ::= 'f' | 'l' | 'F' | 'L'
	    
	    
	-----------------------------------------------------------------------------------
  	-- Enumeration Constants
  	-----------------------------------------------------------------------------------
	
	-- returned as identifier tokens
	-- enumeration-constant ::= identifier

    -----------------------------------------------------------------------------------
  	-- Character Constants
  	-----------------------------------------------------------------------------------
  	
    -- a non-empty character sequence in single quotes, optionally prefixed with L
    character-constant ::= "'" c-char-sequence "'"
                         | 'L' "'" c-char-sequence "'"	

    -- one or more c-chars; an empty character constant is not accepted
    c-char-sequence ::= c-char
                      | c-char-sequence c-char
                      
    c-char ::= escape-sequence
             | character-literal-char

    -- shared by character constants (c-char) and string literals (s-char)
    escape-sequence ::= simple-escape-sequence
                      | octal-escape-sequence
                      | hexadecimal-escape-sequence
                      | universal-character-name
                      
    simple-escape-sequence ::= 
        '\' "'" | '\' '"' | '\' '?' | '\' '\' | '\' 'a' |
        '\' 'b' | '\' 'f' | '\' 'n' | '\' 'r' | '\' 't' | '\' 'v'


    -- Only the first digit is matched by this rule; the longer forms are commented out.
    -- Further digits are still accepted inside a literal because digit is also a
    -- character-literal-char / string-literal-char.
    octal-escape-sequence ::= '\' octal-digit
                            --| '\' octal-digit octal-digit
                           -- | '\' octal-digit octal-digit octal-digit
                            
    -- Likewise only the first hexadecimal digit is matched here; following digits and
    -- letters are accepted as ordinary literal characters.
    hexadecimal-escape-sequence ::= '\' 'x' hexadecimal-digit
                                  --| hexadecimal-escape-sequence hexadecimal-digit


    
    -----------------------------------------------------------------------------------
  	-- String Literals
  	-----------------------------------------------------------------------------------
  	
    -- a possibly empty character sequence in double quotes, optionally prefixed with L;
    -- the empty literal has its own alternatives because s-char-sequence is never empty
    string-literal ::= '"' '"'
                     | '"' s-char-sequence '"'
                     | 'L' '"' '"'
                     | 'L' '"' s-char-sequence '"'
    
    -- one or more s-chars
    s-char-sequence ::= s-char
                      | s-char-sequence s-char
                      
    s-char ::= escape-sequence
             | string-literal-char
    
	
		
$End
