package org.eclipse.hyades.logging.parsers;

/**********************************************************************
 * Copyright (c) 2003 Hyades project.
 * All rights reserved.   This program and the accompanying materials
 * are made available under the terms of the Common Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/cpl-v10.html
 * 
 * Contributors: 
 * IBM - Initial API and implementation
 **********************************************************************/

import java.text.ParsePosition;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;

import org.eclipse.hyades.logging.events.ICommonBaseEvent;
import org.eclipse.hyades.logging.events.IComponentIdentification;
import org.eclipse.hyades.logging.events.IReportSituation;
import org.eclipse.hyades.logging.events.ISituation;

/** 
 * <code>AbstractAccessLogParser</code> is the abstract superclass for the <code>ApacheAccessLogParser</code>.
 * <p>
 * For each access log record, this class parses the client IP address, user ID, time 
 * stamp, method, file name, return code and file size.  This parsed information and 
 * information about the local machine (machine where the parse is performed, but not 
 * necessarily the machine that produced the access log) is used to produce a
 * <code>org.eclipse.hyades.logging.events.ICommonBaseEvent</code> object for each record.
 * 
 * This parser is based on the Apache Common Log Format (CLF) or "%h %l %u %t \"%r\" %>s %b".  
 * For more information, see the 
 * <a href="http://httpd.apache.org/docs-2.0/logs.html#accesslog">Apache HTTP Server Version 2.0 specification (Access Log File)</a>.
 *  
 * 
 * @author  Paul Slauenwhite
 * @author  Gary Dudley
 * @version	September 30, 2003
 * @see		org.eclipse.hyades.logging.parsers.Parser
 * @see		org.eclipse.hyades.logging.events.ICommonBaseEvent
 */
public abstract class AbstractAccessLogParser extends Parser {

    //Values parsed from the current access log record.  They are assigned by 
    //parseLogRecord()/parseDate() and cleared again by reset():

    /**
     * Client's IP address field (first field of the record).  Checked with 
     * <code>ParserUtilities.isValidIPAddress</code> while the record is parsed.
     */
    protected String clientIP = null;

    /**
     * Method field of the request (e.g. GET). 
     */
    protected String method = null;

    /**
     * File name field of the request (e.g. /).  May contain one or more spaces.
     */
    protected String fileName = null;

    /**
     * Return code field (e.g. 200), kept as the text found in the record. 
     */
    protected String returnCode = null;

    /**
     * File size field (e.g. 4757), kept as the text found in the record. 
     */
    protected String fileSize = null;

    /**
     * User ID field of the document requester, or <code>null</code> when the record 
     * contains a dash (e.g. -) in its place.
     */
    protected String userID = null;

    /**
     * Message portion of the access log record, starting at the opening double quote of 
     * the request.  The message includes the method, file name, protocol, return code, 
     * and file size fields. 
     */
    protected String message = null;

    /**
     * Time stamp field converted to the XML dateTime format 
     * (e.g. 2003-05-28T16:36:39.000000-04:00).  Built by parseDate().
     */
    protected StringBuffer currentTimeStamp = null;

    //User-supplied variables:

    /**
     * User-supplied value of the version of the web server product.  Instantiated in 
     * the setUserInput method of the subclass.
     */
    protected String productVersion = null;

    /**
     * Web server product name and version supplied by the subclass.  Used as the 
     * component of the CBE's source component ID.
     */
    protected String sourceID = null;

    //Private variables:

    /**
     * XML dateTime time stamp of the previous access log record, used to detect 
     * consecutive records that share a time stamp.
     * NOTE:  Must be initialized to an empty string for comparison with the first log record.
     * ASSUMPTION:  All access log records appear in chronological order within the log file.
     */
    private String previousTimeStamp = "";

    /**
     * Running count of consecutive records whose time stamp equals the previous record's 
     * time stamp.  The incremented value is used as the CBE's sequence number and the 
     * count is reset to 0 when a new time stamp is seen.
     * NOTE:  Must be initialized to 0 since no log records have been parsed.
     * ASSUMPTION:  All access log records appear in chronological order within the log file.
     */
    private long duplicateTimeStampCounter = 0;

    /**
     * Source component ID of the generated CBE.  A single instance is created here and 
     * re-initialized for every CBE.
     */
    private IComponentIdentification sourceComponentID = eventFactory.createComponentIdentification();
    
	/**
	 * Prepares this parser for parsing.  No access-log-specific set-up is performed; 
	 * the call is delegated to the superclass.
	 *
	 * @exception LogParserException propagated from the superclass's <code>preParse()</code>
	 */
	public void preParse() throws LogParserException {
		super.preParse();
	}
	/**
	 * Reads the access log line by line and converts every non-blank record into a 
	 * Common Base Event (CBE) stored in the <code>messages</code> array.
	 * <p>
	 * The method returns as soon as <code>MessageArraySize</code> CBEs have been filled.  
	 * When the end of the log is reached first, the unused tail of the array is set to 
	 * <code>null</code> and the array is returned, or <code>null</code> is returned if 
	 * this call did not produce any CBE.
	 *
	 * @return ICommonBaseEvent[] array of CBE's representing parsed records, or <code>null</code> if this call produced none.  
	 * @exception LogParserException thrown if a record is invalid or if no valid record has been parsed by the end of the access log
	 */
	public ICommonBaseEvent[] parseNext() throws LogParserException {

		ICommonBaseEvent[] result = null;

		curLine = readLine();
		arrayIndex = 0;

		try {

			//Consume lines until EOF (e.g. null line) or until the CBE array is full:
			for (; curLine != null; curLine = readLine()) {

				//Trim leading/trailing whitespace and skip lines that are empty afterwards:
				curLine = curLine.trim();

				if (curLine.length() == 0) {
					continue;
				}

				//ASSUMPTION:  Access log records do NOT span multiple lines.  Each log record is contained on one line.
				if (!parseLogRecord()) {
					throw new LogParserException(ParserUtilities.getResourceString("INVALID_ACCESS_LOG_ERROR_", file_path));
				}

				//(Re)initialize the CBE that receives this record:
				reinitializeCBE();

				//A record repeating the previous time stamp gets the next sequence number; a new 
				//time stamp becomes the reference value and restarts the duplicate counter.
				//ASSUMPTION:  All access log records appear in chronological order within the log file.
				String timeStamp = currentTimeStamp.toString();

				if (previousTimeStamp.equals(timeStamp)) {
					messages[arrayIndex].setSequenceNumber(++duplicateTimeStampCounter);
				}
				else {
					previousTimeStamp = timeStamp;
					duplicateTimeStampCounter = 0;
				}

				//Core CBE properties:
				messages[arrayIndex].setCreationTime(timeStamp);
				messages[arrayIndex].setMsg(message);
				messages[arrayIndex].setSeverity(ParserConstants.CBE_SEVERITY_3);

				//Remaining parsed fields become extended data elements (absent fields are skipped):
				addAccessLogEDE("method", method);
				addAccessLogEDE(ParserConstants.CLIENT, clientIP);
				addAccessLogEDE(ParserConstants.FILE, fileName);
				addAccessLogEDE("return_code", returnCode);
				addAccessLogEDE("file_size", fileSize);
				addAccessLogEDE("userid", userID);

				//Move to the next slot; a full array rewinds the index before the record is counted:
				arrayIndex++;

				boolean arrayFull = (arrayIndex == MessageArraySize);

				if (arrayFull) {
					arrayIndex = 0;
				}

				recordCount++;

				//Reset the local properties of a log record:
				reset();

				if (arrayFull) {
					return messages;
				}
			}

			//EOF:  Hand back a partially filled array with its unused slots cleared, or nothing at all:
			if (arrayIndex != 0) {

				int unused = arrayIndex;

				while (unused < MessageArraySize) {
					messages[unused] = null;
					unused++;
				}

				result = messages;
			}

			//Throw an exception if no valid access log records are parsed/logged:
			if (recordCount == 0) {
				throw new LogParserException(ParserUtilities.getResourceString("NO_LOG_RECORDS_ERROR_", file_path));
			}
		}
		catch (LogParserException l) {
			throw l;
		}
		catch (Throwable t) {
			ParserUtilities.exceptionHandler(t, curLineNumber, curLine, ParserUtilities.getResourceString("ACCESS_LOG_PARSER_ERROR_"));
		}

		//NOTE:  The log file is not closed here (a former finally block calling closeFiles() is disabled).
		return result;
	}

	/**
	 * Adds a string extended data element to the CBE at the current array index, unless 
	 * the value is <code>null</code>.
	 *
	 * @param name  Name of the extended data element.
	 * @param value Value of the extended data element, or <code>null</code> to add nothing.
	 */
	private void addAccessLogEDE(String name, String value) {

		if (value != null) {
			messages[arrayIndex].addExtendedDataElement(createStringEDE(name, value));
		}
	}

    /**
     * (Re)initializes the CBE at the current array index and attaches the source 
     * component ID and a generic situation to it.
     */
    private void reinitializeCBE() {

        messages[arrayIndex].init();

        refreshSourceComponentID();
        messages[arrayIndex].setSourceComponentId(sourceComponentID);

        messages[arrayIndex].setSituation(createSituation());
    }

    /**
     * (Re)initializes the shared source component ID and sets its location, component 
     * and component type properties.
     */
    private void refreshSourceComponentID() {

        sourceComponentID.init();

        //Where the parse is performed:
        sourceComponentID.setLocation(localHostId);
        sourceComponentID.setLocationType(localHostIdFormat);

        //What produced the log:
        sourceComponentID.setComponent(sourceID);
        sourceComponentID.setSubComponent(ParserConstants.UNKNOWN);

        //New for CBE 1.0.1:
        sourceComponentID.setComponentType(ParserConstants.APACHE_COMPONENT_TYPE);
        sourceComponentID.setComponentIdType(ParserConstants.APACHE_COMPONENT_ID_TYPE);
    }

    /**
     * Builds the generic situation attached to every CBE produced by this parser:  a 
     * report situation with an internal reasoning scope and the log report category.
     * 
     * @return ISituation The generic situation for a CBE. 
     * @since CBE 1.0.1 
     */
    private ISituation createSituation() {

        //The actual situation is unknown, therefore the generic report situation is used:
        ISituation situation = eventFactory.createSituation();
        situation.setCategoryName(ParserConstants.REPORT_SITUATION_CATEGORY_NAME);

        //Situation type describing the report:
        IReportSituation reportSituation = eventFactory.createReportSituation();
        reportSituation.setReasoningScope(ParserConstants.INTERNAL_REASONING_SCOPE);
        reportSituation.setReportCategory(ParserConstants.LOG_REPORT_CATEGORY);

        situation.setSituationType(reportSituation);

        return situation;
    }

    /**
     * Parses a time stamp from an access log record, produces a Date object from the
     * parsed information, and converts the Date object into a XML DateTime String 
     * stored in <code>currentTimeStamp</code>.
     * <p>
     * The time stamp must be enclosed in square brackets and end with a signed, four 
     * digit time zone offset (e.g. -0400).  The offset is copied verbatim into the 
     * result; the date and time are never shifted to another time zone.
     *
     * @param startIndex Starting index of time stamp in curLine (index of the '['). 
     * @param endIndex Ending index of time stamp in curLine (index of the ']'). 
     * @return true if time stamp is valid; otherwise false. 
     */
    protected boolean parseDate(int startIndex, int endIndex) {

        //Running example: [28/May/2003:16:36:39 -0400] --> 2003-05-28T16:36:39.000000-04:00

        //Reject indices that do not describe a span inside the current line:
        if ((curLine == null) || (startIndex < 0) || (startIndex >= endIndex) || (endIndex >= curLine.length())) {
            return false;
        }

        //Verify that the time stamp is enclosed with square brackets:
        if ((curLine.charAt(startIndex) != '[') || (curLine.charAt(endIndex) != ']')) {
            return false;
        }

        //Find the sign of the time zone offset, trying '-' (e.g. -0400) before '+' (e.g. +0100):
        int timeZoneIndex = curLine.indexOf('-', startIndex);

        if ((timeZoneIndex == -1) || (timeZoneIndex > endIndex)) {

            timeZoneIndex = curLine.indexOf('+', startIndex);

            if ((timeZoneIndex == -1) || (timeZoneIndex > endIndex)) {
                return false;
            }
        }

        //The sign must be followed by four digits (hhmm) that lie before the closing bracket.  
        //Otherwise the offset copied below would contain the bracket or other text, or run past the end of the line:
        if ((timeZoneIndex + 5) > endIndex) {
            return false;
        }

        for (int index = (timeZoneIndex + 1); index <= (timeZoneIndex + 4); index++) {

            char digit = curLine.charAt(index);

            if ((digit < '0') || (digit > '9')) {
                return false;
            }
        }

        //The date and time are parsed and re-formatted in GMT.  The zone only has to be identical for 
        //both steps and free of daylight saving transitions:  with the JVM's default time zone, a 
        //wall-clock time inside the local daylight saving gap would be shifted by an hour.
        TimeZone gmt = TimeZone.getTimeZone("GMT");

        //Parse the access log's time stamp excluding the time zone offset and square brackets (e.g. 28/May/2003:16:36:39) to a java.util.Date object:
        SimpleDateFormat formatter = new SimpleDateFormat(ParserConstants.APACHE_ACCESS_TIME_STAMP_FORMAT, Locale.US);
        formatter.setTimeZone(gmt);

        Date creationDate = formatter.parse(curLine.substring((startIndex + 1), timeZoneIndex).trim(), new ParsePosition(0));

        //An unparsable time stamp (e.g. null java.util.Date object) is invalid:
        if (creationDate == null) {
            return false;
        }

        //Format the java.util.Date object to its XML dateTime format (e.g. "yyyy-MM-dd HH:mm:ss").  
        //Locale.US keeps the Gregorian calendar regardless of the JVM's default locale:
        formatter = new SimpleDateFormat(ParserConstants.XML_DATETIME_FORMAT, Locale.US);
        formatter.setTimeZone(gmt);

        currentTimeStamp = new StringBuffer(formatter.format(creationDate).trim());

        //Replace the first space with "T":
        currentTimeStamp.replace(10, 11, "T");

        //Add the fractional second value (e.g. .000000):
        currentTimeStamp.append(ParserConstants.SIX_ZERO);

        //NOTE:  Time Zone is in the form of [+/-]hh:mm.
        //Add the time zone offset sign (e.g. "+" or "-"):
        currentTimeStamp.append(curLine.charAt(timeZoneIndex));

        //Add the time zone offset's hours (e.g. 04) and minutes (e.g. 00) delimited by a colon:
        currentTimeStamp.append(curLine.substring((timeZoneIndex + 1), (timeZoneIndex + 3)));
        currentTimeStamp.append(":");
        currentTimeStamp.append(curLine.substring((timeZoneIndex + 3), (timeZoneIndex + 5)));

        return true;
    }

    /**
     * Main parsing routine for an access log record.
     * <p>
     * Walks <code>curLine</code> from left to right and stores the client IP address, 
     * user ID, time stamp, method, message, file name, return code and file size in the 
     * corresponding fields of this parser.  When false is returned, the fields may have 
     * been only partially assigned.
     *
     * @return   true if the access log record is successfully parsed; otherwise false. 
     */
    protected boolean parseLogRecord() {

        //Running example: 9.26.157.24 - - [28/May/2003:16:36:39 -0400] "GET / HTTP/1.1" 200 4757
        //
        //previousIndex marks the delimiter that starts the field being parsed and 
        //currentIndex marks the delimiter that ends it.

        //--- Client's IP address (e.g. 9.26.157.24):  Everything before the first space. ---
        int previousIndex = 0;
        int currentIndex = curLine.indexOf(" ");

        //Return false if the space cannot be found since it is an invalid log record: 
        if (currentIndex == -1) {
            return false;
        }

        clientIP = curLine.substring(previousIndex, currentIndex).trim();

        //Return false if the client's IP address is rejected by the validity check since it is an invalid log record: 
        if (!ParserUtilities.isValidIPAddress(clientIP)) {
            return false;
        }

        //--- Identity of client (e.g. - or RFC1413):  Between the first and second space.  Located but skipped. ---
        previousIndex = currentIndex;
        currentIndex = curLine.indexOf(" ", (previousIndex + 1)); //1 is the length of " ".

        if (currentIndex == -1) {
            return false;
        }

        //--- User ID of the document requester (e.g. -):  Between the second and third space. ---
        previousIndex = currentIndex;
        currentIndex = curLine.indexOf(" ", (previousIndex + 1)); //1 is the length of " ".

        if (currentIndex == -1) {
            return false;
        }

        userID = curLine.substring(previousIndex, currentIndex).trim();

        //Disregard the dash since the user ID of the document requester is not present: 
        if (userID.equals("-")) {
            userID = null;
        }

        //--- Time stamp (e.g. [28/May/2003:16:36:39 -0400]):  From the first '[' to the first ']' after it. ---
        previousIndex = currentIndex;
        currentIndex = curLine.indexOf("[", previousIndex);

        if (currentIndex == -1) {
            return false;
        }

        previousIndex = currentIndex;
        currentIndex = curLine.indexOf("]", previousIndex);

        if (currentIndex == -1) {
            return false;
        }

        if (!parseDate(previousIndex, currentIndex)) {
            return false;
        }

        //--- Request (e.g. "GET / HTTP/1.1"):  From the first '"' after the time stamp to the last '"' in the record. ---
        previousIndex = currentIndex;
        currentIndex = curLine.indexOf("\"", previousIndex);

        //Return false if the opening '"' character cannot be found since it is an invalid log record: 
        if (currentIndex == -1) {
            return false;
        }

        //previousIndex now marks the opening '"':
        previousIndex = currentIndex;

        //Return false if there is no second '"' character closing the request since it is an invalid log record: 
        int secondQuoteIndex = curLine.lastIndexOf("\"");

        if (secondQuoteIndex <= previousIndex) {
            return false;
        }

        //Find the space that ends the method.  It must lie inside the quoted request;  a request 
        //without any space (e.g. "-" or "GET") would otherwise pick up a space after the closing 
        //'"' and later produce reversed substring indices:
        currentIndex = curLine.indexOf(" ", previousIndex);

        if ((currentIndex == -1) || (currentIndex > secondQuoteIndex)) {
            return false;
        }

        //Parse the method name (e.g. GET):
        method = curLine.substring((previousIndex + 1), currentIndex).trim(); //1 is the length of "\"".

        //Parse the message (e.g. "GET / HTTP/1.1" 200 4757):
        message = curLine.substring(previousIndex).trim();

        //--- File name (e.g. /):  From the space after the method to the last space before the closing '"'. ---
        //NOTE:  The file name may contain one or more spaces.
        previousIndex = currentIndex;
        currentIndex = curLine.lastIndexOf(" ", secondQuoteIndex);

        //Return false if there is no further space between the method and the closing '"' since it is an invalid log record: 
        if (currentIndex <= previousIndex) {
            return false;
        }

        fileName = curLine.substring(previousIndex, currentIndex).trim();

        //--- Protocol (e.g. HTTP/1.1):  Skipped by locating the space that follows the closing '"'. ---
        previousIndex = currentIndex;
        currentIndex = curLine.indexOf(" ", (previousIndex + 1)); //1 is the length of " ".

        if (currentIndex == -1) {
            return false;
        }

        //--- Return code (e.g. 200):  Between the space after the closing '"' and the next space. ---
        previousIndex = currentIndex;
        currentIndex = curLine.indexOf(" ", (previousIndex + 1)); //1 is the length of " ".

        if (currentIndex == -1) {
            return false;
        }

        returnCode = curLine.substring(previousIndex, currentIndex).trim();

        //--- File size (e.g. 4757):  The remainder of the record. ---
        fileSize = curLine.substring(currentIndex).trim();

        return true;
    }

    /**
     * Clears every value parsed from the current log record so that nothing carries 
     * over into the next record.
     */
    protected void reset() {

        //Requester:
        clientIP = null;
        userID = null;

        //Time stamp:
        currentTimeStamp = null;

        //Request and response:
        message = null;
        method = null;
        fileName = null;
        returnCode = null;
        fileSize = null;
    }
}