/**********************************************************************
 * Copyright (c) 2005 IBM Corporation and others.
 * All rights reserved.   This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 * $Id: RegularExpressionExtractor.java,v 1.8 2005/02/16 22:20:29 qiyanli Exp $
 * 
 * Contributors: 
 * IBM - Initial API and implementation
 **********************************************************************/
package org.eclipse.hyades.logging.adapter.extractors;

import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.eclipse.hyades.logging.adapter.MessageString;
import org.eclipse.hyades.logging.adapter.AdapterInvalidConfig;
import org.eclipse.hyades.logging.adapter.util.Messages;

/**
 * An extractor implementation that uses regular expressions to locate the
 * start and end of records.  The JDK 1.4 java.util.regex library is used to
 * process the regular expressions.
 * <p>
 * The start and end patterns are compiled once in {@link #update()} with
 * {@link Pattern#MULTILINE} so that '^' and '$' match at line boundaries
 * inside the search string as well as at its beginning and end.
 */
public class RegularExpressionExtractor extends SimpleExtractor {

	/* Compiled form of the configured start pattern, or null if none is configured */
	private Pattern compiledStartPattern = null;

	/* Compiled form of the configured end pattern, or null if none is configured */
	private Pattern compiledEndPattern = null;
	
	/* Flag to indicate that the start pattern is exactly the line start pattern '^' */
	private boolean lineStartPattern = false;
	
	/* Flag to indicate that the end pattern is exactly the line end pattern '$' */
	private boolean lineEndPattern = false;
	
	/**
	 * Validates the configured start and end patterns and compiles them.
	 * <p>
	 * All state derived from a previous configuration is discarded first, so
	 * calling this method again after the patterns have changed never leaves
	 * a stale compiled pattern or a stale '^' / '$' flag behind.
	 * 
	 * @throws AdapterInvalidConfig if neither a start nor an end pattern is 
	 * configured, or if a configured pattern is not a valid regular expression
	 */
	public void update() throws AdapterInvalidConfig {
		super.update();
		
		/* Forget everything derived from the previous configuration */
		compiledStartPattern = null;
		compiledEndPattern = null;
		lineStartPattern = false;
		lineEndPattern = false;
		
		String startPattern = getStartPattern();
		String endPattern = getEndPattern();
		boolean hasStartPattern = startPattern != null && !startPattern.equals("");
		boolean hasEndPattern = endPattern != null && !endPattern.equals("");
		
		/* If there are no start and end patterns then this is an invalid configuration */
		if (!hasStartPattern && !hasEndPattern) {
			/* NOTE(review): this reports the end pattern message with the start pattern 
			 * as its argument - confirm whether a more specific message should be used.
			 */
			throw new AdapterInvalidConfig(Messages.getString("HyadesGA_CBE_Regular_Expression_Extractor_End_Pattern_Invalid_ERROR_", startPattern));
		}
		
		/* If we have a start pattern then compile it now */
		if (hasStartPattern) {
			try {
				compiledStartPattern = Pattern.compile(startPattern, Pattern.MULTILINE);
			}
			catch (PatternSyntaxException e) {
				throw new AdapterInvalidConfig(Messages.getString("HyadesGA_CBE_Regular_Expression_Extractor_Start_Pattern_Invalid_ERROR_",e.getMessage()));
			}
			lineStartPattern = startPattern.equals("^");
		}
	
		/* If we have an end pattern then compile it now */
		if (hasEndPattern) {
			try {
				compiledEndPattern = Pattern.compile(endPattern, Pattern.MULTILINE);
			}
			catch (PatternSyntaxException e) {
				throw new AdapterInvalidConfig(Messages.getString("HyadesGA_CBE_Regular_Expression_Extractor_End_Pattern_Invalid_ERROR_",e.getMessage()));
			}
			lineEndPattern = endPattern.equals("$");
		}
	}
	
	/**
	 * Decide whether the pattern handed to {@link #search} designates the start pattern.
	 * <p>
	 * The same String instance as the configured start pattern always counts as the 
	 * start pattern.  An equal but distinct instance also counts, provided the text is 
	 * not equal to the end pattern as well; in that ambiguous case only identity can 
	 * tell the two apart, so the pattern is treated as the end pattern.
	 * 
	 * @param pattern the pattern string passed to the search
	 * @return true if the compiled start pattern should be used, false if the 
	 * compiled end pattern should be used
	 */
	private boolean isStartPattern(String pattern) {
		String startPattern = getStartPattern();
		if (pattern == startPattern) {
			return true;
		}
		return pattern != null && pattern.equals(startPattern) && !pattern.equals(getEndPattern());
	}
	
	/**
	 * Search for the pattern in the specified search string.  If the pattern is located in the 
	 * search string then the position variable is loaded with the start of the pattern and the 
	 * end of the pattern and is marked valid.  If the pattern is not found the position is left
	 * untouched.
	 * <p>
	 * The pattern argument only selects which of the two patterns compiled by 
	 * {@link #update()} is used; it is not compiled here.
	 * 
	 * @param position receives the inclusive start and end offsets of the match, 
	 * expressed relative to the beginning of searchString
	 * @param pattern the start pattern or the end pattern
	 * @param searchString the text to search in
	 * @param startOffset the offset in searchString at which to begin searching
	 */
	protected void search(StringPosition position, String pattern, String searchString, int startOffset) {
		Pattern expression;
		
		/* Which pattern are we searching for */
		if (isStartPattern(pattern)) {
			expression = compiledStartPattern;
			/* Skip the line separator if it is at the beginning of the search string and the start
			 * pattern is '^'
			 */
			if (lineStartPattern && startOffset == 0 && searchString.startsWith(localLineSeparator)) {
				startOffset = localLineSeparator.length();
			}
		}
		else {
			expression = compiledEndPattern;
		}
		
		/* Get the input string to search in; matcher offsets are relative to this substring */
		String input = searchString.substring(startOffset);

		/* Get the matcher and try to find the pattern */
		Matcher matcher = expression.matcher(input);
		
		if (!matcher.find()) {
			return;
		}

		/* For special characters such as '^' and '$' the pattern may have length 0 */
		if (matcher.start() == matcher.end()) {
			int matchOffset = startOffset + matcher.start();

			/* If this is '$' then the match is the first line terminator character but 
			 * we don't want to include that character.
			 */
			if (matcher.start() != 0 && matcher.start() != input.length()-1) {
				matchOffset--;
			}
			position.setStartPosition(matchOffset);
			position.setEndPosition(matchOffset);				
		}
		else {
			/* Translate back to searchString offsets; the end position is inclusive */
			position.setStartPosition(startOffset + matcher.start());
			position.setEndPosition(startOffset + matcher.end()-1);
		}
		position.setValid(true);
	}

	/**
	 * Extract our final message based upon whether the end and start patterns are 
	 * included.  The line patterns '^' and '$' are always treated as included.
	 * 
	 * @param searchString the text the positions refer to
	 * @param startPosition the position of the start pattern match
	 * @param endPosition the position of the end pattern match
	 * @return a new MessageString whose value is the selected part of searchString
	 */
	protected MessageString adjustMessage(String searchString, StringPosition startPosition, StringPosition endPosition) {
		int adjustedStart;
		int adjustedEnd;
		
		/* Index of the last character of the search string */
		int endOffset = searchString.length()-1;
		
		if(getIncludeStartPattern() || lineStartPattern ) {
			adjustedStart = startPosition.getStartPosition();
		}
		else {
			/* Begin just after the last character of the start pattern */
			adjustedStart = startPosition.getEndPosition()+1;
		}
		
		if(getIncludeEndPattern() || lineEndPattern) {
			/* check if we are at the end of the string (ie we are flushing) */
			if (endPosition.getStartPosition() > endOffset) {
				adjustedEnd = endOffset + 1;
			}
			else {
				/* substring's end index is exclusive, so step one past the inclusive end */
				adjustedEnd = endPosition.getEndPosition()+1;
			}
		}
		else {
			/* check if we are at the end of the string (ie we are flushing) */
			if (endPosition.getStartPosition() == endOffset) {
				/* To include the last character we must specify one past the end
				 * of the string for substring below to work.
				 */
				adjustedEnd = endPosition.getStartPosition()+1;
			}
			else {
				/* Stop just before the first character of the end pattern */
				adjustedEnd = endPosition.getStartPosition();
			}
		}
		
		MessageString result=new MessageString();
		result.setValue(searchString.substring(adjustedStart, adjustedEnd));
		
		return result;		
	}
}

