//------------------------------------------------------------------------------
// Copyright (c) 2005, 2006 IBM Corporation and others.
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// which accompanies this distribution, and is available at
// http://www.eclipse.org/legal/epl-v10.html
//
// Contributors:
// IBM Corporation - initial implementation
//------------------------------------------------------------------------------
package org.eclipse.epf.publishing.services.search;

import java.io.File;
import java.io.Reader;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

/**
 * Base class for turning a file into a Lucene {@link Document} that can be
 * indexed and searched. A Lucene document is a list of fields, each with a
 * name and a text value. <br>
 *
 * The fields that are maintained by this class are: <br>
 * <UL>
 * <LI>document title ({@link #FIELD_TITLE})</LI>
 * <LI>document URL ({@link #FIELD_URL})</LI>
 * <LI>summary, usually the first few lines of text ({@link #FIELD_SUMMARY})</LI>
 * <LI>text of document ({@link #FIELD_CONTENTS})</LI>
 * <LI>any extra name/value pairs placed in {@link #additionalFields}</LI>
 * </UL>
 * <br>
 * All types of document, such as HTML and PDF documents, must
 * subclass AbstractDocument and implement {@link #compileDocument(File)}. <br>
 * <br>
 * Instances are stateful: {@link #document(File)} does not clear the fields
 * before calling {@link #compileDocument(File)}, so a value left over from a
 * previous call is reused unless the subclass overwrites it.
 */
public abstract class AbstractDocument
{
	/** Name of the Lucene field holding the document URL. */
	public static final String FIELD_URL = "url"; //$NON-NLS-1$
	/** Name of the Lucene field holding the document text. */
	public static final String FIELD_CONTENTS = "contents"; //$NON-NLS-1$
	/** Name of the Lucene field holding the document summary. */
	public static final String FIELD_SUMMARY = "summary"; //$NON-NLS-1$
	/** Name of the Lucene field holding the document title. */
	public static final String FIELD_TITLE = "title"; //$NON-NLS-1$
	/** Value stored in a field whose content was not supplied by the subclass. */
	public static final String FIELD_VALUE_UNDEFINED = ""; //$NON-NLS-1$

	protected String _docTitle = null;
	protected String _docUrl = null;
	protected String _summary = null;

	// The content is supplied either as a string or as a reader; the two
	// setters below keep these mutually exclusive.
	protected String _contents = null;
	protected Reader _contentReader = null;

	/**
	 * Extra fields to add to the Lucene document. Keys are field names and
	 * values are field texts; both are cast to <code>String</code> when the
	 * document is built.
	 */
	protected Map additionalFields = new HashMap();

	/**
	 * Default constructor.
	 */
	public AbstractDocument()
	{
	}

	/**
	 * Parses the given file and builds the Lucene document for it.
	 * <p>
	 * {@link #compileDocument(File)} is invoked first; any of title, URL,
	 * summary or contents that is still <code>null</code> afterwards is
	 * replaced with {@link #FIELD_VALUE_UNDEFINED}.
	 *
	 * @param file the file to parse; passed unchanged to
	 *             {@link #compileDocument(File)}
	 * @return a new Lucene document holding the url, contents, summary and
	 *         title fields followed by every entry of {@link #additionalFields}
	 */
	public Document document( File file )
	{
		// first compile the document fields
		compileDocument( file );

		if( _docTitle == null )
		{
			_docTitle = FIELD_VALUE_UNDEFINED;
		}
		if( _docUrl == null )
		{
			_docUrl = FIELD_VALUE_UNDEFINED;
		}
		if( _summary == null )
		{
			_summary = FIELD_VALUE_UNDEFINED;
		}

		// create a new Lucene document
		Document luceneDocument = new Document();

		// add the url as a field named "url".  Use an UnIndexed field, so
		// that the url is just stored with the document, but is not searchable.
		luceneDocument.add( Field.UnIndexed( FIELD_URL, _docUrl ) );

		// add the contents so it will get tokenized and indexed.
		if( _contents != null )
		{
			luceneDocument.add( Field.Text( FIELD_CONTENTS, _contents ) );
		}
		else if( _contentReader != null )
		{
			luceneDocument.add( Field.Text( FIELD_CONTENTS, _contentReader ) );
		}
		else
		{
			// Neither a string nor a reader was supplied. Fall back to the
			// same default used for the other fields instead of handing a
			// null reader to Lucene.
			luceneDocument.add( Field.Text( FIELD_CONTENTS, FIELD_VALUE_UNDEFINED ) );
		}

		// add the summary as an UnIndexed field, so that it is stored and returned
		// with hit documents for display.
		luceneDocument.add( Field.UnIndexed( FIELD_SUMMARY, _summary ) );

		// Add the title as a separate Text field, so that it can be searched
		// separately.
		luceneDocument.add( Field.Text( FIELD_TITLE, _docTitle ) );

		// Add whatever extra fields the subclass registered.
		for ( Iterator it = additionalFields.entrySet().iterator(); it.hasNext(); )
		{
			Map.Entry entry = (Map.Entry) it.next();
			luceneDocument.add( Field.Text( (String) entry.getKey(), (String) entry.getValue() ) );
		}

		return luceneDocument;
	}

	/**
	 * Sets the document title.
	 *
	 * @param title the title text; <code>null</code> is replaced with
	 *              {@link #FIELD_VALUE_UNDEFINED} when the document is built
	 */
	protected void setDocTitle( String title )
	{
		_docTitle = title;
	}

	/**
	 * Sets the document url.
	 *
	 * @param url the url text; <code>null</code> is replaced with
	 *            {@link #FIELD_VALUE_UNDEFINED} when the document is built
	 */
	protected void setDocUrl( String url )
	{
		_docUrl = url;
	}

	/**
	 * Sets the document summary.
	 *
	 * @param summary the summary text; <code>null</code> is replaced with
	 *                {@link #FIELD_VALUE_UNDEFINED} when the document is built
	 */
	protected void setSummary( String summary )
	{
		_summary = summary;
	}

	/**
	 * Sets the document content with the given string.
	 * Mutually exclusive with setting the document content
	 * with a reader: any previously set reader is discarded.
	 *
	 * @param contents the full text of the document
	 * @see #setContentReader(Reader)
	 */
	protected void setContentString( String contents )
	{
		_contents = contents;
		_contentReader = null;
	}

	/**
	 * Sets the document content with the given reader.
	 * Mutually exclusive with setting the document content
	 * with a string: any previously set string is discarded.
	 *
	 * @param contentReader a reader over the full text of the document
	 * @see #setContentString(String)
	 */
	protected void setContentReader( Reader contentReader )
	{
		_contentReader = contentReader;
		_contents = null;
	}

	/**
	 * Parses and compiles the document fields from the given file.
	 * Implementations are expected to populate the title, url, summary and
	 * content through the setters of this class (or the protected fields).
	 *
	 * @param file the file handed to {@link #document(File)}
	 */
	protected abstract void compileDocument( File file );

}

