//------------------------------------------------------------------------------
// Copyright (c) 2005, 2006 IBM Corporation and others.
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// which accompanies this distribution, and is available at
// http://www.eclipse.org/legal/epl-v10.html
//
// Contributors:
// IBM Corporation - initial implementation
//------------------------------------------------------------------------------
package org.eclipse.epf.publishing.services.search;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.Enumeration;
import java.util.Properties;

import org.apache.lucene.demo.html.HTMLParser;

/**
 * Parses an HTML file and compiles the document fields (URL, content,
 * summary, title and meta tags) from it.
 */
public class HTMLDocument extends AbstractDocument
{
	/**
	 * Default constructor.
	 */
	public HTMLDocument()
	{
		super();
	}

	/**
	 * Parses and compiles the document fields from the given file.
	 */
	protected void compileDocument( File file )
	{
    	// use the file path as the url
    	setDocUrl( file.getPath() );

		try
		{
 			FileInputStream inStream = new FileInputStream( file );
 			InputStreamReader streamReader = new InputStreamReader( inStream, "UTF8" ); //$NON-NLS-1$
    		HTMLParser parser = new HTMLParser( streamReader );

//			HTMLParser parser = new HTMLParser( file );

    		// set the contents
    		setContentReader( parser.getReader() );

    		// set the summary
    		setSummary( parser.getSummary() );

    		// set title
    		setDocTitle( parser.getTitle() );

    		Properties prop = parser.getMetaTags();

    		for(Enumeration enu = prop.propertyNames(); enu.hasMoreElements();)
    		{
    			String tagName = (String)enu.nextElement();
    			super.additionalFields.put(tagName, prop.getProperty(tagName));
    		}
    		
    		parser = null;

			// close stream
		  	streamReader.close();
		  	inStream.close();

//    		System.out.println( file.getPath() );
		}
		catch( Exception e )
		{
			e.printStackTrace();
		}
	}

}

