/**********************************************************************
 * Copyright (c) 2005, 2009 IBM Corporation and others.
 * All rights reserved.   This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 * $Id: performance.c,v 1.4 2009/05/20 03:35:12 kchan Exp $
 * 
 * Contributors: 
 * IBM - Initial API and implementation
 **********************************************************************/

#ifdef _WIN32
#include <winsock2.h> /* Bug 134924 */
#include <windows.h>
#include <time.h>
#include <sys/timeb.h>
#include <sys/types.h>
#ifdef _M_IA64
#include <ia64reg.h>
#endif
#else
 #ifdef _AIX
  #include <sys/atomic_op.h>
  #include <sys/systemcfg.h>
 #elif __linux__
 	#if defined __i386__ && defined(USE_PTHREAD_ATOMICS)
		// no need to define asm/system.h
 	#elif defined __i386__
		#include <asm/system.h>
 	#elif defined __s390__ || defined __powerpc64__
		#include <asm/atomic.h>
 	#endif
 #elif __OS400__
  #include <mih/cmpswp.h>
  #include <mih/mattod.h>                                            
 #else
 #endif

 #include <sys/time.h>
 #ifdef __OS400__
  #define TIMEB time_t
  #define FTIME(param) time(param)
 #else
  #include <sys/timeb.h>
 #endif
 #include <unistd.h>
#endif

#include <stdio.h>

#include "performance.h"
#include "RABindings.h"
#include "JvmpiWriter.h" 

/*
 * TIMEB / FTIME: aliases for the platform's ftime()-style structure and call.
 * OS/400 defines its own pair next to its includes earlier in this file (and
 * does not include <sys/timeb.h>), so it is excluded here to avoid an
 * incompatible macro redefinition.
 */
#ifdef _WIN32									/* _WIN32 */
#define TIMEB  struct _timeb
#define FTIME(param)  _ftime(param)
#elif !defined(__OS400__)						/* every other platform except OS/400 */
#define TIMEB	struct timeb
#define FTIME(param)  ftime(param)
#endif											/* endif */


/* Wall-clock time captured by jvmpiAgent_collectStartTimeInformation(),
 * converted with timeToTicks(). */
timestamp_t _startTime;

/* Tick rate established by determineTicksPerMicrosecond(). */
unsigned int _ticksPerMicrosecond;
/* ticksToTime2() of the difference between _startTime and _startTimeAsTicks;
 * negated when _startTimeAsTicks is the larger of the two. */
double       _startTimeAsDouble;
/* Value of jvmpiAgent_getCurrentTime() sampled in
 * jvmpiAgent_collectStartTimeInformation(). */
timestamp_t  _startTimeAsTicks;

#if defined (_WIN32)  /* bug 160511 */
/* Set to TRUE by determineTicksPerMicrosecond() when QueryPerformanceFrequency() succeeds. */
BOOL         _highResPerfAvailable=FALSE; /* high-resolution performance counter if facility available*/
/* Multiplier applied both to the recorded tick rate and to every counter reading. */
unsigned int _tickBooster;                /* boosts tick frequency to at least 100 per microsec (3-digit accuracy) */
#endif

#ifdef MVS
#pragma option_override(determineTicksPerMicrosecond, "OPT(LEVEL, 0)")
#endif

/** DETERMINE_TICKS_PER_MICROSECOND  ******************************************
  * Establishes _ticksPerMicrosecond, the rate of the counter read by
  * jvmpiAgent_getCurrentTime().
  *
  *  - Windows: uses QueryPerformanceFrequency() when available (also setting
  *    _highResPerfAvailable and _tickBooster); otherwise reads the "~Mhz"
  *    value of CPU 0 from the registry.
  *  - OS/400: fixed at 4096.
  *  - Elsewhere: times a busy loop with both gettimeofday() and
  *    jvmpiAgent_getCurrentTime() and averages the observed rate.
  *
  * The result is never left at zero: when no rate can be determined it is
  * set to 1, the same floor the calibration loop applies to each sample.
  *
  * @param boggie  On the calibrating platforms receives a value derived from
  *                the volatile scratch array (5+10); it is not written on
  *                Windows or OS/400.  May be NULL.
  */
void determineTicksPerMicrosecond(int *boggie)
{
#if defined (_WIN32) /* bug 160511 start */ 
	LARGE_INTEGER liHighResTicksPerSec; /* actual ticks per sec if high resolution performance counter available */
	/* A zero frequency would make the booster computation divide by zero, so treat it as "not available". */
	if (QueryPerformanceFrequency(&liHighResTicksPerSec) && liHighResTicksPerSec.QuadPart > 0) { 
		double exactTicksPerMicrosecond; /* exact ticks per microsecond (possibly with fractional part)*/
		
		_highResPerfAvailable=TRUE;
		exactTicksPerMicrosecond=liHighResTicksPerSec.QuadPart/1000000.0;
		
		/* Need to simulate an integral MHz tick rate with at least 3-digit accuracy for consistency with other 
		 * platforms. Calculate _tickBooster as multiplier that pushes both _ticksPerMicrosecond and the actual
		 * tick count obtained later into the right range.
		 */
		if (exactTicksPerMicrosecond < 1.0) { 
			_tickBooster=(unsigned int)(1+100.0/exactTicksPerMicrosecond); 
		} else if (exactTicksPerMicrosecond < 10.0) {
			_tickBooster=100;	
		} else if (exactTicksPerMicrosecond < 100.0) {
			_tickBooster=10;	
		} else {
			_tickBooster=1;	
		}
		/* _ticksPerMicrosecond will be recorded as a boosted rate via multiplication by _tickBooster.
		 * Actual ticks measured at jvmpiAgent_getCurrentTime will be equally boosted so it all works out.
		 */
		_ticksPerMicrosecond=(unsigned int)(_tickBooster*exactTicksPerMicrosecond);
#if defined(_DEBUG) && !defined (MVS) && !defined (__OS400__) 
		printf("Using high res perf counter: highResTicksPerSec=%I64d, exactTicksPerMicrosecond=%f, _tickBooster=%d, _ticksPerMicrosecond=%d \n",
				liHighResTicksPerSec.QuadPart, exactTicksPerMicrosecond, _tickBooster, _ticksPerMicrosecond);
#endif				
	} else {         /* bug 160511 end */
		HKEY    handle;
		DWORD   error;
		DWORD	valueType;
		unsigned long cpuMhz=0;				/* stays 0 unless the registry query succeeds */
		DWORD   valueLength=sizeof(cpuMhz);
		const char *valueName="~Mhz";
		const char *key="HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0";
	
		error=RegOpenKeyEx(HKEY_LOCAL_MACHINE,	/* Registry tree */
						   key,					/* Key to look up */
						   0,					/* Reserved, must be 0 */
						   KEY_READ,			/* READ mode */
						   &handle);			/* Handle to the key */
		if(error!=ERROR_SUCCESS) {
			/* handle is not valid here: do not query or close it */
			printf("Error opening CPU speed key 0x%x\n", (unsigned int)error);
			fflush(stdout); 
		} else {
			error=RegQueryValueEx(handle,				/* Handle to the key */
								  valueName,			/* Value to query */
								  NULL,					/* Reserved, must be NULL */
								  &valueType,			/* Type of value */
								  (LPBYTE)&cpuMhz,		/* The result of the query */
								  &valueLength);		/* Length of the data */
			if(error!=ERROR_SUCCESS) {
				printf("Error getting CPU speed 0x%x\n", (unsigned int)error);
				fflush(stdout); 
				cpuMhz=0;						/* discard anything partially written */
			}
			RegCloseKey(handle);
		}
		/* Never publish a zero rate: later code divides by _ticksPerMicrosecond. */
		_ticksPerMicrosecond=(cpuMhz > 0) ? (unsigned int)cpuMhz : 1;
	}

#elif defined (__OS400__)
    /* There is always 4096 ticks per microsecond on AS400 */
    _ticksPerMicrosecond=4096;
#else

  volatile int j;
  volatile int A[4003];              /* volatile scratch storage; also the sink for the busy-work result */
  unsigned int ticks_per_microsecond;

  timestamp_t now;
  timestamp_t scratch, start;
  const int MM=10000;

  /* Exercise the timer before measuring; the second reading becomes the
   * origin that all tick samples below are taken relative to. */
  jvmpiAgent_getCurrentTime(&scratch);
  jvmpiAgent_getCurrentTime(&scratch);
  start = scratch;
  jvmpiAgent_getCurrentTime(&scratch);
  scratch = 0;
  for(j=0; j<MM; j++) {
    jvmpiAgent_getCurrentTime(&scratch);
  }
  jvmpiAgent_getCurrentTime(&scratch);

  {
    unsigned int total = 0;          /* sum of the per-round rates */
    unsigned int samples = 0;        /* rounds that produced a usable rate */
    const int M = 3;                 /* calibration rounds */
    const int N = 2003;              /* busy-work loop bound */
    A[5] = 5; A[15] = 10;
    for(j=0; j<M; j++) {
      struct timeval start_clock, end_clock;
      timestamp_t start_ticks, end_ticks, elapsed_us;
      long sec_delta, usec_delta;
      unsigned int interval_microseconds, tick_interval, this_one;
      double rate;

      gettimeofday(&start_clock, NULL);
      jvmpiAgent_getCurrentTime(&now);
      start_ticks = now-start;

      {
         /* Busy work whose only purpose is to let a measurable amount of time pass. */
         int i,k=1, l=1;
         for(i=0; i<N; i++) {
            for(l=1; l<N; l++) {
               k += (i*i)/k/l;
            }
         }
         for(i=1; i<N; i++) {
            k += (i*i+i*2)/i ;
         }
         /* Store the result in volatile memory so an optimizer cannot discard the loops. */
         A[25] = k;
      }

      jvmpiAgent_getCurrentTime(&now);
      end_ticks   = now-start;
      gettimeofday(&end_clock, NULL);

      /* Elapsed wall-clock time, borrowing a second when the microsecond field wrapped. */
      sec_delta  = (long)(end_clock.tv_sec - start_clock.tv_sec);
      usec_delta = (long)(end_clock.tv_usec - start_clock.tv_usec);
      if (usec_delta < 0) {
         --sec_delta;
         usec_delta += 1000000;
      }
      if (sec_delta < 0) {
         continue;                   /* wall clock stepped backwards: sample is meaningless */
      }
      elapsed_us  = sec_delta;
      elapsed_us *= 1000000;
      elapsed_us += usec_delta;
      interval_microseconds = (unsigned int)elapsed_us;
      if (interval_microseconds == 0) {
         continue;                   /* no measurable interval: cannot form a rate */
      }

      tick_interval = (unsigned int)(end_ticks-start_ticks);
      rate          = (double)tick_interval/interval_microseconds;
      this_one      = (unsigned int) ( rate < 1 ? 1 : rate );   /* floor each sample at 1 */

      total += this_one;
      samples++;
    }

    /* Average of the usable rounds; fall back to the floor value when there were none. */
    ticks_per_microsecond = (samples > 0) ? total/samples : 1;
  }

  if (boggie != NULL) {
    *boggie = A[5]+A[15];
  }
  _ticksPerMicrosecond = ticks_per_microsecond;
#endif
}


/** COLLECT_START_TIME_INFORMATION  ****************************************
  * Records the moment of the call in two forms so that later readings of
  * jvmpiAgent_getCurrentTime() can be related to wall-clock time:
  *   _startTime         - the wall clock, converted with timeToTicks()
  *   _startTimeAsTicks  - the raw value of jvmpiAgent_getCurrentTime()
  * and stores the ticksToTime2() conversion of their difference, signed
  * according to which of the two is larger, in _startTimeAsDouble.
  */
void jvmpiAgent_collectStartTimeInformation() {
#ifndef _WIN32
	struct timeval wallClock;

	gettimeofday(&wallClock, NULL);
	_startTime = timeToTicks(wallClock.tv_sec, wallClock.tv_usec);
#else
	TIMEB wallClock;

	FTIME(&wallClock);
	/* only milliseconds are available here; scale them to microseconds */
	_startTime = timeToTicks(wallClock.time, wallClock.millitm*1000);
#endif

	jvmpiAgent_getCurrentTime(&_startTimeAsTicks);

	/* Subtract the smaller from the larger and carry the sign separately. */
	if (_startTimeAsTicks > _startTime) {
		_startTimeAsDouble = -ticksToTime2(_startTimeAsTicks-_startTime, TRUE);
	} else {
		_startTimeAsDouble = ticksToTime2(_startTime-_startTimeAsTicks, TRUE);
	}
}


/* GET_START_TIME  ********************************************************
 * Copies the start time saved by jvmpiAgent_collectStartTimeInformation()
 * into the caller's variable.
 *
 * time - destination that receives the value of _startTime
 */
void jvmpiAgent_getStartTime(timestamp_t *time) {
  const timestamp_t recorded = _startTime;

  *time = recorded;
}

/* GET_CURRENT_TIME  ******************************************************
 * Reads the platform's timer and stores the reading in *timestamp.
 *
 * The source of the reading is selected at compile time: the Windows
 * performance counter (scaled by _tickBooster) or the CPU time-stamp
 * counter, STCK on Linux/390 and OS/390, gethrtime() on Solaris,
 * read_real_time() on AIX, mattod() on OS/400, control register 16 on
 * HP-UX, and gettimeofday() expressed in microseconds everywhere else.
 *
 * timestamp - receives the reading.  On Windows it is left untouched if
 *             QueryPerformanceCounter() fails.
 */
void jvmpiAgent_getCurrentTime(timestamp_t *timestamp) {
/* 
 * Note: WIN32 designates Windows generally and is specified with or without _WIN64.
 * Therefore, when both are true, the _WIN64 case is desired (as in the following sequence).
 */

/* Windows */
#if defined(_WIN64) || defined(_WIN32)

	if (_highResPerfAvailable==TRUE) {  /* bug 160511 start */
		LARGE_INTEGER liHighResPerfCount;		
		if (QueryPerformanceCounter(&liHighResPerfCount)) {
			/* boosted by the same factor that was applied to _ticksPerMicrosecond */
			*timestamp=_tickBooster*liHighResPerfCount.QuadPart;
		} else {
			printf("Failed attempt to get highResperfCount\n");
		}		
	} else {  /* bug 160511 end */	

 #if defined(_WIN64)	/*ts. bug 120479*/
  #ifdef _M_IA64
		*timestamp = __getReg(CV_IA64_ApITC);
  #else
		*timestamp = __rdtsc();
  #endif
 #else /* 32-bit _WIN32: read the time-stamp counter with inline assembly */
  #define RDTSC(tsc_high,tsc_low) \
{                               \
        __asm rdtsc             \
        __asm mov tsc_high,edx  \
        __asm mov tsc_low,eax   \
}

  #define RDTSC_LL(time64)               \
{					                   \
		unsigned int tsc_high,tsc_low; \
		RDTSC(tsc_high, tsc_low);      \
		time64 = tsc_high;             \
	    time64 = time64 << 32;         \
		time64 = time64 | tsc_low;     \
}

		RDTSC_LL(*timestamp);
 #endif
	}

/* Linux */
#elif defined(__linux__) && defined(__s390__) /* bug 160511: deleted __i386__ clause*/
		/* for Linux/390, generate gcc assembler to use the STCK
			instruction, which provides the current cycle counter */
		asm ("STCK %0" : "=m" (*timestamp) );

/* OS/390 */
#elif defined(MVS)
/* for os/390, use a pre-defined macro to accomplish the
   same thing we did on Linux/390 */
        __stck(timestamp);


#elif defined(SOLARIS)
/* for Solaris, we don't yet use the most efficient method
   to acquire a high-res clock, but this one is pretty good */
       hrtime_t hr_now = gethrtime();
       *timestamp=hr_now;

#elif defined(_AIX)
/* AIX has built-in routines to handle this, but is somewhat
   chip-dependent (PowerPC versus POWER) */
      timebasestruct_t t;
      (void)read_real_time(&t, sizeof(timebasestruct_t));

      if(t.flag == RTC_POWER_PC) {
       /* then t contains the high-order
          and low-order 32 bits of the cycle counter. */
          Uint64 ticks64 = t.tb_high;
          ticks64  = ticks64 << 32;
          ticks64 |= t.tb_low;
          *timestamp=ticks64;
      }
      else {
       /* then t contains seconds and nanoseconds (a little more
          expensive in this case) */
          int32_t secs   = t.tb_high;
          int32_t n_secs = t.tb_low;

       /*
        * If there was a carry from low-order to high-order during
        * the measurement, we may have to undo it.
        */
        if (n_secs < 0)  {
          secs--;
          n_secs += 1000000000;
        }

        /* Combine into nanoseconds: scale the SECONDS (in 64-bit arithmetic,
           so the product cannot overflow) and add the nanosecond remainder. */
        *timestamp = (timestamp_t)secs * 1000000000 + n_secs;
      }

#elif defined (__OS400__)
      mattod((unsigned char*)timestamp); 
#elif defined (_HPUX)
      #include <machine/inline.h>
      register unsigned long long now;
      _MFCTL(16, now);
      *timestamp = now;
#else /* platform on which we don't support high-resolution timers */
	struct timeval tv;
	timestamp_t usec_now;
	/* The timezone argument is obsolete; pass NULL like the other callers in this file. */
	gettimeofday(&tv, NULL);
	/* Widen before scaling so the product does not overflow a 32-bit time_t/long. */
	usec_now  = (timestamp_t)tv.tv_sec * 1000000;
	usec_now += tv.tv_usec;
	*timestamp = usec_now;
#endif /* CHOOSE PLATFORM */
}


/** GET_CURRENT_THREAD_CPU_TIME  ************************************************
  * Returns the amount of CPU time consumed by the calling thread as a 64 bit
  * number, as reported by the JVMPI interface's GetCurrentThreadCpuTime().
  * This is returned in nanoseconds.
  *
  * Returns 0 when no JVMPI interface has been set up.
  */
Uint64 jvmpiAgent_getCurrentThreadCPUTime() {

	if (!_jvmpiAgent_jvmpiInterface) {
		return 0; 
	}

#ifdef __OS400__
	/* on OS/400 the call returns an aggregate; take its unsigned 64-bit member */
	return (_jvmpiAgent_jvmpiInterface->GetCurrentThreadCpuTime()).unsigned_ll;
#else
	return _jvmpiAgent_jvmpiInterface->GetCurrentThreadCpuTime(); 
#endif 
}

/** GET_PROCESS_START_TIME  ***************************************************
  *  Returns the start time of the current process as a timestamp_t in UTC time,
  *  expressed on the same corrected scale as the other trace timestamps (the
  *  constant _startTime-_startTimeAsTicks is subtracted).
  *
  *  On Windows the creation time is obtained from GetProcessTimes().  On other
  *  platforms, and on Windows when the process times cannot be obtained, the
  *  time the trace was started is reported instead.
  *
  *  timestamp - receives the result
  */
void jvmpiAgent_getProcessStartTime(timestamp_t *timestamp) {
#ifdef _WIN32
	
	FILETIME  start, end, kernal, user;
	Uint64 startTime, deltaTime;
	HANDLE handle;
	timestamp_t ticks;
	BOOL gotTimes=FALSE;
	
	handle=OpenProcess(PROCESS_QUERY_INFORMATION,
					   FALSE,
					   ra_getProcessId());
	if (handle != NULL) {
		gotTimes=GetProcessTimes(handle,
					   &start,	/* start time */
					   &end,	/* dont want exit time */
					   &kernal,	/* dont want kernel mode time */
					   &user);	/* dont want user mode time */
		/* OpenProcess returned a real handle that must be released */
		CloseHandle(handle);
	}

	if (!gotTimes) {
		/* start was not filled in; report the trace start time, as on Unix */
		*timestamp = _startTime - (_startTime-_startTimeAsTicks);
		return;
	}

	/* The filetime structure starts as of January 1, 1601.  Subtract the passed time
	   between this and January 1, 1970
	*/
	startTime=((Uint64)(start.dwHighDateTime))<<32;
	startTime|=start.dwLowDateTime;

	deltaTime=((Uint64)(0x19db1de))<<32;
	deltaTime|=0xd53e8000;

	startTime-=deltaTime;

	/* The startTime is now a count of 100ns intervals since January 1, 1970.  We need to change this to
	 * seconds and microseconds.
	 */
	ticks = timeToTicks(startTime/10000000,(startTime%10000000)/10);

	/* The ticks variable has the real value that represents the process start time.
	 * Because all of the timestamps taken in the trace are actually corrected by the
	 * constant _startTime-_startTimeAsTicks we need to subtract it here.
	 */
	*timestamp = ticks - (_startTime-_startTimeAsTicks);
#else
	/* On Unix we don't know when the process was started.  We
	 * will specify the time the trace was started.
	 * Because all of the timestamps taken in the trace are actually corrected by the
	 * constant _startTime-_startTimeAsTicks we need to subtract it here.
	 */
	*timestamp = _startTime - (_startTime-_startTimeAsTicks);
#endif
}

/** GET_TIMEZONE  *************************************************************
  * Returns the current timezone as an offset in minutes, counted westward from
  * GMT: a zone five hours west of GMT yields 300, as there are 300 minutes in
  * 5 hours.
  *
  * The value comes from the timezone field filled in by FTIME(), or from
  * tz_minuteswest of gettimeofday() on OS/400.
  * NOTE(review): the platform field is converted to unsigned long on return;
  * confirm how callers treat zones east of GMT.
  */
unsigned long jvmpiAgent_getTimezone() {
    /* The gettimeofday() variant may work on all platforms. */
#ifndef __OS400__
	TIMEB snapshot;

	FTIME(&snapshot);
	return snapshot.timezone;
#else
    struct timeval  tv;
    struct timezone tz;

    gettimeofday(&tv, &tz);
    return tz.tz_minuteswest;
#endif
}


/** RKD:  The functions below are 64bit INTEL assembler functions to test compiler
  * support and are not used anywhere in the code today.
  */

/*
 * sub64: computes op1 - op2 from 32-bit halves using inline assembly.
 *
 * Only built when _INTEL is defined and _WIN64 is not; in every other
 * configuration the function simply returns 0.
 *
 * op1, op2 - the 64-bit minuend and subtrahend
 * returns  - the high and low partial results recombined into a Uint64
 *
 * NOTE(review): the halves are held in unsigned long, which presumably is
 * 32 bits wide on the compiler this was written for - confirm.
 */
_inline Uint64 sub64(Uint64 op1, Uint64 op2) {
#if defined (_INTEL) && !defined(_WIN64) /*ts. bug 120479*/
	unsigned long result_high=0, result_low=0,
				  op1_high, op1_low,
				  op2_high, op2_low;
	/* split each operand into its upper and lower 32 bits */
	op1_high=(unsigned long)(op1>>32 & 0x00000000ffffffff);
	op1_low=(unsigned long)op1;
	op2_high=(unsigned long)(op2>>32 & 0x00000000ffffffff);
	op2_low=(unsigned long)op2;
	/* Subtract the high halves, then the low halves; if the low subtraction
	   set the carry flag (a borrow), take one more off the high result. */
	__asm {
		pushad				/* Store all the general purpose registers */
		mov edx, op1_high	/* High order subtract first */
		sub edx, op2_high
		mov result_high, edx
		mov edx, op1_low	/* Low order subtract next */
		clc					/* Clear the carry flag */
		sub edx, op2_low
		mov result_low, edx
		jnc Noflow			/* Did we borrow? If so decrement the high order bits */
		dec result_high
Noflow: popad
	}
	/* reassemble the two halves */
	return ((Uint64)result_high<<32) | result_low;
#else
	return 0;
#endif
}


/*
 * div64: divides the 64-bit value op1 by the 32-bit value op2 with the x86
 * DIV instruction and stores the quotient through 'quotient'.
 *
 * The body is only built when INTEL is defined (see the note on the #ifdef);
 * otherwise the function does nothing.
 *
 * op1       - dividend
 * op2       - divisor
 * quotient  - receives the 32-bit quotient; may be NULL
 * remainder - passed by value, so the remainder cannot be handed back to the
 *             caller through it; the parameter is kept for compatibility
 *
 * NOTE(review): DIV raises a divide error when op2 is zero or the quotient
 * does not fit in 32 bits; callers would have to guarantee neither happens.
 */
_inline void div64(Uint64 op1, unsigned long op2, unsigned long *quotient, unsigned long remainder) {
#ifdef INTEL /* TO GET THIS TO COMPILE CHANGE TO "_INTEL" */
	unsigned long q, r, op1_high, op1_low;
	op1_high=(unsigned long)(op1>>32 & 0x00000000ffffffff);
	op1_low=(unsigned long)op1;
	__asm {
		pushad			/* Store all the general purpose registers */
		mov	edx, op1_high	/* dividend is taken from edx:eax */
		mov eax, op1_low
		div op2
		mov r, edx		/* remainder */
		mov q, eax		/* quotient */
		popad
	}
	if (quotient) {
		*quotient = q;
	}
	/* the remainder has nowhere to go: 'remainder' is not a pointer */
	(void)r;
	(void)remainder;

#else
#endif
}

/*********************************************************
 * DEBUG assists:
 */
#if defined(_DEBUG) && !defined (MVS) && !defined (__OS400__)
/* This function tests for a POTENTIAL/IMMINENT time accumulation overflow and prints if one is detected.
 *
 * testTime - accumulated time, in ticks
 * errMsg   - text written verbatim to stdout when testTime corresponds to more
 *            than 1000 seconds; it is never interpreted as a format string
 *
 * Nothing is printed when errMsg is NULL or the tick rate has not been
 * determined yet (_ticksPerMicrosecond is 0).
 */
void debug_check_and_report_time_overflow (timestamp_t testTime, char *errMsg) {                /* 134577 */
	/* widen before scaling: the product exceeds 32 bits once the rate passes 4294 ticks per microsecond */
	timestamp_t ticksPerSecond = (timestamp_t)_ticksPerMicrosecond * 1000000;

	if (ticksPerSecond == 0 || errMsg == NULL) {
		return;
	}
	/* if time (in  seconds) is suspiciously high (over 1000 secs for testing), then print message */ 
	if (testTime/ticksPerSecond > 1000) {
		printf("%s", errMsg);
		fflush(stdout); 
	}	
}
#endif
