diff options
Diffstat (limited to 'MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/performanceTimers.c')
-rw-r--r-- | MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/performanceTimers.c | 309 |
1 files changed, 0 insertions, 309 deletions
diff --git a/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/performanceTimers.c b/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/performanceTimers.c deleted file mode 100644 index 83d47341..00000000 --- a/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/performanceTimers.c +++ /dev/null @@ -1,309 +0,0 @@ -/// \file -/// Performance timer functions. -/// -/// Use the timer functionality to collect timing and number of calls -/// information for chosen computations (such as force calls) and -/// communication (such as sends, receives, reductions). Timing results -/// are reported at the end of the run showing overall timings and -/// statistics of timings across ranks. -/// -/// A new timer can be added as follows: -/// -# add new handle to the TimerHandle in performanceTimers.h -/// -# provide a corresponding name in timerName -/// -/// Note that the order of the handles and names must be the -/// same. This order also determines the order in which the timers are -/// printed. Names can contain leading spaces to show a hierarchical -/// ordering. Timers with zero calls are omitted from the report. -/// -/// Raw timer data is obtained from the getTime() and getTick() -/// functions. The supplied portable versions of these functions can be -/// replaced with platform specific versions for improved accuracy or -/// lower latency. -/// \see TimerHandle -/// \see getTime -/// \see getTick -/// - - -#include "performanceTimers.h" - -#include <stdio.h> -#include <sys/time.h> -#include <string.h> -#include <stdint.h> -#include <inttypes.h> -#include <math.h> - -#include "performanceTimers.h" -#include "mytype.h" -#include "parallel.h" -#include "yamlOutput.h" - -static uint64_t getTime(void); -static double getTick(void); -static void timerStats(void); - -/// You must add timer name in same order as enum in .h file. -/// Leading spaces can be specified to show a hierarchy of timers. -char* timerName[numberOfTimers] = { - "total", - "loop", - "timestep", - " position", - " velocity", - " redistribute", - " atomHalo", - " force", - " eamHalo", - "commHalo", - "commReduce" -}; - -/// Timer data collected. Also facilitates computing averages and -/// statistics. -typedef struct TimersSt -{ - uint64_t start; //!< call start time - uint64_t total; //!< current total time - uint64_t count; //!< current call count - uint64_t elapsed; //!< lap time - - int minRank; //!< rank with min value - int maxRank; //!< rank with max value - - double minValue; //!< min over ranks - double maxValue; //!< max over ranks - double average; //!< average over ranks - double stdev; //!< stdev across ranks -} Timers; - -/// Global timing data collected. -typedef struct TimerGlobalSt -{ - double atomRate; //!< average time (us) per atom per rank - double atomAllRate; //!< average time (us) per atom - double atomsPerUSec; //!< average atoms per time (us) -} TimerGlobal; - -static Timers perfTimer[numberOfTimers]; -static TimerGlobal perfGlobal; - -void profileStart(const enum TimerHandle handle) -{ - perfTimer[handle].start = getTime(); -} - -void profileStop(const enum TimerHandle handle) -{ - perfTimer[handle].count += 1; - uint64_t delta = getTime() - perfTimer[handle].start; - perfTimer[handle].total += delta; - perfTimer[handle].elapsed += delta; -} - -/// \details -/// Return elapsed time (in seconds) since last call with this handle -/// and clear for next lap. -double getElapsedTime(const enum TimerHandle handle) -{ - double etime = getTick() * (double)perfTimer[handle].elapsed; - perfTimer[handle].elapsed = 0; - - return etime; -} - -/// \details -/// The report contains two blocks. The upper block is performance -/// information for the printRank. The lower block is statistical -/// information over all ranks. -void printPerformanceResults(int nGlobalAtoms, int printRate) -{ - // Collect timer statistics overall and across ranks - timerStats(); - - if (!printRank()) - return; - - // only print timers with non-zero values. - double tick = getTick(); - double loopTime = perfTimer[loopTimer].total*tick; - - fprintf(screenOut, "\n\nTimings for Rank %d\n", getMyRank()); - fprintf(screenOut, " Timer # Calls Avg/Call (s) Total (s) %% Loop\n"); - fprintf(screenOut, "___________________________________________________________________\n"); - for (int ii=0; ii<numberOfTimers; ++ii) - { - double totalTime = perfTimer[ii].total*tick; - if (perfTimer[ii].count > 0) - fprintf(screenOut, "%-16s%12"PRIu64" %8.4f %8.4f %8.2f\n", - timerName[ii], - perfTimer[ii].count, - totalTime/(double)perfTimer[ii].count, - totalTime, - totalTime/loopTime*100.0); - } - - fprintf(screenOut, "\nTiming Statistics Across %d Ranks:\n", getNRanks()); - fprintf(screenOut, " Timer Rank: Min(s) Rank: Max(s) Avg(s) Stdev(s)\n"); - fprintf(screenOut, "_____________________________________________________________________________\n"); - - for (int ii = 0; ii < numberOfTimers; ++ii) - { - if (perfTimer[ii].count > 0) - fprintf(screenOut, "%-16s%6d:%10.4f %6d:%10.4f %10.4f %10.4f\n", - timerName[ii], - perfTimer[ii].minRank, perfTimer[ii].minValue*tick, - perfTimer[ii].maxRank, perfTimer[ii].maxValue*tick, - perfTimer[ii].average*tick, perfTimer[ii].stdev*tick); - } - double atomsPerTask = nGlobalAtoms/(real_t)getNRanks(); - perfGlobal.atomRate = perfTimer[timestepTimer].average * tick * 1e6 / - (atomsPerTask * perfTimer[timestepTimer].count * printRate); - perfGlobal.atomAllRate = perfTimer[timestepTimer].average * tick * 1e6 / - (nGlobalAtoms * perfTimer[timestepTimer].count * printRate); - perfGlobal.atomsPerUSec = 1.0 / perfGlobal.atomAllRate; - - fprintf(screenOut, "\n---------------------------------------------------\n"); - fprintf(screenOut, " Average atom update rate: %6.2f us/atom/task\n", perfGlobal.atomRate); - fprintf(screenOut, "---------------------------------------------------\n\n"); - - fprintf(screenOut, "\n---------------------------------------------------\n"); - fprintf(screenOut, " Average all atom update rate: %6.2f us/atom\n", perfGlobal.atomAllRate); - fprintf(screenOut, "---------------------------------------------------\n\n"); - - fprintf(screenOut, "\n---------------------------------------------------\n"); - fprintf(screenOut, " Average atom rate: %6.2f atoms/us\n", perfGlobal.atomsPerUSec); - fprintf(screenOut, "---------------------------------------------------\n\n"); -} - -void printPerformanceResultsYaml(FILE* file) -{ - if (! printRank()) - return; - - double tick = getTick(); - double loopTime = perfTimer[loopTimer].total*tick; - - fprintf(file,"\nPerformance Results:\n"); - fprintf(file, " TotalRanks: %d\n", getNRanks()); - fprintf(file, " ReportingTimeUnits: seconds\n"); - fprintf(file, "Performance Results For Rank %d:\n", getMyRank()); - for (int ii = 0; ii < numberOfTimers; ii++) - { - if (perfTimer[ii].count > 0) - { - double totalTime = perfTimer[ii].total*tick; - fprintf(file, " Timer: %s\n", timerName[ii]); - fprintf(file, " CallCount: %"PRIu64"\n", perfTimer[ii].count); - fprintf(file, " AvgPerCall: %8.4f\n", totalTime/(double)perfTimer[ii].count); - fprintf(file, " Total: %8.4f\n", totalTime); - fprintf(file, " PercentLoop: %8.2f\n", totalTime/loopTime*100); - } - } - - fprintf(file, "Performance Results Across Ranks:\n"); - for (int ii = 0; ii < numberOfTimers; ii++) - { - if (perfTimer[ii].count > 0) - { - fprintf(file, " Timer: %s\n", timerName[ii]); - fprintf(file, " MinRank: %d\n", perfTimer[ii].minRank); - fprintf(file, " MinTime: %8.4f\n", perfTimer[ii].minValue*tick); - fprintf(file, " MaxRank: %d\n", perfTimer[ii].maxRank); - fprintf(file, " MaxTime: %8.4f\n", perfTimer[ii].maxValue*tick); - fprintf(file, " AvgTime: %8.4f\n", perfTimer[ii].average*tick); - fprintf(file, " StdevTime: %8.4f\n", perfTimer[ii].stdev*tick); - } - } - - fprintf(file,"Performance Global Update Rates:\n"); - fprintf(file, " AtomUpdateRate:\n"); - fprintf(file, " AverageRate: %6.2f\n", perfGlobal.atomRate); - fprintf(file, " Units: us/atom/task\n"); - fprintf(file, " AllAtomUpdateRate:\n"); - fprintf(file, " AverageRate: %6.2f\n", perfGlobal.atomAllRate); - fprintf(file, " Units: us/atom\n"); - fprintf(file, " AtomRate:\n"); - fprintf(file, " AverageRate: %6.2f\n", perfGlobal.atomsPerUSec); - fprintf(file, " Units: atoms/us\n"); - - fprintf(file, "\n"); -} - -/// Returns current time as a 64-bit integer. This portable version -/// returns the number of microseconds since mindight, Jamuary 1, 1970. -/// Hence, timing data will have a resolution of 1 microsecond. -/// Platforms with access to calls with lower latency or higher -/// resolution (such as a cycle counter) may wish to replace this -/// implementation and change the conversion factor in getTick as -/// appropriate. -/// \see getTick for the conversion factor between the integer time -/// units of this function and seconds. -static uint64_t getTime(void) -{ - struct timeval ptime; - uint64_t t = 0; - gettimeofday(&ptime, (struct timezone *)NULL); - t = ((uint64_t)1000000)*(uint64_t)ptime.tv_sec + (uint64_t)ptime.tv_usec; - - return t; -} - -/// Returns the factor for converting the integer time reported by -/// getTime into seconds. The portable getTime returns values in units -/// of microseconds so the conversion is simply 1e-6. -/// \see getTime -static double getTick(void) -{ - double seconds_per_cycle = 1.0e-6; - return seconds_per_cycle; -} - -/// Collect timer statistics across ranks. -void timerStats(void) -{ - double sendBuf[numberOfTimers], recvBuf[numberOfTimers]; - - // Determine average of each timer across ranks - for (int ii = 0; ii < numberOfTimers; ii++) - sendBuf[ii] = (double)perfTimer[ii].total; - addDoubleParallel(sendBuf, recvBuf, numberOfTimers); - - for (int ii = 0; ii < numberOfTimers; ii++) - perfTimer[ii].average = recvBuf[ii] / (double)getNRanks(); - - - // Determine min and max across ranks and which rank - RankReduceData reduceSendBuf[numberOfTimers], reduceRecvBuf[numberOfTimers]; - for (int ii = 0; ii < numberOfTimers; ii++) - { - reduceSendBuf[ii].val = (double)perfTimer[ii].total; - reduceSendBuf[ii].rank = getMyRank(); - } - minRankDoubleParallel(reduceSendBuf, reduceRecvBuf, numberOfTimers); - for (int ii = 0; ii < numberOfTimers; ii++) - { - perfTimer[ii].minValue = reduceRecvBuf[ii].val; - perfTimer[ii].minRank = reduceRecvBuf[ii].rank; - } - maxRankDoubleParallel(reduceSendBuf, reduceRecvBuf, numberOfTimers); - for (int ii = 0; ii < numberOfTimers; ii++) - { - perfTimer[ii].maxValue = reduceRecvBuf[ii].val; - perfTimer[ii].maxRank = reduceRecvBuf[ii].rank; - } - - // Determine standard deviation - for (int ii = 0; ii < numberOfTimers; ii++) - { - double temp = (double)perfTimer[ii].total - perfTimer[ii].average; - sendBuf[ii] = temp * temp; - } - addDoubleParallel(sendBuf, recvBuf, numberOfTimers); - for (int ii = 0; ii < numberOfTimers; ii++) - { - perfTimer[ii].stdev = sqrt(recvBuf[ii] / (double) getNRanks()); - } -} - |