diff options
Diffstat (limited to 'MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CoMD.c')
-rw-r--r-- | MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CoMD.c | 1107 |
1 file changed, 0 insertions, 1107 deletions
diff --git a/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CoMD.c b/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CoMD.c deleted file mode 100644 index 5c2263f2..00000000 --- a/MultiSource/Benchmarks/DOE-ProxyApps-C/CoMD/CoMD.c +++ /dev/null @@ -1,1107 +0,0 @@ -/// \file -/// Main program -/// -/// \mainpage CoMD: A Classical Molecular Dynamics Mini-app -/// -/// CoMD is a reference implementation of typical classical molecular -/// dynamics algorithms and workloads. It is created and maintained by -/// The Exascale Co-Design Center for Materials in Extreme Environments -/// (ExMatEx). http://codesign.lanl.gov/projects/exmatex. The -/// code is intended to serve as a vehicle for co-design by allowing -/// others to extend and/or reimplement it as needed to test performance of -/// new architectures, programming models, etc. -/// -/// The current version of CoMD is available from: -/// http://exmatex.github.io/CoMD -/// -/// To contact the developers of CoMD send email to: exmatex-comd@llnl.gov. -/// -/// Table of Contents -/// ================= -/// -/// Click on the links below to browse the CoMD documentation. -/// -/// \subpage pg_md_basics -/// -/// \subpage pg_building_comd -/// -/// \subpage pg_running_comd -/// -/// \subpage pg_measuring_performance -/// -/// \subpage pg_problem_selection_and_scaling -/// -/// \subpage pg_verifying_correctness -/// -/// \subpage pg_comd_architecture -/// -/// \subpage pg_optimization_targets -/// -/// \subpage pg_whats_new - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <strings.h> -#include <unistd.h> -#include <assert.h> - -#include "CoMDTypes.h" -#include "decomposition.h" -#include "linkCells.h" -#include "eam.h" -#include "ljForce.h" -#include "initAtoms.h" -#include "memUtils.h" -#include "yamlOutput.h" -#include "parallel.h" -#include "performanceTimers.h" -#include "mycommand.h" -#include "timestep.h" -#include "constants.h" - -#define REDIRECT_OUTPUT 0 -#define MIN(A,B) ((A) < (B) ? 
(A) : (B)) - -static SimFlat* initSimulation(Command cmd); -static void destroySimulation(SimFlat** ps); - -static void initSubsystems(void); -static void finalizeSubsystems(void); - -static BasePotential* initPotential( - int doeam, const char* potDir, const char* potName, const char* potType); -static SpeciesData* initSpecies(BasePotential* pot); -static Validate* initValidate(SimFlat* s); -static void validateResult(const Validate* val, SimFlat *sim); - -static void sumAtoms(SimFlat* s); -static void printThings(SimFlat* s, int iStep, double elapsedTime); -static void printSimulationDataYaml(FILE* file, SimFlat* s); -static void sanityChecks(Command cmd, double cutoff, double latticeConst, char latticeType[8]); - - -int main(int argc, char** argv) -{ - // Prolog - initParallel(&argc, &argv); - profileStart(totalTimer); - initSubsystems(); - timestampBarrier("Starting Initialization\n"); - -#ifdef PRINT_YAML - yamlAppInfo(yamlFile); -#endif - yamlAppInfo(screenOut); - - Command cmd = parseCommandLine(argc, argv); -#ifdef PRINT_YAML - printCmdYaml(yamlFile, &cmd); -#endif - printCmdYaml(screenOut, &cmd); - - SimFlat* sim = initSimulation(cmd); -#ifdef PRINT_YAML - printSimulationDataYaml(yamlFile, sim); -#endif - printSimulationDataYaml(screenOut, sim); - - Validate* validate = initValidate(sim); // atom counts, energy - timestampBarrier("Initialization Finished\n"); - - timestampBarrier("Starting simulation\n"); - - // This is the CoMD main loop - const int nSteps = sim->nSteps; - const int printRate = sim->printRate; - int iStep = 0; - profileStart(loopTimer); - for (; iStep<nSteps;) - { - startTimer(commReduceTimer); - sumAtoms(sim); - stopTimer(commReduceTimer); - - printThings(sim, iStep, getElapsedTime(timestepTimer)); - - startTimer(timestepTimer); - timestep(sim, printRate, sim->dt); - stopTimer(timestepTimer); - - iStep += printRate; - } - profileStop(loopTimer); - - sumAtoms(sim); - printThings(sim, iStep, getElapsedTime(timestepTimer)); - 
timestampBarrier("Ending simulation\n"); - - // Epilog - validateResult(validate, sim); - profileStop(totalTimer); - -#ifdef PRINT_TIMING - printPerformanceResults(sim->atoms->nGlobal, sim->printRate); - printPerformanceResultsYaml(yamlFile); -#endif - - destroySimulation(&sim); - comdFree(validate); - finalizeSubsystems(); - - timestampBarrier("CoMD Ending\n"); - destroyParallel(); - - return 0; -} - -/// Initialized the main CoMD data stucture, SimFlat, based on command -/// line input from the user. Also performs certain sanity checks on -/// the input to screen out certain non-sensical inputs. -/// -/// Simple data members such as the time step dt are initialized -/// directly, substructures such as the potential, the link cells, the -/// atoms, etc., are initialized by calling additional initialization -/// functions (initPotential(), initLinkCells(), initAtoms(), etc.). -/// Initialization order is set by the natural dependencies of the -/// substructure such as the atoms need the link cells so the link cells -/// must be initialized before the atoms. -SimFlat* initSimulation(Command cmd) -{ - SimFlat* sim = comdMalloc(sizeof(SimFlat)); - sim->nSteps = cmd.nSteps; - sim->printRate = cmd.printRate; - sim->dt = cmd.dt; - sim->domain = NULL; - sim->boxes = NULL; - sim->atoms = NULL; - sim->ePotential = 0.0; - sim->eKinetic = 0.0; - sim->atomExchange = NULL; - - sim->pot = initPotential(cmd.doeam, cmd.potDir, cmd.potName, cmd.potType); - real_t latticeConstant = cmd.lat; - if (cmd.lat < 0.0) - latticeConstant = sim->pot->lat; - - // ensure input parameters make sense. 
- sanityChecks(cmd, sim->pot->cutoff, latticeConstant, sim->pot->latticeType); - - sim->species = initSpecies(sim->pot); - - real3 globalExtent; - globalExtent[0] = cmd.nx * latticeConstant; - globalExtent[1] = cmd.ny * latticeConstant; - globalExtent[2] = cmd.nz * latticeConstant; - - sim->domain = initDecomposition( - cmd.xproc, cmd.yproc, cmd.zproc, globalExtent); - - sim->boxes = initLinkCells(sim->domain, sim->pot->cutoff); - sim->atoms = initAtoms(sim->boxes); - - // create lattice with desired temperature and displacement. - createFccLattice(cmd.nx, cmd.ny, cmd.nz, latticeConstant, sim); - setTemperature(sim, cmd.temperature); - randomDisplacements(sim, cmd.initialDelta); - - sim->atomExchange = initAtomHaloExchange(sim->domain, sim->boxes); - - // Forces must be computed before we call the time stepper. - startTimer(redistributeTimer); - redistributeAtoms(sim); - stopTimer(redistributeTimer); - - startTimer(computeForceTimer); - computeForce(sim); - stopTimer(computeForceTimer); - - kineticEnergy(sim); - - return sim; -} - -/// frees all data associated with *ps and frees *ps -void destroySimulation(SimFlat** ps) -{ - if ( ! ps ) return; - - SimFlat* s = *ps; - if ( ! 
s ) return; - - BasePotential* pot = s->pot; - if ( pot) pot->destroy(&pot); - destroyLinkCells(&(s->boxes)); - destroyAtoms(s->atoms); - destroyHaloExchange(&(s->atomExchange)); - comdFree(s->species); - comdFree(s->domain); - comdFree(s); - *ps = NULL; - - return; -} - -void initSubsystems(void) -{ -#if REDIRECT_OUTPUT - freopen("testOut.txt","w",screenOut); -#endif - -#ifdef PRINT_YAML - yamlBegin(); -#endif -} - -void finalizeSubsystems(void) -{ -#if REDIRECT_OUTPUT - fclose(screenOut); -#endif - -#ifdef PRINT_YAML - yamlEnd(); -#endif -} - -/// decide whether to get LJ or EAM potentials -BasePotential* initPotential( - int doeam, const char* potDir, const char* potName, const char* potType) -{ - BasePotential* pot = NULL; - - if (doeam) - pot = initEamPot(potDir, potName, potType); - else - pot = initLjPot(); - assert(pot); - return pot; -} - -SpeciesData* initSpecies(BasePotential* pot) -{ - SpeciesData* species = comdMalloc(sizeof(SpeciesData)); - - strcpy(species->name, pot->name); - species->atomicNo = pot->atomicNo; - species->mass = pot->mass; - - return species; -} - -Validate* initValidate(SimFlat* sim) -{ - sumAtoms(sim); - Validate* val = comdMalloc(sizeof(Validate)); - val->eTot0 = (sim->ePotential + sim->eKinetic) / sim->atoms->nGlobal; - val->nAtoms0 = sim->atoms->nGlobal; - - if (printRank()) - { - fprintf(screenOut, "\n"); - printSeparator(screenOut); - fprintf(screenOut, "Initial energy : %14.12f, atom count : %d \n", - val->eTot0, val->nAtoms0); - fprintf(screenOut, "\n"); - } - return val; -} - -void validateResult(const Validate* val, SimFlat* sim) -{ - if (printRank()) - { - real_t eFinal = (sim->ePotential + sim->eKinetic) / sim->atoms->nGlobal; - - int nAtomsDelta = (sim->atoms->nGlobal - val->nAtoms0); - - fprintf(screenOut, "\n"); - fprintf(screenOut, "\n"); - fprintf(screenOut, "Simulation Validation:\n"); - - fprintf(screenOut, " Initial energy : %14.12f\n", val->eTot0); - fprintf(screenOut, " Final energy : %14.12f\n", eFinal); - 
fprintf(screenOut, " eFinal/eInitial : %f\n", eFinal/val->eTot0); - if ( nAtomsDelta == 0) - { - fprintf(screenOut, " Final atom count : %d, no atoms lost\n", - sim->atoms->nGlobal); - } - else - { - fprintf(screenOut, "#############################\n"); - fprintf(screenOut, "# WARNING: %6d atoms lost #\n", nAtomsDelta); - fprintf(screenOut, "#############################\n"); - } - } -} - -void sumAtoms(SimFlat* s) -{ - // sum atoms across all processers - s->atoms->nLocal = 0; - for (int i = 0; i < s->boxes->nLocalBoxes; i++) - { - s->atoms->nLocal += s->boxes->nAtoms[i]; - } - - startTimer(commReduceTimer); - addIntParallel(&s->atoms->nLocal, &s->atoms->nGlobal, 1); - stopTimer(commReduceTimer); -} - -/// Prints current time, energy, performance etc to monitor the state of -/// the running simulation. Performance per atom is scaled by the -/// number of local atoms per process this should give consistent timing -/// assuming reasonable load balance -void printThings(SimFlat* s, int iStep, double elapsedTime) -{ - // keep track previous value of iStep so we can calculate number of steps. - static int iStepPrev = -1; - static int firstCall = 1; - - int nEval = iStep - iStepPrev; // gives nEval = 1 for zeroth step. - iStepPrev = iStep; - - if (! 
printRank() ) - return; - - if (firstCall) - { - firstCall = 0; - fprintf(screenOut, - "# Performance\n" - "# Loop Time(fs) Total Energy Potential Energy Kinetic Energy Temperature (us/atom) # Atoms\n"); - fflush(screenOut); - } - - real_t time = iStep*s->dt; - real_t eTotal = (s->ePotential+s->eKinetic) / s->atoms->nGlobal; - real_t eK = s->eKinetic / s->atoms->nGlobal; - real_t eU = s->ePotential / s->atoms->nGlobal; - real_t Temp = (s->eKinetic / s->atoms->nGlobal) / (kB_eV * 1.5); - - double timePerAtom = 1.0e6*elapsedTime/(double)(nEval*s->atoms->nLocal); -#ifndef PRINT_TIMING - timePerAtom = 0.0; -#endif - - fprintf(screenOut, " %6d %10.2f %18.12f %18.12f %18.12f %12.4f %10.4f %12d\n", - iStep, time, eTotal, eU, eK, Temp, timePerAtom, s->atoms->nGlobal); -} - -/// Print information about the simulation in a format that is (mostly) -/// YAML compliant. -void printSimulationDataYaml(FILE* file, SimFlat* s) -{ - // All ranks get maxOccupancy - int maxOcc = maxOccupancy(s->boxes); - - // Only rank 0 prints - if (! 
printRank()) - return; - - fprintf(file,"Simulation data: \n"); - fprintf(file," Total atoms : %d\n", - s->atoms->nGlobal); - fprintf(file," Min global bounds : [ %14.10f, %14.10f, %14.10f ]\n", - s->domain->globalMin[0], s->domain->globalMin[1], s->domain->globalMin[2]); - fprintf(file," Max global bounds : [ %14.10f, %14.10f, %14.10f ]\n", - s->domain->globalMax[0], s->domain->globalMax[1], s->domain->globalMax[2]); - printSeparator(file); - fprintf(file,"Decomposition data: \n"); - fprintf(file," Processors : %6d,%6d,%6d\n", - s->domain->procGrid[0], s->domain->procGrid[1], s->domain->procGrid[2]); - fprintf(file," Local boxes : %6d,%6d,%6d = %8d\n", - s->boxes->gridSize[0], s->boxes->gridSize[1], s->boxes->gridSize[2], - s->boxes->gridSize[0]*s->boxes->gridSize[1]*s->boxes->gridSize[2]); - fprintf(file," Box size : [ %14.10f, %14.10f, %14.10f ]\n", - s->boxes->boxSize[0], s->boxes->boxSize[1], s->boxes->boxSize[2]); - fprintf(file," Box factor : [ %14.10f, %14.10f, %14.10f ] \n", - s->boxes->boxSize[0]/s->pot->cutoff, - s->boxes->boxSize[1]/s->pot->cutoff, - s->boxes->boxSize[2]/s->pot->cutoff); - fprintf(file, " Max Link Cell Occupancy: %d of %d\n", - maxOcc, MAXATOMS); - printSeparator(file); - fprintf(file,"Potential data: \n"); - s->pot->print(file, s->pot); - - // Memory footprint diagnostics - int perAtomSize = 10*sizeof(real_t)+2*sizeof(int); - float mbPerAtom = perAtomSize/1024/1024; - float totalMemLocal = (float)(perAtomSize*s->atoms->nLocal)/1024/1024; - float totalMemGlobal = (float)(perAtomSize*s->atoms->nGlobal)/1024/1024; - - int nLocalBoxes = s->boxes->gridSize[0]*s->boxes->gridSize[1]*s->boxes->gridSize[2]; - int nTotalBoxes = (s->boxes->gridSize[0]+2)*(s->boxes->gridSize[1]+2)*(s->boxes->gridSize[2]+2); - float paddedMemLocal = (float) nLocalBoxes*(perAtomSize*MAXATOMS)/1024/1024; - float paddedMemTotal = (float) nTotalBoxes*(perAtomSize*MAXATOMS)/1024/1024; - - printSeparator(file); - fprintf(file,"Memory data: \n"); - fprintf(file, " 
Intrinsic atom footprint = %4d B/atom \n", perAtomSize); - fprintf(file, " Total atom footprint = %7.3f MB (%6.2f MB/node)\n", totalMemGlobal, totalMemLocal); - fprintf(file, " Link cell atom footprint = %7.3f MB/node\n", paddedMemLocal); - fprintf(file, " Link cell atom footprint = %7.3f MB/node (including halo cell data\n", paddedMemTotal); - - fflush(file); -} - -/// Check that the user input meets certain criteria. -void sanityChecks(Command cmd, double cutoff, double latticeConst, char latticeType[8]) -{ - int failCode = 0; - - // Check that domain grid matches number of ranks. (fail code 1) - int nProcs = cmd.xproc * cmd.yproc * cmd.zproc; - if (nProcs != getNRanks()) - { - failCode |= 1; - if (printRank() ) - fprintf(screenOut, - "\nNumber of MPI ranks must match xproc * yproc * zproc\n"); - } - - // Check whether simuation is too small (fail code 2) - double minx = 2*cutoff*cmd.xproc; - double miny = 2*cutoff*cmd.yproc; - double minz = 2*cutoff*cmd.zproc; - double sizex = cmd.nx*latticeConst; - double sizey = cmd.ny*latticeConst; - double sizez = cmd.nz*latticeConst; - - if ( sizex < minx || sizey < miny || sizez < minz) - { - failCode |= 2; - if (printRank()) - fprintf(screenOut,"\nSimulation too small.\n" - " Increase the number of unit cells to make the simulation\n" - " at least (%3.2f, %3.2f. %3.2f) Ansgstroms in size\n", - minx, miny, minz); - } - - // Check for supported lattice structure (fail code 4) - if (strcasecmp(latticeType, "FCC") != 0) - { - failCode |= 4; - if ( printRank() ) - fprintf(screenOut, - "\nOnly FCC Lattice type supported, not %s. Fatal Error.\n", - latticeType); - } - int checkCode = failCode; - bcastParallel(&checkCode, sizeof(int), 0); - // This assertion can only fail if different tasks failed different - // sanity checks. That should not be possible. 
- assert(checkCode == failCode); - - if (failCode != 0) - exit(failCode); -} - -// -------------------------------------------------------------- - - -/// \page pg_building_comd Building CoMD -/// -/// CoMD is written with portability in mind and should compile using -/// practically any compiler that implements the C99 standard. You will -/// need to create a Makefile by copying the sample provided with the -/// distribution (Makefile.vanilla). -/// -/// $ cp Makefile.vanilla Makefile -/// -/// and use the make command to build the code -/// -/// $ make -/// -/// The sample Makefile will compile the code on many platforms. See -/// comments in Makefile.vanilla for information about specifying the -/// name of the C compiler, and/or additional compiler switches that -/// might be necessary for your platform. -/// -/// The main options available in the Makefile are toggling single/double -/// precision and enabling/disabling MPI. In the event MPI is not -/// available, setting the DO_MPI flag to OFF will create a purely -/// serial build (you will likely also need to change the setting of -/// CC). -/// -/// The makefile should handle all the dependency checking needed, via -/// makedepend. -/// -/// 'make clean' removes the object and dependency files. -/// -/// 'make distclean' additionally removes the executable file and the -/// documentation files. -/// -/// Other build options -/// ------------------- -/// -/// Various other options are made available by \#define arguments within -/// some of the source files. -/// -/// #REDIRECT_OUTPUT in CoMD.c -/// -/// Setting this to 1 will redirect all screen output to a file, -/// currently set to 'testOut.txt'. -/// -/// #POT_SHIFT in ljForce.c -/// -/// This is set to 1.0 by default, and shifts the values of the cohesive -/// energy given by the Lennard-Jones potential so it is zero at the -/// cutoff radius. 
This setting improves energy conservation -/// step-to-step as it reduces the noise generated by atoms crossing the -/// cutoff threshold. However, it does not affect the long-term energy -/// conservation of the code. -/// -/// #MAXATOMS in linkCells.h -/// -/// The default value is 64, which allows ample padding of the linkCell -/// structure to allow for density fluctuations. Reducing it may improve -/// the efficiency of the code via improved thread utilization and -/// reduced memory footprint. - -// -------------------------------------------------------------- - - -// -------------------------------------------------------------- - - -/// \page pg_measuring_performance Measuring Performance -/// -/// CoMD implements a simple and extensible system of internal timers to -/// measure the performance profile of the code. As explained in -/// performanceTimers.c, it is easy to create additional timers and -/// associate them with code regions of specific interest. In addition, -/// the getTime() and getTick() functions can be easily reimplemented to -/// take advantage of platform specific timing resources. -/// -/// A timing report is printed at the end of each simulation. 
-/// -/// ~~~~ -/// Timings for Rank 0 -/// Timer # Calls Avg/Call (s) Total (s) % Loop -/// ___________________________________________________________________ -/// total 1 50.6701 50.6701 100.04 -/// loop 1 50.6505 50.6505 100.00 -/// timestep 1 50.6505 50.6505 100.00 -/// position 10000 0.0000 0.0441 0.09 -/// velocity 20000 0.0000 0.0388 0.08 -/// redistribute 10001 0.0003 3.4842 6.88 -/// atomHalo 10001 0.0002 2.4577 4.85 -/// force 10001 0.0047 47.0856 92.96 -/// eamHalo 10001 0.0001 1.0592 2.09 -/// commHalo 60006 0.0000 1.7550 3.46 -/// commReduce 12 0.0000 0.0003 0.00 -/// -/// Timing Statistics Across 8 Ranks: -/// Timer Rank: Min(s) Rank: Max(s) Avg(s) Stdev(s) -/// _____________________________________________________________________________ -/// total 3: 50.6697 0: 50.6701 50.6699 0.0001 -/// loop 0: 50.6505 4: 50.6505 50.6505 0.0000 -/// timestep 0: 50.6505 4: 50.6505 50.6505 0.0000 -/// position 2: 0.0437 0: 0.0441 0.0439 0.0001 -/// velocity 2: 0.0380 4: 0.0392 0.0385 0.0004 -/// redistribute 0: 3.4842 1: 3.7085 3.6015 0.0622 -/// atomHalo 0: 2.4577 7: 2.6441 2.5780 0.0549 -/// force 1: 46.8624 0: 47.0856 46.9689 0.0619 -/// eamHalo 3: 0.2269 6: 1.2936 1.0951 0.3344 -/// commHalo 3: 1.0803 6: 2.1856 1.9363 0.3462 -/// commReduce 6: 0.0002 2: 0.0003 0.0003 0.0000 -/// -/// --------------------------------------------------- -/// Average atom update rate: 9.39 us/atom/task -/// --------------------------------------------------- -/// -/// ~~~~ -/// This report consists of two blocks. The upper block lists the absolute -/// wall clock time spent in each timer on rank 0 of the job. The lower -/// block reports minimum, maximum, average, and standard deviation of -/// times across all tasks. -/// The ranks where the minimum and maximum values occured are also reported -/// to aid in identifying hotspots or load imbalances. -/// -/// The last line of the report gives the atom update rate in -/// microseconds/atom/task. 
Since this quantity is normalized by both -/// the number of atoms and the number of tasks it provides a simple -/// figure of merit to compare performance between runs with different -/// numbers of atoms and different numbers of tasks. Any increase in -/// this number relative to a large number of atoms on a single task -/// represents a loss of parallel efficiency. -/// -/// Choosing the problem size correctly has important implications for the -/// reported performance. Small problem sizes may run entirely in the cache -/// of some architectures, leading to very good performance results. -/// For general characterization of performance, it is probably best to -/// choose problem sizes which force the code to access main memory, even -/// though there may be strong scaling scenarios where the code is indeed -/// running mainly in cache. -/// -/// *** Architecture/Configuration for above timing numbers: -/// SGI XE1300 cluster with dual-socket Intel quad-core Nehalem processors. -/// Each node has 2 Quad-Core Xeon X5550 processors runnning at 2.66 GHz -/// with 3 GB of memory per core. - -// -------------------------------------------------------------- - - -/// \page pg_problem_selection_and_scaling Problem Selection and Scaling -/// -/// CoMD is a reference molecular dynamics simulation code as used in -/// materials science. -/// -/// Problem Specification {#sec_problem_spec} -/// ====================== -/// -/// The reference problem is solid Copper starting from a face-centered -/// cubic (FCC) lattice. The initial thermodynamic conditions -/// (Temperature and Volume (via the lattice spacing, lat))can be specified -/// from the command line input. The default is 600 K and standard -/// volume (lat = 3.615 Angstroms). -/// Different temperatures (e.g. T =3000K) and volumes can be -/// specified to melt the system and enhance the interchange of atoms -/// between domains. 
-/// -/// The dynamics is micro-canonical (NVE = constant Number of atoms, -/// constant total system Volume, and constant total system Energy). As -/// a result, the temperature is not fixed. Rather, the temperature will -/// adjust from the initial temperature (as specified on the command line) -/// to a final temperature as the total system kinetic energy comes into -/// equilibrium with the total system potential energy. -/// -/// The total size of the problem (number of atoms) is specified by the -/// number (nx, ny, nz) of FCC unit cells in the x, y, z directions: nAtoms -/// = 4 * nx * ny * nz. The default size is nx = ny = nz = 20 or 32,000 atoms. -/// -/// The simulation models bulk copper by replicating itself in every -/// direction using periodic boundary conditions. -/// -/// Two interatomic force models are available: the Lennard-Jones (LJ) -/// two-body potential (ljForce.c) and the many-body Embedded-Atom Model (EAM) -/// potential (eam.c). The LJ potential is included for comparison and -/// is a valid approximation for constant volume and uniform -/// density. The EAM potential is a more accurate model of cohesion in -/// simple metals like Copper and includes the energetics necessary to -/// model non-uniform density and free surfaces. -/// -/// Scaling Studies in CoMD {#sec_scaling_studies} -/// ======================= -/// -/// CoMD implements a simple geometric domain decomposition to divide -/// the total problem space into domains, which are owned by MPI -/// ranks. Each domain is a single-program multiple data (SPMD) -/// partition of the larger problem. -/// -/// Caution: When doing scaling studies, it is important to distinguish -/// between the problem setup phase and the problem execution phase. 
Both -/// are important to the workflow of doing molecular dynamics, but it -/// is the execution phase we want to quantify in the scaling studies -/// described below, for that dominates the execution time for long runs -/// (millions of time steps). The problem setup can be an appreciable fraction -/// of the execution time for short runs (the default is 100 time steps) -/// and erroneous conclusions drawn. -/// -/// This code is configured with timers. The times are reported per particle -/// and the timers for the force calculation, timestep, etc start after the -/// initialization phase is done. -/// -/// Weak Scaling {#ssec_weak_scaling} -/// ----------- -/// -/// A weak scaling test fixes the amount of work per processor and -/// compares the execution time over number of processors. Weak scaling -/// keeps the ratio of inter-processor communication (surface) to -/// intra-processor work (volume) fixed. The amount of inter-processor -/// work scales with the number of processors in the domain and O(1000) -/// atoms per domain are needed for reasonable performance. -/// -/// Examples, -/// -/// - Increase in processor count by 8: <br> -/// (xproc=yproc=zproc=2, nx=ny=nz=20) -> (xproc=yproc=zproc=4, nx=ny=nz=40) -/// -/// - Increase in processor count by 2: <br> -/// (xproc=yproc=zproc=2, nx=ny=nz=20) -> (xproc=yproc=2, zproc=4, nx=ny=20, nz=40) -/// -/// In general, it is wise to keep the ratio of processor count to -/// system size in each direction fixed (i.e. cubic domains): xproc_0 / nx_0 = xproc_1 / -/// nx_1, since this minimizes surface area to volume. -/// Feel free to experiment, you might learn something about -/// algorithms to optimize communication relative to work. -/// -/// Strong Scaling {#ssec_strong_scaling} -/// --------------- -/// -/// A strong scaling test fixes the total problem size and compares the -/// execution time for different numbers of processors. 
Strong scaling -/// increases the ratio of inter-processor communication (surface) to -/// intra-processor work (volume). -/// -/// Examples, -/// -/// - Increase in processor count by 8: <br> -/// (xproc=yproc=zproc=2, nx=ny=nz=20) -> (xproc=yproc=zproc=4, nx=ny=nz=20) -/// -/// - Increase in processor count by 2: <br> -/// (xproc=yproc=zproc=2, nx=ny=nz=20) -> (xproc=yproc=2, zproc=4, nx=ny=nz=20) -/// -/// The domain decomposition requires O(1000) atoms per domain and -/// begins to scale poorly for small numbers of atoms per domain. -/// Again, feel free to experiment, you might learn something here as -/// well. For example, when molecular dynamics codes were written for -/// vector supercomputers, large lists of force pairs were created for -/// the vector processor. These force lists provide a natural force -/// decomposition for early parallel computers (Fast Parallel Algorithms -/// for Short-Range Molecular Dynamics, S. J. Plimpton, J Comp Phys, -/// 117, 1-19 (1995).) Using replicated data, force decomposition can -/// scale to fewer than one atom per processor and is a natural -/// mechanism to exploit intra-processor parallelism. -/// -/// For further details see for example: -/// https://support.scinet.utoronto.ca/wiki/index.php/Introduction_To_Performance - - -// -------------------------------------------------------------- - - -/// \page pg_verifying_correctness Verifying Correctness -/// -/// Verifying the correctness of an MD simulation is challenging. -/// Because MD is Lyapunov unstable, any small errors, even harmless -/// round-off errors, will lead to a long-term divergence in the atom -/// trajectories. Hence, comparing atom positions at the end of a run -/// is not always a useful verification technique. (Such divergences -/// are not a problem for science applications of MD since they do not -/// alter the statistical physics.) 
Small, single-particle errors can -/// also be difficult to detect in system-wide quantities such as the -/// kinetic or potential energy that are averaged over a large number of -/// particles. -/// -/// In spite of these challenges, there are several methods which are -/// likely to catch significant errors. -/// -/// Cohesive Energy {#sec_ver_cohesive_energy} -/// =============== -/// -/// With a perfect lattice as the initial structure (this is the -/// default), the potential energy per atom is the cohesive energy. -/// This value should be computed correctly to many decimal places. Any -/// variation beyond the last 1 or 2 decimal places is cause for -/// investigation. The correct values for the cohesive energy are -/// -/// | Potential | Cohesive Energy | -/// | :------------- | :-------------- | -/// | Lennard-Jones | -1.243619295058 | -/// | EAM (Adams) | -3.538079224691 | -/// | EAM (Mishin) | -3.539999969176 | -/// -/// The \link sec_command_line_options command -/// line options \endlink documentation explains the switches used to -/// select the potential used in the simulation. -/// -/// Note that the cohesive energy calculation is not sensitive to errors -/// in forces. It is also performed on a highly symmetric structure so -/// there are many errors this will not catch. Still, it is a good -/// first check. -/// -/// Energy Conservation {#sec_ver_energy_conservation} -/// =================== -/// -/// A correctly implemented force kernel, with an appropriate time step -/// (the default value of 1 fs is conservative for temperatures under -/// 10,000K) will conserve total energy over long times to 5 or more -/// digits. Any long term systematic drift in the total energy is a -/// cause for concern. -/// -/// To facilitate checking energy conservation CoMD prints the final and -/// initial values of the total energy. 
When comparing these values, pay -/// careful attention to these details: -/// -/// - It is common to observe an initial transient change in the total -/// energy. Differences in the total energy of 2-3% can be expected in -/// the first 10-100 time steps. -/// - The best way to check energy conservation is to run at least -/// several thousand steps and look at the slope of the total energy -/// ignoring at least the first one or two thousand steps. More steps -/// are even better. -/// - Set the temperature to at least several hundred K. This ensures -/// that atoms will sample a large range of configurations and expose -/// possible errors. -/// - Fluctuations in the energy can make it difficult to tell if -/// conservation is observed. Increasing the number of atoms will reduce -/// the fluctuations. -/// -/// -/// Particle Conservation {#sec_ver_particle_conservation} -/// ===================== -/// -/// The simulation should always end with the same number of particles -/// it started with. Any change is a bug. CoMD checks the initial and -/// final number of particles and prints a warning at the end of the -/// simulation if they are not equal. -/// -/// Reproducibility {#sec_ver_reproducibility} -/// =============== -/// -/// The same simulation run repeatedly on the same hardware should -/// produce the same result. Because parallel computing can add -/// elements of non-determinism we do not expect perfect long term -/// reproducibility, however over a few hundred to a few thousand time -/// steps the energies should not exhibit run-to-run differences outside -/// the last 1 or 2 decimal places. Larger differences are a sign of -/// trouble and should be investigated. This kind of test is -/// practically the only way to detect race conditions in shared memory -/// parallelism. 
-/// -/// Portability {#sec_ver_portability} -/// =========== -/// -/// In our experience, simulations that start from the same initial -/// condition tend to produce very similar trajectories over short terms -/// (100 to 1000 time step), even on different hardware platforms. -/// Short term differences beyond the last 1 or 2 decimal places should -/// likely be investigated. -/// -/// General Principles {#sec_ver_general} -/// ======================= -/// -/// - Simulations run at 0K are too trivial for verification, set -/// the initial temperature to at least several hundred K. -/// - Longer runs are better to check conservation. Compare -/// energies after initial transients are damped out. -/// - Larger runs are better to check conservation. Fluctuations in the -/// energy are averaged out. -/// - Short term (order 100 time steps) discrepancies from run-to-run -/// or platform-to platform beyond the last one or two decimal places -/// are reason for concern. Differences in 4th or 5th decimal place -/// is almost certainly a bug. -/// - Contact the CoMD developers (exmatex-comd@llnl.gov) if you have -/// questions about validation. -/// - -// -------------------------------------------------------------- - - -/// \page pg_comd_architecture CoMD Architecture -/// -/// Program Flow {#sec_program_flow} -/// ============ -/// -/// We have attempted to make the program flow in CoMD 1.1 as simple and -/// transparent as possible. The main program consists of three blocks: -/// prolog, main loop, and epilog. -/// -/// Prolog {#ssec_flow_prolog} -/// ------- -/// -/// The job of the prolog is to initialize the simulation and prepare -/// for the main loop. Notable tasks in the prolog include calling -/// - initParallel() to start MPI -/// - parseCommandLine() to read the command line options -/// - initSimulation() to initialize the main data structure, SimFlatSt. 
-/// This includes tasks such as -/// - initEamPot() to read tabular data for the potential function -/// - initDecomposition() to set up the domain decomposition -/// - createFccLattice() to generate an initial structure for the atoms -/// - initValidate() to store initial data for a simple validation check -/// -/// In CoMD 1.1 all atomic structures are internally generated so -/// there is no need to read large files with atom coordinate data. -/// -/// Main Loop {#ssec_flow_main_loop} -/// --------- -/// -/// The main loop calls -/// - timestep(), the integrator to update particle positions, -/// - printThings() to periodically print simulation information -/// -/// The timestep() function is the heart of the code as it choreographs -/// updating the particle positions, along with computing forces -/// (computeForce()) and communicating atoms between ranks -/// (redistributeAtoms()). -/// -/// Epilog {#ssec_flow_epilog} -/// ------- -/// -/// The epilog code handles end of run bookkeeping such as -/// - validateResult() to check validation -/// - printPerformanceResults() to print a performance summary -/// - destroySimulation() to free memory -/// -/// Key Data Structures {#sec_key_data_structures} -/// ================== -/// -/// Practically all data in CoMD belongs to the SimFlatSt structure. -/// This includes: -/// - BasePotentialSt A polymorphic structure for the potential model -/// - HaloExchangeSt A polymorphic structure for communication halo data -/// - DomainSt The parallel domain decomposition -/// - LinkCellSt The link cells -/// - AtomsSt The atom coordinates and velocities -/// - SpeciesDataSt Properties of the atomic species being simulated. -/// -/// Consult the individual pages for each of these structures to learn -/// more. The descriptions in haloExchange.c and initLinkCells() are -/// especially useful to understand how the atoms are communicated -/// between tasks and stored in link cells for fast pair finding. 
- -// -------------------------------------------------------------- - - -/// \page pg_optimization_targets Optimization Targets -/// -/// Computation {#sec_computation} -/// ============ -/// -/// The computational effort of classical MD is usually highly focused -/// in the force kernel. The two force kernels supplied by CoMD are -/// eamForce() and ljForce(). Both kernels are fundamentally loops over -/// pairs of atoms with significant opportunity to exploit high levels -/// of concurrency. One potential challenge when reordering or -/// parallelizing the pair loop structure is preventing race conditions -/// that result if two concurrent pair evaluations try to simultaneously -/// increment the forces and energies on the same atom. -/// -/// The supplied EAM kernel uses interpolation from tabular data to -/// evaluate functions. Hence the interpolate() function is another -/// potential optimization target. Note that the two potential files -/// distributed with CoMD have very different sizes. The Adams -/// potential (Cu_u6.eam) has 500 points per function in the table while -/// the Mishin potential (Cu01.eam.alloy) has 10,000 points per -/// function. This difference could potentially impact important -/// details such as cache miss rates. -/// -/// Communication {#sec_communication} -/// ============= -/// -/// As the number of atoms per MPI rank decreases, the communication -/// routines will start to require a significant fraction of the -/// run time. The main communication routine in CoMD is haloExchange(). -/// The halo exchange is simple nearest neighbor, point-to-point -/// communication so it should scale well to practically any number of -/// nodes. -/// -/// The halo exchange in CoMD 1.1 is a very simple 6-direction -/// structured halo exchange (see haloExchange.c). Other exchange -/// algorithms can be implemented without much difficulty. -/// -/// The halo exchange function is called in two very different contexts. 
-/// The main usage is to exchange halo particle information (see -/// initAtomHaloExchange()). This process is coordinated by the -/// redistributeAtoms() function. -/// -/// In addition to the atom exchange, when using the EAM potential, a -/// halo exchange is performed in the force routine (see -/// initForceHaloExchange()). - - -// -------------------------------------------------------------- - - -/// \page pg_whats_new New Features and Changes in CoMD 1.1 -/// -/// The main goals of the 1.1 release were to add support for MPI and to -/// improve the structure and clarity of the code. Achieving these -/// goals required considerable changes compared to the 1.0 release. -/// However, the core structure of the most computationally intensive -/// kernels (the force routines) is mostly unchanged. We believe that -/// lessons learned from optimizing 1.0 force kernels to specific -/// hardware or programming models can be quickly transferred to kernels -/// in the 1.1 release. -/// -/// Significant changes in CoMD 1.1 include: -/// -/// - MPI support. Both MPI and single node serial executables can be -/// built from the same source files. -/// -/// - Improved modularity and code clarity. Major data structures are -/// now organized with their own structs and initialization routines. -/// -/// - The build system has been simplified to use only standard -/// Makefiles instead of CMake. -/// -/// - The halo exchange operation needed to communicate remote particle -/// data between MPI ranks also creates "image" particles in the -/// serial build. -/// -/// - Unified force kernels for both serial and MPI builds -/// -/// - The addition of remote/image atoms allows periodic boundary -/// conditions to be handled outside the force loop. -/// -/// - An additional communication/data copy step to handle electron -/// density on remote/image atoms has been added to the EAM force -/// loop. 
-/// -/// - The coordinate system has been simplified to a single global -/// coordinate system for all particles. -/// -/// - Evaluation of energies and forces using Chebyshev polynomial -/// fits has been removed. Polynomial approximation of energies and -/// forces will return in a future CoMD version. -/// -/// - Atomic structures are now generated internally, eliminating the -/// requirement to read, write, and distribute large atom -/// configuration files. Arbitrarily large initial structures can -/// be generated with specified initial temperature and random -/// displacements from lattice positions. Code to read/write atomic -/// positions has been removed. -/// -/// - EAM potentials are now read from standard funcfl and setfl format -/// files. Voter style files are no longer supported. -/// -/// - Collection of performance metrics is significantly improved. -/// Developers can easily add new timers to regions of interest. The -/// system is also designed to allow easy integration with platform -/// specific API's to high resolution timers, cycle counters, -/// hardware counters, etc. -/// -/// -/// - Hooks to in-situ analysis and visualization have been removed. -/// In-situ analysis capabilities will return in a future CoMD release. -/// -/// Please contact the CoMD developers (exmatex-comd@llnl.gov) if -/// any of the deleted features negatively impacts your work. We -/// may be able to help produce a custom version that includes the code -/// you need. - - -// -------------------------------------------------------------- - - -/// \page pg_md_basics MD Basics -/// -/// The molecular dynamics (MD) computer simulation method is a well -/// established and important tool for the study of the dynamical -/// properties of liquids, solids, and other systems of interest in -/// Materials Science and Engineering, Chemistry and Biology. A material -/// is represented in terms of atoms and molecules. 
The method of MD -/// simulation involves the evaluation of the force acting on each atom -/// due to all other atoms in the system and the numerical integration -/// of the Newtonian equations of motion. Though MD was initially -/// developed to compute the equilibrium thermodynamic behavior of -/// materials (equation of state), most recent applications have used MD -/// to study non-equilibrium processes. -/// -/// Wikipedia offers a basic introduction to molecular dynamics with -/// many references: -/// -/// http://en.wikipedia.org/wiki/Molecular_dynamics -/// -/// For a thorough treatment of MD methods, see: -/// - "Computer simulation of liquids" by M.P. Allen and D.J. Tildesley -/// (Oxford, 1989) -/// ISBN-10: 0198556454 | ISBN-13: 978-0198556459. -/// -/// For an understanding of MD simulations and application to statistical mechanics: -/// - "Understanding Molecular Simulation, Second Edition: From Algorithms -/// to Applications," by D. Frenkel and B. Smit (Academic Press, 2001) -/// ISBN-10: 0122673514 | ISBN-13: 978-0122673511 -/// - "Statistical and Thermal Physics: With Computer Applications," by -/// H. Gould and J. Tobochnik (Princeton, 2010) -/// ISBN-10: 0691137447 | ISBN-13: 978-0691137445 -/// -/// CoMD implements both the Lennard-Jones Potential (ljForce.c) and the -/// Embedded Atom Method Potential (eam.c). -/// |