From 0ca2e3b3896e20dd85a866da5c22a5d694c0509c Mon Sep 17 00:00:00 2001 From: sjplimp Date: Thu, 26 May 2011 22:00:16 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6217 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/Makefile | 4 +- src/{accelerator.h => accelerator_cuda.h} | 21 ++++++- src/atom.cpp | 24 ++++---- src/force.cpp | 2 +- src/input.cpp | 6 +- src/lammps.cpp | 69 +++++++++++++---------- src/lammps.h | 3 +- src/modify.cpp | 4 +- src/update.cpp | 5 +- 9 files changed, 79 insertions(+), 59 deletions(-) rename src/{accelerator.h => accelerator_cuda.h} (84%) diff --git a/src/Makefile b/src/Makefile index 1812e979b9..dc7f54fc3d 100755 --- a/src/Makefile +++ b/src/Makefile @@ -17,8 +17,8 @@ PACKAGE = asphere class2 colloid dipole dsmc gpu granular \ kspace manybody meam molecule opt peri poems reax replica \ shock srd xtc -PACKUSER = user-ackland user-atc user-cd-eam user-cg-cmm user-eff \ - user-ewaldn user-imd user-reaxc user-smd +PACKUSER = user-ackland user-atc user-cd-eam user-cg-cmm user-cuda \ + user-eff user-ewaldn user-imd user-reaxc user-smd PACKALL = $(PACKAGE) $(PACKUSER) diff --git a/src/accelerator.h b/src/accelerator_cuda.h similarity index 84% rename from src/accelerator.h rename to src/accelerator_cuda.h index 9ef291adbc..455d63fc8a 100644 --- a/src/accelerator.h +++ b/src/accelerator_cuda.h @@ -11,12 +11,26 @@ See the README file in the top-level LAMMPS directory. 
------------------------------------------------------------------------- */ +#ifndef LMP_ACCELERATOR_CUDA_H +#define LMP_ACCELERATOR_CUDA_H + +// true interface to USER-CUDA +// used when USER-CUDA is installed + +#ifdef LMP_USER_CUDA + +#include "cuda.h" +#include "comm_cuda.h" +#include "domain_cuda.h" +#include "neighbor_cuda.h" +#include "modify_cuda.h" +#include "verlet_cuda.h" + +#else + // dummy interface to USER-CUDA // used when USER-CUDA is not installed -#ifndef LMP_ACCELERATOR_H -#define LMP_ACCELERATOR_H - #include "comm.h" #include "modify.h" #include "verlet.h" @@ -71,3 +85,4 @@ class VerletCuda : public Verlet { } #endif +#endif diff --git a/src/atom.cpp b/src/atom.cpp index d7c08d9cbb..2293878c18 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -31,7 +31,7 @@ #include "update.h" #include "domain.h" #include "group.h" -#include "accelerator.h" +#include "accelerator_cuda.h" #include "memory.h" #include "error.h" @@ -42,8 +42,6 @@ using namespace LAMMPS_NS; #define EPSILON 1.0e-6 #define CUDA_CHUNK 3000 -enum{NOACCEL,OPT,GPU,USERCUDA}; // same as lammps.cpp - #define MIN(A,B) ((A) < (B)) ? (A) : (B) #define MAX(A,B) ((A) > (B)) ? 
(A) : (B) @@ -289,7 +287,7 @@ void Atom::create_avec(const char *style, int narg, char **arg, char *suffix) AtomVec *Atom::new_avec(const char *style, int narg, char **arg, char *suffix, int &sflag) { - if (suffix && lmp->offaccel == 0) { + if (suffix && lmp->accelerator) { sflag = 1; char estyle[256]; sprintf(estyle,"%s/%s",style,suffix); @@ -1335,8 +1333,7 @@ void Atom::sort() // download data from GPU if necessary - if (lmp->accelerator == USERCUDA && !lmp->cuda->oncpu) - lmp->cuda->downloadAll(); + if (lmp->cuda && !lmp->cuda->oncpu) lmp->cuda->downloadAll(); // re-setup sort bins if needed @@ -1415,8 +1412,7 @@ void Atom::sort() // upload data back to GPU if necessary - if (lmp->accelerator == USERCUDA && !lmp->cuda->oncpu) - lmp->cuda->uploadAll(); + if (lmp->cuda && !lmp->cuda->oncpu) lmp->cuda->uploadAll(); // sanity check that current = permute @@ -1434,14 +1430,16 @@ void Atom::sort() void Atom::setup_sort_bins() { - // binsize = user setting or default - // default = 1/2 of neighbor cutoff for non-CUDA - // CUDA_CHUNK atoms/proc for CUDA + // binsize: + // user setting if explicitly set + // 1/2 of neighbor cutoff for non-CUDA + // CUDA_CHUNK atoms/proc for CUDA // check if neighbor cutoff = 0.0 double binsize; if (userbinsize > 0.0) binsize = userbinsize; - else if (lmp->accelerator == USERCUDA) { + else if (!lmp->cuda) binsize = 0.5 * neighbor->cutneighmax; + else { if (domain->dimension == 3) { double vol = (domain->boxhi[0]-domain->boxlo[0]) * (domain->boxhi[1]-domain->boxlo[1]) * @@ -1452,7 +1450,7 @@ void Atom::setup_sort_bins() (domain->boxhi[1]-domain->boxlo[1]); binsize = pow(1.0*CUDA_CHUNK/natoms*area,1.0/2.0); } - } else binsize = 0.5 * neighbor->cutneighmax; + } if (binsize == 0.0) error->all("Atom sorting has bin size = 0.0"); double bininv = 1.0/binsize; diff --git a/src/force.cpp b/src/force.cpp index 3364455924..eec52f22d4 100644 --- a/src/force.cpp +++ b/src/force.cpp @@ -144,7 +144,7 @@ void Force::create_pair(const char *style, char 
*suffix) Pair *Force::new_pair(const char *style, char *suffix, int &sflag) { - if (suffix && lmp->offaccel == 0) { + if (suffix && lmp->accelerator) { sflag = 1; char estyle[256]; sprintf(estyle,"%s/%s",style,suffix); diff --git a/src/input.cpp b/src/input.cpp index 079879574f..835b8ae5c9 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -42,7 +42,7 @@ #include "neighbor.h" #include "special.h" #include "variable.h" -#include "accelerator.h" +#include "accelerator_cuda.h" #include "error.h" #include "memory.h" @@ -813,13 +813,13 @@ void Input::accelerator() if (strcmp(arg[0],"off") == 0) { if (narg != 1) error->all("Illegal accelerator command"); - lmp->offaccel = 1; + lmp->accelerator = 0; return; } if (strcmp(arg[0],"on") == 0) { if (narg != 1) error->all("Illegal accelerator command"); - lmp->offaccel = 0; + lmp->accelerator = 1; return; } diff --git a/src/lammps.cpp b/src/lammps.cpp index f0b79ec45b..dfef210734 100644 --- a/src/lammps.cpp +++ b/src/lammps.cpp @@ -27,13 +27,11 @@ #include "modify.h" #include "group.h" #include "output.h" -#include "accelerator.h" +#include "accelerator_cuda.h" #include "timer.h" using namespace LAMMPS_NS; -enum{NOACCEL,OPT,GPU,USERCUDA}; - /* ---------------------------------------------------------------------- start up LAMMPS allocate fundamental classes (memory, error, universe, input) @@ -52,15 +50,30 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) screen = NULL; logfile = NULL; + // create CUDA class + // cuda = true version if USER-CUDA installed, else dummy + + cuda = new Cuda(this); + if (!cuda->cuda_exists) { + delete cuda; + cuda = NULL; + } + // parse input switches int inflag = 0; int screenflag = 0; int logflag = 0; - accelerator = NOACCEL; - asuffix = NULL; - offaccel = 0; - cuda = NULL; + + if (cuda) { + int n = strlen("cuda") + 1; + asuffix = new char[n]; + strcpy(asuffix,"cuda"); + accelerator = 1; + } else { + asuffix = NULL; + accelerator = 0; + } int iarg = 1; @@ -101,12 +114,20 @@ 
LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) } else if (strcmp(arg[iarg],"-accel") == 0 || strcmp(arg[iarg],"-a") == 0) { if (iarg+2 > narg) error->universe_all("Invalid command-line argument"); - if (strcmp(arg[iarg+1],"opt") == 0) accelerator = OPT; - else if (strcmp(arg[iarg+1],"gpu") == 0) accelerator = GPU; - else if (strcmp(arg[iarg+1],"cuda") == 0) accelerator = USERCUDA; - else error->universe_all("Invalid command-line argument"); - asuffix = new char[8]; - strcpy(asuffix,arg[iarg+1]); + if (strcmp(arg[iarg+1],"none") == 0) { + delete [] asuffix; + asuffix = NULL; + accelerator = 0; + } else if (strcmp(arg[iarg+1],"opt") == 0 || + strcmp(arg[iarg+1],"gpu") == 0 || + strcmp(arg[iarg+1],"cuda") == 0) { + int n = strlen(arg[iarg+1]) + 1; + delete [] asuffix; asuffix = new char[n]; /* release the default "cuda" suffix or one set by an earlier -a switch */ + strcpy(asuffix,arg[iarg+1]); + accelerator = 1; + } else error->universe_all("Invalid command-line argument"); /* unknown -a value: reject, as the replaced code did */ + if (asuffix && strcmp(asuffix,"cuda") == 0 && !cuda) /* asuffix is NULL after "-a none" */ + error->all("Cannot use -a cuda without USER-CUDA package installed"); iarg += 2; } else error->universe_all("Invalid command-line argument"); } @@ -277,16 +298,6 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) if (mpisize != sizeof(bigint)) error->all("MPI_LMP_BIGINT and bigint in lmptype.h are not compatible"); - // check consistency of -a switch with installed packages - // for OPT and GPU, no problem if not installed - // for USER-CUDA, throw error if not installed - - if (accelerator == USERCUDA) { - cuda = new Cuda(this); - if (!cuda->cuda_exists) - error->all("Command-line switch requires USER-CUDA package be installed"); - } - // allocate input class now that MPI is fully setup input = new Input(this,narg,arg); @@ -307,7 +318,6 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) LAMMPS::~LAMMPS() { destroy(); - if (accelerator == USERCUDA) delete cuda; if (universe->nworlds == 1) { if (logfile) fclose(logfile); @@ -320,6 +330,7 @@ LAMMPS::~LAMMPS() if (world != universe->uworld) MPI_Comm_free(&world); delete [] asuffix; + delete cuda; delete input; delete 
universe; @@ -337,19 +348,19 @@ void LAMMPS::create() { atom = new Atom(this); - if (accelerator == USERCUDA) neighbor = new NeighborCuda(this); + if (cuda) neighbor = new NeighborCuda(this); else neighbor = new Neighbor(this); - if (accelerator == USERCUDA) comm = new CommCuda(this); + if (cuda) comm = new CommCuda(this); else comm = new Comm(this); - if (accelerator == USERCUDA) domain = new DomainCuda(this); + if (cuda) domain = new DomainCuda(this); else domain = new Domain(this); group = new Group(this); force = new Force(this); // must be after group, to create temperature - if (accelerator == USERCUDA) modify = new ModifyCuda(this); + if (cuda) modify = new ModifyCuda(this); else modify = new Modify(this); output = new Output(this); // must be after group, so "all" exists @@ -364,7 +375,7 @@ void LAMMPS::create() void LAMMPS::init() { - if (accelerator == USERCUDA) cuda->accelerator(0,NULL); + if (cuda) cuda->accelerator(0,NULL); update->init(); force->init(); // pair must come after update due to minimizer diff --git a/src/lammps.h b/src/lammps.h index deaa10ebd9..930eebbab2 100644 --- a/src/lammps.h +++ b/src/lammps.h @@ -42,9 +42,8 @@ class LAMMPS { FILE *screen; // screen output FILE *logfile; // logfile - int accelerator; // accelerator flag char *asuffix; // accelerator suffix - int offaccel; // 1 if accelerator flag currently disabled + int accelerator; // 1 if asuffix enabled, 0 if disabled class Cuda *cuda; // CUDA accelerator class LAMMPS(int, char **, MPI_Comm); diff --git a/src/modify.cpp b/src/modify.cpp index 0576cd2c83..35a05a742a 100644 --- a/src/modify.cpp +++ b/src/modify.cpp @@ -640,7 +640,7 @@ void Modify::add_fix(int narg, char **arg, char *suffix) int success = 0; - if (suffix && lmp->offaccel == 0) { + if (suffix && lmp->accelerator) { char estyle[256]; sprintf(estyle,"%s/%s",arg[2],suffix); success = 1; @@ -784,7 +784,7 @@ void Modify::add_compute(int narg, char **arg, char *suffix) int success = 0; - if (suffix && lmp->offaccel == 0) 
{ + if (suffix && lmp->accelerator) { char estyle[256]; sprintf(estyle,"%s/%s",arg[2],suffix); success = 1; diff --git a/src/update.cpp b/src/update.cpp index c8efe4b796..628a22bf11 100644 --- a/src/update.cpp +++ b/src/update.cpp @@ -25,14 +25,11 @@ #include "region.h" #include "compute.h" #include "output.h" -#include "accelerator.h" #include "memory.h" #include "error.h" using namespace LAMMPS_NS; -enum{NOACCEL,OPT,GPU,USERCUDA}; // same as lammps.cpp - /* ---------------------------------------------------------------------- */ Update::Update(LAMMPS *lmp) : Pointers(lmp) @@ -225,7 +222,7 @@ void Update::new_integrate(char *style, int narg, char **arg, { int success = 0; - if (suffix && lmp->offaccel == 0) { + if (suffix && lmp->accelerator) { sflag = 1; char estyle[256]; sprintf(estyle,"%s/%s",style,suffix);