From 0ca2e3b3896e20dd85a866da5c22a5d694c0509c Mon Sep 17 00:00:00 2001
From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa>
Date: Thu, 26 May 2011 22:00:16 +0000
Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6217
 f3b2605a-c512-4ea7-a41b-209d697bcdaa

---
 src/Makefile                              |  4 +-
 src/{accelerator.h => accelerator_cuda.h} | 21 ++++++-
 src/atom.cpp                              | 24 ++++----
 src/force.cpp                             |  2 +-
 src/input.cpp                             |  6 +-
 src/lammps.cpp                            | 69 +++++++++++++----------
 src/lammps.h                              |  3 +-
 src/modify.cpp                            |  4 +-
 src/update.cpp                            |  5 +-
 9 files changed, 79 insertions(+), 59 deletions(-)
 rename src/{accelerator.h => accelerator_cuda.h} (84%)

diff --git a/src/Makefile b/src/Makefile
index 1812e979b9..dc7f54fc3d 100755
--- a/src/Makefile
+++ b/src/Makefile
@@ -17,8 +17,8 @@ PACKAGE = asphere class2 colloid dipole dsmc gpu granular \
 	  kspace manybody meam molecule opt peri poems reax replica \
 	  shock srd xtc
 
-PACKUSER = user-ackland user-atc user-cd-eam user-cg-cmm user-eff \
-	   user-ewaldn user-imd user-reaxc user-smd
+PACKUSER = user-ackland user-atc user-cd-eam user-cg-cmm user-cuda \
+	   user-eff user-ewaldn user-imd user-reaxc user-smd
 
 PACKALL = $(PACKAGE) $(PACKUSER)
 
diff --git a/src/accelerator.h b/src/accelerator_cuda.h
similarity index 84%
rename from src/accelerator.h
rename to src/accelerator_cuda.h
index 9ef291adbc..455d63fc8a 100644
--- a/src/accelerator.h
+++ b/src/accelerator_cuda.h
@@ -11,12 +11,26 @@
    See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 
+#ifndef LMP_ACCELERATOR_CUDA_H
+#define LMP_ACCELERATOR_CUDA_H
+
+// true interface to USER-CUDA
+// used when USER-CUDA is installed
+
+#ifdef LMP_USER_CUDA
+
+#include "cuda.h"
+#include "comm_cuda.h"
+#include "domain_cuda.h"
+#include "neighbor_cuda.h"
+#include "modify_cuda.h"
+#include "verlet_cuda.h"
+
+#else
+
 // dummy interface to USER-CUDA
 // used when USER-CUDA is not installed
 
-#ifndef LMP_ACCELERATOR_H
-#define LMP_ACCELERATOR_H
-
 #include "comm.h"
 #include "modify.h"
 #include "verlet.h"
@@ -71,3 +85,4 @@ class VerletCuda : public Verlet {
 }
 
 #endif
+#endif
diff --git a/src/atom.cpp b/src/atom.cpp
index d7c08d9cbb..2293878c18 100644
--- a/src/atom.cpp
+++ b/src/atom.cpp
@@ -31,7 +31,7 @@
 #include "update.h"
 #include "domain.h"
 #include "group.h"
-#include "accelerator.h"
+#include "accelerator_cuda.h"
 #include "memory.h"
 #include "error.h"
 
@@ -42,8 +42,6 @@ using namespace LAMMPS_NS;
 #define EPSILON 1.0e-6
 #define CUDA_CHUNK 3000
 
-enum{NOACCEL,OPT,GPU,USERCUDA};     // same as lammps.cpp
-
 #define MIN(A,B) ((A) < (B)) ? (A) : (B)
 #define MAX(A,B) ((A) > (B)) ? (A) : (B)
 
@@ -289,7 +287,7 @@ void Atom::create_avec(const char *style, int narg, char **arg, char *suffix)
 AtomVec *Atom::new_avec(const char *style, int narg, char **arg,
 			char *suffix, int &sflag)
 {
-  if (suffix && lmp->offaccel == 0) {
+  if (suffix && lmp->accelerator) {
     sflag = 1;
     char estyle[256];
     sprintf(estyle,"%s/%s",style,suffix);
@@ -1335,8 +1333,7 @@ void Atom::sort()
 
   // download data from GPU if necessary
 
-  if (lmp->accelerator == USERCUDA && !lmp->cuda->oncpu) 
-    lmp->cuda->downloadAll();
+  if (lmp->cuda && !lmp->cuda->oncpu) lmp->cuda->downloadAll();
 
   // re-setup sort bins if needed
 
@@ -1415,8 +1412,7 @@ void Atom::sort()
 
   // upload data back to GPU if necessary
 
-  if (lmp->accelerator == USERCUDA && !lmp->cuda->oncpu)
-    lmp->cuda->uploadAll();
+  if (lmp->cuda && !lmp->cuda->oncpu) lmp->cuda->uploadAll();
 
   // sanity check that current = permute
 
@@ -1434,14 +1430,16 @@ void Atom::sort()
 
 void Atom::setup_sort_bins()
 {
-  // binsize = user setting or default
-  // default = 1/2 of neighbor cutoff for non-CUDA
-  //           CUDA_CHUNK atoms/proc for CUDA
+  // binsize:
+  // user setting if explicitly set
+  // 1/2 of neighbor cutoff for non-CUDA
+  // bin sized to hold ~CUDA_CHUNK atoms on average for CUDA
   // check if neighbor cutoff = 0.0
 
   double binsize;
   if (userbinsize > 0.0) binsize = userbinsize;
-  else if (lmp->accelerator == USERCUDA) {
+  else if (!lmp->cuda) binsize = 0.5 * neighbor->cutneighmax;
+  else {
     if (domain->dimension == 3) {
       double vol = (domain->boxhi[0]-domain->boxlo[0]) * 
 	(domain->boxhi[1]-domain->boxlo[1]) * 
@@ -1452,7 +1450,7 @@ void Atom::setup_sort_bins()
 	(domain->boxhi[1]-domain->boxlo[1]);
       binsize = pow(1.0*CUDA_CHUNK/natoms*area,1.0/2.0);
     }
-  } else binsize = 0.5 * neighbor->cutneighmax;
+  }
   if (binsize == 0.0) error->all("Atom sorting has bin size = 0.0");
 
   double bininv = 1.0/binsize;
diff --git a/src/force.cpp b/src/force.cpp
index 3364455924..eec52f22d4 100644
--- a/src/force.cpp
+++ b/src/force.cpp
@@ -144,7 +144,7 @@ void Force::create_pair(const char *style, char *suffix)
 
 Pair *Force::new_pair(const char *style, char *suffix, int &sflag)
 {
-  if (suffix && lmp->offaccel == 0) {
+  if (suffix && lmp->accelerator) {
     sflag = 1;
     char estyle[256];
     sprintf(estyle,"%s/%s",style,suffix);
diff --git a/src/input.cpp b/src/input.cpp
index 079879574f..835b8ae5c9 100644
--- a/src/input.cpp
+++ b/src/input.cpp
@@ -42,7 +42,7 @@
 #include "neighbor.h"
 #include "special.h"
 #include "variable.h"
-#include "accelerator.h"
+#include "accelerator_cuda.h"
 #include "error.h"
 #include "memory.h"
 
@@ -813,13 +813,13 @@ void Input::accelerator()
 
   if (strcmp(arg[0],"off") == 0) {
     if (narg != 1) error->all("Illegal accelerator command");
-    lmp->offaccel = 1;
+    lmp->accelerator = 0;
     return;
   }
 
   if (strcmp(arg[0],"on") == 0) {
     if (narg != 1) error->all("Illegal accelerator command");
-    lmp->offaccel = 0;
+    lmp->accelerator = 1;
     return;
   }
 
diff --git a/src/lammps.cpp b/src/lammps.cpp
index f0b79ec45b..dfef210734 100644
--- a/src/lammps.cpp
+++ b/src/lammps.cpp
@@ -27,13 +27,11 @@
 #include "modify.h"
 #include "group.h"
 #include "output.h"
-#include "accelerator.h"
+#include "accelerator_cuda.h"
 #include "timer.h"
 
 using namespace LAMMPS_NS;
 
-enum{NOACCEL,OPT,GPU,USERCUDA};
-
 /* ----------------------------------------------------------------------
    start up LAMMPS
    allocate fundamental classes (memory, error, universe, input)
@@ -52,15 +50,30 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
   screen = NULL;
   logfile = NULL;
 
+  // create CUDA class
+  // cuda = true version if USER-CUDA installed, else dummy
+
+  cuda = new Cuda(this);
+  if (!cuda->cuda_exists) {
+    delete cuda;
+    cuda = NULL;
+  }
+
   // parse input switches
 
   int inflag = 0;
   int screenflag = 0;
   int logflag = 0;
-  accelerator = NOACCEL;
-  asuffix = NULL;
-  offaccel = 0;
-  cuda = NULL;
+
+  if (cuda) {
+    int n = strlen("cuda") + 1;
+    asuffix = new char[n];
+    strcpy(asuffix,"cuda");
+    accelerator = 1;
+  } else {
+    asuffix = NULL;
+    accelerator = 0;
+  }
 
   int iarg = 1;
 
@@ -101,12 +114,20 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
     } else if (strcmp(arg[iarg],"-accel") == 0 || 
 	       strcmp(arg[iarg],"-a") == 0) {
       if (iarg+2 > narg) error->universe_all("Invalid command-line argument");
-      if (strcmp(arg[iarg+1],"opt") == 0) accelerator = OPT;
-      else if (strcmp(arg[iarg+1],"gpu") == 0) accelerator = GPU;
-      else if (strcmp(arg[iarg+1],"cuda") == 0) accelerator = USERCUDA;
-      else error->universe_all("Invalid command-line argument");
-      asuffix = new char[8];
-      strcpy(asuffix,arg[iarg+1]);
+      // drop any default suffix first: avoids leaking it and a NULL strcmp()
+      delete [] asuffix;
+      asuffix = NULL;
+      accelerator = 0;
+      // report via universe_all() like the other switch errors in this loop
+      if (strcmp(arg[iarg+1],"cuda") == 0 && !cuda)
+	error->universe_all("Cannot use -a cuda without USER-CUDA package installed");
+      if (strcmp(arg[iarg+1],"opt") == 0 || strcmp(arg[iarg+1],"gpu") == 0 ||
+	  strcmp(arg[iarg+1],"cuda") == 0) {
+	asuffix = new char[strlen(arg[iarg+1]) + 1];
+	strcpy(asuffix,arg[iarg+1]);
+	accelerator = 1;
+      } else if (strcmp(arg[iarg+1],"none") != 0)
+	error->universe_all("Invalid command-line argument");
       iarg += 2;
     } else error->universe_all("Invalid command-line argument");
   }
@@ -277,16 +298,6 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
   if (mpisize != sizeof(bigint))
       error->all("MPI_LMP_BIGINT and bigint in lmptype.h are not compatible");
 
-  // check consistency of -a switch with installed packages
-  // for OPT and GPU, no problem if not installed
-  // for USER-CUDA, throw error if not installed
-
-  if (accelerator == USERCUDA) {
-    cuda = new Cuda(this);
-    if (!cuda->cuda_exists)
-      error->all("Command-line switch requires USER-CUDA package be installed");
-  }
-
   // allocate input class now that MPI is fully setup
 
   input = new Input(this,narg,arg);
@@ -307,7 +318,6 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
 LAMMPS::~LAMMPS()
 {
   destroy();
-  if (accelerator == USERCUDA) delete cuda;
 
   if (universe->nworlds == 1) {
     if (logfile) fclose(logfile);
@@ -320,6 +330,7 @@ LAMMPS::~LAMMPS()
   if (world != universe->uworld) MPI_Comm_free(&world);
 
   delete [] asuffix;
+  delete cuda;
 
   delete input;
   delete universe;
@@ -337,19 +348,19 @@ void LAMMPS::create()
 {
   atom = new Atom(this);
 
-  if (accelerator == USERCUDA) neighbor = new NeighborCuda(this);
+  if (cuda) neighbor = new NeighborCuda(this);
   else neighbor = new Neighbor(this);
 
-  if (accelerator == USERCUDA) comm = new CommCuda(this);
+  if (cuda) comm = new CommCuda(this);
   else comm = new Comm(this);
 
-  if (accelerator == USERCUDA) domain = new DomainCuda(this);
+  if (cuda) domain = new DomainCuda(this);
   else domain = new Domain(this);
 
   group = new Group(this);
   force = new Force(this);    // must be after group, to create temperature
 
-  if (accelerator == USERCUDA) modify = new ModifyCuda(this);
+  if (cuda) modify = new ModifyCuda(this);
   else modify = new Modify(this);
 
   output = new Output(this);  // must be after group, so "all" exists
@@ -364,7 +375,7 @@ void LAMMPS::create()
 
 void LAMMPS::init()
 {
-  if (accelerator == USERCUDA) cuda->accelerator(0,NULL);
+  if (cuda) cuda->accelerator(0,NULL);
  
   update->init();
   force->init();         // pair must come after update due to minimizer
diff --git a/src/lammps.h b/src/lammps.h
index deaa10ebd9..930eebbab2 100644
--- a/src/lammps.h
+++ b/src/lammps.h
@@ -42,9 +42,8 @@ class LAMMPS {
   FILE *screen;                  // screen output
   FILE *logfile;                 // logfile
 
-  int accelerator;               // accelerator flag
   char *asuffix;                 // accelerator suffix
-  int offaccel;                  // 1 if accelerator flag currently disabled
+  int accelerator;               // 1 if asuffix enabled, 0 if disabled
   class Cuda *cuda;              // CUDA accelerator class
 
   LAMMPS(int, char **, MPI_Comm);
diff --git a/src/modify.cpp b/src/modify.cpp
index 0576cd2c83..35a05a742a 100644
--- a/src/modify.cpp
+++ b/src/modify.cpp
@@ -640,7 +640,7 @@ void Modify::add_fix(int narg, char **arg, char *suffix)
 
   int success = 0;
 
-  if (suffix && lmp->offaccel == 0) {
+  if (suffix && lmp->accelerator) {
     char estyle[256];
     sprintf(estyle,"%s/%s",arg[2],suffix);
     success = 1;
@@ -784,7 +784,7 @@ void Modify::add_compute(int narg, char **arg, char *suffix)
 
   int success = 0;
 
-  if (suffix && lmp->offaccel == 0) {
+  if (suffix && lmp->accelerator) {
     char estyle[256];
     sprintf(estyle,"%s/%s",arg[2],suffix);
     success = 1;
diff --git a/src/update.cpp b/src/update.cpp
index c8efe4b796..628a22bf11 100644
--- a/src/update.cpp
+++ b/src/update.cpp
@@ -25,14 +25,11 @@
 #include "region.h"
 #include "compute.h"
 #include "output.h"
-#include "accelerator.h"
 #include "memory.h"
 #include "error.h"
 
 using namespace LAMMPS_NS;
 
-enum{NOACCEL,OPT,GPU,USERCUDA};     // same as lammps.cpp
-
 /* ---------------------------------------------------------------------- */
 
 Update::Update(LAMMPS *lmp) : Pointers(lmp)
@@ -225,7 +222,7 @@ void Update::new_integrate(char *style, int narg, char **arg,
 {
   int success = 0;
 
-  if (suffix && lmp->offaccel == 0) {
+  if (suffix && lmp->accelerator) {
     sflag = 1;
     char estyle[256];
     sprintf(estyle,"%s/%s",style,suffix);