git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@8689 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2012-08-21 13:53:49 +00:00
parent 5a71c1a391
commit f1effd1c0d
5 changed files with 245 additions and 38 deletions
--- a/src/GPU/Install.sh
+++ b/src/GPU/Install.sh
@@ -21,6 +21,26 @@ include ..\/..\/lib\/gpu\/Makefile.lammps
 ' ../Makefile.package.settings
  fi
  
+  if (test -e ../pair_dipole_sf.cpp) then
+    cp pair_dipole_sf_gpu.cpp ..
+    cp pair_dipole_sf_gpu.h ..
+  fi
+
+  if (test -e ../pair_dipole_cut.cpp) then
+    cp pair_dipole_cut_gpu.cpp ..
+    cp pair_dipole_cut_gpu.h ..
+  fi
+  
+  if (test -e ../pair_yukawa_colloid.cpp) then
+    cp pair_yukawa_colloid_gpu.cpp ..
+    cp pair_yukawa_colloid_gpu.h ..
+  fi
+  
+  if (test -e ../pair_colloid.cpp) then
+    cp pair_colloid_gpu.cpp ..
+    cp pair_colloid_gpu.h ..
+  fi
+  
  if (test -e ../pair_yukawa.cpp) then
    cp pair_yukawa_gpu.cpp ..
    cp pair_yukawa_gpu.h ..
@@ -107,6 +127,15 @@ include ..\/..\/lib\/gpu\/Makefile.lammps
  cp pair_lj96_cut_gpu.cpp ..
  cp pair_lj_expand_gpu.cpp ..
  cp pair_lj_cut_coul_cut_gpu.cpp ..
+#  cp pair_lj_cut_coul_dsf.cpp ..
+#  cp pair_lj_cut_coul_dsf_gpu.cpp ..
+#  cp pair_coul_dsf.cpp ..
+#  cp pair_coul_dsf_gpu.cpp ..
+  cp pair_lj_cut_coul_debye_gpu.cpp ..
+  cp pair_born_gpu.cpp ..
+  cp pair_born_coul_wolf_gpu.cpp ..
+  cp pair_born_coul_long_gpu.cpp ..
+  cp pair_gauss_gpu.cpp ..
  
  cp fix_gpu.cpp ..

@@ -115,6 +144,15 @@ include ..\/..\/lib\/gpu\/Makefile.lammps
  cp pair_lj96_cut_gpu.h ..
  cp pair_lj_expand_gpu.h ..
  cp pair_lj_cut_coul_cut_gpu.h ..
+#  cp pair_lj_cut_coul_dsf.h ..
+#  cp pair_lj_cut_coul_dsf_gpu.h ..
+#  cp pair_coul_dsf.h ..
+#  cp pair_coul_dsf_gpu.h ..
+  cp pair_lj_cut_coul_debye_gpu.h ..
+  cp pair_born_gpu.h ..
+  cp pair_born_coul_wolf_gpu.h ..
+  cp pair_born_coul_long_gpu.h ..
+  cp pair_gauss_gpu.h ..
  
  cp fix_gpu.h ..
  cp gpu_extra.h ..
@@ -151,6 +189,19 @@ elif (test $1 = 0) then
  rm -f ../pair_resquared_gpu.cpp
  rm -f ../pair_table_gpu.cpp
  rm -f ../pair_yukawa_gpu.cpp
+  rm -f ../pair_born_gpu.cpp
+  rm -f ../pair_born_coul_wolf_gpu.cpp
+  rm -f ../pair_born_coul_long_gpu.cpp
+  rm -f ../pair_gauss_gpu.cpp
+  rm -f ../pair_colloid_gpu.cpp
+  rm -f ../pair_yukawa_colloid_gpu.cpp
+  rm -f ../pair_dipole_cut_gpu.cpp
+  rm -f ../pair_dipole_sf_gpu.cpp
+  rm -f ../pair_lj_cut_coul_dsf.cpp
+  rm -f ../pair_lj_cut_coul_dsf_gpu.cpp
+  rm -f ../pair_coul_dsf.cpp
+  rm -f ../pair_coul_dsf_gpu.cpp
+  rm -f ../pair_lj_cut_coul_debye_gpu.cpp
  rm -f ../pppm_gpu.cpp

  rm -f ../fix_gpu.cpp
@@ -177,6 +228,19 @@ elif (test $1 = 0) then
  rm -f ../pair_resquared_gpu.h
  rm -f ../pair_table_gpu.h
  rm -f ../pair_yukawa_gpu.h
+  rm -f ../pair_born_gpu.h
+  rm -f ../pair_born_coul_wolf_gpu.h
+  rm -f ../pair_born_coul_long_gpu.h
+  rm -f ../pair_gauss_gpu.h
+  rm -f ../pair_colloid_gpu.h
+  rm -f ../pair_yukawa_colloid_gpu.h
+  rm -f ../pair_dipole_cut_gpu.h
+  rm -f ../pair_dipole_sf_gpu.h
+  rm -f ../pair_lj_cut_coul_dsf.h
+  rm -f ../pair_lj_cut_coul_dsf_gpu.h
+  rm -f ../pair_coul_dsf.h
+  rm -f ../pair_coul_dsf_gpu.h
+  rm -f ../pair_lj_cut_coul_debye_gpu.h
  rm -f ../pppm_gpu.h

  rm -f ../fix_gpu.h
--- a/src/GPU/Package.sh
+++ b/src/GPU/Package.sh
@@ -9,6 +9,12 @@ for file in *.cpp *.h; do
  if (test $file = pair_gayberne_gpu.h -a ! -e ../pair_gayberne.cpp) then
    continue
  fi
+  if (test $file = pair_resquared_gpu.cpp -a ! -e ../pair_resquared.cpp) then
+    continue
+  fi
+  if (test $file = pair_resquared_gpu.h -a ! -e ../pair_resquared.cpp) then
+    continue
+  fi
  if (test $file = pair_lj_cut_coul_long_gpu.cpp -a ! -e ../pair_lj_cut_coul_long.cpp) then
    continue
  fi
@@ -33,6 +39,87 @@ for file in *.cpp *.h; do
  if (test $file = pair_lj_sdk_coul_long_gpu.h -a ! -e ../pair_lj_sdk_coul_long.cpp) then
    continue
  fi
+  if (test $file = pair_dipole_sf_gpu.cpp -a ! -e ../pair_dipole_sf.cpp) then
+    continue
+  fi
+  if (test $file = pair_dipole_sf_gpu.h -a ! -e ../pair_dipole_sf.cpp) then
+    continue
+  fi
+  if (test $file = pair_dipole_cut_gpu.cpp -a ! -e ../pair_dipole_cut.cpp) then
+    continue
+  fi
+  if (test $file = pair_dipole_cut_gpu.h -a ! -e ../pair_dipole_cut.cpp) then
+    continue
+  fi
+  if (test $file = pair_yukawa_colloid_gpu.cpp -a ! -e ../pair_yukawa_colloid.cpp) then
+    continue
+  fi
+  if (test $file = pair_yukawa_colloid_gpu.h -a ! -e ../pair_yukawa_colloid.cpp) then
+    continue
+  fi
+  if (test $file = pair_colloid_gpu.cpp -a ! -e ../pair_colloid.cpp) then
+    continue
+  fi
+  if (test $file = pair_colloid_gpu.h -a ! -e ../pair_colloid.cpp) then
+    continue
+  fi
+  if (test $file = pair_buck_coul_long_gpu.cpp -a ! -e ../pair_buck_coul_long.cpp) then
+    continue
+  fi
+  if (test $file = pair_buck_coul_long_gpu.h -a ! -e ../pair_buck_coul_long.cpp) then
+    continue
+  fi
+  if (test $file = pair_born_coul_long_gpu.cpp -a ! -e ../pair_born_coul_long.cpp) then
+    continue
+  fi
+  if (test $file = pair_born_coul_long_gpu.h -a ! -e ../pair_born_coul_long.cpp) then
+    continue
+  fi
+  if (test $file = pair_eam_gpu.cpp -a ! -e ../pair_eam.cpp) then
+    continue
+  fi
+  if (test $file = pair_eam_gpu.h -a ! -e ../pair_eam.cpp) then
+    continue
+  fi
+  if (test $file = pair_eam_alloy_gpu.cpp -a ! -e ../pair_eam_alloy.cpp) then
+    continue
+  fi
+  if (test $file = pair_eam_alloy_gpu.h -a ! -e ../pair_eam_alloy.cpp) then
+    continue
+  fi
+  if (test $file = pair_eam_fs_gpu.cpp -a ! -e ../pair_eam_fs.cpp) then
+    continue
+  fi
+  if (test $file = pair_eam_fs_gpu.h -a ! -e ../pair_eam_fs.cpp) then
+    continue
+  fi
+  if (test $file = pair_lj_class2_gpu.cpp -a ! -e ../pair_lj_class2.cpp) then
+    continue
+  fi
+  if (test $file = pair_lj_class2_coul_long_gpu.cpp -a ! -e ../pair_lj_class2_coul_long.cpp) then
+    continue
+  fi
+  if (test $file = pair_lj_class2_coul_long_gpu.h -a ! -e ../pair_lj_class2_coul_long.cpp) then
+    continue
+  fi
+  if (test $file = pair_lj_charmm_coul_long_gpu.cpp -a ! -e ../pair_lj_charmm_coul_long.cpp) then
+    continue
+  fi
+  if (test $file = pair_lj_charmm_coul_long_gpu.h -a ! -e ../pair_lj_charmm_coul_long.cpp) then
+    continue
+  fi
+  if (test $file = pair_lj_cut_coul_dsf_gpu.cpp -a ! -e ../pair_lj_cut_coul_dsf.cpp) then
+    continue
+  fi
+  if (test $file = pair_lj_cut_coul_dsf_gpu.h -a ! -e ../pair_lj_cut_coul_dsf.cpp) then
+    continue
+  fi
+  if (test $file = pppm_gpu.cpp -a ! -e ../pppm.cpp) then
+    continue
+  fi
+  if (test $file = pppm_gpu.h -a ! -e ../pppm.cpp) then
+    continue
+  fi
  
  if (test ! -e ../$file) then
    echo "  creating src/$file"
--- a/src/GPU/fix_gpu.cpp
+++ b/src/GPU/fix_gpu.cpp
@@ -35,7 +35,8 @@ enum{GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH};
 extern int lmp_init_device(MPI_Comm world, MPI_Comm replica,
                           const int first_gpu, const int last_gpu,
                           const int gpu_mode, const double particle_split,
-                           const int nthreads, const int t_per_atom);
+                           const int nthreads, const int t_per_atom,
+                           const double cell_size);
 extern void lmp_clear_device();
 extern double lmp_gpu_forces(double **f, double **tor, double *eatom,
                             double **vatom, double *virial, double &ecoul);
@@ -76,15 +77,23 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :

  int nthreads = 1;
  int threads_per_atom = -1;
-  if (narg == 9) {
-    if (strcmp(arg[7],"threads_per_atom") == 0)
-      threads_per_atom = atoi(arg[8]);
-    else if (strcmp(arg[7],"nthreads") == 0)
-      nthreads = atoi(arg[8]);
+  double cell_size = -1;
+
+  int iarg = 7;
+  while (iarg < narg) {
+    if (iarg+2 > narg) error->all(FLERR,"Illegal fix GPU command");
+
+    if (strcmp(arg[iarg],"threads_per_atom") == 0)
+      threads_per_atom = atoi(arg[iarg+1]);
+    else if (strcmp(arg[iarg],"nthreads") == 0)
+      nthreads = atoi(arg[iarg+1]);
+    else if (strcmp(arg[iarg],"cellsize") == 0)
+      cell_size = atof(arg[iarg+1]);
    else
      error->all(FLERR,"Illegal fix GPU command");
-  } else if (narg != 7)
-    error->all(FLERR,"Illegal fix GPU command");
+
+    iarg += 2;
+  }

  if (nthreads < 1)
    error->all(FLERR,"Illegal fix GPU command");
@@ -96,7 +105,7 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :

  int gpu_flag = lmp_init_device(universe->uworld, world, first_gpu, last_gpu,
                                 _gpu_mode, _particle_split, nthreads,
-                                 threads_per_atom);
+                                 threads_per_atom, cell_size);
  GPU_EXTRA::check_flag(gpu_flag,error,world);
 }

--- a/src/GPU/pppm_gpu.cpp
+++ b/src/GPU/pppm_gpu.cpp
@@ -85,7 +85,7 @@ void PPPM_GPU_API(forces)(double **f);

 /* ---------------------------------------------------------------------- */

-PPPMGPU::PPPMGPU(LAMMPS *lmp, int narg, char **arg) : PPPMOld(lmp, narg, arg)
+PPPMGPU::PPPMGPU(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg)
 {
  if (narg != 1) error->all(FLERR,"Illegal kspace_style pppm/gpu command");

@@ -111,10 +111,15 @@ PPPMGPU::~PPPMGPU()

 void PPPMGPU::init()
 {
-  PPPMOld::init();
+  PPPM::init();

-  if (strcmp(update->integrate_style,"verlet/split") == 0)
+  if (differentiation_flag == 1)
+    error->all(FLERR,"Cannot (yet) do analytic differentiation with pppm/gpu.");
+
+  if (strcmp(update->integrate_style,"verlet/split") == 0) {
    kspace_split=true;
+    old_nlocal = 0;
+  }

  if (kspace_split && universe->iworld == 0) {
    im_real_space = true;
@@ -153,21 +158,31 @@ void PPPMGPU::init()

 void PPPMGPU::compute(int eflag, int vflag)
 {
-  if (im_real_space) return;
+  int nago;
+  if (kspace_split) {
+    if (im_real_space) return;
+    if (atom->nlocal > old_nlocal) {
+      nago=0;
+      old_nlocal = atom->nlocal;
+    } else
+      nago=1;
+  } else
+    nago=neighbor->ago;

  // set energy/virial flags
  // invoke allocate_peratom() if needed for first time

  if (eflag || vflag) ev_setup(eflag,vflag);
-  else evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0;
+  else evflag = evflag_atom = eflag_global = vflag_global = 
+        eflag_atom = vflag_atom = 0;

-  if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) {
+  if (evflag_atom && !peratom_allocate_flag) {
    allocate_peratom();
    peratom_allocate_flag = 1;
  }

  bool success = true;
-  int flag=PPPM_GPU_API(spread)(neighbor->ago, atom->nlocal, atom->nlocal +
+  int flag=PPPM_GPU_API(spread)(nago, atom->nlocal, atom->nlocal +
                             atom->nghost, atom->x, atom->type, success,
                             atom->q, domain->boxlo, delxinv, delyinv,
                             delzinv);
@@ -241,7 +256,7 @@ void PPPMGPU::compute(int eflag, int vflag)

    if (vflag_atom) {
      for (i = 0; i < nlocal; i++)
-        for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale;
+        for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
    }
  }

@@ -300,7 +315,10 @@ void PPPMGPU::allocate()

  memory->create(gf_b,order,"pppm:gf_b");
  memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
+  memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d");
  memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
+  memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,
+                          "pppm:drho_coeff");

  // create 2 FFTs and a Remap
  // 1st FFT keeps data in FFT decompostion
@@ -349,7 +367,9 @@ void PPPMGPU::deallocate()

  memory->destroy(gf_b);
  memory->destroy2d_offset(rho1d,-order/2);
+  memory->destroy2d_offset(drho1d,-order/2);
  memory->destroy2d_offset(rho_coeff,(1-order)/2);
+  memory->destroy2d_offset(drho_coeff,(1-order)/2);

  delete fft1;
  delete fft2;
@@ -527,10 +547,20 @@ void PPPMGPU::brick2fft()
 }

 /* ----------------------------------------------------------------------
-   ghost-swap to fill ghost cells of my brick with field values
+   Same as base class - needed to call GPU version of fillbrick_ik.
 ------------------------------------------------------------------------- */

 void PPPMGPU::fillbrick()
+{
+  if (differentiation_flag == 1) fillbrick_ad();
+  else fillbrick_ik();
+}
+
+/* ----------------------------------------------------------------------
+   ghost-swap to fill ghost cells of my brick with field values
+------------------------------------------------------------------------- */
+
+void PPPMGPU::fillbrick_ik()
 {
  int i,n,ix,iy,iz;
  MPI_Request request;
@@ -727,10 +757,20 @@ void PPPMGPU::fillbrick()
 }

 /* ----------------------------------------------------------------------
-   FFT-based Poisson solver
+   Same code as base class - necessary to call GPU version of poisson_ik
 ------------------------------------------------------------------------- */

 void PPPMGPU::poisson()
+{
+  if (differentiation_flag == 1) poisson_ad();
+  else poisson_ik();
+}
+
+/* ----------------------------------------------------------------------
+   FFT-based Poisson solver
+------------------------------------------------------------------------- */
+
+void PPPMGPU::poisson_ik()
 {
  int i,j,k,n;
  double eng;
@@ -925,11 +965,11 @@ double PPPMGPU::memory_usage()

 int PPPMGPU::timing(int n, double &time3d, double &time1d) {
  if (im_real_space) {
-    time3d = 0.0;
-    time1d = 0.0;
+    time3d = 1.0;
+    time1d = 1.0;
    return 4;
  }
-  PPPMOld::timing(n,time3d,time1d);
+  PPPM::timing(n,time3d,time1d);
  return 4;
 }

@@ -940,5 +980,5 @@ int PPPMGPU::timing(int n, double &time3d, double &time1d) {
 void PPPMGPU::setup()
 {
  if (im_real_space) return;
-  PPPMOld::setup();
+  PPPM::setup();
 }
--- a/src/GPU/pppm_gpu.h
+++ b/src/GPU/pppm_gpu.h
@@ -20,31 +20,34 @@ KSpaceStyle(pppm/gpu,PPPMGPU)
 #ifndef LMP_PPPM_GPU_H
 #define LMP_PPPM_GPU_H

-#include "pppm_old.h"
+#include "pppm.h"

 namespace LAMMPS_NS {

-class PPPMGPU : public PPPMOld {
+class PPPMGPU : public PPPM {
 public:
  PPPMGPU(class LAMMPS *, int, char **);
  virtual ~PPPMGPU();
-  virtual void init();
-  virtual void setup();
-  virtual void compute(int, int);
-  virtual int timing(int, double &, double &);
-  virtual double memory_usage();
+  void init();
+  void setup();
+  void compute(int, int);
+  int timing(int, double &, double &);
+  double memory_usage();

 protected:

  FFT_SCALAR ***density_brick_gpu, ***vd_brick;
  bool kspace_split, im_real_space;

-  virtual void allocate();
-  virtual void deallocate();
-  virtual void brick2fft();
-  virtual void fillbrick();
-  virtual void poisson();
+  void allocate();
+  void deallocate();
+  void brick2fft();
+  void fillbrick();
+  void fillbrick_ik();
+  void poisson();
+  void poisson_ik();

+  int old_nlocal;
  double poisson_time;

  FFT_SCALAR ***create_3d_offset(int, int, int, int, int, int, const char *,
@@ -65,6 +68,10 @@ Self-explanatory.  Check the input script syntax and compare to the
 documentation for the command.  You can use -echo screen as a
 command-line option when running LAMMPS to see the offending line.

+E: Cannot (yet) do analytic differentiation with pppm/gpu.
+
+Self-explanatory.
+
 E: Cannot use order greater than 8 with pppm/gpu.

 Self-explanatory.