git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@8689 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2012-08-21 13:53:49 +00:00
parent 5a71c1a391
commit f1effd1c0d
5 changed files with 245 additions and 38 deletions

View File

@ -20,7 +20,27 @@ if (test $1 = 1) then
include ..\/..\/lib\/gpu\/Makefile.lammps include ..\/..\/lib\/gpu\/Makefile.lammps
' ../Makefile.package.settings ' ../Makefile.package.settings
fi fi
if (test -e ../pair_dipole_sf.cpp) then
cp pair_dipole_sf_gpu.cpp ..
cp pair_dipole_sf_gpu.h ..
fi
if (test -e ../pair_dipole_cut.cpp) then
cp pair_dipole_cut_gpu.cpp ..
cp pair_dipole_cut_gpu.h ..
fi
if (test -e ../pair_yukawa_colloid.cpp) then
cp pair_yukawa_colloid_gpu.cpp ..
cp pair_yukawa_colloid_gpu.h ..
fi
if (test -e ../pair_colloid.cpp) then
cp pair_colloid_gpu.cpp ..
cp pair_colloid_gpu.h ..
fi
if (test -e ../pair_yukawa.cpp) then if (test -e ../pair_yukawa.cpp) then
cp pair_yukawa_gpu.cpp .. cp pair_yukawa_gpu.cpp ..
cp pair_yukawa_gpu.h .. cp pair_yukawa_gpu.h ..
@ -54,7 +74,7 @@ include ..\/..\/lib\/gpu\/Makefile.lammps
cp pair_eam_fs_gpu.cpp .. cp pair_eam_fs_gpu.cpp ..
cp pair_eam_fs_gpu.h .. cp pair_eam_fs_gpu.h ..
fi fi
if (test -e ../pair_gayberne.cpp) then if (test -e ../pair_gayberne.cpp) then
cp pair_gayberne_gpu.cpp .. cp pair_gayberne_gpu.cpp ..
cp pair_gayberne_gpu.h .. cp pair_gayberne_gpu.h ..
@ -107,7 +127,16 @@ include ..\/..\/lib\/gpu\/Makefile.lammps
cp pair_lj96_cut_gpu.cpp .. cp pair_lj96_cut_gpu.cpp ..
cp pair_lj_expand_gpu.cpp .. cp pair_lj_expand_gpu.cpp ..
cp pair_lj_cut_coul_cut_gpu.cpp .. cp pair_lj_cut_coul_cut_gpu.cpp ..
# cp pair_lj_cut_coul_dsf.cpp ..
# cp pair_lj_cut_coul_dsf_gpu.cpp ..
# cp pair_coul_dsf.cpp ..
# cp pair_coul_dsf_gpu.cpp ..
cp pair_lj_cut_coul_debye_gpu.cpp ..
cp pair_born_gpu.cpp ..
cp pair_born_coul_wolf_gpu.cpp ..
cp pair_born_coul_long_gpu.cpp ..
cp pair_gauss_gpu.cpp ..
cp fix_gpu.cpp .. cp fix_gpu.cpp ..
cp pair_lj_cut_gpu.h .. cp pair_lj_cut_gpu.h ..
@ -115,6 +144,15 @@ include ..\/..\/lib\/gpu\/Makefile.lammps
cp pair_lj96_cut_gpu.h .. cp pair_lj96_cut_gpu.h ..
cp pair_lj_expand_gpu.h .. cp pair_lj_expand_gpu.h ..
cp pair_lj_cut_coul_cut_gpu.h .. cp pair_lj_cut_coul_cut_gpu.h ..
# cp pair_lj_cut_coul_dsf.h ..
# cp pair_lj_cut_coul_dsf_gpu.h ..
# cp pair_coul_dsf.h ..
# cp pair_coul_dsf_gpu.h ..
cp pair_lj_cut_coul_debye_gpu.h ..
cp pair_born_gpu.h ..
cp pair_born_coul_wolf_gpu.h ..
cp pair_born_coul_long_gpu.h ..
cp pair_gauss_gpu.h ..
cp fix_gpu.h .. cp fix_gpu.h ..
cp gpu_extra.h .. cp gpu_extra.h ..
@ -151,6 +189,19 @@ elif (test $1 = 0) then
rm -f ../pair_resquared_gpu.cpp rm -f ../pair_resquared_gpu.cpp
rm -f ../pair_table_gpu.cpp rm -f ../pair_table_gpu.cpp
rm -f ../pair_yukawa_gpu.cpp rm -f ../pair_yukawa_gpu.cpp
rm -f ../pair_born_gpu.cpp
rm -f ../pair_born_coul_wolf_gpu.cpp
rm -f ../pair_born_coul_long_gpu.cpp
rm -f ../pair_gauss_gpu.cpp
rm -f ../pair_colloid_gpu.cpp
rm -f ../pair_yukawa_colloid_gpu.cpp
rm -f ../pair_dipole_cut_gpu.cpp
rm -f ../pair_dipole_sf_gpu.cpp
rm -f ../pair_lj_cut_coul_dsf.cpp
rm -f ../pair_lj_cut_coul_dsf_gpu.cpp
rm -f ../pair_coul_dsf.cpp
rm -f ../pair_coul_dsf_gpu.cpp
rm -f ../pair_lj_cut_coul_debye_gpu.cpp
rm -f ../pppm_gpu.cpp rm -f ../pppm_gpu.cpp
rm -f ../fix_gpu.cpp rm -f ../fix_gpu.cpp
@ -177,6 +228,19 @@ elif (test $1 = 0) then
rm -f ../pair_resquared_gpu.h rm -f ../pair_resquared_gpu.h
rm -f ../pair_table_gpu.h rm -f ../pair_table_gpu.h
rm -f ../pair_yukawa_gpu.h rm -f ../pair_yukawa_gpu.h
rm -f ../pair_born_gpu.h
rm -f ../pair_born_coul_wolf_gpu.h
rm -f ../pair_born_coul_long_gpu.h
rm -f ../pair_gauss_gpu.h
rm -f ../pair_colloid_gpu.h
rm -f ../pair_yukawa_colloid_gpu.h
rm -f ../pair_dipole_cut_gpu.h
rm -f ../pair_dipole_sf_gpu.h
rm -f ../pair_lj_cut_coul_dsf.h
rm -f ../pair_lj_cut_coul_dsf_gpu.h
rm -f ../pair_coul_dsf.h
rm -f ../pair_coul_dsf_gpu.h
rm -f ../pair_lj_cut_coul_debye_gpu.h
rm -f ../pppm_gpu.h rm -f ../pppm_gpu.h
rm -f ../fix_gpu.h rm -f ../fix_gpu.h

View File

@ -9,6 +9,12 @@ for file in *.cpp *.h; do
if (test $file = pair_gayberne_gpu.h -a ! -e ../pair_gayberne.cpp) then if (test $file = pair_gayberne_gpu.h -a ! -e ../pair_gayberne.cpp) then
continue continue
fi fi
if (test $file = pair_resquared_gpu.cpp -a ! -e ../pair_resquared.cpp) then
continue
fi
if (test $file = pair_resquared_gpu.h -a ! -e ../pair_resquared.cpp) then
continue
fi
if (test $file = pair_lj_cut_coul_long_gpu.cpp -a ! -e ../pair_lj_cut_coul_long.cpp) then if (test $file = pair_lj_cut_coul_long_gpu.cpp -a ! -e ../pair_lj_cut_coul_long.cpp) then
continue continue
fi fi
@ -33,7 +39,88 @@ for file in *.cpp *.h; do
if (test $file = pair_lj_sdk_coul_long_gpu.h -a ! -e ../pair_lj_sdk_coul_long.cpp) then if (test $file = pair_lj_sdk_coul_long_gpu.h -a ! -e ../pair_lj_sdk_coul_long.cpp) then
continue continue
fi fi
if (test $file = pair_dipole_sf_gpu.cpp -a ! -e ../pair_dipole_sf.cpp) then
continue
fi
if (test $file = pair_dipole_sf_gpu.h -a ! -e ../pair_dipole_sf.cpp) then
continue
fi
if (test $file = pair_dipole_cut_gpu.cpp -a ! -e ../pair_dipole_cut.cpp) then
continue
fi
if (test $file = pair_dipole_cut_gpu.h -a ! -e ../pair_dipole_cut.cpp) then
continue
fi
if (test $file = pair_yukawa_colloid_gpu.cpp -a ! -e ../pair_yukawa_colloid.cpp) then
continue
fi
if (test $file = pair_yukawa_colloid_gpu.h -a ! -e ../pair_yukawa_colloid.cpp) then
continue
fi
if (test $file = pair_colloid_gpu.cpp -a ! -e ../pair_colloid.cpp) then
continue
fi
if (test $file = pair_colloid_gpu.h -a ! -e ../pair_colloid.cpp) then
continue
fi
if (test $file = pair_buck_coul_long_gpu.cpp -a ! -e ../pair_buck_coul_long.cpp) then
continue
fi
if (test $file = pair_buck_coul_long_gpu.h -a ! -e ../pair_buck_coul_long.cpp) then
continue
fi
if (test $file = pair_born_coul_long_gpu.cpp -a ! -e ../pair_born_coul_long.cpp) then
continue
fi
if (test $file = pair_born_coul_long_gpu.h -a ! -e ../pair_born_coul_long.cpp) then
continue
fi
if (test $file = pair_eam_gpu.cpp -a ! -e ../pair_eam.cpp) then
continue
fi
if (test $file = pair_eam_gpu.h -a ! -e ../pair_eam.cpp) then
continue
fi
if (test $file = pair_eam_alloy_gpu.cpp -a ! -e ../pair_eam_alloy.cpp) then
continue
fi
if (test $file = pair_eam_alloy_gpu.h -a ! -e ../pair_eam_alloy.cpp) then
continue
fi
if (test $file = pair_eam_fs_gpu.cpp -a ! -e ../pair_eam_fs.cpp) then
continue
fi
if (test $file = pair_eam_fs_gpu.h -a ! -e ../pair_eam_fs.cpp) then
continue
fi
if (test $file = pair_lj_class2_gpu.cpp -a ! -e ../pair_lj_class2.cpp) then
continue
fi
if (test $file = pair_lj_class2_coul_long_gpu.cpp -a ! -e ../pair_lj_class2_coul_long.cpp) then
continue
fi
if (test $file = pair_lj_class2_coul_long_gpu.h -a ! -e ../pair_lj_class2_coul_long.cpp) then
continue
fi
if (test $file = pair_lj_charmm_coul_long_gpu.cpp -a ! -e ../pair_lj_charmm_coul_long.cpp) then
continue
fi
if (test $file = pair_lj_charmm_coul_long_gpu.h -a ! -e ../pair_lj_charmm_coul_long.cpp) then
continue
fi
if (test $file = pair_lj_cut_coul_dsf_gpu.cpp -a ! -e ../pair_lj_cut_coul_dsf.cpp) then
continue
fi
if (test $file = pair_lj_cut_coul_dsf_gpu.h -a ! -e ../pair_lj_cut_coul_dsf.cpp) then
continue
fi
if (test $file = pppm_gpu.cpp -a ! -e ../pppm.cpp) then
continue
fi
if (test $file = pppm_gpu.h -a ! -e ../pppm.cpp) then
continue
fi
if (test ! -e ../$file) then if (test ! -e ../$file) then
echo " creating src/$file" echo " creating src/$file"
cp $file .. cp $file ..

View File

@ -35,7 +35,8 @@ enum{GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH};
extern int lmp_init_device(MPI_Comm world, MPI_Comm replica, extern int lmp_init_device(MPI_Comm world, MPI_Comm replica,
const int first_gpu, const int last_gpu, const int first_gpu, const int last_gpu,
const int gpu_mode, const double particle_split, const int gpu_mode, const double particle_split,
const int nthreads, const int t_per_atom); const int nthreads, const int t_per_atom,
const double cell_size);
extern void lmp_clear_device(); extern void lmp_clear_device();
extern double lmp_gpu_forces(double **f, double **tor, double *eatom, extern double lmp_gpu_forces(double **f, double **tor, double *eatom,
double **vatom, double *virial, double &ecoul); double **vatom, double *virial, double &ecoul);
@ -76,15 +77,23 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
int nthreads = 1; int nthreads = 1;
int threads_per_atom = -1; int threads_per_atom = -1;
if (narg == 9) { double cell_size = -1;
if (strcmp(arg[7],"threads_per_atom") == 0)
threads_per_atom = atoi(arg[8]); int iarg = 7;
else if (strcmp(arg[7],"nthreads") == 0) while (iarg < narg) {
nthreads = atoi(arg[8]); if (iarg+2 > narg) error->all(FLERR,"Illegal fix GPU command");
if (strcmp(arg[iarg],"threads_per_atom") == 0)
threads_per_atom = atoi(arg[iarg+1]);
else if (strcmp(arg[iarg],"nthreads") == 0)
nthreads = atoi(arg[iarg+1]);
else if (strcmp(arg[iarg],"cellsize") == 0)
cell_size = atof(arg[iarg+1]);
else else
error->all(FLERR,"Illegal fix GPU command"); error->all(FLERR,"Illegal fix GPU command");
} else if (narg != 7)
error->all(FLERR,"Illegal fix GPU command"); iarg += 2;
}
if (nthreads < 1) if (nthreads < 1)
error->all(FLERR,"Illegal fix GPU command"); error->all(FLERR,"Illegal fix GPU command");
@ -96,7 +105,7 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
int gpu_flag = lmp_init_device(universe->uworld, world, first_gpu, last_gpu, int gpu_flag = lmp_init_device(universe->uworld, world, first_gpu, last_gpu,
_gpu_mode, _particle_split, nthreads, _gpu_mode, _particle_split, nthreads,
threads_per_atom); threads_per_atom, cell_size);
GPU_EXTRA::check_flag(gpu_flag,error,world); GPU_EXTRA::check_flag(gpu_flag,error,world);
} }

View File

@ -85,7 +85,7 @@ void PPPM_GPU_API(forces)(double **f);
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
PPPMGPU::PPPMGPU(LAMMPS *lmp, int narg, char **arg) : PPPMOld(lmp, narg, arg) PPPMGPU::PPPMGPU(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg)
{ {
if (narg != 1) error->all(FLERR,"Illegal kspace_style pppm/gpu command"); if (narg != 1) error->all(FLERR,"Illegal kspace_style pppm/gpu command");
@ -111,10 +111,15 @@ PPPMGPU::~PPPMGPU()
void PPPMGPU::init() void PPPMGPU::init()
{ {
PPPMOld::init(); PPPM::init();
if (strcmp(update->integrate_style,"verlet/split") == 0) if (differentiation_flag == 1)
error->all(FLERR,"Cannot (yet) do analytic differentiation with pppm/gpu.");
if (strcmp(update->integrate_style,"verlet/split") == 0) {
kspace_split=true; kspace_split=true;
old_nlocal = 0;
}
if (kspace_split && universe->iworld == 0) { if (kspace_split && universe->iworld == 0) {
im_real_space = true; im_real_space = true;
@ -153,21 +158,31 @@ void PPPMGPU::init()
void PPPMGPU::compute(int eflag, int vflag) void PPPMGPU::compute(int eflag, int vflag)
{ {
if (im_real_space) return; int nago;
if (kspace_split) {
if (im_real_space) return;
if (atom->nlocal > old_nlocal) {
nago=0;
old_nlocal = atom->nlocal;
} else
nago=1;
} else
nago=neighbor->ago;
// set energy/virial flags // set energy/virial flags
// invoke allocate_peratom() if needed for first time // invoke allocate_peratom() if needed for first time
if (eflag || vflag) ev_setup(eflag,vflag); if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0; else evflag = evflag_atom = eflag_global = vflag_global =
eflag_atom = vflag_atom = 0;
if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) { if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom(); allocate_peratom();
peratom_allocate_flag = 1; peratom_allocate_flag = 1;
} }
bool success = true; bool success = true;
int flag=PPPM_GPU_API(spread)(neighbor->ago, atom->nlocal, atom->nlocal + int flag=PPPM_GPU_API(spread)(nago, atom->nlocal, atom->nlocal +
atom->nghost, atom->x, atom->type, success, atom->nghost, atom->x, atom->type, success,
atom->q, domain->boxlo, delxinv, delyinv, atom->q, domain->boxlo, delxinv, delyinv,
delzinv); delzinv);
@ -241,7 +256,7 @@ void PPPMGPU::compute(int eflag, int vflag)
if (vflag_atom) { if (vflag_atom) {
for (i = 0; i < nlocal; i++) for (i = 0; i < nlocal; i++)
for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale; for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
} }
} }
@ -300,7 +315,10 @@ void PPPMGPU::allocate()
memory->create(gf_b,order,"pppm:gf_b"); memory->create(gf_b,order,"pppm:gf_b");
memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d");
memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,
"pppm:drho_coeff");
// create 2 FFTs and a Remap // create 2 FFTs and a Remap
// 1st FFT keeps data in FFT decompostion // 1st FFT keeps data in FFT decompostion
@ -349,7 +367,9 @@ void PPPMGPU::deallocate()
memory->destroy(gf_b); memory->destroy(gf_b);
memory->destroy2d_offset(rho1d,-order/2); memory->destroy2d_offset(rho1d,-order/2);
memory->destroy2d_offset(drho1d,-order/2);
memory->destroy2d_offset(rho_coeff,(1-order)/2); memory->destroy2d_offset(rho_coeff,(1-order)/2);
memory->destroy2d_offset(drho_coeff,(1-order)/2);
delete fft1; delete fft1;
delete fft2; delete fft2;
@ -527,10 +547,20 @@ void PPPMGPU::brick2fft()
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
ghost-swap to fill ghost cells of my brick with field values Same as base class - needed to call GPU version of fillbrick_.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void PPPMGPU::fillbrick() void PPPMGPU::fillbrick()
{
if (differentiation_flag == 1) fillbrick_ad();
else fillbrick_ik();
}
/* ----------------------------------------------------------------------
ghost-swap to fill ghost cells of my brick with field values
------------------------------------------------------------------------- */
void PPPMGPU::fillbrick_ik()
{ {
int i,n,ix,iy,iz; int i,n,ix,iy,iz;
MPI_Request request; MPI_Request request;
@ -727,10 +757,20 @@ void PPPMGPU::fillbrick()
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
FFT-based Poisson solver Same code as base class - necessary to call GPU version of poisson_ik
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void PPPMGPU::poisson() void PPPMGPU::poisson()
{
if (differentiation_flag == 1) poisson_ad();
else poisson_ik();
}
/* ----------------------------------------------------------------------
FFT-based Poisson solver
------------------------------------------------------------------------- */
void PPPMGPU::poisson_ik()
{ {
int i,j,k,n; int i,j,k,n;
double eng; double eng;
@ -925,11 +965,11 @@ double PPPMGPU::memory_usage()
int PPPMGPU::timing(int n, double &time3d, double &time1d) { int PPPMGPU::timing(int n, double &time3d, double &time1d) {
if (im_real_space) { if (im_real_space) {
time3d = 0.0; time3d = 1.0;
time1d = 0.0; time1d = 1.0;
return 4; return 4;
} }
PPPMOld::timing(n,time3d,time1d); PPPM::timing(n,time3d,time1d);
return 4; return 4;
} }
@ -940,5 +980,5 @@ int PPPMGPU::timing(int n, double &time3d, double &time1d) {
void PPPMGPU::setup() void PPPMGPU::setup()
{ {
if (im_real_space) return; if (im_real_space) return;
PPPMOld::setup(); PPPM::setup();
} }

View File

@ -20,31 +20,34 @@ KSpaceStyle(pppm/gpu,PPPMGPU)
#ifndef LMP_PPPM_GPU_H #ifndef LMP_PPPM_GPU_H
#define LMP_PPPM_GPU_H #define LMP_PPPM_GPU_H
#include "pppm_old.h" #include "pppm.h"
namespace LAMMPS_NS { namespace LAMMPS_NS {
class PPPMGPU : public PPPMOld { class PPPMGPU : public PPPM {
public: public:
PPPMGPU(class LAMMPS *, int, char **); PPPMGPU(class LAMMPS *, int, char **);
virtual ~PPPMGPU(); virtual ~PPPMGPU();
virtual void init(); void init();
virtual void setup(); void setup();
virtual void compute(int, int); void compute(int, int);
virtual int timing(int, double &, double &); int timing(int, double &, double &);
virtual double memory_usage(); double memory_usage();
protected: protected:
FFT_SCALAR ***density_brick_gpu, ***vd_brick; FFT_SCALAR ***density_brick_gpu, ***vd_brick;
bool kspace_split, im_real_space; bool kspace_split, im_real_space;
virtual void allocate(); void allocate();
virtual void deallocate(); void deallocate();
virtual void brick2fft(); void brick2fft();
virtual void fillbrick(); void fillbrick();
virtual void poisson(); void fillbrick_ik();
void poisson();
void poisson_ik();
int old_nlocal;
double poisson_time; double poisson_time;
FFT_SCALAR ***create_3d_offset(int, int, int, int, int, int, const char *, FFT_SCALAR ***create_3d_offset(int, int, int, int, int, int, const char *,
@ -65,6 +68,10 @@ Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line. command-line option when running LAMMPS to see the offending line.
E: Cannot (yet) do analytic differentiation with pppm/gpu.
Self-explanatory.
E: Cannot use order greater than 8 with pppm/gpu. E: Cannot use order greater than 8 with pppm/gpu.
Self-explanatory. Self-explanatory.