git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@8689 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2012-08-21 13:53:49 +00:00
parent 5a71c1a391
commit f1effd1c0d
5 changed files with 245 additions and 38 deletions

View File

@ -21,6 +21,26 @@ include ..\/..\/lib\/gpu\/Makefile.lammps
' ../Makefile.package.settings
fi
# Optional GPU pair styles: each pair of files is copied into the parent
# directory only when the matching base style source is already there.

# dipole/sf
if test -e ../pair_dipole_sf.cpp; then
  cp pair_dipole_sf_gpu.cpp ..
  cp pair_dipole_sf_gpu.h ..
fi

# dipole/cut
if test -e ../pair_dipole_cut.cpp; then
  cp pair_dipole_cut_gpu.cpp ..
  cp pair_dipole_cut_gpu.h ..
fi

# yukawa/colloid
if test -e ../pair_yukawa_colloid.cpp; then
  cp pair_yukawa_colloid_gpu.cpp ..
  cp pair_yukawa_colloid_gpu.h ..
fi

# colloid
if test -e ../pair_colloid.cpp; then
  cp pair_colloid_gpu.cpp ..
  cp pair_colloid_gpu.h ..
fi
if (test -e ../pair_yukawa.cpp) then
cp pair_yukawa_gpu.cpp ..
cp pair_yukawa_gpu.h ..
@ -107,6 +127,15 @@ include ..\/..\/lib\/gpu\/Makefile.lammps
# Unconditional copies of GPU source files into the parent directory.
cp pair_lj96_cut_gpu.cpp ..
cp pair_lj_expand_gpu.cpp ..
cp pair_lj_cut_coul_cut_gpu.cpp ..
# The four dsf sources below are disabled: they are not copied.
# cp pair_lj_cut_coul_dsf.cpp ..
# cp pair_lj_cut_coul_dsf_gpu.cpp ..
# cp pair_coul_dsf.cpp ..
# cp pair_coul_dsf_gpu.cpp ..
cp pair_lj_cut_coul_debye_gpu.cpp ..
cp pair_born_gpu.cpp ..
cp pair_born_coul_wolf_gpu.cpp ..
cp pair_born_coul_long_gpu.cpp ..
cp pair_gauss_gpu.cpp ..
cp fix_gpu.cpp ..
@ -115,6 +144,15 @@ include ..\/..\/lib\/gpu\/Makefile.lammps
# Unconditional copies of GPU header files into the parent directory.
cp pair_lj96_cut_gpu.h ..
cp pair_lj_expand_gpu.h ..
cp pair_lj_cut_coul_cut_gpu.h ..
# The four dsf headers below are disabled: they are not copied.
# cp pair_lj_cut_coul_dsf.h ..
# cp pair_lj_cut_coul_dsf_gpu.h ..
# cp pair_coul_dsf.h ..
# cp pair_coul_dsf_gpu.h ..
cp pair_lj_cut_coul_debye_gpu.h ..
cp pair_born_gpu.h ..
cp pair_born_coul_wolf_gpu.h ..
cp pair_born_coul_long_gpu.h ..
cp pair_gauss_gpu.h ..
cp fix_gpu.h ..
cp gpu_extra.h ..
@ -151,6 +189,19 @@ elif (test $1 = 0) then
# Remove the GPU source files from the parent directory.
rm -f ../pair_resquared_gpu.cpp
rm -f ../pair_table_gpu.cpp
rm -f ../pair_yukawa_gpu.cpp
rm -f ../pair_born_gpu.cpp
rm -f ../pair_born_coul_wolf_gpu.cpp
rm -f ../pair_born_coul_long_gpu.cpp
rm -f ../pair_gauss_gpu.cpp
rm -f ../pair_colloid_gpu.cpp
rm -f ../pair_yukawa_colloid_gpu.cpp
rm -f ../pair_dipole_cut_gpu.cpp
rm -f ../pair_dipole_sf_gpu.cpp
# The non-GPU dsf sources are never copied by the install branch (their cp
# lines are commented out), so a copy found in the parent directory does not
# belong to this package and must not be deleted here.  Re-enable these two
# lines together with the matching cp lines.
# rm -f ../pair_lj_cut_coul_dsf.cpp
rm -f ../pair_lj_cut_coul_dsf_gpu.cpp
# rm -f ../pair_coul_dsf.cpp
rm -f ../pair_coul_dsf_gpu.cpp
rm -f ../pair_lj_cut_coul_debye_gpu.cpp
rm -f ../pppm_gpu.cpp
rm -f ../fix_gpu.cpp
@ -177,6 +228,19 @@ elif (test $1 = 0) then
# Remove the GPU header files from the parent directory.
rm -f ../pair_resquared_gpu.h
rm -f ../pair_table_gpu.h
rm -f ../pair_yukawa_gpu.h
rm -f ../pair_born_gpu.h
rm -f ../pair_born_coul_wolf_gpu.h
rm -f ../pair_born_coul_long_gpu.h
rm -f ../pair_gauss_gpu.h
rm -f ../pair_colloid_gpu.h
rm -f ../pair_yukawa_colloid_gpu.h
rm -f ../pair_dipole_cut_gpu.h
rm -f ../pair_dipole_sf_gpu.h
# The non-GPU dsf headers are never copied by the install branch (their cp
# lines are commented out), so a copy found in the parent directory does not
# belong to this package and must not be deleted here.  Re-enable these two
# lines together with the matching cp lines.
# rm -f ../pair_lj_cut_coul_dsf.h
rm -f ../pair_lj_cut_coul_dsf_gpu.h
# rm -f ../pair_coul_dsf.h
rm -f ../pair_coul_dsf_gpu.h
rm -f ../pair_lj_cut_coul_debye_gpu.h
rm -f ../pppm_gpu.h
rm -f ../fix_gpu.h

View File

@ -9,6 +9,12 @@ for file in *.cpp *.h; do
# Move on to the next file when $file is a GPU variant whose base style
# source is missing from the parent directory.
case $file in
  pair_gayberne_gpu.h)
    test -e ../pair_gayberne.cpp || continue ;;
  pair_resquared_gpu.cpp | pair_resquared_gpu.h)
    test -e ../pair_resquared.cpp || continue ;;
  pair_lj_cut_coul_long_gpu.cpp)
    test -e ../pair_lj_cut_coul_long.cpp || continue ;;
esac
@ -33,6 +39,87 @@ for file in *.cpp *.h; do
# Move on to the next file when $file is a GPU variant whose base style
# source is missing from the parent directory.
case $file in
  pair_lj_sdk_coul_long_gpu.h)
    test -e ../pair_lj_sdk_coul_long.cpp || continue ;;
  pair_dipole_sf_gpu.cpp | pair_dipole_sf_gpu.h)
    test -e ../pair_dipole_sf.cpp || continue ;;
  pair_dipole_cut_gpu.cpp | pair_dipole_cut_gpu.h)
    test -e ../pair_dipole_cut.cpp || continue ;;
  pair_yukawa_colloid_gpu.cpp | pair_yukawa_colloid_gpu.h)
    test -e ../pair_yukawa_colloid.cpp || continue ;;
  pair_colloid_gpu.cpp | pair_colloid_gpu.h)
    test -e ../pair_colloid.cpp || continue ;;
  pair_buck_coul_long_gpu.cpp | pair_buck_coul_long_gpu.h)
    test -e ../pair_buck_coul_long.cpp || continue ;;
  pair_born_coul_long_gpu.cpp | pair_born_coul_long_gpu.h)
    test -e ../pair_born_coul_long.cpp || continue ;;
  pair_eam_gpu.cpp | pair_eam_gpu.h)
    test -e ../pair_eam.cpp || continue ;;
  pair_eam_alloy_gpu.cpp | pair_eam_alloy_gpu.h)
    test -e ../pair_eam_alloy.cpp || continue ;;
  pair_eam_fs_gpu.cpp | pair_eam_fs_gpu.h)
    test -e ../pair_eam_fs.cpp || continue ;;
esac
# Skip the GPU lj/class2 files unless the base style source is present in
# the parent directory.  The header gets the same guard as the source file,
# like every other style in this list; without it pair_lj_class2_gpu.h would
# be processed on its own even when pair_lj_class2.cpp is absent.
if (test $file = pair_lj_class2_gpu.cpp -a ! -e ../pair_lj_class2.cpp) then
continue
fi
if (test $file = pair_lj_class2_gpu.h -a ! -e ../pair_lj_class2.cpp) then
continue
fi
# Move on to the next file when $file is a GPU variant whose base style
# source is missing from the parent directory.
case $file in
  pair_lj_class2_coul_long_gpu.cpp | pair_lj_class2_coul_long_gpu.h)
    test -e ../pair_lj_class2_coul_long.cpp || continue ;;
  pair_lj_charmm_coul_long_gpu.cpp | pair_lj_charmm_coul_long_gpu.h)
    test -e ../pair_lj_charmm_coul_long.cpp || continue ;;
  pair_lj_cut_coul_dsf_gpu.cpp | pair_lj_cut_coul_dsf_gpu.h)
    test -e ../pair_lj_cut_coul_dsf.cpp || continue ;;
  pppm_gpu.cpp | pppm_gpu.h)
    test -e ../pppm.cpp || continue ;;
esac
if (test ! -e ../$file) then
echo " creating src/$file"

View File

@ -35,7 +35,8 @@ enum{GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH};
extern int lmp_init_device(MPI_Comm world, MPI_Comm replica,
const int first_gpu, const int last_gpu,
const int gpu_mode, const double particle_split,
const int nthreads, const int t_per_atom);
const int nthreads, const int t_per_atom,
const double cell_size);
extern void lmp_clear_device();
extern double lmp_gpu_forces(double **f, double **tor, double *eatom,
double **vatom, double *virial, double &ecoul);
@ -76,15 +77,23 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
int nthreads = 1;
int threads_per_atom = -1;
if (narg == 9) {
if (strcmp(arg[7],"threads_per_atom") == 0)
threads_per_atom = atoi(arg[8]);
else if (strcmp(arg[7],"nthreads") == 0)
nthreads = atoi(arg[8]);
double cell_size = -1;
int iarg = 7;
while (iarg < narg) {
if (iarg+2 > narg) error->all(FLERR,"Illegal fix GPU command");
if (strcmp(arg[iarg],"threads_per_atom") == 0)
threads_per_atom = atoi(arg[iarg+1]);
else if (strcmp(arg[iarg],"nthreads") == 0)
nthreads = atoi(arg[iarg+1]);
else if (strcmp(arg[iarg],"cellsize") == 0)
cell_size = atof(arg[iarg+1]);
else
error->all(FLERR,"Illegal fix GPU command");
} else if (narg != 7)
error->all(FLERR,"Illegal fix GPU command");
iarg += 2;
}
if (nthreads < 1)
error->all(FLERR,"Illegal fix GPU command");
@ -96,7 +105,7 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
int gpu_flag = lmp_init_device(universe->uworld, world, first_gpu, last_gpu,
_gpu_mode, _particle_split, nthreads,
threads_per_atom);
threads_per_atom, cell_size);
GPU_EXTRA::check_flag(gpu_flag,error,world);
}

View File

@ -85,7 +85,7 @@ void PPPM_GPU_API(forces)(double **f);
/* ---------------------------------------------------------------------- */
PPPMGPU::PPPMGPU(LAMMPS *lmp, int narg, char **arg) : PPPMOld(lmp, narg, arg)
PPPMGPU::PPPMGPU(LAMMPS *lmp, int narg, char **arg) : PPPM(lmp, narg, arg)
{
if (narg != 1) error->all(FLERR,"Illegal kspace_style pppm/gpu command");
@ -111,10 +111,15 @@ PPPMGPU::~PPPMGPU()
void PPPMGPU::init()
{
PPPMOld::init();
PPPM::init();
if (strcmp(update->integrate_style,"verlet/split") == 0)
if (differentiation_flag == 1)
error->all(FLERR,"Cannot (yet) do analytic differentiation with pppm/gpu.");
if (strcmp(update->integrate_style,"verlet/split") == 0) {
kspace_split=true;
old_nlocal = 0;
}
if (kspace_split && universe->iworld == 0) {
im_real_space = true;
@ -153,21 +158,31 @@ void PPPMGPU::init()
void PPPMGPU::compute(int eflag, int vflag)
{
if (im_real_space) return;
int nago;
if (kspace_split) {
if (im_real_space) return;
if (atom->nlocal > old_nlocal) {
nago=0;
old_nlocal = atom->nlocal;
} else
nago=1;
} else
nago=neighbor->ago;
// set energy/virial flags
// invoke allocate_peratom() if needed for first time
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0;
else evflag = evflag_atom = eflag_global = vflag_global =
eflag_atom = vflag_atom = 0;
if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) {
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
peratom_allocate_flag = 1;
}
bool success = true;
int flag=PPPM_GPU_API(spread)(neighbor->ago, atom->nlocal, atom->nlocal +
int flag=PPPM_GPU_API(spread)(nago, atom->nlocal, atom->nlocal +
atom->nghost, atom->x, atom->type, success,
atom->q, domain->boxlo, delxinv, delyinv,
delzinv);
@ -241,7 +256,7 @@ void PPPMGPU::compute(int eflag, int vflag)
if (vflag_atom) {
for (i = 0; i < nlocal; i++)
for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*q[i]*qscale;
for (j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
}
}
@ -300,7 +315,10 @@ void PPPMGPU::allocate()
memory->create(gf_b,order,"pppm:gf_b");
memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d");
memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,
"pppm:drho_coeff");
// create 2 FFTs and a Remap
// 1st FFT keeps data in FFT decomposition
@ -349,7 +367,9 @@ void PPPMGPU::deallocate()
memory->destroy(gf_b);
memory->destroy2d_offset(rho1d,-order/2);
memory->destroy2d_offset(drho1d,-order/2);
memory->destroy2d_offset(rho_coeff,(1-order)/2);
memory->destroy2d_offset(drho_coeff,(1-order)/2);
delete fft1;
delete fft2;
@ -527,10 +547,20 @@ void PPPMGPU::brick2fft()
}
/* ----------------------------------------------------------------------
   ghost-swap to fill ghost cells of my brick with field values
   same dispatch as the base class, repeated in this class so that the
   GPU version of fillbrick_ik() is the one that gets called
   differentiation_flag == 1 selects fillbrick_ad() instead
------------------------------------------------------------------------- */
void PPPMGPU::fillbrick()
{
  if (differentiation_flag != 1) {
    fillbrick_ik();
    return;
  }
  fillbrick_ad();
}
/* ----------------------------------------------------------------------
ghost-swap to fill ghost cells of my brick with field values
------------------------------------------------------------------------- */
void PPPMGPU::fillbrick_ik()
{
int i,n,ix,iy,iz;
MPI_Request request;
@ -727,10 +757,20 @@ void PPPMGPU::fillbrick()
}
/* ----------------------------------------------------------------------
   FFT-based Poisson solver
   same dispatch as the base class, repeated in this class so that the
   GPU version of poisson_ik() is the one that gets called
   differentiation_flag == 1 selects poisson_ad() instead
------------------------------------------------------------------------- */
void PPPMGPU::poisson()
{
  if (differentiation_flag != 1) {
    poisson_ik();
    return;
  }
  poisson_ad();
}
/* ----------------------------------------------------------------------
FFT-based Poisson solver
------------------------------------------------------------------------- */
void PPPMGPU::poisson_ik()
{
int i,j,k,n;
double eng;
@ -925,11 +965,11 @@ double PPPMGPU::memory_usage()
// Returns 4 on every path.  When im_real_space is set, time3d and time1d
// are assigned directly and the parent-class timing() call is skipped.
// NOTE(review): time3d/time1d are each assigned twice below (0.0, then 1.0)
// and both PPPMOld::timing() and PPPM::timing() are called back to back;
// this reads like the removed and added lines of a change shown together -
// confirm which line of each pair is live.
int PPPMGPU::timing(int n, double &time3d, double &time1d) {
if (im_real_space) {
time3d = 0.0;
time1d = 0.0;
time3d = 1.0;
time1d = 1.0;
return 4;
}
PPPMOld::timing(n,time3d,time1d);
PPPM::timing(n,time3d,time1d);
return 4;
}
@ -940,5 +980,5 @@ int PPPMGPU::timing(int n, double &time3d, double &time1d) {
// Does nothing when im_real_space is set; otherwise defers to the
// parent-class setup().
// NOTE(review): both PPPMOld::setup() and PPPM::setup() appear below; this
// reads like the removed and added lines of a change shown together -
// confirm which call is live.
void PPPMGPU::setup()
{
if (im_real_space) return;
PPPMOld::setup();
PPPM::setup();
}

View File

@ -20,31 +20,34 @@ KSpaceStyle(pppm/gpu,PPPMGPU)
#ifndef LMP_PPPM_GPU_H
#define LMP_PPPM_GPU_H
#include "pppm_old.h"
#include "pppm.h"
namespace LAMMPS_NS {
class PPPMGPU : public PPPMOld {
class PPPMGPU : public PPPM {
public:
PPPMGPU(class LAMMPS *, int, char **);
virtual ~PPPMGPU();
virtual void init();
virtual void setup();
virtual void compute(int, int);
virtual int timing(int, double &, double &);
virtual double memory_usage();
void init();
void setup();
void compute(int, int);
int timing(int, double &, double &);
double memory_usage();
protected:
FFT_SCALAR ***density_brick_gpu, ***vd_brick;
bool kspace_split, im_real_space;
virtual void allocate();
virtual void deallocate();
virtual void brick2fft();
virtual void fillbrick();
virtual void poisson();
void allocate();
void deallocate();
void brick2fft();
void fillbrick();
void fillbrick_ik();
void poisson();
void poisson_ik();
int old_nlocal;
double poisson_time;
FFT_SCALAR ***create_3d_offset(int, int, int, int, int, int, const char *,
@ -65,6 +68,10 @@ Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Cannot (yet) do analytic differentiation with pppm/gpu.
Self-explanatory.
E: Cannot use order greater than 8 with pppm/gpu.
Self-explanatory.