git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@10670 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -27,6 +27,8 @@ action () {
|
|||||||
action fix_gpu.cpp
|
action fix_gpu.cpp
|
||||||
action fix_gpu.h
|
action fix_gpu.h
|
||||||
action gpu_extra.h
|
action gpu_extra.h
|
||||||
|
action pair_beck_gpu.cpp
|
||||||
|
action pair_beck_gpu.h
|
||||||
action pair_born_coul_long_gpu.cpp pair_born_coul_long.cpp
|
action pair_born_coul_long_gpu.cpp pair_born_coul_long.cpp
|
||||||
action pair_born_coul_long_gpu.h pair_born_coul_long.cpp
|
action pair_born_coul_long_gpu.h pair_born_coul_long.cpp
|
||||||
action pair_born_coul_wolf_gpu.cpp
|
action pair_born_coul_wolf_gpu.cpp
|
||||||
@ -75,6 +77,8 @@ action pair_lj_cut_coul_dsf_gpu.cpp
|
|||||||
action pair_lj_cut_coul_dsf_gpu.h
|
action pair_lj_cut_coul_dsf_gpu.h
|
||||||
action pair_lj_cut_coul_long_gpu.cpp pair_lj_cut_coul_long.cpp
|
action pair_lj_cut_coul_long_gpu.cpp pair_lj_cut_coul_long.cpp
|
||||||
action pair_lj_cut_coul_long_gpu.h pair_lj_cut_coul_long.cpp
|
action pair_lj_cut_coul_long_gpu.h pair_lj_cut_coul_long.cpp
|
||||||
|
action pair_lj_cut_coul_msm_gpu.cpp
|
||||||
|
action pair_lj_cut_coul_msm_gpu.h
|
||||||
action pair_lj_cut_gpu.cpp
|
action pair_lj_cut_gpu.cpp
|
||||||
action pair_lj_cut_gpu.h
|
action pair_lj_cut_gpu.h
|
||||||
action pair_lj_expand_gpu.cpp
|
action pair_lj_expand_gpu.cpp
|
||||||
@ -83,10 +87,16 @@ action pair_lj_sdk_coul_long_gpu.cpp pair_lj_sdk_coul_long.cpp
|
|||||||
action pair_lj_sdk_coul_long_gpu.h pair_lj_sdk_coul_long.cpp
|
action pair_lj_sdk_coul_long_gpu.h pair_lj_sdk_coul_long.cpp
|
||||||
action pair_lj_sdk_gpu.cpp pair_lj_sdk.cpp
|
action pair_lj_sdk_gpu.cpp pair_lj_sdk.cpp
|
||||||
action pair_lj_sdk_gpu.h pair_lj_sdk.cpp
|
action pair_lj_sdk_gpu.h pair_lj_sdk.cpp
|
||||||
|
action pair_mie_cut_gpu.cpp
|
||||||
|
action pair_mie_cut_gpu.h
|
||||||
action pair_morse_gpu.cpp
|
action pair_morse_gpu.cpp
|
||||||
action pair_morse_gpu.h
|
action pair_morse_gpu.h
|
||||||
action pair_resquared_gpu.cpp pair_resquared.cpp
|
action pair_resquared_gpu.cpp pair_resquared.cpp
|
||||||
action pair_resquared_gpu.h pair_resquared.cpp
|
action pair_resquared_gpu.h pair_resquared.cpp
|
||||||
|
action pair_soft_gpu.cpp
|
||||||
|
action pair_soft_gpu.h
|
||||||
|
action pair_sw_gpu.cpp pair_sw.cpp
|
||||||
|
action pair_sw_gpu.h pair_sw.h
|
||||||
action pair_table_gpu.cpp pair_table.cpp
|
action pair_table_gpu.cpp pair_table.cpp
|
||||||
action pair_table_gpu.h pair_table.cpp
|
action pair_table_gpu.h pair_table.cpp
|
||||||
action pair_yukawa_colloid_gpu.cpp pair_yukawa_colloid.cpp
|
action pair_yukawa_colloid_gpu.cpp pair_yukawa_colloid.cpp
|
||||||
|
|||||||
@ -17,6 +17,8 @@
|
|||||||
#include "atom.h"
|
#include "atom.h"
|
||||||
#include "force.h"
|
#include "force.h"
|
||||||
#include "pair.h"
|
#include "pair.h"
|
||||||
|
#include "pair_hybrid.h"
|
||||||
|
#include "pair_hybrid_overlay.h"
|
||||||
#include "respa.h"
|
#include "respa.h"
|
||||||
#include "input.h"
|
#include "input.h"
|
||||||
#include "timer.h"
|
#include "timer.h"
|
||||||
@ -38,7 +40,7 @@ extern int lmp_init_device(MPI_Comm world, MPI_Comm replica,
|
|||||||
const int first_gpu, const int last_gpu,
|
const int first_gpu, const int last_gpu,
|
||||||
const int gpu_mode, const double particle_split,
|
const int gpu_mode, const double particle_split,
|
||||||
const int nthreads, const int t_per_atom,
|
const int nthreads, const int t_per_atom,
|
||||||
const double cell_size);
|
const double cell_size, char *opencl_flags);
|
||||||
extern void lmp_clear_device();
|
extern void lmp_clear_device();
|
||||||
extern double lmp_gpu_forces(double **f, double **tor, double *eatom,
|
extern double lmp_gpu_forces(double **f, double **tor, double *eatom,
|
||||||
double **vatom, double *virial, double &ecoul);
|
double **vatom, double *virial, double &ecoul);
|
||||||
@ -103,6 +105,7 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
|
|||||||
double cell_size = -1;
|
double cell_size = -1;
|
||||||
|
|
||||||
int iarg = 7;
|
int iarg = 7;
|
||||||
|
char *opencl_flags = NULL;
|
||||||
while (iarg < narg) {
|
while (iarg < narg) {
|
||||||
if (iarg+2 > narg) error->all(FLERR,"Illegal fix GPU command");
|
if (iarg+2 > narg) error->all(FLERR,"Illegal fix GPU command");
|
||||||
|
|
||||||
@ -112,6 +115,8 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
|
|||||||
nthreads = force->inumeric(FLERR,arg[iarg+1]);
|
nthreads = force->inumeric(FLERR,arg[iarg+1]);
|
||||||
else if (strcmp(arg[iarg],"cellsize") == 0)
|
else if (strcmp(arg[iarg],"cellsize") == 0)
|
||||||
cell_size = force->numeric(FLERR,arg[iarg+1]);
|
cell_size = force->numeric(FLERR,arg[iarg+1]);
|
||||||
|
else if (strcmp(arg[iarg],"device") == 0)
|
||||||
|
opencl_flags = arg[iarg+1];
|
||||||
else
|
else
|
||||||
error->all(FLERR,"Illegal fix GPU command");
|
error->all(FLERR,"Illegal fix GPU command");
|
||||||
|
|
||||||
@ -128,7 +133,7 @@ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
|
|||||||
|
|
||||||
int gpu_flag = lmp_init_device(universe->uworld, world, first_gpu, last_gpu,
|
int gpu_flag = lmp_init_device(universe->uworld, world, first_gpu, last_gpu,
|
||||||
_gpu_mode, _particle_split, nthreads,
|
_gpu_mode, _particle_split, nthreads,
|
||||||
threads_per_atom, cell_size);
|
threads_per_atom, cell_size, opencl_flags);
|
||||||
GPU_EXTRA::check_flag(gpu_flag,error,world);
|
GPU_EXTRA::check_flag(gpu_flag,error,world);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -165,21 +170,24 @@ void FixGPU::init()
|
|||||||
force->pair_match("hybrid/overlay",1) != NULL)
|
force->pair_match("hybrid/overlay",1) != NULL)
|
||||||
error->all(FLERR,"GPU 'split' must be positive for hybrid pair styles");
|
error->all(FLERR,"GPU 'split' must be positive for hybrid pair styles");
|
||||||
|
|
||||||
|
// Make sure fdotr virial is not accumulated multiple times
|
||||||
|
|
||||||
|
if (force->pair_match("hybrid",1) != NULL) {
|
||||||
|
PairHybrid *hybrid = (PairHybrid *) force->pair;
|
||||||
|
for (int i = 0; i < hybrid->nstyles; i++)
|
||||||
|
if (strstr(hybrid->keywords[i],"/gpu")==NULL)
|
||||||
|
force->pair->no_virial_fdotr_compute = 1;
|
||||||
|
} else if (force->pair_match("hybrid/overlay",1) != NULL) {
|
||||||
|
PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
|
||||||
|
for (int i = 0; i < hybrid->nstyles; i++)
|
||||||
|
if (strstr(hybrid->keywords[i],"/gpu")==NULL)
|
||||||
|
force->pair->no_virial_fdotr_compute = 1;
|
||||||
|
}
|
||||||
|
|
||||||
// r-RESPA support
|
// r-RESPA support
|
||||||
|
|
||||||
if (strstr(update->integrate_style,"respa")) {
|
if (strstr(update->integrate_style,"respa"))
|
||||||
_nlevels_respa = ((Respa *) update->integrate)->nlevels;
|
_nlevels_respa = ((Respa *) update->integrate)->nlevels;
|
||||||
|
|
||||||
// need to check that gpu accelerated styles are at the outmost levels
|
|
||||||
|
|
||||||
if ((force->pair_match("/gpu",0) != NULL) &&
|
|
||||||
(((Respa *) update->integrate)->level_pair != _nlevels_respa-1))
|
|
||||||
error->all(FLERR,"GPU pair style must be at outermost respa level");
|
|
||||||
|
|
||||||
if ((force->kspace_match("/gpu",0) != NULL) &&
|
|
||||||
(((Respa *) update->integrate)->level_kspace != _nlevels_respa-1))
|
|
||||||
error->all(FLERR,"GPU Kspace style must be at outermost respa level");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
@ -194,8 +202,9 @@ void FixGPU::setup(int vflag)
|
|||||||
if (strstr(update->integrate_style,"verlet"))
|
if (strstr(update->integrate_style,"verlet"))
|
||||||
post_force(vflag);
|
post_force(vflag);
|
||||||
else {
|
else {
|
||||||
|
// In setup only, all forces calculated on gpu are put in the outer level
|
||||||
((Respa *) update->integrate)->copy_flevel_f(_nlevels_respa-1);
|
((Respa *) update->integrate)->copy_flevel_f(_nlevels_respa-1);
|
||||||
post_force_respa(vflag,_nlevels_respa-1,0);
|
post_force(vflag);
|
||||||
((Respa *) update->integrate)->copy_f_flevel(_nlevels_respa-1);
|
((Respa *) update->integrate)->copy_f_flevel(_nlevels_respa-1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -241,7 +250,7 @@ void FixGPU::min_post_force(int vflag)
|
|||||||
|
|
||||||
void FixGPU::post_force_respa(int vflag, int ilevel, int iloop)
|
void FixGPU::post_force_respa(int vflag, int ilevel, int iloop)
|
||||||
{
|
{
|
||||||
if (ilevel == _nlevels_respa-1) post_force(vflag);
|
post_force(vflag);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
@ -252,3 +261,4 @@ double FixGPU::memory_usage()
|
|||||||
// Memory usage currently returned by pair routine
|
// Memory usage currently returned by pair routine
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -52,6 +52,12 @@ namespace GPU_EXTRA {
|
|||||||
else if (all_success == -9)
|
else if (all_success == -9)
|
||||||
error->all(FLERR,
|
error->all(FLERR,
|
||||||
"CPU neighbor lists must be used for ellipsoid/sphere mix.");
|
"CPU neighbor lists must be used for ellipsoid/sphere mix.");
|
||||||
|
else if (all_success == -10)
|
||||||
|
error->all(FLERR,
|
||||||
|
"Invalid threads_per_atom specified.");
|
||||||
|
else if (all_success == -11)
|
||||||
|
error->all(FLERR,
|
||||||
|
"Invalid custom OpenCL parameter string.");
|
||||||
else
|
else
|
||||||
error->all(FLERR,"Unknown error in GPU library");
|
error->all(FLERR,"Unknown error in GPU library");
|
||||||
}
|
}
|
||||||
@ -110,8 +116,19 @@ E: CPU neighbor lists must be used for ellipsoid/sphere mix
|
|||||||
When using Gay-Berne or RE-squared pair styles with both ellipsoidal and
|
When using Gay-Berne or RE-squared pair styles with both ellipsoidal and
|
||||||
spherical particles, the neighbor list must be built on the CPU
|
spherical particles, the neighbor list must be built on the CPU
|
||||||
|
|
||||||
|
E: Invalid threads_per_atom specified.
|
||||||
|
|
||||||
|
For 3-body potentials on the GPU, the threads_per_atom setting cannot be
|
||||||
|
greater than 4 for NVIDIA GPUs.
|
||||||
|
|
||||||
E: Unknown error in GPU library
|
E: Unknown error in GPU library
|
||||||
|
|
||||||
Self-explanatory.
|
Self-explanatory.
|
||||||
|
|
||||||
|
E: Invalid custom OpenCL parameter string.
|
||||||
|
|
||||||
|
There are not enough or too many parameters in the custom string for package
|
||||||
|
GPU.
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|||||||
@ -78,7 +78,7 @@ FFT_SCALAR* PPPM_GPU_API(init)(const int nlocal, const int nall, FILE *screen,
|
|||||||
const double slab_volfactor,
|
const double slab_volfactor,
|
||||||
const int nx_pppm, const int ny_pppm,
|
const int nx_pppm, const int ny_pppm,
|
||||||
const int nz_pppm, const bool split,
|
const int nz_pppm, const bool split,
|
||||||
int &success);
|
const bool respa, int &success);
|
||||||
void PPPM_GPU_API(clear)(const double poisson_time);
|
void PPPM_GPU_API(clear)(const double poisson_time);
|
||||||
int PPPM_GPU_API(spread)(const int ago, const int nlocal, const int nall,
|
int PPPM_GPU_API(spread)(const int ago, const int nlocal, const int nall,
|
||||||
double **host_x, int *host_type, bool &success,
|
double **host_x, int *host_type, bool &success,
|
||||||
@ -152,6 +152,10 @@ void PPPMGPU::init()
|
|||||||
|
|
||||||
// GPU precision specific init
|
// GPU precision specific init
|
||||||
|
|
||||||
|
bool respa_value=false;
|
||||||
|
if (strstr(update->integrate_style,"respa"))
|
||||||
|
respa_value=true;
|
||||||
|
|
||||||
if (order>8)
|
if (order>8)
|
||||||
error->all(FLERR,"Cannot use order greater than 8 with pppm/gpu.");
|
error->all(FLERR,"Cannot use order greater than 8 with pppm/gpu.");
|
||||||
PPPM_GPU_API(clear)(poisson_time);
|
PPPM_GPU_API(clear)(poisson_time);
|
||||||
@ -162,7 +166,7 @@ void PPPMGPU::init()
|
|||||||
order, nxlo_out, nylo_out, nzlo_out, nxhi_out,
|
order, nxlo_out, nylo_out, nzlo_out, nxhi_out,
|
||||||
nyhi_out, nzhi_out, rho_coeff, &data,
|
nyhi_out, nzhi_out, rho_coeff, &data,
|
||||||
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
|
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
|
||||||
kspace_split,success);
|
kspace_split,respa_value,success);
|
||||||
|
|
||||||
GPU_EXTRA::check_flag(success,error,world);
|
GPU_EXTRA::check_flag(success,error,world);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user