Enabled neighbor list build on the device with pair_style hybrid and hybrid/overlay

This commit is contained in:
Trung Nguyen
2019-04-16 23:30:25 -05:00
parent 898860328b
commit c55009a0ac
8 changed files with 80 additions and 27 deletions

View File

@ -70,6 +70,12 @@ int BaseAtomicT::init_atomic(const int nlocal, const int nall,
if (success!=0) if (success!=0)
return success; return success;
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
max_nbors,cell_size,false,_threads_per_atom);
if (success!=0)
return success;
ucl_device=device->gpu; ucl_device=device->gpu;
atom=&device->atom; atom=&device->atom;

View File

@ -71,6 +71,12 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
if (success!=0) if (success!=0)
return success; return success;
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
max_nbors,cell_size,false,_threads_per_atom);
if (success!=0)
return success;
ucl_device=device->gpu; ucl_device=device->gpu;
atom=&device->atom; atom=&device->atom;

View File

@ -72,6 +72,12 @@ int BaseDipoleT::init_atomic(const int nlocal, const int nall,
if (success!=0) if (success!=0)
return success; return success;
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
max_nbors,cell_size,false,_threads_per_atom);
if (success!=0)
return success;
ucl_device=device->gpu; ucl_device=device->gpu;
atom=&device->atom; atom=&device->atom;

View File

@ -71,6 +71,12 @@ int BaseDPDT::init_atomic(const int nlocal, const int nall,
if (success!=0) if (success!=0)
return success; return success;
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
max_nbors,cell_size,false,_threads_per_atom);
if (success!=0)
return success;
ucl_device=device->gpu; ucl_device=device->gpu;
atom=&device->atom; atom=&device->atom;

View File

@ -84,6 +84,12 @@ int BaseThreeT::init_three(const int nlocal, const int nall,
if (success!=0) if (success!=0)
return success; return success;
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
max_nbors,cell_size,false,_threads_per_atom);
if (success!=0)
return success;
ucl_device=device->gpu; ucl_device=device->gpu;
atom=&device->atom; atom=&device->atom;

View File

@ -301,16 +301,6 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
if (!ans.init(ef_nlocal,charge,rot,*gpu)) if (!ans.init(ef_nlocal,charge,rot,*gpu))
return -3; return -3;
if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
*gpu,gpu_nbor,gpu_host,pre_cut, _block_cell_2d,
_block_cell_id, _block_nbor_build, threads_per_atom,
_warp_size, _time_device, compile_string()))
return -3;
if (_cell_size<0.0)
nbor->cell_size(cell_size,cell_size);
else
nbor->cell_size(_cell_size,cell_size);
_init_count++; _init_count++;
return 0; return 0;
} }
@ -338,6 +328,39 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
return 0; return 0;
} }
/// Set up the neighbor list builder of a pair style on this device
/** Derives the effective local atom count and the neighbor build mode from
  * the device settings, initializes \p nbor with them and sets its cell size.
  * \return 0 on success, -3 if nbor->init() reports failure **/
template <class numtyp, class acctyp>
int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
                       const int host_nlocal, const int nall,
                       const int maxspecial, const int gpu_host,
                       const int max_nbors, const double cell_size,
                       const bool pre_cut, const int threads_per_atom) {
  // Only a strictly fractional split (0 < split < 1) scales the local count
  const bool fractional_split=(_particle_split>0.0 && _particle_split<1.0);
  const int ef_nlocal=fractional_split ?
                      static_cast<int>(_particle_split*nlocal) : nlocal;

  // Build mode handed to the neighbor object: 0 by default, 1 for GPU_NEIGH,
  // 2 for GPU_HYB_NEIGH
  int gpu_nbor;
  if (_gpu_mode==Device<numtyp,acctyp>::GPU_NEIGH) {
    #ifdef USE_CUDPP
    gpu_nbor=1;
    #else
    // Without USE_CUDPP a request for mode 1 is replaced by mode 2
    gpu_nbor=2;
    #endif
  } else if (_gpu_mode==Device<numtyp,acctyp>::GPU_HYB_NEIGH) {
    gpu_nbor=2;
  } else {
    gpu_nbor=0;
  }

  if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
                  *gpu,gpu_nbor,gpu_host,pre_cut,_block_cell_2d,
                  _block_cell_id,_block_nbor_build,threads_per_atom,
                  _warp_size,_time_device,compile_string()))
    return -3;

  // A negative _cell_size means the caller's cell_size is used for both
  // arguments; otherwise _cell_size takes the first slot
  const double first_cell_size=(_cell_size<0.0) ? cell_size : _cell_size;
  nbor->cell_size(first_cell_size,cell_size);

  return 0;
}
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
void DeviceT::set_single_precompute void DeviceT::set_single_precompute
(PPPM<numtyp,acctyp,float,_lgpu_float4> *pppm) { (PPPM<numtyp,acctyp,float,_lgpu_float4> *pppm) {

View File

@ -91,6 +91,13 @@ class Device {
* - -5 Double precision is not supported on card **/ * - -5 Double precision is not supported on card **/
int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall); int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall);
/// Initialize neighbor list build -- callback function from pair
/** Invoked from the pair-style base class init routines (e.g.
  * BaseAtomicT::init_atomic, which passes pre_cut=false) so that each pair
  * style initializes its own Neighbor object.
  * \param nbor Neighbor object to initialize
  * \param nlocal Number of local atoms; multiplied by the particle split
  *               and truncated to int when the split lies strictly
  *               between 0 and 1
  * \param host_nlocal Forwarded unchanged to nbor->init()
  * \param nall Not referenced by the current implementation
  * \param maxspecial Forwarded unchanged to nbor->init()
  * \param gpu_host Forwarded unchanged to nbor->init()
  * \param max_nbors Forwarded unchanged to nbor->init()
  * \param cell_size Passed to nbor->cell_size(); also used as its first
  *                  argument when the device-level _cell_size is negative
  * \param pre_cut Forwarded unchanged to nbor->init()
  * \param threads_per_atom Forwarded unchanged to nbor->init()
  *
  * Returns:
  * -  0 if successful
  * - -3 if nbor->init() fails **/
int init_nbor(Neighbor *nbor, const int nlocal,
const int host_nlocal, const int nall,
const int maxspecial, const int gpu_host,
const int max_nbors, const double cell_size,
const bool pre_cut, const int threads_per_atom);
/// Output a message for pair_style acceleration with device stats /// Output a message for pair_style acceleration with device stats
void init_message(FILE *screen, const char *name, void init_message(FILE *screen, const char *name,
const int first_gpu, const int last_gpu); const int first_gpu, const int last_gpu);

View File

@ -30,7 +30,6 @@
#include "neighbor.h" #include "neighbor.h"
#include "citeme.h" #include "citeme.h"
#include "error.h" #include "error.h"
#include "utils.h"
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
using namespace FixConst; using namespace FixConst;
@ -219,17 +218,6 @@ void FixGPU::init()
error->all(FLERR,"GPU package does not (yet) work with " error->all(FLERR,"GPU package does not (yet) work with "
"atom_style template"); "atom_style template");
// hybrid cannot be used with force/neigh option
if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
if (force->pair_match("^hybrid",0) != NULL)
error->all(FLERR,"Cannot use pair hybrid with GPU neighbor list builds");
if (_particle_split < 0)
if (force->pair_match("^hybrid",0) != NULL)
error->all(FLERR,"GPU split param must be positive "
"for hybrid pair styles");
// neighbor list builds on the GPU with triclinic box is not yet supported // neighbor list builds on the GPU with triclinic box is not yet supported
if ((_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH) && if ((_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH) &&
@ -243,16 +231,21 @@ void FixGPU::init()
// make sure fdotr virial is not accumulated multiple times // make sure fdotr virial is not accumulated multiple times
if (force->pair_match("^hybrid",0) != NULL) { if (force->pair_match("hybrid",1) != NULL) {
PairHybrid *hybrid = (PairHybrid *) force->pair; PairHybrid *hybrid = (PairHybrid *) force->pair;
for (int i = 0; i < hybrid->nstyles; i++) for (int i = 0; i < hybrid->nstyles; i++)
if (!utils::strmatch(hybrid->keywords[i],"/gpu$")) if (strstr(hybrid->keywords[i],"/gpu")==NULL)
force->pair->no_virial_fdotr_compute = 1;
} else if (force->pair_match("hybrid/overlay",1) != NULL) {
PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
for (int i = 0; i < hybrid->nstyles; i++)
if (strstr(hybrid->keywords[i],"/gpu")==NULL)
force->pair->no_virial_fdotr_compute = 1; force->pair->no_virial_fdotr_compute = 1;
} }
// rRESPA support // rRESPA support
if (utils::strmatch(update->integrate_style,"^respa")) if (strstr(update->integrate_style,"respa"))
_nlevels_respa = ((Respa *) update->integrate)->nlevels; _nlevels_respa = ((Respa *) update->integrate)->nlevels;
} }
@ -283,7 +276,7 @@ void FixGPU::min_setup(int vflag)
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
void FixGPU::post_force(int /* vflag */) void FixGPU::post_force(int vflag)
{ {
if (!force->pair) return; if (!force->pair) return;
@ -315,7 +308,7 @@ void FixGPU::min_post_force(int vflag)
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
void FixGPU::post_force_respa(int vflag, int /* ilevel */, int /* iloop */) void FixGPU::post_force_respa(int vflag, int ilevel, int iloop)
{ {
post_force(vflag); post_force(vflag);
} }