Enabled neighbor list build on the device with pair_style hybrid and hybrid/overlay
This commit is contained in:
@ -70,6 +70,12 @@ int BaseAtomicT::init_atomic(const int nlocal, const int nall,
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||
max_nbors,cell_size,false,_threads_per_atom);
|
||||
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
ucl_device=device->gpu;
|
||||
atom=&device->atom;
|
||||
|
||||
|
||||
@ -71,6 +71,12 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||
max_nbors,cell_size,false,_threads_per_atom);
|
||||
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
ucl_device=device->gpu;
|
||||
atom=&device->atom;
|
||||
|
||||
|
||||
@ -72,6 +72,12 @@ int BaseDipoleT::init_atomic(const int nlocal, const int nall,
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||
max_nbors,cell_size,false,_threads_per_atom);
|
||||
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
ucl_device=device->gpu;
|
||||
atom=&device->atom;
|
||||
|
||||
|
||||
@ -71,6 +71,12 @@ int BaseDPDT::init_atomic(const int nlocal, const int nall,
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||
max_nbors,cell_size,false,_threads_per_atom);
|
||||
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
ucl_device=device->gpu;
|
||||
atom=&device->atom;
|
||||
|
||||
|
||||
@ -84,6 +84,12 @@ int BaseThreeT::init_three(const int nlocal, const int nall,
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||
max_nbors,cell_size,false,_threads_per_atom);
|
||||
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
ucl_device=device->gpu;
|
||||
atom=&device->atom;
|
||||
|
||||
|
||||
@ -301,16 +301,6 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
||||
if (!ans.init(ef_nlocal,charge,rot,*gpu))
|
||||
return -3;
|
||||
|
||||
if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
|
||||
*gpu,gpu_nbor,gpu_host,pre_cut, _block_cell_2d,
|
||||
_block_cell_id, _block_nbor_build, threads_per_atom,
|
||||
_warp_size, _time_device, compile_string()))
|
||||
return -3;
|
||||
if (_cell_size<0.0)
|
||||
nbor->cell_size(cell_size,cell_size);
|
||||
else
|
||||
nbor->cell_size(_cell_size,cell_size);
|
||||
|
||||
_init_count++;
|
||||
return 0;
|
||||
}
|
||||
@ -338,6 +328,39 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
|
||||
const int host_nlocal, const int nall,
|
||||
const int maxspecial, const int gpu_host,
|
||||
const int max_nbors, const double cell_size,
|
||||
const bool pre_cut, const int threads_per_atom) {
|
||||
int ef_nlocal=nlocal;
|
||||
if (_particle_split<1.0 && _particle_split>0.0)
|
||||
ef_nlocal=static_cast<int>(_particle_split*nlocal);
|
||||
|
||||
int gpu_nbor=0;
|
||||
if (_gpu_mode==Device<numtyp,acctyp>::GPU_NEIGH)
|
||||
gpu_nbor=1;
|
||||
else if (_gpu_mode==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
|
||||
gpu_nbor=2;
|
||||
#ifndef USE_CUDPP
|
||||
if (gpu_nbor==1)
|
||||
gpu_nbor=2;
|
||||
#endif
|
||||
|
||||
if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
|
||||
*gpu,gpu_nbor,gpu_host,pre_cut,_block_cell_2d,
|
||||
_block_cell_id, _block_nbor_build, threads_per_atom,
|
||||
_warp_size, _time_device, compile_string()))
|
||||
return -3;
|
||||
if (_cell_size<0.0)
|
||||
nbor->cell_size(cell_size,cell_size);
|
||||
else
|
||||
nbor->cell_size(_cell_size,cell_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void DeviceT::set_single_precompute
|
||||
(PPPM<numtyp,acctyp,float,_lgpu_float4> *pppm) {
|
||||
|
||||
@ -91,6 +91,13 @@ class Device {
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall);
|
||||
|
||||
/// Initialize neighbor list build -- callback function from pair
/** Calls nbor->init() with this device's shared neighbor settings and then
  * sets the cell size on \a nbor. When the particle split is strictly
  * between 0 and 1, \a nlocal is scaled by it before being forwarded.
  * \a nall is accepted but not referenced by the definition.
  *
  * Returns:
  * -  0 if successful
  * - -3 if nbor->init() reports failure **/
|
||||
int init_nbor(Neighbor *nbor, const int nlocal,
|
||||
const int host_nlocal, const int nall,
|
||||
const int maxspecial, const int gpu_host,
|
||||
const int max_nbors, const double cell_size,
|
||||
const bool pre_cut, const int threads_per_atom);
|
||||
|
||||
/// Output a message for pair_style acceleration with device stats
|
||||
void init_message(FILE *screen, const char *name,
|
||||
const int first_gpu, const int last_gpu);
|
||||
|
||||
@ -30,7 +30,6 @@
|
||||
#include "neighbor.h"
|
||||
#include "citeme.h"
|
||||
#include "error.h"
|
||||
#include "utils.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace FixConst;
|
||||
@ -219,17 +218,6 @@ void FixGPU::init()
|
||||
error->all(FLERR,"GPU package does not (yet) work with "
|
||||
"atom_style template");
|
||||
|
||||
// hybrid cannot be used with force/neigh option
|
||||
|
||||
if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
|
||||
if (force->pair_match("^hybrid",0) != NULL)
|
||||
error->all(FLERR,"Cannot use pair hybrid with GPU neighbor list builds");
|
||||
|
||||
if (_particle_split < 0)
|
||||
if (force->pair_match("^hybrid",0) != NULL)
|
||||
error->all(FLERR,"GPU split param must be positive "
|
||||
"for hybrid pair styles");
|
||||
|
||||
// neighbor list builds on the GPU with a triclinic box are not yet supported
|
||||
|
||||
if ((_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH) &&
|
||||
@ -243,16 +231,21 @@ void FixGPU::init()
|
||||
|
||||
// make sure fdotr virial is not accumulated multiple times
|
||||
|
||||
if (force->pair_match("^hybrid",0) != NULL) {
|
||||
if (force->pair_match("hybrid",1) != NULL) {
|
||||
PairHybrid *hybrid = (PairHybrid *) force->pair;
|
||||
for (int i = 0; i < hybrid->nstyles; i++)
|
||||
if (!utils::strmatch(hybrid->keywords[i],"/gpu$"))
|
||||
if (strstr(hybrid->keywords[i],"/gpu")==NULL)
|
||||
force->pair->no_virial_fdotr_compute = 1;
|
||||
} else if (force->pair_match("hybrid/overlay",1) != NULL) {
|
||||
PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
|
||||
for (int i = 0; i < hybrid->nstyles; i++)
|
||||
if (strstr(hybrid->keywords[i],"/gpu")==NULL)
|
||||
force->pair->no_virial_fdotr_compute = 1;
|
||||
}
|
||||
|
||||
// rRESPA support
|
||||
|
||||
if (utils::strmatch(update->integrate_style,"^respa"))
|
||||
if (strstr(update->integrate_style,"respa"))
|
||||
_nlevels_respa = ((Respa *) update->integrate)->nlevels;
|
||||
}
|
||||
|
||||
@ -283,7 +276,7 @@ void FixGPU::min_setup(int vflag)
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixGPU::post_force(int /* vflag */)
|
||||
void FixGPU::post_force(int vflag)
|
||||
{
|
||||
if (!force->pair) return;
|
||||
|
||||
@ -315,7 +308,7 @@ void FixGPU::min_post_force(int vflag)
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixGPU::post_force_respa(int vflag, int /* ilevel */, int /* iloop */)
|
||||
void FixGPU::post_force_respa(int vflag, int ilevel, int iloop)
|
||||
{
|
||||
post_force(vflag);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user