Enabled neighbor list build on the device with pair_style hybrid and hybrid/overlay
This commit is contained in:
@ -70,6 +70,12 @@ int BaseAtomicT::init_atomic(const int nlocal, const int nall,
|
|||||||
if (success!=0)
|
if (success!=0)
|
||||||
return success;
|
return success;
|
||||||
|
|
||||||
|
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||||
|
max_nbors,cell_size,false,_threads_per_atom);
|
||||||
|
|
||||||
|
if (success!=0)
|
||||||
|
return success;
|
||||||
|
|
||||||
ucl_device=device->gpu;
|
ucl_device=device->gpu;
|
||||||
atom=&device->atom;
|
atom=&device->atom;
|
||||||
|
|
||||||
|
|||||||
@ -71,6 +71,12 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
|
|||||||
if (success!=0)
|
if (success!=0)
|
||||||
return success;
|
return success;
|
||||||
|
|
||||||
|
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||||
|
max_nbors,cell_size,false,_threads_per_atom);
|
||||||
|
|
||||||
|
if (success!=0)
|
||||||
|
return success;
|
||||||
|
|
||||||
ucl_device=device->gpu;
|
ucl_device=device->gpu;
|
||||||
atom=&device->atom;
|
atom=&device->atom;
|
||||||
|
|
||||||
|
|||||||
@ -72,6 +72,12 @@ int BaseDipoleT::init_atomic(const int nlocal, const int nall,
|
|||||||
if (success!=0)
|
if (success!=0)
|
||||||
return success;
|
return success;
|
||||||
|
|
||||||
|
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||||
|
max_nbors,cell_size,false,_threads_per_atom);
|
||||||
|
|
||||||
|
if (success!=0)
|
||||||
|
return success;
|
||||||
|
|
||||||
ucl_device=device->gpu;
|
ucl_device=device->gpu;
|
||||||
atom=&device->atom;
|
atom=&device->atom;
|
||||||
|
|
||||||
|
|||||||
@ -71,6 +71,12 @@ int BaseDPDT::init_atomic(const int nlocal, const int nall,
|
|||||||
if (success!=0)
|
if (success!=0)
|
||||||
return success;
|
return success;
|
||||||
|
|
||||||
|
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||||
|
max_nbors,cell_size,false,_threads_per_atom);
|
||||||
|
|
||||||
|
if (success!=0)
|
||||||
|
return success;
|
||||||
|
|
||||||
ucl_device=device->gpu;
|
ucl_device=device->gpu;
|
||||||
atom=&device->atom;
|
atom=&device->atom;
|
||||||
|
|
||||||
|
|||||||
@ -84,6 +84,12 @@ int BaseThreeT::init_three(const int nlocal, const int nall,
|
|||||||
if (success!=0)
|
if (success!=0)
|
||||||
return success;
|
return success;
|
||||||
|
|
||||||
|
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||||
|
max_nbors,cell_size,false,_threads_per_atom);
|
||||||
|
|
||||||
|
if (success!=0)
|
||||||
|
return success;
|
||||||
|
|
||||||
ucl_device=device->gpu;
|
ucl_device=device->gpu;
|
||||||
atom=&device->atom;
|
atom=&device->atom;
|
||||||
|
|
||||||
|
|||||||
@ -301,16 +301,6 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
|||||||
if (!ans.init(ef_nlocal,charge,rot,*gpu))
|
if (!ans.init(ef_nlocal,charge,rot,*gpu))
|
||||||
return -3;
|
return -3;
|
||||||
|
|
||||||
if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
|
|
||||||
*gpu,gpu_nbor,gpu_host,pre_cut, _block_cell_2d,
|
|
||||||
_block_cell_id, _block_nbor_build, threads_per_atom,
|
|
||||||
_warp_size, _time_device, compile_string()))
|
|
||||||
return -3;
|
|
||||||
if (_cell_size<0.0)
|
|
||||||
nbor->cell_size(cell_size,cell_size);
|
|
||||||
else
|
|
||||||
nbor->cell_size(_cell_size,cell_size);
|
|
||||||
|
|
||||||
_init_count++;
|
_init_count++;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -338,6 +328,39 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class numtyp, class acctyp>
|
||||||
|
int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
|
||||||
|
const int host_nlocal, const int nall,
|
||||||
|
const int maxspecial, const int gpu_host,
|
||||||
|
const int max_nbors, const double cell_size,
|
||||||
|
const bool pre_cut, const int threads_per_atom) {
|
||||||
|
int ef_nlocal=nlocal;
|
||||||
|
if (_particle_split<1.0 && _particle_split>0.0)
|
||||||
|
ef_nlocal=static_cast<int>(_particle_split*nlocal);
|
||||||
|
|
||||||
|
int gpu_nbor=0;
|
||||||
|
if (_gpu_mode==Device<numtyp,acctyp>::GPU_NEIGH)
|
||||||
|
gpu_nbor=1;
|
||||||
|
else if (_gpu_mode==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
|
||||||
|
gpu_nbor=2;
|
||||||
|
#ifndef USE_CUDPP
|
||||||
|
if (gpu_nbor==1)
|
||||||
|
gpu_nbor=2;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
|
||||||
|
*gpu,gpu_nbor,gpu_host,pre_cut,_block_cell_2d,
|
||||||
|
_block_cell_id, _block_nbor_build, threads_per_atom,
|
||||||
|
_warp_size, _time_device, compile_string()))
|
||||||
|
return -3;
|
||||||
|
if (_cell_size<0.0)
|
||||||
|
nbor->cell_size(cell_size,cell_size);
|
||||||
|
else
|
||||||
|
nbor->cell_size(_cell_size,cell_size);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void DeviceT::set_single_precompute
|
void DeviceT::set_single_precompute
|
||||||
(PPPM<numtyp,acctyp,float,_lgpu_float4> *pppm) {
|
(PPPM<numtyp,acctyp,float,_lgpu_float4> *pppm) {
|
||||||
|
|||||||
@ -91,6 +91,13 @@ class Device {
|
|||||||
* - -5 Double precision is not supported on card **/
|
* - -5 Double precision is not supported on card **/
|
||||||
int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall);
|
int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall);
|
||||||
|
|
||||||
|
/// Initialize neighbor list build -- callback function from pair
|
||||||
|
int init_nbor(Neighbor *nbor, const int nlocal,
|
||||||
|
const int host_nlocal, const int nall,
|
||||||
|
const int maxspecial, const int gpu_host,
|
||||||
|
const int max_nbors, const double cell_size,
|
||||||
|
const bool pre_cut, const int threads_per_atom);
|
||||||
|
|
||||||
/// Output a message for pair_style acceleration with device stats
|
/// Output a message for pair_style acceleration with device stats
|
||||||
void init_message(FILE *screen, const char *name,
|
void init_message(FILE *screen, const char *name,
|
||||||
const int first_gpu, const int last_gpu);
|
const int first_gpu, const int last_gpu);
|
||||||
|
|||||||
@ -30,7 +30,6 @@
|
|||||||
#include "neighbor.h"
|
#include "neighbor.h"
|
||||||
#include "citeme.h"
|
#include "citeme.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "utils.h"
|
|
||||||
|
|
||||||
using namespace LAMMPS_NS;
|
using namespace LAMMPS_NS;
|
||||||
using namespace FixConst;
|
using namespace FixConst;
|
||||||
@ -219,17 +218,6 @@ void FixGPU::init()
|
|||||||
error->all(FLERR,"GPU package does not (yet) work with "
|
error->all(FLERR,"GPU package does not (yet) work with "
|
||||||
"atom_style template");
|
"atom_style template");
|
||||||
|
|
||||||
// hybrid cannot be used with force/neigh option
|
|
||||||
|
|
||||||
if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
|
|
||||||
if (force->pair_match("^hybrid",0) != NULL)
|
|
||||||
error->all(FLERR,"Cannot use pair hybrid with GPU neighbor list builds");
|
|
||||||
|
|
||||||
if (_particle_split < 0)
|
|
||||||
if (force->pair_match("^hybrid",0) != NULL)
|
|
||||||
error->all(FLERR,"GPU split param must be positive "
|
|
||||||
"for hybrid pair styles");
|
|
||||||
|
|
||||||
// neighbor list builds on the GPU with triclinic box is not yet supported
|
// neighbor list builds on the GPU with triclinic box is not yet supported
|
||||||
|
|
||||||
if ((_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH) &&
|
if ((_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH) &&
|
||||||
@ -243,16 +231,21 @@ void FixGPU::init()
|
|||||||
|
|
||||||
// make sure fdotr virial is not accumulated multiple times
|
// make sure fdotr virial is not accumulated multiple times
|
||||||
|
|
||||||
if (force->pair_match("^hybrid",0) != NULL) {
|
if (force->pair_match("hybrid",1) != NULL) {
|
||||||
PairHybrid *hybrid = (PairHybrid *) force->pair;
|
PairHybrid *hybrid = (PairHybrid *) force->pair;
|
||||||
for (int i = 0; i < hybrid->nstyles; i++)
|
for (int i = 0; i < hybrid->nstyles; i++)
|
||||||
if (!utils::strmatch(hybrid->keywords[i],"/gpu$"))
|
if (strstr(hybrid->keywords[i],"/gpu")==NULL)
|
||||||
|
force->pair->no_virial_fdotr_compute = 1;
|
||||||
|
} else if (force->pair_match("hybrid/overlay",1) != NULL) {
|
||||||
|
PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
|
||||||
|
for (int i = 0; i < hybrid->nstyles; i++)
|
||||||
|
if (strstr(hybrid->keywords[i],"/gpu")==NULL)
|
||||||
force->pair->no_virial_fdotr_compute = 1;
|
force->pair->no_virial_fdotr_compute = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// rRESPA support
|
// rRESPA support
|
||||||
|
|
||||||
if (utils::strmatch(update->integrate_style,"^respa"))
|
if (strstr(update->integrate_style,"respa"))
|
||||||
_nlevels_respa = ((Respa *) update->integrate)->nlevels;
|
_nlevels_respa = ((Respa *) update->integrate)->nlevels;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -283,7 +276,7 @@ void FixGPU::min_setup(int vflag)
|
|||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
void FixGPU::post_force(int /* vflag */)
|
void FixGPU::post_force(int vflag)
|
||||||
{
|
{
|
||||||
if (!force->pair) return;
|
if (!force->pair) return;
|
||||||
|
|
||||||
@ -315,7 +308,7 @@ void FixGPU::min_post_force(int vflag)
|
|||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
void FixGPU::post_force_respa(int vflag, int /* ilevel */, int /* iloop */)
|
void FixGPU::post_force_respa(int vflag, int ilevel, int iloop)
|
||||||
{
|
{
|
||||||
post_force(vflag);
|
post_force(vflag);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user