Move GPU neighbor-list setup from Device::init() into Device::init_nbor()

Device::init() no longer initializes the Neighbor object. The removed
nbor->init() / nbor->cell_size() sequence now lives in the new
Device::init_nbor(), which returns -3 when nbor->init() fails and 0
otherwise. BaseAtomic, BaseCharge, BaseDipole, BaseDPD and BaseThree call
init_nbor() (with pre_cut=false) directly after their existing
"success" check and return any non-zero result to their caller.
FixGPU::init() drops its two error checks for hybrid pair styles.

Review notes:
 * Line structure restored: one file header, hunk header or hunk line
   per physical line, so hunk line counts are meaningful again.
 * Angle-bracket template syntax had been stripped from this patch.
   static_cast<int> is restored from the int it is assigned to.
   NOTE(review): "template <class numtyp, class acctyp>",
   Answer<numtyp,acctyp> and PPPM<numtyp,acctyp,float,_lgpu_float4> are
   reconstructed, as is the indentation of context lines -- confirm
   against lib/gpu/lal_device.{h,cpp} before applying.
 * Dropped the fix_gpu.cpp hunks that were unrelated to this change and
   regressed existing code: they replaced the anchored utils::strmatch()
   tests ("^hybrid", "/gpu$", "^respa") with unanchored strstr(),
   duplicated the hybrid branch for hybrid/overlay, removed
   #include "utils.h", and un-commented unused parameter names in
   post_force() and post_force_respa(). The remaining fix_gpu.cpp hunk is
   renumbered accordingly and that file's "index" line is omitted because
   its post-image hash no longer applies.

diff --git a/lib/gpu/lal_base_atomic.cpp b/lib/gpu/lal_base_atomic.cpp
index e59dae1a6f..3d47df8a92 100644
--- a/lib/gpu/lal_base_atomic.cpp
+++ b/lib/gpu/lal_base_atomic.cpp
@@ -70,6 +70,12 @@ int BaseAtomicT::init_atomic(const int nlocal, const int nall,
   if (success!=0)
     return success;
 
+  success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
+                              max_nbors,cell_size,false,_threads_per_atom);
+
+  if (success!=0)
+    return success;
+
   ucl_device=device->gpu;
   atom=&device->atom;
 
diff --git a/lib/gpu/lal_base_charge.cpp b/lib/gpu/lal_base_charge.cpp
index c6341f7d57..94e7502c55 100644
--- a/lib/gpu/lal_base_charge.cpp
+++ b/lib/gpu/lal_base_charge.cpp
@@ -71,6 +71,12 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
   if (success!=0)
     return success;
 
+  success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
+                              max_nbors,cell_size,false,_threads_per_atom);
+
+  if (success!=0)
+    return success;
+
   ucl_device=device->gpu;
   atom=&device->atom;
 
diff --git a/lib/gpu/lal_base_dipole.cpp b/lib/gpu/lal_base_dipole.cpp
index 478f0092c7..b2a41f10cf 100644
--- a/lib/gpu/lal_base_dipole.cpp
+++ b/lib/gpu/lal_base_dipole.cpp
@@ -72,6 +72,12 @@ int BaseDipoleT::init_atomic(const int nlocal, const int nall,
   if (success!=0)
     return success;
 
+  success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
+                              max_nbors,cell_size,false,_threads_per_atom);
+
+  if (success!=0)
+    return success;
+
   ucl_device=device->gpu;
   atom=&device->atom;
 
diff --git a/lib/gpu/lal_base_dpd.cpp b/lib/gpu/lal_base_dpd.cpp
index 941f463b14..3f71c820c7 100644
--- a/lib/gpu/lal_base_dpd.cpp
+++ b/lib/gpu/lal_base_dpd.cpp
@@ -71,6 +71,12 @@ int BaseDPDT::init_atomic(const int nlocal, const int nall,
   if (success!=0)
     return success;
 
+  success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
+                              max_nbors,cell_size,false,_threads_per_atom);
+
+  if (success!=0)
+    return success;
+
   ucl_device=device->gpu;
   atom=&device->atom;
 
diff --git a/lib/gpu/lal_base_three.cpp b/lib/gpu/lal_base_three.cpp
index aa77a48c66..ba28d697cc 100644
--- a/lib/gpu/lal_base_three.cpp
+++ b/lib/gpu/lal_base_three.cpp
@@ -84,6 +84,12 @@ int BaseThreeT::init_three(const int nlocal, const int nall,
   if (success!=0)
     return success;
 
+  success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
+                              max_nbors,cell_size,false,_threads_per_atom);
+
+  if (success!=0)
+    return success;
+
   ucl_device=device->gpu;
   atom=&device->atom;
 
diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp
index 9397f3c6c5..411e19a78a 100644
--- a/lib/gpu/lal_device.cpp
+++ b/lib/gpu/lal_device.cpp
@@ -301,16 +301,6 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
   if (!ans.init(ef_nlocal,charge,rot,*gpu))
     return -3;
 
-  if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
-                  *gpu,gpu_nbor,gpu_host,pre_cut, _block_cell_2d,
-                  _block_cell_id, _block_nbor_build, threads_per_atom,
-                  _warp_size, _time_device, compile_string()))
-    return -3;
-  if (_cell_size<0.0)
-    nbor->cell_size(cell_size,cell_size);
-  else
-    nbor->cell_size(_cell_size,cell_size);
-
   _init_count++;
   return 0;
 }
@@ -338,6 +328,39 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
   return 0;
 }
 
+template <class numtyp, class acctyp>
+int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
+                       const int host_nlocal, const int nall,
+                       const int maxspecial, const int gpu_host,
+                       const int max_nbors, const double cell_size,
+                       const bool pre_cut, const int threads_per_atom) {
+  int ef_nlocal=nlocal;
+  if (_particle_split<1.0 && _particle_split>0.0)
+    ef_nlocal=static_cast<int>(_particle_split*nlocal);
+
+  int gpu_nbor=0;
+  if (_gpu_mode==Device::GPU_NEIGH)
+    gpu_nbor=1;
+  else if (_gpu_mode==Device::GPU_HYB_NEIGH)
+    gpu_nbor=2;
+  #ifndef USE_CUDPP
+  if (gpu_nbor==1)
+    gpu_nbor=2;
+  #endif
+
+  if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
+                  *gpu,gpu_nbor,gpu_host,pre_cut,_block_cell_2d,
+                  _block_cell_id, _block_nbor_build, threads_per_atom,
+                  _warp_size, _time_device, compile_string()))
+    return -3;
+  if (_cell_size<0.0)
+    nbor->cell_size(cell_size,cell_size);
+  else
+    nbor->cell_size(_cell_size,cell_size);
+
+  return 0;
+}
+
 template <class numtyp, class acctyp>
 void DeviceT::set_single_precompute
                      (PPPM<numtyp,acctyp,float,_lgpu_float4> *pppm) {
diff --git a/lib/gpu/lal_device.h b/lib/gpu/lal_device.h
index 695b0a62f9..68d88a3182 100644
--- a/lib/gpu/lal_device.h
+++ b/lib/gpu/lal_device.h
@@ -91,6 +91,13 @@ class Device {
     * - -5 Double precision is not supported on card **/
   int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall);
 
+  /// Initialize neighbor list build -- callback function from pair
+  int init_nbor(Neighbor *nbor, const int nlocal,
+                const int host_nlocal, const int nall,
+                const int maxspecial, const int gpu_host,
+                const int max_nbors, const double cell_size,
+                const bool pre_cut, const int threads_per_atom);
+
   /// Output a message for pair_style acceleration with device stats
   void init_message(FILE *screen, const char *name,
                     const int first_gpu, const int last_gpu);
diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp
--- a/src/GPU/fix_gpu.cpp
+++ b/src/GPU/fix_gpu.cpp
@@ -219,17 +219,6 @@ void FixGPU::init()
     error->all(FLERR,"GPU package does not (yet) work with "
                "atom_style template");
 
-  // hybrid cannot be used with force/neigh option
-
-  if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
-    if (force->pair_match("^hybrid",0) != NULL)
-      error->all(FLERR,"Cannot use pair hybrid with GPU neighbor list builds");
-
-  if (_particle_split < 0)
-    if (force->pair_match("^hybrid",0) != NULL)
-      error->all(FLERR,"GPU split param must be positive "
-                 "for hybrid pair styles");
-
   // neighbor list builds on the GPU with triclinic box is not yet supported
 
   if ((_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH) &&