Enabled neighbor list build on the device with pair_style hybrid and hybrid/overlay

2019-04-16 23:30:25 -05:00
parent 898860328b
commit c55009a0ac
8 changed files with 80 additions and 27 deletions
--- a/lib/gpu/lal_base_atomic.cpp
+++ b/lib/gpu/lal_base_atomic.cpp
@ -70,6 +70,12 @@ int BaseAtomicT::init_atomic(const int nlocal, const int nall,
  if (success!=0)
    return success;
  success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
                  max_nbors,cell_size,false,_threads_per_atom);
  if (success!=0)
    return success;
  ucl_device=device->gpu;
  atom=&device->atom;
--- a/lib/gpu/lal_base_charge.cpp
+++ b/lib/gpu/lal_base_charge.cpp
@ -71,6 +71,12 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
  if (success!=0)
    return success;
  success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
                  max_nbors,cell_size,false,_threads_per_atom);
  if (success!=0)
    return success;
  ucl_device=device->gpu;
  atom=&device->atom;
--- a/lib/gpu/lal_base_dipole.cpp
+++ b/lib/gpu/lal_base_dipole.cpp
@ -72,6 +72,12 @@ int BaseDipoleT::init_atomic(const int nlocal, const int nall,
  if (success!=0)
    return success;
  success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
                  max_nbors,cell_size,false,_threads_per_atom);
  if (success!=0)
    return success;
  ucl_device=device->gpu;
  atom=&device->atom;
--- a/lib/gpu/lal_base_dpd.cpp
+++ b/lib/gpu/lal_base_dpd.cpp
@ -71,6 +71,12 @@ int BaseDPDT::init_atomic(const int nlocal, const int nall,
  if (success!=0)
    return success;
  success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
                  max_nbors,cell_size,false,_threads_per_atom);
  if (success!=0)
    return success;
  ucl_device=device->gpu;
  atom=&device->atom;
--- a/lib/gpu/lal_base_three.cpp
+++ b/lib/gpu/lal_base_three.cpp
@ -84,6 +84,12 @@ int BaseThreeT::init_three(const int nlocal, const int nall,
  if (success!=0)
    return success;
  success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
                  max_nbors,cell_size,false,_threads_per_atom);
  if (success!=0)
    return success;
  ucl_device=device->gpu;
  atom=&device->atom;
--- a/lib/gpu/lal_device.cpp
+++ b/lib/gpu/lal_device.cpp
@ -301,16 +301,6 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
  if (!ans.init(ef_nlocal,charge,rot,*gpu))
    return -3;
  if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
                  *gpu,gpu_nbor,gpu_host,pre_cut, _block_cell_2d,
                  _block_cell_id, _block_nbor_build, threads_per_atom,
                  _warp_size, _time_device, compile_string()))
    return -3;
  if (_cell_size<0.0)
    nbor->cell_size(cell_size,cell_size);
  else
    nbor->cell_size(_cell_size,cell_size);
  _init_count++;
  return 0;
 }
@ -338,6 +328,39 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
  return 0;
 }
 /// Initialize a neighbor object supplied by the caller from device settings
 /** Computes the effective local atom count and the neighbor-build flag,
   * forwards them to nbor->init(), and then sets the cell size on nbor.
   * The nall argument is accepted but not used by this routine.
   *
   * Returns:
   * -  0 if successful
   * - -3 if nbor->init() reports failure **/
 template <class numtyp, class acctyp>
 int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
                        const int host_nlocal, const int nall,
                        const int maxspecial, const int gpu_host,
                        const int max_nbors, const double cell_size,
                        const bool pre_cut, const int threads_per_atom) {
  // Only a split fraction strictly between 0 and 1 scales the local count
  const bool partial_split=(_particle_split>0.0 && _particle_split<1.0);
  const int ef_nlocal=partial_split ?
    static_cast<int>(_particle_split*nlocal) : nlocal;

  // Translate the device mode into the integer flag handed to nbor->init()
  int gpu_nbor=0;
  if (_gpu_mode==Device<numtyp,acctyp>::GPU_NEIGH) {
    // When USE_CUDPP is not defined, flag value 2 is used in place of 1
    #ifdef USE_CUDPP
    gpu_nbor=1;
    #else
    gpu_nbor=2;
    #endif
  } else if (_gpu_mode==Device<numtyp,acctyp>::GPU_HYB_NEIGH) {
    gpu_nbor=2;
  }

  const bool nbor_ready=nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,
                                   max_nbors,maxspecial,*gpu,gpu_nbor,
                                   gpu_host,pre_cut,_block_cell_2d,
                                   _block_cell_id,_block_nbor_build,
                                   threads_per_atom,_warp_size,_time_device,
                                   compile_string());
  if (!nbor_ready)
    return -3;

  // A negative _cell_size selects the caller's cell_size as first argument;
  // the caller's cell_size is always passed as the second argument
  const double first_size=(_cell_size<0.0) ? cell_size : _cell_size;
  nbor->cell_size(first_size,cell_size);
  return 0;
 }
 template <class numtyp, class acctyp>
 void DeviceT::set_single_precompute
                     (PPPM<numtyp,acctyp,float,_lgpu_float4> *pppm) {
--- a/lib/gpu/lal_device.h
+++ b/lib/gpu/lal_device.h
@ -91,6 +91,13 @@ class Device {
    * - -5 Double precision is not supported on card **/
  int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall);
  /// Initialize neighbor list build -- callback function from pair
  /** \param nbor neighbor object to set up; its init() and cell_size()
    *             members are called
    * \param nlocal number of local atoms; scaled by the particle split
    *               when the split is strictly between 0 and 1
    * \param host_nlocal forwarded unchanged to nbor->init()
    * \param nall not used by the current implementation
    * \param maxspecial forwarded unchanged to nbor->init()
    * \param gpu_host forwarded unchanged to nbor->init()
    * \param max_nbors forwarded unchanged to nbor->init()
    * \param cell_size cell size requested by the caller; used as the first
    *                  cell size only when the device-level value is negative,
    *                  and always passed as the second cell size
    * \param pre_cut forwarded unchanged to nbor->init()
    * \param threads_per_atom forwarded unchanged to nbor->init()
    *
    * Returns:
    * -  0 if successful
    * - -3 if nbor->init() reports failure **/
  int init_nbor(Neighbor *nbor, const int nlocal,
                  const int host_nlocal, const int nall,
                  const int maxspecial, const int gpu_host,
                  const int max_nbors, const double cell_size,
                  const bool pre_cut, const int threads_per_atom);
  /// Output a message for pair_style acceleration with device stats
  void init_message(FILE *screen, const char *name,
                    const int first_gpu, const int last_gpu);
--- a/src/GPU/fix_gpu.cpp
+++ b/src/GPU/fix_gpu.cpp
@ -30,7 +30,6 @@
 #include "neighbor.h"
 #include "citeme.h"
 #include "error.h"
 #include "utils.h"
 using namespace LAMMPS_NS;
 using namespace FixConst;
@ -219,17 +218,6 @@ void FixGPU::init()
    error->all(FLERR,"GPU package does not (yet) work with "
               "atom_style template");
  // hybrid cannot be used with force/neigh option
  if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
    if (force->pair_match("^hybrid",0) != NULL)
      error->all(FLERR,"Cannot use pair hybrid with GPU neighbor list builds");
  if (_particle_split < 0)
    if (force->pair_match("^hybrid",0) != NULL)
      error->all(FLERR,"GPU split param must be positive "
                 "for hybrid pair styles");
  // neighbor list builds on the GPU with triclinic box is not yet supported
  if ((_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH) &&
@ -243,16 +231,21 @@ void FixGPU::init()
  // make sure fdotr virial is not accumulated multiple times
-  if (force->pair_match("^hybrid",0) != NULL) {
+  if (force->pair_match("hybrid",1) != NULL) {
    PairHybrid *hybrid = (PairHybrid *) force->pair;
    for (int i = 0; i < hybrid->nstyles; i++)
-      if (!utils::strmatch(hybrid->keywords[i],"/gpu$"))
+      if (strstr(hybrid->keywords[i],"/gpu")==NULL)
        force->pair->no_virial_fdotr_compute = 1;
  } else if (force->pair_match("hybrid/overlay",1) != NULL) {
    PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
    for (int i = 0; i < hybrid->nstyles; i++)
      if (strstr(hybrid->keywords[i],"/gpu")==NULL)
        force->pair->no_virial_fdotr_compute = 1;
  }
  // rRESPA support
-  if (utils::strmatch(update->integrate_style,"^respa"))
+  if (strstr(update->integrate_style,"respa"))
    _nlevels_respa = ((Respa *) update->integrate)->nlevels;
 }
@ -283,7 +276,7 @@ void FixGPU::min_setup(int vflag)
 /* ---------------------------------------------------------------------- */
-void FixGPU::post_force(int /* vflag */)
+void FixGPU::post_force(int vflag)
 {
  if (!force->pair) return;
@ -315,7 +308,7 @@ void FixGPU::min_post_force(int vflag)
 /* ---------------------------------------------------------------------- */
-void FixGPU::post_force_respa(int vflag, int /* ilevel */, int /* iloop */)
+void FixGPU::post_force_respa(int vflag, int ilevel, int iloop)
 {
  post_force(vflag);
 }