diff --git a/lib/gpu/lal_base_ellipsoid.cpp b/lib/gpu/lal_base_ellipsoid.cpp index 2e22b2f602..fa060bea5a 100644 --- a/lib/gpu/lal_base_ellipsoid.cpp +++ b/lib/gpu/lal_base_ellipsoid.cpp @@ -224,7 +224,9 @@ void BaseEllipsoidT::output_times() { #ifdef USE_OPENCL // Workaround for timing issue on Intel OpenCL + if (times[0] > 80e6) times[0]=0.0; if (times[3] > 80e6) times[3]=0.0; + if (times[6] > 80e6) times[6]=0.0; #endif if (device->replica_me()==0) @@ -237,17 +239,18 @@ void BaseEllipsoidT::output_times() { fprintf(screen,"\n-------------------------------------"); fprintf(screen,"--------------------------------\n"); - if (device->procs_per_gpu()==1 && times[3]>0) { - fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/replica_size); + if (device->procs_per_gpu()==1 && (times[3] > 0.0)) { + if (times[0] > 0.0) + fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/replica_size); fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/replica_size); - if (nbor->gpu_nbor()>0) + if (nbor->gpu_nbor() > 0.0) fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/replica_size); else fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/replica_size); fprintf(screen,"Force calc: %.4f s.\n",times[3]/replica_size); fprintf(screen,"LJ calc: %.4f s.\n",times[4]/replica_size); } - if (times[6]>0) + if (times[6] > 0.0) fprintf(screen,"Device Overhead: %.4f s.\n",times[6]/replica_size); fprintf(screen,"Average split: %.4f.\n",avg_split); fprintf(screen,"Lanes / atom: %d.\n",_threads_per_atom); diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 0ff7125089..e43e77a761 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -787,28 +787,30 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer &ans, #ifdef USE_OPENCL // Workaround for timing issue on Intel OpenCL + if (times[0] > 80e6) times[0]=0.0; if (times[3] > 80e6) times[3]=0.0; if (times[5] > 80e6) times[5]=0.0; #endif if (replica_me()==0) - if (screen && times[6]>0.0) { + if (screen && (times[6] > 0.0)) { fprintf(screen,"\n\n-------------------------------------"); fprintf(screen,"--------------------------------\n"); fprintf(screen," Device Time Info (average): "); fprintf(screen,"\n-------------------------------------"); fprintf(screen,"--------------------------------\n"); - if (time_device() && times[3]>0) { - fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size); + if (time_device() && (times[3] > 0.0)) { + if (times[0] > 0.0) + fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size); fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/_replica_size); - if (nbor.gpu_nbor()>0) + if (nbor.gpu_nbor() > 0.0) fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/_replica_size); else fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/_replica_size); fprintf(screen,"Force calc: %.4f s.\n",times[3]/_replica_size); } - if (times[5]>0) + if (times[5] > 0.0) fprintf(screen,"Device Overhead: %.4f s.\n",times[5]/_replica_size); fprintf(screen,"Average split: %.4f.\n",avg_split); fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom); diff --git a/src/GPU/fix_gpu.cpp b/src/GPU/fix_gpu.cpp index 71ab3f4cb4..843bff2a35 100644 --- a/src/GPU/fix_gpu.cpp +++ b/src/GPU/fix_gpu.cpp @@ -13,25 +13,26 @@ ------------------------------------------------------------------------- */ #include "fix_gpu.h" -#include #include "atom.h" +#include "citeme.h" #include "comm.h" +#include "domain.h" +#include "error.h" #include "force.h" +#include "gpu_extra.h" +#include "input.h" +#include "modify.h" +#include "neighbor.h" #include "pair.h" #include "pair_hybrid.h" #include "pair_hybrid_overlay.h" #include "respa.h" -#include "input.h" #include "timer.h" -#include "modify.h" -#include "update.h" -#include "domain.h" #include "universe.h" -#include "gpu_extra.h" -#include "neighbor.h" -#include "citeme.h" -#include "error.h" +#include "update.h" + +#include #if (LAL_USE_OMP == 1) #include @@ -275,12 +276,15 @@ void FixGPU::init() error->warning(FLERR,"Using package gpu without any pair style defined"); // make sure fdotr virial is not accumulated multiple times + // also disallow GPU neighbor lists for hybrid styles if (force->pair_match("^hybrid",0) != nullptr) { PairHybrid *hybrid = (PairHybrid *) force->pair; for (int i = 0; i < hybrid->nstyles; i++) if (!utils::strmatch(hybrid->keywords[i],"/gpu$")) force->pair->no_virial_fdotr_compute = 1; + if (_gpu_mode != GPU_FORCE) + error->all(FLERR, "Must not use GPU neighbor lists with hybrid pair style"); } // rRESPA support @@ -295,8 +299,7 @@ void FixGPU::setup(int vflag) { if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH) if (neighbor->exclude_setting() != 0) - error->all(FLERR, - "Cannot use neigh_modify exclude with GPU neighbor builds"); + error->all(FLERR, "Cannot use neigh_modify exclude with GPU neighbor builds"); if (utils::strmatch(update->integrate_style,"^verlet")) post_force(vflag); else {