Merge pull request #2911 from akohlmey/fix-gpu-package-issues
Fix minor GPU package issues for the stable release
This commit is contained in:
@@ -224,7 +224,9 @@ void BaseEllipsoidT::output_times() {
|
||||
|
||||
#ifdef USE_OPENCL
|
||||
// Workaround for timing issue on Intel OpenCL
|
||||
if (times[0] > 80e6) times[0]=0.0;
|
||||
if (times[3] > 80e6) times[3]=0.0;
|
||||
if (times[6] > 80e6) times[6]=0.0;
|
||||
#endif
|
||||
|
||||
if (device->replica_me()==0)
|
||||
@@ -237,17 +239,18 @@ void BaseEllipsoidT::output_times() {
|
||||
fprintf(screen,"\n-------------------------------------");
|
||||
fprintf(screen,"--------------------------------\n");
|
||||
|
||||
if (device->procs_per_gpu()==1 && times[3]>0) {
|
||||
if (device->procs_per_gpu()==1 && (times[3] > 0.0)) {
|
||||
if (times[0] > 0.0)
|
||||
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/replica_size);
|
||||
fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/replica_size);
|
||||
if (nbor->gpu_nbor()>0)
|
||||
if (nbor->gpu_nbor() > 0.0)
|
||||
fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/replica_size);
|
||||
else
|
||||
fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/replica_size);
|
||||
fprintf(screen,"Force calc: %.4f s.\n",times[3]/replica_size);
|
||||
fprintf(screen,"LJ calc: %.4f s.\n",times[4]/replica_size);
|
||||
}
|
||||
if (times[6]>0)
|
||||
if (times[6] > 0.0)
|
||||
fprintf(screen,"Device Overhead: %.4f s.\n",times[6]/replica_size);
|
||||
fprintf(screen,"Average split: %.4f.\n",avg_split);
|
||||
fprintf(screen,"Lanes / atom: %d.\n",_threads_per_atom);
|
||||
|
||||
@@ -787,28 +787,30 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans,
|
||||
|
||||
#ifdef USE_OPENCL
|
||||
// Workaround for timing issue on Intel OpenCL
|
||||
if (times[0] > 80e6) times[0]=0.0;
|
||||
if (times[3] > 80e6) times[3]=0.0;
|
||||
if (times[5] > 80e6) times[5]=0.0;
|
||||
#endif
|
||||
|
||||
if (replica_me()==0)
|
||||
if (screen && times[6]>0.0) {
|
||||
if (screen && (times[6] > 0.0)) {
|
||||
fprintf(screen,"\n\n-------------------------------------");
|
||||
fprintf(screen,"--------------------------------\n");
|
||||
fprintf(screen," Device Time Info (average): ");
|
||||
fprintf(screen,"\n-------------------------------------");
|
||||
fprintf(screen,"--------------------------------\n");
|
||||
|
||||
if (time_device() && times[3]>0) {
|
||||
if (time_device() && (times[3] > 0.0)) {
|
||||
if (times[0] > 0.0)
|
||||
fprintf(screen,"Data Transfer: %.4f s.\n",times[0]/_replica_size);
|
||||
fprintf(screen,"Neighbor copy: %.4f s.\n",times[1]/_replica_size);
|
||||
if (nbor.gpu_nbor()>0)
|
||||
if (nbor.gpu_nbor() > 0.0)
|
||||
fprintf(screen,"Neighbor build: %.4f s.\n",times[2]/_replica_size);
|
||||
else
|
||||
fprintf(screen,"Neighbor unpack: %.4f s.\n",times[2]/_replica_size);
|
||||
fprintf(screen,"Force calc: %.4f s.\n",times[3]/_replica_size);
|
||||
}
|
||||
if (times[5]>0)
|
||||
if (times[5] > 0.0)
|
||||
fprintf(screen,"Device Overhead: %.4f s.\n",times[5]/_replica_size);
|
||||
fprintf(screen,"Average split: %.4f.\n",avg_split);
|
||||
fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom);
|
||||
|
||||
@@ -13,25 +13,26 @@
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "fix_gpu.h"
|
||||
#include <cstring>
|
||||
|
||||
#include "atom.h"
|
||||
#include "citeme.h"
|
||||
#include "comm.h"
|
||||
#include "domain.h"
|
||||
#include "error.h"
|
||||
#include "force.h"
|
||||
#include "gpu_extra.h"
|
||||
#include "input.h"
|
||||
#include "modify.h"
|
||||
#include "neighbor.h"
|
||||
#include "pair.h"
|
||||
#include "pair_hybrid.h"
|
||||
#include "pair_hybrid_overlay.h"
|
||||
#include "respa.h"
|
||||
#include "input.h"
|
||||
#include "timer.h"
|
||||
#include "modify.h"
|
||||
#include "update.h"
|
||||
#include "domain.h"
|
||||
#include "universe.h"
|
||||
#include "gpu_extra.h"
|
||||
#include "neighbor.h"
|
||||
#include "citeme.h"
|
||||
#include "error.h"
|
||||
#include "update.h"
|
||||
|
||||
#include <cstring>
|
||||
|
||||
#if (LAL_USE_OMP == 1)
|
||||
#include <omp.h>
|
||||
@@ -275,12 +276,15 @@ void FixGPU::init()
|
||||
error->warning(FLERR,"Using package gpu without any pair style defined");
|
||||
|
||||
// make sure fdotr virial is not accumulated multiple times
|
||||
// also disallow GPU neighbor lists for hybrid styles
|
||||
|
||||
if (force->pair_match("^hybrid",0) != nullptr) {
|
||||
PairHybrid *hybrid = (PairHybrid *) force->pair;
|
||||
for (int i = 0; i < hybrid->nstyles; i++)
|
||||
if (!utils::strmatch(hybrid->keywords[i],"/gpu$"))
|
||||
force->pair->no_virial_fdotr_compute = 1;
|
||||
if (_gpu_mode != GPU_FORCE)
|
||||
error->all(FLERR, "Must not use GPU neighbor lists with hybrid pair style");
|
||||
}
|
||||
|
||||
// rRESPA support
|
||||
@@ -295,8 +299,7 @@ void FixGPU::setup(int vflag)
|
||||
{
|
||||
if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
|
||||
if (neighbor->exclude_setting() != 0)
|
||||
error->all(FLERR,
|
||||
"Cannot use neigh_modify exclude with GPU neighbor builds");
|
||||
error->all(FLERR, "Cannot use neigh_modify exclude with GPU neighbor builds");
|
||||
|
||||
if (utils::strmatch(update->integrate_style,"^verlet")) post_force(vflag);
|
||||
else {
|
||||
|
||||
Reference in New Issue
Block a user