Added the GPU versions of lj/cut/coul/cut/soft and lj/cut/coul/long/soft

This commit is contained in:
Trung Nguyen
2023-12-05 13:30:26 -06:00
parent 8389e2eb80
commit 9588ddabf6
18 changed files with 2023 additions and 14 deletions

View File

@ -364,6 +364,12 @@ int DeviceT::init_device(MPI_Comm /*world*/, MPI_Comm replica, const int ngpu,
} else
_neighbor_shared.setup_auto_cell_size(false,_user_cell_size,_simd_size);
#ifndef LAL_USE_OLD_NEIGHBOR
_use_old_nbor_build = 0;
#else
_use_old_nbor_build = 1;
#endif
return flag;
}
@ -510,9 +516,13 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
gpu_nbor=1;
else if (_gpu_mode==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
gpu_nbor=2;
// NOTE: enforce the hybrid mode (binning on the CPU)
// when not using sorting on the device
#if !defined(USE_CUDPP) && !defined(USE_HIP_DEVICE_SORT)
if (gpu_nbor==1) gpu_nbor=2;
#endif
// NOTE: also enforce the hybrid mode when the subgroup-based neighbor
// build is in use (i.e. LAL_USE_OLD_NEIGHBOR is not defined)
#ifndef LAL_USE_OLD_NEIGHBOR
if (gpu_nbor==1) gpu_nbor=2;
#endif
@ -886,19 +896,31 @@ void DeviceT::output_times(UCL_Timer &time_pair, Answer<numtyp,acctyp> &ans,
}
if (times[5] > 0.0)
fprintf(screen,"Device Overhead: %.4f s.\n",times[5]/_replica_size);
fprintf(screen,"Average split: %.4f.\n",avg_split);
fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom);
fprintf(screen,"Vector width: %d.\n", simd_size());
fprintf(screen,"Prefetch mode: ");
if (_nbor_prefetch==2) fprintf(screen,"Intrinsics.\n");
else if (_nbor_prefetch==1) fprintf(screen,"API.\n");
else fprintf(screen,"None.\n");
fprintf(screen,"Max Mem / Proc: %.2f MB.\n",max_mb);
if (nbor.gpu_nbor()==2)
fprintf(screen,"CPU Neighbor: %.4f s.\n",times[8]/_replica_size);
fprintf(screen,"CPU Cast/Pack: %.4f s.\n",times[4]/_replica_size);
fprintf(screen,"CPU Driver_Time: %.4f s.\n",times[6]/_replica_size);
fprintf(screen,"CPU Idle_Time: %.4f s.\n",times[7]/_replica_size);
fprintf(screen,"Average split: %.4f.\n",avg_split);
fprintf(screen,"Max Mem / Proc: %.2f MB.\n",max_mb);
fprintf(screen,"Prefetch mode: ");
if (_nbor_prefetch==2) fprintf(screen,"Intrinsics.\n");
else if (_nbor_prefetch==1) fprintf(screen,"API.\n");
else fprintf(screen,"None.\n");
fprintf(screen,"Vector width: %d.\n", simd_size());
fprintf(screen,"Lanes / atom: %d.\n",threads_per_atom);
fprintf(screen,"Pair block: %d.\n",_block_pair);
fprintf(screen,"Neigh block: %d.\n",_block_nbor_build);
if (nbor.gpu_nbor()==2) {
fprintf(screen,"Neigh mode: Hybrid (binning on host)");
if (_use_old_nbor_build == 1) fprintf(screen," - legacy\n");
else fprintf(screen," with subgroup support\n");
} else if (nbor.gpu_nbor()==1) {
fprintf(screen,"Neigh mode: Device");
if (_use_old_nbor_build == 1) fprintf(screen," - legacy\n");
else fprintf(screen," - with subgroup support\n");
} else if (nbor.gpu_nbor()==0)
fprintf(screen,"Neigh mode: Host\n");
fprintf(screen,"-------------------------------------");
fprintf(screen,"--------------------------------\n\n");