Merge branch 'develop' of https://github.com/lammps/lammps into kk_update_3.7

Stan Moore committed 2022-10-10 13:44:02 -07:00
133 changed files with 6313 additions and 1175 deletions

View File

@ -1,5 +1,5 @@
# /* ----------------------------------------------------------------------
# Generic Linux Makefile for OpenCL
# /* ----------------------------------------------------------------------
# Linux Makefile for Intel oneAPI - Mixed precision
# ------------------------------------------------------------------------- */
# which file will be copied to Makefile.lammps
@ -11,11 +11,14 @@ EXTRAMAKE = Makefile.lammps.opencl
LMP_INC = -DLAMMPS_SMALLBIG
OCL_INC =
OCL_CPP = mpiicpc -std=c++11 -xHost -O2 -qopenmp -qopenmp-simd -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC)
OCL_LINK = -lOpenCL
OCL_INC = -I$(ONEAPI_ROOT)/compiler/latest/linux/include/sycl/
CPP_OPT = -xHost -O2 -qopenmp -qopenmp-simd -fp-model fast=2 -no-prec-div \
-qoverride-limits
OCL_CPP = mpiicpc -std=c++11 -diag-disable=10441 -DMPICH_IGNORE_CXX_SEEK \
$(LMP_INC) $(OCL_INC) $(CPP_OPT)
OCL_LINK = -L$(ONEAPI_ROOT)/compiler/latest/linux/lib -lOpenCL
OCL_PREC = -D_SINGLE_DOUBLE
OCL_TUNE = -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT -fp-model fast=2 -no-prec-div
OCL_TUNE = -DMPI_GERYON -DCUDA_PROXY -DGERYON_NUMA_FISSION -DUCL_NO_EXIT
BIN_DIR = ./
OBJ_DIR = ./

View File

@ -264,6 +264,20 @@ GERYON_KERNEL_DUMP Dump all compiled OpenCL programs with compiler
flags and build logs
GPU_CAST Casting performed on GPU, untested recently
THREE_CONCURRENT Concurrent 3-body calcs in separate queues, untested
LAL_SERIALIZE_INIT Force serialization of initialization and compilation
for multiple MPI tasks sharing the same accelerator.
Some accelerator API implementations have had issues
with temporary file conflicts in the past.
GERYON_FORCE_SHARED_MAIN_MEM_ON Should only be used for builds where the
accelerator is guaranteed to share physical
main memory with the host (e.g. integrated
GPU or CPU device). Default behavior is to
auto-detect. Impacts OpenCL only.
GERYON_FORCE_SHARED_MAIN_MEM_OFF Should only be used for builds where the
accelerator is guaranteed to have discrete
physical main memory separate from the host
(e.g. a discrete GPU card). Default behavior is to
auto-detect. Impacts OpenCL only.
------------------------------------------------------------------------------
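To make the effect of the two GERYON_FORCE_SHARED_MAIN_MEM_* switches concrete, here is a minimal compile-time sketch. It is not the library's actual code: detect_shared_memory() is a hypothetical stand-in for Geryon's auto-detection, and only the macro names come from the table above.

    // Hedged sketch: how a build define can override run-time detection.
    static bool detect_shared_memory() { return false; }  // hypothetical stub

    static bool shared_main_memory() {
    #if defined(GERYON_FORCE_SHARED_MAIN_MEM_ON)
      return true;                    // build guarantees host-shared memory
    #elif defined(GERYON_FORCE_SHARED_MAIN_MEM_OFF)
      return false;                   // build guarantees discrete device memory
    #else
      return detect_shared_memory();  // default: auto-detect at run time
    #endif
    }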

View File

@ -126,10 +126,13 @@ class UCL_Device {
/// Return the number of devices that support OpenCL
inline int num_devices() { return _num_devices; }
/// Specify whether profiling (device timers) will be used for the device (yes=true)
/// Specify whether profiling (device timers) will be used (yes=true)
/** No-op for CUDA and HIP **/
inline void configure_profiling(const bool profiling_on)
{ _cq_profiling = profiling_on; }
inline void configure_profiling(const bool profiling_on) {
#ifndef GERYON_NO_OCL_MARKERS
_cq_profiling = profiling_on;
#endif
}
/// Set the OpenCL device to the specified device number
/** A context and default command queue will be created for the device *
@ -176,8 +179,8 @@ class UCL_Device {
#ifdef CL_VERSION_2_0
if (_cq_profiling) {
cl_queue_properties props[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE,
0};
cl_queue_properties props[] = {CL_QUEUE_PROPERTIES,
CL_QUEUE_PROFILING_ENABLE, 0};
_cq.back()=clCreateCommandQueueWithProperties(_context, _cl_device, props,
&errorv);
} else {
@ -187,8 +190,8 @@ class UCL_Device {
}
#else
if (_cq_profiling)
_cq.back()=clCreateCommandQueue(_context, _cl_device, CL_QUEUE_PROFILING_ENABLE,
&errorv);
_cq.back()=clCreateCommandQueue(_context, _cl_device,
CL_QUEUE_PROFILING_ENABLE, &errorv);
else
_cq.back()=clCreateCommandQueue(_context, _cl_device, 0, &errorv);
#endif
@ -403,7 +406,11 @@ class UCL_Device {
// Grabs the properties for all devices
UCL_Device::UCL_Device() {
_device=-1;
#ifndef GERYON_NO_OCL_MARKERS
_cq_profiling=true;
#else
_cq_profiling=false;
#endif
// --- Get Number of Platforms
cl_uint nplatforms;
@ -482,6 +489,7 @@ int UCL_Device::set_platform(int pid) {
_num_devices = 0;
for (int i=0; i<num_unpart; i++) {
cl_uint num_subdevices = 1;
cl_device_id *subdevice_list = device_list + i;
#ifdef CL_VERSION_1_2
cl_device_affinity_domain adomain;
@ -494,25 +502,29 @@ int UCL_Device::set_platform(int pid) {
props[0]=CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
props[1]=CL_DEVICE_AFFINITY_DOMAIN_NUMA;
props[2]=0;
cl_int err = CL_SUCCESS;
if (adomain & CL_DEVICE_AFFINITY_DOMAIN_NUMA)
CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, 0, NULL,
&num_subdevices));
if (num_subdevices > 1) {
cl_device_id *subdevice_list = new cl_device_id[num_subdevices];
CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, num_subdevices,
subdevice_list, &num_subdevices));
for (cl_uint j=0; j<num_subdevices; j++) {
_cl_devices.push_back(device_list[i]);
add_properties(device_list[i]);
_num_devices++;
err = clCreateSubDevices(device_list[i], props, 0, NULL,
&num_subdevices);
if (err == CL_SUCCESS && num_subdevices > 1) {
subdevice_list = new cl_device_id[num_subdevices];
err = clCreateSubDevices(device_list[i], props, num_subdevices,
subdevice_list, &num_subdevices);
if (err != CL_SUCCESS) {
delete[] subdevice_list;
num_subdevices = 1;
subdevice_list = device_list + i;
}
delete[] subdevice_list;
} else {
_cl_devices.push_back(device_list[i]);
add_properties(device_list[i]);
_num_devices++;
}
#endif
for (cl_uint j=0; j<num_subdevices; j++) {
_num_devices++;
_cl_devices.push_back(subdevice_list[j]);
add_properties(subdevice_list[j]);
}
if (num_subdevices > 1) delete[] subdevice_list;
} // for i
#endif
@ -686,10 +698,10 @@ void UCL_Device::add_properties(cl_device_id device_list) {
double arch = static_cast<double>(minor)/10+major;
if (arch >= 3.0)
op.has_shuffle_support=true;
op.shared_main_memory=_shared_mem_device(device_list);
}
delete[] buffer2;
#endif
op.shared_main_memory=_shared_mem_device(device_list);
_properties.push_back(op);
}
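For context, a minimal hedged usage sketch of the UCL_Device API touched above. The include path, the return convention of set_platform(), and the existence of a first platform are assumptions, not shown in this diff.

    #include "geryon/ocl_device.h"   // header path assumed
    #include <cstdio>
    using namespace ucl_opencl;

    int main() {
      UCL_Device dev;                    // constructor queries available platforms
      if (dev.set_platform(0) != UCL_SUCCESS)  // assumed return convention
        return 1;
      // set_platform() builds the device list, splitting by NUMA domain
      // where sub-device fission succeeds (see the change above)
      std::printf("OpenCL devices: %d\n", dev.num_devices());
      dev.configure_profiling(true);     // request device timers; ignored when
                                         // GERYON_NO_OCL_MARKERS is defined
      return 0;
    }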

View File

@ -27,11 +27,15 @@
#include "ocl_macros.h"
#include "ocl_device.h"
#ifndef GERYON_NO_OCL_MARKERS
#ifdef CL_VERSION_1_2
#define UCL_OCL_MARKER(cq,event) clEnqueueMarkerWithWaitList(cq,0,nullptr,event)
#else
#define UCL_OCL_MARKER clEnqueueMarker
#endif
#else
#define UCL_OCL_MARKER(cq,event)
#endif
namespace ucl_opencl {
@ -51,8 +55,10 @@ class UCL_Timer {
inline void clear() {
if (_initialized) {
if (has_measured_time) {
#ifndef GERYON_NO_OCL_MARKERS
clReleaseEvent(start_event);
clReleaseEvent(stop_event);
#endif
has_measured_time = false;
}
CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq));
@ -76,8 +82,10 @@ class UCL_Timer {
/// Start timing on default command queue
inline void start() {
if (has_measured_time) {
#ifndef GERYON_NO_OCL_MARKERS
clReleaseEvent(start_event);
clReleaseEvent(stop_event);
#endif
has_measured_time = false;
}
UCL_OCL_MARKER(_cq,&start_event);
@ -91,17 +99,26 @@ class UCL_Timer {
/// Block until the start event has been reached on device
inline void sync_start() {
#ifndef GERYON_NO_OCL_MARKERS
CL_SAFE_CALL(clWaitForEvents(1,&start_event));
if (has_measured_time) {
clReleaseEvent(start_event);
clReleaseEvent(stop_event);
has_measured_time = false;
}
CL_SAFE_CALL(clWaitForEvents(1,&start_event));
#else
CL_SAFE_CALL(clFinish(_cq));
has_measured_time = false;
#endif
}
/// Block until the stop event has been reached on device
inline void sync_stop() {
#ifndef GERYON_NO_OCL_MARKERS
CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
#else
CL_SAFE_CALL(clFinish(_cq));
#endif
has_measured_time = true;
}
@ -126,6 +143,7 @@ class UCL_Timer {
/// Return the time (ms) of last start to stop - Forces synchronization
inline double time() {
if(!has_measured_time) return 0.0;
#ifndef GERYON_NO_OCL_MARKERS
cl_ulong tstart,tend;
CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
CL_SAFE_CALL(clGetEventProfilingInfo(stop_event,
@ -138,6 +156,11 @@ class UCL_Timer {
clReleaseEvent(stop_event);
has_measured_time = false;
return (tend-tstart)*1e-6;
#else
CL_SAFE_CALL(clFinish(_cq));
has_measured_time = false;
return 0.0;
#endif
}
/// Return the time (s) of last start to stop - Forces synchronization
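A hedged sketch of the timing pattern these markers support. The header path is assumed, and a stop() method that records stop_event is assumed to exist; only start(), sync_start(), sync_stop(), and time() appear in this diff.

    #include "geryon/ocl_timer.h"   // header path assumed

    // Times a region of device work submitted on the timer's command queue.
    static double time_region_ms(ucl_opencl::UCL_Timer &t) {
      t.start();        // enqueue start marker (skipped with GERYON_NO_OCL_MARKERS)
      // ... enqueue kernels / memory transfers on t's queue here ...
      t.stop();         // assumed API: enqueue the matching stop marker
      t.sync_stop();    // wait for the stop marker, or clFinish() without markers
      return t.time();  // elapsed ms between markers, or 0.0 without markers
    }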

View File

@ -76,7 +76,7 @@ int beck_gpu_init(const int ntypes, double **cutsq, double **aa,
special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
BLMF.device->gpu_barrier();
BLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -84,7 +84,7 @@ int bornclcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
gpu_split, screen, host_cut_ljsq, host_cut_coulsq,
host_special_coul, qqrd2e, g_ewald);
BCLCSMF.device->gpu_barrier();
BCLCSMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -84,7 +84,7 @@ int borncl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
gpu_split, screen, host_cut_ljsq, host_cut_coulsq,
host_special_coul, qqrd2e, g_ewald);
BORNCLMF.device->gpu_barrier();
BORNCLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -86,7 +86,7 @@ int borncwcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
host_cut_coulsq, host_special_coul, qqrd2e,
alf, e_shift, f_shift);
BornCWCST.device->gpu_barrier();
BornCWCST.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -86,7 +86,7 @@ int borncw_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
host_cut_coulsq, host_special_coul, qqrd2e,
alf, e_shift, f_shift);
BORNCWMF.device->gpu_barrier();
BORNCWMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -80,7 +80,7 @@ int born_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
offset, special_lj, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen);
BORNMF.device->gpu_barrier();
BORNMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -114,7 +114,7 @@ void born_gpu_reinit(const int ntypes, double **host_rhoinv,
BORNMF.reinit(ntypes, host_rhoinv, host_born1, host_born2,
host_born3, host_a, host_c, host_d, offset);
BORNMF.device->gpu_barrier();
BORNMF.device->serialize_init();
}
}

View File

@ -83,7 +83,7 @@ int buckc_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
host_cut_ljsq, host_cut_coulsq,
host_special_coul, qqrd2e);
BUCKCMF.device->gpu_barrier();
BUCKCMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -82,7 +82,7 @@ int buckcl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
maxspecial, cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
BUCKCLMF.device->gpu_barrier();
BUCKCLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -77,7 +77,7 @@ int buck_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
host_a, host_c, offset, special_lj, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen);
BUCKMF.device->gpu_barrier();
BUCKMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -110,7 +110,7 @@ void buck_gpu_reinit(const int ntypes, double **cutsq, double **host_rhoinv,
BUCKMF.reinit(ntypes, cutsq, host_rhoinv, host_buck1, host_buck2,
host_a, host_c, offset);
BUCKMF.device->gpu_barrier();
BUCKMF.device->serialize_init();
}
}

View File

@ -88,7 +88,7 @@ int crm_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
qqrd2e, cut_lj_innersq, cut_coul_innersq, denom_lj,
denom_coul, epsilon, sigma, mix_arithmetic);
CRMMF.device->gpu_barrier();
CRMMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -86,7 +86,7 @@ int crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
qqrd2e, g_ewald, cut_lj_innersq, denom_lj, epsilon,
sigma, mix_arithmetic);
CRMLMF.device->gpu_barrier();
CRMLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -83,7 +83,7 @@ int colloid_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
COLLMF.device->gpu_barrier();
COLLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -74,7 +74,7 @@ int cdebye_gpu_init(const int ntypes, double **host_scale, double **cutsq,
init_ok=CDEMF.init(ntypes, host_scale, cutsq, host_special_coul, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen, qqrd2e, kappa);
CDEMF.device->gpu_barrier();
CDEMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -103,7 +103,7 @@ void cdebye_gpu_reinit(const int ntypes, double **host_scale) {
if (gpu_rank==i && world_me!=0)
CDEMF.reinit(ntypes, host_scale);
CDEMF.device->gpu_barrier();
CDEMF.device->serialize_init();
}
}

View File

@ -77,7 +77,7 @@ int cdsf_gpu_init(const int ntypes, const int inum, const int nall,
gpu_split, screen, host_cut_coulsq, host_special_coul,
qqrd2e, e_shift, f_shift, alpha);
CDMF.device->gpu_barrier();
CDMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -74,7 +74,7 @@ int coul_gpu_init(const int ntypes, double **host_scale,
init_ok=COULMF.init(ntypes, host_scale, cutsq, special_coul, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen, qqrd2e);
COULMF.device->gpu_barrier();
COULMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -103,7 +103,7 @@ void coul_gpu_reinit(const int ntypes, double **host_scale) {
if (gpu_rank==i && world_me!=0)
COULMF.reinit(ntypes, host_scale);
COULMF.device->gpu_barrier();
COULMF.device->serialize_init();
}
}

View File

@ -76,7 +76,7 @@ int clcs_gpu_init(const int ntypes, double **host_scale,
cell_size, gpu_split, screen, host_cut_coulsq,
host_special_coul, qqrd2e, g_ewald);
CLCSMF.device->gpu_barrier();
CLCSMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -105,7 +105,7 @@ void clcs_gpu_reinit(const int ntypes, double **host_scale) {
if (gpu_rank==i && world_me!=0)
CLCSMF.reinit(ntypes, host_scale);
CLCSMF.device->gpu_barrier();
CLCSMF.device->serialize_init();
}
}

View File

@ -76,7 +76,7 @@ int cl_gpu_init(const int ntypes, double **host_scale,
cell_size, gpu_split, screen, host_cut_coulsq,
host_special_coul, qqrd2e, g_ewald);
CLMF.device->gpu_barrier();
CLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -105,7 +105,7 @@ void cl_gpu_reinit(const int ntypes, double **host_scale) {
if (gpu_rank==i && world_me!=0)
CLMF.reinit(ntypes, host_scale);
CLMF.device->gpu_barrier();
CLMF.device->serialize_init();
}
}

View File

@ -328,7 +328,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
for (int i=0; i<_procs_per_gpu; i++) {
if (_gpu_rank==i)
flag=compile_kernels();
gpu_barrier();
serialize_init();
}
// check if double precision support is available
@ -609,6 +609,10 @@ void DeviceT::init_message(FILE *screen, const char *name,
int last=last_gpu+1;
if (last>gpu->num_devices())
last=gpu->num_devices();
if (gpu->num_platforms()>1) {
std::string pname=gpu->platform_name();
fprintf(screen,"Platform: %s\n",pname.c_str());
}
for (int i=first_gpu; i<last; i++) {
std::string sname;
if (i==first_gpu)

View File

@ -217,6 +217,12 @@ class Device {
inline int gpu_rank() const { return _gpu_rank; }
/// MPI Barrier for gpu
inline void gpu_barrier() { MPI_Barrier(_comm_gpu); }
/// Serialize GPU initialization and JIT for unsafe platforms
inline void serialize_init() {
#ifdef LAL_SERIALIZE_INIT
gpu_barrier();
#endif
}
/// Return the 'mode' for acceleration: GPU_FORCE, GPU_NEIGH or GPU_HYB_NEIGH
inline int gpu_mode() const { return _gpu_mode; }
/// Index of first device used by a node
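This is the change the rest of the commit repeats across the lib/gpu wrappers: gpu_barrier() calls that only enforced round-robin kernel compilation become serialize_init(), which compiles to a barrier only when LAL_SERIALIZE_INIT is defined. A self-contained sketch of the idea, with hypothetical names (DeviceLike, staged_init) standing in for the real Device class:

    #include <mpi.h>

    struct DeviceLike {                       // hypothetical stand-in for Device
      MPI_Comm comm_gpu;
      void gpu_barrier() { MPI_Barrier(comm_gpu); }
      void serialize_init() {
    #ifdef LAL_SERIALIZE_INIT
        gpu_barrier();                        // serialize JIT/init across ranks
    #endif
      }
    };

    static void staged_init(DeviceLike &dev, int procs_per_gpu, int gpu_rank) {
      for (int i = 0; i < procs_per_gpu; i++) {
        if (gpu_rank == i) {
          // compile kernels / initialize this rank's share of the device here
        }
        dev.serialize_init();   // ordering enforced only if LAL_SERIALIZE_INIT is set
      }
    }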

View File

@ -80,7 +80,7 @@ int dpl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e);
DPLMF.device->gpu_barrier();
DPLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -80,7 +80,7 @@ int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e);
DPLSFMF.device->gpu_barrier();
DPLSFMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -81,7 +81,7 @@ int dplj_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
DPLJMF.device->gpu_barrier();
DPLJMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
host_cut, special_lj, false, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen);
DPDMF.device->gpu_barrier();
DPDMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0,
host_cut, special_lj, true, inum, nall, 300,
maxspecial, cell_size, gpu_split, screen);
DPDTMF.device->gpu_barrier();
DPDTMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -90,7 +90,7 @@ int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq,
nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
EAMALMF.device->gpu_barrier();
EAMALMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -90,7 +90,7 @@ int eam_gpu_init(const int ntypes, double host_cutforcesq,
nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
EAMMF.device->gpu_barrier();
EAMMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -90,7 +90,7 @@ int eam_fs_gpu_init(const int ntypes, double host_cutforcesq,
nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
EAMFSMF.device->gpu_barrier();
EAMFSMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a,
offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
GLMF.device->gpu_barrier();
GLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -106,7 +106,7 @@ void gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a,
if (gpu_rank==i && world_me!=0)
GLMF.reinit(ntypes, cutsq, host_a, host_b, offset);
GLMF.device->gpu_barrier();
GLMF.device->serialize_init();
}
}

View File

@ -83,7 +83,7 @@ int gb_gpu_init(const int ntypes, const double gamma,
host_lj3, host_lj4, offset, special_lj, inum, nall,
max_nbors, maxspecial, cell_size, gpu_split, screen);
GBMF.device->gpu_barrier();
GBMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
LJ96MF.device->gpu_barrier();
LJ96MF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -81,7 +81,7 @@ int c2cl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
C2CLMF.device->gpu_barrier();
C2CLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -81,7 +81,7 @@ int ljcd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, kappa);
LJCDMF.device->gpu_barrier();
LJCDMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -80,7 +80,7 @@ int ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e);
LJCMF.device->gpu_barrier();
LJCMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -81,7 +81,7 @@ int ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
LJCLMF.device->gpu_barrier();
LJCLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -112,7 +112,7 @@ void ljcl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
if (gpu_rank==i && world_me!=0)
LJCLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
offset, host_cut_ljsq);
LJCLMF.device->gpu_barrier();
LJCLMF.device->serialize_init();
}
}

View File

@ -83,7 +83,7 @@ int ljcm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, order, qqrd2e);
LJCMLMF.device->gpu_barrier();
LJCMLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -80,7 +80,7 @@ int ljcb_gpu_init(const int ntypes, double **cutsq, double **cut_inner_sq,
special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
LJCubicLMF.device->gpu_barrier();
LJCubicLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -84,7 +84,7 @@ int ljd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
host_cut_coulsq, host_special_coul, qqrd2e, e_shift,
f_shift, alpha);
LJDMF.device->gpu_barrier();
LJDMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -81,7 +81,7 @@ int ljecl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_cut_ljsq,
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
LJECLMF.device->gpu_barrier();
LJECLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -112,7 +112,7 @@ void ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
if (gpu_rank==i && world_me!=0)
LJECLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
offset, shift, host_cut_ljsq);
LJECLMF.device->gpu_barrier();
LJECLMF.device->serialize_init();
}
}

View File

@ -108,7 +108,7 @@ void lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
if (gpu_rank==i && world_me!=0)
LJEMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
offset, shift);
LJEMF.device->gpu_barrier();
LJEMF.device->serialize_init();
}
}

View File

@ -76,7 +76,7 @@ int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
LJLMF.device->gpu_barrier();
LJLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -105,7 +105,7 @@ void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
for (int i=0; i<procs_per_gpu; i++) {
if (gpu_rank==i && world_me!=0)
LJLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset);
LJLMF.device->gpu_barrier();
LJLMF.device->serialize_init();
}
}

View File

@ -81,7 +81,7 @@ int ljgrm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
gpu_split, screen, host_ljsw1, host_ljsw2, host_ljsw3,
host_ljsw4, host_ljsw5, cut_inner, cut_inner_sq);
LJGRMMF.device->gpu_barrier();
LJGRMMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -80,7 +80,7 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
cell_size, gpu_split, screen, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3,
host_ljsw4, cut_inner, cut_inner_sq);
LJSMTMF.device->gpu_barrier();
LJSMTMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -110,7 +110,7 @@ void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
for (int i=0; i<procs_per_gpu; i++) {
if (gpu_rank==i && world_me!=0)
LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq);
LJSMTMF.device->gpu_barrier();
LJSMTMF.device->serialize_init();
}
}

View File

@ -77,7 +77,7 @@ int spica_gpu_init(const int ntypes, double **cutsq, int **cg_types,
host_lj4, offset, special_lj, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen);
CMMMF.device->gpu_barrier();
CMMMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -81,7 +81,7 @@ int spical_gpu_init(const int ntypes, double **cutsq, int **cg_type,
maxspecial, cell_size, gpu_split, screen,
host_cut_ljsq, host_cut_coulsq, host_special_coul,
qqrd2e, g_ewald);
CMMLMF.device->gpu_barrier();
CMMLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -89,7 +89,7 @@ int ljtip4p_long_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
host_special_coul, qqrd2e,
g_ewald, map_size, max_same);
LJTIP4PLMF.device->gpu_barrier();
LJTIP4PLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -80,7 +80,7 @@ int mie_gpu_init(const int ntypes, double **cutsq, double **host_mie1,
offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
MLMF.device->gpu_barrier();
MLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -77,7 +77,7 @@ int mor_gpu_init(const int ntypes, double **cutsq,
offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
MORMF.device->gpu_barrier();
MORMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -81,7 +81,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
split,success);
pppm.device->gpu_barrier();
pppm.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -80,7 +80,7 @@ int re_gpu_init(const int ntypes, double **shape, double **well, double **cutsq,
host_lj4, offset, special_lj, inum, nall,
max_nbors, maxspecial, cell_size, gpu_split, screen);
REMF.device->gpu_barrier();
REMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int soft_gpu_init(const int ntypes, double **cutsq, double **host_prefactor,
special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
SLMF.device->gpu_barrier();
SLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -106,7 +106,7 @@ void soft_gpu_reinit(const int ntypes, double **cutsq, double **host_prefactor,
if (gpu_rank==i && world_me!=0)
SLMF.reinit(ntypes, cutsq, host_prefactor, host_cut);
SLMF.device->gpu_barrier();
SLMF.device->serialize_init();
}
}

View File

@ -84,7 +84,7 @@ int sw_gpu_init(const int ntypes, const int inum, const int nall,
sigma_gamma, c1, c2, c3, c4, c5, c6, lambda_epsilon,
costheta, map, e2param);
SWMF.device->gpu_barrier();
SWMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int table_gpu_init(const int ntypes, double **cutsq, double ***table_coeffs,
special_lj, inum, nall, max_nbors, maxspecial, cell_size,
gpu_split, screen, tabstyle, ntables, tablength);
TBMF.device->gpu_barrier();
TBMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -91,7 +91,7 @@ int tersoff_gpu_init(const int ntypes, const int inum, const int nall, const int
ts_c1, ts_c2, ts_c3, ts_c4, ts_c, ts_d, ts_h,
ts_gamma, ts_beta, ts_powern, ts_cutsq);
TSMF.device->gpu_barrier();
TSMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -91,7 +91,7 @@ int tersoff_mod_gpu_init(const int ntypes, const int inum, const int nall,
ts_c3, ts_c4, ts_c5, ts_h, ts_beta, ts_powern,
ts_powern_del, ts_ca1, ts_cutsq);
TSMMF.device->gpu_barrier();
TSMMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -102,7 +102,7 @@ int tersoff_zbl_gpu_init(const int ntypes, const int inum, const int nall,
ts_ZBLcut, ts_ZBLexpscale, global_e, global_a_0,
global_epsilon_0, ts_cutsq);
TSZMF.device->gpu_barrier();
TSZMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -78,7 +78,7 @@ int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1,
offset, special_lj, inum, nall, max_nbors, maxspecial,
cell_size, gpu_split, screen);
UFMLMF.device->gpu_barrier();
UFMLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}
@ -106,7 +106,7 @@ void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1,
for (int i=0; i<procs_per_gpu; i++) {
if (gpu_rank==i && world_me!=0)
UFMLMF.reinit(ntypes, cutsq, host_uf1, host_uf2, host_uf3, offset);
UFMLMF.device->gpu_barrier();
UFMLMF.device->serialize_init();
}
}

View File

@ -89,7 +89,7 @@ int vashishta_gpu_init(const int ntypes, const int inum, const int nall, const i
lam4inv, zizj, mbigd, dvrc, big6w, heta, bigh, bigw,
c0, costheta, bigb, big2b, bigc);
VashishtaMF.device->gpu_barrier();
VashishtaMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int ykcolloid_gpu_init(const int ntypes, double **cutsq, double **host_a,
inum, nall, max_nbors, maxspecial, cell_size, gpu_split,
screen, kappa);
YKCOLLMF.device->gpu_barrier();
YKCOLLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -76,7 +76,7 @@ int yukawa_gpu_init(const int ntypes, double **cutsq, double kappa,
inum, nall, max_nbors, maxspecial, cell_size,
gpu_split, screen);
YKMF.device->gpu_barrier();
YKMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -79,7 +79,7 @@ int zbl_gpu_init(const int ntypes, double **cutsq, double **host_sw1,
cut_globalsq, cut_innersq, cut_inner,
inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen);
ZBLMF.device->gpu_barrier();
ZBLMF.device->serialize_init();
if (message)
fprintf(screen,"Done.\n");
}

View File

@ -1,4 +1,4 @@
# TODO#!/usr/bin/env python
#!/usr/bin/env python
"""
Install.py tool to download, compile, and set up the pace library
@ -6,7 +6,10 @@ used to automate the steps described in the README file in this dir
"""
from __future__ import print_function
import sys, subprocess
import shutil
import subprocess
import sys
from argparse import ArgumentParser
sys.path.append('..')
@ -15,23 +18,16 @@ from install_helpers import fullpath, geturl, checkmd5sum
# settings
thisdir = fullpath('.')
version = 'v.2021.10.25.fix2'
version ='v.2022.09.27.fix10Oct'
# known checksums for different PACE versions. used to validate the download.
checksums = { \
'v.2021.2.3.upd2' : '8fd1162724d349b930e474927197f20d',
'v.2021.4.9' : '4db54962fbd6adcf8c18d46e1798ceb5',
'v.2021.9.28' : 'f98363bb98adc7295ea63974738c2a1b',
'v.2021.10.25' : 'a2ac3315c41a1a4a5c912bcb1bc9c5cc',
'v.2021.10.25.fix': 'e0572de57039d4afedefb25707b6ceae',
'v.2021.10.25.fix2': '32394d799bc282bb57696c78c456e64f'
}
'v.2022.09.27.fix10Oct': '766cebcc0e5c4b8430c2f3cd202d9905'
}
parser = ArgumentParser(prog='Install.py',
description="LAMMPS library build wrapper script")
# help message
HELP = """
@ -55,55 +51,68 @@ parser.add_argument("-v", "--version", default=version, choices=checksums.keys()
help="set version of PACE library to download and build (default: %s)" % version)
parser.add_argument("-vv", "--verbose", action="store_true",
help="be more verbose about is happening while this script runs")
parser.add_argument("-l", "--local", default=None,
help="use local version of PACE library build")
args = parser.parse_args()
# print help message and exit, if neither build nor path options are given
if not args.build:
parser.print_help()
sys.exit(HELP)
parser.print_help()
sys.exit(HELP)
buildflag = args.build
verboseflag = args.verbose
version = args.version
local = args.local
archive_extension = "tar.gz"
url = "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/%s.%s" % (version, archive_extension)
unarchived_folder_name = "lammps-user-pace-%s"%(version)
unarchived_folder_name = "lammps-user-pace-%s" % (version)
# download PACE tarball, unpack, build PACE
if buildflag:
if not local:
# download entire tarball
print("Downloading pace tarball ...")
archive_filename = "%s.%s" % (version, archive_extension)
download_filename = "%s/%s" % (thisdir, archive_filename)
print("Downloading from ", url, " to ", download_filename, end=" ")
geturl(url, download_filename)
print(" done")
# download entire tarball
# verify downloaded archive integrity via md5 checksum, if known.
if version in checksums:
if not checkmd5sum(checksums[version], archive_filename):
sys.exit("Checksum for pace library does not match")
print("Downloading pace tarball ...")
archive_filename = "%s.%s" % (version, archive_extension)
download_filename = "%s/%s" % (thisdir, archive_filename)
print("Downloading from ",url," to ",download_filename, end=" ")
geturl(url, download_filename)
print(" done")
print("Unpacking pace tarball ...")
src_folder = thisdir + "/src"
cmd = 'cd "%s"; rm -rf "%s"; tar -xvf %s; mv %s %s' % (
thisdir, src_folder, archive_filename, unarchived_folder_name, src_folder)
subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
else:
# copy from local version of library PACE
print("Copy pace from ", local)
src_folder = thisdir + "/src"
shutil.copytree(local, src_folder,
# ignore=lambda (s1,s2): ('.git' in s1 or '.git' in s2),
dirs_exist_ok=True)
# verify downloaded archive integrity via md5 checksum, if known.
if version in checksums:
if not checkmd5sum(checksums[version], archive_filename):
sys.exit("Checksum for pace library does not match")
print("Unpacking pace tarball ...")
src_folder = thisdir+"/src"
cmd = 'cd "%s"; rm -rf "%s"; tar -xvf %s; mv %s %s' % (thisdir, src_folder, archive_filename, unarchived_folder_name, src_folder)
subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
# build
print("Building libpace ...")
cmd = 'make lib -j2'
txt = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
if verboseflag:
print(txt.decode("UTF-8"))
# build
print("Building libpace ...")
cmd = 'make lib -j2'
txt = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
if verboseflag:
print(txt.decode("UTF-8"))
# remove source files
# remove source files
print("Removing pace build files and archive ...")
cmd = 'make clean-build'
if not local:
cmd = ('rm %s;' % (download_filename))+cmd
subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
print("Removing pace build files and archive ...")
cmd = 'rm %s; make clean-build' % (download_filename)
subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
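With the new -l/--local option, the library can be built from an existing PACE source tree instead of a downloaded tarball; assuming the script's usual -b build flag, an invocation would look like "python Install.py -b -l /path/to/lammps-user-pace" (path hypothetical), while "python Install.py -b" keeps the download-and-verify path shown above.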

View File

@ -5,8 +5,14 @@ SHELL = /bin/sh
YAML_CPP_PATH = src/yaml-cpp
YAML_CPP_INC = $(YAML_CPP_PATH)/include
SRC_FILES = $(wildcard src/ML-PACE/*.cpp)
SRC = $(filter-out src/ML-PACE/pair_pace.cpp, $(SRC_FILES))
WIGNER_CPP_INC = src/wigner-cpp/include/wigner
CNPY_CPP_PATH = src/cnpy
CNPY_CPP_INC = $(CNPY_CPP_PATH)
CNPY_SRC_FILES = $(CNPY_CPP_PATH)/cnpy.cpp
SRC_FILES = $(wildcard src/ML-PACE/ace/*.cpp) $(wildcard src/ML-PACE/ace-evaluator/*.cpp)
SRC = $(filter-out src/ML-PACE/pair_pace.cpp, $(SRC_FILES)) $(CNPY_SRC_FILES)
# ------ DEFINITIONS ------
@ -15,7 +21,7 @@ OBJ = $(SRC:.cpp=.o)
# ------ SETTINGS ------
CXXFLAGS = -O3 -fPIC -Isrc/ML-PACE -I$(YAML_CPP_INC)
CXXFLAGS = -O3 -fPIC -Isrc/ML-PACE/ace -Isrc/ML-PACE/ace-evaluator -I$(YAML_CPP_INC) -I$(WIGNER_CPP_INC) -I$(CNPY_CPP_INC) -DEXTRA_C_PROJECTIONS
ARCHIVE = ar
ARCHFLAG = -rc

View File

@ -1,3 +1,3 @@
pace_SYSINC =-I../../lib/pace/src/ML-PACE -I../../lib/pace/src/yaml-cpp/include
pace_SYSINC =-I../../lib/pace/src/ML-PACE/ace -I../../lib/pace/src/ML-PACE/ace-evaluator -I../../lib/pace/src/yaml-cpp/include -I../../lib/pace/src/wigner-cpp/include/wigner -DEXTRA_C_PROJECTIONS
pace_SYSLIB = -L../../lib/pace/ -lpace -L../../lib/pace/src/yaml-cpp/ -lyaml-cpp
pace_SYSPATH =