Merge branch 'develop' of https://github.com/lammps/lammps into kk_update_3.7
This commit is contained in:
@ -1,5 +1,5 @@
|
||||
# /* ----------------------------------------------------------------------
|
||||
# Generic Linux Makefile for OpenCL
|
||||
# /* ----------------------------------------------------------------------
|
||||
# Linux Makefile for Intel oneAPI - Mixed precision
|
||||
# ------------------------------------------------------------------------- */
|
||||
|
||||
# which file will be copied to Makefile.lammps
|
||||
@ -11,11 +11,14 @@ EXTRAMAKE = Makefile.lammps.opencl
|
||||
|
||||
LMP_INC = -DLAMMPS_SMALLBIG
|
||||
|
||||
OCL_INC =
|
||||
OCL_CPP = mpiicpc -std=c++11 -xHost -O2 -qopenmp -qopenmp-simd -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC)
|
||||
OCL_LINK = -lOpenCL
|
||||
OCL_INC = -I$(ONEAPI_ROOT)/compiler/latest/linux/include/sycl/
|
||||
CPP_OPT = -xHost -O2 -qopenmp -qopenmp-simd -fp-model fast=2 -no-prec-div \
|
||||
-qoverride-limits
|
||||
OCL_CPP = mpiicpc -std=c++11 -diag-disable=10441 -DMPICH_IGNORE_CXX_SEEK \
|
||||
$(LMP_INC) $(OCL_INC) $(CPP_OPT)
|
||||
OCL_LINK = -L$(ONEAPI_ROOT)/compiler/latest/linux/lib -lOpenCL
|
||||
OCL_PREC = -D_SINGLE_DOUBLE
|
||||
OCL_TUNE = -DMPI_GERYON -DGERYON_NUMA_FISSION -DUCL_NO_EXIT -fp-model fast=2 -no-prec-div
|
||||
OCL_TUNE = -DMPI_GERYON -DCUDA_PROXY -DGERYON_NUMA_FISSION -DUCL_NO_EXIT
|
||||
|
||||
BIN_DIR = ./
|
||||
OBJ_DIR = ./
|
||||
|
||||
@ -264,6 +264,20 @@ GERYON_KERNEL_DUMP Dump all compiled OpenCL programs with compiler
|
||||
flags and build logs
|
||||
GPU_CAST Casting performed on GPU, untested recently
|
||||
THREE_CONCURRENT Concurrent 3-body calcs in separate queues, untested
|
||||
LAL_SERIALIZE_INIT Force serialization of initialization and compilation
|
||||
for multiple MPI tasks sharing the same accelerator.
|
||||
Some accelerator API implementations have had issues
|
||||
with temporary file conflicts in the past.
|
||||
GERYON_FORCE_SHARED_MAIN_MEM_ON Should only be used for builds where the
|
||||
accelerator is guaranteed to share physical
|
||||
main memory with the host (e.g. integrated
|
||||
GPU or CPU device). Default behavior is to
|
||||
auto-detect. Impacts OpenCL only.
|
||||
GERYON_FORCE_SHARED_MAIN_MEM_OFF Should only be used for builds where the
|
||||
accelerator is guaranteed to have discrete
|
||||
physical main memory vs the host (discrete
|
||||
GPU card). Default behavior is to
|
||||
auto-detect. Impacts OpenCL only.
|
||||
|
||||
|
||||
------------------------------------------------------------------------------
|
||||
|
||||
@ -126,10 +126,13 @@ class UCL_Device {
|
||||
/// Return the number of devices that support OpenCL
|
||||
inline int num_devices() { return _num_devices; }
|
||||
|
||||
/// Specify whether profiling (device timers) will be used for the device (yes=true)
|
||||
/// Specify whether profiling (device timers) will be used (yes=true)
|
||||
/** No-op for CUDA and HIP **/
|
||||
inline void configure_profiling(const bool profiling_on)
|
||||
{ _cq_profiling = profiling_on; }
|
||||
// Stores the caller's profiling choice in _cq_profiling.
// When the build defines GERYON_NO_OCL_MARKERS the assignment is compiled
// out, so this call leaves _cq_profiling unchanged and profiling_on is ignored.
inline void configure_profiling(const bool profiling_on) {
|
||||
#ifndef GERYON_NO_OCL_MARKERS
|
||||
_cq_profiling = profiling_on;
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Set the OpenCL device to the specified device number
|
||||
/** A context and default command queue will be created for the device *
|
||||
@ -176,8 +179,8 @@ class UCL_Device {
|
||||
|
||||
#ifdef CL_VERSION_2_0
|
||||
if (_cq_profiling) {
|
||||
cl_queue_properties props[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE,
|
||||
0};
|
||||
cl_queue_properties props[] = {CL_QUEUE_PROPERTIES,
|
||||
CL_QUEUE_PROFILING_ENABLE, 0};
|
||||
_cq.back()=clCreateCommandQueueWithProperties(_context, _cl_device, props,
|
||||
&errorv);
|
||||
} else {
|
||||
@ -187,8 +190,8 @@ class UCL_Device {
|
||||
}
|
||||
#else
|
||||
if (_cq_profiling)
|
||||
_cq.back()=clCreateCommandQueue(_context, _cl_device, CL_QUEUE_PROFILING_ENABLE,
|
||||
&errorv);
|
||||
_cq.back()=clCreateCommandQueue(_context, _cl_device,
|
||||
CL_QUEUE_PROFILING_ENABLE, &errorv);
|
||||
else
|
||||
_cq.back()=clCreateCommandQueue(_context, _cl_device, 0, &errorv);
|
||||
#endif
|
||||
@ -403,7 +406,11 @@ class UCL_Device {
|
||||
// Grabs the properties for all devices
|
||||
UCL_Device::UCL_Device() {
|
||||
_device=-1;
|
||||
#ifndef GERYON_NO_OCL_MARKERS
|
||||
_cq_profiling=true;
|
||||
#else
|
||||
_cq_profiling=false;
|
||||
#endif
|
||||
|
||||
// --- Get Number of Platforms
|
||||
cl_uint nplatforms;
|
||||
@ -482,6 +489,7 @@ int UCL_Device::set_platform(int pid) {
|
||||
_num_devices = 0;
|
||||
for (int i=0; i<num_unpart; i++) {
|
||||
cl_uint num_subdevices = 1;
|
||||
cl_device_id *subdevice_list = device_list + i;
|
||||
|
||||
#ifdef CL_VERSION_1_2
|
||||
cl_device_affinity_domain adomain;
|
||||
@ -494,25 +502,29 @@ int UCL_Device::set_platform(int pid) {
|
||||
props[0]=CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
|
||||
props[1]=CL_DEVICE_AFFINITY_DOMAIN_NUMA;
|
||||
props[2]=0;
|
||||
|
||||
cl_int err = CL_SUCCESS;
|
||||
if (adomain & CL_DEVICE_AFFINITY_DOMAIN_NUMA)
|
||||
CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, 0, NULL,
|
||||
&num_subdevices));
|
||||
if (num_subdevices > 1) {
|
||||
cl_device_id *subdevice_list = new cl_device_id[num_subdevices];
|
||||
CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, num_subdevices,
|
||||
subdevice_list, &num_subdevices));
|
||||
for (cl_uint j=0; j<num_subdevices; j++) {
|
||||
_cl_devices.push_back(device_list[i]);
|
||||
add_properties(device_list[i]);
|
||||
_num_devices++;
|
||||
err = clCreateSubDevices(device_list[i], props, 0, NULL,
|
||||
&num_subdevices);
|
||||
if (err == CL_SUCCESS && num_subdevices > 1) {
|
||||
subdevice_list = new cl_device_id[num_subdevices];
|
||||
err = clCreateSubDevices(device_list[i], props, num_subdevices,
|
||||
subdevice_list, &num_subdevices);
|
||||
if (err != CL_SUCCESS) {
|
||||
delete[] subdevice_list;
|
||||
num_subdevices = 1;
|
||||
subdevice_list = device_list + i;
|
||||
}
|
||||
delete[] subdevice_list;
|
||||
} else {
|
||||
_cl_devices.push_back(device_list[i]);
|
||||
add_properties(device_list[i]);
|
||||
_num_devices++;
|
||||
}
|
||||
#endif
|
||||
|
||||
for (cl_uint j=0; j<num_subdevices; j++) {
|
||||
_num_devices++;
|
||||
_cl_devices.push_back(subdevice_list[j]);
|
||||
add_properties(subdevice_list[j]);
|
||||
}
|
||||
if (num_subdevices > 1) delete[] subdevice_list;
|
||||
} // for i
|
||||
#endif
|
||||
|
||||
@ -686,10 +698,10 @@ void UCL_Device::add_properties(cl_device_id device_list) {
|
||||
double arch = static_cast<double>(minor)/10+major;
|
||||
if (arch >= 3.0)
|
||||
op.has_shuffle_support=true;
|
||||
op.shared_main_memory=_shared_mem_device(device_list);
|
||||
}
|
||||
delete[] buffer2;
|
||||
#endif
|
||||
op.shared_main_memory=_shared_mem_device(device_list);
|
||||
|
||||
_properties.push_back(op);
|
||||
}
|
||||
|
||||
@ -27,11 +27,15 @@
|
||||
#include "ocl_macros.h"
|
||||
#include "ocl_device.h"
|
||||
|
||||
// UCL_OCL_MARKER(cq,event): enqueue a marker on command queue `cq` and
// return its event through `event`. Three build configurations:
//   - markers enabled, CL_VERSION_1_2 defined: clEnqueueMarkerWithWaitList
//     with an empty wait list;
//   - markers enabled, CL_VERSION_1_2 not defined: plain alias for
//     clEnqueueMarker;
//   - GERYON_NO_OCL_MARKERS defined: expands to nothing, so a call site such
//     as `UCL_OCL_MARKER(_cq,&start_event);` becomes an empty statement and
//     the event argument is never written.
#ifndef GERYON_NO_OCL_MARKERS
|
||||
#ifdef CL_VERSION_1_2
|
||||
#define UCL_OCL_MARKER(cq,event) clEnqueueMarkerWithWaitList(cq,0,nullptr,event)
|
||||
#else
|
||||
#define UCL_OCL_MARKER clEnqueueMarker
|
||||
#endif
|
||||
#else
|
||||
#define UCL_OCL_MARKER(cq,event)
|
||||
#endif
|
||||
|
||||
namespace ucl_opencl {
|
||||
|
||||
@ -51,8 +55,10 @@ class UCL_Timer {
|
||||
inline void clear() {
|
||||
if (_initialized) {
|
||||
if (has_measured_time) {
|
||||
#ifndef GERYON_NO_OCL_MARKERS
|
||||
clReleaseEvent(start_event);
|
||||
clReleaseEvent(stop_event);
|
||||
#endif
|
||||
has_measured_time = false;
|
||||
}
|
||||
CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq));
|
||||
@ -76,8 +82,10 @@ class UCL_Timer {
|
||||
/// Start timing on default command queue
|
||||
inline void start() {
|
||||
if (has_measured_time) {
|
||||
#ifndef GERYON_NO_OCL_MARKERS
|
||||
clReleaseEvent(start_event);
|
||||
clReleaseEvent(stop_event);
|
||||
#endif
|
||||
has_measured_time = false;
|
||||
}
|
||||
UCL_OCL_MARKER(_cq,&start_event);
|
||||
@ -91,17 +99,26 @@ class UCL_Timer {
|
||||
|
||||
/// Block until the start event has been reached on device
|
||||
inline void sync_start() {
|
||||
#ifndef GERYON_NO_OCL_MARKERS
|
||||
CL_SAFE_CALL(clWaitForEvents(1,&start_event));
|
||||
if (has_measured_time) {
|
||||
clReleaseEvent(start_event);
|
||||
clReleaseEvent(stop_event);
|
||||
has_measured_time = false;
|
||||
}
|
||||
CL_SAFE_CALL(clWaitForEvents(1,&start_event));
|
||||
#else
|
||||
CL_SAFE_CALL(clFinish(_cq));
|
||||
has_measured_time = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Block until the stop event has been reached on device
|
||||
inline void sync_stop() {
|
||||
#ifndef GERYON_NO_OCL_MARKERS
|
||||
// Markers enabled: block on the stop event only.
CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
|
||||
#else
|
||||
// Markers disabled: wait for everything queued on _cq instead.
// NOTE(review): presumably because no stop event is recorded in this
// configuration (UCL_OCL_MARKER expands to nothing) -- confirm against stop().
CL_SAFE_CALL(clFinish(_cq));
|
||||
#endif
|
||||
// Set in both configurations; time() returns 0.0 while this flag is false.
has_measured_time = true;
|
||||
}
|
||||
|
||||
@ -126,6 +143,7 @@ class UCL_Timer {
|
||||
/// Return the time (ms) of last start to stop - Forces synchronization
|
||||
inline double time() {
|
||||
if(!has_measured_time) return 0.0;
|
||||
#ifndef GERYON_NO_OCL_MARKERS
|
||||
cl_ulong tstart,tend;
|
||||
CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
|
||||
CL_SAFE_CALL(clGetEventProfilingInfo(stop_event,
|
||||
@ -138,6 +156,11 @@ class UCL_Timer {
|
||||
clReleaseEvent(stop_event);
|
||||
has_measured_time = false;
|
||||
return (tend-tstart)*1e-6;
|
||||
#else
|
||||
CL_SAFE_CALL(clFinish(_cq));
|
||||
has_measured_time = false;
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Return the time (s) of last start to stop - Forces synchronization
|
||||
|
||||
@ -76,7 +76,7 @@ int beck_gpu_init(const int ntypes, double **cutsq, double **aa,
|
||||
special_lj, inum, nall, max_nbors, maxspecial,
|
||||
cell_size, gpu_split, screen);
|
||||
|
||||
BLMF.device->gpu_barrier();
|
||||
BLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -84,7 +84,7 @@ int bornclcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
|
||||
gpu_split, screen, host_cut_ljsq, host_cut_coulsq,
|
||||
host_special_coul, qqrd2e, g_ewald);
|
||||
|
||||
BCLCSMF.device->gpu_barrier();
|
||||
BCLCSMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -84,7 +84,7 @@ int borncl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
|
||||
gpu_split, screen, host_cut_ljsq, host_cut_coulsq,
|
||||
host_special_coul, qqrd2e, g_ewald);
|
||||
|
||||
BORNCLMF.device->gpu_barrier();
|
||||
BORNCLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -86,7 +86,7 @@ int borncwcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
|
||||
host_cut_coulsq, host_special_coul, qqrd2e,
|
||||
alf, e_shift, f_shift);
|
||||
|
||||
BornCWCST.device->gpu_barrier();
|
||||
BornCWCST.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -86,7 +86,7 @@ int borncw_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
|
||||
host_cut_coulsq, host_special_coul, qqrd2e,
|
||||
alf, e_shift, f_shift);
|
||||
|
||||
BORNCWMF.device->gpu_barrier();
|
||||
BORNCWMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -80,7 +80,7 @@ int born_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
|
||||
offset, special_lj, inum, nall, max_nbors,
|
||||
maxspecial, cell_size, gpu_split, screen);
|
||||
|
||||
BORNMF.device->gpu_barrier();
|
||||
BORNMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
@ -114,7 +114,7 @@ void born_gpu_reinit(const int ntypes, double **host_rhoinv,
|
||||
BORNMF.reinit(ntypes, host_rhoinv, host_born1, host_born2,
|
||||
host_born3, host_a, host_c, host_d, offset);
|
||||
|
||||
BORNMF.device->gpu_barrier();
|
||||
BORNMF.device->serialize_init();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -83,7 +83,7 @@ int buckc_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
|
||||
host_cut_ljsq, host_cut_coulsq,
|
||||
host_special_coul, qqrd2e);
|
||||
|
||||
BUCKCMF.device->gpu_barrier();
|
||||
BUCKCMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -82,7 +82,7 @@ int buckcl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
|
||||
maxspecial, cell_size, gpu_split, screen, host_cut_ljsq,
|
||||
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
|
||||
|
||||
BUCKCLMF.device->gpu_barrier();
|
||||
BUCKCLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -77,7 +77,7 @@ int buck_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
|
||||
host_a, host_c, offset, special_lj, inum, nall, max_nbors,
|
||||
maxspecial, cell_size, gpu_split, screen);
|
||||
|
||||
BUCKMF.device->gpu_barrier();
|
||||
BUCKMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
@ -110,7 +110,7 @@ void buck_gpu_reinit(const int ntypes, double **cutsq, double **host_rhoinv,
|
||||
BUCKMF.reinit(ntypes, cutsq, host_rhoinv, host_buck1, host_buck2,
|
||||
host_a, host_c, offset);
|
||||
|
||||
BUCKMF.device->gpu_barrier();
|
||||
BUCKMF.device->serialize_init();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -88,7 +88,7 @@ int crm_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
|
||||
qqrd2e, cut_lj_innersq, cut_coul_innersq, denom_lj,
|
||||
denom_coul, epsilon, sigma, mix_arithmetic);
|
||||
|
||||
CRMMF.device->gpu_barrier();
|
||||
CRMMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -86,7 +86,7 @@ int crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
|
||||
qqrd2e, g_ewald, cut_lj_innersq, denom_lj, epsilon,
|
||||
sigma, mix_arithmetic);
|
||||
|
||||
CRMLMF.device->gpu_barrier();
|
||||
CRMLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -83,7 +83,7 @@ int colloid_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
inum, nall, max_nbors, maxspecial,
|
||||
cell_size, gpu_split, screen);
|
||||
|
||||
COLLMF.device->gpu_barrier();
|
||||
COLLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -74,7 +74,7 @@ int cdebye_gpu_init(const int ntypes, double **host_scale, double **cutsq,
|
||||
init_ok=CDEMF.init(ntypes, host_scale, cutsq, host_special_coul, inum, nall, max_nbors,
|
||||
maxspecial, cell_size, gpu_split, screen, qqrd2e, kappa);
|
||||
|
||||
CDEMF.device->gpu_barrier();
|
||||
CDEMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
@ -103,7 +103,7 @@ void cdebye_gpu_reinit(const int ntypes, double **host_scale) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
CDEMF.reinit(ntypes, host_scale);
|
||||
|
||||
CDEMF.device->gpu_barrier();
|
||||
CDEMF.device->serialize_init();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -77,7 +77,7 @@ int cdsf_gpu_init(const int ntypes, const int inum, const int nall,
|
||||
gpu_split, screen, host_cut_coulsq, host_special_coul,
|
||||
qqrd2e, e_shift, f_shift, alpha);
|
||||
|
||||
CDMF.device->gpu_barrier();
|
||||
CDMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -74,7 +74,7 @@ int coul_gpu_init(const int ntypes, double **host_scale,
|
||||
init_ok=COULMF.init(ntypes, host_scale, cutsq, special_coul, inum, nall, max_nbors,
|
||||
maxspecial, cell_size, gpu_split, screen, qqrd2e);
|
||||
|
||||
COULMF.device->gpu_barrier();
|
||||
COULMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
@ -103,7 +103,7 @@ void coul_gpu_reinit(const int ntypes, double **host_scale) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
COULMF.reinit(ntypes, host_scale);
|
||||
|
||||
COULMF.device->gpu_barrier();
|
||||
COULMF.device->serialize_init();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -76,7 +76,7 @@ int clcs_gpu_init(const int ntypes, double **host_scale,
|
||||
cell_size, gpu_split, screen, host_cut_coulsq,
|
||||
host_special_coul, qqrd2e, g_ewald);
|
||||
|
||||
CLCSMF.device->gpu_barrier();
|
||||
CLCSMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
@ -105,7 +105,7 @@ void clcs_gpu_reinit(const int ntypes, double **host_scale) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
CLCSMF.reinit(ntypes, host_scale);
|
||||
|
||||
CLCSMF.device->gpu_barrier();
|
||||
CLCSMF.device->serialize_init();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -76,7 +76,7 @@ int cl_gpu_init(const int ntypes, double **host_scale,
|
||||
cell_size, gpu_split, screen, host_cut_coulsq,
|
||||
host_special_coul, qqrd2e, g_ewald);
|
||||
|
||||
CLMF.device->gpu_barrier();
|
||||
CLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
@ -105,7 +105,7 @@ void cl_gpu_reinit(const int ntypes, double **host_scale) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
CLMF.reinit(ntypes, host_scale);
|
||||
|
||||
CLMF.device->gpu_barrier();
|
||||
CLMF.device->serialize_init();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -328,7 +328,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
|
||||
for (int i=0; i<_procs_per_gpu; i++) {
|
||||
if (_gpu_rank==i)
|
||||
flag=compile_kernels();
|
||||
gpu_barrier();
|
||||
serialize_init();
|
||||
}
|
||||
|
||||
// check if double precision support is available
|
||||
@ -609,6 +609,10 @@ void DeviceT::init_message(FILE *screen, const char *name,
|
||||
int last=last_gpu+1;
|
||||
if (last>gpu->num_devices())
|
||||
last=gpu->num_devices();
|
||||
if (gpu->num_platforms()>1) {
|
||||
std::string pname=gpu->platform_name();
|
||||
fprintf(screen,"Platform: %s\n",pname.c_str());
|
||||
}
|
||||
for (int i=first_gpu; i<last; i++) {
|
||||
std::string sname;
|
||||
if (i==first_gpu)
|
||||
|
||||
@ -217,6 +217,12 @@ class Device {
|
||||
inline int gpu_rank() const { return _gpu_rank; }
|
||||
/// MPI Barrier for gpu
|
||||
inline void gpu_barrier() { MPI_Barrier(_comm_gpu); }
|
||||
/// Serialize GPU initialization and JIT for unsafe platforms
|
||||
// Optional synchronization point used after per-rank initialization work.
// Calls gpu_barrier() only when the build defines LAL_SERIALIZE_INIT;
// otherwise the body is empty and the call does nothing.
inline void serialize_init() {
|
||||
#ifdef LAL_SERIALIZE_INIT
|
||||
gpu_barrier();
|
||||
#endif
|
||||
}
|
||||
/// Return the 'mode' for acceleration: GPU_FORCE, GPU_NEIGH or GPU_HYB_NEIGH
|
||||
inline int gpu_mode() const { return _gpu_mode; }
|
||||
/// Index of first device used by a node
|
||||
|
||||
@ -80,7 +80,7 @@ int dpl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
cell_size, gpu_split, screen, host_cut_ljsq,
|
||||
host_cut_coulsq, host_special_coul, qqrd2e);
|
||||
|
||||
DPLMF.device->gpu_barrier();
|
||||
DPLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -80,7 +80,7 @@ int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
cell_size, gpu_split, screen, host_cut_ljsq,
|
||||
host_cut_coulsq, host_special_coul, qqrd2e);
|
||||
|
||||
DPLSFMF.device->gpu_barrier();
|
||||
DPLSFMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -81,7 +81,7 @@ int dplj_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
cell_size, gpu_split, screen, host_cut_ljsq,
|
||||
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
|
||||
|
||||
DPLJMF.device->gpu_barrier();
|
||||
DPLJMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -76,7 +76,7 @@ int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
||||
host_cut, special_lj, false, inum, nall, max_nbors,
|
||||
maxspecial, cell_size, gpu_split, screen);
|
||||
|
||||
DPDMF.device->gpu_barrier();
|
||||
DPDMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -76,7 +76,7 @@ int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0,
|
||||
host_cut, special_lj, true, inum, nall, 300,
|
||||
maxspecial, cell_size, gpu_split, screen);
|
||||
|
||||
DPDTMF.device->gpu_barrier();
|
||||
DPDTMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -90,7 +90,7 @@ int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq,
|
||||
nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
|
||||
cell_size, gpu_split, screen);
|
||||
|
||||
EAMALMF.device->gpu_barrier();
|
||||
EAMALMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -90,7 +90,7 @@ int eam_gpu_init(const int ntypes, double host_cutforcesq,
|
||||
nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
|
||||
cell_size, gpu_split, screen);
|
||||
|
||||
EAMMF.device->gpu_barrier();
|
||||
EAMMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -90,7 +90,7 @@ int eam_fs_gpu_init(const int ntypes, double host_cutforcesq,
|
||||
nz2r, nfrho, nr, nlocal, nall, max_nbors, maxspecial,
|
||||
cell_size, gpu_split, screen);
|
||||
|
||||
EAMFSMF.device->gpu_barrier();
|
||||
EAMFSMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -76,7 +76,7 @@ int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a,
|
||||
offset, special_lj, inum, nall, max_nbors, maxspecial,
|
||||
cell_size, gpu_split, screen);
|
||||
|
||||
GLMF.device->gpu_barrier();
|
||||
GLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
@ -106,7 +106,7 @@ void gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a,
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
GLMF.reinit(ntypes, cutsq, host_a, host_b, offset);
|
||||
|
||||
GLMF.device->gpu_barrier();
|
||||
GLMF.device->serialize_init();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -83,7 +83,7 @@ int gb_gpu_init(const int ntypes, const double gamma,
|
||||
host_lj3, host_lj4, offset, special_lj, inum, nall,
|
||||
max_nbors, maxspecial, cell_size, gpu_split, screen);
|
||||
|
||||
GBMF.device->gpu_barrier();
|
||||
GBMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -76,7 +76,7 @@ int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
offset, special_lj, inum, nall, max_nbors, maxspecial,
|
||||
cell_size, gpu_split, screen);
|
||||
|
||||
LJ96MF.device->gpu_barrier();
|
||||
LJ96MF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -81,7 +81,7 @@ int c2cl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
cell_size, gpu_split, screen, host_cut_ljsq,
|
||||
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
|
||||
|
||||
C2CLMF.device->gpu_barrier();
|
||||
C2CLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -81,7 +81,7 @@ int ljcd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
cell_size, gpu_split, screen, host_cut_ljsq,
|
||||
host_cut_coulsq, host_special_coul, qqrd2e, kappa);
|
||||
|
||||
LJCDMF.device->gpu_barrier();
|
||||
LJCDMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -80,7 +80,7 @@ int ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
cell_size, gpu_split, screen, host_cut_ljsq,
|
||||
host_cut_coulsq, host_special_coul, qqrd2e);
|
||||
|
||||
LJCMF.device->gpu_barrier();
|
||||
LJCMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -81,7 +81,7 @@ int ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
cell_size, gpu_split, screen, host_cut_ljsq,
|
||||
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
|
||||
|
||||
LJCLMF.device->gpu_barrier();
|
||||
LJCLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
@ -112,7 +112,7 @@ void ljcl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
LJCLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
|
||||
offset, host_cut_ljsq);
|
||||
LJCLMF.device->gpu_barrier();
|
||||
LJCLMF.device->serialize_init();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -83,7 +83,7 @@ int ljcm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
cell_size, gpu_split, screen, host_cut_ljsq,
|
||||
host_cut_coulsq, host_special_coul, order, qqrd2e);
|
||||
|
||||
LJCMLMF.device->gpu_barrier();
|
||||
LJCMLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -80,7 +80,7 @@ int ljcb_gpu_init(const int ntypes, double **cutsq, double **cut_inner_sq,
|
||||
special_lj, inum, nall, max_nbors, maxspecial,
|
||||
cell_size, gpu_split, screen);
|
||||
|
||||
LJCubicLMF.device->gpu_barrier();
|
||||
LJCubicLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -84,7 +84,7 @@ int ljd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
host_cut_coulsq, host_special_coul, qqrd2e, e_shift,
|
||||
f_shift, alpha);
|
||||
|
||||
LJDMF.device->gpu_barrier();
|
||||
LJDMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -81,7 +81,7 @@ int ljecl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
cell_size, gpu_split, screen, host_cut_ljsq,
|
||||
host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);
|
||||
|
||||
LJECLMF.device->gpu_barrier();
|
||||
LJECLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
@ -112,7 +112,7 @@ void ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
LJECLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
|
||||
offset, shift, host_cut_ljsq);
|
||||
LJECLMF.device->gpu_barrier();
|
||||
LJECLMF.device->serialize_init();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -108,7 +108,7 @@ void lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
LJEMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
|
||||
offset, shift);
|
||||
LJEMF.device->gpu_barrier();
|
||||
LJEMF.device->serialize_init();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -76,7 +76,7 @@ int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
offset, special_lj, inum, nall, max_nbors, maxspecial,
|
||||
cell_size, gpu_split, screen);
|
||||
|
||||
LJLMF.device->gpu_barrier();
|
||||
LJLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
@ -105,7 +105,7 @@ void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
|
||||
for (int i=0; i<procs_per_gpu; i++) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
LJLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset);
|
||||
LJLMF.device->gpu_barrier();
|
||||
LJLMF.device->serialize_init();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -81,7 +81,7 @@ int ljgrm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
gpu_split, screen, host_ljsw1, host_ljsw2, host_ljsw3,
|
||||
host_ljsw4, host_ljsw5, cut_inner, cut_inner_sq);
|
||||
|
||||
LJGRMMF.device->gpu_barrier();
|
||||
LJGRMMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -80,7 +80,7 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
cell_size, gpu_split, screen, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3,
|
||||
host_ljsw4, cut_inner, cut_inner_sq);
|
||||
|
||||
LJSMTMF.device->gpu_barrier();
|
||||
LJSMTMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
@ -110,7 +110,7 @@ void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
|
||||
for (int i=0; i<procs_per_gpu; i++) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq);
|
||||
LJSMTMF.device->gpu_barrier();
|
||||
LJSMTMF.device->serialize_init();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -77,7 +77,7 @@ int spica_gpu_init(const int ntypes, double **cutsq, int **cg_types,
|
||||
host_lj4, offset, special_lj, inum, nall, max_nbors,
|
||||
maxspecial, cell_size, gpu_split, screen);
|
||||
|
||||
CMMMF.device->gpu_barrier();
|
||||
CMMMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -81,7 +81,7 @@ int spical_gpu_init(const int ntypes, double **cutsq, int **cg_type,
|
||||
maxspecial, cell_size, gpu_split, screen,
|
||||
host_cut_ljsq, host_cut_coulsq, host_special_coul,
|
||||
qqrd2e, g_ewald);
|
||||
CMMLMF.device->gpu_barrier();
|
||||
CMMLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -89,7 +89,7 @@ int ljtip4p_long_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
host_special_coul, qqrd2e,
|
||||
g_ewald, map_size, max_same);
|
||||
|
||||
LJTIP4PLMF.device->gpu_barrier();
|
||||
LJTIP4PLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -80,7 +80,7 @@ int mie_gpu_init(const int ntypes, double **cutsq, double **host_mie1,
|
||||
offset, special_lj, inum, nall, max_nbors, maxspecial,
|
||||
cell_size, gpu_split, screen);
|
||||
|
||||
MLMF.device->gpu_barrier();
|
||||
MLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -77,7 +77,7 @@ int mor_gpu_init(const int ntypes, double **cutsq,
|
||||
offset, special_lj, inum, nall, max_nbors, maxspecial,
|
||||
cell_size, gpu_split, screen);
|
||||
|
||||
MORMF.device->gpu_barrier();
|
||||
MORMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -81,7 +81,7 @@ grdtyp * pppm_gpu_init(memtyp &pppm, const int nlocal, const int nall,
|
||||
vd_brick,slab_volfactor,nx_pppm,ny_pppm,nz_pppm,
|
||||
split,success);
|
||||
|
||||
pppm.device->gpu_barrier();
|
||||
pppm.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -80,7 +80,7 @@ int re_gpu_init(const int ntypes, double **shape, double **well, double **cutsq,
|
||||
host_lj4, offset, special_lj, inum, nall,
|
||||
max_nbors, maxspecial, cell_size, gpu_split, screen);
|
||||
|
||||
REMF.device->gpu_barrier();
|
||||
REMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -76,7 +76,7 @@ int soft_gpu_init(const int ntypes, double **cutsq, double **host_prefactor,
|
||||
special_lj, inum, nall, max_nbors, maxspecial,
|
||||
cell_size, gpu_split, screen);
|
||||
|
||||
SLMF.device->gpu_barrier();
|
||||
SLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
@ -106,7 +106,7 @@ void soft_gpu_reinit(const int ntypes, double **cutsq, double **host_prefactor,
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
SLMF.reinit(ntypes, cutsq, host_prefactor, host_cut);
|
||||
|
||||
SLMF.device->gpu_barrier();
|
||||
SLMF.device->serialize_init();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -84,7 +84,7 @@ int sw_gpu_init(const int ntypes, const int inum, const int nall,
|
||||
sigma_gamma, c1, c2, c3, c4, c5, c6, lambda_epsilon,
|
||||
costheta, map, e2param);
|
||||
|
||||
SWMF.device->gpu_barrier();
|
||||
SWMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -76,7 +76,7 @@ int table_gpu_init(const int ntypes, double **cutsq, double ***table_coeffs,
|
||||
special_lj, inum, nall, max_nbors, maxspecial, cell_size,
|
||||
gpu_split, screen, tabstyle, ntables, tablength);
|
||||
|
||||
TBMF.device->gpu_barrier();
|
||||
TBMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -91,7 +91,7 @@ int tersoff_gpu_init(const int ntypes, const int inum, const int nall, const int
|
||||
ts_c1, ts_c2, ts_c3, ts_c4, ts_c, ts_d, ts_h,
|
||||
ts_gamma, ts_beta, ts_powern, ts_cutsq);
|
||||
|
||||
TSMF.device->gpu_barrier();
|
||||
TSMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -91,7 +91,7 @@ int tersoff_mod_gpu_init(const int ntypes, const int inum, const int nall,
|
||||
ts_c3, ts_c4, ts_c5, ts_h, ts_beta, ts_powern,
|
||||
ts_powern_del, ts_ca1, ts_cutsq);
|
||||
|
||||
TSMMF.device->gpu_barrier();
|
||||
TSMMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -102,7 +102,7 @@ int tersoff_zbl_gpu_init(const int ntypes, const int inum, const int nall,
|
||||
ts_ZBLcut, ts_ZBLexpscale, global_e, global_a_0,
|
||||
global_epsilon_0, ts_cutsq);
|
||||
|
||||
TSZMF.device->gpu_barrier();
|
||||
TSZMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -78,7 +78,7 @@ int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1,
|
||||
offset, special_lj, inum, nall, max_nbors, maxspecial,
|
||||
cell_size, gpu_split, screen);
|
||||
|
||||
UFMLMF.device->gpu_barrier();
|
||||
UFMLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
@ -106,7 +106,7 @@ void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1,
|
||||
for (int i=0; i<procs_per_gpu; i++) {
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
UFMLMF.reinit(ntypes, cutsq, host_uf1, host_uf2, host_uf3, offset);
|
||||
UFMLMF.device->gpu_barrier();
|
||||
UFMLMF.device->serialize_init();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -89,7 +89,7 @@ int vashishta_gpu_init(const int ntypes, const int inum, const int nall, const i
|
||||
lam4inv, zizj, mbigd, dvrc, big6w, heta, bigh, bigw,
|
||||
c0, costheta, bigb, big2b, bigc);
|
||||
|
||||
VashishtaMF.device->gpu_barrier();
|
||||
VashishtaMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -76,7 +76,7 @@ int ykcolloid_gpu_init(const int ntypes, double **cutsq, double **host_a,
|
||||
inum, nall, max_nbors, maxspecial, cell_size, gpu_split,
|
||||
screen, kappa);
|
||||
|
||||
YKCOLLMF.device->gpu_barrier();
|
||||
YKCOLLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -76,7 +76,7 @@ int yukawa_gpu_init(const int ntypes, double **cutsq, double kappa,
|
||||
inum, nall, max_nbors, maxspecial, cell_size,
|
||||
gpu_split, screen);
|
||||
|
||||
YKMF.device->gpu_barrier();
|
||||
YKMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -79,7 +79,7 @@ int zbl_gpu_init(const int ntypes, double **cutsq, double **host_sw1,
|
||||
cut_globalsq, cut_innersq, cut_inner,
|
||||
inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen);
|
||||
|
||||
ZBLMF.device->gpu_barrier();
|
||||
ZBLMF.device->serialize_init();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# TODO#!/usr/bin/env python
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
Install.py tool to download, compile, and setup the pace library
|
||||
@ -6,7 +6,10 @@ used to automate the steps described in the README file in this dir
|
||||
"""
|
||||
|
||||
from __future__ import print_function
|
||||
import sys, subprocess
|
||||
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
|
||||
sys.path.append('..')
|
||||
@ -15,23 +18,16 @@ from install_helpers import fullpath, geturl, checkmd5sum
|
||||
# settings
|
||||
|
||||
thisdir = fullpath('.')
|
||||
version = 'v.2021.10.25.fix2'
|
||||
version ='v.2022.09.27.fix10Oct'
|
||||
|
||||
# known checksums for different PACE versions. used to validate the download.
|
||||
checksums = { \
|
||||
'v.2021.2.3.upd2' : '8fd1162724d349b930e474927197f20d',
|
||||
'v.2021.4.9' : '4db54962fbd6adcf8c18d46e1798ceb5',
|
||||
'v.2021.9.28' : 'f98363bb98adc7295ea63974738c2a1b',
|
||||
'v.2021.10.25' : 'a2ac3315c41a1a4a5c912bcb1bc9c5cc',
|
||||
'v.2021.10.25.fix': 'e0572de57039d4afedefb25707b6ceae',
|
||||
'v.2021.10.25.fix2': '32394d799bc282bb57696c78c456e64f'
|
||||
}
|
||||
|
||||
'v.2022.09.27.fix10Oct': '766cebcc0e5c4b8430c2f3cd202d9905'
|
||||
}
|
||||
|
||||
parser = ArgumentParser(prog='Install.py',
|
||||
description="LAMMPS library build wrapper script")
|
||||
|
||||
|
||||
# help message
|
||||
|
||||
HELP = """
|
||||
@ -55,55 +51,68 @@ parser.add_argument("-v", "--version", default=version, choices=checksums.keys()
|
||||
help="set version of PACE library to download and build (default: %s)" % version)
|
||||
parser.add_argument("-vv", "--verbose", action="store_true",
|
||||
help="be more verbose about is happening while this script runs")
|
||||
parser.add_argument("-l", "--local", default=None,
|
||||
help="use local version of PACE library build")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
# print help message and exit, if neither build nor path options are given
|
||||
if not args.build:
|
||||
parser.print_help()
|
||||
sys.exit(HELP)
|
||||
parser.print_help()
|
||||
sys.exit(HELP)
|
||||
|
||||
buildflag = args.build
|
||||
|
||||
verboseflag = args.verbose
|
||||
version = args.version
|
||||
|
||||
local = args.local
|
||||
|
||||
archive_extension = "tar.gz"
|
||||
url = "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/%s.%s" % (version, archive_extension)
|
||||
unarchived_folder_name = "lammps-user-pace-%s"%(version)
|
||||
unarchived_folder_name = "lammps-user-pace-%s" % (version)
|
||||
|
||||
# download PACE tarball, unpack, build PACE
|
||||
if buildflag:
|
||||
if not local:
|
||||
# download entire tarball
|
||||
print("Downloading pace tarball ...")
|
||||
archive_filename = "%s.%s" % (version, archive_extension)
|
||||
download_filename = "%s/%s" % (thisdir, archive_filename)
|
||||
print("Downloading from ", url, " to ", download_filename, end=" ")
|
||||
geturl(url, download_filename)
|
||||
print(" done")
|
||||
|
||||
# download entire tarball
|
||||
# verify downloaded archive integrity via md5 checksum, if known.
|
||||
if version in checksums:
|
||||
if not checkmd5sum(checksums[version], archive_filename):
|
||||
sys.exit("Checksum for pace library does not match")
|
||||
|
||||
print("Downloading pace tarball ...")
|
||||
archive_filename = "%s.%s" % (version, archive_extension)
|
||||
download_filename = "%s/%s" % (thisdir, archive_filename)
|
||||
print("Downloading from ",url," to ",download_filename, end=" ")
|
||||
geturl(url, download_filename)
|
||||
print(" done")
|
||||
print("Unpacking pace tarball ...")
|
||||
src_folder = thisdir + "/src"
|
||||
cmd = 'cd "%s"; rm -rf "%s"; tar -xvf %s; mv %s %s' % (
|
||||
thisdir, src_folder, archive_filename, unarchived_folder_name, src_folder)
|
||||
subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
|
||||
else:
|
||||
# copy from local version of library PACE
|
||||
print("Copy pace from ", local)
|
||||
src_folder = thisdir + "/src"
|
||||
shutil.copytree(local, src_folder,
|
||||
# ignore=lambda (s1,s2): ('.git' in s1 or '.git' in s2),
|
||||
dirs_exist_ok=True)
|
||||
|
||||
# verify downloaded archive integrity via md5 checksum, if known.
|
||||
if version in checksums:
|
||||
if not checkmd5sum(checksums[version], archive_filename):
|
||||
sys.exit("Checksum for pace library does not match")
|
||||
|
||||
print("Unpacking pace tarball ...")
|
||||
src_folder = thisdir+"/src"
|
||||
cmd = 'cd "%s"; rm -rf "%s"; tar -xvf %s; mv %s %s' % (thisdir, src_folder, archive_filename, unarchived_folder_name, src_folder)
|
||||
subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
|
||||
# build
|
||||
print("Building libpace ...")
|
||||
cmd = 'make lib -j2'
|
||||
txt = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
|
||||
if verboseflag:
|
||||
print(txt.decode("UTF-8"))
|
||||
|
||||
# build
|
||||
print("Building libpace ...")
|
||||
cmd = 'make lib -j2'
|
||||
txt = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
|
||||
if verboseflag:
|
||||
print(txt.decode("UTF-8"))
|
||||
# remove source files
|
||||
|
||||
# remove source files
|
||||
print("Removing pace build files and archive ...")
|
||||
cmd = 'make clean-build'
|
||||
if not local:
|
||||
cmd = ('rm %s;' % (download_filename))+cmd
|
||||
subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
|
||||
|
||||
print("Removing pace build files and archive ...")
|
||||
cmd = 'rm %s; make clean-build' % (download_filename)
|
||||
subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True)
|
||||
|
||||
@ -5,8 +5,14 @@ SHELL = /bin/sh
|
||||
YAML_CPP_PATH = src/yaml-cpp
|
||||
YAML_CPP_INC = $(YAML_CPP_PATH)/include
|
||||
|
||||
SRC_FILES = $(wildcard src/ML-PACE/*.cpp)
|
||||
SRC = $(filter-out src/ML-PACE/pair_pace.cpp, $(SRC_FILES))
|
||||
WIGNER_CPP_INC = src/wigner-cpp/include/wigner
|
||||
|
||||
CNPY_CPP_PATH = src/cnpy
|
||||
CNPY_CPP_INC = $(CNPY_CPP_PATH)
|
||||
CNPY_SRC_FILES = $(CNPY_CPP_PATH)/cnpy.cpp
|
||||
|
||||
SRC_FILES = $(wildcard src/ML-PACE/ace/*.cpp) $(wildcard src/ML-PACE/ace-evaluator/*.cpp)
|
||||
SRC = $(filter-out src/ML-PACE/pair_pace.cpp, $(SRC_FILES)) $(CNPY_SRC_FILES)
|
||||
|
||||
# ------ DEFINITIONS ------
|
||||
|
||||
@ -15,7 +21,7 @@ OBJ = $(SRC:.cpp=.o)
|
||||
|
||||
|
||||
# ------ SETTINGS ------
|
||||
CXXFLAGS = -O3 -fPIC -Isrc/ML-PACE -I$(YAML_CPP_INC)
|
||||
CXXFLAGS = -O3 -fPIC -Isrc/ML-PACE/ace -Isrc/ML-PACE/ace-evaluator -I$(YAML_CPP_INC) -I$(WIGNER_CPP_INC) -I$(CNPY_CPP_INC) -DEXTRA_C_PROJECTIONS
|
||||
|
||||
ARCHIVE = ar
|
||||
ARCHFLAG = -rc
|
||||
|
||||
@ -1,3 +1,3 @@
|
||||
pace_SYSINC =-I../../lib/pace/src/ML-PACE -I../../lib/pace/src/yaml-cpp/include
|
||||
pace_SYSINC =-I../../lib/pace/src/ML-PACE/ace -I../../lib/pace/src/ML-PACE/ace-evaluator -I../../lib/pace/src/yaml-cpp/include -I../../lib/pace/src/wigner-cpp/include/wigner -DEXTRA_C_PROJECTIONS
|
||||
pace_SYSLIB = -L../../lib/pace/ -lpace -L../../lib/pace/src/yaml-cpp/ -lyaml-cpp
|
||||
pace_SYSPATH =
|
||||
|
||||
Reference in New Issue
Block a user