Merge remote-tracking branch 'upstream/master'

This commit is contained in:
Gareth Tribello
2018-10-26 22:01:05 +01:00
1856 changed files with 103330 additions and 24994 deletions

View File

@ -35,6 +35,8 @@ linalg set of BLAS and LAPACK routines needed by USER-ATC package
from Axel Kohlmeyer (Temple U)
meam modified embedded atom method (MEAM) potential, MEAM package
from Greg Wagner (Sandia)
message client/server communication library via MPI, sockets, files
from Steve Plimpton (Sandia)
molfile hooks to VMD molfile plugins, used by the USER-MOLFILE package
from Axel Kohlmeyer (Temple U) and the VMD development team
mscg hooks to the MSCG library, used by fix_mscg command

View File

@ -23,15 +23,17 @@ optionally copies Makefile.auto to a new Makefile.osuffix
-m = use Makefile.machine as starting point, copy to Makefile.auto
default machine = linux
default for -h, -a, -p, -e settings are those in -m Makefile
-h = set CUDA_HOME variable in Makefile.auto to hdir
hdir = path to NVIDIA Cuda software, e.g. /usr/local/cuda
-a = set CUDA_ARCH variable in Makefile.auto to arch
use arch = 20 for Tesla C2050/C2070 (Fermi) (deprecated as of CUDA 8.0)
or GeForce GTX 580 or similar
use arch = 30 for Tesla K10 (Kepler)
use arch = 35 for Tesla K40 (Kepler) or GeForce GTX Titan or similar
use arch = 37 for Tesla dual K80 (Kepler)
use arch = 60 for Tesla P100 (Pascal)
use arch = sm_20 for Fermi (C2050/C2070, deprecated as of CUDA 8.0)
or GeForce GTX 580 or similar
use arch = sm_30 for Kepler (K10)
use arch = sm_35 for Kepler (K40) or GeForce GTX Titan or similar
use arch = sm_37 for Kepler (dual K80)
use arch = sm_60 for Pascal (P100)
use arch = sm_70 for Volta
-p = set CUDA_PRECISION variable in Makefile.auto to precision
use precision = double or mixed or single
-e = set EXTRAMAKE variable in Makefile.auto to Makefile.lammps.esuffix
@ -46,7 +48,7 @@ Examples:
make lib-gpu args="-b" # build GPU lib with default Makefile.linux
make lib-gpu args="-m xk7 -p single -o xk7.single" # create new Makefile.xk7.single, altered for single-precision
make lib-gpu args="-m mpi -a 35 -p single -o mpi.mixed -b" # create new Makefile.mpi.mixed, also build GPU lib with these settings
make lib-gpu args="-m mpi -a sm_35 -p single -o mpi.mixed -b" # create new Makefile.mpi.mixed, also build GPU lib with these settings
"""
# print error message or help
@ -127,7 +129,7 @@ for line in lines:
if hflag and words[0] == "CUDA_HOME" and words[1] == '=':
line = line.replace(words[2],hdir)
if aflag and words[0] == "CUDA_ARCH" and words[1] == '=':
line = line.replace(words[2],"-arch=sm_%s" % arch)
line = line.replace(words[2],"-arch=%s" % arch)
if pflag and words[0] == "CUDA_PRECISION" and words[1] == '=':
line = line.replace(words[2],precstr)
if eflag and words[0] == "EXTRAMAKE" and words[1] == '=':

View File

@ -13,8 +13,8 @@ endif
NVCC = nvcc
# Tesla CUDA
CUDA_ARCH = -arch=sm_21
# older CUDA
#CUDA_ARCH = -arch=sm_21
# newer CUDA
#CUDA_ARCH = -arch=sm_13
# older CUDA

View File

@ -79,7 +79,10 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
$(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o \
$(OBJ_DIR)/lal_ufm.o $(OBJ_DIR)/lal_ufm_ext.o \
$(OBJ_DIR)/lal_dipole_long_lj.o $(OBJ_DIR)/lal_dipole_long_lj_ext.o \
$(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o
$(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o \
$(OBJ_DIR)/lal_coul_long_cs.o $(OBJ_DIR)/lal_coul_long_cs_ext.o \
$(OBJ_DIR)/lal_born_coul_long_cs.o $(OBJ_DIR)/lal_born_coul_long_cs_ext.o \
$(OBJ_DIR)/lal_born_coul_wolf_cs.o $(OBJ_DIR)/lal_born_coul_wolf_cs_ext.o
CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
$(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
@ -137,7 +140,10 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
$(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic_cubin.h \
$(OBJ_DIR)/ufm.cubin $(OBJ_DIR)/ufm_cubin.h \
$(OBJ_DIR)/dipole_long_lj.cubin $(OBJ_DIR)/dipole_long_lj_cubin.h \
$(OBJ_DIR)/lj_expand_coul_long.cubin $(OBJ_DIR)/lj_expand_coul_long_cubin.h
$(OBJ_DIR)/lj_expand_coul_long.cubin $(OBJ_DIR)/lj_expand_coul_long_cubin.h \
$(OBJ_DIR)/coul_long_cs.cubin $(OBJ_DIR)/coul_long_cs_cubin.h \
$(OBJ_DIR)/born_coul_long_cs.cubin $(OBJ_DIR)/born_coul_long_cs_cubin.h \
$(OBJ_DIR)/born_coul_wolf_cs.cubin $(OBJ_DIR)/born_coul_wolf_cs_cubin.h
all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
@ -837,6 +843,42 @@ $(OBJ_DIR)/lal_lj_expand_coul_long.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_
$(OBJ_DIR)/lal_lj_expand_coul_long_ext.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long_ext.cpp lal_base_charge.h
$(CUDR) -o $@ -c lal_lj_expand_coul_long_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/coul_long_cs.cubin: lal_coul_long_cs.cu lal_precision.h lal_preprocessor.h
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_coul_long_cs.cu
# Convert the fatbin to an embeddable C header; depends only on the cubin
# (prerequisite was accidentally listed twice — duplicate removed).
$(OBJ_DIR)/coul_long_cs_cubin.h: $(OBJ_DIR)/coul_long_cs.cubin
$(BIN2C) -c -n coul_long_cs $(OBJ_DIR)/coul_long_cs.cubin > $(OBJ_DIR)/coul_long_cs_cubin.h
$(OBJ_DIR)/lal_coul_long_cs.o: $(ALL_H) lal_coul_long_cs.h lal_coul_long_cs.cpp $(OBJ_DIR)/coul_long_cs_cubin.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_coul_long.o
$(CUDR) -o $@ -c lal_coul_long_cs.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_coul_long_cs_ext.o: $(ALL_H) lal_coul_long_cs.h lal_coul_long_cs_ext.cpp lal_coul_long.h
$(CUDR) -o $@ -c lal_coul_long_cs_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/born_coul_long_cs.cubin: lal_born_coul_long_cs.cu lal_precision.h lal_preprocessor.h
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_born_coul_long_cs.cu
# Convert the fatbin to an embeddable C header; depends only on the cubin
# (prerequisite was accidentally listed twice — duplicate removed).
$(OBJ_DIR)/born_coul_long_cs_cubin.h: $(OBJ_DIR)/born_coul_long_cs.cubin
$(BIN2C) -c -n born_coul_long_cs $(OBJ_DIR)/born_coul_long_cs.cubin > $(OBJ_DIR)/born_coul_long_cs_cubin.h
$(OBJ_DIR)/lal_born_coul_long_cs.o: $(ALL_H) lal_born_coul_long_cs.h lal_born_coul_long_cs.cpp $(OBJ_DIR)/born_coul_long_cs_cubin.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_born_coul_long.o
$(CUDR) -o $@ -c lal_born_coul_long_cs.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_born_coul_long_cs_ext.o: $(ALL_H) lal_born_coul_long_cs.h lal_born_coul_long_cs_ext.cpp lal_born_coul_long.h
$(CUDR) -o $@ -c lal_born_coul_long_cs_ext.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/born_coul_wolf_cs.cubin: lal_born_coul_wolf_cs.cu lal_precision.h lal_preprocessor.h
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_born_coul_wolf_cs.cu
# Convert the fatbin to an embeddable C header; depends only on the cubin
# (prerequisite was accidentally listed twice — duplicate removed).
$(OBJ_DIR)/born_coul_wolf_cs_cubin.h: $(OBJ_DIR)/born_coul_wolf_cs.cubin
$(BIN2C) -c -n born_coul_wolf_cs $(OBJ_DIR)/born_coul_wolf_cs.cubin > $(OBJ_DIR)/born_coul_wolf_cs_cubin.h
$(OBJ_DIR)/lal_born_coul_wolf_cs.o: $(ALL_H) lal_born_coul_wolf_cs.h lal_born_coul_wolf_cs.cpp $(OBJ_DIR)/born_coul_wolf_cs_cubin.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_born_coul_wolf.o
$(CUDR) -o $@ -c lal_born_coul_wolf_cs.cpp -I$(OBJ_DIR)
$(OBJ_DIR)/lal_born_coul_wolf_cs_ext.o: $(ALL_H) lal_born_coul_wolf_cs.h lal_born_coul_wolf_cs_ext.cpp lal_born_coul_wolf.h
$(CUDR) -o $@ -c lal_born_coul_wolf_cs_ext.cpp -I$(OBJ_DIR)
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda

View File

@ -48,7 +48,18 @@ struct NVDProperties {
int minor;
CUDA_INT_TYPE totalGlobalMem;
int multiProcessorCount;
CUdevprop_st p;
int maxThreadsPerBlock;
int maxThreadsDim[3];
int maxGridSize[3];
int sharedMemPerBlock;
int totalConstantMemory;
int SIMDWidth;
int memPitch;
int regsPerBlock;
int clockRate;
int textureAlign;
int kernelExecTimeoutEnabled;
int integrated;
int canMapHostMemory;
@ -210,18 +221,18 @@ class UCL_Device {
inline double clock_rate() { return clock_rate(_device); }
/// Clock rate in GHz
inline double clock_rate(const int i)
{ return _properties[i].p.clockRate*1e-6;}
{ return _properties[i].clockRate*1e-6;}
/// Get the maximum number of threads per block
inline size_t group_size() { return group_size(_device); }
/// Get the maximum number of threads per block
inline size_t group_size(const int i)
{ return _properties[i].p.maxThreadsPerBlock; }
{ return _properties[i].maxThreadsPerBlock; }
/// Return the maximum memory pitch in bytes for current device
inline size_t max_pitch() { return max_pitch(_device); }
/// Return the maximum memory pitch in bytes
inline size_t max_pitch(const int i) { return _properties[i].p.memPitch; }
inline size_t max_pitch(const int i) { return _properties[i].memPitch; }
/// Returns false if accelerator cannot be shared by multiple processes
/** If it cannot be determined, true is returned **/
@ -260,6 +271,9 @@ class UCL_Device {
/// List all devices along with all properties
inline void print_all(std::ostream &out);
/// Select the platform that has accelerators (for compatibility with OpenCL)
inline int set_platform_accelerator(int pid=-1) { return UCL_SUCCESS; }
private:
int _device, _num_devices;
std::vector<NVDProperties> _properties;
@ -272,49 +286,54 @@ class UCL_Device {
UCL_Device::UCL_Device() {
CU_SAFE_CALL_NS(cuInit(0));
CU_SAFE_CALL_NS(cuDeviceGetCount(&_num_devices));
for (int dev=0; dev<_num_devices; ++dev) {
CUdevice m;
CU_SAFE_CALL_NS(cuDeviceGet(&m,dev));
for (int i=0; i<_num_devices; ++i) {
CUdevice dev;
CU_SAFE_CALL_NS(cuDeviceGet(&dev,i));
int major, minor;
CU_SAFE_CALL_NS(cuDeviceComputeCapability(&major,&minor,m));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev));
if (major==9999)
continue;
_properties.push_back(NVDProperties());
_properties.back().device_id=dev;
_properties.back().major=major;
_properties.back().minor=minor;
NVDProperties prop;
prop.device_id = i;
prop.major=major;
prop.minor=minor;
char namecstr[1024];
CU_SAFE_CALL_NS(cuDeviceGetName(namecstr,1024,m));
_properties.back().name=namecstr;
CU_SAFE_CALL_NS(cuDeviceGetName(namecstr,1024,dev));
prop.name=namecstr;
CU_SAFE_CALL_NS(cuDeviceTotalMem(&prop.totalGlobalMem,dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.multiProcessorCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.maxThreadsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.maxThreadsDim[0], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.maxThreadsDim[1], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.maxThreadsDim[2], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.maxGridSize[0], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.maxGridSize[1], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.maxGridSize[2], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.sharedMemPerBlock, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.totalConstantMemory, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.SIMDWidth, CU_DEVICE_ATTRIBUTE_WARP_SIZE, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.memPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.regsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.clockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.textureAlign, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, dev));
CU_SAFE_CALL_NS(cuDeviceTotalMem(&_properties.back().totalGlobalMem,m));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&_properties.back().multiProcessorCount,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
m));
CU_SAFE_CALL_NS(cuDeviceGetProperties(&_properties.back().p,m));
#if CUDA_VERSION >= 2020
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
&_properties.back().kernelExecTimeoutEnabled,
CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
&_properties.back().integrated,
CU_DEVICE_ATTRIBUTE_INTEGRATED, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
&_properties.back().canMapHostMemory,
CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&_properties.back().computeMode,
CU_DEVICE_ATTRIBUTE_COMPUTE_MODE,dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.kernelExecTimeoutEnabled, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.canMapHostMemory, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE,dev));
#endif
#if CUDA_VERSION >= 3010
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
&_properties.back().concurrentKernels,
CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
&_properties.back().ECCEnabled,
CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.concurrentKernels, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, dev));
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev));
#endif
_properties.push_back(prop);
}
_device=-1;
_cq.push_back(CUstream());
@ -390,27 +409,27 @@ void UCL_Device::print_all(std::ostream &out) {
<< cores(i) << std::endl;
#endif
out << " Total amount of constant memory: "
<< _properties[i].p.totalConstantMemory << " bytes\n";
<< _properties[i].totalConstantMemory << " bytes\n";
out << " Total amount of local/shared memory per block: "
<< _properties[i].p.sharedMemPerBlock << " bytes\n";
<< _properties[i].sharedMemPerBlock << " bytes\n";
out << " Total number of registers available per block: "
<< _properties[i].p.regsPerBlock << std::endl;
<< _properties[i].regsPerBlock << std::endl;
out << " Warp size: "
<< _properties[i].p.SIMDWidth << std::endl;
<< _properties[i].SIMDWidth << std::endl;
out << " Maximum number of threads per block: "
<< _properties[i].p.maxThreadsPerBlock << std::endl;
<< _properties[i].maxThreadsPerBlock << std::endl;
out << " Maximum group size (# of threads per block) "
<< _properties[i].p.maxThreadsDim[0] << " x "
<< _properties[i].p.maxThreadsDim[1] << " x "
<< _properties[i].p.maxThreadsDim[2] << std::endl;
<< _properties[i].maxThreadsDim[0] << " x "
<< _properties[i].maxThreadsDim[1] << " x "
<< _properties[i].maxThreadsDim[2] << std::endl;
out << " Maximum item sizes (# threads for each dim) "
<< _properties[i].p.maxGridSize[0] << " x "
<< _properties[i].p.maxGridSize[1] << " x "
<< _properties[i].p.maxGridSize[2] << std::endl;
<< _properties[i].maxGridSize[0] << " x "
<< _properties[i].maxGridSize[1] << " x "
<< _properties[i].maxGridSize[2] << std::endl;
out << " Maximum memory pitch: "
<< max_pitch(i) << " bytes\n";
out << " Texture alignment: "
<< _properties[i].p.textureAlign << " bytes\n";
<< _properties[i].textureAlign << " bytes\n";
out << " Clock rate: "
<< clock_rate(i) << " GHz\n";
#if CUDA_VERSION >= 2020

View File

@ -165,8 +165,8 @@ class UCL_Device {
/// Get the current OpenCL device name
inline std::string name() { return name(_device); }
/// Get the OpenCL device name
inline std::string name(const int i)
{ return std::string(_properties[i].name); }
inline std::string name(const int i) {
return std::string(_properties[i].name); }
/// Get a string telling the type of the current device
inline std::string device_type_name() { return device_type_name(_device); }
@ -281,7 +281,7 @@ class UCL_Device {
inline cl_device_id & cl_device() { return _cl_device; }
/// Select the platform that has accelerators
inline void set_platform_accelerator(int pid=-1);
inline int set_platform_accelerator(int pid=-1);
private:
int _num_platforms; // Number of platforms
@ -324,6 +324,7 @@ UCL_Device::~UCL_Device() {
void UCL_Device::clear() {
_properties.clear();
_cl_devices.clear();
if (_device>-1) {
for (size_t i=0; i<_cq.size(); i++) {
CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq.back()));
@ -520,8 +521,6 @@ int UCL_Device::device_type(const int i) {
// Set the CUDA device to the specified device number
int UCL_Device::set(int num) {
clear();
cl_device_id *device_list = new cl_device_id[_num_devices];
cl_uint n;
CL_SAFE_CALL(clGetDeviceIDs(_cl_platform,CL_DEVICE_TYPE_ALL,_num_devices,
@ -612,7 +611,7 @@ void UCL_Device::print_all(std::ostream &out) {
// Select the platform that is associated with accelerators
// if pid < 0, select the first platform
void UCL_Device::set_platform_accelerator(int pid) {
int UCL_Device::set_platform_accelerator(int pid) {
if (pid < 0) {
int found = 0;
for (int n=0; n<_num_platforms; n++) {
@ -625,10 +624,11 @@ void UCL_Device::set_platform_accelerator(int pid) {
break;
}
}
if (found) break;
if (found) return UCL_SUCCESS;
}
return UCL_ERROR;
} else {
set_platform(pid);
return set_platform(pid);
}
}

View File

@ -38,8 +38,8 @@ namespace ucl_opencl {
/// Class for timing OpenCL events
class UCL_Timer {
public:
inline UCL_Timer() : _total_time(0.0f), _initialized(false) { }
inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false)
inline UCL_Timer() : _total_time(0.0f), _initialized(false), has_measured_time(false) { }
inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false), has_measured_time(false)
{ init(dev); }
inline ~UCL_Timer() { clear(); }
@ -49,11 +49,10 @@ class UCL_Timer {
inline void clear() {
if (_initialized) {
CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq));
clReleaseEvent(start_event);
clReleaseEvent(stop_event);
_initialized=false;
_total_time=0.0;
}
has_measured_time = false;
}
/// Initialize default command queue for timing
@ -66,25 +65,39 @@ class UCL_Timer {
_cq=cq;
clRetainCommandQueue(_cq);
_initialized=true;
has_measured_time = false;
}
/// Start timing on default command queue
inline void start() { UCL_OCL_MARKER(_cq,&start_event); }
inline void start() {
UCL_OCL_MARKER(_cq,&start_event);
has_measured_time = false;
}
/// Stop timing on default command queue
inline void stop() { UCL_OCL_MARKER(_cq,&stop_event); }
inline void stop() {
UCL_OCL_MARKER(_cq,&stop_event);
has_measured_time = true;
}
/// Block until the start event has been reached on device
inline void sync_start()
{ CL_SAFE_CALL(clWaitForEvents(1,&start_event)); }
inline void sync_start() {
CL_SAFE_CALL(clWaitForEvents(1,&start_event));
has_measured_time = false;
}
/// Block until the stop event has been reached on device
inline void sync_stop()
{ CL_SAFE_CALL(clWaitForEvents(1,&stop_event)); }
inline void sync_stop() {
CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
has_measured_time = true;
}
/// Set the time elapsed to zero (not the total_time)
inline void zero()
{ UCL_OCL_MARKER(_cq,&start_event); UCL_OCL_MARKER(_cq,&stop_event); }
inline void zero() {
has_measured_time = false;
UCL_OCL_MARKER(_cq,&start_event);
UCL_OCL_MARKER(_cq,&stop_event);
}
/// Set the total time to zero
inline void zero_total() { _total_time=0.0; }
@ -99,6 +112,7 @@ class UCL_Timer {
/// Return the time (ms) of last start to stop - Forces synchronization
inline double time() {
if(!has_measured_time) return 0.0;
cl_ulong tstart,tend;
CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
CL_SAFE_CALL(clGetEventProfilingInfo(stop_event,
@ -107,6 +121,9 @@ class UCL_Timer {
CL_SAFE_CALL(clGetEventProfilingInfo(start_event,
CL_PROFILING_COMMAND_END,
sizeof(cl_ulong), &tstart, NULL));
clReleaseEvent(start_event);
clReleaseEvent(stop_event);
has_measured_time = false;
return (tend-tstart)*t_factor;
}
@ -123,8 +140,9 @@ class UCL_Timer {
cl_event start_event, stop_event;
cl_command_queue _cq;
double _total_time;
bool _initialized;
double t_factor;
bool _initialized;
bool has_measured_time;
};
} // namespace

View File

@ -322,10 +322,12 @@ class Atom {
// Copy charges to device asynchronously
inline void add_q_data() {
time_q.start();
if (_q_avail==false) {
q.update_device(_nall,true);
_q_avail=true;
}
time_q.stop();
}
// Cast quaternions to write buffer
@ -347,10 +349,12 @@ class Atom {
// Copy quaternions to device
/** Copies nall()*4 elements **/
inline void add_quat_data() {
time_quat.start();
if (_quat_avail==false) {
quat.update_device(_nall*4,true);
_quat_avail=true;
}
time_quat.stop();
}
/// Cast velocities and tags to write buffer

View File

@ -34,8 +34,8 @@ using namespace LAMMPS_AL;
template <class numtyp, class acctyp>
DeviceT::Device() : _init_count(0), _device_init(false),
_gpu_mode(GPU_FORCE), _first_device(0),
_last_device(0), _compiled(false) {
_gpu_mode(GPU_FORCE), _first_device(0),
_last_device(0), _platform_id(-1), _compiled(false) {
}
template <class numtyp, class acctyp>
@ -67,6 +67,17 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,
_particle_split=p_split;
_cell_size=cell_size;
_block_pair=block_pair;
// support selecting platform though "package device" keyword.
// "0:generic" will select platform 0 and tune for generic device
// "1:fermi" will select platform 1 and tune for Nvidia Fermi gpu
if (ocl_vendor) {
char *sep = NULL;
if ((sep = strstr(ocl_vendor,":"))) {
*sep = '\0';
_platform_id = atoi(ocl_vendor);
ocl_vendor = sep+1;
}
}
// Get the rank/size within the world
MPI_Comm_rank(_comm_world,&_world_me);
@ -119,8 +130,16 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,
// Time on the device only if 1 proc per gpu
_time_device=true;
#if 0
// XXX: the following setting triggers a memory leak with OpenCL and MPI
// setting _time_device=true for all processes doesn't seem to be a
// problem with either (no segfault, no (large) memory leak.
// thus keeping this disabled for now. may need to review later.
// 2018-07-23 <akohlmey@gmail.com>
if (_procs_per_gpu>1)
_time_device=false;
#endif
// Set up a per device communicator
MPI_Comm_split(node_comm,my_gpu,0,&_comm_gpu);
@ -135,6 +154,9 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,
return -7;
#endif
if (gpu->set_platform_accelerator(_platform_id)!=UCL_SUCCESS)
return -12;
if (gpu->set(my_gpu)!=UCL_SUCCESS)
return -6;
@ -191,13 +213,15 @@ int DeviceT::set_ocl_params(char *ocl_vendor) {
_ocl_vendor_string="-DUSE_OPENCL";
int token_count=0;
std::string params[13];
char *pch = strtok(ocl_vendor,"\" ");
char *pch = strtok(ocl_vendor,",");
pch = strtok(NULL,",");
if (pch == NULL) return -11;
while (pch != NULL) {
if (token_count==13)
return -11;
params[token_count]=pch;
token_count++;
pch = strtok(NULL,"\" ");
pch = strtok(NULL,",");
}
_ocl_vendor_string+=" -DMEM_THREADS="+params[0]+
" -DTHREADS_PER_ATOM="+params[1]+
@ -656,7 +680,7 @@ int DeviceT::compile_kernels() {
dev_program=new UCL_Program(*gpu);
int success=dev_program->load_string(device,compile_string().c_str());
if (success!=UCL_SUCCESS)
return -4;
return -6;
k_zero.set_function(*dev_program,"kernel_zero");
k_info.set_function(*dev_program,"kernel_info");
_compiled=true;

View File

@ -292,7 +292,7 @@ class Device {
MPI_Comm _comm_world, _comm_replica, _comm_gpu;
int _procs_per_gpu, _gpu_rank, _world_me, _world_size, _replica_me,
_replica_size;
int _gpu_mode, _first_device, _last_device, _nthreads;
int _gpu_mode, _first_device, _last_device, _platform_id, _nthreads;
double _particle_split;
double _cpu_full;
double _ptx_arch;

View File

@ -127,10 +127,10 @@ void Neighbor::alloc(bool &success) {
dev_packed.clear();
success=success && (dev_packed.alloc((_max_nbors+2)*_max_atoms,*dev,
_packed_permissions)==UCL_SUCCESS);
dev_acc.clear();
success=success && (dev_acc.alloc(_max_atoms,*dev,
dev_ilist.clear();
success=success && (dev_ilist.alloc(_max_atoms,*dev,
UCL_READ_WRITE)==UCL_SUCCESS);
_c_bytes+=dev_packed.row_bytes()+dev_acc.row_bytes();
_c_bytes+=dev_packed.row_bytes()+dev_ilist.row_bytes();
}
if (_max_host>0) {
nbor_host.clear();
@ -197,7 +197,7 @@ void Neighbor::clear() {
host_packed.clear();
host_acc.clear();
dev_acc.clear();
dev_ilist.clear();
dev_nbor.clear();
nbor_host.clear();
dev_packed.clear();
@ -281,7 +281,7 @@ void Neighbor::get_host(const int inum, int *ilist, int *numj,
}
UCL_D_Vec<int> acc_view;
acc_view.view_offset(inum,dev_nbor,inum*2);
ucl_copy(acc_view,host_acc,true);
ucl_copy(acc_view,host_acc,inum*2,true);
UCL_H_Vec<int> host_view;
host_view.alloc(_max_atoms,*dev,UCL_READ_WRITE);
@ -289,7 +289,7 @@ void Neighbor::get_host(const int inum, int *ilist, int *numj,
int i=ilist[ii];
host_view[i] = ii;
}
ucl_copy(dev_acc,host_view,true);
ucl_copy(dev_ilist,host_view,true);
time_nbor.stop();
@ -364,7 +364,7 @@ void Neighbor::get_host3(const int inum, const int nlist, int *ilist, int *numj,
}
UCL_D_Vec<int> acc_view;
acc_view.view_offset(inum,dev_nbor,inum*2);
ucl_copy(acc_view,host_acc,true);
ucl_copy(acc_view,host_acc,inum*2,true);
time_nbor.stop();
if (_use_packing==false) {

View File

@ -110,7 +110,7 @@ class Neighbor {
}
if (_time_device) {
time_nbor.add_to_total();
time_kernel.add_to_total();
if (_use_packing==false) time_kernel.add_to_total();
if (_gpu_nbor==2) {
time_hybrid1.add_to_total();
time_hybrid2.add_to_total();
@ -200,7 +200,7 @@ class Neighbor {
/// Host storage for nbor counts (row 1) & accumulated neighbor counts (row2)
UCL_H_Vec<int> host_acc;
/// Device storage for accessing atom indices from the neighbor list (3-body)
UCL_D_Vec<int> dev_acc;
UCL_D_Vec<int> dev_ilist;
// ----------------- Data for GPU Neighbor Calculation ---------------

View File

@ -119,6 +119,8 @@
#define BLOCK_ELLIPSE 128
#define MAX_SHARED_TYPES 11
#if (__CUDACC_VER_MAJOR__ < 9)
#ifdef _SINGLE_SINGLE
#define shfl_xor __shfl_xor
#else
@ -132,6 +134,25 @@ ucl_inline double shfl_xor(double var, int laneMask, int width) {
}
#endif
#else
#ifdef _SINGLE_SINGLE
ucl_inline double shfl_xor(double var, int laneMask, int width) {
return __shfl_xor_sync(0xffffffff, var, laneMask, width);
}
#else
ucl_inline double shfl_xor(double var, int laneMask, int width) {
int2 tmp;
tmp.x = __double2hiint(var);
tmp.y = __double2loint(var);
tmp.x = __shfl_xor_sync(0xffffffff,tmp.x,laneMask,width);
tmp.y = __shfl_xor_sync(0xffffffff,tmp.y,laneMask,width);
return __hiloint2double(tmp.x,tmp.y);
}
#endif
#endif
#endif
#endif

View File

@ -243,7 +243,7 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) {
this->k_three_end_vatom.run(&this->atom->x, &sw1, &sw2, &sw3,
&map, &elem2param, &_nelements,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->nbor->dev_acc, &this->dev_short_nbor,
&this->nbor->dev_ilist, &this->dev_short_nbor,
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
@ -252,7 +252,7 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) {
this->k_three_end.run(&this->atom->x, &sw1, &sw2, &sw3,
&map, &elem2param, &_nelements,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->nbor->dev_acc, &this->dev_short_nbor,
&this->nbor->dev_ilist, &this->dev_short_nbor,
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);

View File

@ -544,7 +544,7 @@ __kernel void k_sw_three_end(const __global numtyp4 *restrict x_,
const int nelements,
const __global int * dev_nbor,
const __global int * dev_packed,
const __global int * dev_acc,
const __global int * dev_ilist,
const __global int * dev_short_nbor,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
@ -614,13 +614,13 @@ __kernel void k_sw_three_end(const __global numtyp4 *restrict x_,
int nbor_k,numk;
if (dev_nbor==dev_packed) {
if (gpu_nbor) nbor_k=j+nbor_pitch;
else nbor_k=dev_acc[j]+nbor_pitch;
else nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
nbor_k+=offset_k;
} else {
nbor_k=dev_acc[j]+nbor_pitch;
nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch;
nbor_k=dev_nbor[nbor_k];
@ -698,7 +698,7 @@ __kernel void k_sw_three_end_vatom(const __global numtyp4 *restrict x_,
const int nelements,
const __global int * dev_nbor,
const __global int * dev_packed,
const __global int * dev_acc,
const __global int * dev_ilist,
const __global int * dev_short_nbor,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
@ -768,13 +768,13 @@ __kernel void k_sw_three_end_vatom(const __global numtyp4 *restrict x_,
int nbor_k,numk;
if (dev_nbor==dev_packed) {
if (gpu_nbor) nbor_k=j+nbor_pitch;
else nbor_k=dev_acc[j]+nbor_pitch;
else nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
nbor_k+=offset_k;
} else {
nbor_k=dev_acc[j]+nbor_pitch;
nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch;
nbor_k=dev_nbor[nbor_k];

View File

@ -272,7 +272,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->dev_short_nbor,
&_eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom);
&eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom);
ainum=this->ans->inum();
nbor_pitch=this->nbor->nbor_pitch();
@ -311,7 +311,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->nbor->dev_acc, &this->dev_short_nbor,
&this->nbor->dev_ilist, &this->dev_short_nbor,
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
@ -320,7 +320,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->nbor->dev_acc, &this->dev_short_nbor,
&this->nbor->dev_ilist, &this->dev_short_nbor,
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
}

View File

@ -696,7 +696,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
const __global acctyp4 *restrict zetaij,
const __global int * dev_nbor,
const __global int * dev_packed,
const __global int * dev_acc,
const __global int * dev_ilist,
const __global int * dev_short_nbor,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
@ -777,13 +777,13 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
int nbor_k,numk;
if (dev_nbor==dev_packed) {
if (gpu_nbor) nbor_k=j+nbor_pitch;
else nbor_k=dev_acc[j]+nbor_pitch;
else nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
nbor_k+=offset_k;
} else {
nbor_k=dev_acc[j]+nbor_pitch;
nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch;
nbor_k=dev_nbor[nbor_k];
@ -941,7 +941,7 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
const __global acctyp4 *restrict zetaij,
const __global int * dev_nbor,
const __global int * dev_packed,
const __global int * dev_acc,
const __global int * dev_ilist,
const __global int * dev_short_nbor,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
@ -1022,13 +1022,13 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
int nbor_k,numk;
if (dev_nbor==dev_packed) {
if (gpu_nbor) nbor_k=j+nbor_pitch;
else nbor_k=dev_acc[j]+nbor_pitch;
else nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
nbor_k+=offset_k;
} else {
nbor_k=dev_acc[j]+nbor_pitch;
nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch;
nbor_k=dev_nbor[nbor_k];

View File

@ -272,7 +272,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) {
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->dev_short_nbor,
&_eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom);
&eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom);
ainum=this->ans->inum();
nbor_pitch=this->nbor->nbor_pitch();
@ -311,7 +311,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) {
this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &ts5, &cutsq,
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->nbor->dev_acc, &this->dev_short_nbor,
&this->nbor->dev_ilist, &this->dev_short_nbor,
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
@ -320,7 +320,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) {
this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &ts5, &cutsq,
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->nbor->dev_acc, &this->dev_short_nbor,
&this->nbor->dev_ilist, &this->dev_short_nbor,
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
}

View File

@ -272,7 +272,7 @@ __kernel void k_tersoff_mod_zeta(const __global numtyp4 *restrict x_,
if (ii<inum) {
int nbor_j, nbor_end, i, numj;
const int* nbor_mem=dev_packed;
const __global int* nbor_mem=dev_packed;
int offset_j=offset/t_per_atom;
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
n_stride,nbor_end,nbor_j);
@ -432,7 +432,7 @@ __kernel void k_tersoff_mod_repulsive(const __global numtyp4 *restrict x_,
if (ii<inum) {
int nbor, nbor_end, i, numj;
const int* nbor_mem=dev_packed;
const __global int* nbor_mem=dev_packed;
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
n_stride,nbor_end,nbor);
@ -547,7 +547,7 @@ __kernel void k_tersoff_mod_three_center(const __global numtyp4 *restrict x_,
if (ii<inum) {
int i, numj, nbor_j, nbor_end;
const int* nbor_mem=dev_packed;
const __global int* nbor_mem=dev_packed;
int offset_j=offset/t_per_atom;
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
n_stride,nbor_end,nbor_j);
@ -702,7 +702,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
const __global acctyp4 *restrict zetaij,
const __global int * dev_nbor,
const __global int * dev_packed,
const __global int * dev_acc,
const __global int * dev_ilist,
const __global int * dev_short_nbor,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
@ -740,7 +740,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
if (ii<inum) {
int i, numj, nbor_j, nbor_end, k_end;
const int* nbor_mem=dev_packed;
const __global int* nbor_mem=dev_packed;
int offset_j=offset/t_per_atom;
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
n_stride,nbor_end,nbor_j);
@ -785,13 +785,13 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
int nbor_k,numk;
if (dev_nbor==dev_packed) {
if (gpu_nbor) nbor_k=j+nbor_pitch;
else nbor_k=dev_acc[j]+nbor_pitch;
else nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
nbor_k+=offset_k;
} else {
nbor_k=dev_acc[j]+nbor_pitch;
nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch;
nbor_k=dev_nbor[nbor_k];
@ -956,7 +956,7 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
const __global acctyp4 *restrict zetaij,
const __global int * dev_nbor,
const __global int * dev_packed,
const __global int * dev_acc,
const __global int * dev_ilist,
const __global int * dev_short_nbor,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
@ -994,7 +994,7 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
if (ii<inum) {
int i, numj, nbor_j, nbor_end, k_end;
const int* nbor_mem = dev_packed;
const __global int* nbor_mem = dev_packed;
int offset_j=offset/t_per_atom;
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
n_stride,nbor_end,nbor_j);
@ -1039,13 +1039,13 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
int nbor_k,numk;
if (dev_nbor==dev_packed) {
if (gpu_nbor) nbor_k=j+nbor_pitch;
else nbor_k=dev_acc[j]+nbor_pitch;
else nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
nbor_k+=offset_k;
} else {
nbor_k=dev_acc[j]+nbor_pitch;
nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch;
nbor_k=dev_nbor[nbor_k];

View File

@ -297,7 +297,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) {
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->dev_short_nbor,
&_eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom);
&eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom);
ainum=this->ans->inum();
nbor_pitch=this->nbor->nbor_pitch();
@ -337,7 +337,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) {
this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->nbor->dev_acc, &this->dev_short_nbor,
&this->nbor->dev_ilist, &this->dev_short_nbor,
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
@ -346,7 +346,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) {
this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->nbor->dev_acc, &this->dev_short_nbor,
&this->nbor->dev_ilist, &this->dev_short_nbor,
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
}

View File

@ -278,7 +278,7 @@ __kernel void k_tersoff_zbl_zeta(const __global numtyp4 *restrict x_,
if (ii<inum) {
int nbor_j, nbor_end, i, numj;
const int* nbor_mem=dev_packed;
const __global int* nbor_mem=dev_packed;
int offset_j=offset/t_per_atom;
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
n_stride,nbor_end,nbor_j);
@ -445,7 +445,7 @@ __kernel void k_tersoff_zbl_repulsive(const __global numtyp4 *restrict x_,
if (ii<inum) {
int nbor, nbor_end, i, numj;
const int* nbor_mem=dev_packed;
const __global int* nbor_mem=dev_packed;
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
n_stride,nbor_end,nbor);
@ -563,7 +563,7 @@ __kernel void k_tersoff_zbl_three_center(const __global numtyp4 *restrict x_,
if (ii<inum) {
int i, numj, nbor_j, nbor_end;
const int* nbor_mem=dev_packed;
const __global int* nbor_mem=dev_packed;
int offset_j=offset/t_per_atom;
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
n_stride,nbor_end,nbor_j);
@ -714,7 +714,7 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
const __global acctyp4 *restrict zetaij,
const __global int * dev_nbor,
const __global int * dev_packed,
const __global int * dev_acc,
const __global int * dev_ilist,
const __global int * dev_short_nbor,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
@ -750,7 +750,7 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
if (ii<inum) {
int i, numj, nbor_j, nbor_end, k_end;
const int* nbor_mem=dev_packed;
const __global int* nbor_mem=dev_packed;
int offset_j=offset/t_per_atom;
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
n_stride,nbor_end,nbor_j);
@ -795,13 +795,13 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
int nbor_k,numk;
if (dev_nbor==dev_packed) {
if (gpu_nbor) nbor_k=j+nbor_pitch;
else nbor_k=dev_acc[j]+nbor_pitch;
else nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
nbor_k+=offset_k;
} else {
nbor_k=dev_acc[j]+nbor_pitch;
nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch;
nbor_k=dev_nbor[nbor_k];
@ -959,7 +959,7 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_,
const __global acctyp4 *restrict zetaij,
const __global int * dev_nbor,
const __global int * dev_packed,
const __global int * dev_acc,
const __global int * dev_ilist,
const __global int * dev_short_nbor,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
@ -995,7 +995,7 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_,
if (ii<inum) {
int i, numj, nbor_j, nbor_end, k_end;
const int* nbor_mem = dev_packed;
const __global int* nbor_mem = dev_packed;
int offset_j=offset/t_per_atom;
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
n_stride,nbor_end,nbor_j);
@ -1040,13 +1040,13 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_,
int nbor_k,numk;
if (dev_nbor==dev_packed) {
if (gpu_nbor) nbor_k=j+nbor_pitch;
else nbor_k=dev_acc[j]+nbor_pitch;
else nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
nbor_k+=offset_k;
} else {
nbor_k=dev_acc[j]+nbor_pitch;
nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch;
nbor_k=dev_nbor[nbor_k];

View File

@ -278,7 +278,7 @@ void VashishtaT::loop(const bool _eflag, const bool _vflag, const int evatom) {
this->k_three_end_vatom.run(&this->atom->x, &param1, &param2, &param3, &param4, &param5,
&map, &elem2param, &_nelements,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->nbor->dev_acc, &this->dev_short_nbor,
&this->nbor->dev_ilist, &this->dev_short_nbor,
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
} else {
@ -286,7 +286,7 @@ void VashishtaT::loop(const bool _eflag, const bool _vflag, const int evatom) {
this->k_three_end.run(&this->atom->x, &param1, &param2, &param3, &param4, &param5,
&map, &elem2param, &_nelements,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->nbor->dev_acc, &this->dev_short_nbor,
&this->nbor->dev_ilist, &this->dev_short_nbor,
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
}

View File

@ -554,7 +554,7 @@ __kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_,
const int nelements,
const __global int * dev_nbor,
const __global int * dev_packed,
const __global int * dev_acc,
const __global int * dev_ilist,
const __global int * dev_short_nbor,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
@ -623,13 +623,13 @@ __kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_,
int nbor_k,numk;
if (dev_nbor==dev_packed) {
if (gpu_nbor) nbor_k=j+nbor_pitch;
else nbor_k=dev_acc[j]+nbor_pitch;
else nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
nbor_k+=offset_k;
} else {
nbor_k=dev_acc[j]+nbor_pitch;
nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch;
nbor_k=dev_nbor[nbor_k];
@ -709,7 +709,7 @@ __kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_,
const int nelements,
const __global int * dev_nbor,
const __global int * dev_packed,
const __global int * dev_acc,
const __global int * dev_ilist,
const __global int * dev_short_nbor,
__global acctyp4 *restrict ans,
__global acctyp *restrict engv,
@ -778,13 +778,13 @@ __kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_,
int nbor_k,numk;
if (dev_nbor==dev_packed) {
if (gpu_nbor) nbor_k=j+nbor_pitch;
else nbor_k=dev_acc[j]+nbor_pitch;
else nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
nbor_k+=offset_k;
} else {
nbor_k=dev_acc[j]+nbor_pitch;
nbor_k=dev_ilist[j]+nbor_pitch;
numk=dev_nbor[nbor_k];
nbor_k+=nbor_pitch;
nbor_k=dev_nbor[nbor_k];

View File

@ -158,7 +158,7 @@ if (NOT "${KOKKOS_INTERNAL_PATHS}" STREQUAL "")
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_INTERNAL_PATHS})
endif()
if (NOT "${KOKKOS_INTERNAL_ADDTOPATH}" STREQUAL "")
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=\"${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}\"")
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}")
endif()
if (CMAKE_CXX_STANDARD)

View File

@ -292,7 +292,8 @@ public:
#if ! defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
if ( 0 == r ) {
Kokkos::abort("Cuda const random access View using Cuda texture memory requires Kokkos to allocate the View's memory");
//Kokkos::abort("Cuda const random access View using Cuda texture memory requires Kokkos to allocate the View's memory");
return handle_type();
}
#endif

View File

@ -40,7 +40,7 @@ version = '1.2.1'
checksums = { \
'1.1.0' : '533635721ee222d0ed2925a18fb5b294', \
'1.2.0' : '68bf0db879da5e068a71281020239ae7', \
'1.2.1' : 'bed76e7e76c545c36dd848a8f1fd35eb' \
'1.2.1' : '85ac414fdada2d04619c8f936344df14', \
}
# print error message or help

View File

@ -4,9 +4,9 @@
latte_SYSINC =
latte_SYSLIB = ../../lib/latte/filelink.o \
-llatte -lifcore -lsvml -lompstub -limf -lmkl_intel_lp64 \
-lmkl_intel_thread -lmkl_core -lmkl_intel_thread -lpthread \
-openmp -O0
-llatte -lifport -lifcore -lsvml -lompstub -limf \
-lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core \
-lmkl_intel_thread -lpthread -openmp
latte_SYSPATH = -openmp -L${MKLROOT}/lib/intel64 -lmkl_lapack95_lp64 \
-L/opt/intel/composer_xe_2013_sp1.2.144/compiler/lib/intel64

118
lib/message/Install.py Normal file
View File

@ -0,0 +1,118 @@
#!/usr/bin/env python
# Install.py tool to build the CSlib library
# used to automate the steps described in the README file in this dir
from __future__ import print_function
import sys,os,re,subprocess
# help message
help = """
Syntax from src dir: make lib-message args="-m"
or: make lib-message args="-s -z"
Syntax from lib dir: python Install.py -m
or: python Install.py -s -z
specify zero or more options, order does not matter
-m = parallel build of CSlib library
-s = serial build of CSlib library
-z = build CSlib library with ZMQ socket support, default = no ZMQ support
Example:
make lib-message args="-m -z" # build parallel CSlib with ZMQ support
make lib-message args="-s" # build serial CSlib with no ZMQ support
"""
# print an error line (or the usage help) and terminate the script

def error(str=None):
  """Exit after printing "ERROR <str>", or the help text when no message is given."""
  if str:
    print("ERROR", str)
  else:
    print(help)
  sys.exit()
# expand a leading '~' or a relative path into an absolute path name

def fullpath(path):
  """Return the absolute form of path, with any '~' user prefix expanded."""
  expanded = os.path.expanduser(path)
  return os.path.abspath(expanded)
# locate an executable like the shell "which" command
# (helper not referenced elsewhere in this script; kept for parity with
#  the other lib/*/Install.py tools)

def which(program):
  """Return the full path of a runnable program, or None.

  A bare name is searched along every directory in $PATH; a name that
  already contains a directory component is checked directly.
  """
  def runnable(candidate):
    # "executable" == existing regular file with the execute bit set
    return os.path.isfile(candidate) and os.access(candidate, os.X_OK)

  head, tail = os.path.split(program)
  if head:
    if runnable(program):
      return program
  else:
    for directory in os.environ["PATH"].split(os.pathsep):
      candidate = os.path.join(directory.strip('"'), program)
      if runnable(candidate):
        return candidate
  return None
# parse args

args = sys.argv[1:]
nargs = len(args)
if nargs == 0: error()

# build configuration selected by the command-line flags (see help above)
mpiflag = False
serialflag = False
zmqflag = False

iarg = 0
while iarg < nargs:
  if args[iarg] == "-m":
    mpiflag = True
    iarg += 1
  elif args[iarg] == "-s":
    serialflag = True
    iarg += 1
  elif args[iarg] == "-z":
    zmqflag = True
    iarg += 1
  else: error()

# exactly one of -m / -s must be chosen
if (not mpiflag and not serialflag):
  error("Must use either -m or -s flag")
if (mpiflag and serialflag):
  error("Cannot use -m and -s flag at the same time")

# build CSlib
# copy resulting lib to cslib/src/libmessage.a
# copy appropriate Makefile.lammps.* to Makefile.lammps

print("Building CSlib ...")
srcdir = fullpath("./cslib/src")

# the four branches cover every -m/-s x -z combination, so cmd is always
# bound; the cslib Makefile builds with ZMQ by default, zmq=no disables it
if mpiflag and zmqflag:
  cmd = "cd %s; make lib_parallel" % srcdir
elif mpiflag and not zmqflag:
  cmd = "cd %s; make lib_parallel zmq=no" % srcdir
elif not mpiflag and zmqflag:
  cmd = "cd %s; make lib_serial" % srcdir
elif not mpiflag and not zmqflag:
  cmd = "cd %s; make lib_serial zmq=no" % srcdir
print(cmd)
# check_output() raises CalledProcessError on a non-zero exit status,
# aborting the install; stderr is folded into the captured output
txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
print(txt.decode('UTF-8'))

# the parallel and serial builds produce differently named archives;
# either way LAMMPS links against the generic name libmessage.a
if mpiflag: cmd = "cd %s; cp libcsmpi.a libmessage.a" % srcdir
else: cmd = "cd %s; cp libcsnompi.a libmessage.a" % srcdir
print(cmd)
txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
print(txt.decode('UTF-8'))

# select the Makefile.lammps variant that matches the ZMQ choice
if zmqflag: cmd = "cp Makefile.lammps.zmq Makefile.lammps"
else: cmd = "cp Makefile.lammps.nozmq Makefile.lammps"
print(cmd)
txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
print(txt.decode('UTF-8'))

View File

@ -0,0 +1,5 @@
# Settings that the LAMMPS build will import when this package library is used
# no-ZMQ build: the CSlib needs no extra system libraries or paths

message_SYSINC =
message_SYSLIB =
message_SYSPATH =

View File

@ -0,0 +1,5 @@
# Settings that the LAMMPS build will import when this package library is used
# ZMQ build: link the ZeroMQ socket library into LAMMPS
# add -L<dir> to message_SYSPATH if libzmq is not in a default link path

message_SYSINC =
message_SYSLIB = -lzmq
message_SYSPATH =

51
lib/message/README Normal file
View File

@ -0,0 +1,51 @@
This directory contains the CSlib library which is required
to use the MESSAGE package and its client/server commands
in a LAMMPS input script.
The CSlib library is included in the LAMMPS distribution. A fuller
version including documentation and test programs is available at
http://cslib.sandia.gov. It was developed by Steve Plimpton at Sandia
National Laboratories.
You can type "make lib-message" from the src directory to see help on
how to build this library via make commands, or you can do the same
thing by typing "python Install.py" from within this directory, or you
can do it manually by following the instructions below.
The CSlib can be optionally built with support for sockets using
the open-source ZeroMQ (ZMQ) library. If it is not installed
on your system, it is easy to download and install.
Go to the ZMQ website for details: http://zeromq.org
-----------------
Instructions:
1. Compile CSlib from within cslib/src with one of the following:
% make lib_parallel # build parallel library with ZMQ socket support
% make lib_serial # build serial library with ZMQ support
% make lib_parallel zmq=no # build parallel lib with no ZMQ support
% make lib_serial zmq=no # build serial lib with no ZMQ support
2. Copy the produced cslib/src/libcsmpi.a or libcsnompi.a file to
cslib/src/libmessage.a
3. Copy either lib/message/Makefile.lammps.zmq or Makefile.lammps.nozmq
to lib/message/Makefile.lammps, depending on whether you
build the library with ZMQ support or not.
If your ZMQ library is not in a place your shell path finds,
you can set the INCLUDE and PATH variables in Makefile.lammps
to point to the dirs where the ZMQ include and library files are.
-----------------
When these steps are complete you can build LAMMPS
with the MESSAGE package installed:
% cd lammps/src
% make yes-message
% make mpi (or whatever target you wish)
Note that if you download and unpack a new LAMMPS tarball, you will
need to re-build the CSlib in this dir.

32
lib/message/cslib/LICENSE Normal file
View File

@ -0,0 +1,32 @@
Program: CSlib client/server coupling library
Copyright 2018 National Technology & Engineering Solutions of Sandia,
LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
U.S. Government retains certain rights in this software. This
software is distributed under the modified Berkeley Software
Distribution (BSD) License.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of Sandia Corporation nor the names of contributors
to this software may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

23
lib/message/cslib/README Normal file
View File

@ -0,0 +1,23 @@
This is the Client/Server messaging library (CSlib).
Only the source directory and license file are included here as part
of the LAMMPS distribution. The full CSlib distribution, including
documentation and test codes, can be found at the website:
http://cslib.sandia.gov (as of Aug 2018).
The contact author is
Steve Plimpton
Sandia National Laboratories
sjplimp@sandia.gov
http://www.sandia.gov/~sjplimp
The CSlib is distributed as open-source code under the modified
Berkeley Software Distribution (BSD) License. See the accompanying
LICENSE file.
This directory contains the following:
README this file
LICENSE modified BSD license
src source files for library

View File

@ -0,0 +1,107 @@
# Makefile for CSlib = client/server messaging library
# type "make help" for options

SHELL = /bin/sh

# ----------------------------------------
# should only need to change this section
# compiler/linker settings
# ----------------------------------------

CC = g++
# NOTE: CCFLAGS must stay recursively expanded (=): ZMQ and MPI are
#       derived below from the zmq/mpi command-line options
CCFLAGS = -g -O3 -DZMQ_$(ZMQ) -DMPI_$(MPI)
SHFLAGS = -fPIC
ARCHIVE = ar
ARCHFLAGS = -rc
SHLIBFLAGS = -shared

# files

LIB = libcsmpi.a
SHLIB = libcsmpi.so
SRC = $(wildcard *.cpp)
INC = $(wildcard *.h)
OBJ = $(SRC:.cpp=.o)

# build with ZMQ support or not
# "make ... zmq=no" compiles against the dummy headers in STUBS_ZMQ

zmq = yes
ZMQ = $(shell echo $(zmq) | tr a-z A-Z)

ifeq ($(ZMQ),YES)
  ZMQLIB = -lzmq
else
  CCFLAGS += -I./STUBS_ZMQ
endif

# build with MPI support or not
# "make ... mpi=no" compiles against the dummy headers in STUBS_MPI
# and produces the *nompi* library names instead

mpi = yes
MPI = $(shell echo $(mpi) | tr a-z A-Z)

ifeq ($(MPI),YES)
  CC = mpicxx
else
  CCFLAGS += -I./STUBS_MPI
  LIB = libcsnompi.a
  SHLIB = libcsnompi.so
endif

# targets

# none of the command-style targets below names a real file; declare them
# phony so a stray file called e.g. "clean" or "lib" cannot mask its rule
.PHONY: shlib lib all help \
	shlib_parallel shlib_serial lib_parallel lib_serial \
	static shared clean clean-all tar

shlib: shlib_parallel shlib_serial

lib: lib_parallel lib_serial

all: shlib lib

help:
	@echo 'make                 default = shlib'
	@echo 'make shlib           build 2 shared CSlibs: parallel & serial'
	@echo 'make lib             build 2 static CSlibs: parallel & serial'
	@echo 'make all             build 4 CSlibs: shlib and lib'
	@echo 'make shlib_parallel  build shared parallel CSlib'
	@echo 'make shlib_serial    build shared serial CSlib'
	@echo 'make lib_parallel    build static parallel CSlib'
	@echo 'make lib_serial      build static serial CSlib'
	@echo 'make ... zmq=no      build w/out ZMQ support'
	@echo 'make clean           remove all *.o files'
	@echo 'make clean-all       remove *.o and lib files'
	@echo 'make tar             create a tarball, 2 levels up'

# each variant cleans first: all four flavors reuse the same *.o names,
# so stale objects from another flavor must never be linked in

shlib_parallel:
	$(MAKE) clean
	$(MAKE) shared zmq=$(zmq) mpi=yes

shlib_serial:
	$(MAKE) clean
	$(MAKE) shared zmq=$(zmq) mpi=no

lib_parallel:
	$(MAKE) clean
	$(MAKE) static zmq=$(zmq) mpi=yes

lib_serial:
	$(MAKE) clean
	$(MAKE) static zmq=$(zmq) mpi=no

static: $(OBJ)
	$(ARCHIVE) $(ARCHFLAGS) $(LIB) $(OBJ)

shared: $(OBJ)
	$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) -o $(SHLIB) $(OBJ) $(ZMQLIB)

clean:
	@rm -f *.o *.pyc

clean-all:
	@rm -f *.o *.pyc lib*.a lib*.so

tar:
	cd ../..; tar cvf cslib.tar cslib/README cslib/LICENSE \
	  cslib/doc cslib/src cslib/test

# rules

%.o:%.cpp
	$(CC) $(CCFLAGS) $(SHFLAGS) -c $<

View File

@ -0,0 +1,96 @@
/* ----------------------------------------------------------------------
   CSlib - Client/server library for code coupling
   http://cslib.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright 2018 National Technology & Engineering Solutions of
   Sandia, LLC (NTESS).  Under the terms of Contract DE-NA0003525 with
   NTESS, the U.S. Government retains certain rights in this software.
   This software is distributed under the modified Berkeley Software
   Distribution (BSD) License.

   See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */

// MPI constants and dummy functions
// serial (mpi=no) stand-in for <mpi.h>: just enough of the MPI API for
// the CSlib sources to compile and run as a single process
// the collectives below degenerate to copying the caller's input to the
// output buffer; point-to-point calls (MPI_Send/MPI_Recv) are empty no-ops
// NOTE(review): code paths that truly exchange data between ranks must not
//   be reachable in a serial build -- confirm in the Msg* callers

#ifndef MPI_DUMMY_H
#define MPI_DUMMY_H

#include <stdlib.h>
#include <stdint.h>
#include <string.h>

namespace CSLIB_NS {

// opaque MPI handle types all collapse to plain ints in the serial build

typedef int MPI_Comm;
typedef int MPI_Fint;
typedef int MPI_Datatype;
typedef int MPI_Status;
typedef int MPI_Op;
typedef int MPI_Info;

#define MPI_COMM_WORLD 0
#define MPI_MAX_PORT_NAME 0
#define MPI_INFO_NULL 0

// datatype codes: the copy helpers below use these to pick an element width

#define MPI_INT 1
#define MPI_LONG_LONG 2
#define MPI_FLOAT 3
#define MPI_DOUBLE 4
#define MPI_CHAR 5

#define MPI_SUM 0

static void MPI_Init(int *, char ***) {}
static MPI_Comm MPI_Comm_f2c(MPI_Comm world) {return world;}
static void MPI_Comm_rank(MPI_Comm, int *) {}
static void MPI_Comm_size(MPI_Comm, int *) {}

static void MPI_Open_port(MPI_Info, char *) {}
static void MPI_Close_port(const char *) {}
static void MPI_Comm_accept(const char *, MPI_Info, int,
                            MPI_Comm, MPI_Comm *) {}
static void MPI_Comm_connect(const char *, MPI_Info, int,
                             MPI_Comm, MPI_Comm *) {}

static void MPI_Comm_split(MPI_Comm, int, int, MPI_Comm *) {}
static void MPI_Comm_free(MPI_Comm *) {}

static void MPI_Send(const void *, int, MPI_Datatype, int, int, MPI_Comm) {}
static void MPI_Recv(void *, int, MPI_Datatype, int, int,
                     MPI_Comm, MPI_Status *) {}

// one-process "reduction" is the identity: output = input
// only MPI_INT is handled; other datatypes leave *out untouched

static void MPI_Allreduce(const void *in, void *out, int, MPI_Datatype type,
                          MPI_Op op, MPI_Comm)
{
  if (type == MPI_INT) *((int *) out) = *((int *) in);
}

// inclusive scan over a single process = the process's own value
// only MPI_INT is handled, as above

static void MPI_Scan(const void *in, void *out, int, MPI_Datatype intype,
                     MPI_Op op,MPI_Comm)
{
  if (intype == MPI_INT) *((int *) out) = *((int *) in);
}

static void MPI_Bcast(void *, int, MPI_Datatype, int, MPI_Comm) {}

static void MPI_Allgather(const void *in, int incount, MPI_Datatype intype,
                          void *out, int, MPI_Datatype, MPI_Comm)
{
  // assuming incount = 1
  if (intype == MPI_INT) *((int *) out) = *((int *) in);
}

// gather from the only rank = a plain copy, sized by the datatype code

static void MPI_Allgatherv(const void *in, int incount, MPI_Datatype intype,
                           void *out, const int *, const int *,
                           MPI_Datatype, MPI_Comm)
{
  if (intype == MPI_INT) memcpy(out,in,incount*sizeof(int));
  else if (intype == MPI_LONG_LONG) memcpy(out,in,incount*sizeof(int64_t));
  else if (intype == MPI_FLOAT) memcpy(out,in,incount*sizeof(float));
  else if (intype == MPI_DOUBLE) memcpy(out,in,incount*sizeof(double));
  else if (intype == MPI_CHAR) memcpy(out,in,incount*sizeof(char));
}

// abort = immediate exit of the single process with status 1

static void MPI_Abort(MPI_Comm, int) {exit(1);}

static void MPI_Finalize() {}

}

#endif

View File

@ -0,0 +1,36 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
// ZMQ constants and dummy functions
#ifndef ZMQ_DUMMY_H
#define ZMQ_DUMMY_H
namespace CSLIB_NS {
#define ZMQ_REQ 0
#define ZMQ_REP 0
static void *zmq_ctx_new() {return NULL;}
static void *zmq_connect(void *, char *) {return NULL;}
static int zmq_bind(void *, char *) {return 0;}
static void *zmq_socket(void *,int) {return NULL;}
static void zmq_close(void *) {}
static void zmq_ctx_destroy(void *) {}
static void zmq_send(void *, void *, int, int) {}
static void zmq_recv(void *, void *, int, int) {}
};
#endif

View File

@ -0,0 +1,768 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
#include <mpi.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <limits.h>
#include "cslib.h"
#include "msg_file.h"
#include "msg_zmq.h"
#include "msg_mpi_one.h"
#include "msg_mpi_two.h"
using namespace CSLIB_NS;
#define MAXTYPE 5 // # of defined field data types
/* ----------------------------------------------------------------------
   create a CSlib instance
   csflag = 0 for a client, 1 for a server
   mode = message transport: "file", "zmq", "mpi/one", or "mpi/two"
   ptr = transport-specific setup info, passed through to the Msg* class
   pcomm = pointer to the caller's MPI_Comm, or NULL for serial usage
------------------------------------------------------------------------- */

CSlib::CSlib(int csflag, const char *mode, const void *ptr, const void *pcomm)
{
  // cache the communicator as a 64-bit int so headers need no MPI types
  if (pcomm) myworld = (uint64_t) *((MPI_Comm *) pcomm);
  else myworld = 0;

#ifdef MPI_NO
  if (pcomm)
    error_all("constructor(): CSlib invoked with MPI_Comm "
              "but built w/out MPI support");
#endif
#ifdef MPI_YES    // NOTE: this could be OK to allow ??
                  // would allow a parallel app to invoke CSlib
                  //   in parallel and/or in serial
  if (!pcomm)
    error_all("constructor(): CSlib invoked w/out MPI_Comm "
              "but built with MPI support");
#endif

  client = server = 0;
  if (csflag == 0) client = 1;
  else if (csflag == 1) server = 1;
  else error_all("constructor(): Invalid client/server arg");

  // instantiate the requested transport
  // serial path: the mpi/one and mpi/two modes require a communicator

  if (pcomm == NULL) {
    me = 0;
    nprocs = 1;

    if (strcmp(mode,"file") == 0) msg = new MsgFile(csflag,ptr);
    else if (strcmp(mode,"zmq") == 0) msg = new MsgZMQ(csflag,ptr);
    else if (strcmp(mode,"mpi/one") == 0)
      error_all("constructor(): No mpi/one mode for serial lib usage");
    else if (strcmp(mode,"mpi/two") == 0)
      error_all("constructor(): No mpi/two mode for serial lib usage");
    else error_all("constructor(): Unknown mode");

  } else if (pcomm) {   // always true here; kept symmetric with branch above

    MPI_Comm world = (MPI_Comm) myworld;
    MPI_Comm_rank(world,&me);
    MPI_Comm_size(world,&nprocs);

    if (strcmp(mode,"file") == 0) msg = new MsgFile(csflag,ptr,world);
    else if (strcmp(mode,"zmq") == 0) msg = new MsgZMQ(csflag,ptr,world);
    else if (strcmp(mode,"mpi/one") == 0) msg = new MsgMPIOne(csflag,ptr,world);
    else if (strcmp(mode,"mpi/two") == 0) msg = new MsgMPITwo(csflag,ptr,world);
    else error_all("constructor(): Unknown mode");
  }

  // message buffers start empty/NULL; the max* counters appear to track
  // their allocated capacities, grown by later calls -- TODO confirm

  maxfield = 0;
  fieldID = fieldtype = fieldlen = fieldoffset = NULL;
  maxheader = 0;
  header = NULL;
  maxbuf = 0;
  buf = NULL;
  recvcounts = displs = NULL;
  maxglobal = 0;
  allids = NULL;
  maxfieldbytes = 0;
  fielddata = NULL;

  pad = "\0\0\0\0\0\0\0";    // just length 7 since will have trailing NULL

  nsend = nrecv = 0;
}
/* ----------------------------------------------------------------------
   free every buffer owned by this instance, then the transport object
------------------------------------------------------------------------- */

CSlib::~CSlib()
{
  deallocate_fields();
  sfree(header);
  sfree(buf);

  sfree(recvcounts);
  sfree(displs);
  sfree(allids);
  sfree(fielddata);

  // NOTE(review): several of these pointers may still be NULL (see the
  //   constructor), so sfree() is presumably NULL-safe -- confirm

  delete msg;
}
/* ----------------------------------------------------------------------
   start an outgoing message with ID msgID_caller and nfield_caller fields
   each field must then be supplied by one pack_*()/pack() call;
   the message goes out automatically once the final field is packed
------------------------------------------------------------------------- */

void CSlib::send(int msgID_caller, int nfield_caller)
{
  if (nfield_caller < 0) error_all("send(): Invalid nfield");

  nfield = nfield_caller;
  msgID = msgID_caller;
  allocate_fields();

  nbuf = 0;
  fieldcount = 0;

  // a zero-field message has no pending pack() calls, so ship it now
  if (fieldcount == nfield) send_message();
}
/* ---------------------------------------------------------------------- */
void CSlib::pack_int(int id, int value)
{
pack(id,1,1,&value);
}
/* ---------------------------------------------------------------------- */
void CSlib::pack_int64(int id, int64_t value)
{
pack(id,2,1,&value);
}
/* ---------------------------------------------------------------------- */
// pack a single float as one field (ftype 3) of the current message

void CSlib::pack_float(int id, float value)
{
  float copy = value;
  pack(id,3,1,&copy);
}
/* ---------------------------------------------------------------------- */
// pack a single double as one field (ftype 4) of the current message

void CSlib::pack_double(int id, double value)
{
  double copy = value;
  pack(id,4,1,&copy);
}
/* ---------------------------------------------------------------------- */
// pack a NULL-terminated string (ftype 5); field length includes the
// trailing NULL byte

void CSlib::pack_string(int id, char *value)
{
  int len = strlen(value) + 1;
  pack(id,5,len,value);
}
/* ---------------------------------------------------------------------- */
// pack one field of data into the current message
// id = unique field ID within this message
// ftype = data type: 1=int, 2=int64, 3=float, 4=double, 5=char string
// flen = # of datums (bytes for strings, including trailing NULL)
// data = the flen values, copied immediately into the message buffer

void CSlib::pack(int id, int ftype, int flen, void *data)
{
  if (find_field(id,fieldcount) >= 0)
    error_all("pack(): Reuse of field ID");
  if (ftype < 1 || ftype > MAXTYPE) error_all("pack(): Invalid ftype");
  if (flen < 0) error_all("pack(): Invalid flen");
  fieldID[fieldcount] = id;
  fieldtype[fieldcount] = ftype;
  fieldlen[fieldcount] = flen;
  int nbytes,nbytesround;
  // onefield() must run before the memcpy: it computes sizes AND grows buf
  onefield(ftype,flen,nbytes,nbytesround);
  memcpy(&buf[nbuf],data,nbytes);
  memcpy(&buf[nbuf+nbytes],pad,nbytesround-nbytes);  // pad to 8-byte boundary
  nbuf += nbytesround;
  fieldcount++;
  if (fieldcount == nfield) send_message();   // last field triggers the send
}
/* ---------------------------------------------------------------------- */
void CSlib::pack_parallel(int id, int ftype,
int nlocal, int *ids, int nper, void *data)
{
int i,j,k,m;
if (find_field(id,fieldcount) >= 0)
error_all("pack_parallel(): Reuse of field ID");
if (ftype < 1 || ftype > MAXTYPE) error_all("pack_parallel(): Invalid ftype");
if (nlocal < 0) error_all("pack_parallel(): Invalid nlocal");
if (nper < 1) error_all("pack_parallel(): Invalid nper");
MPI_Comm world = (MPI_Comm) myworld;
// NOTE: check for overflow of maxglobal and flen
int nglobal;
MPI_Allreduce(&nlocal,&nglobal,1,MPI_INT,MPI_SUM,world);
int flen = nper*nglobal;
fieldID[fieldcount] = id;
fieldtype[fieldcount] = ftype;
fieldlen[fieldcount] = flen;
// nlocal datums, each of nper length, from all procs
// final data in buf = datums for all natoms, ordered by ids
if (recvcounts == NULL) {
recvcounts = (int *) smalloc(nprocs*sizeof(int));
displs = (int *) smalloc(nprocs*sizeof(int));
}
MPI_Allgather(&nlocal,1,MPI_INT,recvcounts,1,MPI_INT,world);
displs[0] = 0;
for (int iproc = 1; iproc < nprocs; iproc++)
displs[iproc] = displs[iproc-1] + recvcounts[iproc-1];
if (ids && nglobal > maxglobal) {
sfree(allids);
maxglobal = nglobal;
// NOTE: maxglobal*sizeof(int) could overflow int
allids = (int *) smalloc(maxglobal*sizeof(int));
}
MPI_Allgatherv(ids,nlocal,MPI_INT,allids,
recvcounts,displs,MPI_INT,world);
int nlocalsize = nper*nlocal;
MPI_Allgather(&nlocalsize,1,MPI_INT,recvcounts,1,MPI_INT,world);
displs[0] = 0;
for (int iproc = 1; iproc < nprocs; iproc++)
displs[iproc] = displs[iproc-1] + recvcounts[iproc-1];
int nbytes,nbytesround;
onefield(ftype,flen,nbytes,nbytesround);
if (ftype == 1) {
int *alldata;
if (ids) {
if (nbytes > maxfieldbytes) {
sfree(fielddata);
maxfieldbytes = nbytes;
fielddata = (char *) smalloc(maxfieldbytes);
}
alldata = (int *) fielddata;
} else alldata = (int *) &buf[nbuf];
MPI_Allgatherv(data,nlocalsize,MPI_INT,alldata,
recvcounts,displs,MPI_INT,world);
if (ids) {
int *bufptr = (int *) &buf[nbuf];
m = 0;
for (i = 0; i < nglobal; i++) {
j = (allids[i]-1) * nper;
if (nper == 1) bufptr[j] = alldata[m++];
else
for (k = 0; k < nper; k++)
bufptr[j++] = alldata[m++];
}
}
} else if (ftype == 2) {
int64_t *alldata;
if (ids) {
if (nbytes > maxfieldbytes) {
sfree(fielddata);
maxfieldbytes = nbytes;
fielddata = (char *) smalloc(maxfieldbytes);
}
alldata = (int64_t *) fielddata;
} else alldata = (int64_t *) &buf[nbuf];
// NOTE: may be just MPI_LONG on some machines
MPI_Allgatherv(data,nlocalsize,MPI_LONG_LONG,alldata,
recvcounts,displs,MPI_LONG_LONG,world);
if (ids) {
int64_t *bufptr = (int64_t *) &buf[nbuf];
m = 0;
for (i = 0; i < nglobal; i++) {
j = (allids[i]-1) * nper;
if (nper == 1) bufptr[j] = alldata[m++];
else
for (k = 0; k < nper; k++)
bufptr[j++] = alldata[m++];
}
}
} else if (ftype == 3) {
float *alldata;
if (ids) {
if (nbytes > maxfieldbytes) {
sfree(fielddata);
maxfieldbytes = nbytes;
fielddata = (char *) smalloc(maxfieldbytes);
}
alldata = (float *) fielddata;
} else alldata = (float *) &buf[nbuf];
MPI_Allgatherv(data,nlocalsize,MPI_FLOAT,alldata,
recvcounts,displs,MPI_FLOAT,world);
if (ids) {
float *bufptr = (float *) &buf[nbuf];
m = 0;
for (i = 0; i < nglobal; i++) {
j = (allids[i]-1) * nper;
if (nper == 1) bufptr[j] = alldata[m++];
else
for (k = 0; k < nper; k++)
bufptr[j++] = alldata[m++];
}
}
} else if (ftype == 4) {
double *alldata;
if (ids) {
if (nbytes > maxfieldbytes) {
sfree(fielddata);
maxfieldbytes = nbytes;
fielddata = (char *) smalloc(maxfieldbytes);
}
alldata = (double *) fielddata;
} else alldata = (double *) &buf[nbuf];
MPI_Allgatherv(data,nlocalsize,MPI_DOUBLE,alldata,
recvcounts,displs,MPI_DOUBLE,world);
if (ids) {
double *bufptr = (double *) &buf[nbuf];
m = 0;
for (i = 0; i < nglobal; i++) {
j = (allids[i]-1) * nper;
if (nper == 1) bufptr[j] = alldata[m++];
else
for (k = 0; k < nper; k++)
bufptr[j++] = alldata[m++];
}
}
/* eventually ftype = BYTE, but not yet
} else if (ftype == 5) {
char *alldata;
if (ids) {
if (nbytes > maxfieldbytes) {
sfree(fielddata);
maxfieldbytes = nbytes;
fielddata = (char *) smalloc(maxfieldbytes);
}
alldata = (char *) fielddata;
} else alldata = (char *) &buf[nbuf];
MPI_Allgatherv(data,nlocalsize,MPI_CHAR,alldata,
recvcounts,displs,MPI_CHAR,world);
if (ids) {
char *bufptr = (char *) &buf[nbuf];
m = 0;
for (i = 0; i < nglobal; i++) {
j = (allids[i]-1) * nper;
memcpy(&bufptr[j],&alldata[m],nper);
m += nper;
}
}
*/
}
memcpy(&buf[nbuf+nbytes],pad,nbytesround-nbytes);
nbuf += nbytesround;
fieldcount++;
if (fieldcount == nfield) send_message();
}
/* ---------------------------------------------------------------------- */
// assemble the header (msgID, nfield, then one ID/type/len triplet per
// field) and hand header + data buffer to the messaging backend

void CSlib::send_message()
{
  header[0] = msgID;
  header[1] = nfield;
  int n = 2;
  for (int i = 0; i < nfield; i++) {
    header[n++] = fieldID[i];
    header[n++] = fieldtype[i];
    header[n++] = fieldlen[i];
  }

  msg->send(nheader,header,nbuf,buf);
  nsend++;
}
/* ---------------------------------------------------------------------- */
// receive one message (blocking)
// returns msgID; field metadata is returned via the reference args
// the returned arrays are owned by this class and remain valid until the
//   next recv() or destruction -- callers must not free them

int CSlib::recv(int &nfield_caller, int *&fieldID_caller,
                int *&fieldtype_caller, int *&fieldlen_caller)
{
  // NOTE(review): header/buf appear to be (re)grown by the Msg backend to
  // fit the incoming message -- confirm against the Msg::recv() signature
  msg->recv(maxheader,header,maxbuf,buf);
  nrecv++;

  // unpack header message

  int m = 0;
  msgID = header[m++];
  nfield = header[m++];
  allocate_fields();

  int nbytes,nbytesround;

  // record the byte offset of each field in buf; fields are 8-byte aligned

  nbuf = 0;
  for (int ifield = 0; ifield < nfield; ifield++) {
    fieldID[ifield] = header[m++];
    fieldtype[ifield] = header[m++];
    fieldlen[ifield] = header[m++];
    fieldoffset[ifield] = nbuf;
    onefield(fieldtype[ifield],fieldlen[ifield],nbytes,nbytesround);
    nbuf += nbytesround;
  }

  // return message parameters

  nfield_caller = nfield;
  fieldID_caller = fieldID;
  fieldtype_caller = fieldtype;
  fieldlen_caller = fieldlen;

  return msgID;
}
/* ---------------------------------------------------------------------- */
int CSlib::unpack_int(int id)
{
int ifield = find_field(id,nfield);
if (ifield < 0) error_all("unpack_int(): Unknown field ID");
if (fieldtype[ifield] != 1) error_all("unpack_int(): Mis-match of ftype");
if (fieldlen[ifield] != 1) error_all("unpack_int(): Flen is not 1");
int *ptr = (int *) unpack(id);
return *ptr;
}
/* ---------------------------------------------------------------------- */
int64_t CSlib::unpack_int64(int id)
{
int ifield = find_field(id,nfield);
if (ifield < 0) error_all("unpack_int64(): Unknown field ID");
if (fieldtype[ifield] != 2) error_all("unpack_int64(): Mis-match of ftype");
if (fieldlen[ifield] != 1) error_all("unpack_int64(): Flen is not 1");
int64_t *ptr = (int64_t *) unpack(id);
return *ptr;
}
/* ---------------------------------------------------------------------- */
float CSlib::unpack_float(int id)
{
int ifield = find_field(id,nfield);
if (ifield < 0) error_all("unpack_float(): Unknown field ID");
if (fieldtype[ifield] != 3) error_all("unpack_float(): Mis-match of ftype");
if (fieldlen[ifield] != 1) error_all("unpack_float(): Flen is not 1");
float *ptr = (float *) unpack(id);
return *ptr;
}
/* ---------------------------------------------------------------------- */
double CSlib::unpack_double(int id)
{
int ifield = find_field(id,nfield);
if (ifield < 0) error_all("unpack_double(): Unknown field ID");
if (fieldtype[ifield] != 4) error_all("unpack_double(): Mis-match of ftype");
if (fieldlen[ifield] != 1) error_all("unpack_double(): Flen is not 1");
double *ptr = (double *) unpack(id);
return *ptr;
}
/* ---------------------------------------------------------------------- */
char *CSlib::unpack_string(int id)
{
int ifield = find_field(id,nfield);
if (ifield < 0) error_all("unpack_string(): Unknown field ID");
if (fieldtype[ifield] != 5) error_all("unpack_string(): Mis-match of ftype");
char *ptr = (char *) unpack(id);
return ptr;
}
/* ---------------------------------------------------------------------- */
void *CSlib::unpack(int id)
{
int ifield = find_field(id,nfield);
if (ifield < 0) error_all("unpack(): Unknown field ID");
return &buf[fieldoffset[ifield]];
}
/* ---------------------------------------------------------------------- */
void CSlib::unpack(int id, void *data)
{
int ifield = find_field(id,nfield);
if (ifield < 0) error_all("unpack(): Unknown field ID");
int ftype = fieldtype[ifield];
int nbytes = fieldlen[ifield];
if (ftype == 1) nbytes *= sizeof(int);
else if (ftype == 2) nbytes *= sizeof(int64_t);
else if (ftype == 3) nbytes *= sizeof(float);
else if (ftype == 4) nbytes *= sizeof(double);
memcpy(data,&buf[fieldoffset[ifield]],nbytes);
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   unpack this proc's portion of a distributed per-datum field
   id = field ID, nlocal = # of datums this proc wants
   ids = global IDs (1 to Nglobal) of the desired datums, or NULL to take
         this proc's contiguous slice (offset computed via MPI_Scan)
   nper = # of values per datum
   data = caller-allocated space for nlocal*nper values (output)
   fix: the nper validation message previously read "pack_parallel():
   Invalid nper" (copy-paste error), misattributing the failure
------------------------------------------------------------------------- */

void CSlib::unpack_parallel(int id, int nlocal, int *ids, int nper, void *data)
{
  int i,j,k,m;

  int ifield = find_field(id,nfield);
  if (ifield < 0) error_all("unpack_parallel(): Unknown field ID");
  if (nlocal < 0) error_all("unpack_parallel(): Invalid nlocal");
  if (nper < 1) error_all("unpack_parallel(): Invalid nper");

  MPI_Comm world = (MPI_Comm) myworld;

  // without IDs, compute this proc's starting datum index via prefix sum

  int upto;
  if (!ids) {
    MPI_Scan(&nlocal,&upto,1,MPI_INT,MPI_SUM,world);
    upto -= nlocal;
  }

  if (fieldtype[ifield] == 1) {
    int *local = (int *) data;
    int *global = (int *) &buf[fieldoffset[ifield]];
    if (!ids) memcpy(local,&global[nper*upto],nper*nlocal*sizeof(int));
    else {
      m = 0;
      for (i = 0; i < nlocal; i++) {
        j = (ids[i]-1) * nper;
        if (nper == 1) local[m++] = global[j];
        else
          for (k = 0; k < nper; k++)
            local[m++] = global[j++];
      }
    }

  } else if (fieldtype[ifield] == 2) {
    int64_t *local = (int64_t *) data;
    int64_t *global = (int64_t *) &buf[fieldoffset[ifield]];
    if (!ids) memcpy(local,&global[nper*upto],nper*nlocal*sizeof(int64_t));
    else {
      m = 0;
      for (i = 0; i < nlocal; i++) {
        j = (ids[i]-1) * nper;
        if (nper == 1) local[m++] = global[j];
        else
          for (k = 0; k < nper; k++)
            local[m++] = global[j++];
      }
    }

  } else if (fieldtype[ifield] == 3) {
    float *local = (float *) data;
    float *global = (float *) &buf[fieldoffset[ifield]];
    if (!ids) memcpy(local,&global[nper*upto],nper*nlocal*sizeof(float));
    else {
      m = 0;
      for (i = 0; i < nlocal; i++) {
        j = (ids[i]-1) * nper;
        if (nper == 1) local[m++] = global[j];
        else
          for (k = 0; k < nper; k++)
            local[m++] = global[j++];
      }
    }

  } else if (fieldtype[ifield] == 4) {
    double *local = (double *) data;
    double *global = (double *) &buf[fieldoffset[ifield]];
    if (!ids) memcpy(local,&global[nper*upto],nper*nlocal*sizeof(double));
    else {
      m = 0;
      for (i = 0; i < nlocal; i++) {
        j = (ids[i]-1) * nper;
        if (nper == 1) local[m++] = global[j];
        else
          for (k = 0; k < nper; k++)
            local[m++] = global[j++];
      }
    }

  /* eventually ftype = BYTE, but not yet
  } else if (fieldtype[ifield] == 5) {
    char *local = (char *) data;
    char *global = (char *) &buf[fieldoffset[ifield]];
    if (!ids) memcpy(local,&global[nper*upto],nper*nlocal*sizeof(char));
    else {
      m = 0;
      for (i = 0; i < nlocal; i++) {
        j = (ids[i]-1) * nper;
        memcpy(&local[m],&global[j],nper);
        m += nper;
      }
    }
  */

  }
}
/* ---------------------------------------------------------------------- */
// return an internal counter: flag 1 = # of sends, flag 2 = # of receives

int CSlib::extract(int flag)
{
  switch (flag) {
  case 1: return nsend;
  case 2: return nrecv;
  }
  error_all("extract(): Invalid flag");
  return 0;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   compute byte counts for one field and grow the message buffer to fit
   ftype,flen = field type and length
   nbytes (out) = exact field size in bytes
   nbytesround (out) = size rounded up to a multiple of 8 for alignment
   errors out if the total message would exceed a 32-bit size
   fix: an unrecognized ftype now triggers an explicit error; previously
   bigbytes was left uninitialized (UB) -- reachable via recv(), which
   passes wire-supplied field types here without validation
------------------------------------------------------------------------- */

void CSlib::onefield(int ftype, int flen, int &nbytes, int &nbytesround)
{
  int64_t bigbytes,bigbytesround;
  int64_t biglen = flen;

  if (ftype == 1) bigbytes = biglen * sizeof(int);
  else if (ftype == 2) bigbytes = biglen * sizeof(int64_t);
  else if (ftype == 3) bigbytes = biglen * sizeof(float);
  else if (ftype == 4) bigbytes = biglen * sizeof(double);
  else if (ftype == 5) bigbytes = biglen * sizeof(char);
  else {
    error_all("onefield(): Unknown ftype");
    bigbytes = 0;   // unreachable: error_all() aborts; silences warnings
  }
  bigbytesround = roundup(bigbytes,8);
  if (nbuf + bigbytesround > INT_MAX)
    error_all("pack(): Message size exceeds 32-bit integer limit");

  nbytes = (int) bigbytes;
  nbytesround = (int) bigbytesround;
  if (nbuf + nbytesround > maxbuf) {
    maxbuf = nbuf + nbytesround;
    buf = (char *) srealloc(buf,maxbuf);
  }
}
/* ---------------------------------------------------------------------- */
// return the index of the field with ID id among the first n fields,
// or -1 if no such field exists

int CSlib::find_field(int id, int n)
{
  for (int i = 0; i < n; i++)
    if (fieldID[i] == id) return i;
  return -1;
}
/* ---------------------------------------------------------------------- */
// size the per-field arrays and the header for nfield fields
// header layout = msgID + nfield + 3 ints (ID,type,len) per field
// arrays only grow, never shrink, so they are reused across messages

void CSlib::allocate_fields()
{
  // compute the header size in 64-bit first to detect 32-bit overflow

  int64_t bigbytes = (2 + 3*((int64_t) nfield)) * sizeof(int);
  if (bigbytes > INT_MAX)
    error_all("send(): Message header size exceeds 32-bit integer limit");

  nheader = 2;
  nheader += 3 * nfield;

  if (nfield > maxfield) {
    deallocate_fields();
    maxfield = nfield;
    fieldID = new int[maxfield];
    fieldtype = new int[maxfield];
    fieldlen = new int[maxfield];
    fieldoffset = new int[maxfield];
  }

  if (nheader > maxheader) {
    sfree(header);
    maxheader = nheader;
    header = (int *) smalloc(maxheader*sizeof(int));
  }
}
/* ---------------------------------------------------------------------- */
// free the per-field metadata arrays (delete [] of NULL is a no-op,
// so this is safe before the first allocate_fields())

void CSlib::deallocate_fields()
{
  delete [] fieldID;
  delete [] fieldtype;
  delete [] fieldlen;
  delete [] fieldoffset;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   safe malloc: returns NULL for a zero-size request, aborts on failure
   fix: use bounded snprintf() instead of sprintf() when formatting the
   error message, so the 128-byte buffer can never be overrun
------------------------------------------------------------------------- */

void *CSlib::smalloc(int nbytes)
{
  if (nbytes == 0) return NULL;
  void *ptr = malloc(nbytes);
  if (ptr == NULL) {
    char str[128];
    snprintf(str,sizeof(str),"malloc(): Failed to allocate %d bytes",nbytes);
    error_one(str);
  }
  return ptr;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   safe realloc: a zero-size request frees ptr and returns NULL,
   aborts on allocation failure
   fix: use bounded snprintf() instead of sprintf() when formatting the
   error message, so the 128-byte buffer can never be overrun
------------------------------------------------------------------------- */

void *CSlib::srealloc(void *ptr, int nbytes)
{
  if (nbytes == 0) {
    sfree(ptr);
    return NULL;
  }

  ptr = realloc(ptr,nbytes);
  if (ptr == NULL) {
    char str[128];
    snprintf(str,sizeof(str),"realloc(): Failed to reallocate %d bytes",
             nbytes);
    error_one(str);
  }
  return ptr;
}
/* ---------------------------------------------------------------------- */
// free() wrapper that tolerates a NULL pointer

void CSlib::sfree(void *ptr)
{
  if (!ptr) return;
  free(ptr);
}
/* ---------------------------------------------------------------------- */
// print an error message on rank 0 only, then abort all ranks
// used for errors that occur identically on every proc

void CSlib::error_all(const char *str)
{
  if (me == 0) printf("CSlib ERROR: %s\n",str);
  MPI_Comm world = (MPI_Comm) myworld;
  MPI_Abort(world,1);
}
/* ---------------------------------------------------------------------- */
// print an error message on this rank, then abort all ranks
// used for errors that may occur on a single proc (e.g. allocation failure)

void CSlib::error_one(const char *str)
{
  printf("CSlib ERROR: %s\n",str);
  MPI_Comm world = (MPI_Comm) myworld;
  MPI_Abort(world,1);
}
/* ----------------------------------------------------------------------
   round N up to multiple of nalign and return it
   assumes n >= 0 (truncating integer division would misbehave for n < 0;
   all callers pass non-negative byte counts)
   NOTE: see mapreduce/src/keyvalue.cpp for doing this as uint64_t
------------------------------------------------------------------------- */

int64_t CSlib::roundup(int64_t n, int nalign)
{
  if (n % nalign == 0) return n;
  n = (n/nalign + 1) * nalign;
  return n;
}

View File

@ -0,0 +1,87 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
#ifndef CSLIB_H
#define CSLIB_H
#include <stdint.h>
namespace CSLIB_NS {
// CSlib: one client or server endpoint of a coupled-code message channel
// usage: send() declares msgID + field count, then one pack_*() per field
// (the last pack triggers transmission); recv() blocks for a message,
// then one unpack_*() per field retrieves the data

class CSlib {
 public:
  int nsend,nrecv;                // running counts of messages sent/received

  CSlib(int, const char *, const void *, const void *);
  ~CSlib();

  // sending side

  void send(int, int);
  void pack_int(int, int);
  void pack_int64(int, int64_t);
  void pack_float(int, float);
  void pack_double(int, double);
  void pack_string(int, char *);
  void pack(int, int, int, void *);
  void pack_parallel(int, int, int, int *, int, void *);

  // receiving side

  int recv(int &, int *&, int *&, int *&);
  int unpack_int(int);
  int64_t unpack_int64(int);
  float unpack_float(int);
  double unpack_double(int);
  char *unpack_string(int);
  void *unpack(int);
  void unpack(int, void *);
  void unpack_parallel(int, int, int *, int, void *);
  int extract(int);

 private:
  uint64_t myworld;          // really MPI_Comm, but avoids use of mpi.h
                             // in this file so apps can include this
                             // file w/ no MPI on system
  int me,nprocs;             // rank and size within myworld
  int client,server;         // role flags for this endpoint
  int nfield,maxfield;       // fields in current message / allocated capacity
  int msgID,fieldcount;      // current message ID, # of fields packed so far
  int nheader,maxheader;     // header length in ints / allocated capacity
  int nbuf,maxbuf;           // data buffer length in bytes / capacity
  int maxglobal,maxfieldbytes;   // capacities of allids and fielddata

  int *fieldID,*fieldtype,*fieldlen,*fieldoffset;  // per-field metadata
  int *header;               // assembled/received message header
  int *recvcounts,*displs;   // nprocs size for Allgathers
  int *allids;               // nglobal size for pack_parallel()
  char *buf;                 // maxbuf size for msg with all fields
  char *fielddata;           // maxfieldbytes size for one global field
  const char *pad;           // zero bytes used for 8-byte field alignment
  class Msg *msg;            // messaging backend (file, zmq, or MPI)

  void send_message();
  void onefield(int, int, int &, int &);
  int find_field(int, int);
  void allocate_fields();
  void deallocate_fields();
  int64_t roundup(int64_t, int);
  void *smalloc(int);
  void *srealloc(void *, int);
  void sfree(void *);
  void error_all(const char *);
  void error_one(const char *);
};
}
#endif

View File

@ -0,0 +1,362 @@
# ------------------------------------------------------------------------
# CSlib - Client/server library for code coupling
# http://cslib.sandia.gov, Sandia National Laboratories
# Steve Plimpton, sjplimp@sandia.gov
#
# Copyright 2018 National Technology & Engineering Solutions of
# Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
# NTESS, the U.S. Government retains certain rights in this software.
# This software is distributed under the modified Berkeley Software
# Distribution (BSD) License.
#
# See the README file in the top-level CSlib directory.
# -------------------------------------------------------------------------
# Python wrapper on CSlib library via ctypes
# ctypes and Numpy data types:
# 32-bit int = c_int = np.intc = np.int32
# 64-bit int = c_longlong = np.int64
# 32-bit floating point = c_float = np.float32
# 64-bit floating point = c_double = np.float = np.float64
import sys,traceback
from ctypes import *
# Numpy and mpi4py packages may not exist
try:
import numpy as np
numpyflag = 1
except:
numpyflag = 0
try:
from mpi4py import MPI
mpi4pyflag = 1
except:
mpi4pyflag = 0
# wrapper class
class CSlib:
    """Python wrapper on the CSlib client/server library via ctypes.

    Loads libcsmpi.so (MPI build) or libcsnompi.so (serial build) and
    mirrors the C interface in cslib_wrap.h.  Field data type codes used
    throughout: 1 = 32-bit int, 2 = 64-bit int, 3 = float, 4 = double
    (5 = string in the C library).

    NOTE(review): this module uses Python-2-only syntax (print statements,
    "raise Exc,msg") and will not run under Python 3 as-is.
    """

    # instantiate CSlib thru its C-interface

    def __init__(self,csflag,mode,ptr,comm):
        # csflag/mode/ptr are forwarded to the C cslib_open();
        # comm = mpi4py communicator, or None/0 for the serial library

        # load libcslib.so

        try:
            if comm: self.lib = CDLL("libcsmpi.so",RTLD_GLOBAL)
            else: self.lib = CDLL("libcsnompi.so",RTLD_GLOBAL)
        except:
            etype,value,tb = sys.exc_info()
            traceback.print_exception(etype,value,tb)
            raise OSError,"Could not load CSlib dynamic library"

        # define ctypes API for each library method

        self.lib.cslib_open.argtypes = [c_int,c_char_p,c_void_p,c_void_p,
                                        POINTER(c_void_p)]
        self.lib.cslib_open.restype = None

        self.lib.cslib_close.argtypes = [c_void_p]
        self.lib.cslib_close.restype = None

        self.lib.cslib_send.argtypes = [c_void_p,c_int,c_int]
        self.lib.cslib_send.restype = None

        self.lib.cslib_pack_int.argtypes = [c_void_p,c_int,c_int]
        self.lib.cslib_pack_int.restype = None

        self.lib.cslib_pack_int64.argtypes = [c_void_p,c_int,c_longlong]
        self.lib.cslib_pack_int64.restype = None

        self.lib.cslib_pack_float.argtypes = [c_void_p,c_int,c_float]
        self.lib.cslib_pack_float.restype = None

        self.lib.cslib_pack_double.argtypes = [c_void_p,c_int,c_double]
        self.lib.cslib_pack_double.restype = None

        self.lib.cslib_pack_string.argtypes = [c_void_p,c_int,c_char_p]
        self.lib.cslib_pack_string.restype = None

        self.lib.cslib_pack.argtypes = [c_void_p,c_int,c_int,c_int,c_void_p]
        self.lib.cslib_pack.restype = None

        self.lib.cslib_pack_parallel.argtypes = [c_void_p,c_int,c_int,c_int,
                                                 POINTER(c_int),c_int,c_void_p]
        self.lib.cslib_pack_parallel.restype = None

        self.lib.cslib_recv.argtypes = [c_void_p,POINTER(c_int),
                                        POINTER(POINTER(c_int)),
                                        POINTER(POINTER(c_int)),
                                        POINTER(POINTER(c_int))]
        self.lib.cslib_recv.restype = c_int

        self.lib.cslib_unpack_int.argtypes = [c_void_p,c_int]
        self.lib.cslib_unpack_int.restype = c_int

        self.lib.cslib_unpack_int64.argtypes = [c_void_p,c_int]
        self.lib.cslib_unpack_int64.restype = c_longlong

        self.lib.cslib_unpack_float.argtypes = [c_void_p,c_int]
        self.lib.cslib_unpack_float.restype = c_float

        self.lib.cslib_unpack_double.argtypes = [c_void_p,c_int]
        self.lib.cslib_unpack_double.restype = c_double

        self.lib.cslib_unpack_string.argtypes = [c_void_p,c_int]
        self.lib.cslib_unpack_string.restype = c_char_p

        # override return in unpack()
        self.lib.cslib_unpack.argtypes = [c_void_p,c_int]
        self.lib.cslib_unpack.restype = c_void_p

        self.lib.cslib_unpack_data.argtypes = [c_void_p,c_int,c_void_p]
        self.lib.cslib_unpack_data.restype = None

        # override last arg in unpack_parallel()
        self.lib.cslib_unpack_parallel.argtypes = [c_void_p,c_int,c_int,
                                                   POINTER(c_int),c_int,c_void_p]
        self.lib.cslib_unpack_parallel.restype = None

        self.lib.cslib_extract.argtypes = [c_void_p,c_int]
        self.lib.cslib_extract.restype = c_int

        # create an instance of CSlib with or w/out MPI communicator

        self.cs = c_void_p()

        if not comm:
            self.lib.cslib_open(csflag,mode,ptr,None,byref(self.cs))
        elif not mpi4pyflag:
            print "Cannot pass MPI communicator to CSlib w/out mpi4py package"
            sys.exit()
        else:
            address = MPI._addressof(comm)
            comm_ptr = c_void_p(address)
            if mode == "mpi/one":
                # mpi/one mode also needs the "both" communicator passed as ptr
                address = MPI._addressof(ptr)
                ptrcopy = c_void_p(address)
            else: ptrcopy = ptr
            self.lib.cslib_open(csflag,mode,ptrcopy,comm_ptr,byref(self.cs))

    # destroy instance of CSlib
    # NOTE(review): if close() was called first, self.lib is None and this
    #   raises (suppressed) inside __del__ -- confirm intended

    def __del__(self):
        if self.cs: self.lib.cslib_close(self.cs)

    def close(self):
        # explicit close; drops the library handle so no further calls work
        self.lib.cslib_close(self.cs)
        self.lib = None

    # send a message

    def send(self,msgID,nfield):
        # declare message ID and # of fields; pack_*() calls must follow
        self.nfield = nfield
        self.lib.cslib_send(self.cs,msgID,nfield)

    # pack one field of message

    def pack_int(self,id,value):
        self.lib.cslib_pack_int(self.cs,id,value)

    def pack_int64(self,id,value):
        self.lib.cslib_pack_int64(self.cs,id,value)

    def pack_float(self,id,value):
        self.lib.cslib_pack_float(self.cs,id,value)

    def pack_double(self,id,value):
        self.lib.cslib_pack_double(self.cs,id,value)

    def pack_string(self,id,value):
        self.lib.cslib_pack_string(self.cs,id,value)

    def pack(self,id,ftype,flen,data):
        # data may be list/tuple, Numpy array, or ctypes vector
        cdata = self.data_convert(ftype,flen,data)
        self.lib.cslib_pack(self.cs,id,ftype,flen,cdata)

    def pack_parallel(self,id,ftype,nlocal,ids,nper,data):
        # collective: ids = global IDs of this proc's nlocal datums
        cids = self.data_convert(1,nlocal,ids)
        cdata = self.data_convert(ftype,nper*nlocal,data)
        self.lib.cslib_pack_parallel(self.cs,id,ftype,nlocal,cids,nper,cdata)

    # convert input data to a ctypes vector to pass to CSlib

    def data_convert(self,ftype,flen,data):

        # tflag = type of data
        # tflag = 1 if data is list or tuple
        # tflag = 2 if data is Numpy array
        # tflag = 3 if data is ctypes vector
        # same usage of tflag as in unpack function

        txttype = str(type(data))
        if "numpy" in txttype: tflag = 2
        elif "c_" in txttype: tflag = 3
        else: tflag = 1

        # create ctypes vector out of data to pass to lib
        # cdata = ctypes vector to return
        # NOTE: error check on ftype and tflag everywhere, also flen

        if ftype == 1:
            if tflag == 1: cdata = (flen * c_int)(*data)
            elif tflag == 2: cdata = data.ctypes.data_as(POINTER(c_int))
            elif tflag == 3: cdata = data
        elif ftype == 2:
            if tflag == 1: cdata = (flen * c_longlong)(*data)
            elif tflag == 2: cdata = data.ctypes.data_as(POINTER(c_longlong))
            elif tflag == 3: cdata = data
        elif ftype == 3:
            if tflag == 1: cdata = (flen * c_float)(*data)
            elif tflag == 2: cdata = data.ctypes.data_as(POINTER(c_float))
            elif tflag == 3: cdata = data
        elif ftype == 4:
            if tflag == 1: cdata = (flen * c_double)(*data)
            elif tflag == 2: cdata = data.ctypes.data_as(POINTER(c_double))
            elif tflag == 3: cdata = data

        return cdata

    # receive a message

    def recv(self):
        # blocks until a message arrives; returns
        # (msgID, nfield, fieldID list, fieldtype list, fieldlen list)
        self.lib.cslib_recv.restype = c_int
        nfield = c_int()
        fieldID = POINTER(c_int)()
        fieldtype = POINTER(c_int)()
        fieldlen = POINTER(c_int)()
        msgID = self.lib.cslib_recv(self.cs,byref(nfield),
                                    byref(fieldID),byref(fieldtype),byref(fieldlen))

        # copy returned C args to native Python int and lists
        # store them in class so unpack() methods can access the info

        self.nfield = nfield = nfield.value
        self.fieldID = fieldID[:nfield]
        self.fieldtype = fieldtype[:nfield]
        self.fieldlen = fieldlen[:nfield]

        return msgID,self.nfield,self.fieldID,self.fieldtype,self.fieldlen

    # unpack one field of message
    # tflag = type of data to return
    # 3 = ctypes vector is default, since no conversion required

    def unpack_int(self,id):
        return self.lib.cslib_unpack_int(self.cs,id)

    def unpack_int64(self,id):
        return self.lib.cslib_unpack_int64(self.cs,id)

    def unpack_float(self,id):
        return self.lib.cslib_unpack_float(self.cs,id)

    def unpack_double(self,id):
        return self.lib.cslib_unpack_double(self.cs,id)

    def unpack_string(self,id):
        return self.lib.cslib_unpack_string(self.cs,id)

    def unpack(self,id,tflag=3):
        # returns the field's data as list (tflag=1), Numpy array (tflag=2),
        # or ctypes vector (tflag=3); requires a prior recv()
        index = self.fieldID.index(id)

        # reset data type of return so can morph by tflag
        # cannot do this for the generic c_void_p returned by CSlib

        if self.fieldtype[index] == 1:
            self.lib.cslib_unpack.restype = POINTER(c_int)
        elif self.fieldtype[index] == 2:
            self.lib.cslib_unpack.restype = POINTER(c_longlong)
        elif self.fieldtype[index] == 3:
            self.lib.cslib_unpack.restype = POINTER(c_float)
        elif self.fieldtype[index] == 4:
            self.lib.cslib_unpack.restype = POINTER(c_double)
        #elif self.fieldtype[index] == 5:
        #  self.lib.cslib_unpack.restype = POINTER(c_char)

        cdata = self.lib.cslib_unpack(self.cs,id)

        # tflag = user-requested type of data to return
        # tflag = 1 to return data as list
        # tflag = 2 to return data as Numpy array
        # tflag = 3 to return data as ctypes vector
        # same usage of tflag as in pack functions
        # tflag = 2,3 should NOT perform a data copy

        if tflag == 1:
            data = cdata[:self.fieldlen[index]]
        elif tflag == 2:
            if numpyflag == 0:
                print "Cannot return Numpy array w/out numpy package"
                sys.exit()
            data = np.ctypeslib.as_array(cdata,shape=(self.fieldlen[index],))
        elif tflag == 3:
            data = cdata

        return data

    # handle data array like pack() or unpack_parallel() ??
    # NOTE(review): stub -- computes the field index but returns nothing

    def unpack_data(self,id,tflag=3):
        index = self.fieldID.index(id)

    # unpack one field of message in parallel
    # tflag = type of data to return
    # 3 = ctypes vector is default, since no conversion required
    # NOTE: allow direct use of user array (e.g. Numpy), if user provides data arg?
    #       as opposed to creating this cdata
    #       does that make any performance difference ?
    #       e.g. should we allow CSlib to populate an existing Numpy array's memory

    def unpack_parallel(self,id,nlocal,ids,nper,tflag=3):
        # collective: ids = global IDs of the nlocal datums this proc wants
        cids = self.data_convert(1,nlocal,ids)

        # allocate memory for the returned data
        # pass cdata ptr to the memory to CSlib unpack_parallel()
        # this resets data type of last unpack_parallel() arg

        index = self.fieldID.index(id)
        if self.fieldtype[index] == 1: cdata = (nper*nlocal * c_int)()
        elif self.fieldtype[index] == 2: cdata = (nlocal*nper * c_longlong)()
        elif self.fieldtype[index] == 3: cdata = (nlocal*nper * c_float)()
        elif self.fieldtype[index] == 4: cdata = (nlocal*nper * c_double)()
        #elif self.fieldtype[index] == 5: cdata = (nlocal*nper * c_char)()

        self.lib.cslib_unpack_parallel(self.cs,id,nlocal,cids,nper,cdata)

        # tflag = user-requested type of data to return
        # tflag = 1 to return data as list
        # tflag = 2 to return data as Numpy array
        # tflag = 3 to return data as ctypes vector
        # same usage of tflag as in pack functions

        if tflag == 1:
            data = cdata[:nper*nlocal]
        elif tflag == 2:
            if numpyflag == 0:
                print "Cannot return Numpy array w/out numpy package"
                sys.exit()
            # NOTE: next line gives ctypes warning for fieldtype = 2 = 64-bit int
            #   not sure why, reported as bug between ctypes and Numpy here:
            #   https://stackoverflow.com/questions/4964101/pep-3118-
            #     warning-when-using-ctypes-array-as-numpy-array
            #   but why not same warning when just using unpack() ??
            #   in Python these lines give same warning:
            #   >>> import ctypes,numpy
            #   >>> a = (10 * ctypes.c_longlong)()
            #   >>> b = numpy.ctypeslib.as_array(a)
            data = np.ctypeslib.as_array(cdata,shape=(nlocal*nper,))
        elif tflag == 3:
            data = cdata

        return data

    # extract a library value: flag 1 = # of sends, flag 2 = # of receives

    def extract(self,flag):
        return self.lib.cslib_extract(self.cs,flag)

View File

@ -0,0 +1,239 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
// C style library interface to CSlib class
#include <mpi.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cslib_wrap.h"
#include "cslib.h"
using namespace CSLIB_NS;
// ----------------------------------------------------------------------
// create a CSlib instance and return it as an opaque handle in *csptr

void cslib_open(int csflag, const char *mode, const void *ptr,
                const void *pcomm, void **csptr)
{
  *csptr = (void *) new CSlib(csflag,mode,ptr,pcomm);
}
// ----------------------------------------------------------------------
// Fortran-callable open: convert the Fortran MPI communicator handle
// (MPI_Fint) to a C MPI_Comm before constructing the CSlib instance
// pcomm = NULL selects the serial (no-MPI) code path

void cslib_open_fortran(int csflag, const char *mode, const char *str,
                        const void *pcomm, void **csptr)
{
  MPI_Comm ccomm;
  void *pccomm = NULL;

  if (pcomm) {
    MPI_Fint *fcomm = (MPI_Fint *) pcomm;
    ccomm = MPI_Comm_f2c(*fcomm);
    pccomm = &ccomm;
  }

  CSlib *cs = new CSlib(csflag,mode,str,pccomm);
  *csptr = (void *) cs;
}
// ----------------------------------------------------------------------
// Fortran-callable open for mpi/one mode: converts both the "both"
// communicator (shared by client+server) and this side's communicator
// from Fortran handles to C MPI_Comm before constructing CSlib

void cslib_open_fortran_mpi_one(int csflag, const char *mode,
                                const void *pboth, const void *pcomm,
                                void **csptr)
{
  MPI_Comm ccomm,cboth;
  void *pccomm,*pcboth;

  MPI_Fint *fcomm = (MPI_Fint *) pcomm;
  ccomm = MPI_Comm_f2c(*fcomm);
  pccomm = &ccomm;

  MPI_Fint *fboth = (MPI_Fint *) pboth;
  cboth = MPI_Comm_f2c(*fboth);
  pcboth = &cboth;

  CSlib *cs = new CSlib(csflag,mode,pcboth,pccomm);
  *csptr = (void *) cs;
}
// ----------------------------------------------------------------------
// destroy a CSlib instance created by one of the cslib_open() variants

void cslib_close(void *ptr)
{
  delete (CSlib *) ptr;
}
// ----------------------------------------------------------------------
void cslib_send(void *ptr, int msgID, int nfield)
{
CSlib *cs = (CSlib *) ptr;
cs->send(msgID,nfield);
}
// ----------------------------------------------------------------------
void cslib_pack_int(void *ptr, int id, int value)
{
CSlib *cs = (CSlib *) ptr;
cs->pack_int(id,value);
}
// ----------------------------------------------------------------------
void cslib_pack_int64(void *ptr, int id, int64_t value)
{
CSlib *cs = (CSlib *) ptr;
cs->pack_int64(id,value);
}
// ----------------------------------------------------------------------
void cslib_pack_float(void *ptr, int id, float value)
{
CSlib *cs = (CSlib *) ptr;
cs->pack_float(id,value);
}
// ----------------------------------------------------------------------
void cslib_pack_double(void *ptr, int id, double value)
{
CSlib *cs = (CSlib *) ptr;
cs->pack_double(id,value);
}
// ----------------------------------------------------------------------
void cslib_pack_string(void *ptr, int id, char *value)
{
CSlib *cs = (CSlib *) ptr;
cs->pack_string(id,value);
}
// ----------------------------------------------------------------------
void cslib_pack(void *ptr, int id, int ftype, int flen, void *data)
{
CSlib *cs = (CSlib *) ptr;
cs->pack(id,ftype,flen,data);
}
// ----------------------------------------------------------------------
void cslib_pack_parallel(void *ptr, int id, int ftype,
int nlocal, int *ids, int nper, void *data)
{
CSlib *cs = (CSlib *) ptr;
cs->pack_parallel(id,ftype,nlocal,ids,nper,data);
}
// ----------------------------------------------------------------------
int cslib_recv(void *ptr, int *nfield_caller,
int **fieldID_caller, int **fieldtype_caller,
int **fieldlen_caller)
{
CSlib *cs = (CSlib *) ptr;
int nfield;
int *fieldID,*fieldtype,*fieldlen;
int msgID = cs->recv(nfield,fieldID,fieldtype,fieldlen);
*nfield_caller = nfield;
*fieldID_caller = fieldID;
*fieldtype_caller = fieldtype;
*fieldlen_caller = fieldlen;
return msgID;
}
// ----------------------------------------------------------------------
int cslib_unpack_int(void *ptr, int id)
{
CSlib *cs = (CSlib *) ptr;
return cs->unpack_int(id);
}
// ----------------------------------------------------------------------
int64_t cslib_unpack_int64(void *ptr, int id)
{
CSlib *cs = (CSlib *) ptr;
return cs->unpack_int64(id);
}
// ----------------------------------------------------------------------
float cslib_unpack_float(void *ptr, int id)
{
CSlib *cs = (CSlib *) ptr;
return cs->unpack_float(id);
}
// ----------------------------------------------------------------------
double cslib_unpack_double(void *ptr, int id)
{
CSlib *cs = (CSlib *) ptr;
return cs->unpack_double(id);
}
// ----------------------------------------------------------------------
char *cslib_unpack_string(void *ptr, int id)
{
CSlib *cs = (CSlib *) ptr;
return cs->unpack_string(id);
}
// ----------------------------------------------------------------------
void *cslib_unpack(void *ptr, int id)
{
CSlib *cs = (CSlib *) ptr;
return cs->unpack(id);
}
// ----------------------------------------------------------------------
void cslib_unpack_data(void *ptr, int id, void *data)
{
CSlib *cs = (CSlib *) ptr;
cs->unpack(id,data);
}
// ----------------------------------------------------------------------
void cslib_unpack_parallel(void *ptr, int id, int nlocal, int *ids,
int nper, void *data)
{
CSlib *cs = (CSlib *) ptr;
cs->unpack_parallel(id,nlocal,ids,nper,data);
}
// ----------------------------------------------------------------------
int cslib_extract(void *ptr, int flag)
{
CSlib *cs = (CSlib *) ptr;
return cs->extract(flag);
}

View File

@ -0,0 +1,147 @@
! ISO_C_binding wrapper on CSlib C interface
module cslib_wrap

! Fortran 2003 ISO_C_binding interface block mirroring the CSlib C API
! declared in cslib.h.  Scalars are passed by value; arrays/strings as
! assumed-size character/c_ptr arguments; opaque CSlib handles as c_ptr.

interface
  ! open a CSlib instance (mode string + mode-specific string argument)
  subroutine cslib_open_fortran(csflag,mode,str,pcomm,ptr) bind(c)
    use iso_c_binding
    integer(c_int), value :: csflag
    character(c_char) :: mode(*),str(*)
    type(c_ptr), value :: pcomm
    type(c_ptr) :: ptr
  end subroutine cslib_open_fortran

  ! open variant for mpi/one mode: pboth/pcomm are Fortran MPI handles
  subroutine cslib_open_fortran_mpi_one(csflag,mode,pboth,pcomm,ptr) bind(c)
    use iso_c_binding
    integer(c_int), value :: csflag
    character(c_char) :: mode(*)
    type(c_ptr), value :: pboth,pcomm
    type(c_ptr) :: ptr
  end subroutine cslib_open_fortran_mpi_one

  subroutine cslib_close(ptr) bind(c)
    use iso_c_binding
    type(c_ptr), value :: ptr
  end subroutine cslib_close

  subroutine cslib_send(ptr,msgID,nfield) bind(c)
    use iso_c_binding
    type(c_ptr), value :: ptr
    integer(c_int), value :: msgID,nfield
  end subroutine cslib_send

  ! pack_* family: add one field of the named type to the outgoing message
  subroutine cslib_pack_int(ptr,id,value) bind(c)
    use iso_c_binding
    type(c_ptr), value :: ptr
    integer(c_int), value :: id
    integer(c_int), value :: value
  end subroutine cslib_pack_int

  subroutine cslib_pack_int64(ptr,id,value) bind(c)
    use iso_c_binding
    type(c_ptr), value :: ptr
    integer(c_int), value :: id
    integer(c_int64_t), value :: value
  end subroutine cslib_pack_int64

  subroutine cslib_pack_float(ptr,id,value) bind(c)
    use iso_c_binding
    type(c_ptr), value :: ptr
    integer(c_int), value :: id
    real(c_float), value :: value
  end subroutine cslib_pack_float

  subroutine cslib_pack_double(ptr,id,value) bind(c)
    use iso_c_binding
    type(c_ptr), value :: ptr
    integer(c_int), value :: id
    real(c_double), value :: value
  end subroutine cslib_pack_double

  subroutine cslib_pack_string(ptr,id,value) bind(c)
    use iso_c_binding
    type(c_ptr), value :: ptr
    integer(c_int), value :: id
    character(c_char) :: value(*)
  end subroutine cslib_pack_string

  subroutine cslib_pack(ptr,id,ftype,flen,data) bind(c)
    use iso_c_binding
    type(c_ptr), value :: ptr
    integer(c_int), value :: id,ftype,flen
    type(c_ptr), value :: data
  end subroutine cslib_pack

  subroutine cslib_pack_parallel(ptr,id,ftype,nlocal,ids,nper,data) bind(c)
    use iso_c_binding
    type(c_ptr), value :: ptr
    integer(c_int), value :: id,ftype,nlocal,nper
    type(c_ptr), value :: ids,data
  end subroutine cslib_pack_parallel

  ! receive a message; returns msgID, outputs field metadata pointers
  function cslib_recv(ptr,nfield,fieldID,fieldtype,fieldlen) bind(c)
    use iso_c_binding
    integer(c_int) :: cslib_recv
    type(c_ptr), value :: ptr
    integer(c_int) :: nfield
    type(c_ptr) :: fieldID,fieldtype,fieldlen
  end function cslib_recv

  ! unpack_* family: extract one field of the named type from the message
  function cslib_unpack_int(ptr,id) bind(c)
    use iso_c_binding
    integer(c_int) :: cslib_unpack_int
    type(c_ptr), value :: ptr
    integer(c_int), value :: id
  end function cslib_unpack_int

  function cslib_unpack_int64(ptr,id) bind(c)
    use iso_c_binding
    integer(c_int64_t) :: cslib_unpack_int64
    type(c_ptr), value :: ptr
    integer(c_int), value :: id
  end function cslib_unpack_int64

  function cslib_unpack_float(ptr,id) bind(c)
    use iso_c_binding
    real(c_float) :: cslib_unpack_float
    type(c_ptr), value :: ptr
    integer(c_int), value :: id
  end function cslib_unpack_float

  function cslib_unpack_double(ptr,id) bind(c)
    use iso_c_binding
    real(c_double) :: cslib_unpack_double
    type(c_ptr), value :: ptr
    integer(c_int), value :: id
  end function cslib_unpack_double

  function cslib_unpack_string(ptr,id) bind(c)
    use iso_c_binding
    type(c_ptr) :: cslib_unpack_string
    type(c_ptr), value :: ptr
    integer(c_int), value :: id
  end function cslib_unpack_string

  function cslib_unpack(ptr,id) bind(c)
    use iso_c_binding
    type(c_ptr) :: cslib_unpack
    type(c_ptr), value :: ptr
    integer(c_int), value :: id
  end function cslib_unpack

  subroutine cslib_unpack_parallel(ptr,id,nlocal,ids,nper,data) bind(c)
    use iso_c_binding
    type(c_ptr), value :: ptr
    integer(c_int), value :: id,nlocal,nper
    type(c_ptr), value :: ids,data
  end subroutine cslib_unpack_parallel

  function cslib_extract(ptr,flag) bind(c)
    use iso_c_binding
    integer(c_int) :: cslib_extract
    type(c_ptr), value :: ptr
    integer(c_int), value :: flag
  end function cslib_extract
end interface

end module cslib_wrap

View File

@ -0,0 +1,54 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
/* C style library interface to CSlib class
   ifdefs allow this file to be included in a C program
*/

#ifdef __cplusplus
extern "C" {
#endif

/* create / destroy an opaque CSlib instance (returned via last arg) */
void cslib_open(int, const char *, const void *, const void *, void **);
void cslib_open_fortran(int, const char *, const char *, const void *, void **);
void cslib_open_fortran_mpi_one(int, const char *, const void *,
                                const void *, void **);
void cslib_close(void *);

/* message assembly and transmission */
void cslib_send(void *, int, int);
void cslib_pack_int(void *, int, int);
void cslib_pack_int64(void *, int, int64_t);
void cslib_pack_float(void *, int, float);
void cslib_pack_double(void *, int, double);
void cslib_pack_string(void *, int, char *);
void cslib_pack(void *, int, int, int, void *);
void cslib_pack_parallel(void *, int, int, int, int *, int, void *);

/* message reception and field extraction */
int cslib_recv(void *, int *, int **, int **, int **);
int cslib_unpack_int(void *, int);
int64_t cslib_unpack_int64(void *, int);
float cslib_unpack_float(void *, int);
double cslib_unpack_double(void *, int);
char *cslib_unpack_string(void *, int);
void *cslib_unpack(void *, int);
void cslib_unpack_data(void *, int, void *);
void cslib_unpack_parallel(void *, int, int, int *, int, void *);
int cslib_extract(void *, int);

#ifdef __cplusplus
}
#endif

View File

@ -0,0 +1,110 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
#include <mpi.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "msg.h"
using namespace CSLIB_NS;
/* ---------------------------------------------------------------------- */
// parallel constructor: cache rank/size of the caller's communicator,
// then record whether this side is the client or the server

Msg::Msg(int csflag, const void *ptr, MPI_Comm cworld)
{
  world = cworld;
  MPI_Comm_rank(world,&me);
  MPI_Comm_size(world,&nprocs);
  init(csflag);
}

/* ---------------------------------------------------------------------- */

// serial constructor: no MPI; world is set to 0 (not a valid communicator)
// NOTE(review): error_all()/error_one() call MPI_Abort(world,...) -- confirm
// the error paths are never reached for serial-constructed Msg objects

Msg::Msg(int csflag, const void *ptr)
{
  world = 0;
  me = 0;
  nprocs = 1;
  init(csflag);
}

/* ---------------------------------------------------------------------- */

// csflag = 0 -> client, csflag = 1 -> server, anything else -> neither;
// also zero the send/recv counters

void Msg::init(int csflag)
{
  client = server = 0;
  if (csflag == 0) client = 1;
  else if (csflag == 1) server = 1;

  nsend = nrecv = 0;
}

/* ---------------------------------------------------------------------- */

// grow-only (re)allocation of the header and data buffers:
// each buffer is freed and re-malloc'ed only if the requested size
// exceeds its current capacity (maxheader/maxbuf track capacity)

void Msg::allocate(int nheader, int &maxheader, int *&header,
                   int nbuf, int &maxbuf, char *&buf)
{
  if (nheader > maxheader) {
    sfree(header);
    maxheader = nheader;
    header = (int *) smalloc(maxheader*sizeof(int));
  }

  if (nbuf > maxbuf) {
    sfree(buf);
    maxbuf = nbuf;
    buf = (char *) smalloc(maxbuf*sizeof(char));
  }
}
/* ---------------------------------------------------------------------- */
void *Msg::smalloc(int nbytes)
{
if (nbytes == 0) return NULL;
void *ptr = (void *) malloc(nbytes);
if (ptr == NULL) {
char str[128];
sprintf(str,"Failed to allocate %d bytes",nbytes);
}
return ptr;
}
/* ---------------------------------------------------------------------- */
// free wrapper tolerant of NULL (mirrors smalloc returning NULL for 0 bytes)

void Msg::sfree(void *ptr)
{
  if (ptr == NULL) return;
  free(ptr);
}

/* ---------------------------------------------------------------------- */

// collective error: only rank 0 prints, then all ranks abort

void Msg::error_all(const char *str)
{
  if (me == 0) printf("CSlib ERROR: %s\n",str);
  MPI_Abort(world,1);
}

/* ---------------------------------------------------------------------- */

// per-rank error: the calling rank prints, then aborts everyone

void Msg::error_one(const char *str)
{
  printf("CSlib ERROR: %s\n",str);
  MPI_Abort(world,1);
}

View File

@ -0,0 +1,52 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
#ifndef MSG_H
#define MSG_H

#include <mpi.h>

namespace CSLIB_NS {

// Msg: abstract base class for one client/server message transport.
// Concrete transports (file, MPI one-world, MPI two-world, ZMQ) implement
// send() and recv() over a header array plus a raw byte buffer.

class Msg {
 public:
  int nsend,nrecv;         // zeroed by init(); usage is up to subclasses
  MPI_Comm world;          // communicator of this side (0 in serial mode)

  Msg(int, const void *, MPI_Comm);
  Msg(int, const void *);
  virtual ~Msg() {}
  virtual void send(int, int *, int, char *) = 0;
  virtual void recv(int &, int *&, int &, char *&) = 0;

 protected:
  int me,nprocs;           // rank and size within world
  int client,server;       // exactly one set to 1 by init() for csflag 0/1
  int nfield;
  int *fieldID,*fieldtype,*fieldlen;
  int lengths[2];          // scratch: [0]=header length, [1]=buffer length

  void init(int);
  void allocate(int, int &, int *&, int, int &, char *&);
  void *smalloc(int);
  void sfree(void *);
  void error_all(const char *);
  void error_one(const char *);
};

}

#endif

View File

@ -0,0 +1,143 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
#include <mpi.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include "msg_file.h"
using namespace CSLIB_NS;
#define MAXLINE 256
#define SLEEP 0.1 // delay in CPU secs to check for message file
/* ---------------------------------------------------------------------- */
// parallel constructor: ptr is the root name for the exchange files

MsgFile::MsgFile(int csflag, const void *ptr, MPI_Comm cworld) :
  Msg(csflag, ptr, cworld)
{
  char *filename = (char *) ptr;
  init(filename);
}

/* ---------------------------------------------------------------------- */

// serial constructor

MsgFile::MsgFile(int csflag, const void *ptr) : Msg(csflag, ptr)
{
  char *filename = (char *) ptr;
  init(filename);
}

/* ---------------------------------------------------------------------- */

MsgFile::~MsgFile()
{
  delete [] fileroot;
}

/* ---------------------------------------------------------------------- */

// keep a private copy of the file root name; message files are named
// fileroot.client / fileroot.server (plus .signal variants) in send()/recv()

void MsgFile::init(char *filename)
{
  int n = strlen(filename) + 1;
  fileroot = new char[n];
  strcpy(fileroot,filename);
}
/* ---------------------------------------------------------------------- */
// write one message as a binary file (lengths, header, data), then create
// an empty "signal" file to tell the other side the message is complete.
// Only rank 0 performs file I/O.
// BUGFIX: the signal-file fopen() was unchecked, so a failed open led to
// fclose(NULL), which is undefined behavior.

void MsgFile::send(int nheader, int *header, int nbuf, char *buf)
{
  char filename[MAXLINE];

  lengths[0] = nheader;
  lengths[1] = nbuf;

  // write message file

  if (me == 0) {
    if (client) sprintf(filename,"%s.%s",fileroot,"client");
    else if (server) sprintf(filename,"%s.%s",fileroot,"server");

    fp = fopen(filename,"wb");
    if (!fp) error_one("send(): Could not open send message file");
    fwrite(lengths,sizeof(int),2,fp);
    fwrite(header,sizeof(int),nheader,fp);
    fwrite(buf,1,nbuf,fp);
    fclose(fp);
  }

  // create empty signal file

  if (me == 0) {
    if (client) sprintf(filename,"%s.%s",fileroot,"client.signal");
    else if (server) sprintf(filename,"%s.%s",fileroot,"server.signal");

    fp = fopen(filename,"w");
    if (!fp) error_one("send(): Could not open send signal file");
    fclose(fp);
  }
}
/* ---------------------------------------------------------------------- */
void MsgFile::recv(int &maxheader, int *&header, int &maxbuf, char *&buf)
{
char filename[MAXLINE];
// wait until signal file exists to open message file
if (me == 0) {
if (client) sprintf(filename,"%s.%s",fileroot,"server.signal");
else if (server) sprintf(filename,"%s.%s",fileroot,"client.signal");
int delay = (int) (1000000 * SLEEP);
while (1) {
fp = fopen(filename,"r");
if (fp) break;
usleep(delay);
}
fclose(fp);
if (client) sprintf(filename,"%s.%s",fileroot,"server");
else if (server) sprintf(filename,"%s.%s",fileroot,"client");
fp = fopen(filename,"rb");
if (!fp) error_one("recv(): Could not open recv message file");
}
// read and broadcast data
if (me == 0) fread(lengths,sizeof(int),2,fp);
if (nprocs > 1) MPI_Bcast(lengths,2,MPI_INT,0,world);
int nheader = lengths[0];
int nbuf = lengths[1];
allocate(nheader,maxheader,header,nbuf,maxbuf,buf);
if (me == 0) fread(header,sizeof(int),nheader,fp);
if (nprocs > 1) MPI_Bcast(header,nheader,MPI_INT,0,world);
if (me == 0) fread(buf,1,nbuf,fp);
if (nprocs > 1) MPI_Bcast(buf,nbuf,MPI_CHAR,0,world);
// delete both message and signal file
if (me == 0) {
fclose(fp);
unlink(filename);
if (client) sprintf(filename,"%s.%s",fileroot,"server.signal");
else if (server) sprintf(filename,"%s.%s",fileroot,"client.signal");
unlink(filename);
}
}

View File

@ -0,0 +1,40 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
#ifndef MSG_FILE_H
#define MSG_FILE_H

#include <stdio.h>
#include "msg.h"

namespace CSLIB_NS {

// MsgFile: Msg transport that exchanges messages via files on a shared
// filesystem, using empty ".signal" files as completion flags

class MsgFile : public Msg {
 public:
  MsgFile(int, const void *, MPI_Comm);
  MsgFile(int, const void *);
  ~MsgFile();
  void send(int, int *, int, char *);
  void recv(int &, int *&, int &, char *&);

 private:
  char *fileroot;   // copy of the file root name passed to the constructor
  FILE *fp;
  void init(char *);
};

}

#endif

View File

@ -0,0 +1,82 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
#include <mpi.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include "msg_mpi_one.h"
using namespace CSLIB_NS;
/* ---------------------------------------------------------------------- */
// ptr is a pointer to the MPI communicator spanning both client and server

MsgMPIOne::MsgMPIOne(int csflag, const void *ptr, MPI_Comm cworld) :
  Msg(csflag, ptr, cworld)
{
  // NOTE: ideally would skip this call if mpi/two
  init(ptr);
}

/* ---------------------------------------------------------------------- */

// record the spanning communicator and the rank of the partner's root
// within it

void MsgMPIOne::init(const void *ptr)
{
  MPI_Comm *pbothcomm = (MPI_Comm *) ptr;
  bothcomm = *pbothcomm;

  if (client) {
    MPI_Comm_size(world,&nprocs);
    // assumes the server's root follows the nprocs client ranks in
    // bothcomm -- TODO confirm against how bothcomm is constructed
    otherroot = nprocs;
  } else if (server) {
    otherroot = 0;   // client root is rank 0 of bothcomm
  }
}

/* ---------------------------------------------------------------------- */

// rank 0 sends lengths, header, data as three point-to-point messages;
// the three sends use the same tag, so MPI ordering keeps them matched

void MsgMPIOne::send(int nheader, int *header, int nbuf, char *buf)
{
  lengths[0] = nheader;
  lengths[1] = nbuf;

  if (me == 0) {
    MPI_Send(lengths,2,MPI_INT,otherroot,0,bothcomm);
    MPI_Send(header,nheader,MPI_INT,otherroot,0,bothcomm);
    MPI_Send(buf,nbuf,MPI_CHAR,otherroot,0,bothcomm);
  }
}

/* ---------------------------------------------------------------------- */

// rank 0 receives the three messages in the same order send() posts them,
// broadcasting each within world; buffers are grown via allocate()

void MsgMPIOne::recv(int &maxheader, int *&header, int &maxbuf, char *&buf)
{
  MPI_Status status;

  if (me == 0) MPI_Recv(lengths,2,MPI_INT,otherroot,0,bothcomm,&status);
  if (nprocs > 1) MPI_Bcast(lengths,2,MPI_INT,0,world);

  int nheader = lengths[0];
  int nbuf = lengths[1];
  allocate(nheader,maxheader,header,nbuf,maxbuf,buf);

  if (me == 0) MPI_Recv(header,nheader,MPI_INT,otherroot,0,bothcomm,&status);
  if (nprocs > 1) MPI_Bcast(header,nheader,MPI_INT,0,world);

  if (me == 0) MPI_Recv(buf,nbuf,MPI_CHAR,otherroot,0,bothcomm,&status);
  if (nprocs > 1) MPI_Bcast(buf,nbuf,MPI_CHAR,0,world);
}

View File

@ -0,0 +1,38 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
#ifndef MSG_MPI_ONE_H
#define MSG_MPI_ONE_H

#include "msg.h"

namespace CSLIB_NS {

// MsgMPIOne: Msg transport where client and server share one MPI world;
// also serves as the base class for the two-world variant (MsgMPITwo)

class MsgMPIOne : public Msg {
 public:
  MsgMPIOne(int, const void *, MPI_Comm);
  virtual ~MsgMPIOne() {}
  void send(int, int *, int, char *);
  void recv(int &, int *&, int &, char *&);

 protected:
  MPI_Comm bothcomm;   // communicator spanning both client and server
  int otherroot;       // rank of the partner's root within bothcomm
  void init(const void *);
};

}

#endif

View File

@ -0,0 +1,81 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
#include <mpi.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include "msg_mpi_two.h"
using namespace CSLIB_NS;
/* ---------------------------------------------------------------------- */
// here ptr is a port-exchange filename, not a communicator
// NOTE(review): the MsgMPIOne base constructor has already run its own
// init(ptr) on this pointer (see the NOTE there) -- confirm that is benign

MsgMPITwo::MsgMPITwo(int csflag, const void *ptr, MPI_Comm cworld) :
  MsgMPIOne(csflag, ptr, cworld)
{
  char *filename = (char *) ptr;
  init(filename);
}

/* ---------------------------------------------------------------------- */

MsgMPITwo::~MsgMPITwo()
{
  // free the inter comm that spans both client and server

  MPI_Comm_free(&bothcomm);
  MPI_Close_port(port);
}

/* ---------------------------------------------------------------------- */

// establish the client/server inter-communicator via MPI dynamic process
// management: the server opens a port and publishes its name through a
// file; the client polls for that file, reads the port name, and connects

void MsgMPITwo::init(char *filename)
{
  if (client) {
    if (me == 0) {
      FILE *fp = NULL;
      while (!fp) {
        fp = fopen(filename,"r");
        if (!fp) sleep(1);   // wait for the server to write the port file
      }
      fgets(port,MPI_MAX_PORT_NAME,fp);
      //printf("Client port: %s\n",port);
      fclose(fp);
    }

    MPI_Bcast(port,MPI_MAX_PORT_NAME,MPI_CHAR,0,world);
    MPI_Comm_connect(port,MPI_INFO_NULL,0,world,&bothcomm);
    //if (me == 0) printf("CLIENT comm connect\n");
    if (me == 0) unlink(filename);   // port file is no longer needed

  } else if (server) {
    MPI_Open_port(MPI_INFO_NULL,port);

    if (me == 0) {
      //printf("Server name: %s\n",port);
      FILE *fp = fopen(filename,"w");
      fprintf(fp,"%s",port);
      fclose(fp);
    }

    MPI_Comm_accept(port,MPI_INFO_NULL,0,world,&bothcomm);
    //if (me == 0) printf("SERVER comm accept\n");
  }

  otherroot = 0;   // partner's root is rank 0 of the inter-communicator
}

View File

@ -0,0 +1,35 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
#ifndef MSG_MPI_TWO_H
#define MSG_MPI_TWO_H

#include "msg_mpi_one.h"

namespace CSLIB_NS {

// MsgMPITwo: Msg transport where client and server run as separate MPI
// jobs, joined at runtime via MPI_Open_port/MPI_Comm_connect; the port
// name is exchanged through a file

class MsgMPITwo : public MsgMPIOne {
 public:
  MsgMPITwo(int, const void *, MPI_Comm);
  ~MsgMPITwo();

 private:
  char port[MPI_MAX_PORT_NAME];   // MPI port name opened by the server
  void init(char *);
};

}

#endif

View File

@ -0,0 +1,140 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
#include <mpi.h>
#include <zmq.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include "msg_zmq.h"
using namespace CSLIB_NS;
/* ---------------------------------------------------------------------- */
// parallel constructor: ptr is the "host:port" string for the ZMQ socket

MsgZMQ::MsgZMQ(int csflag, const void *ptr, MPI_Comm cworld) :
  Msg(csflag, ptr, cworld)
{
  char *port = (char *) ptr;
  init(port);
}

// serial constructor

MsgZMQ::MsgZMQ(int csflag, const void *ptr) : Msg(csflag, ptr)
{
  char *port = (char *) ptr;
  init(port);
}

/* ---------------------------------------------------------------------- */

MsgZMQ::~MsgZMQ()
{
  // only rank 0 created the socket/context in init()
  if (me == 0) {
    zmq_close(socket);
    zmq_ctx_destroy(context);
  }
}
/* ---------------------------------------------------------------------- */
// create the ZMQ context and socket on rank 0 only (other ranks receive
// data via MPI_Bcast in recv()): client = REQ socket that connects,
// server = REP socket that binds, both on "tcp://<port>".
// BUGFIX: zmq_connect() was unchecked while zmq_bind() was checked;
// a failed connect now aborts, consistent with the server branch.

void MsgZMQ::init(char *port)
{
#ifdef ZMQ_NO
  error_all("constructor(): Library not built with ZMQ support");
#endif

  if (me == 0) {
    int n = strlen(port) + 8;
    char *socket_name = new char[n];
    strcpy(socket_name,"tcp://");
    strcat(socket_name,port);

    if (client) {
      context = zmq_ctx_new();
      socket = zmq_socket(context,ZMQ_REQ);
      int rc = zmq_connect(socket,socket_name);
      if (rc) error_one("constructor(): Client could not connect to socket");
    } else if (server) {
      context = zmq_ctx_new();
      socket = zmq_socket(context,ZMQ_REP);
      int rc = zmq_bind(socket,socket_name);
      if (rc) error_one("constructor(): Server could not make socket connection");
    }

    delete [] socket_name;
  }
}
/* ----------------------------------------------------------------------
client/server sockets (REQ/REP) must follow this protocol:
client sends request (REQ) which server receives
server sends response (REP) which client receives
every exchange is of this form, server cannot initiate a send
thus each ZMQ send below has a following ZMQ recv, except last one
if client calls send(), it will next call recv()
if server calls send(), it will next call recv() from its wait loop
in either case, recv() issues a ZMQ recv to match last ZMQ send here
------------------------------------------------------------------------- */
void MsgZMQ::send(int nheader, int *header, int nbuf, char *buf)
{
  lengths[0] = nheader;
  lengths[1] = nbuf;

  // each zmq_send is followed by an empty zmq_recv ack, except the last:
  // REQ/REP sockets require strict alternation (see comment block above)

  if (me == 0) {
    zmq_send(socket,lengths,2*sizeof(int),0);
    zmq_recv(socket,NULL,0,0);
  }

  if (me == 0) {
    zmq_send(socket,header,nheader*sizeof(int),0);
    zmq_recv(socket,NULL,0,0);
  }

  if (me == 0) zmq_send(socket,buf,nbuf,0);   // matched by the peer's recv()
}
/* ----------------------------------------------------------------------
client/server sockets (REQ/REP) must follow this protocol:
client sends request (REQ) which server receives
server sends response (REP) which client receives
every exchange is of this form, server cannot initiate a send
thus each ZMQ recv below has a following ZMQ send, except last one
if client calls recv(), it will next call send() to ping server again,
if server calls recv(), it will next call send() to respond to client
in either case, send() issues a ZMQ send to match last ZMQ recv here
------------------------------------------------------------------------- */
void MsgZMQ::recv(int &maxheader, int *&header, int &maxbuf, char *&buf)
{
  // each zmq_recv is followed by an empty zmq_send ack, except the last:
  // REQ/REP sockets require strict alternation (see comment block above);
  // rank 0 does the socket I/O, results are broadcast within world

  if (me == 0) {
    zmq_recv(socket,lengths,2*sizeof(int),0);
    zmq_send(socket,NULL,0,0);
  }
  if (nprocs > 1) MPI_Bcast(lengths,2,MPI_INT,0,world);

  int nheader = lengths[0];
  int nbuf = lengths[1];
  allocate(nheader,maxheader,header,nbuf,maxbuf,buf);

  if (me == 0) {
    zmq_recv(socket,header,nheader*sizeof(int),0);
    zmq_send(socket,NULL,0,0);
  }
  if (nprocs > 1) MPI_Bcast(header,nheader,MPI_INT,0,world);

  if (me == 0) zmq_recv(socket,buf,nbuf,0);   // matches the peer's final send
  if (nprocs > 1) MPI_Bcast(buf,nbuf,MPI_CHAR,0,world);
}

View File

@ -0,0 +1,38 @@
/* ----------------------------------------------------------------------
CSlib - Client/server library for code coupling
http://cslib.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright 2018 National Technology & Engineering Solutions of
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
NTESS, the U.S. Government retains certain rights in this software.
This software is distributed under the modified Berkeley Software
Distribution (BSD) License.
See the README file in the top-level CSlib directory.
------------------------------------------------------------------------- */
#ifndef MSG_ZMQ_H
#define MSG_ZMQ_H

#include "msg.h"

namespace CSLIB_NS {

// MsgZMQ: Msg transport over a ZeroMQ REQ/REP socket pair;
// only rank 0 owns the socket, other ranks get data via MPI_Bcast

class MsgZMQ : public Msg {
 public:
  MsgZMQ(int, const void *, MPI_Comm);
  MsgZMQ(int, const void *);
  ~MsgZMQ();
  void send(int, int *, int, char *);
  void recv(int &, int *&, int &, char *&);

 private:
  void *context,*socket;   // ZMQ context and socket handles (rank 0 only)
  void init(char *);
};

}

#endif

165
lib/scafacos/Install.py Normal file
View File

@ -0,0 +1,165 @@
#!/usr/bin/env python
# Install.py tool to download, unpack, build, and link to the Scafacos library
# used to automate the steps described in the README file in this dir
from __future__ import print_function
import sys,os,re,subprocess
# help message
help = """
Syntax from src dir: make lib-scafacos args="-b"
or: make lib-scafacos args="-p /usr/local/scafacos"
Syntax from lib dir: python Install.py -b
or: python Install.py -p /usr/local/scafacos
specify zero or more options, order does not matter
-b = download and build the Scafacos library
-p = specify folder of existing Scafacos installation
always creates includelink, liblink to Scafacos dirs
Example:
make lib-scafacos args="-b" # download/build in lib/scafacos/scafacos
make lib-scafacos args="-p $HOME/scafacos" # use existing Scafacos installation in $HOME
"""
# settings
version = "scafacos-1.0.1"
url = "https://github.com/scafacos/scafacos/releases/download/v1.0.1/scafacos-1.0.1.tar.gz"
#url = "https://gigamove.rz.rwth-aachen.de/d/id/CTzyApN76MXMJ6/dd/100" % version
# print error message or help
def error(str=None):
  # With no argument print the generic help text, otherwise print the
  # specific error message; either way the script terminates here.
  if str:
    print("ERROR",str)
  else:
    print(help)
  sys.exit()
# expand to full path name
# process leading '~' or relative path
def fullpath(path):
  """Return *path* as an absolute path, with any leading '~' expanded."""
  expanded = os.path.expanduser(path)
  return os.path.abspath(expanded)
def which(program):
  """Locate *program* like the shell 'which' command.

  If *program* contains a directory component, test it directly;
  otherwise search each entry of $PATH. Returns the full path of an
  executable match, or None if nothing is found.
  """
  def is_exe(candidate):
    return os.path.isfile(candidate) and os.access(candidate, os.X_OK)

  head, _tail = os.path.split(program)
  if head:
    if is_exe(program):
      return program
  else:
    for directory in os.environ["PATH"].split(os.pathsep):
      candidate = os.path.join(directory.strip('"'), program)
      if is_exe(candidate):
        return candidate
  return None
def geturl(url,fname):
  """Download *url* to the local file *fname*, trying curl then wget.

  Calls error() (which exits the script) if neither tool succeeds.
  """
  success = False

  # first choice: curl, with -L to follow redirects
  if which('curl') != None:
    cmd = 'curl -L -o "%s" %s' % (fname,url)
    try:
      subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
      success = True
    except subprocess.CalledProcessError as e:
      print("Calling curl failed with: %s" % e.output.decode('UTF-8'))

  # fallback: wget, only if curl was missing or failed
  if not success and which('wget') != None:
    cmd = 'wget -O "%s" %s' % (fname,url)
    print("Wget command: %s" % cmd)
    try:
      subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
      success = True
    except subprocess.CalledProcessError as e:
      print("Calling wget failed with: %s" % e.output.decode('UTF-8'))

  if not success:
    error("Failed to download source code with 'curl' or 'wget'")
  return
# parse args
args = sys.argv[1:]
nargs = len(args)

homepath = "."

# default action is to download and build (matches previous behavior when
# the script is run with no arguments)
buildflag = True
explicitbuild = False   # True only if the user passed -b themselves
pathflag = False
linkflag = True

iarg = 0
while iarg < nargs:
  if args[iarg] == "-v":
    if iarg+2 > nargs: error()
    version = args[iarg+1]
    iarg += 2
  elif args[iarg] == "-p":
    if iarg+2 > nargs: error()
    scafacospath = fullpath(args[iarg+1])
    pathflag = True
    iarg += 2
  elif args[iarg] == "-b":
    buildflag = True
    explicitbuild = True
    iarg += 1
  else: error()

# BUGFIX: "-p" alone used to trip the -b/-p conflict check below, because
# buildflag defaults to True; treat it as a conflict only for an explicit -b

if pathflag and not explicitbuild: buildflag = False

homepath = fullpath(homepath)
homedir = "%s/%s" % (homepath,version)

if (pathflag):
  if not os.path.isdir(scafacospath): error("Scafacos path does not exist")
  homedir = scafacospath

if (buildflag and pathflag):
  error("Cannot use -b and -p flag at the same time")

# download and unpack Scafacos tarball

if buildflag:
  print("Downloading Scafacos ...")
  geturl(url,"%s/%s.tar.gz" % (homepath,version))

  print("Unpacking Scafacos tarball ...")
  if os.path.exists("%s/%s" % (homepath,version)):
    cmd = 'rm -rf "%s/%s"' % (homepath,version)
    subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
  cmd = 'cd "%s"; tar -xzvf %s.tar.gz' % (homepath,version)
  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
  os.remove("%s/%s.tar.gz" % (homepath,version))
  # if an alternate homedir was requested, move the unpacked tree there
  if os.path.basename(homedir) != version:
    if os.path.exists(homedir):
      cmd = 'rm -rf "%s"' % homedir
      subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
    os.rename("%s/%s" % (homepath,version),homedir)

# build Scafacos

if buildflag:
  print("Building Scafacos ...")
  cmd = 'cd "%s"; ./configure --prefix="`pwd`/build" --disable-doc --enable-fcs-solvers=fmm,p2nfft,direct,ewald,p3m --with-internal-fftw --with-internal-pfft --with-internal-pnfft CC=mpicc FC=mpif90 CXX=mpicxx F77= > log.txt; make -j; make install' % homedir
  txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
  print(txt.decode('UTF-8'))

# create 2 links in lib/scafacos to Scafacos include/lib dirs

if linkflag:
  print("Creating links to Scafacos include and lib files")
  if os.path.isfile("includelink") or os.path.islink("includelink"):
    os.remove("includelink")
  if os.path.isfile("liblink") or os.path.islink("liblink"):
    os.remove("liblink")
  cmd = 'ln -s "%s/build/include" includelink' % homedir
  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
  cmd = 'ln -s "%s/build/lib" liblink' % homedir
  subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)

76
lib/scafacos/README Normal file
View File

@ -0,0 +1,76 @@
This directory contains links to the ScaFaCoS library which
is required to use the scafacos KSPACE solver via the
kspace_style scafacos command in a LAMMPS input script.
The ScaFaCoS library is available at http://scafacos.de or
on github at https://github.com/scafacos. The library was
developed by a consortium of different universities in
Germany (Bonn, Chemnitz, Stuttgart, Wuppertal) and
the Research Centre Juelich (Juelich Supercomputing Centre).
-----------------
Instructions:
1.) Download ScaFaCoS at http://scafacos.de or directly from github
https://github.com/scafacos where you can either clone the
repository or download the latest stable release.
NOTE: For the P2NFFT solver, you require an installation of the
GNU Scientific Library (GSL). Also to ensure the correct
linker-flags are used, ScaFaCoS employs the pkg-config
tool, which is also required.
If you cloned the repository, please refer to 2.), else continue
with 3.)
2.) If you cloned the git repository, you require autotools to setup
the library. For that the following packages are required:
m4
autotools
automake
libtool
In the build_aux folder of the scafacos folder, you can find the
get_autotools.sh script, that downloads and installs the tools
to ${HOME}/local. To change the target folder, please change the
value of 'myprefix' in that script.
To start the auto-configuration process, please run the './bootstrap'
command in the scafacos base-folder.
3.) If you downloaded the library as a tarball, please extract the file
to somewhere in your file system, or if you finished running
'./bootstrap', please run './configure' in the base folder.
Important flags for './configure' are:
--prefix=<install_dir>: sets the directory the compiled files will
be installed to [default: /usr/local]
--enable-fcs-solvers=<list>: sets the list of solvers that are going to
be built. By default all solvers will be
built. Currently supported by the kspace in LAMMPS
are: direct, ewald, fmm, p2nfft
The other solvers might work, but support
is purely experimental at the moment. To
give a list of solvers, use a comma separated
list.
--disable-doc: disables the compilation of the documentation,
e.g. if no Latex is available on the system.
4.) To build the library after configuration, run 'make' from the base folder.
5.) To install the library in the designated installation folder, run 'make install'.
Installation is required, as ScaFaCoS does not support an in-source build!
6.) Create two soft links in this directory (lib/scafacos) to where the library
is installed. E.g. if you built ScaFaCoS in the default install directory:
% ln -s /usr/local/include includelink
% ln -s /usr/local/lib liblink
for any custom directory <custom_dir>:
% ln -s <custom_dir>/include includelink
% ln -s <custom_dir>/lib liblink
7.) ScaFaCoS uses the pkg-config tool to supply the correct compiler and linker
    flags, so you need to setup your
PKG_CONFIG_PATH environment variable to include the lib/pkgconfig folder in the
installation directory.
Depending on the shell you use, this can be done either by:
% export PKG_CONFIG_PATH=<custom_dir>/lib/pkgconfig:${PKG_CONFIG_PATH}
or
% setenv PKG_CONFIG_PATH <custom_dir>/lib/pkgconfig:${PKG_CONFIG_PATH}
-----------------