Merge remote-tracking branch 'upstream/master'
@@ -35,6 +35,8 @@ linalg set of BLAS and LAPACK routines needed by USER-ATC package
        from Axel Kohlmeyer (Temple U)
meam    modified embedded atom method (MEAM) potential, MEAM package
        from Greg Wagner (Sandia)
message client/server communication library via MPI, sockets, files
        from Steve Plimpton (Sandia)
molfile hooks to VMD molfile plugins, used by the USER-MOLFILE package
        from Axel Kohlmeyer (Temple U) and the VMD development team
mscg    hooks to the MSCG library, used by fix_mscg command

@@ -23,15 +23,17 @@ optionally copies Makefile.auto to a new Makefile.osuffix

  -m = use Makefile.machine as starting point, copy to Makefile.auto
       default machine = linux
       default for -h, -a, -p, -e settings are those in -m Makefile
  -h = set CUDA_HOME variable in Makefile.auto to hdir
       hdir = path to NVIDIA Cuda software, e.g. /usr/local/cuda
  -a = set CUDA_ARCH variable in Makefile.auto to arch
       use arch = 20 for Tesla C2050/C2070 (Fermi) (deprecated as of CUDA 8.0)
                     or GeForce GTX 580 or similar
       use arch = 30 for Tesla K10 (Kepler)
       use arch = 35 for Tesla K40 (Kepler) or GeForce GTX Titan or similar
       use arch = 37 for Tesla dual K80 (Kepler)
       use arch = 60 for Tesla P100 (Pascal)
       use arch = sm_20 for Fermi (C2050/C2070, deprecated as of CUDA 8.0)
                        or GeForce GTX 580 or similar
       use arch = sm_30 for Kepler (K10)
       use arch = sm_35 for Kepler (K40) or GeForce GTX Titan or similar
       use arch = sm_37 for Kepler (dual K80)
       use arch = sm_60 for Pascal (P100)
       use arch = sm_70 for Volta
  -p = set CUDA_PRECISION variable in Makefile.auto to precision
       use precision = double or mixed or single
  -e = set EXTRAMAKE variable in Makefile.auto to Makefile.lammps.esuffix
@@ -46,7 +48,7 @@ Examples:

make lib-gpu args="-b"      # build GPU lib with default Makefile.linux
make lib-gpu args="-m xk7 -p single -o xk7.single"      # create new Makefile.xk7.single, altered for single-precision
make lib-gpu args="-m mpi -a 35 -p single -o mpi.mixed -b"      # create new Makefile.mpi.mixed, also build GPU lib with these settings
make lib-gpu args="-m mpi -a sm_35 -p single -o mpi.mixed -b"   # create new Makefile.mpi.mixed, also build GPU lib with these settings
"""

# print error message or help
@@ -127,7 +129,7 @@ for line in lines:
  if hflag and words[0] == "CUDA_HOME" and words[1] == '=':
    line = line.replace(words[2],hdir)
  if aflag and words[0] == "CUDA_ARCH" and words[1] == '=':
    line = line.replace(words[2],"-arch=sm_%s" % arch)
    line = line.replace(words[2],"-arch=%s" % arch)
  if pflag and words[0] == "CUDA_PRECISION" and words[1] == '=':
    line = line.replace(words[2],precstr)
  if eflag and words[0] == "EXTRAMAKE" and words[1] == '=':

@ -13,8 +13,8 @@ endif
|
||||
|
||||
NVCC = nvcc
|
||||
|
||||
# Tesla CUDA
|
||||
CUDA_ARCH = -arch=sm_21
|
||||
# older CUDA
|
||||
#CUDA_ARCH = -arch=sm_21
|
||||
# newer CUDA
|
||||
#CUDA_ARCH = -arch=sm_13
|
||||
# older CUDA
|
||||
|
||||
@ -79,7 +79,10 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
|
||||
$(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o \
|
||||
$(OBJ_DIR)/lal_ufm.o $(OBJ_DIR)/lal_ufm_ext.o \
|
||||
$(OBJ_DIR)/lal_dipole_long_lj.o $(OBJ_DIR)/lal_dipole_long_lj_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o
|
||||
$(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o \
|
||||
$(OBJ_DIR)/lal_coul_long_cs.o $(OBJ_DIR)/lal_coul_long_cs_ext.o \
|
||||
$(OBJ_DIR)/lal_born_coul_long_cs.o $(OBJ_DIR)/lal_born_coul_long_cs_ext.o \
|
||||
$(OBJ_DIR)/lal_born_coul_wolf_cs.o $(OBJ_DIR)/lal_born_coul_wolf_cs_ext.o
|
||||
|
||||
CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
||||
$(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
|
||||
@ -137,7 +140,10 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
||||
$(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic_cubin.h \
|
||||
$(OBJ_DIR)/ufm.cubin $(OBJ_DIR)/ufm_cubin.h \
|
||||
$(OBJ_DIR)/dipole_long_lj.cubin $(OBJ_DIR)/dipole_long_lj_cubin.h \
|
||||
$(OBJ_DIR)/lj_expand_coul_long.cubin $(OBJ_DIR)/lj_expand_coul_long_cubin.h
|
||||
$(OBJ_DIR)/lj_expand_coul_long.cubin $(OBJ_DIR)/lj_expand_coul_long_cubin.h \
|
||||
$(OBJ_DIR)/coul_long_cs.cubin $(OBJ_DIR)/coul_long_cs_cubin.h \
|
||||
$(OBJ_DIR)/born_coul_long_cs.cubin $(OBJ_DIR)/born_coul_long_cs_cubin.h \
|
||||
$(OBJ_DIR)/born_coul_wolf_cs.cubin $(OBJ_DIR)/born_coul_wolf_cs_cubin.h
|
||||
|
||||
all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
|
||||
|
||||
@ -837,6 +843,42 @@ $(OBJ_DIR)/lal_lj_expand_coul_long.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_
|
||||
$(OBJ_DIR)/lal_lj_expand_coul_long_ext.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_lj_expand_coul_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul_long_cs.cubin: lal_coul_long_cs.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_coul_long_cs.cu
|
||||
|
||||
$(OBJ_DIR)/coul_long_cs_cubin.h: $(OBJ_DIR)/coul_long_cs.cubin $(OBJ_DIR)/coul_long_cs.cubin
|
||||
$(BIN2C) -c -n coul_long_cs $(OBJ_DIR)/coul_long_cs.cubin > $(OBJ_DIR)/coul_long_cs_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_coul_long_cs.o: $(ALL_H) lal_coul_long_cs.h lal_coul_long_cs.cpp $(OBJ_DIR)/coul_long_cs_cubin.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_coul_long.o
|
||||
$(CUDR) -o $@ -c lal_coul_long_cs.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_coul_long_cs_ext.o: $(ALL_H) lal_coul_long_cs.h lal_coul_long_cs_ext.cpp lal_coul_long.h
|
||||
$(CUDR) -o $@ -c lal_coul_long_cs_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/born_coul_long_cs.cubin: lal_born_coul_long_cs.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_born_coul_long_cs.cu
|
||||
|
||||
$(OBJ_DIR)/born_coul_long_cs_cubin.h: $(OBJ_DIR)/born_coul_long_cs.cubin $(OBJ_DIR)/born_coul_long_cs.cubin
|
||||
$(BIN2C) -c -n born_coul_long_cs $(OBJ_DIR)/born_coul_long_cs.cubin > $(OBJ_DIR)/born_coul_long_cs_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_long_cs.o: $(ALL_H) lal_born_coul_long_cs.h lal_born_coul_long_cs.cpp $(OBJ_DIR)/born_coul_long_cs_cubin.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_born_coul_long.o
|
||||
$(CUDR) -o $@ -c lal_born_coul_long_cs.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_long_cs_ext.o: $(ALL_H) lal_born_coul_long_cs.h lal_born_coul_long_cs_ext.cpp lal_born_coul_long.h
|
||||
$(CUDR) -o $@ -c lal_born_coul_long_cs_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/born_coul_wolf_cs.cubin: lal_born_coul_wolf_cs.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_born_coul_wolf_cs.cu
|
||||
|
||||
$(OBJ_DIR)/born_coul_wolf_cs_cubin.h: $(OBJ_DIR)/born_coul_wolf_cs.cubin $(OBJ_DIR)/born_coul_wolf_cs.cubin
|
||||
$(BIN2C) -c -n born_coul_wolf_cs $(OBJ_DIR)/born_coul_wolf_cs.cubin > $(OBJ_DIR)/born_coul_wolf_cs_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_wolf_cs.o: $(ALL_H) lal_born_coul_wolf_cs.h lal_born_coul_wolf_cs.cpp $(OBJ_DIR)/born_coul_wolf_cs_cubin.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_born_coul_wolf.o
|
||||
$(CUDR) -o $@ -c lal_born_coul_wolf_cs.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_wolf_cs_ext.o: $(ALL_H) lal_born_coul_wolf_cs.h lal_born_coul_wolf_cs_ext.cpp lal_born_coul_wolf.h
|
||||
$(CUDR) -o $@ -c lal_born_coul_wolf_cs_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
||||
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
||||
|
||||
|
||||
@ -48,7 +48,18 @@ struct NVDProperties {
|
||||
int minor;
|
||||
CUDA_INT_TYPE totalGlobalMem;
|
||||
int multiProcessorCount;
|
||||
CUdevprop_st p;
|
||||
|
||||
int maxThreadsPerBlock;
|
||||
int maxThreadsDim[3];
|
||||
int maxGridSize[3];
|
||||
int sharedMemPerBlock;
|
||||
int totalConstantMemory;
|
||||
int SIMDWidth;
|
||||
int memPitch;
|
||||
int regsPerBlock;
|
||||
int clockRate;
|
||||
int textureAlign;
|
||||
|
||||
int kernelExecTimeoutEnabled;
|
||||
int integrated;
|
||||
int canMapHostMemory;
|
||||
@ -210,18 +221,18 @@ class UCL_Device {
|
||||
inline double clock_rate() { return clock_rate(_device); }
|
||||
/// Clock rate in GHz
|
||||
inline double clock_rate(const int i)
|
||||
{ return _properties[i].p.clockRate*1e-6;}
|
||||
{ return _properties[i].clockRate*1e-6;}
|
||||
|
||||
/// Get the maximum number of threads per block
|
||||
inline size_t group_size() { return group_size(_device); }
|
||||
/// Get the maximum number of threads per block
|
||||
inline size_t group_size(const int i)
|
||||
{ return _properties[i].p.maxThreadsPerBlock; }
|
||||
{ return _properties[i].maxThreadsPerBlock; }
|
||||
|
||||
/// Return the maximum memory pitch in bytes for current device
|
||||
inline size_t max_pitch() { return max_pitch(_device); }
|
||||
/// Return the maximum memory pitch in bytes
|
||||
inline size_t max_pitch(const int i) { return _properties[i].p.memPitch; }
|
||||
inline size_t max_pitch(const int i) { return _properties[i].memPitch; }
|
||||
|
||||
/// Returns false if accelerator cannot be shared by multiple processes
|
||||
/** If it cannot be determined, true is returned **/
|
||||
@ -260,6 +271,9 @@ class UCL_Device {
|
||||
/// List all devices along with all properties
|
||||
inline void print_all(std::ostream &out);
|
||||
|
||||
/// Select the platform that has accelerators (for compatibility with OpenCL)
|
||||
inline int set_platform_accelerator(int pid=-1) { return UCL_SUCCESS; }
|
||||
|
||||
private:
|
||||
int _device, _num_devices;
|
||||
std::vector<NVDProperties> _properties;
|
||||
@ -272,49 +286,54 @@ class UCL_Device {
|
||||
UCL_Device::UCL_Device() {
|
||||
CU_SAFE_CALL_NS(cuInit(0));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetCount(&_num_devices));
|
||||
for (int dev=0; dev<_num_devices; ++dev) {
|
||||
CUdevice m;
|
||||
CU_SAFE_CALL_NS(cuDeviceGet(&m,dev));
|
||||
for (int i=0; i<_num_devices; ++i) {
|
||||
CUdevice dev;
|
||||
CU_SAFE_CALL_NS(cuDeviceGet(&dev,i));
|
||||
int major, minor;
|
||||
CU_SAFE_CALL_NS(cuDeviceComputeCapability(&major,&minor,m));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev));
|
||||
if (major==9999)
|
||||
continue;
|
||||
|
||||
_properties.push_back(NVDProperties());
|
||||
_properties.back().device_id=dev;
|
||||
_properties.back().major=major;
|
||||
_properties.back().minor=minor;
|
||||
NVDProperties prop;
|
||||
prop.device_id = i;
|
||||
prop.major=major;
|
||||
prop.minor=minor;
|
||||
|
||||
char namecstr[1024];
|
||||
CU_SAFE_CALL_NS(cuDeviceGetName(namecstr,1024,m));
|
||||
_properties.back().name=namecstr;
|
||||
CU_SAFE_CALL_NS(cuDeviceGetName(namecstr,1024,dev));
|
||||
prop.name=namecstr;
|
||||
|
||||
CU_SAFE_CALL_NS(cuDeviceTotalMem(&prop.totalGlobalMem,dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.multiProcessorCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev));
|
||||
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.maxThreadsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.maxThreadsDim[0], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.maxThreadsDim[1], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.maxThreadsDim[2], CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.maxGridSize[0], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.maxGridSize[1], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.maxGridSize[2], CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.sharedMemPerBlock, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.totalConstantMemory, CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.SIMDWidth, CU_DEVICE_ATTRIBUTE_WARP_SIZE, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.memPitch, CU_DEVICE_ATTRIBUTE_MAX_PITCH, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.regsPerBlock, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.clockRate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.textureAlign, CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT, dev));
|
||||
|
||||
CU_SAFE_CALL_NS(cuDeviceTotalMem(&_properties.back().totalGlobalMem,m));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&_properties.back().multiProcessorCount,
|
||||
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
|
||||
m));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetProperties(&_properties.back().p,m));
|
||||
#if CUDA_VERSION >= 2020
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
|
||||
&_properties.back().kernelExecTimeoutEnabled,
|
||||
CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
|
||||
&_properties.back().integrated,
|
||||
CU_DEVICE_ATTRIBUTE_INTEGRATED, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
|
||||
&_properties.back().canMapHostMemory,
|
||||
CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&_properties.back().computeMode,
|
||||
CU_DEVICE_ATTRIBUTE_COMPUTE_MODE,dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.kernelExecTimeoutEnabled, CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT,dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.integrated, CU_DEVICE_ATTRIBUTE_INTEGRATED, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.canMapHostMemory, CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.computeMode, CU_DEVICE_ATTRIBUTE_COMPUTE_MODE,dev));
|
||||
#endif
|
||||
#if CUDA_VERSION >= 3010
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
|
||||
&_properties.back().concurrentKernels,
|
||||
CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(
|
||||
&_properties.back().ECCEnabled,
|
||||
CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.concurrentKernels, CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS, dev));
|
||||
CU_SAFE_CALL_NS(cuDeviceGetAttribute(&prop.ECCEnabled, CU_DEVICE_ATTRIBUTE_ECC_ENABLED, dev));
|
||||
#endif
|
||||
|
||||
_properties.push_back(prop);
|
||||
}
|
||||
_device=-1;
|
||||
_cq.push_back(CUstream());
|
||||
@ -390,27 +409,27 @@ void UCL_Device::print_all(std::ostream &out) {
|
||||
<< cores(i) << std::endl;
|
||||
#endif
|
||||
out << " Total amount of constant memory: "
|
||||
<< _properties[i].p.totalConstantMemory << " bytes\n";
|
||||
<< _properties[i].totalConstantMemory << " bytes\n";
|
||||
out << " Total amount of local/shared memory per block: "
|
||||
<< _properties[i].p.sharedMemPerBlock << " bytes\n";
|
||||
<< _properties[i].sharedMemPerBlock << " bytes\n";
|
||||
out << " Total number of registers available per block: "
|
||||
<< _properties[i].p.regsPerBlock << std::endl;
|
||||
<< _properties[i].regsPerBlock << std::endl;
|
||||
out << " Warp size: "
|
||||
<< _properties[i].p.SIMDWidth << std::endl;
|
||||
<< _properties[i].SIMDWidth << std::endl;
|
||||
out << " Maximum number of threads per block: "
|
||||
<< _properties[i].p.maxThreadsPerBlock << std::endl;
|
||||
<< _properties[i].maxThreadsPerBlock << std::endl;
|
||||
out << " Maximum group size (# of threads per block) "
|
||||
<< _properties[i].p.maxThreadsDim[0] << " x "
|
||||
<< _properties[i].p.maxThreadsDim[1] << " x "
|
||||
<< _properties[i].p.maxThreadsDim[2] << std::endl;
|
||||
<< _properties[i].maxThreadsDim[0] << " x "
|
||||
<< _properties[i].maxThreadsDim[1] << " x "
|
||||
<< _properties[i].maxThreadsDim[2] << std::endl;
|
||||
out << " Maximum item sizes (# threads for each dim) "
|
||||
<< _properties[i].p.maxGridSize[0] << " x "
|
||||
<< _properties[i].p.maxGridSize[1] << " x "
|
||||
<< _properties[i].p.maxGridSize[2] << std::endl;
|
||||
<< _properties[i].maxGridSize[0] << " x "
|
||||
<< _properties[i].maxGridSize[1] << " x "
|
||||
<< _properties[i].maxGridSize[2] << std::endl;
|
||||
out << " Maximum memory pitch: "
|
||||
<< max_pitch(i) << " bytes\n";
|
||||
out << " Texture alignment: "
|
||||
<< _properties[i].p.textureAlign << " bytes\n";
|
||||
<< _properties[i].textureAlign << " bytes\n";
|
||||
out << " Clock rate: "
|
||||
<< clock_rate(i) << " GHz\n";
|
||||
#if CUDA_VERSION >= 2020
|
||||
|
||||
@ -165,8 +165,8 @@ class UCL_Device {
|
||||
/// Get the current OpenCL device name
|
||||
inline std::string name() { return name(_device); }
|
||||
/// Get the OpenCL device name
|
||||
inline std::string name(const int i)
|
||||
{ return std::string(_properties[i].name); }
|
||||
inline std::string name(const int i) {
|
||||
return std::string(_properties[i].name); }
|
||||
|
||||
/// Get a string telling the type of the current device
|
||||
inline std::string device_type_name() { return device_type_name(_device); }
|
||||
@ -281,7 +281,7 @@ class UCL_Device {
|
||||
inline cl_device_id & cl_device() { return _cl_device; }
|
||||
|
||||
/// Select the platform that has accelerators
|
||||
inline void set_platform_accelerator(int pid=-1);
|
||||
inline int set_platform_accelerator(int pid=-1);
|
||||
|
||||
private:
|
||||
int _num_platforms; // Number of platforms
|
||||
@ -324,6 +324,7 @@ UCL_Device::~UCL_Device() {
|
||||
|
||||
void UCL_Device::clear() {
|
||||
_properties.clear();
|
||||
_cl_devices.clear();
|
||||
if (_device>-1) {
|
||||
for (size_t i=0; i<_cq.size(); i++) {
|
||||
CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq.back()));
|
||||
@ -520,8 +521,6 @@ int UCL_Device::device_type(const int i) {
|
||||
|
||||
// Set the CUDA device to the specified device number
|
||||
int UCL_Device::set(int num) {
|
||||
clear();
|
||||
|
||||
cl_device_id *device_list = new cl_device_id[_num_devices];
|
||||
cl_uint n;
|
||||
CL_SAFE_CALL(clGetDeviceIDs(_cl_platform,CL_DEVICE_TYPE_ALL,_num_devices,
|
||||
@ -612,7 +611,7 @@ void UCL_Device::print_all(std::ostream &out) {
|
||||
|
||||
// Select the platform that is associated with accelerators
|
||||
// if pid < 0, select the first platform
|
||||
void UCL_Device::set_platform_accelerator(int pid) {
|
||||
int UCL_Device::set_platform_accelerator(int pid) {
|
||||
if (pid < 0) {
|
||||
int found = 0;
|
||||
for (int n=0; n<_num_platforms; n++) {
|
||||
@ -625,10 +624,11 @@ void UCL_Device::set_platform_accelerator(int pid) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found) break;
|
||||
if (found) return UCL_SUCCESS;
|
||||
}
|
||||
return UCL_ERROR;
|
||||
} else {
|
||||
set_platform(pid);
|
||||
return set_platform(pid);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -38,8 +38,8 @@ namespace ucl_opencl {
|
||||
/// Class for timing OpenCL events
|
||||
class UCL_Timer {
|
||||
public:
|
||||
inline UCL_Timer() : _total_time(0.0f), _initialized(false) { }
|
||||
inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false)
|
||||
inline UCL_Timer() : _total_time(0.0f), _initialized(false), has_measured_time(false) { }
|
||||
inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false), has_measured_time(false)
|
||||
{ init(dev); }
|
||||
|
||||
inline ~UCL_Timer() { clear(); }
|
||||
@ -49,11 +49,10 @@ class UCL_Timer {
|
||||
inline void clear() {
|
||||
if (_initialized) {
|
||||
CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq));
|
||||
clReleaseEvent(start_event);
|
||||
clReleaseEvent(stop_event);
|
||||
_initialized=false;
|
||||
_total_time=0.0;
|
||||
}
|
||||
has_measured_time = false;
|
||||
}
|
||||
|
||||
/// Initialize default command queue for timing
|
||||
@ -66,25 +65,39 @@ class UCL_Timer {
|
||||
_cq=cq;
|
||||
clRetainCommandQueue(_cq);
|
||||
_initialized=true;
|
||||
has_measured_time = false;
|
||||
}
|
||||
|
||||
/// Start timing on default command queue
|
||||
inline void start() { UCL_OCL_MARKER(_cq,&start_event); }
|
||||
inline void start() {
|
||||
UCL_OCL_MARKER(_cq,&start_event);
|
||||
has_measured_time = false;
|
||||
}
|
||||
|
||||
/// Stop timing on default command queue
|
||||
inline void stop() { UCL_OCL_MARKER(_cq,&stop_event); }
|
||||
inline void stop() {
|
||||
UCL_OCL_MARKER(_cq,&stop_event);
|
||||
has_measured_time = true;
|
||||
}
|
||||
|
||||
/// Block until the start event has been reached on device
|
||||
inline void sync_start()
|
||||
{ CL_SAFE_CALL(clWaitForEvents(1,&start_event)); }
|
||||
inline void sync_start() {
|
||||
CL_SAFE_CALL(clWaitForEvents(1,&start_event));
|
||||
has_measured_time = false;
|
||||
}
|
||||
|
||||
/// Block until the stop event has been reached on device
|
||||
inline void sync_stop()
|
||||
{ CL_SAFE_CALL(clWaitForEvents(1,&stop_event)); }
|
||||
inline void sync_stop() {
|
||||
CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
|
||||
has_measured_time = true;
|
||||
}
|
||||
|
||||
/// Set the time elapsed to zero (not the total_time)
|
||||
inline void zero()
|
||||
{ UCL_OCL_MARKER(_cq,&start_event); UCL_OCL_MARKER(_cq,&stop_event); }
|
||||
inline void zero() {
|
||||
has_measured_time = false;
|
||||
UCL_OCL_MARKER(_cq,&start_event);
|
||||
UCL_OCL_MARKER(_cq,&stop_event);
|
||||
}
|
||||
|
||||
/// Set the total time to zero
|
||||
inline void zero_total() { _total_time=0.0; }
|
||||
@ -99,6 +112,7 @@ class UCL_Timer {
|
||||
|
||||
/// Return the time (ms) of last start to stop - Forces synchronization
|
||||
inline double time() {
|
||||
if(!has_measured_time) return 0.0;
|
||||
cl_ulong tstart,tend;
|
||||
CL_SAFE_CALL(clWaitForEvents(1,&stop_event));
|
||||
CL_SAFE_CALL(clGetEventProfilingInfo(stop_event,
|
||||
@ -107,6 +121,9 @@ class UCL_Timer {
|
||||
CL_SAFE_CALL(clGetEventProfilingInfo(start_event,
|
||||
CL_PROFILING_COMMAND_END,
|
||||
sizeof(cl_ulong), &tstart, NULL));
|
||||
clReleaseEvent(start_event);
|
||||
clReleaseEvent(stop_event);
|
||||
has_measured_time = false;
|
||||
return (tend-tstart)*t_factor;
|
||||
}
|
||||
|
||||
@ -123,8 +140,9 @@ class UCL_Timer {
|
||||
cl_event start_event, stop_event;
|
||||
cl_command_queue _cq;
|
||||
double _total_time;
|
||||
bool _initialized;
|
||||
double t_factor;
|
||||
bool _initialized;
|
||||
bool has_measured_time;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
@ -322,10 +322,12 @@ class Atom {
|
||||
|
||||
// Copy charges to device asynchronously
|
||||
inline void add_q_data() {
|
||||
time_q.start();
|
||||
if (_q_avail==false) {
|
||||
q.update_device(_nall,true);
|
||||
_q_avail=true;
|
||||
}
|
||||
time_q.stop();
|
||||
}
|
||||
|
||||
// Cast quaternions to write buffer
|
||||
@ -347,10 +349,12 @@ class Atom {
|
||||
// Copy quaternions to device
|
||||
/** Copies nall()*4 elements **/
|
||||
inline void add_quat_data() {
|
||||
time_quat.start();
|
||||
if (_quat_avail==false) {
|
||||
quat.update_device(_nall*4,true);
|
||||
_quat_avail=true;
|
||||
}
|
||||
time_quat.stop();
|
||||
}
|
||||
|
||||
/// Cast velocities and tags to write buffer
|
||||
|
||||
@ -34,8 +34,8 @@ using namespace LAMMPS_AL;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
DeviceT::Device() : _init_count(0), _device_init(false),
|
||||
_gpu_mode(GPU_FORCE), _first_device(0),
|
||||
_last_device(0), _compiled(false) {
|
||||
_gpu_mode(GPU_FORCE), _first_device(0),
|
||||
_last_device(0), _platform_id(-1), _compiled(false) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
@ -67,6 +67,17 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,
|
||||
_particle_split=p_split;
|
||||
_cell_size=cell_size;
|
||||
_block_pair=block_pair;
|
||||
// support selecting platform through "package device" keyword.
|
||||
// "0:generic" will select platform 0 and tune for generic device
|
||||
// "1:fermi" will select platform 1 and tune for Nvidia Fermi gpu
|
||||
if (ocl_vendor) {
|
||||
char *sep = NULL;
|
||||
if ((sep = strstr(ocl_vendor,":"))) {
|
||||
*sep = '\0';
|
||||
_platform_id = atoi(ocl_vendor);
|
||||
ocl_vendor = sep+1;
|
||||
}
|
||||
}
|
||||
|
||||
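The block above implements the convention spelled out in the comment: everything before the ':' is the OpenCL platform id, everything after it is the vendor/tuning keyword. As a standalone illustration only (not code from this commit), the same split can be written with std::string instead of the strstr()/atoi() calls used here:

#include <cstdio>
#include <cstdlib>
#include <string>

int main() {
  std::string arg = "1:fermi";   // e.g. the value given to the "device" keyword
  int platform_id = -1;          // -1 means "keep the default platform"
  std::string vendor = arg;
  std::string::size_type sep = arg.find(':');
  if (sep != std::string::npos) {
    platform_id = std::atoi(arg.substr(0, sep).c_str());
    vendor = arg.substr(sep + 1);
  }
  std::printf("platform %d, tuning for '%s'\n", platform_id, vendor.c_str());
  return 0;
}
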
// Get the rank/size within the world
|
||||
MPI_Comm_rank(_comm_world,&_world_me);
|
||||
@ -119,8 +130,16 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,
|
||||
|
||||
// Time on the device only if 1 proc per gpu
|
||||
_time_device=true;
|
||||
|
||||
#if 0
|
||||
// XXX: the following setting triggers a memory leak with OpenCL and MPI
|
||||
// setting _time_device=true for all processes doesn't seem to be a
|
||||
// problem with either (no segfault, no (large) memory leak.
|
||||
// thus keeping this disabled for now. may need to review later.
|
||||
// 2018-07-23 <akohlmey@gmail.com>
|
||||
if (_procs_per_gpu>1)
|
||||
_time_device=false;
|
||||
#endif
|
||||
|
||||
// Set up a per device communicator
|
||||
MPI_Comm_split(node_comm,my_gpu,0,&_comm_gpu);
|
||||
@ -135,6 +154,9 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,
|
||||
return -7;
|
||||
#endif
|
||||
|
||||
if (gpu->set_platform_accelerator(_platform_id)!=UCL_SUCCESS)
|
||||
return -12;
|
||||
|
||||
if (gpu->set(my_gpu)!=UCL_SUCCESS)
|
||||
return -6;
|
||||
|
||||
@ -191,13 +213,15 @@ int DeviceT::set_ocl_params(char *ocl_vendor) {
|
||||
_ocl_vendor_string="-DUSE_OPENCL";
|
||||
int token_count=0;
|
||||
std::string params[13];
|
||||
char *pch = strtok(ocl_vendor,"\" ");
|
||||
char *pch = strtok(ocl_vendor,",");
|
||||
pch = strtok(NULL,",");
|
||||
if (pch == NULL) return -11;
|
||||
while (pch != NULL) {
|
||||
if (token_count==13)
|
||||
return -11;
|
||||
params[token_count]=pch;
|
||||
token_count++;
|
||||
pch = strtok(NULL,"\" ");
|
||||
pch = strtok(NULL,",");
|
||||
}
|
||||
_ocl_vendor_string+=" -DMEM_THREADS="+params[0]+
|
||||
" -DTHREADS_PER_ATOM="+params[1]+
|
||||
@ -656,7 +680,7 @@ int DeviceT::compile_kernels() {
|
||||
dev_program=new UCL_Program(*gpu);
|
||||
int success=dev_program->load_string(device,compile_string().c_str());
|
||||
if (success!=UCL_SUCCESS)
|
||||
return -4;
|
||||
return -6;
|
||||
k_zero.set_function(*dev_program,"kernel_zero");
|
||||
k_info.set_function(*dev_program,"kernel_info");
|
||||
_compiled=true;
|
||||
|
||||
@ -292,7 +292,7 @@ class Device {
|
||||
MPI_Comm _comm_world, _comm_replica, _comm_gpu;
|
||||
int _procs_per_gpu, _gpu_rank, _world_me, _world_size, _replica_me,
|
||||
_replica_size;
|
||||
int _gpu_mode, _first_device, _last_device, _nthreads;
|
||||
int _gpu_mode, _first_device, _last_device, _platform_id, _nthreads;
|
||||
double _particle_split;
|
||||
double _cpu_full;
|
||||
double _ptx_arch;
|
||||
|
||||
@ -127,10 +127,10 @@ void Neighbor::alloc(bool &success) {
|
||||
dev_packed.clear();
|
||||
success=success && (dev_packed.alloc((_max_nbors+2)*_max_atoms,*dev,
|
||||
_packed_permissions)==UCL_SUCCESS);
|
||||
dev_acc.clear();
|
||||
success=success && (dev_acc.alloc(_max_atoms,*dev,
|
||||
dev_ilist.clear();
|
||||
success=success && (dev_ilist.alloc(_max_atoms,*dev,
|
||||
UCL_READ_WRITE)==UCL_SUCCESS);
|
||||
_c_bytes+=dev_packed.row_bytes()+dev_acc.row_bytes();
|
||||
_c_bytes+=dev_packed.row_bytes()+dev_ilist.row_bytes();
|
||||
}
|
||||
if (_max_host>0) {
|
||||
nbor_host.clear();
|
||||
@ -197,7 +197,7 @@ void Neighbor::clear() {
|
||||
|
||||
host_packed.clear();
|
||||
host_acc.clear();
|
||||
dev_acc.clear();
|
||||
dev_ilist.clear();
|
||||
dev_nbor.clear();
|
||||
nbor_host.clear();
|
||||
dev_packed.clear();
|
||||
@ -281,7 +281,7 @@ void Neighbor::get_host(const int inum, int *ilist, int *numj,
|
||||
}
|
||||
UCL_D_Vec<int> acc_view;
|
||||
acc_view.view_offset(inum,dev_nbor,inum*2);
|
||||
ucl_copy(acc_view,host_acc,true);
|
||||
ucl_copy(acc_view,host_acc,inum*2,true);
|
||||
|
||||
UCL_H_Vec<int> host_view;
|
||||
host_view.alloc(_max_atoms,*dev,UCL_READ_WRITE);
|
||||
@ -289,7 +289,7 @@ void Neighbor::get_host(const int inum, int *ilist, int *numj,
|
||||
int i=ilist[ii];
|
||||
host_view[i] = ii;
|
||||
}
|
||||
ucl_copy(dev_acc,host_view,true);
|
||||
ucl_copy(dev_ilist,host_view,true);
|
||||
|
||||
time_nbor.stop();
|
||||
|
||||
@ -364,7 +364,7 @@ void Neighbor::get_host3(const int inum, const int nlist, int *ilist, int *numj,
|
||||
}
|
||||
UCL_D_Vec<int> acc_view;
|
||||
acc_view.view_offset(inum,dev_nbor,inum*2);
|
||||
ucl_copy(acc_view,host_acc,true);
|
||||
ucl_copy(acc_view,host_acc,inum*2,true);
|
||||
time_nbor.stop();
|
||||
|
||||
if (_use_packing==false) {
|
||||
|
||||
@ -110,7 +110,7 @@ class Neighbor {
|
||||
}
|
||||
if (_time_device) {
|
||||
time_nbor.add_to_total();
|
||||
time_kernel.add_to_total();
|
||||
if (_use_packing==false) time_kernel.add_to_total();
|
||||
if (_gpu_nbor==2) {
|
||||
time_hybrid1.add_to_total();
|
||||
time_hybrid2.add_to_total();
|
||||
@ -200,7 +200,7 @@ class Neighbor {
|
||||
/// Host storage for nbor counts (row 1) & accumulated neighbor counts (row2)
|
||||
UCL_H_Vec<int> host_acc;
|
||||
/// Device storage for accessing atom indices from the neighbor list (3-body)
|
||||
UCL_D_Vec<int> dev_acc;
|
||||
UCL_D_Vec<int> dev_ilist;
|
||||
|
||||
// ----------------- Data for GPU Neighbor Calculation ---------------
|
||||
|
||||
|
||||
@@ -119,6 +119,8 @@
#define BLOCK_ELLIPSE 128
#define MAX_SHARED_TYPES 11

#if (__CUDACC_VER_MAJOR__ < 9)

#ifdef _SINGLE_SINGLE
#define shfl_xor __shfl_xor
#else
@@ -132,6 +134,25 @@ ucl_inline double shfl_xor(double var, int laneMask, int width) {
}
#endif

#else

#ifdef _SINGLE_SINGLE
ucl_inline double shfl_xor(double var, int laneMask, int width) {
  return __shfl_xor_sync(0xffffffff, var, laneMask, width);
}
#else
ucl_inline double shfl_xor(double var, int laneMask, int width) {
  int2 tmp;
  tmp.x = __double2hiint(var);
  tmp.y = __double2loint(var);
  tmp.x = __shfl_xor_sync(0xffffffff,tmp.x,laneMask,width);
  tmp.y = __shfl_xor_sync(0xffffffff,tmp.y,laneMask,width);
  return __hiloint2double(tmp.x,tmp.y);
}
#endif

#endif

#endif

#endif

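For context, a minimal sketch (not part of this patch) of how the GPU kernels typically consume this wrapper: a warp-level XOR-butterfly reduction over the t_per_atom lanes that share one atom, which compiles against either branch above. The helper name warp_sum is illustrative only, and acctyp stands for whichever accumulation type (float or double) the kernels are built with.

ucl_inline acctyp warp_sum(acctyp val, const int t_per_atom) {
  // butterfly exchange: after log2(t_per_atom) steps every participating
  // lane holds the sum of all t_per_atom partial values
  for (int s = t_per_atom/2; s > 0; s >>= 1)
    val += shfl_xor(val, s, t_per_atom);
  return val;
}
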
@ -243,7 +243,7 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
this->k_three_end_vatom.run(&this->atom->x, &sw1, &sw2, &sw3,
|
||||
&map, &elem2param, &_nelements,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
|
||||
@ -252,7 +252,7 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
this->k_three_end.run(&this->atom->x, &sw1, &sw2, &sw3,
|
||||
&map, &elem2param, &_nelements,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
|
||||
|
||||
@ -544,7 +544,7 @@ __kernel void k_sw_three_end(const __global numtyp4 *restrict x_,
|
||||
const int nelements,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
@ -614,13 +614,13 @@ __kernel void k_sw_three_end(const __global numtyp4 *restrict x_,
|
||||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
@ -698,7 +698,7 @@ __kernel void k_sw_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
const int nelements,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
@ -768,13 +768,13 @@ __kernel void k_sw_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
||||
@ -272,7 +272,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->dev_short_nbor,
|
||||
&_eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||
&eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||
|
||||
ainum=this->ans->inum();
|
||||
nbor_pitch=this->nbor->nbor_pitch();
|
||||
@ -311,7 +311,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
|
||||
@ -320,7 +320,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
}
|
||||
|
||||
@ -696,7 +696,7 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
|
||||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
@ -777,13 +777,13 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_,
|
||||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
@ -941,7 +941,7 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
@ -1022,13 +1022,13 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
||||
@ -272,7 +272,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->dev_short_nbor,
|
||||
&_eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||
&eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||
|
||||
ainum=this->ans->inum();
|
||||
nbor_pitch=this->nbor->nbor_pitch();
|
||||
@ -311,7 +311,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &ts5, &cutsq,
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
|
||||
@ -320,7 +320,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &ts5, &cutsq,
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
}
|
||||
|
||||
@ -272,7 +272,7 @@ __kernel void k_tersoff_mod_zeta(const __global numtyp4 *restrict x_,
|
||||
|
||||
if (ii<inum) {
|
||||
int nbor_j, nbor_end, i, numj;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
@ -432,7 +432,7 @@ __kernel void k_tersoff_mod_repulsive(const __global numtyp4 *restrict x_,
|
||||
|
||||
if (ii<inum) {
|
||||
int nbor, nbor_end, i, numj;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,nbor_end,nbor);
|
||||
|
||||
@ -547,7 +547,7 @@ __kernel void k_tersoff_mod_three_center(const __global numtyp4 *restrict x_,
|
||||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
@ -702,7 +702,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
|
||||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
@ -740,7 +740,7 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
|
||||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end, k_end;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
@ -785,13 +785,13 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_,
|
||||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
@ -956,7 +956,7 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
@ -994,7 +994,7 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end, k_end;
|
||||
const int* nbor_mem = dev_packed;
|
||||
const __global int* nbor_mem = dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
@ -1039,13 +1039,13 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
||||
@ -297,7 +297,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->dev_short_nbor,
|
||||
&_eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||
&eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom);
|
||||
|
||||
ainum=this->ans->inum();
|
||||
nbor_pitch=this->nbor->nbor_pitch();
|
||||
@ -337,7 +337,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
|
||||
@ -346,7 +346,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq,
|
||||
&map, &elem2param, &_nelements, &_nparams, &_zetaij,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
}
|
||||
|
||||
@ -278,7 +278,7 @@ __kernel void k_tersoff_zbl_zeta(const __global numtyp4 *restrict x_,
|
||||
|
||||
if (ii<inum) {
|
||||
int nbor_j, nbor_end, i, numj;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
@ -445,7 +445,7 @@ __kernel void k_tersoff_zbl_repulsive(const __global numtyp4 *restrict x_,
|
||||
|
||||
if (ii<inum) {
|
||||
int nbor, nbor_end, i, numj;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,nbor_end,nbor);
|
||||
|
||||
@ -563,7 +563,7 @@ __kernel void k_tersoff_zbl_three_center(const __global numtyp4 *restrict x_,
|
||||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
@ -714,7 +714,7 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
|
||||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
@ -750,7 +750,7 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
|
||||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end, k_end;
|
||||
const int* nbor_mem=dev_packed;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
@ -795,13 +795,13 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_,
|
||||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
@ -959,7 +959,7 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
const __global acctyp4 *restrict zetaij,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
@ -995,7 +995,7 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
|
||||
if (ii<inum) {
|
||||
int i, numj, nbor_j, nbor_end, k_end;
|
||||
const int* nbor_mem = dev_packed;
|
||||
const __global int* nbor_mem = dev_packed;
|
||||
int offset_j=offset/t_per_atom;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
|
||||
n_stride,nbor_end,nbor_j);
|
||||
@ -1040,13 +1040,13 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
||||
@ -278,7 +278,7 @@ void VashishtaT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
this->k_three_end_vatom.run(&this->atom->x, ¶m1, ¶m2, ¶m3, ¶m4, ¶m5,
|
||||
&map, &elem2param, &_nelements,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
} else {
|
||||
@ -286,7 +286,7 @@ void VashishtaT::loop(const bool _eflag, const bool _vflag, const int evatom) {
|
||||
this->k_three_end.run(&this->atom->x, ¶m1, ¶m2, ¶m3, ¶m4, ¶m5,
|
||||
&map, &elem2param, &_nelements,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->nbor->dev_acc, &this->dev_short_nbor,
|
||||
&this->nbor->dev_ilist, &this->dev_short_nbor,
|
||||
&end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
|
||||
}
|
||||
|
||||
@ -554,7 +554,7 @@ __kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_,
|
||||
const int nelements,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
@ -623,13 +623,13 @@ __kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_,
|
||||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
@ -709,7 +709,7 @@ __kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
const int nelements,
|
||||
const __global int * dev_nbor,
|
||||
const __global int * dev_packed,
|
||||
const __global int * dev_acc,
|
||||
const __global int * dev_ilist,
|
||||
const __global int * dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
@ -778,13 +778,13 @@ __kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_,
|
||||
int nbor_k,numk;
|
||||
if (dev_nbor==dev_packed) {
|
||||
if (gpu_nbor) nbor_k=j+nbor_pitch;
|
||||
else nbor_k=dev_acc[j]+nbor_pitch;
|
||||
else nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
|
||||
k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
|
||||
nbor_k+=offset_k;
|
||||
} else {
|
||||
nbor_k=dev_acc[j]+nbor_pitch;
|
||||
nbor_k=dev_ilist[j]+nbor_pitch;
|
||||
numk=dev_nbor[nbor_k];
|
||||
nbor_k+=nbor_pitch;
|
||||
nbor_k=dev_nbor[nbor_k];
|
||||
|
||||
@ -158,7 +158,7 @@ if (NOT "${KOKKOS_INTERNAL_PATHS}" STREQUAL "")
|
||||
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_INTERNAL_PATHS})
|
||||
endif()
|
||||
if (NOT "${KOKKOS_INTERNAL_ADDTOPATH}" STREQUAL "")
|
||||
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=\"${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}\"")
|
||||
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}")
|
||||
endif()
|
||||
|
||||
if (CMAKE_CXX_STANDARD)
|
||||
|
||||
@ -292,7 +292,8 @@ public:
|
||||
|
||||
#if ! defined( KOKKOS_ENABLE_CUDA_LDG_INTRINSIC )
|
||||
if ( 0 == r ) {
|
||||
Kokkos::abort("Cuda const random access View using Cuda texture memory requires Kokkos to allocate the View's memory");
|
||||
//Kokkos::abort("Cuda const random access View using Cuda texture memory requires Kokkos to allocate the View's memory");
|
||||
return handle_type();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@ -40,7 +40,7 @@ version = '1.2.1'
|
||||
checksums = { \
|
||||
'1.1.0' : '533635721ee222d0ed2925a18fb5b294', \
|
||||
'1.2.0' : '68bf0db879da5e068a71281020239ae7', \
|
||||
'1.2.1' : 'bed76e7e76c545c36dd848a8f1fd35eb' \
|
||||
'1.2.1' : '85ac414fdada2d04619c8f936344df14', \
|
||||
}
|
||||
|
||||
# print error message or help
|
||||
|
||||
@ -4,9 +4,9 @@
|
||||
|
||||
latte_SYSINC =
|
||||
latte_SYSLIB = ../../lib/latte/filelink.o \
|
||||
-llatte -lifcore -lsvml -lompstub -limf -lmkl_intel_lp64 \
|
||||
-lmkl_intel_thread -lmkl_core -lmkl_intel_thread -lpthread \
|
||||
-openmp -O0
|
||||
-llatte -lifport -lifcore -lsvml -lompstub -limf \
|
||||
-lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core \
|
||||
-lmkl_intel_thread -lpthread -openmp
|
||||
latte_SYSPATH = -openmp -L${MKLROOT}/lib/intel64 -lmkl_lapack95_lp64 \
|
||||
-L/opt/intel/composer_xe_2013_sp1.2.144/compiler/lib/intel64
|
||||
|
||||
|
||||
lib/message/Install.py (new file, 118 lines)
@@ -0,0 +1,118 @@
#!/usr/bin/env python

# Install.py tool to build the CSlib library
# used to automate the steps described in the README file in this dir

from __future__ import print_function
import sys,os,re,subprocess

# help message

help = """
Syntax from src dir: make lib-message args="-m"
                 or: make lib-message args="-s -z"
Syntax from lib dir: python Install.py -m
                 or: python Install.py -s -z

specify zero or more options, order does not matter

  -m = parallel build of CSlib library
  -s = serial build of CSlib library
  -z = build CSlib library with ZMQ socket support, default = no ZMQ support

Example:

make lib-message args="-m -z"   # build parallel CSlib with ZMQ support
make lib-message args="-s"      # build serial CSlib with no ZMQ support
"""

# print error message or help

def error(str=None):
  if not str: print(help)
  else: print("ERROR",str)
  sys.exit()

# expand to full path name
# process leading '~' or relative path

def fullpath(path):
  return os.path.abspath(os.path.expanduser(path))

def which(program):
  def is_exe(fpath):
    return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

  fpath, fname = os.path.split(program)
  if fpath:
    if is_exe(program):
      return program
  else:
    for path in os.environ["PATH"].split(os.pathsep):
      path = path.strip('"')
      exe_file = os.path.join(path, program)
      if is_exe(exe_file):
        return exe_file

  return None

# parse args

args = sys.argv[1:]
nargs = len(args)
if nargs == 0: error()

mpiflag = False
serialflag = False
zmqflag = False

iarg = 0
while iarg < nargs:
  if args[iarg] == "-m":
    mpiflag = True
    iarg += 1
  elif args[iarg] == "-s":
    serialflag = True
    iarg += 1
  elif args[iarg] == "-z":
    zmqflag = True
    iarg += 1
  else: error()

if (not mpiflag and not serialflag):
  error("Must use either -m or -s flag")

if (mpiflag and serialflag):
  error("Cannot use -m and -s flag at the same time")

# build CSlib
# copy resulting lib to cslib/src/libmessage.a
# copy appropriate Makefile.lammps.* to Makefile.lammps

print("Building CSlib ...")
srcdir = fullpath("./cslib/src")

if mpiflag and zmqflag:
  cmd = "cd %s; make lib_parallel" % srcdir
elif mpiflag and not zmqflag:
  cmd = "cd %s; make lib_parallel zmq=no" % srcdir
elif not mpiflag and zmqflag:
  cmd = "cd %s; make lib_serial" % srcdir
elif not mpiflag and not zmqflag:
  cmd = "cd %s; make lib_serial zmq=no" % srcdir

print(cmd)
txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
print(txt.decode('UTF-8'))

if mpiflag: cmd = "cd %s; cp libcsmpi.a libmessage.a" % srcdir
else: cmd = "cd %s; cp libcsnompi.a libmessage.a" % srcdir
print(cmd)
txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
print(txt.decode('UTF-8'))

if zmqflag: cmd = "cp Makefile.lammps.zmq Makefile.lammps"
else: cmd = "cp Makefile.lammps.nozmq Makefile.lammps"
print(cmd)
txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
print(txt.decode('UTF-8'))

5
lib/message/Makefile.lammps.nozmq
Normal file
@ -0,0 +1,5 @@
|
||||
# Settings that the LAMMPS build will import when this package library is used

message_SYSINC =
message_SYSLIB =
message_SYSPATH =
|
||||
5
lib/message/Makefile.lammps.zmq
Normal file
@ -0,0 +1,5 @@
|
||||
# Settings that the LAMMPS build will import when this package library is used

message_SYSINC =
message_SYSLIB = -lzmq
message_SYSPATH =
|
||||
51
lib/message/README
Normal file
@ -0,0 +1,51 @@
|
||||
This directory contains the CSlib library which is required
to use the MESSAGE package and its client/server commands
in a LAMMPS input script.

The CSlib library is included in the LAMMPS distribution. A fuller
version including documentation and test programs is available at
http://cslib.sandia.gov. It was developed by Steve Plimpton at Sandia
National Laboratories.

You can type "make lib-message" from the src directory to see help on
how to build this library via make commands, or you can do the same
thing by typing "python Install.py" from within this directory, or you
can do it manually by following the instructions below.

The CSlib can optionally be built with support for sockets using
the open-source ZeroMQ (ZMQ) library. If it is not installed
on your system, it is easy to download and install.

Go to the ZMQ website for details: http://zeromq.org

-----------------

Instructions:

1. Compile CSlib from within cslib/src with one of the following:
   % make lib_parallel          # build parallel library with ZMQ socket support
   % make lib_serial            # build serial library with ZMQ socket support
   % make lib_parallel zmq=no   # build parallel lib with no ZMQ support
   % make lib_serial zmq=no     # build serial lib with no ZMQ support

2. Copy the produced cslib/src/libcsmpi.a or libcsnompi.a file to
   cslib/src/libmessage.a

3. Copy either lib/message/Makefile.lammps.zmq or Makefile.lammps.nozmq
   to lib/message/Makefile.lammps, depending on whether you
   built the library with ZMQ support or not.
   If your ZMQ library is not installed where the compiler and linker
   look by default, you can set the message_SYSINC and message_SYSPATH
   variables in Makefile.lammps to point to the dirs where the ZMQ
   include and library files are.

-----------------

When these steps are complete you can build LAMMPS
with the MESSAGE package installed:

% cd lammps/src
% make yes-message
% make mpi   (or whatever target you wish)

Note that if you download and unpack a new LAMMPS tarball, you will
need to re-build the CSlib in this dir.
|
||||
32
lib/message/cslib/LICENSE
Normal file
@ -0,0 +1,32 @@
|
||||
Program: CSlib client/server coupling library
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of Sandia,
|
||||
LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
|
||||
U.S. Government retains certain rights in this software. This
|
||||
software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Sandia Corporation nor the names of contributors
|
||||
to this software may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS''
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
23
lib/message/cslib/README
Normal file
@ -0,0 +1,23 @@
|
||||
This is the Client/Server messaging library (CSlib).

Only the source directory and license file are included here as part
of the LAMMPS distribution. The full CSlib distribution, including
documentation and test codes, can be found at the website:
http://cslib.sandia.gov (as of Aug 2018).

The contact author is

Steve Plimpton
Sandia National Laboratories
sjplimp@sandia.gov
http://www.sandia.gov/~sjplimp

The CSlib is distributed as open-source code under the modified
Berkeley Software Distribution (BSD) License. See the accompanying
LICENSE file.

This directory contains the following:

README    this file
LICENSE   modified BSD license
src       source files for library
|
||||
107
lib/message/cslib/src/Makefile
Normal file
@ -0,0 +1,107 @@
|
||||
# Makefile for CSlib = client/server messaging library
|
||||
# type "make help" for options
|
||||
|
||||
SHELL = /bin/sh
|
||||
|
||||
# ----------------------------------------
|
||||
# should only need to change this section
|
||||
# compiler/linker settings
|
||||
# ----------------------------------------
|
||||
|
||||
CC = g++
|
||||
CCFLAGS = -g -O3 -DZMQ_$(ZMQ) -DMPI_$(MPI)
|
||||
SHFLAGS = -fPIC
|
||||
ARCHIVE = ar
|
||||
ARCHFLAGS = -rc
|
||||
SHLIBFLAGS = -shared
|
||||
|
||||
# files
|
||||
|
||||
LIB = libcsmpi.a
|
||||
SHLIB = libcsmpi.so
|
||||
SRC = $(wildcard *.cpp)
|
||||
INC = $(wildcard *.h)
|
||||
OBJ = $(SRC:.cpp=.o)
|
||||
|
||||
# build with ZMQ support or not
|
||||
|
||||
zmq = yes
|
||||
ZMQ = $(shell echo $(zmq) | tr a-z A-Z)
|
||||
|
||||
ifeq ($(ZMQ),YES)
|
||||
ZMQLIB = -lzmq
|
||||
else
|
||||
CCFLAGS += -I./STUBS_ZMQ
|
||||
endif
|
||||
|
||||
# build with MPI support or not
|
||||
|
||||
mpi = yes
|
||||
MPI = $(shell echo $(mpi) | tr a-z A-Z)
|
||||
|
||||
ifeq ($(MPI),YES)
|
||||
CC = mpicxx
|
||||
else
|
||||
CCFLAGS += -I./STUBS_MPI
|
||||
LIB = libcsnompi.a
|
||||
SHLIB = libcsnompi.so
|
||||
endif
|
||||
|
||||
# targets
|
||||
|
||||
shlib: shlib_parallel shlib_serial
|
||||
|
||||
lib: lib_parallel lib_serial
|
||||
|
||||
all: shlib lib
|
||||
|
||||
help:
|
||||
@echo 'make default = shlib'
|
||||
@echo 'make shlib build 2 shared CSlibs: parallel & serial'
|
||||
@echo 'make lib build 2 static CSlibs: parallel & serial'
|
||||
@echo 'make all build 4 CSlibs: shlib and lib'
|
||||
@echo 'make shlib_parallel build shared parallel CSlib'
|
||||
@echo 'make shlib_serial build shared serial CSlib'
|
||||
@echo 'make lib_parallel build static parallel CSlib'
|
||||
@echo 'make lib_serial build static serial CSlib'
|
||||
@echo 'make ... zmq=no build w/out ZMQ support'
|
||||
@echo 'make clean remove all *.o files'
|
||||
@echo 'make clean-all remove *.o and lib files'
|
||||
@echo 'make tar create a tarball, 2 levels up'
|
||||
|
||||
shlib_parallel:
|
||||
$(MAKE) clean
|
||||
$(MAKE) shared zmq=$(zmq) mpi=yes
|
||||
|
||||
shlib_serial:
|
||||
$(MAKE) clean
|
||||
$(MAKE) shared zmq=$(zmq) mpi=no
|
||||
|
||||
lib_parallel:
|
||||
$(MAKE) clean
|
||||
$(MAKE) static zmq=$(zmq) mpi=yes
|
||||
|
||||
lib_serial:
|
||||
$(MAKE) clean
|
||||
$(MAKE) static zmq=$(zmq) mpi=no
|
||||
|
||||
static: $(OBJ)
|
||||
$(ARCHIVE) $(ARCHFLAGS) $(LIB) $(OBJ)
|
||||
|
||||
shared: $(OBJ)
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) -o $(SHLIB) $(OBJ) $(ZMQLIB)
|
||||
|
||||
clean:
|
||||
@rm -f *.o *.pyc
|
||||
|
||||
clean-all:
|
||||
@rm -f *.o *.pyc lib*.a lib*.so
|
||||
|
||||
tar:
|
||||
cd ../..; tar cvf cslib.tar cslib/README cslib/LICENSE \
|
||||
cslib/doc cslib/src cslib/test
|
||||
|
||||
# rules
|
||||
|
||||
%.o:%.cpp
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) -c $<
|
||||
96
lib/message/cslib/src/STUBS_MPI/mpi.h
Normal file
@ -0,0 +1,96 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// MPI constants and dummy functions
|
||||
|
||||
#ifndef MPI_DUMMY_H
|
||||
#define MPI_DUMMY_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
namespace CSLIB_NS {
|
||||
|
||||
typedef int MPI_Comm;
|
||||
typedef int MPI_Fint;
|
||||
typedef int MPI_Datatype;
|
||||
typedef int MPI_Status;
|
||||
typedef int MPI_Op;
|
||||
typedef int MPI_Info;
|
||||
|
||||
#define MPI_COMM_WORLD 0
|
||||
#define MPI_MAX_PORT_NAME 0
|
||||
#define MPI_INFO_NULL 0
|
||||
#define MPI_INT 1
|
||||
#define MPI_LONG_LONG 2
|
||||
#define MPI_FLOAT 3
|
||||
#define MPI_DOUBLE 4
|
||||
#define MPI_CHAR 5
|
||||
#define MPI_SUM 0
|
||||
|
||||
static void MPI_Init(int *, char ***) {}
|
||||
static MPI_Comm MPI_Comm_f2c(MPI_Comm world) {return world;}
|
||||
static void MPI_Comm_rank(MPI_Comm, int *) {}
|
||||
static void MPI_Comm_size(MPI_Comm, int *) {}
|
||||
|
||||
static void MPI_Open_port(MPI_Info, char *) {}
|
||||
static void MPI_Close_port(const char *) {}
|
||||
static void MPI_Comm_accept(const char *, MPI_Info, int,
|
||||
MPI_Comm, MPI_Comm *) {}
|
||||
static void MPI_Comm_connect(const char *, MPI_Info, int,
|
||||
MPI_Comm, MPI_Comm *) {}
|
||||
|
||||
static void MPI_Comm_split(MPI_Comm, int, int, MPI_Comm *) {}
|
||||
static void MPI_Comm_free(MPI_Comm *) {}
|
||||
|
||||
static void MPI_Send(const void *, int, MPI_Datatype, int, int, MPI_Comm) {}
|
||||
static void MPI_Recv(void *, int, MPI_Datatype, int, int,
|
||||
MPI_Comm, MPI_Status *) {}
|
||||
|
||||
static void MPI_Allreduce(const void *in, void *out, int, MPI_Datatype type,
|
||||
MPI_Op op, MPI_Comm)
|
||||
{
|
||||
if (type == MPI_INT) *((int *) out) = *((int *) in);
|
||||
}
|
||||
static void MPI_Scan(const void *in, void *out, int, MPI_Datatype intype,
|
||||
MPI_Op op,MPI_Comm)
|
||||
{
|
||||
if (intype == MPI_INT) *((int *) out) = *((int *) in);
|
||||
}
|
||||
|
||||
static void MPI_Bcast(void *, int, MPI_Datatype, int, MPI_Comm) {}
|
||||
static void MPI_Allgather(const void *in, int incount, MPI_Datatype intype,
|
||||
void *out, int, MPI_Datatype, MPI_Comm)
|
||||
{
|
||||
// assuming incount = 1
|
||||
if (intype == MPI_INT) *((int *) out) = *((int *) in);
|
||||
}
|
||||
static void MPI_Allgatherv(const void *in, int incount, MPI_Datatype intype,
|
||||
void *out, const int *, const int *,
|
||||
MPI_Datatype, MPI_Comm)
|
||||
{
|
||||
if (intype == MPI_INT) memcpy(out,in,incount*sizeof(int));
|
||||
else if (intype == MPI_LONG_LONG) memcpy(out,in,incount*sizeof(int64_t));
|
||||
else if (intype == MPI_FLOAT) memcpy(out,in,incount*sizeof(float));
|
||||
else if (intype == MPI_DOUBLE) memcpy(out,in,incount*sizeof(double));
|
||||
else if (intype == MPI_CHAR) memcpy(out,in,incount*sizeof(char));
|
||||
}
|
||||
|
||||
static void MPI_Abort(MPI_Comm, int) {exit(1);}
|
||||
static void MPI_Finalize() {}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
36
lib/message/cslib/src/STUBS_ZMQ/zmq.h
Normal file
@ -0,0 +1,36 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// ZMQ constants and dummy functions
|
||||
|
||||
#ifndef ZMQ_DUMMY_H
|
||||
#define ZMQ_DUMMY_H
|
||||
|
||||
namespace CSLIB_NS {
|
||||
|
||||
#define ZMQ_REQ 0
|
||||
#define ZMQ_REP 0
|
||||
|
||||
static void *zmq_ctx_new() {return NULL;}
|
||||
static void *zmq_connect(void *, char *) {return NULL;}
|
||||
static int zmq_bind(void *, char *) {return 0;}
|
||||
static void *zmq_socket(void *,int) {return NULL;}
|
||||
static void zmq_close(void *) {}
|
||||
static void zmq_ctx_destroy(void *) {}
|
||||
static void zmq_send(void *, void *, int, int) {}
|
||||
static void zmq_recv(void *, void *, int, int) {}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
768
lib/message/cslib/src/cslib.cpp
Normal file
@ -0,0 +1,768 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <mpi.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "cslib.h"
|
||||
#include "msg_file.h"
|
||||
#include "msg_zmq.h"
|
||||
#include "msg_mpi_one.h"
|
||||
#include "msg_mpi_two.h"
|
||||
|
||||
using namespace CSLIB_NS;
|
||||
|
||||
#define MAXTYPE 5 // # of defined field data types
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
CSlib::CSlib(int csflag, const char *mode, const void *ptr, const void *pcomm)
|
||||
{
|
||||
if (pcomm) myworld = (uint64_t) *((MPI_Comm *) pcomm);
|
||||
else myworld = 0;
|
||||
|
||||
#ifdef MPI_NO
|
||||
if (pcomm)
|
||||
error_all("constructor(): CSlib invoked with MPI_Comm "
|
||||
"but built w/out MPI support");
|
||||
#endif
|
||||
#ifdef MPI_YES // NOTE: this could be OK to allow ??
|
||||
// would allow a parallel app to invoke CSlib
|
||||
// in parallel and/or in serial
|
||||
if (!pcomm)
|
||||
error_all("constructor(): CSlib invoked w/out MPI_Comm "
|
||||
"but built with MPI support");
|
||||
#endif
|
||||
|
||||
client = server = 0;
|
||||
if (csflag == 0) client = 1;
|
||||
else if (csflag == 1) server = 1;
|
||||
else error_all("constructor(): Invalid client/server arg");
|
||||
|
||||
if (pcomm == NULL) {
|
||||
me = 0;
|
||||
nprocs = 1;
|
||||
|
||||
if (strcmp(mode,"file") == 0) msg = new MsgFile(csflag,ptr);
|
||||
else if (strcmp(mode,"zmq") == 0) msg = new MsgZMQ(csflag,ptr);
|
||||
else if (strcmp(mode,"mpi/one") == 0)
|
||||
error_all("constructor(): No mpi/one mode for serial lib usage");
|
||||
else if (strcmp(mode,"mpi/two") == 0)
|
||||
error_all("constructor(): No mpi/two mode for serial lib usage");
|
||||
else error_all("constructor(): Unknown mode");
|
||||
|
||||
} else if (pcomm) {
|
||||
MPI_Comm world = (MPI_Comm) myworld;
|
||||
MPI_Comm_rank(world,&me);
|
||||
MPI_Comm_size(world,&nprocs);
|
||||
|
||||
if (strcmp(mode,"file") == 0) msg = new MsgFile(csflag,ptr,world);
|
||||
else if (strcmp(mode,"zmq") == 0) msg = new MsgZMQ(csflag,ptr,world);
|
||||
else if (strcmp(mode,"mpi/one") == 0) msg = new MsgMPIOne(csflag,ptr,world);
|
||||
else if (strcmp(mode,"mpi/two") == 0) msg = new MsgMPITwo(csflag,ptr,world);
|
||||
else error_all("constructor(): Unknown mode");
|
||||
}
|
||||
|
||||
maxfield = 0;
|
||||
fieldID = fieldtype = fieldlen = fieldoffset = NULL;
|
||||
maxheader = 0;
|
||||
header = NULL;
|
||||
maxbuf = 0;
|
||||
buf = NULL;
|
||||
|
||||
recvcounts = displs = NULL;
|
||||
maxglobal = 0;
|
||||
allids = NULL;
|
||||
maxfieldbytes = 0;
|
||||
fielddata = NULL;
|
||||
|
||||
pad = "\0\0\0\0\0\0\0"; // just length 7 since will have trailing NULL
|
||||
|
||||
nsend = nrecv = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
CSlib::~CSlib()
|
||||
{
|
||||
deallocate_fields();
|
||||
sfree(header);
|
||||
sfree(buf);
|
||||
|
||||
sfree(recvcounts);
|
||||
sfree(displs);
|
||||
sfree(allids);
|
||||
sfree(fielddata);
|
||||
|
||||
delete msg;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::send(int msgID_caller, int nfield_caller)
|
||||
{
|
||||
if (nfield_caller < 0) error_all("send(): Invalid nfield");
|
||||
|
||||
msgID = msgID_caller;
|
||||
nfield = nfield_caller;
|
||||
allocate_fields();
|
||||
|
||||
fieldcount = 0;
|
||||
nbuf = 0;
|
||||
|
||||
if (fieldcount == nfield) send_message();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::pack_int(int id, int value)
|
||||
{
|
||||
pack(id,1,1,&value);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::pack_int64(int id, int64_t value)
|
||||
{
|
||||
pack(id,2,1,&value);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::pack_float(int id, float value)
|
||||
{
|
||||
pack(id,3,1,&value);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::pack_double(int id, double value)
|
||||
{
|
||||
pack(id,4,1,&value);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::pack_string(int id, char *value)
|
||||
{
|
||||
pack(id,5,strlen(value)+1,value);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::pack(int id, int ftype, int flen, void *data)
|
||||
{
|
||||
if (find_field(id,fieldcount) >= 0)
|
||||
error_all("pack(): Reuse of field ID");
|
||||
if (ftype < 1 || ftype > MAXTYPE) error_all("pack(): Invalid ftype");
|
||||
if (flen < 0) error_all("pack(): Invalid flen");
|
||||
|
||||
fieldID[fieldcount] = id;
|
||||
fieldtype[fieldcount] = ftype;
|
||||
fieldlen[fieldcount] = flen;
|
||||
|
||||
int nbytes,nbytesround;
|
||||
onefield(ftype,flen,nbytes,nbytesround);
|
||||
|
||||
memcpy(&buf[nbuf],data,nbytes);
|
||||
memcpy(&buf[nbuf+nbytes],pad,nbytesround-nbytes);
|
||||
nbuf += nbytesround;
|
||||
|
||||
fieldcount++;
|
||||
if (fieldcount == nfield) send_message();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::pack_parallel(int id, int ftype,
|
||||
int nlocal, int *ids, int nper, void *data)
|
||||
{
|
||||
int i,j,k,m;
|
||||
|
||||
if (find_field(id,fieldcount) >= 0)
|
||||
error_all("pack_parallel(): Reuse of field ID");
|
||||
if (ftype < 1 || ftype > MAXTYPE) error_all("pack_parallel(): Invalid ftype");
|
||||
if (nlocal < 0) error_all("pack_parallel(): Invalid nlocal");
|
||||
if (nper < 1) error_all("pack_parallel(): Invalid nper");
|
||||
|
||||
MPI_Comm world = (MPI_Comm) myworld;
|
||||
|
||||
// NOTE: check for overflow of maxglobal and flen
|
||||
|
||||
int nglobal;
|
||||
MPI_Allreduce(&nlocal,&nglobal,1,MPI_INT,MPI_SUM,world);
|
||||
int flen = nper*nglobal;
|
||||
|
||||
fieldID[fieldcount] = id;
|
||||
fieldtype[fieldcount] = ftype;
|
||||
fieldlen[fieldcount] = flen;
|
||||
|
||||
// nlocal datums, each of nper length, from all procs
|
||||
// final data in buf = datums for all natoms, ordered by ids
|
||||
|
||||
if (recvcounts == NULL) {
|
||||
recvcounts = (int *) smalloc(nprocs*sizeof(int));
|
||||
displs = (int *) smalloc(nprocs*sizeof(int));
|
||||
}
|
||||
|
||||
MPI_Allgather(&nlocal,1,MPI_INT,recvcounts,1,MPI_INT,world);
|
||||
|
||||
displs[0] = 0;
|
||||
for (int iproc = 1; iproc < nprocs; iproc++)
|
||||
displs[iproc] = displs[iproc-1] + recvcounts[iproc-1];
|
||||
|
||||
if (ids && nglobal > maxglobal) {
|
||||
sfree(allids);
|
||||
maxglobal = nglobal;
|
||||
// NOTE: maxglobal*sizeof(int) could overflow int
|
||||
allids = (int *) smalloc(maxglobal*sizeof(int));
|
||||
}
|
||||
|
||||
MPI_Allgatherv(ids,nlocal,MPI_INT,allids,
|
||||
recvcounts,displs,MPI_INT,world);
|
||||
|
||||
int nlocalsize = nper*nlocal;
|
||||
MPI_Allgather(&nlocalsize,1,MPI_INT,recvcounts,1,MPI_INT,world);
|
||||
|
||||
displs[0] = 0;
|
||||
for (int iproc = 1; iproc < nprocs; iproc++)
|
||||
displs[iproc] = displs[iproc-1] + recvcounts[iproc-1];
|
||||
|
||||
int nbytes,nbytesround;
|
||||
onefield(ftype,flen,nbytes,nbytesround);
|
||||
|
||||
if (ftype == 1) {
|
||||
int *alldata;
|
||||
if (ids) {
|
||||
if (nbytes > maxfieldbytes) {
|
||||
sfree(fielddata);
|
||||
maxfieldbytes = nbytes;
|
||||
fielddata = (char *) smalloc(maxfieldbytes);
|
||||
}
|
||||
alldata = (int *) fielddata;
|
||||
} else alldata = (int *) &buf[nbuf];
|
||||
MPI_Allgatherv(data,nlocalsize,MPI_INT,alldata,
|
||||
recvcounts,displs,MPI_INT,world);
|
||||
if (ids) {
|
||||
int *bufptr = (int *) &buf[nbuf];
|
||||
m = 0;
|
||||
for (i = 0; i < nglobal; i++) {
|
||||
j = (allids[i]-1) * nper;
|
||||
if (nper == 1) bufptr[j] = alldata[m++];
|
||||
else
|
||||
for (k = 0; k < nper; k++)
|
||||
bufptr[j++] = alldata[m++];
|
||||
}
|
||||
}
|
||||
|
||||
} else if (ftype == 2) {
|
||||
int64_t *alldata;
|
||||
if (ids) {
|
||||
if (nbytes > maxfieldbytes) {
|
||||
sfree(fielddata);
|
||||
maxfieldbytes = nbytes;
|
||||
fielddata = (char *) smalloc(maxfieldbytes);
|
||||
}
|
||||
alldata = (int64_t *) fielddata;
|
||||
} else alldata = (int64_t *) &buf[nbuf];
|
||||
// NOTE: may be just MPI_LONG on some machines
|
||||
MPI_Allgatherv(data,nlocalsize,MPI_LONG_LONG,alldata,
|
||||
recvcounts,displs,MPI_LONG_LONG,world);
|
||||
if (ids) {
|
||||
int64_t *bufptr = (int64_t *) &buf[nbuf];
|
||||
m = 0;
|
||||
for (i = 0; i < nglobal; i++) {
|
||||
j = (allids[i]-1) * nper;
|
||||
if (nper == 1) bufptr[j] = alldata[m++];
|
||||
else
|
||||
for (k = 0; k < nper; k++)
|
||||
bufptr[j++] = alldata[m++];
|
||||
}
|
||||
}
|
||||
|
||||
} else if (ftype == 3) {
|
||||
float *alldata;
|
||||
if (ids) {
|
||||
if (nbytes > maxfieldbytes) {
|
||||
sfree(fielddata);
|
||||
maxfieldbytes = nbytes;
|
||||
fielddata = (char *) smalloc(maxfieldbytes);
|
||||
}
|
||||
alldata = (float *) fielddata;
|
||||
} else alldata = (float *) &buf[nbuf];
|
||||
MPI_Allgatherv(data,nlocalsize,MPI_FLOAT,alldata,
|
||||
recvcounts,displs,MPI_FLOAT,world);
|
||||
if (ids) {
|
||||
float *bufptr = (float *) &buf[nbuf];
|
||||
m = 0;
|
||||
for (i = 0; i < nglobal; i++) {
|
||||
j = (allids[i]-1) * nper;
|
||||
if (nper == 1) bufptr[j] = alldata[m++];
|
||||
else
|
||||
for (k = 0; k < nper; k++)
|
||||
bufptr[j++] = alldata[m++];
|
||||
}
|
||||
}
|
||||
|
||||
} else if (ftype == 4) {
|
||||
double *alldata;
|
||||
if (ids) {
|
||||
if (nbytes > maxfieldbytes) {
|
||||
sfree(fielddata);
|
||||
maxfieldbytes = nbytes;
|
||||
fielddata = (char *) smalloc(maxfieldbytes);
|
||||
}
|
||||
alldata = (double *) fielddata;
|
||||
} else alldata = (double *) &buf[nbuf];
|
||||
MPI_Allgatherv(data,nlocalsize,MPI_DOUBLE,alldata,
|
||||
recvcounts,displs,MPI_DOUBLE,world);
|
||||
if (ids) {
|
||||
double *bufptr = (double *) &buf[nbuf];
|
||||
m = 0;
|
||||
for (i = 0; i < nglobal; i++) {
|
||||
j = (allids[i]-1) * nper;
|
||||
if (nper == 1) bufptr[j] = alldata[m++];
|
||||
else
|
||||
for (k = 0; k < nper; k++)
|
||||
bufptr[j++] = alldata[m++];
|
||||
}
|
||||
}
|
||||
|
||||
/* eventually ftype = BYTE, but not yet
|
||||
} else if (ftype == 5) {
|
||||
char *alldata;
|
||||
if (ids) {
|
||||
if (nbytes > maxfieldbytes) {
|
||||
sfree(fielddata);
|
||||
maxfieldbytes = nbytes;
|
||||
fielddata = (char *) smalloc(maxfieldbytes);
|
||||
}
|
||||
alldata = (char *) fielddata;
|
||||
} else alldata = (char *) &buf[nbuf];
|
||||
MPI_Allgatherv(data,nlocalsize,MPI_CHAR,alldata,
|
||||
recvcounts,displs,MPI_CHAR,world);
|
||||
if (ids) {
|
||||
char *bufptr = (char *) &buf[nbuf];
|
||||
m = 0;
|
||||
for (i = 0; i < nglobal; i++) {
|
||||
j = (allids[i]-1) * nper;
|
||||
memcpy(&bufptr[j],&alldata[m],nper);
|
||||
m += nper;
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
memcpy(&buf[nbuf+nbytes],pad,nbytesround-nbytes);
|
||||
nbuf += nbytesround;
|
||||
|
||||
fieldcount++;
|
||||
if (fieldcount == nfield) send_message();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::send_message()
|
||||
{
|
||||
// setup header message
|
||||
|
||||
int m = 0;
|
||||
header[m++] = msgID;
|
||||
header[m++] = nfield;
|
||||
for (int ifield = 0; ifield < nfield; ifield++) {
|
||||
header[m++] = fieldID[ifield];
|
||||
header[m++] = fieldtype[ifield];
|
||||
header[m++] = fieldlen[ifield];
|
||||
}
|
||||
|
||||
msg->send(nheader,header,nbuf,buf);
|
||||
nsend++;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int CSlib::recv(int &nfield_caller, int *&fieldID_caller,
|
||||
int *&fieldtype_caller, int *&fieldlen_caller)
|
||||
{
|
||||
msg->recv(maxheader,header,maxbuf,buf);
|
||||
nrecv++;
|
||||
|
||||
// unpack header message
|
||||
|
||||
int m = 0;
|
||||
msgID = header[m++];
|
||||
nfield = header[m++];
|
||||
allocate_fields();
|
||||
|
||||
int nbytes,nbytesround;
|
||||
|
||||
nbuf = 0;
|
||||
for (int ifield = 0; ifield < nfield; ifield++) {
|
||||
fieldID[ifield] = header[m++];
|
||||
fieldtype[ifield] = header[m++];
|
||||
fieldlen[ifield] = header[m++];
|
||||
fieldoffset[ifield] = nbuf;
|
||||
onefield(fieldtype[ifield],fieldlen[ifield],nbytes,nbytesround);
|
||||
nbuf += nbytesround;
|
||||
}
|
||||
|
||||
// return message parameters
|
||||
|
||||
nfield_caller = nfield;
|
||||
fieldID_caller = fieldID;
|
||||
fieldtype_caller = fieldtype;
|
||||
fieldlen_caller = fieldlen;
|
||||
|
||||
return msgID;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int CSlib::unpack_int(int id)
|
||||
{
|
||||
int ifield = find_field(id,nfield);
|
||||
if (ifield < 0) error_all("unpack_int(): Unknown field ID");
|
||||
if (fieldtype[ifield] != 1) error_all("unpack_int(): Mis-match of ftype");
|
||||
if (fieldlen[ifield] != 1) error_all("unpack_int(): Flen is not 1");
|
||||
|
||||
int *ptr = (int *) unpack(id);
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int64_t CSlib::unpack_int64(int id)
|
||||
{
|
||||
int ifield = find_field(id,nfield);
|
||||
if (ifield < 0) error_all("unpack_int64(): Unknown field ID");
|
||||
if (fieldtype[ifield] != 2) error_all("unpack_int64(): Mis-match of ftype");
|
||||
if (fieldlen[ifield] != 1) error_all("unpack_int64(): Flen is not 1");
|
||||
|
||||
int64_t *ptr = (int64_t *) unpack(id);
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
float CSlib::unpack_float(int id)
|
||||
{
|
||||
int ifield = find_field(id,nfield);
|
||||
if (ifield < 0) error_all("unpack_float(): Unknown field ID");
|
||||
if (fieldtype[ifield] != 3) error_all("unpack_float(): Mis-match of ftype");
|
||||
if (fieldlen[ifield] != 1) error_all("unpack_float(): Flen is not 1");
|
||||
|
||||
float *ptr = (float *) unpack(id);
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double CSlib::unpack_double(int id)
|
||||
{
|
||||
int ifield = find_field(id,nfield);
|
||||
if (ifield < 0) error_all("unpack_double(): Unknown field ID");
|
||||
if (fieldtype[ifield] != 4) error_all("unpack_double(): Mis-match of ftype");
|
||||
if (fieldlen[ifield] != 1) error_all("unpack_double(): Flen is not 1");
|
||||
|
||||
double *ptr = (double *) unpack(id);
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
char *CSlib::unpack_string(int id)
|
||||
{
|
||||
int ifield = find_field(id,nfield);
|
||||
if (ifield < 0) error_all("unpack_string(): Unknown field ID");
|
||||
if (fieldtype[ifield] != 5) error_all("unpack_string(): Mis-match of ftype");
|
||||
|
||||
char *ptr = (char *) unpack(id);
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void *CSlib::unpack(int id)
|
||||
{
|
||||
int ifield = find_field(id,nfield);
|
||||
if (ifield < 0) error_all("unpack(): Unknown field ID");
|
||||
return &buf[fieldoffset[ifield]];
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::unpack(int id, void *data)
|
||||
{
|
||||
int ifield = find_field(id,nfield);
|
||||
if (ifield < 0) error_all("unpack(): Unknown field ID");
|
||||
|
||||
int ftype = fieldtype[ifield];
|
||||
int nbytes = fieldlen[ifield];
|
||||
if (ftype == 1) nbytes *= sizeof(int);
|
||||
else if (ftype == 2) nbytes *= sizeof(int64_t);
|
||||
else if (ftype == 3) nbytes *= sizeof(float);
|
||||
else if (ftype == 4) nbytes *= sizeof(double);
|
||||
memcpy(data,&buf[fieldoffset[ifield]],nbytes);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::unpack_parallel(int id, int nlocal, int *ids, int nper, void *data)
|
||||
{
|
||||
int i,j,k,m;
|
||||
|
||||
int ifield = find_field(id,nfield);
|
||||
if (ifield < 0) error_all("unpack_parallel(): Unknown field ID");
|
||||
if (nlocal < 0) error_all("unpack_parallel(): Invalid nlocal");
|
||||
if (nper < 1) error_all("unpack_parallel(): Invalid nper");
|
||||
|
||||
MPI_Comm world = (MPI_Comm) myworld;
|
||||
|
||||
int upto;
|
||||
if (!ids) {
|
||||
MPI_Scan(&nlocal,&upto,1,MPI_INT,MPI_SUM,world);
|
||||
upto -= nlocal;
|
||||
}
|
||||
|
||||
if (fieldtype[ifield] == 1) {
|
||||
int *local = (int *) data;
|
||||
int *global = (int *) &buf[fieldoffset[ifield]];
|
||||
if (!ids) memcpy(local,&global[nper*upto],nper*nlocal*sizeof(int));
|
||||
else {
|
||||
m = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
j = (ids[i]-1) * nper;
|
||||
if (nper == 1) local[m++] = global[j];
|
||||
else
|
||||
for (k = 0; k < nper; k++)
|
||||
local[m++] = global[j++];
|
||||
}
|
||||
}
|
||||
|
||||
} else if (fieldtype[ifield] == 2) {
|
||||
int64_t *local = (int64_t *) data;
|
||||
int64_t *global = (int64_t *) &buf[fieldoffset[ifield]];
|
||||
if (!ids) memcpy(local,&global[nper*upto],nper*nlocal*sizeof(int64_t));
|
||||
else {
|
||||
m = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
j = (ids[i]-1) * nper;
|
||||
if (nper == 1) local[m++] = global[j];
|
||||
else
|
||||
for (k = 0; k < nper; k++)
|
||||
local[m++] = global[j++];
|
||||
}
|
||||
}
|
||||
|
||||
} else if (fieldtype[ifield] == 3) {
|
||||
float *local = (float *) data;
|
||||
float *global = (float *) &buf[fieldoffset[ifield]];
|
||||
if (!ids) memcpy(local,&global[nper*upto],nper*nlocal*sizeof(float));
|
||||
else {
|
||||
m = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
j = (ids[i]-1) * nper;
|
||||
if (nper == 1) local[m++] = global[j];
|
||||
else
|
||||
for (k = 0; k < nper; k++)
|
||||
local[m++] = global[j++];
|
||||
}
|
||||
}
|
||||
|
||||
} else if (fieldtype[ifield] == 4) {
|
||||
double *local = (double *) data;
|
||||
double *global = (double *) &buf[fieldoffset[ifield]];
|
||||
if (!ids) memcpy(local,&global[nper*upto],nper*nlocal*sizeof(double));
|
||||
else {
|
||||
m = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
j = (ids[i]-1) * nper;
|
||||
if (nper == 1) local[m++] = global[j];
|
||||
else
|
||||
for (k = 0; k < nper; k++)
|
||||
local[m++] = global[j++];
|
||||
}
|
||||
}
|
||||
|
||||
/* eventually ftype = BYTE, but not yet
|
||||
} else if (fieldtype[ifield] == 5) {
|
||||
char *local = (char *) data;
|
||||
char *global = (char *) &buf[fieldoffset[ifield]];
|
||||
if (!ids) memcpy(local,&global[nper*upto],nper*nlocal*sizeof(char));
|
||||
else {
|
||||
m = 0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
j = (ids[i]-1) * nper;
|
||||
memcpy(&local[m],&global[j],nper);
|
||||
m += nper;
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int CSlib::extract(int flag)
|
||||
{
|
||||
if (flag == 1) return nsend;
|
||||
if (flag == 2) return nrecv;
|
||||
error_all("extract(): Invalid flag");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::onefield(int ftype, int flen, int &nbytes, int &nbytesround)
|
||||
{
|
||||
int64_t bigbytes,bigbytesround;
|
||||
int64_t biglen = flen;
|
||||
|
||||
if (ftype == 1) bigbytes = biglen * sizeof(int);
|
||||
else if (ftype == 2) bigbytes = biglen * sizeof(int64_t);
|
||||
else if (ftype == 3) bigbytes = biglen * sizeof(float);
|
||||
else if (ftype == 4) bigbytes = biglen * sizeof(double);
|
||||
else if (ftype == 5) bigbytes = biglen * sizeof(char);
|
||||
bigbytesround = roundup(bigbytes,8);
|
||||
|
||||
if (nbuf + bigbytesround > INT_MAX)
|
||||
error_all("pack(): Message size exceeds 32-bit integer limit");
|
||||
|
||||
nbytes = (int) bigbytes;
|
||||
nbytesround = (int) bigbytesround;
|
||||
if (nbuf + nbytesround > maxbuf) {
|
||||
maxbuf = nbuf + nbytesround;
|
||||
buf = (char *) srealloc(buf,maxbuf);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int CSlib::find_field(int id, int n)
|
||||
{
|
||||
int ifield;
|
||||
for (ifield = 0; ifield < n; ifield++)
|
||||
if (id == fieldID[ifield]) return ifield;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::allocate_fields()
|
||||
{
|
||||
int64_t bigbytes = (2 + 3*((int64_t) nfield)) * sizeof(int);
|
||||
if (bigbytes > INT_MAX)
|
||||
error_all("send(): Message header size exceeds 32-bit integer limit");
|
||||
|
||||
nheader = 2;
|
||||
nheader += 3 * nfield;
|
||||
|
||||
if (nfield > maxfield) {
|
||||
deallocate_fields();
|
||||
maxfield = nfield;
|
||||
fieldID = new int[maxfield];
|
||||
fieldtype = new int[maxfield];
|
||||
fieldlen = new int[maxfield];
|
||||
fieldoffset = new int[maxfield];
|
||||
}
|
||||
|
||||
if (nheader > maxheader) {
|
||||
sfree(header);
|
||||
maxheader = nheader;
|
||||
header = (int *) smalloc(maxheader*sizeof(int));
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::deallocate_fields()
|
||||
{
|
||||
delete [] fieldID;
|
||||
delete [] fieldtype;
|
||||
delete [] fieldlen;
|
||||
delete [] fieldoffset;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void *CSlib::smalloc(int nbytes)
|
||||
{
|
||||
if (nbytes == 0) return NULL;
|
||||
void *ptr = malloc(nbytes);
|
||||
if (ptr == NULL) {
|
||||
char str[128];
|
||||
sprintf(str,"malloc(): Failed to allocate %d bytes",nbytes);
|
||||
error_one(str);
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void *CSlib::srealloc(void *ptr, int nbytes)
|
||||
{
|
||||
if (nbytes == 0) {
|
||||
sfree(ptr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ptr = realloc(ptr,nbytes);
|
||||
if (ptr == NULL) {
|
||||
char str[128];
|
||||
sprintf(str,"realloc(): Failed to reallocate %d bytes",nbytes);
|
||||
error_one(str);
|
||||
}
|
||||
return ptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::sfree(void *ptr)
|
||||
{
|
||||
if (ptr == NULL) return;
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::error_all(const char *str)
|
||||
{
|
||||
if (me == 0) printf("CSlib ERROR: %s\n",str);
|
||||
MPI_Comm world = (MPI_Comm) myworld;
|
||||
MPI_Abort(world,1);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void CSlib::error_one(const char *str)
|
||||
{
|
||||
printf("CSlib ERROR: %s\n",str);
|
||||
MPI_Comm world = (MPI_Comm) myworld;
|
||||
MPI_Abort(world,1);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
round N up to multiple of nalign and return it
|
||||
NOTE: see mapreduce/src/keyvalue.cpp for doing this as uint64_t
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int64_t CSlib::roundup(int64_t n, int nalign)
|
||||
{
|
||||
if (n % nalign == 0) return n;
|
||||
n = (n/nalign + 1) * nalign;
|
||||
return n;
|
||||
}
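
A minimal client-side sketch of how the CSlib class above might be driven, assuming "file" mode with a hypothetical exchange file name "tmp.couple" and a server already running; the field IDs and values are invented for illustration:

// Hypothetical client sketch (not part of the CSlib sources).
// Assumes "file" mode, where ptr is taken to be the exchange file name.
#include "cslib.h"
using namespace CSLIB_NS;

int main()
{
  // csflag = 0 -> client, csflag = 1 -> server; NULL comm -> serial usage
  CSlib cs(0,"file","tmp.couple",NULL);

  // declare message ID 1 with 2 fields, then pack each field;
  // the message is sent once the last declared field is packed
  cs.send(1,2);
  cs.pack_int(1,100);        // field ID 1 = one int
  cs.pack_double(2,3.5);     // field ID 2 = one double

  // block on the server's reply and unpack one field from it
  int nfield,*fieldID,*fieldtype,*fieldlen;
  int msgID = cs.recv(nfield,fieldID,fieldtype,fieldlen);
  double answer = cs.unpack_double(2);

  return 0;
}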
|
||||
87
lib/message/cslib/src/cslib.h
Normal file
@ -0,0 +1,87 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef CSLIB_H
|
||||
#define CSLIB_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
namespace CSLIB_NS {
|
||||
|
||||
class CSlib {
|
||||
public:
|
||||
int nsend,nrecv;
|
||||
|
||||
CSlib(int, const char *, const void *, const void *);
|
||||
~CSlib();
|
||||
|
||||
void send(int, int);
|
||||
|
||||
void pack_int(int, int);
|
||||
void pack_int64(int, int64_t);
|
||||
void pack_float(int, float);
|
||||
void pack_double(int, double);
|
||||
void pack_string(int, char *);
|
||||
void pack(int, int, int, void *);
|
||||
void pack_parallel(int, int, int, int *, int, void *);
|
||||
|
||||
int recv(int &, int *&, int *&, int *&);
|
||||
|
||||
int unpack_int(int);
|
||||
int64_t unpack_int64(int);
|
||||
float unpack_float(int);
|
||||
double unpack_double(int);
|
||||
char *unpack_string(int);
|
||||
void *unpack(int);
|
||||
void unpack(int, void *);
|
||||
void unpack_parallel(int, int, int *, int, void *);
|
||||
|
||||
int extract(int);
|
||||
|
||||
private:
|
||||
uint64_t myworld; // really MPI_Comm, but avoids use of mpi.h in this file
|
||||
// so apps can include this file w/ no MPI on system
|
||||
int me,nprocs;
|
||||
int client,server;
|
||||
int nfield,maxfield;
|
||||
int msgID,fieldcount;
|
||||
int nheader,maxheader;
|
||||
int nbuf,maxbuf;
|
||||
int maxglobal,maxfieldbytes;
|
||||
int *fieldID,*fieldtype,*fieldlen,*fieldoffset;
|
||||
int *header;
|
||||
int *recvcounts,*displs; // nprocs size for Allgathers
|
||||
int *allids; // nglobal size for pack_parallel()
|
||||
char *buf; // maxbuf size for msg with all fields
|
||||
char *fielddata; // maxfieldbytes size for one global field
|
||||
const char *pad;
|
||||
|
||||
class Msg *msg;
|
||||
|
||||
void send_message();
|
||||
void onefield(int, int, int &, int &);
|
||||
int find_field(int, int);
|
||||
void allocate_fields();
|
||||
void deallocate_fields();
|
||||
int64_t roundup(int64_t, int);
|
||||
void *smalloc(int);
|
||||
void *srealloc(void *, int);
|
||||
void sfree(void *);
|
||||
void error_all(const char *);
|
||||
void error_one(const char *);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
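
A matching server-side sketch against the interface declared above, under the same assumptions as the client sketch shown after cslib.cpp (hypothetical "file" mode exchange file, invented field IDs):

// Hypothetical server sketch (not part of the CSlib sources),
// mirroring the client sketch shown after cslib.cpp above.
#include "cslib.h"
using namespace CSLIB_NS;

int main()
{
  // csflag = 1 -> server; same hypothetical exchange file as the client
  CSlib cs(1,"file","tmp.couple",NULL);

  // wait for a client message, then unpack a field by its ID
  int nfield,*fieldID,*fieldtype,*fieldlen;
  int msgID = cs.recv(nfield,fieldID,fieldtype,fieldlen);
  int flag = cs.unpack_int(1);

  // reply with a single double-valued field
  cs.send(msgID,1);
  cs.pack_double(2,2.0*flag);

  return 0;
}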
|
||||
362
lib/message/cslib/src/cslib.py
Normal file
@ -0,0 +1,362 @@
|
||||
# ------------------------------------------------------------------------
|
||||
# CSlib - Client/server library for code coupling
|
||||
# http://cslib.sandia.gov, Sandia National Laboratories
|
||||
# Steve Plimpton, sjplimp@sandia.gov
|
||||
#
|
||||
# Copyright 2018 National Technology & Engineering Solutions of
|
||||
# Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
# NTESS, the U.S. Government retains certain rights in this software.
|
||||
# This software is distributed under the modified Berkeley Software
|
||||
# Distribution (BSD) License.
|
||||
#
|
||||
# See the README file in the top-level CSlib directory.
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
# Python wrapper on CSlib library via ctypes
|
||||
|
||||
# ctypes and Numpy data types:
|
||||
# 32-bit int = c_int = np.intc = np.int32
|
||||
# 64-bit int = c_longlong = np.int64
|
||||
# 32-bit floating point = c_float = np.float32
|
||||
# 64-bit floating point = c_double = np.float = np.float64
|
||||
|
||||
import sys,traceback
|
||||
from ctypes import *
|
||||
|
||||
# Numpy and mpi4py packages may not exist
|
||||
|
||||
try:
|
||||
import numpy as np
|
||||
numpyflag = 1
|
||||
except:
|
||||
numpyflag = 0
|
||||
|
||||
try:
|
||||
from mpi4py import MPI
|
||||
mpi4pyflag = 1
|
||||
except:
|
||||
mpi4pyflag = 0
|
||||
|
||||
# wrapper class
|
||||
|
||||
class CSlib:
|
||||
|
||||
# instantiate CSlib thru its C-interface
|
||||
|
||||
def __init__(self,csflag,mode,ptr,comm):
|
||||
|
||||
# load the CSlib shared library (libcsmpi.so or libcsnompi.so)
|
||||
|
||||
try:
|
||||
if comm: self.lib = CDLL("libcsmpi.so",RTLD_GLOBAL)
|
||||
else: self.lib = CDLL("libcsnompi.so",RTLD_GLOBAL)
|
||||
except:
|
||||
etype,value,tb = sys.exc_info()
|
||||
traceback.print_exception(etype,value,tb)
|
||||
raise OSError,"Could not load CSlib dynamic library"
|
||||
|
||||
# define ctypes API for each library method
|
||||
|
||||
self.lib.cslib_open.argtypes = [c_int,c_char_p,c_void_p,c_void_p,
|
||||
POINTER(c_void_p)]
|
||||
self.lib.cslib_open.restype = None
|
||||
|
||||
self.lib.cslib_close.argtypes = [c_void_p]
|
||||
self.lib.cslib_close.restype = None
|
||||
|
||||
self.lib.cslib_send.argtypes = [c_void_p,c_int,c_int]
|
||||
self.lib.cslib_send.restype = None
|
||||
|
||||
self.lib.cslib_pack_int.argtypes = [c_void_p,c_int,c_int]
|
||||
self.lib.cslib_pack_int.restype = None
|
||||
|
||||
self.lib.cslib_pack_int64.argtypes = [c_void_p,c_int,c_longlong]
|
||||
self.lib.cslib_pack_int64.restype = None
|
||||
|
||||
self.lib.cslib_pack_float.argtypes = [c_void_p,c_int,c_float]
|
||||
self.lib.cslib_pack_float.restype = None
|
||||
|
||||
self.lib.cslib_pack_double.argtypes = [c_void_p,c_int,c_double]
|
||||
self.lib.cslib_pack_double.restype = None
|
||||
|
||||
self.lib.cslib_pack_string.argtypes = [c_void_p,c_int,c_char_p]
|
||||
self.lib.cslib_pack_string.restype = None
|
||||
|
||||
self.lib.cslib_pack.argtypes = [c_void_p,c_int,c_int,c_int,c_void_p]
|
||||
self.lib.cslib_pack.restype = None
|
||||
|
||||
self.lib.cslib_pack_parallel.argtypes = [c_void_p,c_int,c_int,c_int,
|
||||
POINTER(c_int),c_int,c_void_p]
|
||||
self.lib.cslib_pack_parallel.restype = None
|
||||
|
||||
self.lib.cslib_recv.argtypes = [c_void_p,POINTER(c_int),
|
||||
POINTER(POINTER(c_int)),
|
||||
POINTER(POINTER(c_int)),
|
||||
POINTER(POINTER(c_int))]
|
||||
self.lib.cslib_recv.restype = c_int
|
||||
|
||||
self.lib.cslib_unpack_int.argtypes = [c_void_p,c_int]
|
||||
self.lib.cslib_unpack_int.restype = c_int
|
||||
|
||||
self.lib.cslib_unpack_int64.argtypes = [c_void_p,c_int]
|
||||
self.lib.cslib_unpack_int64.restype = c_longlong
|
||||
|
||||
self.lib.cslib_unpack_float.argtypes = [c_void_p,c_int]
|
||||
self.lib.cslib_unpack_float.restype = c_float
|
||||
|
||||
self.lib.cslib_unpack_double.argtypes = [c_void_p,c_int]
|
||||
self.lib.cslib_unpack_double.restype = c_double
|
||||
|
||||
self.lib.cslib_unpack_string.argtypes = [c_void_p,c_int]
|
||||
self.lib.cslib_unpack_string.restype = c_char_p
|
||||
|
||||
# override return in unpack()
|
||||
self.lib.cslib_unpack.argtypes = [c_void_p,c_int]
|
||||
self.lib.cslib_unpack.restype = c_void_p
|
||||
|
||||
self.lib.cslib_unpack_data.argtypes = [c_void_p,c_int,c_void_p]
|
||||
self.lib.cslib_unpack_data.restype = None
|
||||
|
||||
# override last arg in unpack_parallel()
|
||||
self.lib.cslib_unpack_parallel.argtypes = [c_void_p,c_int,c_int,
|
||||
POINTER(c_int),c_int,c_void_p]
|
||||
self.lib.cslib_unpack_parallel.restype = None
|
||||
|
||||
self.lib.cslib_extract.argtypes = [c_void_p,c_int]
|
||||
self.lib.cslib_extract.restype = c_int
|
||||
|
||||
# create an instance of CSlib with or w/out MPI communicator
|
||||
|
||||
self.cs = c_void_p()
|
||||
|
||||
if not comm:
|
||||
self.lib.cslib_open(csflag,mode,ptr,None,byref(self.cs))
|
||||
elif not mpi4pyflag:
|
||||
print "Cannot pass MPI communicator to CSlib w/out mpi4py package"
|
||||
sys.exit()
|
||||
else:
|
||||
address = MPI._addressof(comm)
|
||||
comm_ptr = c_void_p(address)
|
||||
if mode == "mpi/one":
|
||||
address = MPI._addressof(ptr)
|
||||
ptrcopy = c_void_p(address)
|
||||
else: ptrcopy = ptr
|
||||
self.lib.cslib_open(csflag,mode,ptrcopy,comm_ptr,byref(self.cs))
|
||||
|
||||
# destroy instance of CSlib
|
||||
|
||||
def __del__(self):
|
||||
if self.cs: self.lib.cslib_close(self.cs)
|
||||
|
||||
def close(self):
|
||||
self.lib.cslib_close(self.cs)
|
||||
self.lib = None
|
||||
|
||||
# send a message
|
||||
|
||||
def send(self,msgID,nfield):
|
||||
self.nfield = nfield
|
||||
self.lib.cslib_send(self.cs,msgID,nfield)
|
||||
|
||||
# pack one field of message
|
||||
|
||||
def pack_int(self,id,value):
|
||||
self.lib.cslib_pack_int(self.cs,id,value)
|
||||
|
||||
def pack_int64(self,id,value):
|
||||
self.lib.cslib_pack_int64(self.cs,id,value)
|
||||
|
||||
def pack_float(self,id,value):
|
||||
self.lib.cslib_pack_float(self.cs,id,value)
|
||||
|
||||
def pack_double(self,id,value):
|
||||
self.lib.cslib_pack_double(self.cs,id,value)
|
||||
|
||||
def pack_string(self,id,value):
|
||||
self.lib.cslib_pack_string(self.cs,id,value)
|
||||
|
||||
def pack(self,id,ftype,flen,data):
|
||||
cdata = self.data_convert(ftype,flen,data)
|
||||
self.lib.cslib_pack(self.cs,id,ftype,flen,cdata)
|
||||
|
||||
def pack_parallel(self,id,ftype,nlocal,ids,nper,data):
|
||||
cids = self.data_convert(1,nlocal,ids)
|
||||
cdata = self.data_convert(ftype,nper*nlocal,data)
|
||||
self.lib.cslib_pack_parallel(self.cs,id,ftype,nlocal,cids,nper,cdata)
|
||||
|
||||
# convert input data to a ctypes vector to pass to CSlib
|
||||
|
||||
def data_convert(self,ftype,flen,data):
|
||||
|
||||
# tflag = type of data
|
||||
# tflag = 1 if data is list or tuple
|
||||
# tflag = 2 if data is Numpy array
|
||||
# tflag = 3 if data is ctypes vector
|
||||
# same usage of tflag as in unpack function
|
||||
|
||||
txttype = str(type(data))
|
||||
if "numpy" in txttype: tflag = 2
|
||||
elif "c_" in txttype: tflag = 3
|
||||
else: tflag = 1
|
||||
|
||||
# create ctypes vector out of data to pass to lib
|
||||
# cdata = ctypes vector to return
|
||||
# NOTE: error check on ftype and tflag everywhere, also flen
|
||||
|
||||
if ftype == 1:
|
||||
if tflag == 1: cdata = (flen * c_int)(*data)
|
||||
elif tflag == 2: cdata = data.ctypes.data_as(POINTER(c_int))
|
||||
elif tflag == 3: cdata = data
|
||||
elif ftype == 2:
|
||||
if tflag == 1: cdata = (flen * c_longlong)(*data)
|
||||
elif tflag == 2: cdata = data.ctypes.data_as(POINTER(c_longlong))
|
||||
elif tflag == 3: cdata = data
|
||||
elif ftype == 3:
|
||||
if tflag == 1: cdata = (flen * c_float)(*data)
|
||||
elif tflag == 2: cdata = data.ctypes.data_as(POINTER(c_float))
|
||||
elif tflag == 3: cdata = data
|
||||
elif ftype == 4:
|
||||
if tflag == 1: cdata = (flen * c_double)(*data)
|
||||
elif tflag == 2: cdata = data.ctypes.data_as(POINTER(c_double))
|
||||
elif tflag == 3: cdata = data
|
||||
|
||||
return cdata
|
||||
|
||||
# receive a message
|
||||
|
||||
def recv(self):
|
||||
self.lib.cslib_recv.restype = c_int
|
||||
nfield = c_int()
|
||||
fieldID = POINTER(c_int)()
|
||||
fieldtype = POINTER(c_int)()
|
||||
fieldlen = POINTER(c_int)()
|
||||
msgID = self.lib.cslib_recv(self.cs,byref(nfield),
|
||||
byref(fieldID),byref(fieldtype),byref(fieldlen))
|
||||
|
||||
# copy returned C args to native Python int and lists
|
||||
# store them in class so unpack() methods can access the info
|
||||
|
||||
self.nfield = nfield = nfield.value
|
||||
self.fieldID = fieldID[:nfield]
|
||||
self.fieldtype = fieldtype[:nfield]
|
||||
self.fieldlen = fieldlen[:nfield]
|
||||
|
||||
return msgID,self.nfield,self.fieldID,self.fieldtype,self.fieldlen
|
||||
|
||||
# unpack one field of message
|
||||
# tflag = type of data to return
|
||||
# 3 = ctypes vector is default, since no conversion required
|
||||
|
||||
def unpack_int(self,id):
|
||||
return self.lib.cslib_unpack_int(self.cs,id)
|
||||
|
||||
def unpack_int64(self,id):
|
||||
return self.lib.cslib_unpack_int64(self.cs,id)
|
||||
|
||||
def unpack_float(self,id):
|
||||
return self.lib.cslib_unpack_float(self.cs,id)
|
||||
|
||||
def unpack_double(self,id):
|
||||
return self.lib.cslib_unpack_double(self.cs,id)
|
||||
|
||||
def unpack_string(self,id):
|
||||
return self.lib.cslib_unpack_string(self.cs,id)
|
||||
|
||||
def unpack(self,id,tflag=3):
|
||||
index = self.fieldID.index(id)
|
||||
|
||||
# reset data type of return so can morph by tflag
|
||||
# cannot do this for the generic c_void_p returned by CSlib
|
||||
|
||||
if self.fieldtype[index] == 1:
|
||||
self.lib.cslib_unpack.restype = POINTER(c_int)
|
||||
elif self.fieldtype[index] == 2:
|
||||
self.lib.cslib_unpack.restype = POINTER(c_longlong)
|
||||
elif self.fieldtype[index] == 3:
|
||||
self.lib.cslib_unpack.restype = POINTER(c_float)
|
||||
elif self.fieldtype[index] == 4:
|
||||
self.lib.cslib_unpack.restype = POINTER(c_double)
|
||||
#elif self.fieldtype[index] == 5:
|
||||
# self.lib.cslib_unpack.restype = POINTER(c_char)
|
||||
|
||||
cdata = self.lib.cslib_unpack(self.cs,id)
|
||||
|
||||
# tflag = user-requested type of data to return
|
||||
# tflag = 1 to return data as list
|
||||
# tflag = 2 to return data as Numpy array
|
||||
# tflag = 3 to return data as ctypes vector
|
||||
# same usage of tflag as in pack functions
|
||||
# tflag = 2,3 should NOT perform a data copy
|
||||
|
||||
if tflag == 1:
|
||||
data = cdata[:self.fieldlen[index]]
|
||||
elif tflag == 2:
|
||||
if numpyflag == 0:
|
||||
print "Cannot return Numpy array w/out numpy package"
|
||||
sys.exit()
|
||||
data = np.ctypeslib.as_array(cdata,shape=(self.fieldlen[index],))
|
||||
elif tflag == 3:
|
||||
data = cdata
|
||||
|
||||
return data
|
||||
|
||||
# handle data array like pack() or unpack_parallel() ??
|
||||
|
||||
def unpack_data(self,id,tflag=3):
|
||||
index = self.fieldID.index(id)
|
||||
|
||||
# unpack one field of message in parallel
|
||||
# tflag = type of data to return
|
||||
# 3 = ctypes vector is default, since no conversion required
|
||||
# NOTE: allow direct use of user array (e.g. Numpy), if user provides data arg?
|
||||
# as opposed to creating this cdata
|
||||
# does that make any performance difference ?
|
||||
# e.g. should we allow CSlib to populate an existing Numpy array's memory
|
||||
|
||||
def unpack_parallel(self,id,nlocal,ids,nper,tflag=3):
|
||||
cids = self.data_convert(1,nlocal,ids)
|
||||
|
||||
# allocate memory for the returned data
|
||||
# pass cdata ptr to the memory to CSlib unpack_parallel()
|
||||
# this resets data type of last unpack_parallel() arg
|
||||
|
||||
index = self.fieldID.index(id)
|
||||
if self.fieldtype[index] == 1: cdata = (nper*nlocal * c_int)()
|
||||
elif self.fieldtype[index] == 2: cdata = (nlocal*nper * c_longlong)()
|
||||
elif self.fieldtype[index] == 3: cdata = (nlocal*nper * c_float)()
|
||||
elif self.fieldtype[index] == 4: cdata = (nlocal*nper * c_double)()
|
||||
#elif self.fieldtype[index] == 5: cdata = (nlocal*nper * c_char)()
|
||||
|
||||
self.lib.cslib_unpack_parallel(self.cs,id,nlocal,cids,nper,cdata)
|
||||
|
||||
# tflag = user-requested type of data to return
|
||||
# tflag = 1 to return data as list
|
||||
# tflag = 2 to return data as Numpy array
|
||||
# tflag = 3 to return data as ctypes vector
|
||||
# same usage of tflag as in pack functions
|
||||
|
||||
if tflag == 1:
|
||||
data = cdata[:nper*nlocal]
|
||||
elif tflag == 2:
|
||||
if numpyflag == 0:
|
||||
print "Cannot return Numpy array w/out numpy package"
|
||||
sys.exit()
|
||||
# NOTE: next line gives ctypes warning for fieldtype = 2 = 64-bit int
|
||||
# not sure why, reported as bug between ctypes and Numpy here:
|
||||
# https://stackoverflow.com/questions/4964101/pep-3118-
|
||||
# warning-when-using-ctypes-array-as-numpy-array
|
||||
# but why not same warning when just using unpack() ??
|
||||
# in Python these lines give same warning:
|
||||
# >>> import ctypes,numpy
|
||||
# >>> a = (10 * ctypes.c_longlong)()
|
||||
# >>> b = numpy.ctypeslib.as_array(a)
|
||||
data = np.ctypeslib.as_array(cdata,shape=(nlocal*nper,))
|
||||
elif tflag == 3:
|
||||
data = cdata
|
||||
|
||||
return data
|
||||
|
||||
# extract a library value
|
||||
|
||||
def extract(self,flag):
|
||||
return self.lib.cslib_extract(self.cs,flag)
|
||||
239
lib/message/cslib/src/cslib_wrap.cpp
Normal file
@ -0,0 +1,239 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// C style library interface to CSlib class
|
||||
|
||||
#include <mpi.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "cslib_wrap.h"
|
||||
#include "cslib.h"
|
||||
|
||||
using namespace CSLIB_NS;
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void cslib_open(int csflag, const char *mode, const void *ptr,
|
||||
const void *pcomm, void **csptr)
|
||||
{
|
||||
CSlib *cs = new CSlib(csflag,mode,ptr,pcomm);
|
||||
*csptr = (void *) cs;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void cslib_open_fortran(int csflag, const char *mode, const char *str,
|
||||
const void *pcomm, void **csptr)
|
||||
{
|
||||
MPI_Comm ccomm;
|
||||
void *pccomm = NULL;
|
||||
|
||||
if (pcomm) {
|
||||
MPI_Fint *fcomm = (MPI_Fint *) pcomm;
|
||||
ccomm = MPI_Comm_f2c(*fcomm);
|
||||
pccomm = &ccomm;
|
||||
}
|
||||
|
||||
CSlib *cs = new CSlib(csflag,mode,str,pccomm);
|
||||
*csptr = (void *) cs;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void cslib_open_fortran_mpi_one(int csflag, const char *mode,
|
||||
const void *pboth, const void *pcomm,
|
||||
void **csptr)
|
||||
{
|
||||
MPI_Comm ccomm,cboth;
|
||||
void *pccomm,*pcboth;
|
||||
|
||||
MPI_Fint *fcomm = (MPI_Fint *) pcomm;
|
||||
ccomm = MPI_Comm_f2c(*fcomm);
|
||||
pccomm = &ccomm;
|
||||
|
||||
MPI_Fint *fboth = (MPI_Fint *) pboth;
|
||||
cboth = MPI_Comm_f2c(*fboth);
|
||||
pcboth = &cboth;
|
||||
|
||||
CSlib *cs = new CSlib(csflag,mode,pcboth,pccomm);
|
||||
*csptr = (void *) cs;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void cslib_close(void *ptr)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
delete cs;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void cslib_send(void *ptr, int msgID, int nfield)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
cs->send(msgID,nfield);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void cslib_pack_int(void *ptr, int id, int value)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
cs->pack_int(id,value);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void cslib_pack_int64(void *ptr, int id, int64_t value)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
cs->pack_int64(id,value);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void cslib_pack_float(void *ptr, int id, float value)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
cs->pack_float(id,value);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void cslib_pack_double(void *ptr, int id, double value)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
cs->pack_double(id,value);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void cslib_pack_string(void *ptr, int id, char *value)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
cs->pack_string(id,value);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void cslib_pack(void *ptr, int id, int ftype, int flen, void *data)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
cs->pack(id,ftype,flen,data);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void cslib_pack_parallel(void *ptr, int id, int ftype,
|
||||
int nlocal, int *ids, int nper, void *data)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
cs->pack_parallel(id,ftype,nlocal,ids,nper,data);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
int cslib_recv(void *ptr, int *nfield_caller,
|
||||
int **fieldID_caller, int **fieldtype_caller,
|
||||
int **fieldlen_caller)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
|
||||
int nfield;
|
||||
int *fieldID,*fieldtype,*fieldlen;
|
||||
int msgID = cs->recv(nfield,fieldID,fieldtype,fieldlen);
|
||||
|
||||
*nfield_caller = nfield;
|
||||
*fieldID_caller = fieldID;
|
||||
*fieldtype_caller = fieldtype;
|
||||
*fieldlen_caller = fieldlen;
|
||||
|
||||
return msgID;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
int cslib_unpack_int(void *ptr, int id)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
return cs->unpack_int(id);
|
||||
}
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
int64_t cslib_unpack_int64(void *ptr, int id)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
return cs->unpack_int64(id);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
float cslib_unpack_float(void *ptr, int id)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
return cs->unpack_float(id);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
double cslib_unpack_double(void *ptr, int id)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
return cs->unpack_double(id);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
char *cslib_unpack_string(void *ptr, int id)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
return cs->unpack_string(id);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void *cslib_unpack(void *ptr, int id)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
return cs->unpack(id);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void cslib_unpack_data(void *ptr, int id, void *data)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
cs->unpack(id,data);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
void cslib_unpack_parallel(void *ptr, int id, int nlocal, int *ids,
|
||||
int nper, void *data)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
cs->unpack_parallel(id,nlocal,ids,nper,data);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
int cslib_extract(void *ptr, int flag)
|
||||
{
|
||||
CSlib *cs = (CSlib *) ptr;
|
||||
return cs->extract(flag);
|
||||
}
|
||||
lib/message/cslib/src/cslib_wrap.f90  (Normal file, 147 lines)
@ -0,0 +1,147 @@
|
||||
! ISO_C_binding wrapper on CSlib C interface
|
||||
|
||||
module cslib_wrap
|
||||
|
||||
interface
|
||||
subroutine cslib_open_fortran(csflag,mode,str,pcomm,ptr) bind(c)
|
||||
use iso_c_binding
|
||||
integer(c_int), value :: csflag
|
||||
character(c_char) :: mode(*),str(*)
|
||||
type(c_ptr), value :: pcomm
|
||||
type(c_ptr) :: ptr
|
||||
end subroutine cslib_open_fortran
|
||||
|
||||
subroutine cslib_open_fortran_mpi_one(csflag,mode,pboth,pcomm,ptr) bind(c)
|
||||
use iso_c_binding
|
||||
integer(c_int), value :: csflag
|
||||
character(c_char) :: mode(*)
|
||||
type(c_ptr), value :: pboth,pcomm
|
||||
type(c_ptr) :: ptr
|
||||
end subroutine cslib_open_fortran_mpi_one
|
||||
|
||||
subroutine cslib_close(ptr) bind(c)
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: ptr
|
||||
end subroutine cslib_close
|
||||
|
||||
subroutine cslib_send(ptr,msgID,nfield) bind(c)
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: msgID,nfield
|
||||
end subroutine cslib_send
|
||||
|
||||
subroutine cslib_pack_int(ptr,id,value) bind(c)
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: id
|
||||
integer(c_int), value :: value
|
||||
end subroutine cslib_pack_int
|
||||
|
||||
subroutine cslib_pack_int64(ptr,id,value) bind(c)
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: id
|
||||
integer(c_int64_t), value :: value
|
||||
end subroutine cslib_pack_int64
|
||||
|
||||
subroutine cslib_pack_float(ptr,id,value) bind(c)
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: id
|
||||
real(c_float), value :: value
|
||||
end subroutine cslib_pack_float
|
||||
|
||||
subroutine cslib_pack_double(ptr,id,value) bind(c)
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: id
|
||||
real(c_double), value :: value
|
||||
end subroutine cslib_pack_double
|
||||
|
||||
subroutine cslib_pack_string(ptr,id,value) bind(c)
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: id
|
||||
character(c_char) :: value(*)
|
||||
end subroutine cslib_pack_string
|
||||
|
||||
subroutine cslib_pack(ptr,id,ftype,flen,data) bind(c)
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: id,ftype,flen
|
||||
type(c_ptr), value :: data
|
||||
end subroutine cslib_pack
|
||||
|
||||
subroutine cslib_pack_parallel(ptr,id,ftype,nlocal,ids,nper,data) bind(c)
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: id,ftype,nlocal,nper
|
||||
type(c_ptr), value :: ids,data
|
||||
end subroutine cslib_pack_parallel
|
||||
|
||||
function cslib_recv(ptr,nfield,fieldID,fieldtype,fieldlen) bind(c)
|
||||
use iso_c_binding
|
||||
integer(c_int) :: cslib_recv
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int) :: nfield
|
||||
type(c_ptr) :: fieldID,fieldtype,fieldlen
|
||||
end function cslib_recv
|
||||
|
||||
function cslib_unpack_int(ptr,id) bind(c)
|
||||
use iso_c_binding
|
||||
integer(c_int) :: cslib_unpack_int
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: id
|
||||
end function cslib_unpack_int
|
||||
|
||||
function cslib_unpack_int64(ptr,id) bind(c)
|
||||
use iso_c_binding
|
||||
integer(c_int64_t) :: cslib_unpack_int64
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: id
|
||||
end function cslib_unpack_int64
|
||||
|
||||
function cslib_unpack_float(ptr,id) bind(c)
|
||||
use iso_c_binding
|
||||
real(c_float) :: cslib_unpack_float
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: id
|
||||
end function cslib_unpack_float
|
||||
|
||||
function cslib_unpack_double(ptr,id) bind(c)
|
||||
use iso_c_binding
|
||||
real(c_double) :: cslib_unpack_double
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: id
|
||||
end function cslib_unpack_double
|
||||
|
||||
function cslib_unpack_string(ptr,id) bind(c)
|
||||
use iso_c_binding
|
||||
type(c_ptr) :: cslib_unpack_string
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: id
|
||||
end function cslib_unpack_string
|
||||
|
||||
function cslib_unpack(ptr,id) bind(c)
|
||||
use iso_c_binding
|
||||
type(c_ptr) :: cslib_unpack
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: id
|
||||
end function cslib_unpack
|
||||
|
||||
subroutine cslib_unpack_parallel(ptr,id,nlocal,ids,nper,data) bind(c)
|
||||
use iso_c_binding
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: id,nlocal,nper
|
||||
type(c_ptr), value :: ids,data
|
||||
end subroutine cslib_unpack_parallel
|
||||
|
||||
function cslib_extract(ptr,flag) bind(c)
|
||||
use iso_c_binding
|
||||
integer(c_int) :: cslib_extract
|
||||
type(c_ptr), value :: ptr
|
||||
integer(c_int), value :: flag
|
||||
end function cslib_extract
|
||||
end interface
|
||||
|
||||
end module cslib_wrap
|
||||
lib/message/cslib/src/cslib_wrap.h  (Normal file, 54 lines)
@ -0,0 +1,54 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* C style library interface to CSlib class
|
||||
ifdefs allow this file to be included in a C program
|
||||
*/
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void cslib_open(int, const char *, const void *, const void *, void **);
|
||||
void cslib_open_fortran(int, const char *, const char *, const void *, void **);
|
||||
void cslib_open_fortran_mpi_one(int, const char *, const void *,
|
||||
const void *, void **);
|
||||
void cslib_close(void *);
|
||||
|
||||
void cslib_send(void *, int, int);
|
||||
|
||||
void cslib_pack_int(void *, int, int);
|
||||
void cslib_pack_int64(void *, int, int64_t);
|
||||
void cslib_pack_float(void *, int, float);
|
||||
void cslib_pack_double(void *, int, double);
|
||||
void cslib_pack_string(void *, int, char *);
|
||||
void cslib_pack(void *, int, int, int, void *);
|
||||
void cslib_pack_parallel(void *, int, int, int, int *, int, void *);
|
||||
|
||||
int cslib_recv(void *, int *, int **, int **, int **);
|
||||
|
||||
int cslib_unpack_int(void *, int);
|
||||
int64_t cslib_unpack_int64(void *, int);
|
||||
float cslib_unpack_float(void *, int);
|
||||
double cslib_unpack_double(void *, int);
|
||||
char *cslib_unpack_string(void *, int);
|
||||
void *cslib_unpack(void *, int);
|
||||
void cslib_unpack_data(void *, int, void *);
|
||||
void cslib_unpack_parallel(void *, int, int, int *, int, void *);
|
||||
|
||||
int cslib_extract(void *, int);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
lib/message/cslib/src/msg.cpp  (Normal file, 110 lines)
@ -0,0 +1,110 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <mpi.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "msg.h"
|
||||
|
||||
using namespace CSLIB_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Msg::Msg(int csflag, const void *ptr, MPI_Comm cworld)
|
||||
{
|
||||
world = cworld;
|
||||
MPI_Comm_rank(world,&me);
|
||||
MPI_Comm_size(world,&nprocs);
|
||||
|
||||
init(csflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
Msg::Msg(int csflag, const void *ptr)
|
||||
{
|
||||
world = 0;
|
||||
me = 0;
|
||||
nprocs = 1;
|
||||
|
||||
init(csflag);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void Msg::init(int csflag)
|
||||
{
|
||||
client = server = 0;
|
||||
if (csflag == 0) client = 1;
|
||||
else if (csflag == 1) server = 1;
|
||||
|
||||
nsend = nrecv = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void Msg::allocate(int nheader, int &maxheader, int *&header,
|
||||
int nbuf, int &maxbuf, char *&buf)
|
||||
{
|
||||
if (nheader > maxheader) {
|
||||
sfree(header);
|
||||
maxheader = nheader;
|
||||
header = (int *) smalloc(maxheader*sizeof(int));
|
||||
}
|
||||
|
||||
if (nbuf > maxbuf) {
|
||||
sfree(buf);
|
||||
maxbuf = nbuf;
|
||||
buf = (char *) smalloc(maxbuf*sizeof(char));
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void *Msg::smalloc(int nbytes)
{
  if (nbytes == 0) return NULL;
  void *ptr = (void *) malloc(nbytes);
  if (ptr == NULL) {
    char str[128];
    sprintf(str,"Failed to allocate %d bytes",nbytes);
    error_one(str);
  }
  return ptr;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void Msg::sfree(void *ptr)
|
||||
{
|
||||
if (ptr == NULL) return;
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void Msg::error_all(const char *str)
|
||||
{
|
||||
if (me == 0) printf("CSlib ERROR: %s\n",str);
|
||||
MPI_Abort(world,1);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void Msg::error_one(const char *str)
|
||||
{
|
||||
printf("CSlib ERROR: %s\n",str);
|
||||
MPI_Abort(world,1);
|
||||
}
|
||||
lib/message/cslib/src/msg.h  (Normal file, 52 lines)
@ -0,0 +1,52 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef MSG_H
|
||||
#define MSG_H
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
namespace CSLIB_NS {
|
||||
|
||||
class Msg {
|
||||
public:
|
||||
int nsend,nrecv;
|
||||
MPI_Comm world;
|
||||
|
||||
Msg(int, const void *, MPI_Comm);
|
||||
Msg(int, const void *);
|
||||
virtual ~Msg() {}
|
||||
virtual void send(int, int *, int, char *) = 0;
|
||||
virtual void recv(int &, int *&, int &, char *&) = 0;
|
||||
|
||||
protected:
|
||||
int me,nprocs;
|
||||
int client,server;
|
||||
|
||||
int nfield;
|
||||
int *fieldID,*fieldtype,*fieldlen;
|
||||
int lengths[2];
|
||||
|
||||
void init(int);
|
||||
void allocate(int, int &, int *&, int, int &, char *&);
|
||||
void *smalloc(int);
|
||||
void sfree(void *);
|
||||
void error_all(const char *);
|
||||
void error_one(const char *);
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
lib/message/cslib/src/msg_file.cpp  (Normal file, 143 lines)
@ -0,0 +1,143 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <mpi.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "msg_file.h"
|
||||
|
||||
using namespace CSLIB_NS;
|
||||
|
||||
#define MAXLINE 256
|
||||
#define SLEEP 0.1 // delay in CPU secs to check for message file
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
MsgFile::MsgFile(int csflag, const void *ptr, MPI_Comm cworld) :
|
||||
Msg(csflag, ptr, cworld)
|
||||
{
|
||||
char *filename = (char *) ptr;
|
||||
init(filename);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
MsgFile::MsgFile(int csflag, const void *ptr) : Msg(csflag, ptr)
|
||||
{
|
||||
char *filename = (char *) ptr;
|
||||
init(filename);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
MsgFile::~MsgFile()
|
||||
{
|
||||
delete [] fileroot;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void MsgFile::init(char *filename)
|
||||
{
|
||||
int n = strlen(filename) + 1;
|
||||
fileroot = new char[n];
|
||||
strcpy(fileroot,filename);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void MsgFile::send(int nheader, int *header, int nbuf, char *buf)
|
||||
{
|
||||
char filename[MAXLINE];
|
||||
|
||||
lengths[0] = nheader;
|
||||
lengths[1] = nbuf;
|
||||
|
||||
if (me == 0) {
|
||||
if (client) sprintf(filename,"%s.%s",fileroot,"client");
|
||||
else if (server) sprintf(filename,"%s.%s",fileroot,"server");
|
||||
|
||||
fp = fopen(filename,"wb");
|
||||
if (!fp) error_one("send(): Could not open send message file");
|
||||
fwrite(lengths,sizeof(int),2,fp);
|
||||
fwrite(header,sizeof(int),nheader,fp);
|
||||
fwrite(buf,1,nbuf,fp);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
// create empty signal file
|
||||
|
||||
if (me == 0) {
|
||||
if (client) sprintf(filename,"%s.%s",fileroot,"client.signal");
|
||||
else if (server) sprintf(filename,"%s.%s",fileroot,"server.signal");
|
||||
fp = fopen(filename,"w");
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void MsgFile::recv(int &maxheader, int *&header, int &maxbuf, char *&buf)
|
||||
{
|
||||
char filename[MAXLINE];
|
||||
|
||||
// wait until signal file exists to open message file
|
||||
|
||||
if (me == 0) {
|
||||
if (client) sprintf(filename,"%s.%s",fileroot,"server.signal");
|
||||
else if (server) sprintf(filename,"%s.%s",fileroot,"client.signal");
|
||||
|
||||
int delay = (int) (1000000 * SLEEP);
|
||||
while (1) {
|
||||
fp = fopen(filename,"r");
|
||||
if (fp) break;
|
||||
usleep(delay);
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
if (client) sprintf(filename,"%s.%s",fileroot,"server");
|
||||
else if (server) sprintf(filename,"%s.%s",fileroot,"client");
|
||||
fp = fopen(filename,"rb");
|
||||
if (!fp) error_one("recv(): Could not open recv message file");
|
||||
}
|
||||
|
||||
// read and broadcast data
|
||||
|
||||
if (me == 0) fread(lengths,sizeof(int),2,fp);
|
||||
if (nprocs > 1) MPI_Bcast(lengths,2,MPI_INT,0,world);
|
||||
|
||||
int nheader = lengths[0];
|
||||
int nbuf = lengths[1];
|
||||
allocate(nheader,maxheader,header,nbuf,maxbuf,buf);
|
||||
|
||||
if (me == 0) fread(header,sizeof(int),nheader,fp);
|
||||
if (nprocs > 1) MPI_Bcast(header,nheader,MPI_INT,0,world);
|
||||
|
||||
if (me == 0) fread(buf,1,nbuf,fp);
|
||||
if (nprocs > 1) MPI_Bcast(buf,nbuf,MPI_CHAR,0,world);
|
||||
|
||||
// delete both message and signal file
|
||||
|
||||
if (me == 0) {
|
||||
fclose(fp);
|
||||
unlink(filename);
|
||||
if (client) sprintf(filename,"%s.%s",fileroot,"server.signal");
|
||||
else if (server) sprintf(filename,"%s.%s",fileroot,"client.signal");
|
||||
unlink(filename);
|
||||
}
|
||||
}
|
||||
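The send()/recv() pair above implements a two-file handshake: the payload file is written first, an empty ".signal" file is then created, and the reader polls for the signal file, reads the payload, and deletes both. A self-contained Python sketch of that pattern (independent of CSlib; the file names and the 0.1 s poll interval mirror the code but are otherwise illustrative) looks like this:

# Hypothetical sketch of the message-file + signal-file handshake used above.
import os, time, struct

def send_file(root, header, payload):
    with open(root + ".client", "wb") as f:               # payload written first
        f.write(struct.pack("2i", len(header), len(payload)))
        f.write(struct.pack("%di" % len(header), *header))
        f.write(payload)
    open(root + ".client.signal", "w").close()            # then the empty signal file

def recv_file(root):
    while not os.path.exists(root + ".client.signal"):    # poll like MsgFile::recv()
        time.sleep(0.1)
    with open(root + ".client", "rb") as f:
        nheader, nbuf = struct.unpack("2i", f.read(8))
        header = struct.unpack("%di" % nheader, f.read(4*nheader))
        payload = f.read(nbuf)
    os.remove(root + ".client")                           # delete message and signal file
    os.remove(root + ".client.signal")
    return header, payload

send_file("tmp.couple", [1, 2, 3], b"hello")
print(recv_file("tmp.couple"))

Creating the signal file only after the payload is completely written is what keeps the reader from ever seeing a partially written message.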
lib/message/cslib/src/msg_file.h  (Normal file, 40 lines)
@ -0,0 +1,40 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef MSG_FILE_H
|
||||
#define MSG_FILE_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include "msg.h"
|
||||
|
||||
namespace CSLIB_NS {
|
||||
|
||||
class MsgFile : public Msg {
|
||||
public:
|
||||
MsgFile(int, const void *, MPI_Comm);
|
||||
MsgFile(int, const void *);
|
||||
~MsgFile();
|
||||
void send(int, int *, int, char *);
|
||||
void recv(int &, int *&, int &, char *&);
|
||||
|
||||
private:
|
||||
char *fileroot;
|
||||
FILE *fp;
|
||||
|
||||
void init(char *);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
lib/message/cslib/src/msg_mpi_one.cpp  (Normal file, 82 lines)
@ -0,0 +1,82 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <mpi.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "msg_mpi_one.h"
|
||||
|
||||
using namespace CSLIB_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
MsgMPIOne::MsgMPIOne(int csflag, const void *ptr, MPI_Comm cworld) :
|
||||
Msg(csflag, ptr, cworld)
|
||||
{
|
||||
// NOTE: ideally would skip this call if mpi/two
|
||||
init(ptr);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void MsgMPIOne::init(const void *ptr)
|
||||
{
|
||||
MPI_Comm *pbothcomm = (MPI_Comm *) ptr;
|
||||
bothcomm = *pbothcomm;
|
||||
|
||||
if (client) {
|
||||
MPI_Comm_size(world,&nprocs);
|
||||
otherroot = nprocs;
|
||||
} else if (server) {
|
||||
otherroot = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void MsgMPIOne::send(int nheader, int *header, int nbuf, char *buf)
|
||||
{
|
||||
lengths[0] = nheader;
|
||||
lengths[1] = nbuf;
|
||||
|
||||
if (me == 0) {
|
||||
MPI_Send(lengths,2,MPI_INT,otherroot,0,bothcomm);
|
||||
MPI_Send(header,nheader,MPI_INT,otherroot,0,bothcomm);
|
||||
MPI_Send(buf,nbuf,MPI_CHAR,otherroot,0,bothcomm);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void MsgMPIOne::recv(int &maxheader, int *&header, int &maxbuf, char *&buf)
|
||||
{
|
||||
MPI_Status status;
|
||||
|
||||
if (me == 0) MPI_Recv(lengths,2,MPI_INT,otherroot,0,bothcomm,&status);
|
||||
if (nprocs > 1) MPI_Bcast(lengths,2,MPI_INT,0,world);
|
||||
|
||||
int nheader = lengths[0];
|
||||
int nbuf = lengths[1];
|
||||
allocate(nheader,maxheader,header,nbuf,maxbuf,buf);
|
||||
|
||||
if (me == 0) MPI_Recv(header,nheader,MPI_INT,otherroot,0,bothcomm,&status);
|
||||
if (nprocs > 1) MPI_Bcast(header,nheader,MPI_INT,0,world);
|
||||
|
||||
if (me == 0) MPI_Recv(buf,nbuf,MPI_CHAR,otherroot,0,bothcomm,&status);
|
||||
if (nprocs > 1) MPI_Bcast(buf,nbuf,MPI_CHAR,0,world);
|
||||
}
|
||||
lib/message/cslib/src/msg_mpi_one.h  (Normal file, 38 lines)
@ -0,0 +1,38 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef MSG_MPI_ONE_H
|
||||
#define MSG_MPI_ONE_H
|
||||
|
||||
#include "msg.h"
|
||||
|
||||
namespace CSLIB_NS {
|
||||
|
||||
class MsgMPIOne : public Msg {
|
||||
public:
|
||||
MsgMPIOne(int, const void *, MPI_Comm);
|
||||
virtual ~MsgMPIOne() {}
|
||||
void send(int, int *, int, char *);
|
||||
void recv(int &, int *&, int &, char *&);
|
||||
|
||||
protected:
|
||||
MPI_Comm bothcomm;
|
||||
int otherroot;
|
||||
|
||||
void init(const void *);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
lib/message/cslib/src/msg_mpi_two.cpp  (Normal file, 81 lines)
@ -0,0 +1,81 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <mpi.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "msg_mpi_two.h"
|
||||
|
||||
using namespace CSLIB_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
MsgMPITwo::MsgMPITwo(int csflag, const void *ptr, MPI_Comm cworld) :
|
||||
MsgMPIOne(csflag, ptr, cworld)
|
||||
{
|
||||
char *filename = (char *) ptr;
|
||||
init(filename);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
MsgMPITwo::~MsgMPITwo()
|
||||
{
|
||||
// free the inter comm that spans both client and server
|
||||
|
||||
MPI_Comm_free(&bothcomm);
|
||||
MPI_Close_port(port);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void MsgMPITwo::init(char *filename)
|
||||
{
|
||||
if (client) {
|
||||
if (me == 0) {
|
||||
FILE *fp = NULL;
|
||||
while (!fp) {
|
||||
fp = fopen(filename,"r");
|
||||
if (!fp) sleep(1);
|
||||
}
|
||||
fgets(port,MPI_MAX_PORT_NAME,fp);
|
||||
//printf("Client port: %s\n",port);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
MPI_Bcast(port,MPI_MAX_PORT_NAME,MPI_CHAR,0,world);
|
||||
MPI_Comm_connect(port,MPI_INFO_NULL,0,world,&bothcomm);
|
||||
//if (me == 0) printf("CLIENT comm connect\n");
|
||||
if (me == 0) unlink(filename);
|
||||
|
||||
} else if (server) {
|
||||
MPI_Open_port(MPI_INFO_NULL,port);
|
||||
|
||||
if (me == 0) {
|
||||
//printf("Server name: %s\n",port);
|
||||
FILE *fp = fopen(filename,"w");
|
||||
fprintf(fp,"%s",port);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
MPI_Comm_accept(port,MPI_INFO_NULL,0,world,&bothcomm);
|
||||
//if (me == 0) printf("SERVER comm accept\n");
|
||||
}
|
||||
|
||||
otherroot = 0;
|
||||
}
|
||||
lib/message/cslib/src/msg_mpi_two.h  (Normal file, 35 lines)
@ -0,0 +1,35 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef MSG_MPI_TWO_H
|
||||
#define MSG_MPI_TWO_H
|
||||
|
||||
#include "msg_mpi_one.h"
|
||||
|
||||
namespace CSLIB_NS {
|
||||
|
||||
class MsgMPITwo : public MsgMPIOne {
|
||||
public:
|
||||
MsgMPITwo(int, const void *, MPI_Comm);
|
||||
~MsgMPITwo();
|
||||
|
||||
private:
|
||||
char port[MPI_MAX_PORT_NAME];
|
||||
|
||||
void init(char *);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
lib/message/cslib/src/msg_zmq.cpp  (Normal file, 140 lines)
@ -0,0 +1,140 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <mpi.h>
|
||||
#include <zmq.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "msg_zmq.h"
|
||||
|
||||
using namespace CSLIB_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
MsgZMQ::MsgZMQ(int csflag, const void *ptr, MPI_Comm cworld) :
|
||||
Msg(csflag, ptr, cworld)
|
||||
{
|
||||
char *port = (char *) ptr;
|
||||
init(port);
|
||||
}
|
||||
|
||||
MsgZMQ::MsgZMQ(int csflag, const void *ptr) : Msg(csflag, ptr)
|
||||
{
|
||||
char *port = (char *) ptr;
|
||||
init(port);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
MsgZMQ::~MsgZMQ()
|
||||
{
|
||||
if (me == 0) {
|
||||
zmq_close(socket);
|
||||
zmq_ctx_destroy(context);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void MsgZMQ::init(char *port)
|
||||
{
|
||||
#ifdef ZMQ_NO
|
||||
error_all("constructor(): Library not built with ZMQ support");
|
||||
#endif
|
||||
|
||||
if (me == 0) {
|
||||
int n = strlen(port) + 8;
|
||||
char *socket_name = new char[n];
|
||||
strcpy(socket_name,"tcp://");
|
||||
strcat(socket_name,port);
|
||||
|
||||
if (client) {
|
||||
context = zmq_ctx_new();
|
||||
socket = zmq_socket(context,ZMQ_REQ);
|
||||
zmq_connect(socket,socket_name);
|
||||
} else if (server) {
|
||||
context = zmq_ctx_new();
|
||||
socket = zmq_socket(context,ZMQ_REP);
|
||||
int rc = zmq_bind(socket,socket_name);
|
||||
if (rc) error_one("constructor(): Server could not make socket connection");
|
||||
}
|
||||
|
||||
delete [] socket_name;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
   client/server sockets (REQ/REP) must follow this protocol:
     client sends request (REQ) which server receives
     server sends response (REP) which client receives
   every exchange is of this form, server cannot initiate a send
   thus each ZMQ send below has a following ZMQ recv, except last one
   if client calls send(), it will next call recv()
   if server calls send(), it will next call recv() from its wait loop
   in either case, recv() issues a ZMQ recv to match last ZMQ send here
------------------------------------------------------------------------- */
|
||||
void MsgZMQ::send(int nheader, int *header, int nbuf, char *buf)
|
||||
{
|
||||
lengths[0] = nheader;
|
||||
lengths[1] = nbuf;
|
||||
|
||||
if (me == 0) {
|
||||
zmq_send(socket,lengths,2*sizeof(int),0);
|
||||
zmq_recv(socket,NULL,0,0);
|
||||
}
|
||||
|
||||
if (me == 0) {
|
||||
zmq_send(socket,header,nheader*sizeof(int),0);
|
||||
zmq_recv(socket,NULL,0,0);
|
||||
}
|
||||
|
||||
if (me == 0) zmq_send(socket,buf,nbuf,0);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
   client/server sockets (REQ/REP) must follow this protocol:
     client sends request (REQ) which server receives
     server sends response (REP) which client receives
   every exchange is of this form, server cannot initiate a send
   thus each ZMQ recv below has a following ZMQ send, except last one
   if client calls recv(), it will next call send() to ping server again,
   if server calls recv(), it will next call send() to respond to client
   in either case, send() issues a ZMQ send to match last ZMQ recv here
------------------------------------------------------------------------- */
|
||||
void MsgZMQ::recv(int &maxheader, int *&header, int &maxbuf, char *&buf)
|
||||
{
|
||||
if (me == 0) {
|
||||
zmq_recv(socket,lengths,2*sizeof(int),0);
|
||||
zmq_send(socket,NULL,0,0);
|
||||
}
|
||||
if (nprocs > 1) MPI_Bcast(lengths,2,MPI_INT,0,world);
|
||||
|
||||
int nheader = lengths[0];
|
||||
int nbuf = lengths[1];
|
||||
allocate(nheader,maxheader,header,nbuf,maxbuf,buf);
|
||||
|
||||
if (me == 0) {
|
||||
zmq_recv(socket,header,nheader*sizeof(int),0);
|
||||
zmq_send(socket,NULL,0,0);
|
||||
}
|
||||
if (nprocs > 1) MPI_Bcast(header,nheader,MPI_INT,0,world);
|
||||
|
||||
if (me == 0) zmq_recv(socket,buf,nbuf,0);
|
||||
if (nprocs > 1) MPI_Bcast(buf,nbuf,MPI_CHAR,0,world);
|
||||
}
|
||||
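Because REQ/REP sockets must strictly alternate send and receive, the three-part message above (lengths, header, buffer) is interleaved with empty acknowledgements. A hypothetical client-side sketch of one such exchange using pyzmq (the port and payload are made up; this is an illustration of the protocol, not the CSlib API) would be:

# Hypothetical client-side sketch of the REQ/REP exchange used by MsgZMQ above.
# Every send is acknowledged before the next one, keeping socket states in sync.
import struct
import zmq

ctx = zmq.Context()
sock = ctx.socket(zmq.REQ)
sock.connect("tcp://localhost:5555")                   # port is illustrative

header = [7, 4]                                        # e.g. msgID plus one field length
buf = b"data"

sock.send(struct.pack("2i", len(header), len(buf)))    # lengths
sock.recv()                                            # empty REP acknowledgement
sock.send(struct.pack("%di" % len(header), *header))   # header
sock.recv()
sock.send(buf)                                         # payload; no ack here, because
reply_lengths = sock.recv()                            # the next recv starts the reply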
lib/message/cslib/src/msg_zmq.h  (Normal file, 38 lines)
@ -0,0 +1,38 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
CSlib - Client/server library for code coupling
|
||||
http://cslib.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright 2018 National Technology & Engineering Solutions of
|
||||
Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with
|
||||
NTESS, the U.S. Government retains certain rights in this software.
|
||||
This software is distributed under the modified Berkeley Software
|
||||
Distribution (BSD) License.
|
||||
|
||||
See the README file in the top-level CSlib directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef MSG_ZMQ_H
|
||||
#define MSG_ZMQ_H
|
||||
|
||||
#include "msg.h"
|
||||
|
||||
namespace CSLIB_NS {
|
||||
|
||||
class MsgZMQ : public Msg {
|
||||
public:
|
||||
MsgZMQ(int, const void *, MPI_Comm);
|
||||
MsgZMQ(int, const void *);
|
||||
~MsgZMQ();
|
||||
void send(int, int *, int, char *);
|
||||
void recv(int &, int *&, int &, char *&);
|
||||
|
||||
private:
|
||||
void *context,*socket;
|
||||
|
||||
void init(char *);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
lib/scafacos/Install.py  (Normal file, 165 lines)
@ -0,0 +1,165 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# Install.py tool to download, unpack, build, and link to the Scafacos library
|
||||
# used to automate the steps described in the README file in this dir
|
||||
|
||||
from __future__ import print_function
|
||||
import sys,os,re,subprocess
|
||||
|
||||
# help message
|
||||
|
||||
help = """
|
||||
Syntax from src dir: make lib-scafacos args="-b"
|
||||
or: make lib-scafacos args="-p /usr/local/scafacos"
|
||||
Syntax from lib dir: python Install.py -b
|
||||
or: python Install.py -p /usr/local/scafacos
|
||||
|
||||
specify zero or more options, order does not matter
|
||||
|
||||
-b = download and build the Scafacos library
|
||||
-p = specify folder of existing Scafacos installation
|
||||
|
||||
always creates includelink, liblink to Scafacos dirs
|
||||
|
||||
Example:
|
||||
|
||||
make lib-scafacos args="-b" # download/build in lib/scafacos/scafacos
|
||||
make lib-scafacos args="-p $HOME/scafacos" # use existing Scafacos installation in $HOME
|
||||
"""
|
||||
|
||||
# settings
|
||||
|
||||
version = "scafacos-1.0.1"
|
||||
url = "https://github.com/scafacos/scafacos/releases/download/v1.0.1/scafacos-1.0.1.tar.gz"
|
||||
#url = "https://gigamove.rz.rwth-aachen.de/d/id/CTzyApN76MXMJ6/dd/100" % version
|
||||
|
||||
# print error message or help
|
||||
|
||||
def error(str=None):
|
||||
if not str: print(help)
|
||||
else: print("ERROR",str)
|
||||
sys.exit()
|
||||
|
||||
# expand to full path name
|
||||
# process leading '~' or relative path
|
||||
|
||||
def fullpath(path):
|
||||
return os.path.abspath(os.path.expanduser(path))
|
||||
|
||||
def which(program):
|
||||
def is_exe(fpath):
|
||||
return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
|
||||
|
||||
fpath, fname = os.path.split(program)
|
||||
if fpath:
|
||||
if is_exe(program):
|
||||
return program
|
||||
else:
|
||||
for path in os.environ["PATH"].split(os.pathsep):
|
||||
path = path.strip('"')
|
||||
exe_file = os.path.join(path, program)
|
||||
if is_exe(exe_file):
|
||||
return exe_file
|
||||
|
||||
return None
|
||||
|
||||
def geturl(url,fname):
|
||||
success = False
|
||||
|
||||
if which('curl') != None:
|
||||
cmd = 'curl -L -o "%s" %s' % (fname,url)
|
||||
try:
|
||||
subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
|
||||
success = True
|
||||
except subprocess.CalledProcessError as e:
|
||||
print("Calling curl failed with: %s" % e.output.decode('UTF-8'))
|
||||
|
||||
if not success and which('wget') != None:
|
||||
cmd = 'wget -O "%s" %s' % (fname,url)
|
||||
print("Wget command: %s" % cmd)
|
||||
try:
|
||||
subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
|
||||
success = True
|
||||
except subprocess.CalledProcessError as e:
|
||||
print("Calling wget failed with: %s" % e.output.decode('UTF-8'))
|
||||
|
||||
if not success:
|
||||
error("Failed to download source code with 'curl' or 'wget'")
|
||||
return
|
||||
|
||||
# parse args
|
||||
|
||||
args = sys.argv[1:]
|
||||
nargs = len(args)
|
||||
|
||||
homepath = "."
|
||||
|
||||
buildflag = True
|
||||
pathflag = False
|
||||
linkflag = True
|
||||
|
||||
iarg = 0
|
||||
while iarg < nargs:
|
||||
if args[iarg] == "-v":
|
||||
if iarg+2 > nargs: error()
|
||||
version = args[iarg+1]
|
||||
iarg += 2
|
||||
elif args[iarg] == "-p":
|
||||
if iarg+2 > nargs: error()
|
||||
scafacospath = fullpath(args[iarg+1])
|
||||
pathflag = True
|
||||
iarg += 2
|
||||
elif args[iarg] == "-b":
|
||||
buildflag = True
|
||||
iarg += 1
|
||||
else: error()
|
||||
|
||||
homepath = fullpath(homepath)
|
||||
homedir = "%s/%s" % (homepath,version)
|
||||
|
||||
if (pathflag):
|
||||
if not os.path.isdir(scafacospath): error("Scafacos path does not exist")
|
||||
homedir =scafacospath
|
||||
|
||||
if (buildflag and pathflag):
|
||||
error("Cannot use -b and -p flag at the same time")
|
||||
|
||||
# download and unpack Scafacos tarball
|
||||
|
||||
if buildflag:
|
||||
print("Downloading Scafacos ...")
|
||||
geturl(url,"%s/%s.tar.gz" % (homepath,version))
|
||||
|
||||
print("Unpacking Scafacos tarball ...")
|
||||
if os.path.exists("%s/%s" % (homepath,version)):
|
||||
cmd = 'rm -rf "%s/%s"' % (homepath,version)
|
||||
subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
|
||||
cmd = 'cd "%s"; tar -xzvf %s.tar.gz' % (homepath,version)
|
||||
subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
|
||||
os.remove("%s/%s.tar.gz" % (homepath,version))
|
||||
if os.path.basename(homedir) != version:
|
||||
if os.path.exists(homedir):
|
||||
cmd = 'rm -rf "%s"' % homedir
|
||||
subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
|
||||
os.rename("%s/%s" % (homepath,version),homedir)
|
||||
|
||||
# build Scafacos
|
||||
|
||||
if buildflag:
|
||||
print("Building Scafacos ...")
|
||||
cmd = 'cd "%s"; ./configure --prefix="`pwd`/build" --disable-doc --enable-fcs-solvers=fmm,p2nfft,direct,ewald,p3m --with-internal-fftw --with-internal-pfft --with-internal-pnfft CC=mpicc FC=mpif90 CXX=mpicxx F77= > log.txt; make -j; make install' % homedir
|
||||
txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
|
||||
print(txt.decode('UTF-8'))
|
||||
|
||||
# create 2 links in lib/scafacos to Scafacos include/lib dirs
|
||||
|
||||
if linkflag:
|
||||
print("Creating links to Scafacos include and lib files")
|
||||
if os.path.isfile("includelink") or os.path.islink("includelink"):
|
||||
os.remove("includelink")
|
||||
if os.path.isfile("liblink") or os.path.islink("liblink"):
|
||||
os.remove("liblink")
|
||||
cmd = 'ln -s "%s/build/include" includelink' % homedir
|
||||
subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
|
||||
cmd = 'ln -s "%s/build/lib" liblink' % homedir
|
||||
subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True)
|
||||
lib/scafacos/README  (Normal file, 76 lines)
@ -0,0 +1,76 @@
|
||||
This directory contains links to the ScaFaCoS library, which is
required to use the scafacos KSPACE solver and its kspace_style
scafacos command in a LAMMPS input script.

The ScaFaCoS library is available at http://scafacos.de or
on github at https://github.com/scafacos. The library was
developed by a consortium of different universities in
Germany (Bonn, Chemnitz, Stuttgart, Wuppertal) and
the Research Centre Juelich (Juelich Supercomputing Centre).

-----------------

Instructions:

1.) Download ScaFaCoS at http://scafacos.de or directly from github
    https://github.com/scafacos, where you can either clone the
    repository or download the latest stable release.
    NOTE: For the P2NFFT solver, you require an installation of the
          GNU Scientific Library (GSL). Also, to ensure the correct
          linker flags are used, ScaFaCoS employs the pkg-config
          tool, which is also required.
    If you cloned the repository, please refer to 2.), else continue
    with 3.)

2.) If you cloned the git repository, you require autotools to set up
    the library. For that the following packages are required:
      m4
      autoconf
      automake
      libtool
    In the build_aux folder of the scafacos folder, you can find the
    get_autotools.sh script, which downloads and installs the tools
    to ${HOME}/local. To change the target folder, please change the
    value of 'myprefix' in that script.
    To start the auto-configuration process, please run the './bootstrap'
    command in the scafacos base folder.

3.) If you downloaded the library as a tarball, please extract the file
    somewhere in your file system, or if you finished running
    './bootstrap', please run './configure' in the base folder.
    Important flags for './configure' are:
    --prefix=<install_dir>:      sets the directory the compiled files will
                                 be installed to [default: /usr/local]
    --enable-fcs-solvers=<list>: sets the list of solvers that are going to
                                 be built. By default all solvers will be
                                 built. Currently supported by the LAMMPS
                                 kspace_style scafacos are: direct, ewald,
                                 fmm, p2nfft.
                                 The other solvers might work, but support
                                 is purely experimental at the moment. To
                                 give a list of solvers, use a comma-separated
                                 list.
    --disable-doc:               disables the compilation of the documentation,
                                 e.g. if no LaTeX is available on the system.

4.) To build the library after configuration, run 'make' from the base folder.

5.) To install the library in the designated installation folder, run 'make install'.
    Installation is required, as ScaFaCoS does not support an in-source build!

6.) Create two soft links in this directory (lib/scafacos) to where the library
    is installed. E.g. if you built ScaFaCoS in the default install directory:
      % ln -s /usr/local/include includelink
      % ln -s /usr/local/lib liblink
    For any custom directory <custom_dir>:
      % ln -s <custom_dir>/include includelink
      % ln -s <custom_dir>/lib liblink

7.) ScaFaCoS uses the pkg-config tool to supply the correct compiler and linker
    flags, so you need to set up your PKG_CONFIG_PATH environment variable to
    include the lib/pkgconfig folder in the installation directory.
    Depending on the shell you use, this can be done either by:
      % export PKG_CONFIG_PATH=<custom_dir>/lib/pkgconfig:${PKG_CONFIG_PATH}
    or
      % setenv PKG_CONFIG_PATH <custom_dir>/lib/pkgconfig:${PKG_CONFIG_PATH}

-----------------