From 21f8abda248c100361b41af45574a50481e932b7 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Wed, 19 Aug 2015 15:15:32 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@13906 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- lib/README | 2 ++ lib/compress/Makefile.lammps | 21 +++++++++++++++ lib/gpu/Nvidia.makefile | 49 +++++++++++++++++++++++++++++++--- lib/gpu/Opencl.makefile | 37 +++++++++++++++++++++++-- lib/gpu/cudpp_mini/license.txt | 2 +- lib/gpu/geryon/ocl_kernel.h | 8 ++++-- lib/gpu/lal_answer.cpp | 2 +- lib/gpu/lal_base_three.cpp | 8 +++--- lib/gpu/lal_base_three.h | 4 ++- lib/gpu/lal_device.cpp | 4 +-- 10 files changed, 122 insertions(+), 15 deletions(-) create mode 100644 lib/compress/Makefile.lammps diff --git a/lib/README b/lib/README index a7a41f00f2..95d645f82a 100644 --- a/lib/README +++ b/lib/README @@ -21,6 +21,8 @@ awpmd antisymmetrized wave packet molecular dynamics, AWPMD package from Ilya Valuev (JIHT RAS) colvars collective variable module (Metadynamics, ABF and more) from Giacomo Fiorin and Jerome Henin (ICMS, Temple U) +compress hook to system lib for performing I/O compression, COMPRESS pkg + from Axel Kohlmeyer (Temple U) cuda NVIDIA GPU routines, USER-CUDA package from Christian Trott (U Tech Ilmenau) gpu general GPU routines, GPU package diff --git a/lib/compress/Makefile.lammps b/lib/compress/Makefile.lammps new file mode 100644 index 0000000000..2d06990d82 --- /dev/null +++ b/lib/compress/Makefile.lammps @@ -0,0 +1,21 @@ +# This file contains the settings to build and link LAMMPS with +# support for data compression libraries. +# +# When you build LAMMPS with the COMPRESS package installed, it will +# use the 3 settings in this file. They should be set as follows. +# +# The compress_SYSLIB setting is for linking the compression library. +# By default, the setting will point to zlib (-lz). +# +# The compress_SYSINC and compress_SYSPATH variables do not typically need +# to be set, as compression libraries are usually installed as packages +# in system locations. Otherwise, specify its directory via the +# compress_SYSPATH variable, e.g. -Ldir or compress_SYSINC variable( -Idir) + +# ----------------------------------------------------------- + +# Settings that the LAMMPS build will import when this package is installed + +compress_SYSINC = +compress_SYSLIB = -lz +compress_SYSPATH = diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile index f38b443845..74cee0ee09 100644 --- a/lib/gpu/Nvidia.makefile +++ b/lib/gpu/Nvidia.makefile @@ -68,8 +68,11 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \ $(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \ $(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \ $(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \ + $(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \ $(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \ - $(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o + $(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \ + $(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \ + $(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \ $(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \ @@ -117,8 +120,12 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \ $(OBJ_DIR)/lj_coul_msm.cubin $(OBJ_DIR)/lj_coul_msm_cubin.h \ $(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs_cubin.h \ $(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h \ - $(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \ - $(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h + $(OBJ_DIR)/tersoff.cubin $(OBJ_DIR)/tersoff_cubin.h \ + $(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \ + $(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h \ + $(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl_cubin.h \ + $(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic_cubin.h + all: $(OBJ_DIR) $(GPU_LIB) $(EXECS) @@ -680,6 +687,18 @@ $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cubin.h $(OB $(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h $(CUDR) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/tersoff.cubin: lal_tersoff.cu lal_precision.h lal_tersoff_extra.h lal_preprocessor.h + $(CUDA) --cubin -DNV_KERNEL -o $@ lal_tersoff.cu + +$(OBJ_DIR)/tersoff_cubin.h: $(OBJ_DIR)/tersoff.cubin $(OBJ_DIR)/tersoff.cubin + $(BIN2C) -c -n tersoff $(OBJ_DIR)/tersoff.cubin > $(OBJ_DIR)/tersoff_cubin.h + +$(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/tersoff_cubin.h $(OBJ_DIR)/lal_base_three.o + $(CUDR) -o $@ -c lal_tersoff.cpp -I$(OBJ_DIR) + +$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h + $(CUDR) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR) + $(OBJ_DIR)/coul.cubin: lal_coul.cu lal_precision.h lal_preprocessor.h $(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul.cu @@ -704,6 +723,30 @@ $(OBJ_DIR)/lal_coul_debye.o: $(ALL_H) lal_coul_debye.h lal_coul_debye.cpp $(OBJ_ $(OBJ_DIR)/lal_coul_debye_ext.o: $(ALL_H) lal_coul_debye.h lal_coul_debye_ext.cpp lal_base_charge.h $(CUDR) -o $@ -c lal_coul_debye_ext.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/zbl.cubin: lal_zbl.cu lal_precision.h lal_preprocessor.h + $(CUDA) --cubin -DNV_KERNEL -o $@ lal_zbl.cu + +$(OBJ_DIR)/zbl_cubin.h: $(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl.cubin + $(BIN2C) -c -n zbl $(OBJ_DIR)/zbl.cubin > $(OBJ_DIR)/zbl_cubin.h + +$(OBJ_DIR)/lal_zbl.o: $(ALL_H) lal_zbl.h lal_zbl.cpp $(OBJ_DIR)/zbl_cubin.h $(OBJ_DIR)/lal_base_atomic.o + $(CUDR) -o $@ -c lal_zbl.cpp -I$(OBJ_DIR) + +$(OBJ_DIR)/lal_zbl_ext.o: $(ALL_H) lal_zbl.h lal_zbl_ext.cpp lal_base_atomic.h + $(CUDR) -o $@ -c lal_zbl_ext.cpp -I$(OBJ_DIR) + +$(OBJ_DIR)/lj_cubic.cubin: lal_lj_cubic.cu lal_precision.h lal_preprocessor.h + $(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_cubic.cu + +$(OBJ_DIR)/lj_cubic_cubin.h: $(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic.cubin + $(BIN2C) -c -n lj_cubic $(OBJ_DIR)/lj_cubic.cubin > $(OBJ_DIR)/lj_cubic_cubin.h + +$(OBJ_DIR)/lal_lj_cubic.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic.cpp $(OBJ_DIR)/lj_cubic_cubin.h $(OBJ_DIR)/lal_base_atomic.o + $(CUDR) -o $@ -c lal_lj_cubic.cpp -I$(OBJ_DIR) + +$(OBJ_DIR)/lal_lj_cubic_ext.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic_ext.cpp lal_base_atomic.h + $(CUDR) -o $@ -c lal_lj_cubic_ext.cpp -I$(OBJ_DIR) + $(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H) $(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda diff --git a/lib/gpu/Opencl.makefile b/lib/gpu/Opencl.makefile index b54704f63c..8f3f4f017e 100644 --- a/lib/gpu/Opencl.makefile +++ b/lib/gpu/Opencl.makefile @@ -57,8 +57,11 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \ $(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \ $(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \ $(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \ + $(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \ $(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \ - $(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o + $(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \ + $(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \ + $(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \ $(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/pppm_cl.h \ @@ -82,7 +85,10 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \ $(OBJ_DIR)/sw_cl.h $(OBJ_DIR)/beck_cl.h $(OBJ_DIR)/mie_cl.h \ $(OBJ_DIR)/soft_cl.h $(OBJ_DIR)/lj_coul_msm_cl.h \ $(OBJ_DIR)/lj_gromacs_cl.h $(OBJ_DIR)/dpd_cl.h \ - $(OBJ_DIR)/coul_cl.h $(OBJ_DIR)/coul_debye_cl.h + $(OBJ_DIR)/lj_gauss_cl.h $(OBJ_DIR)/dzugutov_cl.h \ + $(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/coul_cl.h \ + $(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/zbl_cl.h \ + $(OBJ_DIR)/lj_cubic_cl.h OCL_EXECS = $(BIN_DIR)/ocl_get_devices @@ -488,6 +494,15 @@ $(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cl.h $(OBJ_ $(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h $(OCL) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/tersoff_cl.h: lal_tersoff.cu lal_tersoff_extra.h $(PRE1_H) + $(BSH) ./geryon/file_to_cstr.sh tersoff $(PRE1_H) lal_tersoff_extra.h lal_tersoff.cu $(OBJ_DIR)/tersoff_cl.h; + +$(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/lal_base_three.o + $(OCL) -o $@ -c lal_tersoff.cpp -I$(OBJ_DIR) + +$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h + $(OCL) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR) + $(OBJ_DIR)/coul_cl.h: lal_coul.cu $(PRE1_H) $(BSH) ./geryon/file_to_cstr.sh coul $(PRE1_H) lal_coul.cu $(OBJ_DIR)/coul_cl.h; @@ -506,6 +521,24 @@ $(OBJ_DIR)/lal_coul_debye.o: $(ALL_H) lal_coul_debye.h lal_coul_debye.cpp $(OBJ $(OBJ_DIR)/lal_coul_debye_ext.o: $(ALL_H) lal_coul_debye.h lal_coul_debye_ext.cpp lal_base_charge.h $(OCL) -o $@ -c lal_coul_debye_ext.cpp -I$(OBJ_DIR) +$(OBJ_DIR)/zbl_cl.h: lal_zbl.cu $(PRE1_H) + $(BSH) ./geryon/file_to_cstr.sh zbl $(PRE1_H) lal_zbl.cu $(OBJ_DIR)/zbl_cl.h; + +$(OBJ_DIR)/lal_zbl.o: $(ALL_H) lal_zbl.h lal_zbl.cpp $(OBJ_DIR)/zbl_cl.h $(OBJ_DIR)/zbl_cl.h $(OBJ_DIR)/lal_base_atomic.o + $(OCL) -o $@ -c lal_zbl.cpp -I$(OBJ_DIR) + +$(OBJ_DIR)/lal_zbl_ext.o: $(ALL_H) lal_zbl.h lal_zbl_ext.cpp lal_base_atomic.h + $(OCL) -o $@ -c lal_zbl_ext.cpp -I$(OBJ_DIR) + +$(OBJ_DIR)/lj_cubic_cl.h: lal_lj_cubic.cu $(PRE1_H) + $(BSH) ./geryon/file_to_cstr.sh lj_cubic $(PRE1_H) lal_lj_cubic.cu $(OBJ_DIR)/lj_cubic_cl.h; + +$(OBJ_DIR)/lal_lj_cubic.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic.cpp $(OBJ_DIR)/lj_cubic_cl.h $(OBJ_DIR)/lj_cubic_cl.h $(OBJ_DIR)/lal_base_atomic.o + $(OCL) -o $@ -c lal_lj_cubic.cpp -I$(OBJ_DIR) + +$(OBJ_DIR)/lal_lj_cubic_ext.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic_ext.cpp lal_base_atomic.h + $(OCL) -o $@ -c lal_lj_cubic_ext.cpp -I$(OBJ_DIR) + $(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp $(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK) diff --git a/lib/gpu/cudpp_mini/license.txt b/lib/gpu/cudpp_mini/license.txt index 2b0d902586..90a6439452 100644 --- a/lib/gpu/cudpp_mini/license.txt +++ b/lib/gpu/cudpp_mini/license.txt @@ -23,4 +23,4 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - \ No newline at end of file + diff --git a/lib/gpu/geryon/ocl_kernel.h b/lib/gpu/geryon/ocl_kernel.h index 0b5e43e93f..e55b6034a6 100644 --- a/lib/gpu/geryon/ocl_kernel.h +++ b/lib/gpu/geryon/ocl_kernel.h @@ -132,7 +132,7 @@ class UCL_Program { } return UCL_SUCCESS; - } + } /// Return the default command queue/stream associated with this data inline command_queue & cq() { return _cq; } @@ -315,6 +315,10 @@ class UCL_Kernel { /// Clear any arguments associated with the kernel inline void clear_args() { _num_args=0; } + /// Return the default command queue/stream associated with this data + inline command_queue & cq() { return _cq; } + /// Change the default command queue associated with matrix + inline void cq(command_queue &cq_in) { _cq=cq_in; } #include "ucl_arg_kludge.h" private: @@ -370,7 +374,7 @@ inline int UCL_Kernel::set_function(UCL_Program &program, const char *function) } #endif #endif - + return UCL_SUCCESS; } diff --git a/lib/gpu/lal_answer.cpp b/lib/gpu/lal_answer.cpp index 7eff7df697..dd0b5d2424 100644 --- a/lib/gpu/lal_answer.cpp +++ b/lib/gpu/lal_answer.cpp @@ -248,7 +248,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom, for (int i=vstart; iclear(); assert(ucl_device->num_queues()==_end_command_queue+1); - ucl_device->pop_command_queue(); + // ucl_device will clean up the command queue in its destructor +// ucl_device->pop_command_queue(); #endif device->clear(); } @@ -183,7 +184,7 @@ int * BaseThreeT::reset_nbors(const int nall, const int inum, const int nlist, // Build neighbor list on device // --------------------------------------------------------------------------- template -inline void BaseThreeT::build_nbor_list(const int inum, const int host_inum, +inline int BaseThreeT::build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, double *sublo, double *subhi, tagint *tag, @@ -193,7 +194,7 @@ inline void BaseThreeT::build_nbor_list(const int inum, const int host_inum, resize_atom(inum,nall,success); resize_local(nall,host_inum,nbor->max_nbors(),success); if (!success) - return; + return 1; atom->cast_copy_x(host_x,host_type); int mn; @@ -206,6 +207,7 @@ inline void BaseThreeT::build_nbor_list(const int inum, const int host_inum, #endif if (bytes>_max_an_bytes) _max_an_bytes=bytes; + return mn; } // --------------------------------------------------------------------------- diff --git a/lib/gpu/lal_base_three.h b/lib/gpu/lal_base_three.h index f457b5bd55..15ff8aff13 100644 --- a/lib/gpu/lal_base_three.h +++ b/lib/gpu/lal_base_three.h @@ -28,6 +28,8 @@ #include "geryon/nvd_texture.h" #endif +#define THREE_CONCURRENT + namespace LAMMPS_AL { template @@ -129,7 +131,7 @@ class BaseThree { int *numj, int **firstneigh, bool &success); /// Build neighbor list on device - void build_nbor_list(const int inum, const int host_inum, + int build_nbor_list(const int inum, const int host_inum, const int nall, double **host_x, int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, bool &success); diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 2ef2ff4d59..f326657e31 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -337,7 +337,7 @@ void DeviceT::init_message(FILE *screen, const char *name, #else std::string fs=toa(gpu->free_gigabytes())+"/"; #endif - + if (_replica_me == 0 && screen) { fprintf(screen,"\n-------------------------------------"); fprintf(screen,"-------------------------------------\n"); @@ -362,7 +362,7 @@ void DeviceT::init_message(FILE *screen, const char *name, sname=gpu->name(i)+", "+toa(gpu->cus(i))+" CUs, "+fs+ toa(gpu->gigabytes(i))+" GB, "+toa(gpu->clock_rate(i))+" GHZ ("; else - sname=gpu->name(i)+", "+toa(gpu->cus(i))+" CUs, "+fs+ + sname=gpu->name(i)+", "+toa(gpu->cus(i))+" CUs, "+ toa(gpu->clock_rate(i))+" GHZ ("; if (sizeof(PRECISION)==4) { if (sizeof(ACC_PRECISION)==4)