diff --git a/lib/gpu/Makefile.cuda b/lib/gpu/Makefile.cuda index be8003e02e..75428c9513 100644 --- a/lib/gpu/Makefile.cuda +++ b/lib/gpu/Makefile.cuda @@ -134,11 +134,11 @@ $(OBJ_DIR)/scan_app.cu_o: cudpp_mini/scan_app.cu $(GPU_LIB): $(OBJS) $(CUDPP) $(AR) -crusv $(GPU_LIB) $(OBJS) $(CUDPP) @cp $(EXTRAMAKE) Makefile.lammps - + # test app for querying device info $(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H) - $(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda + $(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda clean: -rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.linkinfo diff --git a/lib/gpu/Makefile.cuda_mps b/lib/gpu/Makefile.cuda_mps index 06d2ef0339..22a34c105c 100644 --- a/lib/gpu/Makefile.cuda_mps +++ b/lib/gpu/Makefile.cuda_mps @@ -133,11 +133,11 @@ $(OBJ_DIR)/scan_app.cu_o: cudpp_mini/scan_app.cu $(GPU_LIB): $(OBJS) $(CUDPP) $(AR) -crusv $(GPU_LIB) $(OBJS) $(CUDPP) @cp $(EXTRAMAKE) Makefile.lammps - + # test app for querying device info $(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H) - $(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda + $(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda clean: -rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.linkinfo diff --git a/lib/gpu/Makefile.hip b/lib/gpu/Makefile.hip index f5a0d03608..0350c841c4 100644 --- a/lib/gpu/Makefile.hip +++ b/lib/gpu/Makefile.hip @@ -98,10 +98,10 @@ HIP_GPU_OPTS += $(HIP_OPTS) -I./ ifeq (spirv,$(HIP_PLATFORM)) HIP_HOST_OPTS += -fPIC HIP_GPU_CC = $(HIP_PATH)/bin/hipcc -c - HIP_GPU_OPTS_S = + HIP_GPU_OPTS_S = HIP_GPU_OPTS_E = HIP_KERNEL_SUFFIX = .cpp - HIP_LIBS_TARGET = + HIP_LIBS_TARGET = export HCC_AMDGPU_TARGET := $(HIP_ARCH) else ifeq (clang,$(HIP_COMPILER)) HIP_HOST_OPTS += -fPIC diff --git a/lib/gpu/Makefile.lammps.mac_ocl b/lib/gpu/Makefile.lammps.mac_ocl index 0073efa2ba..dbbd789464 100644 --- a/lib/gpu/Makefile.lammps.mac_ocl +++ b/lib/gpu/Makefile.lammps.mac_ocl @@ -2,4 +2,4 @@ gpu_SYSINC = -DFFT_SINGLE gpu_SYSLIB = -framework OpenCL -gpu_SYSPATH = +gpu_SYSPATH = diff --git a/lib/gpu/Makefile.lammps.mingw-cross b/lib/gpu/Makefile.lammps.mingw-cross index 12d833c744..0b304b0e0c 100644 --- a/lib/gpu/Makefile.lammps.mingw-cross +++ b/lib/gpu/Makefile.lammps.mingw-cross @@ -2,5 +2,5 @@ # settings for OpenCL builds gpu_SYSINC = gpu_SYSLIB = -Wl,--enable-stdcall-fixup -L../../tools/mingw-cross$(LIBOBJDIR) -Wl,-Bdynamic,-lOpenCL,-Bstatic -gpu_SYSPATH = +gpu_SYSPATH = diff --git a/lib/gpu/Makefile.lammps.opencl b/lib/gpu/Makefile.lammps.opencl index 413ae79210..50f5e63f77 100644 --- a/lib/gpu/Makefile.lammps.opencl +++ b/lib/gpu/Makefile.lammps.opencl @@ -2,4 +2,4 @@ gpu_SYSINC = gpu_SYSLIB = -lOpenCL -gpu_SYSPATH = +gpu_SYSPATH = diff --git a/lib/gpu/Makefile.linux b/lib/gpu/Makefile.linux index 3c37672e01..e02413f3ba 100644 --- a/lib/gpu/Makefile.linux +++ b/lib/gpu/Makefile.linux @@ -1,4 +1,4 @@ -# /* ---------------------------------------------------------------------- +# /* ---------------------------------------------------------------------- # Generic Linux Makefile for CUDA # - Change CUDA_ARCH for your GPU # ------------------------------------------------------------------------- */ diff --git a/lib/gpu/Makefile.linux_multi b/lib/gpu/Makefile.linux_multi index 005f659079..e3a76d9934 100644 --- a/lib/gpu/Makefile.linux_multi +++ b/lib/gpu/Makefile.linux_multi @@ -1,4 +1,4 @@ -# /* ---------------------------------------------------------------------- +# /* ---------------------------------------------------------------------- # Generic Linux Makefile for CUDA complied for multiple compute capabilities # - Add your GPU to CUDA_CODE # ------------------------------------------------------------------------- */ diff --git a/lib/gpu/Makefile.linux_opencl b/lib/gpu/Makefile.linux_opencl index 43d012dc4a..b4b25544ee 100644 --- a/lib/gpu/Makefile.linux_opencl +++ b/lib/gpu/Makefile.linux_opencl @@ -1,4 +1,4 @@ -# /* ---------------------------------------------------------------------- +# /* ---------------------------------------------------------------------- # Generic Linux Makefile for OpenCL - Mixed precision # ------------------------------------------------------------------------- */ @@ -11,7 +11,7 @@ EXTRAMAKE = Makefile.lammps.opencl LMP_INC = -DLAMMPS_SMALLBIG -OCL_INC = +OCL_INC = OCL_CPP = mpic++ -std=c++11 -O3 -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC) OCL_LINK = -lOpenCL OCL_PREC = -D_SINGLE_DOUBLE diff --git a/lib/gpu/Makefile.mac_opencl b/lib/gpu/Makefile.mac_opencl index ae7e8ca6fd..3a9fd39f35 100644 --- a/lib/gpu/Makefile.mac_opencl +++ b/lib/gpu/Makefile.mac_opencl @@ -1,4 +1,4 @@ -# /* ---------------------------------------------------------------------- +# /* ---------------------------------------------------------------------- # Generic Mac Makefile for OpenCL - Single precision with FFT_SINGLE # ------------------------------------------------------------------------- */ diff --git a/lib/gpu/Makefile.mac_opencl_mpi b/lib/gpu/Makefile.mac_opencl_mpi index 9be9f07e93..b0c6e39aae 100644 --- a/lib/gpu/Makefile.mac_opencl_mpi +++ b/lib/gpu/Makefile.mac_opencl_mpi @@ -1,4 +1,4 @@ -# /* ---------------------------------------------------------------------- +# /* ---------------------------------------------------------------------- # Generic Mac Makefile for OpenCL - Single precision with FFT_SINGLE # ------------------------------------------------------------------------- */ diff --git a/lib/gpu/Makefile.oneapi b/lib/gpu/Makefile.oneapi index 32800676aa..e67f4bb082 100644 --- a/lib/gpu/Makefile.oneapi +++ b/lib/gpu/Makefile.oneapi @@ -1,4 +1,4 @@ -# /* ---------------------------------------------------------------------- +# /* ---------------------------------------------------------------------- # Linux Makefile for Intel oneAPI - Mixed precision # ------------------------------------------------------------------------- */ diff --git a/lib/gpu/Makefile.oneapi_prof b/lib/gpu/Makefile.oneapi_prof index 1e21597373..58a03392e2 100644 --- a/lib/gpu/Makefile.oneapi_prof +++ b/lib/gpu/Makefile.oneapi_prof @@ -1,4 +1,4 @@ -# /* ---------------------------------------------------------------------- +# /* ---------------------------------------------------------------------- # Linux Makefile for Intel oneAPI - Mixed precision (with timing enabled) # ------------------------------------------------------------------------- */ diff --git a/lib/gpu/Makefile.serial b/lib/gpu/Makefile.serial index 6c94911f32..67d2ce927d 100644 --- a/lib/gpu/Makefile.serial +++ b/lib/gpu/Makefile.serial @@ -1,4 +1,4 @@ -# /* ---------------------------------------------------------------------- +# /* ---------------------------------------------------------------------- # Generic Linux Makefile for CUDA without MPI libraries # - Change CUDA_ARCH for your GPU # ------------------------------------------------------------------------- */ diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile index 298d404117..d351b87b37 100644 --- a/lib/gpu/Nvidia.makefile +++ b/lib/gpu/Nvidia.makefile @@ -11,7 +11,7 @@ HOST_H = lal_answer.h lal_atom.h lal_balance.h lal_base_atomic.h lal_base_amoeba lal_base_charge.h lal_base_dipole.h lal_base_dpd.h \ lal_base_ellipsoid.h lal_base_three.h lal_device.h lal_neighbor.h \ lal_neighbor_shared.h lal_pre_ocl_config.h $(NVD_H) - + # Source files SRCS := $(wildcard ./lal_*.cpp) OBJS := $(subst ./,$(OBJ_DIR)/,$(SRCS:%.cpp=%.o)) @@ -127,7 +127,7 @@ $(GPU_LIB): $(OBJS) $(CUDPP) # test app for querying device info $(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H) - $(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda + $(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda clean: -rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.cubin *.linkinfo diff --git a/lib/gpu/Nvidia.makefile_multi b/lib/gpu/Nvidia.makefile_multi index ddbee4f2a1..c4b27ebbcb 100644 --- a/lib/gpu/Nvidia.makefile_multi +++ b/lib/gpu/Nvidia.makefile_multi @@ -89,7 +89,7 @@ $(GPU_LIB): $(OBJS) $(CUDPP) # test app for querying device info $(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H) - $(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda + $(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda clean: -rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.linkinfo diff --git a/lib/gpu/geryon/ocl_memory.h b/lib/gpu/geryon/ocl_memory.h index 5d8b9808bd..e665654071 100644 --- a/lib/gpu/geryon/ocl_memory.h +++ b/lib/gpu/geryon/ocl_memory.h @@ -108,7 +108,7 @@ inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n, return UCL_MEMORY_ERROR; *mat.host_ptr() = (typename mat_type::data_type*) clEnqueueMapBuffer(cm.cq(),mat.cbegin(),CL_TRUE, - map_perm,0,n,0,NULL,NULL,NULL); + map_perm,0,n,0,NULL,NULL,NULL); mat.cq()=cm.cq(); CL_SAFE_CALL(clRetainCommandQueue(mat.cq())); diff --git a/lib/gpu/lal_edpd.cu b/lib/gpu/lal_edpd.cu index e3a76a766b..0982d219eb 100644 --- a/lib/gpu/lal_edpd.cu +++ b/lib/gpu/lal_edpd.cu @@ -324,8 +324,8 @@ __kernel void k_edpd(const __global numtyp4 *restrict x_, f.z+=delz*force; // heat transfer - - if (r < coeff2w) { + + if (r < coeff2w) { numtyp wrT = (numtyp)1.0 - r/coeff2w; wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT)); wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype] @@ -565,7 +565,7 @@ __kernel void k_edpd_fast(const __global numtyp4 *restrict x_, // heat transfer - if (r < coeff2w) { + if (r < coeff2w) { numtyp wrT = (numtyp)1.0 - r/coeff2w; wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT)); wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype] @@ -579,10 +579,10 @@ __kernel void k_edpd_fast(const __global numtyp4 *restrict x_, factor += kcx*T_pow.x + kcy*T_pow.y + kcz*T_pow.z + kcw*T_pow.w; kappaT *= factor; } - + numtyp kij = cvi*cvj*kappaT * T_ij*T_ij; numtyp alphaij = ucl_sqrt((numtyp)2.0*kboltz*kij); - + numtyp dQc = kij * wrT*wrT * (Tj - Ti )/(Ti*Tj); numtyp dQd = wr*wr*( GammaIJ * vijeij*vijeij - SigmaIJ*SigmaIJ/mass_itype ) - SigmaIJ * wr *vijeij *randnum; dQd /= (cvi+cvj); diff --git a/lib/gpu/lal_sph_heatconduction.cu b/lib/gpu/lal_sph_heatconduction.cu index 90bbd4e839..e2ba40db0c 100644 --- a/lib/gpu/lal_sph_heatconduction.cu +++ b/lib/gpu/lal_sph_heatconduction.cu @@ -238,7 +238,7 @@ __kernel void k_sph_heatconduction_fast(const __global numtyp4 *restrict x_, // Lucy Kernel, 2d wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq; } - + // total thermal energy increment numtyp D = coeffx; // alpha[itype][jtype] diffusion coefficient numtyp deltaE = (numtyp)2.0 * mass_itype * mass_jtype / (mass_itype + mass_jtype); diff --git a/lib/gpu/lal_sph_lj.cu b/lib/gpu/lal_sph_lj.cu index 23863b5e28..f376dfc533 100644 --- a/lib/gpu/lal_sph_lj.cu +++ b/lib/gpu/lal_sph_lj.cu @@ -362,7 +362,7 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_, // Lucy Kernel, 2d wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq; } - + // function call to LJ EOS numtyp fcj[2]; LJEOS2(rhoj, esphj, cvj, fcj); @@ -404,7 +404,7 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_, drhoEacc.y += deltaE; if (EVFLAG && eflag) { - numtyp e = (numtyp)0; + numtyp e = (numtyp)0; energy+=e; } if (EVFLAG && vflag) { diff --git a/lib/gpu/lal_sph_taitwater.cu b/lib/gpu/lal_sph_taitwater.cu index 708d3ae43b..9424d58996 100644 --- a/lib/gpu/lal_sph_taitwater.cu +++ b/lib/gpu/lal_sph_taitwater.cu @@ -145,9 +145,9 @@ __kernel void k_sph_taitwater(const __global numtyp4 *restrict x_, // Lucy Kernel, 2d wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq; } - + // compute pressure of atom j with Tait EOS - + numtyp tmp = rhoj / rho0_jtype; numtyp fj = tmp * tmp * tmp; fj = B_jtype * (fj * fj * tmp - (numtyp)1.0); @@ -321,7 +321,7 @@ __kernel void k_sph_taitwater_fast(const __global numtyp4 *restrict x_, wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq; } - // compute pressure of atom j with Tait EOS + // compute pressure of atom j with Tait EOS numtyp tmp = rhoj / rho0_jtype; numtyp fj = tmp * tmp * tmp; fj = B_jtype * (fj * fj * tmp - (numtyp)1.0); @@ -356,7 +356,7 @@ __kernel void k_sph_taitwater_fast(const __global numtyp4 *restrict x_, drhoEacc.y += deltaE; if (EVFLAG && eflag) { - numtyp e = (numtyp)0; + numtyp e = (numtyp)0; energy+=e; } if (EVFLAG && vflag) {