remove tabs and trailing whitespace
This commit is contained in:
@ -134,11 +134,11 @@ $(OBJ_DIR)/scan_app.cu_o: cudpp_mini/scan_app.cu
|
|||||||
$(GPU_LIB): $(OBJS) $(CUDPP)
|
$(GPU_LIB): $(OBJS) $(CUDPP)
|
||||||
$(AR) -crusv $(GPU_LIB) $(OBJS) $(CUDPP)
|
$(AR) -crusv $(GPU_LIB) $(OBJS) $(CUDPP)
|
||||||
@cp $(EXTRAMAKE) Makefile.lammps
|
@cp $(EXTRAMAKE) Makefile.lammps
|
||||||
|
|
||||||
# test app for querying device info
|
# test app for querying device info
|
||||||
|
|
||||||
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
||||||
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.linkinfo
|
-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.linkinfo
|
||||||
|
|||||||
@ -133,11 +133,11 @@ $(OBJ_DIR)/scan_app.cu_o: cudpp_mini/scan_app.cu
|
|||||||
$(GPU_LIB): $(OBJS) $(CUDPP)
|
$(GPU_LIB): $(OBJS) $(CUDPP)
|
||||||
$(AR) -crusv $(GPU_LIB) $(OBJS) $(CUDPP)
|
$(AR) -crusv $(GPU_LIB) $(OBJS) $(CUDPP)
|
||||||
@cp $(EXTRAMAKE) Makefile.lammps
|
@cp $(EXTRAMAKE) Makefile.lammps
|
||||||
|
|
||||||
# test app for querying device info
|
# test app for querying device info
|
||||||
|
|
||||||
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
||||||
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.linkinfo
|
-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.linkinfo
|
||||||
|
|||||||
@ -98,10 +98,10 @@ HIP_GPU_OPTS += $(HIP_OPTS) -I./
|
|||||||
ifeq (spirv,$(HIP_PLATFORM))
|
ifeq (spirv,$(HIP_PLATFORM))
|
||||||
HIP_HOST_OPTS += -fPIC
|
HIP_HOST_OPTS += -fPIC
|
||||||
HIP_GPU_CC = $(HIP_PATH)/bin/hipcc -c
|
HIP_GPU_CC = $(HIP_PATH)/bin/hipcc -c
|
||||||
HIP_GPU_OPTS_S =
|
HIP_GPU_OPTS_S =
|
||||||
HIP_GPU_OPTS_E =
|
HIP_GPU_OPTS_E =
|
||||||
HIP_KERNEL_SUFFIX = .cpp
|
HIP_KERNEL_SUFFIX = .cpp
|
||||||
HIP_LIBS_TARGET =
|
HIP_LIBS_TARGET =
|
||||||
export HCC_AMDGPU_TARGET := $(HIP_ARCH)
|
export HCC_AMDGPU_TARGET := $(HIP_ARCH)
|
||||||
else ifeq (clang,$(HIP_COMPILER))
|
else ifeq (clang,$(HIP_COMPILER))
|
||||||
HIP_HOST_OPTS += -fPIC
|
HIP_HOST_OPTS += -fPIC
|
||||||
|
|||||||
@ -2,4 +2,4 @@
|
|||||||
|
|
||||||
gpu_SYSINC = -DFFT_SINGLE
|
gpu_SYSINC = -DFFT_SINGLE
|
||||||
gpu_SYSLIB = -framework OpenCL
|
gpu_SYSLIB = -framework OpenCL
|
||||||
gpu_SYSPATH =
|
gpu_SYSPATH =
|
||||||
|
|||||||
@ -2,5 +2,5 @@
|
|||||||
# settings for OpenCL builds
|
# settings for OpenCL builds
|
||||||
gpu_SYSINC =
|
gpu_SYSINC =
|
||||||
gpu_SYSLIB = -Wl,--enable-stdcall-fixup -L../../tools/mingw-cross$(LIBOBJDIR) -Wl,-Bdynamic,-lOpenCL,-Bstatic
|
gpu_SYSLIB = -Wl,--enable-stdcall-fixup -L../../tools/mingw-cross$(LIBOBJDIR) -Wl,-Bdynamic,-lOpenCL,-Bstatic
|
||||||
gpu_SYSPATH =
|
gpu_SYSPATH =
|
||||||
|
|
||||||
|
|||||||
@ -2,4 +2,4 @@
|
|||||||
|
|
||||||
gpu_SYSINC =
|
gpu_SYSINC =
|
||||||
gpu_SYSLIB = -lOpenCL
|
gpu_SYSLIB = -lOpenCL
|
||||||
gpu_SYSPATH =
|
gpu_SYSPATH =
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# /* ----------------------------------------------------------------------
|
# /* ----------------------------------------------------------------------
|
||||||
# Generic Linux Makefile for CUDA
|
# Generic Linux Makefile for CUDA
|
||||||
# - Change CUDA_ARCH for your GPU
|
# - Change CUDA_ARCH for your GPU
|
||||||
# ------------------------------------------------------------------------- */
|
# ------------------------------------------------------------------------- */
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# /* ----------------------------------------------------------------------
|
# /* ----------------------------------------------------------------------
|
||||||
# Generic Linux Makefile for CUDA complied for multiple compute capabilities
|
# Generic Linux Makefile for CUDA complied for multiple compute capabilities
|
||||||
# - Add your GPU to CUDA_CODE
|
# - Add your GPU to CUDA_CODE
|
||||||
# ------------------------------------------------------------------------- */
|
# ------------------------------------------------------------------------- */
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# /* ----------------------------------------------------------------------
|
# /* ----------------------------------------------------------------------
|
||||||
# Generic Linux Makefile for OpenCL - Mixed precision
|
# Generic Linux Makefile for OpenCL - Mixed precision
|
||||||
# ------------------------------------------------------------------------- */
|
# ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
@ -11,7 +11,7 @@ EXTRAMAKE = Makefile.lammps.opencl
|
|||||||
|
|
||||||
LMP_INC = -DLAMMPS_SMALLBIG
|
LMP_INC = -DLAMMPS_SMALLBIG
|
||||||
|
|
||||||
OCL_INC =
|
OCL_INC =
|
||||||
OCL_CPP = mpic++ -std=c++11 -O3 -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC)
|
OCL_CPP = mpic++ -std=c++11 -O3 -DMPICH_IGNORE_CXX_SEEK $(LMP_INC) $(OCL_INC)
|
||||||
OCL_LINK = -lOpenCL
|
OCL_LINK = -lOpenCL
|
||||||
OCL_PREC = -D_SINGLE_DOUBLE
|
OCL_PREC = -D_SINGLE_DOUBLE
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# /* ----------------------------------------------------------------------
|
# /* ----------------------------------------------------------------------
|
||||||
# Generic Mac Makefile for OpenCL - Single precision with FFT_SINGLE
|
# Generic Mac Makefile for OpenCL - Single precision with FFT_SINGLE
|
||||||
# ------------------------------------------------------------------------- */
|
# ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# /* ----------------------------------------------------------------------
|
# /* ----------------------------------------------------------------------
|
||||||
# Generic Mac Makefile for OpenCL - Single precision with FFT_SINGLE
|
# Generic Mac Makefile for OpenCL - Single precision with FFT_SINGLE
|
||||||
# ------------------------------------------------------------------------- */
|
# ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# /* ----------------------------------------------------------------------
|
# /* ----------------------------------------------------------------------
|
||||||
# Linux Makefile for Intel oneAPI - Mixed precision
|
# Linux Makefile for Intel oneAPI - Mixed precision
|
||||||
# ------------------------------------------------------------------------- */
|
# ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# /* ----------------------------------------------------------------------
|
# /* ----------------------------------------------------------------------
|
||||||
# Linux Makefile for Intel oneAPI - Mixed precision (with timing enabled)
|
# Linux Makefile for Intel oneAPI - Mixed precision (with timing enabled)
|
||||||
# ------------------------------------------------------------------------- */
|
# ------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
# /* ----------------------------------------------------------------------
|
# /* ----------------------------------------------------------------------
|
||||||
# Generic Linux Makefile for CUDA without MPI libraries
|
# Generic Linux Makefile for CUDA without MPI libraries
|
||||||
# - Change CUDA_ARCH for your GPU
|
# - Change CUDA_ARCH for your GPU
|
||||||
# ------------------------------------------------------------------------- */
|
# ------------------------------------------------------------------------- */
|
||||||
|
|||||||
@ -11,7 +11,7 @@ HOST_H = lal_answer.h lal_atom.h lal_balance.h lal_base_atomic.h lal_base_amoeba
|
|||||||
lal_base_charge.h lal_base_dipole.h lal_base_dpd.h \
|
lal_base_charge.h lal_base_dipole.h lal_base_dpd.h \
|
||||||
lal_base_ellipsoid.h lal_base_three.h lal_device.h lal_neighbor.h \
|
lal_base_ellipsoid.h lal_base_three.h lal_device.h lal_neighbor.h \
|
||||||
lal_neighbor_shared.h lal_pre_ocl_config.h $(NVD_H)
|
lal_neighbor_shared.h lal_pre_ocl_config.h $(NVD_H)
|
||||||
|
|
||||||
# Source files
|
# Source files
|
||||||
SRCS := $(wildcard ./lal_*.cpp)
|
SRCS := $(wildcard ./lal_*.cpp)
|
||||||
OBJS := $(subst ./,$(OBJ_DIR)/,$(SRCS:%.cpp=%.o))
|
OBJS := $(subst ./,$(OBJ_DIR)/,$(SRCS:%.cpp=%.o))
|
||||||
@ -127,7 +127,7 @@ $(GPU_LIB): $(OBJS) $(CUDPP)
|
|||||||
# test app for querying device info
|
# test app for querying device info
|
||||||
|
|
||||||
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
||||||
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.cubin *.linkinfo
|
-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.cubin *.linkinfo
|
||||||
|
|||||||
@ -89,7 +89,7 @@ $(GPU_LIB): $(OBJS) $(CUDPP)
|
|||||||
# test app for querying device info
|
# test app for querying device info
|
||||||
|
|
||||||
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
||||||
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.linkinfo
|
-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CUHS) *.linkinfo
|
||||||
|
|||||||
@ -108,7 +108,7 @@ inline int _host_alloc(mat_type &mat, copy_type &cm, const size_t n,
|
|||||||
return UCL_MEMORY_ERROR;
|
return UCL_MEMORY_ERROR;
|
||||||
*mat.host_ptr() = (typename mat_type::data_type*)
|
*mat.host_ptr() = (typename mat_type::data_type*)
|
||||||
clEnqueueMapBuffer(cm.cq(),mat.cbegin(),CL_TRUE,
|
clEnqueueMapBuffer(cm.cq(),mat.cbegin(),CL_TRUE,
|
||||||
map_perm,0,n,0,NULL,NULL,NULL);
|
map_perm,0,n,0,NULL,NULL,NULL);
|
||||||
|
|
||||||
mat.cq()=cm.cq();
|
mat.cq()=cm.cq();
|
||||||
CL_SAFE_CALL(clRetainCommandQueue(mat.cq()));
|
CL_SAFE_CALL(clRetainCommandQueue(mat.cq()));
|
||||||
|
|||||||
@ -324,8 +324,8 @@ __kernel void k_edpd(const __global numtyp4 *restrict x_,
|
|||||||
f.z+=delz*force;
|
f.z+=delz*force;
|
||||||
|
|
||||||
// heat transfer
|
// heat transfer
|
||||||
|
|
||||||
if (r < coeff2w) {
|
if (r < coeff2w) {
|
||||||
numtyp wrT = (numtyp)1.0 - r/coeff2w;
|
numtyp wrT = (numtyp)1.0 - r/coeff2w;
|
||||||
wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT));
|
wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT));
|
||||||
wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype]
|
wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype]
|
||||||
@ -565,7 +565,7 @@ __kernel void k_edpd_fast(const __global numtyp4 *restrict x_,
|
|||||||
|
|
||||||
// heat transfer
|
// heat transfer
|
||||||
|
|
||||||
if (r < coeff2w) {
|
if (r < coeff2w) {
|
||||||
numtyp wrT = (numtyp)1.0 - r/coeff2w;
|
numtyp wrT = (numtyp)1.0 - r/coeff2w;
|
||||||
wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT));
|
wrT = MAX((numtyp)0.0,MIN((numtyp)1.0,wrT));
|
||||||
wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype]
|
wrT = ucl_pow(wrT, (numtyp)0.5*coeff2z); // powerT[itype][jtype]
|
||||||
@ -579,10 +579,10 @@ __kernel void k_edpd_fast(const __global numtyp4 *restrict x_,
|
|||||||
factor += kcx*T_pow.x + kcy*T_pow.y + kcz*T_pow.z + kcw*T_pow.w;
|
factor += kcx*T_pow.x + kcy*T_pow.y + kcz*T_pow.z + kcw*T_pow.w;
|
||||||
kappaT *= factor;
|
kappaT *= factor;
|
||||||
}
|
}
|
||||||
|
|
||||||
numtyp kij = cvi*cvj*kappaT * T_ij*T_ij;
|
numtyp kij = cvi*cvj*kappaT * T_ij*T_ij;
|
||||||
numtyp alphaij = ucl_sqrt((numtyp)2.0*kboltz*kij);
|
numtyp alphaij = ucl_sqrt((numtyp)2.0*kboltz*kij);
|
||||||
|
|
||||||
numtyp dQc = kij * wrT*wrT * (Tj - Ti )/(Ti*Tj);
|
numtyp dQc = kij * wrT*wrT * (Tj - Ti )/(Ti*Tj);
|
||||||
numtyp dQd = wr*wr*( GammaIJ * vijeij*vijeij - SigmaIJ*SigmaIJ/mass_itype ) - SigmaIJ * wr *vijeij *randnum;
|
numtyp dQd = wr*wr*( GammaIJ * vijeij*vijeij - SigmaIJ*SigmaIJ/mass_itype ) - SigmaIJ * wr *vijeij *randnum;
|
||||||
dQd /= (cvi+cvj);
|
dQd /= (cvi+cvj);
|
||||||
|
|||||||
@ -238,7 +238,7 @@ __kernel void k_sph_heatconduction_fast(const __global numtyp4 *restrict x_,
|
|||||||
// Lucy Kernel, 2d
|
// Lucy Kernel, 2d
|
||||||
wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq;
|
wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq;
|
||||||
}
|
}
|
||||||
|
|
||||||
// total thermal energy increment
|
// total thermal energy increment
|
||||||
numtyp D = coeffx; // alpha[itype][jtype] diffusion coefficient
|
numtyp D = coeffx; // alpha[itype][jtype] diffusion coefficient
|
||||||
numtyp deltaE = (numtyp)2.0 * mass_itype * mass_jtype / (mass_itype + mass_jtype);
|
numtyp deltaE = (numtyp)2.0 * mass_itype * mass_jtype / (mass_itype + mass_jtype);
|
||||||
|
|||||||
@ -362,7 +362,7 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
|
|||||||
// Lucy Kernel, 2d
|
// Lucy Kernel, 2d
|
||||||
wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq;
|
wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq;
|
||||||
}
|
}
|
||||||
|
|
||||||
// function call to LJ EOS
|
// function call to LJ EOS
|
||||||
numtyp fcj[2];
|
numtyp fcj[2];
|
||||||
LJEOS2(rhoj, esphj, cvj, fcj);
|
LJEOS2(rhoj, esphj, cvj, fcj);
|
||||||
@ -404,7 +404,7 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
|
|||||||
drhoEacc.y += deltaE;
|
drhoEacc.y += deltaE;
|
||||||
|
|
||||||
if (EVFLAG && eflag) {
|
if (EVFLAG && eflag) {
|
||||||
numtyp e = (numtyp)0;
|
numtyp e = (numtyp)0;
|
||||||
energy+=e;
|
energy+=e;
|
||||||
}
|
}
|
||||||
if (EVFLAG && vflag) {
|
if (EVFLAG && vflag) {
|
||||||
|
|||||||
@ -145,9 +145,9 @@ __kernel void k_sph_taitwater(const __global numtyp4 *restrict x_,
|
|||||||
// Lucy Kernel, 2d
|
// Lucy Kernel, 2d
|
||||||
wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq;
|
wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq;
|
||||||
}
|
}
|
||||||
|
|
||||||
// compute pressure of atom j with Tait EOS
|
// compute pressure of atom j with Tait EOS
|
||||||
|
|
||||||
numtyp tmp = rhoj / rho0_jtype;
|
numtyp tmp = rhoj / rho0_jtype;
|
||||||
numtyp fj = tmp * tmp * tmp;
|
numtyp fj = tmp * tmp * tmp;
|
||||||
fj = B_jtype * (fj * fj * tmp - (numtyp)1.0);
|
fj = B_jtype * (fj * fj * tmp - (numtyp)1.0);
|
||||||
@ -321,7 +321,7 @@ __kernel void k_sph_taitwater_fast(const __global numtyp4 *restrict x_,
|
|||||||
wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq;
|
wfd = (numtyp)-19.098593171027440292 * wfd * wfd * ihsq * ihsq * ihsq;
|
||||||
}
|
}
|
||||||
|
|
||||||
// compute pressure of atom j with Tait EOS
|
// compute pressure of atom j with Tait EOS
|
||||||
numtyp tmp = rhoj / rho0_jtype;
|
numtyp tmp = rhoj / rho0_jtype;
|
||||||
numtyp fj = tmp * tmp * tmp;
|
numtyp fj = tmp * tmp * tmp;
|
||||||
fj = B_jtype * (fj * fj * tmp - (numtyp)1.0);
|
fj = B_jtype * (fj * fj * tmp - (numtyp)1.0);
|
||||||
@ -356,7 +356,7 @@ __kernel void k_sph_taitwater_fast(const __global numtyp4 *restrict x_,
|
|||||||
drhoEacc.y += deltaE;
|
drhoEacc.y += deltaE;
|
||||||
|
|
||||||
if (EVFLAG && eflag) {
|
if (EVFLAG && eflag) {
|
||||||
numtyp e = (numtyp)0;
|
numtyp e = (numtyp)0;
|
||||||
energy+=e;
|
energy+=e;
|
||||||
}
|
}
|
||||||
if (EVFLAG && vflag) {
|
if (EVFLAG && vflag) {
|
||||||
|
|||||||
Reference in New Issue
Block a user