Merge pull request #1436 from rbberger/gpu_opencl_bugfixes

GPU package bugfixes
This commit is contained in:
Axel Kohlmeyer
2019-04-30 12:02:52 -04:00
committed by GitHub
3 changed files with 29 additions and 7 deletions

View File

@ -1370,7 +1370,15 @@ if(PKG_GPU)
set(OCL_COMMON_HEADERS ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_preprocessor.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_aux_fun1.h)
file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu)
list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu)
list(REMOVE_ITEM GPU_LIB_CU
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu
)
foreach(GPU_KERNEL ${GPU_LIB_CU})
get_filename_component(basename ${GPU_KERNEL} NAME_WE)
@ -1381,7 +1389,21 @@ if(PKG_GPU)
GenerateOpenCLHeader(gayberne ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu)
GenerateOpenCLHeader(gayberne_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu)
list(APPEND GPU_LIB_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h)
GenerateOpenCLHeader(re_squared ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu)
GenerateOpenCLHeader(re_squared_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu)
GenerateOpenCLHeader(tersoff ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu)
GenerateOpenCLHeader(tersoff_zbl ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu)
GenerateOpenCLHeader(tersoff_mod ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu)
list(APPEND GPU_LIB_SOURCES
${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h
)
add_library(gpu STATIC ${GPU_LIB_SOURCES})
target_link_libraries(gpu ${OpenCL_LIBRARIES})

View File

@ -42,9 +42,9 @@ texture<int4,1> vel_tex;
// 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, Comput. Phys. Comm. 230 (2011), 7191-7201.
// PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19
#define LCGA 0x4beb5d59 // Full period 32 bit LCG
#define LCGA 0x4beb5d59 /* Full period 32 bit LCG */
#define LCGC 0x2600e1f7
#define oWeylPeriod 0xda879add // Prime period 3666320093
#define oWeylPeriod 0xda879add /* Prime period 3666320093 */
#define oWeylOffset 0x8009d14b
#define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */

View File

@ -27,9 +27,9 @@ texture<int4,1> pos_tex;
// LJ quantities scaled by epsilon and rmin = sigma*2^1/6 (see src/pair_lj_cubic.h)
#define _RT6TWO (numtyp)1.1224621
#define _PHIS (numtyp)-0.7869823 // energy at s
#define _DPHIDS (numtyp)2.6899009 // gradient at s
#define _A3 (numtyp)27.93357 // cubic coefficient
#define _PHIS (numtyp)-0.7869823 /* energy at s */
#define _DPHIDS (numtyp)2.6899009 /* gradient at s */
#define _A3 (numtyp)27.93357 /* cubic coefficient */
__kernel void k_lj_cubic(const __global numtyp4 *restrict x_,
const __global numtyp4 *restrict lj1,