Merge pull request #1436 from rbberger/gpu_opencl_bugfixes
GPU package bugfixes
This commit is contained in:
@ -1370,7 +1370,15 @@ if(PKG_GPU)
|
||||
set(OCL_COMMON_HEADERS ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_preprocessor.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_aux_fun1.h)
|
||||
|
||||
file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu)
|
||||
list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu)
|
||||
list(REMOVE_ITEM GPU_LIB_CU
|
||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu
|
||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu
|
||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu
|
||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu
|
||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu
|
||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu
|
||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu
|
||||
)
|
||||
|
||||
foreach(GPU_KERNEL ${GPU_LIB_CU})
|
||||
get_filename_component(basename ${GPU_KERNEL} NAME_WE)
|
||||
@ -1381,7 +1389,21 @@ if(PKG_GPU)
|
||||
|
||||
GenerateOpenCLHeader(gayberne ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu)
|
||||
GenerateOpenCLHeader(gayberne_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu)
|
||||
list(APPEND GPU_LIB_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h)
|
||||
GenerateOpenCLHeader(re_squared ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu)
|
||||
GenerateOpenCLHeader(re_squared_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu)
|
||||
GenerateOpenCLHeader(tersoff ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu)
|
||||
GenerateOpenCLHeader(tersoff_zbl ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu)
|
||||
GenerateOpenCLHeader(tersoff_mod ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu)
|
||||
|
||||
list(APPEND GPU_LIB_SOURCES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h
|
||||
)
|
||||
|
||||
add_library(gpu STATIC ${GPU_LIB_SOURCES})
|
||||
target_link_libraries(gpu ${OpenCL_LIBRARIES})
|
||||
|
||||
@ -42,9 +42,9 @@ texture<int4,1> vel_tex;
|
||||
// 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, J. Comput. Phys. 230 (2011), 7191-7201.
|
||||
// PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19
|
||||
|
||||
#define LCGA 0x4beb5d59 // Full period 32 bit LCG
|
||||
#define LCGA 0x4beb5d59 /* Full period 32 bit LCG */
|
||||
#define LCGC 0x2600e1f7
|
||||
#define oWeylPeriod 0xda879add // Prime period 3666320093
|
||||
#define oWeylPeriod 0xda879add /* Prime period 3666320093 */
|
||||
#define oWeylOffset 0x8009d14b
|
||||
#define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */
|
||||
|
||||
|
||||
@ -27,9 +27,9 @@ texture<int4,1> pos_tex;
|
||||
// LJ quantities scaled by epsilon and rmin = sigma*2^(1/6) (see src/pair_lj_cubic.h)
|
||||
|
||||
#define _RT6TWO (numtyp)1.1224621
|
||||
#define _PHIS (numtyp)-0.7869823 // energy at s
|
||||
#define _DPHIDS (numtyp)2.6899009 // gradient at s
|
||||
#define _A3 (numtyp)27.93357 // cubic coefficient
|
||||
#define _PHIS (numtyp)-0.7869823 /* energy at s */
|
||||
#define _DPHIDS (numtyp)2.6899009 /* gradient at s */
|
||||
#define _A3 (numtyp)27.93357 /* cubic coefficient */
|
||||
|
||||
__kernel void k_lj_cubic(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict lj1,
|
||||
|
||||
Reference in New Issue
Block a user