Merge pull request #3127 from akohlmey/gpu-opencl-updates
Update compiling OpenCL loader lib
This commit is contained in:
@ -1,50 +1,12 @@
|
||||
message(STATUS "Downloading and building OpenCL loader library")
|
||||
set(OPENCL_LOADER_URL "${LAMMPS_THIRDPARTY_URL}/opencl-loader-2021.09.18.tar.gz" CACHE STRING "URL for OpenCL loader tarball")
|
||||
set(OPENCL_LOADER_MD5 "3b3882627964bd02e5c3b02065daac3c" CACHE STRING "MD5 checksum of OpenCL loader tarball")
|
||||
set(OPENCL_LOADER_URL "${LAMMPS_THIRDPARTY_URL}/opencl-loader-2022.01.04.tar.gz" CACHE STRING "URL for OpenCL loader tarball")
|
||||
set(OPENCL_LOADER_MD5 "8d3a801e87a2c6653bf0e27707063914" CACHE STRING "MD5 checksum of OpenCL loader tarball")
|
||||
mark_as_advanced(OPENCL_LOADER_URL)
|
||||
mark_as_advanced(OPENCL_LOADER_MD5)
|
||||
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(opencl_loader
|
||||
URL ${OPENCL_LOADER_URL}
|
||||
URL_MD5 ${OPENCL_LOADER_MD5}
|
||||
SOURCE_DIR "${CMAKE_BINARY_DIR}/opencl_loader-src"
|
||||
BINARY_DIR "${CMAKE_BINARY_DIR}/opencl_loader-build"
|
||||
CMAKE_ARGS ${CMAKE_REQUEST_PIC} ${CMAKE_EXTRA_OPENCL_LOADER_OPTS}
|
||||
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
|
||||
-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
|
||||
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
|
||||
-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
|
||||
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
|
||||
BUILD_BYPRODUCTS <BINARY_DIR>/libOpenCL${CMAKE_STATIC_LIBRARY_SUFFIX}
|
||||
LOG_DOWNLOAD ON
|
||||
LOG_CONFIGURE ON
|
||||
LOG_BUILD ON
|
||||
INSTALL_COMMAND ""
|
||||
TEST_COMMAND "")
|
||||
|
||||
ExternalProject_Get_Property(opencl_loader SOURCE_DIR)
|
||||
set(OPENCL_LOADER_INCLUDE_DIR ${SOURCE_DIR}/inc)
|
||||
|
||||
# Workaround for CMake 3.10 on Ubuntu 18.04: create the include directory at configure time, before the loader sources are downloaded
|
||||
file(MAKE_DIRECTORY ${OPENCL_LOADER_INCLUDE_DIR})
|
||||
|
||||
ExternalProject_Get_Property(opencl_loader BINARY_DIR)
|
||||
set(OPENCL_LOADER_LIBRARY_PATH "${BINARY_DIR}/libOpenCL${CMAKE_STATIC_LIBRARY_SUFFIX}")
|
||||
|
||||
find_package(Threads QUIET)
|
||||
if(NOT WIN32)
|
||||
set(OPENCL_LOADER_DEP_LIBS "Threads::Threads;${CMAKE_DL_LIBS}")
|
||||
else()
|
||||
set(OPENCL_LOADER_DEP_LIBS "cfgmgr32;runtimeobject")
|
||||
endif()
|
||||
|
||||
add_library(OpenCL::OpenCL UNKNOWN IMPORTED)
|
||||
add_dependencies(OpenCL::OpenCL opencl_loader)
|
||||
|
||||
set_target_properties(OpenCL::OpenCL PROPERTIES
|
||||
IMPORTED_LOCATION ${OPENCL_LOADER_LIBRARY_PATH}
|
||||
INTERFACE_INCLUDE_DIRECTORIES ${OPENCL_LOADER_INCLUDE_DIR}
|
||||
INTERFACE_LINK_LIBRARIES "${OPENCL_LOADER_DEP_LIBS}")
|
||||
|
||||
set(INSTALL_LIBOPENCL OFF CACHE BOOL "" FORCE)
|
||||
set(BUILD_SHARED_LIBS OFF)
|
||||
include(ExternalCMakeProject)
|
||||
ExternalCMakeProject(opencl_loader ${OPENCL_LOADER_URL} ${OPENCL_LOADER_MD5} opencl-loader . "")
|
||||
|
||||
add_library(OpenCL::OpenCL ALIAS OpenCL)
|
||||
|
||||
@ -248,7 +248,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
|
||||
return energy_virial(eatom,vatom,virial);
|
||||
|
||||
double evdwl=0.0;
|
||||
int ii, vstart=0, iend=_ev_stride;
|
||||
int vstart=0, iend=_ev_stride;
|
||||
if (_eflag) {
|
||||
iend=_ev_stride*2;
|
||||
#if (LAL_USE_OMP_SIMD == 1)
|
||||
|
||||
@ -239,14 +239,14 @@ void BaseAtomicT::compute(const int f_ago, const int inum_full,
|
||||
// Reneighbor on GPU if necessary and then compute forces, virials, energies
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
int ** BaseAtomicT::compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special,
|
||||
const bool eflag_in, const bool vflag_in,
|
||||
const bool eatom, const bool vatom,
|
||||
int &host_start, int **ilist, int **jnum,
|
||||
const double cpu_time, bool &success) {
|
||||
int **BaseAtomicT::compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special,
|
||||
const bool eflag_in, const bool vflag_in,
|
||||
const bool eatom, const bool vatom,
|
||||
int &host_start, int **ilist, int **jnum,
|
||||
const double cpu_time, bool &success) {
|
||||
acc_timers();
|
||||
int eflag, vflag;
|
||||
if (eatom) eflag=2;
|
||||
|
||||
@ -133,20 +133,12 @@ class BaseAtomic {
|
||||
int &host_start, const double cpu_time, bool &success);
|
||||
|
||||
/// Pair loop with device neighboring
|
||||
int * compute(const int ago, const int inum_full,
|
||||
int **compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
const double cpu_time, bool &success);
|
||||
|
||||
/// Pair loop with device neighboring
|
||||
int ** compute(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **numj, const double cpu_time, bool &success);
|
||||
int **ilist, int **numj, const double cpu_time, bool &success);
|
||||
|
||||
// -------------------------- DEVICE DATA -------------------------
|
||||
|
||||
|
||||
@ -69,7 +69,7 @@ BaseEllipsoidT::~BaseEllipsoid() {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int BaseEllipsoidT::bytes_per_atom(const int max_nbors) const {
|
||||
int BaseEllipsoidT::bytes_per_atom_ellipsoid(const int max_nbors) const {
|
||||
return device->atom.bytes_per_atom()+ans->bytes_per_atom()+
|
||||
nbor->bytes_per_atom(max_nbors);
|
||||
}
|
||||
|
||||
@ -108,7 +108,7 @@ class BaseEllipsoid {
|
||||
void output_times();
|
||||
|
||||
/// Returns memory usage on device per atom
|
||||
int bytes_per_atom(const int max_nbors) const;
|
||||
int bytes_per_atom_ellipsoid(const int max_nbors) const;
|
||||
|
||||
/// Total host memory used by library for pair style
|
||||
double host_memory_usage_base() const;
|
||||
@ -173,18 +173,13 @@ class BaseEllipsoid {
|
||||
const double cpu_time, bool &success, double **quat);
|
||||
|
||||
/// Pair loop with device neighboring
|
||||
int** compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **numj, const double cpu_time, bool &success,
|
||||
double **host_quat);
|
||||
|
||||
/// Build neighbor list on accelerator
|
||||
void build_nbor_list(const int inum, const int host_inum, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, bool &success);
|
||||
int**compute(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, double *sublo,
|
||||
double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **numj, const double cpu_time, bool &success,
|
||||
double **host_quat);
|
||||
|
||||
// -------------------------- DEVICE DATA -------------------------
|
||||
|
||||
|
||||
@ -198,7 +198,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
|
||||
// Find deviceID with most CUs (priority given to the accelerator type)
|
||||
if (_first_device < 0) {
|
||||
int best_device = 0;
|
||||
int best_cus = gpu->cus(0);
|
||||
unsigned best_cus = gpu->cus(0);
|
||||
bool type_match = (gpu->device_type(0) == type);
|
||||
for (int i = 1; i < gpu->num_devices(); i++) {
|
||||
if (type_match==true && gpu->device_type(i)!=type)
|
||||
|
||||
@ -62,9 +62,6 @@ class EAM : public BaseAtomic<numtyp, acctyp> {
|
||||
/** \note This is called at the beginning of the init() routine **/
|
||||
void clear();
|
||||
|
||||
/// Returns memory usage on device per atom
|
||||
int bytes_per_atom(const int max_nbors) const;
|
||||
|
||||
/// Total host memory used by library for pair style
|
||||
double host_memory_usage() const;
|
||||
|
||||
|
||||
@ -43,7 +43,7 @@ GayBerneT::~GayBerne() {
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int GayBerneT::bytes_per_atom(const int max_nbors) const {
|
||||
return this->bytes_per_atom(max_nbors);
|
||||
return this->bytes_per_atom_ellipsoid(max_nbors);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
|
||||
@ -43,7 +43,7 @@ RESquaredT::~RESquared() {
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int RESquaredT::bytes_per_atom(const int max_nbors) const {
|
||||
return this->bytes_per_atom(max_nbors);
|
||||
return this->bytes_per_atom_ellipsoid(max_nbors);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
|
||||
@ -51,9 +51,9 @@ int ljecl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
|
||||
int &gpu_mode, FILE *screen, double **host_cut_ljsq,
|
||||
double host_cut_coulsq, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald);
|
||||
int ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3, double **host_lj4,
|
||||
double **offset, double **shift, double **host_lj_cutsq);
|
||||
void ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3, double **host_lj4,
|
||||
double **offset, double **shift, double **host_lj_cutsq);
|
||||
void ljecl_gpu_clear();
|
||||
int ** ljecl_gpu_compute_n(const int ago, const int inum,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
|
||||
@ -42,8 +42,8 @@ int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1,
|
||||
const int nall, const int max_nbors, const int maxspecial,
|
||||
const double cell_size, int &gpu_mode, FILE *screen);
|
||||
|
||||
int ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1,
|
||||
double **host_uf2, double **host_uf3, double **offset);
|
||||
void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1,
|
||||
double **host_uf2, double **host_uf3, double **offset);
|
||||
|
||||
void ufml_gpu_clear();
|
||||
int ** ufml_gpu_compute_n(const int ago, const int inum, const int nall,
|
||||
|
||||
Reference in New Issue
Block a user