Merge pull request #3127 from akohlmey/gpu-opencl-updates

Update compiling OpenCL loader lib
This commit is contained in:
Axel Kohlmeyer
2022-02-14 16:10:53 -05:00
committed by GitHub
12 changed files with 35 additions and 89 deletions

View File

@ -1,50 +1,12 @@
# NOTE(review): this span is a rendered diff hunk ("@ -1,50 +1,12") whose +/- markers were
# stripped, so pre-change and post-change lines appear interleaved. It is not runnable as
# shown: the cache pins are set twice and the OpenCL::OpenCL target is declared twice.
message(STATUS "Downloading and building OpenCL loader library")
# NOTE(review): presumably the pre-change tarball pin (2021.09.18). If both pairs were executed
# in this order, this pair would win, because set(... CACHE ...) without FORCE does not
# overwrite an existing cache entry.
set(OPENCL_LOADER_URL "${LAMMPS_THIRDPARTY_URL}/opencl-loader-2021.09.18.tar.gz" CACHE STRING "URL for OpenCL loader tarball")
set(OPENCL_LOADER_MD5 "3b3882627964bd02e5c3b02065daac3c" CACHE STRING "MD5 checksum of OpenCL loader tarball")
# NOTE(review): presumably the post-change tarball pin (2022.01.04) with its matching checksum.
set(OPENCL_LOADER_URL "${LAMMPS_THIRDPARTY_URL}/opencl-loader-2022.01.04.tar.gz" CACHE STRING "URL for OpenCL loader tarball")
set(OPENCL_LOADER_MD5 "8d3a801e87a2c6653bf0e27707063914" CACHE STRING "MD5 checksum of OpenCL loader tarball")
# Keep both overridable pins out of the default cache view.
mark_as_advanced(OPENCL_LOADER_URL)
mark_as_advanced(OPENCL_LOADER_MD5)
# NOTE(review): from here down to the set_target_properties() call is presumably the
# pre-change implementation, which built the loader as a separate ExternalProject.
include(ExternalProject)
# Fetch the tarball (verified against the MD5), then configure and build it in dedicated
# source/build directories under CMAKE_BINARY_DIR. The compiler, build type, make program and
# toolchain file of this build are forwarded to the sub-build. Install and test steps are
# disabled by passing empty commands; the static library is declared as a build byproduct.
ExternalProject_Add(opencl_loader
URL ${OPENCL_LOADER_URL}
URL_MD5 ${OPENCL_LOADER_MD5}
SOURCE_DIR "${CMAKE_BINARY_DIR}/opencl_loader-src"
BINARY_DIR "${CMAKE_BINARY_DIR}/opencl_loader-build"
CMAKE_ARGS ${CMAKE_REQUEST_PIC} ${CMAKE_EXTRA_OPENCL_LOADER_OPTS}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}
BUILD_BYPRODUCTS <BINARY_DIR>/libOpenCL${CMAKE_STATIC_LIBRARY_SUFFIX}
LOG_DOWNLOAD ON
LOG_CONFIGURE ON
LOG_BUILD ON
INSTALL_COMMAND ""
TEST_COMMAND "")
# Headers are taken from the "inc" subdirectory of the unpacked source tree.
ExternalProject_Get_Property(opencl_loader SOURCE_DIR)
set(OPENCL_LOADER_INCLUDE_DIR ${SOURCE_DIR}/inc)
# workaround for CMake 3.10 on ubuntu 18.04
# (creates the include directory at configure time, before the tarball has been unpacked;
# presumably because the imported target's include directory must already exist - TODO confirm)
file(MAKE_DIRECTORY ${OPENCL_LOADER_INCLUDE_DIR})
# The static library is expected directly in the external project's build directory.
ExternalProject_Get_Property(opencl_loader BINARY_DIR)
set(OPENCL_LOADER_LIBRARY_PATH "${BINARY_DIR}/libOpenCL${CMAKE_STATIC_LIBRARY_SUFFIX}")
# Extra link dependencies attached to the imported target: threads plus the dl libraries
# everywhere except Windows, cfgmgr32 and runtimeobject on Windows.
find_package(Threads QUIET)
if(NOT WIN32)
set(OPENCL_LOADER_DEP_LIBS "Threads::Threads;${CMAKE_DL_LIBS}")
else()
set(OPENCL_LOADER_DEP_LIBS "cfgmgr32;runtimeobject")
endif()
# Wrap the externally built library in an imported target; add_dependencies() orders the
# external build before anything that depends on OpenCL::OpenCL.
add_library(OpenCL::OpenCL UNKNOWN IMPORTED)
add_dependencies(OpenCL::OpenCL opencl_loader)
set_target_properties(OpenCL::OpenCL PROPERTIES
IMPORTED_LOCATION ${OPENCL_LOADER_LIBRARY_PATH}
INTERFACE_INCLUDE_DIRECTORIES ${OPENCL_LOADER_INCLUDE_DIR}
INTERFACE_LINK_LIBRARIES "${OPENCL_LOADER_DEP_LIBS}")
# NOTE(review): the remaining lines are presumably the post-change implementation. They force
# the INSTALL_LIBOPENCL cache entry to OFF, set BUILD_SHARED_LIBS to OFF as a normal variable,
# and hand the URL and checksum to ExternalCMakeProject(), a helper from the included
# ExternalCMakeProject module that is not visible here (assumed to add the loader sources to
# this build - verify against that module).
set(INSTALL_LIBOPENCL OFF CACHE BOOL "" FORCE)
set(BUILD_SHARED_LIBS OFF)
include(ExternalCMakeProject)
ExternalCMakeProject(opencl_loader ${OPENCL_LOADER_URL} ${OPENCL_LOADER_MD5} opencl-loader . "")
# Namespaced alias for the "OpenCL" target, which is expected to come from the loader project.
# This would clash with the imported OpenCL::OpenCL target above if both lines were kept.
add_library(OpenCL::OpenCL ALIAS OpenCL)

View File

@ -248,7 +248,7 @@ double AnswerT::energy_virial(double *eatom, double **vatom,
return energy_virial(eatom,vatom,virial);
double evdwl=0.0;
int ii, vstart=0, iend=_ev_stride;
int vstart=0, iend=_ev_stride;
if (_eflag) {
iend=_ev_stride*2;
#if (LAL_USE_OMP_SIMD == 1)

View File

@ -239,14 +239,14 @@ void BaseAtomicT::compute(const int f_ago, const int inum_full,
// Reneighbor on GPU if necessary and then compute forces, virials, energies
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int ** BaseAtomicT::compute(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special,
const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success) {
int **BaseAtomicT::compute(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special,
const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success) {
acc_timers();
int eflag, vflag;
if (eatom) eflag=2;

View File

@ -133,20 +133,12 @@ class BaseAtomic {
int &host_start, const double cpu_time, bool &success);
/// Pair loop with device neighboring
int * compute(const int ago, const int inum_full,
int **compute(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
/// Pair loop with device neighboring
int ** compute(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **numj, const double cpu_time, bool &success);
int **ilist, int **numj, const double cpu_time, bool &success);
// -------------------------- DEVICE DATA -------------------------

View File

@ -69,7 +69,7 @@ BaseEllipsoidT::~BaseEllipsoid() {
}
// NOTE(review): diff residue - two signature lines follow for one body. Presumably the first
// is the pre-change name (bytes_per_atom) and the second the post-change name
// (bytes_per_atom_ellipsoid); only one of them can be kept in real source.
// The body returns the sum of the per-atom byte counts reported by device->atom, ans and
// nbor, the last of which is queried for max_nbors.
template <class numtyp, class acctyp>
int BaseEllipsoidT::bytes_per_atom(const int max_nbors) const {
int BaseEllipsoidT::bytes_per_atom_ellipsoid(const int max_nbors) const {
return device->atom.bytes_per_atom()+ans->bytes_per_atom()+
nbor->bytes_per_atom(max_nbors);
}

View File

@ -108,7 +108,7 @@ class BaseEllipsoid {
void output_times();
/// Returns memory usage on device per atom
int bytes_per_atom(const int max_nbors) const;
int bytes_per_atom_ellipsoid(const int max_nbors) const;
/// Total host memory used by library for pair style
double host_memory_usage_base() const;
@ -173,18 +173,13 @@ class BaseEllipsoid {
const double cpu_time, bool &success, double **quat);
/// Pair loop with device neighboring
int** compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **numj, const double cpu_time, bool &success,
double **host_quat);
/// Build neighbor list on accelerator
void build_nbor_list(const int inum, const int host_inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, bool &success);
int**compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **numj, const double cpu_time, bool &success,
double **host_quat);
// -------------------------- DEVICE DATA -------------------------

View File

@ -198,7 +198,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
// Find deviceID with most CUs (priority given to the accelerator type)
if (_first_device < 0) {
int best_device = 0;
int best_cus = gpu->cus(0);
unsigned best_cus = gpu->cus(0);
bool type_match = (gpu->device_type(0) == type);
for (int i = 1; i < gpu->num_devices(); i++) {
if (type_match==true && gpu->device_type(i)!=type)

View File

@ -62,9 +62,6 @@ class EAM : public BaseAtomic<numtyp, acctyp> {
/** \note This is called at the beginning of the init() routine **/
void clear();
/// Returns memory usage on device per atom
int bytes_per_atom(const int max_nbors) const;
/// Total host memory used by library for pair style
double host_memory_usage() const;

View File

@ -43,7 +43,7 @@ GayBerneT::~GayBerne() {
// Reports a per-atom byte count for max_nbors neighbors by forwarding to another method on
// this object.
// NOTE(review): diff residue - two return statements follow; presumably the first is the
// pre-change line and the second the post-change line.
template <class numtyp, class acctyp>
int GayBerneT::bytes_per_atom(const int max_nbors) const {
// As written this calls bytes_per_atom() on the same object with the same argument, i.e. it
// looks like unbounded self-recursion - presumably the defect this change removes.
return this->bytes_per_atom(max_nbors);
// Forwards to bytes_per_atom_ellipsoid() instead (declared elsewhere; presumably the
// renamed base-class method - verify).
return this->bytes_per_atom_ellipsoid(max_nbors);
}
template <class numtyp, class acctyp>

View File

@ -43,7 +43,7 @@ RESquaredT::~RESquared() {
// Reports a per-atom byte count for max_nbors neighbors by forwarding to another method on
// this object.
// NOTE(review): diff residue - two return statements follow; presumably the first is the
// pre-change line and the second the post-change line.
template <class numtyp, class acctyp>
int RESquaredT::bytes_per_atom(const int max_nbors) const {
// As written this calls bytes_per_atom() on the same object with the same argument, i.e. it
// looks like unbounded self-recursion - presumably the defect this change removes.
return this->bytes_per_atom(max_nbors);
// Forwards to bytes_per_atom_ellipsoid() instead (declared elsewhere; presumably the
// renamed base-class method - verify).
return this->bytes_per_atom_ellipsoid(max_nbors);
}
template <class numtyp, class acctyp>

View File

@ -51,9 +51,9 @@ int ljecl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double host_cut_coulsq, double *host_special_coul,
const double qqrd2e, const double g_ewald);
// NOTE(review): diff residue - two prototypes of ljecl_gpu_reinit with identical parameter
// lists that differ only in return type (int, then void). Presumably the int form is the
// pre-change line and the void form the post-change line; real source can keep only one,
// since functions cannot be overloaded on return type alone.
int ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double **shift, double **host_lj_cutsq);
void ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double **shift, double **host_lj_cutsq);
void ljecl_gpu_clear();
int ** ljecl_gpu_compute_n(const int ago, const int inum,
const int nall, double **host_x, int *host_type,

View File

@ -42,8 +42,8 @@ int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
// NOTE(review): diff residue - two prototypes of ufml_gpu_reinit with identical parameter
// lists that differ only in return type (int, then void). Presumably the int form is the
// pre-change line and the void form the post-change line; real source can keep only one,
// since functions cannot be overloaded on return type alone.
int ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1,
double **host_uf2, double **host_uf3, double **offset);
void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1,
double **host_uf2, double **host_uf3, double **offset);
void ufml_gpu_clear();
int ** ufml_gpu_compute_n(const int ago, const int inum, const int nall,