From 3830711decd56c6cf304524c3cc56524ccb68223 Mon Sep 17 00:00:00 2001
From: Trung Nguyen <ndactrung@gmail.com>
Date: Sat, 9 Dec 2023 15:15:13 -0600
Subject: [PATCH] Added the GPU version of sph/heatconduction

---
 lib/gpu/lal_sph_heatconduction.cpp      | 222 ++++++++++++++++++++
 lib/gpu/lal_sph_heatconduction.cu       | 265 ++++++++++++++++++++++++
 lib/gpu/lal_sph_heatconduction.h        |  95 +++++++++
 lib/gpu/lal_sph_heatconduction_ext.cpp  | 129 ++++++++++++
 lib/gpu/lal_sph_lj.cu                   |  10 +-
 src/GPU/pair_sph_heatconduction_gpu.cpp | 197 ++++++++++++++++++
 src/GPU/pair_sph_heatconduction_gpu.h   |  48 +++++
 7 files changed, 961 insertions(+), 5 deletions(-)
 create mode 100644 lib/gpu/lal_sph_heatconduction.cpp
 create mode 100644 lib/gpu/lal_sph_heatconduction.cu
 create mode 100644 lib/gpu/lal_sph_heatconduction.h
 create mode 100644 lib/gpu/lal_sph_heatconduction_ext.cpp
 create mode 100644 src/GPU/pair_sph_heatconduction_gpu.cpp
 create mode 100644 src/GPU/pair_sph_heatconduction_gpu.h

diff --git a/lib/gpu/lal_sph_heatconduction.cpp b/lib/gpu/lal_sph_heatconduction.cpp
new file mode 100644
index 0000000000..e8e366e93a
--- /dev/null
+++ b/lib/gpu/lal_sph_heatconduction.cpp
@@ -0,0 +1,222 @@
+/***************************************************************************
+                          sph_heatconduction.cpp
+                             -------------------
+                          Trung Nguyen (U Chicago)
+
+  Class for acceleration of the sph/heatconduction pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : September 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#if defined(USE_OPENCL)
+#include "sph_heatconduction_cl.h"
+#elif defined(USE_CUDART)
+const char *sph_heatconduction=0;
+#else
+#include "sph_heatconduction_cubin.h"
+#endif
+
+#include "lal_sph_heatconduction.h"
+#include <cassert>
+namespace LAMMPS_AL {
+#define SPHHeatConductionT SPHHeatConduction<numtyp, acctyp>
+
+extern Device<PRECISION,ACC_PRECISION> device;
+
+template <class numtyp, class acctyp>
+SPHHeatConductionT::SPHHeatConduction() : BaseSPH<numtyp,acctyp>(),
+                                          _allocated(false) {
+  _max_dE_size = 0;
+}
+
+template <class numtyp, class acctyp>
+SPHHeatConductionT::~SPHHeatConduction() {
+  clear();
+}
+
+template <class numtyp, class acctyp>
+int SPHHeatConductionT::bytes_per_atom(const int max_nbors) const {
+  return this->bytes_per_atom_atomic(max_nbors);
+}
+
+template <class numtyp, class acctyp>
+int SPHHeatConductionT::init(const int ntypes,
+                             double **host_cutsq, double **host_cut,
+                             double **host_alpha, double* host_mass,
+                             const int dimension, double *host_special_lj,
+                             const int nlocal, const int nall,
+                             const int max_nbors, const int maxspecial,
+                             const double cell_size,
+                             const double gpu_split, FILE *_screen) {
+  const int max_shared_types=this->device->max_shared_types();
+
+  int onetype=0;
+  #ifdef USE_OPENCL
+  if (maxspecial==0)
+    for (int i=1; i<ntypes; i++)
+      for (int j=i; j<ntypes; j++)
+        if (host_cutsq[i][j]>0) {
+          if (onetype>0)
+            onetype=-1;
+          else if (onetype==0)
+            onetype=i*max_shared_types+j;
+        }
+  if (onetype<0) onetype=0;
+  #endif
+
+  int success;
+  int extra_fields = 4; // round up to accommodate quadruples of numtyp values
+                        // rho, esph
+  success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,
+                            gpu_split,_screen,sph_heatconduction,
+                            "k_sph_heatconduction",onetype,extra_fields);
+  if (success!=0)
+    return success;
+
+  // If atom type constants fit in shared memory use fast kernel
+  int lj_types=ntypes;
+  shared_types=false;
+  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+    lj_types=max_shared_types;
+    shared_types=true;
+  }
+  _lj_types=lj_types;
+
+  // Allocate a host write buffer for data initialization
+  UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
+                               UCL_WRITE_ONLY);
+
+  for (int i=0; i<lj_types*lj_types; i++)
+    host_write[i]=0.0;
+
+  coeff.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
+  this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_alpha,
+                         host_cut, host_cutsq);
+
+  UCL_H_Vec<numtyp> dview_mass(ntypes, *(this->ucl_device), UCL_WRITE_ONLY);
+  for (int i = 0; i < ntypes; i++)
+    dview_mass[i] = host_mass[i];
+  mass.alloc(ntypes,*(this->ucl_device), UCL_READ_ONLY);
+  ucl_copy(mass,dview_mass,false);
+
+  UCL_H_Vec<double> dview;
+  sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
+  dview.view(host_special_lj,4,*(this->ucl_device));
+  ucl_copy(sp_lj,dview,false);
+
+  // allocate the per-atom dE array
+
+  int ef_nall=nall;
+  if (ef_nall==0)
+    ef_nall=2000;
+
+  _max_dE_size=static_cast<int>(static_cast<double>(ef_nall)*1.10);
+  dE.alloc(_max_dE_size,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE);
+
+  _dimension = dimension;
+
+  _allocated=true;
+  this->_max_bytes=coeff.row_bytes()+mass.row_bytes()+dE.row_bytes()+
+    sp_lj.row_bytes();
+  return 0;
+}
+
+template <class numtyp, class acctyp>
+void SPHHeatConductionT::clear() {
+  if (!_allocated)
+    return;
+  _allocated=false;
+
+  coeff.clear();
+  mass.clear();
+  dE.clear();
+  sp_lj.clear();
+  this->clear_atomic();
+}
+
+template <class numtyp, class acctyp>
+double SPHHeatConductionT::host_memory_usage() const {
+  return this->host_memory_usage_atomic()+sizeof(SPHHeatConduction<numtyp,acctyp>);
+}
+
+template <class numtyp, class acctyp>
+void SPHHeatConductionT::update_dE(void **dE_ptr) {
+  *dE_ptr=dE.host.begin();
+  dE.update_host(_max_dE_size,false);
+}
+
+// ---------------------------------------------------------------------------
+// Calculate the per-atom change in thermal energy (dE)
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+int SPHHeatConductionT::loop(const int eflag, const int vflag) {
+
+  int nall = this->atom->nall();
+
+  // Resize dE array if necessary
+  if (nall > _max_dE_size) {
+    _max_dE_size=static_cast<int>(static_cast<double>(nall)*1.10);
+    dE.resize(_max_dE_size);
+  }
+
+  // signal that we need to transfer extra data from the host
+
+  this->atom->extra_data_unavail();
+
+  // pack rho and esph into quadruples in the extra data array
+  numtyp4 *pextra=reinterpret_cast<numtyp4*>(&(this->atom->extra[0]));
+
+  int n = 0;
+  int nstride = 1;
+  for (int i = 0; i < nall; i++) {
+    int idx = n+i*nstride;
+    numtyp4 v;
+    v.x = rho[i];
+    v.y = esph[i];
+    v.z = 0;
+    v.w = 0;
+    pextra[idx] = v;
+  }
+  this->atom->add_extra_data();
+
+  // Compute the block size and grid size to keep all cores busy
+  const int BX=this->block_size();
+  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
+                               (BX/this->_threads_per_atom)));
+
+  int ainum=this->ans->inum();
+  int nbor_pitch=this->nbor->nbor_pitch();
+  this->time_pair.start();
+  if (shared_types) {
+    this->k_pair_sel->set_size(GX,BX);
+    this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &mass,
+                          &sp_lj, &this->nbor->dev_nbor,
+                          &this->_nbor_data->begin(),
+                          &this->ans->force, &this->ans->engv, &dE,
+                          &eflag, &vflag, &ainum, &nbor_pitch,
+                          &this->atom->v, &_dimension,
+                          &this->_threads_per_atom);
+  } else {
+    this->k_pair.set_size(GX,BX);
+    this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &mass,
+                     &_lj_types, &sp_lj, &this->nbor->dev_nbor,
+                     &this->_nbor_data->begin(),
+                     &this->ans->force, &this->ans->engv, &dE,
+                     &eflag, &vflag, &ainum, &nbor_pitch,
+                     &this->atom->v, &_dimension,
+                     &this->_threads_per_atom);
+  }
+
+  this->time_pair.stop();
+  return GX;
+}
+
+// ---------------------------------------------------------------------------
+// Get the extra data pointers from host
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+void SPHHeatConductionT::get_extra_data(double *host_rho, double *host_esph) {
+  rho = host_rho;
+  esph = host_esph;
+}
+
+template class SPHHeatConduction<PRECISION,ACC_PRECISION>;
+}
diff --git a/lib/gpu/lal_sph_heatconduction.cu b/lib/gpu/lal_sph_heatconduction.cu
new file mode 100644
index 0000000000..c88853e2cf
--- /dev/null
+++ b/lib/gpu/lal_sph_heatconduction.cu
@@ -0,0 +1,265 @@
+// **************************************************************************
+//                          sph_heatconduction.cu
+//                          ---------------------
+//                       Trung Dac Nguyen (U Chicago)
+//
+//  Device code for acceleration of the sph/heatconduction pair style
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : September 2023
+//    email                : ndactrung@gmail.com
+// ***************************************************************************
+
+#if defined(NV_KERNEL) || defined(USE_HIP)
+#include "lal_aux_fun1.h"
+#ifndef _DOUBLE_DOUBLE
+_texture( pos_tex,float4);
+_texture( vel_tex,float4);
+#else
+_texture_2d( pos_tex,int4);
+_texture_2d( vel_tex,int4);
+#endif
+#else
+#define pos_tex x_
+#define vel_tex v_
+#endif
+
+#if (SHUFFLE_AVAIL == 0)
+
+// reduce the per-thread dE accumulators across the t_per_atom threads
+// assigned to atom ii using shared memory (red_acc)
+#define store_dE(dEacc, ii, inum, tid, t_per_atom, offset, dE)              \
+  if (t_per_atom>1) {                                                       \
+    simdsync();                                                             \
+    simd_reduce_add1(t_per_atom, red_acc, offset, tid, dEacc);              \
+  }                                                                         \
+  if (offset==0 && ii<inum) dE[ii]=dEacc;
+
+#else
+
+// same reduction using warp shuffles where available
+#define store_dE(dEacc, ii, inum, tid, t_per_atom, offset, dE)              \
+  if (t_per_atom>1) {                                                       \
+    for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                         \
+      dEacc += shfl_down(dEacc, s, t_per_atom);                             \
+    }                                                                       \
+  }                                                                         \
+  if (offset==0 && ii<inum) dE[ii]=dEacc;
+
+#endif
diff --git a/lib/gpu/lal_sph_heatconduction.h b/lib/gpu/lal_sph_heatconduction.h
new file mode 100644
--- /dev/null
+++ b/lib/gpu/lal_sph_heatconduction.h
@@ -0,0 +1,95 @@
+/***************************************************************************
+                           sph_heatconduction.h
+                           --------------------
+                       Trung Dac Nguyen (U Chicago)
+
+  Class for acceleration of the sph/heatconduction pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : September 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_SPH_HEATCONDUCTION_H
+#define LAL_SPH_HEATCONDUCTION_H
+
+#include "lal_base_sph.h"
+
+namespace LAMMPS_AL {
+
+template <class numtyp, class acctyp>
+class SPHHeatConduction : public BaseSPH<numtyp, acctyp> {
+ public:
+  SPHHeatConduction();
+  ~SPHHeatConduction();
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    *
+    * Returns:
+    * -  0 if successful
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init(const int ntypes, double **host_cutsq,
+           double** host_cut, double **host_alpha, double *host_mass,
+           const int dimension, double *host_special_lj,
+           const int nlocal, const int nall, const int max_nbors,
+           const int maxspecial, const double cell_size,
+           const double gpu_split, FILE *screen);
+
+  /// Clear all host and device data
+  /** \note This is called at the beginning of the init() routine **/
+  void clear();
+
+  /// Returns memory usage on device per atom
+  int bytes_per_atom(const int max_nbors) const;
+
+  /// Total host memory used by library for pair style
+  double host_memory_usage() const;
+
+  void get_extra_data(double *host_rho, double *host_esph);
+
+  /// copy desph from device to host
+  void update_dE(void **dE_ptr);
+
+  // --------------------------- TYPE DATA --------------------------
+
+  /// coeff.x = alpha, coeff.y = cut, coeff.z = cutsq
+  UCL_D_Vec<numtyp4> coeff;
+
+  /// per-type mass
+  UCL_D_Vec<numtyp> mass;
+
+  /// Special LJ values
+  UCL_D_Vec<numtyp> sp_lj;
+
+  /// If atom type constants fit in shared memory, use fast kernels
+  bool shared_types;
+
+  /// Number of atom types
+  int _lj_types;
+
+  /// Per-atom arrays
+  UCL_Vector<acctyp,acctyp> dE;
+  int _max_dE_size;
+
+  int _dimension;
+
+  /// pointers to host data
+  double *rho, *esph, *cv;
+
+ private:
+  bool _allocated;
+  int loop(const int eflag, const int vflag);
+};
+
+}
+
+#endif
diff --git a/lib/gpu/lal_sph_heatconduction_ext.cpp b/lib/gpu/lal_sph_heatconduction_ext.cpp
new file mode 100644
index 0000000000..1317ecaccc
--- /dev/null
+++ b/lib/gpu/lal_sph_heatconduction_ext.cpp
@@ -0,0 +1,129 @@
+/***************************************************************************
+                        sph_heatconduction_ext.cpp
+                        --------------------------
+                        Trung Dac Nguyen (U Chicago)
+
+  Functions for LAMMPS access to sph/heatconduction acceleration routines.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : December 2023
+    email                : ndactrung@gmail.com
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <cmath>
+
+#include "lal_sph_heatconduction.h"
+
+using namespace std;
+using namespace LAMMPS_AL;
+
+static SPHHeatConduction<PRECISION,ACC_PRECISION> SPHHeatConductionMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device
+// ---------------------------------------------------------------------------
+int sph_heatconduction_gpu_init(const int ntypes, double **cutsq, double** host_cut,
+                                double **host_alpha, double* host_mass, const int dimension,
+                                double *special_lj, const int inum, const int nall,
+                                const int max_nbors, const int maxspecial,
+                                const double cell_size, int &gpu_mode, FILE *screen) {
+  SPHHeatConductionMF.clear();
+  gpu_mode=SPHHeatConductionMF.device->gpu_mode();
+  double gpu_split=SPHHeatConductionMF.device->particle_split();
+  int first_gpu=SPHHeatConductionMF.device->first_device();
+  int last_gpu=SPHHeatConductionMF.device->last_device();
+  int world_me=SPHHeatConductionMF.device->world_me();
+  int gpu_rank=SPHHeatConductionMF.device->gpu_rank();
+  int procs_per_gpu=SPHHeatConductionMF.device->procs_per_gpu();
+
+  SPHHeatConductionMF.device->init_message(screen,"sph/heatconduction",first_gpu,last_gpu);
+
+  bool message=false;
+  if (SPHHeatConductionMF.device->replica_me()==0 && screen)
+    message=true;
+
+  if (message) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  int init_ok=0;
+  if (world_me==0)
+    init_ok=SPHHeatConductionMF.init(ntypes, cutsq, host_cut, host_alpha, host_mass,
+                                     dimension, special_lj, inum, nall, max_nbors,
+                                     maxspecial, cell_size, gpu_split, screen);
+
+  SPHHeatConductionMF.device->world_barrier();
+  if (message)
+    fprintf(screen,"Done.\n");
+
+  for (int i=0; i<procs_per_gpu; i++) {
+    if (message) {
+      if (last_gpu-first_gpu==0)
+        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
+      else
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
+                last_gpu,i);
+      fflush(screen);
+    }
+    if (gpu_rank==i && world_me!=0)
+      init_ok=SPHHeatConductionMF.init(ntypes, cutsq, host_cut, host_alpha, host_mass,
+                                       dimension, special_lj, inum, nall, max_nbors,
+                                       maxspecial, cell_size, gpu_split, screen);
+
+    SPHHeatConductionMF.device->serialize_init();
+    if (message)
+      fprintf(screen,"Done.\n");
+  }
+  if (message)
+    fprintf(screen,"\n");
+
+  if (init_ok==0)
+    SPHHeatConductionMF.estimate_gpu_overhead();
+  return init_ok;
+}
+
+void sph_heatconduction_gpu_clear() {
+  SPHHeatConductionMF.clear();
+}
+
+int ** sph_heatconduction_gpu_compute_n(const int ago, const int inum_full, const int nall,
+                                        double **host_x, int *host_type, double *sublo,
+                                        double *subhi, tagint *host_tag, int **nspecial,
+                                        tagint **special, const bool eflag, const bool vflag,
+                                        const bool eatom, const bool vatom, int &host_start,
+                                        int **ilist, int **jnum, const double cpu_time,
+                                        bool &success, double **host_v) {
+  return SPHHeatConductionMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
+                                     subhi, host_tag, nspecial, special, eflag, vflag,
+                                     eatom, vatom, host_start, ilist, jnum, cpu_time,
+                                     success, host_v);
+}
+
+void sph_heatconduction_gpu_compute(const int ago, const int inum_full, const int nall,
+                                    double **host_x, int *host_type, int *ilist, int *numj,
+                                    int **firstneigh, const bool eflag, const bool vflag,
+                                    const bool eatom, const bool vatom, int &host_start,
+                                    const double cpu_time, bool &success, tagint *host_tag,
+                                    double **host_v, const int nlocal) {
+  SPHHeatConductionMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj,
+                              firstneigh, eflag, vflag, eatom, vatom, host_start,
+                              cpu_time, success, host_tag, host_v, nlocal);
+}
+
+void sph_heatconduction_gpu_get_extra_data(double *host_rho, double *host_esph) {
+  SPHHeatConductionMF.get_extra_data(host_rho, host_esph);
+}
+
+void sph_heatconduction_gpu_update_dE(void **dE_ptr) {
+  SPHHeatConductionMF.update_dE(dE_ptr);
+}
+
+double sph_heatconduction_gpu_bytes() {
+  return SPHHeatConductionMF.host_memory_usage();
+}
diff --git a/lib/gpu/lal_sph_lj.cu b/lib/gpu/lal_sph_lj.cu
index b105632330..b9fdc5e433 100644
--- a/lib/gpu/lal_sph_lj.cu
+++ b/lib/gpu/lal_sph_lj.cu
@@ -224,7 +224,7 @@ __kernel void k_sph_lj(const __global numtyp4 *restrict x_,
     f.z+=delz*force;
 
     // and change in density, drho[i]
-    drhoEacc.x += massj * delVdotDelR * wfd;
+    drhoEacc.x += mass_jtype * delVdotDelR * wfd;
 
     // change in thermal energy, desph[i]
     drhoEacc.y += deltaE;
@@ -313,7 +313,6 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
   numtyp rhoi = extrai.x;
   numtyp esphi = extrai.y;
   numtyp cvi = extrai.z;
-  numtyp massi= extrai.w;
 
   // compute pressure of particle i with LJ EOS
   numtyp fci[2];
@@ -331,6 +330,7 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
 #endif
     numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+    int jtype = jx.w;
 #ifndef ONETYPE
     int mtype=itype+jx.w;
     const numtyp cutsq_p=cutsq[mtype];
@@ -345,6 +345,7 @@ __kernel void k_sph_lj_fast(const __global numtyp4 *restrict x_,
     numtyp rsq = delx*delx+dely*dely+delz*delz;
     if (rsq<cutsq_p) {
diff --git a/src/GPU/pair_sph_heatconduction_gpu.cpp b/src/GPU/pair_sph_heatconduction_gpu.cpp
new file mode 100644
--- /dev/null
+++ b/src/GPU/pair_sph_heatconduction_gpu.cpp
@@ -0,0 +1,197 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "pair_sph_heatconduction_gpu.h"
+
+#include "atom.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "gpu_extra.h"
+#include "info.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "suffix.h"
+
+#include <cmath>
+
+using namespace LAMMPS_NS;
+
+// External functions from cuda library for atom decomposition
+
+int sph_heatconduction_gpu_init(const int ntypes, double **cutsq, double** host_cut,
+                                double **host_alpha, double* host_mass,
+                                const int dimension, double *special_lj,
+                                const int inum, const int nall,
+                                const int max_nbors, const int maxspecial,
+                                const double cell_size, int &gpu_mode, FILE *screen);
+void sph_heatconduction_gpu_clear();
+int **sph_heatconduction_gpu_compute_n(const int ago, const int inum_full, const int nall,
+                                       double **host_x, int *host_type, double *sublo,
+                                       double *subhi, tagint *host_tag, int **nspecial,
+                                       tagint **special, const bool eflag, const bool vflag,
+                                       const bool eatom, const bool vatom, int &host_start,
+                                       int **ilist, int **jnum, const double cpu_time,
+                                       bool &success, double **host_v);
+void sph_heatconduction_gpu_compute(const int ago, const int inum_full, const int nall,
+                                    double **host_x, int *host_type, int *ilist, int *numj,
+                                    int **firstneigh, const bool eflag, const bool vflag,
+                                    const bool eatom, const bool vatom, int &host_start,
+                                    const double cpu_time, bool &success, tagint *host_tag,
+                                    double **host_v, const int nlocal);
+void sph_heatconduction_gpu_get_extra_data(double *host_rho, double *host_esph);
+void sph_heatconduction_gpu_update_dE(void **dE_ptr);
+double sph_heatconduction_gpu_bytes();
+
+/* ---------------------------------------------------------------------- */
+
+PairSPHHeatConductionGPU::PairSPHHeatConductionGPU(LAMMPS *lmp) :
+    PairSPHHeatConduction(lmp), gpu_mode(GPU_FORCE)
+{
+  dE_pinned = nullptr;
+  respa_enable = 0;
+  reinitflag = 0;
+  cpu_time = 0.0;
+  suffix_flag |= Suffix::GPU;
+  GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
+}
+
+/* ----------------------------------------------------------------------
+   free all arrays
+------------------------------------------------------------------------- */
+
+PairSPHHeatConductionGPU::~PairSPHHeatConductionGPU()
+{
+  sph_heatconduction_gpu_clear();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairSPHHeatConductionGPU::compute(int eflag, int vflag)
+{
+  ev_init(eflag, vflag);
+
+  int nall = atom->nlocal + atom->nghost;
+  int inum, host_start;
+
+  bool success = true;
+  int *ilist, *numneigh, **firstneigh;
+
+  double *rho = atom->rho;
+  double *esph = atom->esph;
+  sph_heatconduction_gpu_get_extra_data(rho, esph);
+
+  if (gpu_mode != GPU_FORCE) {
+    double sublo[3], subhi[3];
+    if (domain->triclinic == 0) {
+      sublo[0] = domain->sublo[0];
+      sublo[1] = domain->sublo[1];
+      sublo[2] = domain->sublo[2];
+      subhi[0] = domain->subhi[0];
+      subhi[1] = domain->subhi[1];
+      subhi[2] = domain->subhi[2];
+    } else {
+      domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
+    }
+    inum = atom->nlocal;
+    firstneigh = sph_heatconduction_gpu_compute_n(
+        neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
+        atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom,
+        vflag_atom, host_start, &ilist, &numneigh, cpu_time, success, atom->v);
+  } else {
+    inum = list->inum;
+    ilist = list->ilist;
+    numneigh = list->numneigh;
+    firstneigh = list->firstneigh;
+    sph_heatconduction_gpu_compute(neighbor->ago, inum, nall, atom->x,
+                                   atom->type, ilist, numneigh, firstneigh,
+                                   eflag, vflag, eflag_atom, vflag_atom,
+                                   host_start, cpu_time, success, atom->tag,
+                                   atom->v, atom->nlocal);
+  }
+  if (!success) error->one(FLERR, "Insufficient memory on accelerator");
+
+  // get dE (the change in thermal energy) from the device
+
+  double *desph = atom->desph;
+  sph_heatconduction_gpu_update_dE(&dE_pinned);
+
+  int nlocal = atom->nlocal;
+  if (acc_float) {
+    auto dE_ptr = (float *) dE_pinned;
+    for (int i = 0; i < nlocal; i++) {
+      desph[i] = dE_ptr[i];
+    }
+  } else {
+    auto dE_ptr = (double *) dE_pinned;
+    for (int i = 0; i < nlocal; i++) {
+      desph[i] = dE_ptr[i];
+    }
+  }
+
+  if (atom->molecular != Atom::ATOMIC && neighbor->ago == 0)
+    neighbor->build_topology();
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairSPHHeatConductionGPU::init_style()
+{
+  // Repeat cutsq calculation because done after call to init_style
+  double maxcut = -1.0;
+  double mcut;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+        mcut = init_one(i, j);
+        mcut *= mcut;
+        if (mcut > maxcut) maxcut = mcut;
+        cutsq[i][j] = cutsq[j][i] = mcut;
+      } else
+        cutsq[i][j] = cutsq[j][i] = 0.0;
+    }
+  }
+  double cell_size = sqrt(maxcut) + neighbor->skin;
+
+  int maxspecial = 0;
+  if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
+  int mnf = 5e-2 * neighbor->oneatom;
+  int success =
+      sph_heatconduction_gpu_init(atom->ntypes + 1, cutsq, cut, alpha, atom->mass,
+                                  domain->dimension, force->special_lj, atom->nlocal,
+                                  atom->nlocal + atom->nghost,
+                                  mnf, maxspecial, cell_size, gpu_mode, screen);
+  GPU_EXTRA::check_flag(success, error, world);
+
+  acc_float = Info::has_accelerator_feature("GPU", "precision", "single");
+
+  if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
+}
+
+/* ---------------------------------------------------------------------- */
+
+double PairSPHHeatConductionGPU::memory_usage()
+{
+  double bytes = Pair::memory_usage();
+  return bytes + sph_heatconduction_gpu_bytes();
+}
diff --git a/src/GPU/pair_sph_heatconduction_gpu.h b/src/GPU/pair_sph_heatconduction_gpu.h
new file mode 100644
index 0000000000..571334017d
--- /dev/null
+++ b/src/GPU/pair_sph_heatconduction_gpu.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(sph/heatconduction/gpu,PairSPHHeatConductionGPU);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_SPH_HEATCONDUCTION_GPU_H
+#define LMP_PAIR_SPH_HEATCONDUCTION_GPU_H
+
+#include "pair_sph_heatconduction.h"
+
+namespace LAMMPS_NS {
+
+class PairSPHHeatConductionGPU : public PairSPHHeatConduction {
+ public:
+  PairSPHHeatConductionGPU(LAMMPS *lmp);
+  ~PairSPHHeatConductionGPU() override;
+  void cpu_compute(int, int, int, int, int *, int *, int **);
+  void compute(int, int) override;
+  void init_style() override;
+  double memory_usage() override;
+
+  enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
+
+  void *dE_pinned;
+  bool acc_float;
+
+ private:
+  int gpu_mode;
+  double cpu_time;
+};
+
+}    // namespace LAMMPS_NS
+#endif
+#endif
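
For context, a minimal LAMMPS input sketch that would exercise the new accelerated style once this patch is built into the GPU package. This is not part of the patch: the data file name and the pair coefficients (thermal diffusivity D and cutoff h) are illustrative placeholders, and the same effect can be obtained by keeping pair_style sph/heatconduction and launching with the "-sf gpu" command-line suffix instead of naming the /gpu variant explicitly:

    package      gpu 1
    units        lj
    atom_style   sph
    read_data    data.sph                 # hypothetical data file
    pair_style   sph/heatconduction/gpu
    pair_coeff   * * 1.0e-4 2.5           # D and cutoff h, illustrative values
    timestep     0.005
    run          1000

Note that when the GPU library is built for single precision, dE is accumulated and returned as float (see the acc_float branch in PairSPHHeatConductionGPU::compute()), so a double-precision build is preferable when the thermal energy balance must be tightly conserved.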