From fcdcf659953affa5489e994977fc0320dddc660d Mon Sep 17 00:00:00 2001 From: Eddy Barraud Date: Fri, 31 May 2024 15:12:37 +0200 Subject: [PATCH] creation + working CPU new pair style dpd/charged which is the combination of dpd and coul/slater/long Working on CPU, GPU in progress --- lib/gpu/lal_dpd_charged.cpp | 187 ++++++++++ lib/gpu/lal_dpd_charged.cu | 443 ++++++++++++++++++++++ lib/gpu/lal_dpd_charged.h | 89 +++++ lib/gpu/lal_dpd_charged_ext.cpp | 133 +++++++ src/DPD-BASIC/pair_dpd_charged.cpp | 574 +++++++++++++++++++++++++++++ src/DPD-BASIC/pair_dpd_charged.h | 66 ++++ src/GPU/pair_dpd_charged_gpu.cpp | 441 ++++++++++++++++++++++ src/GPU/pair_dpd_charged_gpu.h | 45 +++ 8 files changed, 1978 insertions(+) create mode 100644 lib/gpu/lal_dpd_charged.cpp create mode 100644 lib/gpu/lal_dpd_charged.cu create mode 100644 lib/gpu/lal_dpd_charged.h create mode 100644 lib/gpu/lal_dpd_charged_ext.cpp create mode 100644 src/DPD-BASIC/pair_dpd_charged.cpp create mode 100644 src/DPD-BASIC/pair_dpd_charged.h create mode 100644 src/GPU/pair_dpd_charged_gpu.cpp create mode 100644 src/GPU/pair_dpd_charged_gpu.h diff --git a/lib/gpu/lal_dpd_charged.cpp b/lib/gpu/lal_dpd_charged.cpp new file mode 100644 index 0000000000..1f5209852b --- /dev/null +++ b/lib/gpu/lal_dpd_charged.cpp @@ -0,0 +1,187 @@ +/*************************************************************************** + dpd.cpp + ------------------- + Trung Dac Nguyen (ORNL) + + Class for acceleration of the dpd pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : Jan 15, 2014 + email : nguyentd@ornl.gov + ***************************************************************************/ + +#if defined(USE_OPENCL) +#include "dpd_cl.h" +#elif defined(USE_CUDART) +const char *dpd=0; +#else +#include "dpd_cubin.h" +#endif + +#include "lal_dpd.h" +#include +namespace LAMMPS_AL { +#define DPDT DPD + +extern Device device; + +template +DPDT::DPD() : BaseDPD(), _allocated(false) { +} + +template +DPDT::~DPD() { + clear(); +} + +template +int DPDT::bytes_per_atom(const int max_nbors) const { + return this->bytes_per_atom_atomic(max_nbors); +} + +template +int DPDT::init(const int ntypes, + double **host_cutsq, double **host_a0, + double **host_gamma, double **host_sigma, + double **host_cut, double *host_special_lj, + const bool tstat_only, + const int nlocal, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, + const double gpu_split, FILE *_screen) { + const int max_shared_types=this->device->max_shared_types(); + + int onetype=0; + #ifdef USE_OPENCL + if (maxspecial==0) + for (int i=1; i0) { + if (onetype>0) + onetype=-1; + else if (onetype==0) + onetype=i*max_shared_types+j; + } + if (onetype<0) onetype=0; + #endif + + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size, + gpu_split,_screen,dpd,"k_dpd",onetype); + if (success!=0) + return success; + + // If atom type constants fit in shared memory use fast kernel + int lj_types=ntypes; + shared_types=false; + if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) { + lj_types=max_shared_types; + shared_types=true; + } + _lj_types=lj_types; + + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write(lj_types*lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; iucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_a0,host_gamma, + host_sigma,host_cut); + + UCL_H_Vec host_rsq(lj_types*lj_types,*(this->ucl_device), + UCL_WRITE_ONLY); + cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq); + + double special_sqrt[4]; + special_sqrt[0] = sqrt(host_special_lj[0]); + special_sqrt[1] = sqrt(host_special_lj[1]); + special_sqrt[2] = sqrt(host_special_lj[2]); + special_sqrt[3] = sqrt(host_special_lj[3]); + + UCL_H_Vec dview; + sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + dview.view(host_special_lj,4,*(this->ucl_device)); + ucl_copy(sp_lj,dview,false); + sp_sqrt.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + dview.view(special_sqrt,4,*(this->ucl_device)); + ucl_copy(sp_sqrt,dview,false); + + _tstat_only = 0; + if (tstat_only) _tstat_only=1; + + _allocated=true; + this->_max_bytes=coeff.row_bytes()+cutsq.row_bytes()+sp_lj.row_bytes()+sp_sqrt.row_bytes(); + return 0; +} + +template +void DPDT::clear() { + if (!_allocated) + return; + _allocated=false; + + coeff.clear(); + cutsq.clear(); + sp_lj.clear(); + sp_sqrt.clear(); + this->clear_atomic(); +} + +template +double DPDT::host_memory_usage() const { + return this->host_memory_usage_atomic()+sizeof(DPD); +} + +// --------------------------------------------------------------------------- +// Calculate energies, forces, and torques +// --------------------------------------------------------------------------- +template +int DPDT::loop(const int eflag, const int vflag) { + // Compute the block size and grid size to keep all cores busy + const int BX=this->block_size(); + int GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); + + int ainum=this->ans->inum(); + int nbor_pitch=this->nbor->nbor_pitch(); + this->time_pair.start(); + if (shared_types) { + this->k_pair_sel->set_size(GX,BX); + this->k_pair_sel->run(&this->atom->x, &coeff, &sp_lj, &sp_sqrt, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &eflag, + &vflag, &ainum, &nbor_pitch, &this->atom->v, &cutsq, + &this->_dtinvsqrt, &this->_seed, &this->_timestep, + &this->_tstat_only, &this->_threads_per_atom); + } else { + this->k_pair.set_size(GX,BX); + this->k_pair.run(&this->atom->x, &coeff, &_lj_types, &sp_lj, &sp_sqrt, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &eflag, &vflag, + &ainum, &nbor_pitch, &this->atom->v, &cutsq, &this->_dtinvsqrt, + &this->_seed, &this->_timestep, &this->_tstat_only, + &this->_threads_per_atom); + } + this->time_pair.stop(); + return GX; +} + +template +void DPDT::update_coeff(int ntypes, double **host_a0, double **host_gamma, + double **host_sigma, double **host_cut) +{ + UCL_H_Vec host_write(_lj_types*_lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + this->atom->type_pack4(ntypes,_lj_types,coeff,host_write,host_a0,host_gamma, + host_sigma,host_cut); +} + +template class DPD; +} diff --git a/lib/gpu/lal_dpd_charged.cu b/lib/gpu/lal_dpd_charged.cu new file mode 100644 index 0000000000..c6fd4f0e46 --- /dev/null +++ b/lib/gpu/lal_dpd_charged.cu @@ -0,0 +1,443 @@ +// ************************************************************************** +// dpd.cu +// ------------------- +// Trung Dac Nguyen (ORNL) +// +// Device code for acceleration of the dpd pair style +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : Jan 15, 2014 +// email : nguyentd@ornl.gov +// *************************************************************************** + +#if defined(NV_KERNEL) || defined(USE_HIP) +#include "lal_aux_fun1.h" +#ifndef _DOUBLE_DOUBLE +_texture( pos_tex,float4); +_texture( vel_tex,float4); +#else +_texture_2d( pos_tex,int4); +_texture_2d( vel_tex,int4); +#endif +#else +#define pos_tex x_ +#define vel_tex v_ +#endif + +#define EPSILON (numtyp)1.0e-10 + +//#define _USE_UNIFORM_SARU_LCG +//#define _USE_UNIFORM_SARU_TEA8 +//#define _USE_GAUSSIAN_SARU_LCG + +#if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && !defined(_USE_GAUSSIAN_SARU_LCG) +#define _USE_UNIFORM_SARU_LCG +#endif + +// References: +// 1. Y. Afshar, F. Schmid, A. Pishevar, S. Worley, Comput. Phys. Comm. 184 (2013), 1119–1128. +// 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, Comput. Phys. Comm. 230 (2011), 7191-7201. +// PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19 + +#define LCGA 0x4beb5d59 /* Full period 32 bit LCG */ +#define LCGC 0x2600e1f7 +#define oWeylPeriod 0xda879add /* Prime period 3666320093 */ +#define oWeylOffset 0x8009d14b +#define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */ + +// specifically implemented for steps = 1; high = 1.0; low = -1.0 +// returns uniformly distributed random numbers u in [-1.0;1.0] +// using the inherent LCG, then multiply u with sqrt(3) to "match" +// with a normal random distribution. +// Afshar et al. mutlplies u in [-0.5;0.5] with sqrt(12) +// Curly brackets to make variables local to the scope. +#ifdef _USE_UNIFORM_SARU_LCG +#define SQRT3 (numtyp)1.7320508075688772935274463 +#define saru(seed1, seed2, seed, timestep, randnum) { \ + unsigned int seed3 = seed + timestep; \ + seed3^=(seed1<<7)^(seed2>>6); \ + seed2+=(seed1>>4)^(seed3>>15); \ + seed1^=(seed2<<9)+(seed3<<8); \ + seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \ + seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \ + seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \ + seed2+=seed1*seed3; \ + seed1+=seed3 ^ (seed2>>2); \ + seed2^=((signed int)seed2)>>17; \ + unsigned int state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \ + unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8); \ + state = state + (wstate*(wstate^0xdddf97f5)); \ + wstate = 0xABCB96F7 + (wstate>>1); \ + state = LCGA*state + LCGC; \ + wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \ + unsigned int v = (state ^ (state>>26)) + wstate; \ + unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7); \ + randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0); \ +} +#endif + +// specifically implemented for steps = 1; high = 1.0; low = -1.0 +// returns uniformly distributed random numbers u in [-1.0;1.0] using TEA8 +// then multiply u with sqrt(3) to "match" with a normal random distribution +// Afshar et al. mutlplies u in [-0.5;0.5] with sqrt(12) +#ifdef _USE_UNIFORM_SARU_TEA8 +#define SQRT3 (numtyp)1.7320508075688772935274463 +#define k0 0xA341316C +#define k1 0xC8013EA4 +#define k2 0xAD90777D +#define k3 0x7E95761E +#define delta 0x9e3779b9 +#define rounds 8 +#define saru(seed1, seed2, seed, timestep, randnum) { \ + unsigned int seed3 = seed + timestep; \ + seed3^=(seed1<<7)^(seed2>>6); \ + seed2+=(seed1>>4)^(seed3>>15); \ + seed1^=(seed2<<9)+(seed3<<8); \ + seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \ + seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \ + seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \ + seed2+=seed1*seed3; \ + seed1+=seed3 ^ (seed2>>2); \ + seed2^=((signed int)seed2)>>17; \ + unsigned int state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \ + unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8); \ + state = state + (wstate*(wstate^0xdddf97f5)); \ + wstate = 0xABCB96F7 + (wstate>>1); \ + unsigned int sum = 0; \ + for (int i=0; i < rounds; i++) { \ + sum += delta; \ + state += ((wstate<<4) + k0)^(wstate + sum)^((wstate>>5) + k1); \ + wstate += ((state<<4) + k2)^(state + sum)^((state>>5) + k3); \ + } \ + unsigned int v = (state ^ (state>>26)) + wstate; \ + unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7); \ + randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0); \ +} +#endif + +// specifically implemented for steps = 1; high = 1.0; low = -1.0 +// returns two uniformly distributed random numbers r1 and r2 in [-1.0;1.0], +// and uses the polar method (Marsaglia's) to transform to a normal random value +// This is used to compared with CPU DPD using RandMars::gaussian() +#ifdef _USE_GAUSSIAN_SARU_LCG +#define saru(seed1, seed2, seed, timestep, randnum) { \ + unsigned int seed3 = seed + timestep; \ + seed3^=(seed1<<7)^(seed2>>6); \ + seed2+=(seed1>>4)^(seed3>>15); \ + seed1^=(seed2<<9)+(seed3<<8); \ + seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \ + seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \ + seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \ + seed2+=seed1*seed3; \ + seed1+=seed3 ^ (seed2>>2); \ + seed2^=((signed int)seed2)>>17; \ + unsigned int state=0x12345678; \ + unsigned int wstate=12345678; \ + state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \ + wstate = (state + seed2) ^ (((signed int)state)>>8); \ + state = state + (wstate*(wstate^0xdddf97f5)); \ + wstate = 0xABCB96F7 + (wstate>>1); \ + unsigned int v, s; \ + numtyp r1, r2, rsq; \ + while (1) { \ + state = LCGA*state + LCGC; \ + wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \ + v = (state ^ (state>>26)) + wstate; \ + s = (signed int)((v^(v>>20))*0x6957f5a7); \ + r1 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0; \ + state = LCGA*state + LCGC; \ + wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \ + v = (state ^ (state>>26)) + wstate; \ + s = (signed int)((v^(v>>20))*0x6957f5a7); \ + r2 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0; \ + rsq = r1 * r1 + r2 * r2; \ + if (rsq < (numtyp)1.0) break; \ + } \ + numtyp fac = ucl_sqrt((numtyp)-2.0*log(rsq)/rsq); \ + randnum = r2*fac; \ +} +#endif + +__kernel void k_dpd(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict coeff, + const int lj_types, + const __global numtyp *restrict sp_lj, + const __global numtyp *restrict sp_sqrt, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp3 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, + const __global numtyp4 *restrict v_, + const __global numtyp *restrict cutsq, + const numtyp dtinvsqrt, const int seed, + const int timestep, const int tstat_only, + const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + int n_stride; + local_allocate_store_pair(); + + acctyp3 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp energy, virial[6]; + if (EVFLAG) { + energy=(acctyp)0; + for (int i=0; i<6; i++) virial[i]=(acctyp)0; + } + + if (ii tag2) { + tag1 = jtag; tag2 = itag; + } + + numtyp randnum = (numtyp)0.0; + saru(tag1, tag2, seed, timestep, randnum); + + // conservative force = a0 * wd, or 0 if tstat only + // drag force = -gamma * wd^2 * (delx dot delv) / r + // random force = sigma * wd * rnd * dtinvsqrt; + + numtyp force = (numtyp)0.0; + if (!tstat_only) force = coeff[mtype].x*wd; + force -= coeff[mtype].y*wd*wd*dot*rinv; + force *= factor_dpd; + force += factor_sqrt*coeff[mtype].z*wd*randnum*dtinvsqrt; + force*=rinv; + + f.x+=delx*force; + f.y+=dely*force; + f.z+=delz*force; + + if (EVFLAG && eflag) { + // unshifted eng of conservative term: + // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]); + // eng shifted to 0.0 at cutoff + numtyp e = (numtyp)0.5*coeff[mtype].x*coeff[mtype].w * wd*wd; + energy+=factor_dpd*e; + } + if (EVFLAG && vflag) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + } + + } // for nbor + } // if ii + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); +} + +__kernel void k_dpd_fast(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict coeff_in, + const __global numtyp *restrict sp_lj_in, + const __global numtyp *restrict sp_sqrt_in, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp3 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, + const __global numtyp4 *restrict v_, + const __global numtyp *restrict cutsq, + const numtyp dtinvsqrt, const int seed, + const int timestep, const int tstat_only, + const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + #ifndef ONETYPE + __local numtyp4 coeff[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp sp_lj[4]; + __local numtyp sp_sqrt[4]; + if (tid<4) { + sp_lj[tid]=sp_lj_in[tid]; + sp_sqrt[tid]=sp_sqrt_in[tid]; + } + if (tid tag2) { + tag1 = jtag; tag2 = itag; + } + + numtyp randnum = (numtyp)0.0; + saru(tag1, tag2, seed, timestep, randnum); + + // conservative force = a0 * wd, or 0 if tstat only + // drag force = -gamma * wd^2 * (delx dot delv) / r + // random force = sigma * wd * rnd * dtinvsqrt; + + #ifndef ONETYPE + const numtyp coeffx=coeff[mtype].x; + const numtyp coeffy=coeff[mtype].y; + const numtyp coeffz=coeff[mtype].z; + #endif + numtyp force = (numtyp)0.0; + if (!tstat_only) force = coeffx*wd; + force -= coeffy*wd*wd*dot*rinv; + #ifndef ONETYPE + force *= factor_dpd; + force += factor_sqrt*coeffz*wd*randnum*dtinvsqrt; + #else + force += coeffz*wd*randnum*dtinvsqrt; + #endif + force*=rinv; + + f.x+=delx*force; + f.y+=dely*force; + f.z+=delz*force; + + if (EVFLAG && eflag) { + // unshifted eng of conservative term: + // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]); + // eng shifted to 0.0 at cutoff + numtyp e = (numtyp)0.5*coeffx*coeffw * wd*wd; + #ifndef ONETYPE + energy+=factor_dpd*e; + #else + energy+=e; + #endif + } + if (EVFLAG && vflag) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + } + + } // for nbor + } // if ii + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); +} + diff --git a/lib/gpu/lal_dpd_charged.h b/lib/gpu/lal_dpd_charged.h new file mode 100644 index 0000000000..2220a3663d --- /dev/null +++ b/lib/gpu/lal_dpd_charged.h @@ -0,0 +1,89 @@ +/*************************************************************************** + dpd.h + ------------------- + Trung Dac Nguyen (ORNL) + + Class for acceleration of the dpd pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : Jan 15, 2014 + email : nguyentd@ornl.gov + ***************************************************************************/ + +#ifndef LAL_DPD_CHARGED_H +#define LAL_DPD_CHARGED_H + +#include "lal_base_dpd.h" + +namespace LAMMPS_AL { + +template +class DPDCharged : public BaseDPD { + public: + DPDCharged(); + ~DPDCharged(); + + /// Clear any previous data and set up for a new LAMMPS run + /** \param max_nbors initial number of rows in the neighbor matrix + * \param cell_size cutoff + skin + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successful + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, double **host_a0, + double **host_gamma, double **host_sigma, double **host_cut, + double *host_special_lj, bool tstat_only, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, const double gpu_split, FILE *screen); + + /// Clear all host and device data + /** \note This is called at the beginning of the init() routine **/ + void clear(); + + /// Returns memory usage on device per atom + int bytes_per_atom(const int max_nbors) const; + + /// Total host memory used by library for pair style + double host_memory_usage() const; + + /// Update coeff if needed (tstat only) + void update_coeff(int ntypes, double **host_a0, double **host_gamma, + double **host_sigma, double **host_cut); + + // --------------------------- TYPE DATA -------------------------- + + /// coeff.x = a0, coeff.y = gamma, coeff.z = sigma, coeff.w = cut + UCL_D_Vec coeff; + + UCL_D_Vec cutsq; + + /// Special LJ values + UCL_D_Vec sp_lj, sp_sqrt; + + /// If atom type constants fit in shared memory, use fast kernels + bool shared_types; + + /// Number of atom types + int _lj_types; + + /// Only used for thermostat + int _tstat_only; + + /// pointer to host data of charge + double * + + private: + bool _allocated; + int loop(const int eflag, const int vflag); +}; + +} + +#endif diff --git a/lib/gpu/lal_dpd_charged_ext.cpp b/lib/gpu/lal_dpd_charged_ext.cpp new file mode 100644 index 0000000000..d4ab7f8e36 --- /dev/null +++ b/lib/gpu/lal_dpd_charged_ext.cpp @@ -0,0 +1,133 @@ +/*************************************************************************** + dpd_ext.cpp + ------------------- + Trung Dac Nguyen (ORNL) + + Functions for LAMMPS access to dpd acceleration routines. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : Jan 15, 2014 + email : nguyentd@ornl.gov + ***************************************************************************/ + +#include +#include +#include + +#include "lal_dpd.h" + +using namespace std; +using namespace LAMMPS_AL; + +static DPD DPDCMF; + +// --------------------------------------------------------------------------- +// Allocate memory on host and device and copy constants to device +// --------------------------------------------------------------------------- +int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, + double **host_gamma, double **host_sigma, double **host_cut, + double *special_lj, const int inum, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen) { + DPDCMF.clear(); + gpu_mode=DPDCMF.device->gpu_mode(); + double gpu_split=DPDCMF.device->particle_split(); + int first_gpu=DPDCMF.device->first_device(); + int last_gpu=DPDCMF.device->last_device(); + int world_me=DPDCMF.device->world_me(); + int gpu_rank=DPDCMF.device->gpu_rank(); + int procs_per_gpu=DPDCMF.device->procs_per_gpu(); + + DPDCMF.device->init_message(screen,"dpd",first_gpu,last_gpu); + + bool message=false; + if (DPDCMF.device->replica_me()==0 && screen) + message=true; + + if (message) { + fprintf(screen,"Initializing Device and compiling on process 0..."); + fflush(screen); + } + + int init_ok=0; + if (world_me==0) + init_ok=DPDCMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma, + host_cut, special_lj, false, inum, nall, max_nbors, + maxspecial, cell_size, gpu_split, screen); + + DPDCMF.device->world_barrier(); + if (message) + fprintf(screen,"Done.\n"); + + for (int i=0; iserialize_init(); + if (message) + fprintf(screen,"Done.\n"); + } + if (message) + fprintf(screen,"\n"); + + if (init_ok==0) + DPDCMF.estimate_gpu_overhead(); + return init_ok; +} + +void dpd_charged_gpu_clear() { + DPDCMF.clear(); +} + +int ** dpd_charged_gpu_compute_n(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, bool &success, + double **host_v, const double dtinvsqrt, + const int seed, const int timestep, + double *boxlo, double *prd) { + return DPDCMF.compute(ago, inum_full, nall, host_x, host_type, sublo, + subhi, tag, nspecial, special, eflag, vflag, eatom, + vatom, host_start, ilist, jnum, cpu_time, success, + host_v, dtinvsqrt, seed, timestep, boxlo, prd); +} + +void dpd_charged_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, tagint *tag, + double **host_v, const double dtinvsqrt, + const int seed, const int timestep, + const int nlocal, double *boxlo, double *prd) { + DPDCMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj, + firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time, success, + tag, host_v, dtinvsqrt, seed, timestep, nlocal, boxlo, prd); +} + +void dpd_charged_gpu_update_coeff(int ntypes, double **host_a0, double **host_gamma, + double **host_sigma, double **host_cut) +{ + DPDCMF.update_coeff(ntypes,host_a0,host_gamma,host_sigma,host_cut); +} + +double dpd_charged_gpu_bytes() { + return DPDCMF.host_memory_usage(); +} + + diff --git a/src/DPD-BASIC/pair_dpd_charged.cpp b/src/DPD-BASIC/pair_dpd_charged.cpp new file mode 100644 index 0000000000..6468e75280 --- /dev/null +++ b/src/DPD-BASIC/pair_dpd_charged.cpp @@ -0,0 +1,574 @@ +// clang-format off +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Kurt Smith (U Pittsburgh) +------------------------------------------------------------------------- */ + +#include "pair_dpd_charged.h" + +#include "atom.h" +#include "comm.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "neigh_list.h" +#include "neighbor.h" +#include "random_mars.h" +#include "update.h" + +#include "ewald_const.h" +#include "kspace.h" + +#include +#include + +using namespace LAMMPS_NS; +using namespace EwaldConst; + +static constexpr double EPSILON = 1.0e-10; + +/* ---------------------------------------------------------------------- */ + +PairDPDCharged::PairDPDCharged(LAMMPS *lmp) : Pair(lmp) +{ + writedata = 1; + ewaldflag = pppmflag = 1; + qdist = 0.0; + random = nullptr; +} + +/* ---------------------------------------------------------------------- */ + +PairDPDCharged::~PairDPDCharged() +{ + if (copymode) return; + + if (allocated) { + memory->destroy(setflag); + memory->destroy(cutsq); + memory->destroy(cut_dpd); + memory->destroy(cut_dpdsq); + memory->destroy(cut_slater); + memory->destroy(cut_slatersq); + + memory->destroy(cut); + memory->destroy(a0); + memory->destroy(gamma); + memory->destroy(sigma); + memory->destroy(scale); + } + + if (random) delete random; +} + +/* ---------------------------------------------------------------------- */ + +void PairDPDCharged::compute(int eflag, int vflag) +{ + int i,j,ii,jj,inum,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double vxtmp,vytmp,vztmp,delvx,delvy,delvz; + double r2inv,forcedpd,forcecoul,factor_coul; + double grij,expm2,prefactor,t,erfc; + double rsq,r,rinv,dot,wd,randnum,factor_dpd,factor_sqrt; + int *ilist,*jlist,*numneigh,**firstneigh; + double slater_term; + + + evdwl = 0.0; + ev_init(eflag,vflag); + ecoul = 0.0; + + double **x = atom->x; + double **v = atom->v; + double **f = atom->f; + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_lj = force->special_lj; + int newton_pair = force->newton_pair; + double dtinvsqrt = 1.0/sqrt(update->dt); + + double *q = atom->q; + double *special_coul = force->special_coul; + double qqrd2e = force->qqrd2e; + + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + vxtmp = v[i][0]; + vytmp = v[i][1]; + vztmp = v[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_dpd = special_lj[sbmask(j)]; + factor_sqrt = special_sqrt[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + // forces if below maximum cutoff + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + if (r < EPSILON) continue; // r can be 0.0 in DPD systems + // apply DPD force if distance below DPD cutoff + if (rsq < cut_dpdsq[itype][jtype]) { + rinv = 1.0/r; + delvx = vxtmp - v[j][0]; + delvy = vytmp - v[j][1]; + delvz = vztmp - v[j][2]; + dot = delx*delvx + dely*delvy + delz*delvz; + wd = 1.0 - r/cut_dpd[itype][jtype]; + randnum = random->gaussian(); + + // conservative force = a0 * wd + // drag force = -gamma * wd^2 * (delx dot delv) / r + // random force = sigma * wd * rnd * dtinvsqrt; + // random force must be scaled by sqrt(factor_dpd) + + forcedpd = a0[itype][jtype]*wd; + forcedpd -= gamma[itype][jtype]*wd*wd*dot*rinv; + forcedpd *= factor_dpd; + forcedpd += factor_sqrt*sigma[itype][jtype]*wd*randnum*dtinvsqrt; + forcedpd *= rinv; + } else forcedpd = 0.0; + + // apply Slater electrostatic force if distance below Slater cutoff + // and the two species are charged + if (cut_slater[itype][jtype] != 0.0 && rsq < cut_slatersq[itype][jtype]){ + r2inv = 1.0/rsq; + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + slater_term = exp(-2*r/lamda)*(1 + (2*r/lamda*(1+r/lamda))); + prefactor = qqrd2e * scale[itype][jtype] * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - slater_term); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor*(1-slater_term); + forcecoul *= r2inv; + } else forcecoul = 0.0; + + fpair = forcedpd + forcecoul; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (newton_pair || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + // tallies global or per-atom energy and virial only if needed + if (eflag) { + if (rsq < cut_dpdsq[itype][jtype]) { + // eng shifted to 0.0 at cutoff + evdwl = 0.5*a0[itype][jtype]*cut_dpd[itype][jtype] * wd*wd; + evdwl *= factor_dpd; + } else evdwl = 0.0; + + if (cut_slater[itype][jtype] != 0.0 && rsq < cut_slatersq[itype][jtype]){ + ecoul = prefactor*(erfc - (1 + r/lamda)*exp(-2*r/lamda)); + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor*(1.0-(1 + r/lamda)*exp(-2*r/lamda)); + } else ecoul = 0.0; + } + + if (evflag) ev_tally(i,j,nlocal,newton_pair, + evdwl,ecoul,fpair,delx,dely,delz); + } + } + } + + if (vflag_fdotr) virial_fdotr_compute(); +} + +/* ---------------------------------------------------------------------- + allocate all arrays +------------------------------------------------------------------------- */ + +void PairDPDCharged::allocate() +{ + int i,j; + allocated = 1; + int n = atom->ntypes; + + memory->create(setflag,n+1,n+1,"pair:setflag"); + for (i = 1; i <= n; i++) + for (j = i; j <= n; j++) + setflag[i][j] = 0; + + memory->create(cutsq,n+1,n+1,"pair:cutsq"); + memory->create(scale,n+1,n+1,"pair:scale"); + + memory->create(cut,n+1,n+1,"pair:cut"); + memory->create(cut_dpd,n+1,n+1,"pair:cut_dpd"); + memory->create(cut_dpdsq,n+1,n+1,"pair:cut_dpdsq"); + memory->create(cut_slater,n+1,n+1,"pair:cut_slater"); + memory->create(cut_slatersq,n+1,n+1,"pair:cut_slatersq"); + memory->create(a0,n+1,n+1,"pair:a0"); + memory->create(gamma,n+1,n+1,"pair:gamma"); + memory->create(sigma,n+1,n+1,"pair:sigma"); + for (i = 0; i <= atom->ntypes; i++) + for (j = 0; j <= atom->ntypes; j++) + sigma[i][j] = gamma[i][j] = 0.0; +} + +/* ---------------------------------------------------------------------- + global settings +------------------------------------------------------------------------- */ + +void PairDPDCharged::settings(int narg, char **arg) +{ + // params : T cut_dpd seed lambda cut_coul + if (narg != 5) error->all(FLERR,"Illegal pair_style command"); + + temperature = utils::numeric(FLERR,arg[0],false,lmp); + cut_global = utils::numeric(FLERR,arg[1],false,lmp); + seed = utils::inumeric(FLERR,arg[2],false,lmp); + lamda = utils::numeric(FLERR,arg[3],false,lmp); + cut_coul = utils::numeric(FLERR,arg[4],false,lmp); + // initialize Marsaglia RNG with processor-unique seed + + if (seed <= 0) error->all(FLERR,"Illegal pair_style command"); + delete random; + random = new RanMars(lmp,seed + comm->me); + + // reset cutoffs that have been explicitly set + + if (allocated) { + int i,j; + for (i = 1; i <= atom->ntypes; i++) + for (j = i; j <= atom->ntypes; j++) + if (setflag[i][j]) cut_dpd[i][j] = MAX(cut_global,cut_coul); + } +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs +------------------------------------------------------------------------- */ + +void PairDPDCharged::coeff(int narg, char **arg) +{ + if (narg < 4 || narg > 6) + error->all(FLERR,"Incorrect args for pair coefficients"); + if (!allocated) allocate(); + + int ilo,ihi,jlo,jhi; + utils::bounds(FLERR,arg[0],1,atom->ntypes,ilo,ihi,error); + utils::bounds(FLERR,arg[1],1,atom->ntypes,jlo,jhi,error); + + double a0_one = utils::numeric(FLERR,arg[2],false,lmp); + double gamma_one = utils::numeric(FLERR,arg[3],false,lmp); + + double cut_one = cut_global; + double cut_two = 0.0; + + if (narg > 4) { + bool do_slater = utils::logical(FLERR,arg[4],false,lmp); + if (do_slater) cut_two = cut_coul+2.0*qdist; + } + + if (narg > 5) cut_one = utils::numeric(FLERR,arg[5],false,lmp); + + int count = 0; + for (int i = ilo; i <= ihi; i++) { + for (int j = MAX(jlo,i); j <= jhi; j++) { + a0[i][j] = a0_one; + gamma[i][j] = gamma_one; + cut_dpd[i][j] = cut_one; + cut_slater[i][j] = cut_two; + cut[i][j] = MAX(cut_one, cut_two); + setflag[i][j] = 1; + scale[i][j] = 1.0; + count++; + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairDPDCharged::init_style() +{ + if (comm->ghost_velocity == 0) + error->all(FLERR,"Pair dpd requires ghost atoms store velocity"); + if (!atom->q_flag) + error->all(FLERR,"Pair style coul/slater/long requires atom attribute q"); + + // if newton off, forces between atoms ij will be double computed + // using different random numbers + + if (force->newton_pair == 0 && comm->me == 0) + error->warning(FLERR, "Pair dpd needs newton pair on for momentum conservation"); + + neighbor->add_request(this); + + // precompute random force scaling factors + + for (int i = 0; i < 4; ++i) special_sqrt[i] = sqrt(force->special_lj[i]); + + + // ensure use of KSpace long-range solver, set g_ewald + + if (force->kspace == nullptr) + error->all(FLERR,"Pair style requires a KSpace style"); + g_ewald = force->kspace->g_ewald; +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i + return the maximum cutoff between Slater or DPD cutoff if charged + return the DPD cutoff for uncharged +------------------------------------------------------------------------- */ + +double PairDPDCharged::init_one(int i, int j) +{ + if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); + + sigma[i][j] = sqrt(2.0*force->boltz*temperature*gamma[i][j]); + + cut_dpdsq[i][j] = cut_dpd[i][j] * cut_dpd[i][j]; + cut_dpdsq[j][i] = cut_dpdsq[i][j]; + cut_slatersq[i][j] = cut_slater[i][j] * cut_slater[i][j]; + cut_slatersq[j][i] = cut_slatersq[i][j]; + + a0[j][i] = a0[i][j]; + gamma[j][i] = gamma[i][j]; + sigma[j][i] = sigma[i][j]; + scale[j][i] = scale[i][j]; + cut_dpd[j][i] = cut_dpd[i][j]; + cut_slater[j][i] = cut_slater[i][j]; + cut[j][i] = cut[i][j]; + + //return cut[i][j]; + return MAX(cut_dpd[i][j], cut_slater[i][j]); +} + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairDPDCharged::write_restart(FILE *fp) +{ + write_restart_settings(fp); + + int i,j; + for (i = 1; i <= atom->ntypes; i++) + for (j = i; j <= atom->ntypes; j++) { + fwrite(&setflag[i][j],sizeof(int),1,fp); + if (setflag[i][j]) { + fwrite(&a0[i][j],sizeof(double),1,fp); + fwrite(&gamma[i][j],sizeof(double),1,fp); + fwrite(&cut[i][j],sizeof(double),1,fp); + fwrite(&cut_dpd[i][j],sizeof(double),1,fp); + fwrite(&cut_dpdsq[i][j],sizeof(double),1,fp); + fwrite(&cut_slater[i][j],sizeof(double),1,fp); + fwrite(&cut_slatersq[i][j],sizeof(double),1,fp); + fwrite(&scale[i][j],sizeof(double),1,fp); + } + } +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +void PairDPDCharged::read_restart(FILE *fp) +{ + read_restart_settings(fp); + + allocate(); + + int i,j; + int me = comm->me; + for (i = 1; i <= atom->ntypes; i++) + for (j = i; j <= atom->ntypes; j++) { + if (me == 0) utils::sfread(FLERR,&setflag[i][j],sizeof(int),1,fp,nullptr,error); + MPI_Bcast(&setflag[i][j],1,MPI_INT,0,world); + if (setflag[i][j]) { + if (me == 0) { + utils::sfread(FLERR,&a0[i][j],sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR,&gamma[i][j],sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR,&cut[i][j],sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR, &scale[i][j],sizeof(double),1,fp, nullptr, error); + } + MPI_Bcast(&a0[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&gamma[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&cut[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&cut_dpd[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&cut_dpdsq[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&cut_slater[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&cut_slatersq[i][j],1,MPI_DOUBLE,0,world); + MPI_Bcast(&scale[i][j],1,MPI_DOUBLE,0,world); + } + } +} + +/* ---------------------------------------------------------------------- + proc 0 writes to restart file +------------------------------------------------------------------------- */ + +void PairDPDCharged::write_restart_settings(FILE *fp) +{ + fwrite(&temperature,sizeof(double),1,fp); + fwrite(&cut_global,sizeof(double),1,fp); + fwrite(&seed,sizeof(int),1,fp); + fwrite(&mix_flag,sizeof(int),1,fp); + fwrite(&cut_coul,sizeof(double),1,fp); + fwrite(&cut_dpd,sizeof(double),1,fp); + fwrite(&cut_dpdsq,sizeof(double),1,fp); + fwrite(&cut_slater,sizeof(double),1,fp); + fwrite(&cut_slatersq,sizeof(double),1,fp); + fwrite(&lamda,sizeof(double),1,fp); + fwrite(&offset_flag,sizeof(int),1,fp); +} + +/* ---------------------------------------------------------------------- + proc 0 reads from restart file, bcasts +------------------------------------------------------------------------- */ + +void PairDPDCharged::read_restart_settings(FILE *fp) +{ + if (comm->me == 0) { + utils::sfread(FLERR,&temperature,sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR,&cut_global,sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR,&seed,sizeof(int),1,fp,nullptr,error); + utils::sfread(FLERR,&mix_flag,sizeof(int),1,fp,nullptr,error); + utils::sfread(FLERR, &cut_coul,sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR, &cut_dpd,sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR, &cut_dpdsq,sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR, &cut_slater,sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR, &cut_slatersq,sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR, &lamda,sizeof(double),1,fp,nullptr,error); + utils::sfread(FLERR, &offset_flag,sizeof(int),1,fp,nullptr,error); + } + MPI_Bcast(&temperature,1,MPI_DOUBLE,0,world); + MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world); + MPI_Bcast(&seed,1,MPI_INT,0,world); + MPI_Bcast(&mix_flag,1,MPI_INT,0,world); + MPI_Bcast(&cut_coul,1,MPI_DOUBLE,0,world); + MPI_Bcast(&lamda,1,MPI_DOUBLE,0,world); + MPI_Bcast(&offset_flag,1,MPI_INT,0,world); + + // initialize Marsaglia RNG with processor-unique seed + // same seed that pair_style command initially specified + + if (random) delete random; + random = new RanMars(lmp,seed + comm->me); +} + +/* ---------------------------------------------------------------------- + proc 0 writes to data file +------------------------------------------------------------------------- */ + +void PairDPDCharged::write_data(FILE *fp) +{ + for (int i = 1; i <= atom->ntypes; i++) + fprintf(fp,"%d %g %g\n",i,a0[i][i],gamma[i][i]); +} + +/* ---------------------------------------------------------------------- + proc 0 writes all pairs to data file +------------------------------------------------------------------------- */ + +void PairDPDCharged::write_data_all(FILE *fp) +{ + for (int i = 1; i <= atom->ntypes; i++) + for (int j = i; j <= atom->ntypes; j++) + fprintf(fp,"%d %d %g %g %g\n",i,j,a0[i][j],gamma[i][j],cut[i][j]); +} + +/* ---------------------------------------------------------------------- */ + +double PairDPDCharged::single(int i, int j, int itype, int jtype, double rsq, + double factor_coul, double factor_dpd, double &fforce) +{ + double r,rinv,wd,phi; + double r2inv,grij,expm2,t,erfc,prefactor; + double slater_term; + double forcecoul,phicoul; + + double energy = 0.0; + fforce = 0.0; + + r = sqrt(rsq); + + // compute DPD force and energy + if (rsq < cut_dpdsq[itype][jtype] && r > EPSILON) { + rinv = 1.0/r; + wd = 1.0 - r/cut_dpd[itype][jtype]; + fforce += a0[itype][jtype]*wd * factor_dpd*rinv; + + phi = 0.5*a0[itype][jtype]*cut_dpd[itype][jtype] * wd*wd; + energy += factor_dpd*phi; + } + + // compute Slater coulombic force and energy + if (atom->q[i]*atom->q[j] != 0.0 && rsq < cut_slatersq[itype][jtype]) { + r2inv = 1.0/rsq; + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + slater_term = exp(-2*r/lamda)*(1 + (2*r/lamda*(1+r/lamda))); + prefactor = force->qqrd2e * atom->q[i]*atom->q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - slater_term); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + fforce += forcecoul * r2inv; + phicoul = prefactor*(erfc - (1 + r/lamda)*exp(-2*r/lamda)); + if (factor_coul < 1.0) phicoul -= (1.0-factor_coul)*prefactor; + energy += phicoul; + } + + return energy; +} + +void *PairDPDCharged::extract(const char *str, int &dim) +{ + if (strcmp(str,"cut_coul") == 0) { + dim = 0; + return (void *) &cut_coul; + } + if (strcmp(str,"lamda") == 0) { + dim = 0; + return (void *) &lamda; + } + if (strcmp(str,"scale") == 0) { + dim = 2; + return (void *) scale; + } + return nullptr; +} \ No newline at end of file diff --git a/src/DPD-BASIC/pair_dpd_charged.h b/src/DPD-BASIC/pair_dpd_charged.h new file mode 100644 index 0000000000..eebe421ed6 --- /dev/null +++ b/src/DPD-BASIC/pair_dpd_charged.h @@ -0,0 +1,66 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(dpd/charged,PairDPDCharged); +// clang-format on +#else + +#ifndef LMP_PAIR_DPD_CHARGED_H +#define LMP_PAIR_DPD_CHARGED_H + +#include "pair.h" + +namespace LAMMPS_NS { + +class PairDPDCharged : public Pair { + public: + PairDPDCharged(class LAMMPS *); + ~PairDPDCharged() override; + void compute(int, int) override; + void settings(int, char **) override; + void coeff(int, char **) override; + void init_style() override; + double init_one(int, int) override; + void write_restart(FILE *) override; + void read_restart(FILE *) override; + void write_restart_settings(FILE *) override; + void read_restart_settings(FILE *) override; + void write_data(FILE *) override; + void write_data_all(FILE *) override; + double single(int, int, int, int, double, double, double, double &) override; + void *extract(const char *, int &) override; + + protected: + double cut_global, temperature; + double special_sqrt[4]; + int seed; + double **cut; + double **cut_dpd, **cut_dpdsq; + double **cut_slater, **cut_slatersq; + double **a0, **gamma; + double **sigma; + class RanMars *random; + double cut_coul, qdist; + double lamda; + double g_ewald; + double **scale; + + virtual void allocate(); +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/GPU/pair_dpd_charged_gpu.cpp b/src/GPU/pair_dpd_charged_gpu.cpp new file mode 100644 index 0000000000..60620efa24 --- /dev/null +++ b/src/GPU/pair_dpd_charged_gpu.cpp @@ -0,0 +1,441 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Trung Dac Nguyen (ORNL) +------------------------------------------------------------------------- */ + +#include "pair_dpd_charged_gpu.h" + +#include "atom.h" +#include "domain.h" +#include "error.h" +#include "force.h" +#include "gpu_extra.h" +#include "neigh_list.h" +#include "neighbor.h" +#include "suffix.h" +#include "update.h" + +#include "ewald_const.h" +#include "kspace.h" + +#include +#include + +using namespace LAMMPS_NS; +using namespace EwaldConst; + +// External functions from cuda library for atom decomposition + +int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, double **host_gamma, + double **host_sigma, double **host_cut, double *special_lj, const int inum, + const int nall, const int max_nbors, const int maxspecial, const double cell_size, + int &gpu_mode, FILE *screen); +void dpd_charged_gpu_clear(); +int **dpd_charged_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, + int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, + const bool vatom, int &host_start, int **ilist, int **jnum, + const double cpu_time, bool &success, double **host_v, + const double dtinvsqrt, const int seed, const int timestep, double *boxlo, + double *prd); +void dpd_charged_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x, + int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, + const bool vflag, const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, tagint *tag, double **host_v, + const double dtinvsqrt, const int seed, const int timestep, const int nlocal, + double *boxlo, double *prd); +double dpd_charged_gpu_bytes(); + +static constexpr double EPSILON = 1.0e-10; + +//#define _USE_UNIFORM_SARU_LCG +//#define _USE_UNIFORM_SARU_TEA8 +//#define _USE_GAUSSIAN_SARU_LCG + +#if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && \ + !defined(_USE_GAUSSIAN_SARU_LCG) +#define _USE_UNIFORM_SARU_LCG +#endif + +// References: +// 1. Y. Afshar, F. Schmid, A. Pishevar, S. Worley, Comput. Phys. Comm. 184 (2013), 1119–1128. +// 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, Comput. Phys. Comm. 230 (2011), 7191-7201. +// PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19 + +#define LCGA 0x4beb5d59 // Full period 32 bit LCG +#define LCGC 0x2600e1f7 +#define oWeylPeriod 0xda879add // Prime period 3666320093 +#define oWeylOffset 0x8009d14b +#define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */ + +// specifically implemented for steps = 1; high = 1.0; low = -1.0 +// returns uniformly distributed random numbers u in [-1.0;1.0] +// using the inherent LCG, then multiply u with sqrt(3) to "match" +// with a normal random distribution. +// Afshar et al. mutlplies u in [-0.5;0.5] with sqrt(12) +// Curly brackets to make variables local to the scope. +#ifdef _USE_UNIFORM_SARU_LCG +#define numtyp double +#define SQRT3 (numtyp) 1.7320508075688772935274463 +#define saru(seed1, seed2, seed, timestep, randnum) \ + { \ + unsigned int seed3 = seed + timestep; \ + seed3 ^= (seed1 << 7) ^ (seed2 >> 6); \ + seed2 += (seed1 >> 4) ^ (seed3 >> 15); \ + seed1 ^= (seed2 << 9) + (seed3 << 8); \ + seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1)); \ + seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16)); \ + seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22)); \ + seed2 += seed1 * seed3; \ + seed1 += seed3 ^ (seed2 >> 2); \ + seed2 ^= ((signed int) seed2) >> 17; \ + unsigned int state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14)); \ + unsigned int wstate = (state + seed2) ^ (((signed int) state) >> 8); \ + state = state + (wstate * (wstate ^ 0xdddf97f5)); \ + wstate = 0xABCB96F7 + (wstate >> 1); \ + state = LCGA * state + LCGC; \ + wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \ + unsigned int v = (state ^ (state >> 26)) + wstate; \ + unsigned int s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7); \ + randnum = SQRT3 * (s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0); \ + } +#endif + +// specifically implemented for steps = 1; high = 1.0; low = -1.0 +// returns uniformly distributed random numbers u in [-1.0;1.0] using TEA8 +// then multiply u with sqrt(3) to "match" with a normal random distribution +// Afshar et al. mutlplies u in [-0.5;0.5] with sqrt(12) +#ifdef _USE_UNIFORM_SARU_TEA8 +#define numtyp double +#define SQRT3 (numtyp) 1.7320508075688772935274463 +#define k0 0xA341316C +#define k1 0xC8013EA4 +#define k2 0xAD90777D +#define k3 0x7E95761E +#define delta 0x9e3779b9 +#define rounds 8 +#define saru(seed1, seed2, seed, timestep, randnum) \ + { \ + unsigned int seed3 = seed + timestep; \ + seed3 ^= (seed1 << 7) ^ (seed2 >> 6); \ + seed2 += (seed1 >> 4) ^ (seed3 >> 15); \ + seed1 ^= (seed2 << 9) + (seed3 << 8); \ + seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1)); \ + seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16)); \ + seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22)); \ + seed2 += seed1 * seed3; \ + seed1 += seed3 ^ (seed2 >> 2); \ + seed2 ^= ((signed int) seed2) >> 17; \ + unsigned int state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14)); \ + unsigned int wstate = (state + seed2) ^ (((signed int) state) >> 8); \ + state = state + (wstate * (wstate ^ 0xdddf97f5)); \ + wstate = 0xABCB96F7 + (wstate >> 1); \ + unsigned int sum = 0; \ + for (int i = 0; i < rounds; i++) { \ + sum += delta; \ + state += ((wstate << 4) + k0) ^ (wstate + sum) ^ ((wstate >> 5) + k1); \ + wstate += ((state << 4) + k2) ^ (state + sum) ^ ((state >> 5) + k3); \ + } \ + unsigned int v = (state ^ (state >> 26)) + wstate; \ + unsigned int s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7); \ + randnum = SQRT3 * (s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0); \ + } +#endif + +// specifically implemented for steps = 1; high = 1.0; low = -1.0 +// returns two uniformly distributed random numbers r1 and r2 in [-1.0;1.0], +// and uses the polar method (Marsaglia's) to transform to a normal random value +// This is used to compared with CPU DPD using RandMars::gaussian() +#ifdef _USE_GAUSSIAN_SARU_LCG +#define numtyp double +#define saru(seed1, seed2, seed, timestep, randnum) \ + { \ + unsigned int seed3 = seed + timestep; \ + seed3 ^= (seed1 << 7) ^ (seed2 >> 6); \ + seed2 += (seed1 >> 4) ^ (seed3 >> 15); \ + seed1 ^= (seed2 << 9) + (seed3 << 8); \ + seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1)); \ + seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16)); \ + seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22)); \ + seed2 += seed1 * seed3; \ + seed1 += seed3 ^ (seed2 >> 2); \ + seed2 ^= ((signed int) seed2) >> 17; \ + unsigned int state = 0x12345678; \ + unsigned int wstate = 12345678; \ + state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14)); \ + wstate = (state + seed2) ^ (((signed int) state) >> 8); \ + state = state + (wstate * (wstate ^ 0xdddf97f5)); \ + wstate = 0xABCB96F7 + (wstate >> 1); \ + unsigned int v, s; \ + numtyp r1, r2, rsq; \ + while (1) { \ + state = LCGA * state + LCGC; \ + wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \ + v = (state ^ (state >> 26)) + wstate; \ + s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7); \ + r1 = s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0; \ + state = LCGA * state + LCGC; \ + wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \ + v = (state ^ (state >> 26)) + wstate; \ + s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7); \ + r2 = s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0; \ + rsq = r1 * r1 + r2 * r2; \ + if (rsq < (numtyp) 1.0) break; \ + } \ + numtyp fac = sqrt((numtyp) -2.0 * log(rsq) / rsq); \ + randnum = r2 * fac; \ + } +#endif + +/* ---------------------------------------------------------------------- */ + +PairDPDChargedGPU::PairDPDCharged(LAMMPS *lmp) : PairDPD(lmp), gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + reinitflag = 0; + cpu_time = 0.0; + suffix_flag |= Suffix::GPU; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairDPDChargedGPU::~PairDPDChargedGPU() +{ + dpd_charged_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +void PairDPDChargedGPU::compute(int eflag, int vflag) +{ + ev_init(eflag, vflag); + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + double dtinvsqrt = 1.0 / sqrt(update->dt); + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + double sublo[3], subhi[3]; + if (domain->triclinic == 0) { + sublo[0] = domain->sublo[0]; + sublo[1] = domain->sublo[1]; + sublo[2] = domain->sublo[2]; + subhi[0] = domain->subhi[0]; + subhi[1] = domain->subhi[1]; + subhi[2] = domain->subhi[2]; + } else { + domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi); + } + inum = atom->nlocal; + firstneigh = dpd_charged_gpu_compute_n( + neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial, + atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh, + cpu_time, success, atom->v, dtinvsqrt, seed, update->ntimestep, domain->boxlo, domain->prd); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + dpd_charged_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh, + eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->tag, + atom->v, dtinvsqrt, seed, update->ntimestep, atom->nlocal, domain->boxlo, + domain->prd); + } + if (!success) error->one(FLERR, "Insufficient memory on accelerator"); + + if (atom->molecular != Atom::ATOMIC && neighbor->ago == 0) + neighbor->build_topology(); + if (host_start < inum) { + cpu_time = platform::walltime(); + cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh); + cpu_time = platform::walltime() - cpu_time; + } +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairDPDChargedGPU::init_style() +{ + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double mcut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + mcut = init_one(i, j); + mcut *= mcut; + if (mcut > maxcut) maxcut = mcut; + cutsq[i][j] = cutsq[j][i] = mcut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + int maxspecial = 0; + if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial; + int mnf = 5e-2 * neighbor->oneatom; + int success = + dpd_charged_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma, cut, force->special_lj, atom->nlocal, + atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen); + GPU_EXTRA::check_flag(success, error, world); + + if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL); + +} + +/* ---------------------------------------------------------------------- */ + +double PairDPDChargedGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + dpd_charged_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairDPDChargedGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist, + int *numneigh, int **firstneigh) +{ + int i, j, ii, jj, jnum, itype, jtype; + double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair; + double vxtmp, vytmp, vztmp, delvx, delvy, delvz; + double r2inv,forcedpd,forcecoul,factor_coul; + double grij,expm2,prefactor,t,erfc; + double rsq,r,rinv,dot,wd,randnum,factor_dpd,factor_sqrt; + int *ilist,*jlist,*numneigh,**firstneigh; + double slater_term; + int *jlist; + tagint itag, jtag; + + double *q = atom->q; + double *special_coul = force->special_coul; + double qqrd2e = force->qqrd2e; + + evdwl = 0.0; + ecoul = 0.0; + + double **x = atom->x; + double **v = atom->v; + double **f = atom->f; + int *type = atom->type; + tagint *tag = atom->tag; + double *special_lj = force->special_lj; + double dtinvsqrt = 1.0 / sqrt(update->dt); + int timestep = (int) update->ntimestep; + + // loop over neighbors of my atoms + + for (ii = start; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + vxtmp = v[i][0]; + vytmp = v[i][1]; + vztmp = v[i][2]; + itype = type[i]; + itag = tag[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_dpd = special_lj[sbmask(j)]; + factor_sqrt = special_sqrt[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx * delx + dely * dely + delz * delz; + jtype = type[j]; + jtag = tag[j]; + + // forces if below maximum cutoff + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + if (r < EPSILON) continue; // r can be 0.0 in DPD systems + // apply DPD force if distance below DPD cutoff + if (rsq < cut_dpdsq[itype][jtype]) { + rinv = 1.0 / r; + delvx = vxtmp - v[j][0]; + delvy = vytmp - v[j][1]; + delvz = vztmp - v[j][2]; + dot = delx * delvx + dely * delvy + delz * delvz; + wd = 1.0 - r / cut[itype][jtype]; + + unsigned int tag1 = itag, tag2 = jtag; + if (tag1 > tag2) { + tag1 = jtag; + tag2 = itag; + } + + randnum = 0.0; + saru(tag1, tag2, seed, timestep, randnum); + + // conservative force = a0 * wd + // drag force = -gamma * wd^2 * (delx dot delv) / r + // random force = sigma * wd * rnd * dtinvsqrt; + forcedpd = a0[itype][jtype]*wd; + forcedpd -= gamma[itype][jtype]*wd*wd*dot*rinv; + forcedpd *= factor_dpd; + forcedpd += factor_sqrt*sigma[itype][jtype]*wd*randnum*dtinvsqrt; + forcedpd *= rinv; + } else forcedpd = 0.0; + + // apply Slater electrostatic force if distance below Slater cutoff + // and the two species are charged + if (cut_slater[itype][jtype] != 0.0 && rsq < cut_slatersq[itype][jtype]){ + r2inv = 1.0/rsq; + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + slater_term = exp(-2*r/lamda)*(1 + (2*r/lamda*(1+r/lamda))); + prefactor = qqrd2e * scale[itype][jtype] * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - slater_term); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor*(1-slater_term); + forcecoul *= r2inv; + } else forcecoul = 0.0; + + f[i][0] += delx * fpair; + f[i][1] += dely * fpair; + f[i][2] += delz * fpair; + + if (eflag) { + // unshifted eng of conservative term: + // evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]); + // eng shifted to 0.0 at cutoff + evdwl = 0.5 * a0[itype][jtype] * cut[itype][jtype] * wd * wd; + evdwl *= factor_dpd; + } + + if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz); + } + } + } +} diff --git a/src/GPU/pair_dpd_charged_gpu.h b/src/GPU/pair_dpd_charged_gpu.h new file mode 100644 index 0000000000..6c755660a1 --- /dev/null +++ b/src/GPU/pair_dpd_charged_gpu.h @@ -0,0 +1,45 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS +// clang-format off +PairStyle(dpd/charged/gpu,PairDPDChargedGPU); +// clang-format on +#else + +#ifndef LMP_PAIR_DPD_CHARGED_GPU_H +#define LMP_PAIR_DPD_CHARGED_GPU_H + +#include "pair_dpd_charged.h" + +namespace LAMMPS_NS { + +class PairDPDChargedGPU : public PairDPDCharged { + public: + PairDPDChargedGPU(LAMMPS *lmp); + ~PairDPDChargedGPU() override; + void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int) override; + void init_style() override; + double memory_usage() override; + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + private: + int gpu_mode; + double cpu_time; +}; + +} // namespace LAMMPS_NS +#endif +#endif