From 0a06b90b53ed4a5a10df7e873fed845c344ad006 Mon Sep 17 00:00:00 2001 From: Gurgen Date: Wed, 17 Feb 2021 15:33:28 +0300 Subject: [PATCH 01/19] template for smooth/spu --- src/GPU/Install.sh | 2 + src/GPU/pair_lj_smooth_gpu.cpp | 255 +++++++++++++++++++++++++++++++++ src/GPU/pair_lj_smooth_gpu.h | 60 ++++++++ 3 files changed, 317 insertions(+) create mode 100644 src/GPU/pair_lj_smooth_gpu.cpp create mode 100644 src/GPU/pair_lj_smooth_gpu.h diff --git a/src/GPU/Install.sh b/src/GPU/Install.sh index 1fefb01d42..536d687e18 100755 --- a/src/GPU/Install.sh +++ b/src/GPU/Install.sh @@ -101,6 +101,8 @@ action pair_lj_cut_coul_msm_gpu.cpp pair_lj_cut_coul_msm.cpp action pair_lj_cut_coul_msm_gpu.h pair_lj_cut_coul_msm.h action pair_lj_cut_gpu.cpp action pair_lj_cut_gpu.h +action pair_lj_smooth_gpu.cpp +action pair_lj_smooth_gpu.h action pair_lj_expand_gpu.cpp action pair_lj_expand_gpu.h action pair_lj_expand_coul_long_gpu.cpp pair_lj_expand_coul_long.cpp diff --git a/src/GPU/pair_lj_smooth_gpu.cpp b/src/GPU/pair_lj_smooth_gpu.cpp new file mode 100644 index 0000000000..4ea5cce92e --- /dev/null +++ b/src/GPU/pair_lj_smooth_gpu.cpp @@ -0,0 +1,255 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://lammps.sandia.gov/, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mike Brown (SNL) +------------------------------------------------------------------------- */ + +#include "pair_lj_smooth_gpu.h" +#include +#include + +#include +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" +#include "update.h" +#include "domain.h" +#include "gpu_extra.h" +#include "suffix.h" + +using namespace LAMMPS_NS; + +// External functions from cuda library for atom decomposition + +int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen); + +void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset); + +void ljl_gpu_clear(); +int ** ljl_gpu_compute_n(const int ago, const int inum, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, + const double cpu_time, bool &success); +void ljl_gpu_compute(const int ago, const int inum, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success); +double ljl_gpu_bytes(); + +/* ---------------------------------------------------------------------- */ + +PairLJSmoothGPU::PairLJSmoothGPU(LAMMPS *lmp) : PairLJCut(lmp), gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + cpu_time = 0.0; + suffix_flag |= Suffix::GPU; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairLJSmoothGPU::~PairLJSmoothGPU() +{ + ljl_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJSmoothGPU::compute(int eflag, int vflag) +{ + ev_init(eflag,vflag); + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + double sublo[3],subhi[3]; + if (domain->triclinic == 0) { + sublo[0] = domain->sublo[0]; + sublo[1] = domain->sublo[1]; + sublo[2] = domain->sublo[2]; + subhi[0] = domain->subhi[0]; + subhi[1] = domain->subhi[1]; + subhi[2] = domain->subhi[2]; + } else { + domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi); + } + inum = atom->nlocal; + firstneigh = ljl_gpu_compute_n(neighbor->ago, inum, nall, + atom->x, atom->type, sublo, + subhi, atom->tag, atom->nspecial, + atom->special, eflag, vflag, eflag_atom, + vflag_atom, host_start, + &ilist, &numneigh, cpu_time, success); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + ljl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success); + } + if (!success) + error->one(FLERR,"Insufficient memory on accelerator"); + + if (host_startnewton_pair) + error->all(FLERR,"Cannot use newton pair with lj/cut/gpu pair style"); + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int success = ljl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, + offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this,instance_me); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } +} + +/* ---------------------------------------------------------------------- */ + +void PairLJSmoothGPU::reinit() +{ + Pair::reinit(); + + ljl_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset); +} + +/* ---------------------------------------------------------------------- */ + +double PairLJSmoothGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + ljl_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJSmoothGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, + int *ilist, int *numneigh, int **firstneigh) { + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r6inv,forcelj,factor_lj; + int *jlist; + + double **x = atom->x; + double **f = atom->f; + int *type = atom->type; + double *special_lj = force->special_lj; + + // loop over neighbors of my atoms + + for (ii = start; ii < inum; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + fpair = factor_lj*forcelj*r2inv; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + + if (eflag) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } + + if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz); + } + } + } +} + diff --git a/src/GPU/pair_lj_smooth_gpu.h b/src/GPU/pair_lj_smooth_gpu.h new file mode 100644 index 0000000000..fc245918d0 --- /dev/null +++ b/src/GPU/pair_lj_smooth_gpu.h @@ -0,0 +1,60 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/smooth/gpu,PairLJSmoothGPU) + +#else + +#ifndef LMP_PAIR_LJ_SMOOTH_GPU_H +#define LMP_PAIR_LJ_SMOOTH_GPU_H + +#include "pair_lj_cut.h" + +namespace LAMMPS_NS { + +class PairLJSmoothGPU : public PairLJCut { + public: + PairLJSmoothGPU(LAMMPS *lmp); + ~PairLJSmoothGPU(); + void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int); + void init_style(); + void reinit(); + double memory_usage(); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + private: + int gpu_mode; + double cpu_time; +}; + +} +#endif +#endif + +/* ERROR/WARNING messages: + +E: Insufficient memory on accelerator + +There is insufficient memory on one of the devices specified for the gpu +package + +E: Cannot use newton pair with lj/cut/gpu pair style + +Self-explanatory. + +*/ + From 4960aeb3c825b831e4bad060a848cb0e7c8d4dfc Mon Sep 17 00:00:00 2001 From: Gurgen Date: Sat, 6 Mar 2021 23:39:37 +0300 Subject: [PATCH 02/19] error on line 194 --- lib/gpu/lal_lj_smooth.cu | 233 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 233 insertions(+) create mode 100644 lib/gpu/lal_lj_smooth.cu diff --git a/lib/gpu/lal_lj_smooth.cu b/lib/gpu/lal_lj_smooth.cu new file mode 100644 index 0000000000..b69f8b9388 --- /dev/null +++ b/lib/gpu/lal_lj_smooth.cu @@ -0,0 +1,233 @@ +// ************************************************************************** +// lj_smooth.cu +// ------------------- +// W. Michael Brown (ORNL) +// +// Device code for acceleration of the lj/smooth pair style +// +// __________________________________________________________________________ +// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) +// __________________________________________________________________________ +// +// begin : +// email : brownw@ornl.gov +// *************************************************************************** + +#if defined(NV_KERNEL) || defined(USE_HIP) +#include "lal_aux_fun1.h" +#ifndef _DOUBLE_DOUBLE +_texture( pos_tex,float4); +#else +_texture_2d( pos_tex,int4); +#endif +#else +#define pos_tex x_ +#endif + +__kernel void k_lj_smooth(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict lj1, + const __global numtyp4 *restrict lj3, + const __global numtyp4 *restrict ljsw, + const int lj_types, + const __global numtyp *restrict sp_lj, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp4 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + acctyp energy=(acctyp)0; + acctyp4 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp virial[6]; + for (int i=0; i<6; i++) + virial[i]=(acctyp)0; + + if (ii0) { + numtyp e; + if (rsq < lj1[mtype].w) + e = r6inv * (lj3[mtype].x*r6inv - lj3[mtype].y) - lj3[mtype].z; + else + e = ljsw[mtype].x - ljsw[mtype].x*t - + ljsw[mtype].y*tsq/2.0 - ljsw[mtype].z*tsq*t/3.0 - + ljsw[mtype].z*tsq*tsq/4.0 - lj3[mtype].z; + + //numtyp e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y); + energy+=factor_lj*e; + } + if (vflag>0) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + } + + } // for nbor + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); + } // if ii +} + +__kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict lj1_in, + const __global numtyp4 *restrict lj3_in, + const __global numtyp4 *restrict ljsw, + const __global numtyp *restrict sp_lj_in, + const __global int * dev_nbor, + const __global int * dev_packed, + __global acctyp4 *restrict ans, + __global acctyp *restrict engv, + const int eflag, const int vflag, const int inum, + const int nbor_pitch, const int t_per_atom) { + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + __local numtyp4 lj1[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp4 lj3[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; + __local numtyp sp_lj[4]; + if (tid<4) + sp_lj[tid]=sp_lj_in[tid]; + if (tid0) + lj3[tid]=lj3_in[tid]; + } + + acctyp energy=(acctyp)0; + acctyp4 f; + f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; + acctyp virial[6]; + for (int i=0; i<6; i++) + virial[i]=(acctyp)0; + + __syncthreads(); + + if (ii0) { + numtyp e; + if (rsq < lj1[mtype].w) + e = r6inv * (lj3[mtype].x*r6inv - lj3[mtype].y) - lj3[mtype].z; + else + e = ljsw[mtype].x - ljsw[mtype].x*t - + ljsw[mtype].y*tsq/2.0 - ljsw[mtype].z*tsq*t/3.0 - + ljsw[mtype].z*tsq*tsq/4.0 - lj3[mtype].z; //??? + + //numtyp e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y); + energy+=factor_lj*e; + } + if (vflag>0) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; + } + } + + } // for nbor + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); + } // if ii +} + From e32d059d268bb019dfad141f61a2a72e41d87c73 Mon Sep 17 00:00:00 2001 From: Gurgen Date: Sat, 6 Mar 2021 23:43:25 +0300 Subject: [PATCH 03/19] lj/smooth/gpu --- lib/gpu/lal_lj_smooth.cpp | 184 +++++++++++++++++++++++++++++++++ lib/gpu/lal_lj_smooth.h | 92 +++++++++++++++++ lib/gpu/lal_lj_smooth_ext.cpp | 146 ++++++++++++++++++++++++++ src/GPU/pair_lj_smooth_gpu.cpp | 67 ++++++++---- src/GPU/pair_lj_smooth_gpu.h | 6 +- 5 files changed, 470 insertions(+), 25 deletions(-) create mode 100644 lib/gpu/lal_lj_smooth.cpp create mode 100644 lib/gpu/lal_lj_smooth.h create mode 100644 lib/gpu/lal_lj_smooth_ext.cpp diff --git a/lib/gpu/lal_lj_smooth.cpp b/lib/gpu/lal_lj_smooth.cpp new file mode 100644 index 0000000000..5c75539660 --- /dev/null +++ b/lib/gpu/lal_lj_smooth.cpp @@ -0,0 +1,184 @@ +/*************************************************************************** + lj_smooth.cpp + ------------------- + W. Michael Brown (ORNL) + + Class for acceleration of the lj/smooth pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : brownw@ornl.gov + ***************************************************************************/ + +#if defined(USE_OPENCL) +#include "lj_smooth_cl.h" +#elif defined(USE_CUDART) +const char *lj=0; +#else +#include "lj_smooth_cubin.h" +#endif + +#include "lal_lj_smooth.h" +#include +namespace LAMMPS_AL { +#define LJSMOOTHT LJSMOOTH + +extern Device device; + +template +LJSMOOTHT::LJSMOOTH() : BaseAtomic(), _allocated(false) { +} + +template +LJSMOOTHT::~LJSMOOTH() { + clear(); +} + +template +int LJSMOOTHT::bytes_per_atom(const int max_nbors) const { + return this->bytes_per_atom_atomic(max_nbors); +} + +template +int LJSMOOTHT::init(const int ntypes, + double **host_cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, + double *host_special_lj, const int nlocal, + const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *_screen, + double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, + double **host_ljsw4, + double **cut_inner, double **cut_inner_sq) { + int success; + success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, + _screen,lj_smooth,"k_lj_smooth"); + if (success!=0) + return success; + + // If atom type constants fit in shared memory use fast kernel + int lj_types=ntypes; + shared_types=false; + int max_shared_types=this->device->max_shared_types(); + if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) { + lj_types=max_shared_types; + shared_types=true; + } + _lj_types=lj_types; + + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write(lj_types*lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; iucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,lj1,host_write,host_lj1,host_lj2, + host_cutsq, cut_inner_sq); + + lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,lj3,host_write,host_lj3,host_lj4, + host_offset, cut_inner); + + ljsw.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,ljsw,host_write,host_ljsw1,host_ljsw2, + host_ljsw3,host_ljsw4); + + UCL_H_Vec dview; + sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY); + dview.view(host_special_lj,4,*(this->ucl_device)); + ucl_copy(sp_lj,dview,false); + + _allocated=true; + this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+ljsw.row_bytes()+sp_lj.row_bytes(); + return 0; +} + +template +void LJSMOOTHT::reinit(const int ntypes, double **host_cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, + double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, + double **host_ljsw4, + double **cut_inner, double **cut_inner_sq) { + // Allocate a host write buffer for data initialization + UCL_H_Vec host_write(_lj_types*_lj_types*32,*(this->ucl_device), + UCL_WRITE_ONLY); + + for (int i=0; i<_lj_types*_lj_types; i++) + host_write[i]=0.0; + + this->atom->type_pack4(ntypes,_lj_types,lj1,host_write,host_lj1,host_lj2, + host_cutsq, cut_inner_sq); + this->atom->type_pack4(ntypes,_lj_types,lj3,host_write,host_lj3,host_lj4, + host_offset, cut_inner); + this->atom->type_pack4(ntypes,_lj_types,ljsw,host_write,host_ljsw1,host_ljsw2, + host_ljsw3,host_ljsw4); +} + +template +void LJSMOOTHT::clear() { + if (!_allocated) + return; + _allocated=false; + + lj1.clear(); + lj3.clear(); + ljsw.clear(); + sp_lj.clear(); + this->clear_atomic(); +} + +template +double LJSMOOTHT::host_memory_usage() const { + return this->host_memory_usage_atomic()+sizeof(LJSMOOTH); +} + +// --------------------------------------------------------------------------- +// Calculate energies, forces, and torques +// --------------------------------------------------------------------------- +template +void LJSMOOTHT::loop(const bool _eflag, const bool _vflag) { + // Compute the block size and grid size to keep all cores busy + const int BX=this->block_size(); + int eflag, vflag; + if (_eflag) + eflag=1; + else + eflag=0; + + if (_vflag) + vflag=1; + else + vflag=0; + + int GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); + + int ainum=this->ans->inum(); + int nbor_pitch=this->nbor->nbor_pitch(); + this->time_pair.start(); + if (shared_types) { + this->k_pair_fast.set_size(GX,BX); + this->k_pair_fast.run(&this->atom->x, &lj1, &lj3, &ljsw, &sp_lj, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &eflag, + &vflag, &ainum, &nbor_pitch, + &this->_threads_per_atom); + } else { + this->k_pair.set_size(GX,BX); + this->k_pair.run(&this->atom->x, &lj1, &lj3, &ljsw, &_lj_types, &sp_lj, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->ans->force, &this->ans->engv, &eflag, &vflag, + &ainum, &nbor_pitch, &this->_threads_per_atom); + } + this->time_pair.stop(); +} + +template class LJSMOOTH; +} diff --git a/lib/gpu/lal_lj_smooth.h b/lib/gpu/lal_lj_smooth.h new file mode 100644 index 0000000000..98dbad87b7 --- /dev/null +++ b/lib/gpu/lal_lj_smooth.h @@ -0,0 +1,92 @@ +/*************************************************************************** + lj_smooth.h + ------------------- + W. Michael Brown (ORNL) + + Class for acceleration of the lj/smooth pair style. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : brownw@ornl.gov + ***************************************************************************/ + +#ifndef LAL_LJ_SMOOTH_H +#define LAL_LJ_SMOOTH_H + +#include "lal_base_atomic.h" + +namespace LAMMPS_AL { + +template +class LJSMOOTH : public BaseAtomic { + public: + LJSMOOTH(); + ~LJSMOOTH(); + + /// Clear any previous data and set up for a new LAMMPS run + /** \param max_nbors initial number of rows in the neighbor matrix + * \param cell_size cutoff + skin + * \param gpu_split fraction of particles handled by device + * + * Returns: + * - 0 if successful + * - -1 if fix gpu not found + * - -3 if there is an out of memory error + * - -4 if the GPU library was not compiled for GPU + * - -5 Double precision is not supported on card **/ + int init(const int ntypes, double **host_cutsq, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, double *host_special_lj, + const int nlocal, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + const double gpu_split, FILE *screen, + double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, + double **host_ljsw4, + double **cut_inner, double **cut_inner_sq); + + /// Send updated coeffs from host to device (to be compatible with fix adapt) + void reinit(const int ntypes, double **host_cutsq, + double **host_lj1, double **host_lj2, double **host_lj3, + double **host_lj4, double **host_offset, + double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, + double **host_ljsw4, + double **cut_inner, double **cut_inner_sq); + + /// Clear all host and device data + /** \note This is called at the beginning of the init() routine **/ + void clear(); + + /// Returns memory usage on device per atom + int bytes_per_atom(const int max_nbors) const; + + /// Total host memory used by library for pair style + double host_memory_usage() const; + + // --------------------------- TYPE DATA -------------------------- + + /// lj1.x = lj1, lj1.y = lj2, lj1.z = cutsq, lj1.w = cut_inner_sq + UCL_D_Vec lj1; + /// lj3.x = lj3, lj3.y = lj4, lj3.z = offset, lj3.w = cut_inner + UCL_D_Vec lj3; + /// ljsw.x = ljsw1, ljsw.y = ljsw2, ljsw.z = ljsw3, ljsw.w = ljsw4 + UCL_D_Vec ljsw; + /// Special LJ values + UCL_D_Vec sp_lj; + + /// If atom type constants fit in shared memory, use fast kernels + bool shared_types; + + /// Number of atom types + int _lj_types; + + private: + bool _allocated; + void loop(const bool _eflag, const bool _vflag); +}; + +} + +#endif diff --git a/lib/gpu/lal_lj_smooth_ext.cpp b/lib/gpu/lal_lj_smooth_ext.cpp new file mode 100644 index 0000000000..92624bf7fa --- /dev/null +++ b/lib/gpu/lal_lj_smooth_ext.cpp @@ -0,0 +1,146 @@ +/*************************************************************************** + lj_smooth_ext.cpp + ------------------- + W. Michael Brown (ORNL) + + Functions for LAMMPS access to lj/smooth acceleration routines. + + __________________________________________________________________________ + This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) + __________________________________________________________________________ + + begin : + email : brownw@ornl.gov + ***************************************************************************/ + +#include +#include +#include + +#include "lal_lj_smooth.h" + +using namespace std; +using namespace LAMMPS_AL; + +static LJSMOOTH LJSMTMF; + +// --------------------------------------------------------------------------- +// Allocate memory on host and device and copy constants to device +// --------------------------------------------------------------------------- +int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int inum, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, + double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, + double **host_ljsw4, double **cut_inner, double **cut_inner_sq) { + LJSMTMF.clear(); + gpu_mode=LJSMTMF.device->gpu_mode(); + double gpu_split=LJSMTMF.device->particle_split(); + int first_gpu=LJSMTMF.device->first_device(); + int last_gpu=LJSMTMF.device->last_device(); + int world_me=LJSMTMF.device->world_me(); + int gpu_rank=LJSMTMF.device->gpu_rank(); + int procs_per_gpu=LJSMTMF.device->procs_per_gpu(); + + LJSMTMF.device->init_message(screen,"lj/smooth",first_gpu,last_gpu); + + bool message=false; + if (LJSMTMF.device->replica_me()==0 && screen) + message=true; + + if (message) { + fprintf(screen,"Initializing Device and compiling on process 0..."); + fflush(screen); + } + + int init_ok=0; + if (world_me==0) + init_ok=LJSMTMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, + host_lj4, offset, special_lj, inum, nall, 300, + maxspecial, cell_size, gpu_split, screen, + host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq); + + LJSMTMF.device->world_barrier(); + if (message) + fprintf(screen,"Done.\n"); + + for (int i=0; igpu_barrier(); + if (message) + fprintf(screen,"Done.\n"); + } + if (message) + fprintf(screen,"\n"); + + if (init_ok==0) + LJSMTMF.estimate_gpu_overhead(); + return init_ok; +} + +// --------------------------------------------------------------------------- +// Copy updated coeffs from host to device +// --------------------------------------------------------------------------- +void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, + double **host_ljsw4, double **cut_inner, double **cut_inner_sq) { + int world_me=LJSMTMF.device->world_me(); + int gpu_rank=LJSMTMF.device->gpu_rank(); + int procs_per_gpu=LJSMTMF.device->procs_per_gpu(); + + if (world_me==0) + LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq); + LJSMTMF.device->world_barrier(); + + for (int i=0; igpu_barrier(); + } +} + +void ljsmt_gpu_clear() { + LJSMTMF.clear(); +} + +int ** ljsmt_gpu_compute_n(const int ago, const int inum_full, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success) { + return LJSMTMF.compute(ago, inum_full, nall, host_x, host_type, sublo, + subhi, tag, nspecial, special, eflag, vflag, eatom, + vatom, host_start, ilist, jnum, cpu_time, success); +} + +void ljsmt_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success) { + LJSMTMF.compute(ago,inum_full,nall,host_x,host_type,ilist,numj, + firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success); +} + +double ljsmt_gpu_bytes() { + return LJSMTMF.host_memory_usage(); +} + + diff --git a/src/GPU/pair_lj_smooth_gpu.cpp b/src/GPU/pair_lj_smooth_gpu.cpp index 4ea5cce92e..882055e84d 100644 --- a/src/GPU/pair_lj_smooth_gpu.cpp +++ b/src/GPU/pair_lj_smooth_gpu.cpp @@ -40,34 +40,40 @@ using namespace LAMMPS_NS; // External functions from cuda library for atom decomposition -int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, +int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **offset, double *special_lj, const int nlocal, const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen); + const double cell_size, int &gpu_mode, FILE *screen, + double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, + double **host_ljsw4, + double **cut_inner, double **cut_innersq); -void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, +void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, - double **offset); + double **offset, + double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, + double **host_ljsw4, + double **cut_inner, double **cut_innersq); -void ljl_gpu_clear(); -int ** ljl_gpu_compute_n(const int ago, const int inum, +void ljsmt_gpu_clear(); +int ** ljsmt_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, int **ilist, int **jnum, const double cpu_time, bool &success); -void ljl_gpu_compute(const int ago, const int inum, const int nall, +void ljsmt_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag, const bool eatom, const bool vatom, int &host_start, const double cpu_time, bool &success); -double ljl_gpu_bytes(); +double ljsmt_gpu_bytes(); /* ---------------------------------------------------------------------- */ -PairLJSmoothGPU::PairLJSmoothGPU(LAMMPS *lmp) : PairLJCut(lmp), gpu_mode(GPU_FORCE) +PairLJSmoothGPU::PairLJSmoothGPU(LAMMPS *lmp) : PairLJSmooth(lmp), gpu_mode(GPU_FORCE) { respa_enable = 0; cpu_time = 0.0; @@ -81,7 +87,7 @@ PairLJSmoothGPU::PairLJSmoothGPU(LAMMPS *lmp) : PairLJCut(lmp), gpu_mode(GPU_FOR PairLJSmoothGPU::~PairLJSmoothGPU() { - ljl_gpu_clear(); + ljsmt_gpu_clear(); } /* ---------------------------------------------------------------------- */ @@ -108,7 +114,7 @@ void PairLJSmoothGPU::compute(int eflag, int vflag) domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi); } inum = atom->nlocal; - firstneigh = ljl_gpu_compute_n(neighbor->ago, inum, nall, + firstneigh = ljsmt_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom, @@ -119,7 +125,7 @@ void PairLJSmoothGPU::compute(int eflag, int vflag) ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - ljl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ljsmt_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success); } @@ -140,10 +146,10 @@ void PairLJSmoothGPU::compute(int eflag, int vflag) void PairLJSmoothGPU::init_style() { - cut_respa = nullptr; + //cut_respa = nullptr; if (force->newton_pair) - error->all(FLERR,"Cannot use newton pair with lj/cut/gpu pair style"); + error->all(FLERR,"Cannot use newton pair with lj/smooth/gpu pair style"); // Repeat cutsq calculation because done after call to init_style double maxcut = -1.0; @@ -165,10 +171,11 @@ void PairLJSmoothGPU::init_style() int maxspecial=0; if (atom->molecular) maxspecial=atom->maxspecial; - int success = ljl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, + int success = ljsmt_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj, atom->nlocal, atom->nlocal+atom->nghost, 300, maxspecial, - cell_size, gpu_mode, screen); + cell_size, gpu_mode, screen, ljsw1, ljsw2, + ljsw3, ljsw4, cut_inner, cut_inner_sq); GPU_EXTRA::check_flag(success,error,world); if (gpu_mode == GPU_FORCE) { @@ -184,7 +191,7 @@ void PairLJSmoothGPU::reinit() { Pair::reinit(); - ljl_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset); + ljsmt_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, ljsw1, ljsw2, ljsw3, ljsw4, cut_inner, cut_inner_sq); } /* ---------------------------------------------------------------------- */ @@ -192,7 +199,7 @@ void PairLJSmoothGPU::reinit() double PairLJSmoothGPU::memory_usage() { double bytes = Pair::memory_usage(); - return bytes + ljl_gpu_bytes(); + return bytes + ljsmt_gpu_bytes(); } /* ---------------------------------------------------------------------- */ @@ -202,6 +209,7 @@ void PairLJSmoothGPU::cpu_compute(int start, int inum, int eflag, int /* vflag * int i,j,ii,jj,jnum,itype,jtype; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; double rsq,r2inv,r6inv,forcelj,factor_lj; + double r,t,tsq,fskin; int *jlist; double **x = atom->x; @@ -233,8 +241,18 @@ void PairLJSmoothGPU::cpu_compute(int start, int inum, int eflag, int /* vflag * if (rsq < cutsq[itype][jtype]) { r2inv = 1.0/rsq; - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + if (rsq < cut_inner_sq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv-lj2[itype][jtype]); + } else { + r = sqrt(rsq); + t = r - cut_inner[itype][jtype]; + tsq = t*t; + fskin = ljsw1[itype][jtype] + ljsw2[itype][jtype]*t + + ljsw3[itype][jtype]*tsq + ljsw4[itype][jtype]*tsq*t; + forcelj = fskin*r; + } + fpair = factor_lj*forcelj*r2inv; f[i][0] += delx*fpair; @@ -242,8 +260,13 @@ void PairLJSmoothGPU::cpu_compute(int start, int inum, int eflag, int /* vflag * f[i][2] += delz*fpair; if (eflag) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; + if (rsq < cut_inner_sq[itype][jtype]) + evdwl = r6inv * (lj3[itype][jtype]*r6inv - + lj4[itype][jtype]) - offset[itype][jtype]; + else + evdwl = ljsw0[itype][jtype] - ljsw1[itype][jtype]*t - + ljsw2[itype][jtype]*tsq/2.0 - ljsw3[itype][jtype]*tsq*t/3.0 - + ljsw4[itype][jtype]*tsq*tsq/4.0 - offset[itype][jtype]; evdwl *= factor_lj; } diff --git a/src/GPU/pair_lj_smooth_gpu.h b/src/GPU/pair_lj_smooth_gpu.h index fc245918d0..414ce4c2d2 100644 --- a/src/GPU/pair_lj_smooth_gpu.h +++ b/src/GPU/pair_lj_smooth_gpu.h @@ -20,11 +20,11 @@ PairStyle(lj/smooth/gpu,PairLJSmoothGPU) #ifndef LMP_PAIR_LJ_SMOOTH_GPU_H #define LMP_PAIR_LJ_SMOOTH_GPU_H -#include "pair_lj_cut.h" +#include "pair_lj_smooth.h" namespace LAMMPS_NS { -class PairLJSmoothGPU : public PairLJCut { +class PairLJSmoothGPU : public PairLJSmooth { public: PairLJSmoothGPU(LAMMPS *lmp); ~PairLJSmoothGPU(); @@ -52,7 +52,7 @@ E: Insufficient memory on accelerator There is insufficient memory on one of the devices specified for the gpu package -E: Cannot use newton pair with lj/cut/gpu pair style +E: Cannot use newton pair with lj/smooth/gpu pair style Self-explanatory. From ca88f97a4bf6eec438c8719751aca705c131819c Mon Sep 17 00:00:00 2001 From: Gurgen Date: Fri, 12 Mar 2021 01:40:52 +0300 Subject: [PATCH 04/19] added acceleration of lj/smooth on gpu --- lib/gpu/lal_lj_smooth.cpp | 19 +++++++++++++------ lib/gpu/lal_lj_smooth.cu | 10 ++++++---- lib/gpu/lal_lj_smooth.h | 10 ++++++---- lib/gpu/lal_lj_smooth_ext.cpp | 12 ++++++------ src/GPU/pair_lj_smooth_gpu.cpp | 12 ++++++------ 5 files changed, 37 insertions(+), 26 deletions(-) diff --git a/lib/gpu/lal_lj_smooth.cpp b/lib/gpu/lal_lj_smooth.cpp index 5c75539660..d2ab63549d 100644 --- a/lib/gpu/lal_lj_smooth.cpp +++ b/lib/gpu/lal_lj_smooth.cpp @@ -16,7 +16,7 @@ #if defined(USE_OPENCL) #include "lj_smooth_cl.h" #elif defined(USE_CUDART) -const char *lj=0; +const char *lj_smooth=0; #else #include "lj_smooth_cubin.h" #endif @@ -51,7 +51,7 @@ int LJSMOOTHT::init(const int ntypes, const int nall, const int max_nbors, const int maxspecial, const double cell_size, const double gpu_split, FILE *_screen, - double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, + double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, double **host_ljsw4, double **cut_inner, double **cut_inner_sq) { int success; @@ -88,6 +88,10 @@ int LJSMOOTHT::init(const int ntypes, ljsw.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); this->atom->type_pack4(ntypes,lj_types,ljsw,host_write,host_ljsw1,host_ljsw2, host_ljsw3,host_ljsw4); + + ljsw0.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); + this->atom->type_pack4(ntypes,lj_types,ljsw0,host_write,host_ljsw0,cut_inner,host_ljsw2, + host_ljsw3); UCL_H_Vec dview; sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY); @@ -95,7 +99,7 @@ int LJSMOOTHT::init(const int ntypes, ucl_copy(sp_lj,dview,false); _allocated=true; - this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+ljsw.row_bytes()+sp_lj.row_bytes(); + this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+ljsw.row_bytes()+ljsw0.row_bytes()+sp_lj.row_bytes(); return 0; } @@ -103,7 +107,7 @@ template void LJSMOOTHT::reinit(const int ntypes, double **host_cutsq, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **host_offset, - double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, + double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, double **host_ljsw4, double **cut_inner, double **cut_inner_sq) { // Allocate a host write buffer for data initialization @@ -119,6 +123,8 @@ void LJSMOOTHT::reinit(const int ntypes, double **host_cutsq, double **host_lj1, host_offset, cut_inner); this->atom->type_pack4(ntypes,_lj_types,ljsw,host_write,host_ljsw1,host_ljsw2, host_ljsw3,host_ljsw4); + this->atom->type_pack4(ntypes,_lj_types,ljsw0,host_write,host_ljsw0, cut_inner, host_ljsw2, + host_ljsw3); } template @@ -130,6 +136,7 @@ void LJSMOOTHT::clear() { lj1.clear(); lj3.clear(); ljsw.clear(); + ljsw0.clear(); sp_lj.clear(); this->clear_atomic(); } @@ -165,14 +172,14 @@ void LJSMOOTHT::loop(const bool _eflag, const bool _vflag) { this->time_pair.start(); if (shared_types) { this->k_pair_fast.set_size(GX,BX); - this->k_pair_fast.run(&this->atom->x, &lj1, &lj3, &ljsw, &sp_lj, + this->k_pair_fast.run(&this->atom->x, &lj1, &lj3, &ljsw, &ljsw0, &sp_lj, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom); } else { this->k_pair.set_size(GX,BX); - this->k_pair.run(&this->atom->x, &lj1, &lj3, &ljsw, &_lj_types, &sp_lj, + this->k_pair.run(&this->atom->x, &lj1, &lj3, &ljsw, &ljsw0, &_lj_types, &sp_lj, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom); diff --git a/lib/gpu/lal_lj_smooth.cu b/lib/gpu/lal_lj_smooth.cu index b69f8b9388..345da1c702 100644 --- a/lib/gpu/lal_lj_smooth.cu +++ b/lib/gpu/lal_lj_smooth.cu @@ -28,6 +28,7 @@ __kernel void k_lj_smooth(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict lj1, const __global numtyp4 *restrict lj3, const __global numtyp4 *restrict ljsw, + const __global numtyp4 *restrict ljsw0, const int lj_types, const __global numtyp *restrict sp_lj, const __global int * dev_nbor, @@ -82,7 +83,7 @@ __kernel void k_lj_smooth(const __global numtyp4 *restrict x_, } else { r = sqrt(rsq); - t = r - lj3[mtype].w; + t = r - ljsw0[mtype].y; tsq = t*t; fskin = ljsw[mtype].x + ljsw[mtype].y*t + ljsw[mtype].z*tsq + ljsw[mtype].w*tsq*t; @@ -98,7 +99,7 @@ __kernel void k_lj_smooth(const __global numtyp4 *restrict x_, if (rsq < lj1[mtype].w) e = r6inv * (lj3[mtype].x*r6inv - lj3[mtype].y) - lj3[mtype].z; else - e = ljsw[mtype].x - ljsw[mtype].x*t - + e = ljsw0[mtype].x - ljsw[mtype].x*t - ljsw[mtype].y*tsq/2.0 - ljsw[mtype].z*tsq*t/3.0 - ljsw[mtype].z*tsq*tsq/4.0 - lj3[mtype].z; @@ -125,6 +126,7 @@ __kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict lj1_in, const __global numtyp4 *restrict lj3_in, const __global numtyp4 *restrict ljsw, + const __global numtyp4 *restrict ljsw0, const __global numtyp *restrict sp_lj_in, const __global int * dev_nbor, const __global int * dev_packed, @@ -191,7 +193,7 @@ __kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_, } else { r = sqrt(rsq); - t = r - lj3[mtype].w; + t = r - ljsw0[mtype].y; //? //printf("%f\n", r - lj3[mtype].w); tsq = t*t; fskin = ljsw[mtype].x + ljsw[mtype].y*t + @@ -208,7 +210,7 @@ __kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_, if (rsq < lj1[mtype].w) e = r6inv * (lj3[mtype].x*r6inv - lj3[mtype].y) - lj3[mtype].z; else - e = ljsw[mtype].x - ljsw[mtype].x*t - + e = ljsw0[mtype].x - ljsw[mtype].x*t - ljsw[mtype].y*tsq/2.0 - ljsw[mtype].z*tsq*t/3.0 - ljsw[mtype].z*tsq*tsq/4.0 - lj3[mtype].z; //??? diff --git a/lib/gpu/lal_lj_smooth.h b/lib/gpu/lal_lj_smooth.h index 98dbad87b7..0a4cdf78eb 100644 --- a/lib/gpu/lal_lj_smooth.h +++ b/lib/gpu/lal_lj_smooth.h @@ -43,16 +43,16 @@ class LJSMOOTH : public BaseAtomic { const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, const double gpu_split, FILE *screen, - double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, - double **host_ljsw4, + double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, + double **host_ljsw3, double **host_ljsw4, double **cut_inner, double **cut_inner_sq); /// Send updated coeffs from host to device (to be compatible with fix adapt) void reinit(const int ntypes, double **host_cutsq, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **host_offset, - double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, - double **host_ljsw4, + double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, + double **host_ljsw3, double **host_ljsw4, double **cut_inner, double **cut_inner_sq); /// Clear all host and device data @@ -73,6 +73,8 @@ class LJSMOOTH : public BaseAtomic { UCL_D_Vec lj3; /// ljsw.x = ljsw1, ljsw.y = ljsw2, ljsw.z = ljsw3, ljsw.w = ljsw4 UCL_D_Vec ljsw; + /// ljsw0 + UCL_D_Vec ljsw0; /// Special LJ values UCL_D_Vec sp_lj; diff --git a/lib/gpu/lal_lj_smooth_ext.cpp b/lib/gpu/lal_lj_smooth_ext.cpp index 92624bf7fa..fd4dfd46be 100644 --- a/lib/gpu/lal_lj_smooth_ext.cpp +++ b/lib/gpu/lal_lj_smooth_ext.cpp @@ -32,7 +32,7 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **offset, double *special_lj, const int inum, const int nall, const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen, - double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, + double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, double **host_ljsw4, double **cut_inner, double **cut_inner_sq) { LJSMTMF.clear(); gpu_mode=LJSMTMF.device->gpu_mode(); @@ -59,7 +59,7 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1, init_ok=LJSMTMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, special_lj, inum, nall, 300, maxspecial, cell_size, gpu_split, screen, - host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq); + host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq); LJSMTMF.device->world_barrier(); if (message) @@ -77,7 +77,7 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1, if (gpu_rank==i && world_me!=0) init_ok=LJSMTMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, special_lj, inum, nall, 300, maxspecial, - cell_size, gpu_split, screen, host_ljsw1, host_ljsw2, host_ljsw3, + cell_size, gpu_split, screen, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq); LJSMTMF.device->gpu_barrier(); @@ -97,19 +97,19 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1, // --------------------------------------------------------------------------- void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, + double **offset, double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, double **host_ljsw4, double **cut_inner, double **cut_inner_sq) { int world_me=LJSMTMF.device->world_me(); int gpu_rank=LJSMTMF.device->gpu_rank(); int procs_per_gpu=LJSMTMF.device->procs_per_gpu(); if (world_me==0) - LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq); + LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq); LJSMTMF.device->world_barrier(); for (int i=0; igpu_barrier(); } } diff --git a/src/GPU/pair_lj_smooth_gpu.cpp b/src/GPU/pair_lj_smooth_gpu.cpp index 882055e84d..0203350507 100644 --- a/src/GPU/pair_lj_smooth_gpu.cpp +++ b/src/GPU/pair_lj_smooth_gpu.cpp @@ -45,15 +45,15 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **offset, double *special_lj, const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen, - double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, - double **host_ljsw4, + double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, + double **host_ljsw3, double **host_ljsw4, double **cut_inner, double **cut_innersq); void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2, double **host_lj3, double **host_lj4, double **offset, - double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, - double **host_ljsw4, + double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, + double **host_ljsw3, double **host_ljsw4, double **cut_inner, double **cut_innersq); void ljsmt_gpu_clear(); @@ -174,7 +174,7 @@ void PairLJSmoothGPU::init_style() int success = ljsmt_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj, atom->nlocal, atom->nlocal+atom->nghost, 300, maxspecial, - cell_size, gpu_mode, screen, ljsw1, ljsw2, + cell_size, gpu_mode, screen, ljsw0, ljsw1, ljsw2, ljsw3, ljsw4, cut_inner, cut_inner_sq); GPU_EXTRA::check_flag(success,error,world); @@ -191,7 +191,7 @@ void PairLJSmoothGPU::reinit() { Pair::reinit(); - ljsmt_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, ljsw1, ljsw2, ljsw3, ljsw4, cut_inner, cut_inner_sq); + ljsmt_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, ljsw0, ljsw1, ljsw2, ljsw3, ljsw4, cut_inner, cut_inner_sq); } /* ---------------------------------------------------------------------- */ From 8d18051232ecde32e3abc0679086aa26e7a42e0f Mon Sep 17 00:00:00 2001 From: Gurgen Date: Thu, 18 Mar 2021 20:48:57 +0300 Subject: [PATCH 05/19] acceleration for pair_style lj/smooth --- lib/gpu/lal_lj_smooth.cpp | 17 ++++++----------- lib/gpu/lal_lj_smooth.cu | 32 ++++++++++++++++---------------- lib/gpu/lal_lj_smooth.h | 11 ++++------- 3 files changed, 26 insertions(+), 34 deletions(-) diff --git a/lib/gpu/lal_lj_smooth.cpp b/lib/gpu/lal_lj_smooth.cpp index d2ab63549d..5e4785230b 100644 --- a/lib/gpu/lal_lj_smooth.cpp +++ b/lib/gpu/lal_lj_smooth.cpp @@ -2,13 +2,10 @@ lj_smooth.cpp ------------------- W. Michael Brown (ORNL) - Class for acceleration of the lj/smooth pair style. - __________________________________________________________________________ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) __________________________________________________________________________ - begin : email : brownw@ornl.gov ***************************************************************************/ @@ -83,15 +80,14 @@ int LJSMOOTHT::init(const int ntypes, lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); this->atom->type_pack4(ntypes,lj_types,lj3,host_write,host_lj3,host_lj4, - host_offset, cut_inner); + host_offset); ljsw.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); this->atom->type_pack4(ntypes,lj_types,ljsw,host_write,host_ljsw1,host_ljsw2, host_ljsw3,host_ljsw4); ljsw0.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); - this->atom->type_pack4(ntypes,lj_types,ljsw0,host_write,host_ljsw0,cut_inner,host_ljsw2, - host_ljsw3); + this->atom->type_pack2(ntypes,lj_types,ljsw0,host_write,host_ljsw0,cut_inner); UCL_H_Vec dview; sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY); @@ -118,13 +114,12 @@ void LJSMOOTHT::reinit(const int ntypes, double **host_cutsq, double **host_lj1, host_write[i]=0.0; this->atom->type_pack4(ntypes,_lj_types,lj1,host_write,host_lj1,host_lj2, - host_cutsq, cut_inner_sq); + host_cutsq,cut_inner_sq); this->atom->type_pack4(ntypes,_lj_types,lj3,host_write,host_lj3,host_lj4, - host_offset, cut_inner); + host_offset); this->atom->type_pack4(ntypes,_lj_types,ljsw,host_write,host_ljsw1,host_ljsw2, host_ljsw3,host_ljsw4); - this->atom->type_pack4(ntypes,_lj_types,ljsw0,host_write,host_ljsw0, cut_inner, host_ljsw2, - host_ljsw3); + this->atom->type_pack2(ntypes,_lj_types,ljsw0,host_write,host_ljsw0,cut_inner); } template @@ -188,4 +183,4 @@ void LJSMOOTHT::loop(const bool _eflag, const bool _vflag) { } template class LJSMOOTH; -} +} \ No newline at end of file diff --git a/lib/gpu/lal_lj_smooth.cu b/lib/gpu/lal_lj_smooth.cu index 345da1c702..c02e1b5ae0 100644 --- a/lib/gpu/lal_lj_smooth.cu +++ b/lib/gpu/lal_lj_smooth.cu @@ -28,7 +28,7 @@ __kernel void k_lj_smooth(const __global numtyp4 *restrict x_, const __global numtyp4 *restrict lj1, const __global numtyp4 *restrict lj3, const __global numtyp4 *restrict ljsw, - const __global numtyp4 *restrict ljsw0, + const __global numtyp2 *restrict ljsw0, const int lj_types, const __global numtyp *restrict sp_lj, const __global int * dev_nbor, @@ -56,8 +56,9 @@ __kernel void k_lj_smooth(const __global numtyp4 *restrict x_, numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i]; int itype=ix.w; - numtyp factor_lj; + numtyp force, r6inv, factor_lj, forcelj; numtyp r, t, tsq, fskin; + for ( ; nbor0) { @@ -231,5 +232,4 @@ __kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_, store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii -} - +} \ No newline at end of file diff --git a/lib/gpu/lal_lj_smooth.h b/lib/gpu/lal_lj_smooth.h index 0a4cdf78eb..f869977c58 100644 --- a/lib/gpu/lal_lj_smooth.h +++ b/lib/gpu/lal_lj_smooth.h @@ -2,13 +2,10 @@ lj_smooth.h ------------------- W. Michael Brown (ORNL) - Class for acceleration of the lj/smooth pair style. - __________________________________________________________________________ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) __________________________________________________________________________ - begin : email : brownw@ornl.gov ***************************************************************************/ @@ -69,12 +66,12 @@ class LJSMOOTH : public BaseAtomic { /// lj1.x = lj1, lj1.y = lj2, lj1.z = cutsq, lj1.w = cut_inner_sq UCL_D_Vec lj1; - /// lj3.x = lj3, lj3.y = lj4, lj3.z = offset, lj3.w = cut_inner + /// lj3.x = lj3, lj3.y = lj4, lj3.z = offset UCL_D_Vec lj3; /// ljsw.x = ljsw1, ljsw.y = ljsw2, ljsw.z = ljsw3, ljsw.w = ljsw4 UCL_D_Vec ljsw; - /// ljsw0 - UCL_D_Vec ljsw0; + /// ljsw0.x = ljsw0 ljsw0.y = cut_inner + UCL_D_Vec ljsw0; /// Special LJ values UCL_D_Vec sp_lj; @@ -91,4 +88,4 @@ class LJSMOOTH : public BaseAtomic { } -#endif +#endif \ No newline at end of file From e4e20b67a8afe7471c56c01203eadbf45cc0d7af Mon Sep 17 00:00:00 2001 From: gugmelik <72472448+gugmelik@users.noreply.github.com> Date: Fri, 16 Apr 2021 20:02:32 +0300 Subject: [PATCH 06/19] Update lal_lj_smooth.cpp Added new line at the end of file. --- lib/gpu/lal_lj_smooth.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gpu/lal_lj_smooth.cpp b/lib/gpu/lal_lj_smooth.cpp index 5e4785230b..c936b7ca31 100644 --- a/lib/gpu/lal_lj_smooth.cpp +++ b/lib/gpu/lal_lj_smooth.cpp @@ -183,4 +183,4 @@ void LJSMOOTHT::loop(const bool _eflag, const bool _vflag) { } template class LJSMOOTH; -} \ No newline at end of file +} From 5b9c0ff6432ca1c209a157e8fc7db4ccaec17f95 Mon Sep 17 00:00:00 2001 From: gugmelik <72472448+gugmelik@users.noreply.github.com> Date: Fri, 16 Apr 2021 20:03:30 +0300 Subject: [PATCH 07/19] Update lal_lj_smooth.cu Added new line at the end of file. --- lib/gpu/lal_lj_smooth.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gpu/lal_lj_smooth.cu b/lib/gpu/lal_lj_smooth.cu index c02e1b5ae0..fa87e6fcee 100644 --- a/lib/gpu/lal_lj_smooth.cu +++ b/lib/gpu/lal_lj_smooth.cu @@ -232,4 +232,4 @@ __kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_, store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii -} \ No newline at end of file +} From 87a60da150c25ff87ac65d66dd3a52385be68fc8 Mon Sep 17 00:00:00 2001 From: gugmelik <72472448+gugmelik@users.noreply.github.com> Date: Fri, 16 Apr 2021 20:05:58 +0300 Subject: [PATCH 08/19] Update lal_lj_smooth.h Added new line at the end of file. --- lib/gpu/lal_lj_smooth.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gpu/lal_lj_smooth.h b/lib/gpu/lal_lj_smooth.h index f869977c58..ae7d628b3b 100644 --- a/lib/gpu/lal_lj_smooth.h +++ b/lib/gpu/lal_lj_smooth.h @@ -88,4 +88,4 @@ class LJSMOOTH : public BaseAtomic { } -#endif \ No newline at end of file +#endif From a91e904f349d49b7015345a5e5d00aeeed6649f7 Mon Sep 17 00:00:00 2001 From: Gurgen Date: Sat, 17 Apr 2021 14:56:16 +0300 Subject: [PATCH 09/19] minor changes --- lib/gpu/lal_lj_smooth.cpp | 4 ++-- lib/gpu/lal_lj_smooth.cu | 2 +- lib/gpu/lal_lj_smooth.h | 8 ++++---- lib/gpu/lal_lj_smooth_ext.cpp | 2 -- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/lib/gpu/lal_lj_smooth.cpp b/lib/gpu/lal_lj_smooth.cpp index 5e4785230b..47ca4ab6fa 100644 --- a/lib/gpu/lal_lj_smooth.cpp +++ b/lib/gpu/lal_lj_smooth.cpp @@ -145,7 +145,7 @@ double LJSMOOTHT::host_memory_usage() const { // Calculate energies, forces, and torques // --------------------------------------------------------------------------- template -void LJSMOOTHT::loop(const bool _eflag, const bool _vflag) { +int LJSMOOTHT::loop(const bool _eflag, const bool _vflag) { // Compute the block size and grid size to keep all cores busy const int BX=this->block_size(); int eflag, vflag; @@ -183,4 +183,4 @@ void LJSMOOTHT::loop(const bool _eflag, const bool _vflag) { } template class LJSMOOTH; -} \ No newline at end of file +} diff --git a/lib/gpu/lal_lj_smooth.cu b/lib/gpu/lal_lj_smooth.cu index c02e1b5ae0..fa87e6fcee 100644 --- a/lib/gpu/lal_lj_smooth.cu +++ b/lib/gpu/lal_lj_smooth.cu @@ -232,4 +232,4 @@ __kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_, store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, ans,engv); } // if ii -} \ No newline at end of file +} diff --git a/lib/gpu/lal_lj_smooth.h b/lib/gpu/lal_lj_smooth.h index f869977c58..fec39c95c6 100644 --- a/lib/gpu/lal_lj_smooth.h +++ b/lib/gpu/lal_lj_smooth.h @@ -1,13 +1,13 @@ /*************************************************************************** lj_smooth.h ------------------- - W. Michael Brown (ORNL) + G. Melikyan Class for acceleration of the lj/smooth pair style. __________________________________________________________________________ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) __________________________________________________________________________ begin : - email : brownw@ornl.gov + email : gkmelikyan@edu.hse.ru ***************************************************************************/ #ifndef LAL_LJ_SMOOTH_H @@ -83,9 +83,9 @@ class LJSMOOTH : public BaseAtomic { private: bool _allocated; - void loop(const bool _eflag, const bool _vflag); + int loop(const int _eflag, const int _vflag); }; } -#endif \ No newline at end of file +#endif diff --git a/lib/gpu/lal_lj_smooth_ext.cpp b/lib/gpu/lal_lj_smooth_ext.cpp index fd4dfd46be..aaebbe1493 100644 --- a/lib/gpu/lal_lj_smooth_ext.cpp +++ b/lib/gpu/lal_lj_smooth_ext.cpp @@ -142,5 +142,3 @@ void ljsmt_gpu_compute(const int ago, const int inum_full, const int nall, double ljsmt_gpu_bytes() { return LJSMTMF.host_memory_usage(); } - - From 18e5e42ce3f91d58cb425310ec16e52f14e7fc4d Mon Sep 17 00:00:00 2001 From: Gurgen Date: Sun, 18 Apr 2021 04:30:59 +0300 Subject: [PATCH 10/19] minor change --- lib/gpu/lal_lj_smooth.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/gpu/lal_lj_smooth.cpp b/lib/gpu/lal_lj_smooth.cpp index 47ca4ab6fa..59ebd0f636 100644 --- a/lib/gpu/lal_lj_smooth.cpp +++ b/lib/gpu/lal_lj_smooth.cpp @@ -145,7 +145,7 @@ double LJSMOOTHT::host_memory_usage() const { // Calculate energies, forces, and torques // --------------------------------------------------------------------------- template -int LJSMOOTHT::loop(const bool _eflag, const bool _vflag) { +int LJSMOOTHT::loop(const int _eflag, const int _vflag) { // Compute the block size and grid size to keep all cores busy const int BX=this->block_size(); int eflag, vflag; From c2ddce4c2649d27f58cc854d757c122539abfae9 Mon Sep 17 00:00:00 2001 From: Gurgen Date: Tue, 11 May 2021 22:13:37 +0300 Subject: [PATCH 11/19] new accelerator variant: GPU --- doc/src/pair_lj_smooth.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/src/pair_lj_smooth.rst b/doc/src/pair_lj_smooth.rst index 7ba12c89ba..d79c762f16 100644 --- a/doc/src/pair_lj_smooth.rst +++ b/doc/src/pair_lj_smooth.rst @@ -5,6 +5,7 @@ pair_style lj/smooth command ============================ Accelerator Variants: *lj/smooth/omp* +Accelerator Variants: *lj/smooth/gpu* Syntax """""" From 94a646cd012472fafc8b0e2ae2e782fd944fcc2d Mon Sep 17 00:00:00 2001 From: Gurgen Date: Tue, 11 May 2021 22:17:29 +0300 Subject: [PATCH 12/19] new accelerator variant: GPU --- doc/src/pair_lj_smooth.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/src/pair_lj_smooth.rst b/doc/src/pair_lj_smooth.rst index d79c762f16..03997b7c3d 100644 --- a/doc/src/pair_lj_smooth.rst +++ b/doc/src/pair_lj_smooth.rst @@ -1,10 +1,12 @@ .. index:: pair_style lj/smooth .. index:: pair_style lj/smooth/omp +.. index:: pair_style lj/smooth/gpu pair_style lj/smooth command ============================ Accelerator Variants: *lj/smooth/omp* + Accelerator Variants: *lj/smooth/gpu* Syntax From d3186b74f7b12a8158095be62d067d82d5bae945 Mon Sep 17 00:00:00 2001 From: Gurgen Date: Wed, 12 May 2021 00:14:27 +0300 Subject: [PATCH 13/19] updated --- lib/gpu/lal_lj_smooth.cpp | 36 ++++++++++-------- lib/gpu/lal_lj_smooth.cu | 67 +++++++++++++++++++++++----------- lib/gpu/lal_lj_smooth_ext.cpp | 4 +- src/GPU/pair_lj_smooth_gpu.cpp | 3 +- 4 files changed, 71 insertions(+), 39 deletions(-) diff --git a/lib/gpu/lal_lj_smooth.cpp b/lib/gpu/lal_lj_smooth.cpp index 59ebd0f636..4457ce79d2 100644 --- a/lib/gpu/lal_lj_smooth.cpp +++ b/lib/gpu/lal_lj_smooth.cpp @@ -51,16 +51,31 @@ int LJSMOOTHT::init(const int ntypes, double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, double **host_ljsw4, double **cut_inner, double **cut_inner_sq) { + const int max_shared_types=this->device->max_shared_types(); + + int onetype=0; + #ifdef USE_OPENCL + if (maxspecial==0) + for (int i=1; i0) { + if (onetype>0) + onetype=-1; + else if (onetype==0) + onetype=i*max_shared_types+j; + } + if (onetype<0) onetype=0; + #endif + int success; success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split, - _screen,lj_smooth,"k_lj_smooth"); + _screen,lj_smooth,"k_lj_smooth",onetype); if (success!=0) return success; // If atom type constants fit in shared memory use fast kernel int lj_types=ntypes; shared_types=false; - int max_shared_types=this->device->max_shared_types(); if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) { lj_types=max_shared_types; shared_types=true; @@ -145,19 +160,9 @@ double LJSMOOTHT::host_memory_usage() const { // Calculate energies, forces, and torques // --------------------------------------------------------------------------- template -int LJSMOOTHT::loop(const int _eflag, const int _vflag) { +int LJSMOOTHT::loop(const int eflag, const int vflag) { // Compute the block size and grid size to keep all cores busy const int BX=this->block_size(); - int eflag, vflag; - if (_eflag) - eflag=1; - else - eflag=0; - - if (_vflag) - vflag=1; - else - vflag=0; int GX=static_cast(ceil(static_cast(this->ans->inum())/ (BX/this->_threads_per_atom))); @@ -166,8 +171,8 @@ int LJSMOOTHT::loop(const int _eflag, const int _vflag) { int nbor_pitch=this->nbor->nbor_pitch(); this->time_pair.start(); if (shared_types) { - this->k_pair_fast.set_size(GX,BX); - this->k_pair_fast.run(&this->atom->x, &lj1, &lj3, &ljsw, &ljsw0, &sp_lj, + this->k_pair_sel->set_size(GX,BX); + this->k_pair_sel->run(&this->atom->x, &lj1, &lj3, &ljsw, &ljsw0, &sp_lj, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, @@ -180,6 +185,7 @@ int LJSMOOTHT::loop(const int _eflag, const int _vflag) { &ainum, &nbor_pitch, &this->_threads_per_atom); } this->time_pair.stop(); + return GX; } template class LJSMOOTH; diff --git a/lib/gpu/lal_lj_smooth.cu b/lib/gpu/lal_lj_smooth.cu index fa87e6fcee..346395513c 100644 --- a/lib/gpu/lal_lj_smooth.cu +++ b/lib/gpu/lal_lj_smooth.cu @@ -40,16 +40,20 @@ __kernel void k_lj_smooth(const __global numtyp4 *restrict x_, int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); - acctyp energy=(acctyp)0; + int n_stride; + local_allocate_store_pair(); + acctyp4 f; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; acctyp virial[6]; - for (int i=0; i<6; i++) - virial[i]=(acctyp)0; + acctyp energy, virial[6]; + if (EVFLAG) { + energy=(acctyp)0; + for (int i=0; i<6; i++) virial[i]=(acctyp)0; + } if (ii0) { + if (EVFLAG && eflag) { numtyp e; if (rsq < lj1[mtype].w) e = r6inv * (lj3[mtype].x*r6inv - lj3[mtype].y) - lj3[mtype].z; @@ -108,7 +112,7 @@ __kernel void k_lj_smooth(const __global numtyp4 *restrict x_, //numtyp e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y); energy+=factor_lj*e; } - if (vflag>0) { + if (EVFLAG && vflag) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; @@ -119,9 +123,9 @@ __kernel void k_lj_smooth(const __global numtyp4 *restrict x_, } } // for nbor - store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, - ans,engv); } // if ii + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); } __kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_, @@ -139,6 +143,7 @@ __kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_, int tid, ii, offset; atom_info(t_per_atom,ii,tid,offset); + #ifndef ONETYPE __local numtyp4 lj1[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __local numtyp4 lj3[MAX_SHARED_TYPES*MAX_SHARED_TYPES]; __local numtyp sp_lj[4]; @@ -146,40 +151,60 @@ __kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_, sp_lj[tid]=sp_lj_in[tid]; if (tid0) + if (EVFLAG && eflag) lj3[tid]=lj3_in[tid]; } + __syncthreads(); + #else + const numtyp lj1x=lj1_in[ONETYPE].x; + const numtyp lj1y=lj1_in[ONETYPE].y; + const numtyp cutsq=lj1_in[ONETYPE].z; + numtyp lj3x, lj3y, lj3z; + if (EVFLAG && eflag) { + lj3x=lj3_in[ONETYPE].x; + lj3y=lj3_in[ONETYPE].y; + lj3z=lj3_in[ONETYPE].z; + } + #endif + + int n_stride; + local_allocate_store_pair(); - acctyp energy=(acctyp)0; acctyp4 f; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; - acctyp virial[6]; - for (int i=0; i<6; i++) - virial[i]=(acctyp)0; - - __syncthreads(); + acctyp energy, virial[6]; + if (EVFLAG) { + energy=(acctyp)0; + for (int i=0; i<6; i++) virial[i]=(acctyp)0; + } if (ii0) { + if (EVFLAG && eflag) { numtyp e; if (rsq < lj1[mtype].w) e = r6inv * (lj3[mtype].x*r6inv - lj3[mtype].y) - lj3[mtype].z; @@ -218,7 +243,7 @@ __kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_, energy+=factor_lj*e; } - if (vflag>0) { + if (EVFLAG && vflag) { virial[0] += delx*delx*force; virial[1] += dely*dely*force; virial[2] += delz*delz*force; @@ -229,7 +254,7 @@ __kernel void k_lj_smooth_fast(const __global numtyp4 *restrict x_, } } // for nbor - store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, - ans,engv); } // if ii + store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag, + ans,engv); } diff --git a/lib/gpu/lal_lj_smooth_ext.cpp b/lib/gpu/lal_lj_smooth_ext.cpp index aaebbe1493..7312c8b257 100644 --- a/lib/gpu/lal_lj_smooth_ext.cpp +++ b/lib/gpu/lal_lj_smooth_ext.cpp @@ -57,7 +57,7 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1, int init_ok=0; if (world_me==0) init_ok=LJSMTMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, - host_lj4, offset, special_lj, inum, nall, 300, + host_lj4, offset, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq); @@ -76,7 +76,7 @@ int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1, } if (gpu_rank==i && world_me!=0) init_ok=LJSMTMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, - offset, special_lj, inum, nall, 300, maxspecial, + offset, special_lj, inum, nall, max_nbors, maxspecial, cell_size, gpu_split, screen, host_ljsw0, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq); diff --git a/src/GPU/pair_lj_smooth_gpu.cpp b/src/GPU/pair_lj_smooth_gpu.cpp index 0203350507..282e189180 100644 --- a/src/GPU/pair_lj_smooth_gpu.cpp +++ b/src/GPU/pair_lj_smooth_gpu.cpp @@ -171,9 +171,10 @@ void PairLJSmoothGPU::init_style() int maxspecial=0; if (atom->molecular) maxspecial=atom->maxspecial; + int mnf = 5e-2 * neighbor->oneatom; int success = ljsmt_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj, atom->nlocal, - atom->nlocal+atom->nghost, 300, maxspecial, + atom->nlocal+atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen, ljsw0, ljsw1, ljsw2, ljsw3, ljsw4, cut_inner, cut_inner_sq); GPU_EXTRA::check_flag(success,error,world); From 5865ced9c7b531954fe60fb3ed143bc01271fef0 Mon Sep 17 00:00:00 2001 From: Gurgen Date: Wed, 12 May 2021 00:25:51 +0300 Subject: [PATCH 14/19] minor change --- lib/gpu/lal_lj_smooth.cu | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/gpu/lal_lj_smooth.cu b/lib/gpu/lal_lj_smooth.cu index 346395513c..d4e61d9f78 100644 --- a/lib/gpu/lal_lj_smooth.cu +++ b/lib/gpu/lal_lj_smooth.cu @@ -45,7 +45,6 @@ __kernel void k_lj_smooth(const __global numtyp4 *restrict x_, acctyp4 f; f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0; - acctyp virial[6]; acctyp energy, virial[6]; if (EVFLAG) { energy=(acctyp)0; From 442a829b1e49e7f1f7e9cc165bece64ffce65369 Mon Sep 17 00:00:00 2001 From: Gurgen Date: Wed, 12 May 2021 00:40:38 +0300 Subject: [PATCH 15/19] changed author --- lib/gpu/lal_lj_smooth.cpp | 4 ++-- lib/gpu/lal_lj_smooth.cu | 4 ++-- lib/gpu/lal_lj_smooth.h | 2 +- lib/gpu/lal_lj_smooth_ext.cpp | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/gpu/lal_lj_smooth.cpp b/lib/gpu/lal_lj_smooth.cpp index 4457ce79d2..42ffdabcf2 100644 --- a/lib/gpu/lal_lj_smooth.cpp +++ b/lib/gpu/lal_lj_smooth.cpp @@ -1,13 +1,13 @@ /*************************************************************************** lj_smooth.cpp ------------------- - W. Michael Brown (ORNL) + Gurgen Melikyan (HSE University) Class for acceleration of the lj/smooth pair style. __________________________________________________________________________ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) __________________________________________________________________________ begin : - email : brownw@ornl.gov + email : gkmeliyan@edu.hse.ru ***************************************************************************/ #if defined(USE_OPENCL) diff --git a/lib/gpu/lal_lj_smooth.cu b/lib/gpu/lal_lj_smooth.cu index d4e61d9f78..d4a99ed3a7 100644 --- a/lib/gpu/lal_lj_smooth.cu +++ b/lib/gpu/lal_lj_smooth.cu @@ -1,7 +1,7 @@ // ************************************************************************** // lj_smooth.cu // ------------------- -// W. Michael Brown (ORNL) +// Gurgen Melikyan (HSE University) // // Device code for acceleration of the lj/smooth pair style // @@ -10,7 +10,7 @@ // __________________________________________________________________________ // // begin : -// email : brownw@ornl.gov +// email : gkmeliyan@edu.hse.ru // *************************************************************************** #if defined(NV_KERNEL) || defined(USE_HIP) diff --git a/lib/gpu/lal_lj_smooth.h b/lib/gpu/lal_lj_smooth.h index fec39c95c6..1ab517ece6 100644 --- a/lib/gpu/lal_lj_smooth.h +++ b/lib/gpu/lal_lj_smooth.h @@ -1,7 +1,7 @@ /*************************************************************************** lj_smooth.h ------------------- - G. Melikyan + Gurgen Melikyan (HSE University) Class for acceleration of the lj/smooth pair style. __________________________________________________________________________ This file is part of the LAMMPS Accelerator Library (LAMMPS_AL) diff --git a/lib/gpu/lal_lj_smooth_ext.cpp b/lib/gpu/lal_lj_smooth_ext.cpp index 7312c8b257..48dad74071 100644 --- a/lib/gpu/lal_lj_smooth_ext.cpp +++ b/lib/gpu/lal_lj_smooth_ext.cpp @@ -1,7 +1,7 @@ /*************************************************************************** lj_smooth_ext.cpp ------------------- - W. Michael Brown (ORNL) + Gurgen Melikyan (HSE University) Functions for LAMMPS access to lj/smooth acceleration routines. @@ -10,7 +10,7 @@ __________________________________________________________________________ begin : - email : brownw@ornl.gov + email : gkmeliyan@edu.hse.ru ***************************************************************************/ #include From 5cc3f88fcf84eac2bb1dd7a9f7c99e19f3613158 Mon Sep 17 00:00:00 2001 From: Gurgen Date: Wed, 12 May 2021 00:57:41 +0300 Subject: [PATCH 16/19] changed author --- src/GPU/pair_lj_smooth_gpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GPU/pair_lj_smooth_gpu.cpp b/src/GPU/pair_lj_smooth_gpu.cpp index 282e189180..b47dc6a131 100644 --- a/src/GPU/pair_lj_smooth_gpu.cpp +++ b/src/GPU/pair_lj_smooth_gpu.cpp @@ -12,7 +12,7 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Mike Brown (SNL) + Contributing author: Gurgen Melikyan (HSE University) ------------------------------------------------------------------------- */ #include "pair_lj_smooth_gpu.h" From 71da854c38652c808c4a0e7e6fb2432366664340 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 12 May 2021 00:07:32 -0400 Subject: [PATCH 17/19] small doc updates --- doc/src/Commands_pair.rst | 2 +- doc/src/pair_lj_smooth.rst | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/src/Commands_pair.rst b/doc/src/Commands_pair.rst index 40b81a2fd1..b7baaa8581 100644 --- a/doc/src/Commands_pair.rst +++ b/doc/src/Commands_pair.rst @@ -171,7 +171,7 @@ OPT. * :doc:`lj/sdk/coul/long (go) ` * :doc:`lj/sdk/coul/msm (o) ` * :doc:`lj/sf/dipole/sf (go) ` - * :doc:`lj/smooth (o) ` + * :doc:`lj/smooth (go) ` * :doc:`lj/smooth/linear (o) ` * :doc:`lj/switch3/coulgauss/long ` * :doc:`lj96/cut (go) ` diff --git a/doc/src/pair_lj_smooth.rst b/doc/src/pair_lj_smooth.rst index 03997b7c3d..a77eefd023 100644 --- a/doc/src/pair_lj_smooth.rst +++ b/doc/src/pair_lj_smooth.rst @@ -1,13 +1,11 @@ .. index:: pair_style lj/smooth -.. index:: pair_style lj/smooth/omp .. index:: pair_style lj/smooth/gpu +.. index:: pair_style lj/smooth/omp pair_style lj/smooth command ============================ -Accelerator Variants: *lj/smooth/omp* - -Accelerator Variants: *lj/smooth/gpu* +Accelerator Variants: *lj/smooth/gpu*, *lj/smooth/omp* Syntax """""" From d5c7ef113d866cea00e6bbc85a79dca1d38bc1be Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 12 May 2021 00:10:06 -0400 Subject: [PATCH 18/19] whitespace fix --- src/MOLECULE/bond_fene_expand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/MOLECULE/bond_fene_expand.h b/src/MOLECULE/bond_fene_expand.h index 6c79b1cfd0..49a8b8ab8e 100644 --- a/src/MOLECULE/bond_fene_expand.h +++ b/src/MOLECULE/bond_fene_expand.h @@ -13,7 +13,7 @@ #ifdef BOND_CLASS -BondStyle(fene / expand, BondFENEExpand) +BondStyle(fene/expand, BondFENEExpand) #else From 8dad40ea49029bcce2cf0d89ce38858b67e3057c Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 12 May 2021 00:17:12 -0400 Subject: [PATCH 19/19] apply clang-format --- src/GPU/pair_lj_smooth_gpu.cpp | 169 +++++++++++++++------------------ src/GPU/pair_lj_smooth_gpu.h | 11 +-- 2 files changed, 82 insertions(+), 98 deletions(-) diff --git a/src/GPU/pair_lj_smooth_gpu.cpp b/src/GPU/pair_lj_smooth_gpu.cpp index b47dc6a131..1613e18fa3 100644 --- a/src/GPU/pair_lj_smooth_gpu.cpp +++ b/src/GPU/pair_lj_smooth_gpu.cpp @@ -16,59 +16,52 @@ ------------------------------------------------------------------------- */ #include "pair_lj_smooth_gpu.h" -#include -#include -#include #include "atom.h" #include "atom_vec.h" #include "comm.h" +#include "domain.h" +#include "error.h" #include "force.h" -#include "neighbor.h" -#include "neigh_list.h" +#include "gpu_extra.h" #include "integrate.h" #include "memory.h" -#include "error.h" +#include "neigh_list.h" #include "neigh_request.h" +#include "neighbor.h" +#include "suffix.h" #include "universe.h" #include "update.h" -#include "domain.h" -#include "gpu_extra.h" -#include "suffix.h" + +#include +#include using namespace LAMMPS_NS; -// External functions from cuda library for atom decomposition +// External functions from gpu library for atom decomposition -int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double *special_lj, const int nlocal, - const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen, - double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, - double **host_ljsw3, double **host_ljsw4, - double **cut_inner, double **cut_innersq); +int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2, + double **host_lj3, double **host_lj4, double **offset, double *special_lj, + const int nlocal, const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, double **host_ljsw0, + double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, + double **host_ljsw4, double **cut_inner, double **cut_innersq); -void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, - double **host_ljsw0, double **host_ljsw1, double **host_ljsw2, - double **host_ljsw3, double **host_ljsw4, - double **cut_inner, double **cut_innersq); +void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2, + double **host_lj3, double **host_lj4, double **offset, double **host_ljsw0, + double **host_ljsw1, double **host_ljsw2, double **host_ljsw3, + double **host_ljsw4, double **cut_inner, double **cut_innersq); void ljsmt_gpu_clear(); -int ** ljsmt_gpu_compute_n(const int ago, const int inum, - const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, tagint *tag, int **nspecial, - tagint **special, const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **jnum, - const double cpu_time, bool &success); -void ljsmt_gpu_compute(const int ago, const int inum, const int nall, - double **host_x, int *host_type, int *ilist, int *numj, - int **firstneigh, const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success); +int **ljsmt_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x, + int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial, + tagint **special, const bool eflag, const bool vflag, const bool eatom, + const bool vatom, int &host_start, int **ilist, int **jnum, + const double cpu_time, bool &success); +void ljsmt_gpu_compute(const int ago, const int inum, const int nall, double **host_x, + int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag, + const bool vflag, const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success); double ljsmt_gpu_bytes(); /* ---------------------------------------------------------------------- */ @@ -94,7 +87,7 @@ PairLJSmoothGPU::~PairLJSmoothGPU() void PairLJSmoothGPU::compute(int eflag, int vflag) { - ev_init(eflag,vflag); + ev_init(eflag, vflag); int nall = atom->nlocal + atom->nghost; int inum, host_start; @@ -102,7 +95,7 @@ void PairLJSmoothGPU::compute(int eflag, int vflag) bool success = true; int *ilist, *numneigh, **firstneigh; if (gpu_mode != GPU_FORCE) { - double sublo[3],subhi[3]; + double sublo[3], subhi[3]; if (domain->triclinic == 0) { sublo[0] = domain->sublo[0]; sublo[1] = domain->sublo[1]; @@ -111,28 +104,24 @@ void PairLJSmoothGPU::compute(int eflag, int vflag) subhi[1] = domain->subhi[1]; subhi[2] = domain->subhi[2]; } else { - domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi); + domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi); } inum = atom->nlocal; - firstneigh = ljsmt_gpu_compute_n(neighbor->ago, inum, nall, - atom->x, atom->type, sublo, - subhi, atom->tag, atom->nspecial, - atom->special, eflag, vflag, eflag_atom, - vflag_atom, host_start, - &ilist, &numneigh, cpu_time, success); + firstneigh = + ljsmt_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, + atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, success); } else { inum = list->inum; ilist = list->ilist; numneigh = list->numneigh; firstneigh = list->firstneigh; - ljsmt_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, - ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, - vflag_atom, host_start, cpu_time, success); + ljsmt_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh, + eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success); } - if (!success) - error->one(FLERR,"Insufficient memory on accelerator"); + if (!success) error->one(FLERR, "Insufficient memory on accelerator"); - if (host_startnewton_pair) - error->all(FLERR,"Cannot use newton pair with lj/smooth/gpu pair style"); + if (force->newton_pair) error->all(FLERR, "Cannot use newton pair with lj/smooth/gpu pair style"); // Repeat cutsq calculation because done after call to init_style double maxcut = -1.0; @@ -157,10 +145,9 @@ void PairLJSmoothGPU::init_style() for (int i = 1; i <= atom->ntypes; i++) { for (int j = i; j <= atom->ntypes; j++) { if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { - cut = init_one(i,j); + cut = init_one(i, j); cut *= cut; - if (cut > maxcut) - maxcut = cut; + if (cut > maxcut) maxcut = cut; cutsq[i][j] = cutsq[j][i] = cut; } else cutsq[i][j] = cutsq[j][i] = 0.0; @@ -168,19 +155,17 @@ void PairLJSmoothGPU::init_style() } double cell_size = sqrt(maxcut) + neighbor->skin; - int maxspecial=0; - if (atom->molecular) - maxspecial=atom->maxspecial; + int maxspecial = 0; + if (atom->molecular) maxspecial = atom->maxspecial; int mnf = 5e-2 * neighbor->oneatom; - int success = ljsmt_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, - offset, force->special_lj, atom->nlocal, - atom->nlocal+atom->nghost, mnf, maxspecial, - cell_size, gpu_mode, screen, ljsw0, ljsw1, ljsw2, - ljsw3, ljsw4, cut_inner, cut_inner_sq); - GPU_EXTRA::check_flag(success,error,world); + int success = + ljsmt_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj, + atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, + gpu_mode, screen, ljsw0, ljsw1, ljsw2, ljsw3, ljsw4, cut_inner, cut_inner_sq); + GPU_EXTRA::check_flag(success, error, world); if (gpu_mode == GPU_FORCE) { - int irequest = neighbor->request(this,instance_me); + int irequest = neighbor->request(this, instance_me); neighbor->requests[irequest]->half = 0; neighbor->requests[irequest]->full = 1; } @@ -192,7 +177,8 @@ void PairLJSmoothGPU::reinit() { Pair::reinit(); - ljsmt_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, ljsw0, ljsw1, ljsw2, ljsw3, ljsw4, cut_inner, cut_inner_sq); + ljsmt_gpu_reinit(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, ljsw0, ljsw1, ljsw2, ljsw3, + ljsw4, cut_inner, cut_inner_sq); } /* ---------------------------------------------------------------------- */ @@ -205,12 +191,13 @@ double PairLJSmoothGPU::memory_usage() /* ---------------------------------------------------------------------- */ -void PairLJSmoothGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, - int *ilist, int *numneigh, int **firstneigh) { - int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; - double rsq,r2inv,r6inv,forcelj,factor_lj; - double r,t,tsq,fskin; +void PairLJSmoothGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist, + int *numneigh, int **firstneigh) +{ + int i, j, ii, jj, jnum, itype, jtype; + double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair; + double rsq, r2inv, r6inv, forcelj, factor_lj; + double r, t, tsq, fskin; int *jlist; double **x = atom->x; @@ -237,43 +224,41 @@ void PairLJSmoothGPU::cpu_compute(int start, int inum, int eflag, int /* vflag * delx = xtmp - x[j][0]; dely = ytmp - x[j][1]; delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; + rsq = delx * delx + dely * dely + delz * delz; jtype = type[j]; if (rsq < cutsq[itype][jtype]) { - r2inv = 1.0/rsq; + r2inv = 1.0 / rsq; if (rsq < cut_inner_sq[itype][jtype]) { - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv-lj2[itype][jtype]); + r6inv = r2inv * r2inv * r2inv; + forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]); } else { r = sqrt(rsq); t = r - cut_inner[itype][jtype]; - tsq = t*t; - fskin = ljsw1[itype][jtype] + ljsw2[itype][jtype]*t + - ljsw3[itype][jtype]*tsq + ljsw4[itype][jtype]*tsq*t; - forcelj = fskin*r; + tsq = t * t; + fskin = ljsw1[itype][jtype] + ljsw2[itype][jtype] * t + ljsw3[itype][jtype] * tsq + + ljsw4[itype][jtype] * tsq * t; + forcelj = fskin * r; } - fpair = factor_lj*forcelj*r2inv; + fpair = factor_lj * forcelj * r2inv; - f[i][0] += delx*fpair; - f[i][1] += dely*fpair; - f[i][2] += delz*fpair; + f[i][0] += delx * fpair; + f[i][1] += dely * fpair; + f[i][2] += delz * fpair; if (eflag) { if (rsq < cut_inner_sq[itype][jtype]) - evdwl = r6inv * (lj3[itype][jtype]*r6inv - - lj4[itype][jtype]) - offset[itype][jtype]; + evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype]; else - evdwl = ljsw0[itype][jtype] - ljsw1[itype][jtype]*t - - ljsw2[itype][jtype]*tsq/2.0 - ljsw3[itype][jtype]*tsq*t/3.0 - - ljsw4[itype][jtype]*tsq*tsq/4.0 - offset[itype][jtype]; + evdwl = ljsw0[itype][jtype] - ljsw1[itype][jtype] * t - + ljsw2[itype][jtype] * tsq / 2.0 - ljsw3[itype][jtype] * tsq * t / 3.0 - + ljsw4[itype][jtype] * tsq * tsq / 4.0 - offset[itype][jtype]; evdwl *= factor_lj; } - if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz); + if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz); } } } } - diff --git a/src/GPU/pair_lj_smooth_gpu.h b/src/GPU/pair_lj_smooth_gpu.h index 414ce4c2d2..2b7f978f0c 100644 --- a/src/GPU/pair_lj_smooth_gpu.h +++ b/src/GPU/pair_lj_smooth_gpu.h @@ -12,9 +12,9 @@ ------------------------------------------------------------------------- */ #ifdef PAIR_CLASS - -PairStyle(lj/smooth/gpu,PairLJSmoothGPU) - +// clang-format off +PairStyle(lj/smooth/gpu, PairLJSmoothGPU); +// clang-format on #else #ifndef LMP_PAIR_LJ_SMOOTH_GPU_H @@ -34,14 +34,14 @@ class PairLJSmoothGPU : public PairLJSmooth { void reinit(); double memory_usage(); - enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; private: int gpu_mode; double cpu_time; }; -} +} // namespace LAMMPS_NS #endif #endif @@ -57,4 +57,3 @@ E: Cannot use newton pair with lj/smooth/gpu pair style Self-explanatory. */ -