lj/smooth/gpu

This commit is contained in:
Gurgen
2021-03-06 23:43:25 +03:00
parent 4960aeb3c8
commit e32d059d26
5 changed files with 470 additions and 25 deletions

184
lib/gpu/lal_lj_smooth.cpp Normal file
View File

@ -0,0 +1,184 @@
/***************************************************************************
lj_smooth.cpp
-------------------
W. Michael Brown (ORNL)
Class for acceleration of the lj/smooth pair style.
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
begin :
email : brownw@ornl.gov
***************************************************************************/
#if defined(USE_OPENCL)
#include "lj_smooth_cl.h"
#elif defined(USE_CUDART)
const char *lj=0;
#else
#include "lj_smooth_cubin.h"
#endif
#include "lal_lj_smooth.h"
#include <cassert>
namespace LAMMPS_AL {
#define LJSMOOTHT LJSMOOTH<numtyp, acctyp>
extern Device<PRECISION,ACC_PRECISION> device;
template <class numtyp, class acctyp>
LJSMOOTHT::LJSMOOTH() : BaseAtomic<numtyp,acctyp>(), _allocated(false) {
}
template <class numtyp, class acctyp>
LJSMOOTHT::~LJSMOOTH() {
clear();
}
template <class numtyp, class acctyp>
int LJSMOOTHT::bytes_per_atom(const int max_nbors) const {
return this->bytes_per_atom_atomic(max_nbors);
}
template <class numtyp, class acctyp>
int LJSMOOTHT::init(const int ntypes,
double **host_cutsq, double **host_lj1,
double **host_lj2, double **host_lj3,
double **host_lj4, double **host_offset,
double *host_special_lj, const int nlocal,
const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
const double gpu_split, FILE *_screen,
double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
double **host_ljsw4,
double **cut_inner, double **cut_inner_sq) {
int success;
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
_screen,lj_smooth,"k_lj_smooth");
if (success!=0)
return success;
// If atom type constants fit in shared memory use fast kernel
int lj_types=ntypes;
shared_types=false;
int max_shared_types=this->device->max_shared_types();
if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
lj_types=max_shared_types;
shared_types=true;
}
_lj_types=lj_types;
// Allocate a host write buffer for data initialization
UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
UCL_WRITE_ONLY);
for (int i=0; i<lj_types*lj_types; i++)
host_write[i]=0.0;
lj1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
this->atom->type_pack4(ntypes,lj_types,lj1,host_write,host_lj1,host_lj2,
host_cutsq, cut_inner_sq);
lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
this->atom->type_pack4(ntypes,lj_types,lj3,host_write,host_lj3,host_lj4,
host_offset, cut_inner);
ljsw.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
this->atom->type_pack4(ntypes,lj_types,ljsw,host_write,host_ljsw1,host_ljsw2,
host_ljsw3,host_ljsw4);
UCL_H_Vec<double> dview;
sp_lj.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
dview.view(host_special_lj,4,*(this->ucl_device));
ucl_copy(sp_lj,dview,false);
_allocated=true;
this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+ljsw.row_bytes()+sp_lj.row_bytes();
return 0;
}
template <class numtyp, class acctyp>
void LJSMOOTHT::reinit(const int ntypes, double **host_cutsq, double **host_lj1,
double **host_lj2, double **host_lj3,
double **host_lj4, double **host_offset,
double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
double **host_ljsw4,
double **cut_inner, double **cut_inner_sq) {
// Allocate a host write buffer for data initialization
UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
UCL_WRITE_ONLY);
for (int i=0; i<_lj_types*_lj_types; i++)
host_write[i]=0.0;
this->atom->type_pack4(ntypes,_lj_types,lj1,host_write,host_lj1,host_lj2,
host_cutsq, cut_inner_sq);
this->atom->type_pack4(ntypes,_lj_types,lj3,host_write,host_lj3,host_lj4,
host_offset, cut_inner);
this->atom->type_pack4(ntypes,_lj_types,ljsw,host_write,host_ljsw1,host_ljsw2,
host_ljsw3,host_ljsw4);
}
template <class numtyp, class acctyp>
void LJSMOOTHT::clear() {
if (!_allocated)
return;
_allocated=false;
lj1.clear();
lj3.clear();
ljsw.clear();
sp_lj.clear();
this->clear_atomic();
}
template <class numtyp, class acctyp>
double LJSMOOTHT::host_memory_usage() const {
return this->host_memory_usage_atomic()+sizeof(LJSMOOTH<numtyp,acctyp>);
}
// ---------------------------------------------------------------------------
// Calculate energies, forces, and torques
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void LJSMOOTHT::loop(const bool _eflag, const bool _vflag) {
// Compute the block size and grid size to keep all cores busy
const int BX=this->block_size();
int eflag, vflag;
if (_eflag)
eflag=1;
else
eflag=0;
if (_vflag)
vflag=1;
else
vflag=0;
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
(BX/this->_threads_per_atom)));
int ainum=this->ans->inum();
int nbor_pitch=this->nbor->nbor_pitch();
this->time_pair.start();
if (shared_types) {
this->k_pair_fast.set_size(GX,BX);
this->k_pair_fast.run(&this->atom->x, &lj1, &lj3, &ljsw, &sp_lj,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->ans->force, &this->ans->engv, &eflag,
&vflag, &ainum, &nbor_pitch,
&this->_threads_per_atom);
} else {
this->k_pair.set_size(GX,BX);
this->k_pair.run(&this->atom->x, &lj1, &lj3, &ljsw, &_lj_types, &sp_lj,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
&this->ans->force, &this->ans->engv, &eflag, &vflag,
&ainum, &nbor_pitch, &this->_threads_per_atom);
}
this->time_pair.stop();
}
template class LJSMOOTH<PRECISION,ACC_PRECISION>;
}

92
lib/gpu/lal_lj_smooth.h Normal file
View File

@ -0,0 +1,92 @@
/***************************************************************************
lj_smooth.h
-------------------
W. Michael Brown (ORNL)
Class for acceleration of the lj/smooth pair style.
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
begin :
email : brownw@ornl.gov
***************************************************************************/
#ifndef LAL_LJ_SMOOTH_H
#define LAL_LJ_SMOOTH_H
#include "lal_base_atomic.h"
namespace LAMMPS_AL {
template <class numtyp, class acctyp>
class LJSMOOTH : public BaseAtomic<numtyp, acctyp> {
public:
LJSMOOTH();
~LJSMOOTH();
/// Clear any previous data and set up for a new LAMMPS run
/** \param max_nbors initial number of rows in the neighbor matrix
* \param cell_size cutoff + skin
* \param gpu_split fraction of particles handled by device
*
* Returns:
* - 0 if successful
* - -1 if fix gpu not found
* - -3 if there is an out of memory error
* - -4 if the GPU library was not compiled for GPU
* - -5 Double precision is not supported on card **/
int init(const int ntypes, double **host_cutsq,
double **host_lj1, double **host_lj2, double **host_lj3,
double **host_lj4, double **host_offset, double *host_special_lj,
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
const double gpu_split, FILE *screen,
double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
double **host_ljsw4,
double **cut_inner, double **cut_inner_sq);
/// Send updated coeffs from host to device (to be compatible with fix adapt)
void reinit(const int ntypes, double **host_cutsq,
double **host_lj1, double **host_lj2, double **host_lj3,
double **host_lj4, double **host_offset,
double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
double **host_ljsw4,
double **cut_inner, double **cut_inner_sq);
/// Clear all host and device data
/** \note This is called at the beginning of the init() routine **/
void clear();
/// Returns memory usage on device per atom
int bytes_per_atom(const int max_nbors) const;
/// Total host memory used by library for pair style
double host_memory_usage() const;
// --------------------------- TYPE DATA --------------------------
/// lj1.x = lj1, lj1.y = lj2, lj1.z = cutsq, lj1.w = cut_inner_sq
UCL_D_Vec<numtyp4> lj1;
/// lj3.x = lj3, lj3.y = lj4, lj3.z = offset, lj3.w = cut_inner
UCL_D_Vec<numtyp4> lj3;
/// ljsw.x = ljsw1, ljsw.y = ljsw2, ljsw.z = ljsw3, ljsw.w = ljsw4
UCL_D_Vec<numtyp4> ljsw;
/// Special LJ values
UCL_D_Vec<numtyp> sp_lj;
/// If atom type constants fit in shared memory, use fast kernels
bool shared_types;
/// Number of atom types
int _lj_types;
private:
bool _allocated;
void loop(const bool _eflag, const bool _vflag);
};
}
#endif

View File

@ -0,0 +1,146 @@
/***************************************************************************
lj_smooth_ext.cpp
-------------------
W. Michael Brown (ORNL)
Functions for LAMMPS access to lj/smooth acceleration routines.
__________________________________________________________________________
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
__________________________________________________________________________
begin :
email : brownw@ornl.gov
***************************************************************************/
#include <iostream>
#include <cassert>
#include <cmath>
#include "lal_lj_smooth.h"
using namespace std;
using namespace LAMMPS_AL;
static LJSMOOTH<PRECISION,ACC_PRECISION> LJSMTMF;
// ---------------------------------------------------------------------------
// Allocate memory on host and device and copy constants to device
// ---------------------------------------------------------------------------
int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int inum,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
double **host_ljsw4, double **cut_inner, double **cut_inner_sq) {
LJSMTMF.clear();
gpu_mode=LJSMTMF.device->gpu_mode();
double gpu_split=LJSMTMF.device->particle_split();
int first_gpu=LJSMTMF.device->first_device();
int last_gpu=LJSMTMF.device->last_device();
int world_me=LJSMTMF.device->world_me();
int gpu_rank=LJSMTMF.device->gpu_rank();
int procs_per_gpu=LJSMTMF.device->procs_per_gpu();
LJSMTMF.device->init_message(screen,"lj/smooth",first_gpu,last_gpu);
bool message=false;
if (LJSMTMF.device->replica_me()==0 && screen)
message=true;
if (message) {
fprintf(screen,"Initializing Device and compiling on process 0...");
fflush(screen);
}
int init_ok=0;
if (world_me==0)
init_ok=LJSMTMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3,
host_lj4, offset, special_lj, inum, nall, 300,
maxspecial, cell_size, gpu_split, screen,
host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq);
LJSMTMF.device->world_barrier();
if (message)
fprintf(screen,"Done.\n");
for (int i=0; i<procs_per_gpu; i++) {
if (message) {
if (last_gpu-first_gpu==0)
fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
else
fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
last_gpu,i);
fflush(screen);
}
if (gpu_rank==i && world_me!=0)
init_ok=LJSMTMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
offset, special_lj, inum, nall, 300, maxspecial,
cell_size, gpu_split, screen, host_ljsw1, host_ljsw2, host_ljsw3,
host_ljsw4, cut_inner, cut_inner_sq);
LJSMTMF.device->gpu_barrier();
if (message)
fprintf(screen,"Done.\n");
}
if (message)
fprintf(screen,"\n");
if (init_ok==0)
LJSMTMF.estimate_gpu_overhead();
return init_ok;
}
// ---------------------------------------------------------------------------
// Copy updated coeffs from host to device
// ---------------------------------------------------------------------------
void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
double **host_ljsw4, double **cut_inner, double **cut_inner_sq) {
int world_me=LJSMTMF.device->world_me();
int gpu_rank=LJSMTMF.device->gpu_rank();
int procs_per_gpu=LJSMTMF.device->procs_per_gpu();
if (world_me==0)
LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq);
LJSMTMF.device->world_barrier();
for (int i=0; i<procs_per_gpu; i++) {
if (gpu_rank==i && world_me!=0)
LJSMTMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4, offset, host_ljsw1, host_ljsw2, host_ljsw3, host_ljsw4, cut_inner, cut_inner_sq);
LJSMTMF.device->gpu_barrier();
}
}
void ljsmt_gpu_clear() {
LJSMTMF.clear();
}
int ** ljsmt_gpu_compute_n(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success) {
return LJSMTMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
subhi, tag, nspecial, special, eflag, vflag, eatom,
vatom, host_start, ilist, jnum, cpu_time, success);
}
void ljsmt_gpu_compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success) {
LJSMTMF.compute(ago,inum_full,nall,host_x,host_type,ilist,numj,
firstneigh,eflag,vflag,eatom,vatom,host_start,cpu_time,success);
}
double ljsmt_gpu_bytes() {
return LJSMTMF.host_memory_usage();
}

View File

@ -40,34 +40,40 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition // External functions from cuda library for atom decomposition
int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, int ljsmt_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4, double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int nlocal, double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen); const double cell_size, int &gpu_mode, FILE *screen,
double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
double **host_ljsw4,
double **cut_inner, double **cut_innersq);
void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, void ljsmt_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4, double **host_lj2, double **host_lj3, double **host_lj4,
double **offset); double **offset,
double **host_ljsw1, double **host_ljsw2, double **host_ljsw3,
double **host_ljsw4,
double **cut_inner, double **cut_innersq);
void ljl_gpu_clear(); void ljsmt_gpu_clear();
int ** ljl_gpu_compute_n(const int ago, const int inum, int ** ljsmt_gpu_compute_n(const int ago, const int inum,
const int nall, double **host_x, int *host_type, const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag, int **nspecial, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, int **ilist, int **jnum,
const double cpu_time, bool &success); const double cpu_time, bool &success);
void ljl_gpu_compute(const int ago, const int inum, const int nall, void ljsmt_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj, double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success); const double cpu_time, bool &success);
double ljl_gpu_bytes(); double ljsmt_gpu_bytes();
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
PairLJSmoothGPU::PairLJSmoothGPU(LAMMPS *lmp) : PairLJCut(lmp), gpu_mode(GPU_FORCE) PairLJSmoothGPU::PairLJSmoothGPU(LAMMPS *lmp) : PairLJSmooth(lmp), gpu_mode(GPU_FORCE)
{ {
respa_enable = 0; respa_enable = 0;
cpu_time = 0.0; cpu_time = 0.0;
@ -81,7 +87,7 @@ PairLJSmoothGPU::PairLJSmoothGPU(LAMMPS *lmp) : PairLJCut(lmp), gpu_mode(GPU_FOR
PairLJSmoothGPU::~PairLJSmoothGPU() PairLJSmoothGPU::~PairLJSmoothGPU()
{ {
ljl_gpu_clear(); ljsmt_gpu_clear();
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -108,7 +114,7 @@ void PairLJSmoothGPU::compute(int eflag, int vflag)
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi); domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
} }
inum = atom->nlocal; inum = atom->nlocal;
firstneigh = ljl_gpu_compute_n(neighbor->ago, inum, nall, firstneigh = ljsmt_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo, atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial, subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom, atom->special, eflag, vflag, eflag_atom,
@ -119,7 +125,7 @@ void PairLJSmoothGPU::compute(int eflag, int vflag)
ilist = list->ilist; ilist = list->ilist;
numneigh = list->numneigh; numneigh = list->numneigh;
firstneigh = list->firstneigh; firstneigh = list->firstneigh;
ljl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ljsmt_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success); vflag_atom, host_start, cpu_time, success);
} }
@ -140,10 +146,10 @@ void PairLJSmoothGPU::compute(int eflag, int vflag)
void PairLJSmoothGPU::init_style() void PairLJSmoothGPU::init_style()
{ {
cut_respa = nullptr; //cut_respa = nullptr;
if (force->newton_pair) if (force->newton_pair)
error->all(FLERR,"Cannot use newton pair with lj/cut/gpu pair style"); error->all(FLERR,"Cannot use newton pair with lj/smooth/gpu pair style");
// Repeat cutsq calculation because done after call to init_style // Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0; double maxcut = -1.0;
@ -165,10 +171,11 @@ void PairLJSmoothGPU::init_style()
int maxspecial=0; int maxspecial=0;
if (atom->molecular) if (atom->molecular)
maxspecial=atom->maxspecial; maxspecial=atom->maxspecial;
int success = ljl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, int success = ljsmt_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, force->special_lj, atom->nlocal, offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, 300, maxspecial, atom->nlocal+atom->nghost, 300, maxspecial,
cell_size, gpu_mode, screen); cell_size, gpu_mode, screen, ljsw1, ljsw2,
ljsw3, ljsw4, cut_inner, cut_inner_sq);
GPU_EXTRA::check_flag(success,error,world); GPU_EXTRA::check_flag(success,error,world);
if (gpu_mode == GPU_FORCE) { if (gpu_mode == GPU_FORCE) {
@ -184,7 +191,7 @@ void PairLJSmoothGPU::reinit()
{ {
Pair::reinit(); Pair::reinit();
ljl_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset); ljsmt_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, ljsw1, ljsw2, ljsw3, ljsw4, cut_inner, cut_inner_sq);
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -192,7 +199,7 @@ void PairLJSmoothGPU::reinit()
double PairLJSmoothGPU::memory_usage() double PairLJSmoothGPU::memory_usage()
{ {
double bytes = Pair::memory_usage(); double bytes = Pair::memory_usage();
return bytes + ljl_gpu_bytes(); return bytes + ljsmt_gpu_bytes();
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
@ -202,6 +209,7 @@ void PairLJSmoothGPU::cpu_compute(int start, int inum, int eflag, int /* vflag *
int i,j,ii,jj,jnum,itype,jtype; int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r2inv,r6inv,forcelj,factor_lj; double rsq,r2inv,r6inv,forcelj,factor_lj;
double r,t,tsq,fskin;
int *jlist; int *jlist;
double **x = atom->x; double **x = atom->x;
@ -233,8 +241,18 @@ void PairLJSmoothGPU::cpu_compute(int start, int inum, int eflag, int /* vflag *
if (rsq < cutsq[itype][jtype]) { if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq; r2inv = 1.0/rsq;
r6inv = r2inv*r2inv*r2inv; if (rsq < cut_inner_sq[itype][jtype]) {
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv-lj2[itype][jtype]);
} else {
r = sqrt(rsq);
t = r - cut_inner[itype][jtype];
tsq = t*t;
fskin = ljsw1[itype][jtype] + ljsw2[itype][jtype]*t +
ljsw3[itype][jtype]*tsq + ljsw4[itype][jtype]*tsq*t;
forcelj = fskin*r;
}
fpair = factor_lj*forcelj*r2inv; fpair = factor_lj*forcelj*r2inv;
f[i][0] += delx*fpair; f[i][0] += delx*fpair;
@ -242,8 +260,13 @@ void PairLJSmoothGPU::cpu_compute(int start, int inum, int eflag, int /* vflag *
f[i][2] += delz*fpair; f[i][2] += delz*fpair;
if (eflag) { if (eflag) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - if (rsq < cut_inner_sq[itype][jtype])
offset[itype][jtype]; evdwl = r6inv * (lj3[itype][jtype]*r6inv -
lj4[itype][jtype]) - offset[itype][jtype];
else
evdwl = ljsw0[itype][jtype] - ljsw1[itype][jtype]*t -
ljsw2[itype][jtype]*tsq/2.0 - ljsw3[itype][jtype]*tsq*t/3.0 -
ljsw4[itype][jtype]*tsq*tsq/4.0 - offset[itype][jtype];
evdwl *= factor_lj; evdwl *= factor_lj;
} }

View File

@ -20,11 +20,11 @@ PairStyle(lj/smooth/gpu,PairLJSmoothGPU)
#ifndef LMP_PAIR_LJ_SMOOTH_GPU_H #ifndef LMP_PAIR_LJ_SMOOTH_GPU_H
#define LMP_PAIR_LJ_SMOOTH_GPU_H #define LMP_PAIR_LJ_SMOOTH_GPU_H
#include "pair_lj_cut.h" #include "pair_lj_smooth.h"
namespace LAMMPS_NS { namespace LAMMPS_NS {
class PairLJSmoothGPU : public PairLJCut { class PairLJSmoothGPU : public PairLJSmooth {
public: public:
PairLJSmoothGPU(LAMMPS *lmp); PairLJSmoothGPU(LAMMPS *lmp);
~PairLJSmoothGPU(); ~PairLJSmoothGPU();
@ -52,7 +52,7 @@ E: Insufficient memory on accelerator
There is insufficient memory on one of the devices specified for the gpu There is insufficient memory on one of the devices specified for the gpu
package package
E: Cannot use newton pair with lj/cut/gpu pair style E: Cannot use newton pair with lj/smooth/gpu pair style
Self-explanatory. Self-explanatory.