convert GPU package styles to use new neighbor list request API

This commit is contained in:
Axel Kohlmeyer
2022-03-08 04:37:46 -05:00
parent 005f76a9e6
commit 1aa8b64283
58 changed files with 4429 additions and 5641 deletions

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
#include "gpu_extra.h"
#include "math_special.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -37,23 +35,19 @@ using namespace MathSpecial;
// External functions from cuda library for atom decomposition
int beck_gpu_init(const int ntypes, double **cutsq, double **host_aa,
double **alpha, double **beta, double **AA, double **BB,
double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
int beck_gpu_init(const int ntypes, double **cutsq, double **host_aa, double **alpha, double **beta,
double **AA, double **BB, double *special_lj, const int nlocal, const int nall,
const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode,
FILE *screen);
void beck_gpu_clear();
int ** beck_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
const double cpu_time, bool &success);
void beck_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **beck_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void beck_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
double beck_gpu_bytes();
@ -81,7 +75,7 @@ PairBeckGPU::~PairBeckGPU()
void PairBeckGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -89,7 +83,7 @@ void PairBeckGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -98,28 +92,24 @@ void PairBeckGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = beck_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh =
beck_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
beck_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
beck_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -139,10 +129,9 @@ void PairBeckGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -150,21 +139,15 @@ void PairBeckGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = beck_gpu_init(atom->ntypes+1, cutsq, aa, alpha, beta,
AA, BB, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success = beck_gpu_init(atom->ntypes + 1, cutsq, aa, alpha, beta, AA, BB, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size,
gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -177,15 +160,15 @@ double PairBeckGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairBeckGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r5,force_beck,factor_lj;
double r,rinv;
double aaij,alphaij,betaij;
double term1,term1inv,term2,term3,term4,term5,term6;
void PairBeckGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r5, force_beck, factor_lj;
double r, rinv;
double aaij, alphaij, betaij;
double term1, term1inv, term2, term3, term4, term5, term6;
int *jlist;
double **x = atom->x;
@ -212,39 +195,39 @@ void PairBeckGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r = sqrt(rsq);
r5 = rsq*rsq*r;
r5 = rsq * rsq * r;
aaij = aa[itype][jtype];
alphaij = alpha[itype][jtype];
betaij = beta[itype][jtype];
term1 = aaij*aaij + rsq;
term2 = powint(term1,-5);
term3 = 21.672 + 30.0*aaij*aaij + 6.0*rsq;
term4 = alphaij + r5*betaij;
term5 = alphaij + 6.0*r5*betaij;
rinv = 1.0/r;
force_beck = AA[itype][jtype]*exp(-1.0*r*term4)*term5;
force_beck -= BB[itype][jtype]*r*term2*term3;
term1 = aaij * aaij + rsq;
term2 = powint(term1, -5);
term3 = 21.672 + 30.0 * aaij * aaij + 6.0 * rsq;
term4 = alphaij + r5 * betaij;
term5 = alphaij + 6.0 * r5 * betaij;
rinv = 1.0 / r;
force_beck = AA[itype][jtype] * exp(-1.0 * r * term4) * term5;
force_beck -= BB[itype][jtype] * r * term2 * term3;
fpair = factor_lj*force_beck*rinv;
fpair = factor_lj * force_beck * rinv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
term6 = powint(term1,-3);
term1inv = 1.0/term1;
evdwl = AA[itype][jtype]*exp(-1.0*r*term4);
evdwl -= BB[itype][jtype]*term6*(1.0+(2.709+3.0*aaij*aaij)*term1inv);
term6 = powint(term1, -3);
term1inv = 1.0 / term1;
evdwl = AA[itype][jtype] * exp(-1.0 * r * term4);
evdwl -= BB[itype][jtype] * term6 * (1.0 + (2.709 + 3.0 * aaij * aaij) * term1inv);
evdwl *= factor_lj;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
#include "kspace.h"
#include "math_const.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -35,14 +33,14 @@
using namespace LAMMPS_NS;
using namespace MathConst;
#define EWALD_F 1.12837917
#define EWALD_P 9.95473818e-1
#define B0 -0.1335096380159268
#define B1 -2.57839507e-1
#define B2 -1.37203639e-1
#define B3 -8.88822059e-3
#define B4 -5.80844129e-3
#define B5 1.14652755e-1
#define EWALD_F 1.12837917
#define EWALD_P 9.95473818e-1
#define B0 -0.1335096380159268
#define B1 -2.57839507e-1
#define B2 -1.37203639e-1
#define B3 -8.88822059e-3
#define B4 -5.80844129e-3
#define B5 1.14652755e-1
#define EPSILON 1.0e-20
#define EPS_EWALD 1.0e-6
@ -50,37 +48,31 @@ using namespace MathConst;
// External functions from cuda library for atom decomposition
int bornclcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
double **host_born1, double **host_born2,
double **host_born3, double **host_a,
double **host_c, double **host_d,
double **sigma, double **offset, double *special_lj,
const int inum, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double host_cut_coulsq, double *host_special_coul,
const double qqrd2e, const double g_ewald);
int bornclcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_born1,
double **host_born2, double **host_born3, double **host_a, double **host_c,
double **host_d, double **sigma, double **offset, double *special_lj,
const int inum, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
const double g_ewald);
void bornclcs_gpu_clear();
int** bornclcs_gpu_compute_n(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd);
void bornclcs_gpu_compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **bornclcs_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success, double *host_q,
double *boxlo, double *prd);
void bornclcs_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
double bornclcs_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairBornCoulLongCSGPU::PairBornCoulLongCSGPU(LAMMPS *lmp) :
PairBornCoulLongCS(lmp), gpu_mode(GPU_FORCE)
PairBornCoulLongCS(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -102,7 +94,7 @@ PairBornCoulLongCSGPU::~PairBornCoulLongCSGPU()
void PairBornCoulLongCSGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -110,7 +102,7 @@ void PairBornCoulLongCSGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -119,30 +111,25 @@ void PairBornCoulLongCSGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = bornclcs_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
firstneigh = bornclcs_gpu_compute_n(
neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
bornclcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
bornclcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh,
firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
success, atom->q, atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -164,10 +151,9 @@ void PairBornCoulLongCSGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -179,29 +165,20 @@ void PairBornCoulLongCSGPU::init_style()
// ensure use of KSpace long-range solver, set g_ewald
if (force->kspace == nullptr)
error->all(FLERR,"Pair style requires a KSpace style");
if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
g_ewald = force->kspace->g_ewald;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = bornclcs_gpu_init(atom->ntypes+1, cutsq, rhoinv,
born1, born2, born3, a, c, d, sigma,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq,
cut_coulsq, force->special_coul,
force->qqrd2e, g_ewald);
int success = bornclcs_gpu_init(
atom->ntypes + 1, cutsq, rhoinv, born1, born2, born3, a, c, d, sigma, offset,
force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size,
gpu_mode, screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success,error,world);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -214,15 +191,14 @@ double PairBornCoulLongCSGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairBornCoulLongCSGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
void PairBornCoulLongCSGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itable,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double fraction,table;
double r,rsq,rexp,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
double grij,expm2,prefactor,t,erfc,u;
int i, j, ii, jj, jnum, itable, itype, jtype;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
double fraction, table;
double r, rsq, rexp, r2inv, r6inv, forcecoul, forceborn, factor_coul, factor_lj;
double grij, expm2, prefactor, t, erfc, u;
int *jlist;
evdwl = ecoul = 0.0;
@ -256,39 +232,42 @@ void PairBornCoulLongCSGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
if (rsq < cut_coulsq) {
rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
r2inv = 1.0/rsq;
rsq +=
EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
r2inv = 1.0 / rsq;
if (!ncoultablebits || rsq <= tabinnersq) {
r = sqrt(rsq);
prefactor = qqrd2e * qtmp*q[j];
prefactor = qqrd2e * qtmp * q[j];
if (factor_coul < 1.0) {
// When bonded parts are being calculated a minimal distance (EPS_EWALD)
// has to be added to the prefactor and erfc in order to make the
// used approximation functions for the Ewald correction valid
grij = g_ewald * (r+EPS_EWALD);
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
grij = g_ewald * (r + EPS_EWALD);
expm2 = exp(-grij * grij);
t = 1.0 / (1.0 + EWALD_P * grij);
u = 1.0 - t;
erfc = t * (1.+u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
prefactor /= (r+EPS_EWALD);
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - (1.0-factor_coul));
erfc =
t * (1. + u * (B0 + u * (B1 + u * (B2 + u * (B3 + u * (B4 + u * B5)))))) * expm2;
prefactor /= (r + EPS_EWALD);
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2 - (1.0 - factor_coul));
// Additionally r2inv needs to be accordingly modified since the later
// scaling of the overall force shall be consistent
r2inv = 1.0/(rsq + EPS_EWALD_SQR);
r2inv = 1.0 / (rsq + EPS_EWALD_SQR);
} else {
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
expm2 = exp(-grij * grij);
t = 1.0 / (1.0 + EWALD_P * grij);
u = 1.0 - t;
erfc = t * (1.+u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
erfc =
t * (1. + u * (B0 + u * (B1 + u * (B2 + u * (B3 + u * (B4 + u * B5)))))) * expm2;
prefactor /= r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
}
} else {
union_int_float_t rsq_lookup;
@ -296,47 +275,51 @@ void PairBornCoulLongCSGPU::cpu_compute(int start, int inum, int eflag,
itable = rsq_lookup.i & ncoulmask;
itable >>= ncoulshiftbits;
fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
table = ftable[itable] + fraction*dftable[itable];
forcecoul = qtmp*q[j] * table;
table = ftable[itable] + fraction * dftable[itable];
forcecoul = qtmp * q[j] * table;
if (factor_coul < 1.0) {
table = ctable[itable] + fraction*dctable[itable];
prefactor = qtmp*q[j] * table;
forcecoul -= (1.0-factor_coul)*prefactor;
table = ctable[itable] + fraction * dctable[itable];
prefactor = qtmp * q[j] * table;
forcecoul -= (1.0 - factor_coul) * prefactor;
}
}
forcecoul *= r2inv;
} else forcecoul = 0;
} else
forcecoul = 0;
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
r = sqrt(rsq);
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
+ born3[itype][jtype]*r2inv*r6inv;
} else forceborn = 0.0;
r6inv = r2inv * r2inv * r2inv;
rexp = exp((sigma[itype][jtype] - r) * rhoinv[itype][jtype]);
forceborn = born1[itype][jtype] * r * rexp - born2[itype][jtype] * r6inv +
born3[itype][jtype] * r2inv * r6inv;
} else
forceborn = 0.0;
fpair = forcecoul + factor_lj*forceborn * r2inv;
fpair = forcecoul + factor_lj * forceborn * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (rsq < cut_coulsq) {
ecoul = prefactor*erfc;
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
ecoul = prefactor * erfc;
if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
} else
ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv
+ d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv +
d[itype][jtype] * r6inv * r2inv - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -26,56 +25,48 @@
#include "kspace.h"
#include "math_const.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include <cmath>
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
using namespace LAMMPS_NS;
using namespace MathConst;
// External functions from cuda library for atom decomposition
int borncl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
double **host_born1, double **host_born2,
double **host_born3, double **host_a,
double **host_c, double **host_d,
double **sigma, double **offset, double *special_lj,
const int inum, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double host_cut_coulsq, double *host_special_coul,
const double qqrd2e, const double g_ewald);
int borncl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_born1,
double **host_born2, double **host_born3, double **host_a, double **host_c,
double **host_d, double **sigma, double **offset, double *special_lj,
const int inum, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
const double g_ewald);
void borncl_gpu_clear();
int** borncl_gpu_compute_n(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd);
void borncl_gpu_compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **borncl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success, double *host_q,
double *boxlo, double *prd);
void borncl_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
double borncl_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairBornCoulLongGPU::PairBornCoulLongGPU(LAMMPS *lmp) :
PairBornCoulLong(lmp), gpu_mode(GPU_FORCE)
PairBornCoulLongGPU::PairBornCoulLongGPU(LAMMPS *lmp) : PairBornCoulLong(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -97,7 +88,7 @@ PairBornCoulLongGPU::~PairBornCoulLongGPU()
void PairBornCoulLongGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -105,7 +96,7 @@ void PairBornCoulLongGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -114,30 +105,25 @@ void PairBornCoulLongGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = borncl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
firstneigh = borncl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
borncl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
borncl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -150,8 +136,7 @@ void PairBornCoulLongGPU::compute(int eflag, int vflag)
void PairBornCoulLongGPU::init_style()
{
if (!atom->q_flag)
error->all(FLERR, "Pair style born/coul/long/gpu requires atom attribute q");
if (!atom->q_flag) error->all(FLERR, "Pair style born/coul/long/gpu requires atom attribute q");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -159,10 +144,9 @@ void PairBornCoulLongGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -174,33 +158,24 @@ void PairBornCoulLongGPU::init_style()
// ensure use of KSpace long-range solver, set g_ewald
if (force->kspace == nullptr)
error->all(FLERR,"Pair style requires a KSpace style");
if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
g_ewald = force->kspace->g_ewald;
// setup force tables
if (ncoultablebits) init_tables(cut_coul,cut_respa);
if (ncoultablebits) init_tables(cut_coul, cut_respa);
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = borncl_gpu_init(atom->ntypes+1, cutsq, rhoinv,
born1, born2, born3, a, c, d, sigma,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq,
cut_coulsq, force->special_coul,
force->qqrd2e, g_ewald);
int success = borncl_gpu_init(
atom->ntypes + 1, cutsq, rhoinv, born1, born2, born3, a, c, d, sigma, offset,
force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size,
gpu_mode, screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success,error,world);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -213,14 +188,13 @@ double PairBornCoulLongGPU::memory_usage()
/* ---------------------------------------------------------------------- */
// Host-side pair evaluation for the born/coul/long GPU style.
// NOTE(review): this text is a rendered diff. Reformatted statements appear twice (old layout,
// then new layout) and the "@ ... @" markers elide the local setup and the loop headers.
//   start, inum  - presumably bound the slice of ilist handled on the host; the loop header
//                  that uses them is elided here - TODO confirm
//   eflag        - when nonzero, ecoul and evdwl are evaluated before tallying
//   (vflag)      - unnamed parameter, not referenced in the visible body
//   ilist, numneigh, firstneigh - neighbor list arrays; their use is in the elided part
void PairBornCoulLongGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairBornCoulLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double r,rexp,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
double grij,expm2,prefactor,t,erfc;
int i, j, ii, jj, jnum, itype, jtype;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
double r, rexp, r2inv, r6inv, forcecoul, forceborn, factor_coul, factor_lj;
double grij, expm2, prefactor, t, erfc;
int *jlist;
double rsq;
@ -255,49 +229,53 @@ void PairBornCoulLongGPU::cpu_compute(int start, int inum, int eflag,
// separation vector from atom j to atom i and its squared length
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
// only pairs inside the per-type-pair cutoff contribute
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
r = sqrt(rsq);
// Coulomb part, limited to cut_coulsq: the local "erfc" is a polynomial in t scaled by
// exp(-grij^2); for factor_coul < 1 the (1 - factor_coul) share of prefactor is removed
if (rsq < cut_coulsq) {
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*q[j]/r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
} else forcecoul = 0.0;
expm2 = exp(-grij * grij);
t = 1.0 / (1.0 + EWALD_P * grij);
erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
prefactor = qqrd2e * qtmp * q[j] / r;
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
} else
forcecoul = 0.0;
// Born part, limited to cut_ljsq: exponential term plus r^-6 and r^-8 terms
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv
+ born3[itype][jtype]*r2inv*r6inv;
} else forceborn = 0.0;
r6inv = r2inv * r2inv * r2inv;
rexp = exp((sigma[itype][jtype] - r) * rhoinv[itype][jtype]);
forceborn = born1[itype][jtype] * r * rexp - born2[itype][jtype] * r6inv +
born3[itype][jtype] * r2inv * r6inv;
} else
forceborn = 0.0;
// factor_lj scales only the Born part here; factor_coul was already folded into forcecoul
fpair = (forcecoul + factor_lj*forceborn) * r2inv;
fpair = (forcecoul + factor_lj * forceborn) * r2inv;
// the force is accumulated on atom i only; the visible code never writes f[j]
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
// energies reuse prefactor, erfc, rexp and r6inv from the force section; each use is
// guarded by the same cutoff test that computed the value
if (eflag) {
if (rsq < cut_coulsq) {
ecoul = prefactor*erfc;
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
ecoul = prefactor * erfc;
if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
} else
ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv
+ d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv +
d[itype][jtype] * r6inv * r2inv - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
// tallying goes through ev_tally_full, which is given only index i
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,7 +24,6 @@
#include "gpu_extra.h"
#include "math_const.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -38,39 +36,31 @@ using namespace MathConst;
// External functions from cuda library for atom decomposition
// Prototypes of the GPU library entry points used by PairBornCoulWolfCSGPU.
// NOTE(review): this text is a rendered diff; each reformatted prototype is listed twice
// (old layout, then new layout) with the same parameter list in both copies.
//
// borncwcs_gpu_init: called from init_style(); the int result is handed to
// GPU_EXTRA::check_flag(). gpu_mode is a non-const reference and is compared with GPU_FORCE
// right after the call.
int borncwcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
double **host_born1, double **host_born2,
double **host_born3, double **host_a, double **host_c,
double **host_d, double **sigma, double **offset,
double *special_lj, const int inum, const int nall,
const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double host_cut_coulsq,
double *host_special_coul, const double qqrd2e,
const double alf, const double e_shift,
const double f_shift);
int borncwcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_born1,
double **host_born2, double **host_born3, double **host_a, double **host_c,
double **host_d, double **sigma, double **offset, double *special_lj,
const int inum, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
const double alf, const double e_shift, const double f_shift);
// borncwcs_gpu_clear: no call site is visible in this excerpt.
void borncwcs_gpu_clear();
// borncwcs_gpu_compute_n: used by compute() when gpu_mode != GPU_FORCE. The returned int ** is
// stored as firstneigh; ilist and jnum receive the addresses of the caller's ilist and numneigh
// pointers; host_start and success are non-const references that compute() reads afterwards.
int ** borncwcs_gpu_compute_n(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special,
const bool eflag, const bool vflag,
const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success,
double *host_q, double *boxlo, double *prd);
// borncwcs_gpu_compute: used by compute() when gpu_mode == GPU_FORCE, with ilist, numneigh and
// firstneigh taken from the pair style's own neighbor list.
void borncwcs_gpu_compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **borncwcs_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success, double *host_q,
double *boxlo, double *prd);
void borncwcs_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
// borncwcs_gpu_bytes: returns a double; presumably the library's memory footprint as reported
// by memory_usage() - TODO confirm, the caller's body is not shown in this excerpt.
double borncwcs_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairBornCoulWolfCSGPU::PairBornCoulWolfCSGPU(LAMMPS *lmp) : PairBornCoulWolfCS(lmp),
gpu_mode(GPU_FORCE)
PairBornCoulWolfCSGPU::PairBornCoulWolfCSGPU(LAMMPS *lmp) :
PairBornCoulWolfCS(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -92,7 +82,7 @@ PairBornCoulWolfCSGPU::~PairBornCoulWolfCSGPU()
void PairBornCoulWolfCSGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -100,7 +90,7 @@ void PairBornCoulWolfCSGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -109,30 +99,25 @@ void PairBornCoulWolfCSGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = borncwcs_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success,
atom->q, domain->boxlo, domain->prd);
firstneigh = borncwcs_gpu_compute_n(
neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
borncwcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
borncwcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh,
firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
success, atom->q, atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -154,10 +139,9 @@ void PairBornCoulWolfCSGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -167,28 +151,21 @@ void PairBornCoulWolfCSGPU::init_style()
cut_coulsq = cut_coul * cut_coul;
double e_shift = erfc(alf*cut_coul)/cut_coul;
double f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
double e_shift = erfc(alf * cut_coul) / cut_coul;
double f_shift =
-(e_shift + 2.0 * alf / MY_PIS * exp(-alf * alf * cut_coul * cut_coul)) / cut_coul;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = borncwcs_gpu_init(atom->ntypes+1, cutsq, rhoinv,
born1, born2, born3, a, c, d, sigma, offset,
force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq,
cut_coulsq, force->special_coul, force->qqrd2e,
alf, e_shift, f_shift);
GPU_EXTRA::check_flag(success,error,world);
int success =
borncwcs_gpu_init(atom->ntypes + 1, cutsq, rhoinv, born1, born2, born3, a, c, d, sigma,
offset, force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
force->special_coul, force->qqrd2e, alf, e_shift, f_shift);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -201,15 +178,15 @@ double PairBornCoulWolfCSGPU::memory_usage()
/* ---------------------------------------------------------------------- */
// Host-side pair evaluation for the born/coul/wolf/cs GPU style; compute() calls it when
// host_start < inum.
// NOTE(review): this text is a rendered diff. Reformatted statements appear twice (old layout,
// then new layout) and the "@ ... @" markers elide part of the setup and the outer loop header.
//   start, inum  - presumably bound the slice of ilist handled on the host; the loop header
//                  that uses them is elided here - TODO confirm
//   eflag        - when nonzero, ecoul and evdwl are evaluated before tallying
//   (vflag)      - unnamed parameter, not referenced in the visible body
//   numneigh, firstneigh - per-atom neighbor count and neighbor index array, indexed by i
void PairBornCoulWolfCSGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul,fpair;
double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
double erfcc,erfcd,v_sh,dvdrr,e_self,qisq;
void PairBornCoulWolfCSGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, qtmp, delx, dely, delz, evdwl, ecoul, fpair;
double rsq, r2inv, r6inv, forcecoul, forceborn, factor_coul, factor_lj;
double erfcc, erfcd, v_sh, dvdrr, e_self, qisq;
double prefactor;
double r,rexp;
double r, rexp;
int *jlist;
evdwl = ecoul = 0.0;
@ -223,9 +200,9 @@ void PairBornCoulWolfCSGPU::cpu_compute(int start, int inum, int eflag,
double *special_lj = force->special_lj;
double qqrd2e = force->qqrd2e;
// shift constants, recomputed from alf and cut_coul with the same expressions init_style() uses
double e_shift = erfc(alf*cut_coul)/cut_coul;
double f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
double e_shift = erfc(alf * cut_coul) / cut_coul;
double f_shift =
-(e_shift + 2.0 * alf / MY_PIS * exp(-alf * alf * cut_coul * cut_coul)) / cut_coul;
// loop over neighbors of my atoms
@ -239,9 +216,9 @@ void PairBornCoulWolfCSGPU::cpu_compute(int start, int inum, int eflag,
jlist = firstneigh[i];
jnum = numneigh[i];
// per-atom self term built from the squared charge; tallied once per atom i through
// ev_tally, with i passed for both index arguments
qisq = qtmp*qtmp;
e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
qisq = qtmp * qtmp;
e_self = -(e_shift / 2.0 + alf / MY_PIS) * qisq * qqrd2e;
if (evflag) ev_tally(i, i, nlocal, 0, 0.0, e_self, 0.0, 0.0, 0.0, 0.0);
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
@ -252,51 +229,56 @@ void PairBornCoulWolfCSGPU::cpu_compute(int start, int inum, int eflag,
// separation vector from atom j to atom i and its squared length
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
// the cutsq test sees the raw rsq; EPSILON is added afterwards, so the cut_coulsq and
// cut_ljsq tests below (and r, r2inv) all use the shifted value
if (rsq < cutsq[itype][jtype]) {
rsq += EPSILON; // Add EPSILON for case: r = 0; Interaction must be removed by special bond
r2inv = 1.0/rsq;
rsq +=
EPSILON; // Add EPSILON for case: r = 0; Interaction must be removed by special bond
r2inv = 1.0 / rsq;
// Coulomb part, limited to cut_coulsq: v_sh is kept for the energy section, forcecoul
// is built from dvdrr, and the (1 - factor_coul) share of prefactor is removed
if (rsq < cut_coulsq) {
r = sqrt(rsq);
prefactor = qqrd2e*qtmp*q[j]/r;
erfcc = erfc(alf*r);
erfcd = exp(-alf*alf*r*r);
v_sh = (erfcc - e_shift*r) * prefactor;
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
forcecoul = dvdrr*rsq*prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
} else forcecoul = 0.0;
prefactor = qqrd2e * qtmp * q[j] / r;
erfcc = erfc(alf * r);
erfcd = exp(-alf * alf * r * r);
v_sh = (erfcc - e_shift * r) * prefactor;
dvdrr = (erfcc / rsq + 2.0 * alf / MY_PIS * erfcd / r) + f_shift;
forcecoul = dvdrr * rsq * prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
} else
forcecoul = 0.0;
// Born part, limited to cut_ljsq; r is recomputed because the Coulomb branch above may
// have been skipped
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
r6inv = r2inv * r2inv * r2inv;
r = sqrt(rsq);
rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv +
born3[itype][jtype]*r2inv*r6inv;
} else forceborn = 0.0;
rexp = exp((sigma[itype][jtype] - r) * rhoinv[itype][jtype]);
forceborn = born1[itype][jtype] * r * rexp - born2[itype][jtype] * r6inv +
born3[itype][jtype] * r2inv * r6inv;
} else
forceborn = 0.0;
// NOTE(review): forcecoul already had the (1 - factor_coul) * prefactor share removed
// above and is multiplied by factor_coul again here, while ecoul below gets only the
// subtraction - confirm this double scaling against the non-GPU born/coul/wolf/cs style.
fpair = (factor_coul*forcecoul + factor_lj*forceborn) * r2inv;
fpair = (factor_coul * forcecoul + factor_lj * forceborn) * r2inv;
// the force is accumulated on atom i only; the visible code never writes f[j]
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
// energies reuse v_sh, prefactor, rexp and r6inv from the force section; each use is
// guarded by the same cutoff test that computed the value
if (eflag) {
if (rsq < cut_coulsq) {
ecoul = v_sh;
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
} else
ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv +
d[itype][jtype] * r6inv * r2inv - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
// tallying goes through ev_tally_full, which is given only index i
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,7 +24,6 @@
#include "gpu_extra.h"
#include "math_const.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -36,39 +34,30 @@ using namespace MathConst;
// External functions from cuda library for atom decomposition
// Prototypes of the GPU library entry points used by PairBornCoulWolfGPU.
// NOTE(review): this text is a rendered diff; each reformatted prototype is listed twice
// (old layout, then new layout) with the same parameter list in both copies.
//
// borncw_gpu_init: called from init_style(); the int result is handed to
// GPU_EXTRA::check_flag(). gpu_mode is a non-const reference and is compared with GPU_FORCE
// right after the call.
int borncw_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
double **host_born1, double **host_born2,
double **host_born3, double **host_a, double **host_c,
double **host_d, double **sigma, double **offset,
double *special_lj, const int inum,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double host_cut_coulsq,
double *host_special_coul, const double qqrd2e,
const double alf, const double e_shift,
const double f_shift);
int borncw_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_born1,
double **host_born2, double **host_born3, double **host_a, double **host_c,
double **host_d, double **sigma, double **offset, double *special_lj,
const int inum, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
const double alf, const double e_shift, const double f_shift);
// borncw_gpu_clear: no call site is visible in this excerpt.
void borncw_gpu_clear();
// borncw_gpu_compute_n: used by compute() when gpu_mode != GPU_FORCE. The returned int ** is
// stored as firstneigh; ilist and jnum receive the addresses of the caller's ilist and numneigh
// pointers; host_start and success are non-const references that compute() reads afterwards.
int ** borncw_gpu_compute_n(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag,
const bool vflag, const bool eatom,
const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd);
// borncw_gpu_compute: used by compute() when gpu_mode == GPU_FORCE, with ilist, numneigh and
// firstneigh taken from the pair style's own neighbor list.
void borncw_gpu_compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **borncw_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success, double *host_q,
double *boxlo, double *prd);
void borncw_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
// borncw_gpu_bytes: returns a double; presumably the library's memory footprint as reported
// by memory_usage() - TODO confirm, the caller's body is not shown in this excerpt.
double borncw_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairBornCoulWolfGPU::PairBornCoulWolfGPU(LAMMPS *lmp) : PairBornCoulWolf(lmp),
gpu_mode(GPU_FORCE)
PairBornCoulWolfGPU::PairBornCoulWolfGPU(LAMMPS *lmp) : PairBornCoulWolf(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -90,7 +79,7 @@ PairBornCoulWolfGPU::~PairBornCoulWolfGPU()
void PairBornCoulWolfGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -98,7 +87,7 @@ void PairBornCoulWolfGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -107,30 +96,25 @@ void PairBornCoulWolfGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = borncw_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success,
atom->q, domain->boxlo, domain->prd);
firstneigh = borncw_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
borncw_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
borncw_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -143,8 +127,7 @@ void PairBornCoulWolfGPU::compute(int eflag, int vflag)
// Style initialization for born/coul/wolf/gpu: checks for per-atom charge, rebuilds cutsq,
// derives the shift constants, initializes the GPU library and, when the library reports
// GPU_FORCE mode, requests a full neighbor list.
// NOTE(review): this text is a rendered diff. Reformatted statements appear twice (old layout,
// then new layout) and the "@ ... @" markers elide a few lines, including the definition of
// cell_size used in the borncw_gpu_init() call.
void PairBornCoulWolfGPU::init_style()
{
// the style cannot run without the per-atom charge attribute
if (!atom->q_flag)
error->all(FLERR, "Pair style born/coul/wolf/gpu requires atom attribute q");
if (!atom->q_flag) error->all(FLERR, "Pair style born/coul/wolf/gpu requires atom attribute q");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -152,10 +135,9 @@ void PairBornCoulWolfGPU::init_style()
// fill cutsq symmetrically for every i <= j type pair; "cut" holds the squared cutoff after
// "cut *= cut", so maxcut tracks the largest squared value
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -165,28 +147,21 @@ void PairBornCoulWolfGPU::init_style()
cut_coulsq = cut_coul * cut_coul;
// shift constants passed to the GPU library; cpu_compute() recomputes the same expressions
double e_shift = erfc(alf*cut_coul)/cut_coul;
double f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
double e_shift = erfc(alf * cut_coul) / cut_coul;
double f_shift =
-(e_shift + 2.0 * alf / MY_PIS * exp(-alf * alf * cut_coul * cut_coul)) / cut_coul;
// maxspecial stays 0 for atomic systems and is taken from atom->maxspecial otherwise
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
// 5% of neighbor->oneatom, truncated by the implicit double-to-int conversion; it is passed
// in the max_nbors position of borncw_gpu_init()
int mnf = 5e-2 * neighbor->oneatom;
// gpu_mode is passed by non-const reference; the returned status goes to check_flag()
int success = borncw_gpu_init(atom->ntypes+1, cutsq, rhoinv,
born1, born2, born3, a, c, d, sigma, offset,
force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq,
cut_coulsq, force->special_coul, force->qqrd2e,
alf, e_shift, f_shift);
GPU_EXTRA::check_flag(success,error,world);
int success =
borncw_gpu_init(atom->ntypes + 1, cutsq, rhoinv, born1, born2, born3, a, c, d, sigma, offset,
force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq, force->special_coul,
force->qqrd2e, alf, e_shift, f_shift);
GPU_EXTRA::check_flag(success, error, world);
// a full neighbor list is requested only in GPU_FORCE mode: the old copy sets half = 0 and
// full = 1 on the request object, the new copy calls add_request() with REQ_FULL
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -199,15 +174,15 @@ double PairBornCoulWolfGPU::memory_usage()
/* ---------------------------------------------------------------------- */
// Host-side pair evaluation for the born/coul/wolf GPU style; compute() calls it when
// host_start < inum.
// NOTE(review): this text is a rendered diff. Reformatted statements appear twice (old layout,
// then new layout) and the "@ ... @" markers elide part of the setup and the outer loop header.
//   start, inum  - presumably bound the slice of ilist handled on the host; the loop header
//                  that uses them is elided here - TODO confirm
//   eflag        - when nonzero, ecoul and evdwl are evaluated before tallying
//   (vflag)      - unnamed parameter, not referenced in the visible body
//   numneigh, firstneigh - per-atom neighbor count and neighbor index array, indexed by i
void PairBornCoulWolfGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul,fpair;
double rsq,r2inv,r6inv,forcecoul,forceborn,factor_coul,factor_lj;
double erfcc,erfcd,v_sh,dvdrr,e_self,qisq;
void PairBornCoulWolfGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, qtmp, delx, dely, delz, evdwl, ecoul, fpair;
double rsq, r2inv, r6inv, forcecoul, forceborn, factor_coul, factor_lj;
double erfcc, erfcd, v_sh, dvdrr, e_self, qisq;
double prefactor;
double r,rexp;
double r, rexp;
int *jlist;
evdwl = ecoul = 0.0;
@ -221,9 +196,9 @@ void PairBornCoulWolfGPU::cpu_compute(int start, int inum, int eflag,
double *special_lj = force->special_lj;
double qqrd2e = force->qqrd2e;
// shift constants, recomputed from alf and cut_coul with the same expressions init_style() uses
double e_shift = erfc(alf*cut_coul)/cut_coul;
double f_shift = -(e_shift+ 2.0*alf/MY_PIS * exp(-alf*alf*cut_coul*cut_coul)) /
cut_coul;
double e_shift = erfc(alf * cut_coul) / cut_coul;
double f_shift =
-(e_shift + 2.0 * alf / MY_PIS * exp(-alf * alf * cut_coul * cut_coul)) / cut_coul;
// loop over neighbors of my atoms
@ -237,9 +212,9 @@ void PairBornCoulWolfGPU::cpu_compute(int start, int inum, int eflag,
jlist = firstneigh[i];
jnum = numneigh[i];
// per-atom self term built from the squared charge; tallied once per atom i through
// ev_tally, with i passed for both index arguments
qisq = qtmp*qtmp;
e_self = -(e_shift/2.0 + alf/MY_PIS) * qisq*qqrd2e;
if (evflag) ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
qisq = qtmp * qtmp;
e_self = -(e_shift / 2.0 + alf / MY_PIS) * qisq * qqrd2e;
if (evflag) ev_tally(i, i, nlocal, 0, 0.0, e_self, 0.0, 0.0, 0.0, 0.0);
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
@ -250,50 +225,54 @@ void PairBornCoulWolfGPU::cpu_compute(int start, int inum, int eflag,
// separation vector from atom j to atom i and its squared length
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
// only pairs inside the per-type-pair cutoff contribute; unlike the born/coul/wolf/cs
// routine, rsq is used as is, with no EPSILON added before the division
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
// Coulomb part, limited to cut_coulsq: v_sh is kept for the energy section, forcecoul
// is built from dvdrr, and the (1 - factor_coul) share of prefactor is removed
if (rsq < cut_coulsq) {
r = sqrt(rsq);
prefactor = qqrd2e*qtmp*q[j]/r;
erfcc = erfc(alf*r);
erfcd = exp(-alf*alf*r*r);
v_sh = (erfcc - e_shift*r) * prefactor;
dvdrr = (erfcc/rsq + 2.0*alf/MY_PIS * erfcd/r) + f_shift;
forcecoul = dvdrr*rsq*prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
} else forcecoul = 0.0;
prefactor = qqrd2e * qtmp * q[j] / r;
erfcc = erfc(alf * r);
erfcd = exp(-alf * alf * r * r);
v_sh = (erfcc - e_shift * r) * prefactor;
dvdrr = (erfcc / rsq + 2.0 * alf / MY_PIS * erfcd / r) + f_shift;
forcecoul = dvdrr * rsq * prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
} else
forcecoul = 0.0;
// Born part, limited to cut_ljsq; r is recomputed because the Coulomb branch above may
// have been skipped
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
r6inv = r2inv * r2inv * r2inv;
r = sqrt(rsq);
rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv +
born3[itype][jtype]*r2inv*r6inv;
} else forceborn = 0.0;
rexp = exp((sigma[itype][jtype] - r) * rhoinv[itype][jtype]);
forceborn = born1[itype][jtype] * r * rexp - born2[itype][jtype] * r6inv +
born3[itype][jtype] * r2inv * r6inv;
} else
forceborn = 0.0;
// NOTE(review): forcecoul already had the (1 - factor_coul) * prefactor share removed
// above and is multiplied by factor_coul again here, while ecoul below gets only the
// subtraction - confirm this double scaling against the non-GPU born/coul/wolf style.
fpair = (factor_coul*forcecoul + factor_lj*forceborn) * r2inv;
fpair = (factor_coul * forcecoul + factor_lj * forceborn) * r2inv;
// the force is accumulated on atom i only; the visible code never writes f[j]
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
// energies reuse v_sh, prefactor, rexp and r6inv from the force section; each use is
// guarded by the same cutoff test that computed the value
if (eflag) {
if (rsq < cut_coulsq) {
ecoul = v_sh;
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
} else
ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv +
d[itype][jtype] * r6inv * r2inv - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
// tallying goes through ev_tally_full, which is given only index i
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,29 +32,23 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// Prototypes of the GPU library entry points used by PairBornGPU.
// NOTE(review): this text is a rendered diff; reformatted prototypes are listed twice (old
// layout, then new layout), and some old and new copies share their unchanged last line, so
// the text below is not a compilable sequence of declarations as it stands.
//
// born_gpu_init: called from init_style(); the int result is handed to
// GPU_EXTRA::check_flag(). gpu_mode is a non-const reference and is compared with GPU_FORCE
// right after the call.
int born_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
double **host_born1, double **host_born2, double **host_born3,
double **host_a, double **host_c, double **host_d,
double **host_sigma, double **offset, double *special_lj,
const int inum, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen);
// born_gpu_reinit: called from PairBornGPU::reinit() right after Pair::reinit(), with
// atom->ntypes + 1 and the style's coefficient arrays.
void born_gpu_reinit(const int ntypes, double **host_rhoinv,
double **host_born1, double **host_born2,
double **host_born3, double **host_a, double **host_c,
int born_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_born1,
double **host_born2, double **host_born3, double **host_a, double **host_c,
double **host_d, double **host_sigma, double **offset, double *special_lj,
const int inum, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
void born_gpu_reinit(const int ntypes, double **host_rhoinv, double **host_born1,
double **host_born2, double **host_born3, double **host_a, double **host_c,
double **host_d, double **offset);
// born_gpu_clear: no call site is visible in this excerpt.
void born_gpu_clear();
// born_gpu_compute_n: used by compute() when gpu_mode != GPU_FORCE. The returned int ** is
// stored as firstneigh; ilist and jnum receive the addresses of the caller's ilist and numneigh
// pointers; host_start and success are non-const references that compute() reads afterwards.
int ** born_gpu_compute_n(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success);
// born_gpu_compute: used by compute() when gpu_mode == GPU_FORCE, with ilist, numneigh and
// firstneigh taken from the pair style's own neighbor list.
void born_gpu_compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **born_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void born_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
// born_gpu_bytes: returns a double; presumably the library's memory footprint as reported by
// memory_usage() - TODO confirm, the caller's body is not shown in this excerpt.
double born_gpu_bytes();
@ -83,7 +75,7 @@ PairBornGPU::~PairBornGPU()
void PairBornGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -91,7 +83,7 @@ void PairBornGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -100,28 +92,24 @@ void PairBornGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = born_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh =
born_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
born_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
born_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -141,10 +129,9 @@ void PairBornGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -152,22 +139,15 @@ void PairBornGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = born_gpu_init(atom->ntypes+1, cutsq, rhoinv,
born1, born2, born3, a, c, d, sigma,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success = born_gpu_init(atom->ntypes + 1, cutsq, rhoinv, born1, born2, born3, a, c, d, sigma,
offset, force->special_lj, atom->nlocal, atom->nlocal + atom->nghost,
mnf, maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -176,8 +156,7 @@ void PairBornGPU::reinit()
{
Pair::reinit();
born_gpu_reinit(atom->ntypes+1, rhoinv, born1, born2, born3,
a, c, d, offset);
born_gpu_reinit(atom->ntypes + 1, rhoinv, born1, born2, born3, a, c, d, offset);
}
/* ---------------------------------------------------------------------- */
@ -190,13 +169,13 @@ double PairBornGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairBornGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r2inv,r6inv,forceborn,factor_lj;
double r,rexp;
void PairBornGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r2inv, r6inv, forceborn, factor_lj;
double r, rexp;
int *jlist;
double **x = atom->x;
@ -223,29 +202,29 @@ void PairBornGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r6inv = r2inv*r2inv*r2inv;
r2inv = 1.0 / rsq;
r6inv = r2inv * r2inv * r2inv;
r = sqrt(rsq);
rexp = exp((sigma[itype][jtype]-r)*rhoinv[itype][jtype]);
forceborn = born1[itype][jtype]*r*rexp - born2[itype][jtype]*r6inv +
born3[itype][jtype]*r2inv*r6inv;
fpair = factor_lj*forceborn*r2inv;
rexp = exp((sigma[itype][jtype] - r) * rhoinv[itype][jtype]);
forceborn = born1[itype][jtype] * r * rexp - born2[itype][jtype] * r6inv +
born3[itype][jtype] * r2inv * r6inv;
fpair = factor_lj * forceborn * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv +
d[itype][jtype]*r6inv*r2inv - offset[itype][jtype];
evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv +
d[itype][jtype] * r6inv * r2inv - offset[itype][jtype];
evdwl *= factor_lj;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,35 +32,29 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int buckc_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
double **host_buck1, double **host_buck2, double **host_a,
double **host_c, double **offset, double *special_lj,
const int inum, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double **host_cut_coulsq, double *host_special_coul,
int buckc_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_buck1,
double **host_buck2, double **host_a, double **host_c, double **offset,
double *special_lj, const int inum, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double **host_cut_coulsq, double *host_special_coul,
const double qqrd2e);
void buckc_gpu_clear();
int ** buckc_gpu_compute_n(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd);
void buckc_gpu_compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **buckc_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
void buckc_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
double buckc_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairBuckCoulCutGPU::PairBuckCoulCutGPU(LAMMPS *lmp) : PairBuckCoulCut(lmp),
gpu_mode(GPU_FORCE)
PairBuckCoulCutGPU::PairBuckCoulCutGPU(LAMMPS *lmp) : PairBuckCoulCut(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -84,7 +76,7 @@ PairBuckCoulCutGPU::~PairBuckCoulCutGPU()
void PairBuckCoulCutGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -92,7 +84,7 @@ void PairBuckCoulCutGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -101,30 +93,25 @@ void PairBuckCoulCutGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = buckc_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success,
atom->q, domain->boxlo, domain->prd);
firstneigh = buckc_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
buckc_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
buckc_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -137,8 +124,7 @@ void PairBuckCoulCutGPU::compute(int eflag, int vflag)
void PairBuckCoulCutGPU::init_style()
{
if (!atom->q_flag)
error->all(FLERR, "Pair style buck/coul/cut/gpu requires atom attribute q");
if (!atom->q_flag) error->all(FLERR, "Pair style buck/coul/cut/gpu requires atom attribute q");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -146,10 +132,9 @@ void PairBuckCoulCutGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -157,22 +142,16 @@ void PairBuckCoulCutGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = buckc_gpu_init(atom->ntypes+1, cutsq, rhoinv, buck1, buck2,
a, c, offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq,
cut_coulsq, force->special_coul, force->qqrd2e);
GPU_EXTRA::check_flag(success,error,world);
int success =
buckc_gpu_init(atom->ntypes + 1, cutsq, rhoinv, buck1, buck2, a, c, offset, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size,
gpu_mode, screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -185,13 +164,13 @@ double PairBuckCoulCutGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairBuckCoulCutGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul,fpair;
double rsq,r2inv,r6inv,forcecoul,forcebuck,factor_coul,factor_lj;
double r,rexp;
void PairBuckCoulCutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, qtmp, delx, dely, delz, evdwl, ecoul, fpair;
double rsq, r2inv, r6inv, forcecoul, forcebuck, factor_coul, factor_lj;
double r, rexp;
int *jlist;
evdwl = ecoul = 0.0;
@ -225,41 +204,44 @@ void PairBuckCoulCutGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
r = sqrt(rsq);
if (rsq < cut_coulsq[itype][jtype])
forcecoul = qqrd2e * qtmp*q[j]/r;
else forcecoul = 0.0;
forcecoul = qqrd2e * qtmp * q[j] / r;
else
forcecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
rexp = exp(-r*rhoinv[itype][jtype]);
forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
} else forcebuck = 0.0;
r6inv = r2inv * r2inv * r2inv;
rexp = exp(-r * rhoinv[itype][jtype]);
forcebuck = buck1[itype][jtype] * r * rexp - buck2[itype][jtype] * r6inv;
} else
forcebuck = 0.0;
fpair = (factor_coul*forcecoul + factor_lj*forcebuck) * r2inv;
fpair = (factor_coul * forcecoul + factor_lj * forcebuck) * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (rsq < cut_coulsq[itype][jtype])
ecoul = factor_coul * qqrd2e * qtmp*q[j]/r;
else ecoul = 0.0;
ecoul = factor_coul * qqrd2e * qtmp * q[j] / r;
else
ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
offset[itype][jtype];
evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,53 +24,46 @@
#include "gpu_extra.h"
#include "kspace.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include <cmath>
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int buckcl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
double **host_buck1, double **host_buck2, double **host_a,
double **host_c, double **offset, double *special_lj,
const int inum, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double host_cut_coulsq, double *host_special_coul,
int buckcl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_buck1,
double **host_buck2, double **host_a, double **host_c, double **offset,
double *special_lj, const int inum, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double host_cut_coulsq, double *host_special_coul,
const double qqrd2e, const double g_ewald);
void buckcl_gpu_clear();
int** buckcl_gpu_compute_n(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd);
void buckcl_gpu_compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **buckcl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success, double *host_q,
double *boxlo, double *prd);
void buckcl_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
double buckcl_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairBuckCoulLongGPU::PairBuckCoulLongGPU(LAMMPS *lmp) :
PairBuckCoulLong(lmp), gpu_mode(GPU_FORCE)
PairBuckCoulLongGPU::PairBuckCoulLongGPU(LAMMPS *lmp) : PairBuckCoulLong(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -93,7 +85,7 @@ PairBuckCoulLongGPU::~PairBuckCoulLongGPU()
void PairBuckCoulLongGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -101,7 +93,7 @@ void PairBuckCoulLongGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -110,30 +102,25 @@ void PairBuckCoulLongGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = buckcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
firstneigh = buckcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
buckcl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
buckcl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -146,8 +133,7 @@ void PairBuckCoulLongGPU::compute(int eflag, int vflag)
void PairBuckCoulLongGPU::init_style()
{
if (!atom->q_flag)
error->all(FLERR, "Pair style buck/coul/long/gpu requires atom attribute q");
if (!atom->q_flag) error->all(FLERR, "Pair style buck/coul/long/gpu requires atom attribute q");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -155,10 +141,9 @@ void PairBuckCoulLongGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -170,31 +155,23 @@ void PairBuckCoulLongGPU::init_style()
// insure use of KSpace long-range solver, set g_ewald
if (force->kspace == nullptr)
error->all(FLERR,"Pair style requires a KSpace style");
if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
g_ewald = force->kspace->g_ewald;
// setup force tables
if (ncoultablebits) init_tables(cut_coul,cut_respa);
if (ncoultablebits) init_tables(cut_coul, cut_respa);
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = buckcl_gpu_init(atom->ntypes+1, cutsq, rhoinv, buck1, buck2,
a, c, offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq,
cut_coulsq, force->special_coul, force->qqrd2e,
g_ewald);
GPU_EXTRA::check_flag(success,error,world);
int success = buckcl_gpu_init(atom->ntypes + 1, cutsq, rhoinv, buck1, buck2, a, c, offset,
force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
force->special_coul, force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -207,14 +184,13 @@ double PairBuckCoulLongGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairBuckCoulLongGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
void PairBuckCoulLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double r,rexp,r2inv,r6inv,forcecoul,forcebuck,factor_coul,factor_lj;
double grij,expm2,prefactor,t,erfc;
int i, j, ii, jj, jnum, itype, jtype;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
double r, rexp, r2inv, r6inv, forcecoul, forcebuck, factor_coul, factor_lj;
double grij, expm2, prefactor, t, erfc;
int *jlist;
double rsq;
@ -249,48 +225,51 @@ void PairBuckCoulLongGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
r = sqrt(rsq);
if (rsq < cut_coulsq) {
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*q[j]/r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
} else forcecoul = 0.0;
expm2 = exp(-grij * grij);
t = 1.0 / (1.0 + EWALD_P * grij);
erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
prefactor = qqrd2e * qtmp * q[j] / r;
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
} else
forcecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
rexp = exp(-r*rhoinv[itype][jtype]);
forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
} else forcebuck = 0.0;
r6inv = r2inv * r2inv * r2inv;
rexp = exp(-r * rhoinv[itype][jtype]);
forcebuck = buck1[itype][jtype] * r * rexp - buck2[itype][jtype] * r6inv;
} else
forcebuck = 0.0;
fpair = (forcecoul + factor_lj*forcebuck) * r2inv;
fpair = (forcecoul + factor_lj * forcebuck) * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (rsq < cut_coulsq) {
ecoul = prefactor*erfc;
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
ecoul = prefactor * erfc;
if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
} else
ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
offset[itype][jtype];
evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,27 +32,21 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int buck_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
double **host_buck1, double **host_buck2,
double **host_a, double **host_c,
double **offset, double *special_lj, const int inum,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
void buck_gpu_reinit(const int ntypes, double **cutsq, double **host_rhoinv,
double **host_buck1, double **host_buck2,
double **host_a, double **host_c, double **offset);
int buck_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, double **host_buck1,
double **host_buck2, double **host_a, double **host_c, double **offset,
double *special_lj, const int inum, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen);
void buck_gpu_reinit(const int ntypes, double **cutsq, double **host_rhoinv, double **host_buck1,
double **host_buck2, double **host_a, double **host_c, double **offset);
void buck_gpu_clear();
int ** buck_gpu_compute_n(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success);
void buck_gpu_compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **buck_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void buck_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
double buck_gpu_bytes();
@ -81,7 +73,7 @@ PairBuckGPU::~PairBuckGPU()
void PairBuckGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -89,7 +81,7 @@ void PairBuckGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -98,28 +90,24 @@ void PairBuckGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = buck_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh =
buck_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
buck_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
buck_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -139,10 +127,9 @@ void PairBuckGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -150,21 +137,15 @@ void PairBuckGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = buck_gpu_init(atom->ntypes+1, cutsq, rhoinv, buck1, buck2,
a, c, offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success = buck_gpu_init(atom->ntypes + 1, cutsq, rhoinv, buck1, buck2, a, c, offset,
force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -173,8 +154,7 @@ void PairBuckGPU::reinit()
{
Pair::reinit();
buck_gpu_reinit(atom->ntypes+1, cutsq, rhoinv, buck1, buck2,
a, c, offset);
buck_gpu_reinit(atom->ntypes + 1, cutsq, rhoinv, buck1, buck2, a, c, offset);
}
/* ---------------------------------------------------------------------- */
@ -187,12 +167,13 @@ double PairBuckGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairBuckGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r2inv,r6inv,forcebuck,factor_lj;
double r,rexp;
void PairBuckGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r2inv, r6inv, forcebuck, factor_lj;
double r, rexp;
int *jlist;
double **x = atom->x;
@ -219,28 +200,27 @@ void PairBuckGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r6inv = r2inv*r2inv*r2inv;
r2inv = 1.0 / rsq;
r6inv = r2inv * r2inv * r2inv;
r = sqrt(rsq);
rexp = exp(-r*rhoinv[itype][jtype]);
forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv;
fpair = factor_lj*forcebuck*r2inv;
rexp = exp(-r * rhoinv[itype][jtype]);
forcebuck = buck1[itype][jtype] * r * rexp - buck2[itype][jtype] * r6inv;
fpair = factor_lj * forcebuck * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv -
offset[itype][jtype];
evdwl = a[itype][jtype] * rexp - c[itype][jtype] * r6inv - offset[itype][jtype];
evdwl *= factor_lj;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,7 +24,6 @@
#include "gpu_extra.h"
#include "memory.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -35,26 +33,21 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// Entry points of the colloid GPU library as they are used by PairColloidGPU
// in this listing:
//  - colloid_gpu_init(): called from init_style() with atom->ntypes + 1, the
//    coefficient tables, the local and local+ghost atom counts, mnf, maxspecial
//    and cell_size; gpu_mode is taken by non-const reference and the int result
//    is handed to GPU_EXTRA::check_flag().
//  - colloid_gpu_compute_n(): called from compute() when gpu_mode != GPU_FORCE;
//    the returned int** is stored as firstneigh and the caller passes &ilist and
//    &numneigh for the int** ilist / jnum parameters.
//  - colloid_gpu_compute(): called from compute() when gpu_mode == GPU_FORCE
//    with ilist / numneigh / firstneigh taken from the pair style's neighbor list.
//  - host_start and success are non-const references; after either compute call
//    the caller errors out if !success and runs cpu_compute() when
//    host_start < inum.
//  - colloid_gpu_clear() / colloid_gpu_bytes(): call sites are outside this view.
// NOTE(review): this listing is a flattened diff; each multi-line declaration is
// shown in its pre-change wrapping followed by its post-change wrapping.
int colloid_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, double **host_a12,
double **host_a1, double **host_a2, double **host_d1,
double **host_d2, double **host_sigma3,
double **host_sigma6, int **host_form, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
int colloid_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
double **host_a12, double **host_a1, double **host_a2, double **host_d1,
double **host_d2, double **host_sigma3, double **host_sigma6, int **host_form,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
void colloid_gpu_clear();
int ** colloid_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag,
const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success);
void colloid_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **colloid_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success);
void colloid_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
double colloid_gpu_bytes();
@ -82,7 +75,7 @@ PairColloidGPU::~PairColloidGPU()
void PairColloidGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -90,7 +83,7 @@ void PairColloidGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -99,28 +92,24 @@ void PairColloidGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = colloid_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh =
colloid_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
colloid_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
colloid_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -140,10 +129,9 @@ void PairColloidGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -152,32 +140,29 @@ void PairColloidGPU::init_style()
// Tail of init_style(); the cutoff loop that precedes it is only partly visible.
// Steps visible in this hunk:
//  - cell_size = sqrt(maxcut) + neighbor->skin. maxcut holds a squared cutoff:
//    the preceding loop squares `cut` before comparing it with maxcut.
//  - _form is a temporary (n+1) x (n+1) int table that maps form[i][j] values
//    SMALL_SMALL / SMALL_LARGE / LARGE_LARGE to 0 / 1 / 2 for i, j in 1..n; it is
//    destroyed right after colloid_gpu_init() returns.
//    NOTE(review): row/column 0, and any entry whose form matches none of the
//    three cases, is never written here; whether memory->create() zero-fills is
//    not visible from this listing -- confirm.
//  - maxspecial stays 0 unless atom->molecular != Atom::ATOMIC.
//  - mnf = 5e-2 * neighbor->oneatom, truncated to int by the assignment.
//  - the int returned by colloid_gpu_init() is passed to GPU_EXTRA::check_flag().
//  - a full neighbor list is requested only when gpu_mode == GPU_FORCE. The
//    pre-change form calls neighbor->request() and sets half = 0 / full = 1 on
//    the request; the post-change form is the single
//    neighbor->add_request(this, NeighConst::REQ_FULL) call.
// NOTE(review): this listing is a flattened diff, so pre-change and post-change
// spellings of the same statements both appear below.
double cell_size = sqrt(maxcut) + neighbor->skin;
int **_form = nullptr;
int n=atom->ntypes;
memory->create(_form,n+1,n+1,"colloid/gpu:_form");
int n = atom->ntypes;
memory->create(_form, n + 1, n + 1, "colloid/gpu:_form");
for (int i = 1; i <= n; i++) {
for (int j = 1; j <= n; j++) {
if (form[i][j] == SMALL_SMALL) _form[i][j] = 0;
else if (form[i][j] == SMALL_LARGE) _form[i][j] = 1;
else if (form[i][j] == LARGE_LARGE) _form[i][j] = 2;
if (form[i][j] == SMALL_SMALL)
_form[i][j] = 0;
else if (form[i][j] == SMALL_LARGE)
_form[i][j] = 1;
else if (form[i][j] == LARGE_LARGE)
_form[i][j] = 2;
}
}
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = colloid_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, force->special_lj, a12, a1, a2,
d1, d2, sigma3, sigma6, _form, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
int success =
colloid_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj, a12,
a1, a2, d1, d2, sigma3, sigma6, _form, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
memory->destroy(_form);
GPU_EXTRA::check_flag(success,error,world);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -190,15 +175,14 @@ double PairColloidGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairColloidGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairColloidGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double r,rsq,r2inv,r6inv,forcelj,factor_lj;
double c1,c2,fR,dUR,dUA;
double K[9],h[4],g[4];
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double r, rsq, r2inv, r6inv, forcelj, factor_lj;
double c1, c2, fR, dUR, dUA;
double K[9], h[4], g[4];
int *jlist;
double **x = atom->x;
@ -225,90 +209,91 @@ void PairColloidGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq >= cutsq[itype][jtype]) continue;
switch (form[itype][jtype]) {
case SMALL_SMALL:
r2inv = 1.0/rsq;
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
fpair = factor_lj*forcelj*r2inv;
if (eflag)
evdwl = r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
offset[itype][jtype];
break;
case SMALL_SMALL:
r2inv = 1.0 / rsq;
r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
fpair = factor_lj * forcelj * r2inv;
if (eflag)
evdwl = r6inv * (r6inv * lj3[itype][jtype] - lj4[itype][jtype]) - offset[itype][jtype];
break;
case SMALL_LARGE:
c2 = a2[itype][jtype];
K[1] = c2*c2;
K[2] = rsq;
K[0] = K[1] - rsq;
K[4] = rsq*rsq;
K[3] = K[1] - K[2];
K[3] *= K[3]*K[3];
K[6] = K[3]*K[3];
fR = sigma3[itype][jtype]*a12[itype][jtype]*c2*K[1]/K[3];
fpair = 4.0/15.0*fR*factor_lj *
(2.0*(K[1]+K[2]) * (K[1]*(5.0*K[1]+22.0*K[2])+5.0*K[4]) *
sigma6[itype][jtype]/K[6]-5.0) / K[0];
if (eflag)
evdwl = 2.0/9.0*fR *
(1.0-(K[1]*(K[1]*(K[1]/3.0+3.0*K[2])+4.2*K[4])+K[2]*K[4]) *
sigma6[itype][jtype]/K[6]) - offset[itype][jtype];
if (rsq <= K[1])
error->one(FLERR,"Overlapping small/large in pair colloid");
break;
case SMALL_LARGE:
c2 = a2[itype][jtype];
K[1] = c2 * c2;
K[2] = rsq;
K[0] = K[1] - rsq;
K[4] = rsq * rsq;
K[3] = K[1] - K[2];
K[3] *= K[3] * K[3];
K[6] = K[3] * K[3];
fR = sigma3[itype][jtype] * a12[itype][jtype] * c2 * K[1] / K[3];
fpair = 4.0 / 15.0 * fR * factor_lj *
(2.0 * (K[1] + K[2]) * (K[1] * (5.0 * K[1] + 22.0 * K[2]) + 5.0 * K[4]) *
sigma6[itype][jtype] / K[6] -
5.0) /
K[0];
if (eflag)
evdwl = 2.0 / 9.0 * fR *
(1.0 -
(K[1] * (K[1] * (K[1] / 3.0 + 3.0 * K[2]) + 4.2 * K[4]) + K[2] * K[4]) *
sigma6[itype][jtype] / K[6]) -
offset[itype][jtype];
if (rsq <= K[1]) error->one(FLERR, "Overlapping small/large in pair colloid");
break;
case LARGE_LARGE:
r = sqrt(rsq);
c1 = a1[itype][jtype];
c2 = a2[itype][jtype];
K[0] = c1*c2;
K[1] = c1+c2;
K[2] = c1-c2;
K[3] = K[1]+r;
K[4] = K[1]-r;
K[5] = K[2]+r;
K[6] = K[2]-r;
K[7] = 1.0/(K[3]*K[4]);
K[8] = 1.0/(K[5]*K[6]);
g[0] = pow(K[3],-7.0);
g[1] = pow(K[4],-7.0);
g[2] = pow(K[5],-7.0);
g[3] = pow(K[6],-7.0);
h[0] = ((K[3]+5.0*K[1])*K[3]+30.0*K[0])*g[0];
h[1] = ((K[4]+5.0*K[1])*K[4]+30.0*K[0])*g[1];
h[2] = ((K[5]+5.0*K[2])*K[5]-30.0*K[0])*g[2];
h[3] = ((K[6]+5.0*K[2])*K[6]-30.0*K[0])*g[3];
g[0] *= 42.0*K[0]/K[3]+6.0*K[1]+K[3];
g[1] *= 42.0*K[0]/K[4]+6.0*K[1]+K[4];
g[2] *= -42.0*K[0]/K[5]+6.0*K[2]+K[5];
g[3] *= -42.0*K[0]/K[6]+6.0*K[2]+K[6];
case LARGE_LARGE:
r = sqrt(rsq);
c1 = a1[itype][jtype];
c2 = a2[itype][jtype];
K[0] = c1 * c2;
K[1] = c1 + c2;
K[2] = c1 - c2;
K[3] = K[1] + r;
K[4] = K[1] - r;
K[5] = K[2] + r;
K[6] = K[2] - r;
K[7] = 1.0 / (K[3] * K[4]);
K[8] = 1.0 / (K[5] * K[6]);
g[0] = pow(K[3], -7.0);
g[1] = pow(K[4], -7.0);
g[2] = pow(K[5], -7.0);
g[3] = pow(K[6], -7.0);
h[0] = ((K[3] + 5.0 * K[1]) * K[3] + 30.0 * K[0]) * g[0];
h[1] = ((K[4] + 5.0 * K[1]) * K[4] + 30.0 * K[0]) * g[1];
h[2] = ((K[5] + 5.0 * K[2]) * K[5] - 30.0 * K[0]) * g[2];
h[3] = ((K[6] + 5.0 * K[2]) * K[6] - 30.0 * K[0]) * g[3];
g[0] *= 42.0 * K[0] / K[3] + 6.0 * K[1] + K[3];
g[1] *= 42.0 * K[0] / K[4] + 6.0 * K[1] + K[4];
g[2] *= -42.0 * K[0] / K[5] + 6.0 * K[2] + K[5];
g[3] *= -42.0 * K[0] / K[6] + 6.0 * K[2] + K[6];
fR = a12[itype][jtype]*sigma6[itype][jtype]/r/37800.0;
evdwl = fR * (h[0]-h[1]-h[2]+h[3]);
dUR = evdwl/r + 5.0*fR*(g[0]+g[1]-g[2]-g[3]);
dUA = -a12[itype][jtype]/3.0*r*((2.0*K[0]*K[7]+1.0)*K[7] +
(2.0*K[0]*K[8]-1.0)*K[8]);
fpair = factor_lj * (dUR+dUA)/r;
if (eflag)
evdwl += a12[itype][jtype]/6.0 *
(2.0*K[0]*(K[7]+K[8])-log(K[8]/K[7])) - offset[itype][jtype];
if (r <= K[1])
error->one(FLERR,"Overlapping large/large in pair colloid");
break;
fR = a12[itype][jtype] * sigma6[itype][jtype] / r / 37800.0;
evdwl = fR * (h[0] - h[1] - h[2] + h[3]);
dUR = evdwl / r + 5.0 * fR * (g[0] + g[1] - g[2] - g[3]);
dUA = -a12[itype][jtype] / 3.0 * r *
((2.0 * K[0] * K[7] + 1.0) * K[7] + (2.0 * K[0] * K[8] - 1.0) * K[8]);
fpair = factor_lj * (dUR + dUA) / r;
if (eflag)
evdwl += a12[itype][jtype] / 6.0 * (2.0 * K[0] * (K[7] + K[8]) - log(K[8] / K[7])) -
offset[itype][jtype];
if (r <= K[1]) error->one(FLERR, "Overlapping large/large in pair colloid");
break;
}
if (eflag) evdwl *= factor_lj;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,27 +32,21 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// Entry points of the coul/cut GPU library as they are used by PairCoulCutGPU
// in this listing:
//  - coul_gpu_init(): called from init_style() with atom->ntypes + 1, scale,
//    cutsq, force->special_coul, the local and local+ghost atom counts, mnf,
//    maxspecial, cell_size and force->qqrd2e; gpu_mode is a non-const reference
//    and the int result is handed to GPU_EXTRA::check_flag().
//  - coul_gpu_reinit(): called from reinit() with atom->ntypes + 1 and scale.
//  - coul_gpu_compute_n(): called from compute() when gpu_mode != GPU_FORCE;
//    the returned int** is stored as firstneigh and the caller passes &ilist and
//    &numneigh for the int** ilist / jnum parameters.
//  - coul_gpu_compute(): called from compute() when gpu_mode == GPU_FORCE with
//    ilist / numneigh / firstneigh taken from the pair style's neighbor list.
//  - both compute calls additionally receive atom->q, domain->boxlo and
//    domain->prd; host_start and success are non-const references.
//  - coul_gpu_clear() / coul_gpu_bytes(): call sites are outside this view.
// NOTE(review): this listing is a flattened diff; each multi-line declaration is
// shown in its pre-change wrapping followed by its post-change wrapping.
int coul_gpu_init(const int ntypes, double **host_scale, double **cutsq,
double *special_coul, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
const double qqrd2e);
int coul_gpu_init(const int ntypes, double **host_scale, double **cutsq, double *special_coul,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, const double qqrd2e);
void coul_gpu_reinit(const int ntypes, double **host_scale);
void coul_gpu_clear();
int ** coul_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd);
void coul_gpu_compute(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, const int nlocal,
int **coul_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
void coul_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
double coul_gpu_bytes();
@ -81,7 +73,7 @@ PairCoulCutGPU::~PairCoulCutGPU()
void PairCoulCutGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -89,7 +81,7 @@ void PairCoulCutGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -98,30 +90,25 @@ void PairCoulCutGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = coul_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
firstneigh = coul_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
coul_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
coul_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -134,9 +121,7 @@ void PairCoulCutGPU::compute(int eflag, int vflag)
void PairCoulCutGPU::init_style()
{
if (!atom->q_flag)
error->all(FLERR,"Pair style coul/cut/gpu requires atom attribute q");
if (!atom->q_flag) error->all(FLERR, "Pair style coul/cut/gpu requires atom attribute q");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -144,10 +129,9 @@ void PairCoulCutGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -155,21 +139,15 @@ void PairCoulCutGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = coul_gpu_init(atom->ntypes+1, scale, cutsq,
force->special_coul, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, force->qqrd2e);
GPU_EXTRA::check_flag(success,error,world);
int success = coul_gpu_init(atom->ntypes + 1, scale, cutsq, force->special_coul, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
screen, force->qqrd2e);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -178,7 +156,7 @@ void PairCoulCutGPU::reinit()
{
// Runs the base-class Pair::reinit() first and then passes the per-type scale
// table to coul_gpu_reinit(), using atom->ntypes + 1 as the type count.
// NOTE(review): this listing is a flattened diff, so the pre-change and
// post-change spellings of the same call both appear below.
Pair::reinit();
coul_gpu_reinit(atom->ntypes+1, scale);
coul_gpu_reinit(atom->ntypes + 1, scale);
}
/* ---------------------------------------------------------------------- */
@ -191,13 +169,12 @@ double PairCoulCutGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairCoulCutGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairCoulCutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
double rsq,r2inv,forcecoul,factor_coul;
int i, j, ii, jj, jnum, itype, jtype;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, ecoul, fpair;
double rsq, r2inv, forcecoul, factor_coul;
int *jlist;
ecoul = 0.0;
@ -229,23 +206,21 @@ void PairCoulCutGPU::cpu_compute(int start, int inum, int eflag,
// Fragment of the inner neighbor loop of the host-side fallback; the loop
// headers and the setup of qtmp / xtmp / factor_coul are outside this hunk.
// For a pair with rsq < cutsq[itype][jtype]:
//  - forcecoul = qqrd2e * qtmp * q[j] / r   (sqrt(r2inv) is 1/r)
//  - fpair     = factor_coul * forcecoul / r^2, accumulated on f[i] only
//    (no update of f[j] appears in this fragment)
//  - ecoul is recomputed only when eflag is set and is tallied together with
//    fpair through ev_tally_full() when evflag is set.
// NOTE(review): neither forcecoul nor ecoul is multiplied by
// scale[itype][jtype] here, although init_style() / reinit() pass `scale` to
// coul_gpu_init() / coul_gpu_reinit(), and the coul/debye host fallback in this
// same listing does apply scale[itype][jtype]. If the accelerator kernel uses
// the scale table, atoms handled by this host path would get different results
// whenever scale != 1 -- confirm against the non-GPU pair style and the kernel.
// NOTE(review): this listing is a flattened diff, so pre-change and post-change
// spellings of the same statements both appear below.
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
fpair = factor_coul*forcecoul * r2inv;
r2inv = 1.0 / rsq;
forcecoul = qqrd2e * qtmp * q[j] * sqrt(r2inv);
fpair = factor_coul * forcecoul * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
ecoul = factor_coul * qqrd2e * qtmp*q[j]*sqrt(r2inv);
}
if (eflag) { ecoul = factor_coul * qqrd2e * qtmp * q[j] * sqrt(r2inv); }
if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, 0.0, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,34 +32,28 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// Entry points of the coul/debye GPU library as they are used by
// PairCoulDebyeGPU in this listing:
//  - cdebye_gpu_init(): called from init_style() with atom->ntypes + 1, scale,
//    cutsq, force->special_coul, the local and local+ghost atom counts, mnf,
//    maxspecial, cell_size, force->qqrd2e and kappa; gpu_mode is a non-const
//    reference and the int result is handed to GPU_EXTRA::check_flag().
//  - cdebye_gpu_reinit(): called from reinit() with atom->ntypes + 1 and scale.
//  - cdebye_gpu_compute_n(): called from compute() when gpu_mode != GPU_FORCE;
//    the returned int** is stored as firstneigh and the caller passes &ilist and
//    &numneigh for the int** ilist / jnum parameters.
//  - cdebye_gpu_compute(): called from compute() when gpu_mode == GPU_FORCE with
//    ilist / numneigh / firstneigh taken from the pair style's neighbor list.
//  - both compute calls additionally receive atom->q, domain->boxlo and
//    domain->prd; host_start and success are non-const references.
//  - cdebye_gpu_clear() / cdebye_gpu_bytes(): call sites are outside this view.
// NOTE(review): this listing is a flattened diff; each multi-line declaration is
// shown in its pre-change wrapping followed by its post-change wrapping.
int cdebye_gpu_init(const int ntypes, double **host_scale, double **cutsq,
double *special_coul, const int nlocal, const int nall,
const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
const double qqrd2e, const double kappa);
int cdebye_gpu_init(const int ntypes, double **host_scale, double **cutsq, double *special_coul,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, const double qqrd2e,
const double kappa);
void cdebye_gpu_reinit(const int ntypes, double **host_scale);
void cdebye_gpu_clear();
int ** cdebye_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag,
const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success,
double *host_q, double *boxlo, double *prd);
void cdebye_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist,
int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, const double cpu_time, bool &success,
double *host_q, const int nlocal, double *boxlo,
double *prd);
int **cdebye_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success, double *host_q,
double *boxlo, double *prd);
void cdebye_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
double cdebye_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairCoulDebyeGPU::PairCoulDebyeGPU(LAMMPS *lmp) :
PairCoulDebye(lmp), gpu_mode(GPU_FORCE)
PairCoulDebyeGPU::PairCoulDebyeGPU(LAMMPS *lmp) : PairCoulDebye(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
cpu_time = 0.0;
@ -82,7 +74,7 @@ PairCoulDebyeGPU::~PairCoulDebyeGPU()
void PairCoulDebyeGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -90,7 +82,7 @@ void PairCoulDebyeGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -99,30 +91,25 @@ void PairCoulDebyeGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = cdebye_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
firstneigh = cdebye_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
cdebye_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
cdebye_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -135,9 +122,7 @@ void PairCoulDebyeGPU::compute(int eflag, int vflag)
void PairCoulDebyeGPU::init_style()
{
if (!atom->q_flag)
error->all(FLERR,"Pair style coul/debye/gpu requires atom attribute q");
if (!atom->q_flag) error->all(FLERR, "Pair style coul/debye/gpu requires atom attribute q");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -145,10 +130,9 @@ void PairCoulDebyeGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -156,22 +140,15 @@ void PairCoulDebyeGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = cdebye_gpu_init(atom->ntypes+1, scale, cutsq,
force->special_coul, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen,
force->qqrd2e, kappa);
GPU_EXTRA::check_flag(success,error,world);
int success = cdebye_gpu_init(atom->ntypes + 1, scale, cutsq, force->special_coul, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
screen, force->qqrd2e, kappa);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -180,7 +157,7 @@ void PairCoulDebyeGPU::reinit()
{
// Runs the base-class Pair::reinit() first and then passes the per-type scale
// table to cdebye_gpu_reinit(), using atom->ntypes + 1 as the type count.
// NOTE(review): this listing is a flattened diff, so the pre-change and
// post-change spellings of the same call both appear below.
Pair::reinit();
cdebye_gpu_reinit(atom->ntypes+1, scale);
cdebye_gpu_reinit(atom->ntypes + 1, scale);
}
/* ---------------------------------------------------------------------- */
@ -193,14 +170,13 @@ double PairCoulDebyeGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairCoulDebyeGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairCoulDebyeGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
double rsq,r2inv,forcecoul,factor_coul;
double r,rinv,screening;
int i, j, ii, jj, jnum, itype, jtype;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, ecoul, fpair;
double rsq, r2inv, forcecoul, factor_coul;
double r, rinv, screening;
int *jlist;
ecoul = 0.0;
@ -232,28 +208,26 @@ void PairCoulDebyeGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
r = sqrt(rsq);
rinv = 1.0/r;
screening = exp(-kappa*r);
forcecoul = qqrd2e * scale[itype][jtype] *
qtmp*q[j] * screening * (kappa + rinv);
fpair = factor_coul*forcecoul * r2inv;
rinv = 1.0 / r;
screening = exp(-kappa * r);
forcecoul = qqrd2e * scale[itype][jtype] * qtmp * q[j] * screening * (kappa + rinv);
fpair = factor_coul * forcecoul * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
ecoul = factor_coul * qqrd2e * scale[itype][jtype] *
qtmp*q[j] * rinv * screening;
ecoul = factor_coul * qqrd2e * scale[itype][jtype] * qtmp * q[j] * rinv * screening;
}
if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, 0.0, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,54 +23,45 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include <cmath>
// Numeric constants for this translation unit.
//  - MY_PIS is numerically sqrt(pi).
//  - EWALD_F is numerically 2/sqrt(pi), given to 9 significant digits.
//  - EWALD_P and A1..A5 match the constants of the Abramowitz & Stegun 7.1.26
//    rational approximation of erfc(); their use is outside this view -- confirm.
// NOTE(review): this listing is a flattened diff; EWALD_F..A5 appear twice
// (pre-change and post-change alignment). The token sequences are identical.
#define MY_PIS 1.77245385090551602729
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int cdsf_gpu_init(const int ntypes, const int nlocal, const int nall,
const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
const double host_cut_coulsq,
double *host_special_coul, const double qqrd2e,
const double e_shift, const double f_shift,
const double alpha);
int cdsf_gpu_init(const int ntypes, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
const double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
const double e_shift, const double f_shift, const double alpha);
void cdsf_gpu_clear();
int ** cdsf_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd);
void cdsf_gpu_compute(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, const int nlocal,
int **cdsf_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
void cdsf_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
double cdsf_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairCoulDSFGPU::PairCoulDSFGPU(LAMMPS *lmp) : PairCoulDSF(lmp),
gpu_mode(GPU_FORCE)
PairCoulDSFGPU::PairCoulDSFGPU(LAMMPS *lmp) : PairCoulDSF(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -93,7 +83,7 @@ PairCoulDSFGPU::~PairCoulDSFGPU()
void PairCoulDSFGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -101,7 +91,7 @@ void PairCoulDSFGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -110,30 +100,25 @@ void PairCoulDSFGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = cdsf_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
firstneigh = cdsf_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
cdsf_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
cdsf_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -146,9 +131,7 @@ void PairCoulDSFGPU::compute(int eflag, int vflag)
void PairCoulDSFGPU::init_style()
{
if (!atom->q_flag)
error->all(FLERR,"Pair style coul/dsf/gpu requires atom attribute q");
if (!atom->q_flag) error->all(FLERR, "Pair style coul/dsf/gpu requires atom attribute q");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -156,10 +139,9 @@ void PairCoulDSFGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -168,27 +150,20 @@ void PairCoulDSFGPU::init_style()
double cell_size = sqrt(maxcut) + neighbor->skin;
cut_coulsq = cut_coul * cut_coul;
double erfcc = erfc(alpha*cut_coul);
double erfcd = exp(-alpha*alpha*cut_coul*cut_coul);
f_shift = -(erfcc/cut_coulsq + 2.0/MY_PIS*alpha*erfcd/cut_coul);
e_shift = erfcc/cut_coul - f_shift*cut_coul;
double erfcc = erfc(alpha * cut_coul);
double erfcd = exp(-alpha * alpha * cut_coul * cut_coul);
f_shift = -(erfcc / cut_coulsq + 2.0 / MY_PIS * alpha * erfcd / cut_coul);
e_shift = erfcc / cut_coul - f_shift * cut_coul;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = cdsf_gpu_init(atom->ntypes+1, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_coulsq,
force->special_coul, force->qqrd2e, e_shift,
f_shift, alpha);
GPU_EXTRA::check_flag(success,error,world);
int success = cdsf_gpu_init(atom->ntypes + 1, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen, cut_coulsq,
force->special_coul, force->qqrd2e, e_shift, f_shift, alpha);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -201,14 +176,13 @@ double PairCoulDSFGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairCoulDSFGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairCoulDSFGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
double r,rsq,r2inv,forcecoul,factor_coul;
double prefactor,erfcc,erfcd,t;
int i, j, ii, jj, jnum;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, ecoul, fpair;
double r, rsq, r2inv, forcecoul, factor_coul;
double prefactor, erfcc, erfcd, t;
int *jlist;
ecoul = 0.0;
@ -232,8 +206,8 @@ void PairCoulDSFGPU::cpu_compute(int start, int inum, int eflag,
jnum = numneigh[i];
if (evflag) {
double e_self = -(e_shift/2.0 + alpha/MY_PIS) * qtmp*qtmp*qqrd2e;
ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
double e_self = -(e_shift / 2.0 + alpha / MY_PIS) * qtmp * qtmp * qqrd2e;
ev_tally(i, i, nlocal, 0, 0.0, e_self, 0.0, 0.0, 0.0, 0.0);
}
for (jj = 0; jj < jnum; jj++) {
@ -244,32 +218,32 @@ void PairCoulDSFGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
if (rsq < cut_coulsq) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
r = sqrt(rsq);
prefactor = qqrd2e*qtmp*q[j]/r;
erfcd = exp(-alpha*alpha*r*r);
t = 1.0 / (1.0 + EWALD_P*alpha*r);
erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd +
r*f_shift) * r;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
prefactor = qqrd2e * qtmp * q[j] / r;
erfcd = exp(-alpha * alpha * r * r);
t = 1.0 / (1.0 + EWALD_P * alpha * r);
erfcc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * erfcd;
forcecoul = prefactor * (erfcc / r + 2.0 * alpha / MY_PIS * erfcd + r * f_shift) * r;
if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
fpair = forcecoul * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (rsq < cut_coulsq) {
ecoul = prefactor * (erfcc - r*e_shift - rsq*f_shift);
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
ecoul = prefactor * (erfcc - r * e_shift - rsq * f_shift);
if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
} else
ecoul = 0.0;
}
if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, 0.0, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,7 +24,6 @@
#include "gpu_extra.h"
#include "kspace.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -33,14 +31,14 @@
using namespace LAMMPS_NS;
#define EWALD_F 1.12837917
#define EWALD_P 9.95473818e-1
#define B0 -0.1335096380159268
#define B1 -2.57839507e-1
#define B2 -1.37203639e-1
#define B3 -8.88822059e-3
#define B4 -5.80844129e-3
#define B5 1.14652755e-1
#define EWALD_F 1.12837917
#define EWALD_P 9.95473818e-1
#define B0 -0.1335096380159268
#define B1 -2.57839507e-1
#define B2 -1.37203639e-1
#define B3 -8.88822059e-3
#define B4 -5.80844129e-3
#define B5 1.14652755e-1
#define EPSILON 1.0e-20
#define EPS_EWALD 1.0e-6
@ -48,33 +46,28 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int clcs_gpu_init(const int ntypes, double **scale, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double host_cut_coulsq, double *host_special_coul,
int clcs_gpu_init(const int ntypes, double **scale, const int nlocal, const int nall,
const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode,
FILE *screen, double host_cut_coulsq, double *host_special_coul,
const double qqrd2e, const double g_ewald);
void clcs_gpu_reinit(const int ntypes, double **scale);
void clcs_gpu_clear();
int ** clcs_gpu_compute_n(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q,
double *boxlo, double *prd);
void clcs_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **clcs_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
void clcs_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
double clcs_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairCoulLongCSGPU::PairCoulLongCSGPU(LAMMPS *lmp) :
PairCoulLongCS(lmp), gpu_mode(GPU_FORCE)
PairCoulLongCSGPU::PairCoulLongCSGPU(LAMMPS *lmp) : PairCoulLongCS(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
cpu_time = 0.0;
@ -95,7 +88,7 @@ PairCoulLongCSGPU::~PairCoulLongCSGPU()
void PairCoulLongCSGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -103,7 +96,7 @@ void PairCoulLongCSGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -112,30 +105,25 @@ void PairCoulLongCSGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = clcs_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
firstneigh = clcs_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
clcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
clcs_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -150,15 +138,12 @@ void PairCoulLongCSGPU::init_style()
{
cut_respa = nullptr;
if (!atom->q_flag)
error->all(FLERR,"Pair style coul/long/cs/gpu requires atom attribute q");
if (!atom->q_flag) error->all(FLERR, "Pair style coul/long/cs/gpu requires atom attribute q");
// Call init_one to make sure scale is correct
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
init_one(i,j);
}
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { init_one(i, j); }
}
}
double cell_size = cut_coul + neighbor->skin;
@ -167,30 +152,23 @@ void PairCoulLongCSGPU::init_style()
// ensure use of KSpace long-range solver, set g_ewald
if (force->kspace == nullptr)
error->all(FLERR,"Pair style requires a KSpace style");
if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
g_ewald = force->kspace->g_ewald;
// setup force tables
if (ncoultablebits) init_tables(cut_coul,cut_respa);
if (ncoultablebits) init_tables(cut_coul, cut_respa);
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = clcs_gpu_init(atom->ntypes+1, scale,
atom->nlocal, atom->nlocal+atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen, cut_coulsq,
force->special_coul, force->qqrd2e, g_ewald);
int success = clcs_gpu_init(atom->ntypes + 1, scale, atom->nlocal, atom->nlocal + atom->nghost,
mnf, maxspecial, cell_size, gpu_mode, screen, cut_coulsq,
force->special_coul, force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success,error,world);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -199,7 +177,7 @@ void PairCoulLongCSGPU::reinit()
{
Pair::reinit();
clcs_gpu_reinit(atom->ntypes+1, scale);
clcs_gpu_reinit(atom->ntypes + 1, scale);
}
/* ---------------------------------------------------------------------- */
@ -212,15 +190,14 @@ double PairCoulLongCSGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairCoulLongCSGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
void PairCoulLongCSGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itable,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
double fraction,table;
double r,r2inv,forcecoul,factor_coul;
double grij,expm2,prefactor,t,erfc,u;
int i, j, ii, jj, jnum, itable, itype, jtype;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, ecoul, fpair;
double fraction, table;
double r, r2inv, forcecoul, factor_coul;
double grij, expm2, prefactor, t, erfc, u;
int *jlist;
double rsq;
@ -253,37 +230,38 @@ void PairCoulLongCSGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cut_coulsq) {
rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
r2inv = 1.0/rsq;
rsq +=
EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
r2inv = 1.0 / rsq;
if (!ncoultablebits || rsq <= tabinnersq) {
r = sqrt(rsq);
prefactor = qqrd2e * scale[itype][jtype] * qtmp*q[j];
prefactor = qqrd2e * scale[itype][jtype] * qtmp * q[j];
if (factor_coul < 1.0) {
// When bonded parts are being calculated a minimal distance (EPS_EWALD)
// has to be added to the prefactor and erfc in order to make the
// used approximation functions for the Ewald correction valid
grij = g_ewald * (r+EPS_EWALD);
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
grij = g_ewald * (r + EPS_EWALD);
expm2 = exp(-grij * grij);
t = 1.0 / (1.0 + EWALD_P * grij);
u = 1.0 - t;
erfc = t * (1.+u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
prefactor /= (r+EPS_EWALD);
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - (1.0-factor_coul));
erfc = t * (1. + u * (B0 + u * (B1 + u * (B2 + u * (B3 + u * (B4 + u * B5)))))) * expm2;
prefactor /= (r + EPS_EWALD);
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2 - (1.0 - factor_coul));
// Additionally r2inv needs to be accordingly modified since the later
// scaling of the overall force shall be consistent
r2inv = 1.0/(rsq + EPS_EWALD_SQR);
r2inv = 1.0 / (rsq + EPS_EWALD_SQR);
} else {
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
expm2 = exp(-grij * grij);
t = 1.0 / (1.0 + EWALD_P * grij);
u = 1.0 - t;
erfc = t * (1.+u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
erfc = t * (1. + u * (B0 + u * (B1 + u * (B2 + u * (B3 + u * (B4 + u * B5)))))) * expm2;
prefactor /= r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
}
} else {
union_int_float_t rsq_lookup;
@ -291,34 +269,35 @@ void PairCoulLongCSGPU::cpu_compute(int start, int inum, int eflag,
itable = rsq_lookup.i & ncoulmask;
itable >>= ncoulshiftbits;
fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
table = ftable[itable] + fraction*dftable[itable];
forcecoul = scale[itype][jtype] * qtmp*q[j] * table;
table = ftable[itable] + fraction * dftable[itable];
forcecoul = scale[itype][jtype] * qtmp * q[j] * table;
if (factor_coul < 1.0) {
table = ctable[itable] + fraction*dctable[itable];
prefactor = scale[itype][jtype] * qtmp*q[j] * table;
forcecoul -= (1.0-factor_coul)*prefactor;
table = ctable[itable] + fraction * dctable[itable];
prefactor = scale[itype][jtype] * qtmp * q[j] * table;
forcecoul -= (1.0 - factor_coul) * prefactor;
}
}
fpair = forcecoul * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq)
ecoul = prefactor*erfc;
ecoul = prefactor * erfc;
else {
table = etable[itable] + fraction*detable[itable];
ecoul = scale[itype][jtype] * qtmp*q[j] * table;
table = etable[itable] + fraction * detable[itable];
ecoul = scale[itype][jtype] * qtmp * q[j] * table;
}
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
} else
ecoul = 0.0;
}
if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, 0.0, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,51 +24,44 @@
#include "gpu_extra.h"
#include "kspace.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include <cmath>
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int cl_gpu_init(const int ntypes, double **scale,
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode,
int cl_gpu_init(const int ntypes, double **scale, const int nlocal, const int nall,
const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode,
FILE *screen, double host_cut_coulsq, double *host_special_coul,
const double qqrd2e, const double g_ewald);
void cl_gpu_reinit(const int ntypes, double **scale);
void cl_gpu_clear();
int ** cl_gpu_compute_n(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q,
double *boxlo, double *prd);
void cl_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **cl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
void cl_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, const int nlocal, double *boxlo, double *prd);
double cl_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairCoulLongGPU::PairCoulLongGPU(LAMMPS *lmp) :
PairCoulLong(lmp), gpu_mode(GPU_FORCE)
PairCoulLongGPU::PairCoulLongGPU(LAMMPS *lmp) : PairCoulLong(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
cpu_time = 0.0;
@ -90,7 +82,7 @@ PairCoulLongGPU::~PairCoulLongGPU()
void PairCoulLongGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -98,7 +90,7 @@ void PairCoulLongGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -107,30 +99,25 @@ void PairCoulLongGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = cl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
firstneigh = cl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
cl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
cl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -145,15 +132,12 @@ void PairCoulLongGPU::init_style()
{
cut_respa = nullptr;
if (!atom->q_flag)
error->all(FLERR,"Pair style coul/long/gpu requires atom attribute q");
if (!atom->q_flag) error->all(FLERR, "Pair style coul/long/gpu requires atom attribute q");
// Call init_one to make sure scale is correct
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
init_one(i,j);
}
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { init_one(i, j); }
}
}
double cell_size = cut_coul + neighbor->skin;
@ -162,30 +146,23 @@ void PairCoulLongGPU::init_style()
// ensure use of KSpace long-range solver, set g_ewald
if (force->kspace == nullptr)
error->all(FLERR,"Pair style requires a KSpace style");
if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
g_ewald = force->kspace->g_ewald;
// setup force tables
if (ncoultablebits) init_tables(cut_coul,cut_respa);
if (ncoultablebits) init_tables(cut_coul, cut_respa);
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = cl_gpu_init(atom->ntypes+1, scale,
atom->nlocal, atom->nlocal+atom->nghost, mnf,
int success = cl_gpu_init(atom->ntypes + 1, scale, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen, cut_coulsq,
force->special_coul, force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success,error,world);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -194,7 +171,7 @@ void PairCoulLongGPU::reinit()
{
Pair::reinit();
cl_gpu_reinit(atom->ntypes+1, scale);
cl_gpu_reinit(atom->ntypes + 1, scale);
}
/* ---------------------------------------------------------------------- */
@ -207,15 +184,14 @@ double PairCoulLongGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairCoulLongGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairCoulLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itable;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
double fraction,table;
double r,r2inv,forcecoul,factor_coul;
double grij,expm2,prefactor,t,erfc;
int i, j, ii, jj, jnum, itable;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, ecoul, fpair;
double fraction, table;
double r, r2inv, forcecoul, factor_coul;
double grij, expm2, prefactor, t, erfc;
int *jlist;
double rsq;
@ -246,54 +222,55 @@ void PairCoulLongGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq) {
r = sqrt(rsq);
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*q[j]/r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
expm2 = exp(-grij * grij);
t = 1.0 / (1.0 + EWALD_P * grij);
erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
prefactor = qqrd2e * qtmp * q[j] / r;
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
} else {
union_int_float_t rsq_lookup;
rsq_lookup.f = rsq;
itable = rsq_lookup.i & ncoulmask;
itable >>= ncoulshiftbits;
fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
table = ftable[itable] + fraction*dftable[itable];
forcecoul = qtmp*q[j] * table;
table = ftable[itable] + fraction * dftable[itable];
forcecoul = qtmp * q[j] * table;
if (factor_coul < 1.0) {
table = ctable[itable] + fraction*dctable[itable];
prefactor = qtmp*q[j] * table;
forcecoul -= (1.0-factor_coul)*prefactor;
table = ctable[itable] + fraction * dctable[itable];
prefactor = qtmp * q[j] * table;
forcecoul -= (1.0 - factor_coul) * prefactor;
}
}
fpair = forcecoul * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq)
ecoul = prefactor*erfc;
ecoul = prefactor * erfc;
else {
table = etable[itable] + fraction*detable[itable];
ecoul = qtmp*q[j] * table;
table = etable[itable] + fraction * detable[itable];
ecoul = qtmp * q[j] * table;
}
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
} else
ecoul = 0.0;
}
if (evflag) ev_tally_full(i,0.0,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, 0.0, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include "update.h"
@ -35,29 +33,24 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0,
double **host_gamma, double **host_sigma, double **host_cut,
double *special_lj, const int inum,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
int dpd_gpu_init(const int ntypes, double **cutsq, double **host_a0, double **host_gamma,
double **host_sigma, double **host_cut, double *special_lj, const int inum,
const int nall, const int max_nbors, const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen);
void dpd_gpu_clear();
int ** dpd_gpu_compute_n(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double **host_v, const double dtinvsqrt,
const int seed, const int timestep,
double *boxlo, double *prd);
void dpd_gpu_compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, tagint *tag,
double **host_v, const double dtinvsqrt,
const int seed, const int timestep,
const int nlocal, double *boxlo, double *prd);
int **dpd_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double **host_v,
const double dtinvsqrt, const int seed, const int timestep, double *boxlo,
double *prd);
void dpd_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, tagint *tag, double **host_v,
const double dtinvsqrt, const int seed, const int timestep, const int nlocal,
double *boxlo, double *prd);
double dpd_gpu_bytes();
#define EPSILON 1.0e-10
@ -66,7 +59,8 @@ double dpd_gpu_bytes();
//#define _USE_UNIFORM_SARU_TEA8
//#define _USE_GAUSSIAN_SARU_LCG
#if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && !defined(_USE_GAUSSIAN_SARU_LCG)
#if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && \
!defined(_USE_GAUSSIAN_SARU_LCG)
#define _USE_UNIFORM_SARU_LCG
#endif
@ -75,9 +69,9 @@ double dpd_gpu_bytes();
// 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, J. Comput. Phys. 230 (2011), 7191-7201.
// PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19
#define LCGA 0x4beb5d59 // Full period 32 bit LCG
#define LCGA 0x4beb5d59 // Full period 32 bit LCG
#define LCGC 0x2600e1f7
#define oWeylPeriod 0xda879add // Prime period 3666320093
#define oWeylPeriod 0xda879add // Prime period 3666320093
#define oWeylOffset 0x8009d14b
#define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */
@ -89,28 +83,29 @@ double dpd_gpu_bytes();
// Curly brackets to make variables local to the scope.
#ifdef _USE_UNIFORM_SARU_LCG
#define numtyp double
#define SQRT3 (numtyp)1.7320508075688772935274463
#define saru(seed1, seed2, seed, timestep, randnum) { \
unsigned int seed3 = seed + timestep; \
seed3^=(seed1<<7)^(seed2>>6); \
seed2+=(seed1>>4)^(seed3>>15); \
seed1^=(seed2<<9)+(seed3<<8); \
seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \
seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \
seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \
seed2+=seed1*seed3; \
seed1+=seed3 ^ (seed2>>2); \
seed2^=((signed int)seed2)>>17; \
unsigned int state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \
unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8); \
state = state + (wstate*(wstate^0xdddf97f5)); \
wstate = 0xABCB96F7 + (wstate>>1); \
state = LCGA*state + LCGC; \
wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
unsigned int v = (state ^ (state>>26)) + wstate; \
unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7); \
randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0); \
}
#define SQRT3 (numtyp) 1.7320508075688772935274463
#define saru(seed1, seed2, seed, timestep, randnum) \
{ \
unsigned int seed3 = seed + timestep; \
seed3 ^= (seed1 << 7) ^ (seed2 >> 6); \
seed2 += (seed1 >> 4) ^ (seed3 >> 15); \
seed1 ^= (seed2 << 9) + (seed3 << 8); \
seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1)); \
seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16)); \
seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22)); \
seed2 += seed1 * seed3; \
seed1 += seed3 ^ (seed2 >> 2); \
seed2 ^= ((signed int) seed2) >> 17; \
unsigned int state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14)); \
unsigned int wstate = (state + seed2) ^ (((signed int) state) >> 8); \
state = state + (wstate * (wstate ^ 0xdddf97f5)); \
wstate = 0xABCB96F7 + (wstate >> 1); \
state = LCGA * state + LCGC; \
wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \
unsigned int v = (state ^ (state >> 26)) + wstate; \
unsigned int s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7); \
randnum = SQRT3 * (s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0); \
}
#endif
// specifically implemented for steps = 1; high = 1.0; low = -1.0
@ -119,38 +114,39 @@ double dpd_gpu_bytes();
// Afshar et al. multiplies u in [-0.5;0.5] with sqrt(12)
#ifdef _USE_UNIFORM_SARU_TEA8
#define numtyp double
#define SQRT3 (numtyp)1.7320508075688772935274463
#define SQRT3 (numtyp) 1.7320508075688772935274463
#define k0 0xA341316C
#define k1 0xC8013EA4
#define k2 0xAD90777D
#define k3 0x7E95761E
#define delta 0x9e3779b9
#define rounds 8
#define saru(seed1, seed2, seed, timestep, randnum) { \
unsigned int seed3 = seed + timestep; \
seed3^=(seed1<<7)^(seed2>>6); \
seed2+=(seed1>>4)^(seed3>>15); \
seed1^=(seed2<<9)+(seed3<<8); \
seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \
seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \
seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \
seed2+=seed1*seed3; \
seed1+=seed3 ^ (seed2>>2); \
seed2^=((signed int)seed2)>>17; \
unsigned int state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \
unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8); \
state = state + (wstate*(wstate^0xdddf97f5)); \
wstate = 0xABCB96F7 + (wstate>>1); \
unsigned int sum = 0; \
for (int i=0; i < rounds; i++) { \
sum += delta; \
state += ((wstate<<4) + k0)^(wstate + sum)^((wstate>>5) + k1); \
wstate += ((state<<4) + k2)^(state + sum)^((state>>5) + k3); \
} \
unsigned int v = (state ^ (state>>26)) + wstate; \
unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7); \
randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0); \
}
#define saru(seed1, seed2, seed, timestep, randnum) \
{ \
unsigned int seed3 = seed + timestep; \
seed3 ^= (seed1 << 7) ^ (seed2 >> 6); \
seed2 += (seed1 >> 4) ^ (seed3 >> 15); \
seed1 ^= (seed2 << 9) + (seed3 << 8); \
seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1)); \
seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16)); \
seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22)); \
seed2 += seed1 * seed3; \
seed1 += seed3 ^ (seed2 >> 2); \
seed2 ^= ((signed int) seed2) >> 17; \
unsigned int state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14)); \
unsigned int wstate = (state + seed2) ^ (((signed int) state) >> 8); \
state = state + (wstate * (wstate ^ 0xdddf97f5)); \
wstate = 0xABCB96F7 + (wstate >> 1); \
unsigned int sum = 0; \
for (int i = 0; i < rounds; i++) { \
sum += delta; \
state += ((wstate << 4) + k0) ^ (wstate + sum) ^ ((wstate >> 5) + k1); \
wstate += ((state << 4) + k2) ^ (state + sum) ^ ((state >> 5) + k3); \
} \
unsigned int v = (state ^ (state >> 26)) + wstate; \
unsigned int s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7); \
randnum = SQRT3 * (s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0); \
}
#endif
// specifically implemented for steps = 1; high = 1.0; low = -1.0
@ -159,42 +155,43 @@ double dpd_gpu_bytes();
// This is used to compare with CPU DPD using RandMars::gaussian()
#ifdef _USE_GAUSSIAN_SARU_LCG
#define numtyp double
#define saru(seed1, seed2, seed, timestep, randnum) { \
unsigned int seed3 = seed + timestep; \
seed3^=(seed1<<7)^(seed2>>6); \
seed2+=(seed1>>4)^(seed3>>15); \
seed1^=(seed2<<9)+(seed3<<8); \
seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \
seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \
seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \
seed2+=seed1*seed3; \
seed1+=seed3 ^ (seed2>>2); \
seed2^=((signed int)seed2)>>17; \
unsigned int state=0x12345678; \
unsigned int wstate=12345678; \
state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \
wstate = (state + seed2) ^ (((signed int)state)>>8); \
state = state + (wstate*(wstate^0xdddf97f5)); \
wstate = 0xABCB96F7 + (wstate>>1); \
unsigned int v, s; \
numtyp r1, r2, rsq; \
while (1) { \
state = LCGA*state + LCGC; \
wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
v = (state ^ (state>>26)) + wstate; \
s = (signed int)((v^(v>>20))*0x6957f5a7); \
r1 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0; \
state = LCGA*state + LCGC; \
wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
v = (state ^ (state>>26)) + wstate; \
s = (signed int)((v^(v>>20))*0x6957f5a7); \
r2 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0; \
rsq = r1 * r1 + r2 * r2; \
if (rsq < (numtyp)1.0) break; \
} \
numtyp fac = sqrt((numtyp)-2.0*log(rsq)/rsq); \
randnum = r2*fac; \
}
#define saru(seed1, seed2, seed, timestep, randnum) \
{ \
unsigned int seed3 = seed + timestep; \
seed3 ^= (seed1 << 7) ^ (seed2 >> 6); \
seed2 += (seed1 >> 4) ^ (seed3 >> 15); \
seed1 ^= (seed2 << 9) + (seed3 << 8); \
seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1)); \
seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16)); \
seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22)); \
seed2 += seed1 * seed3; \
seed1 += seed3 ^ (seed2 >> 2); \
seed2 ^= ((signed int) seed2) >> 17; \
unsigned int state = 0x12345678; \
unsigned int wstate = 12345678; \
state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14)); \
wstate = (state + seed2) ^ (((signed int) state) >> 8); \
state = state + (wstate * (wstate ^ 0xdddf97f5)); \
wstate = 0xABCB96F7 + (wstate >> 1); \
unsigned int v, s; \
numtyp r1, r2, rsq; \
while (1) { \
state = LCGA * state + LCGC; \
wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \
v = (state ^ (state >> 26)) + wstate; \
s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7); \
r1 = s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0; \
state = LCGA * state + LCGC; \
wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \
v = (state ^ (state >> 26)) + wstate; \
s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7); \
r2 = s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0; \
rsq = r1 * r1 + r2 * r2; \
if (rsq < (numtyp) 1.0) break; \
} \
numtyp fac = sqrt((numtyp) -2.0 * log(rsq) / rsq); \
randnum = r2 * fac; \
}
#endif
/* ---------------------------------------------------------------------- */
@ -221,17 +218,17 @@ PairDPDGPU::~PairDPDGPU()
void PairDPDGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
double dtinvsqrt = 1.0/sqrt(update->dt);
double dtinvsqrt = 1.0 / sqrt(update->dt);
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -240,33 +237,26 @@ void PairDPDGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = dpd_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->v, dtinvsqrt, seed,
update->ntimestep,
domain->boxlo, domain->prd);
firstneigh = dpd_gpu_compute_n(
neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success, atom->v, dtinvsqrt, seed, update->ntimestep, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
dpd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success,
atom->tag, atom->v, dtinvsqrt, seed,
update->ntimestep,
atom->nlocal, domain->boxlo, domain->prd);
dpd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->tag,
atom->v, dtinvsqrt, seed, update->ntimestep, atom->nlocal, domain->boxlo,
domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -286,10 +276,9 @@ void PairDPDGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
mcut = init_one(i,j);
mcut = init_one(i, j);
mcut *= mcut;
if (mcut > maxcut)
maxcut = mcut;
if (mcut > maxcut) maxcut = mcut;
cutsq[i][j] = cutsq[j][i] = mcut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -297,21 +286,15 @@ void PairDPDGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = dpd_gpu_init(atom->ntypes+1, cutsq, a0, gamma, sigma,
cut, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success =
dpd_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma, cut, force->special_lj, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -324,14 +307,15 @@ double PairDPDGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double vxtmp,vytmp,vztmp,delvx,delvy,delvz;
double rsq,r,rinv,dot,wd,randnum,factor_dpd;
void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double vxtmp, vytmp, vztmp, delvx, delvy, delvz;
double rsq, r, rinv, dot, wd, randnum, factor_dpd;
int *jlist;
tagint itag,jtag;
tagint itag, jtag;
double **x = atom->x;
double **v = atom->v;
@ -339,8 +323,8 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *type = atom->type;
tagint *tag = atom->tag;
double *special_lj = force->special_lj;
double dtinvsqrt = 1.0/sqrt(update->dt);
int timestep = (int)update->ntimestep;
double dtinvsqrt = 1.0 / sqrt(update->dt);
int timestep = (int) update->ntimestep;
// loop over neighbors of my atoms
@ -365,23 +349,24 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
jtag = tag[j];
if (rsq < cutsq[itype][jtype]) {
r = sqrt(rsq);
if (r < EPSILON) continue; // r can be 0.0 in DPD systems
rinv = 1.0/r;
if (r < EPSILON) continue; // r can be 0.0 in DPD systems
rinv = 1.0 / r;
delvx = vxtmp - v[j][0];
delvy = vytmp - v[j][1];
delvz = vztmp - v[j][2];
dot = delx*delvx + dely*delvy + delz*delvz;
wd = 1.0 - r/cut[itype][jtype];
dot = delx * delvx + dely * delvy + delz * delvz;
wd = 1.0 - r / cut[itype][jtype];
unsigned int tag1=itag, tag2=jtag;
unsigned int tag1 = itag, tag2 = jtag;
if (tag1 > tag2) {
tag1 = jtag; tag2 = itag;
tag1 = jtag;
tag2 = itag;
}
randnum = 0.0;
@ -391,24 +376,24 @@ void PairDPDGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
// drag force = -gamma * wd^2 * (delx dot delv) / r
// random force = sigma * wd * rnd * dtinvsqrt;
fpair = a0[itype][jtype]*wd;
fpair -= gamma[itype][jtype]*wd*wd*dot*rinv;
fpair += sigma[itype][jtype]*wd*randnum*dtinvsqrt;
fpair *= factor_dpd*rinv;
fpair = a0[itype][jtype] * wd;
fpair -= gamma[itype][jtype] * wd * wd * dot * rinv;
fpair += sigma[itype][jtype] * wd * randnum * dtinvsqrt;
fpair *= factor_dpd * rinv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
// unshifted eng of conservative term:
// evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]);
// eng shifted to 0.0 at cutoff
evdwl = 0.5*a0[itype][jtype]*cut[itype][jtype] * wd*wd;
evdwl = 0.5 * a0[itype][jtype] * cut[itype][jtype] * wd * wd;
evdwl *= factor_dpd;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include "update.h"
@ -35,33 +33,26 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0,
double **host_gamma, double **host_sigma, double **host_cut,
double *special_lj, const int inum,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
int dpd_tstat_gpu_init(const int ntypes, double **cutsq, double **host_a0, double **host_gamma,
double **host_sigma, double **host_cut, double *special_lj, const int inum,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
void dpd_tstat_gpu_clear();
int ** dpd_tstat_gpu_compute_n(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success,
double **host_v, const double dtinvsqrt,
const int seed, const int timestep,
double *boxlo, double *prd);
void dpd_tstat_gpu_compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, tagint *tag,
double **host_v, const double dtinvsqrt,
const int seed, const int timestep,
const int nlocal, double *boxlo, double *prd);
void dpd_tstat_gpu_update_coeff(int ntypes, double **host_a0,
double **host_gamma, double **host_sigma,
double **host_cut);
int **dpd_tstat_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success, double **host_v,
const double dtinvsqrt, const int seed, const int timestep,
double *boxlo, double *prd);
void dpd_tstat_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
int &host_start, const double cpu_time, bool &success, tagint *tag,
double **host_v, const double dtinvsqrt, const int seed,
const int timestep, const int nlocal, double *boxlo, double *prd);
void dpd_tstat_gpu_update_coeff(int ntypes, double **host_a0, double **host_gamma,
double **host_sigma, double **host_cut);
double dpd_tstat_gpu_bytes();
#define EPSILON 1.0e-10
@ -70,7 +61,8 @@ double dpd_tstat_gpu_bytes();
//#define _USE_UNIFORM_SARU_TEA8
//#define _USE_GAUSSIAN_SARU_LCG
#if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && !defined(_USE_GAUSSIAN_SARU_LCG)
#if !defined(_USE_UNIFORM_SARU_LCG) && !defined(_USE_UNIFORM_SARU_TEA8) && \
!defined(_USE_GAUSSIAN_SARU_LCG)
#define _USE_UNIFORM_SARU_LCG
#endif
@ -79,9 +71,9 @@ double dpd_tstat_gpu_bytes();
// 2. C. L. Phillips, J. A. Anderson, S. C. Glotzer, J. Comput. Phys. 230 (2011), 7191-7201.
// PRNG period = 3666320093*2^32 ~ 2^64 ~ 10^19
#define LCGA 0x4beb5d59 // Full period 32 bit LCG
#define LCGA 0x4beb5d59 // Full period 32 bit LCG
#define LCGC 0x2600e1f7
#define oWeylPeriod 0xda879add // Prime period 3666320093
#define oWeylPeriod 0xda879add // Prime period 3666320093
#define oWeylOffset 0x8009d14b
#define TWO_N32 0.232830643653869628906250e-9f /* 2^-32 */
@ -93,28 +85,29 @@ double dpd_tstat_gpu_bytes();
// Curly brackets to make variables local to the scope.
#ifdef _USE_UNIFORM_SARU_LCG
#define numtyp double
#define SQRT3 (numtyp)1.7320508075688772935274463
#define saru(seed1, seed2, seed, timestep, randnum) { \
unsigned int seed3 = seed + timestep; \
seed3^=(seed1<<7)^(seed2>>6); \
seed2+=(seed1>>4)^(seed3>>15); \
seed1^=(seed2<<9)+(seed3<<8); \
seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \
seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \
seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \
seed2+=seed1*seed3; \
seed1+=seed3 ^ (seed2>>2); \
seed2^=((signed int)seed2)>>17; \
unsigned int state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \
unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8); \
state = state + (wstate*(wstate^0xdddf97f5)); \
wstate = 0xABCB96F7 + (wstate>>1); \
state = LCGA*state + LCGC; \
wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
unsigned int v = (state ^ (state>>26)) + wstate; \
unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7); \
randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0); \
}
#define SQRT3 (numtyp) 1.7320508075688772935274463
#define saru(seed1, seed2, seed, timestep, randnum) \
{ \
unsigned int seed3 = seed + timestep; \
seed3 ^= (seed1 << 7) ^ (seed2 >> 6); \
seed2 += (seed1 >> 4) ^ (seed3 >> 15); \
seed1 ^= (seed2 << 9) + (seed3 << 8); \
seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1)); \
seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16)); \
seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22)); \
seed2 += seed1 * seed3; \
seed1 += seed3 ^ (seed2 >> 2); \
seed2 ^= ((signed int) seed2) >> 17; \
unsigned int state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14)); \
unsigned int wstate = (state + seed2) ^ (((signed int) state) >> 8); \
state = state + (wstate * (wstate ^ 0xdddf97f5)); \
wstate = 0xABCB96F7 + (wstate >> 1); \
state = LCGA * state + LCGC; \
wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \
unsigned int v = (state ^ (state >> 26)) + wstate; \
unsigned int s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7); \
randnum = SQRT3 * (s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0); \
}
#endif
// specifically implemented for steps = 1; high = 1.0; low = -1.0
@ -123,38 +116,39 @@ double dpd_tstat_gpu_bytes();
// Afshar et al. multiplies u in [-0.5;0.5] with sqrt(12)
#ifdef _USE_UNIFORM_SARU_TEA8
#define numtyp double
#define SQRT3 (numtyp)1.7320508075688772935274463
#define SQRT3 (numtyp) 1.7320508075688772935274463
#define k0 0xA341316C
#define k1 0xC8013EA4
#define k2 0xAD90777D
#define k3 0x7E95761E
#define delta 0x9e3779b9
#define rounds 8
#define saru(seed1, seed2, seed, timestep, randnum) { \
unsigned int seed3 = seed + timestep; \
seed3^=(seed1<<7)^(seed2>>6); \
seed2+=(seed1>>4)^(seed3>>15); \
seed1^=(seed2<<9)+(seed3<<8); \
seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \
seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \
seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \
seed2+=seed1*seed3; \
seed1+=seed3 ^ (seed2>>2); \
seed2^=((signed int)seed2)>>17; \
unsigned int state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \
unsigned int wstate = (state + seed2) ^ (((signed int)state)>>8); \
state = state + (wstate*(wstate^0xdddf97f5)); \
wstate = 0xABCB96F7 + (wstate>>1); \
unsigned int sum = 0; \
for (int i=0; i < rounds; i++) { \
sum += delta; \
state += ((wstate<<4) + k0)^(wstate + sum)^((wstate>>5) + k1); \
wstate += ((state<<4) + k2)^(state + sum)^((state>>5) + k3); \
} \
unsigned int v = (state ^ (state>>26)) + wstate; \
unsigned int s = (signed int)((v^(v>>20))*0x6957f5a7); \
randnum = SQRT3*(s*TWO_N32*(numtyp)2.0-(numtyp)1.0); \
}
#define saru(seed1, seed2, seed, timestep, randnum) \
{ \
unsigned int seed3 = seed + timestep; \
seed3 ^= (seed1 << 7) ^ (seed2 >> 6); \
seed2 += (seed1 >> 4) ^ (seed3 >> 15); \
seed1 ^= (seed2 << 9) + (seed3 << 8); \
seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1)); \
seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16)); \
seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22)); \
seed2 += seed1 * seed3; \
seed1 += seed3 ^ (seed2 >> 2); \
seed2 ^= ((signed int) seed2) >> 17; \
unsigned int state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14)); \
unsigned int wstate = (state + seed2) ^ (((signed int) state) >> 8); \
state = state + (wstate * (wstate ^ 0xdddf97f5)); \
wstate = 0xABCB96F7 + (wstate >> 1); \
unsigned int sum = 0; \
for (int i = 0; i < rounds; i++) { \
sum += delta; \
state += ((wstate << 4) + k0) ^ (wstate + sum) ^ ((wstate >> 5) + k1); \
wstate += ((state << 4) + k2) ^ (state + sum) ^ ((state >> 5) + k3); \
} \
unsigned int v = (state ^ (state >> 26)) + wstate; \
unsigned int s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7); \
randnum = SQRT3 * (s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0); \
}
#endif
// specifically implemented for steps = 1; high = 1.0; low = -1.0
@ -163,48 +157,48 @@ double dpd_tstat_gpu_bytes();
// This is used to compare with CPU DPD using RandMars::gaussian()
#ifdef _USE_GAUSSIAN_SARU_LCG
#define numtyp double
#define saru(seed1, seed2, seed, timestep, randnum) { \
unsigned int seed3 = seed + timestep; \
seed3^=(seed1<<7)^(seed2>>6); \
seed2+=(seed1>>4)^(seed3>>15); \
seed1^=(seed2<<9)+(seed3<<8); \
seed3^=0xA5366B4D*((seed2>>11) ^ (seed1<<1)); \
seed2+=0x72BE1579*((seed1<<4) ^ (seed3>>16)); \
seed1^=0x3F38A6ED*((seed3>>5) ^ (((signed int)seed2)>>22)); \
seed2+=seed1*seed3; \
seed1+=seed3 ^ (seed2>>2); \
seed2^=((signed int)seed2)>>17; \
unsigned int state=0x12345678; \
unsigned int wstate=12345678; \
state = 0x79dedea3*(seed1^(((signed int)seed1)>>14)); \
wstate = (state + seed2) ^ (((signed int)state)>>8); \
state = state + (wstate*(wstate^0xdddf97f5)); \
wstate = 0xABCB96F7 + (wstate>>1); \
unsigned int v, s; \
numtyp r1, r2, rsq; \
while (1) { \
state = LCGA*state + LCGC; \
wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
v = (state ^ (state>>26)) + wstate; \
s = (signed int)((v^(v>>20))*0x6957f5a7); \
r1 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0; \
state = LCGA*state + LCGC; \
wstate = wstate + oWeylOffset+((((signed int)wstate)>>31) & oWeylPeriod); \
v = (state ^ (state>>26)) + wstate; \
s = (signed int)((v^(v>>20))*0x6957f5a7); \
r2 = s*TWO_N32*(numtyp)2.0-(numtyp)1.0; \
rsq = r1 * r1 + r2 * r2; \
if (rsq < (numtyp)1.0) break; \
} \
numtyp fac = sqrt((numtyp)-2.0*log(rsq)/rsq); \
randnum = r2*fac; \
}
#define saru(seed1, seed2, seed, timestep, randnum) \
{ \
unsigned int seed3 = seed + timestep; \
seed3 ^= (seed1 << 7) ^ (seed2 >> 6); \
seed2 += (seed1 >> 4) ^ (seed3 >> 15); \
seed1 ^= (seed2 << 9) + (seed3 << 8); \
seed3 ^= 0xA5366B4D * ((seed2 >> 11) ^ (seed1 << 1)); \
seed2 += 0x72BE1579 * ((seed1 << 4) ^ (seed3 >> 16)); \
seed1 ^= 0x3F38A6ED * ((seed3 >> 5) ^ (((signed int) seed2) >> 22)); \
seed2 += seed1 * seed3; \
seed1 += seed3 ^ (seed2 >> 2); \
seed2 ^= ((signed int) seed2) >> 17; \
unsigned int state = 0x12345678; \
unsigned int wstate = 12345678; \
state = 0x79dedea3 * (seed1 ^ (((signed int) seed1) >> 14)); \
wstate = (state + seed2) ^ (((signed int) state) >> 8); \
state = state + (wstate * (wstate ^ 0xdddf97f5)); \
wstate = 0xABCB96F7 + (wstate >> 1); \
unsigned int v, s; \
numtyp r1, r2, rsq; \
while (1) { \
state = LCGA * state + LCGC; \
wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \
v = (state ^ (state >> 26)) + wstate; \
s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7); \
r1 = s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0; \
state = LCGA * state + LCGC; \
wstate = wstate + oWeylOffset + ((((signed int) wstate) >> 31) & oWeylPeriod); \
v = (state ^ (state >> 26)) + wstate; \
s = (signed int) ((v ^ (v >> 20)) * 0x6957f5a7); \
r2 = s * TWO_N32 * (numtyp) 2.0 - (numtyp) 1.0; \
rsq = r1 * r1 + r2 * r2; \
if (rsq < (numtyp) 1.0) break; \
} \
numtyp fac = sqrt((numtyp) -2.0 * log(rsq) / rsq); \
randnum = r2 * fac; \
}
#endif
/* ---------------------------------------------------------------------- */
PairDPDTstatGPU::PairDPDTstatGPU(LAMMPS *lmp) : PairDPDTstat(lmp),
gpu_mode(GPU_FORCE)
PairDPDTstatGPU::PairDPDTstatGPU(LAMMPS *lmp) : PairDPDTstat(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -226,31 +220,31 @@ PairDPDTstatGPU::~PairDPDTstatGPU()
void PairDPDTstatGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
// adjust sigma if target T is changing
if (t_start != t_stop) {
double delta = update->ntimestep - update->beginstep;
if (delta != 0.0) delta /= update->endstep - update->beginstep;
temperature = t_start + delta * (t_stop-t_start);
temperature = t_start + delta * (t_stop - t_start);
double boltz = force->boltz;
for (int i = 1; i <= atom->ntypes; i++)
for (int j = i; j <= atom->ntypes; j++)
sigma[i][j] = sigma[j][i] = sqrt(2.0*boltz*temperature*gamma[i][j]);
sigma[i][j] = sigma[j][i] = sqrt(2.0 * boltz * temperature * gamma[i][j]);
dpd_tstat_gpu_update_coeff(atom->ntypes+1, a0, gamma, sigma, cut);
dpd_tstat_gpu_update_coeff(atom->ntypes + 1, a0, gamma, sigma, cut);
}
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
double dtinvsqrt = 1.0/sqrt(update->dt);
double dtinvsqrt = 1.0 / sqrt(update->dt);
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -259,33 +253,26 @@ void PairDPDTstatGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = dpd_tstat_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->v, dtinvsqrt, seed,
update->ntimestep,
domain->boxlo, domain->prd);
firstneigh = dpd_tstat_gpu_compute_n(
neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success, atom->v, dtinvsqrt, seed, update->ntimestep, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
dpd_tstat_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success,
atom->tag, atom->v, dtinvsqrt, seed,
update->ntimestep,
atom->nlocal, domain->boxlo, domain->prd);
dpd_tstat_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh,
firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
success, atom->tag, atom->v, dtinvsqrt, seed, update->ntimestep,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -305,10 +292,9 @@ void PairDPDTstatGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
mcut = init_one(i,j);
mcut = init_one(i, j);
mcut *= mcut;
if (mcut > maxcut)
maxcut = mcut;
if (mcut > maxcut) maxcut = mcut;
cutsq[i][j] = cutsq[j][i] = mcut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -316,21 +302,15 @@ void PairDPDTstatGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = dpd_tstat_gpu_init(atom->ntypes+1, cutsq, a0, gamma, sigma,
cut, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success = dpd_tstat_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma, cut,
force->special_lj, atom->nlocal, atom->nlocal + atom->nghost,
mnf, maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -343,15 +323,15 @@ double PairDPDTstatGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,fpair;
double vxtmp,vytmp,vztmp,delvx,delvy,delvz;
double rsq,r,rinv,dot,wd,randnum,factor_dpd;
void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, fpair;
double vxtmp, vytmp, vztmp, delvx, delvy, delvz;
double rsq, r, rinv, dot, wd, randnum, factor_dpd;
int *jlist;
tagint itag,jtag;
tagint itag, jtag;
double **x = atom->x;
double **v = atom->v;
@ -359,8 +339,8 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */,
int *type = atom->type;
tagint *tag = atom->tag;
double *special_lj = force->special_lj;
double dtinvsqrt = 1.0/sqrt(update->dt);
int timestep = (int)update->ntimestep;
double dtinvsqrt = 1.0 / sqrt(update->dt);
int timestep = (int) update->ntimestep;
// loop over neighbors of my atoms
@ -385,23 +365,24 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
jtag = tag[j];
if (rsq < cutsq[itype][jtype]) {
r = sqrt(rsq);
if (r < EPSILON) continue; // r can be 0.0 in DPD systems
rinv = 1.0/r;
if (r < EPSILON) continue; // r can be 0.0 in DPD systems
rinv = 1.0 / r;
delvx = vxtmp - v[j][0];
delvy = vytmp - v[j][1];
delvz = vztmp - v[j][2];
dot = delx*delvx + dely*delvy + delz*delvz;
wd = 1.0 - r/cut[itype][jtype];
dot = delx * delvx + dely * delvy + delz * delvz;
wd = 1.0 - r / cut[itype][jtype];
unsigned int tag1=itag, tag2=jtag;
unsigned int tag1 = itag, tag2 = jtag;
if (tag1 > tag2) {
tag1 = jtag; tag2 = itag;
tag1 = jtag;
tag2 = itag;
}
randnum = 0.0;
@ -411,15 +392,15 @@ void PairDPDTstatGPU::cpu_compute(int start, int inum, int /* eflag */,
// drag force = -gamma * wd^2 * (delx dot delv) / r
// random force = sigma * wd * rnd * dtinvsqrt;
fpair = -gamma[itype][jtype]*wd*wd*dot*rinv;
fpair += sigma[itype][jtype]*wd*randnum*dtinvsqrt;
fpair *= factor_dpd*rinv;
fpair = -gamma[itype][jtype] * wd * wd * dot * rinv;
fpair += sigma[itype][jtype] * wd * randnum * dtinvsqrt;
fpair *= factor_dpd * rinv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (evflag) ev_tally_full(i,0.0,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, 0.0, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
#include "gpu_extra.h"
#include "memory.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "potential_file_reader.h"
#include "suffix.h"
@ -39,32 +37,26 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq,
int **host_type2rhor, int **host_type2z2r,
int *host_type2frho, double ***host_rhor_spline,
double ***host_z2r_spline, double ***host_frho_spline,
double** host_cutsq, double rdr, double rdrho, double rhomax,
int nrhor, int nrho, int nz2r, int nfrho, int nr,
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen, int &fp_size);
int eam_alloy_gpu_init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline,
double ***host_z2r_spline, double ***host_frho_spline, double **host_cutsq,
double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r,
int nfrho, int nr, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
int &fp_size);
void eam_alloy_gpu_clear();
int** eam_alloy_gpu_compute_n(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success,
int &inum, void **fp_ptr);
void eam_alloy_gpu_compute(const int ago, const int inum_full, const int nlocal,
const int nall,double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, void **fp_ptr);
void eam_alloy_gpu_compute_force(int *ilist, const bool eflag, const bool vflag,
const bool eatom, const bool vatom);
int **eam_alloy_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success, int &inum,
void **fp_ptr);
void eam_alloy_gpu_compute(const int ago, const int inum_full, const int nlocal, const int nall,
double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
int &host_start, const double cpu_time, bool &success, void **fp_ptr);
void eam_alloy_gpu_compute_force(int *ilist, const bool eflag, const bool vflag, const bool eatom,
const bool vatom);
double eam_alloy_gpu_bytes();
/* ---------------------------------------------------------------------- */
@ -98,7 +90,7 @@ double PairEAMAlloyGPU::memory_usage()
void PairEAMAlloyGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
// compute density on each atom on GPU
@ -109,7 +101,7 @@ void PairEAMAlloyGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -118,27 +110,24 @@ void PairEAMAlloyGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = eam_alloy_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, inum_dev, &fp_pinned);
} else { // gpu_mode == GPU_FORCE
firstneigh = eam_alloy_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial, atom->special, eflag,
vflag, eflag_atom, vflag_atom, host_start, &ilist,
&numneigh, cpu_time, success, inum_dev, &fp_pinned);
} else { // gpu_mode == GPU_FORCE
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
eam_alloy_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, &fp_pinned);
eam_alloy_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type, ilist, numneigh,
firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
success, &fp_pinned);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
// communicate derivative of embedding function
@ -169,10 +158,9 @@ void PairEAMAlloyGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -180,23 +168,17 @@ void PairEAMAlloyGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int fp_size;
int mnf = 5e-2 * neighbor->oneatom;
int success = eam_alloy_gpu_init(atom->ntypes+1, cutforcesq, type2rhor, type2z2r,
type2frho, rhor_spline, z2r_spline, frho_spline,
cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr,
atom->nlocal, atom->nlocal+atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen, fp_size);
GPU_EXTRA::check_flag(success,error,world);
int success = eam_alloy_gpu_init(
atom->ntypes + 1, cutforcesq, type2rhor, type2z2r, type2frho, rhor_spline, z2r_spline,
frho_spline, cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen, fp_size);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
if (fp_size == sizeof(double))
fp_single = false;
else
@ -207,64 +189,63 @@ void PairEAMAlloyGPU::init_style()
/* ---------------------------------------------------------------------- */
double PairEAMAlloyGPU::single(int i, int j, int itype, int jtype,
double rsq, double /* factor_coul */,
double /* factor_lj */, double &fforce)
double PairEAMAlloyGPU::single(int i, int j, int itype, int jtype, double rsq,
double /* factor_coul */, double /* factor_lj */, double &fforce)
{
int m;
double r,p,rhoip,rhojp,z2,z2p,recip,phi,phip,psip;
double r, p, rhoip, rhojp, z2, z2p, recip, phi, phip, psip;
double *coeff;
r = sqrt(rsq);
p = r*rdr + 1.0;
m = static_cast<int> (p);
m = MIN(m,nr-1);
p = r * rdr + 1.0;
m = static_cast<int>(p);
m = MIN(m, nr - 1);
p -= m;
p = MIN(p,1.0);
p = MIN(p, 1.0);
coeff = rhor_spline[type2rhor[itype][jtype]][m];
rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
rhoip = (coeff[0] * p + coeff[1]) * p + coeff[2];
coeff = rhor_spline[type2rhor[jtype][itype]][m];
rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
rhojp = (coeff[0] * p + coeff[1]) * p + coeff[2];
coeff = z2r_spline[type2z2r[itype][jtype]][m];
z2p = (coeff[0]*p + coeff[1])*p + coeff[2];
z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
z2p = (coeff[0] * p + coeff[1]) * p + coeff[2];
z2 = ((coeff[3] * p + coeff[4]) * p + coeff[5]) * p + coeff[6];
double fp_i,fp_j;
double fp_i, fp_j;
if (fp_single == false) {
fp_i = ((double*)fp_pinned)[i];
fp_j = ((double*)fp_pinned)[j];
fp_i = ((double *) fp_pinned)[i];
fp_j = ((double *) fp_pinned)[j];
} else {
fp_i = ((float*)fp_pinned)[i];
fp_j = ((float*)fp_pinned)[j];
fp_i = ((float *) fp_pinned)[i];
fp_j = ((float *) fp_pinned)[j];
}
recip = 1.0/r;
phi = z2*recip;
phip = z2p*recip - phi*recip;
psip = fp_i*rhojp + fp_j*rhoip + phip;
fforce = -psip*recip;
recip = 1.0 / r;
phi = z2 * recip;
phip = z2p * recip - phi * recip;
psip = fp_i * rhojp + fp_j * rhoip + phip;
fforce = -psip * recip;
return phi;
}
/* ---------------------------------------------------------------------- */
int PairEAMAlloyGPU::pack_forward_comm(int n, int *list, double *buf,
int /* pbc_flag */, int * /* pbc */)
int PairEAMAlloyGPU::pack_forward_comm(int n, int *list, double *buf, int /* pbc_flag */,
int * /* pbc */)
{
int i,j,m;
int i, j, m;
m = 0;
if (fp_single) {
float *fp_ptr = (float *)fp_pinned;
float *fp_ptr = (float *) fp_pinned;
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = static_cast<double>(fp_ptr[j]);
}
} else {
double *fp_ptr = (double *)fp_pinned;
double *fp_ptr = (double *) fp_pinned;
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = fp_ptr[j];
@ -278,15 +259,15 @@ int PairEAMAlloyGPU::pack_forward_comm(int n, int *list, double *buf,
void PairEAMAlloyGPU::unpack_forward_comm(int n, int first, double *buf)
{
int i,m,last;
int i, m, last;
m = 0;
last = first + n;
if (fp_single) {
float *fp_ptr = (float *)fp_pinned;
float *fp_ptr = (float *) fp_pinned;
for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
} else {
double *fp_ptr = (double *)fp_pinned;
double *fp_ptr = (double *) fp_pinned;
for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
}
}
@ -298,23 +279,22 @@ void PairEAMAlloyGPU::unpack_forward_comm(int n, int first, double *buf)
void PairEAMAlloyGPU::coeff(int narg, char **arg)
{
int i,j;
int i, j;
if (!allocated) allocate();
if (narg != 3 + atom->ntypes)
error->all(FLERR,"Incorrect args for pair coefficients");
if (narg != 3 + atom->ntypes) error->all(FLERR, "Incorrect args for pair coefficients");
// insure I,J args are * *
if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
error->all(FLERR,"Incorrect args for pair coefficients");
if (strcmp(arg[0], "*") != 0 || strcmp(arg[1], "*") != 0)
error->all(FLERR, "Incorrect args for pair coefficients");
// read EAM setfl file
if (setfl) {
for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i];
delete [] setfl->elements;
for (i = 0; i < setfl->nelements; i++) delete[] setfl->elements[i];
delete[] setfl->elements;
memory->destroy(setfl->mass);
memory->destroy(setfl->frho);
memory->destroy(setfl->rhor);
@ -328,22 +308,23 @@ void PairEAMAlloyGPU::coeff(int narg, char **arg)
// map[i] = which element the Ith atom type is, -1 if "NULL"
for (i = 3; i < narg; i++) {
if (strcmp(arg[i],"NULL") == 0) {
map[i-2] = -1;
if (strcmp(arg[i], "NULL") == 0) {
map[i - 2] = -1;
continue;
}
for (j = 0; j < setfl->nelements; j++)
if (strcmp(arg[i],setfl->elements[j]) == 0) break;
if (j < setfl->nelements) map[i-2] = j;
else error->all(FLERR,"No matching element in EAM potential file");
if (strcmp(arg[i], setfl->elements[j]) == 0) break;
if (j < setfl->nelements)
map[i - 2] = j;
else
error->all(FLERR, "No matching element in EAM potential file");
}
// clear setflag since coeff() called once with I,J = * *
int n = atom->ntypes;
for (i = 1; i <= n; i++)
for (j = i; j <= n; j++)
setflag[i][j] = 0;
for (j = i; j <= n; j++) setflag[i][j] = 0;
// set setflag i,j for type pairs where both are mapped to elements
// set mass of atom type if i = j
@ -353,14 +334,14 @@ void PairEAMAlloyGPU::coeff(int narg, char **arg)
for (j = i; j <= n; j++) {
if (map[i] >= 0 && map[j] >= 0) {
setflag[i][j] = 1;
if (i == j) atom->set_mass(FLERR,i,setfl->mass[map[i]]);
if (i == j) atom->set_mass(FLERR, i, setfl->mass[map[i]]);
count++;
}
scale[i][j] = 1.0;
}
}
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients");
}
/* ----------------------------------------------------------------------
@ -378,8 +359,7 @@ void PairEAMAlloyGPU::read_file(char *filename)
// transparently convert units for supported conversions
int unit_convert = reader.get_unit_convert();
double conversion_factor = utils::get_conversion_factor(utils::ENERGY,
unit_convert);
double conversion_factor = utils::get_conversion_factor(utils::ENERGY, unit_convert);
try {
reader.skip_line();
reader.skip_line();
@ -389,10 +369,10 @@ void PairEAMAlloyGPU::read_file(char *filename)
ValueTokenizer values = reader.next_values(1);
file->nelements = values.next_int();
if ((int)values.count() != file->nelements + 1)
error->one(FLERR,"Incorrect element names in EAM potential file");
if ((int) values.count() != file->nelements + 1)
error->one(FLERR, "Incorrect element names in EAM potential file");
file->elements = new char*[file->nelements];
file->elements = new char *[file->nelements];
for (int i = 0; i < file->nelements; i++)
file->elements[i] = utils::strdup(values.next_string());
@ -401,12 +381,12 @@ void PairEAMAlloyGPU::read_file(char *filename)
values = reader.next_values(5);
file->nrho = values.next_int();
file->drho = values.next_double();
file->nr = values.next_int();
file->dr = values.next_double();
file->cut = values.next_double();
file->nr = values.next_int();
file->dr = values.next_double();
file->cut = values.next_double();
if ((file->nrho <= 0) || (file->nr <= 0) || (file->dr <= 0.0))
error->one(FLERR,"Invalid EAM potential file");
error->one(FLERR, "Invalid EAM potential file");
memory->create(file->mass, file->nelements, "pair:mass");
memory->create(file->frho, file->nelements, file->nrho + 1, "pair:frho");
@ -415,14 +395,13 @@ void PairEAMAlloyGPU::read_file(char *filename)
for (int i = 0; i < file->nelements; i++) {
values = reader.next_values(2);
values.next_int(); // ignore
values.next_int(); // ignore
file->mass[i] = values.next_double();
reader.next_dvector(&file->frho[i][1], file->nrho);
reader.next_dvector(&file->rhor[i][1], file->nr);
if (unit_convert) {
for (int j = 1; j < file->nrho; ++j)
file->frho[i][j] *= conversion_factor;
for (int j = 1; j < file->nrho; ++j) file->frho[i][j] *= conversion_factor;
}
}
@ -430,8 +409,7 @@ void PairEAMAlloyGPU::read_file(char *filename)
for (int j = 0; j <= i; j++) {
reader.next_dvector(&file->z2r[i][j][1], file->nr);
if (unit_convert) {
for (int k = 1; k < file->nr; ++k)
file->z2r[i][j][k] *= conversion_factor;
for (int k = 1; k < file->nr; ++k) file->z2r[i][j][k] *= conversion_factor;
}
}
}
@ -451,7 +429,7 @@ void PairEAMAlloyGPU::read_file(char *filename)
// allocate memory on other procs
if (comm->me != 0) {
file->elements = new char*[file->nelements];
file->elements = new char *[file->nelements];
for (int i = 0; i < file->nelements; i++) file->elements[i] = nullptr;
memory->create(file->mass, file->nelements, "pair:mass");
memory->create(file->frho, file->nelements, file->nrho + 1, "pair:frho");
@ -477,9 +455,7 @@ void PairEAMAlloyGPU::read_file(char *filename)
// broadcast file->z2r
for (int i = 0; i < file->nelements; i++) {
for (int j = 0; j <= i; j++) {
MPI_Bcast(&file->z2r[i][j][1], file->nr, MPI_DOUBLE, 0, world);
}
for (int j = 0; j <= i; j++) { MPI_Bcast(&file->z2r[i][j][1], file->nr, MPI_DOUBLE, 0, world); }
}
}
@ -489,7 +465,7 @@ void PairEAMAlloyGPU::read_file(char *filename)
void PairEAMAlloyGPU::file2array()
{
int i,j,m,n;
int i, j, m, n;
int ntypes = atom->ntypes;
// set function params directly from setfl file
@ -498,7 +474,7 @@ void PairEAMAlloyGPU::file2array()
nr = setfl->nr;
drho = setfl->drho;
dr = setfl->dr;
rhomax = (nrho-1) * drho;
rhomax = (nrho - 1) * drho;
// ------------------------------------------------------------------
// setup frho arrays
@ -509,7 +485,7 @@ void PairEAMAlloyGPU::file2array()
nfrho = setfl->nelements + 1;
memory->destroy(frho);
memory->create(frho,nfrho,nrho+1,"pair:frho");
memory->create(frho, nfrho, nrho + 1, "pair:frho");
// copy each element's frho to global frho
@ -519,15 +495,17 @@ void PairEAMAlloyGPU::file2array()
// add extra frho of zeroes for non-EAM types to point to (pair hybrid)
// this is necessary b/c fp is still computed for non-EAM atoms
for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
for (m = 1; m <= nrho; m++) frho[nfrho - 1][m] = 0.0;
// type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
// if atom type doesn't point to element (non-EAM atom in pair hybrid)
// then map it to last frho array of zeroes
for (i = 1; i <= ntypes; i++)
if (map[i] >= 0) type2frho[i] = map[i];
else type2frho[i] = nfrho-1;
if (map[i] >= 0)
type2frho[i] = map[i];
else
type2frho[i] = nfrho - 1;
// ------------------------------------------------------------------
// setup rhor arrays
@ -538,7 +516,7 @@ void PairEAMAlloyGPU::file2array()
nrhor = setfl->nelements;
memory->destroy(rhor);
memory->create(rhor,nrhor,nr+1,"pair:rhor");
memory->create(rhor, nrhor, nr + 1, "pair:rhor");
// copy each element's rhor to global rhor
@ -550,8 +528,7 @@ void PairEAMAlloyGPU::file2array()
// OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
for (i = 1; i <= ntypes; i++)
for (j = 1; j <= ntypes; j++)
type2rhor[i][j] = map[i];
for (j = 1; j <= ntypes; j++) type2rhor[i][j] = map[i];
// ------------------------------------------------------------------
// setup z2r arrays
@ -560,9 +537,9 @@ void PairEAMAlloyGPU::file2array()
// allocate z2r arrays
// nz2r = N*(N+1)/2 where N = # of setfl elements
nz2r = setfl->nelements * (setfl->nelements+1) / 2;
nz2r = setfl->nelements * (setfl->nelements + 1) / 2;
memory->destroy(z2r);
memory->create(z2r,nz2r,nr+1,"pair:z2r");
memory->create(z2r, nz2r, nr + 1, "pair:z2r");
// copy each element pair z2r to global z2r, only for I >= J
@ -581,7 +558,7 @@ void PairEAMAlloyGPU::file2array()
// type2z2r is not used by non-opt
// but set type2z2r to 0 since accessed by opt
int irow,icol;
int irow, icol;
for (i = 1; i <= ntypes; i++) {
for (j = 1; j <= ntypes; j++) {
irow = map[i];

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
#include "gpu_extra.h"
#include "memory.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "potential_file_reader.h"
#include "suffix.h"
@ -39,31 +37,26 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int eam_fs_gpu_init(const int ntypes, double host_cutforcesq,
int **host_type2rhor, int **host_type2z2r,
int *host_type2frho, double ***host_rhor_spline,
double ***host_z2r_spline, double ***host_frho_spline,
double** host_cutsq, double rdr, double rdrho, double rhomax,
int nrhor, int nrho, int nz2r, int nfrho, int nr,
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode,
FILE *screen, int &fp_size);
int eam_fs_gpu_init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline,
double ***host_z2r_spline, double ***host_frho_spline, double **host_cutsq,
double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r,
int nfrho, int nr, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
int &fp_size);
void eam_fs_gpu_clear();
int** eam_fs_gpu_compute_n(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, int &inum, void **fp_ptr);
void eam_fs_gpu_compute(const int ago, const int inum_full, const int nlocal,
const int nall,double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, void **fp_ptr);
void eam_fs_gpu_compute_force(int *ilist, const bool eflag, const bool vflag,
const bool eatom, const bool vatom);
int **eam_fs_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success, int &inum,
void **fp_ptr);
void eam_fs_gpu_compute(const int ago, const int inum_full, const int nlocal, const int nall,
double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
int &host_start, const double cpu_time, bool &success, void **fp_ptr);
void eam_fs_gpu_compute_force(int *ilist, const bool eflag, const bool vflag, const bool eatom,
const bool vatom);
double eam_fs_gpu_bytes();
/* ---------------------------------------------------------------------- */
@ -97,7 +90,7 @@ double PairEAMFSGPU::memory_usage()
void PairEAMFSGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
// compute density on each atom on GPU
@ -108,7 +101,7 @@ void PairEAMFSGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -117,27 +110,24 @@ void PairEAMFSGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = eam_fs_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, inum_dev, &fp_pinned);
} else { // gpu_mode == GPU_FORCE
firstneigh = eam_fs_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success, inum_dev, &fp_pinned);
} else { // gpu_mode == GPU_FORCE
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
eam_fs_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, &fp_pinned);
eam_fs_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type, ilist, numneigh,
firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
success, &fp_pinned);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
// communicate derivative of embedding function
@ -168,10 +158,9 @@ void PairEAMFSGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -179,23 +168,17 @@ void PairEAMFSGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int fp_size;
int mnf = 5e-2 * neighbor->oneatom;
int success = eam_fs_gpu_init(atom->ntypes+1, cutforcesq, type2rhor, type2z2r,
type2frho, rhor_spline, z2r_spline, frho_spline,
cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr,
atom->nlocal, atom->nlocal+atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen, fp_size);
GPU_EXTRA::check_flag(success,error,world);
int success = eam_fs_gpu_init(
atom->ntypes + 1, cutforcesq, type2rhor, type2z2r, type2frho, rhor_spline, z2r_spline,
frho_spline, cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen, fp_size);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
if (fp_size == sizeof(double))
fp_single = false;
else
@ -206,64 +189,63 @@ void PairEAMFSGPU::init_style()
/* ---------------------------------------------------------------------- */
// Evaluate the pair term for one (i,j) pair at squared distance rsq.
// Returns phi (= z2 / r) and stores -psip / r in fforce.
// The two unnamed double parameters (factor_coul, factor_lj) are not used.
// NOTE(review): this span looks like a commit-diff rendering - each statement
// that was reformatted is listed twice (old spacing first, new spacing second);
// both copies compute the same thing.
double PairEAMFSGPU::single(int i, int j, int itype, int jtype,
double rsq, double /* factor_coul */,
double /* factor_lj */, double &fforce)
double PairEAMFSGPU::single(int i, int j, int itype, int jtype, double rsq,
double /* factor_coul */, double /* factor_lj */, double &fforce)
{
int m;
double r,p,rhoip,rhojp,z2,z2p,recip,phi,phip,psip;
double r, p, rhoip, rhojp, z2, z2p, recip, phi, phip, psip;
double *coeff;
// locate the spline interval: m is the integer part of r*rdr + 1 (capped at
// nr-1), p is the remaining fraction (capped at 1.0)
r = sqrt(rsq);
p = r*rdr + 1.0;
m = static_cast<int> (p);
m = MIN(m,nr-1);
p = r * rdr + 1.0;
m = static_cast<int>(p);
m = MIN(m, nr - 1);
p -= m;
p = MIN(p,1.0);
p = MIN(p, 1.0);
// quadratic in p from coefficients 0..2 of the rhor spline; the table is
// looked up with (itype,jtype) and then (jtype,itype) because type2rhor is
// indexed by ordered type pair
// presumably these are density derivatives - confirm against the spline setup
coeff = rhor_spline[type2rhor[itype][jtype]][m];
rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
rhoip = (coeff[0] * p + coeff[1]) * p + coeff[2];
coeff = rhor_spline[type2rhor[jtype][itype]][m];
rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
rhojp = (coeff[0] * p + coeff[1]) * p + coeff[2];
// z2r spline: coefficients 0..2 give z2p (quadratic), 3..6 give z2 (cubic)
coeff = z2r_spline[type2z2r[itype][jtype]][m];
z2p = (coeff[0]*p + coeff[1])*p + coeff[2];
z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
z2p = (coeff[0] * p + coeff[1]) * p + coeff[2];
z2 = ((coeff[3] * p + coeff[4]) * p + coeff[5]) * p + coeff[6];
// fp_pinned is an untyped buffer; fp_single selects whether it is read as
// float or double
double fp_i,fp_j;
double fp_i, fp_j;
if (fp_single == false) {
fp_i = ((double*)fp_pinned)[i];
fp_j = ((double*)fp_pinned)[j];
fp_i = ((double *) fp_pinned)[i];
fp_j = ((double *) fp_pinned)[j];
} else {
fp_i = ((float*)fp_pinned)[i];
fp_j = ((float*)fp_pinned)[j];
fp_i = ((float *) fp_pinned)[i];
fp_j = ((float *) fp_pinned)[j];
}
// NOTE(review): recip = 1/r is not guarded against r == 0
recip = 1.0/r;
phi = z2*recip;
phip = z2p*recip - phi*recip;
psip = fp_i*rhojp + fp_j*rhoip + phip;
fforce = -psip*recip;
recip = 1.0 / r;
phi = z2 * recip;
phip = z2p * recip - phi * recip;
psip = fp_i * rhojp + fp_j * rhoip + phip;
fforce = -psip * recip;
return phi;
}
/* ---------------------------------------------------------------------- */
int PairEAMFSGPU::pack_forward_comm(int n, int *list, double *buf,
int /* pbc_flag */, int * /* pbc */)
int PairEAMFSGPU::pack_forward_comm(int n, int *list, double *buf, int /* pbc_flag */,
int * /* pbc */)
{
int i,j,m;
int i, j, m;
m = 0;
if (fp_single) {
float *fp_ptr = (float *)fp_pinned;
float *fp_ptr = (float *) fp_pinned;
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = static_cast<double>(fp_ptr[j]);
}
} else {
double *fp_ptr = (double *)fp_pinned;
double *fp_ptr = (double *) fp_pinned;
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = fp_ptr[j];
@ -277,15 +259,15 @@ int PairEAMFSGPU::pack_forward_comm(int n, int *list, double *buf,
// Copy n values from buf into fp_pinned at indices first .. first+n-1.
// fp_pinned is interpreted as float when fp_single is set, otherwise as
// double; in the float case each double from buf is narrowed on assignment.
// NOTE(review): this span looks like a commit-diff rendering - reformatted
// declarations are listed twice (old spacing first, new spacing second).
void PairEAMFSGPU::unpack_forward_comm(int n, int first, double *buf)
{
int i,m,last;
int i, m, last;
m = 0;
last = first + n;
if (fp_single) {
float *fp_ptr = (float *)fp_pinned;
float *fp_ptr = (float *) fp_pinned;
for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
} else {
double *fp_ptr = (double *)fp_pinned;
double *fp_ptr = (double *) fp_pinned;
for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
}
}
@ -297,23 +279,22 @@ void PairEAMFSGPU::unpack_forward_comm(int n, int first, double *buf)
void PairEAMFSGPU::coeff(int narg, char **arg)
{
int i,j;
int i, j;
if (!allocated) allocate();
if (narg != 3 + atom->ntypes)
error->all(FLERR,"Incorrect args for pair coefficients");
if (narg != 3 + atom->ntypes) error->all(FLERR, "Incorrect args for pair coefficients");
// ensure I,J args are * *
if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
error->all(FLERR,"Incorrect args for pair coefficients");
if (strcmp(arg[0], "*") != 0 || strcmp(arg[1], "*") != 0)
error->all(FLERR, "Incorrect args for pair coefficients");
// read EAM Finnis-Sinclair file
if (fs) {
for (i = 0; i < fs->nelements; i++) delete [] fs->elements[i];
delete [] fs->elements;
for (i = 0; i < fs->nelements; i++) delete[] fs->elements[i];
delete[] fs->elements;
memory->destroy(fs->mass);
memory->destroy(fs->frho);
memory->destroy(fs->rhor);
@ -327,22 +308,23 @@ void PairEAMFSGPU::coeff(int narg, char **arg)
// map[i] = which element the Ith atom type is, -1 if "NULL"
for (i = 3; i < narg; i++) {
if (strcmp(arg[i],"NULL") == 0) {
map[i-2] = -1;
if (strcmp(arg[i], "NULL") == 0) {
map[i - 2] = -1;
continue;
}
for (j = 0; j < fs->nelements; j++)
if (strcmp(arg[i],fs->elements[j]) == 0) break;
if (j < fs->nelements) map[i-2] = j;
else error->all(FLERR,"No matching element in EAM potential file");
if (strcmp(arg[i], fs->elements[j]) == 0) break;
if (j < fs->nelements)
map[i - 2] = j;
else
error->all(FLERR, "No matching element in EAM potential file");
}
// clear setflag since coeff() called once with I,J = * *
int n = atom->ntypes;
for (i = 1; i <= n; i++)
for (j = i; j <= n; j++)
setflag[i][j] = 0;
for (j = i; j <= n; j++) setflag[i][j] = 0;
// set setflag i,j for type pairs where both are mapped to elements
// set mass of atom type if i = j
@ -352,14 +334,14 @@ void PairEAMFSGPU::coeff(int narg, char **arg)
for (j = i; j <= n; j++) {
if (map[i] >= 0 && map[j] >= 0) {
setflag[i][j] = 1;
if (i == j) atom->set_mass(FLERR,i,fs->mass[map[i]]);
if (i == j) atom->set_mass(FLERR, i, fs->mass[map[i]]);
count++;
}
scale[i][j] = 1.0;
}
}
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
if (count == 0) error->all(FLERR, "Incorrect args for pair coefficients");
}
/* ----------------------------------------------------------------------
@ -372,14 +354,12 @@ void PairEAMFSGPU::read_file(char *filename)
// read potential file
if (comm->me == 0) {
PotentialFileReader reader(PairEAM::lmp, filename, "eam/fs",
unit_convert_flag);
PotentialFileReader reader(PairEAM::lmp, filename, "eam/fs", unit_convert_flag);
// transparently convert units for supported conversions
int unit_convert = reader.get_unit_convert();
double conversion_factor = utils::get_conversion_factor(utils::ENERGY,
unit_convert);
double conversion_factor = utils::get_conversion_factor(utils::ENERGY, unit_convert);
try {
reader.skip_line();
reader.skip_line();
@ -389,10 +369,10 @@ void PairEAMFSGPU::read_file(char *filename)
ValueTokenizer values = reader.next_values(1);
file->nelements = values.next_int();
if ((int)values.count() != file->nelements + 1)
error->one(FLERR,"Incorrect element names in EAM potential file");
if ((int) values.count() != file->nelements + 1)
error->one(FLERR, "Incorrect element names in EAM potential file");
file->elements = new char*[file->nelements];
file->elements = new char *[file->nelements];
for (int i = 0; i < file->nelements; i++) {
const std::string word = values.next_string();
file->elements[i] = utils::strdup(word);
@ -403,13 +383,13 @@ void PairEAMFSGPU::read_file(char *filename)
values = reader.next_values(5);
file->nrho = values.next_int();
file->drho = values.next_double();
file->nr = values.next_int();
file->dr = values.next_double();
file->cut = values.next_double();
file->nr = values.next_int();
file->dr = values.next_double();
file->cut = values.next_double();
rhomax = 0.0;
if ((file->nrho <= 0) || (file->nr <= 0) || (file->dr <= 0.0))
error->one(FLERR,"Invalid EAM potential file");
error->one(FLERR, "Invalid EAM potential file");
memory->create(file->mass, file->nelements, "pair:mass");
memory->create(file->frho, file->nelements, file->nrho + 1, "pair:frho");
@ -418,13 +398,12 @@ void PairEAMFSGPU::read_file(char *filename)
for (int i = 0; i < file->nelements; i++) {
values = reader.next_values(2);
values.next_int(); // ignore
values.next_int(); // ignore
file->mass[i] = values.next_double();
reader.next_dvector(&file->frho[i][1], file->nrho);
if (unit_convert) {
for (int j = 1; j <= file->nrho; ++j)
file->frho[i][j] *= conversion_factor;
for (int j = 1; j <= file->nrho; ++j) file->frho[i][j] *= conversion_factor;
}
for (int j = 0; j < file->nelements; j++) {
@ -436,8 +415,7 @@ void PairEAMFSGPU::read_file(char *filename)
for (int j = 0; j <= i; j++) {
reader.next_dvector(&file->z2r[i][j][1], file->nr);
if (unit_convert) {
for (int k = 1; k <= file->nr; ++k)
file->z2r[i][j][k] *= conversion_factor;
for (int k = 1; k <= file->nr; ++k) file->z2r[i][j][k] *= conversion_factor;
}
}
}
@ -458,7 +436,7 @@ void PairEAMFSGPU::read_file(char *filename)
// allocate memory on other procs
if (comm->me != 0) {
file->elements = new char*[file->nelements];
file->elements = new char *[file->nelements];
for (int i = 0; i < file->nelements; i++) file->elements[i] = nullptr;
memory->create(file->mass, file->nelements, "pair:mass");
memory->create(file->frho, file->nelements, file->nrho + 1, "pair:frho");
@ -487,9 +465,7 @@ void PairEAMFSGPU::read_file(char *filename)
// broadcast file->z2r
for (int i = 0; i < file->nelements; i++) {
for (int j = 0; j <= i; j++) {
MPI_Bcast(&file->z2r[i][j][1], file->nr, MPI_DOUBLE, 0, world);
}
for (int j = 0; j <= i; j++) { MPI_Bcast(&file->z2r[i][j][1], file->nr, MPI_DOUBLE, 0, world); }
}
}
@ -499,7 +475,7 @@ void PairEAMFSGPU::read_file(char *filename)
void PairEAMFSGPU::file2array()
{
int i,j,m,n;
int i, j, m, n;
int ntypes = atom->ntypes;
// set function params directly from fs file
@ -508,7 +484,7 @@ void PairEAMFSGPU::file2array()
nr = fs->nr;
drho = fs->drho;
dr = fs->dr;
rhomax = (nrho-1) * drho;
rhomax = (nrho - 1) * drho;
// ------------------------------------------------------------------
// setup frho arrays
@ -519,7 +495,7 @@ void PairEAMFSGPU::file2array()
nfrho = fs->nelements + 1;
memory->destroy(frho);
memory->create(frho,nfrho,nrho+1,"pair:frho");
memory->create(frho, nfrho, nrho + 1, "pair:frho");
// copy each element's frho to global frho
@ -529,15 +505,17 @@ void PairEAMFSGPU::file2array()
// add extra frho of zeroes for non-EAM types to point to (pair hybrid)
// this is necessary b/c fp is still computed for non-EAM atoms
for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
for (m = 1; m <= nrho; m++) frho[nfrho - 1][m] = 0.0;
// type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
// if atom type doesn't point to element (non-EAM atom in pair hybrid)
// then map it to last frho array of zeroes
for (i = 1; i <= ntypes; i++)
if (map[i] >= 0) type2frho[i] = map[i];
else type2frho[i] = nfrho-1;
if (map[i] >= 0)
type2frho[i] = map[i];
else
type2frho[i] = nfrho - 1;
// ------------------------------------------------------------------
// setup rhor arrays
@ -548,7 +526,7 @@ void PairEAMFSGPU::file2array()
nrhor = fs->nelements * fs->nelements;
memory->destroy(rhor);
memory->create(rhor,nrhor,nr+1,"pair:rhor");
memory->create(rhor, nrhor, nr + 1, "pair:rhor");
// copy each element pair rhor to global rhor
@ -564,8 +542,7 @@ void PairEAMFSGPU::file2array()
// OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
for (i = 1; i <= ntypes; i++)
for (j = 1; j <= ntypes; j++)
type2rhor[i][j] = map[i] * fs->nelements + map[j];
for (j = 1; j <= ntypes; j++) type2rhor[i][j] = map[i] * fs->nelements + map[j];
// ------------------------------------------------------------------
// setup z2r arrays
@ -574,9 +551,9 @@ void PairEAMFSGPU::file2array()
// allocate z2r arrays
// nz2r = N*(N+1)/2 where N = # of fs elements
nz2r = fs->nelements * (fs->nelements+1) / 2;
nz2r = fs->nelements * (fs->nelements + 1) / 2;
memory->destroy(z2r);
memory->create(z2r,nz2r,nr+1,"pair:z2r");
memory->create(z2r, nz2r, nr + 1, "pair:z2r");
// copy each element pair z2r to global z2r, only for I >= J
@ -595,7 +572,7 @@ void PairEAMFSGPU::file2array()
// type2z2r is not used by non-opt
// but set type2z2r to 0 since accessed by opt
int irow,icol;
int irow, icol;
for (i = 1; i <= ntypes; i++) {
for (j = 1; j <= ntypes; j++) {
irow = map[i];

View File

@ -24,7 +24,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -36,31 +35,25 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// Prototypes for the EAM entry points implemented outside this file.
// As used by the visible callers in this file:
//  - eam_gpu_init: its int result is handed to GPU_EXTRA::check_flag;
//    gpu_mode and fp_size are passed by reference, and the caller compares
//    fp_size with sizeof(double) to choose fp_single.
//  - eam_gpu_compute_n: called when gpu_mode != GPU_FORCE; its return value
//    is stored as firstneigh, and ilist/jnum are passed by address.
//  - eam_gpu_compute: called when gpu_mode == GPU_FORCE with the host
//    neighbor list arrays (ilist, numneigh, firstneigh).
//  - both compute calls receive &fp_pinned as fp_ptr; when success comes
//    back false the caller raises "Insufficient memory on accelerator".
//  - eam_gpu_compute_force, eam_gpu_clear, eam_gpu_bytes: call sites are not
//    visible in this span.
// NOTE(review): this span looks like a commit-diff rendering - every
// prototype is listed twice (old line wrapping first, new wrapping second).
int eam_gpu_init(const int ntypes, double host_cutforcesq,
int **host_type2rhor, int **host_type2z2r,
int *host_type2frho, double ***host_rhor_spline,
double ***host_z2r_spline, double ***host_frho_spline,
double** host_cutsq, double rdr, double rdrho, double rhomax,
int nrhor, int nrho, int nz2r, int nfrho, int nr,
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode,
FILE *screen, int &fp_size);
int eam_gpu_init(const int ntypes, double host_cutforcesq, int **host_type2rhor,
int **host_type2z2r, int *host_type2frho, double ***host_rhor_spline,
double ***host_z2r_spline, double ***host_frho_spline, double **host_cutsq,
double rdr, double rdrho, double rhomax, int nrhor, int nrho, int nz2r, int nfrho,
int nr, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
int &fp_size);
void eam_gpu_clear();
int** eam_gpu_compute_n(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, int &inum, void **fp_ptr);
void eam_gpu_compute(const int ago, const int inum_full, const int nlocal,
const int nall,double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, void **fp_ptr);
void eam_gpu_compute_force(int *ilist, const bool eflag, const bool vflag,
const bool eatom, const bool vatom);
int **eam_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, int &inum, void **fp_ptr);
void eam_gpu_compute(const int ago, const int inum_full, const int nlocal, const int nall,
double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
int &host_start, const double cpu_time, bool &success, void **fp_ptr);
void eam_gpu_compute_force(int *ilist, const bool eflag, const bool vflag, const bool eatom,
const bool vatom);
double eam_gpu_bytes();
/* ---------------------------------------------------------------------- */
@ -95,7 +88,7 @@ double PairEAMGPU::memory_usage()
void PairEAMGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
// compute density on each atom on GPU
@ -106,7 +99,7 @@ void PairEAMGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -115,27 +108,24 @@ void PairEAMGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = eam_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, inum_dev, &fp_pinned);
} else { // gpu_mode == GPU_FORCE
firstneigh =
eam_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success, inum_dev, &fp_pinned);
} else { // gpu_mode == GPU_FORCE
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
eam_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, &fp_pinned);
eam_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type, ilist, numneigh,
firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success,
&fp_pinned);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
// communicate derivative of embedding function
@ -165,10 +155,9 @@ void PairEAMGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -176,23 +165,17 @@ void PairEAMGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int fp_size;
int mnf = 5e-2 * neighbor->oneatom;
int success = eam_gpu_init(atom->ntypes+1, cutforcesq, type2rhor, type2z2r,
type2frho, rhor_spline, z2r_spline, frho_spline,
cutsq, rdr, rdrho, rhomax, nrhor, nrho, nz2r, nfrho, nr,
atom->nlocal, atom->nlocal+atom->nghost, mnf,
int success = eam_gpu_init(atom->ntypes + 1, cutforcesq, type2rhor, type2z2r, type2frho,
rhor_spline, z2r_spline, frho_spline, cutsq, rdr, rdrho, rhomax, nrhor,
nrho, nz2r, nfrho, nr, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen, fp_size);
GPU_EXTRA::check_flag(success,error,world);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
if (fp_size == sizeof(double))
fp_single = false;
else
@ -203,64 +186,63 @@ void PairEAMGPU::init_style()
/* ---------------------------------------------------------------------- */
// Evaluate the pair term for one (i,j) pair at squared distance rsq.
// Returns phi (= z2 / r) and stores -psip / r in fforce.
// The two unnamed double parameters (factor_coul, factor_lj) are not used.
// NOTE(review): this span looks like a commit-diff rendering - each statement
// that was reformatted is listed twice (old spacing first, new spacing second);
// both copies compute the same thing.
double PairEAMGPU::single(int i, int j, int itype, int jtype,
double rsq, double /* factor_coul */,
double PairEAMGPU::single(int i, int j, int itype, int jtype, double rsq, double /* factor_coul */,
double /* factor_lj */, double &fforce)
{
int m;
double r,p,rhoip,rhojp,z2,z2p,recip,phi,phip,psip;
double r, p, rhoip, rhojp, z2, z2p, recip, phi, phip, psip;
double *coeff;
// locate the spline interval: m is the integer part of r*rdr + 1 (capped at
// nr-1), p is the remaining fraction (capped at 1.0)
r = sqrt(rsq);
p = r*rdr + 1.0;
m = static_cast<int> (p);
m = MIN(m,nr-1);
p = r * rdr + 1.0;
m = static_cast<int>(p);
m = MIN(m, nr - 1);
p -= m;
p = MIN(p,1.0);
p = MIN(p, 1.0);
// quadratic in p from coefficients 0..2 of the rhor spline, looked up with
// (itype,jtype) and then with (jtype,itype)
// presumably these are density derivatives - confirm against the spline setup
coeff = rhor_spline[type2rhor[itype][jtype]][m];
rhoip = (coeff[0]*p + coeff[1])*p + coeff[2];
rhoip = (coeff[0] * p + coeff[1]) * p + coeff[2];
coeff = rhor_spline[type2rhor[jtype][itype]][m];
rhojp = (coeff[0]*p + coeff[1])*p + coeff[2];
rhojp = (coeff[0] * p + coeff[1]) * p + coeff[2];
// z2r spline: coefficients 0..2 give z2p (quadratic), 3..6 give z2 (cubic)
coeff = z2r_spline[type2z2r[itype][jtype]][m];
z2p = (coeff[0]*p + coeff[1])*p + coeff[2];
z2 = ((coeff[3]*p + coeff[4])*p + coeff[5])*p + coeff[6];
z2p = (coeff[0] * p + coeff[1]) * p + coeff[2];
z2 = ((coeff[3] * p + coeff[4]) * p + coeff[5]) * p + coeff[6];
// fp_pinned is an untyped buffer; fp_single selects whether it is read as
// float or double
double fp_i,fp_j;
double fp_i, fp_j;
if (fp_single == false) {
fp_i = ((double*)fp_pinned)[i];
fp_j = ((double*)fp_pinned)[j];
fp_i = ((double *) fp_pinned)[i];
fp_j = ((double *) fp_pinned)[j];
} else {
fp_i = ((float*)fp_pinned)[i];
fp_j = ((float*)fp_pinned)[j];
fp_i = ((float *) fp_pinned)[i];
fp_j = ((float *) fp_pinned)[j];
}
// NOTE(review): recip = 1/r is not guarded against r == 0
recip = 1.0/r;
phi = z2*recip;
phip = z2p*recip - phi*recip;
psip = fp_i*rhojp + fp_j*rhoip + phip;
fforce = -psip*recip;
recip = 1.0 / r;
phi = z2 * recip;
phip = z2p * recip - phi * recip;
psip = fp_i * rhojp + fp_j * rhoip + phip;
fforce = -psip * recip;
return phi;
}
/* ---------------------------------------------------------------------- */
int PairEAMGPU::pack_forward_comm(int n, int *list, double *buf,
int /* pbc_flag */, int * /* pbc */)
int PairEAMGPU::pack_forward_comm(int n, int *list, double *buf, int /* pbc_flag */,
int * /* pbc */)
{
int i,j,m;
int i, j, m;
m = 0;
if (fp_single) {
float *fp_ptr = (float *)fp_pinned;
float *fp_ptr = (float *) fp_pinned;
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = static_cast<double>(fp_ptr[j]);
}
} else {
double *fp_ptr = (double *)fp_pinned;
double *fp_ptr = (double *) fp_pinned;
for (i = 0; i < n; i++) {
j = list[i];
buf[m++] = fp_ptr[j];
@ -274,15 +256,15 @@ int PairEAMGPU::pack_forward_comm(int n, int *list, double *buf,
// Copy n values from buf into fp_pinned at indices first .. first+n-1.
// fp_pinned is interpreted as float when fp_single is set, otherwise as
// double; in the float case each double from buf is narrowed on assignment.
// NOTE(review): this span looks like a commit-diff rendering - reformatted
// declarations are listed twice (old spacing first, new spacing second).
void PairEAMGPU::unpack_forward_comm(int n, int first, double *buf)
{
int i,m,last;
int i, m, last;
m = 0;
last = first + n;
if (fp_single) {
float *fp_ptr = (float *)fp_pinned;
float *fp_ptr = (float *) fp_pinned;
for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
} else {
double *fp_ptr = (double *)fp_pinned;
double *fp_ptr = (double *) fp_pinned;
for (i = first; i < last; i++) fp_ptr[i] = buf[m++];
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,25 +32,20 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// Prototypes for the Gauss entry points implemented outside this file.
// As used by the visible callers in this file:
//  - gauss_gpu_init: its int result is handed to GPU_EXTRA::check_flag;
//    gpu_mode is passed by reference.
//  - gauss_gpu_reinit: called from PairGaussGPU::reinit with atom->ntypes + 1
//    and the cutsq, a, b, offset tables.
//  - gauss_gpu_compute_n: called when gpu_mode != GPU_FORCE; its return value
//    is stored as firstneigh, and ilist/jnum are passed by address.
//  - gauss_gpu_compute: called when gpu_mode == GPU_FORCE with the host
//    neighbor list arrays.
//  - host_start is passed by reference; afterwards the caller runs
//    cpu_compute when host_start < inum. When success comes back false the
//    caller raises "Insufficient memory on accelerator".
// NOTE(review): this span looks like a commit-diff rendering - prototypes are
// listed twice (old line wrapping first, new wrapping second), and unchanged
// trailing lines are shared between the two copies.
int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a,
double **b, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen);
void gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a,
double **b, double **offset);
int gauss_gpu_init(const int ntypes, double **cutsq, double **host_a, double **b, double **offset,
double *special_lj, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen);
void gauss_gpu_reinit(const int ntypes, double **cutsq, double **host_a, double **b,
double **offset);
void gauss_gpu_clear();
int ** gauss_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
const double cpu_time, bool &success);
void gauss_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **gauss_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void gauss_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
double gauss_gpu_bytes();
@ -79,7 +72,7 @@ PairGaussGPU::~PairGaussGPU()
void PairGaussGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -87,7 +80,7 @@ void PairGaussGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -96,28 +89,24 @@ void PairGaussGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = gauss_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh =
gauss_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
gauss_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
gauss_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -137,10 +126,9 @@ void PairGaussGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -148,21 +136,15 @@ void PairGaussGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = gauss_gpu_init(atom->ntypes+1, cutsq, a, b,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success =
gauss_gpu_init(atom->ntypes + 1, cutsq, a, b, offset, force->special_lj, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -171,7 +153,7 @@ void PairGaussGPU::reinit()
{
Pair::reinit();
gauss_gpu_reinit(atom->ntypes+1, cutsq, a, b, offset);
gauss_gpu_reinit(atom->ntypes + 1, cutsq, a, b, offset);
}
/* ---------------------------------------------------------------------- */
@ -184,11 +166,12 @@ double PairGaussGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairGaussGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r2inv,forcelj,factor_lj;
void PairGaussGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r2inv, forcelj, factor_lj;
int *jlist;
double **x = atom->x;
@ -215,26 +198,24 @@ void PairGaussGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
forcelj = - 2.0*a[itype][jtype]*b[itype][jtype] * rsq *
exp(-b[itype][jtype]*rsq);
fpair = factor_lj*forcelj*r2inv;
r2inv = 1.0 / rsq;
forcelj = -2.0 * a[itype][jtype] * b[itype][jtype] * rsq * exp(-b[itype][jtype] * rsq);
fpair = factor_lj * forcelj * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
evdwl = -(a[itype][jtype]*exp(-b[itype][jtype]*rsq) -
offset[itype][jtype]);
evdwl = -(a[itype][jtype] * exp(-b[itype][jtype] * rsq) - offset[itype][jtype]);
evdwl *= factor_lj;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -27,7 +26,6 @@
#include "math_extra.h"
#include "memory.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -37,35 +35,29 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// Prototypes for the Gay-Berne entry points implemented outside this file.
// As used by the visible callers in this file:
//  - gb_gpu_compute_n: called when gpu_mode != GPU_FORCE; its return value is
//    stored as firstneigh, and ilist/jnum are passed by address.
//  - gb_gpu_compute: called when gpu_mode == GPU_FORCE with list->ilist and
//    the host numneigh/firstneigh arrays; its int* return value is stored as
//    ilist.
//  - host_quat receives the caller's quat array, which the caller fills from
//    the ellipsoid bonus data before the call.
//  - when success comes back false the caller raises
//    "Insufficient memory on accelerator".
//  - gb_gpu_init, gb_gpu_clear, gb_gpu_bytes: call sites are not visible in
//    this span.
// NOTE(review): this span looks like a commit-diff rendering - prototypes are
// listed twice (old line wrapping first, new wrapping second), and unchanged
// trailing lines are shared between the two copies.
int gb_gpu_init(const int ntypes, const double gamma, const double upsilon,
const double mu, double **shape, double **well, double **cutsq,
double **sigma, double **epsilon, double *host_lshape,
int **form, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset,
double *special_lj, const int nlocal, const int nall,
const int max_nbors, const int maxspecial,
int gb_gpu_init(const int ntypes, const double gamma, const double upsilon, const double mu,
double **shape, double **well, double **cutsq, double **sigma, double **epsilon,
double *host_lshape, int **form, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
void gb_gpu_clear();
int ** gb_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double **host_quat);
int * gb_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double **host_quat);
int **gb_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double **host_quat);
int *gb_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const double cpu_time,
bool &success, double **host_quat);
double gb_gpu_bytes();
enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
enum { SPHERE_SPHERE, SPHERE_ELLIPSE, ELLIPSE_SPHERE, ELLIPSE_ELLIPSE };
/* ---------------------------------------------------------------------- */
PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp),
gpu_mode(GPU_FORCE)
PairGayBerneGPU::PairGayBerneGPU(LAMMPS *lmp) : PairGayBerne(lmp), gpu_mode(GPU_FORCE)
{
quat_nmax = 0;
reinitflag = 0;
@ -89,7 +81,7 @@ PairGayBerneGPU::~PairGayBerneGPU()
void PairGayBerneGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -103,7 +95,7 @@ void PairGayBerneGPU::compute(int eflag, int vflag)
}
AtomVecEllipsoid::Bonus *bonus = avec->bonus;
int *ellipsoid = atom->ellipsoid;
for (int i=0; i<nall; i++) {
for (int i = 0; i < nall; i++) {
int qi = ellipsoid[i];
if (qi > -1) {
quat[i][0] = bonus[qi].quat[0];
@ -114,7 +106,7 @@ void PairGayBerneGPU::compute(int eflag, int vflag)
}
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -123,26 +115,22 @@ void PairGayBerneGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = gb_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, quat);
firstneigh =
gb_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success, quat);
} else {
inum = list->inum;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
ilist = gb_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
list->ilist, numneigh, firstneigh, eflag, vflag,
eflag_atom, vflag_atom, host_start,
cpu_time, success, quat);
ilist = gb_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, list->ilist, numneigh,
firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
success, quat);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start < inum) {
cpu_time = platform::walltime();
@ -158,25 +146,22 @@ void PairGayBerneGPU::compute(int eflag, int vflag)
void PairGayBerneGPU::init_style()
{
avec = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
if (!avec)
error->all(FLERR,"Pair gayberne/gpu requires atom style ellipsoid");
if (!atom->ellipsoid_flag)
error->all(FLERR,"Pair gayberne/gpu requires atom style ellipsoid");
if (!avec) error->all(FLERR, "Pair gayberne/gpu requires atom style ellipsoid");
if (!atom->ellipsoid_flag) error->all(FLERR, "Pair gayberne/gpu requires atom style ellipsoid");
// per-type shape precalculations
// require that atom shapes are identical within each type
// if shape = 0 for point particle, set shape = 1 as required by Gay-Berne
for (int i = 1; i <= atom->ntypes; i++) {
if (!atom->shape_consistency(i,shape1[i][0],shape1[i][1],shape1[i][2]))
error->all(FLERR,"Pair gayberne/gpu requires atoms with same type have same shape");
if (shape1[i][0] == 0.0)
shape1[i][0] = shape1[i][1] = shape1[i][2] = 1.0;
shape2[i][0] = shape1[i][0]*shape1[i][0];
shape2[i][1] = shape1[i][1]*shape1[i][1];
shape2[i][2] = shape1[i][2]*shape1[i][2];
lshape[i] = (shape1[i][0]*shape1[i][1]+shape1[i][2]*shape1[i][2]) *
sqrt(shape1[i][0]*shape1[i][1]);
if (!atom->shape_consistency(i, shape1[i][0], shape1[i][1], shape1[i][2]))
error->all(FLERR, "Pair gayberne/gpu requires atoms with same type have same shape");
if (shape1[i][0] == 0.0) shape1[i][0] = shape1[i][1] = shape1[i][2] = 1.0;
shape2[i][0] = shape1[i][0] * shape1[i][0];
shape2[i][1] = shape1[i][1] * shape1[i][1];
shape2[i][2] = shape1[i][2] * shape1[i][2];
lshape[i] = (shape1[i][0] * shape1[i][1] + shape1[i][2] * shape1[i][2]) *
sqrt(shape1[i][0] * shape1[i][1]);
}
// Repeat cutsq calculation because done after call to init_style
@ -185,10 +170,9 @@ void PairGayBerneGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -197,22 +181,16 @@ void PairGayBerneGPU::init_style()
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = gb_gpu_init(atom->ntypes+1, gamma, upsilon, mu,
shape2, well, cutsq, sigma, epsilon, lshape, form,
lj1, lj2, lj3, lj4, offset, force->special_lj,
atom->nlocal, atom->nlocal+atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success =
gb_gpu_init(atom->ntypes + 1, gamma, upsilon, mu, shape2, well, cutsq, sigma, epsilon, lshape,
form, lj1, lj2, lj3, lj4, offset, force->special_lj, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
quat_nmax = static_cast<int>(1.1 * (atom->nlocal + atom->nghost));
memory->grow(quat, quat_nmax, 4, "pair:quat");
}
@ -222,21 +200,20 @@ void PairGayBerneGPU::init_style()
double PairGayBerneGPU::memory_usage()
{
double bytes = Pair::memory_usage();
return bytes + memory->usage(quat,quat_nmax)+gb_gpu_bytes();
return bytes + memory->usage(quat, quat_nmax) + gb_gpu_bytes();
}
/* ---------------------------------------------------------------------- */
void PairGayBerneGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairGayBerneGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
double fforce[3],ttor[3],rtor[3],r12[3];
double a1[3][3],b1[3][3],g1[3][3],a2[3][3],b2[3][3],g2[3][3],temp[3][3];
int i, j, ii, jj, jnum, itype, jtype;
double evdwl, one_eng, rsq, r2inv, r6inv, forcelj, factor_lj;
double fforce[3], ttor[3], rtor[3], r12[3];
double a1[3][3], b1[3][3], g1[3][3], a2[3][3], b2[3][3], g2[3][3], temp[3][3];
int *jlist;
double *iquat,*jquat;
double *iquat, *jquat;
AtomVecEllipsoid::Bonus *bonus = avec->bonus;
int *ellipsoid = atom->ellipsoid;
@ -254,11 +231,11 @@ void PairGayBerneGPU::cpu_compute(int start, int inum, int eflag,
if (form[itype][itype] == ELLIPSE_ELLIPSE) {
iquat = bonus[ellipsoid[i]].quat;
MathExtra::quat_to_mat_trans(iquat,a1);
MathExtra::diag_times3(well[itype],a1,temp);
MathExtra::transpose_times3(a1,temp,b1);
MathExtra::diag_times3(shape2[itype],a1,temp);
MathExtra::transpose_times3(a1,temp,g1);
MathExtra::quat_to_mat_trans(iquat, a1);
MathExtra::diag_times3(well[itype], a1, temp);
MathExtra::transpose_times3(a1, temp, b1);
MathExtra::diag_times3(shape2[itype], a1, temp);
MathExtra::transpose_times3(a1, temp, g1);
}
jlist = firstneigh[i];
@ -271,10 +248,10 @@ void PairGayBerneGPU::cpu_compute(int start, int inum, int eflag,
// r12 = center to center vector
r12[0] = x[j][0]-x[i][0];
r12[1] = x[j][1]-x[i][1];
r12[2] = x[j][2]-x[i][2];
rsq = MathExtra::dot3(r12,r12);
r12[0] = x[j][0] - x[i][0];
r12[1] = x[j][1] - x[i][1];
r12[2] = x[j][2] - x[i][2];
rsq = MathExtra::dot3(r12, r12);
jtype = type[j];
// compute if less than cutoff
@ -282,47 +259,46 @@ void PairGayBerneGPU::cpu_compute(int start, int inum, int eflag,
if (rsq < cutsq[itype][jtype]) {
switch (form[itype][jtype]) {
case SPHERE_SPHERE:
r2inv = 1.0/rsq;
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
forcelj *= -r2inv;
if (eflag) one_eng =
r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
offset[itype][jtype];
fforce[0] = r12[0]*forcelj;
fforce[1] = r12[1]*forcelj;
fforce[2] = r12[2]*forcelj;
ttor[0] = ttor[1] = ttor[2] = 0.0;
rtor[0] = rtor[1] = rtor[2] = 0.0;
break;
case SPHERE_SPHERE:
r2inv = 1.0 / rsq;
r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
forcelj *= -r2inv;
if (eflag)
one_eng =
r6inv * (r6inv * lj3[itype][jtype] - lj4[itype][jtype]) - offset[itype][jtype];
fforce[0] = r12[0] * forcelj;
fforce[1] = r12[1] * forcelj;
fforce[2] = r12[2] * forcelj;
ttor[0] = ttor[1] = ttor[2] = 0.0;
rtor[0] = rtor[1] = rtor[2] = 0.0;
break;
case SPHERE_ELLIPSE:
jquat = bonus[ellipsoid[j]].quat;
MathExtra::quat_to_mat_trans(jquat,a2);
MathExtra::diag_times3(well[jtype],a2,temp);
MathExtra::transpose_times3(a2,temp,b2);
MathExtra::diag_times3(shape2[jtype],a2,temp);
MathExtra::transpose_times3(a2,temp,g2);
one_eng = gayberne_lj(j,i,a2,b2,g2,r12,rsq,fforce,rtor);
ttor[0] = ttor[1] = ttor[2] = 0.0;
break;
case SPHERE_ELLIPSE:
jquat = bonus[ellipsoid[j]].quat;
MathExtra::quat_to_mat_trans(jquat, a2);
MathExtra::diag_times3(well[jtype], a2, temp);
MathExtra::transpose_times3(a2, temp, b2);
MathExtra::diag_times3(shape2[jtype], a2, temp);
MathExtra::transpose_times3(a2, temp, g2);
one_eng = gayberne_lj(j, i, a2, b2, g2, r12, rsq, fforce, rtor);
ttor[0] = ttor[1] = ttor[2] = 0.0;
break;
case ELLIPSE_SPHERE:
one_eng = gayberne_lj(i,j,a1,b1,g1,r12,rsq,fforce,ttor);
rtor[0] = rtor[1] = rtor[2] = 0.0;
break;
case ELLIPSE_SPHERE:
one_eng = gayberne_lj(i, j, a1, b1, g1, r12, rsq, fforce, ttor);
rtor[0] = rtor[1] = rtor[2] = 0.0;
break;
default:
jquat = bonus[ellipsoid[j]].quat;
MathExtra::quat_to_mat_trans(jquat,a2);
MathExtra::diag_times3(well[jtype],a2,temp);
MathExtra::transpose_times3(a2,temp,b2);
MathExtra::diag_times3(shape2[jtype],a2,temp);
MathExtra::transpose_times3(a2,temp,g2);
one_eng = gayberne_analytic(i,j,a1,a2,b1,b2,g1,g2,r12,rsq,
fforce,ttor,rtor);
break;
default:
jquat = bonus[ellipsoid[j]].quat;
MathExtra::quat_to_mat_trans(jquat, a2);
MathExtra::diag_times3(well[jtype], a2, temp);
MathExtra::transpose_times3(a2, temp, b2);
MathExtra::diag_times3(shape2[jtype], a2, temp);
MathExtra::transpose_times3(a2, temp, g2);
one_eng = gayberne_analytic(i, j, a1, a2, b1, b2, g1, g2, r12, rsq, fforce, ttor, rtor);
break;
}
fforce[0] *= factor_lj;
@ -339,10 +315,11 @@ void PairGayBerneGPU::cpu_compute(int start, int inum, int eflag,
tor[i][1] += ttor[1];
tor[i][2] += ttor[2];
if (eflag) evdwl = factor_lj*one_eng;
if (eflag) evdwl = factor_lj * one_eng;
if (evflag) ev_tally_xyz_full(i,evdwl,0.0,fforce[0],fforce[1],fforce[2],
-r12[0],-r12[1],-r12[2]);
if (evflag)
ev_tally_xyz_full(i, evdwl, 0.0, fforce[0], fforce[1], fforce[2], -r12[0], -r12[1],
-r12[2]);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,23 +32,19 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
void lj96_gpu_clear();
int ** lj96_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
const double cpu_time, bool &success);
void lj96_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **lj96_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void lj96_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
double lj96_gpu_bytes();
@ -78,7 +72,7 @@ PairLJ96CutGPU::~PairLJ96CutGPU()
void PairLJ96CutGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -86,7 +80,7 @@ void PairLJ96CutGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -95,28 +89,24 @@ void PairLJ96CutGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success);
firstneigh =
lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
lj96_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
lj96_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -131,17 +121,15 @@ void PairLJ96CutGPU::init_style()
{
cut_respa = nullptr;
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
double cut;
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -149,21 +137,15 @@ void PairLJ96CutGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = lj96_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success = lj96_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset,
force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -176,13 +158,12 @@ double PairLJ96CutGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJ96CutGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairLJ96CutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj;
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r2inv, r3inv, r6inv, forcelj, factor_lj;
int *jlist;
double **x = atom->x;
@ -209,27 +190,26 @@ void PairLJ96CutGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r6inv = r2inv*r2inv*r2inv;
r2inv = 1.0 / rsq;
r6inv = r2inv * r2inv * r2inv;
r3inv = sqrt(r6inv);
forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
fpair = factor_lj*forcelj*r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r3inv - lj2[itype][jtype]);
fpair = factor_lj * forcelj * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl = r6inv * (lj3[itype][jtype] * r3inv - lj4[itype][jtype]) - offset[itype][jtype];
evdwl *= factor_lj;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include <cmath>
@ -33,38 +31,30 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int crm_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double host_cut_ljsq, double host_cut_coulsq,
double *host_special_coul, const double qqrd2e,
const double cut_lj_innersq, const double cut_coul_innersq,
const double denom_lj, const double denom_coul,
double **epsilon, double **sigma,
const bool mix_arithmetic);
int crm_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen, double host_cut_ljsq, double host_cut_coulsq,
double *host_special_coul, const double qqrd2e, const double cut_lj_innersq,
const double cut_coul_innersq, const double denom_lj, const double denom_coul,
double **epsilon, double **sigma, const bool mix_arithmetic);
void crm_gpu_clear();
int ** crm_gpu_compute_n(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success,
double *host_q, double *boxlo, double *prd);
void crm_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **crm_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
void crm_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, const int nlocal, double *boxlo, double *prd);
double crm_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairLJCharmmCoulCharmmGPU::PairLJCharmmCoulCharmmGPU(LAMMPS *lmp) :
PairLJCharmmCoulCharmm(lmp), gpu_mode(GPU_FORCE)
PairLJCharmmCoulCharmm(lmp), gpu_mode(GPU_FORCE)
{
reinitflag = 0;
cpu_time = 0.0;
@ -84,8 +74,10 @@ PairLJCharmmCoulCharmmGPU::~PairLJCharmmCoulCharmmGPU()
void PairLJCharmmCoulCharmmGPU::compute(int eflag, int vflag)
{
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
if (eflag || vflag)
ev_setup(eflag, vflag);
else
evflag = vflag_fdotr = 0;
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -94,27 +86,22 @@ void PairLJCharmmCoulCharmmGPU::compute(int eflag, int vflag)
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
inum = atom->nlocal;
firstneigh = crm_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, domain->sublo, domain->subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
firstneigh = crm_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, domain->sublo,
domain->subhi, atom->tag, atom->nspecial, atom->special, eflag,
vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
crm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
crm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -134,8 +121,7 @@ void PairLJCharmmCoulCharmmGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
init_one(i,j);
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) init_one(i, j);
}
}
@ -143,46 +129,37 @@ void PairLJCharmmCoulCharmmGPU::init_style()
cut_coul_innersq = cut_coul_inner * cut_coul_inner;
cut_ljsq = cut_lj * cut_lj;
cut_coulsq = cut_coul * cut_coul;
cut_bothsq = MAX(cut_ljsq,cut_coulsq);
cut_bothsq = MAX(cut_ljsq, cut_coulsq);
denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
(cut_ljsq-cut_lj_innersq);
denom_lj =
(cut_ljsq - cut_lj_innersq) * (cut_ljsq - cut_lj_innersq) * (cut_ljsq - cut_lj_innersq);
denom_lj = 1.0 / denom_lj;
denom_coul = (cut_coulsq-cut_coul_innersq) * (cut_coulsq-cut_coul_innersq) *
(cut_coulsq-cut_coul_innersq);
denom_coul = (cut_coulsq - cut_coul_innersq) * (cut_coulsq - cut_coul_innersq) *
(cut_coulsq - cut_coul_innersq);
denom_coul = 1.0 / denom_coul;
double cell_size = sqrt(cut_bothsq) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
bool arithmetic = true;
for (int i = 1; i < atom->ntypes + 1; i++)
for (int j = i + 1; j < atom->ntypes + 1; j++) {
if (epsilon[i][j] != sqrt(epsilon[i][i] * epsilon[j][j]))
arithmetic = false;
if (sigma[i][j] != 0.5 * (sigma[i][i] + sigma[j][j]))
arithmetic = false;
if (epsilon[i][j] != sqrt(epsilon[i][i] * epsilon[j][j])) arithmetic = false;
if (sigma[i][j] != 0.5 * (sigma[i][i] + sigma[j][j])) arithmetic = false;
}
int mnf = 5e-2 * neighbor->oneatom;
int success = crm_gpu_init(atom->ntypes+1, cut_bothsq, lj1, lj2, lj3, lj4,
force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq,
cut_coulsq, force->special_coul, force->qqrd2e,
cut_lj_innersq,cut_coul_innersq,denom_lj,
denom_coul,epsilon,sigma,arithmetic);
GPU_EXTRA::check_flag(success,error,world);
int success =
crm_gpu_init(atom->ntypes + 1, cut_bothsq, lj1, lj2, lj3, lj4, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, cut_lj_innersq,
cut_coul_innersq, denom_lj, denom_coul, epsilon, sigma, arithmetic);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -195,14 +172,13 @@ double PairLJCharmmCoulCharmmGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
void PairLJCharmmCoulCharmmGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
double philj,switch1,switch2;
int i, j, ii, jj, jnum, itype, jtype;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
double rsq, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
double philj, switch1, switch2;
int *jlist;
evdwl = ecoul = 0.0;
@ -236,64 +212,66 @@ void PairLJCharmmCoulCharmmGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
if (rsq < cut_bothsq) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
if (rsq < cut_coulsq) {
forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
forcecoul = qqrd2e * qtmp * q[j] * sqrt(r2inv);
if (rsq > cut_coul_innersq) {
switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) *
(cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) * denom_coul;
switch1 = (cut_coulsq - rsq) * (cut_coulsq - rsq) *
(cut_coulsq + 2.0 * rsq - 3.0 * cut_coul_innersq) * denom_coul;
forcecoul *= switch1;
}
} else forcecoul = 0.0;
} else
forcecoul = 0.0;
if (rsq < cut_ljsq) {
r6inv = r2inv*r2inv*r2inv;
r6inv = r2inv * r2inv * r2inv;
jtype = type[j];
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
if (rsq > cut_lj_innersq) {
switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
(cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj;
switch2 = 12.0*rsq * (cut_ljsq-rsq) *
(rsq-cut_lj_innersq) * denom_lj;
philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
forcelj = forcelj*switch1 + philj*switch2;
switch1 = (cut_ljsq - rsq) * (cut_ljsq - rsq) *
(cut_ljsq + 2.0 * rsq - 3.0 * cut_lj_innersq) * denom_lj;
switch2 = 12.0 * rsq * (cut_ljsq - rsq) * (rsq - cut_lj_innersq) * denom_lj;
philj = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]);
forcelj = forcelj * switch1 + philj * switch2;
}
} else forcelj = 0.0;
} else
forcelj = 0.0;
fpair = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
fpair = (factor_coul * forcecoul + factor_lj * forcelj) * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (rsq < cut_coulsq) {
ecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
ecoul = qqrd2e * qtmp * q[j] * sqrt(r2inv);
if (rsq > cut_coul_innersq) {
switch1 = (cut_coulsq-rsq) * (cut_coulsq-rsq) *
(cut_coulsq + 2.0*rsq - 3.0*cut_coul_innersq) *
denom_coul;
switch1 = (cut_coulsq - rsq) * (cut_coulsq - rsq) *
(cut_coulsq + 2.0 * rsq - 3.0 * cut_coul_innersq) * denom_coul;
ecoul *= switch1;
}
ecoul *= factor_coul;
} else ecoul = 0.0;
} else
ecoul = 0.0;
if (rsq < cut_ljsq) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]);
if (rsq > cut_lj_innersq) {
switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
(cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) * denom_lj;
switch1 = (cut_ljsq - rsq) * (cut_ljsq - rsq) *
(cut_ljsq + 2.0 * rsq - 3.0 * cut_lj_innersq) * denom_lj;
evdwl *= switch1;
}
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,55 +24,48 @@
#include "gpu_extra.h"
#include "kspace.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include <cmath>
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// Prototypes of the GPU library entry points used by
// PairLJCharmmCoulLongGPU. Each prototype is listed twice in this view: once
// with the old line wrapping and once with the new one; the parameter lists
// are the same.

// Set up the GPU side of the pair style. init_style() passes the LJ
// coefficient tables, the squared cutoffs, the special-bond scaling arrays,
// qqrd2e, g_ewald and the flag saying whether epsilon/sigma follow the mixing
// rule it tests for; the returned status goes to GPU_EXTRA::check_flag().
// gpu_mode is taken by reference.
int crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double host_cut_ljsq, double host_cut_coulsq,
double *host_special_coul, const double qqrd2e,
const double g_ewald, const double cut_lj_innersq,
const double denom_lj, double **epsilon, double **sigma,
const bool mix_arithmetic);
int crml_gpu_init(const int ntypes, double cut_bothsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, double host_cut_ljsq,
double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
const double g_ewald, const double cut_lj_innersq, const double denom_lj,
double **epsilon, double **sigma, const bool mix_arithmetic);
// Presumably releases what crml_gpu_init() set up; the call site is not
// visible here -- verify against the destructor.
void crml_gpu_clear();
// Force computation used by compute() when gpu_mode != GPU_FORCE. It receives
// the sub-domain bounds, atom tags and special-bond arrays instead of a host
// neighbor list, returns the neighbor array, and hands back ilist/jnum through
// pointers. host_start and success are written through references; compute()
// aborts when success is false and runs cpu_compute() when host_start < inum.
int ** crml_gpu_compute_n(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q,
double *boxlo, double *prd);
// Force computation used by compute() when gpu_mode == GPU_FORCE. It receives
// the neighbor list held by the pair style (ilist, numj, firstneigh).
void crml_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **crml_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
void crml_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
// Presumably the accelerator memory in use, consumed by memory_usage() --
// the call site is not visible here, TODO confirm.
double crml_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairLJCharmmCoulLongGPU::PairLJCharmmCoulLongGPU(LAMMPS *lmp) :
PairLJCharmmCoulLong(lmp), gpu_mode(GPU_FORCE)
PairLJCharmmCoulLong(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -95,7 +87,7 @@ PairLJCharmmCoulLongGPU::~PairLJCharmmCoulLongGPU()
void PairLJCharmmCoulLongGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -103,7 +95,7 @@ void PairLJCharmmCoulLongGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -112,30 +104,25 @@ void PairLJCharmmCoulLongGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = crml_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
firstneigh = crml_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
crml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
crml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -151,65 +138,54 @@ void PairLJCharmmCoulLongGPU::init_style()
cut_respa = nullptr;
if (!atom->q_flag)
error->all(FLERR,"Pair style lj/charmm/coul/long/gpu requires atom attribute q");
error->all(FLERR, "Pair style lj/charmm/coul/long/gpu requires atom attribute q");
// Repeat cutsq calculation because done after call to init_style
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0))
init_one(i,j);
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) init_one(i, j);
}
}
cut_lj_innersq = cut_lj_inner * cut_lj_inner;
cut_ljsq = cut_lj * cut_lj;
cut_coulsq = cut_coul * cut_coul;
cut_bothsq = MAX(cut_ljsq,cut_coulsq);
cut_bothsq = MAX(cut_ljsq, cut_coulsq);
denom_lj = (cut_ljsq-cut_lj_innersq) * (cut_ljsq-cut_lj_innersq) *
(cut_ljsq-cut_lj_innersq);
denom_lj =
(cut_ljsq - cut_lj_innersq) * (cut_ljsq - cut_lj_innersq) * (cut_ljsq - cut_lj_innersq);
double cell_size = sqrt(cut_bothsq) + neighbor->skin;
// ensure use of KSpace long-range solver, set g_ewald
if (force->kspace == nullptr)
error->all(FLERR,"Pair style requires a KSpace style");
if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
g_ewald = force->kspace->g_ewald;
// setup force tables
if (ncoultablebits) init_tables(cut_coul,cut_respa);
if (ncoultablebits) init_tables(cut_coul, cut_respa);
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
bool arithmetic = true;
for (int i = 1; i < atom->ntypes + 1; i++)
for (int j = i + 1; j < atom->ntypes + 1; j++) {
if (epsilon[i][j] != sqrt(epsilon[i][i] * epsilon[j][j]))
arithmetic = false;
if (sigma[i][j] != 0.5 * (sigma[i][i] + sigma[j][j]))
arithmetic = false;
if (epsilon[i][j] != sqrt(epsilon[i][i] * epsilon[j][j])) arithmetic = false;
if (sigma[i][j] != 0.5 * (sigma[i][i] + sigma[j][j])) arithmetic = false;
}
int mnf = 5e-2 * neighbor->oneatom;
int success = crml_gpu_init(atom->ntypes+1, cut_bothsq, lj1, lj2, lj3, lj4,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq,
cut_coulsq, force->special_coul, force->qqrd2e,
g_ewald, cut_lj_innersq,denom_lj,epsilon,sigma,
arithmetic);
GPU_EXTRA::check_flag(success,error,world);
int success =
crml_gpu_init(atom->ntypes + 1, cut_bothsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald,
cut_lj_innersq, denom_lj, epsilon, sigma, arithmetic);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -222,16 +198,15 @@ double PairLJCharmmCoulLongGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulLongGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
void PairLJCharmmCoulLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype,itable;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double fraction,table;
double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
double grij,expm2,prefactor,t,erfc;
double philj,switch1,switch2;
int i, j, ii, jj, jnum, itype, jtype, itable;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
double fraction, table;
double r, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
double grij, expm2, prefactor, t, erfc;
double philj, switch1, switch2;
int *jlist;
double rsq;
@ -266,80 +241,83 @@ void PairLJCharmmCoulLongGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
if (rsq < cut_bothsq) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq) {
r = sqrt(rsq);
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*q[j]/r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
expm2 = exp(-grij * grij);
t = 1.0 / (1.0 + EWALD_P * grij);
erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
prefactor = qqrd2e * qtmp * q[j] / r;
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
} else {
union_int_float_t rsq_lookup;
rsq_lookup.f = rsq;
itable = rsq_lookup.i & ncoulmask;
itable >>= ncoulshiftbits;
fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
table = ftable[itable] + fraction*dftable[itable];
forcecoul = qtmp*q[j] * table;
table = ftable[itable] + fraction * dftable[itable];
forcecoul = qtmp * q[j] * table;
if (factor_coul < 1.0) {
table = ctable[itable] + fraction*dctable[itable];
prefactor = qtmp*q[j] * table;
forcecoul -= (1.0-factor_coul)*prefactor;
table = ctable[itable] + fraction * dctable[itable];
prefactor = qtmp * q[j] * table;
forcecoul -= (1.0 - factor_coul) * prefactor;
}
}
} else forcecoul = 0.0;
} else
forcecoul = 0.0;
if (rsq < cut_ljsq) {
r6inv = r2inv*r2inv*r2inv;
r6inv = r2inv * r2inv * r2inv;
jtype = type[j];
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
if (rsq > cut_lj_innersq) {
switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
(cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
switch2 = 12.0*rsq * (cut_ljsq-rsq) *
(rsq-cut_lj_innersq) / denom_lj;
philj = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
forcelj = forcelj*switch1 + philj*switch2;
switch1 = (cut_ljsq - rsq) * (cut_ljsq - rsq) *
(cut_ljsq + 2.0 * rsq - 3.0 * cut_lj_innersq) / denom_lj;
switch2 = 12.0 * rsq * (cut_ljsq - rsq) * (rsq - cut_lj_innersq) / denom_lj;
philj = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]);
forcelj = forcelj * switch1 + philj * switch2;
}
} else forcelj = 0.0;
} else
forcelj = 0.0;
fpair = (forcecoul + factor_lj*forcelj) * r2inv;
fpair = (forcecoul + factor_lj * forcelj) * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq)
ecoul = prefactor*erfc;
ecoul = prefactor * erfc;
else {
table = etable[itable] + fraction*detable[itable];
ecoul = qtmp*q[j] * table;
table = etable[itable] + fraction * detable[itable];
ecoul = qtmp * q[j] * table;
}
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
} else
ecoul = 0.0;
if (rsq < cut_ljsq) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]);
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]);
if (rsq > cut_lj_innersq) {
switch1 = (cut_ljsq-rsq) * (cut_ljsq-rsq) *
(cut_ljsq + 2.0*rsq - 3.0*cut_lj_innersq) / denom_lj;
switch1 = (cut_ljsq - rsq) * (cut_ljsq - rsq) *
(cut_ljsq + 2.0 * rsq - 3.0 * cut_lj_innersq) / denom_lj;
evdwl *= switch1;
}
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,53 +24,47 @@
#include "gpu_extra.h"
#include "kspace.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include <cmath>
// Constants for the polynomial approximation of the complementary error
// function used by cpu_compute() for the real-space Ewald term:
//   t    = 1 / (1 + EWALD_P * g_ewald * r)
//   erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * exp(-(g_ewald * r)^2)
// EWALD_F (numerically 2/sqrt(pi)) scales the Gaussian term that is added to
// erfc when forming the Coulomb force.
// NOTE(review): the coefficients look like the Abramowitz & Stegun 7.1.26
// fit -- confirm before citing it elsewhere.
// NOTE(review): this view lists the seven macros twice (before and after a
// whitespace-only reformat); the values are identical.
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// Prototypes of the GPU library entry points used by
// PairLJClass2CoulLongGPU. Old and new line wrappings of each prototype are
// interleaved in this view; for c2cl_gpu_init both share the final
// "const double g_ewald);" line.

// Set up the GPU side of the pair style. init_style() passes the per-type
// squared cutoffs, the LJ coefficient tables, the special-bond scaling arrays,
// qqrd2e and g_ewald; the returned status goes to GPU_EXTRA::check_flag().
// gpu_mode is taken by reference.
int c2cl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double host_cut_coulsq,
double *host_special_coul, const double qqrd2e,
int c2cl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
const double g_ewald);
// Presumably releases what c2cl_gpu_init() set up; the call site is not
// visible here -- verify against the destructor.
void c2cl_gpu_clear();
// Force computation used by compute() when gpu_mode != GPU_FORCE. It receives
// the sub-domain bounds, atom tags and special-bond arrays instead of a host
// neighbor list, returns the neighbor array, and hands back ilist/jnum through
// pointers. host_start and success are written through references; compute()
// aborts when success is false and runs cpu_compute() when host_start < inum.
int ** c2cl_gpu_compute_n(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q,
double *boxlo, double *prd);
// Force computation used by compute() when gpu_mode == GPU_FORCE. It receives
// the neighbor list held by the pair style (ilist, numj, firstneigh).
void c2cl_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **c2cl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
void c2cl_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
// Presumably the accelerator memory in use, consumed by memory_usage() --
// the call site is not visible here, TODO confirm.
double c2cl_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairLJClass2CoulLongGPU::PairLJClass2CoulLongGPU(LAMMPS *lmp) :
PairLJClass2CoulLong(lmp), gpu_mode(GPU_FORCE)
PairLJClass2CoulLong(lmp), gpu_mode(GPU_FORCE)
{
cpu_time = 0.0;
reinitflag = 0;
@ -92,7 +85,7 @@ PairLJClass2CoulLongGPU::~PairLJClass2CoulLongGPU()
void PairLJClass2CoulLongGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -100,7 +93,7 @@ void PairLJClass2CoulLongGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -109,30 +102,25 @@ void PairLJClass2CoulLongGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = c2cl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
firstneigh = c2cl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
c2cl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
c2cl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -146,7 +134,7 @@ void PairLJClass2CoulLongGPU::compute(int eflag, int vflag)
void PairLJClass2CoulLongGPU::init_style()
{
if (!atom->q_flag)
error->all(FLERR,"Pair style lj/class2/coul/long/gpu requires atom attribute q");
error->all(FLERR, "Pair style lj/class2/coul/long/gpu requires atom attribute q");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -154,10 +142,9 @@ void PairLJClass2CoulLongGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -169,30 +156,23 @@ void PairLJClass2CoulLongGPU::init_style()
// ensure use of KSpace long-range solver, set g_ewald
if (force->kspace == nullptr)
error->all(FLERR,"Pair style requires a KSpace style");
if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
g_ewald = force->kspace->g_ewald;
// setup force tables
if (ncoultablebits) init_tables(cut_coul,cut_respa);
if (ncoultablebits) init_tables(cut_coul, cut_respa);
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = c2cl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
force->special_coul, force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success,error,world);
int success =
c2cl_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -205,15 +185,14 @@ double PairLJClass2CoulLongGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJClass2CoulLongGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
void PairLJClass2CoulLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double rsq,r,rinv,r2inv,r3inv,r6inv,forcecoul,forcelj;
double grij,expm2,prefactor,t,erfc;
double factor_coul,factor_lj;
int i, j, ii, jj, jnum, itype, jtype;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
double rsq, r, rinv, r2inv, r3inv, r6inv, forcecoul, forcelj;
double grij, expm2, prefactor, t, erfc;
double factor_coul, factor_lj;
int *jlist;
evdwl = ecoul = 0.0;
@ -247,49 +226,52 @@ void PairLJClass2CoulLongGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
if (rsq < cut_coulsq) {
r = sqrt(rsq);
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*q[j]/r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
} else forcecoul = 0.0;
expm2 = exp(-grij * grij);
t = 1.0 / (1.0 + EWALD_P * grij);
erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
prefactor = qqrd2e * qtmp * q[j] / r;
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
} else
forcecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
rinv = sqrt(r2inv);
r3inv = r2inv*rinv;
r6inv = r3inv*r3inv;
forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
} else forcelj = 0.0;
r3inv = r2inv * rinv;
r6inv = r3inv * r3inv;
forcelj = r6inv * (lj1[itype][jtype] * r3inv - lj2[itype][jtype]);
} else
forcelj = 0.0;
fpair = (forcecoul + factor_lj*forcelj) * r2inv;
fpair = (forcecoul + factor_lj * forcelj) * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (rsq < cut_coulsq) {
ecoul = prefactor*erfc;
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
ecoul = prefactor * erfc;
if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
} else
ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl = r6inv * (lj3[itype][jtype] * r3inv - lj4[itype][jtype]) - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,23 +32,19 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// Prototypes of the GPU library entry points used by PairLJClass2GPU (the
// functions carry the lj96_ prefix). Old and new line wrappings of each
// prototype are interleaved in this view and share their final line.

// Set up the GPU side of the pair style. init_style() passes the per-type
// squared cutoffs, the LJ coefficient tables, the offset table and
// special_lj; the returned status goes to GPU_EXTRA::check_flag(). gpu_mode
// is taken by reference.
int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
int lj96_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
// Presumably releases what lj96_gpu_init() set up; the call site is not
// visible here -- verify against the destructor.
void lj96_gpu_clear();
// Force computation used by compute() when gpu_mode != GPU_FORCE. It receives
// the sub-domain bounds, atom tags and special-bond arrays instead of a host
// neighbor list, returns the neighbor array, and hands back ilist/jnum through
// pointers. host_start and success are written through references; compute()
// aborts when success is false and runs cpu_compute() when host_start < inum.
int **lj96_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
int **lj96_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
// Force computation used by compute() when gpu_mode == GPU_FORCE. It receives
// the neighbor list held by the pair style (ilist, numj, firstneigh).
void lj96_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
void lj96_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
// Presumably the accelerator memory in use, consumed by memory_usage() --
// the call site is not visible here, TODO confirm.
double lj96_gpu_bytes();
@ -78,7 +72,7 @@ PairLJClass2GPU::~PairLJClass2GPU()
void PairLJClass2GPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -86,7 +80,7 @@ void PairLJClass2GPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -95,28 +89,24 @@ void PairLJClass2GPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success);
firstneigh =
lj96_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
lj96_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
lj96_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -136,10 +126,9 @@ void PairLJClass2GPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -147,21 +136,15 @@ void PairLJClass2GPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = lj96_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success = lj96_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset,
force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -174,13 +157,12 @@ double PairLJClass2GPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJClass2GPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairLJClass2GPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r2inv,r3inv,r6inv,forcelj,factor_lj;
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r2inv, r3inv, r6inv, forcelj, factor_lj;
int *jlist;
double **x = atom->x;
@ -207,27 +189,26 @@ void PairLJClass2GPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r6inv = r2inv*r2inv*r2inv;
r2inv = 1.0 / rsq;
r6inv = r2inv * r2inv * r2inv;
r3inv = sqrt(r6inv);
forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
fpair = factor_lj*forcelj*r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r3inv - lj2[itype][jtype]);
fpair = factor_lj * forcelj * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl = r6inv * (lj3[itype][jtype] * r3inv - lj4[itype][jtype]) - offset[itype][jtype];
evdwl *= factor_lj;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -37,32 +35,27 @@ using namespace PairLJCubicConstants;
// External functions from cuda library for atom decomposition
// Prototypes of the GPU library entry points used by PairLJCubicGPU. Old and
// new line wrappings of each prototype are interleaved in this view.

// Set up the GPU side of the pair style; takes the cutoff, inner-cutoff,
// sigma/epsilon and LJ coefficient tables plus special_lj, and gpu_mode by
// reference. NOTE(review): the init_style() call site is outside this view --
// presumably the return value is a status code checked there; confirm.
int ljcb_gpu_init(const int ntypes, double **cutsq, double **cut_inner_sq,
double **cut_inner, double **sigma, double **epsilon,
double **host_lj1, double **host_lj2, double **host_lj3,
double **host_lj4, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
int ljcb_gpu_init(const int ntypes, double **cutsq, double **cut_inner_sq, double **cut_inner,
double **sigma, double **epsilon, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen);
// Presumably releases what ljcb_gpu_init() set up; the call site is not
// visible here -- verify against the destructor.
void ljcb_gpu_clear();
// Force computation used by compute() when gpu_mode != GPU_FORCE; its return
// value is stored as the neighbor array (firstneigh). It receives the
// sub-domain bounds, atom tags and special-bond arrays; ilist/jnum are
// pointer-to-pointer parameters, host_start and success are references.
int ** ljcb_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success);
// Variant that takes an existing neighbor list (ilist, numj, firstneigh).
// NOTE(review): its call site is outside this view -- presumably the
// gpu_mode == GPU_FORCE branch of compute(); confirm.
void ljcb_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ljcb_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void ljcb_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
// Presumably the accelerator memory in use, consumed by memory_usage() --
// the call site is not visible here, TODO confirm.
double ljcb_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairLJCubicGPU::PairLJCubicGPU(LAMMPS *lmp) : PairLJCubic(lmp),
gpu_mode(GPU_FORCE)
PairLJCubicGPU::PairLJCubicGPU(LAMMPS *lmp) : PairLJCubic(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
cpu_time = 0.0;
@ -84,7 +77,7 @@ PairLJCubicGPU::~PairLJCubicGPU()
// Force/energy driver for the GPU variant of this pair style.
// Two paths are visible: when gpu_mode != GPU_FORCE the sub-domain bounds are
// passed to ljcb_gpu_compute_n(), which returns firstneigh and fills
// ilist/numneigh; otherwise the neighbor list held in `list` is handed to
// ljcb_gpu_compute(). Atoms from host_start up to inum are then processed on
// the host by cpu_compute().
// NOTE(review): rendered diff without +/- markers; where a statement appears
// twice, the first copy is the removed form and the second the added
// (clang-formatted) form. Lines starting with "@ -" are hunk headers marking
// elided source.
void PairLJCubicGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -92,7 +85,7 @@ void PairLJCubicGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
// orthogonal box: copy the sub-domain bounds directly
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -101,28 +94,24 @@ void PairLJCubicGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
// triclinic box: obtain bounds from the lamda-coordinate sub-domain via bbox()
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
// removed layout of the ljcb_gpu_compute_n call
firstneigh = ljcb_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
// added layout of the same call
firstneigh =
ljcb_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// removed layout of the ljcb_gpu_compute call, then the added layout
ljcb_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
ljcb_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
// the library reports failure through `success`; abort on this rank
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
// time the host share of the work; cpu_time is passed back to the library
// on the next call
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -142,10 +131,9 @@ void PairLJCubicGPU::init_style()
// Visible part of PairLJCubicGPU::init_style(): fills cutsq[][] with squared
// cutoffs from init_one(), tracks the largest one in maxcut, initializes the
// GPU library, and requests a full neighbor list when gpu_mode == GPU_FORCE.
// NOTE(review): rendered diff without +/- markers; duplicated statements are
// the removed form followed by the added form. mcut and maxcut are declared
// in source elided before this hunk.
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
mcut = init_one(i,j);
mcut = init_one(i, j);
// store the squared cutoff
mcut *= mcut;
if (mcut > maxcut)
maxcut = mcut;
if (mcut > maxcut) maxcut = mcut;
cutsq[i][j] = cutsq[j][i] = mcut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -153,22 +141,16 @@ void PairLJCubicGPU::init_style()
}
// maxcut holds a squared distance, hence the sqrt before adding the skin
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
// 5% of neighbor->oneatom, truncated to int; passed as max_nbors below
int mnf = 5e-2 * neighbor->oneatom;
// removed layout of the ljcb_gpu_init call and flag check
int success = ljcb_gpu_init(atom->ntypes+1, cutsq, cut_inner_sq,
cut_inner, sigma, epsilon, lj1, lj2,
lj3, lj4, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
// added layout of the same call and flag check
int success =
ljcb_gpu_init(atom->ntypes + 1, cutsq, cut_inner_sq, cut_inner, sigma, epsilon, lj1, lj2, lj3,
lj4, force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
// removed: old request API (request index, then half = 0 / full = 1 set
// on the request object)
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
// added: new request API asking for a full list in a single call
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -181,13 +163,13 @@ double PairLJCubicGPU::memory_usage()
/* ---------------------------------------------------------------------- */
// Host-side pair computation invoked from compute() for atoms that were not
// handled by the accelerator. In the visible lines only f[i] is updated and
// tallying uses ev_tally_full(), i.e. each pair is visited from atom i's side.
// NOTE(review): rendered diff without +/- markers; duplicated statements are
// the removed form followed by the added form. The outer/inner loop setup is
// elided at the "@ -" hunk header.
// removed layout of the signature and local declarations
void PairLJCubicGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r2inv,r6inv,forcelj,factor_lj;
double r,t,rmin;
// added layout of the same signature and local declarations
void PairLJCubicGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r2inv, r6inv, forcelj, factor_lj;
double r, t, rmin;
int *jlist;
double **x = atom->x;
@ -214,36 +196,35 @@ void PairLJCubicGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
// inside the inner cutoff: 12-6 form built from lj1/lj2
if (rsq <= cut_inner_sq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
} else {
// between inner and outer cutoff: polynomial in the reduced distance
// t = (r - cut_inner) / rmin, with rmin = sigma * RT6TWO
r = sqrt(rsq);
rmin = sigma[itype][jtype]*RT6TWO;
t = (r - cut_inner[itype][jtype])/rmin;
forcelj = epsilon[itype][jtype]*(-DPHIDS + A3*t*t/2.0)*r/rmin;
rmin = sigma[itype][jtype] * RT6TWO;
t = (r - cut_inner[itype][jtype]) / rmin;
forcelj = epsilon[itype][jtype] * (-DPHIDS + A3 * t * t / 2.0) * r / rmin;
}
fpair = factor_lj*forcelj*r2inv;
fpair = factor_lj * forcelj * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
// r6inv and t were set by the matching branch of the force evaluation above
if (rsq <= cut_inner_sq[itype][jtype])
evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]);
else
evdwl = epsilon[itype][jtype]*
(PHIS + DPHIDS*t - A3*t*t*t/6.0);
evdwl = epsilon[itype][jtype] * (PHIS + DPHIDS * t - A3 * t * t * t / 6.0);
evdwl *= factor_lj;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,29 +32,22 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// NOTE(review): rendered diff without +/- markers. Each prototype appears
// twice: first in its removed (hand-wrapped) layout, then in its added
// (clang-formatted) layout. Parameter lists are the same in both copies.
// removed layout of ljc_gpu_init
int ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double **host_cut_coulsq,
double *host_special_coul, const double qqrd2e);
// added layout of ljc_gpu_init
int ljc_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double **host_cut_coulsq, double *host_special_coul, const double qqrd2e);
void ljc_gpu_clear();
// removed layouts of ljc_gpu_compute_n and ljc_gpu_compute
int ** ljc_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd);
void ljc_gpu_compute(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
// added layouts of ljc_gpu_compute_n and ljc_gpu_compute
int **ljc_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
void ljc_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, const int nlocal, double *boxlo, double *prd);
double ljc_gpu_bytes();
/* ---------------------------------------------------------------------- */
@ -83,7 +74,7 @@ PairLJCutCoulCutGPU::~PairLJCutCoulCutGPU()
// Force/energy driver for the GPU variant of this pair style.
// When gpu_mode != GPU_FORCE, ljc_gpu_compute_n() receives the sub-domain
// bounds and returns firstneigh while filling ilist/numneigh; otherwise the
// list held in `list` is passed to ljc_gpu_compute(). Both calls also receive
// atom->q, domain->boxlo and domain->prd. Atoms from host_start up to inum
// are processed on the host by cpu_compute().
// NOTE(review): rendered diff without +/- markers; duplicated statements are
// the removed form followed by the added form. "@ -" lines are hunk headers
// marking elided source.
void PairLJCutCoulCutGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -91,7 +82,7 @@ void PairLJCutCoulCutGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
// orthogonal box: copy the sub-domain bounds directly
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -100,30 +91,25 @@ void PairLJCutCoulCutGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
// triclinic box: obtain bounds from the lamda-coordinate sub-domain via bbox()
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
// removed layout of the ljc_gpu_compute_n call
firstneigh = ljc_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
// added layout of the same call
firstneigh = ljc_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// removed leading lines of the ljc_gpu_compute call, then the added leading
// lines; the final argument line is unchanged context shared by both
ljc_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
ljc_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
// the library reports failure through `success`; abort on this rank
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
// time the host share of the work; cpu_time is passed back to the library
// on the next call
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -136,9 +122,7 @@ void PairLJCutCoulCutGPU::compute(int eflag, int vflag)
// Style initialization: requires per-atom charge, recomputes the squared
// cutoffs, initializes the GPU library via ljc_gpu_init(), and requests a
// full neighbor list when gpu_mode == GPU_FORCE.
// NOTE(review): rendered diff without +/- markers; duplicated statements are
// the removed form followed by the added form. "@ -" lines are hunk headers
// marking elided source (the declaration of `cut` is not visible here).
void PairLJCutCoulCutGPU::init_style()
{
if (!atom->q_flag)
error->all(FLERR,"Pair style lj/cut/coul/cut/gpu requires atom attribute q");
if (!atom->q_flag) error->all(FLERR, "Pair style lj/cut/coul/cut/gpu requires atom attribute q");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -146,10 +130,9 @@ void PairLJCutCoulCutGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
// store the squared cutoff and track the largest one
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -157,22 +140,16 @@ void PairLJCutCoulCutGPU::init_style()
}
// maxcut holds a squared distance, hence the sqrt before adding the skin
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
// 5% of neighbor->oneatom, truncated to int; passed as max_nbors below
int mnf = 5e-2 * neighbor->oneatom;
// removed layout of the ljc_gpu_init call and flag check
int success = ljc_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
force->special_coul, force->qqrd2e);
GPU_EXTRA::check_flag(success,error,world);
// added layout of the same call and flag check
int success =
ljc_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e);
GPU_EXTRA::check_flag(success, error, world);
// removed: old request API (request index, then half = 0 / full = 1 set
// on the request object)
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
// added: new request API asking for a full list in a single call
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -185,13 +162,12 @@ double PairLJCutCoulCutGPU::memory_usage()
/* ---------------------------------------------------------------------- */
// Host-side pair computation invoked from compute() for atoms that were not
// handled by the accelerator. Combines a Coulomb term (within cut_coulsq) and
// a 12-6 term (within cut_ljsq); in the visible lines only f[i] is updated
// and tallying uses ev_tally_full().
// NOTE(review): rendered diff without +/- markers; duplicated statements are
// the removed form followed by the added form. The loop setup is elided at
// the "@ -" hunk header.
// removed first two signature lines, then the added first line; the
// following parameter line is unchanged context shared by both
void PairLJCutCoulCutGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairLJCutCoulCutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
int i, j, ii, jj, jnum, itype, jtype;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
double rsq, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
int *jlist;
evdwl = ecoul = 0.0;
@ -225,39 +201,42 @@ void PairLJCutCoulCutGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
// Coulomb term: qqrd2e * qi * qj / r, zero beyond the Coulomb cutoff
if (rsq < cut_coulsq[itype][jtype])
forcecoul = qqrd2e * qtmp*q[j]*sqrt(r2inv);
else forcecoul = 0.0;
forcecoul = qqrd2e * qtmp * q[j] * sqrt(r2inv);
else
forcecoul = 0.0;
// 12-6 term built from lj1/lj2, zero beyond the LJ cutoff
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
} else forcelj = 0.0;
r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
} else
forcelj = 0.0;
fpair = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
fpair = (factor_coul * forcecoul + factor_lj * forcelj) * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (rsq < cut_coulsq[itype][jtype])
ecoul = factor_coul * qqrd2e * qtmp*q[j]*sqrt(r2inv);
else ecoul = 0.0;
ecoul = factor_coul * qqrd2e * qtmp * q[j] * sqrt(r2inv);
else
ecoul = 0.0;
// r6inv was set by the matching LJ branch of the force evaluation above
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,36 +32,30 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// NOTE(review): rendered diff without +/- markers. Each prototype appears
// first in its removed (hand-wrapped) layout and then in its added
// (clang-formatted) layout; trailing lines that did not change are shared
// context and appear only once. Parameter lists are the same in both copies.
// removed leading lines of ljcd_gpu_init
int ljcd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double **host_cut_coulsq,
double *host_special_coul, const double qqrd2e,
// added leading lines of ljcd_gpu_init; the kappa line closes both copies
int ljcd_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double **host_cut_coulsq, double *host_special_coul, const double qqrd2e,
const double kappa);
void ljcd_gpu_clear();
// removed layout of ljcd_gpu_compute_n, followed by the removed leading
// lines of ljcd_gpu_compute
int ** ljcd_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd);
void ljcd_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, const int nlocal,
// added layout of ljcd_gpu_compute_n
int **ljcd_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
// added leading lines of ljcd_gpu_compute; the boxlo/prd line closes both copies
void ljcd_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
double ljcd_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairLJCutCoulDebyeGPU::PairLJCutCoulDebyeGPU(LAMMPS *lmp) :
PairLJCutCoulDebye(lmp), gpu_mode(GPU_FORCE)
PairLJCutCoulDebye(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -78,14 +70,14 @@ PairLJCutCoulDebyeGPU::PairLJCutCoulDebyeGPU(LAMMPS *lmp) :
// Destructor: releases the GPU library state for this style via ljcd_gpu_clear().
// NOTE(review): rendered diff without +/- markers. The call is shown twice
// because the line was removed and re-added by the commit (the two copies
// look identical here, so presumably only whitespace changed); the actual
// source makes the call once.
PairLJCutCoulDebyeGPU::~PairLJCutCoulDebyeGPU()
{
ljcd_gpu_clear();
ljcd_gpu_clear();
}
/* ---------------------------------------------------------------------- */
// Force/energy driver for the GPU variant of this pair style.
// When gpu_mode != GPU_FORCE, ljcd_gpu_compute_n() receives the sub-domain
// bounds and returns firstneigh while filling ilist/numneigh; otherwise the
// list held in `list` is passed to ljcd_gpu_compute(). Both calls also
// receive atom->q, domain->boxlo and domain->prd. Atoms from host_start up to
// inum are processed on the host by cpu_compute().
// NOTE(review): rendered diff without +/- markers; duplicated statements are
// the removed form followed by the added form. "@ -" lines are hunk headers
// marking elided source.
void PairLJCutCoulDebyeGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -93,7 +85,7 @@ void PairLJCutCoulDebyeGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
// orthogonal box: copy the sub-domain bounds directly
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -102,30 +94,25 @@ void PairLJCutCoulDebyeGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
// triclinic box: obtain bounds from the lamda-coordinate sub-domain via bbox()
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
// removed layout of the ljcd_gpu_compute_n call
firstneigh = ljcd_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
// added layout of the same call
firstneigh = ljcd_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// removed leading lines of the ljcd_gpu_compute call, then the added leading
// lines; the final argument line is unchanged context shared by both
ljcd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
ljcd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
// the library reports failure through `success`; abort on this rank
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
// time the host share of the work; cpu_time is passed back to the library
// on the next call
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -139,8 +126,7 @@ void PairLJCutCoulDebyeGPU::compute(int eflag, int vflag)
// Style initialization: requires per-atom charge, recomputes the squared
// cutoffs, initializes the GPU library via ljcd_gpu_init() (which also
// receives kappa), and requests a full neighbor list when
// gpu_mode == GPU_FORCE.
// NOTE(review): rendered diff without +/- markers; duplicated statements are
// the removed form followed by the added form. "@ -" lines are hunk headers
// marking elided source (the declaration of `cut` is not visible here).
void PairLJCutCoulDebyeGPU::init_style()
{
// the `if` line is unchanged context; the error call below it is shown in
// removed then added form
if (!atom->q_flag)
error->all(FLERR,"Pair style lj/cut/coul/debye/gpu requires atom attribute q");
error->all(FLERR, "Pair style lj/cut/coul/debye/gpu requires atom attribute q");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -148,10 +134,9 @@ void PairLJCutCoulDebyeGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
// store the squared cutoff and track the largest one
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -159,23 +144,16 @@ void PairLJCutCoulDebyeGPU::init_style()
}
// maxcut holds a squared distance, hence the sqrt before adding the skin
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
// 5% of neighbor->oneatom, truncated to int; passed as max_nbors below
int mnf = 5e-2 * neighbor->oneatom;
// removed layout of the ljcd_gpu_init call and flag check
int success = ljcd_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq,
cut_coulsq, force->special_coul,
force->qqrd2e, kappa);
GPU_EXTRA::check_flag(success,error,world);
// added layout of the same call and flag check
int success =
ljcd_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, kappa);
GPU_EXTRA::check_flag(success, error, world);
// removed: old request API (request index, then half = 0 / full = 1 set
// on the request object)
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
// added: new request API asking for a full list in a single call
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -188,14 +166,13 @@ double PairLJCutCoulDebyeGPU::memory_usage()
/* ---------------------------------------------------------------------- */
// Host-side pair computation invoked from compute() for atoms that were not
// handled by the accelerator. Combines a screened Coulomb term (within
// cut_coulsq, screening = exp(-kappa * r)) and a 12-6 term (within cut_ljsq);
// in the visible lines only f[i] is updated and tallying uses ev_tally_full().
// NOTE(review): rendered diff without +/- markers; duplicated statements are
// the removed form followed by the added form. The loop setup is elided at
// the "@ -" hunk header.
// removed first two signature lines, then the added first line; the
// following parameter line is unchanged context shared by both
void PairLJCutCoulDebyeGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairLJCutCoulDebyeGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
double r,rinv,screening;
int i, j, ii, jj, jnum, itype, jtype;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
double rsq, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
double r, rinv, screening;
int *jlist;
evdwl = ecoul = 0.0;
@ -229,42 +206,45 @@ void PairLJCutCoulDebyeGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
// screened Coulomb term; rinv and screening are reused by the energy below
if (rsq < cut_coulsq[itype][jtype]) {
r = sqrt(rsq);
rinv = 1.0/r;
screening = exp(-kappa*r);
forcecoul = qqrd2e * qtmp*q[j] * screening * (kappa + rinv);
} else forcecoul = 0.0;
rinv = 1.0 / r;
screening = exp(-kappa * r);
forcecoul = qqrd2e * qtmp * q[j] * screening * (kappa + rinv);
} else
forcecoul = 0.0;
// 12-6 term built from lj1/lj2, zero beyond the LJ cutoff
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
} else forcelj = 0.0;
r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
} else
forcelj = 0.0;
fpair = (factor_coul*forcecoul + factor_lj*forcelj) * r2inv;
fpair = (factor_coul * forcecoul + factor_lj * forcelj) * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
// same cutoff test as the force branch, so rinv/screening are set here
if (rsq < cut_coulsq[itype][jtype])
ecoul = factor_coul * qqrd2e * qtmp*q[j] * rinv * screening;
else ecoul = 0.0;
ecoul = factor_coul * qqrd2e * qtmp * q[j] * rinv * screening;
else
ecoul = 0.0;
// r6inv was set by the matching LJ branch of the force evaluation above
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,50 +23,41 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include <cmath>
// Numeric constants for the lj/cut/coul/dsf GPU style.
// MY_PIS is numerically sqrt(pi); init_style() uses it in the f_shift term
// (2.0 / MY_PIS * alpha * erfcd / cut_coul).
// NOTE(review): EWALD_F, EWALD_P and A1..A5 are not used in the visible part
// of this file; presumably they are the coefficients of a polynomial erfc()
// approximation used by the host fallback -- confirm against cpu_compute().
// NOTE(review): rendered diff without +/- markers. The seven macros after
// MY_PIS appear twice (removed copy, then added copy); with indentation
// stripped the copies look identical, so presumably only spacing changed.
#define MY_PIS 1.77245385090551602729
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// NOTE(review): rendered diff without +/- markers. Each prototype appears
// twice: first in its removed (hand-wrapped) layout, then in its added
// (clang-formatted) layout. Parameter lists are the same in both copies.
// removed layout of ljd_gpu_init (note host_cut_coulsq is a scalar here, and
// e_shift, f_shift and alpha are passed in addition)
int ljd_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, const double host_cut_coulsq,
double *host_special_coul, const double qqrd2e,
const double e_shift, const double f_shift,
const double alpha);
// added layout of ljd_gpu_init
int ljd_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
const double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
const double e_shift, const double f_shift, const double alpha);
void ljd_gpu_clear();
// removed layouts of ljd_gpu_compute_n and ljd_gpu_compute
int ** ljd_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd);
void ljd_gpu_compute(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
// added layouts of ljd_gpu_compute_n and ljd_gpu_compute
int **ljd_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
void ljd_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, const int nlocal, double *boxlo, double *prd);
double ljd_gpu_bytes();
/* ---------------------------------------------------------------------- */
@ -94,7 +84,7 @@ PairLJCutCoulDSFGPU::~PairLJCutCoulDSFGPU()
// Force/energy driver for the GPU variant of this pair style.
// When gpu_mode != GPU_FORCE, ljd_gpu_compute_n() receives the sub-domain
// bounds and returns firstneigh while filling ilist/numneigh; otherwise the
// list held in `list` is passed to ljd_gpu_compute(). Both calls also receive
// atom->q, domain->boxlo and domain->prd. Atoms from host_start up to inum
// are processed on the host by cpu_compute().
// NOTE(review): rendered diff without +/- markers; duplicated statements are
// the removed form followed by the added form. "@ -" lines are hunk headers
// marking elided source.
void PairLJCutCoulDSFGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -102,7 +92,7 @@ void PairLJCutCoulDSFGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
// orthogonal box: copy the sub-domain bounds directly
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -111,30 +101,25 @@ void PairLJCutCoulDSFGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
// triclinic box: obtain bounds from the lamda-coordinate sub-domain via bbox()
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
// removed layout of the ljd_gpu_compute_n call
firstneigh = ljd_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
// added layout of the same call
firstneigh = ljd_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// removed leading lines of the ljd_gpu_compute call, then the added leading
// lines; the final argument line is unchanged context shared by both
ljd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
ljd_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
// the library reports failure through `success`; abort on this rank
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
// time the host share of the work; cpu_time is passed back to the library
// on the next call
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -147,9 +132,7 @@ void PairLJCutCoulDSFGPU::compute(int eflag, int vflag)
void PairLJCutCoulDSFGPU::init_style()
{
if (!atom->q_flag)
error->all(FLERR,"Pair style lj/cut/coul/dsf/gpu requires atom attribute q");
if (!atom->q_flag) error->all(FLERR, "Pair style lj/cut/coul/dsf/gpu requires atom attribute q");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -157,10 +140,9 @@ void PairLJCutCoulDSFGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -169,28 +151,21 @@ void PairLJCutCoulDSFGPU::init_style()
double cell_size = sqrt(maxcut) + neighbor->skin;
cut_coulsq = cut_coul * cut_coul;
double erfcc = erfc(alpha*cut_coul);
double erfcd = exp(-alpha*alpha*cut_coul*cut_coul);
f_shift = -(erfcc/cut_coulsq + 2.0/MY_PIS*alpha*erfcd/cut_coul);
e_shift = erfcc/cut_coul - f_shift*cut_coul;
double erfcc = erfc(alpha * cut_coul);
double erfcd = exp(-alpha * alpha * cut_coul * cut_coul);
f_shift = -(erfcc / cut_coulsq + 2.0 / MY_PIS * alpha * erfcd / cut_coul);
e_shift = erfcc / cut_coul - f_shift * cut_coul;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = ljd_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
force->special_coul, force->qqrd2e, e_shift,
f_shift, alpha);
GPU_EXTRA::check_flag(success,error,world);
int success = ljd_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size,
gpu_mode, screen, cut_ljsq, cut_coulsq, force->special_coul,
force->qqrd2e, e_shift, f_shift, alpha);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -203,14 +178,13 @@ double PairLJCutCoulDSFGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJCutCoulDSFGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairLJCutCoulDSFGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
double prefactor,erfcc,erfcd,t;
int i, j, ii, jj, jnum, itype, jtype;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
double r, rsq, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
double prefactor, erfcc, erfcd, t;
int *jlist;
evdwl = ecoul = 0.0;
@ -237,8 +211,8 @@ void PairLJCutCoulDSFGPU::cpu_compute(int start, int inum, int eflag,
jnum = numneigh[i];
if (evflag) {
double e_self = -(e_shift/2.0 + alpha/MY_PIS) * qtmp*qtmp*qqrd2e;
ev_tally(i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0);
double e_self = -(e_shift / 2.0 + alpha / MY_PIS) * qtmp * qtmp * qqrd2e;
ev_tally(i, i, nlocal, 0, 0.0, e_self, 0.0, 0.0, 0.0, 0.0);
}
for (jj = 0; jj < jnum; jj++) {
@ -250,47 +224,48 @@ void PairLJCutCoulDSFGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
} else forcelj = 0.0;
r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
} else
forcelj = 0.0;
if (rsq < cut_coulsq) {
r = sqrt(rsq);
prefactor = qqrd2e*qtmp*q[j]/r;
erfcd = exp(-alpha*alpha*r*r);
t = 1.0 / (1.0 + EWALD_P*alpha*r);
erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd +
r*f_shift) * r;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
prefactor = qqrd2e * qtmp * q[j] / r;
erfcd = exp(-alpha * alpha * r * r);
t = 1.0 / (1.0 + EWALD_P * alpha * r);
erfcc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * erfcd;
forcecoul = prefactor * (erfcc / r + 2.0 * alpha / MY_PIS * erfcd + r * f_shift) * r;
if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
}
fpair = (forcecoul + factor_lj*forcelj) * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
fpair = (forcecoul + factor_lj * forcelj) * r2inv;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
if (rsq < cut_coulsq) {
ecoul = prefactor * (erfcc - r*e_shift - rsq*f_shift);
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
ecoul = prefactor * (erfcc - r * e_shift - rsq * f_shift);
if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
} else
ecoul = 0.0;
}
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,56 +24,49 @@
#include "gpu_extra.h"
#include "kspace.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include <cmath>
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double host_cut_coulsq,
double *host_special_coul, const double qqrd2e,
int ljcl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
const double g_ewald);
void ljcl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double **host_lj_cutsq);
void ljcl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double **host_lj_cutsq);
void ljcl_gpu_clear();
int ** ljcl_gpu_compute_n(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q,
double *boxlo, double *prd);
void ljcl_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **ljcl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
void ljcl_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
double ljcl_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairLJCutCoulLongGPU::PairLJCutCoulLongGPU(LAMMPS *lmp) :
PairLJCutCoulLong(lmp), gpu_mode(GPU_FORCE)
PairLJCutCoulLong(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
cpu_time = 0.0;
@ -95,7 +87,7 @@ PairLJCutCoulLongGPU::~PairLJCutCoulLongGPU()
void PairLJCutCoulLongGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -103,7 +95,7 @@ void PairLJCutCoulLongGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -112,30 +104,25 @@ void PairLJCutCoulLongGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = ljcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
firstneigh = ljcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
ljcl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
ljcl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -150,8 +137,7 @@ void PairLJCutCoulLongGPU::init_style()
{
cut_respa = nullptr;
if (!atom->q_flag)
error->all(FLERR,"Pair style lj/cut/coul/long/gpu requires atom attribute q");
if (!atom->q_flag) error->all(FLERR, "Pair style lj/cut/coul/long/gpu requires atom attribute q");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -159,10 +145,9 @@ void PairLJCutCoulLongGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -174,30 +159,23 @@ void PairLJCutCoulLongGPU::init_style()
// insure use of KSpace long-range solver, set g_ewald
if (force->kspace == nullptr)
error->all(FLERR,"Pair style requires a KSpace style");
if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
g_ewald = force->kspace->g_ewald;
// setup force tables
if (ncoultablebits) init_tables(cut_coul,cut_respa);
if (ncoultablebits) init_tables(cut_coul, cut_respa);
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = ljcl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
force->special_coul, force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success,error,world);
int success =
ljcl_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -206,7 +184,7 @@ void PairLJCutCoulLongGPU::reinit()
{
Pair::reinit();
ljcl_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, cut_ljsq);
ljcl_gpu_reinit(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, cut_ljsq);
}
/* ---------------------------------------------------------------------- */
@ -219,15 +197,14 @@ double PairLJCutCoulLongGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJCutCoulLongGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairLJCutCoulLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype,itable;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double fraction,table;
double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
double grij,expm2,prefactor,t,erfc;
int i, j, ii, jj, jnum, itype, jtype, itable;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
double fraction, table;
double r, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
double grij, expm2, prefactor, t, erfc;
int *jlist;
double rsq;
@ -262,68 +239,71 @@ void PairLJCutCoulLongGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq) {
r = sqrt(rsq);
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*q[j]/r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
expm2 = exp(-grij * grij);
t = 1.0 / (1.0 + EWALD_P * grij);
erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
prefactor = qqrd2e * qtmp * q[j] / r;
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
} else {
union_int_float_t rsq_lookup;
rsq_lookup.f = rsq;
itable = rsq_lookup.i & ncoulmask;
itable >>= ncoulshiftbits;
fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
table = ftable[itable] + fraction*dftable[itable];
forcecoul = qtmp*q[j] * table;
table = ftable[itable] + fraction * dftable[itable];
forcecoul = qtmp * q[j] * table;
if (factor_coul < 1.0) {
table = ctable[itable] + fraction*dctable[itable];
prefactor = qtmp*q[j] * table;
forcecoul -= (1.0-factor_coul)*prefactor;
table = ctable[itable] + fraction * dctable[itable];
prefactor = qtmp * q[j] * table;
forcecoul -= (1.0 - factor_coul) * prefactor;
}
}
} else forcecoul = 0.0;
} else
forcecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
} else forcelj = 0.0;
r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
} else
forcelj = 0.0;
fpair = (forcecoul + factor_lj*forcelj) * r2inv;
fpair = (forcecoul + factor_lj * forcelj) * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq)
ecoul = prefactor*erfc;
ecoul = prefactor * erfc;
else {
table = etable[itable] + fraction*detable[itable];
ecoul = qtmp*q[j] * table;
table = etable[itable] + fraction * detable[itable];
ecoul = qtmp * q[j] * table;
}
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
} else
ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,7 +24,6 @@
#include "gpu_extra.h"
#include "kspace.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -35,36 +33,29 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int ljcm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **host_gcons, double **host_dgcons,
double **offset, double *special_lj, const int inum,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double host_cut_coulsq,
double *host_special_coul, const int order,
const double qqrd2e);
int ljcm_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **host_gcons, double **host_dgcons,
double **offset, double *special_lj, const int inum, const int nall,
const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode,
FILE *screen, double **host_cut_ljsq, double host_cut_coulsq,
double *host_special_coul, const int order, const double qqrd2e);
void ljcm_gpu_clear();
int ** ljcm_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd);
void ljcm_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **ljcm_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
void ljcm_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
double ljcm_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairLJCutCoulMSMGPU::PairLJCutCoulMSMGPU(LAMMPS *lmp) :
PairLJCutCoulMSM(lmp), gpu_mode(GPU_FORCE)
PairLJCutCoulMSMGPU::PairLJCutCoulMSMGPU(LAMMPS *lmp) : PairLJCutCoulMSM(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -86,7 +77,7 @@ PairLJCutCoulMSMGPU::~PairLJCutCoulMSMGPU()
void PairLJCutCoulMSMGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -94,7 +85,7 @@ void PairLJCutCoulMSMGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -103,30 +94,25 @@ void PairLJCutCoulMSMGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = ljcm_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success,
atom->q, domain->boxlo, domain->prd);
firstneigh = ljcm_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
ljcm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
ljcm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -141,12 +127,10 @@ void PairLJCutCoulMSMGPU::init_style()
{
cut_respa = nullptr;
if (!atom->q_flag)
error->all(FLERR,"Pair style lj/cut/coul/cut/gpu requires atom attribute q");
if (!atom->q_flag) error->all(FLERR, "Pair style lj/cut/coul/cut/gpu requires atom attribute q");
if (force->kspace->scalar_pressure_flag)
error->all(FLERR,"Must use 'kspace_modify pressure/scalar no' with GPU MSM Pair styles");
error->all(FLERR, "Must use 'kspace_modify pressure/scalar no' with GPU MSM Pair styles");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -154,10 +138,9 @@ void PairLJCutCoulMSMGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -169,27 +152,19 @@ void PairLJCutCoulMSMGPU::init_style()
// setup force tables
if (ncoultablebits) init_tables(cut_coul,cut_respa);
if (ncoultablebits) init_tables(cut_coul, cut_respa);
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = ljcm_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
force->kspace->get_gcons(),
force->kspace->get_dgcons(),
offset, force->special_lj,
atom->nlocal, atom->nlocal+atom->nghost,
mnf, maxspecial, cell_size, gpu_mode, screen,
cut_ljsq, cut_coulsq, force->special_coul,
force->kspace->order, force->qqrd2e);
GPU_EXTRA::check_flag(success,error,world);
int success =
ljcm_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, force->kspace->get_gcons(),
force->kspace->get_dgcons(), offset, force->special_lj, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen,
cut_ljsq, cut_coulsq, force->special_coul, force->kspace->order, force->qqrd2e);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -202,14 +177,14 @@ double PairLJCutCoulMSMGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJCutCoulMSMGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype,itable;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double fraction,table;
double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
double egamma,fgamma,prefactor;
void PairLJCutCoulMSMGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype, itable;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
double fraction, table;
double r, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
double egamma, fgamma, prefactor;
int *jlist;
double rsq;
@ -242,66 +217,69 @@ void PairLJCutCoulMSMGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq) {
r = sqrt(rsq);
prefactor = qqrd2e * qtmp*q[j]/r;
egamma = 1.0 - (r/cut_coul)*force->kspace->gamma(r/cut_coul);
fgamma = 1.0 + (rsq/cut_coulsq)*force->kspace->dgamma(r/cut_coul);
prefactor = qqrd2e * qtmp * q[j] / r;
egamma = 1.0 - (r / cut_coul) * force->kspace->gamma(r / cut_coul);
fgamma = 1.0 + (rsq / cut_coulsq) * force->kspace->dgamma(r / cut_coul);
forcecoul = prefactor * fgamma;
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
} else {
union_int_float_t rsq_lookup;
rsq_lookup.f = rsq;
itable = rsq_lookup.i & ncoulmask;
itable >>= ncoulshiftbits;
fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
table = ftable[itable] + fraction*dftable[itable];
forcecoul = qtmp*q[j] * table;
table = ftable[itable] + fraction * dftable[itable];
forcecoul = qtmp * q[j] * table;
if (factor_coul < 1.0) {
table = ctable[itable] + fraction*dctable[itable];
prefactor = qtmp*q[j] * table;
forcecoul -= (1.0-factor_coul)*prefactor;
table = ctable[itable] + fraction * dctable[itable];
prefactor = qtmp * q[j] * table;
forcecoul -= (1.0 - factor_coul) * prefactor;
}
}
} else forcecoul = 0.0;
} else
forcecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
} else forcelj = 0.0;
r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
} else
forcelj = 0.0;
fpair = (forcecoul + forcelj) * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq)
ecoul = prefactor*egamma;
ecoul = prefactor * egamma;
else {
table = etable[itable] + fraction*detable[itable];
ecoul = qtmp*q[j] * table;
table = etable[itable] + fraction * detable[itable];
ecoul = qtmp * q[j] * table;
}
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
} else
ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include "update.h"
@ -36,36 +34,29 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
//
// How this file uses them (as visible in compute() and init_style()):
//  - dpl_gpu_init():      called from init_style(); its int result is handed
//                         to GPU_EXTRA::check_flag(). gpu_mode is taken by
//                         non-const reference.
//  - dpl_gpu_compute_n(): used when gpu_mode != GPU_FORCE; receives the
//                         subdomain bounds and the special-bond arrays,
//                         returns the array compute() stores as firstneigh
//                         and hands back ilist/jnum through the
//                         pointer-to-pointer arguments.
//  - dpl_gpu_compute():   used when gpu_mode == GPU_FORCE; receives the
//                         neighbor list (ilist, numj, firstneigh) from the
//                         caller instead.
//  - host_start and success are reference arguments of both compute calls:
//    compute() aborts when success is false and runs cpu_compute() for the
//    entries [host_start, inum).
//  - dpl_gpu_clear() / dpl_gpu_bytes(): their callers are not visible in this
//    excerpt; presumably cleanup and memory accounting -- confirm against the
//    GPU library.
int dpl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double **host_cut_coulsq,
double *host_special_coul, const double qqrd2e);
int dpl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double **host_cut_coulsq, double *host_special_coul, const double qqrd2e);
void dpl_gpu_clear();
int ** dpl_gpu_compute_n(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success,
double *host_q, double **host_mu,
double *boxlo, double *prd);
void dpl_gpu_compute(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, double **host_mu,
const int nlocal, double *boxlo, double *prd);
int **dpl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double **host_mu,
double *boxlo, double *prd);
void dpl_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, double **host_mu, const int nlocal,
double *boxlo, double *prd);
double dpl_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairLJCutDipoleCutGPU::PairLJCutDipoleCutGPU(LAMMPS *lmp) : PairLJCutDipoleCut(lmp),
gpu_mode(GPU_FORCE)
PairLJCutDipoleCutGPU::PairLJCutDipoleCutGPU(LAMMPS *lmp) :
PairLJCutDipoleCut(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -87,7 +78,7 @@ PairLJCutDipoleCutGPU::~PairLJCutDipoleCutGPU()
void PairLJCutDipoleCutGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -95,7 +86,7 @@ void PairLJCutDipoleCutGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -104,30 +95,25 @@ void PairLJCutDipoleCutGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = dpl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, atom->mu, domain->boxlo,
domain->prd);
firstneigh = dpl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, atom->mu, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
dpl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
dpl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->mu, atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -141,11 +127,10 @@ void PairLJCutDipoleCutGPU::compute(int eflag, int vflag)
/* ----------------------------------------------------------------------
   init specific to this pair style: check the required atom attributes
   and the unit style, recompute cutsq, initialize the GPU library via
   dpl_gpu_init(), and request a full neighbor list in GPU_FORCE mode
------------------------------------------------------------------------- */
void PairLJCutDipoleCutGPU::init_style()
{
// charge, dipole and torque per-atom data must all be enabled
if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag)
error->all(FLERR,"Pair dipole/cut/gpu requires atom attributes q, mu, torque");
error->all(FLERR, "Pair dipole/cut/gpu requires atom attributes q, mu, torque");
if (strcmp(update->unit_style,"electron") == 0)
error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
if (strcmp(update->unit_style, "electron") == 0)
error->all(FLERR, "Cannot (yet) use 'electron' units with dipoles");
// Repeat cutsq calculation because done after call to init_style
// maxcut tracks the largest squared cutoff over all type pairs
double maxcut = -1.0;
@ -153,10 +138,9 @@ void PairLJCutDipoleCutGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
// init_one() returns the pair cutoff; it is squared in place below
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -164,22 +148,16 @@ void PairLJCutDipoleCutGPU::init_style()
}
// maxcut is a squared distance, hence the sqrt before adding the skin
double cell_size = sqrt(maxcut) + neighbor->skin;
// maxspecial stays 0 for atomic (non-molecular) systems
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
// 5% of neighbor->oneatom, truncated to int; passed as the max_nbors argument
int mnf = 5e-2 * neighbor->oneatom;
// gpu_mode is passed by reference; the result is validated by check_flag()
int success = dpl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
force->special_coul, force->qqrd2e);
GPU_EXTRA::check_flag(success,error,world);
int success =
dpl_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e);
GPU_EXTRA::check_flag(success, error, world);
// compute() reads the host neighbor list only in GPU_FORCE mode, so a full
// list is requested only in that case
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -192,21 +170,20 @@ double PairLJCutDipoleCutGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vflag,
int *ilist, int *numneigh,
int **firstneigh)
void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vflag, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz;
double rsq,rinv,r2inv,r6inv,r3inv,r5inv,r7inv;
double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz;
double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul;
double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4;
double forcelj,factor_coul,factor_lj;
int i, j, ii, jj, jnum, itype, jtype;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fx, fy, fz;
double rsq, rinv, r2inv, r6inv, r3inv, r5inv, r7inv;
double forcecoulx, forcecouly, forcecoulz, crossx, crossy, crossz;
double tixcoul, tiycoul, tizcoul, tjxcoul, tjycoul, tjzcoul;
double fq, pdotp, pidotr, pjdotr, pre1, pre2, pre3, pre4;
double forcelj, factor_coul, factor_lj;
int *jlist;
evdwl = ecoul = 0.0;
ev_init(eflag,vflag);
ev_init(eflag, vflag);
double **x = atom->x;
double **f = atom->f;
@ -218,7 +195,6 @@ void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vfla
double *special_lj = force->special_lj;
double qqrd2e = force->qqrd2e;
// loop over neighbors of my atoms
for (ii = start; ii < inum; ii++) {
@ -240,11 +216,11 @@ void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vfla
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
rinv = sqrt(r2inv);
// atom can have both a charge and dipole
@ -257,119 +233,119 @@ void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vfla
if (rsq < cut_coulsq[itype][jtype]) {
if (qtmp != 0.0 && q[j] != 0.0) {
r3inv = r2inv*rinv;
pre1 = qtmp*q[j]*r3inv;
r3inv = r2inv * rinv;
pre1 = qtmp * q[j] * r3inv;
forcecoulx += pre1*delx;
forcecouly += pre1*dely;
forcecoulz += pre1*delz;
forcecoulx += pre1 * delx;
forcecouly += pre1 * dely;
forcecoulz += pre1 * delz;
}
if (mu[i][3] > 0.0 && mu[j][3] > 0.0) {
r3inv = r2inv*rinv;
r5inv = r3inv*r2inv;
r7inv = r5inv*r2inv;
r3inv = r2inv * rinv;
r5inv = r3inv * r2inv;
r7inv = r5inv * r2inv;
pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
pdotp = mu[i][0] * mu[j][0] + mu[i][1] * mu[j][1] + mu[i][2] * mu[j][2];
pidotr = mu[i][0] * delx + mu[i][1] * dely + mu[i][2] * delz;
pjdotr = mu[j][0] * delx + mu[j][1] * dely + mu[j][2] * delz;
pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr;
pre2 = 3.0*r5inv*pjdotr;
pre3 = 3.0*r5inv*pidotr;
pre4 = -1.0*r3inv;
pre1 = 3.0 * r5inv * pdotp - 15.0 * r7inv * pidotr * pjdotr;
pre2 = 3.0 * r5inv * pjdotr;
pre3 = 3.0 * r5inv * pidotr;
pre4 = -1.0 * r3inv;
forcecoulx += pre1*delx + pre2*mu[i][0] + pre3*mu[j][0];
forcecouly += pre1*dely + pre2*mu[i][1] + pre3*mu[j][1];
forcecoulz += pre1*delz + pre2*mu[i][2] + pre3*mu[j][2];
forcecoulx += pre1 * delx + pre2 * mu[i][0] + pre3 * mu[j][0];
forcecouly += pre1 * dely + pre2 * mu[i][1] + pre3 * mu[j][1];
forcecoulz += pre1 * delz + pre2 * mu[i][2] + pre3 * mu[j][2];
crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]);
crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]);
crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]);
crossx = pre4 * (mu[i][1] * mu[j][2] - mu[i][2] * mu[j][1]);
crossy = pre4 * (mu[i][2] * mu[j][0] - mu[i][0] * mu[j][2]);
crossz = pre4 * (mu[i][0] * mu[j][1] - mu[i][1] * mu[j][0]);
tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely);
tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz);
tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx);
tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely);
tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz);
tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx);
tixcoul += crossx + pre2 * (mu[i][1] * delz - mu[i][2] * dely);
tiycoul += crossy + pre2 * (mu[i][2] * delx - mu[i][0] * delz);
tizcoul += crossz + pre2 * (mu[i][0] * dely - mu[i][1] * delx);
tjxcoul += -crossx + pre3 * (mu[j][1] * delz - mu[j][2] * dely);
tjycoul += -crossy + pre3 * (mu[j][2] * delx - mu[j][0] * delz);
tjzcoul += -crossz + pre3 * (mu[j][0] * dely - mu[j][1] * delx);
}
if (mu[i][3] > 0.0 && q[j] != 0.0) {
r3inv = r2inv*rinv;
r5inv = r3inv*r2inv;
pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
pre1 = 3.0*q[j]*r5inv * pidotr;
pre2 = q[j]*r3inv;
r3inv = r2inv * rinv;
r5inv = r3inv * r2inv;
pidotr = mu[i][0] * delx + mu[i][1] * dely + mu[i][2] * delz;
pre1 = 3.0 * q[j] * r5inv * pidotr;
pre2 = q[j] * r3inv;
forcecoulx += pre2*mu[i][0] - pre1*delx;
forcecouly += pre2*mu[i][1] - pre1*dely;
forcecoulz += pre2*mu[i][2] - pre1*delz;
tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely);
tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz);
tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx);
forcecoulx += pre2 * mu[i][0] - pre1 * delx;
forcecouly += pre2 * mu[i][1] - pre1 * dely;
forcecoulz += pre2 * mu[i][2] - pre1 * delz;
tixcoul += pre2 * (mu[i][1] * delz - mu[i][2] * dely);
tiycoul += pre2 * (mu[i][2] * delx - mu[i][0] * delz);
tizcoul += pre2 * (mu[i][0] * dely - mu[i][1] * delx);
}
if (mu[j][3] > 0.0 && qtmp != 0.0) {
r3inv = r2inv*rinv;
r5inv = r3inv*r2inv;
pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
pre1 = 3.0*qtmp*r5inv * pjdotr;
pre2 = qtmp*r3inv;
r3inv = r2inv * rinv;
r5inv = r3inv * r2inv;
pjdotr = mu[j][0] * delx + mu[j][1] * dely + mu[j][2] * delz;
pre1 = 3.0 * qtmp * r5inv * pjdotr;
pre2 = qtmp * r3inv;
forcecoulx += pre1*delx - pre2*mu[j][0];
forcecouly += pre1*dely - pre2*mu[j][1];
forcecoulz += pre1*delz - pre2*mu[j][2];
tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely);
tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz);
tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx);
forcecoulx += pre1 * delx - pre2 * mu[j][0];
forcecouly += pre1 * dely - pre2 * mu[j][1];
forcecoulz += pre1 * delz - pre2 * mu[j][2];
tjxcoul += -pre2 * (mu[j][1] * delz - mu[j][2] * dely);
tjycoul += -pre2 * (mu[j][2] * delx - mu[j][0] * delz);
tjzcoul += -pre2 * (mu[j][0] * dely - mu[j][1] * delx);
}
}
// LJ interaction
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
forcelj *= factor_lj * r2inv;
} else forcelj = 0.0;
} else
forcelj = 0.0;
// total force
fq = factor_coul*qqrd2e;
fx = fq*forcecoulx + delx*forcelj;
fy = fq*forcecouly + dely*forcelj;
fz = fq*forcecoulz + delz*forcelj;
fq = factor_coul * qqrd2e;
fx = fq * forcecoulx + delx * forcelj;
fy = fq * forcecouly + dely * forcelj;
fz = fq * forcecoulz + delz * forcelj;
// force & torque accumulation
f[i][0] += fx;
f[i][1] += fy;
f[i][2] += fz;
torque[i][0] += fq*tixcoul;
torque[i][1] += fq*tiycoul;
torque[i][2] += fq*tizcoul;
torque[i][0] += fq * tixcoul;
torque[i][1] += fq * tiycoul;
torque[i][2] += fq * tizcoul;
if (eflag) {
if (rsq < cut_coulsq[itype][jtype]) {
ecoul = qtmp*q[j]*rinv;
ecoul = qtmp * q[j] * rinv;
if (mu[i][3] > 0.0 && mu[j][3] > 0.0)
ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr;
if (mu[i][3] > 0.0 && q[j] != 0.0)
ecoul += -q[j]*r3inv*pidotr;
if (mu[j][3] > 0.0 && qtmp != 0.0)
ecoul += qtmp*r3inv*pjdotr;
ecoul *= factor_coul*qqrd2e;
} else ecoul = 0.0;
ecoul += r3inv * pdotp - 3.0 * r5inv * pidotr * pjdotr;
if (mu[i][3] > 0.0 && q[j] != 0.0) ecoul += -q[j] * r3inv * pidotr;
if (mu[j][3] > 0.0 && qtmp != 0.0) ecoul += qtmp * r3inv * pjdotr;
ecoul *= factor_coul * qqrd2e;
} else
ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
if (evflag) ev_tally_xyz_full(i,evdwl,ecoul,fx,fy,fz,delx,dely,delz);
if (evflag) ev_tally_xyz_full(i, evdwl, ecoul, fx, fy, fz, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
#include "kspace.h"
#include "math_const.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include "update.h"
@ -34,50 +32,43 @@
#include <cmath>
#include <cstring>
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
using namespace LAMMPS_NS;
using namespace MathConst;
// External functions from cuda library for atom decomposition
//
// How this file uses them (as visible in compute() and init_style()):
//  - dplj_gpu_init():      called from init_style(); its int result is handed
//                          to GPU_EXTRA::check_flag(). gpu_mode is taken by
//                          non-const reference. Here host_cut_coulsq is a
//                          single double and g_ewald is an extra trailing
//                          argument.
//  - dplj_gpu_compute_n(): used when gpu_mode != GPU_FORCE; receives the
//                          subdomain bounds and the special-bond arrays,
//                          returns the array compute() stores as firstneigh
//                          and hands back ilist/jnum through the
//                          pointer-to-pointer arguments.
//  - dplj_gpu_compute():   used when gpu_mode == GPU_FORCE; receives the
//                          neighbor list (ilist, numj, firstneigh) from the
//                          caller instead.
//  - host_start and success are reference arguments of both compute calls:
//    compute() aborts when success is false and runs cpu_compute() for the
//    entries [host_start, inum).
//  - dplj_gpu_clear() / dplj_gpu_bytes(): their callers are not visible in
//    this excerpt; presumably cleanup and memory accounting -- confirm
//    against the GPU library.
int dplj_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, const double host_cut_coulsq,
double *host_special_coul, const double qqrd2e,
int dplj_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
const double host_cut_coulsq, double *host_special_coul, const double qqrd2e,
const double g_ewald);
void dplj_gpu_clear();
int ** dplj_gpu_compute_n(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success,
double *host_q, double **host_mu,
double *boxlo, double *prd);
void dplj_gpu_compute(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, double **host_mu,
int **dplj_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double **host_mu,
double *boxlo, double *prd);
void dplj_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, double **host_mu,
const int nlocal, double *boxlo, double *prd);
double dplj_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairLJCutDipoleLongGPU::PairLJCutDipoleLongGPU(LAMMPS *lmp) : PairLJCutDipoleLong(lmp),
gpu_mode(GPU_FORCE)
PairLJCutDipoleLongGPU::PairLJCutDipoleLongGPU(LAMMPS *lmp) :
PairLJCutDipoleLong(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -99,7 +90,7 @@ PairLJCutDipoleLongGPU::~PairLJCutDipoleLongGPU()
void PairLJCutDipoleLongGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -107,7 +98,7 @@ void PairLJCutDipoleLongGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -116,30 +107,25 @@ void PairLJCutDipoleLongGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = dplj_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, atom->mu, domain->boxlo,
domain->prd);
firstneigh = dplj_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, atom->mu, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
dplj_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
atom->mu, atom->nlocal, domain->boxlo, domain->prd);
dplj_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->mu, atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -153,11 +139,10 @@ void PairLJCutDipoleLongGPU::compute(int eflag, int vflag)
void PairLJCutDipoleLongGPU::init_style()
{
if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag)
error->all(FLERR,"Pair dipole/cut/gpu requires atom attributes q, mu, torque");
error->all(FLERR, "Pair dipole/cut/gpu requires atom attributes q, mu, torque");
if (strcmp(update->unit_style,"electron") == 0)
error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
if (strcmp(update->unit_style, "electron") == 0)
error->all(FLERR, "Cannot (yet) use 'electron' units with dipoles");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -165,10 +150,9 @@ void PairLJCutDipoleLongGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -180,30 +164,23 @@ void PairLJCutDipoleLongGPU::init_style()
// insure use of KSpace long-range solver, set g_ewald
if (force->kspace == nullptr)
error->all(FLERR,"Pair style requires a KSpace style");
if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
g_ewald = force->kspace->g_ewald;
// setup force tables
if (ncoultablebits) init_tables(cut_coul,nullptr);
if (ncoultablebits) init_tables(cut_coul, nullptr);
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = dplj_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
force->special_coul, force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success,error,world);
int success =
dplj_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -216,27 +193,26 @@ double PairLJCutDipoleLongGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJCutDipoleLongGPU::cpu_compute(int start, int inum, int eflag, int vflag,
int *ilist, int *numneigh,
int **firstneigh)
void PairLJCutDipoleLongGPU::cpu_compute(int start, int inum, int eflag, int vflag, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz;
double rsq,r,rinv,r2inv,r6inv;
double forcecoulx,forcecouly,forcecoulz,fforce;
double tixcoul,tiycoul,tizcoul;
double fx,fy,fz,fdx,fdy,fdz,fax,fay,faz;
double pdotp,pidotr,pjdotr,pre1,pre2,pre3;
double grij,expm2,t,erfc;
double g0,g1,g2,b0,b1,b2,b3,d0,d1,d2,d3;
double zdix,zdiy,zdiz,zdjx,zdjy,zdjz,zaix,zaiy,zaiz,zajx,zajy,zajz;
double g0b1_g1b2_g2b3,g0d1_g1d2_g2d3;
double forcelj,factor_coul,factor_lj,facm1;
double evdwl,ecoul;
int i, j, ii, jj, jnum, itype, jtype;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz;
double rsq, r, rinv, r2inv, r6inv;
double forcecoulx, forcecouly, forcecoulz, fforce;
double tixcoul, tiycoul, tizcoul;
double fx, fy, fz, fdx, fdy, fdz, fax, fay, faz;
double pdotp, pidotr, pjdotr, pre1, pre2, pre3;
double grij, expm2, t, erfc;
double g0, g1, g2, b0, b1, b2, b3, d0, d1, d2, d3;
double zdix, zdiy, zdiz, zdjx, zdjy, zdjz, zaix, zaiy, zaiz, zajx, zajy, zajz;
double g0b1_g1b2_g2b3, g0d1_g1d2_g2d3;
double forcelj, factor_coul, factor_lj, facm1;
double evdwl, ecoul;
int *jlist;
evdwl = ecoul = 0.0;
ev_init(eflag,vflag);
ev_init(eflag, vflag);
double **x = atom->x;
double **f = atom->f;
@ -249,8 +225,8 @@ void PairLJCutDipoleLongGPU::cpu_compute(int start, int inum, int eflag, int vfl
double qqrd2e = force->qqrd2e;
pre1 = 2.0 * g_ewald / MY_PIS;
pre2 = 4.0 * pow(g_ewald,3.0) / MY_PIS;
pre3 = 8.0 * pow(g_ewald,5.0) / MY_PIS;
pre2 = 4.0 * pow(g_ewald, 3.0) / MY_PIS;
pre3 = 8.0 * pow(g_ewald, 5.0) / MY_PIS;
// loop over neighbors of my atoms
@ -273,51 +249,48 @@ void PairLJCutDipoleLongGPU::cpu_compute(int start, int inum, int eflag, int vfl
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
rinv = sqrt(r2inv);
if (rsq < cut_coulsq) {
r = sqrt(rsq);
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
expm2 = exp(-grij * grij);
t = 1.0 / (1.0 + EWALD_P * grij);
erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
pdotp = mu[i][0] * mu[j][0] + mu[i][1] * mu[j][1] + mu[i][2] * mu[j][2];
pidotr = mu[i][0] * delx + mu[i][1] * dely + mu[i][2] * delz;
pjdotr = mu[j][0] * delx + mu[j][1] * dely + mu[j][2] * delz;
g0 = qtmp*q[j];
g1 = qtmp*pjdotr - q[j]*pidotr + pdotp;
g2 = -pidotr*pjdotr;
g0 = qtmp * q[j];
g1 = qtmp * pjdotr - q[j] * pidotr + pdotp;
g2 = -pidotr * pjdotr;
if (factor_coul > 0.0) {
b0 = erfc * rinv;
b1 = (b0 + pre1*expm2) * r2inv;
b2 = (3.0*b1 + pre2*expm2) * r2inv;
b3 = (5.0*b2 + pre3*expm2) * r2inv;
b1 = (b0 + pre1 * expm2) * r2inv;
b2 = (3.0 * b1 + pre2 * expm2) * r2inv;
b3 = (5.0 * b2 + pre3 * expm2) * r2inv;
g0b1_g1b2_g2b3 = g0*b1 + g1*b2 + g2*b3;
fdx = delx * g0b1_g1b2_g2b3 -
b1 * (qtmp*mu[j][0] - q[j]*mu[i][0]) +
b2 * (pjdotr*mu[i][0] + pidotr*mu[j][0]);
fdy = dely * g0b1_g1b2_g2b3 -
b1 * (qtmp*mu[j][1] - q[j]*mu[i][1]) +
b2 * (pjdotr*mu[i][1] + pidotr*mu[j][1]);
fdz = delz * g0b1_g1b2_g2b3 -
b1 * (qtmp*mu[j][2] - q[j]*mu[i][2]) +
b2 * (pjdotr*mu[i][2] + pidotr*mu[j][2]);
g0b1_g1b2_g2b3 = g0 * b1 + g1 * b2 + g2 * b3;
fdx = delx * g0b1_g1b2_g2b3 - b1 * (qtmp * mu[j][0] - q[j] * mu[i][0]) +
b2 * (pjdotr * mu[i][0] + pidotr * mu[j][0]);
fdy = dely * g0b1_g1b2_g2b3 - b1 * (qtmp * mu[j][1] - q[j] * mu[i][1]) +
b2 * (pjdotr * mu[i][1] + pidotr * mu[j][1]);
fdz = delz * g0b1_g1b2_g2b3 - b1 * (qtmp * mu[j][2] - q[j] * mu[i][2]) +
b2 * (pjdotr * mu[i][2] + pidotr * mu[j][2]);
zdix = delx * (q[j]*b1 + b2*pjdotr) - b1*mu[j][0];
zdiy = dely * (q[j]*b1 + b2*pjdotr) - b1*mu[j][1];
zdiz = delz * (q[j]*b1 + b2*pjdotr) - b1*mu[j][2];
zdjx = delx * (-qtmp*b1 + b2*pidotr) - b1*mu[i][0];
zdjy = dely * (-qtmp*b1 + b2*pidotr) - b1*mu[i][1];
zdjz = delz * (-qtmp*b1 + b2*pidotr) - b1*mu[i][2];
zdix = delx * (q[j] * b1 + b2 * pjdotr) - b1 * mu[j][0];
zdiy = dely * (q[j] * b1 + b2 * pjdotr) - b1 * mu[j][1];
zdiz = delz * (q[j] * b1 + b2 * pjdotr) - b1 * mu[j][2];
zdjx = delx * (-qtmp * b1 + b2 * pidotr) - b1 * mu[i][0];
zdjy = dely * (-qtmp * b1 + b2 * pidotr) - b1 * mu[i][1];
zdjz = delz * (-qtmp * b1 + b2 * pidotr) - b1 * mu[i][2];
if (factor_coul < 1.0) {
fdx *= factor_coul;
@ -338,27 +311,24 @@ void PairLJCutDipoleLongGPU::cpu_compute(int start, int inum, int eflag, int vfl
if (factor_coul < 1.0) {
d0 = (erfc - 1.0) * rinv;
d1 = (d0 + pre1*expm2) * r2inv;
d2 = (3.0*d1 + pre2*expm2) * r2inv;
d3 = (5.0*d2 + pre3*expm2) * r2inv;
d1 = (d0 + pre1 * expm2) * r2inv;
d2 = (3.0 * d1 + pre2 * expm2) * r2inv;
d3 = (5.0 * d2 + pre3 * expm2) * r2inv;
g0d1_g1d2_g2d3 = g0*d1 + g1*d2 + g2*d3;
fax = delx * g0d1_g1d2_g2d3 -
d1 * (qtmp*mu[j][0] - q[j]*mu[i][0]) +
d2 * (pjdotr*mu[i][0] + pidotr*mu[j][0]);
fay = dely * g0d1_g1d2_g2d3 -
d1 * (qtmp*mu[j][1] - q[j]*mu[i][1]) +
d2 * (pjdotr*mu[i][1] + pidotr*mu[j][1]);
faz = delz * g0d1_g1d2_g2d3 -
d1 * (qtmp*mu[j][2] - q[j]*mu[i][2]) +
d2 * (pjdotr*mu[i][2] + pidotr*mu[j][2]);
g0d1_g1d2_g2d3 = g0 * d1 + g1 * d2 + g2 * d3;
fax = delx * g0d1_g1d2_g2d3 - d1 * (qtmp * mu[j][0] - q[j] * mu[i][0]) +
d2 * (pjdotr * mu[i][0] + pidotr * mu[j][0]);
fay = dely * g0d1_g1d2_g2d3 - d1 * (qtmp * mu[j][1] - q[j] * mu[i][1]) +
d2 * (pjdotr * mu[i][1] + pidotr * mu[j][1]);
faz = delz * g0d1_g1d2_g2d3 - d1 * (qtmp * mu[j][2] - q[j] * mu[i][2]) +
d2 * (pjdotr * mu[i][2] + pidotr * mu[j][2]);
zaix = delx * (q[j]*d1 + d2*pjdotr) - d1*mu[j][0];
zaiy = dely * (q[j]*d1 + d2*pjdotr) - d1*mu[j][1];
zaiz = delz * (q[j]*d1 + d2*pjdotr) - d1*mu[j][2];
zajx = delx * (-qtmp*d1 + d2*pidotr) - d1*mu[i][0];
zajy = dely * (-qtmp*d1 + d2*pidotr) - d1*mu[i][1];
zajz = delz * (-qtmp*d1 + d2*pidotr) - d1*mu[i][2];
zaix = delx * (q[j] * d1 + d2 * pjdotr) - d1 * mu[j][0];
zaiy = dely * (q[j] * d1 + d2 * pjdotr) - d1 * mu[j][1];
zaiz = delz * (q[j] * d1 + d2 * pjdotr) - d1 * mu[j][2];
zajx = delx * (-qtmp * d1 + d2 * pidotr) - d1 * mu[i][0];
zajy = dely * (-qtmp * d1 + d2 * pidotr) - d1 * mu[i][1];
zajz = delz * (-qtmp * d1 + d2 * pidotr) - d1 * mu[i][2];
if (factor_coul > 0.0) {
facm1 = 1.0 - factor_coul;
@ -382,9 +352,9 @@ void PairLJCutDipoleLongGPU::cpu_compute(int start, int inum, int eflag, int vfl
forcecouly = fdy + fay;
forcecoulz = fdz + faz;
tixcoul = mu[i][1]*(zdiz + zaiz) - mu[i][2]*(zdiy + zaiy);
tiycoul = mu[i][2]*(zdix + zaix) - mu[i][0]*(zdiz + zaiz);
tizcoul = mu[i][0]*(zdiy + zaiy) - mu[i][1]*(zdix + zaix);
tixcoul = mu[i][1] * (zdiz + zaiz) - mu[i][2] * (zdiy + zaiy);
tiycoul = mu[i][2] * (zdix + zaix) - mu[i][0] * (zdiz + zaiz);
tizcoul = mu[i][0] * (zdiy + zaiy) - mu[i][1] * (zdix + zaix);
} else {
forcecoulx = forcecouly = forcecoulz = 0.0;
tixcoul = tiycoul = tizcoul = 0.0;
@ -393,43 +363,45 @@ void PairLJCutDipoleLongGPU::cpu_compute(int start, int inum, int eflag, int vfl
// LJ interaction
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
fforce = factor_lj * forcelj*r2inv;
} else fforce = 0.0;
r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
fforce = factor_lj * forcelj * r2inv;
} else
fforce = 0.0;
// total force
fx = qqrd2e*forcecoulx + delx*fforce;
fy = qqrd2e*forcecouly + dely*fforce;
fz = qqrd2e*forcecoulz + delz*fforce;
fx = qqrd2e * forcecoulx + delx * fforce;
fy = qqrd2e * forcecouly + dely * fforce;
fz = qqrd2e * forcecoulz + delz * fforce;
// force & torque accumulation
f[i][0] += fx;
f[i][1] += fy;
f[i][2] += fz;
torque[i][0] += qqrd2e*tixcoul;
torque[i][1] += qqrd2e*tiycoul;
torque[i][2] += qqrd2e*tizcoul;
torque[i][0] += qqrd2e * tixcoul;
torque[i][1] += qqrd2e * tiycoul;
torque[i][2] += qqrd2e * tizcoul;
if (eflag) {
if (rsq < cut_coulsq && factor_coul > 0.0) {
ecoul = qqrd2e*(b0*g0 + b1*g1 + b2*g2);
ecoul = qqrd2e * (b0 * g0 + b1 * g1 + b2 * g2);
if (factor_coul < 1.0) {
ecoul *= factor_coul;
ecoul += (1-factor_coul) * qqrd2e * (d0*g0 + d1*g1 + d2*g2);
ecoul += (1 - factor_coul) * qqrd2e * (d0 * g0 + d1 * g1 + d2 * g2);
}
} else ecoul = 0.0;
} else
ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
if (evflag) ev_tally_xyz_full(i,evdwl,ecoul,fx,fy,fz,delx,dely,delz);
if (evflag) ev_tally_xyz_full(i, evdwl, ecoul, fx, fy, fz, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,29 +32,24 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
int ljl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset);
void ljl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset);
void ljl_gpu_clear();
int ** ljl_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
const double cpu_time, bool &success);
void ljl_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
int **ljl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void ljl_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const double cpu_time,
bool &success);
double ljl_gpu_bytes();
/* ---------------------------------------------------------------------- */
@ -82,7 +75,7 @@ PairLJCutGPU::~PairLJCutGPU()
void PairLJCutGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -90,7 +83,7 @@ void PairLJCutGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -99,28 +92,24 @@ void PairLJCutGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = ljl_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh =
ljl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
ljl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
ljl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -135,17 +124,15 @@ void PairLJCutGPU::init_style()
{
cut_respa = nullptr;
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
double cut;
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -153,21 +140,15 @@ void PairLJCutGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = ljl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success = ljl_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size,
gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -176,7 +157,7 @@ void PairLJCutGPU::reinit()
{
Pair::reinit();
ljl_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset);
ljl_gpu_reinit(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset);
}
/* ---------------------------------------------------------------------- */
@ -189,11 +170,12 @@ double PairLJCutGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJCutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r2inv,r6inv,forcelj,factor_lj;
void PairLJCutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r2inv, r6inv, forcelj, factor_lj;
int *jlist;
double **x = atom->x;
@ -220,26 +202,25 @@ void PairLJCutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
fpair = factor_lj*forcelj*r2inv;
r2inv = 1.0 / rsq;
r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
fpair = factor_lj * forcelj * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
evdwl *= factor_lj;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -34,55 +33,47 @@
#include <cmath>
// Numeric constants for this pair style. EWALD_F is numerically 2/sqrt(pi).
// NOTE(review): EWALD_P and A1..A5 look like the coefficients of the usual polynomial
// erfc() approximation, but none of these macros is referenced in the hunks shown for
// this file -- confirm they are still needed here.
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int ljtip4p_long_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int nlocal,
const int tH, const int tO, const double alpha, const double qdist,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, const double host_cut_coulsq,
const double host_cut_coulsqplus, double *host_special_coul,
const double qqrd2e, const double g_ewald,
int map_size, int max_same);
int ljtip4p_long_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int tH, const int tO, const double alpha,
const double qdist, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, const double host_cut_coulsq,
const double host_cut_coulsqplus, double *host_special_coul,
const double qqrd2e, const double g_ewald, int map_size, int max_same);
void ljtip4p_long_gpu_clear();
int ** ljtip4p_long_gpu_compute_n(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi,
tagint *tag, int *map_array, int map_size,
int *sametag, int max_same,
int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q,
double *boxlo, double *prd);
void ljtip4p_long_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time,
bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
int **ljtip4p_long_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int *map_array, int map_size, int *sametag, int max_same,
int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo, double *prd);
void ljtip4p_long_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, const int nlocal, double *boxlo,
double *prd);
double ljtip4p_long_gpu_bytes();
void ljtip4p_long_copy_molecule_data(int, tagint *, int *,
int, int *, int, int);
void ljtip4p_long_copy_molecule_data(int, tagint *, int *, int, int *, int, int);
/* ---------------------------------------------------------------------- */
PairLJCutTIP4PLongGPU::PairLJCutTIP4PLongGPU(LAMMPS *lmp)
: PairLJCutTIP4PLong(lmp), gpu_mode(GPU_FORCE)
PairLJCutTIP4PLongGPU::PairLJCutTIP4PLongGPU(LAMMPS *lmp) :
PairLJCutTIP4PLong(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -104,14 +95,14 @@ PairLJCutTIP4PLongGPU::~PairLJCutTIP4PLongGPU()
void PairLJCutTIP4PLongGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -120,40 +111,26 @@ void PairLJCutTIP4PLongGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = ljtip4p_long_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi,
atom->tag, atom->get_map_array(), atom->get_map_size(),
atom->sametag, atom->get_max_same(),
atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success, atom->q, domain->boxlo,
domain->prd);
firstneigh = ljtip4p_long_gpu_compute_n(
neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->get_map_array(), atom->get_map_size(), atom->sametag, atom->get_max_same(),
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist,
&numneigh, cpu_time, success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
ljtip4p_long_copy_molecule_data(nall, atom->tag,
atom->get_map_array(), atom->get_map_size(),
atom->sametag, atom->get_max_same(), neighbor->ago);
ljtip4p_long_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
ljtip4p_long_copy_molecule_data(nall, atom->tag, atom->get_map_array(), atom->get_map_size(),
atom->sametag, atom->get_max_same(), neighbor->ago);
ljtip4p_long_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh,
firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
success, atom->q, atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
// if (host_start<inum) {
// cpu_time = platform::walltime();
// cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
// cpu_time = platform::walltime() - cpu_time;
// }
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
}
/* ----------------------------------------------------------------------
@ -165,17 +142,16 @@ void PairLJCutTIP4PLongGPU::init_style()
cut_respa = nullptr;
if (atom->tag_enable == 0)
error->all(FLERR,"Pair style lj/cut/tip4p/long/gpu requires atom IDs");
error->all(FLERR, "Pair style lj/cut/tip4p/long/gpu requires atom IDs");
if (!atom->q_flag)
error->all(FLERR, "Pair style lj/cut/tip4p/long/gpu requires atom attribute q");
if (force->bond == nullptr)
error->all(FLERR,"Must use a bond style with TIP4P potential");
if (force->angle == nullptr)
error->all(FLERR,"Must use an angle style with TIP4P potential");
if (force->bond == nullptr) error->all(FLERR, "Must use a bond style with TIP4P potential");
if (force->angle == nullptr) error->all(FLERR, "Must use an angle style with TIP4P potential");
if (atom->map_style == Atom::MAP_HASH)
error->all(FLERR,"GPU-accelerated lj/cut/tip4p/long currently"
" requires 'array' style atom map (atom_modify map array)");
error->all(FLERR,
"GPU-accelerated lj/cut/tip4p/long currently"
" requires 'array' style atom map (atom_modify map array)");
//PairLJCutCoulLong::init_style();
// Repeat cutsq calculation because done after call to init_style
@ -184,10 +160,9 @@ void PairLJCutTIP4PLongGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -196,51 +171,41 @@ void PairLJCutTIP4PLongGPU::init_style()
double cell_size = sqrt(maxcut) + neighbor->skin;
// ensure use of KSpace long-range solver, set g_ewald
if (force->kspace == nullptr)
error->all(FLERR,"Pair style requires a KSpace style");
if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
g_ewald = force->kspace->g_ewald;
// setup force tables
if (ncoultablebits) init_tables(cut_coul,cut_respa);
if (ncoultablebits) init_tables(cut_coul, cut_respa);
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
// set alpha parameter
double theta = force->angle->equilibrium_angle(typeA);
double blen = force->bond->equilibrium_distance(typeB);
alpha = qdist / (cos(0.5*theta) * blen);
alpha = qdist / (cos(0.5 * theta) * blen);
cut_coulsq = cut_coul * cut_coul;
double cut_coulsqplus = (cut_coul+qdist+blen) * (cut_coul+qdist+blen);
if (maxcut < cut_coulsqplus) {
cell_size = (cut_coul+qdist+blen) + neighbor->skin;
}
double cut_coulsqplus = (cut_coul + qdist + blen) * (cut_coul + qdist + blen);
if (maxcut < cut_coulsqplus) { cell_size = (cut_coul + qdist + blen) + neighbor->skin; }
if (comm->cutghostuser < cell_size) {
if (comm->me == 0)
error->warning(FLERR,"Increasing communication cutoff from {:.8} "
"to {:.8} for TIP4P GPU style",comm->cutghostuser,cell_size);
error->warning(FLERR,
"Increasing communication cutoff from {:.8} to {:.8} for TIP4P GPU style",
comm->cutghostuser, cell_size);
comm->cutghostuser = cell_size;
}
int mnf = 5e-2 * neighbor->oneatom;
int success = ljtip4p_long_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, force->special_lj, atom->nlocal,
typeH, typeO, alpha, qdist,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq,
cut_coulsq, cut_coulsqplus,
force->special_coul, force->qqrd2e,
g_ewald, atom->get_map_size(),
atom->get_max_same());
GPU_EXTRA::check_flag(success,error,world);
int success = ljtip4p_long_gpu_init(
atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, force->special_lj, atom->nlocal, typeH,
typeO, alpha, qdist, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
screen, cut_ljsq, cut_coulsq, cut_coulsqplus, force->special_coul, force->qqrd2e, g_ewald,
atom->get_map_size(), atom->get_max_same());
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->cut = 1;
neighbor->requests[irequest]->cutoff = cut_coul+qdist+blen + neighbor->skin;
auto req = neighbor->add_request(this, NeighConst::REQ_FULL);
req->set_cutoff(cut_coul + qdist + blen + neighbor->skin);
}
}
@ -253,4 +218,3 @@ double PairLJCutTIP4PLongGPU::memory_usage()
}
/* ---------------------------------------------------------------------- */

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,56 +24,50 @@
#include "gpu_extra.h"
#include "kspace.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include <cmath>
// Constants of the polynomial erfc() approximation evaluated in cpu_compute():
//   t = 1 / (1 + EWALD_P * x)
//   erfc(x) ~= t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * exp(-x * x)
// where x = g_ewald * r. EWALD_F (numerically 2/sqrt(pi)) scales the x * exp(-x * x)
// term that is added to erfc in the real-space Coulomb force.
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int ljecl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double **shift, double *special_lj,
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double host_cut_coulsq, double *host_special_coul,
int ljecl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double **shift,
double *special_lj, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double host_cut_coulsq, double *host_special_coul,
const double qqrd2e, const double g_ewald);
void ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double **shift, double **host_lj_cutsq);
void ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double **shift,
double **host_lj_cutsq);
void ljecl_gpu_clear();
int ** ljecl_gpu_compute_n(const int ago, const int inum,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q,
double *boxlo, double *prd);
void ljecl_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **ljecl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
void ljecl_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
double ljecl_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairLJExpandCoulLongGPU::PairLJExpandCoulLongGPU(LAMMPS *lmp) :
PairLJExpandCoulLong(lmp), gpu_mode(GPU_FORCE)
PairLJExpandCoulLong(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
cpu_time = 0.0;
@ -95,7 +88,7 @@ PairLJExpandCoulLongGPU::~PairLJExpandCoulLongGPU()
void PairLJExpandCoulLongGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -103,7 +96,7 @@ void PairLJExpandCoulLongGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -112,30 +105,25 @@ void PairLJExpandCoulLongGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = ljecl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
firstneigh = ljecl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
ljecl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
ljecl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -150,8 +138,7 @@ void PairLJExpandCoulLongGPU::init_style()
{
cut_respa = nullptr;
if (!atom->q_flag)
error->all(FLERR,"Pair style lj/cut/coul/long/gpu requires atom attribute q");
// This style passes atom->q to the GPU library, so the atom style must define charges.
if (!atom->q_flag)
  error->all(FLERR, "Pair style lj/expand/coul/long/gpu requires atom attribute q");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -159,10 +146,9 @@ void PairLJExpandCoulLongGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -174,30 +160,23 @@ void PairLJExpandCoulLongGPU::init_style()
// ensure use of KSpace long-range solver, set g_ewald
if (force->kspace == nullptr)
error->all(FLERR,"Pair style requires a KSpace style");
if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
g_ewald = force->kspace->g_ewald;
// setup force tables
if (ncoultablebits) init_tables(cut_coul,cut_respa);
if (ncoultablebits) init_tables(cut_coul, cut_respa);
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = ljecl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, shift, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
force->special_coul, force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success,error,world);
int success = ljecl_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, shift,
force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
force->special_coul, force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -206,7 +185,7 @@ void PairLJExpandCoulLongGPU::reinit()
{
Pair::reinit();
ljecl_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, shift, cut_ljsq);
ljecl_gpu_reinit(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, shift, cut_ljsq);
}
/* ---------------------------------------------------------------------- */
@ -219,16 +198,15 @@ double PairLJExpandCoulLongGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJExpandCoulLongGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
void PairLJExpandCoulLongGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype,itable;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double fraction,table;
double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
double grij,expm2,prefactor,t,erfc;
double rsq,rshift,rshiftsq,rshift2inv;
int i, j, ii, jj, jnum, itype, jtype, itable;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
double fraction, table;
double r, r2inv, r6inv, forcecoul, forcelj, factor_coul, factor_lj;
double grij, expm2, prefactor, t, erfc;
double rsq, rshift, rshiftsq, rshift2inv;
int *jlist;
@ -263,73 +241,76 @@ void PairLJExpandCoulLongGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq) {
r = sqrt(rsq);
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*q[j]/r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
expm2 = exp(-grij * grij);
t = 1.0 / (1.0 + EWALD_P * grij);
erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
prefactor = qqrd2e * qtmp * q[j] / r;
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
if (factor_coul < 1.0) forcecoul -= (1.0 - factor_coul) * prefactor;
} else {
union_int_float_t rsq_lookup;
rsq_lookup.f = rsq;
itable = rsq_lookup.i & ncoulmask;
itable >>= ncoulshiftbits;
fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
table = ftable[itable] + fraction*dftable[itable];
forcecoul = qtmp*q[j] * table;
table = ftable[itable] + fraction * dftable[itable];
forcecoul = qtmp * q[j] * table;
if (factor_coul < 1.0) {
table = ctable[itable] + fraction*dctable[itable];
prefactor = qtmp*q[j] * table;
forcecoul -= (1.0-factor_coul)*prefactor;
table = ctable[itable] + fraction * dctable[itable];
prefactor = qtmp * q[j] * table;
forcecoul -= (1.0 - factor_coul) * prefactor;
}
}
} else forcecoul = 0.0;
} else
forcecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
r = sqrt(rsq);
rshift = r - shift[itype][jtype];
rshiftsq = rshift*rshift;
rshift2inv = 1.0/rshiftsq;
r6inv = rshift2inv*rshift2inv*rshift2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
forcelj = factor_lj*forcelj/rshift/r;
} else forcelj = 0.0;
rshiftsq = rshift * rshift;
rshift2inv = 1.0 / rshiftsq;
r6inv = rshift2inv * rshift2inv * rshift2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
forcelj = factor_lj * forcelj / rshift / r;
} else
forcelj = 0.0;
fpair = forcecoul*r2inv + forcelj;
fpair = forcecoul * r2inv + forcelj;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq)
ecoul = prefactor*erfc;
ecoul = prefactor * erfc;
else {
table = etable[itable] + fraction*detable[itable];
ecoul = qtmp*q[j] * table;
table = etable[itable] + fraction * detable[itable];
ecoul = qtmp * q[j] * table;
}
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
} else ecoul = 0.0;
if (factor_coul < 1.0) ecoul -= (1.0 - factor_coul) * prefactor;
} else
ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,28 +32,22 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double **shift, double *special_lj,
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode,
FILE *screen);
void lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double **shift);
int lje_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double **shift,
double *special_lj, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen);
void lje_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double **shift);
void lje_gpu_clear();
int ** lje_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
const double cpu_time, bool &success);
void lje_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
int **lje_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void lje_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const double cpu_time,
bool &success);
double lje_gpu_bytes();
/* ---------------------------------------------------------------------- */
@ -81,7 +73,7 @@ PairLJExpandGPU::~PairLJExpandGPU()
void PairLJExpandGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -89,7 +81,7 @@ void PairLJExpandGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -98,28 +90,24 @@ void PairLJExpandGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = lje_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success);
firstneigh =
lje_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
lje_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
lje_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -139,10 +127,9 @@ void PairLJExpandGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -150,21 +137,15 @@ void PairLJExpandGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = lje_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
offset, shift, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success = lje_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, shift,
force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -173,7 +154,7 @@ void PairLJExpandGPU::reinit()
{
Pair::reinit();
lje_gpu_reinit(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, offset, shift);
lje_gpu_reinit(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, offset, shift);
}
/* ---------------------------------------------------------------------- */
@ -186,14 +167,13 @@ double PairLJExpandGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJExpandGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairLJExpandGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r2inv,r6inv,forcelj,factor_lj;
double r,rshift,rshiftsq;
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r2inv, r6inv, forcelj, factor_lj;
double r, rshift, rshiftsq;
int *jlist;
double **x = atom->x;
@ -220,29 +200,28 @@ void PairLJExpandGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r = sqrt(rsq);
rshift = r - shift[itype][jtype];
rshiftsq = rshift*rshift;
r2inv = 1.0/rshiftsq;
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
fpair = factor_lj*forcelj/rshift/r;
rshiftsq = rshift * rshift;
r2inv = 1.0 / rshiftsq;
r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
fpair = factor_lj * forcelj / rshift / r;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
evdwl *= factor_lj;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,34 +32,27 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int ljgrm_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double *special_lj, const int inum, const int nall,
const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double **host_ljsw1, double **host_ljsw2,
double **host_ljsw3, double **host_ljsw4,
double **host_ljsw5, double **cut_inner,
double **cut_innersq);
int ljgrm_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double *special_lj, const int inum,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, double **host_ljsw1,
double **host_ljsw2, double **host_ljsw3, double **host_ljsw4,
double **host_ljsw5, double **cut_inner, double **cut_innersq);
void ljgrm_gpu_clear();
int ** ljgrm_gpu_compute_n(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success);
void ljgrm_gpu_compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ljgrm_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void ljgrm_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
double ljgrm_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairLJGromacsGPU::PairLJGromacsGPU(LAMMPS *lmp) :
PairLJGromacs(lmp), gpu_mode(GPU_FORCE)
PairLJGromacsGPU::PairLJGromacsGPU(LAMMPS *lmp) : PairLJGromacs(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -83,7 +74,7 @@ PairLJGromacsGPU::~PairLJGromacsGPU()
void PairLJGromacsGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -91,7 +82,7 @@ void PairLJGromacsGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -100,28 +91,24 @@ void PairLJGromacsGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = ljgrm_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start, &ilist,
&numneigh, cpu_time, success);
firstneigh =
ljgrm_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
ljgrm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
ljgrm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -141,10 +128,9 @@ void PairLJGromacsGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
mcut = init_one(i,j);
mcut = init_one(i, j);
mcut *= mcut;
if (mcut > maxcut)
maxcut = mcut;
if (mcut > maxcut) maxcut = mcut;
cutsq[i][j] = cutsq[j][i] = mcut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -152,23 +138,17 @@ void PairLJGromacsGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = ljgrm_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, ljsw1, ljsw2,
ljsw3, ljsw4, ljsw5, cut_inner, cut_inner_sq);
GPU_EXTRA::check_flag(success,error,world);
int success =
ljgrm_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, force->special_lj, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen,
ljsw1, ljsw2, ljsw3, ljsw4, ljsw5, cut_inner, cut_inner_sq);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -181,14 +161,13 @@ double PairLJGromacsGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJGromacsGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairLJGromacsGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r2inv,r6inv,forcelj,factor_lj;
double r,t,fswitch,eswitch;
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r2inv, r6inv, forcelj, factor_lj;
double r, t, fswitch, eswitch;
int *jlist;
double **x = atom->x;
@ -215,36 +194,36 @@ void PairLJGromacsGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
if (rsq > cut_inner_sq[itype][jtype]) {
r2inv = 1.0 / rsq;
r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
if (rsq > cut_inner_sq[itype][jtype]) {
r = sqrt(rsq);
t = r - cut_inner[itype][jtype];
fswitch = r*t*t*(ljsw1[itype][jtype] + ljsw2[itype][jtype]*t);
forcelj += fswitch;
fswitch = r * t * t * (ljsw1[itype][jtype] + ljsw2[itype][jtype] * t);
forcelj += fswitch;
}
fpair = factor_lj*forcelj * r2inv;
fpair = factor_lj * forcelj * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
evdwl = r6inv * (lj3[itype][jtype]*r6inv - lj4[itype][jtype]);
evdwl += ljsw5[itype][jtype];
if (eflag) {
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]);
evdwl += ljsw5[itype][jtype];
if (rsq > cut_inner_sq[itype][jtype]) {
eswitch = t*t*t*(ljsw3[itype][jtype] + ljsw4[itype][jtype]*t);
eswitch = t * t * t * (ljsw3[itype][jtype] + ljsw4[itype][jtype] * t);
evdwl += eswitch;
}
evdwl *= factor_lj;
}
evdwl *= factor_lj;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,58 +24,51 @@
#include "gpu_extra.h"
#include "kspace.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include <cmath>
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int sdkl_gpu_init(const int ntypes, double **cutsq, int **lj_type,
double **host_lj1, double **host_lj2, double **host_lj3,
double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode,
FILE *screen, double **host_cut_ljsq, double host_cut_coulsq,
double *host_special_coul, const double qqrd2e,
const double g_ewald);
int sdkl_gpu_init(const int ntypes, double **cutsq, int **lj_type, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4, double **offset,
double *special_lj, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double host_cut_coulsq, double *host_special_coul,
const double qqrd2e, const double g_ewald);
void sdkl_gpu_clear();
int ** sdkl_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double *boxlo,
double *prd);
void sdkl_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
const int nlocal, double *boxlo, double *prd);
int **sdkl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double *boxlo,
double *prd);
void sdkl_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, const int nlocal,
double *boxlo, double *prd);
double sdkl_gpu_bytes();
#include "lj_sdk_common.h"
using namespace LJSDKParms;
/* ---------------------------------------------------------------------- */
PairLJSDKCoulLongGPU::PairLJSDKCoulLongGPU(LAMMPS *lmp) :
PairLJSDKCoulLong(lmp), gpu_mode(GPU_FORCE)
PairLJSDKCoulLong(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -98,7 +90,7 @@ PairLJSDKCoulLongGPU::~PairLJSDKCoulLongGPU()
void PairLJSDKCoulLongGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -106,7 +98,7 @@ void PairLJSDKCoulLongGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -115,35 +107,33 @@ void PairLJSDKCoulLongGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = sdkl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo,
domain->prd);
firstneigh = sdkl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag,
eflag_atom, vflag_atom, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
sdkl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
sdkl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
if (evflag) {
if (eflag) cpu_compute<1,1>(host_start, inum, ilist, numneigh, firstneigh);
else cpu_compute<1,0>(host_start, inum, ilist, numneigh, firstneigh);
} else cpu_compute<0,0>(host_start, inum, ilist, numneigh, firstneigh);
if (eflag)
cpu_compute<1, 1>(host_start, inum, ilist, numneigh, firstneigh);
else
cpu_compute<1, 0>(host_start, inum, ilist, numneigh, firstneigh);
} else
cpu_compute<0, 0>(host_start, inum, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
}
}
@ -154,8 +144,7 @@ void PairLJSDKCoulLongGPU::compute(int eflag, int vflag)
void PairLJSDKCoulLongGPU::init_style()
{
if (!atom->q_flag)
error->all(FLERR,"Pair style lj/sdk/coul/long/gpu requires atom attribute q");
if (!atom->q_flag) error->all(FLERR, "Pair style lj/sdk/coul/long/gpu requires atom attribute q");
// Repeat the cutsq calculation here because it is otherwise done only after the call to init_style
double maxcut = -1.0;
@ -163,10 +152,9 @@ void PairLJSDKCoulLongGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -178,31 +166,23 @@ void PairLJSDKCoulLongGPU::init_style()
// ensure use of KSpace long-range solver, set g_ewald
if (force->kspace == nullptr)
error->all(FLERR,"Pair style is incompatible with KSpace style");
if (force->kspace == nullptr) error->all(FLERR, "Pair style is incompatible with KSpace style");
g_ewald = force->kspace->g_ewald;
// setup force tables
if (ncoultablebits) init_tables(cut_coul,nullptr);
if (ncoultablebits) init_tables(cut_coul, nullptr);
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = sdkl_gpu_init(atom->ntypes+1, cutsq, lj_type, lj1, lj2, lj3,
lj4, offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq,
cut_coulsq, force->special_coul,
force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success,error,world);
int success =
sdkl_gpu_init(atom->ntypes + 1, cutsq, lj_type, lj1, lj2, lj3, lj4, offset, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode,
screen, cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e, g_ewald);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -215,21 +195,21 @@ double PairLJSDKCoulLongGPU::memory_usage()
/* ---------------------------------------------------------------------- */
template <int EVFLAG, int EFLAG>
void PairLJSDKCoulLongGPU::cpu_compute(int start, int inum, int *ilist,
int *numneigh, int **firstneigh)
void PairLJSDKCoulLongGPU::cpu_compute(int start, int inum, int *ilist, int *numneigh,
int **firstneigh)
{
int i,j,ii,jj;
double qtmp,xtmp,ytmp,ztmp;
double r2inv,forcecoul,forcelj,factor_coul,factor_lj;
int i, j, ii, jj;
double qtmp, xtmp, ytmp, ztmp;
double r2inv, forcecoul, forcelj, factor_coul, factor_lj;
const double * const * const x = atom->x;
double * const * const f = atom->f;
const double * const q = atom->q;
const int * const type = atom->type;
const double * const special_coul = force->special_coul;
const double * const special_lj = force->special_lj;
const double *const *const x = atom->x;
double *const *const f = atom->f;
const double *const q = atom->q;
const int *const type = atom->type;
const double *const special_coul = force->special_coul;
const double *const special_lj = force->special_lj;
const double qqrd2e = force->qqrd2e;
double fxtmp,fytmp,fztmp;
double fxtmp, fytmp, fztmp;
// loop over neighbors of my atoms
@ -239,10 +219,10 @@ void PairLJSDKCoulLongGPU::cpu_compute(int start, int inum, int *ilist,
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
fxtmp=fytmp=fztmp=0.0;
fxtmp = fytmp = fztmp = 0.0;
const int itype = type[i];
const int * const jlist = firstneigh[i];
const int *const jlist = firstneigh[i];
const int jnum = numneigh[i];
for (jj = 0; jj < jnum; jj++) {
@ -254,7 +234,7 @@ void PairLJSDKCoulLongGPU::cpu_compute(int start, int inum, int *ilist,
const double delx = xtmp - x[j][0];
const double dely = ytmp - x[j][1];
const double delz = ztmp - x[j][2];
const double rsq = delx*delx + dely*dely + delz*delz;
const double rsq = delx * delx + dely * dely + delz * delz;
const int jtype = type[j];
double evdwl = 0.0;
@ -262,41 +242,40 @@ void PairLJSDKCoulLongGPU::cpu_compute(int start, int inum, int *ilist,
double fpair = 0.0;
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
const int ljt = lj_type[itype][jtype];
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq) {
const double r = sqrt(rsq);
const double grij = g_ewald * r;
const double expm2 = exp(-grij*grij);
const double t = 1.0 / (1.0 + EWALD_P*grij);
const double erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
const double prefactor = qqrd2e * qtmp*q[j]/r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
if (EFLAG) ecoul = prefactor*erfc;
const double expm2 = exp(-grij * grij);
const double t = 1.0 / (1.0 + EWALD_P * grij);
const double erfc = t * (A1 + t * (A2 + t * (A3 + t * (A4 + t * A5)))) * expm2;
const double prefactor = qqrd2e * qtmp * q[j] / r;
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
if (EFLAG) ecoul = prefactor * erfc;
if (factor_coul < 1.0) {
forcecoul -= (1.0-factor_coul)*prefactor;
if (EFLAG) ecoul -= (1.0-factor_coul)*prefactor;
forcecoul -= (1.0 - factor_coul) * prefactor;
if (EFLAG) ecoul -= (1.0 - factor_coul) * prefactor;
}
} else {
union_int_float_t rsq_lookup;
rsq_lookup.f = rsq;
int itable = rsq_lookup.i & ncoulmask;
itable >>= ncoulshiftbits;
const double fraction = (rsq_lookup.f - rtable[itable]) *
drtable[itable];
const double table = ftable[itable] + fraction*dftable[itable];
forcecoul = qtmp*q[j] * table;
const double fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
const double table = ftable[itable] + fraction * dftable[itable];
forcecoul = qtmp * q[j] * table;
if (EFLAG) {
const double table2 = etable[itable] + fraction*detable[itable];
ecoul = qtmp*q[j] * table2;
const double table2 = etable[itable] + fraction * detable[itable];
ecoul = qtmp * q[j] * table2;
}
if (factor_coul < 1.0) {
const double table2 = ctable[itable] + fraction*dctable[itable];
const double prefactor = qtmp*q[j] * table2;
forcecoul -= (1.0-factor_coul)*prefactor;
if (EFLAG) ecoul -= (1.0-factor_coul)*prefactor;
const double table2 = ctable[itable] + fraction * dctable[itable];
const double prefactor = qtmp * q[j] * table2;
forcecoul -= (1.0 - factor_coul) * prefactor;
if (EFLAG) ecoul -= (1.0 - factor_coul) * prefactor;
}
}
} else {
@ -304,50 +283,46 @@ void PairLJSDKCoulLongGPU::cpu_compute(int start, int inum, int *ilist,
ecoul = 0.0;
}
if (rsq < cut_ljsq[itype][jtype]) {
if (ljt == LJ12_4) {
const double r4inv=r2inv*r2inv;
forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv
- lj2[itype][jtype]);
const double r4inv = r2inv * r2inv;
forcelj = r4inv * (lj1[itype][jtype] * r4inv * r4inv - lj2[itype][jtype]);
if (EFLAG)
evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv
- lj4[itype][jtype]) - offset[itype][jtype];
evdwl = r4inv * (lj3[itype][jtype] * r4inv * r4inv - lj4[itype][jtype]) -
offset[itype][jtype];
} else if (ljt == LJ9_6) {
const double r3inv = r2inv*sqrt(r2inv);
const double r6inv = r3inv*r3inv;
forcelj = r6inv*(lj1[itype][jtype]*r3inv
- lj2[itype][jtype]);
const double r3inv = r2inv * sqrt(r2inv);
const double r6inv = r3inv * r3inv;
forcelj = r6inv * (lj1[itype][jtype] * r3inv - lj2[itype][jtype]);
if (EFLAG)
evdwl = r6inv*(lj3[itype][jtype]*r3inv
- lj4[itype][jtype]) - offset[itype][jtype];
evdwl =
r6inv * (lj3[itype][jtype] * r3inv - lj4[itype][jtype]) - offset[itype][jtype];
} else if (ljt == LJ12_6) {
const double r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv*(lj1[itype][jtype]*r6inv
- lj2[itype][jtype]);
const double r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
if (EFLAG)
evdwl = r6inv*(lj3[itype][jtype]*r6inv
- lj4[itype][jtype]) - offset[itype][jtype];
evdwl =
r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
}
if (EFLAG) evdwl *= factor_lj;
} else {
forcelj=0.0;
forcelj = 0.0;
evdwl = 0.0;
}
fpair = (forcecoul + factor_lj*forcelj) * r2inv;
fpair = (forcecoul + factor_lj * forcelj) * r2inv;
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
fxtmp += delx * fpair;
fytmp += dely * fpair;
fztmp += delz * fpair;
if (EVFLAG) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz);
if (EVFLAG) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
}
}
f[i][0] += fxtmp;

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,25 +32,20 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int sdk_gpu_init(const int ntypes, double **cutsq, int **cg_types,
double **host_lj1, double **host_lj2, double **host_lj3,
double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode,
FILE *screen);
int sdk_gpu_init(const int ntypes, double **cutsq, int **cg_types, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4, double **offset,
double *special_lj, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen);
void sdk_gpu_clear();
int ** sdk_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
const double cpu_time, bool &success);
void sdk_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
int **sdk_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void sdk_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const double cpu_time,
bool &success);
double sdk_gpu_bytes();
#include "lj_sdk_common.h"
@ -83,7 +76,7 @@ PairLJSDKGPU::~PairLJSDKGPU()
void PairLJSDKGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -91,7 +84,7 @@ void PairLJSDKGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -100,33 +93,32 @@ void PairLJSDKGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = sdk_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success);
firstneigh =
sdk_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
sdk_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
sdk_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
if (evflag) {
if (eflag) cpu_compute<1,1>(host_start, inum, ilist, numneigh, firstneigh);
else cpu_compute<1,0>(host_start, inum, ilist, numneigh, firstneigh);
} else cpu_compute<0,0>(host_start, inum, ilist, numneigh, firstneigh);
if (eflag)
cpu_compute<1, 1>(host_start, inum, ilist, numneigh, firstneigh);
else
cpu_compute<1, 0>(host_start, inum, ilist, numneigh, firstneigh);
} else
cpu_compute<0, 0>(host_start, inum, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
}
}
@ -144,10 +136,9 @@ void PairLJSDKGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -155,21 +146,15 @@ void PairLJSDKGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = sdk_gpu_init(atom->ntypes+1,cutsq,lj_type,lj1,lj2,lj3,lj4,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success = sdk_gpu_init(atom->ntypes + 1, cutsq, lj_type, lj1, lj2, lj3, lj4, offset,
force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -182,19 +167,18 @@ double PairLJSDKGPU::memory_usage()
/* ---------------------------------------------------------------------- */
template <int EVFLAG, int EFLAG>
void PairLJSDKGPU::cpu_compute(int start, int inum, int *ilist,
int *numneigh, int **firstneigh)
void PairLJSDKGPU::cpu_compute(int start, int inum, int *ilist, int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r2inv,forcelj,factor_lj;
int i, j, ii, jj, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r2inv, forcelj, factor_lj;
const double * const * const x = atom->x;
double * const * const f = atom->f;
const int * const type = atom->type;
const double * const special_lj = force->special_lj;
double fxtmp,fytmp,fztmp;
evdwl=0.0;
const double *const *const x = atom->x;
double *const *const f = atom->f;
const int *const type = atom->type;
const double *const special_lj = force->special_lj;
double fxtmp, fytmp, fztmp;
evdwl = 0.0;
// loop over neighbors of my atoms
@ -203,10 +187,10 @@ void PairLJSDKGPU::cpu_compute(int start, int inum, int *ilist,
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
fxtmp=fytmp=fztmp=0.0;
fxtmp = fytmp = fztmp = 0.0;
const int itype = type[i];
const int * const jlist = firstneigh[i];
const int *const jlist = firstneigh[i];
const int jnum = numneigh[i];
for (jj = 0; jj < jnum; jj++) {
@ -217,47 +201,43 @@ void PairLJSDKGPU::cpu_compute(int start, int inum, int *ilist,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
const int ljt = lj_type[itype][jtype];
if (ljt == LJ12_4) {
const double r4inv=r2inv*r2inv;
forcelj = r4inv*(lj1[itype][jtype]*r4inv*r4inv
- lj2[itype][jtype]);
const double r4inv = r2inv * r2inv;
forcelj = r4inv * (lj1[itype][jtype] * r4inv * r4inv - lj2[itype][jtype]);
if (EFLAG)
evdwl = r4inv*(lj3[itype][jtype]*r4inv*r4inv
- lj4[itype][jtype]) - offset[itype][jtype];
evdwl = r4inv * (lj3[itype][jtype] * r4inv * r4inv - lj4[itype][jtype]) -
offset[itype][jtype];
} else if (ljt == LJ9_6) {
const double r3inv = r2inv*sqrt(r2inv);
const double r6inv = r3inv*r3inv;
forcelj = r6inv*(lj1[itype][jtype]*r3inv
- lj2[itype][jtype]);
const double r3inv = r2inv * sqrt(r2inv);
const double r6inv = r3inv * r3inv;
forcelj = r6inv * (lj1[itype][jtype] * r3inv - lj2[itype][jtype]);
if (EFLAG)
evdwl = r6inv*(lj3[itype][jtype]*r3inv
- lj4[itype][jtype]) - offset[itype][jtype];
evdwl = r6inv * (lj3[itype][jtype] * r3inv - lj4[itype][jtype]) - offset[itype][jtype];
} else if (ljt == LJ12_6) {
const double r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv*(lj1[itype][jtype]*r6inv
- lj2[itype][jtype]);
const double r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
if (EFLAG)
evdwl = r6inv*(lj3[itype][jtype]*r6inv
- lj4[itype][jtype]) - offset[itype][jtype];
} else continue;
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) - offset[itype][jtype];
} else
continue;
fpair = factor_lj*forcelj*r2inv;
fpair = factor_lj * forcelj * r2inv;
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
fxtmp += delx * fpair;
fytmp += dely * fpair;
fztmp += delz * fpair;
if (EVFLAG) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (EVFLAG) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
f[i][0] += fxtmp;

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
#include "update.h"
@ -36,35 +34,28 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4,
double *special_lj, const int nlocal,
int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double **host_cut_coulsq,
double *host_special_coul, const double qqrd2e);
const double cell_size, int &gpu_mode, FILE *screen, double **host_cut_ljsq,
double **host_cut_coulsq, double *host_special_coul, const double qqrd2e);
void dplsf_gpu_clear();
int ** dplsf_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double **host_mu,
double *boxlo, double *prd);
void dplsf_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q,
double **host_mu, const int nlocal, double *boxlo,
double *prd);
int **dplsf_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double *host_q, double **host_mu,
double *boxlo, double *prd);
void dplsf_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double *host_q, double **host_mu,
const int nlocal, double *boxlo, double *prd);
double dplsf_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairLJSFDipoleSFGPU::PairLJSFDipoleSFGPU(LAMMPS *lmp) : PairLJSFDipoleSF(lmp),
gpu_mode(GPU_FORCE)
PairLJSFDipoleSFGPU::PairLJSFDipoleSFGPU(LAMMPS *lmp) : PairLJSFDipoleSF(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -86,7 +77,7 @@ PairLJSFDipoleSFGPU::~PairLJSFDipoleSFGPU()
void PairLJSFDipoleSFGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -94,7 +85,7 @@ void PairLJSFDipoleSFGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -103,30 +94,25 @@ void PairLJSFDipoleSFGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = dplsf_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, atom->q, atom->mu, domain->boxlo,
domain->prd);
firstneigh = dplsf_gpu_compute_n(
neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom, vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success, atom->q, atom->mu, domain->boxlo, domain->prd);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
dplsf_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q,
dplsf_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success, atom->q,
atom->mu, atom->nlocal, domain->boxlo, domain->prd);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -140,11 +126,10 @@ void PairLJSFDipoleSFGPU::compute(int eflag, int vflag)
void PairLJSFDipoleSFGPU::init_style()
{
if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag)
error->all(FLERR,"Pair dipole/sf/gpu requires atom attributes q, mu, torque");
error->all(FLERR, "Pair dipole/sf/gpu requires atom attributes q, mu, torque");
if (strcmp(update->unit_style,"electron") == 0)
error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
if (strcmp(update->unit_style, "electron") == 0)
error->all(FLERR, "Cannot (yet) use 'electron' units with dipoles");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -152,10 +137,9 @@ void PairLJSFDipoleSFGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -163,22 +147,16 @@ void PairLJSFDipoleSFGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = dplsf_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
force->special_coul, force->qqrd2e);
GPU_EXTRA::check_flag(success,error,world);
int success =
dplsf_gpu_init(atom->ntypes + 1, cutsq, lj1, lj2, lj3, lj4, force->special_lj, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen,
cut_ljsq, cut_coulsq, force->special_coul, force->qqrd2e);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -191,25 +169,24 @@ double PairLJSFDipoleSFGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
int *ilist, int *numneigh,
int **firstneigh)
void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz;
double rsq,rinv,r2inv,r6inv,r3inv,r5inv;
double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz;
double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul;
double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4;
double forcelj,factor_coul,factor_lj;
double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf;
double aforcecoulx,aforcecouly,aforcecoulz;
double bforcecoulx,bforcecouly,bforcecoulz;
double rcutlj2inv, rcutcoul2inv,rcutlj6inv;
int i, j, ii, jj, jnum, itype, jtype;
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fx, fy, fz;
double rsq, rinv, r2inv, r6inv, r3inv, r5inv;
double forcecoulx, forcecouly, forcecoulz, crossx, crossy, crossz;
double tixcoul, tiycoul, tizcoul, tjxcoul, tjycoul, tjzcoul;
double fq, pdotp, pidotr, pjdotr, pre1, pre2, pre3, pre4;
double forcelj, factor_coul, factor_lj;
double presf, afac, bfac, pqfac, qpfac, forceljcut, forceljsf;
double aforcecoulx, aforcecouly, aforcecoulz;
double bforcecoulx, bforcecouly, bforcecoulz;
double rcutlj2inv, rcutcoul2inv, rcutlj6inv;
int *jlist;
evdwl = ecoul = 0.0;
ev_init(eflag,vflag);
ev_init(eflag, vflag);
double **x = atom->x;
double **f = atom->f;
@ -221,7 +198,6 @@ void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
double *special_lj = force->special_lj;
double qqrd2e = force->qqrd2e;
// loop over neighbors of my atoms
for (ii = start; ii < inum; ii++) {
@ -243,11 +219,11 @@ void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
rinv = sqrt(r2inv);
// atom can have both a charge and dipole
@ -260,146 +236,145 @@ void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
if (rsq < cut_coulsq[itype][jtype]) {
if (qtmp != 0.0 && q[j] != 0.0) {
pre1 = qtmp*q[j]*rinv*(r2inv-1.0/cut_coulsq[itype][jtype]);
pre1 = qtmp * q[j] * rinv * (r2inv - 1.0 / cut_coulsq[itype][jtype]);
forcecoulx += pre1*delx;
forcecouly += pre1*dely;
forcecoulz += pre1*delz;
forcecoulx += pre1 * delx;
forcecouly += pre1 * dely;
forcecoulz += pre1 * delz;
}
if (mu[i][3] > 0.0 && mu[j][3] > 0.0) {
r3inv = r2inv*rinv;
r5inv = r3inv*r2inv;
rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
r3inv = r2inv * rinv;
r5inv = r3inv * r2inv;
rcutcoul2inv = 1.0 / cut_coulsq[itype][jtype];
pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
pdotp = mu[i][0] * mu[j][0] + mu[i][1] * mu[j][1] + mu[i][2] * mu[j][2];
pidotr = mu[i][0] * delx + mu[i][1] * dely + mu[i][2] * delz;
pjdotr = mu[j][0] * delx + mu[j][1] * dely + mu[j][2] * delz;
afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv;
pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr );
aforcecoulx = pre1*delx;
aforcecouly = pre1*dely;
aforcecoulz = pre1*delz;
afac = 1.0 - rsq * rsq * rcutcoul2inv * rcutcoul2inv;
pre1 = afac * (pdotp - 3.0 * r2inv * pidotr * pjdotr);
aforcecoulx = pre1 * delx;
aforcecouly = pre1 * dely;
aforcecoulz = pre1 * delz;
bfac = 1.0 - 4.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv) +
3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv;
bfac = 1.0 - 4.0 * rsq * sqrt(rsq) * rcutcoul2inv * sqrt(rcutcoul2inv) +
3.0 * rsq * rsq * rcutcoul2inv * rcutcoul2inv;
presf = 2.0 * r2inv * pidotr * pjdotr;
bforcecoulx = bfac * (pjdotr*mu[i][0]+pidotr*mu[j][0]-presf*delx);
bforcecouly = bfac * (pjdotr*mu[i][1]+pidotr*mu[j][1]-presf*dely);
bforcecoulz = bfac * (pjdotr*mu[i][2]+pidotr*mu[j][2]-presf*delz);
bforcecoulx = bfac * (pjdotr * mu[i][0] + pidotr * mu[j][0] - presf * delx);
bforcecouly = bfac * (pjdotr * mu[i][1] + pidotr * mu[j][1] - presf * dely);
bforcecoulz = bfac * (pjdotr * mu[i][2] + pidotr * mu[j][2] - presf * delz);
forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx );
forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly );
forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz );
forcecoulx += 3.0 * r5inv * (aforcecoulx + bforcecoulx);
forcecouly += 3.0 * r5inv * (aforcecouly + bforcecouly);
forcecoulz += 3.0 * r5inv * (aforcecoulz + bforcecoulz);
pre2 = 3.0 * bfac * r5inv * pjdotr;
pre3 = 3.0 * bfac * r5inv * pidotr;
pre4 = -bfac * r3inv;
crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]);
crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]);
crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]);
crossx = pre4 * (mu[i][1] * mu[j][2] - mu[i][2] * mu[j][1]);
crossy = pre4 * (mu[i][2] * mu[j][0] - mu[i][0] * mu[j][2]);
crossz = pre4 * (mu[i][0] * mu[j][1] - mu[i][1] * mu[j][0]);
tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely);
tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz);
tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx);
tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely);
tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz);
tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx);
tixcoul += crossx + pre2 * (mu[i][1] * delz - mu[i][2] * dely);
tiycoul += crossy + pre2 * (mu[i][2] * delx - mu[i][0] * delz);
tizcoul += crossz + pre2 * (mu[i][0] * dely - mu[i][1] * delx);
tjxcoul += -crossx + pre3 * (mu[j][1] * delz - mu[j][2] * dely);
tjycoul += -crossy + pre3 * (mu[j][2] * delx - mu[j][0] * delz);
tjzcoul += -crossz + pre3 * (mu[j][0] * dely - mu[j][1] * delx);
}
if (mu[i][3] > 0.0 && q[j] != 0.0) {
r3inv = r2inv*rinv;
r5inv = r3inv*r2inv;
pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv);
pqfac = 1.0 - 3.0*rsq*rcutcoul2inv +
2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv);
r3inv = r2inv * rinv;
r5inv = r3inv * r2inv;
pidotr = mu[i][0] * delx + mu[i][1] * dely + mu[i][2] * delz;
rcutcoul2inv = 1.0 / cut_coulsq[itype][jtype];
pre1 = 3.0 * q[j] * r5inv * pidotr * (1 - rsq * rcutcoul2inv);
pqfac = 1.0 - 3.0 * rsq * rcutcoul2inv +
2.0 * rsq * sqrt(rsq) * rcutcoul2inv * sqrt(rcutcoul2inv);
pre2 = q[j] * r3inv * pqfac;
forcecoulx += pre2*mu[i][0] - pre1*delx;
forcecouly += pre2*mu[i][1] - pre1*dely;
forcecoulz += pre2*mu[i][2] - pre1*delz;
tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely);
tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz);
tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx);
forcecoulx += pre2 * mu[i][0] - pre1 * delx;
forcecouly += pre2 * mu[i][1] - pre1 * dely;
forcecoulz += pre2 * mu[i][2] - pre1 * delz;
tixcoul += pre2 * (mu[i][1] * delz - mu[i][2] * dely);
tiycoul += pre2 * (mu[i][2] * delx - mu[i][0] * delz);
tizcoul += pre2 * (mu[i][0] * dely - mu[i][1] * delx);
}
if (mu[j][3] > 0.0 && qtmp != 0.0) {
r3inv = r2inv*rinv;
r5inv = r3inv*r2inv;
pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv);
qpfac = 1.0 - 3.0*rsq*rcutcoul2inv +
2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv);
r3inv = r2inv * rinv;
r5inv = r3inv * r2inv;
pjdotr = mu[j][0] * delx + mu[j][1] * dely + mu[j][2] * delz;
rcutcoul2inv = 1.0 / cut_coulsq[itype][jtype];
pre1 = 3.0 * qtmp * r5inv * pjdotr * (1 - rsq * rcutcoul2inv);
qpfac = 1.0 - 3.0 * rsq * rcutcoul2inv +
2.0 * rsq * sqrt(rsq) * rcutcoul2inv * sqrt(rcutcoul2inv);
pre2 = qtmp * r3inv * qpfac;
forcecoulx += pre1*delx - pre2*mu[j][0];
forcecouly += pre1*dely - pre2*mu[j][1];
forcecoulz += pre1*delz - pre2*mu[j][2];
tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely);
tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz);
tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx);
forcecoulx += pre1 * delx - pre2 * mu[j][0];
forcecouly += pre1 * dely - pre2 * mu[j][1];
forcecoulz += pre1 * delz - pre2 * mu[j][2];
tjxcoul += -pre2 * (mu[j][1] * delz - mu[j][2] * dely);
tjycoul += -pre2 * (mu[j][2] * delx - mu[j][0] * delz);
tjzcoul += -pre2 * (mu[j][0] * dely - mu[j][1] * delx);
}
}
// LJ interaction
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv;
r6inv = r2inv * r2inv * r2inv;
forceljcut = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]) * r2inv;
rcutlj2inv = 1.0 / cut_ljsq[itype][jtype];
rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv;
forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) *
rcutlj6inv * rcutlj2inv;
forceljsf =
(lj1[itype][jtype] * rcutlj6inv - lj2[itype][jtype]) * rcutlj6inv * rcutlj2inv;
forcelj = factor_lj * (forceljcut - forceljsf);
} else forcelj = 0.0;
} else
forcelj = 0.0;
// total force
fq = factor_coul*qqrd2e;
fx = fq*forcecoulx + delx*forcelj;
fy = fq*forcecouly + dely*forcelj;
fz = fq*forcecoulz + delz*forcelj;
fq = factor_coul * qqrd2e;
fx = fq * forcecoulx + delx * forcelj;
fy = fq * forcecouly + dely * forcelj;
fz = fq * forcecoulz + delz * forcelj;
// force & torque accumulation
f[i][0] += fx;
f[i][1] += fy;
f[i][2] += fz;
torque[i][0] += fq*tixcoul;
torque[i][1] += fq*tiycoul;
torque[i][2] += fq*tizcoul;
torque[i][0] += fq * tixcoul;
torque[i][1] += fq * tiycoul;
torque[i][2] += fq * tizcoul;
if (eflag) {
if (rsq < cut_coulsq[itype][jtype]) {
ecoul = qtmp*q[j]*rinv*
pow((1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype])),2);
ecoul = qtmp * q[j] * rinv * pow((1.0 - sqrt(rsq) / sqrt(cut_coulsq[itype][jtype])), 2);
if (mu[i][3] > 0.0 && mu[j][3] > 0.0)
ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr);
if (mu[i][3] > 0.0 && q[j] != 0.0)
ecoul += -q[j]*r3inv * pqfac * pidotr;
if (mu[j][3] > 0.0 && qtmp != 0.0)
ecoul += qtmp*r3inv * qpfac * pjdotr;
ecoul *= factor_coul*qqrd2e;
} else ecoul = 0.0;
ecoul += bfac * (r3inv * pdotp - 3.0 * r5inv * pidotr * pjdotr);
if (mu[i][3] > 0.0 && q[j] != 0.0) ecoul += -q[j] * r3inv * pqfac * pidotr;
if (mu[j][3] > 0.0 && qtmp != 0.0) ecoul += qtmp * r3inv * qpfac * pjdotr;
ecoul *= factor_coul * qqrd2e;
} else
ecoul = 0.0;
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) +
rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])*
rsq*rcutlj2inv +
rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]);
evdwl = r6inv * (lj3[itype][jtype] * r6inv - lj4[itype][jtype]) +
rcutlj6inv * (6 * lj3[itype][jtype] * rcutlj6inv - 3 * lj4[itype][jtype]) * rsq *
rcutlj2inv +
rcutlj6inv * (-7 * lj3[itype][jtype] * rcutlj6inv + 4 * lj4[itype][jtype]);
evdwl *= factor_lj;
} else evdwl = 0.0;
} else
evdwl = 0.0;
}
if (evflag) ev_tally_xyz_full(i,evdwl,ecoul,
fx,fy,fz,delx,dely,delz);
if (evflag) ev_tally_xyz_full(i, evdwl, ecoul, fx, fy, fz, delx, dely, delz);
}
}
}

View File

@ -23,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -155,11 +154,7 @@ void PairLJSmoothGPU::init_style()
gpu_mode, screen, ljsw0, ljsw1, ljsw2, ljsw3, ljsw4, cut_inner, cut_inner_sq);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this, instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,25 +32,21 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int mie_gpu_init(const int ntypes, double **cutsq, double **host_mie1,
double **host_mie2, double **host_mie3, double **host_mie4,
double **host_gamA, double **host_gamR, double **offset,
double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
int mie_gpu_init(const int ntypes, double **cutsq, double **host_mie1, double **host_mie2,
double **host_mie3, double **host_mie4, double **host_gamA, double **host_gamR,
double **offset, double *special_lj, const int nlocal, const int nall,
const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode,
FILE *screen);
void mie_gpu_clear();
int ** mie_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
const double cpu_time, bool &success);
void mie_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
int **mie_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void mie_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const double cpu_time,
bool &success);
double mie_gpu_bytes();
/* ---------------------------------------------------------------------- */
@ -79,7 +73,7 @@ PairMIECutGPU::~PairMIECutGPU()
void PairMIECutGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -87,7 +81,7 @@ void PairMIECutGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -96,28 +90,24 @@ void PairMIECutGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = mie_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh =
mie_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
mie_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
mie_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -132,17 +122,15 @@ void PairMIECutGPU::init_style()
{
cut_respa = nullptr;
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
double cut;
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -150,21 +138,15 @@ void PairMIECutGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = mie_gpu_init(atom->ntypes+1, cutsq, mie1, mie2, mie3, mie4,
gamA, gamR, offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success = mie_gpu_init(atom->ntypes + 1, cutsq, mie1, mie2, mie3, mie4, gamA, gamR, offset,
force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -177,11 +159,12 @@ double PairMIECutGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairMIECutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r2inv,rgamR,rgamA,forcemie,factor_mie;
void PairMIECutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r2inv, rgamR, rgamA, forcemie, factor_mie;
int *jlist;
double **x = atom->x;
@ -208,27 +191,26 @@ void PairMIECutGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
rgamA = pow(r2inv,(gamA[itype][jtype]/2.0));
rgamR = pow(r2inv,(gamR[itype][jtype]/2.0));
forcemie = (mie1[itype][jtype]*rgamR - mie2[itype][jtype]*rgamA);
fpair = factor_mie*forcemie*r2inv;
r2inv = 1.0 / rsq;
rgamA = pow(r2inv, (gamA[itype][jtype] / 2.0));
rgamR = pow(r2inv, (gamR[itype][jtype] / 2.0));
forcemie = (mie1[itype][jtype] * rgamR - mie2[itype][jtype] * rgamA);
fpair = factor_mie * forcemie * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
evdwl = (mie3[itype][jtype]*rgamR - mie4[itype][jtype]*rgamA) -
offset[itype][jtype];
evdwl = (mie3[itype][jtype] * rgamR - mie4[itype][jtype] * rgamA) - offset[itype][jtype];
evdwl *= factor_mie;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,24 +32,20 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// These are the accelerator entry points used by PairMorseGPU; only the
// prototypes are visible here.
// NOTE(review): the first four lines below are an earlier, more tightly
// wrapped layout of the opening of the mor_gpu_init prototype. The same
// parameter list is restated in full by the next three lines, and both layouts
// share the single closing line `const double cell_size, ...);`. Likewise,
// mor_gpu_compute_n and mor_gpu_compute each appear twice with identical
// parameter lists.
//
// mor_gpu_init: called from PairMorseGPU::init_style() with atom->ntypes + 1,
// cutsq, the morse1/r0/alpha/d0/offset tables, force->special_lj, atom counts,
// a neighbor limit, maxspecial and the cell size; the int result goes to
// GPU_EXTRA::check_flag(). gpu_mode is a non-const reference, so the callee
// can presumably update it -- TODO confirm.
int mor_gpu_init(const int ntypes, double **cutsq, double **host_morse1,
double **host_r0, double **host_alpha, double **host_d0,
double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
int mor_gpu_init(const int ntypes, double **cutsq, double **host_morse1, double **host_r0,
double **host_alpha, double **host_d0, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
// NOTE(review): no call site is visible in this excerpt; presumably the
// counterpart of mor_gpu_init() -- confirm.
void mor_gpu_clear();
// Used by PairMorseGPU::compute() when gpu_mode != GPU_FORCE: the int** result
// is stored as firstneigh, and the caller passes &ilist / &numneigh and reads
// both afterwards. host_start and success are written through references.
int ** mor_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
const double cpu_time, bool &success);
// Used by PairMorseGPU::compute() when gpu_mode == GPU_FORCE: the caller
// provides ilist/numj/firstneigh from its own neighbor list.
void mor_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
// Same prototype as mor_gpu_compute_n above, re-flowed onto fewer lines.
int **mor_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
// Same prototype as mor_gpu_compute above, re-flowed onto fewer lines.
void mor_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const double cpu_time,
bool &success);
// NOTE(review): no call site is visible in this excerpt; presumably reports
// accelerator-side memory use -- confirm.
double mor_gpu_bytes();
/* ---------------------------------------------------------------------- */
@ -77,7 +71,7 @@ PairMorseGPU::~PairMorseGPU()
void PairMorseGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -85,7 +79,7 @@ void PairMorseGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -94,28 +88,24 @@ void PairMorseGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = mor_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start, &ilist, &numneigh,
cpu_time, success);
firstneigh =
mor_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
mor_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
mor_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -135,10 +125,9 @@ void PairMorseGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -146,21 +135,15 @@ void PairMorseGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = mor_gpu_init(atom->ntypes+1, cutsq, morse1, r0, alpha, d0,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success = mor_gpu_init(atom->ntypes + 1, cutsq, morse1, r0, alpha, d0, offset,
force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -173,12 +156,12 @@ double PairMorseGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairMorseGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh)
void PairMorseGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r,dr,dexp,factor_lj;
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r, dr, dexp, factor_lj;
int *jlist;
double **x = atom->x;
@ -205,26 +188,25 @@ void PairMorseGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r = sqrt(rsq);
dr = r - r0[itype][jtype];
dexp = exp(-alpha[itype][jtype] * dr);
fpair = factor_lj * morse1[itype][jtype] * (dexp*dexp - dexp) / r;
fpair = factor_lj * morse1[itype][jtype] * (dexp * dexp - dexp) / r;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
evdwl = d0[itype][jtype] * (dexp*dexp - 2.0*dexp) -
offset[itype][jtype];
evdwl = d0[itype][jtype] * (dexp * dexp - 2.0 * dexp) - offset[itype][jtype];
evdwl *= factor_lj;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -27,7 +26,6 @@
#include "math_extra.h"
#include "memory.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -37,39 +35,32 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// These are the accelerator entry points used by PairRESquaredGPU; only the
// prototypes are visible here.
// NOTE(review): the first six lines below are an earlier, more tightly wrapped
// layout of the opening of the re_gpu_init prototype. The same parameter list
// is restated by the following four lines, and both layouts share the single
// closing line `const double cell_size, ...);`. re_gpu_compute_n and
// re_gpu_compute also appear twice each with identical parameter lists.
//
// re_gpu_init: called from PairRESquaredGPU::init_style() with
// atom->ntypes + 1, shape1, well, cutsq, sigma, epsilon, form, lj1..lj4,
// offset, force->special_lj, atom counts, a neighbor limit, maxspecial and the
// cell size; the int result goes to GPU_EXTRA::check_flag().
int re_gpu_init(const int ntypes, double **shape, double **well,
double **cutsq, double **sigma, double **epsilon,
int **form, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset,
double *special_lj, const int nlocal, const int nall,
const int max_nbors, const int maxspecial,
int re_gpu_init(const int ntypes, double **shape, double **well, double **cutsq, double **sigma,
double **epsilon, int **form, double **host_lj1, double **host_lj2,
double **host_lj3, double **host_lj4, double **offset, double *special_lj,
const int nlocal, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
// NOTE(review): no call site is visible in this excerpt; presumably the
// counterpart of re_gpu_init() -- confirm.
void re_gpu_clear();
// Used by PairRESquaredGPU::compute() when gpu_mode != GPU_FORCE: the int**
// result is stored as firstneigh, and the caller passes &ilist / &numneigh.
// host_quat receives the caller's quat array, which compute() fills from the
// ellipsoid bonus data before the call.
int ** re_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, double **host_quat);
// Used by PairRESquaredGPU::compute() when gpu_mode == GPU_FORCE. Unlike the
// void *_gpu_compute routines of the other styles in this changeset, this one
// returns an int*; the caller passes list->ilist in and uses the returned
// pointer as ilist from then on.
int * re_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success, double **host_quat);
// Same prototype as re_gpu_compute_n above, re-flowed onto fewer lines.
int **re_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, double **host_quat);
// Same prototype as re_gpu_compute above, re-flowed onto fewer lines.
int *re_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const double cpu_time,
bool &success, double **host_quat);
// Its return value is added to the total in PairRESquaredGPU::memory_usage().
double re_gpu_bytes();
// Interaction categories used as case labels in the switch on
// form[itype][jtype] inside cpu_compute(); ELLIPSE_ELLIPSE has no explicit
// case there and is handled by the default branch.
// NOTE(review): the enumeration is listed twice (compact and spaced layout);
// a compilable translation unit can keep only one of the two lines.
enum{SPHERE_SPHERE,SPHERE_ELLIPSE,ELLIPSE_SPHERE,ELLIPSE_ELLIPSE};
enum { SPHERE_SPHERE, SPHERE_ELLIPSE, ELLIPSE_SPHERE, ELLIPSE_ELLIPSE };
/* ---------------------------------------------------------------------- */
PairRESquaredGPU::PairRESquaredGPU(LAMMPS *lmp) : PairRESquared(lmp),
gpu_mode(GPU_FORCE)
PairRESquaredGPU::PairRESquaredGPU(LAMMPS *lmp) : PairRESquared(lmp), gpu_mode(GPU_FORCE)
{
reinitflag = 0;
avec = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
if (!avec)
error->all(FLERR,"Pair resquared/gpu requires atom style ellipsoid");
if (!avec) error->all(FLERR, "Pair resquared/gpu requires atom style ellipsoid");
quat_nmax = 0;
quat = nullptr;
suffix_flag |= Suffix::GPU;
@ -91,7 +82,7 @@ PairRESquaredGPU::~PairRESquaredGPU()
void PairRESquaredGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -105,7 +96,7 @@ void PairRESquaredGPU::compute(int eflag, int vflag)
}
AtomVecEllipsoid::Bonus *bonus = avec->bonus;
int *ellipsoid = atom->ellipsoid;
for (int i=0; i<nall; i++) {
for (int i = 0; i < nall; i++) {
int qi = ellipsoid[i];
if (qi > -1) {
quat[i][0] = bonus[qi].quat[0];
@ -116,7 +107,7 @@ void PairRESquaredGPU::compute(int eflag, int vflag)
}
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -125,26 +116,22 @@ void PairRESquaredGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = re_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success, quat);
firstneigh =
re_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success, quat);
} else {
inum = list->inum;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
ilist = re_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
list->ilist, numneigh, firstneigh, eflag, vflag,
eflag_atom, vflag_atom, host_start,
cpu_time, success, quat);
ilist = re_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, list->ilist, numneigh,
firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
success, quat);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start < inum) {
cpu_time = platform::walltime();
@ -159,21 +146,20 @@ void PairRESquaredGPU::compute(int eflag, int vflag)
void PairRESquaredGPU::init_style()
{
if (!atom->ellipsoid_flag)
error->all(FLERR,"Pair resquared/gpu requires atom style ellipsoid");
if (!atom->ellipsoid_flag) error->all(FLERR, "Pair resquared/gpu requires atom style ellipsoid");
// per-type shape precalculations
// require that atom shapes are identical within each type
// if shape = 0 for point particle, set shape = 1 as required by Gay-Berne
for (int i = 1; i <= atom->ntypes; i++) {
if (!atom->shape_consistency(i,shape1[i][0],shape1[i][1],shape1[i][2]))
error->all(FLERR,"Pair resquared/gpu requires atoms with same type have same shape");
if (!atom->shape_consistency(i, shape1[i][0], shape1[i][1], shape1[i][2]))
error->all(FLERR, "Pair resquared/gpu requires atoms with same type have same shape");
if (setwell[i]) {
shape2[i][0] = shape1[i][0]*shape1[i][0];
shape2[i][1] = shape1[i][1]*shape1[i][1];
shape2[i][2] = shape1[i][2]*shape1[i][2];
lshape[i] = shape1[i][0]*shape1[i][1]*shape1[i][2];
shape2[i][0] = shape1[i][0] * shape1[i][0];
shape2[i][1] = shape1[i][1] * shape1[i][1];
shape2[i][2] = shape1[i][2] * shape1[i][2];
lshape[i] = shape1[i][0] * shape1[i][1] * shape1[i][2];
}
}
@ -183,10 +169,9 @@ void PairRESquaredGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -195,22 +180,16 @@ void PairRESquaredGPU::init_style()
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = re_gpu_init(atom->ntypes+1, shape1, well, cutsq, sigma,
epsilon, form, lj1, lj2, lj3, lj4, offset,
force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success =
re_gpu_init(atom->ntypes + 1, shape1, well, cutsq, sigma, epsilon, form, lj1, lj2, lj3, lj4,
offset, force->special_lj, atom->nlocal, atom->nlocal + atom->nghost, mnf,
maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
quat_nmax = static_cast<int>(1.1 * (atom->nlocal + atom->nghost));
memory->grow(quat, quat_nmax, 4, "pair:quat");
}
@ -220,20 +199,19 @@ void PairRESquaredGPU::init_style()
// Returns the sum of three contributions: the base-class figure from
// Pair::memory_usage(), memory->usage() for the quat array with quat_nmax
// rows, and the value reported by re_gpu_bytes().
double PairRESquaredGPU::memory_usage()
{
double bytes = Pair::memory_usage();
// NOTE(review): the two return statements below compute the same expression
// and differ only in whitespace; the second one is unreachable as written.
return bytes + memory->usage(quat,quat_nmax)+re_gpu_bytes();
return bytes + memory->usage(quat, quat_nmax) + re_gpu_bytes();
}
/* ---------------------------------------------------------------------- */
void PairRESquaredGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
void PairRESquaredGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i,j,ii,jj,jnum,itype,jtype;
double evdwl,one_eng,rsq,r2inv,r6inv,forcelj,factor_lj;
double fforce[3],ttor[3],rtor[3],r12[3];
int i, j, ii, jj, jnum, itype, jtype;
double evdwl, one_eng, rsq, r2inv, r6inv, forcelj, factor_lj;
double fforce[3], ttor[3], rtor[3], r12[3];
int *jlist;
RE2Vars wi,wj;
RE2Vars wi, wj;
double **x = atom->x;
double **f = atom->f;
@ -249,7 +227,7 @@ void PairRESquaredGPU::cpu_compute(int start, int inum, int eflag,
// not a LJ sphere
if (lshape[itype] != 0.0) precompute_i(i,wi);
if (lshape[itype] != 0.0) precompute_i(i, wi);
jlist = firstneigh[i];
jnum = numneigh[i];
@ -261,10 +239,10 @@ void PairRESquaredGPU::cpu_compute(int start, int inum, int eflag,
// r12 = center to center vector
r12[0] = x[j][0]-x[i][0];
r12[1] = x[j][1]-x[i][1];
r12[2] = x[j][2]-x[i][2];
rsq = MathExtra::dot3(r12,r12);
r12[0] = x[j][0] - x[i][0];
r12[1] = x[j][1] - x[i][1];
r12[2] = x[j][2] - x[i][2];
rsq = MathExtra::dot3(r12, r12);
jtype = type[j];
// compute if less than cutoff
@ -272,39 +250,39 @@ void PairRESquaredGPU::cpu_compute(int start, int inum, int eflag,
if (rsq < cutsq[itype][jtype]) {
switch (form[itype][jtype]) {
case SPHERE_SPHERE:
r2inv = 1.0/rsq;
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
forcelj *= -r2inv;
if (eflag) one_eng =
r6inv*(r6inv*lj3[itype][jtype]-lj4[itype][jtype]) -
offset[itype][jtype];
fforce[0] = r12[0]*forcelj;
fforce[1] = r12[1]*forcelj;
fforce[2] = r12[2]*forcelj;
break;
case SPHERE_SPHERE:
r2inv = 1.0 / rsq;
r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lj1[itype][jtype] * r6inv - lj2[itype][jtype]);
forcelj *= -r2inv;
if (eflag)
one_eng =
r6inv * (r6inv * lj3[itype][jtype] - lj4[itype][jtype]) - offset[itype][jtype];
fforce[0] = r12[0] * forcelj;
fforce[1] = r12[1] * forcelj;
fforce[2] = r12[2] * forcelj;
break;
case SPHERE_ELLIPSE:
precompute_i(j,wj);
one_eng = resquared_lj(j,i,wj,r12,rsq,fforce,rtor,false);
break;
case SPHERE_ELLIPSE:
precompute_i(j, wj);
one_eng = resquared_lj(j, i, wj, r12, rsq, fforce, rtor, false);
break;
case ELLIPSE_SPHERE:
one_eng = resquared_lj(i,j,wi,r12,rsq,fforce,ttor,true);
tor[i][0] += ttor[0]*factor_lj;
tor[i][1] += ttor[1]*factor_lj;
tor[i][2] += ttor[2]*factor_lj;
break;
case ELLIPSE_SPHERE:
one_eng = resquared_lj(i, j, wi, r12, rsq, fforce, ttor, true);
tor[i][0] += ttor[0] * factor_lj;
tor[i][1] += ttor[1] * factor_lj;
tor[i][2] += ttor[2] * factor_lj;
break;
default:
precompute_i(j,wj);
one_eng = resquared_analytic(i,j,wi,wj,r12,rsq,fforce,ttor,rtor);
tor[i][0] += ttor[0]*factor_lj;
tor[i][1] += ttor[1]*factor_lj;
tor[i][2] += ttor[2]*factor_lj;
default:
precompute_i(j, wj);
one_eng = resquared_analytic(i, j, wi, wj, r12, rsq, fforce, ttor, rtor);
tor[i][0] += ttor[0] * factor_lj;
tor[i][1] += ttor[1] * factor_lj;
tor[i][2] += ttor[2] * factor_lj;
break;
break;
}
fforce[0] *= factor_lj;
@ -314,10 +292,11 @@ void PairRESquaredGPU::cpu_compute(int start, int inum, int eflag,
f[i][1] += fforce[1];
f[i][2] += fforce[2];
if (eflag) evdwl = factor_lj*one_eng;
if (eflag) evdwl = factor_lj * one_eng;
if (evflag) ev_tally_xyz_full(i,evdwl,0.0,fforce[0],fforce[1],
fforce[2],-r12[0],-r12[1],-r12[2]);
if (evflag)
ev_tally_xyz_full(i, evdwl, 0.0, fforce[0], fforce[1], fforce[2], -r12[0], -r12[1],
-r12[2]);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,7 +24,6 @@
#include "gpu_extra.h"
#include "math_const.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -35,28 +33,22 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
// These are the accelerator entry points used by PairSoftGPU; only the
// prototypes are visible here.
// NOTE(review): soft_gpu_init, soft_gpu_reinit, soft_gpu_compute_n and
// soft_gpu_compute are each declared twice below with identical parameter
// lists (a tightly wrapped layout and a re-flowed one). The repetition is a
// legal redeclaration but redundant.
//
// soft_gpu_init: called from PairSoftGPU::init_style() with atom->ntypes + 1,
// cutsq, prefactor, cut, force->special_lj, atom counts, a neighbor limit,
// maxspecial and the cell size; the int result goes to
// GPU_EXTRA::check_flag().
int soft_gpu_init(const int ntypes, double **cutsq, double **prefactor,
double **cut, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
// soft_gpu_reinit: called from PairSoftGPU::reinit() right after
// Pair::reinit(), with atom->ntypes + 1, cutsq, prefactor and cut.
void soft_gpu_reinit(const int ntypes, double **cutsq, double **host_prefactor,
double **host_cut);
// Same prototype as soft_gpu_init above, re-flowed onto fewer lines.
int soft_gpu_init(const int ntypes, double **cutsq, double **prefactor, double **cut,
double *special_lj, const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen);
// Same prototype as soft_gpu_reinit above, on a single line.
void soft_gpu_reinit(const int ntypes, double **cutsq, double **host_prefactor, double **host_cut);
// NOTE(review): no call site is visible in this excerpt; presumably the
// counterpart of soft_gpu_init() -- confirm.
void soft_gpu_clear();
// Used by PairSoftGPU::compute() when gpu_mode != GPU_FORCE: the int** result
// is stored as firstneigh, and the caller passes &ilist / &numneigh and reads
// both afterwards. host_start and success are written through references.
int ** soft_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
const double cpu_time, bool &success);
// Used by PairSoftGPU::compute() when gpu_mode == GPU_FORCE: the caller
// provides ilist/numj/firstneigh from its own neighbor list.
void soft_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
// Same prototype as soft_gpu_compute_n above, re-flowed onto fewer lines.
int **soft_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
// Same prototype as soft_gpu_compute above, re-flowed onto fewer lines.
void soft_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
// NOTE(review): no call site is visible in this excerpt; presumably reports
// accelerator-side memory use -- confirm.
double soft_gpu_bytes();
using namespace MathConst;
/* ---------------------------------------------------------------------- */
@ -82,7 +74,7 @@ PairSoftGPU::~PairSoftGPU()
void PairSoftGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -90,7 +82,7 @@ void PairSoftGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -99,28 +91,24 @@ void PairSoftGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = soft_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh =
soft_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
soft_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
soft_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -140,10 +128,9 @@ void PairSoftGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
mcut = init_one(i,j);
mcut = init_one(i, j);
mcut *= mcut;
if (mcut > maxcut)
maxcut = mcut;
if (mcut > maxcut) maxcut = mcut;
cutsq[i][j] = cutsq[j][i] = mcut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -151,21 +138,15 @@ void PairSoftGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = soft_gpu_init(atom->ntypes+1, cutsq, prefactor, cut,
force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success =
soft_gpu_init(atom->ntypes + 1, cutsq, prefactor, cut, force->special_lj, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -174,7 +155,7 @@ void PairSoftGPU::reinit()
{
Pair::reinit();
soft_gpu_reinit(atom->ntypes+1, cutsq, prefactor, cut);
soft_gpu_reinit(atom->ntypes + 1, cutsq, prefactor, cut);
}
/* ---------------------------------------------------------------------- */
@ -187,11 +168,12 @@ double PairSoftGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairSoftGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double r,rsq,arg,factor_lj;
void PairSoftGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double r, rsq, arg, factor_lj;
int *jlist;
double **x = atom->x;
@ -218,24 +200,24 @@ void PairSoftGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r = sqrt(rsq);
arg = MY_PI*r/cut[itype][jtype];
if (r > 0.0) fpair = factor_lj * prefactor[itype][jtype] *
sin(arg) * MY_PI/cut[itype][jtype]/r;
else fpair = 0.0;
arg = MY_PI * r / cut[itype][jtype];
if (r > 0.0)
fpair = factor_lj * prefactor[itype][jtype] * sin(arg) * MY_PI / cut[itype][jtype] / r;
else
fpair = 0.0;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag)
evdwl = factor_lj * prefactor[itype][jtype] * (1.0+cos(arg));
if (eflag) evdwl = factor_lj * prefactor[itype][jtype] * (1.0 + cos(arg));
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
#include "gpu_extra.h"
#include "memory.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,26 +32,21 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int sw_gpu_init(const int ntypes, const int inum, const int nall,
const int max_nbors, const double cell_size, int &gpu_mode,
FILE *screen, double **ncutsq, double **ncut, double **sigma,
double **powerp, double **powerq, double **sigma_gamma,
double **c1, double **c2, double **c3,double **c4,
double **c5, double **c6, double ***lambda_epsilon,
double ***costheta, const int *map, int ***e2param);
int sw_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors,
const double cell_size, int &gpu_mode, FILE *screen, double **ncutsq, double **ncut,
double **sigma, double **powerp, double **powerq, double **sigma_gamma, double **c1,
double **c2, double **c3, double **c4, double **c5, double **c6,
double ***lambda_epsilon, double ***costheta, const int *map, int ***e2param);
void sw_gpu_clear();
int ** sw_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
const double cpu_time, bool &success);
void sw_gpu_compute(const int ago, const int nloc, const int nall,
const int ln, double **host_x, int *host_type, int *ilist,
int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, const double cpu_time, bool &success);
int **sw_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void sw_gpu_compute(const int ago, const int nloc, const int nall, const int ln, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
double sw_gpu_bytes();
#define MAXLINE 1024
@ -79,15 +72,14 @@ PairSWGPU::PairSWGPU(LAMMPS *lmp) : PairSW(lmp), gpu_mode(GPU_FORCE)
PairSWGPU::~PairSWGPU()
{
sw_gpu_clear();
if (allocated)
memory->destroy(cutghost);
if (allocated) memory->destroy(cutghost);
}
/* ---------------------------------------------------------------------- */
void PairSWGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -95,7 +87,7 @@ void PairSWGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -104,28 +96,24 @@ void PairSWGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = sw_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh =
sw_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
sw_gpu_compute(neighbor->ago, inum, nall, inum+list->gnum,
atom->x, atom->type, ilist, numneigh, firstneigh, eflag,
vflag, eflag_atom, vflag_atom, host_start, cpu_time,
sw_gpu_compute(neighbor->ago, inum, nall, inum + list->gnum, atom->x, atom->type, ilist,
numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
}
/* ---------------------------------------------------------------------- */
@ -135,7 +123,7 @@ void PairSWGPU::allocate()
PairSW::allocate();
int n = atom->ntypes;
memory->create(cutghost,n+1,n+1,"pair:cutghost");
memory->create(cutghost, n + 1, n + 1, "pair:cutghost");
}
/* ----------------------------------------------------------------------
@ -146,8 +134,7 @@ void PairSWGPU::init_style()
{
double cell_size = cutmax + neighbor->skin;
if (atom->tag_enable == 0)
error->all(FLERR,"Pair style sw/gpu requires atom IDs");
if (atom->tag_enable == 0) error->all(FLERR, "Pair style sw/gpu requires atom IDs");
double **c1, **c2, **c3, **c4, **c5, **c6;
double **ncutsq, **ncut, **sigma, **powerp, **powerq, **sigma_gamma;
@ -209,10 +196,9 @@ void PairSWGPU::init_style()
}
int mnf = 5e-2 * neighbor->oneatom;
int success = sw_gpu_init(tp1, atom->nlocal, atom->nlocal+atom->nghost, mnf,
cell_size, gpu_mode, screen, ncutsq, ncut, sigma,
powerp, powerq, sigma_gamma, c1, c2, c3, c4, c5,
c6, lambda_epsilon, costheta, map, elem3param);
int success = sw_gpu_init(tp1, atom->nlocal, atom->nlocal + atom->nghost, mnf, cell_size,
gpu_mode, screen, ncutsq, ncut, sigma, powerp, powerq, sigma_gamma, c1,
c2, c3, c4, c5, c6, lambda_epsilon, costheta, map, elem3param);
memory->destroy(ncutsq);
memory->destroy(ncut);
@ -229,18 +215,13 @@ void PairSWGPU::init_style()
memory->destroy(lambda_epsilon);
memory->destroy(costheta);
GPU_EXTRA::check_flag(success,error,world);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->ghost = 1;
}
if (comm->cutghostuser < (2.0*cutmax + neighbor->skin)) {
comm->cutghostuser=2.0*cutmax + neighbor->skin;
if (comm->me == 0)
error->warning(FLERR,"Increasing communication cutoff for GPU style");
if (gpu_mode == GPU_FORCE)
neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
if (comm->cutghostuser < (2.0 * cutmax + neighbor->skin)) {
comm->cutghostuser = 2.0 * cutmax + neighbor->skin;
if (comm->me == 0) error->warning(FLERR, "Increasing communication cutoff for GPU style");
}
}
@ -250,10 +231,9 @@ void PairSWGPU::init_style()
double PairSWGPU::init_one(int i, int j)
{
if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
cutghost[i][j] = cutmax;
cutghost[j][i] = cutmax;
return cutmax;
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,7 +24,6 @@
#include "gpu_extra.h"
#include "memory.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -35,31 +33,25 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int table_gpu_init(const int ntypes, double **cutsq,
double ***host_table_coeffs, double **host_table_data,
double *special_lj, const int nlocal, const int nall,
const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen,
int tabstyle, int ntables, int tablength);
int table_gpu_init(const int ntypes, double **cutsq, double ***host_table_coeffs,
double **host_table_data, double *special_lj, const int nlocal, const int nall,
const int max_nbors, const int maxspecial, const double cell_size, int &gpu_mode,
FILE *screen, int tabstyle, int ntables, int tablength);
void table_gpu_clear();
int ** table_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success);
void table_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **table_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void table_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
double table_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairTableGPU::PairTableGPU(LAMMPS *lmp) : PairTable(lmp),
gpu_mode(GPU_FORCE)
PairTableGPU::PairTableGPU(LAMMPS *lmp) : PairTable(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -81,7 +73,7 @@ PairTableGPU::~PairTableGPU()
void PairTableGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -89,7 +81,7 @@ void PairTableGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -98,28 +90,24 @@ void PairTableGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = table_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time,
success);
firstneigh =
table_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
table_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
table_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -141,10 +129,9 @@ void PairTableGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -155,7 +142,7 @@ void PairTableGPU::init_style()
// pack tables and send them to device
double ***table_coeffs = nullptr;
double **table_data = nullptr;
memory->create(table_coeffs, ntypes+1, ntypes+1, 6, "table:coeffs");
memory->create(table_coeffs, ntypes + 1, ntypes + 1, 6, "table:coeffs");
Table *tb;
for (int i = 1; i <= atom->ntypes; i++)
@ -171,67 +158,60 @@ void PairTableGPU::init_style()
}
if (tabstyle != BITMAP) {
memory->create(table_data, ntables, 6*tablength, "table:data");
memory->create(table_data, ntables, 6 * tablength, "table:data");
for (int n = 0; n < ntables; n++) {
tb = &tables[n];
if (tabstyle == LOOKUP) {
for (int k = 0; k<tablength-1; k++) {
table_data[n][6*k+1] = tb->e[k];
table_data[n][6*k+2] = tb->f[k];
for (int k = 0; k < tablength - 1; k++) {
table_data[n][6 * k + 1] = tb->e[k];
table_data[n][6 * k + 2] = tb->f[k];
}
} else if (tabstyle == LINEAR) {
for (int k = 0; k<tablength; k++) {
table_data[n][6*k+0] = tb->rsq[k];
table_data[n][6*k+1] = tb->e[k];
table_data[n][6*k+2] = tb->f[k];
if (k<tablength-1) {
table_data[n][6*k+3] = tb->de[k];
table_data[n][6*k+4] = tb->df[k];
for (int k = 0; k < tablength; k++) {
table_data[n][6 * k + 0] = tb->rsq[k];
table_data[n][6 * k + 1] = tb->e[k];
table_data[n][6 * k + 2] = tb->f[k];
if (k < tablength - 1) {
table_data[n][6 * k + 3] = tb->de[k];
table_data[n][6 * k + 4] = tb->df[k];
}
}
}
} else if (tabstyle == SPLINE) {
for (int k = 0; k<tablength; k++) {
table_data[n][6*k+0] = tb->rsq[k];
table_data[n][6*k+1] = tb->e[k];
table_data[n][6*k+2] = tb->f[k];
table_data[n][6*k+3] = tb->e2[k];
table_data[n][6*k+4] = tb->f2[k];
for (int k = 0; k < tablength; k++) {
table_data[n][6 * k + 0] = tb->rsq[k];
table_data[n][6 * k + 1] = tb->e[k];
table_data[n][6 * k + 2] = tb->f[k];
table_data[n][6 * k + 3] = tb->e2[k];
table_data[n][6 * k + 4] = tb->f2[k];
}
}
}
} else {
int ntable = 1 << tablength;
memory->create(table_data, ntables, 6*ntable, "table:data");
memory->create(table_data, ntables, 6 * ntable, "table:data");
for (int n = 0; n < ntables; n++) {
tb = &tables[n];
for (int k = 0; k<ntable; k++) {
table_data[n][6*k+0] = tb->rsq[k];
table_data[n][6*k+1] = tb->e[k];
table_data[n][6*k+2] = tb->f[k];
table_data[n][6*k+3] = tb->de[k];
table_data[n][6*k+4] = tb->df[k];
table_data[n][6*k+5] = tb->drsq[k];
for (int k = 0; k < ntable; k++) {
table_data[n][6 * k + 0] = tb->rsq[k];
table_data[n][6 * k + 1] = tb->e[k];
table_data[n][6 * k + 2] = tb->f[k];
table_data[n][6 * k + 3] = tb->de[k];
table_data[n][6 * k + 4] = tb->df[k];
table_data[n][6 * k + 5] = tb->drsq[k];
}
}
}
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = table_gpu_init(atom->ntypes+1, cutsq, table_coeffs, table_data,
force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, tabstyle, ntables,
tablength);
GPU_EXTRA::check_flag(success,error,world);
int success = table_gpu_init(atom->ntypes + 1, cutsq, table_coeffs, table_data, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, tabstyle, ntables, tablength);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
memory->destroy(table_coeffs);
memory->destroy(table_data);
}
@ -246,11 +226,12 @@ double PairTableGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairTableGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype,itable;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,factor_lj,fraction,value,a,b;
void PairTableGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype, itable;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, factor_lj, fraction, value, a, b;
int *jlist;
Table *tb;
@ -281,62 +262,58 @@ void PairTableGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
tb = &tables[tabindex[itype][jtype]];
if (rsq < tb->innersq)
error->one(FLERR,"Pair distance < table inner cutoff");
if (rsq < tb->innersq) error->one(FLERR, "Pair distance < table inner cutoff");
if (tabstyle == LOOKUP) {
itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
if (itable >= tlm1)
error->one(FLERR,"Pair distance > table outer cutoff");
itable = static_cast<int>((rsq - tb->innersq) * tb->invdelta);
if (itable >= tlm1) error->one(FLERR, "Pair distance > table outer cutoff");
fpair = factor_lj * tb->f[itable];
} else if (tabstyle == LINEAR) {
itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
if (itable >= tlm1)
error->one(FLERR,"Pair distance > table outer cutoff");
itable = static_cast<int>((rsq - tb->innersq) * tb->invdelta);
if (itable >= tlm1) error->one(FLERR, "Pair distance > table outer cutoff");
fraction = (rsq - tb->rsq[itable]) * tb->invdelta;
value = tb->f[itable] + fraction*tb->df[itable];
value = tb->f[itable] + fraction * tb->df[itable];
fpair = factor_lj * value;
} else if (tabstyle == SPLINE) {
itable = static_cast<int> ((rsq - tb->innersq) * tb->invdelta);
if (itable >= tlm1)
error->one(FLERR,"Pair distance > table outer cutoff");
itable = static_cast<int>((rsq - tb->innersq) * tb->invdelta);
if (itable >= tlm1) error->one(FLERR, "Pair distance > table outer cutoff");
b = (rsq - tb->rsq[itable]) * tb->invdelta;
a = 1.0 - b;
value = a * tb->f[itable] + b * tb->f[itable+1] +
((a*a*a-a)*tb->f2[itable] + (b*b*b-b)*tb->f2[itable+1]) *
tb->deltasq6;
value = a * tb->f[itable] + b * tb->f[itable + 1] +
((a * a * a - a) * tb->f2[itable] + (b * b * b - b) * tb->f2[itable + 1]) *
tb->deltasq6;
fpair = factor_lj * value;
} else {
rsq_lookup.f = rsq;
itable = rsq_lookup.i & tb->nmask;
itable >>= tb->nshiftbits;
fraction = (rsq_lookup.f - tb->rsq[itable]) * tb->drsq[itable];
value = tb->f[itable] + fraction*tb->df[itable];
value = tb->f[itable] + fraction * tb->df[itable];
fpair = factor_lj * value;
}
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
if (tabstyle == LOOKUP)
evdwl = tb->e[itable];
else if (tabstyle == LINEAR || tabstyle == BITMAP)
evdwl = tb->e[itable] + fraction*tb->de[itable];
evdwl = tb->e[itable] + fraction * tb->de[itable];
else
evdwl = a * tb->e[itable] + b * tb->e[itable+1] +
((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) *
tb->deltasq6;
evdwl = a * tb->e[itable] + b * tb->e[itable + 1] +
((a * a * a - a) * tb->e2[itable] + (b * b * b - b) * tb->e2[itable + 1]) *
tb->deltasq6;
evdwl *= factor_lj;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -25,42 +24,34 @@
#include "force.h"
#include "gpu_extra.h"
#include "memory.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int tersoff_gpu_init(const int ntypes, const int inum, const int nall,
const int max_nbors, const double cell_size, int &gpu_mode,
FILE *screen, int* host_map, const int nelements,
int*** host_elem3param, const int nparams,
const double* ts_lam1, const double* ts_lam2,
const double* ts_lam3, const double* ts_powermint,
const double* ts_biga, const double* ts_bigb,
const double* ts_bigr, const double* ts_bigd,
const double* ts_c1, const double* ts_c2,
const double* ts_c3, const double* ts_c4,
const double* ts_c, const double* ts_d,
const double* ts_h, const double* ts_gamma,
const double* ts_beta, const double* ts_powern,
const double* ts_cutsq);
int tersoff_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors,
const double cell_size, int &gpu_mode, FILE *screen, int *host_map,
const int nelements, int ***host_elem3param, const int nparams,
const double *ts_lam1, const double *ts_lam2, const double *ts_lam3,
const double *ts_powermint, const double *ts_biga, const double *ts_bigb,
const double *ts_bigr, const double *ts_bigd, const double *ts_c1,
const double *ts_c2, const double *ts_c3, const double *ts_c4,
const double *ts_c, const double *ts_d, const double *ts_h,
const double *ts_gamma, const double *ts_beta, const double *ts_powern,
const double *ts_cutsq);
void tersoff_gpu_clear();
int ** tersoff_gpu_compute_n(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success);
void tersoff_gpu_compute(const int ago, const int nlocal, const int nall,
const int nlist, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, const double cpu_time, bool &success);
int **tersoff_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success);
void tersoff_gpu_compute(const int ago, const int nlocal, const int nall, const int nlist,
double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
int &host_start, const double cpu_time, bool &success);
double tersoff_gpu_bytes();
#define MAXLINE 1024
@ -85,15 +76,14 @@ PairTersoffGPU::PairTersoffGPU(LAMMPS *lmp) : PairTersoff(lmp), gpu_mode(GPU_FOR
PairTersoffGPU::~PairTersoffGPU()
{
tersoff_gpu_clear();
if (allocated)
memory->destroy(cutghost);
if (allocated) memory->destroy(cutghost);
}
/* ---------------------------------------------------------------------- */
void PairTersoffGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -101,7 +91,7 @@ void PairTersoffGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -110,28 +100,24 @@ void PairTersoffGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = tersoff_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh =
tersoff_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
tersoff_gpu_compute(neighbor->ago, inum, nall, inum+list->gnum,
atom->x, atom->type, ilist, numneigh, firstneigh, eflag,
vflag, eflag_atom, vflag_atom, host_start, cpu_time,
success);
tersoff_gpu_compute(neighbor->ago, inum, nall, inum + list->gnum, atom->x, atom->type, ilist,
numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start,
cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
}
/* ---------------------------------------------------------------------- */
@ -141,7 +127,7 @@ void PairTersoffGPU::allocate()
PairTersoff::allocate();
int n = atom->ntypes;
memory->create(cutghost,n+1,n+1,"pair:cutghost");
memory->create(cutghost, n + 1, n + 1, "pair:cutghost");
}
/* ----------------------------------------------------------------------
@ -152,8 +138,7 @@ void PairTersoffGPU::init_style()
{
double cell_size = cutmax + neighbor->skin;
if (atom->tag_enable == 0)
error->all(FLERR,"Pair style tersoff/gpu requires atom IDs");
if (atom->tag_enable == 0) error->all(FLERR, "Pair style tersoff/gpu requires atom IDs");
double *lam1, *lam2, *lam3, *powermint;
double *biga, *bigb, *bigr, *bigd;
@ -166,25 +151,25 @@ void PairTersoffGPU::init_style()
c = d = h = gamma = nullptr;
beta = powern = _cutsq = nullptr;
memory->create(lam1,nparams,"pair:lam1");
memory->create(lam2,nparams,"pair:lam2");
memory->create(lam3,nparams,"pair:lam3");
memory->create(powermint,nparams,"pair:powermint");
memory->create(biga,nparams,"pair:biga");
memory->create(bigb,nparams,"pair:bigb");
memory->create(bigr,nparams,"pair:bigr");
memory->create(bigd,nparams,"pair:bigd");
memory->create(c1,nparams,"pair:c1");
memory->create(c2,nparams,"pair:c2");
memory->create(c3,nparams,"pair:c3");
memory->create(c4,nparams,"pair:c4");
memory->create(c,nparams,"pair:c");
memory->create(d,nparams,"pair:d");
memory->create(h,nparams,"pair:h");
memory->create(gamma,nparams,"pair:gamma");
memory->create(beta,nparams,"pair:beta");
memory->create(powern,nparams,"pair:powern");
memory->create(_cutsq,nparams,"pair:_cutsq");
memory->create(lam1, nparams, "pair:lam1");
memory->create(lam2, nparams, "pair:lam2");
memory->create(lam3, nparams, "pair:lam3");
memory->create(powermint, nparams, "pair:powermint");
memory->create(biga, nparams, "pair:biga");
memory->create(bigb, nparams, "pair:bigb");
memory->create(bigr, nparams, "pair:bigr");
memory->create(bigd, nparams, "pair:bigd");
memory->create(c1, nparams, "pair:c1");
memory->create(c2, nparams, "pair:c2");
memory->create(c3, nparams, "pair:c3");
memory->create(c4, nparams, "pair:c4");
memory->create(c, nparams, "pair:c");
memory->create(d, nparams, "pair:d");
memory->create(h, nparams, "pair:h");
memory->create(gamma, nparams, "pair:gamma");
memory->create(beta, nparams, "pair:beta");
memory->create(powern, nparams, "pair:powern");
memory->create(_cutsq, nparams, "pair:_cutsq");
for (int i = 0; i < nparams; i++) {
lam1[i] = params[i].lam1;
@ -209,13 +194,10 @@ void PairTersoffGPU::init_style()
}
int mnf = 5e-2 * neighbor->oneatom;
int success = tersoff_gpu_init(atom->ntypes+1, atom->nlocal,
atom->nlocal+atom->nghost, mnf,
cell_size, gpu_mode, screen, map, nelements,
elem3param, nparams, lam1, lam2, lam3,
powermint, biga, bigb, bigr, bigd,
c1, c2, c3, c4, c, d, h, gamma,
beta, powern, _cutsq);
int success = tersoff_gpu_init(atom->ntypes + 1, atom->nlocal, atom->nlocal + atom->nghost, mnf,
cell_size, gpu_mode, screen, map, nelements, elem3param, nparams,
lam1, lam2, lam3, powermint, biga, bigb, bigr, bigd, c1, c2, c3,
c4, c, d, h, gamma, beta, powern, _cutsq);
memory->destroy(lam1);
memory->destroy(lam2);
@ -237,18 +219,13 @@ void PairTersoffGPU::init_style()
memory->destroy(powern);
memory->destroy(_cutsq);
GPU_EXTRA::check_flag(success,error,world);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->ghost = 1;
}
if (comm->cutghostuser < (2.0*cutmax + neighbor->skin)) {
comm->cutghostuser = 2.0*cutmax + neighbor->skin;
if (comm->me == 0)
error->warning(FLERR,"Increasing communication cutoff for GPU style");
if (gpu_mode == GPU_FORCE)
neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
if (comm->cutghostuser < (2.0 * cutmax + neighbor->skin)) {
comm->cutghostuser = 2.0 * cutmax + neighbor->skin;
if (comm->me == 0) error->warning(FLERR, "Increasing communication cutoff for GPU style");
}
}
@ -258,10 +235,9 @@ void PairTersoffGPU::init_style()
double PairTersoffGPU::init_one(int i, int j)
{
if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
cutghost[i][j] = cutmax;
cutghost[j][i] = cutmax;
return cutmax;
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
#include "gpu_extra.h"
#include "memory.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,35 +32,33 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int tersoff_mod_gpu_init(const int ntypes, const int inum, const int nall,
const int max_nbors, const double cell_size, int &gpu_mode, FILE *screen,
int* host_map, const int nelements, int*** host_elem3param, const int nparams,
const double* ts_lam1, const double* ts_lam2, const double* ts_lam3,
const double* ts_powermint, const double* ts_biga, const double* ts_bigb,
const double* ts_bigr, const double* ts_bigd, const double* ts_c1,
const double* ts_c2, const double* ts_c3, const double* ts_c4,
const double* ts_c5, const double* ts_h, const double* ts_beta,
const double* ts_powern, const double* ts_powern_del,
const double* ts_ca1, const double* ts_cutsq);
int tersoff_mod_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors,
const double cell_size, int &gpu_mode, FILE *screen, int *host_map,
const int nelements, int ***host_elem3param, const int nparams,
const double *ts_lam1, const double *ts_lam2, const double *ts_lam3,
const double *ts_powermint, const double *ts_biga, const double *ts_bigb,
const double *ts_bigr, const double *ts_bigd, const double *ts_c1,
const double *ts_c2, const double *ts_c3, const double *ts_c4,
const double *ts_c5, const double *ts_h, const double *ts_beta,
const double *ts_powern, const double *ts_powern_del, const double *ts_ca1,
const double *ts_cutsq);
void tersoff_mod_gpu_clear();
int ** tersoff_mod_gpu_compute_n(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success);
void tersoff_mod_gpu_compute(const int ago, const int nlocal, const int nall,
const int nlist, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, const double cpu_time, bool &success);
int **tersoff_mod_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum, const double cpu_time,
bool &success);
void tersoff_mod_gpu_compute(const int ago, const int nlocal, const int nall, const int nlist,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, const double cpu_time,
bool &success);
double tersoff_mod_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairTersoffMODGPU::PairTersoffMODGPU(LAMMPS *lmp) : PairTersoffMOD(lmp),
gpu_mode(GPU_FORCE)
PairTersoffMODGPU::PairTersoffMODGPU(LAMMPS *lmp) : PairTersoffMOD(lmp), gpu_mode(GPU_FORCE)
{
cpu_time = 0.0;
suffix_flag |= Suffix::GPU;
@ -79,15 +75,14 @@ PairTersoffMODGPU::PairTersoffMODGPU(LAMMPS *lmp) : PairTersoffMOD(lmp),
// Destructor: calls tersoff_mod_gpu_clear() and then, if the `allocated`
// flag is set, releases the cutghost array through memory->destroy().
// NOTE(review): the guarded destroy is listed twice (two-line and one-line
// layout) -- presumably the old and new form of the same statement from a
// formatting-only diff hunk; confirm before treating both as live code.
PairTersoffMODGPU::~PairTersoffMODGPU()
{
tersoff_mod_gpu_clear();
if (allocated)
memory->destroy(cutghost);
if (allocated) memory->destroy(cutghost);
}
/* ---------------------------------------------------------------------- */
void PairTersoffMODGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -95,7 +90,7 @@ void PairTersoffMODGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -104,28 +99,24 @@ void PairTersoffMODGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = tersoff_mod_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh = tersoff_mod_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial, atom->special, eflag,
vflag, eflag_atom, vflag_atom, host_start, &ilist,
&numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
tersoff_mod_gpu_compute(neighbor->ago, inum, nall, inum+list->gnum,
atom->x, atom->type, ilist, numneigh, firstneigh, eflag,
vflag, eflag_atom, vflag_atom, host_start, cpu_time,
success);
tersoff_mod_gpu_compute(neighbor->ago, inum, nall, inum + list->gnum, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom,
host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
}
/* ---------------------------------------------------------------------- */
@ -135,7 +126,7 @@ void PairTersoffMODGPU::allocate()
PairTersoffMOD::allocate();
int n = atom->ntypes;
memory->create(cutghost,n+1,n+1,"pair:cutghost");
memory->create(cutghost, n + 1, n + 1, "pair:cutghost");
}
/* ----------------------------------------------------------------------
@ -146,8 +137,7 @@ void PairTersoffMODGPU::init_style()
{
double cell_size = cutmax + neighbor->skin;
if (atom->tag_enable == 0)
error->all(FLERR,"Pair style tersoff/mod/gpu requires atom IDs");
if (atom->tag_enable == 0) error->all(FLERR, "Pair style tersoff/mod/gpu requires atom IDs");
double *lam1, *lam2, *lam3, *powermint;
double *biga, *bigb, *bigr, *bigd;
@ -159,25 +149,25 @@ void PairTersoffMODGPU::init_style()
c1 = c2 = c3 = c4 = c5 = h = nullptr;
beta = powern = _cutsq = nullptr;
memory->create(lam1,nparams,"pair:lam1");
memory->create(lam2,nparams,"pair:lam2");
memory->create(lam3,nparams,"pair:lam3");
memory->create(powermint,nparams,"pair:powermint");
memory->create(biga,nparams,"pair:biga");
memory->create(bigb,nparams,"pair:bigb");
memory->create(bigr,nparams,"pair:bigr");
memory->create(bigd,nparams,"pair:bigd");
memory->create(c1,nparams,"pair:c1");
memory->create(c2,nparams,"pair:c2");
memory->create(c3,nparams,"pair:c3");
memory->create(c4,nparams,"pair:c4");
memory->create(c5,nparams,"pair:c5");
memory->create(h,nparams,"pair:h");
memory->create(beta,nparams,"pair:beta");
memory->create(powern,nparams,"pair:powern");
memory->create(powern_del,nparams,"pair:powern_del");
memory->create(ca1,nparams,"pair:ca1");
memory->create(_cutsq,nparams,"pair:_cutsq");
memory->create(lam1, nparams, "pair:lam1");
memory->create(lam2, nparams, "pair:lam2");
memory->create(lam3, nparams, "pair:lam3");
memory->create(powermint, nparams, "pair:powermint");
memory->create(biga, nparams, "pair:biga");
memory->create(bigb, nparams, "pair:bigb");
memory->create(bigr, nparams, "pair:bigr");
memory->create(bigd, nparams, "pair:bigd");
memory->create(c1, nparams, "pair:c1");
memory->create(c2, nparams, "pair:c2");
memory->create(c3, nparams, "pair:c3");
memory->create(c4, nparams, "pair:c4");
memory->create(c5, nparams, "pair:c5");
memory->create(h, nparams, "pair:h");
memory->create(beta, nparams, "pair:beta");
memory->create(powern, nparams, "pair:powern");
memory->create(powern_del, nparams, "pair:powern_del");
memory->create(ca1, nparams, "pair:ca1");
memory->create(_cutsq, nparams, "pair:_cutsq");
for (int i = 0; i < nparams; i++) {
lam1[i] = params[i].lam1;
@ -202,13 +192,10 @@ void PairTersoffMODGPU::init_style()
}
int mnf = 5e-2 * neighbor->oneatom;
int success = tersoff_mod_gpu_init(atom->ntypes+1, atom->nlocal,
atom->nlocal+atom->nghost, mnf,
cell_size, gpu_mode, screen, map, nelements,
elem3param, nparams, lam1, lam2, lam3,
powermint, biga, bigb, bigr, bigd,
c1, c2, c3, c4, c5, h, beta, powern,
powern_del, ca1, _cutsq);
int success = tersoff_mod_gpu_init(atom->ntypes + 1, atom->nlocal, atom->nlocal + atom->nghost,
mnf, cell_size, gpu_mode, screen, map, nelements, elem3param,
nparams, lam1, lam2, lam3, powermint, biga, bigb, bigr, bigd,
c1, c2, c3, c4, c5, h, beta, powern, powern_del, ca1, _cutsq);
memory->destroy(lam1);
memory->destroy(lam2);
@ -230,18 +217,13 @@ void PairTersoffMODGPU::init_style()
memory->destroy(powern_del);
memory->destroy(_cutsq);
GPU_EXTRA::check_flag(success,error,world);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->ghost = 1;
}
if (comm->cutghostuser < (2.0*cutmax + neighbor->skin)) {
comm->cutghostuser = 2.0*cutmax + neighbor->skin;
if (comm->me == 0)
error->warning(FLERR,"Increasing communication cutoff for GPU style");
if (gpu_mode == GPU_FORCE)
neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
if (comm->cutghostuser < (2.0 * cutmax + neighbor->skin)) {
comm->cutghostuser = 2.0 * cutmax + neighbor->skin;
if (comm->me == 0) error->warning(FLERR, "Increasing communication cutoff for GPU style");
}
}
@ -251,10 +233,9 @@ void PairTersoffMODGPU::init_style()
// Per-type-pair initialization for the (i,j) pair.
// Reports "All pair coeffs are not set" through error->all() when
// setflag[i][j] is 0, stores cutmax as the ghost cutoff for both (i,j) and
// (j,i), and returns cutmax as the cutoff for this pair.
// NOTE(review): the setflag check appears twice, differing only in the
// whitespace after the comma -- this looks like the before/after pair of a
// formatting-only diff hunk rather than two live statements; confirm.
double PairTersoffMODGPU::init_one(int i, int j)
{
if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
// the same cutoff is written to both orderings of the type pair
cutghost[i][j] = cutmax;
cutghost[j][i] = cutmax;
return cutmax;
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -34,42 +33,36 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int tersoff_zbl_gpu_init(const int ntypes, const int inum, const int nall,
const int max_nbors, const double cell_size, int &gpu_mode,
FILE *screen, int* host_map, const int nelements,
int*** host_elem3param, const int nparams,
const double* ts_lam1, const double* ts_lam2,
const double* ts_lam3, const double* ts_powermint,
const double* ts_biga, const double* ts_bigb,
const double* ts_bigr, const double* ts_bigd,
const double* ts_c1, const double* ts_c2,
const double* ts_c3, const double* ts_c4,
const double* ts_c, const double* ts_d,
const double* ts_h, const double* ts_gamma,
const double* ts_beta, const double* ts_powern,
const double* ts_Z_i, const double* ts_Z_j,
const double* ts_ZBLcut, const double* ts_ZBLexpscale,
const double global_e, const double global_a_0,
const double global_epsilon_0, const double* ts_cutsq);
int tersoff_zbl_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors,
const double cell_size, int &gpu_mode, FILE *screen, int *host_map,
const int nelements, int ***host_elem3param, const int nparams,
const double *ts_lam1, const double *ts_lam2, const double *ts_lam3,
const double *ts_powermint, const double *ts_biga, const double *ts_bigb,
const double *ts_bigr, const double *ts_bigd, const double *ts_c1,
const double *ts_c2, const double *ts_c3, const double *ts_c4,
const double *ts_c, const double *ts_d, const double *ts_h,
const double *ts_gamma, const double *ts_beta, const double *ts_powern,
const double *ts_Z_i, const double *ts_Z_j, const double *ts_ZBLcut,
const double *ts_ZBLexpscale, const double global_e,
const double global_a_0, const double global_epsilon_0,
const double *ts_cutsq);
void tersoff_zbl_gpu_clear();
int ** tersoff_zbl_gpu_compute_n(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success);
void tersoff_zbl_gpu_compute(const int ago, const int nlocal, const int nall,
const int nlist, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, const double cpu_time, bool &success);
int **tersoff_zbl_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag,
const bool vflag, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum, const double cpu_time,
bool &success);
void tersoff_zbl_gpu_compute(const int ago, const int nlocal, const int nall, const int nlist,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, const double cpu_time,
bool &success);
double tersoff_zbl_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairTersoffZBLGPU::PairTersoffZBLGPU(LAMMPS *lmp) : PairTersoffZBL(lmp),
gpu_mode(GPU_FORCE)
PairTersoffZBLGPU::PairTersoffZBLGPU(LAMMPS *lmp) : PairTersoffZBL(lmp), gpu_mode(GPU_FORCE)
{
cpu_time = 0.0;
suffix_flag |= Suffix::GPU;
@ -86,15 +79,14 @@ PairTersoffZBLGPU::PairTersoffZBLGPU(LAMMPS *lmp) : PairTersoffZBL(lmp),
// Destructor: calls tersoff_zbl_gpu_clear() and then, if the `allocated`
// flag is set, releases the cutghost array through memory->destroy().
// NOTE(review): the guarded destroy is listed twice (two-line and one-line
// layout) -- presumably the old and new form of the same statement from a
// formatting-only diff hunk; confirm before treating both as live code.
PairTersoffZBLGPU::~PairTersoffZBLGPU()
{
tersoff_zbl_gpu_clear();
if (allocated)
memory->destroy(cutghost);
if (allocated) memory->destroy(cutghost);
}
/* ---------------------------------------------------------------------- */
void PairTersoffZBLGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -102,7 +94,7 @@ void PairTersoffZBLGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -111,28 +103,24 @@ void PairTersoffZBLGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = tersoff_zbl_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh = tersoff_zbl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial, atom->special, eflag,
vflag, eflag_atom, vflag_atom, host_start, &ilist,
&numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
tersoff_zbl_gpu_compute(neighbor->ago, inum, nall, inum+list->gnum,
atom->x, atom->type, ilist, numneigh, firstneigh, eflag,
vflag, eflag_atom, vflag_atom, host_start, cpu_time,
success);
tersoff_zbl_gpu_compute(neighbor->ago, inum, nall, inum + list->gnum, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom,
host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
}
/* ---------------------------------------------------------------------- */
@ -140,9 +128,9 @@ void PairTersoffZBLGPU::compute(int eflag, int vflag)
// Allocates per-type storage: first delegates to PairTersoffZBL::allocate(),
// then creates the square cutghost array with (atom->ntypes + 1) entries per
// dimension under the label "pair:cutghost".
// NOTE(review): the extra row/column presumably allows indexing by 1-based
// atom type -- confirm against the callers that index cutghost.
// NOTE(review): `n` / `np1` and the two memory->create() calls look like the
// old and new spelling of the same two statements from a diff hunk, not four
// live statements; confirm before treating this as compilable code.
void PairTersoffZBLGPU::allocate()
{
PairTersoffZBL::allocate();
int n = atom->ntypes;
int np1 = atom->ntypes + 1;
memory->create(cutghost,n+1,n+1,"pair:cutghost");
memory->create(cutghost, np1, np1, "pair:cutghost");
}
/* ----------------------------------------------------------------------
@ -153,8 +141,7 @@ void PairTersoffZBLGPU::init_style()
{
double cell_size = cutmax + neighbor->skin;
if (atom->tag_enable == 0)
error->all(FLERR,"Pair style tersoff/zbl/gpu requires atom IDs");
if (atom->tag_enable == 0) error->all(FLERR, "Pair style tersoff/zbl/gpu requires atom IDs");
double *lam1, *lam2, *lam3, *powermint;
double *biga, *bigb, *bigr, *bigd;
@ -167,29 +154,29 @@ void PairTersoffZBLGPU::init_style()
c = d = h = gamma = nullptr;
beta = powern = Z_i = Z_j = ZBLcut = ZBLexpscale = _cutsq = nullptr;
memory->create(lam1,nparams,"pair:lam1");
memory->create(lam2,nparams,"pair:lam2");
memory->create(lam3,nparams,"pair:lam3");
memory->create(powermint,nparams,"pair:powermint");
memory->create(biga,nparams,"pair:biga");
memory->create(bigb,nparams,"pair:bigb");
memory->create(bigr,nparams,"pair:bigr");
memory->create(bigd,nparams,"pair:bigd");
memory->create(c1,nparams,"pair:c1");
memory->create(c2,nparams,"pair:c2");
memory->create(c3,nparams,"pair:c3");
memory->create(c4,nparams,"pair:c4");
memory->create(c,nparams,"pair:c");
memory->create(d,nparams,"pair:d");
memory->create(h,nparams,"pair:h");
memory->create(gamma,nparams,"pair:gamma");
memory->create(beta,nparams,"pair:beta");
memory->create(powern,nparams,"pair:powern");
memory->create(Z_i,nparams,"pair:Z_i");
memory->create(Z_j,nparams,"pair:Z_j");
memory->create(ZBLcut,nparams,"pair:ZBLcut");
memory->create(ZBLexpscale,nparams,"pair:ZBLexpscale");
memory->create(_cutsq,nparams,"pair:_cutsq");
memory->create(lam1, nparams, "pair:lam1");
memory->create(lam2, nparams, "pair:lam2");
memory->create(lam3, nparams, "pair:lam3");
memory->create(powermint, nparams, "pair:powermint");
memory->create(biga, nparams, "pair:biga");
memory->create(bigb, nparams, "pair:bigb");
memory->create(bigr, nparams, "pair:bigr");
memory->create(bigd, nparams, "pair:bigd");
memory->create(c1, nparams, "pair:c1");
memory->create(c2, nparams, "pair:c2");
memory->create(c3, nparams, "pair:c3");
memory->create(c4, nparams, "pair:c4");
memory->create(c, nparams, "pair:c");
memory->create(d, nparams, "pair:d");
memory->create(h, nparams, "pair:h");
memory->create(gamma, nparams, "pair:gamma");
memory->create(beta, nparams, "pair:beta");
memory->create(powern, nparams, "pair:powern");
memory->create(Z_i, nparams, "pair:Z_i");
memory->create(Z_j, nparams, "pair:Z_j");
memory->create(ZBLcut, nparams, "pair:ZBLcut");
memory->create(ZBLexpscale, nparams, "pair:ZBLexpscale");
memory->create(_cutsq, nparams, "pair:_cutsq");
for (int i = 0; i < nparams; i++) {
lam1[i] = params[i].lam1;
@ -218,14 +205,11 @@ void PairTersoffZBLGPU::init_style()
}
int mnf = 5e-2 * neighbor->oneatom;
int success = tersoff_zbl_gpu_init(atom->ntypes+1, atom->nlocal,
atom->nlocal+atom->nghost, mnf,
cell_size, gpu_mode, screen, map, nelements,
elem3param, nparams, lam1, lam2, lam3,
powermint, biga, bigb, bigr, bigd,
c1, c2, c3, c4, c, d, h, gamma,
beta, powern, Z_i, Z_j, ZBLcut, ZBLexpscale,
global_e, global_a_0, global_epsilon_0, _cutsq);
int success = tersoff_zbl_gpu_init(atom->ntypes + 1, atom->nlocal, atom->nlocal + atom->nghost,
mnf, cell_size, gpu_mode, screen, map, nelements, elem3param,
nparams, lam1, lam2, lam3, powermint, biga, bigb, bigr, bigd,
c1, c2, c3, c4, c, d, h, gamma, beta, powern, Z_i, Z_j, ZBLcut,
ZBLexpscale, global_e, global_a_0, global_epsilon_0, _cutsq);
memory->destroy(lam1);
memory->destroy(lam2);
@ -251,18 +235,13 @@ void PairTersoffZBLGPU::init_style()
memory->destroy(ZBLexpscale);
memory->destroy(_cutsq);
GPU_EXTRA::check_flag(success,error,world);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->ghost = 1;
}
if (comm->cutghostuser < (2.0*cutmax + neighbor->skin)) {
comm->cutghostuser = 2.0*cutmax + neighbor->skin;
if (comm->me == 0)
error->warning(FLERR,"Increasing communication cutoff for GPU style");
if (gpu_mode == GPU_FORCE)
neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
if (comm->cutghostuser < (2.0 * cutmax + neighbor->skin)) {
comm->cutghostuser = 2.0 * cutmax + neighbor->skin;
if (comm->me == 0) error->warning(FLERR, "Increasing communication cutoff for GPU style");
}
}
@ -272,10 +251,9 @@ void PairTersoffZBLGPU::init_style()
// Per-type-pair initialization for the (i,j) pair.
// Reports "All pair coeffs are not set" through error->all() when
// setflag[i][j] is 0, stores cutmax as the ghost cutoff for both (i,j) and
// (j,i), and returns cutmax as the cutoff for this pair.
// NOTE(review): the setflag check appears twice, differing only in the
// whitespace after the comma -- this looks like the before/after pair of a
// formatting-only diff hunk rather than two live statements; confirm.
double PairTersoffZBLGPU::init_one(int i, int j)
{
if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
// the same cutoff is written to both orderings of the type pair
cutghost[i][j] = cutmax;
cutghost[j][i] = cutmax;
return cutmax;
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -36,27 +34,23 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1,
double **host_uf2, double **host_uf3,
double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen);
int ufml_gpu_init(const int ntypes, double **cutsq, double **host_uf1, double **host_uf2,
double **host_uf3, double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen);
void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1,
double **host_uf2, double **host_uf3, double **offset);
void ufml_gpu_reinit(const int ntypes, double **cutsq, double **host_uf1, double **host_uf2,
double **host_uf3, double **offset);
void ufml_gpu_clear();
int ** ufml_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
const double cpu_time, bool &success);
void ufml_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ufml_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void ufml_gpu_compute(const int ago, const int inum, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
double ufml_gpu_bytes();
@ -83,7 +77,7 @@ PairUFMGPU::~PairUFMGPU()
void PairUFMGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -91,7 +85,7 @@ void PairUFMGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -100,28 +94,24 @@ void PairUFMGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = ufml_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh =
ufml_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
ufml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
ufml_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -134,8 +124,7 @@ void PairUFMGPU::compute(int eflag, int vflag)
void PairUFMGPU::init_style()
{
// cut_respa = nullptr;
// cut_respa = nullptr;
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -143,10 +132,9 @@ void PairUFMGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -154,21 +142,15 @@ void PairUFMGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = ufml_gpu_init(atom->ntypes+1, cutsq, uf1, uf2, uf3,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success =
ufml_gpu_init(atom->ntypes + 1, cutsq, uf1, uf2, uf3, offset, force->special_lj, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -177,7 +159,7 @@ void PairUFMGPU::reinit()
{
Pair::reinit();
ufml_gpu_reinit(atom->ntypes+1, cutsq, uf1, uf2, uf3, offset);
ufml_gpu_reinit(atom->ntypes + 1, cutsq, uf1, uf2, uf3, offset);
}
/* ---------------------------------------------------------------------- */
@ -190,11 +172,12 @@ double PairUFMGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairUFMGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,expuf,factor_lj;
void PairUFMGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, expuf, factor_lj;
int *jlist;
double **x = atom->x;
@ -202,7 +185,6 @@ void PairUFMGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *type = atom->type;
double *special_lj = force->special_lj;
// loop over neighbors of my atoms
for (ii = start; ii < inum; ii++) {
@ -222,22 +204,22 @@ void PairUFMGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
expuf = exp(- rsq * uf2[itype][jtype]);
fpair = factor_lj * uf1[itype][jtype] * expuf /(1.0 - expuf);
expuf = exp(-rsq * uf2[itype][jtype]);
fpair = factor_lj * uf1[itype][jtype] * expuf / (1.0 - expuf);
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
evdwl = -factor_lj * uf3[itype][jtype] * log(1.0 - expuf) - offset[itype][jtype];
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -26,7 +25,6 @@
#include "gpu_extra.h"
#include "memory.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,33 +32,25 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int vashishta_gpu_init(const int ntypes, const int inum, const int nall,
const int max_nbors, const double cell_size,
int &gpu_mode, FILE *screen, int* host_map,
const int nelements, int*** host_elem3param,
const int nparams, const double* cutsq, const double* r0,
const double* gamma, const double* eta,
const double* lam1inv, const double* lam4inv,
const double* zizj, const double* mbigd,
const double* dvrc, const double* big6w,
const double* heta, const double* bigh,
const double* bigw, const double* c0,
const double* costheta, const double* bigb,
const double* big2b, const double* bigc);
int vashishta_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors,
const double cell_size, int &gpu_mode, FILE *screen, int *host_map,
const int nelements, int ***host_elem3param, const int nparams,
const double *cutsq, const double *r0, const double *gamma,
const double *eta, const double *lam1inv, const double *lam4inv,
const double *zizj, const double *mbigd, const double *dvrc,
const double *big6w, const double *heta, const double *bigh,
const double *bigw, const double *c0, const double *costheta,
const double *bigb, const double *big2b, const double *bigc);
void vashishta_gpu_clear();
int ** vashishta_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
const double cpu_time, bool &success);
void vashishta_gpu_compute(const int ago, const int nloc, const int nall,
const int ln, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
int **vashishta_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success);
void vashishta_gpu_compute(const int ago, const int nloc, const int nall, const int ln,
double **host_x, int *host_type, int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
int &host_start, const double cpu_time, bool &success);
double vashishta_gpu_bytes();
/* ---------------------------------------------------------------------- */
@ -84,15 +74,14 @@ PairVashishtaGPU::PairVashishtaGPU(LAMMPS *lmp) : PairVashishta(lmp), gpu_mode(G
// Destructor: calls vashishta_gpu_clear() and then, if the `allocated`
// flag is set, releases the cutghost array through memory->destroy().
// NOTE(review): the guarded destroy is listed twice (two-line and one-line
// layout) -- presumably the old and new form of the same statement from a
// formatting-only diff hunk; confirm before treating both as live code.
PairVashishtaGPU::~PairVashishtaGPU()
{
vashishta_gpu_clear();
if (allocated)
memory->destroy(cutghost);
if (allocated) memory->destroy(cutghost);
}
/* ---------------------------------------------------------------------- */
void PairVashishtaGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -100,7 +89,7 @@ void PairVashishtaGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -109,40 +98,34 @@ void PairVashishtaGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = vashishta_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh =
vashishta_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
vashishta_gpu_compute(neighbor->ago, inum, nall, inum+list->gnum,
atom->x, atom->type, ilist, numneigh, firstneigh, eflag,
vflag, eflag_atom, vflag_atom, host_start, cpu_time,
success);
vashishta_gpu_compute(neighbor->ago, inum, nall, inum + list->gnum, atom->x, atom->type, ilist,
numneigh, firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start,
cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
}
/* ---------------------------------------------------------------------- */
void PairVashishtaGPU::allocate()
{
if (!allocated) {
PairVashishta::allocate();
}
if (!allocated) { PairVashishta::allocate(); }
int n = atom->ntypes;
memory->create(cutghost,n+1,n+1,"pair:cutghost");
memory->create(cutghost, n + 1, n + 1, "pair:cutghost");
gpu_allocated = true;
}
@ -154,8 +137,7 @@ void PairVashishtaGPU::init_style()
{
double cell_size = cutmax + neighbor->skin;
if (atom->tag_enable == 0)
error->all(FLERR,"Pair style vashishta/gpu requires atom IDs");
if (atom->tag_enable == 0) error->all(FLERR, "Pair style vashishta/gpu requires atom IDs");
double *cutsq, *r0, *gamma, *eta;
double *lam1inv, *lam4inv, *zizj, *mbigd;
@ -169,24 +151,24 @@ void PairVashishtaGPU::init_style()
bigw = c0 = costheta = bigb = nullptr;
big2b = bigc = nullptr;
memory->create(cutsq,nparams,"pair:cutsq");
memory->create(r0,nparams,"pair:r0");
memory->create(gamma,nparams,"pair:gamma");
memory->create(eta,nparams,"pair:eta");
memory->create(lam1inv,nparams,"pair:lam1inv");
memory->create(lam4inv,nparams,"pair:lam4inv");
memory->create(zizj,nparams,"pair:zizj");
memory->create(mbigd,nparams,"pair:mbigd");
memory->create(dvrc,nparams,"pair:dvrc");
memory->create(big6w,nparams,"pair:big6w");
memory->create(heta,nparams,"pair:heta");
memory->create(bigh,nparams,"pair:bigh");
memory->create(bigw,nparams,"pair:bigw");
memory->create(c0,nparams,"pair:c0");
memory->create(costheta,nparams,"pair:costheta");
memory->create(bigb,nparams,"pair:bigb");
memory->create(big2b,nparams,"pair:big2b");
memory->create(bigc,nparams,"pair:bigc");
memory->create(cutsq, nparams, "pair:cutsq");
memory->create(r0, nparams, "pair:r0");
memory->create(gamma, nparams, "pair:gamma");
memory->create(eta, nparams, "pair:eta");
memory->create(lam1inv, nparams, "pair:lam1inv");
memory->create(lam4inv, nparams, "pair:lam4inv");
memory->create(zizj, nparams, "pair:zizj");
memory->create(mbigd, nparams, "pair:mbigd");
memory->create(dvrc, nparams, "pair:dvrc");
memory->create(big6w, nparams, "pair:big6w");
memory->create(heta, nparams, "pair:heta");
memory->create(bigh, nparams, "pair:bigh");
memory->create(bigw, nparams, "pair:bigw");
memory->create(c0, nparams, "pair:c0");
memory->create(costheta, nparams, "pair:costheta");
memory->create(bigb, nparams, "pair:bigb");
memory->create(big2b, nparams, "pair:big2b");
memory->create(bigc, nparams, "pair:bigc");
for (int i = 0; i < nparams; i++) {
cutsq[i] = params[i].cutsq;
@ -209,11 +191,10 @@ void PairVashishtaGPU::init_style()
bigc[i] = params[i].bigc;
}
int mnf = 5e-2 * neighbor->oneatom;
int success = vashishta_gpu_init(atom->ntypes+1, atom->nlocal, atom->nlocal+atom->nghost, mnf,
cell_size, gpu_mode, screen, map, nelements,
elem3param, nparams, cutsq, r0, gamma, eta, lam1inv,
lam4inv, zizj, mbigd, dvrc, big6w, heta, bigh, bigw,
c0, costheta, bigb, big2b, bigc);
int success = vashishta_gpu_init(atom->ntypes + 1, atom->nlocal, atom->nlocal + atom->nghost, mnf,
cell_size, gpu_mode, screen, map, nelements, elem3param, nparams,
cutsq, r0, gamma, eta, lam1inv, lam4inv, zizj, mbigd, dvrc,
big6w, heta, bigh, bigw, c0, costheta, bigb, big2b, bigc);
memory->destroy(cutsq);
memory->destroy(r0);
memory->destroy(gamma);
@ -233,18 +214,13 @@ void PairVashishtaGPU::init_style()
memory->destroy(big2b);
memory->destroy(bigc);
GPU_EXTRA::check_flag(success,error,world);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->ghost = 1;
}
if (comm->cutghostuser < (2.0*cutmax + neighbor->skin)) {
comm->cutghostuser=2.0*cutmax + neighbor->skin;
if (comm->me == 0)
error->warning(FLERR,"Increasing communication cutoff for GPU style");
if (gpu_mode == GPU_FORCE)
neighbor->add_request(this, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
if (comm->cutghostuser < (2.0 * cutmax + neighbor->skin)) {
comm->cutghostuser = 2.0 * cutmax + neighbor->skin;
if (comm->me == 0) error->warning(FLERR, "Increasing communication cutoff for GPU style");
}
}
@ -254,13 +230,10 @@ void PairVashishtaGPU::init_style()
double PairVashishtaGPU::init_one(int i, int j)
{
if (!gpu_allocated) {
allocate();
}
if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
if (!gpu_allocated) { allocate(); }
if (setflag[i][j] == 0) error->all(FLERR, "All pair coeffs are not set");
cutghost[i][j] = cutmax;
cutghost[j][i] = cutmax;
return cutmax;
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,34 +32,26 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int ykcolloid_gpu_init(const int ntypes, double **cutsq, double **host_a,
double **host_offset, double *special_lj, const int inum,
const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen, const double kappa);
int ykcolloid_gpu_init(const int ntypes, double **cutsq, double **host_a, double **host_offset,
double *special_lj, const int inum, const int nall, const int max_nbors,
const int maxspecial, const double cell_size, int &gpu_mode, FILE *screen,
const double kappa);
void ykcolloid_gpu_clear();
int ** ykcolloid_gpu_compute_n(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special,
const bool eflag, const bool vflag,
const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success,
double *host_rad);
void ykcolloid_gpu_compute(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success,
double *host_rad);
int **ykcolloid_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success, double *host_rad);
void ykcolloid_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
int &host_start, const double cpu_time, bool &success, double *host_rad);
double ykcolloid_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairYukawaColloidGPU::PairYukawaColloidGPU(LAMMPS *lmp) : PairYukawaColloid(lmp),
gpu_mode(GPU_FORCE)
PairYukawaColloidGPU::PairYukawaColloidGPU(LAMMPS *lmp) :
PairYukawaColloid(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -83,7 +73,7 @@ PairYukawaColloidGPU::~PairYukawaColloidGPU()
void PairYukawaColloidGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -91,7 +81,7 @@ void PairYukawaColloidGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -100,32 +90,25 @@ void PairYukawaColloidGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = ykcolloid_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type,
sublo,
subhi, atom->tag,
atom->nspecial, atom->special,
eflag, vflag, eflag_atom,
vflag_atom, host_start, &ilist,
&numneigh, cpu_time,
success, atom->radius);
firstneigh = ykcolloid_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial, atom->special, eflag,
vflag, eflag_atom, vflag_atom, host_start, &ilist,
&numneigh, cpu_time, success, atom->radius);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
ykcolloid_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag,
eflag_atom, vflag_atom, host_start, cpu_time,
ykcolloid_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh,
firstneigh, eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time,
success, atom->radius);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -138,9 +121,7 @@ void PairYukawaColloidGPU::compute(int eflag, int vflag)
void PairYukawaColloidGPU::init_style()
{
if (!atom->sphere_flag)
error->all(FLERR,"Pair yukawa/colloid/gpu requires atom style sphere");
if (!atom->sphere_flag) error->all(FLERR, "Pair yukawa/colloid/gpu requires atom style sphere");
// Repeat cutsq calculation because done after call to init_style
double maxcut = -1.0;
@ -148,10 +129,9 @@ void PairYukawaColloidGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -159,21 +139,15 @@ void PairYukawaColloidGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = ykcolloid_gpu_init(atom->ntypes+1, cutsq, a,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
int success = ykcolloid_gpu_init(atom->ntypes + 1, cutsq, a, offset, force->special_lj,
atom->nlocal, atom->nlocal + atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen, kappa);
GPU_EXTRA::check_flag(success,error,world);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -186,12 +160,12 @@ double PairYukawaColloidGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairYukawaColloidGPU::cpu_compute(int start, int inum, int eflag,
int /* vflag */, int *ilist,
int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair,radi,radj;
double r,rsq,rinv,screening,forceyukawa,factor;
void PairYukawaColloidGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair, radi, radj;
double r, rsq, rinv, screening, forceyukawa, factor;
int *jlist;
double **x = atom->x;
@ -220,28 +194,28 @@ void PairYukawaColloidGPU::cpu_compute(int start, int inum, int eflag,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
radj = radius[j];
if (rsq < cutsq[itype][jtype]) {
r = sqrt(rsq);
rinv = 1.0/r;
screening = exp(-kappa*(r-(radi+radj)));
rinv = 1.0 / r;
screening = exp(-kappa * (r - (radi + radj)));
forceyukawa = a[itype][jtype] * screening;
fpair = factor*forceyukawa * rinv;
fpair = factor * forceyukawa * rinv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
evdwl = a[itype][jtype]/kappa * screening - offset[itype][jtype];
evdwl = a[itype][jtype] / kappa * screening - offset[itype][jtype];
evdwl *= factor;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,30 +32,25 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int yukawa_gpu_init(const int ntypes, double **cutsq, double kappa,
double **host_a, double **offset, double *special_lj,
const int inum, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
int yukawa_gpu_init(const int ntypes, double **cutsq, double kappa, double **host_a,
double **offset, double *special_lj, const int inum, const int nall,
const int max_nbors, const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen);
void yukawa_gpu_clear();
int ** yukawa_gpu_compute_n(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag,
const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success);
void yukawa_gpu_compute(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **yukawa_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag,
int **nspecial, tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, int **ilist,
int **jnum, const double cpu_time, bool &success);
void yukawa_gpu_compute(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, int *ilist, int *numj, int **firstneigh, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
double yukawa_gpu_bytes();
/* ---------------------------------------------------------------------- */
PairYukawaGPU::PairYukawaGPU(LAMMPS *lmp) : PairYukawa(lmp),
gpu_mode(GPU_FORCE)
PairYukawaGPU::PairYukawaGPU(LAMMPS *lmp) : PairYukawa(lmp), gpu_mode(GPU_FORCE)
{
respa_enable = 0;
reinitflag = 0;
@ -79,7 +72,7 @@ PairYukawaGPU::~PairYukawaGPU()
void PairYukawaGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -87,7 +80,7 @@ void PairYukawaGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -96,28 +89,24 @@ void PairYukawaGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = yukawa_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh =
yukawa_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi,
atom->tag, atom->nspecial, atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
yukawa_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
yukawa_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -137,10 +126,9 @@ void PairYukawaGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -148,21 +136,15 @@ void PairYukawaGPU::init_style()
}
double cell_size = sqrt(maxcut) + neighbor->skin;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = yukawa_gpu_init(atom->ntypes+1, cutsq, kappa, a,
offset, force->special_lj, atom->nlocal,
atom->nlocal+atom->nghost, mnf, maxspecial,
cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success =
yukawa_gpu_init(atom->ntypes + 1, cutsq, kappa, a, offset, force->special_lj, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this,instance_me);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -175,11 +157,12 @@ double PairYukawaGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairYukawaGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r2inv,r,rinv,screening,forceyukawa,factor;
void PairYukawaGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r2inv, r, rinv, screening, forceyukawa, factor;
int *jlist;
double **x = atom->x;
@ -206,28 +189,28 @@ void PairYukawaGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
r2inv = 1.0 / rsq;
r = sqrt(rsq);
rinv = 1.0/r;
screening = exp(-kappa*r);
rinv = 1.0 / r;
screening = exp(-kappa * r);
forceyukawa = a[itype][jtype] * screening * (kappa + rinv);
fpair = factor*forceyukawa * r2inv;
fpair = factor * forceyukawa * r2inv;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
evdwl = a[itype][jtype] * screening * rinv - offset[itype][jtype];
evdwl *= factor;
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}

View File

@ -1,4 +1,3 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
@ -24,7 +23,6 @@
#include "force.h"
#include "gpu_extra.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "neighbor.h"
#include "suffix.h"
@ -34,27 +32,22 @@ using namespace LAMMPS_NS;
// External functions from cuda library for atom decomposition
int zbl_gpu_init(const int ntypes, double **cutsq, double **host_sw1,
double **host_sw2, double **host_sw3, double **host_sw4,
double **host_sw5, double **host_d1a, double **host_d2a,
double **host_d3a, double **host_d4a, double **host_zze,
double cut_globalsq, double cut_innersq, double cut_inner,
const int inum, const int nall, const int max_nbors,
const int maxspecial, const double cell_size,
int zbl_gpu_init(const int ntypes, double **cutsq, double **host_sw1, double **host_sw2,
double **host_sw3, double **host_sw4, double **host_sw5, double **host_d1a,
double **host_d2a, double **host_d3a, double **host_d4a, double **host_zze,
double cut_globalsq, double cut_innersq, double cut_inner, const int inum,
const int nall, const int max_nbors, const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen);
void zbl_gpu_clear();
int ** zbl_gpu_compute_n(const int ago, const int inum, const int nall,
double **host_x, int *host_type, double *sublo,
double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum,
const double cpu_time, bool &success);
void zbl_gpu_compute(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *ilist, int *numj,
int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
const double cpu_time, bool &success);
int **zbl_gpu_compute_n(const int ago, const int inum, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success);
void zbl_gpu_compute(const int ago, const int inum, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const double cpu_time,
bool &success);
double zbl_gpu_bytes();
/* ---------------------------------------------------------------------- */
@ -81,7 +74,7 @@ PairZBLGPU::~PairZBLGPU()
void PairZBLGPU::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
ev_init(eflag, vflag);
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -89,7 +82,7 @@ void PairZBLGPU::compute(int eflag, int vflag)
bool success = true;
int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) {
double sublo[3],subhi[3];
double sublo[3], subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
sublo[1] = domain->sublo[1];
@ -98,28 +91,24 @@ void PairZBLGPU::compute(int eflag, int vflag)
subhi[1] = domain->subhi[1];
subhi[2] = domain->subhi[2];
} else {
domain->bbox(domain->sublo_lamda,domain->subhi_lamda,sublo,subhi);
domain->bbox(domain->sublo_lamda, domain->subhi_lamda, sublo, subhi);
}
inum = atom->nlocal;
firstneigh = zbl_gpu_compute_n(neighbor->ago, inum, nall,
atom->x, atom->type, sublo,
subhi, atom->tag, atom->nspecial,
atom->special, eflag, vflag, eflag_atom,
vflag_atom, host_start,
&ilist, &numneigh, cpu_time, success);
firstneigh =
zbl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, atom->type, sublo, subhi, atom->tag,
atom->nspecial, atom->special, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, success);
} else {
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
zbl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success);
zbl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, ilist, numneigh, firstneigh,
eflag, vflag, eflag_atom, vflag_atom, host_start, cpu_time, success);
}
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
if (!success) error->one(FLERR, "Insufficient memory on accelerator");
if (host_start<inum) {
if (host_start < inum) {
cpu_time = platform::walltime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = platform::walltime() - cpu_time;
@ -139,10 +128,9 @@ void PairZBLGPU::init_style()
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i,j);
cut = init_one(i, j);
cut *= cut;
if (cut > maxcut)
maxcut = cut;
if (cut > maxcut) maxcut = cut;
cutsq[i][j] = cutsq[j][i] = cut;
} else
cutsq[i][j] = cutsq[j][i] = 0.0;
@ -153,22 +141,16 @@ void PairZBLGPU::init_style()
cut_innersq = cut_inner * cut_inner;
cut_globalsq = cut_global * cut_global;
int maxspecial=0;
if (atom->molecular != Atom::ATOMIC)
maxspecial=atom->maxspecial;
int maxspecial = 0;
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom;
int success = zbl_gpu_init(atom->ntypes+1, cutsq, sw1, sw2, sw3, sw4,
sw5, d1a, d2a, d3a, d4a, zze,
cut_globalsq, cut_innersq, cut_inner,
atom->nlocal, atom->nlocal+atom->nghost,
mnf, maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success,error,world);
int success =
zbl_gpu_init(atom->ntypes + 1, cutsq, sw1, sw2, sw3, sw4, sw5, d1a, d2a, d3a, d4a, zze,
cut_globalsq, cut_innersq, cut_inner, atom->nlocal, atom->nlocal + atom->nghost,
mnf, maxspecial, cell_size, gpu_mode, screen);
GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) {
int irequest = neighbor->request(this);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
}
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);
}
/* ---------------------------------------------------------------------- */
@ -181,11 +163,12 @@ double PairZBLGPU::memory_usage()
/* ---------------------------------------------------------------------- */
void PairZBLGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
int *ilist, int *numneigh, int **firstneigh) {
int i,j,ii,jj,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,r,t,fswitch,eswitch;
void PairZBLGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */, int *ilist,
int *numneigh, int **firstneigh)
{
int i, j, ii, jj, jnum, itype, jtype;
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
double rsq, r, t, fswitch, eswitch;
int *jlist;
double **x = atom->x;
@ -210,36 +193,34 @@ void PairZBLGPU::cpu_compute(int start, int inum, int eflag, int /* vflag */,
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
rsq = delx * delx + dely * dely + delz * delz;
jtype = type[j];
if (rsq < cut_globalsq) {
r = sqrt(rsq);
r = sqrt(rsq);
fpair = dzbldr(r, itype, jtype);
if (rsq > cut_innersq) {
t = r - cut_inner;
fswitch = t*t *
(sw1[itype][jtype] + sw2[itype][jtype]*t);
fpair += fswitch;
}
if (rsq > cut_innersq) {
t = r - cut_inner;
fswitch = t * t * (sw1[itype][jtype] + sw2[itype][jtype] * t);
fpair += fswitch;
}
fpair *= -1.0/r;
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
fpair *= -1.0 / r;
f[i][0] += delx * fpair;
f[i][1] += dely * fpair;
f[i][2] += delz * fpair;
if (eflag) {
evdwl = e_zbl(r, itype, jtype);
evdwl += sw5[itype][jtype];
if (rsq > cut_innersq) {
eswitch = t*t*t *
(sw3[itype][jtype] + sw4[itype][jtype]*t);
evdwl += eswitch;
}
evdwl += sw5[itype][jtype];
if (rsq > cut_innersq) {
eswitch = t * t * t * (sw3[itype][jtype] + sw4[itype][jtype] * t);
evdwl += eswitch;
}
}
if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz);
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
}
}
}