git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@10030 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2013-06-06 14:52:02 +00:00
parent 94d3ef2744
commit c3d6358742
5 changed files with 908 additions and 894 deletions

View File

@ -45,10 +45,10 @@ action pair_coul_dsf_gpu.cpp
action pair_coul_dsf_gpu.h action pair_coul_dsf_gpu.h
action pair_coul_long_gpu.cpp pair_coul_long.cpp action pair_coul_long_gpu.cpp pair_coul_long.cpp
action pair_coul_long_gpu.h pair_coul_long.cpp action pair_coul_long_gpu.h pair_coul_long.cpp
action pair_dipole_cut_gpu.cpp pair_dipole_cut.cpp action pair_lj_cut_dipole_cut_gpu.cpp pair_lj_cut_dipole_cut.cpp
action pair_dipole_cut_gpu.h pair_dipole_cut.cpp action pair_lj_cut_dipole_cut_gpu.h pair_lj_cut_dipole_cut.cpp
action pair_dipole_sf_gpu.cpp pair_dipole_sf.cpp action pair_lj_sf_dipole_sf_gpu.cpp pair_lj_sf_dipole_sf.cpp
action pair_dipole_sf_gpu.h pair_dipole_sf.cpp action pair_lj_sf_dipole_sf_gpu.h pair_lj_sf_dipole_sf.cpp
action pair_eam_alloy_gpu.cpp pair_eam.cpp action pair_eam_alloy_gpu.cpp pair_eam.cpp
action pair_eam_alloy_gpu.h pair_eam.cpp action pair_eam_alloy_gpu.h pair_eam.cpp
action pair_eam_fs_gpu.cpp pair_eam.cpp action pair_eam_fs_gpu.cpp pair_eam.cpp

View File

@ -1,367 +1,370 @@
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under certain rights in this software. This software is distributed under
the GNU General Public License. the GNU General Public License.
See the README file in the top-level LAMMPS directory. See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
Contributing author: Trung Dac Nguyen (ORNL) Contributing author: Trung Dac Nguyen (ORNL)
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
#include "lmptype.h" #include "lmptype.h"
#include "math.h" #include "math.h"
#include "stdio.h" #include "stdio.h"
#include "stdlib.h" #include "stdlib.h"
#include "pair_dipole_cut_gpu.h" #include "pair_lj_cut_dipole_cut_gpu.h"
#include "atom.h" #include "atom.h"
#include "atom_vec.h" #include "atom_vec.h"
#include "comm.h" #include "comm.h"
#include "force.h" #include "force.h"
#include "neighbor.h" #include "neighbor.h"
#include "neigh_list.h" #include "neigh_list.h"
#include "integrate.h" #include "integrate.h"
#include "memory.h" #include "memory.h"
#include "error.h" #include "error.h"
#include "neigh_request.h" #include "neigh_request.h"
#include "universe.h" #include "universe.h"
#include "update.h" #include "update.h"
#include "domain.h" #include "domain.h"
#include "string.h" #include "string.h"
#include "gpu_extra.h" #include "gpu_extra.h"
// External functions from cuda library for atom decomposition // External functions from cuda library for atom decomposition
int dpl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, int dpl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4, double **host_lj2, double **host_lj3, double **host_lj4,
double **offset, double *special_lj, const int nlocal, double **offset, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double **host_cut_coulsq, double **host_cut_ljsq, double **host_cut_coulsq,
double *host_special_coul, const double qqrd2e); double *host_special_coul, const double qqrd2e);
void dpl_gpu_clear(); void dpl_gpu_clear();
int ** dpl_gpu_compute_n(const int ago, const int inum, int ** dpl_gpu_compute_n(const int ago, const int inum,
const int nall, double **host_x, int *host_type, const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, int *tag, double *sublo, double *subhi, int *tag,
int **nspecial, int **special, const bool eflag, int **nspecial, int **special, const bool eflag,
const bool vflag, const bool eatom, const bool vatom, const bool vflag, const bool eatom, const bool vatom,
int &host_start, int **ilist, int **jnum, int &host_start, int **ilist, int **jnum,
const double cpu_time, bool &success, const double cpu_time, bool &success,
double *host_q, double **host_mu, double *host_q, double **host_mu,
double *boxlo, double *prd); double *boxlo, double *prd);
void dpl_gpu_compute(const int ago, const int inum, void dpl_gpu_compute(const int ago, const int inum,
const int nall, double **host_x, int *host_type, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, const double cpu_time, const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, double **host_mu, bool &success, double *host_q, double **host_mu,
const int nlocal, double *boxlo, double *prd); const int nlocal, double *boxlo, double *prd);
double dpl_gpu_bytes(); double dpl_gpu_bytes();
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
PairDipoleCutGPU::PairDipoleCutGPU(LAMMPS *lmp) : PairDipoleCut(lmp), PairLJCutDipoleCutGPU::PairLJCutDipoleCutGPU(LAMMPS *lmp) : PairLJCutDipoleCut(lmp),
gpu_mode(GPU_FORCE) gpu_mode(GPU_FORCE)
{ {
respa_enable = 0; respa_enable = 0;
cpu_time = 0.0; cpu_time = 0.0;
GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
free all arrays free all arrays
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
PairDipoleCutGPU::~PairDipoleCutGPU() PairLJCutDipoleCutGPU::~PairLJCutDipoleCutGPU()
{ {
dpl_gpu_clear(); dpl_gpu_clear();
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
void PairDipoleCutGPU::compute(int eflag, int vflag) void PairLJCutDipoleCutGPU::compute(int eflag, int vflag)
{ {
if (eflag || vflag) ev_setup(eflag,vflag); if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0; else evflag = vflag_fdotr = 0;
int nall = atom->nlocal + atom->nghost; int nall = atom->nlocal + atom->nghost;
int inum, host_start; int inum, host_start;
bool success = true; bool success = true;
int *ilist, *numneigh, **firstneigh; int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) { if (gpu_mode != GPU_FORCE) {
inum = atom->nlocal; inum = atom->nlocal;
firstneigh = dpl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, firstneigh = dpl_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, domain->sublo, domain->subhi, atom->type, domain->sublo, domain->subhi,
atom->tag, atom->nspecial, atom->special, atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, atom->mu, domain->boxlo, success, atom->q, atom->mu, domain->boxlo,
domain->prd); domain->prd);
} else { } else {
inum = list->inum; inum = list->inum;
ilist = list->ilist; ilist = list->ilist;
numneigh = list->numneigh; numneigh = list->numneigh;
firstneigh = list->firstneigh; firstneigh = list->firstneigh;
dpl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, dpl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q, vflag_atom, host_start, cpu_time, success, atom->q,
atom->mu, atom->nlocal, domain->boxlo, domain->prd); atom->mu, atom->nlocal, domain->boxlo, domain->prd);
} }
if (!success) if (!success)
error->one(FLERR,"Insufficient memory on accelerator"); error->one(FLERR,"Insufficient memory on accelerator");
if (host_start<inum) { if (host_start<inum) {
cpu_time = MPI_Wtime(); cpu_time = MPI_Wtime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh); cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = MPI_Wtime() - cpu_time; cpu_time = MPI_Wtime() - cpu_time;
} }
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
init specific to this pair style init specific to this pair style
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void PairDipoleCutGPU::init_style() void PairLJCutDipoleCutGPU::init_style()
{ {
if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag) if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag)
error->all(FLERR,"Pair dipole/cut/gpu requires atom attributes q, mu, torque"); error->all(FLERR,"Pair dipole/cut/gpu requires atom attributes q, mu, torque");
if (force->newton_pair) if (force->newton_pair)
error->all(FLERR,"Cannot use newton pair with dipole/cut/gpu pair style"); error->all(FLERR,"Cannot use newton pair with dipole/cut/gpu pair style");
// Repeat cutsq calculation because done after call to init_style if (strcmp(update->unit_style,"electron") == 0)
double maxcut = -1.0; error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
double cut;
for (int i = 1; i <= atom->ntypes; i++) { // Repeat cutsq calculation because done after call to init_style
for (int j = i; j <= atom->ntypes; j++) { double maxcut = -1.0;
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { double cut;
cut = init_one(i,j); for (int i = 1; i <= atom->ntypes; i++) {
cut *= cut; for (int j = i; j <= atom->ntypes; j++) {
if (cut > maxcut) if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
maxcut = cut; cut = init_one(i,j);
cutsq[i][j] = cutsq[j][i] = cut; cut *= cut;
} else if (cut > maxcut)
cutsq[i][j] = cutsq[j][i] = 0.0; maxcut = cut;
} cutsq[i][j] = cutsq[j][i] = cut;
} } else
double cell_size = sqrt(maxcut) + neighbor->skin; cutsq[i][j] = cutsq[j][i] = 0.0;
}
int maxspecial=0; }
if (atom->molecular) double cell_size = sqrt(maxcut) + neighbor->skin;
maxspecial=atom->maxspecial;
int success = dpl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, int maxspecial=0;
offset, force->special_lj, atom->nlocal, if (atom->molecular)
atom->nlocal+atom->nghost, 300, maxspecial, maxspecial=atom->maxspecial;
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq, int success = dpl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
force->special_coul, force->qqrd2e); offset, force->special_lj, atom->nlocal,
GPU_EXTRA::check_flag(success,error,world); atom->nlocal+atom->nghost, 300, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
if (gpu_mode == GPU_FORCE) { force->special_coul, force->qqrd2e);
int irequest = neighbor->request(this); GPU_EXTRA::check_flag(success,error,world);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1; if (gpu_mode == GPU_FORCE) {
} int irequest = neighbor->request(this);
} neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
/* ---------------------------------------------------------------------- */ }
}
double PairDipoleCutGPU::memory_usage()
{ /* ---------------------------------------------------------------------- */
double bytes = Pair::memory_usage();
return bytes + dpl_gpu_bytes(); double PairLJCutDipoleCutGPU::memory_usage()
} {
double bytes = Pair::memory_usage();
/* ---------------------------------------------------------------------- */ return bytes + dpl_gpu_bytes();
}
void PairDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vflag,
int *ilist, int *numneigh, /* ---------------------------------------------------------------------- */
int **firstneigh)
{ void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vflag,
int i,j,ii,jj,jnum,itype,jtype; int *ilist, int *numneigh,
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz; int **firstneigh)
double rsq,rinv,r2inv,r6inv,r3inv,r5inv,r7inv; {
double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz; int i,j,ii,jj,jnum,itype,jtype;
double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz;
double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4; double rsq,rinv,r2inv,r6inv,r3inv,r5inv,r7inv;
double forcelj,factor_coul,factor_lj; double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz;
int *jlist; double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul;
double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4;
evdwl = ecoul = 0.0; double forcelj,factor_coul,factor_lj;
if (eflag || vflag) ev_setup(eflag,vflag); int *jlist;
else evflag = vflag_fdotr = 0;
evdwl = ecoul = 0.0;
double **x = atom->x; if (eflag || vflag) ev_setup(eflag,vflag);
double **f = atom->f; else evflag = vflag_fdotr = 0;
double *q = atom->q;
double **mu = atom->mu; double **x = atom->x;
double **torque = atom->torque; double **f = atom->f;
int *type = atom->type; double *q = atom->q;
double *special_coul = force->special_coul; double **mu = atom->mu;
double *special_lj = force->special_lj; double **torque = atom->torque;
double qqrd2e = force->qqrd2e; int *type = atom->type;
double *special_coul = force->special_coul;
double *special_lj = force->special_lj;
// loop over neighbors of my atoms double qqrd2e = force->qqrd2e;
for (ii = start; ii < inum; ii++) {
i = ilist[ii]; // loop over neighbors of my atoms
qtmp = q[i];
xtmp = x[i][0]; for (ii = start; ii < inum; ii++) {
ytmp = x[i][1]; i = ilist[ii];
ztmp = x[i][2]; qtmp = q[i];
itype = type[i]; xtmp = x[i][0];
jlist = firstneigh[i]; ytmp = x[i][1];
jnum = numneigh[i]; ztmp = x[i][2];
itype = type[i];
for (jj = 0; jj < jnum; jj++) { jlist = firstneigh[i];
j = jlist[jj]; jnum = numneigh[i];
factor_lj = special_lj[sbmask(j)];
factor_coul = special_coul[sbmask(j)]; for (jj = 0; jj < jnum; jj++) {
j &= NEIGHMASK; j = jlist[jj];
factor_lj = special_lj[sbmask(j)];
delx = xtmp - x[j][0]; factor_coul = special_coul[sbmask(j)];
dely = ytmp - x[j][1]; j &= NEIGHMASK;
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz; delx = xtmp - x[j][0];
jtype = type[j]; dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
if (rsq < cutsq[itype][jtype]) { rsq = delx*delx + dely*dely + delz*delz;
r2inv = 1.0/rsq; jtype = type[j];
rinv = sqrt(r2inv);
if (rsq < cutsq[itype][jtype]) {
// atom can have both a charge and dipole r2inv = 1.0/rsq;
// i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole rinv = sqrt(r2inv);
forcecoulx = forcecouly = forcecoulz = 0.0; // atom can have both a charge and dipole
tixcoul = tiycoul = tizcoul = 0.0; // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole
tjxcoul = tjycoul = tjzcoul = 0.0;
forcecoulx = forcecouly = forcecoulz = 0.0;
if (rsq < cut_coulsq[itype][jtype]) { tixcoul = tiycoul = tizcoul = 0.0;
tjxcoul = tjycoul = tjzcoul = 0.0;
if (qtmp != 0.0 && q[j] != 0.0) {
r3inv = r2inv*rinv; if (rsq < cut_coulsq[itype][jtype]) {
pre1 = qtmp*q[j]*r3inv;
if (qtmp != 0.0 && q[j] != 0.0) {
forcecoulx += pre1*delx; r3inv = r2inv*rinv;
forcecouly += pre1*dely; pre1 = qtmp*q[j]*r3inv;
forcecoulz += pre1*delz;
} forcecoulx += pre1*delx;
forcecouly += pre1*dely;
if (mu[i][3] > 0.0 && mu[j][3] > 0.0) { forcecoulz += pre1*delz;
r3inv = r2inv*rinv; }
r5inv = r3inv*r2inv;
r7inv = r5inv*r2inv; if (mu[i][3] > 0.0 && mu[j][3] > 0.0) {
r3inv = r2inv*rinv;
pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2]; r5inv = r3inv*r2inv;
pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; r7inv = r5inv*r2inv;
pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr; pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
pre2 = 3.0*r5inv*pjdotr; pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
pre3 = 3.0*r5inv*pidotr;
pre4 = -1.0*r3inv; pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr;
pre2 = 3.0*r5inv*pjdotr;
forcecoulx += pre1*delx + pre2*mu[i][0] + pre3*mu[j][0]; pre3 = 3.0*r5inv*pidotr;
forcecouly += pre1*dely + pre2*mu[i][1] + pre3*mu[j][1]; pre4 = -1.0*r3inv;
forcecoulz += pre1*delz + pre2*mu[i][2] + pre3*mu[j][2];
forcecoulx += pre1*delx + pre2*mu[i][0] + pre3*mu[j][0];
crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]); forcecouly += pre1*dely + pre2*mu[i][1] + pre3*mu[j][1];
crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]); forcecoulz += pre1*delz + pre2*mu[i][2] + pre3*mu[j][2];
crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]);
crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]);
tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely); crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]);
tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz); crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]);
tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx);
tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely); tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely);
tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz); tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz);
tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx); tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx);
} tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely);
tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz);
if (mu[i][3] > 0.0 && q[j] != 0.0) { tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx);
r3inv = r2inv*rinv; }
r5inv = r3inv*r2inv;
pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; if (mu[i][3] > 0.0 && q[j] != 0.0) {
pre1 = 3.0*q[j]*r5inv * pidotr; r3inv = r2inv*rinv;
pre2 = q[j]*r3inv; r5inv = r3inv*r2inv;
pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
forcecoulx += pre2*mu[i][0] - pre1*delx; pre1 = 3.0*q[j]*r5inv * pidotr;
forcecouly += pre2*mu[i][1] - pre1*dely; pre2 = q[j]*r3inv;
forcecoulz += pre2*mu[i][2] - pre1*delz;
tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely); forcecoulx += pre2*mu[i][0] - pre1*delx;
tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz); forcecouly += pre2*mu[i][1] - pre1*dely;
tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx); forcecoulz += pre2*mu[i][2] - pre1*delz;
} tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely);
tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz);
if (mu[j][3] > 0.0 && qtmp != 0.0) { tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx);
r3inv = r2inv*rinv; }
r5inv = r3inv*r2inv;
pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; if (mu[j][3] > 0.0 && qtmp != 0.0) {
pre1 = 3.0*qtmp*r5inv * pjdotr; r3inv = r2inv*rinv;
pre2 = qtmp*r3inv; r5inv = r3inv*r2inv;
pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
forcecoulx += pre1*delx - pre2*mu[j][0]; pre1 = 3.0*qtmp*r5inv * pjdotr;
forcecouly += pre1*dely - pre2*mu[j][1]; pre2 = qtmp*r3inv;
forcecoulz += pre1*delz - pre2*mu[j][2];
tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely); forcecoulx += pre1*delx - pre2*mu[j][0];
tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz); forcecouly += pre1*dely - pre2*mu[j][1];
tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx); forcecoulz += pre1*delz - pre2*mu[j][2];
} tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely);
} tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz);
tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx);
// LJ interaction }
}
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv; // LJ interaction
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
forcelj *= factor_lj * r2inv; if (rsq < cut_ljsq[itype][jtype]) {
} else forcelj = 0.0; r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
// total force forcelj *= factor_lj * r2inv;
} else forcelj = 0.0;
fq = factor_coul*qqrd2e;
fx = fq*forcecoulx + delx*forcelj; // total force
fy = fq*forcecouly + dely*forcelj;
fz = fq*forcecoulz + delz*forcelj; fq = factor_coul*qqrd2e;
fx = fq*forcecoulx + delx*forcelj;
// force & torque accumulation fy = fq*forcecouly + dely*forcelj;
fz = fq*forcecoulz + delz*forcelj;
f[i][0] += fx;
f[i][1] += fy; // force & torque accumulation
f[i][2] += fz;
torque[i][0] += fq*tixcoul; f[i][0] += fx;
torque[i][1] += fq*tiycoul; f[i][1] += fy;
torque[i][2] += fq*tizcoul; f[i][2] += fz;
torque[i][0] += fq*tixcoul;
if (eflag) { torque[i][1] += fq*tiycoul;
if (rsq < cut_coulsq[itype][jtype]) { torque[i][2] += fq*tizcoul;
ecoul = qtmp*q[j]*rinv;
if (mu[i][3] > 0.0 && mu[j][3] > 0.0) if (eflag) {
ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr; if (rsq < cut_coulsq[itype][jtype]) {
if (mu[i][3] > 0.0 && q[j] != 0.0) ecoul = qtmp*q[j]*rinv;
ecoul += -q[j]*r3inv*pidotr; if (mu[i][3] > 0.0 && mu[j][3] > 0.0)
if (mu[j][3] > 0.0 && qtmp != 0.0) ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr;
ecoul += qtmp*r3inv*pjdotr; if (mu[i][3] > 0.0 && q[j] != 0.0)
ecoul *= factor_coul*qqrd2e; ecoul += -q[j]*r3inv*pidotr;
} else ecoul = 0.0; if (mu[j][3] > 0.0 && qtmp != 0.0)
ecoul += qtmp*r3inv*pjdotr;
if (rsq < cut_ljsq[itype][jtype]) { ecoul *= factor_coul*qqrd2e;
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - } else ecoul = 0.0;
offset[itype][jtype];
evdwl *= factor_lj; if (rsq < cut_ljsq[itype][jtype]) {
} else evdwl = 0.0; evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
} offset[itype][jtype];
evdwl *= factor_lj;
if (evflag) ev_tally_xyz_full(i,evdwl,ecoul,fx,fy,fz,delx,dely,delz); } else evdwl = 0.0;
} }
}
} if (evflag) ev_tally_xyz_full(i,evdwl,ecoul,fx,fy,fz,delx,dely,delz);
} }
}
}
}

View File

@ -1,63 +1,67 @@
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under certain rights in this software. This software is distributed under
the GNU General Public License. the GNU General Public License.
See the README file in the top-level LAMMPS directory. See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
#ifdef PAIR_CLASS #ifdef PAIR_CLASS
PairStyle(dipole/cut/gpu,PairDipoleCutGPU) PairStyle(lj/cut/dipole/cut/gpu,PairLJCutDipoleCutGPU)
#else #else
#ifndef LMP_PAIR_DIPOLE_CUT_GPU_H #ifndef LMP_PAIR_LJ_CUT_DIPOLE_CUT_GPU_H
#define LMP_PAIR_DIPOLE_CUT_GPU_H #define LMP_PAIR_LJ_CUT_DIPOLE_CUT_GPU_H
#include "pair_dipole_cut.h" #include "pair_lj_cut_dipole_cut.h"
namespace LAMMPS_NS { namespace LAMMPS_NS {
class PairDipoleCutGPU : public PairDipoleCut { class PairLJCutDipoleCutGPU : public PairLJCutDipoleCut {
public: public:
PairDipoleCutGPU(LAMMPS *lmp); PairLJCutDipoleCutGPU(LAMMPS *lmp);
~PairDipoleCutGPU(); ~PairLJCutDipoleCutGPU();
void cpu_compute(int, int, int, int, int *, int *, int **); void cpu_compute(int, int, int, int, int *, int *, int **);
void compute(int, int); void compute(int, int);
void init_style(); void init_style();
double memory_usage(); double memory_usage();
enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
private: private:
int gpu_mode; int gpu_mode;
double cpu_time; double cpu_time;
int *gpulist; int *gpulist;
}; };
} }
#endif #endif
#endif #endif
/* ERROR/WARNING messages: /* ERROR/WARNING messages:
E: Insufficient memory on accelerator E: Insufficient memory on accelerator
There is insufficient memory on one of the devices specified for the gpu There is insufficient memory on one of the devices specified for the gpu
package package
E: Pair dipole/cut/gpu requires atom attributes q, mu, torque E: Pair dipole/cut/gpu requires atom attributes q, mu, torque
The atom style defined does not have this attribute. The atom style defined does not have this attribute.
E: Cannot use newton pair with dipole/cut/gpu pair style E: Cannot use newton pair with dipole/cut/gpu pair style
Self-explanatory. Self-explanatory.
*/ E: Cannot (yet) use 'electron' units with dipoles
This feature is not yet supported.
*/

View File

@ -1,397 +1,400 @@
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under certain rights in this software. This software is distributed under
the GNU General Public License. the GNU General Public License.
See the README file in the top-level LAMMPS directory. See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
Contributing author: Trung Dac Nguyen (ORNL) Contributing author: Trung Dac Nguyen (ORNL)
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
#include "lmptype.h" #include "lmptype.h"
#include "math.h" #include "math.h"
#include "stdio.h" #include "stdio.h"
#include "stdlib.h" #include "stdlib.h"
#include "pair_dipole_sf_gpu.h" #include "pair_lj_sf_dipole_sf_gpu.h"
#include "atom.h" #include "atom.h"
#include "atom_vec.h" #include "atom_vec.h"
#include "comm.h" #include "comm.h"
#include "force.h" #include "force.h"
#include "neighbor.h" #include "neighbor.h"
#include "neigh_list.h" #include "neigh_list.h"
#include "integrate.h" #include "integrate.h"
#include "memory.h" #include "memory.h"
#include "error.h" #include "error.h"
#include "neigh_request.h" #include "neigh_request.h"
#include "universe.h" #include "universe.h"
#include "update.h" #include "update.h"
#include "domain.h" #include "domain.h"
#include "string.h" #include "string.h"
#include "gpu_extra.h" #include "gpu_extra.h"
// External functions from cuda library for atom decomposition // External functions from cuda library for atom decomposition
int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1, int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
double **host_lj2, double **host_lj3, double **host_lj4, double **host_lj2, double **host_lj3, double **host_lj4,
double *special_lj, const int nlocal, double *special_lj, const int nlocal,
const int nall, const int max_nbors, const int maxspecial, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, int &gpu_mode, FILE *screen, const double cell_size, int &gpu_mode, FILE *screen,
double **host_cut_ljsq, double **host_cut_coulsq, double **host_cut_ljsq, double **host_cut_coulsq,
double *host_special_coul, const double qqrd2e); double *host_special_coul, const double qqrd2e);
void dplsf_gpu_clear(); void dplsf_gpu_clear();
int ** dplsf_gpu_compute_n(const int ago, const int inum, int ** dplsf_gpu_compute_n(const int ago, const int inum,
const int nall, double **host_x, int *host_type, const int nall, double **host_x, int *host_type,
double *sublo, double *subhi, int *tag, int **nspecial, double *sublo, double *subhi, int *tag, int **nspecial,
int **special, const bool eflag, const bool vflag, int **special, const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start, const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time, int **ilist, int **jnum, const double cpu_time,
bool &success, double *host_q, double **host_mu, bool &success, double *host_q, double **host_mu,
double *boxlo, double *prd); double *boxlo, double *prd);
void dplsf_gpu_compute(const int ago, const int inum, void dplsf_gpu_compute(const int ago, const int inum,
const int nall, double **host_x, int *host_type, const int nall, double **host_x, int *host_type,
int *ilist, int *numj, int **firstneigh, int *ilist, int *numj, int **firstneigh,
const bool eflag, const bool vflag, const bool eatom, const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start, const double cpu_time, const bool vatom, int &host_start, const double cpu_time,
bool &success, double *host_q, double **host_mu, const int nlocal, bool &success, double *host_q, double **host_mu, const int nlocal,
double *boxlo, double *prd); double *boxlo, double *prd);
double dplsf_gpu_bytes(); double dplsf_gpu_bytes();
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
PairDipoleSFGPU::PairDipoleSFGPU(LAMMPS *lmp) : PairDipoleSF(lmp), PairLJSFDipoleSFGPU::PairLJSFDipoleSFGPU(LAMMPS *lmp) : PairLJSFDipoleSF(lmp),
gpu_mode(GPU_FORCE) gpu_mode(GPU_FORCE)
{ {
respa_enable = 0; respa_enable = 0;
cpu_time = 0.0; cpu_time = 0.0;
GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); GPU_EXTRA::gpu_ready(lmp->modify, lmp->error);
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
free all arrays free all arrays
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
PairDipoleSFGPU::~PairDipoleSFGPU() PairLJSFDipoleSFGPU::~PairLJSFDipoleSFGPU()
{ {
dplsf_gpu_clear(); dplsf_gpu_clear();
} }
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
void PairDipoleSFGPU::compute(int eflag, int vflag) void PairLJSFDipoleSFGPU::compute(int eflag, int vflag)
{ {
if (eflag || vflag) ev_setup(eflag,vflag); if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0; else evflag = vflag_fdotr = 0;
int nall = atom->nlocal + atom->nghost; int nall = atom->nlocal + atom->nghost;
int inum, host_start; int inum, host_start;
bool success = true; bool success = true;
int *ilist, *numneigh, **firstneigh; int *ilist, *numneigh, **firstneigh;
if (gpu_mode != GPU_FORCE) { if (gpu_mode != GPU_FORCE) {
inum = atom->nlocal; inum = atom->nlocal;
firstneigh = dplsf_gpu_compute_n(neighbor->ago, inum, nall, atom->x, firstneigh = dplsf_gpu_compute_n(neighbor->ago, inum, nall, atom->x,
atom->type, domain->sublo, domain->subhi, atom->type, domain->sublo, domain->subhi,
atom->tag, atom->nspecial, atom->special, atom->tag, atom->nspecial, atom->special,
eflag, vflag, eflag_atom, vflag_atom, eflag, vflag, eflag_atom, vflag_atom,
host_start, &ilist, &numneigh, cpu_time, host_start, &ilist, &numneigh, cpu_time,
success, atom->q, atom->mu, domain->boxlo, success, atom->q, atom->mu, domain->boxlo,
domain->prd); domain->prd);
} else { } else {
inum = list->inum; inum = list->inum;
ilist = list->ilist; ilist = list->ilist;
numneigh = list->numneigh; numneigh = list->numneigh;
firstneigh = list->firstneigh; firstneigh = list->firstneigh;
dplsf_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, dplsf_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type,
ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, ilist, numneigh, firstneigh, eflag, vflag, eflag_atom,
vflag_atom, host_start, cpu_time, success, atom->q, vflag_atom, host_start, cpu_time, success, atom->q,
atom->mu, atom->nlocal, domain->boxlo, domain->prd); atom->mu, atom->nlocal, domain->boxlo, domain->prd);
} }
if (!success) if (!success)
error->one(FLERR,"Insufficient memory on accelerator"); error->one(FLERR,"Insufficient memory on accelerator");
if (host_start<inum) { if (host_start<inum) {
cpu_time = MPI_Wtime(); cpu_time = MPI_Wtime();
cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh); cpu_compute(host_start, inum, eflag, vflag, ilist, numneigh, firstneigh);
cpu_time = MPI_Wtime() - cpu_time; cpu_time = MPI_Wtime() - cpu_time;
} }
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
init specific to this pair style init specific to this pair style
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void PairDipoleSFGPU::init_style() void PairLJSFDipoleSFGPU::init_style()
{ {
if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag) if (!atom->q_flag || !atom->mu_flag || !atom->torque_flag)
error->all(FLERR,"Pair dipole/sf/gpu requires atom attributes q, mu, torque"); error->all(FLERR,"Pair dipole/sf/gpu requires atom attributes q, mu, torque");
if (force->newton_pair) if (force->newton_pair)
error->all(FLERR,"Cannot use newton pair with dipole/sf/gpu pair style"); error->all(FLERR,"Cannot use newton pair with dipole/sf/gpu pair style");
// Repeat cutsq calculation because done after call to init_style if (strcmp(update->unit_style,"electron") == 0)
double maxcut = -1.0; error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles");
double cut;
for (int i = 1; i <= atom->ntypes; i++) { // Repeat cutsq calculation because done after call to init_style
for (int j = i; j <= atom->ntypes; j++) { double maxcut = -1.0;
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { double cut;
cut = init_one(i,j); for (int i = 1; i <= atom->ntypes; i++) {
cut *= cut; for (int j = i; j <= atom->ntypes; j++) {
if (cut > maxcut) if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
maxcut = cut; cut = init_one(i,j);
cutsq[i][j] = cutsq[j][i] = cut; cut *= cut;
} else if (cut > maxcut)
cutsq[i][j] = cutsq[j][i] = 0.0; maxcut = cut;
} cutsq[i][j] = cutsq[j][i] = cut;
} } else
double cell_size = sqrt(maxcut) + neighbor->skin; cutsq[i][j] = cutsq[j][i] = 0.0;
}
int maxspecial=0; }
if (atom->molecular) double cell_size = sqrt(maxcut) + neighbor->skin;
maxspecial=atom->maxspecial;
int success = dplsf_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, int maxspecial=0;
force->special_lj, atom->nlocal, if (atom->molecular)
atom->nlocal+atom->nghost, 300, maxspecial, maxspecial=atom->maxspecial;
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq, int success = dplsf_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4,
force->special_coul, force->qqrd2e); force->special_lj, atom->nlocal,
GPU_EXTRA::check_flag(success,error,world); atom->nlocal+atom->nghost, 300, maxspecial,
cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq,
if (gpu_mode == GPU_FORCE) { force->special_coul, force->qqrd2e);
int irequest = neighbor->request(this); GPU_EXTRA::check_flag(success,error,world);
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1; if (gpu_mode == GPU_FORCE) {
} int irequest = neighbor->request(this);
} neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full = 1;
/* ---------------------------------------------------------------------- */ }
}
double PairDipoleSFGPU::memory_usage()
{ /* ---------------------------------------------------------------------- */
double bytes = Pair::memory_usage();
return bytes + dplsf_gpu_bytes(); double PairLJSFDipoleSFGPU::memory_usage()
} {
double bytes = Pair::memory_usage();
/* ---------------------------------------------------------------------- */ return bytes + dplsf_gpu_bytes();
}
void PairDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
int *ilist, int *numneigh, /* ---------------------------------------------------------------------- */
int **firstneigh)
{ void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag,
int i,j,ii,jj,jnum,itype,jtype; int *ilist, int *numneigh,
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz; int **firstneigh)
double rsq,rinv,r2inv,r6inv,r3inv,r5inv; {
double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz; int i,j,ii,jj,jnum,itype,jtype;
double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul; double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz;
double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4; double rsq,rinv,r2inv,r6inv,r3inv,r5inv;
double forcelj,factor_coul,factor_lj; double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz;
double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf; double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul;
double aforcecoulx,aforcecouly,aforcecoulz; double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4;
double bforcecoulx,bforcecouly,bforcecoulz; double forcelj,factor_coul,factor_lj;
double rcutlj2inv, rcutcoul2inv,rcutlj6inv; double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf;
int *jlist; double aforcecoulx,aforcecouly,aforcecoulz;
double bforcecoulx,bforcecouly,bforcecoulz;
evdwl = ecoul = 0.0; double rcutlj2inv, rcutcoul2inv,rcutlj6inv;
if (eflag || vflag) ev_setup(eflag,vflag); int *jlist;
else evflag = vflag_fdotr = 0;
evdwl = ecoul = 0.0;
double **x = atom->x; if (eflag || vflag) ev_setup(eflag,vflag);
double **f = atom->f; else evflag = vflag_fdotr = 0;
double *q = atom->q;
double **mu = atom->mu; double **x = atom->x;
double **torque = atom->torque; double **f = atom->f;
int *type = atom->type; double *q = atom->q;
double *special_coul = force->special_coul; double **mu = atom->mu;
double *special_lj = force->special_lj; double **torque = atom->torque;
double qqrd2e = force->qqrd2e; int *type = atom->type;
double *special_coul = force->special_coul;
double *special_lj = force->special_lj;
// loop over neighbors of my atoms double qqrd2e = force->qqrd2e;
for (ii = start; ii < inum; ii++) {
i = ilist[ii]; // loop over neighbors of my atoms
qtmp = q[i];
xtmp = x[i][0]; for (ii = start; ii < inum; ii++) {
ytmp = x[i][1]; i = ilist[ii];
ztmp = x[i][2]; qtmp = q[i];
itype = type[i]; xtmp = x[i][0];
jlist = firstneigh[i]; ytmp = x[i][1];
jnum = numneigh[i]; ztmp = x[i][2];
itype = type[i];
for (jj = 0; jj < jnum; jj++) { jlist = firstneigh[i];
j = jlist[jj]; jnum = numneigh[i];
factor_lj = special_lj[sbmask(j)];
factor_coul = special_coul[sbmask(j)]; for (jj = 0; jj < jnum; jj++) {
j &= NEIGHMASK; j = jlist[jj];
factor_lj = special_lj[sbmask(j)];
delx = xtmp - x[j][0]; factor_coul = special_coul[sbmask(j)];
dely = ytmp - x[j][1]; j &= NEIGHMASK;
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz; delx = xtmp - x[j][0];
jtype = type[j]; dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
if (rsq < cutsq[itype][jtype]) { rsq = delx*delx + dely*dely + delz*delz;
r2inv = 1.0/rsq; jtype = type[j];
rinv = sqrt(r2inv);
if (rsq < cutsq[itype][jtype]) {
// atom can have both a charge and dipole r2inv = 1.0/rsq;
// i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole rinv = sqrt(r2inv);
forcecoulx = forcecouly = forcecoulz = 0.0; // atom can have both a charge and dipole
tixcoul = tiycoul = tizcoul = 0.0; // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole
tjxcoul = tjycoul = tjzcoul = 0.0;
forcecoulx = forcecouly = forcecoulz = 0.0;
if (rsq < cut_coulsq[itype][jtype]) { tixcoul = tiycoul = tizcoul = 0.0;
tjxcoul = tjycoul = tjzcoul = 0.0;
if (qtmp != 0.0 && q[j] != 0.0) {
pre1 = qtmp*q[j]*rinv*(r2inv-1.0/cut_coulsq[itype][jtype]); if (rsq < cut_coulsq[itype][jtype]) {
forcecoulx += pre1*delx; if (qtmp != 0.0 && q[j] != 0.0) {
forcecouly += pre1*dely; pre1 = qtmp*q[j]*rinv*(r2inv-1.0/cut_coulsq[itype][jtype]);
forcecoulz += pre1*delz;
} forcecoulx += pre1*delx;
forcecouly += pre1*dely;
if (mu[i][3] > 0.0 && mu[j][3] > 0.0) { forcecoulz += pre1*delz;
r3inv = r2inv*rinv; }
r5inv = r3inv*r2inv;
rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; if (mu[i][3] > 0.0 && mu[j][3] > 0.0) {
r3inv = r2inv*rinv;
pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2]; r5inv = r3inv*r2inv;
pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2];
afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv; pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr ); pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
aforcecoulx = pre1*delx;
aforcecouly = pre1*dely; afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv;
aforcecoulz = pre1*delz; pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr );
aforcecoulx = pre1*delx;
bfac = 1.0 - 4.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv) + aforcecouly = pre1*dely;
3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv; aforcecoulz = pre1*delz;
presf = 2.0 * r2inv * pidotr * pjdotr;
bforcecoulx = bfac * (pjdotr*mu[i][0]+pidotr*mu[j][0]-presf*delx); bfac = 1.0 - 4.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv) +
bforcecouly = bfac * (pjdotr*mu[i][1]+pidotr*mu[j][1]-presf*dely); 3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv;
bforcecoulz = bfac * (pjdotr*mu[i][2]+pidotr*mu[j][2]-presf*delz); presf = 2.0 * r2inv * pidotr * pjdotr;
bforcecoulx = bfac * (pjdotr*mu[i][0]+pidotr*mu[j][0]-presf*delx);
forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx ); bforcecouly = bfac * (pjdotr*mu[i][1]+pidotr*mu[j][1]-presf*dely);
forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly ); bforcecoulz = bfac * (pjdotr*mu[i][2]+pidotr*mu[j][2]-presf*delz);
forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz );
forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx );
pre2 = 3.0 * bfac * r5inv * pjdotr; forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly );
pre3 = 3.0 * bfac * r5inv * pidotr; forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz );
pre4 = -bfac * r3inv;
pre2 = 3.0 * bfac * r5inv * pjdotr;
crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]); pre3 = 3.0 * bfac * r5inv * pidotr;
crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]); pre4 = -bfac * r3inv;
crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]);
crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]);
tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely); crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]);
tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz); crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]);
tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx);
tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely); tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely);
tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz); tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz);
tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx); tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx);
} tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely);
tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz);
if (mu[i][3] > 0.0 && q[j] != 0.0) { tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx);
r3inv = r2inv*rinv; }
r5inv = r3inv*r2inv;
pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; if (mu[i][3] > 0.0 && q[j] != 0.0) {
rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; r3inv = r2inv*rinv;
pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv); r5inv = r3inv*r2inv;
pqfac = 1.0 - 3.0*rsq*rcutcoul2inv + pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz;
2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv); rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
pre2 = q[j] * r3inv * pqfac; pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv);
pqfac = 1.0 - 3.0*rsq*rcutcoul2inv +
forcecoulx += pre2*mu[i][0] - pre1*delx; 2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv);
forcecouly += pre2*mu[i][1] - pre1*dely; pre2 = q[j] * r3inv * pqfac;
forcecoulz += pre2*mu[i][2] - pre1*delz;
tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely); forcecoulx += pre2*mu[i][0] - pre1*delx;
tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz); forcecouly += pre2*mu[i][1] - pre1*dely;
tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx); forcecoulz += pre2*mu[i][2] - pre1*delz;
} tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely);
tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz);
if (mu[j][3] > 0.0 && qtmp != 0.0) { tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx);
r3inv = r2inv*rinv; }
r5inv = r3inv*r2inv;
pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; if (mu[j][3] > 0.0 && qtmp != 0.0) {
rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; r3inv = r2inv*rinv;
pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv); r5inv = r3inv*r2inv;
qpfac = 1.0 - 3.0*rsq*rcutcoul2inv + pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz;
2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv); rcutcoul2inv=1.0/cut_coulsq[itype][jtype];
pre2 = qtmp * r3inv * qpfac; pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv);
qpfac = 1.0 - 3.0*rsq*rcutcoul2inv +
forcecoulx += pre1*delx - pre2*mu[j][0]; 2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv);
forcecouly += pre1*dely - pre2*mu[j][1]; pre2 = qtmp * r3inv * qpfac;
forcecoulz += pre1*delz - pre2*mu[j][2];
tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely); forcecoulx += pre1*delx - pre2*mu[j][0];
tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz); forcecouly += pre1*dely - pre2*mu[j][1];
tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx); forcecoulz += pre1*delz - pre2*mu[j][2];
} tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely);
} tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz);
tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx);
// LJ interaction }
}
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv; // LJ interaction
forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv;
if (rsq < cut_ljsq[itype][jtype]) {
rcutlj2inv = 1.0 / cut_ljsq[itype][jtype]; r6inv = r2inv*r2inv*r2inv;
rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv; forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv;
forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) *
rcutlj6inv * rcutlj2inv; rcutlj2inv = 1.0 / cut_ljsq[itype][jtype];
rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv;
forcelj = factor_lj * (forceljcut - forceljsf); forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) *
} else forcelj = 0.0; rcutlj6inv * rcutlj2inv;
// total force forcelj = factor_lj * (forceljcut - forceljsf);
} else forcelj = 0.0;
fq = factor_coul*qqrd2e;
fx = fq*forcecoulx + delx*forcelj; // total force
fy = fq*forcecouly + dely*forcelj;
fz = fq*forcecoulz + delz*forcelj; fq = factor_coul*qqrd2e;
fx = fq*forcecoulx + delx*forcelj;
// force & torque accumulation fy = fq*forcecouly + dely*forcelj;
fz = fq*forcecoulz + delz*forcelj;
f[i][0] += fx;
f[i][1] += fy; // force & torque accumulation
f[i][2] += fz;
torque[i][0] += fq*tixcoul; f[i][0] += fx;
torque[i][1] += fq*tiycoul; f[i][1] += fy;
torque[i][2] += fq*tizcoul; f[i][2] += fz;
torque[i][0] += fq*tixcoul;
if (eflag) { torque[i][1] += fq*tiycoul;
if (rsq < cut_coulsq[itype][jtype]) { torque[i][2] += fq*tizcoul;
ecoul = qtmp*q[j]*rinv*
pow((1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype])),2); if (eflag) {
if (mu[i][3] > 0.0 && mu[j][3] > 0.0) if (rsq < cut_coulsq[itype][jtype]) {
ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr); ecoul = qtmp*q[j]*rinv*
if (mu[i][3] > 0.0 && q[j] != 0.0) pow((1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype])),2);
ecoul += -q[j]*r3inv * pqfac * pidotr; if (mu[i][3] > 0.0 && mu[j][3] > 0.0)
if (mu[j][3] > 0.0 && qtmp != 0.0) ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr);
ecoul += qtmp*r3inv * qpfac * pjdotr; if (mu[i][3] > 0.0 && q[j] != 0.0)
ecoul *= factor_coul*qqrd2e; ecoul += -q[j]*r3inv * pqfac * pidotr;
} else ecoul = 0.0; if (mu[j][3] > 0.0 && qtmp != 0.0)
ecoul += qtmp*r3inv * qpfac * pjdotr;
if (rsq < cut_ljsq[itype][jtype]) { ecoul *= factor_coul*qqrd2e;
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) + } else ecoul = 0.0;
rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])*
rsq*rcutlj2inv + if (rsq < cut_ljsq[itype][jtype]) {
rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]); evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) +
evdwl *= factor_lj; rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])*
} else evdwl = 0.0; rsq*rcutlj2inv +
} rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]);
evdwl *= factor_lj;
if (evflag) ev_tally_xyz_full(i,evdwl,ecoul, } else evdwl = 0.0;
fx,fy,fz,delx,dely,delz); }
}
} if (evflag) ev_tally_xyz_full(i,evdwl,ecoul,
} fx,fy,fz,delx,dely,delz);
} }
}
}
}

View File

@ -1,63 +1,67 @@
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under certain rights in this software. This software is distributed under
the GNU General Public License. the GNU General Public License.
See the README file in the top-level LAMMPS directory. See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
#ifdef PAIR_CLASS #ifdef PAIR_CLASS
PairStyle(dipole/sf/gpu,PairDipoleSFGPU) PairStyle(lj/sf/dipole/sf/gpu,PairLJSFDipoleSFGPU)
#else #else
#ifndef LMP_PAIR_DIPOLE_SF_GPU_H #ifndef LMP_PAIR_LJ_SF_DIPOLE_SF_GPU_H
#define LMP_PAIR_DIPOLE_SF_GPU_H #define LMP_PAIR_LJ_SF_DIPOLE_SF_GPU_H
#include "pair_dipole_sf.h" #include "pair_lj_sf_dipole_sf.h"
namespace LAMMPS_NS { namespace LAMMPS_NS {
class PairDipoleSFGPU : public PairDipoleSF { class PairLJSFDipoleSFGPU : public PairLJSFDipoleSF {
public: public:
PairDipoleSFGPU(LAMMPS *lmp); PairLJSFDipoleSFGPU(LAMMPS *lmp);
~PairDipoleSFGPU(); ~PairLJSFDipoleSFGPU();
void cpu_compute(int, int, int, int, int *, int *, int **); void cpu_compute(int, int, int, int, int *, int *, int **);
void compute(int, int); void compute(int, int);
void init_style(); void init_style();
double memory_usage(); double memory_usage();
enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH };
private: private:
int gpu_mode; int gpu_mode;
double cpu_time; double cpu_time;
int *gpulist; int *gpulist;
}; };
} }
#endif #endif
#endif #endif
/* ERROR/WARNING messages: /* ERROR/WARNING messages:
E: Insufficient memory on accelerator E: Insufficient memory on accelerator
There is insufficient memory on one of the devices specified for the gpu There is insufficient memory on one of the devices specified for the gpu
package package
E: Pair style dipole/cut/gpu requires atom attribute q E: Pair style dipole/cut/gpu requires atom attribute q
The atom style defined does not have this attribute. The atom style defined does not have this attribute.
E: Cannot use newton pair with dipole/cut/gpu pair style E: Cannot use newton pair with dipole/cut/gpu pair style
Self-explanatory. Self-explanatory.
*/ E: Cannot (yet) use 'electron' units with dipoles
This feature is not yet supported.
*/