Merge branch 'lammps:develop' into mliappy_unified
This commit is contained in:
12
src/.gitignore
vendored
12
src/.gitignore
vendored
@ -173,12 +173,20 @@
|
||||
/pair_tdpd.cpp
|
||||
/pair_tdpd.h
|
||||
|
||||
/compute_grid.cpp
|
||||
/compute_grid.h
|
||||
/compute_grid_local.cpp
|
||||
/compute_grid_local.h
|
||||
/compute_sna_atom.cpp
|
||||
/compute_sna_atom.h
|
||||
/compute_snad_atom.cpp
|
||||
/compute_snad_atom.h
|
||||
/compute_snav_atom.cpp
|
||||
/compute_snav_atom.h
|
||||
/compute_sna_grid.cpp
|
||||
/compute_sna_grid.h
|
||||
/compute_sna_grid_local.cpp
|
||||
/compute_sna_grid_local.h
|
||||
/compute_snap.cpp
|
||||
/compute_snap.h
|
||||
/openmp_snap.h
|
||||
@ -997,6 +1005,8 @@
|
||||
/neb.h
|
||||
/netcdf_units.cpp
|
||||
/netcdf_units.h
|
||||
/pair_threebody_table.cpp
|
||||
/pair_threebody_table.h
|
||||
/pair_adp.cpp
|
||||
/pair_adp.h
|
||||
/pair_agni.cpp
|
||||
@ -1291,6 +1301,8 @@
|
||||
/pair_sph_taitwater_morris.h
|
||||
/pair_sw.cpp
|
||||
/pair_sw.h
|
||||
/pair_sw_angle_table.cpp
|
||||
/pair_sw_angle_table.h
|
||||
/pair_sw_mod.cpp
|
||||
/pair_sw_mod.h
|
||||
/pair_tersoff.cpp
|
||||
|
||||
40
src/AMOEBA/Install.sh
Normal file
40
src/AMOEBA/Install.sh
Normal file
@ -0,0 +1,40 @@
|
||||
# Install/unInstall package files in LAMMPS
# the first argument selects the mode: 0 = uninstall, 1 = install, 2 = update

mode=$1

# enforce using portable C locale for all commands run by this script

LC_ALL=C; export LC_ALL
|
||||
|
||||
# action: synchronize one package file with the parent src directory
#   arg1 = file in this package directory
#   arg2 = optional file in the parent directory that arg1 depends on
# mode 0: remove ../arg1
# otherwise:
#   - if arg1 differs from ../arg1 (or ../arg1 is missing), copy it to ..
#     provided no dependency was given or ../arg2 exists
#     (mode 2 additionally reports the update)
#   - if both copies are identical but the dependency ../arg2 is missing,
#     remove ../arg1
# all expansions are quoted so that an empty mode or a file name
# containing blanks cannot break the tests

action () {
  if (test "$mode" = 0) then
    rm -f "../$1"
  elif (! cmp -s "$1" "../$1") then
    if (test -z "$2" || test -e "../$2") then
      cp "$1" ..
      if (test "$mode" = 2) then
        echo " updating src/$1"
      fi
    fi
  elif (test -n "$2") then
    if (test ! -e "../$2") then
      rm -f "../$1"
    fi
  fi
}
|
||||
|
||||
# pair style amoeba calls KSPACE functions and requires FFT grid.
# when installing (first argument = 1), stop with an error unless
# pppm.cpp is already present in the parent directory

if (test "$1" = 1) then
  if (test ! -e ../pppm.cpp) then
    echo "Must install KSPACE package with AMOEBA package"
    exit 1
  fi
fi

# apply the selected mode to every source and header file of this package
# the file name is quoted so that it reaches action() as a single argument

for file in *.cpp *.h; do
  action "${file}"
done
|
||||
167
src/AMOEBA/amoeba_charge_transfer.cpp
Normal file
167
src/AMOEBA/amoeba_charge_transfer.cpp
Normal file
@ -0,0 +1,167 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/ Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_amoeba.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "memory.h"
|
||||
#include "neigh_list.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{VDWL,REPULSE,QFER,DISP,MPOLE,POLAR,USOLV,DISP_LONG,MPOLE_LONG,POLAR_LONG};
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
charge_transfer = HIPPO charge transfer forces
|
||||
adapted from Tinker echgtrn1b() routine
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairAmoeba::charge_transfer()
|
||||
{
|
||||
int i,j,ii,jj,itype,jtype,iclass,jclass;
|
||||
double e,de,felec;
|
||||
double rr1,r,r2;
|
||||
double r3,r4,r5;
|
||||
double xi,yi,zi;
|
||||
double xr,yr,zr;
|
||||
double chgi,chgj;
|
||||
double alphai,alphaj;
|
||||
double expi,expj;
|
||||
double frcx,frcy,frcz;
|
||||
double vxx,vyy,vzz;
|
||||
double vxy,vxz,vyz;
|
||||
double taper,dtaper;
|
||||
double factor_mpole;
|
||||
|
||||
int inum,jnum;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
// set cutoffs and taper coeffs
|
||||
|
||||
choose(QFER);
|
||||
|
||||
// owned atoms
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
|
||||
// neigh list
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// set the energy unit conversion factor
|
||||
|
||||
felec = electric / am_dielectric;
|
||||
|
||||
// find charge transfer energy and derivatives via neighbor list
|
||||
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
itype = amtype[i];
|
||||
iclass = amtype2class[itype];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
xi = x[i][0];
|
||||
yi = x[i][1];
|
||||
zi = x[i][2];
|
||||
chgi = chgct[iclass];
|
||||
alphai = dmpct[iclass];
|
||||
if (alphai == 0.0) alphai = 100.0;
|
||||
|
||||
// evaluate all sites within the cutoff distance
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
factor_mpole = special_mpole[sbmask15(j)];
|
||||
if (factor_mpole == 0.0) continue;
|
||||
j &= NEIGHMASK15;
|
||||
|
||||
xr = x[j][0] - xi;
|
||||
yr = x[j][1] - yi;
|
||||
zr = x[j][2] - zi;
|
||||
r2 = xr*xr + yr* yr + zr*zr;
|
||||
if (r2 > off2) continue;
|
||||
|
||||
jtype = amtype[j];
|
||||
jclass = amtype2class[jtype];
|
||||
|
||||
r = sqrt(r2);
|
||||
rr1 = 1.0 / r;
|
||||
chgj = chgct[jclass];
|
||||
alphaj = dmpct[jclass];
|
||||
if (alphaj == 0.0) alphaj = 100.0;
|
||||
|
||||
expi = exp(-alphai*r);
|
||||
expj = exp(-alphaj*r);
|
||||
e = -chgi*expj - chgj*expi;
|
||||
de = chgi*expj*alphaj + chgj*expi*alphai;
|
||||
e = felec * e * factor_mpole;
|
||||
de = felec * de * factor_mpole;
|
||||
|
||||
// use energy switching if near the cutoff distance
|
||||
|
||||
if (r2 > cut2) {
|
||||
r3 = r2 * r;
|
||||
r4 = r2 * r2;
|
||||
r5 = r2 * r3;
|
||||
taper = c5*r5 + c4*r4 + c3*r3 + c2*r2 + c1*r + c0;
|
||||
dtaper = 5.0*c5*r4 + 4.0*c4*r3 + 3.0*c3*r2 + 2.0*c2*r + c1;
|
||||
de = e*dtaper + de*taper;
|
||||
e *= taper;
|
||||
}
|
||||
|
||||
eqxfer += e;
|
||||
|
||||
// compute the force components for this interaction
|
||||
|
||||
frcx = de * xr * rr1;
|
||||
frcy = de * yr * rr1;
|
||||
frcz = de * zr * rr1;
|
||||
|
||||
// increment the total charge transfer energy and derivatives
|
||||
|
||||
f[i][0] += frcx;
|
||||
f[i][1] += frcy;
|
||||
f[i][2] += frcz;
|
||||
f[j][0] -= frcx;
|
||||
f[j][1] -= frcy;
|
||||
f[j][2] -= frcz;
|
||||
|
||||
// increment the internal virial tensor components
|
||||
|
||||
if (vflag_global) {
|
||||
vxx = xr * frcx;
|
||||
vxy = yr * frcx;
|
||||
vxz = zr * frcx;
|
||||
vyy = yr * frcy;
|
||||
vyz = zr * frcy;
|
||||
vzz = zr * frcz;
|
||||
|
||||
virqxfer[0] -= vxx;
|
||||
virqxfer[1] -= vyy;
|
||||
virqxfer[2] -= vzz;
|
||||
virqxfer[3] -= vxy;
|
||||
virqxfer[4] -= vxz;
|
||||
virqxfer[5] -= vyz;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
843
src/AMOEBA/amoeba_convolution.cpp
Normal file
843
src/AMOEBA/amoeba_convolution.cpp
Normal file
@ -0,0 +1,843 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/ Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "amoeba_convolution.h"
|
||||
|
||||
#include "comm.h"
|
||||
#include "domain.h"
|
||||
#include "fft3d_wrap.h"
|
||||
#include "gridcomm.h"
|
||||
#include "memory.h"
|
||||
#include "neighbor.h"
|
||||
#include "remap_wrap.h"
|
||||
#include "update.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
// DEBUG

// set DEBUG_AMOEBA to 1 to compile in the debug_scalar()/debug_file() calls

#define DEBUG_AMOEBA 0
#if DEBUG_AMOEBA

// printable name of each grid type, indexed by the "which" setting
// sized by its initializers (6 grid types) and const, since the
// entries are string literals; static keeps the name local to this file

static const char *labels[] =
  {"MPOLE_GRID", "POLAR_GRID",
   "POLAR_GRIDC", "DISP_GRID",
   "INDUCE_GRID", "INDUCE_GRIDC"};

// selects which array a debug routine operates on

enum{GRIDBRICK_OUT,GRIDBRICK_IN,FFT,CFFT1,CFFT2};
#endif

// END DEBUG
|
||||
|
||||
enum{MPOLE_GRID,POLAR_GRID,POLAR_GRIDC,DISP_GRID,INDUCE_GRID,INDUCE_GRIDC};
|
||||
|
||||
//#define SCALE 1
|
||||
#define SCALE 0
|
||||
|
||||
#ifdef FFT_SINGLE
|
||||
#define ZEROF 0.0f
|
||||
#define ONEF 1.0f
|
||||
#else
|
||||
#define ZEROF 0.0
|
||||
#define ONEF 1.0
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
partition an FFT grid across processors
|
||||
both for a brick and FFT x pencil decomposition
|
||||
nx,nz,nz = global FFT grid size
|
||||
order = size of stencil in each dimension that maps atoms to grid
|
||||
adapted from PPPM::set_grid_local()
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
AmoebaConvolution::AmoebaConvolution(LAMMPS *lmp, Pair *pair,
|
||||
int nx_caller, int ny_caller, int nz_caller,
|
||||
int order_caller, int which_caller) :
|
||||
Pointers(lmp)
|
||||
{
|
||||
amoeba = pair;
|
||||
nx = nx_caller;
|
||||
ny = ny_caller;
|
||||
nz = nz_caller;
|
||||
order = order_caller;
|
||||
which = which_caller;
|
||||
|
||||
flag3d = 1;
|
||||
if (which == POLAR_GRIDC || which == INDUCE_GRIDC) flag3d = 0;
|
||||
|
||||
nfft_global = (bigint) nx * ny * nz;
|
||||
|
||||
// global indices of grid range from 0 to N-1
|
||||
// nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
|
||||
// global grid that I own without ghost cells
|
||||
// both non-tiled and tiled proc layouts use 0-1 fractional subdomain info
|
||||
|
||||
if (comm->layout != Comm::LAYOUT_TILED) {
|
||||
nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx);
|
||||
nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx) - 1;
|
||||
nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny);
|
||||
nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny) - 1;
|
||||
nzlo_in = static_cast<int> (comm->zsplit[comm->myloc[2]] * nz);
|
||||
nzhi_in = static_cast<int> (comm->zsplit[comm->myloc[2]+1] * nz) - 1;
|
||||
|
||||
} else {
|
||||
nxlo_in = static_cast<int> (comm->mysplit[0][0] * nx);
|
||||
nxhi_in = static_cast<int> (comm->mysplit[0][1] * nx) - 1;
|
||||
nylo_in = static_cast<int> (comm->mysplit[1][0] * ny);
|
||||
nyhi_in = static_cast<int> (comm->mysplit[1][1] * ny) - 1;
|
||||
nzlo_in = static_cast<int> (comm->mysplit[2][0] * nz);
|
||||
nzhi_in = static_cast<int> (comm->mysplit[2][1] * nz) - 1;
|
||||
}
|
||||
|
||||
// nlower,nupper = stencil size for mapping particles to FFT grid
|
||||
|
||||
int nlower = -(order-1)/2;
|
||||
int nupper = order/2;
|
||||
|
||||
// nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
|
||||
// global grid that my particles can contribute charge to
|
||||
// effectively nlo_in,nhi_in + ghost cells
|
||||
// nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
|
||||
// position a particle in my box can be at
|
||||
// dist[3] = particle position bound = subbox + skin/2.0
|
||||
// convert to triclinic if necessary
|
||||
// nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
|
||||
|
||||
double *prd,*boxlo,*sublo,*subhi;
|
||||
int triclinic = domain->triclinic;
|
||||
|
||||
if (triclinic == 0) {
|
||||
prd = domain->prd;
|
||||
boxlo = domain->boxlo;
|
||||
sublo = domain->sublo;
|
||||
subhi = domain->subhi;
|
||||
} else {
|
||||
prd = domain->prd_lamda;
|
||||
boxlo = domain->boxlo_lamda;
|
||||
sublo = domain->sublo_lamda;
|
||||
subhi = domain->subhi_lamda;
|
||||
}
|
||||
|
||||
double xprd = prd[0];
|
||||
double yprd = prd[1];
|
||||
double zprd = prd[2];
|
||||
|
||||
double dist[3] = {0.0,0.0,0.0};
|
||||
double cuthalf = 0.5*neighbor->skin;
|
||||
if (triclinic == 0) dist[0] = dist[1] = dist[2] = cuthalf;
|
||||
else kspacebbox(cuthalf,&dist[0]);
|
||||
|
||||
int nlo,nhi;
|
||||
|
||||
nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) * nx/xprd);
|
||||
nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) * nx/xprd);
|
||||
nxlo_out = nlo + nlower;
|
||||
nxhi_out = nhi + nupper;
|
||||
|
||||
nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) * ny/yprd);
|
||||
nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) * ny/yprd);
|
||||
nylo_out = nlo + nlower;
|
||||
nyhi_out = nhi + nupper;
|
||||
|
||||
nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) * nz/zprd);
|
||||
nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) * nz/zprd);
|
||||
nzlo_out = nlo + nlower;
|
||||
nzhi_out = nhi + nupper;
|
||||
|
||||
// x-pencil decomposition of FFT mesh
|
||||
// global indices range from 0 to N-1
|
||||
// each proc owns entire x-dimension, clumps of columns in y,z dimensions
|
||||
// npey_fft,npez_fft = # of procs in y,z dims
|
||||
// if nprocs is small enough, proc can own 1 or more entire xy planes,
|
||||
// else proc owns 2d sub-blocks of yz plane
|
||||
// me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
|
||||
// nlo_fft,nhi_fft = lower/upper limit of the section
|
||||
// of the global FFT mesh that I own in x-pencil decomposition
|
||||
|
||||
int me = comm->me;
|
||||
int nprocs = comm->nprocs;
|
||||
|
||||
int npey_fft,npez_fft;
|
||||
if (nz >= nprocs) {
|
||||
npey_fft = 1;
|
||||
npez_fft = nprocs;
|
||||
} else procs2grid2d(nprocs,ny,nz,npey_fft,npez_fft);
|
||||
|
||||
int me_y = me % npey_fft;
|
||||
int me_z = me / npey_fft;
|
||||
|
||||
nxlo_fft = 0;
|
||||
nxhi_fft = nx - 1;
|
||||
nylo_fft = me_y*ny/npey_fft;
|
||||
nyhi_fft = (me_y+1)*ny/npey_fft - 1;
|
||||
nzlo_fft = me_z*nz/npez_fft;
|
||||
nzhi_fft = (me_z+1)*nz/npez_fft - 1;
|
||||
|
||||
// grid sizes
|
||||
// nbrick_owned = owned grid points in brick decomp
|
||||
// nbrick_ghosts = owned + ghost grid points in grid decomp
|
||||
// nfft_owned = owned grid points in FFT decomp
|
||||
// ngrid_either = max of nbrick_onwed and nfft_owned
|
||||
// nfft = total FFT grid points
|
||||
|
||||
nbrick_owned = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
|
||||
(nzhi_in-nzlo_in+1);
|
||||
nbrick_ghosts = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
|
||||
(nzhi_out-nzlo_out+1);
|
||||
nfft_owned = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
|
||||
(nzhi_fft-nzlo_fft+1);
|
||||
|
||||
ngrid_either = MAX(nbrick_owned,nfft_owned);
|
||||
|
||||
// instantiate FFT, GridComm, and Remap
|
||||
|
||||
int tmp;
|
||||
|
||||
fft1 = new FFT3d(lmp,world,nx,ny,nz,
|
||||
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
|
||||
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
|
||||
1,0,&tmp,0);
|
||||
// 0,0,&tmp,0);
|
||||
|
||||
fft2 = new FFT3d(lmp,world,nx,ny,nz,
|
||||
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
|
||||
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
|
||||
//1,0,&tmp,0);
|
||||
0,0,&tmp,0);
|
||||
|
||||
gc = new GridComm(lmp,world,nx,ny,nz,
|
||||
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
|
||||
nxlo_out,nxhi_out,nylo_out,nyhi_out,nzlo_out,nzhi_out);
|
||||
|
||||
int nqty = flag3d ? 1 : 2;
|
||||
remap = new Remap(lmp,world,
|
||||
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
|
||||
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
|
||||
nqty,0,0,FFT_PRECISION,0);
|
||||
|
||||
// memory allocations
|
||||
|
||||
if (flag3d) {
|
||||
memory->create3d_offset(grid_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
|
||||
nxlo_out,nxhi_out,"amoeba:grid_brick");
|
||||
grid_brick_start = &grid_brick[nzlo_out][nylo_out][nxlo_out];
|
||||
cgrid_brick = nullptr;
|
||||
} else {
|
||||
memory->create4d_offset_last(cgrid_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
|
||||
nxlo_out,nxhi_out,2,"amoeba:cgrid_brick");
|
||||
grid_brick_start = &cgrid_brick[nzlo_out][nylo_out][nxlo_out][0];
|
||||
grid_brick = nullptr;
|
||||
}
|
||||
|
||||
memory->create(grid_fft,ngrid_either,"amoeba:grid_fft");
|
||||
memory->create(cfft,2*ngrid_either,"amoeba:cfft");
|
||||
|
||||
int ngc_buf1,ngc_buf2;
|
||||
gc->setup(ngc_buf1,ngc_buf2);
|
||||
memory->create(gc_buf1,nqty*ngc_buf1,"amoeba:gc_buf1");
|
||||
memory->create(gc_buf2,nqty*ngc_buf2,"amoeba:gc_buf2");
|
||||
|
||||
memory->create(remap_buf,nqty*nfft_owned,"amoeba:remap_buf");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
free all memory
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
AmoebaConvolution::~AmoebaConvolution()
|
||||
{
|
||||
memory->destroy3d_offset(grid_brick,nzlo_out,nylo_out,nxlo_out);
|
||||
memory->destroy4d_offset_last(cgrid_brick,nzlo_out,nylo_out,nxlo_out);
|
||||
memory->destroy(grid_fft);
|
||||
memory->destroy(cfft);
|
||||
memory->destroy(gc_buf1);
|
||||
memory->destroy(gc_buf2);
|
||||
memory->destroy(remap_buf);
|
||||
|
||||
delete fft1;
|
||||
delete fft2;
|
||||
delete gc;
|
||||
delete remap;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
zero brick grid, including ghosts
|
||||
can be 3d real or 4d complex array
|
||||
return pointer to data in brick grid, caller casts to 3d or 4d
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void *AmoebaConvolution::zero()
|
||||
{
|
||||
if (flag3d) return zero_3d();
|
||||
return zero_4d();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void *AmoebaConvolution::zero_3d()
|
||||
{
|
||||
if (!grid_brick) return nullptr;
|
||||
memset(&(grid_brick[nzlo_out][nylo_out][nxlo_out]),0,
|
||||
nbrick_ghosts*sizeof(FFT_SCALAR));
|
||||
return (void *) grid_brick;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void *AmoebaConvolution::zero_4d()
|
||||
{
|
||||
if (!cgrid_brick) return nullptr;
|
||||
memset(&(cgrid_brick[nzlo_out][nylo_out][nxlo_out][0]),0,
|
||||
2*nbrick_ghosts*sizeof(FFT_SCALAR));
|
||||
return (void *) cgrid_brick;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
perform pre-convolution grid operations
|
||||
can be 3d real or 4d complex array
|
||||
return pointer to complex cfft vector
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
FFT_SCALAR *AmoebaConvolution::pre_convolution()
|
||||
{
|
||||
if (flag3d) return pre_convolution_3d();
|
||||
return pre_convolution_4d();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
perform pre-convolution grid operations for 3d grid_brick array
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
FFT_SCALAR *AmoebaConvolution::pre_convolution_3d()
|
||||
{
|
||||
int ix,iy,iz,n;
|
||||
|
||||
// reverse comm for 3d brick grid + ghosts
|
||||
|
||||
#if DEBUG_AMOEBA
|
||||
debug_scalar(GRIDBRICK_OUT,"PRE Convo / PRE GridComm");
|
||||
#endif
|
||||
|
||||
gc->reverse_comm(GridComm::PAIR,amoeba,1,sizeof(FFT_SCALAR),which,
|
||||
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
|
||||
|
||||
#if DEBUG_AMOEBA
|
||||
debug_scalar(GRIDBRICK_IN,"PRE Convo / POST GridComm");
|
||||
debug_file(GRIDBRICK_IN,"pre.convo.post.gridcomm");
|
||||
#endif
|
||||
|
||||
// copy owned 3d brick grid values to FFT grid
|
||||
|
||||
n = 0;
|
||||
for (iz = nzlo_in; iz <= nzhi_in; iz++)
|
||||
for (iy = nylo_in; iy <= nyhi_in; iy++)
|
||||
for (ix = nxlo_in; ix <= nxhi_in; ix++)
|
||||
grid_fft[n++] = grid_brick[iz][iy][ix];
|
||||
|
||||
// remap FFT grid from brick to x pencil partitioning
|
||||
|
||||
remap->perform(grid_fft,grid_fft,remap_buf);
|
||||
|
||||
#if DEBUG_AMOEBA
|
||||
debug_scalar(FFT,"PRE Convo / POST Remap");
|
||||
debug_file(FFT,"pre.convo.post.remap");
|
||||
#endif
|
||||
|
||||
// copy real values into complex grid
|
||||
|
||||
n = 0;
|
||||
for (int i = 0; i < nfft_owned; i++) {
|
||||
cfft[n++] = grid_fft[i];
|
||||
cfft[n++] = ZEROF;
|
||||
}
|
||||
|
||||
// perform forward FFT
|
||||
|
||||
fft1->compute(cfft,cfft,FFT3d::FORWARD);
|
||||
|
||||
if (SCALE) {
|
||||
double scale = 1.0/nfft_global;
|
||||
for (int i = 0; i < 2*nfft_owned; i++) cfft[i] *= scale;
|
||||
}
|
||||
|
||||
#if DEBUG_AMOEBA
|
||||
debug_scalar(CFFT1,"PRE Convo / POST FFT");
|
||||
debug_file(CFFT1,"pre.convo.post.fft");
|
||||
#endif
|
||||
return cfft;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
perform pre-convolution grid operations for 4d cgrid_brick array
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
FFT_SCALAR *AmoebaConvolution::pre_convolution_4d()
|
||||
{
|
||||
int ix,iy,iz,n;
|
||||
|
||||
// reverse comm for 4d brick grid + ghosts
|
||||
|
||||
#if DEBUG_AMOEBA
|
||||
debug_scalar(GRIDBRICK_OUT,"PRE Convo / PRE GridComm");
|
||||
#endif
|
||||
|
||||
gc->reverse_comm(GridComm::PAIR,amoeba,2,sizeof(FFT_SCALAR),which,
|
||||
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
|
||||
|
||||
#if DEBUG_AMOEBA
|
||||
debug_scalar(GRIDBRICK_IN,"PRE Convo / POST GridComm");
|
||||
debug_file(GRIDBRICK_IN,"pre.convo.post.gridcomm");
|
||||
#endif
|
||||
// copy owned 4d brick grid values to FFT grid
|
||||
|
||||
n = 0;
|
||||
for (iz = nzlo_in; iz <= nzhi_in; iz++)
|
||||
for (iy = nylo_in; iy <= nyhi_in; iy++)
|
||||
for (ix = nxlo_in; ix <= nxhi_in; ix++) {
|
||||
cfft[n++] = cgrid_brick[iz][iy][ix][0];
|
||||
cfft[n++] = cgrid_brick[iz][iy][ix][1];
|
||||
}
|
||||
|
||||
// remap FFT grid from brick to x pencil partitioning
|
||||
// NOTE: could just setup FFT to start from brick decomp and skip remap
|
||||
|
||||
remap->perform(cfft,cfft,remap_buf);
|
||||
|
||||
#if DEBUG_AMOEBA
|
||||
debug_scalar(FFT,"PRE Convo / POST Remap");
|
||||
debug_file(FFT,"pre.convo.post.remap");
|
||||
#endif
|
||||
// perform forward FFT
|
||||
|
||||
fft1->compute(cfft,cfft,FFT3d::FORWARD);
|
||||
|
||||
if (SCALE) {
|
||||
double scale = 1.0/nfft_global;
|
||||
for (int i = 0; i < 2*nfft_owned; i++) cfft[i] *= scale;
|
||||
}
|
||||
|
||||
#if DEBUG_AMOEBA
|
||||
debug_scalar(CFFT1,"PRE Convo / POST FFT");
|
||||
debug_file(CFFT1,"pre.convo.post.fft");
|
||||
#endif
|
||||
return cfft;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
perform post-convolution grid operations
|
||||
can be 3d real or 4d complex array
|
||||
return pointer to data in brick grid, caller casts to 3d or 4d
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void *AmoebaConvolution::post_convolution()
|
||||
{
|
||||
if (flag3d) return post_convolution_3d();
|
||||
return post_convolution_4d();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
perform post-convolution grid operations for 3d grid_brick array
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void *AmoebaConvolution::post_convolution_3d()
|
||||
{
|
||||
int ix,iy,iz,n;
|
||||
|
||||
// perform backward FFT
|
||||
#if DEBUG_AMOEBA
|
||||
debug_scalar(CFFT1,"POST Convo / PRE FFT");
|
||||
debug_file(CFFT1,"post.convo.pre.fft");
|
||||
#endif
|
||||
fft2->compute(cfft,cfft,FFT3d::BACKWARD);
|
||||
|
||||
#if DEBUG_AMOEBA
|
||||
debug_scalar(CFFT2,"POST Convo / POST FFT");
|
||||
debug_file(CFFT2,"post.convo.post.fft");
|
||||
#endif
|
||||
// copy real portion of 1d complex values into 3d real grid
|
||||
|
||||
n = 0;
|
||||
for (iz = nzlo_in; iz <= nzhi_in; iz++)
|
||||
for (iy = nylo_in; iy <= nyhi_in; iy++)
|
||||
for (ix = nxlo_in; ix <= nxhi_in; ix++) {
|
||||
grid_brick[iz][iy][ix] = cfft[n];
|
||||
n += 2;
|
||||
}
|
||||
|
||||
// forward comm to populate ghost grid values
|
||||
|
||||
#if DEBUG_AMOEBA
|
||||
debug_scalar(GRIDBRICK_IN,"POST Convo / PRE gridcomm");
|
||||
debug_file(GRIDBRICK_IN,"post.convo.pre.gridcomm");
|
||||
#endif
|
||||
gc->forward_comm(GridComm::PAIR,amoeba,1,sizeof(FFT_SCALAR),which,
|
||||
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
|
||||
|
||||
return (void *) grid_brick;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
perform post-convolution grid operations for 4d cgrid_brick array
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void *AmoebaConvolution::post_convolution_4d()
|
||||
{
|
||||
int ix,iy,iz,n;
|
||||
|
||||
// perform backward FFT
|
||||
|
||||
#if DEBUG_AMOEBA
|
||||
debug_scalar(CFFT1,"POST Convo / PRE FFT");
|
||||
debug_file(CFFT1,"post.convo.pre.fft");
|
||||
#endif
|
||||
fft2->compute(cfft,cfft,FFT3d::BACKWARD);
|
||||
|
||||
#if DEBUG_AMOEBA
|
||||
debug_scalar(CFFT2,"POST Convo / POST FFT");
|
||||
debug_file(CFFT2,"post.convo.post.fft");
|
||||
#endif
|
||||
// copy 1d complex values into 4d complex grid
|
||||
|
||||
n = 0;
|
||||
for (iz = nzlo_in; iz <= nzhi_in; iz++)
|
||||
for (iy = nylo_in; iy <= nyhi_in; iy++)
|
||||
for (ix = nxlo_in; ix <= nxhi_in; ix++) {
|
||||
cgrid_brick[iz][iy][ix][0] = cfft[n++];
|
||||
cgrid_brick[iz][iy][ix][1] = cfft[n++];
|
||||
}
|
||||
|
||||
// forward comm to populate ghost grid values
|
||||
|
||||
#if DEBUG_AMOEBA
|
||||
debug_scalar(GRIDBRICK_IN,"POST Convo / PRE gridcomm");
|
||||
debug_file(GRIDBRICK_IN,"post.convo.pre.gridcomm");
|
||||
#endif
|
||||
gc->forward_comm(GridComm::PAIR,amoeba,2,sizeof(FFT_SCALAR),which,
|
||||
gc_buf1,gc_buf2,MPI_FFT_SCALAR);
|
||||
|
||||
return (void *) cgrid_brick;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
convert a sphere in box coords to an ellipsoid in lamda (0-1)
|
||||
coords and return the tight (axis-aligned) bounding box, does not
|
||||
preserve vector magnitude
|
||||
see http://www.loria.fr/~shornus/ellipsoid-bbox.html and
|
||||
http://yiningkarlli.blogspot.com/2013/02/
|
||||
bounding-boxes-for-ellipsoidsfigure.html
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AmoebaConvolution::kspacebbox(double r, double *b)
|
||||
{
|
||||
double *h = domain->h;
|
||||
double lx,ly,lz,xy,xz,yz;
|
||||
|
||||
lx = h[0]; ly = h[1]; lz = h[2];
|
||||
yz = h[3]; xz = h[4]; xy = h[5];
|
||||
|
||||
b[0] = r*sqrt(ly*ly*lz*lz + ly*ly*xz*xz - 2.0*ly*xy*xz*yz + lz*lz*xy*xy +
|
||||
xy*xy*yz*yz)/(lx*ly*lz);
|
||||
b[1] = r*sqrt(lz*lz + yz*yz)/(ly*lz);
|
||||
b[2] = r/lz;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
map nprocs to NX by NY grid as PX by PY procs - return optimal px,py
|
||||
copy of PPPM::procs2grid2d()
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AmoebaConvolution::procs2grid2d(int nprocs, int nx, int ny, int &px, int &py)
|
||||
{
|
||||
// loop thru all possible factorizations of nprocs
|
||||
// surf = surface area of largest proc sub-domain
|
||||
// innermost if test minimizes surface area and surface/volume ratio
|
||||
|
||||
int bestsurf = 2 * (nx + ny);
|
||||
int bestboxx = 0;
|
||||
int bestboxy = 0;
|
||||
|
||||
int boxx,boxy,surf,ipx,ipy;
|
||||
|
||||
ipx = 1;
|
||||
while (ipx <= nprocs) {
|
||||
if (nprocs % ipx == 0) {
|
||||
ipy = nprocs/ipx;
|
||||
boxx = nx/ipx;
|
||||
if (nx % ipx) boxx++;
|
||||
boxy = ny/ipy;
|
||||
if (ny % ipy) boxy++;
|
||||
surf = boxx + boxy;
|
||||
if (surf < bestsurf ||
|
||||
(surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) {
|
||||
bestsurf = surf;
|
||||
bestboxx = boxx;
|
||||
bestboxy = boxy;
|
||||
px = ipx;
|
||||
py = ipy;
|
||||
}
|
||||
}
|
||||
ipx++;
|
||||
}
|
||||
}
|
||||
|
||||
#if DEBUG_AMOEBA
|
||||
/* ----------------------------------------------------------------------
|
||||
output a scalar value to screen
|
||||
array = which array is being summed over
|
||||
---------------------------------------------------------------------- */
|
||||
|
||||
void AmoebaConvolution::debug_scalar(int array, const char *label)
|
||||
{
|
||||
double sum = 0.0;
|
||||
|
||||
if (array == GRIDBRICK_OUT) {
|
||||
if (flag3d) {
|
||||
for (int iz = nzlo_out; iz <= nzhi_out; iz++)
|
||||
for (int iy = nylo_out; iy <= nyhi_out; iy++)
|
||||
for (int ix = nxlo_out; ix <= nxhi_out; ix++)
|
||||
sum += grid_brick[iz][iy][ix];
|
||||
} else {
|
||||
for (int iz = nzlo_out; iz <= nzhi_out; iz++)
|
||||
for (int iy = nylo_out; iy <= nyhi_out; iy++)
|
||||
for (int ix = nxlo_out; ix <= nxhi_out; ix++) {
|
||||
sum += cgrid_brick[iz][iy][ix][0];
|
||||
sum += cgrid_brick[iz][iy][ix][1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (array == GRIDBRICK_IN) {
|
||||
if (flag3d) {
|
||||
for (int iz = nzlo_in; iz <= nzhi_in; iz++)
|
||||
for (int iy = nylo_in; iy <= nyhi_in; iy++)
|
||||
for (int ix = nxlo_in; ix <= nxhi_in; ix++)
|
||||
sum += grid_brick[iz][iy][ix];
|
||||
} else {
|
||||
for (int iz = nzlo_in; iz <= nzhi_in; iz++)
|
||||
for (int iy = nylo_in; iy <= nyhi_in; iy++)
|
||||
for (int ix = nxlo_in; ix <= nxhi_in; ix++) {
|
||||
sum += cgrid_brick[iz][iy][ix][0];
|
||||
sum += cgrid_brick[iz][iy][ix][1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (array == FFT) {
|
||||
if (flag3d) {
|
||||
for (int i = 0; i < nfft_owned; i++)
|
||||
sum += grid_fft[i];
|
||||
} else {
|
||||
for (int i = 0; i < 2*nfft_owned; i++)
|
||||
sum += cfft[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (array == CFFT1) {
|
||||
for (int i = 0; i < 2*nfft_owned; i++)
|
||||
sum += cfft[i];
|
||||
}
|
||||
|
||||
if (array == CFFT2) {
|
||||
for (int i = 0; i < 2*nbrick_owned; i++)
|
||||
sum += cfft[i];
|
||||
}
|
||||
|
||||
/*
|
||||
double sumall;
|
||||
MPI_Allreduce(&sum,&sumall,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
if (comm->me == 0) printf("%s: %s: %12.8g\n",labels[which],label,sumall);
|
||||
*/
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
dump grid values to a file
|
||||
array = which array is being output
|
||||
---------------------------------------------------------------------- */
|
||||
|
||||
void AmoebaConvolution::debug_file(int array, const char *label)
|
||||
{
|
||||
FILE *fp;
|
||||
|
||||
int me = comm->me;
|
||||
int nprocs = comm->nprocs;
|
||||
|
||||
// open file
|
||||
|
||||
char fname[128];
|
||||
sprintf(fname,"tmp.%s.%s",labels[which],label);
|
||||
if (me == 0) fp = fopen(fname,"w");
|
||||
|
||||
// file header
|
||||
// ncol = # of columns, including grid cell ID
|
||||
|
||||
bigint ntot = nx * ny * nz;
|
||||
|
||||
int ncol;
|
||||
char *columns;
|
||||
|
||||
if (array == CFFT1 || array == CFFT2 || !flag3d) {
|
||||
ncol = 3;
|
||||
columns = (char *) "id real imag";
|
||||
} else {
|
||||
ncol = 2;
|
||||
columns = (char *) "id value";
|
||||
}
|
||||
|
||||
char boundstr[9]; // encoding of boundary flags
|
||||
domain->boundary_string(boundstr);
|
||||
|
||||
if (me == 0) {
|
||||
fprintf(fp,"ITEM: TIMESTEP\n");
|
||||
fprintf(fp,BIGINT_FORMAT "\n",update->ntimestep);
|
||||
fprintf(fp,"ITEM: NUMBER OF ATOMS\n");
|
||||
fprintf(fp,BIGINT_FORMAT "\n",ntot);
|
||||
fprintf(fp,"ITEM: BOX BOUNDS %s\n",boundstr);
|
||||
fprintf(fp,"%-1.16e %-1.16e\n",domain->boxlo[0],domain->boxhi[0]);
|
||||
fprintf(fp,"%-1.16e %-1.16e\n",domain->boxlo[1],domain->boxhi[1]);
|
||||
fprintf(fp,"%-1.16e %-1.16e\n",domain->boxlo[2],domain->boxhi[2]);
|
||||
fprintf(fp,"ITEM: ATOMS %s\n",columns);
|
||||
}
|
||||
|
||||
// pack my values
|
||||
// ngrid = # of grid cells I own
|
||||
|
||||
int ngrid;
|
||||
if (array == GRIDBRICK_IN) ngrid = nbrick_owned;
|
||||
else if (array == FFT) ngrid = nfft_owned;
|
||||
else if (array == CFFT1) ngrid = nfft_owned;
|
||||
else if (array == CFFT2) ngrid = nbrick_owned;
|
||||
|
||||
int ngridmax;
|
||||
MPI_Allreduce(&ngrid,&ngridmax,1,MPI_INT,MPI_MAX,world);
|
||||
|
||||
double *buf,*buf2;
|
||||
memory->create(buf,ncol*ngridmax,"amoeba:buf");
|
||||
memory->create(buf2,ncol*ngridmax,"amoeba:buf2");
|
||||
|
||||
ngrid = 0;
|
||||
|
||||
if (array == GRIDBRICK_IN) {
|
||||
if (flag3d) {
|
||||
for (int iz = nzlo_in; iz <= nzhi_in; iz++)
|
||||
for (int iy = nylo_in; iy <= nyhi_in; iy++)
|
||||
for (int ix = nxlo_in; ix <= nxhi_in; ix++) {
|
||||
int id = iz*ny*nx + iy*nx + ix + 1;
|
||||
buf[ncol*ngrid] = id;
|
||||
buf[ncol*ngrid+1] = grid_brick[iz][iy][ix];
|
||||
ngrid++;
|
||||
}
|
||||
} else {
|
||||
for (int iz = nzlo_in; iz <= nzhi_in; iz++)
|
||||
for (int iy = nylo_in; iy <= nyhi_in; iy++)
|
||||
for (int ix = nxlo_in; ix <= nxhi_in; ix++) {
|
||||
int id = iz*ny*nx + iy*nx + ix + 1;
|
||||
buf[ncol*ngrid] = id;
|
||||
buf[ncol*ngrid+1] = cgrid_brick[iz][iy][ix][0];
|
||||
buf[ncol*ngrid+2] = cgrid_brick[iz][iy][ix][1];
|
||||
ngrid++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (array == FFT) {
|
||||
if (flag3d) {
|
||||
int m = 0;
|
||||
for (int iz = nzlo_fft; iz <= nzhi_fft; iz++)
|
||||
for (int iy = nylo_fft; iy <= nyhi_fft; iy++)
|
||||
for (int ix = nxlo_fft; ix <= nxhi_fft; ix++) {
|
||||
int id = iz*ny*nx + iy*nx + ix + 1;
|
||||
buf[ncol*ngrid] = id;
|
||||
buf[ncol*ngrid+1] = grid_fft[m++];
|
||||
ngrid++;
|
||||
}
|
||||
} else {
|
||||
int m = 0;
|
||||
for (int iz = nzlo_fft; iz <= nzhi_fft; iz++)
|
||||
for (int iy = nylo_fft; iy <= nyhi_fft; iy++)
|
||||
for (int ix = nxlo_fft; ix <= nxhi_fft; ix++) {
|
||||
int id = iz*ny*nx + iy*nx + ix + 1;
|
||||
buf[ncol*ngrid] = id;
|
||||
buf[ncol*ngrid+1] = cfft[m++];
|
||||
buf[ncol*ngrid+2] = cfft[m++];
|
||||
ngrid++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (array == CFFT1) {
|
||||
int m = 0;
|
||||
for (int iz = nzlo_fft; iz <= nzhi_fft; iz++)
|
||||
for (int iy = nylo_fft; iy <= nyhi_fft; iy++)
|
||||
for (int ix = nxlo_fft; ix <= nxhi_fft; ix++) {
|
||||
int id = iz*ny*nx + iy*nx + ix + 1;
|
||||
buf[ncol*ngrid] = id;
|
||||
buf[ncol*ngrid+1] = cfft[m++];
|
||||
buf[ncol*ngrid+2] = cfft[m++];
|
||||
ngrid++;
|
||||
}
|
||||
}
|
||||
|
||||
if (array == CFFT2) {
|
||||
int m = 0;
|
||||
for (int iz = nzlo_in; iz <= nzhi_in; iz++)
|
||||
for (int iy = nylo_in; iy <= nyhi_in; iy++)
|
||||
for (int ix = nxlo_in; ix <= nxhi_in; ix++) {
|
||||
int id = iz*ny*nx + iy*nx + ix + 1;
|
||||
buf[ncol*ngrid] = id;
|
||||
buf[ncol*ngrid+1] = cfft[m++];
|
||||
buf[ncol*ngrid+2] = cfft[m++];
|
||||
ngrid++;
|
||||
}
|
||||
}
|
||||
|
||||
// proc 0 outputs values
|
||||
// pings other procs, send/recv of their values
|
||||
|
||||
int tmp,nlines;
|
||||
MPI_Request request;
|
||||
MPI_Status status;
|
||||
|
||||
if (me == 0) {
|
||||
for (int iproc = 0; iproc < nprocs; iproc++) {
|
||||
if (iproc) {
|
||||
MPI_Irecv(buf,ngridmax*ncol,MPI_DOUBLE,iproc,0,world,&request);
|
||||
MPI_Send(&tmp,0,MPI_INT,me+iproc,0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
MPI_Get_count(&status,MPI_DOUBLE,&nlines);
|
||||
nlines /= ncol;
|
||||
} else nlines = ngrid;
|
||||
|
||||
int n = 0;
|
||||
for (int m = 0; m < nlines; m++) {
|
||||
if (ncol == 2)
|
||||
fprintf(fp,"%d %12.8g\n",(int) buf[n],buf[n+1]);
|
||||
else if (ncol == 3)
|
||||
fprintf(fp,"%d %12.8g %12.8g\n",(int ) buf[n],buf[n+1],buf[n+2]);
|
||||
n += ncol;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
MPI_Recv(&tmp,0,MPI_INT,0,0,world,MPI_STATUS_IGNORE);
|
||||
MPI_Rsend(buf,ngrid*ncol,MPI_DOUBLE,0,0,world);
|
||||
}
|
||||
|
||||
// close file
|
||||
|
||||
if (me == 0) fclose(fp);
|
||||
|
||||
// clean up
|
||||
|
||||
memory->destroy(buf);
|
||||
memory->destroy(buf2);
|
||||
}
|
||||
#endif
|
||||
85
src/AMOEBA/amoeba_convolution.h
Normal file
85
src/AMOEBA/amoeba_convolution.h
Normal file
@ -0,0 +1,85 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/ Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_AMOEBA_CONVOLUTION_H
|
||||
#define LMP_AMOEBA_CONVOLUTION_H
|
||||
|
||||
#include "pointers.h"
|
||||
|
||||
#ifdef FFT_SINGLE
|
||||
typedef float FFT_SCALAR;
|
||||
#define LMP_FFT_PREC "single"
|
||||
#define MPI_FFT_SCALAR MPI_FLOAT
|
||||
#else
|
||||
|
||||
typedef double FFT_SCALAR;
|
||||
#define LMP_FFT_PREC "double"
|
||||
#define MPI_FFT_SCALAR MPI_DOUBLE
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AmoebaConvolution : protected Pointers {
|
||||
public:
|
||||
int nx, ny, nz;
|
||||
int order;
|
||||
int nfft_owned; // owned grid points in FFT decomp
|
||||
int nxlo_in, nxhi_in, nylo_in, nyhi_in, nzlo_in, nzhi_in;
|
||||
int nxlo_out, nxhi_out, nylo_out, nyhi_out, nzlo_out, nzhi_out;
|
||||
int nxlo_fft, nxhi_fft, nylo_fft, nyhi_fft, nzlo_fft, nzhi_fft;
|
||||
bigint nfft_global; // nx * ny * nz
|
||||
double *grid_brick_start; // lower left corner of (c)grid_brick data
|
||||
|
||||
AmoebaConvolution(class LAMMPS *, class Pair *, int, int, int, int, int);
|
||||
~AmoebaConvolution();
|
||||
void *zero();
|
||||
FFT_SCALAR *pre_convolution();
|
||||
void *post_convolution();
|
||||
|
||||
private:
|
||||
int which; // caller name for convolution being performed
|
||||
int flag3d; // 1 if using 3d grid_brick, 0 for 4d cgrid_brick
|
||||
int nbrick_owned; // owned grid points in brick decomp
|
||||
int nbrick_ghosts; // owned + ghost brick grid points
|
||||
int ngrid_either; // max of nbrick_owned or nfft_owned
|
||||
|
||||
class Pair *amoeba;
|
||||
class FFT3d *fft1, *fft2;
|
||||
class GridComm *gc;
|
||||
class Remap *remap;
|
||||
|
||||
double ***grid_brick; // 3d real brick grid with ghosts
|
||||
double ****cgrid_brick; // 4d complex brick grid with ghosts
|
||||
|
||||
FFT_SCALAR *grid_fft; // 3d FFT grid as 1d vector
|
||||
FFT_SCALAR *cfft; // 3d complex FFT grid as 1d vector
|
||||
|
||||
double *gc_buf1, *gc_buf2; // buffers for GridComm
|
||||
double *remap_buf; // buffer for Remap
|
||||
|
||||
void *zero_3d();
|
||||
void *zero_4d();
|
||||
FFT_SCALAR *pre_convolution_3d();
|
||||
FFT_SCALAR *pre_convolution_4d();
|
||||
void *post_convolution_3d();
|
||||
void *post_convolution_4d();
|
||||
void kspacebbox(double, double *);
|
||||
void procs2grid2d(int, int, int, int &, int &);
|
||||
|
||||
// DEBUG
|
||||
|
||||
void debug_scalar(int, const char *);
|
||||
void debug_file(int, const char *);
|
||||
};
|
||||
} // namespace LAMMPS_NS
|
||||
#endif
|
||||
422
src/AMOEBA/amoeba_dispersion.cpp
Normal file
422
src/AMOEBA/amoeba_dispersion.cpp
Normal file
@ -0,0 +1,422 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/ Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_amoeba.h"
|
||||
|
||||
#include "amoeba_convolution.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "domain.h"
|
||||
#include "fft3d_wrap.h"
|
||||
#include "math_const.h"
|
||||
#include "math_special.h"
|
||||
#include "memory.h"
|
||||
#include "neigh_list.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
using MathSpecial::cube;
|
||||
using MathSpecial::powint;
|
||||
|
||||
enum{VDWL,REPULSE,QFER,DISP,MPOLE,POLAR,USOLV,DISP_LONG,MPOLE_LONG,POLAR_LONG};
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
dispersion = Ewald dispersion
|
||||
adapted from Tinker edisp1d() routine
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairAmoeba::dispersion()
|
||||
{
|
||||
// set cutoffs, taper coeffs, and PME params
|
||||
|
||||
if (use_dewald) choose(DISP_LONG);
|
||||
else choose(DISP);
|
||||
|
||||
// owned atoms
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
// compute the real space portion of the Ewald summation
|
||||
|
||||
if (disp_rspace_flag) dispersion_real();
|
||||
|
||||
// compute the reciprocal space part of the Ewald summation
|
||||
|
||||
if (disp_kspace_flag) dispersion_kspace();
|
||||
|
||||
// compute the self-energy portion of the Ewald summation
|
||||
|
||||
int itype,iclass;
|
||||
double term;
|
||||
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
itype = amtype[i];
|
||||
iclass = amtype2class[itype];
|
||||
term = powint(aewald,6) / 12.0;
|
||||
edisp += term*csix[iclass]*csix[iclass];
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
dispersion_real = real-space portion of Ewald dispersion
|
||||
adapted from Tinker edreal1d() routine
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairAmoeba::dispersion_real()
|
||||
{
|
||||
int i,j,ii,jj,itype,jtype,iclass,jclass;
|
||||
double xi,yi,zi;
|
||||
double xr,yr,zr;
|
||||
double e,de;
|
||||
double ci,ck;
|
||||
double r,r2,r6,r7;
|
||||
double ai,ai2;
|
||||
double ak,ak2;
|
||||
double di,di2,di3,di4,di5;
|
||||
double dk,dk2,dk3;
|
||||
double ti,ti2;
|
||||
double tk,tk2;
|
||||
double expi,expk;
|
||||
double damp3,damp5;
|
||||
double damp,ddamp;
|
||||
double ralpha2,scale;
|
||||
double expterm,term;
|
||||
double expa,rterm;
|
||||
double dedx,dedy,dedz;
|
||||
double vxx,vyx,vzx;
|
||||
double vyy,vzy,vzz;
|
||||
double factor_disp;
|
||||
|
||||
int inum,jnum;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
// owned atoms
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
|
||||
// neigh list
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// compute the real space portion of the Ewald summation
|
||||
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
itype = amtype[i];
|
||||
iclass = amtype2class[itype];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
ci = csix[iclass];
|
||||
ai = adisp[iclass];
|
||||
xi = x[i][0];
|
||||
yi = x[i][1];
|
||||
zi = x[i][2];
|
||||
|
||||
// decide whether to compute the current interaction
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
factor_disp = special_disp[sbmask15(j)];
|
||||
j &= NEIGHMASK15;
|
||||
|
||||
xr = xi - x[j][0];
|
||||
yr = yi - x[j][1];
|
||||
zr = zi - x[j][2];
|
||||
r2 = xr*xr + yr*yr + zr*zr;
|
||||
if (r2 > off2) continue;
|
||||
|
||||
// compute the energy contribution for this interaction
|
||||
|
||||
jtype = amtype[j];
|
||||
jclass = amtype2class[jtype];
|
||||
ck = csix[jclass];
|
||||
ak = adisp[jclass];
|
||||
|
||||
r6 = r2*r2*r2;
|
||||
ralpha2 = r2 * aewald*aewald;
|
||||
term = 1.0 + ralpha2 + 0.5*ralpha2*ralpha2;
|
||||
expterm = exp(-ralpha2);
|
||||
expa = expterm * term;
|
||||
|
||||
// find the damping factor for the dispersion interaction
|
||||
|
||||
r = sqrt(r2);
|
||||
r7 = r6 * r;
|
||||
di = ai * r;
|
||||
di2 = di * di;
|
||||
di3 = di * di2;
|
||||
dk = ak * r;
|
||||
expi = exp(-di);
|
||||
expk = exp(-dk);
|
||||
|
||||
if (ai != ak) {
|
||||
ai2 = ai * ai;
|
||||
ak2 = ak * ak;
|
||||
dk2 = dk * dk;
|
||||
dk3 = dk * dk2;
|
||||
ti = ak2 / (ak2-ai2);
|
||||
ti2 = ti * ti;
|
||||
tk = ai2 / (ai2-ak2);
|
||||
tk2 = tk * tk;
|
||||
damp3 = 1.0 - ti2*(1.0+di+0.5*di2)*expi - tk2*(1.0+dk+0.5*dk2)*expk -
|
||||
2.0*ti2*tk*(1.0+di)*expi - 2.0*tk2*ti*(1.0+dk)*expk;
|
||||
damp5 = 1.0 - ti2*(1.0+di+0.5*di2+di3/6.0)*expi -
|
||||
tk2*(1.0+dk+0.5*dk2 + dk3/6.0)*expk -
|
||||
2.0*ti2*tk*(1.0+di+di2/3.0)*expi - 2.0*tk2*ti*(1.0+dk+dk2/3.0)*expk;
|
||||
ddamp = 0.25 * di2 * ti2 * ai * expi * (r*ai+4.0*tk-1.0) +
|
||||
0.25 * dk2 * tk2 * ak * expk * (r*ak+4.0*ti-1.0);
|
||||
|
||||
} else {
|
||||
di4 = di2 * di2;
|
||||
di5 = di2 * di3;
|
||||
damp3 = 1.0 - (1.0+di+0.5*di2 + 7.0*di3/48.0+di4/48.0)*expi;
|
||||
damp5 = 1.0 - (1.0+di+0.5*di2 + di3/6.0+di4/24.0+di5/144.0)*expi;
|
||||
ddamp = ai * expi * (di5-3.0*di3-3.0*di2) / 96.0;
|
||||
}
|
||||
|
||||
damp = 1.5*damp5 - 0.5*damp3;
|
||||
|
||||
// apply damping and scaling factors for this interaction
|
||||
|
||||
scale = factor_disp * damp*damp;
|
||||
scale = scale - 1.0;
|
||||
e = -ci * ck * (expa+scale) / r6;
|
||||
rterm = -cube(ralpha2) * expterm / r;
|
||||
de = -6.0*e/r2 - ci*ck*rterm/r7 - 2.0*ci*ck*factor_disp*damp*ddamp/r7;
|
||||
|
||||
edisp += e;
|
||||
|
||||
// increment the damped dispersion derivative components
|
||||
|
||||
dedx = de * xr;
|
||||
dedy = de * yr;
|
||||
dedz = de * zr;
|
||||
f[i][0] -= dedx;
|
||||
f[i][1] -= dedy;
|
||||
f[i][2] -= dedz;
|
||||
f[j][0] += dedx;
|
||||
f[j][1] += dedy;
|
||||
f[j][2] += dedz;
|
||||
|
||||
// increment the internal virial tensor components
|
||||
|
||||
if (vflag_global) {
|
||||
vxx = xr * dedx;
|
||||
vyx = yr * dedx;
|
||||
vzx = zr * dedx;
|
||||
vyy = yr * dedy;
|
||||
vzy = zr * dedy;
|
||||
vzz = zr * dedz;
|
||||
|
||||
virdisp[0] -= vxx;
|
||||
virdisp[1] -= vyy;
|
||||
virdisp[2] -= vzz;
|
||||
virdisp[3] -= vyx;
|
||||
virdisp[4] -= vzx;
|
||||
virdisp[5] -= vzy;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
dispersion_kspace = KSpace portion of Ewald dispersion
|
||||
adapted from Tinker edrecip1d() routine
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairAmoeba::dispersion_kspace()
|
||||
{
|
||||
int i,j,k,m,n,ib,jb,kb,itype,iclass;
|
||||
int nhalf1,nhalf2,nhalf3;
|
||||
int nxlo,nxhi,nylo,nyhi,nzlo,nzhi;
|
||||
double e,fi,denom,scale;
|
||||
double r1,r2,r3;
|
||||
double h1,h2,h3;
|
||||
double term,vterm;
|
||||
double expterm;
|
||||
double erfcterm;
|
||||
double hsq,struc2;
|
||||
double h,hhh,b,bfac;
|
||||
double term1,denom0;
|
||||
double fac1,fac2,fac3;
|
||||
double de1,de2,de3;
|
||||
double dt1,dt2,dt3;
|
||||
double t1,t2,t3;
|
||||
|
||||
// return if the Ewald coefficient is zero
|
||||
|
||||
if (aewald < 1.0e-6) return;
|
||||
|
||||
// owned atoms
|
||||
|
||||
double **f = atom->f;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
double volbox = domain->prd[0] * domain->prd[1] * domain->prd[2];
|
||||
|
||||
// FFT moduli pre-computations
|
||||
// set igrid for each atom and its B-spline coeffs
|
||||
|
||||
nfft1 = d_kspace->nx;
|
||||
nfft2 = d_kspace->ny;
|
||||
nfft3 = d_kspace->nz;
|
||||
bsorder = d_kspace->order;
|
||||
|
||||
moduli();
|
||||
bspline_fill();
|
||||
|
||||
// gridpre = my portion of 3d grid in brick decomp w/ ghost values
|
||||
// zeroed by zero()
|
||||
|
||||
double ***gridpre = (double ***) d_kspace->zero();
|
||||
|
||||
// map atoms to grid
|
||||
|
||||
grid_disp(gridpre);
|
||||
|
||||
// pre-convolution operations including forward FFT
|
||||
// gridfft = my portion of complex 3d grid in FFT decomposition
|
||||
|
||||
double *gridfft = d_kspace->pre_convolution();
|
||||
|
||||
// ---------------------
|
||||
// convolution operation
|
||||
// ---------------------
|
||||
|
||||
nhalf1 = (nfft1+1) / 2;
|
||||
nhalf2 = (nfft2+1) / 2;
|
||||
nhalf3 = (nfft3+1) / 2;
|
||||
|
||||
nxlo = d_kspace->nxlo_fft;
|
||||
nxhi = d_kspace->nxhi_fft;
|
||||
nylo = d_kspace->nylo_fft;
|
||||
nyhi = d_kspace->nyhi_fft;
|
||||
nzlo = d_kspace->nzlo_fft;
|
||||
nzhi = d_kspace->nzhi_fft;
|
||||
|
||||
bfac = MY_PI / aewald;
|
||||
fac1 = 2.0*pow(MY_PI,3.5);
|
||||
fac2 = cube(aewald);
|
||||
fac3 = -2.0*aewald*MY_PI*MY_PI;
|
||||
denom0 = (6.0*volbox)/pow(MY_PI,1.5);
|
||||
|
||||
n = 0;
|
||||
for (k = nzlo; k <= nzhi; k++) {
|
||||
for (j = nylo; j <= nyhi; j++) {
|
||||
for (i = nxlo; i <= nxhi; i++) {
|
||||
r1 = (i >= nhalf1) ? i-nfft1 : i;
|
||||
r2 = (j >= nhalf2) ? j-nfft2 : j;
|
||||
r3 = (k >= nhalf3) ? k-nfft3 : k;
|
||||
h1 = recip[0][0]*r1 + recip[0][1]*r2 + recip[0][2]*r3; // matvec
|
||||
h2 = recip[1][0]*r1 + recip[1][1]*r2 + recip[1][2]*r3;
|
||||
h3 = recip[2][0]*r1 + recip[2][1]*r2 + recip[2][2]*r3;
|
||||
hsq = h1*h1 + h2*h2 + h3*h3;
|
||||
h = sqrt(hsq);
|
||||
b = h*bfac;
|
||||
hhh = h*hsq;
|
||||
term = -b*b;
|
||||
expterm = 0.0;
|
||||
erfcterm = erfc(b);
|
||||
denom = denom0*bsmod1[i]*bsmod2[j]*bsmod3[k];
|
||||
if (term > -50.0 && hsq != 0.0) {
|
||||
expterm = exp(term);
|
||||
erfcterm = erfc(b);
|
||||
term1 = fac1*erfcterm*hhh + expterm*(fac2 + fac3*hsq);
|
||||
struc2 = gridfft[n]*gridfft[n] + gridfft[n+1]*gridfft[n+1];
|
||||
e = -(term1 / denom) * struc2;
|
||||
edisp += e;
|
||||
if (vflag_global) {
|
||||
vterm = 3.0 * (fac1*erfcterm*h + fac3*expterm) * struc2/denom;
|
||||
virdisp[0] -= h1*h1*vterm - e;
|
||||
virdisp[1] -= h2*h2*vterm - e;
|
||||
virdisp[2] -= h3*h3*vterm - e;
|
||||
virdisp[3] -= h1*h2*vterm;
|
||||
virdisp[4] -= h1*h3*vterm;
|
||||
virdisp[5] -= h2*h3*vterm;
|
||||
}
|
||||
} else term1 = 0.0;
|
||||
scale = -term1 / denom;
|
||||
gridfft[n] *= scale;
|
||||
gridfft[n+1] *= scale;
|
||||
n += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// post-convolution operations including backward FFT
|
||||
// gridppost = my portion of 3d grid in brick decomp w/ ghost values
|
||||
|
||||
double ***gridpost = (double ***) d_kspace->post_convolution();
|
||||
|
||||
// get first derivatives of the reciprocal space energy
|
||||
|
||||
int nlpts = (bsorder-1) / 2;
|
||||
|
||||
for (m = 0; m < nlocal; m++) {
|
||||
itype = amtype[m];
|
||||
iclass = amtype2class[itype];
|
||||
de1 = de2 = de3 = 0.0;
|
||||
|
||||
k = igrid[m][2] - nlpts;
|
||||
for (kb = 0; kb < bsorder; kb++) {
|
||||
t3 = thetai3[m][kb][0];
|
||||
dt3 = nfft3 * thetai3[m][kb][1];
|
||||
|
||||
j = igrid[m][1] - nlpts;
|
||||
for (jb = 0; jb < bsorder; jb++) {
|
||||
t2 = thetai2[m][jb][0];
|
||||
dt2 = nfft2 * thetai2[m][jb][1];
|
||||
|
||||
i = igrid[m][0] - nlpts;
|
||||
for (ib = 0; ib < bsorder; ib++) {
|
||||
t1 = thetai1[m][ib][0];
|
||||
dt1 = nfft1 * thetai1[m][ib][1];
|
||||
term = gridpost[k][j][i];
|
||||
de1 += 2.0*term*dt1*t2*t3;
|
||||
de2 += 2.0*term*dt2*t1*t3;
|
||||
de3 += 2.0*term*dt3*t1*t2;
|
||||
i++;
|
||||
}
|
||||
j++;
|
||||
}
|
||||
k++;
|
||||
}
|
||||
|
||||
fi = csix[iclass];
|
||||
f[m][0] -= fi * (recip[0][0]*de1 + recip[0][1]*de2 + recip[0][2]*de3);
|
||||
f[m][1] -= fi * (recip[1][0]*de1 + recip[1][1]*de2 + recip[1][2]*de3);
|
||||
f[m][2] -= fi * (recip[2][0]*de1 + recip[2][1]*de2 + recip[2][2]*de3);
|
||||
}
|
||||
|
||||
// account for the energy and virial correction terms
|
||||
|
||||
term = csixpr * aewald*aewald*aewald / denom0;
|
||||
|
||||
if (comm->me == 0) {
|
||||
edisp -= term;
|
||||
if (vflag_global) {
|
||||
virdisp[0] -= term;
|
||||
virdisp[1] -= term;
|
||||
virdisp[2] -= term;
|
||||
}
|
||||
}
|
||||
}
|
||||
1402
src/AMOEBA/amoeba_file.cpp
Normal file
1402
src/AMOEBA/amoeba_file.cpp
Normal file
File diff suppressed because it is too large
Load Diff
205
src/AMOEBA/amoeba_hal.cpp
Normal file
205
src/AMOEBA/amoeba_hal.cpp
Normal file
@ -0,0 +1,205 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/ Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_amoeba.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "error.h"
|
||||
#include "neigh_list.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{VDWL,REPULSE,QFER,DISP,MPOLE,POLAR,USOLV,DISP_LONG,MPOLE_LONG,POLAR_LONG};
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
hal = buffered 14-7 Vdwl interactions
|
||||
adapted from Tinker ehal1c() routine
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairAmoeba::hal()
|
||||
{
|
||||
int i,j,ii,jj,itype,jtype,iclass,jclass,iv,jv;
|
||||
int special_which;
|
||||
double e,de,eps;
|
||||
double rv,rv7;
|
||||
double xi,yi,zi;
|
||||
double xr,yr,zr;
|
||||
double redi,rediv;
|
||||
double redj,redjv;
|
||||
double dedx,dedy,dedz;
|
||||
double rho,tau,tau7;
|
||||
double dtau,gtau;
|
||||
double taper,dtaper;
|
||||
double rik,rik2,rik3;
|
||||
double rik4,rik5;
|
||||
double rik6,rik7;
|
||||
double vxx,vyy,vzz;
|
||||
double vyx,vzx,vzy;
|
||||
double factor_hal;
|
||||
|
||||
int inum,jnum;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
// set cutoffs and taper coeffs
|
||||
|
||||
choose(VDWL);
|
||||
|
||||
// owned atoms
|
||||
|
||||
double **f = atom->f;
|
||||
|
||||
// neigh list
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// find van der Waals energy and derivatives via neighbor list
|
||||
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
itype = amtype[i];
|
||||
iclass = amtype2class[itype];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
redi = kred[iclass];
|
||||
rediv = 1.0 - redi;
|
||||
xi = xred[i][0];
|
||||
yi = xred[i][1];
|
||||
zi = xred[i][2];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
special_which = sbmask15(j);
|
||||
factor_hal = special_hal[special_which];
|
||||
if (factor_hal == 0.0) continue;
|
||||
j &= NEIGHMASK15;
|
||||
|
||||
xr = xi - xred[j][0];
|
||||
yr = yi - xred[j][1];
|
||||
zr = zi - xred[j][2];
|
||||
rik2 = xr*xr + yr*yr + zr*zr;
|
||||
|
||||
if (rik2 > off2) continue;
|
||||
|
||||
// compute the energy contribution for this interaction
|
||||
|
||||
jtype = amtype[j];
|
||||
jclass = amtype2class[jtype];
|
||||
|
||||
// check for an interaction distance less than the cutoff
|
||||
// special_which = 3 is a 1-4 neighbor with its own sigma,epsilon
|
||||
|
||||
rik = sqrt(rik2);
|
||||
rv = radmin[iclass][jclass];
|
||||
eps = epsilon[iclass][jclass];
|
||||
if (special_which == 3) {
|
||||
rv = radmin4[iclass][jclass];
|
||||
eps = epsilon4[iclass][jclass];
|
||||
}
|
||||
eps *= factor_hal;
|
||||
|
||||
rv7 = pow(rv,7.0);
|
||||
rik6 = pow(rik2,3.0);
|
||||
rik7 = rik6 * rik;
|
||||
rho = rik7 + ghal*rv7;
|
||||
tau = (dhal+1.0) / (rik + dhal*rv);
|
||||
tau7 = pow(tau,7.0);
|
||||
dtau = tau / (dhal+1.0);
|
||||
gtau = eps*tau7*rik6*(ghal+1.0)*pow(rv7/rho,2.0);
|
||||
e = eps*tau7*rv7*((ghal+1.0)*rv7/rho-2.0);
|
||||
de = -7.0 * (dtau*e+gtau);
|
||||
|
||||
// use energy switching if near the cutoff distance
|
||||
|
||||
if (rik2 > cut2) {
|
||||
rik3 = rik2 * rik;
|
||||
rik4 = rik2 * rik2;
|
||||
rik5 = rik2 * rik3;
|
||||
taper = c5*rik5 + c4*rik4 + c3*rik3 + c2*rik2 + c1*rik + c0;
|
||||
dtaper = 5.0*c5*rik4 + 4.0*c4*rik3 + 3.0*c3*rik2 + 2.0*c2*rik + c1;
|
||||
de = e*dtaper + de*taper;
|
||||
e *= taper;
|
||||
}
|
||||
|
||||
ehal += e;
|
||||
|
||||
// find the chain rule terms for derivative components
|
||||
|
||||
de = de / rik;
|
||||
dedx = de * xr;
|
||||
dedy = de * yr;
|
||||
dedz = de * zr;
|
||||
|
||||
// increment the total van der Waals energy and derivatives
|
||||
// if jv < 0, trigger an error, needed H-bond partner is missing
|
||||
|
||||
iv = red2local[i];
|
||||
jv = red2local[j];
|
||||
if (jv < 0)
|
||||
error->one(FLERR,"AMOEBA hal cannot find H bond partner - "
|
||||
"ghost comm is too short");
|
||||
|
||||
if (i == iv) {
|
||||
f[i][0] -= dedx;
|
||||
f[i][1] -= dedy;
|
||||
f[i][2] -= dedz;
|
||||
} else {
|
||||
f[i][0] -= dedx*redi;
|
||||
f[i][1] -= dedy*redi;
|
||||
f[i][2] -= dedz*redi;
|
||||
f[iv][0] -= dedx*rediv;
|
||||
f[iv][1] -= dedy*rediv;
|
||||
f[iv][2] -= dedz*rediv;
|
||||
}
|
||||
|
||||
if (j == jv) {
|
||||
f[j][0] += dedx;
|
||||
f[j][1] += dedy;
|
||||
f[j][2] += dedz;
|
||||
} else {
|
||||
redj = kred[jclass];
|
||||
redjv = 1.0 - redj;
|
||||
f[j][0] += dedx*redj;
|
||||
f[j][1] += dedy*redj;
|
||||
f[j][2] += dedz*redj;
|
||||
f[jv][0] += dedx*redjv;
|
||||
f[jv][1] += dedy*redjv;
|
||||
f[jv][2] += dedz*redjv;
|
||||
}
|
||||
|
||||
// increment the internal virial tensor components
|
||||
|
||||
if (vflag_global) {
|
||||
vxx = xr * dedx;
|
||||
vyx = yr * dedx;
|
||||
vzx = zr * dedx;
|
||||
vyy = yr * dedy;
|
||||
vzy = zr * dedy;
|
||||
vzz = zr * dedz;
|
||||
|
||||
virhal[0] -= vxx;
|
||||
virhal[1] -= vyy;
|
||||
virhal[2] -= vzz;
|
||||
virhal[3] -= vyx;
|
||||
virhal[4] -= vzx;
|
||||
virhal[5] -= vzy;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
1699
src/AMOEBA/amoeba_induce.cpp
Normal file
1699
src/AMOEBA/amoeba_induce.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1236
src/AMOEBA/amoeba_kspace.cpp
Normal file
1236
src/AMOEBA/amoeba_kspace.cpp
Normal file
File diff suppressed because it is too large
Load Diff
982
src/AMOEBA/amoeba_multipole.cpp
Normal file
982
src/AMOEBA/amoeba_multipole.cpp
Normal file
@ -0,0 +1,982 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/ Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_amoeba.h"
|
||||
|
||||
#include "amoeba_convolution.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "domain.h"
|
||||
#include "fft3d_wrap.h"
|
||||
#include "math_const.h"
|
||||
#include "memory.h"
|
||||
#include "neigh_list.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
enum{FIELD,ZRSD,TORQUE,UFLD}; // reverse comm
|
||||
enum{VDWL,REPULSE,QFER,DISP,MPOLE,POLAR,USOLV,DISP_LONG,MPOLE_LONG,POLAR_LONG};
|
||||
|
||||
#ifdef FFT_SINGLE
|
||||
#define ZEROF 0.0f
|
||||
#define ONEF 1.0f
|
||||
#else
|
||||
#define ZEROF 0.0
|
||||
#define ONEF 1.0
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
multipole = multipole interactions
|
||||
adapted from Tinker empole1d() routine
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairAmoeba::multipole()
|
||||
{
|
||||
double e;
|
||||
double felec;
|
||||
double term,fterm;
|
||||
double ci;
|
||||
double dix,diy,diz;
|
||||
double qixx,qixy,qixz,qiyy,qiyz,qizz;
|
||||
double cii,dii,qii;
|
||||
|
||||
// set cutoffs, taper coeffs, and PME params
|
||||
|
||||
if (use_ewald) choose(MPOLE_LONG);
|
||||
else choose(MPOLE);
|
||||
|
||||
// owned atoms
|
||||
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
// zero repulsion torque on owned + ghost atoms
|
||||
|
||||
const int nall = nlocal + atom->nghost;
|
||||
|
||||
for (int i = 0; i < nall; i++) {
|
||||
tq[i][0] = 0.0;
|
||||
tq[i][1] = 0.0;
|
||||
tq[i][2] = 0.0;
|
||||
}
|
||||
|
||||
// set the energy unit conversion factor
|
||||
|
||||
felec = electric / am_dielectric;
|
||||
|
||||
// compute the real space part of the Ewald summation
|
||||
|
||||
if (mpole_rspace_flag) multipole_real();
|
||||
|
||||
// compute the reciprocal space part of the Ewald summation
|
||||
|
||||
if (mpole_kspace_flag) multipole_kspace();
|
||||
|
||||
// compute the Ewald self-energy term over all the atoms
|
||||
|
||||
term = 2.0 * aewald * aewald;
|
||||
fterm = -felec * aewald / MY_PIS;
|
||||
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
ci = rpole[i][0];
|
||||
dix = rpole[i][1];
|
||||
diy = rpole[i][2];
|
||||
diz = rpole[i][3];
|
||||
qixx = rpole[i][4];
|
||||
qixy = rpole[i][5];
|
||||
qixz = rpole[i][6];
|
||||
qiyy = rpole[i][8];
|
||||
qiyz = rpole[i][9];
|
||||
qizz = rpole[i][12];
|
||||
cii = ci*ci;
|
||||
dii = dix*dix + diy*diy + diz*diz;
|
||||
qii = 2.0*(qixy*qixy+qixz*qixz+qiyz*qiyz) +
|
||||
qixx*qixx + qiyy*qiyy + qizz*qizz;
|
||||
e = fterm * (cii + term*(dii/3.0+2.0*term*qii/5.0));
|
||||
empole += e;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
multipole_real = real-space portion of multipole interactions
|
||||
adapted from Tinker emreal1d() routine
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairAmoeba::multipole_real()
|
||||
{
|
||||
int i,j,k,itype,jtype,iclass,jclass;
|
||||
int ii,jj;
|
||||
int ix,iy,iz;
|
||||
double e,de,felec;
|
||||
double bfac;
|
||||
double alsq2,alsq2n;
|
||||
double exp2a,ralpha;
|
||||
double scalek;
|
||||
double xi,yi,zi;
|
||||
double xr,yr,zr;
|
||||
double xix,yix,zix;
|
||||
double xiy,yiy,ziy;
|
||||
double xiz,yiz,ziz;
|
||||
double r,r2,rr1,rr3;
|
||||
double rr5,rr7,rr9,rr11;
|
||||
double rr1i,rr3i,rr5i,rr7i;
|
||||
double rr1k,rr3k,rr5k,rr7k;
|
||||
double rr1ik,rr3ik,rr5ik;
|
||||
double rr7ik,rr9ik,rr11ik;
|
||||
double ci,dix,diy,diz;
|
||||
double qixx,qixy,qixz;
|
||||
double qiyy,qiyz,qizz;
|
||||
double ck,dkx,dky,dkz;
|
||||
double qkxx,qkxy,qkxz;
|
||||
double qkyy,qkyz,qkzz;
|
||||
double dir,dkr,dik,qik;
|
||||
double qix,qiy,qiz,qir;
|
||||
double qkx,qky,qkz,qkr;
|
||||
double diqk,dkqi,qiqk;
|
||||
double dirx,diry,dirz;
|
||||
double dkrx,dkry,dkrz;
|
||||
double dikx,diky,dikz;
|
||||
double qirx,qiry,qirz;
|
||||
double qkrx,qkry,qkrz;
|
||||
double qikx,qiky,qikz;
|
||||
double qixk,qiyk,qizk;
|
||||
double qkxi,qkyi,qkzi;
|
||||
double qikrx,qikry,qikrz;
|
||||
double qkirx,qkiry,qkirz;
|
||||
double diqkx,diqky,diqkz;
|
||||
double dkqix,dkqiy,dkqiz;
|
||||
double diqkrx,diqkry,diqkrz;
|
||||
double dkqirx,dkqiry,dkqirz;
|
||||
double dqikx,dqiky,dqikz;
|
||||
double corei,corek;
|
||||
double vali,valk;
|
||||
double alphai,alphak;
|
||||
double term1,term2,term3;
|
||||
double term4,term5,term6;
|
||||
double term1i,term2i,term3i;
|
||||
double term1k,term2k,term3k;
|
||||
double term1ik,term2ik,term3ik;
|
||||
double term4ik,term5ik;
|
||||
double frcx,frcy,frcz;
|
||||
double vxx,vyy,vzz;
|
||||
double vxy,vxz,vyz;
|
||||
double factor_mpole;
|
||||
double ttmi[3],ttmk[3];
|
||||
double fix[3],fiy[3],fiz[3];
|
||||
double dmpi[9],dmpj[9];
|
||||
double dmpij[11];
|
||||
double bn[6];
|
||||
|
||||
int inum,jnum;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
// owned atoms
|
||||
|
||||
double *pval = atom->dvector[index_pval];
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
// neigh list
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// set conversion factor, cutoff and switching coefficients
|
||||
|
||||
felec = electric / am_dielectric;
|
||||
|
||||
// DEBUG
|
||||
|
||||
//int count = 0;
|
||||
//int imin,imax;
|
||||
|
||||
// compute the real space portion of the Ewald summation
|
||||
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
itype = amtype[i];
|
||||
iclass = amtype2class[itype];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
xi = x[i][0];
|
||||
yi = x[i][1];
|
||||
zi = x[i][2];
|
||||
ci = rpole[i][0];
|
||||
dix = rpole[i][1];
|
||||
diy = rpole[i][2];
|
||||
diz = rpole[i][3];
|
||||
qixx = rpole[i][4];
|
||||
qixy = rpole[i][5];
|
||||
qixz = rpole[i][6];
|
||||
qiyy = rpole[i][8];
|
||||
qiyz = rpole[i][9];
|
||||
qizz = rpole[i][12];
|
||||
if (!amoeba) {
|
||||
corei = pcore[iclass];
|
||||
alphai = palpha[iclass];
|
||||
vali = pval[i];
|
||||
}
|
||||
|
||||
// evaluate all sites within the cutoff distance
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
factor_mpole = special_mpole[sbmask15(j)];
|
||||
j &= NEIGHMASK15;
|
||||
|
||||
xr = x[j][0] - xi;
|
||||
yr = x[j][1] - yi;
|
||||
zr = x[j][2] - zi;
|
||||
r2 = xr*xr + yr*yr + zr*zr;
|
||||
if (r2 > off2) continue;
|
||||
|
||||
// DEBUG
|
||||
|
||||
//imin = MIN(atom->tag[i],atom->tag[j]);
|
||||
//imax = MAX(atom->tag[i],atom->tag[j]);
|
||||
|
||||
jtype = amtype[j];
|
||||
jclass = amtype2class[jtype];
|
||||
|
||||
r = sqrt(r2);
|
||||
ck = rpole[j][0];
|
||||
dkx = rpole[j][1];
|
||||
dky = rpole[j][2];
|
||||
dkz = rpole[j][3];
|
||||
qkxx = rpole[j][4];
|
||||
qkxy = rpole[j][5];
|
||||
qkxz = rpole[j][6];
|
||||
qkyy = rpole[j][8];
|
||||
qkyz = rpole[j][9];
|
||||
qkzz = rpole[j][12];
|
||||
|
||||
// intermediates involving moments and separation distance
|
||||
|
||||
dir = dix*xr + diy*yr + diz*zr;
|
||||
qix = qixx*xr + qixy*yr + qixz*zr;
|
||||
qiy = qixy*xr + qiyy*yr + qiyz*zr;
|
||||
qiz = qixz*xr + qiyz*yr + qizz*zr;
|
||||
qir = qix*xr + qiy*yr + qiz*zr;
|
||||
dkr = dkx*xr + dky*yr + dkz*zr;
|
||||
qkx = qkxx*xr + qkxy*yr + qkxz*zr;
|
||||
qky = qkxy*xr + qkyy*yr + qkyz*zr;
|
||||
qkz = qkxz*xr + qkyz*yr + qkzz*zr;
|
||||
qkr = qkx*xr + qky*yr + qkz*zr;
|
||||
dik = dix*dkx + diy*dky + diz*dkz;
|
||||
qik = qix*qkx + qiy*qky + qiz*qkz;
|
||||
diqk = dix*qkx + diy*qky + diz*qkz;
|
||||
dkqi = dkx*qix + dky*qiy + dkz*qiz;
|
||||
qiqk = 2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) +
|
||||
qixx*qkxx + qiyy*qkyy + qizz*qkzz;
|
||||
|
||||
// additional intermediates involving moments and distance
|
||||
|
||||
dirx = diy*zr - diz*yr;
|
||||
diry = diz*xr - dix*zr;
|
||||
dirz = dix*yr - diy*xr;
|
||||
dkrx = dky*zr - dkz*yr;
|
||||
dkry = dkz*xr - dkx*zr;
|
||||
dkrz = dkx*yr - dky*xr;
|
||||
dikx = diy*dkz - diz*dky;
|
||||
diky = diz*dkx - dix*dkz;
|
||||
dikz = dix*dky - diy*dkx;
|
||||
qirx = qiz*yr - qiy*zr;
|
||||
qiry = qix*zr - qiz*xr;
|
||||
qirz = qiy*xr - qix*yr;
|
||||
qkrx = qkz*yr - qky*zr;
|
||||
qkry = qkx*zr - qkz*xr;
|
||||
qkrz = qky*xr - qkx*yr;
|
||||
qikx = qky*qiz - qkz*qiy;
|
||||
qiky = qkz*qix - qkx*qiz;
|
||||
qikz = qkx*qiy - qky*qix;
|
||||
qixk = qixx*qkx + qixy*qky + qixz*qkz;
|
||||
qiyk = qixy*qkx + qiyy*qky + qiyz*qkz;
|
||||
qizk = qixz*qkx + qiyz*qky + qizz*qkz;
|
||||
qkxi = qkxx*qix + qkxy*qiy + qkxz*qiz;
|
||||
qkyi = qkxy*qix + qkyy*qiy + qkyz*qiz;
|
||||
qkzi = qkxz*qix + qkyz*qiy + qkzz*qiz;
|
||||
qikrx = qizk*yr - qiyk*zr;
|
||||
qikry = qixk*zr - qizk*xr;
|
||||
qikrz = qiyk*xr - qixk*yr;
|
||||
qkirx = qkzi*yr - qkyi*zr;
|
||||
qkiry = qkxi*zr - qkzi*xr;
|
||||
qkirz = qkyi*xr - qkxi*yr;
|
||||
diqkx = dix*qkxx + diy*qkxy + diz*qkxz;
|
||||
diqky = dix*qkxy + diy*qkyy + diz*qkyz;
|
||||
diqkz = dix*qkxz + diy*qkyz + diz*qkzz;
|
||||
dkqix = dkx*qixx + dky*qixy + dkz*qixz;
|
||||
dkqiy = dkx*qixy + dky*qiyy + dkz*qiyz;
|
||||
dkqiz = dkx*qixz + dky*qiyz + dkz*qizz;
|
||||
diqkrx = diqkz*yr - diqky*zr;
|
||||
diqkry = diqkx*zr - diqkz*xr;
|
||||
diqkrz = diqky*xr - diqkx*yr;
|
||||
dkqirx = dkqiz*yr - dkqiy*zr;
|
||||
dkqiry = dkqix*zr - dkqiz*xr;
|
||||
dkqirz = dkqiy*xr - dkqix*yr;
|
||||
dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy -
|
||||
2.0*(qixy*qkxz+qiyy*qkyz+qiyz*qkzz - qixz*qkxy-qiyz*qkyy-qizz*qkyz);
|
||||
dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz -
|
||||
2.0*(qixz*qkxx+qiyz*qkxy+qizz*qkxz - qixx*qkxz-qixy*qkyz-qixz*qkzz);
|
||||
dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix -
|
||||
2.0*(qixx*qkxy+qixy*qkyy+qixz*qkyz - qixy*qkxx-qiyy*qkxy-qiyz*qkxz);
|
||||
|
||||
// get reciprocal distance terms for this interaction
|
||||
|
||||
rr1 = felec / r;
|
||||
rr3 = rr1 / r2;
|
||||
rr5 = 3.0 * rr3 / r2;
|
||||
rr7 = 5.0 * rr5 / r2;
|
||||
rr9 = 7.0 * rr7 / r2;
|
||||
rr11 = 9.0 * rr9 / r2;
|
||||
|
||||
// calculate the real space Ewald error function terms
|
||||
|
||||
ralpha = aewald * r;
|
||||
bn[0] = erfc(ralpha) / r;
|
||||
alsq2 = 2.0 * aewald*aewald;
|
||||
alsq2n = 0.0;
|
||||
if (aewald > 0.0) alsq2n = 1.0 / (MY_PIS*aewald);
|
||||
exp2a = exp(-ralpha*ralpha);
|
||||
for (k = 1; k < 6; k++) {
|
||||
bfac = (double) (k+k-1);
|
||||
alsq2n = alsq2 * alsq2n;
|
||||
bn[k] = (bfac*bn[k-1]+alsq2n*exp2a) / r2;
|
||||
}
|
||||
for (k = 0; k < 6; k++) bn[k] *= felec;
|
||||
|
||||
// find damped multipole intermediates and energy value
|
||||
|
||||
if (!amoeba) {
|
||||
corek = pcore[jclass];
|
||||
alphak = palpha[jclass];
|
||||
valk = pval[j];
|
||||
|
||||
term1 = corei*corek;
|
||||
term1i = corek*vali;
|
||||
term2i = corek*dir;
|
||||
term3i = corek*qir;
|
||||
term1k = corei*valk;
|
||||
term2k = -corei*dkr;
|
||||
term3k = corei*qkr;
|
||||
term1ik = vali*valk;
|
||||
term2ik = valk*dir - vali*dkr + dik;
|
||||
term3ik = vali*qkr + valk*qir - dir*dkr + 2.0*(dkqi-diqk+qiqk);
|
||||
term4ik = dir*qkr - dkr*qir - 4.0*qik;
|
||||
term5ik = qir*qkr;
|
||||
damppole(r,11,alphai,alphak,dmpi,dmpj,dmpij);
|
||||
scalek = factor_mpole;
|
||||
rr1i = bn[0] - (1.0-scalek*dmpi[0])*rr1;
|
||||
rr3i = bn[1] - (1.0-scalek*dmpi[2])*rr3;
|
||||
rr5i = bn[2] - (1.0-scalek*dmpi[4])*rr5;
|
||||
rr7i = bn[3] - (1.0-scalek*dmpi[6])*rr7;
|
||||
rr1k = bn[0] - (1.0-scalek*dmpj[0])*rr1;
|
||||
rr3k = bn[1] - (1.0-scalek*dmpj[2])*rr3;
|
||||
rr5k = bn[2] - (1.0-scalek*dmpj[4])*rr5;
|
||||
rr7k = bn[3] - (1.0-scalek*dmpj[6])*rr7;
|
||||
rr1ik = bn[0] - (1.0-scalek*dmpij[0])*rr1;
|
||||
rr3ik = bn[1] - (1.0-scalek*dmpij[2])*rr3;
|
||||
rr5ik = bn[2] - (1.0-scalek*dmpij[4])*rr5;
|
||||
rr7ik = bn[3] - (1.0-scalek*dmpij[6])*rr7;
|
||||
rr9ik = bn[4] - (1.0-scalek*dmpij[8])*rr9;
|
||||
rr11ik = bn[5] - (1.0-scalek*dmpij[10])*rr11;
|
||||
rr1 = bn[0] - (1.0-scalek)*rr1;
|
||||
rr3 = bn[1] - (1.0-scalek)*rr3;
|
||||
e = term1*rr1 + term4ik*rr7ik + term5ik*rr9ik +
|
||||
term1i*rr1i + term1k*rr1k + term1ik*rr1ik +
|
||||
term2i*rr3i + term2k*rr3k + term2ik*rr3ik +
|
||||
term3i*rr5i + term3k*rr5k + term3ik*rr5ik;
|
||||
|
||||
// find damped multipole intermediates for force and torque
|
||||
|
||||
de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik +
|
||||
term1i*rr3i + term1k*rr3k + term1ik*rr3ik +
|
||||
term2i*rr5i + term2k*rr5k + term2ik*rr5ik +
|
||||
term3i*rr7i + term3k*rr7k + term3ik*rr7ik;
|
||||
term1 = -corek*rr3i - valk*rr3ik + dkr*rr5ik - qkr*rr7ik;
|
||||
term2 = corei*rr3k + vali*rr3ik + dir*rr5ik + qir*rr7ik;
|
||||
term3 = 2.0 * rr5ik;
|
||||
term4 = -2.0 * (corek*rr5i+valk*rr5ik - dkr*rr7ik+qkr*rr9ik);
|
||||
term5 = -2.0 * (corei*rr5k+vali*rr5ik + dir*rr7ik+qir*rr9ik);
|
||||
term6 = 4.0 * rr7ik;
|
||||
rr3 = rr3ik;
|
||||
|
||||
// find standard multipole intermediates and energy value
|
||||
|
||||
} else {
|
||||
term1 = ci*ck;
|
||||
term2 = ck*dir - ci*dkr + dik;
|
||||
term3 = ci*qkr + ck*qir - dir*dkr + 2.0*(dkqi-diqk+qiqk);
|
||||
term4 = dir*qkr - dkr*qir - 4.0*qik;
|
||||
term5 = qir*qkr;
|
||||
scalek = 1.0 - factor_mpole;
|
||||
rr1 = bn[0] - scalek*rr1;
|
||||
rr3 = bn[1] - scalek*rr3;
|
||||
rr5 = bn[2] - scalek*rr5;
|
||||
rr7 = bn[3] - scalek*rr7;
|
||||
rr9 = bn[4] - scalek*rr9;
|
||||
rr11 = bn[5] - scalek*rr11;
|
||||
e = term1*rr1 + term2*rr3 + term3*rr5 + term4*rr7 + term5*rr9;
|
||||
|
||||
// find standard multipole intermediates for force and torque
|
||||
|
||||
de = term1*rr3 + term2*rr5 + term3*rr7 + term4*rr9 + term5*rr11;
|
||||
term1 = -ck*rr3 + dkr*rr5 - qkr*rr7;
|
||||
term2 = ci*rr3 + dir*rr5 + qir*rr7;
|
||||
term3 = 2.0 * rr5;
|
||||
term4 = 2.0 * (-ck*rr5+dkr*rr7-qkr*rr9);
|
||||
term5 = 2.0 * (-ci*rr5-dir*rr7-qir*rr9);
|
||||
term6 = 4.0 * rr7;
|
||||
}
|
||||
|
||||
empole += e;
|
||||
|
||||
// compute the force components for this interaction
|
||||
|
||||
frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) +
|
||||
term4*qix + term5*qkx + term6*(qixk+qkxi);
|
||||
frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) +
|
||||
term4*qiy + term5*qky + term6*(qiyk+qkyi);
|
||||
frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
|
||||
term4*qiz + term5*qkz + term6*(qizk+qkzi);
|
||||
|
||||
// compute the torque components for this interaction
|
||||
|
||||
ttmi[0] = -rr3*dikx + term1*dirx + term3*(dqikx+dkqirx) -
|
||||
term4*qirx - term6*(qikrx+qikx);
|
||||
ttmi[1] = -rr3*diky + term1*diry + term3*(dqiky+dkqiry) -
|
||||
term4*qiry - term6*(qikry+qiky);
|
||||
ttmi[2] = -rr3*dikz + term1*dirz + term3*(dqikz+dkqirz) -
|
||||
term4*qirz - term6*(qikrz+qikz);
|
||||
ttmk[0] = rr3*dikx + term2*dkrx - term3*(dqikx+diqkrx) -
|
||||
term5*qkrx - term6*(qkirx-qikx);
|
||||
ttmk[1] = rr3*diky + term2*dkry - term3*(dqiky+diqkry) -
|
||||
term5*qkry - term6*(qkiry-qiky);
|
||||
ttmk[2] = rr3*dikz + term2*dkrz - term3*(dqikz+diqkrz) -
|
||||
term5*qkrz - term6*(qkirz-qikz);
|
||||
|
||||
// increment force-based gradient and torque on first site
|
||||
|
||||
f[i][0] -= frcx;
|
||||
f[i][1] -= frcy;
|
||||
f[i][2] -= frcz;
|
||||
tq[i][0] += ttmi[0];
|
||||
tq[i][1] += ttmi[1];
|
||||
tq[i][2] += ttmi[2];
|
||||
|
||||
// increment force-based gradient and torque on second site
|
||||
|
||||
f[j][0] += frcx;
|
||||
f[j][1] += frcy;
|
||||
f[j][2] += frcz;
|
||||
tq[j][0] += ttmk[0];
|
||||
tq[j][1] += ttmk[1];
|
||||
tq[j][2] += ttmk[2];
|
||||
|
||||
// increment the virial due to pairwise Cartesian forces
|
||||
|
||||
if (vflag_global) {
|
||||
vxx = -xr * frcx;
|
||||
vxy = -0.5 * (yr*frcx+xr*frcy);
|
||||
vxz = -0.5 * (zr*frcx+xr*frcz);
|
||||
vyy = -yr * frcy;
|
||||
vyz = -0.5 * (zr*frcy+yr*frcz);
|
||||
vzz = -zr * frcz;
|
||||
|
||||
virmpole[0] -= vxx;
|
||||
virmpole[1] -= vyy;
|
||||
virmpole[2] -= vzz;
|
||||
virmpole[3] -= vxy;
|
||||
virmpole[4] -= vxz;
|
||||
virmpole[5] -= vyz;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// reverse comm to sum torque from ghost atoms to owned atoms
|
||||
|
||||
crstyle = TORQUE;
|
||||
comm->reverse_comm(this);
|
||||
|
||||
// resolve site torques then increment forces and virial
|
||||
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
torque2force(i,tq[i],fix,fiy,fiz,f);
|
||||
|
||||
if (!vflag_global) continue;
|
||||
|
||||
iz = zaxis2local[i];
|
||||
ix = xaxis2local[i];
|
||||
iy = yaxis2local[i];
|
||||
|
||||
xiz = x[iz][0] - x[i][0];
|
||||
yiz = x[iz][1] - x[i][1];
|
||||
ziz = x[iz][2] - x[i][2];
|
||||
xix = x[ix][0] - x[i][0];
|
||||
yix = x[ix][1] - x[i][1];
|
||||
zix = x[ix][2] - x[i][2];
|
||||
xiy = x[iy][0] - x[i][0];
|
||||
yiy = x[iy][1] - x[i][1];
|
||||
ziy = x[iy][2] - x[i][2];
|
||||
|
||||
vxx = xix*fix[0] + xiy*fiy[0] + xiz*fiz[0];
|
||||
vxy = 0.5 * (yix*fix[0] + yiy*fiy[0] + yiz*fiz[0] +
|
||||
xix*fix[1] + xiy*fiy[1] + xiz*fiz[1]);
|
||||
vxz = 0.5 * (zix*fix[0] + ziy*fiy[0] + ziz*fiz[0] +
|
||||
xix*fix[2] + xiy*fiy[2] + xiz*fiz[2]);
|
||||
vyy = yix*fix[1] + yiy*fiy[1] + yiz*fiz[1];
|
||||
vyz = 0.5 * (zix*fix[1] + ziy*fiy[1] + ziz*fiz[1] +
|
||||
yix*fix[2] + yiy*fiy[2] + yiz*fiz[2]);
|
||||
vzz = zix*fix[2] + ziy*fiy[2] + ziz*fiz[2];
|
||||
|
||||
virmpole[0] -= vxx;
|
||||
virmpole[1] -= vyy;
|
||||
virmpole[2] -= vzz;
|
||||
virmpole[3] -= vxy;
|
||||
virmpole[4] -= vxz;
|
||||
virmpole[5] -= vyz;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
multipole_kspace = KSpace portion of multipole interactions
|
||||
adapted from Tinker emrecip1() routine
|
||||
literature reference:
|
||||
C. Sagui, L. G. Pedersen and T. A. Darden, "Towards an Accurate
|
||||
Representation of Electrostatics in Classical Force Fields:
|
||||
Efficient Implementation of Multipolar Interactions in
|
||||
Biomolecular Simulations", Journal of Chemical Physics, 120,
|
||||
73-87 (2004)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairAmoeba::multipole_kspace()
|
||||
{
|
||||
int i,j,k,n,ix,iy,iz;
|
||||
int nhalf1,nhalf2,nhalf3;
|
||||
int nxlo,nxhi,nylo,nyhi,nzlo,nzhi;
|
||||
double e,eterm,felec;
|
||||
double r1,r2,r3;
|
||||
double h1,h2,h3;
|
||||
double f1,f2,f3;
|
||||
double xix,yix,zix;
|
||||
double xiy,yiy,ziy;
|
||||
double xiz,yiz,ziz;
|
||||
double vxx,vyy,vzz,vxy,vxz,vyz;
|
||||
double volterm,denom;
|
||||
double hsq,expterm;
|
||||
double term,pterm;
|
||||
double vterm,struc2;
|
||||
double tem[3],fix[3],fiy[3],fiz[3];
|
||||
|
||||
// indices into the electrostatic field array
|
||||
// decremented by 1 versus Fortran
|
||||
|
||||
int deriv1[10] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19};
|
||||
int deriv2[10] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16};
|
||||
int deriv3[10] = {3, 8, 9, 6, 14, 16, 12, 19, 17, 18};
|
||||
|
||||
// return if the Ewald coefficient is zero
|
||||
|
||||
if (aewald < 1.0e-6) return;
|
||||
|
||||
// owned atoms
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
double volbox = domain->prd[0] * domain->prd[1] * domain->prd[2];
|
||||
|
||||
felec = electric / am_dielectric;
|
||||
|
||||
// FFT moduli pre-computations
|
||||
// set igrid for each atom and its B-spline coeffs
|
||||
|
||||
nfft1 = m_kspace->nx;
|
||||
nfft2 = m_kspace->ny;
|
||||
nfft3 = m_kspace->nz;
|
||||
bsorder = m_kspace->order;
|
||||
|
||||
moduli();
|
||||
bspline_fill();
|
||||
|
||||
// copy multipole info to Cartesian cmp
|
||||
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
cmp[i][0] = rpole[i][0];
|
||||
cmp[i][1] = rpole[i][1];
|
||||
cmp[i][2] = rpole[i][2];
|
||||
cmp[i][3] = rpole[i][3];
|
||||
cmp[i][4] = rpole[i][4];
|
||||
cmp[i][5] = rpole[i][8];
|
||||
cmp[i][6] = rpole[i][12];
|
||||
cmp[i][7] = 2.0 * rpole[i][5];
|
||||
cmp[i][8] = 2.0 * rpole[i][6];
|
||||
cmp[i][9] = 2.0 * rpole[i][9];
|
||||
}
|
||||
|
||||
// convert Cartesian multipoles to fractional multipoles
|
||||
|
||||
cmp_to_fmp(cmp,fmp);
|
||||
|
||||
// gridpre = my portion of 3d grid in brick decomp w/ ghost values
|
||||
|
||||
double ***gridpre = (double ***) m_kspace->zero();
|
||||
|
||||
// map atoms to grid
|
||||
|
||||
grid_mpole(fmp,gridpre);
|
||||
|
||||
// pre-convolution operations including forward FFT
|
||||
// gridfft = my portion of complex 3d grid in FFT decomp as 1d vector
|
||||
|
||||
double *gridfft = m_kspace->pre_convolution();
|
||||
|
||||
// ---------------------
|
||||
// convolution operation
|
||||
// ---------------------
|
||||
|
||||
// zero virial accumulation variables
|
||||
|
||||
vxx = vyy = vzz = vxy = vxz = vyz = 0.0;
|
||||
|
||||
// perform convolution on K-space points I own
|
||||
|
||||
nhalf1 = (nfft1+1) / 2;
|
||||
nhalf2 = (nfft2+1) / 2;
|
||||
nhalf3 = (nfft3+1) / 2;
|
||||
|
||||
nxlo = m_kspace->nxlo_fft;
|
||||
nxhi = m_kspace->nxhi_fft;
|
||||
nylo = m_kspace->nylo_fft;
|
||||
nyhi = m_kspace->nyhi_fft;
|
||||
nzlo = m_kspace->nzlo_fft;
|
||||
nzhi = m_kspace->nzhi_fft;
|
||||
|
||||
pterm = pow((MY_PI/aewald),2.0);
|
||||
volterm = MY_PI * volbox;
|
||||
|
||||
n = 0;
|
||||
for (k = nzlo; k <= nzhi; k++) {
|
||||
for (j = nylo; j <= nyhi; j++) {
|
||||
for (i = nxlo; i <= nxhi; i++) {
|
||||
r1 = (i >= nhalf1) ? i-nfft1 : i;
|
||||
r2 = (j >= nhalf2) ? j-nfft2 : j;
|
||||
r3 = (k >= nhalf3) ? k-nfft3 : k;
|
||||
h1 = recip[0][0]*r1 + recip[0][1]*r2 + recip[0][2]*r3; // matvec
|
||||
h2 = recip[1][0]*r1 + recip[1][1]*r2 + recip[1][2]*r3;
|
||||
h3 = recip[2][0]*r1 + recip[2][1]*r2 + recip[2][2]*r3;
|
||||
hsq = h1*h1 + h2*h2 + h3*h3;
|
||||
term = -pterm * hsq;
|
||||
expterm = 0.0;
|
||||
if (term > -50.0 && hsq != 0.0) {
|
||||
denom = volterm*hsq*bsmod1[i]*bsmod2[j]*bsmod3[k];
|
||||
expterm = exp(term) / denom;
|
||||
struc2 = gridfft[n]*gridfft[n] + gridfft[n+1]*gridfft[n+1];
|
||||
eterm = 0.5 * felec * expterm * struc2;
|
||||
vterm = (2.0/hsq) * (1.0-term) * eterm;
|
||||
vxx += h1*h1*vterm - eterm;
|
||||
vyy += h2*h2*vterm - eterm;
|
||||
vzz += h3*h3*vterm - eterm;
|
||||
vxy += h1*h2*vterm;
|
||||
vxz += h1*h3*vterm;
|
||||
vyz += h2*h3*vterm;
|
||||
}
|
||||
gridfft[n] *= expterm;
|
||||
gridfft[n+1] *= expterm;
|
||||
n += 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// save multipole virial for use in polarization computation
|
||||
|
||||
vmsave[0] = vxx;
|
||||
vmsave[1] = vyy;
|
||||
vmsave[2] = vzz;
|
||||
vmsave[3] = vxy;
|
||||
vmsave[4] = vxz;
|
||||
vmsave[5] = vyz;
|
||||
|
||||
// post-convolution operations including backward FFT
|
||||
// gridppost = my portion of 3d grid in brick decomp w/ ghost values
|
||||
|
||||
double ***gridpost = (double ***) m_kspace->post_convolution();
|
||||
|
||||
// get potential
|
||||
|
||||
fphi_mpole(gridpost,fphi);
|
||||
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
for (k = 0; k < 20; k++)
|
||||
fphi[i][k] *= felec;
|
||||
}
|
||||
|
||||
// convert field from fractional to Cartesian
|
||||
|
||||
fphi_to_cphi(fphi,cphi);
|
||||
|
||||
// increment the permanent multipole energy and gradient
|
||||
|
||||
e = 0.0;
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
f1 = 0.0;
|
||||
f2 = 0.0;
|
||||
f3 = 0.0;
|
||||
for (k = 0; k < 10; k++) {
|
||||
e += fmp[i][k]*fphi[i][k];
|
||||
f1 += fmp[i][k]*fphi[i][deriv1[k]];
|
||||
f2 += fmp[i][k]*fphi[i][deriv2[k]];
|
||||
f3 += fmp[i][k]*fphi[i][deriv3[k]];
|
||||
}
|
||||
f1 *= nfft1;
|
||||
f2 *= nfft2;
|
||||
f3 *= nfft3;
|
||||
h1 = recip[0][0]*f1 + recip[0][1]*f2 + recip[0][2]*f3; // matvec?
|
||||
h2 = recip[1][0]*f1 + recip[1][1]*f2 + recip[1][2]*f3;
|
||||
h3 = recip[2][0]*f1 + recip[2][1]*f2 + recip[2][2]*f3;
|
||||
f[i][0] -= h1;
|
||||
f[i][1] -= h2;
|
||||
f[i][2] -= h3;
|
||||
}
|
||||
empole += 0.5*e;
|
||||
|
||||
// augment the permanent multipole virial contributions
|
||||
|
||||
if (vflag_global) {
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
vxx = vxx - cmp[i][1]*cphi[i][1] - 2.0*cmp[i][4]*cphi[i][4] -
|
||||
cmp[i][7]*cphi[i][7] - cmp[i][8]*cphi[i][8];
|
||||
vxy = vxy - 0.5*(cmp[i][2]*cphi[i][1]+cmp[i][1]*cphi[i][2]) -
|
||||
(cmp[i][4]+cmp[i][5])*cphi[i][7] - 0.5*cmp[i][7]*(cphi[i][4]+cphi[i][5]) -
|
||||
0.5*(cmp[i][8]*cphi[i][9]+cmp[i][9]*cphi[i][8]);
|
||||
vxz = vxz - 0.5*(cmp[i][3]*cphi[i][1]+cmp[i][1]*cphi[i][3]) -
|
||||
(cmp[i][4]+cmp[i][6])*cphi[i][8] - 0.5*cmp[i][8]*(cphi[i][4]+cphi[i][6]) -
|
||||
0.5*(cmp[i][7]*cphi[i][9]+cmp[i][9]*cphi[i][7]);
|
||||
vyy = vyy - cmp[i][2]*cphi[i][2] - 2.0*cmp[i][5]*cphi[i][5] -
|
||||
cmp[i][7]*cphi[i][7] - cmp[i][9]*cphi[i][9];
|
||||
vyz = vyz - 0.5*(cmp[i][3]*cphi[i][2]+cmp[i][2]*cphi[i][3]) -
|
||||
(cmp[i][5]+cmp[i][6])*cphi[i][9] - 0.5*cmp[i][9]*(cphi[i][5]+cphi[i][6]) -
|
||||
0.5*(cmp[i][7]*cphi[i][8]+cmp[i][8]*cphi[i][7]);
|
||||
vzz = vzz - cmp[i][3]*cphi[i][3] - 2.0*cmp[i][6]*cphi[i][6] -
|
||||
cmp[i][8]*cphi[i][8] - cmp[i][9]*cphi[i][9];
|
||||
}
|
||||
}
|
||||
|
||||
// resolve site torques then increment forces and virial
|
||||
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
tem[0] = cmp[i][3]*cphi[i][2] - cmp[i][2]*cphi[i][3] +
|
||||
2.0*(cmp[i][6]-cmp[i][5])*cphi[i][9] +
|
||||
cmp[i][8]*cphi[i][7] + cmp[i][9]*cphi[i][5] -
|
||||
cmp[i][7]*cphi[i][8] - cmp[i][9]*cphi[i][6];
|
||||
tem[1] = cmp[i][1]*cphi[i][3] - cmp[i][3]*cphi[i][1] +
|
||||
2.0*(cmp[i][4]-cmp[i][6])*cphi[i][8] +
|
||||
cmp[i][7]*cphi[i][9] + cmp[i][8]*cphi[i][6] -
|
||||
cmp[i][8]*cphi[i][4] - cmp[i][9]*cphi[i][7];
|
||||
tem[2] = cmp[i][2]*cphi[i][1] - cmp[i][1]*cphi[i][2] +
|
||||
2.0*(cmp[i][5]-cmp[i][4])*cphi[i][7] +
|
||||
cmp[i][7]*cphi[i][4] + cmp[i][9]*cphi[i][8] -
|
||||
cmp[i][7]*cphi[i][5] - cmp[i][8]*cphi[i][9];
|
||||
|
||||
torque2force(i,tem,fix,fiy,fiz,f);
|
||||
|
||||
if (vflag_global) {
|
||||
iz = zaxis2local[i];
|
||||
ix = xaxis2local[i];
|
||||
iy = yaxis2local[i];
|
||||
|
||||
xiz = x[iz][0] - x[i][0];
|
||||
yiz = x[iz][1] - x[i][1];
|
||||
ziz = x[iz][2] - x[i][2];
|
||||
xix = x[ix][0] - x[i][0];
|
||||
yix = x[ix][1] - x[i][1];
|
||||
zix = x[ix][2] - x[i][2];
|
||||
xiy = x[iy][0] - x[i][0];
|
||||
yiy = x[iy][1] - x[i][1];
|
||||
ziy = x[iy][2] - x[i][2];
|
||||
|
||||
vxx += xix*fix[0] + xiy*fiy[0] + xiz*fiz[0];
|
||||
vxy += 0.5*(yix*fix[0] + yiy*fiy[0] + yiz*fiz[0] +
|
||||
xix*fix[1] + xiy*fiy[1] + xiz*fiz[1]);
|
||||
vxz += 0.5*(zix*fix[0] + ziy*fiy[0] + ziz*fiz[0] +
|
||||
xix*fix[2] + xiy*fiy[2] + xiz*fiz[2]);
|
||||
vyy += yix*fix[1] + yiy*fiy[1] + yiz*fiz[1];
|
||||
vyz += 0.5*(zix*fix[1] + ziy*fiy[1] + ziz*fiz[1] +
|
||||
yix*fix[2] + yiy*fiy[2] + yiz*fiz[2]);
|
||||
vzz += zix*fix[2] + ziy*fiy[2] + ziz*fiz[2];
|
||||
}
|
||||
}
|
||||
|
||||
// increment total internal virial tensor components
|
||||
|
||||
if (vflag_global) {
|
||||
virmpole[0] -= vxx;
|
||||
virmpole[1] -= vyy;
|
||||
virmpole[2] -= vzz;
|
||||
virmpole[3] -= vxy;
|
||||
virmpole[4] -= vxz;
|
||||
virmpole[5] -= vyz;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
damppole generates coefficients for the charge penetration
|
||||
damping function for powers of the interatomic distance
|
||||
|
||||
literature references:
|
||||
|
||||
L. V. Slipchenko and M. S. Gordon, "Electrostatic Energy in the
|
||||
Effective Fragment Potential Method: Theory and Application to
|
||||
the Benzene Dimer", Journal of Computational Chemistry, 28,
|
||||
276-291 (2007) [Gordon f1 and f2 models]
|
||||
|
||||
J. A. Rackers, Q. Wang, C. Liu, J.-P. Piquemal, P. Ren and
|
||||
J. W. Ponder, "An Optimized Charge Penetration Model for Use with
|
||||
the AMOEBA Force Field", Physical Chemistry Chemical Physics, 19,
|
||||
276-291 (2017)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairAmoeba::damppole(double r, int rorder, double alphai, double alphak,
|
||||
double *dmpi, double *dmpk, double *dmpik)
|
||||
{
|
||||
double termi,termk;
|
||||
double termi2,termk2;
|
||||
double alphai2,alphak2;
|
||||
double eps,diff;
|
||||
double expi,expk;
|
||||
double dampi,dampk;
|
||||
double dampi2,dampi3;
|
||||
double dampi4,dampi5;
|
||||
double dampi6,dampi7;
|
||||
double dampi8;
|
||||
double dampk2,dampk3;
|
||||
double dampk4,dampk5;
|
||||
double dampk6;
|
||||
|
||||
// compute tolerance and exponential damping factors
|
||||
|
||||
eps = 0.001;
|
||||
diff = fabs(alphai-alphak);
|
||||
dampi = alphai * r;
|
||||
dampk = alphak * r;
|
||||
expi = exp(-dampi);
|
||||
expk = exp(-dampk);
|
||||
|
||||
// core-valence charge penetration damping for Gordon f1
|
||||
|
||||
dampi2 = dampi * dampi;
|
||||
dampi3 = dampi * dampi2;
|
||||
dampi4 = dampi2 * dampi2;
|
||||
dampi5 = dampi2 * dampi3;
|
||||
dmpi[0] = 1.0 - (1.0 + 0.5*dampi)*expi;
|
||||
dmpi[2] = 1.0 - (1.0 + dampi + 0.5*dampi2)*expi;
|
||||
dmpi[4] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0)*expi;
|
||||
dmpi[6] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 + dampi4/30.0)*expi;
|
||||
dmpi[8] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
4.0*dampi4/105.0 + dampi5/210.0)*expi;
|
||||
if (diff < eps) {
|
||||
dmpk[0] = dmpi[0];
|
||||
dmpk[2] = dmpi[2];
|
||||
dmpk[4] = dmpi[4];
|
||||
dmpk[6] = dmpi[6];
|
||||
dmpk[8] = dmpi[8];
|
||||
} else {
|
||||
dampk2 = dampk * dampk;
|
||||
dampk3 = dampk * dampk2;
|
||||
dampk4 = dampk2 * dampk2;
|
||||
dampk5 = dampk2 * dampk3;
|
||||
dmpk[0] = 1.0 - (1.0 + 0.5*dampk)*expk;
|
||||
dmpk[2] = 1.0 - (1.0 + dampk + 0.5*dampk2)*expk;
|
||||
dmpk[4] = 1.0 - (1.0 + dampk + 0.5*dampk2 + dampk3/6.0)*expk;
|
||||
dmpk[6] = 1.0 - (1.0 + dampk + 0.5*dampk2 + dampk3/6.0 + dampk4/30.0)*expk;
|
||||
dmpk[8] = 1.0 - (1.0 + dampk + 0.5*dampk2 + dampk3/6.0 +
|
||||
4.0*dampk4/105.0 + dampk5/210.0)*expk;
|
||||
}
|
||||
|
||||
// valence-valence charge penetration damping for Gordon f1
|
||||
|
||||
if (diff < eps) {
|
||||
dampi6 = dampi3 * dampi3;
|
||||
dampi7 = dampi3 * dampi4;
|
||||
dmpik[0] = 1.0 - (1.0 + 11.0*dampi/16.0 + 3.0*dampi2/16.0 +
|
||||
dampi3/48.0)*expi;
|
||||
dmpik[2] = 1.0 - (1.0 + dampi + 0.5*dampi2 +
|
||||
7.0*dampi3/48.0 + dampi4/48.0)*expi;
|
||||
dmpik[4] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
dampi4/24.0 + dampi5/144.0)*expi;
|
||||
dmpik[6] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
dampi4/24.0 + dampi5/120.0 + dampi6/720.0)*expi;
|
||||
dmpik[8] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
dampi4/24.0 + dampi5/120.0 + dampi6/720.0 +
|
||||
dampi7/5040.0)*expi;
|
||||
if (rorder >= 11) {
|
||||
dampi8 = dampi4 * dampi4;
|
||||
dmpik[10] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
dampi4/24.0 + dampi5/120.0 + dampi6/720.0 +
|
||||
dampi7/5040.0 + dampi8/45360.0)*expi;
|
||||
}
|
||||
|
||||
} else {
|
||||
alphai2 = alphai * alphai;
|
||||
alphak2 = alphak * alphak;
|
||||
termi = alphak2 / (alphak2-alphai2);
|
||||
termk = alphai2 / (alphai2-alphak2);
|
||||
termi2 = termi * termi;
|
||||
termk2 = termk * termk;
|
||||
dmpik[0] = 1.0 - termi2*(1.0 + 2.0*termk + 0.5*dampi)*expi -
|
||||
termk2*(1.0 + 2.0*termi + 0.5*dampk)*expk;
|
||||
dmpik[2] = 1.0 - termi2*(1.0+dampi+0.5*dampi2)*expi -
|
||||
termk2*(1.0+dampk+0.5*dampk2)*expk -
|
||||
2.0*termi2*termk*(1.0+dampi)*expi -
|
||||
2.0*termk2*termi*(1.0+dampk)*expk;
|
||||
dmpik[4] = 1.0 - termi2*(1.0 + dampi + 0.5*dampi2 + dampi3/6.0)*expi -
|
||||
termk2*(1.0 + dampk + 0.5*dampk2 + dampk3/6.0)*expk -
|
||||
2.0*termi2*termk*(1.0 + dampi + dampi2/3.0)*expi -
|
||||
2.0*termk2*termi*(1.0 + dampk + dampk2/3.0)*expk;
|
||||
dmpik[6] = 1.0 - termi2*(1.0 + dampi + 0.5*dampi2 +
|
||||
dampi3/6.0 + dampi4/30.0)*expi -
|
||||
termk2*(1.0 + dampk + 0.5*dampk2 + dampk3/6.0 + dampk4/30.0)*expk -
|
||||
2.0*termi2*termk*(1.0 + dampi + 2.0*dampi2/5.0 + dampi3/15.0)*expi -
|
||||
2.0*termk2*termi*(1.0 + dampk + 2.0*dampk2/5.0 + dampk3/15.0)*expk;
|
||||
dmpik[8] = 1.0 - termi2*(1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
4.0*dampi4/105.0 + dampi5/210.0)*expi -
|
||||
termk2*(1.0 + dampk + 0.5*dampk2 + dampk3/6.0 +
|
||||
4.0*dampk4/105.0 + dampk5/210.0)*expk -
|
||||
2.0*termi2*termk*(1.0 + dampi + 3.0*dampi2/7.0 +
|
||||
2.0*dampi3/21.0 + dampi4/105.0)*expi -
|
||||
2.0*termk2*termi*(1.0 + dampk + 3.0*dampk2/7.0 +
|
||||
2.0*dampk3/21.0 + dampk4/105.0)*expk;
|
||||
|
||||
if (rorder >= 11) {
|
||||
dampi6 = dampi3 * dampi3;
|
||||
dampk6 = dampk3 * dampk3;
|
||||
dmpik[10] = 1.0 - termi2*(1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
5.0*dampi4/126.0 + 2.0*dampi5/315.0 +
|
||||
dampi6/1890.0)*expi -
|
||||
termk2*(1.0 + dampk + 0.5*dampk2 + dampk3/6.0 + 5.0*dampk4/126.0 +
|
||||
2.0*dampk5/315.0 + dampk6/1890.0)*expk -
|
||||
2.0*termi2*termk*(1.0 + dampi + 4.0*dampi2/9.0 + dampi3/9.0 +
|
||||
dampi4/63.0 + dampi5/945.0)*expi -
|
||||
2.0*termk2*termi*(1.0 + dampk + 4.0*dampk2/9.0 + dampk3/9.0 +
|
||||
dampk4/63.0 + dampk5/945.0)*expk;
|
||||
}
|
||||
}
|
||||
}
|
||||
2161
src/AMOEBA/amoeba_polar.cpp
Normal file
2161
src/AMOEBA/amoeba_polar.cpp
Normal file
File diff suppressed because it is too large
Load Diff
569
src/AMOEBA/amoeba_repulsion.cpp
Normal file
569
src/AMOEBA/amoeba_repulsion.cpp
Normal file
@ -0,0 +1,569 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/ Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_amoeba.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "memory.h"
|
||||
#include "neigh_list.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{FIELD,ZRSD,TORQUE,UFLD}; // reverse comm
|
||||
enum{VDWL,REPULSE,QFER,DISP,MPOLE,POLAR,USOLV,DISP_LONG,MPOLE_LONG,POLAR_LONG};
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
repulsion = Pauli repulsion interactions
|
||||
adapted from Tinker erepel1b() routine
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairAmoeba::repulsion()
|
||||
{
|
||||
int i,j,k,ii,jj,itype,jtype;
|
||||
int ix,iy,iz;
|
||||
double e;
|
||||
double eterm,de;
|
||||
double xi,yi,zi;
|
||||
double xr,yr,zr;
|
||||
double xix,yix,zix;
|
||||
double xiy,yiy,ziy;
|
||||
double xiz,yiz,ziz;
|
||||
double r,r2,r3,r4,r5;
|
||||
double rr1,rr3,rr5;
|
||||
double rr7,rr9,rr11;
|
||||
double dix,diy,diz;
|
||||
double qixx,qixy,qixz;
|
||||
double qiyy,qiyz,qizz;
|
||||
double dkx,dky,dkz;
|
||||
double qkxx,qkxy,qkxz;
|
||||
double qkyy,qkyz,qkzz;
|
||||
double dir,dkr,dik,qik;
|
||||
double qix,qiy,qiz,qir;
|
||||
double qkx,qky,qkz,qkr;
|
||||
double diqk,dkqi,qiqk;
|
||||
double dirx,diry,dirz;
|
||||
double dkrx,dkry,dkrz;
|
||||
double dikx,diky,dikz;
|
||||
double qirx,qiry,qirz;
|
||||
double qkrx,qkry,qkrz;
|
||||
double qikx,qiky,qikz;
|
||||
double qixk,qiyk,qizk;
|
||||
double qkxi,qkyi,qkzi;
|
||||
double qikrx,qikry,qikrz;
|
||||
double qkirx,qkiry,qkirz;
|
||||
double diqkx,diqky,diqkz;
|
||||
double dkqix,dkqiy,dkqiz;
|
||||
double diqkrx,diqkry,diqkrz;
|
||||
double dkqirx,dkqiry,dkqirz;
|
||||
double dqikx,dqiky,dqikz;
|
||||
double term1,term2,term3;
|
||||
double term4,term5,term6;
|
||||
double sizi,sizk,sizik;
|
||||
double vali,valk;
|
||||
double dmpi,dmpk;
|
||||
double frcx,frcy,frcz;
|
||||
double taper,dtaper;
|
||||
double vxx,vyy,vzz;
|
||||
double vxy,vxz,vyz;
|
||||
double factor_repel;
|
||||
double ttri[3],ttrk[3];
|
||||
double fix[3],fiy[3],fiz[3];
|
||||
double dmpik[11];
|
||||
|
||||
int inum,jnum;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
// set cutoffs and taper coeffs
|
||||
|
||||
choose(REPULSE);
|
||||
|
||||
// owned atoms
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
// zero repulsion torque on owned + ghost atoms
|
||||
|
||||
int nall = nlocal + atom->nghost;
|
||||
|
||||
for (i = 0; i < nall; i++) {
|
||||
tq[i][0] = 0.0;
|
||||
tq[i][1] = 0.0;
|
||||
tq[i][2] = 0.0;
|
||||
}
|
||||
|
||||
// neigh list
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
// double loop over owned atoms and neighbors
|
||||
|
||||
// DEBUG
|
||||
//FILE *fp = fopen("lammps.dat","w");
|
||||
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
itype = amtype[i];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
xi = x[i][0];
|
||||
yi = x[i][1];
|
||||
zi = x[i][2];
|
||||
sizi = sizpr[itype];
|
||||
dmpi = dmppr[itype];
|
||||
vali = elepr[itype];
|
||||
dix = rpole[i][1];
|
||||
diy = rpole[i][2];
|
||||
diz = rpole[i][3];
|
||||
qixx = rpole[i][4];
|
||||
qixy = rpole[i][5];
|
||||
qixz = rpole[i][6];
|
||||
qiyy = rpole[i][8];
|
||||
qiyz = rpole[i][9];
|
||||
qizz = rpole[i][12];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
factor_repel = special_repel[sbmask15(j)];
|
||||
if (factor_repel == 0.0) continue;
|
||||
j &= NEIGHMASK15;
|
||||
|
||||
xr = x[j][0] - xi;
|
||||
yr = x[j][1] - yi;
|
||||
zr = x[j][2] - zi;
|
||||
r2 = xr*xr + yr*yr + zr*zr;
|
||||
if (r2 > off2) continue;
|
||||
|
||||
jtype = amtype[j];
|
||||
|
||||
r = sqrt(r2);
|
||||
sizk = sizpr[jtype];
|
||||
dmpk = dmppr[jtype];
|
||||
valk = elepr[jtype];
|
||||
dkx = rpole[j][1];
|
||||
dky = rpole[j][2];
|
||||
dkz = rpole[j][3];
|
||||
qkxx = rpole[j][4];
|
||||
qkxy = rpole[j][5];
|
||||
qkxz = rpole[j][6];
|
||||
qkyy = rpole[j][8];
|
||||
qkyz = rpole[j][9];
|
||||
qkzz = rpole[j][12];
|
||||
|
||||
// intermediates involving moments and separation distance
|
||||
|
||||
dir = dix*xr + diy*yr + diz*zr;
|
||||
qix = qixx*xr + qixy*yr + qixz*zr;
|
||||
qiy = qixy*xr + qiyy*yr + qiyz*zr;
|
||||
qiz = qixz*xr + qiyz*yr + qizz*zr;
|
||||
qir = qix*xr + qiy*yr + qiz*zr;
|
||||
dkr = dkx*xr + dky*yr + dkz*zr;
|
||||
qkx = qkxx*xr + qkxy*yr + qkxz*zr;
|
||||
qky = qkxy*xr + qkyy*yr + qkyz*zr;
|
||||
qkz = qkxz*xr + qkyz*yr + qkzz*zr;
|
||||
qkr = qkx*xr + qky*yr + qkz*zr;
|
||||
dik = dix*dkx + diy*dky + diz*dkz;
|
||||
qik = qix*qkx + qiy*qky + qiz*qkz;
|
||||
diqk = dix*qkx + diy*qky + diz*qkz;
|
||||
dkqi = dkx*qix + dky*qiy + dkz*qiz;
|
||||
qiqk = 2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) +
|
||||
qixx*qkxx + qiyy*qkyy + qizz*qkzz;
|
||||
|
||||
// additional intermediates involving moments and distance
|
||||
|
||||
dirx = diy*zr - diz*yr;
|
||||
diry = diz*xr - dix*zr;
|
||||
dirz = dix*yr - diy*xr;
|
||||
dkrx = dky*zr - dkz*yr;
|
||||
dkry = dkz*xr - dkx*zr;
|
||||
dkrz = dkx*yr - dky*xr;
|
||||
dikx = diy*dkz - diz*dky;
|
||||
diky = diz*dkx - dix*dkz;
|
||||
dikz = dix*dky - diy*dkx;
|
||||
qirx = qiz*yr - qiy*zr;
|
||||
qiry = qix*zr - qiz*xr;
|
||||
qirz = qiy*xr - qix*yr;
|
||||
qkrx = qkz*yr - qky*zr;
|
||||
qkry = qkx*zr - qkz*xr;
|
||||
qkrz = qky*xr - qkx*yr;
|
||||
qikx = qky*qiz - qkz*qiy;
|
||||
qiky = qkz*qix - qkx*qiz;
|
||||
qikz = qkx*qiy - qky*qix;
|
||||
qixk = qixx*qkx + qixy*qky + qixz*qkz;
|
||||
qiyk = qixy*qkx + qiyy*qky + qiyz*qkz;
|
||||
qizk = qixz*qkx + qiyz*qky + qizz*qkz;
|
||||
qkxi = qkxx*qix + qkxy*qiy + qkxz*qiz;
|
||||
qkyi = qkxy*qix + qkyy*qiy + qkyz*qiz;
|
||||
qkzi = qkxz*qix + qkyz*qiy + qkzz*qiz;
|
||||
qikrx = qizk*yr - qiyk*zr;
|
||||
qikry = qixk*zr - qizk*xr;
|
||||
qikrz = qiyk*xr - qixk*yr;
|
||||
qkirx = qkzi*yr - qkyi*zr;
|
||||
qkiry = qkxi*zr - qkzi*xr;
|
||||
qkirz = qkyi*xr - qkxi*yr;
|
||||
diqkx = dix*qkxx + diy*qkxy + diz*qkxz;
|
||||
diqky = dix*qkxy + diy*qkyy + diz*qkyz;
|
||||
diqkz = dix*qkxz + diy*qkyz + diz*qkzz;
|
||||
dkqix = dkx*qixx + dky*qixy + dkz*qixz;
|
||||
dkqiy = dkx*qixy + dky*qiyy + dkz*qiyz;
|
||||
dkqiz = dkx*qixz + dky*qiyz + dkz*qizz;
|
||||
diqkrx = diqkz*yr - diqky*zr;
|
||||
diqkry = diqkx*zr - diqkz*xr;
|
||||
diqkrz = diqky*xr - diqkx*yr;
|
||||
dkqirx = dkqiz*yr - dkqiy*zr;
|
||||
dkqiry = dkqix*zr - dkqiz*xr;
|
||||
dkqirz = dkqiy*xr - dkqix*yr;
|
||||
dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy -
|
||||
2.0*(qixy*qkxz+qiyy*qkyz+qiyz*qkzz-qixz*qkxy-qiyz*qkyy-qizz*qkyz);
|
||||
dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz -
|
||||
2.0*(qixz*qkxx+qiyz*qkxy+qizz*qkxz-qixx*qkxz-qixy*qkyz-qixz*qkzz);
|
||||
dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix -
|
||||
2.0*(qixx*qkxy+qixy*qkyy+qixz*qkyz-qixy*qkxx-qiyy*qkxy-qiyz*qkxz);
|
||||
|
||||
// get reciprocal distance terms for this interaction
|
||||
|
||||
rr1 = 1.0 / r;
|
||||
rr3 = rr1 / r2;
|
||||
rr5 = 3.0 * rr3 / r2;
|
||||
rr7 = 5.0 * rr5 / r2;
|
||||
rr9 = 7.0 * rr7 / r2;
|
||||
rr11 = 9.0 * rr9 / r2;
|
||||
|
||||
// get damping coefficients for the Pauli repulsion energy
|
||||
|
||||
damprep(r,r2,rr1,rr3,rr5,rr7,rr9,rr11,11,dmpi,dmpk,dmpik);
|
||||
|
||||
// calculate intermediate terms needed for the energy
|
||||
|
||||
term1 = vali*valk;
|
||||
term2 = valk*dir - vali*dkr + dik;
|
||||
term3 = vali*qkr + valk*qir - dir*dkr + 2.0*(dkqi-diqk+qiqk);
|
||||
term4 = dir*qkr - dkr*qir - 4.0*qik;
|
||||
term5 = qir*qkr;
|
||||
eterm = term1*dmpik[0] + term2*dmpik[2] +
|
||||
term3*dmpik[4] + term4*dmpik[6] + term5*dmpik[8];
|
||||
|
||||
// compute the Pauli repulsion energy for this interaction
|
||||
|
||||
sizik = sizi * sizk * factor_repel;
|
||||
e = sizik * eterm * rr1;
|
||||
|
||||
// calculate intermediate terms for force and torque
|
||||
|
||||
de = term1*dmpik[2] + term2*dmpik[4] + term3*dmpik[6] +
|
||||
term4*dmpik[8] + term5*dmpik[10];
|
||||
term1 = -valk*dmpik[2] + dkr*dmpik[4] - qkr*dmpik[6];
|
||||
term2 = vali*dmpik[2] + dir*dmpik[4] + qir*dmpik[6];
|
||||
term3 = 2.0 * dmpik[4];
|
||||
term4 = 2.0 * (-valk*dmpik[4] + dkr*dmpik[6] - qkr*dmpik[8]);
|
||||
term5 = 2.0 * (-vali*dmpik[4] - dir*dmpik[6] - qir*dmpik[8]);
|
||||
term6 = 4.0 * dmpik[6];
|
||||
|
||||
// compute the force components for this interaction
|
||||
|
||||
frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) +
|
||||
term4*qix + term5*qkx + term6*(qixk+qkxi);
|
||||
frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) +
|
||||
term4*qiy + term5*qky + term6*(qiyk+qkyi);
|
||||
frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
|
||||
term4*qiz + term5*qkz + term6*(qizk+qkzi);
|
||||
frcx = frcx*rr1 + eterm*rr3*xr;
|
||||
frcy = frcy*rr1 + eterm*rr3*yr;
|
||||
frcz = frcz*rr1 + eterm*rr3*zr;
|
||||
frcx = sizik * frcx;
|
||||
frcy = sizik * frcy;
|
||||
frcz = sizik * frcz;
|
||||
|
||||
// compute the torque components for this interaction
|
||||
|
||||
ttri[0] = -dmpik[2]*dikx + term1*dirx + term3*(dqikx+dkqirx) -
|
||||
term4*qirx - term6*(qikrx+qikx);
|
||||
ttri[1] = -dmpik[2]*diky + term1*diry + term3*(dqiky+dkqiry) -
|
||||
term4*qiry - term6*(qikry+qiky);
|
||||
ttri[2] = -dmpik[2]*dikz + term1*dirz + term3*(dqikz+dkqirz) -
|
||||
term4*qirz - term6*(qikrz+qikz);
|
||||
ttrk[0] = dmpik[2]*dikx + term2*dkrx - term3*(dqikx+diqkrx) -
|
||||
term5*qkrx - term6*(qkirx-qikx);
|
||||
ttrk[1] = dmpik[2]*diky + term2*dkry - term3*(dqiky+diqkry) -
|
||||
term5*qkry - term6*(qkiry-qiky);
|
||||
ttrk[2] = dmpik[2]*dikz + term2*dkrz - term3*(dqikz+diqkrz) -
|
||||
term5*qkrz - term6*(qkirz-qikz);
|
||||
ttri[0] = sizik * ttri[0] * rr1;
|
||||
ttri[1] = sizik * ttri[1] * rr1;
|
||||
ttri[2] = sizik * ttri[2] * rr1;
|
||||
ttrk[0] = sizik * ttrk[0] * rr1;
|
||||
ttrk[1] = sizik * ttrk[1] * rr1;
|
||||
ttrk[2] = sizik * ttrk[2] * rr1;
|
||||
|
||||
// use energy switching if near the cutoff distance
|
||||
|
||||
if (r2 > cut2) {
|
||||
r3 = r2 * r;
|
||||
r4 = r2 * r2;
|
||||
r5 = r2 * r3;
|
||||
taper = c5*r5 + c4*r4 + c3*r3 + c2*r2 + c1*r + c0;
|
||||
dtaper = 5.0*c5*r4 + 4.0*c4*r3 + 3.0*c3*r2 + 2.0*c2*r + c1;
|
||||
dtaper *= e * rr1;
|
||||
e *= taper;
|
||||
frcx = frcx*taper - dtaper*xr;
|
||||
frcy = frcy*taper - dtaper*yr;
|
||||
frcz = frcz*taper - dtaper*zr;
|
||||
for (k = 0; k < 3; k++) {
|
||||
ttri[k] *= taper;
|
||||
ttrk[k] *= taper;
|
||||
}
|
||||
}
|
||||
|
||||
erepulse += e;
|
||||
|
||||
// increment force-based gradient and torque on atom I
|
||||
|
||||
f[i][0] -= frcx;
|
||||
f[i][1] -= frcy;
|
||||
f[i][2] -= frcz;
|
||||
tq[i][0] += ttri[0];
|
||||
tq[i][1] += ttri[1];
|
||||
tq[i][2] += ttri[2];
|
||||
|
||||
// increment force-based gradient and torque on atom J
|
||||
|
||||
f[j][0] += frcx;
|
||||
f[j][1] += frcy;
|
||||
f[j][2] += frcz;
|
||||
tq[j][0] += ttrk[0];
|
||||
tq[j][1] += ttrk[1];
|
||||
tq[j][2] += ttrk[2];
|
||||
|
||||
// increment the virial due to pairwise Cartesian forces
|
||||
|
||||
if (vflag_global) {
|
||||
vxx = -xr * frcx;
|
||||
vxy = -0.5 * (yr*frcx+xr*frcy);
|
||||
vxz = -0.5 * (zr*frcx+xr*frcz);
|
||||
vyy = -yr * frcy;
|
||||
vyz = -0.5 * (zr*frcy+yr*frcz);
|
||||
vzz = -zr * frcz;
|
||||
|
||||
virrepulse[0] -= vxx;
|
||||
virrepulse[1] -= vyy;
|
||||
virrepulse[2] -= vzz;
|
||||
virrepulse[3] -= vxy;
|
||||
virrepulse[4] -= vxz;
|
||||
virrepulse[5] -= vyz;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// reverse comm to sum torque from ghost atoms to owned atoms
|
||||
|
||||
crstyle = TORQUE;
|
||||
comm->reverse_comm(this);
|
||||
|
||||
// resolve site torques then increment forces and virial
|
||||
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
torque2force(i,tq[i],fix,fiy,fiz,f);
|
||||
|
||||
if (!vflag_global) continue;
|
||||
|
||||
iz = zaxis2local[i];
|
||||
ix = xaxis2local[i];
|
||||
iy = yaxis2local[i];
|
||||
|
||||
xiz = x[iz][0] - x[i][0];
|
||||
yiz = x[iz][1] - x[i][1];
|
||||
ziz = x[iz][2] - x[i][2];
|
||||
xix = x[ix][0] - x[i][0];
|
||||
yix = x[ix][1] - x[i][1];
|
||||
zix = x[ix][2] - x[i][2];
|
||||
xiy = x[iy][0] - x[i][0];
|
||||
yiy = x[iy][1] - x[i][1];
|
||||
ziy = x[iy][2] - x[i][2];
|
||||
|
||||
vxx = xix*fix[0] + xiy*fiy[0] + xiz*fiz[0];
|
||||
vyy = yix*fix[1] + yiy*fiy[1] + yiz*fiz[1];
|
||||
vzz = zix*fix[2] + ziy*fiy[2] + ziz*fiz[2];
|
||||
vxy = 0.5 * (yix*fix[0] + yiy*fiy[0] + yiz*fiz[0] +
|
||||
xix*fix[1] + xiy*fiy[1] + xiz*fiz[1]);
|
||||
vxz = 0.5 * (zix*fix[0] + ziy*fiy[0] + ziz*fiz[0] +
|
||||
xix*fix[2] + xiy*fiy[2] + xiz*fiz[2]);
|
||||
vyz = 0.5 * (zix*fix[1] + ziy*fiy[1] + ziz*fiz[1] +
|
||||
yix*fix[2] + yiy*fiy[2] + yiz*fiz[2]);
|
||||
|
||||
virrepulse[0] -= vxx;
|
||||
virrepulse[1] -= vyy;
|
||||
virrepulse[2] -= vzz;
|
||||
virrepulse[3] -= vxy;
|
||||
virrepulse[4] -= vxz;
|
||||
virrepulse[5] -= vyz;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
damprep generates coefficients for the Pauli repulsion
|
||||
damping function for powers of the interatomic distance
|
||||
|
||||
literature reference:
|
||||
|
||||
J. A. Rackers and J. W. Ponder, "Classical Pauli Repulsion: An
|
||||
Anisotropic, Atomic Multipole Model", Journal of Chemical Physics,
|
||||
150, 084104 (2019)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairAmoeba::damprep(double r, double r2, double rr1, double rr3,
|
||||
double rr5, double rr7, double rr9, double rr11,
|
||||
int rorder, double dmpi, double dmpk, double *dmpik)
|
||||
{
|
||||
double r3,r4;
|
||||
double r5,r6,r7,r8;
|
||||
double s,ds,d2s;
|
||||
double d3s,d4s,d5s;
|
||||
double dmpi2,dmpk2;
|
||||
double dmpi22,dmpi23;
|
||||
double dmpi24,dmpi25;
|
||||
double dmpi26,dmpi27;
|
||||
double dmpk22,dmpk23;
|
||||
double dmpk24,dmpk25;
|
||||
double dmpk26;
|
||||
double eps,diff;
|
||||
double expi,expk;
|
||||
double dampi,dampk;
|
||||
double pre,term,tmp;
|
||||
|
||||
// compute tolerance value for damping exponents
|
||||
|
||||
eps = 0.001;
|
||||
diff = fabs(dmpi-dmpk);
|
||||
|
||||
// treat the case where alpha damping exponents are equal
|
||||
|
||||
if (diff < eps) {
|
||||
r3 = r2 * r;
|
||||
r4 = r3 * r;
|
||||
r5 = r4 * r;
|
||||
r6 = r5 * r;
|
||||
r7 = r6 * r;
|
||||
dmpi2 = 0.5 * dmpi;
|
||||
dampi = dmpi2 * r;
|
||||
expi = exp(-dampi);
|
||||
dmpi22 = dmpi2 * dmpi2;
|
||||
dmpi23 = dmpi22 * dmpi2;
|
||||
dmpi24 = dmpi23 * dmpi2;
|
||||
dmpi25 = dmpi24 * dmpi2;
|
||||
dmpi26 = dmpi25 * dmpi2;
|
||||
pre = 128.0;
|
||||
s = (r + dmpi2*r2 + dmpi22*r3/3.0) * expi;
|
||||
|
||||
ds = (dmpi22*r3 + dmpi23*r4) * expi / 3.0;
|
||||
d2s = dmpi24 * expi * r5 / 9.0;
|
||||
d3s = dmpi25 * expi * r6 / 45.0;
|
||||
d4s = (dmpi25*r6 + dmpi26*r7) * expi / 315.0;
|
||||
if (rorder >= 11) {
|
||||
r8 = r7 * r;
|
||||
dmpi27 = dmpi2 * dmpi26;
|
||||
d5s = (dmpi25*r6 + dmpi26*r7 + dmpi27*r8/3.0) * expi / 945.0;
|
||||
} else d5s = 0.0;
|
||||
|
||||
// treat the case where alpha damping exponents are unequal
|
||||
|
||||
} else {
|
||||
r3 = r2 * r;
|
||||
r4 = r3 * r;
|
||||
r5 = r4 * r;
|
||||
dmpi2 = 0.5 * dmpi;
|
||||
dmpk2 = 0.5 * dmpk;
|
||||
dampi = dmpi2 * r;
|
||||
dampk = dmpk2 * r;
|
||||
expi = exp(-dampi);
|
||||
expk = exp(-dampk);
|
||||
dmpi22 = dmpi2 * dmpi2;
|
||||
dmpi23 = dmpi22 * dmpi2;
|
||||
dmpi24 = dmpi23 * dmpi2;
|
||||
dmpi25 = dmpi24 * dmpi2;
|
||||
dmpk22 = dmpk2 * dmpk2;
|
||||
dmpk23 = dmpk22 * dmpk2;
|
||||
dmpk24 = dmpk23 * dmpk2;
|
||||
dmpk25 = dmpk24 * dmpk2;
|
||||
term = dmpi22 - dmpk22;
|
||||
pre = 8192.0 * dmpi23 * dmpk23 / pow(term,4.0);
|
||||
tmp = 4.0 * dmpi2 * dmpk2 / term;
|
||||
s = (dampi-tmp)*expk + (dampk+tmp)*expi;
|
||||
|
||||
ds = (dmpi2*dmpk2*r2 - 4.0*dmpi2*dmpk22*r/term -
|
||||
4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi2*dmpk2*r2 + 4.0*dmpi22*dmpk2*r/term + 4.0*dmpi2*dmpk2/term) * expi;
|
||||
d2s = (dmpi2*dmpk2*r2/3.0 + dmpi2*dmpk22*r3/3.0 -
|
||||
(4.0/3.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
|
||||
4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi2*dmpk2*r2/3.0 + dmpi22*dmpk2*r3/3.0 +
|
||||
(4.0/3.0)*dmpi23*dmpk2*r2/term + 4.0*dmpi22*dmpk2*r/term +
|
||||
4.0*dmpi2*dmpk2/term) * expi;
|
||||
d3s = (dmpi2*dmpk23*r4/15.0 + dmpi2*dmpk22*r3/5.0 + dmpi2*dmpk2*r2/5.0 -
|
||||
(4.0/15.0)*dmpi2*dmpk24*r3/term - (8.0/5.0)*dmpi2*dmpk23*r2/term -
|
||||
4.0*dmpi2*dmpk22*r/term - 4.0/term*dmpi2*dmpk2) * expk +
|
||||
(dmpi23*dmpk2*r4/15.0 + dmpi22*dmpk2*r3/5.0 + dmpi2*dmpk2*r2/5.0 +
|
||||
(4.0/15.0)*dmpi24*dmpk2*r3/term + (8.0/5.0)*dmpi23*dmpk2*r2/term +
|
||||
4.0*dmpi22*dmpk2*r/term + 4.0/term*dmpi2*dmpk2) * expi;
|
||||
d4s = (dmpi2*dmpk24*r5/105.0 + (2.0/35.0)*dmpi2*dmpk23*r4 +
|
||||
dmpi2*dmpk22*r3/7.0 + dmpi2*dmpk2*r2/7.0 -
|
||||
(4.0/105.0)*dmpi2*dmpk25*r4/term - (8.0/21.0)*dmpi2*dmpk24*r3/term -
|
||||
(12.0/7.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
|
||||
4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi24*dmpk2*r5/105.0 + (2.0/35.0)*dmpi23*dmpk2*r4 +
|
||||
dmpi22*dmpk2*r3/7.0 + dmpi2*dmpk2*r2/7.0 +
|
||||
(4.0/105.0)*dmpi25*dmpk2*r4/term + (8.0/21.0)*dmpi24*dmpk2*r3/term +
|
||||
(12.0/7.0)*dmpi23*dmpk2*r2/term + 4.0*dmpi22*dmpk2*r/term +
|
||||
4.0*dmpi2*dmpk2/term) * expi;
|
||||
|
||||
if (rorder >= 11) {
|
||||
r6 = r5 * r;
|
||||
dmpi26 = dmpi25 * dmpi2;
|
||||
dmpk26 = dmpk25 * dmpk2;
|
||||
d5s = (dmpi2*dmpk25*r6/945.0 + (2.0/189.0)*dmpi2*dmpk24*r5 +
|
||||
dmpi2*dmpk23*r4/21.0 + dmpi2*dmpk22*r3/9.0 + dmpi2*dmpk2*r2/9.0 -
|
||||
(4.0/945.0)*dmpi2*dmpk26*r5/term -
|
||||
(4.0/63.0)*dmpi2*dmpk25*r4/term - (4.0/9.0)*dmpi2*dmpk24*r3/term -
|
||||
(16.0/9.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
|
||||
4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi25*dmpk2*r6/945.0 + (2.0/189.0)*dmpi24*dmpk2*r5 +
|
||||
dmpi23*dmpk2*r4/21.0 + dmpi22*dmpk2*r3/9.0 + dmpi2*dmpk2*r2/9.0 +
|
||||
(4.0/945.0)*dmpi26*dmpk2*r5/term + (4.0/63.0)*dmpi25*dmpk2*r4/term +
|
||||
(4.0/9.0)*dmpi24*dmpk2*r3/term + (16.0/9.0)*dmpi23*dmpk2*r2/term +
|
||||
4.0*dmpi22*dmpk2*r/term + 4.0*dmpi2*dmpk2/term) * expi;
|
||||
} else d5s = 0.0;
|
||||
}
|
||||
|
||||
// convert partial derivatives into full derivatives
|
||||
|
||||
s = s * rr1;
|
||||
ds = ds * rr3;
|
||||
d2s = d2s * rr5;
|
||||
d3s = d3s * rr7;
|
||||
d4s = d4s * rr9;
|
||||
d5s = d5s * rr11;
|
||||
dmpik[0] = 0.5 * pre * s * s;
|
||||
dmpik[2] = pre * s * ds;
|
||||
dmpik[4] = pre * (s*d2s + ds*ds);
|
||||
dmpik[6] = pre * (s*d3s + 3.0*ds*d2s);
|
||||
dmpik[8] = pre * (s*d4s + 4.0*ds*d3s + 3.0*d2s*d2s);
|
||||
if (rorder >= 11) dmpik[10] = pre * (s*d5s + 5.0*ds*d4s + 10.0*d2s*d3s);
|
||||
}
|
||||
1135
src/AMOEBA/amoeba_utils.cpp
Normal file
1135
src/AMOEBA/amoeba_utils.cpp
Normal file
File diff suppressed because it is too large
Load Diff
866
src/AMOEBA/angle_amoeba.cpp
Normal file
866
src/AMOEBA/angle_amoeba.cpp
Normal file
@ -0,0 +1,866 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "angle_amoeba.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "domain.h"
|
||||
#include "error.h"
|
||||
#include "force.h"
|
||||
#include "math_const.h"
|
||||
#include "memory.h"
|
||||
#include "neighbor.h"
|
||||
#include "pair.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
#define SMALL 0.001
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
AngleAmoeba::AngleAmoeba(LAMMPS *lmp) : Angle(lmp)
|
||||
{
|
||||
pflag = nullptr;
|
||||
ubflag = nullptr;
|
||||
|
||||
theta0 = nullptr;
|
||||
k2 = nullptr;
|
||||
k3 = nullptr;
|
||||
k4 = nullptr;
|
||||
k5 = nullptr;
|
||||
k6 = nullptr;
|
||||
|
||||
ba_k1 = nullptr;
|
||||
ba_k2 = nullptr;
|
||||
ba_r1 = nullptr;
|
||||
ba_r2 = nullptr;
|
||||
|
||||
ub_k = nullptr;
|
||||
ub_r0 = nullptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
AngleAmoeba::~AngleAmoeba()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
if (allocated) {
|
||||
memory->destroy(setflag);
|
||||
memory->destroy(setflag_a);
|
||||
memory->destroy(setflag_ba);
|
||||
memory->destroy(setflag_ub);
|
||||
|
||||
memory->destroy(pflag);
|
||||
memory->destroy(ubflag);
|
||||
|
||||
memory->destroy(theta0);
|
||||
memory->destroy(k2);
|
||||
memory->destroy(k3);
|
||||
memory->destroy(k4);
|
||||
memory->destroy(k5);
|
||||
memory->destroy(k6);
|
||||
|
||||
memory->destroy(ba_k1);
|
||||
memory->destroy(ba_k2);
|
||||
memory->destroy(ba_r1);
|
||||
memory->destroy(ba_r2);
|
||||
|
||||
memory->destroy(ub_k);
|
||||
memory->destroy(ub_r0);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AngleAmoeba::compute(int eflag, int vflag)
|
||||
{
|
||||
int i1,i2,i3,n,type,tflag,uflag;
|
||||
|
||||
int **anglelist = neighbor->anglelist;
|
||||
int **nspecial = atom->nspecial;
|
||||
int nanglelist = neighbor->nanglelist;
|
||||
|
||||
ev_init(eflag,vflag);
|
||||
|
||||
for (n = 0; n < nanglelist; n++) {
|
||||
i1 = anglelist[n][0];
|
||||
i2 = anglelist[n][1];
|
||||
i3 = anglelist[n][2];
|
||||
type = anglelist[n][3];
|
||||
|
||||
// tflag = 0 for "angle", 1 for "anglep" in Tinker PRM file
|
||||
// atom 2 must have exactly 3 bond partners to invoke anglep() variant
|
||||
|
||||
if (enable_angle) {
|
||||
tflag = pflag[type];
|
||||
|
||||
if (tflag && nspecial[i2][0] == 3)
|
||||
tinker_anglep(i1,i2,i3,type,eflag);
|
||||
else
|
||||
tinker_angle(i1,i2,i3,type,eflag);
|
||||
|
||||
// bondangle = bond-stretch cross term in Tinker
|
||||
|
||||
if (ba_k1[type] != 0.0)
|
||||
tinker_bondangle(i1,i2,i3,type,eflag);
|
||||
}
|
||||
|
||||
// Urey-Bradley H-H bond term within water molecules
|
||||
|
||||
if (enable_urey) {
|
||||
uflag = ubflag[type];
|
||||
if (uflag) tinker_urey_bradley(i1,i3,type,eflag);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AngleAmoeba::tinker_angle(int i1, int i2, int i3, int type, int eflag)
|
||||
{
|
||||
double delx1,dely1,delz1,delx2,dely2,delz2;
|
||||
double eangle,f1[3],f3[3];
|
||||
double dtheta,dtheta2,dtheta3,dtheta4,dtheta5,dtheta6,de_angle;
|
||||
double rsq1,rsq2,r1,r2,c,s,a;
|
||||
double a11,a12,a22;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int nlocal = atom->nlocal;
|
||||
int newton_bond = force->newton_bond;
|
||||
|
||||
// 1st bond
|
||||
|
||||
delx1 = x[i1][0] - x[i2][0];
|
||||
dely1 = x[i1][1] - x[i2][1];
|
||||
delz1 = x[i1][2] - x[i2][2];
|
||||
|
||||
rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
|
||||
r1 = sqrt(rsq1);
|
||||
|
||||
// 2nd bond
|
||||
|
||||
delx2 = x[i3][0] - x[i2][0];
|
||||
dely2 = x[i3][1] - x[i2][1];
|
||||
delz2 = x[i3][2] - x[i2][2];
|
||||
|
||||
rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
|
||||
r2 = sqrt(rsq2);
|
||||
|
||||
// angle (cos and sin)
|
||||
|
||||
c = delx1*delx2 + dely1*dely2 + delz1*delz2;
|
||||
c /= r1*r2;
|
||||
|
||||
if (c > 1.0) c = 1.0;
|
||||
if (c < -1.0) c = -1.0;
|
||||
|
||||
s = sqrt(1.0 - c*c);
|
||||
if (s < SMALL) s = SMALL;
|
||||
s = 1.0/s;
|
||||
|
||||
// force & energy for angle term
|
||||
|
||||
dtheta = acos(c) - theta0[type];
|
||||
dtheta2 = dtheta*dtheta;
|
||||
dtheta3 = dtheta2*dtheta;
|
||||
dtheta4 = dtheta3*dtheta;
|
||||
dtheta5 = dtheta4*dtheta;
|
||||
dtheta6 = dtheta5*dtheta;
|
||||
|
||||
de_angle = 2.0*k2[type]*dtheta + 3.0*k3[type]*dtheta2 +
|
||||
4.0*k4[type]*dtheta3 + 5.0*k5[type]*dtheta4 + 6.0*k6[type]*dtheta5;
|
||||
|
||||
a = -de_angle*s;
|
||||
a11 = a*c / rsq1;
|
||||
a12 = -a / (r1*r2);
|
||||
a22 = a*c / rsq2;
|
||||
|
||||
f1[0] = a11*delx1 + a12*delx2;
|
||||
f1[1] = a11*dely1 + a12*dely2;
|
||||
f1[2] = a11*delz1 + a12*delz2;
|
||||
|
||||
f3[0] = a22*delx2 + a12*delx1;
|
||||
f3[1] = a22*dely2 + a12*dely1;
|
||||
f3[2] = a22*delz2 + a12*delz1;
|
||||
|
||||
eangle = 0.0;
|
||||
if (eflag) eangle = k2[type]*dtheta2 + k3[type]*dtheta3 +
|
||||
k4[type]*dtheta4 + k5[type]*dtheta5 + k6[type]*dtheta6;
|
||||
|
||||
// apply force to each of 3 atoms
|
||||
|
||||
if (newton_bond || i1 < nlocal) {
|
||||
f[i1][0] += f1[0];
|
||||
f[i1][1] += f1[1];
|
||||
f[i1][2] += f1[2];
|
||||
}
|
||||
|
||||
if (newton_bond || i2 < nlocal) {
|
||||
f[i2][0] -= f1[0] + f3[0];
|
||||
f[i2][1] -= f1[1] + f3[1];
|
||||
f[i2][2] -= f1[2] + f3[2];
|
||||
}
|
||||
|
||||
if (newton_bond || i3 < nlocal) {
|
||||
f[i3][0] += f3[0];
|
||||
f[i3][1] += f3[1];
|
||||
f[i3][2] += f3[2];
|
||||
}
|
||||
|
||||
if (evflag) ev_tally(i1,i2,i3,nlocal,newton_bond,eangle,f1,f3,
|
||||
delx1,dely1,delz1,delx2,dely2,delz2);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AngleAmoeba::tinker_anglep(int i1, int i2, int i3, int type, int eflag)
|
||||
{
|
||||
int i4;
|
||||
tagint i1tag,i3tag,i4tag;
|
||||
double xia,yia,zia,xib,yib,zib,xic,yic,zic,xid,yid,zid;
|
||||
double xad,yad,zad,xbd,ybd,zbd,xcd,ycd,zcd;
|
||||
double xt,yt,zt,rt2;
|
||||
double xip,yip,zip,xap,yap,zap,xcp,ycp,zcp;
|
||||
double rap2,rcp2;
|
||||
double dtheta,dtheta2,dtheta3,dtheta4,dtheta5,dtheta6;
|
||||
double xm,ym,zm,rm,dot;
|
||||
double cosine,eangle,deddt;
|
||||
double dedxip,dedyip,dedzip,dpdxia,dpdyia,dpdzia,dpdxic,dpdyic,dpdzic;
|
||||
double delta,delta2,ptrt2,term,terma,termc;
|
||||
double f1[3],f2[3],f3[3],f4[3];
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
tagint **special = atom->special;
|
||||
int nlocal = atom->nlocal;
|
||||
int newton_bond = force->newton_bond;
|
||||
|
||||
// i4 = index of third atom that i2 is bonded to
|
||||
|
||||
i1tag = atom->tag[i1];
|
||||
i3tag = atom->tag[i3];
|
||||
|
||||
for (int ibond = 0; ibond < 3; ibond++) {
|
||||
i4tag = special[i2][ibond];
|
||||
if (i4tag != i1tag && i4tag != i3tag) break;
|
||||
}
|
||||
|
||||
i4 = atom->map(i4tag);
|
||||
i4 = domain->closest_image(i2,i4);
|
||||
|
||||
// anglep out-of-plane calculation from Tinker
|
||||
|
||||
xia = x[i1][0];
|
||||
yia = x[i1][1];
|
||||
zia = x[i1][2];
|
||||
xib = x[i2][0];
|
||||
yib = x[i2][1];
|
||||
zib = x[i2][2];
|
||||
xic = x[i3][0];
|
||||
yic = x[i3][1];
|
||||
zic = x[i3][2];
|
||||
xid = x[i4][0];
|
||||
yid = x[i4][1];
|
||||
zid = x[i4][2];
|
||||
|
||||
xad = xia - xid;
|
||||
yad = yia - yid;
|
||||
zad = zia - zid;
|
||||
xbd = xib - xid;
|
||||
ybd = yib - yid;
|
||||
zbd = zib - zid;
|
||||
xcd = xic - xid;
|
||||
ycd = yic - yid;
|
||||
zcd = zic - zid;
|
||||
|
||||
xt = yad*zcd - zad*ycd;
|
||||
yt = zad*xcd - xad*zcd;
|
||||
zt = xad*ycd - yad*xcd;
|
||||
rt2 = xt*xt + yt*yt + zt*zt;
|
||||
delta = -(xt*xbd + yt*ybd + zt*zbd) / rt2;
|
||||
xip = xib + xt*delta;
|
||||
yip = yib + yt*delta;
|
||||
zip = zib + zt*delta;
|
||||
xap = xia - xip;
|
||||
yap = yia - yip;
|
||||
zap = zia - zip;
|
||||
xcp = xic - xip;
|
||||
ycp = yic - yip;
|
||||
zcp = zic - zip;
|
||||
rap2 = xap*xap + yap*yap + zap*zap;
|
||||
rcp2 = xcp*xcp + ycp*ycp + zcp*zcp;
|
||||
|
||||
// Tinker just skips the computation in either is zero
|
||||
|
||||
if (rap2 == 0.0 || rcp2 == 0.0) return;
|
||||
|
||||
xm = ycp*zap - zcp*yap;
|
||||
ym = zcp*xap - xcp*zap;
|
||||
zm = xcp*yap - ycp*xap;
|
||||
rm = sqrt(xm*xm + ym*ym + zm*zm);
|
||||
rm = MAX(rm,0.0001);
|
||||
dot = xap*xcp + yap*ycp + zap*zcp;
|
||||
cosine = dot / sqrt(rap2*rcp2);
|
||||
cosine = MIN(1.0,MAX(-1.0,cosine));
|
||||
|
||||
// force & energy for angle term
|
||||
|
||||
dtheta = acos(cosine) - theta0[type];
|
||||
dtheta2 = dtheta*dtheta;
|
||||
dtheta3 = dtheta2*dtheta;
|
||||
dtheta4 = dtheta3*dtheta;
|
||||
dtheta5 = dtheta4*dtheta;
|
||||
dtheta6 = dtheta5*dtheta;
|
||||
|
||||
deddt = 2.0*k2[type]*dtheta + 3.0*k3[type]*dtheta2 +
|
||||
4.0*k4[type]*dtheta3 + 5.0*k5[type]*dtheta4 + 6.0*k6[type]*dtheta5;
|
||||
|
||||
eangle = 0.0;
|
||||
if (eflag) eangle = k2[type]*dtheta2 + k3[type]*dtheta3 +
|
||||
k4[type]*dtheta4 + k5[type]*dtheta5 + k6[type]*dtheta6;
|
||||
|
||||
// chain rule terms for first derivative components
|
||||
|
||||
terma = -deddt / (rap2*rm);
|
||||
termc = deddt / (rcp2*rm);
|
||||
f1[0] = terma * (yap*zm-zap*ym);
|
||||
f1[1] = terma * (zap*xm-xap*zm);
|
||||
f1[2] = terma * (xap*ym-yap*xm);
|
||||
f3[0] = termc * (ycp*zm-zcp*ym);
|
||||
f3[1] = termc * (zcp*xm-xcp*zm);
|
||||
f3[2] = termc * (xcp*ym-ycp*xm);
|
||||
dedxip = -f1[0] - f3[0];
|
||||
dedyip = -f1[1] - f3[1];
|
||||
dedzip = -f1[2] - f3[2];
|
||||
|
||||
// chain rule components for the projection of the central atom
|
||||
|
||||
delta2 = 2.0 * delta;
|
||||
ptrt2 = (dedxip*xt + dedyip*yt + dedzip*zt) / rt2;
|
||||
term = (zcd*ybd-ycd*zbd) + delta2*(yt*zcd-zt*ycd);
|
||||
dpdxia = delta*(ycd*dedzip-zcd*dedyip) + term*ptrt2;
|
||||
term = (xcd*zbd-zcd*xbd) + delta2*(zt*xcd-xt*zcd);
|
||||
dpdyia = delta*(zcd*dedxip-xcd*dedzip) + term*ptrt2;
|
||||
term = (ycd*xbd-xcd*ybd) + delta2*(xt*ycd-yt*xcd);
|
||||
dpdzia = delta*(xcd*dedyip-ycd*dedxip) + term*ptrt2;
|
||||
term = (yad*zbd-zad*ybd) + delta2*(zt*yad-yt*zad);
|
||||
dpdxic = delta*(zad*dedyip-yad*dedzip) + term*ptrt2;
|
||||
term = (zad*xbd-xad*zbd) + delta2*(xt*zad-zt*xad);
|
||||
dpdyic = delta*(xad*dedzip-zad*dedxip) + term*ptrt2;
|
||||
term = (xad*ybd-yad*xbd) + delta2*(yt*xad-xt*yad);
|
||||
dpdzic = delta*(yad*dedxip-xad*dedyip) + term*ptrt2;
|
||||
|
||||
// compute derivative components for this interaction
|
||||
|
||||
f1[0] += dpdxia;
|
||||
f1[1] += dpdyia;
|
||||
f1[2] += dpdzia;
|
||||
f2[0] = dedxip;
|
||||
f2[1] = dedyip;
|
||||
f2[2] = dedzip;
|
||||
f3[0] += dpdxic;
|
||||
f3[1] += dpdyic;
|
||||
f3[2] += dpdzic;
|
||||
f4[0] = -f1[0] - f2[0] - f3[0];
|
||||
f4[1] = -f1[1] - f2[1] - f3[1];
|
||||
f4[2] = -f1[2] - f2[2] - f3[2];
|
||||
|
||||
// apply force to each of 4 atoms
|
||||
|
||||
if (newton_bond || i1 < nlocal) {
|
||||
f[i1][0] -= f1[0];
|
||||
f[i1][1] -= f1[1];
|
||||
f[i1][2] -= f1[2];
|
||||
}
|
||||
|
||||
if (newton_bond || i2 < nlocal) {
|
||||
f[i2][0] -= f2[0];
|
||||
f[i2][1] -= f2[1];
|
||||
f[i2][2] -= f2[2];
|
||||
}
|
||||
|
||||
if (newton_bond || i3 < nlocal) {
|
||||
f[i3][0] -= f3[0];
|
||||
f[i3][1] -= f3[1];
|
||||
f[i3][2] -= f3[2];
|
||||
}
|
||||
|
||||
if (newton_bond || i4 < nlocal) {
|
||||
f[i4][0] -= f4[0];
|
||||
f[i4][1] -= f4[1];
|
||||
f[i4][2] -= f4[2];
|
||||
}
|
||||
|
||||
if (evflag) {
|
||||
f1[0] = -f1[0]; f1[1] = -f1[1]; f1[2] = -f1[2];
|
||||
f2[0] = -f2[0]; f2[1] = -f2[1]; f2[2] = -f2[2];
|
||||
f3[0] = -f3[0]; f3[1] = -f3[1]; f3[2] = -f3[2];
|
||||
f4[0] = -f4[0]; f4[1] = -f4[1]; f4[2] = -f4[2];
|
||||
ev_tally4(i1,i2,i3,i4,nlocal,newton_bond,eangle,f1,f2,f3,f4);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AngleAmoeba::tinker_bondangle(int i1, int i2, int i3, int type, int eflag)
|
||||
{
|
||||
double delx1,dely1,delz1,delx2,dely2,delz2;
|
||||
double rsq1,r1,rsq2,r2,c,s,dtheta;
|
||||
double dr1,dr2,aa1,aa2,b1,b2;
|
||||
double aa11,aa12,aa21,aa22;
|
||||
double vx11,vx12,vy11,vy12,vz11,vz12,vx21,vx22,vy21,vy22,vz21,vz22;
|
||||
double eangle,f1[3],f3[3];
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int nlocal = atom->nlocal;
|
||||
int newton_bond = force->newton_bond;
|
||||
|
||||
// 1st bond
|
||||
|
||||
delx1 = x[i1][0] - x[i2][0];
|
||||
dely1 = x[i1][1] - x[i2][1];
|
||||
delz1 = x[i1][2] - x[i2][2];
|
||||
|
||||
rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
|
||||
r1 = sqrt(rsq1);
|
||||
|
||||
// 2nd bond
|
||||
|
||||
delx2 = x[i3][0] - x[i2][0];
|
||||
dely2 = x[i3][1] - x[i2][1];
|
||||
delz2 = x[i3][2] - x[i2][2];
|
||||
|
||||
rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
|
||||
r2 = sqrt(rsq2);
|
||||
|
||||
// angle (cos and sin)
|
||||
|
||||
c = delx1*delx2 + dely1*dely2 + delz1*delz2;
|
||||
c /= r1*r2;
|
||||
|
||||
if (c > 1.0) c = 1.0;
|
||||
if (c < -1.0) c = -1.0;
|
||||
|
||||
s = sqrt(1.0 - c*c);
|
||||
if (s < SMALL) s = SMALL;
|
||||
s = 1.0/s;
|
||||
|
||||
dtheta = acos(c) - theta0[type];
|
||||
|
||||
// force & energy for bond-angle term
|
||||
|
||||
dr1 = r1 - ba_r1[type];
|
||||
dr2 = r2 - ba_r2[type];
|
||||
|
||||
aa1 = s * dr1 * ba_k1[type];
|
||||
aa2 = s * dr2 * ba_k2[type];
|
||||
|
||||
aa11 = aa1 * c / rsq1;
|
||||
aa12 = -aa1 / (r1 * r2);
|
||||
aa21 = aa2 * c / rsq1;
|
||||
aa22 = -aa2 / (r1 * r2);
|
||||
|
||||
vx11 = (aa11 * delx1) + (aa12 * delx2);
|
||||
vx12 = (aa21 * delx1) + (aa22 * delx2);
|
||||
vy11 = (aa11 * dely1) + (aa12 * dely2);
|
||||
vy12 = (aa21 * dely1) + (aa22 * dely2);
|
||||
vz11 = (aa11 * delz1) + (aa12 * delz2);
|
||||
vz12 = (aa21 * delz1) + (aa22 * delz2);
|
||||
|
||||
aa11 = aa1 * c / rsq2;
|
||||
aa21 = aa2 * c / rsq2;
|
||||
|
||||
vx21 = (aa11 * delx2) + (aa12 * delx1);
|
||||
vx22 = (aa21 * delx2) + (aa22 * delx1);
|
||||
vy21 = (aa11 * dely2) + (aa12 * dely1);
|
||||
vy22 = (aa21 * dely2) + (aa22 * dely1);
|
||||
vz21 = (aa11 * delz2) + (aa12 * delz1);
|
||||
vz22 = (aa21 * delz2) + (aa22 * delz1);
|
||||
|
||||
b1 = ba_k1[type] * dtheta / r1;
|
||||
b2 = ba_k2[type] * dtheta / r2;
|
||||
|
||||
f1[0] = -(vx11 + b1*delx1 + vx12);
|
||||
f1[1] = -(vy11 + b1*dely1 + vy12);
|
||||
f1[2] = -(vz11 + b1*delz1 + vz12);
|
||||
|
||||
f3[0] = -(vx21 + b2*delx2 + vx22);
|
||||
f3[1] = -(vy21 + b2*dely2 + vy22);
|
||||
f3[2] = -(vz21 + b2*delz2 + vz22);
|
||||
|
||||
eangle = 0.0;
|
||||
if (eflag) eangle = ba_k1[type]*dr1*dtheta + ba_k2[type]*dr2*dtheta;
|
||||
|
||||
// apply force to each of 3 atoms
|
||||
|
||||
if (newton_bond || i1 < nlocal) {
|
||||
f[i1][0] += f1[0];
|
||||
f[i1][1] += f1[1];
|
||||
f[i1][2] += f1[2];
|
||||
}
|
||||
|
||||
if (newton_bond || i2 < nlocal) {
|
||||
f[i2][0] -= f1[0] + f3[0];
|
||||
f[i2][1] -= f1[1] + f3[1];
|
||||
f[i2][2] -= f1[2] + f3[2];
|
||||
}
|
||||
|
||||
if (newton_bond || i3 < nlocal) {
|
||||
f[i3][0] += f3[0];
|
||||
f[i3][1] += f3[1];
|
||||
f[i3][2] += f3[2];
|
||||
}
|
||||
|
||||
if (evflag) ev_tally(i1,i2,i3,nlocal,newton_bond,eangle,f1,f3,
|
||||
delx1,dely1,delz1,delx2,dely2,delz2);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AngleAmoeba::tinker_urey_bradley(int i1, int i2, int type, int eflag)
|
||||
{
|
||||
double delx,dely,delz;
|
||||
double rsq,r,dr,rk;
|
||||
double fbond,ebond;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int nlocal = atom->nlocal;
|
||||
int newton_bond = force->newton_bond;
|
||||
|
||||
delx = x[i1][0] - x[i2][0];
|
||||
dely = x[i1][1] - x[i2][1];
|
||||
delz = x[i1][2] - x[i2][2];
|
||||
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
r = sqrt(rsq);
|
||||
dr = r - ub_r0[type];
|
||||
rk = ub_k[type] * dr;
|
||||
|
||||
// force & energy
|
||||
|
||||
if (r > 0.0) fbond = -2.0*rk/r;
|
||||
else fbond = 0.0;
|
||||
|
||||
if (eflag) ebond = rk*dr;
|
||||
|
||||
// apply force to each of 2 atoms
|
||||
|
||||
if (newton_bond || i1 < nlocal) {
|
||||
f[i1][0] += delx*fbond;
|
||||
f[i1][1] += dely*fbond;
|
||||
f[i1][2] += delz*fbond;
|
||||
}
|
||||
|
||||
if (newton_bond || i2 < nlocal) {
|
||||
f[i2][0] -= delx*fbond;
|
||||
f[i2][1] -= dely*fbond;
|
||||
f[i2][2] -= delz*fbond;
|
||||
}
|
||||
|
||||
if (evflag) ev_tally2(i1,i2,nlocal,newton_bond,ebond,fbond,delx,dely,delz);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AngleAmoeba::allocate()
|
||||
{
|
||||
allocated = 1;
|
||||
int n = atom->nangletypes;
|
||||
|
||||
memory->create(pflag,n+1,"angle:pflag");
|
||||
memory->create(ubflag,n+1,"angle:ubflag");
|
||||
memory->create(theta0,n+1,"angle:theta0");
|
||||
memory->create(k2,n+1,"angle:k2");
|
||||
memory->create(k3,n+1,"angle:k3");
|
||||
memory->create(k4,n+1,"angle:k4");
|
||||
memory->create(k5,n+1,"angle:k5");
|
||||
memory->create(k6,n+1,"angle:k6");
|
||||
|
||||
memory->create(ba_k1,n+1,"angle:ba_k1");
|
||||
memory->create(ba_k2,n+1,"angle:ba_k2");
|
||||
memory->create(ba_r1,n+1,"angle:ba_r1");
|
||||
memory->create(ba_r2,n+1,"angle:ba_r2");
|
||||
|
||||
memory->create(ub_k,n+1,"angle:ub_k");
|
||||
memory->create(ub_r0,n+1,"angle:ub_r0");
|
||||
|
||||
memory->create(setflag,n+1,"angle:setflag");
|
||||
memory->create(setflag_a,n+1,"angle:setflag_a");
|
||||
memory->create(setflag_ba,n+1,"angle:setflag_ba");
|
||||
memory->create(setflag_ub,n+1,"angle:setflag_ub");
|
||||
|
||||
for (int i = 1; i <= n; i++)
|
||||
setflag[i] = setflag_a[i] = setflag_ba[i] = setflag_ub[i] = 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set coeffs for one or more types
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AngleAmoeba::coeff(int narg, char **arg)
|
||||
{
|
||||
if (narg < 2) error->all(FLERR,"Incorrect args for angle coefficients");
|
||||
if (!allocated) allocate();
|
||||
|
||||
int ilo,ihi;
|
||||
utils::bounds(FLERR,arg[0],1,atom->nangletypes,ilo,ihi,error);
|
||||
|
||||
int count = 0;
|
||||
|
||||
if (strcmp(arg[1],"ba") == 0) {
|
||||
if (narg != 6) error->all(FLERR,"Incorrect args for angle coefficients");
|
||||
|
||||
double ba_k1_one = utils::numeric(FLERR,arg[2],false,lmp);
|
||||
double ba_k2_one = utils::numeric(FLERR,arg[3],false,lmp);
|
||||
double ba_r1_one = utils::numeric(FLERR,arg[4],false,lmp);
|
||||
double ba_r2_one = utils::numeric(FLERR,arg[5],false,lmp);
|
||||
|
||||
for (int i = ilo; i <= ihi; i++) {
|
||||
ba_k1[i] = ba_k1_one;
|
||||
ba_k2[i] = ba_k2_one;
|
||||
ba_r1[i] = ba_r1_one;
|
||||
ba_r2[i] = ba_r2_one;
|
||||
setflag_ba[i] = 1;
|
||||
count++;
|
||||
}
|
||||
|
||||
} else if (strcmp(arg[1],"ub") == 0) {
|
||||
if (narg != 4) error->all(FLERR,"Incorrect args for angle coefficients");
|
||||
|
||||
double ub_k_one = utils::numeric(FLERR,arg[2],false,lmp);
|
||||
double ub_r0_one = utils::numeric(FLERR,arg[3],false,lmp);
|
||||
|
||||
for (int i = ilo; i <= ihi; i++) {
|
||||
ub_k[i] = ub_k_one;
|
||||
ub_r0[i] = ub_r0_one;
|
||||
setflag_ub[i] = 1;
|
||||
count++;
|
||||
}
|
||||
|
||||
} else {
|
||||
if (narg != 9) error->all(FLERR,"Incorrect args for angle coefficients");
|
||||
|
||||
int pflag_one = utils::inumeric(FLERR,arg[1],false,lmp);
|
||||
int ubflag_one = utils::inumeric(FLERR,arg[2],false,lmp);
|
||||
double theta0_one = utils::numeric(FLERR,arg[3],false,lmp);
|
||||
double k2_one = utils::numeric(FLERR,arg[4],false,lmp);
|
||||
double k3_one = utils::numeric(FLERR,arg[5],false,lmp);
|
||||
double k4_one = utils::numeric(FLERR,arg[6],false,lmp);
|
||||
double k5_one = utils::numeric(FLERR,arg[7],false,lmp);
|
||||
double k6_one = utils::numeric(FLERR,arg[8],false,lmp);
|
||||
|
||||
// convert theta0 from degrees to radians
|
||||
|
||||
for (int i = ilo; i <= ihi; i++) {
|
||||
pflag[i] = pflag_one;
|
||||
ubflag[i] = ubflag_one;
|
||||
theta0[i] = theta0_one/180.0 * MY_PI;
|
||||
k2[i] = k2_one;
|
||||
k3[i] = k3_one;
|
||||
k4[i] = k4_one;
|
||||
k5[i] = k5_one;
|
||||
k6[i] = k6_one;
|
||||
setflag_a[i] = 1;
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
if (count == 0) error->all(FLERR,"Incorrect args for angle coefficients");
|
||||
|
||||
for (int i = ilo; i <= ihi; i++)
|
||||
if (setflag_a[i] == 1 && setflag_ba[i] == 1 && setflag_ub[i])
|
||||
setflag[i] = 1;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AngleAmoeba::init_style()
|
||||
{
|
||||
// check if PairAmoeba or PairHippo disabled angle or Urey-Bradley terms
|
||||
|
||||
Pair *pair = nullptr;
|
||||
pair = force->pair_match("amoeba",1,0);
|
||||
if (!pair) pair = force->pair_match("hippo",1,0);
|
||||
|
||||
if (!pair) enable_angle = enable_urey = 1;
|
||||
else {
|
||||
int tmp;
|
||||
enable_angle = *((int *) pair->extract("angle_flag",tmp));
|
||||
enable_urey = *((int *) pair->extract("urey_flag",tmp));
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
double AngleAmoeba::equilibrium_angle(int i)
|
||||
{
|
||||
return theta0[i];
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 writes out coeffs to restart file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AngleAmoeba::write_restart(FILE *fp)
|
||||
{
|
||||
fwrite(&pflag[1],sizeof(int),atom->nangletypes,fp);
|
||||
fwrite(&ubflag[1],sizeof(int),atom->nangletypes,fp);
|
||||
|
||||
fwrite(&theta0[1],sizeof(double),atom->nangletypes,fp);
|
||||
fwrite(&k2[1],sizeof(double),atom->nangletypes,fp);
|
||||
fwrite(&k3[1],sizeof(double),atom->nangletypes,fp);
|
||||
fwrite(&k4[1],sizeof(double),atom->nangletypes,fp);
|
||||
fwrite(&k5[1],sizeof(double),atom->nangletypes,fp);
|
||||
fwrite(&k6[1],sizeof(double),atom->nangletypes,fp);
|
||||
|
||||
fwrite(&ba_k1[1],sizeof(double),atom->nangletypes,fp);
|
||||
fwrite(&ba_k2[1],sizeof(double),atom->nangletypes,fp);
|
||||
fwrite(&ba_r1[1],sizeof(double),atom->nangletypes,fp);
|
||||
fwrite(&ba_r2[1],sizeof(double),atom->nangletypes,fp);
|
||||
|
||||
fwrite(&ub_k[1],sizeof(double),atom->nangletypes,fp);
|
||||
fwrite(&ub_r0[1],sizeof(double),atom->nangletypes,fp);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 reads coeffs from restart file, bcasts them
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AngleAmoeba::read_restart(FILE *fp)
|
||||
{
|
||||
allocate();
|
||||
|
||||
if (comm->me == 0) {
|
||||
utils::sfread(FLERR,&pflag[1],sizeof(int),atom->nangletypes,fp,nullptr,error);
|
||||
utils::sfread(FLERR,&ubflag[1],sizeof(int),atom->nangletypes,
|
||||
fp,nullptr,error);
|
||||
|
||||
utils::sfread(FLERR,&theta0[1],sizeof(double),atom->nangletypes,
|
||||
fp,nullptr,error);
|
||||
utils::sfread(FLERR,&k2[1],sizeof(double),atom->nangletypes,fp,nullptr,error);
|
||||
utils::sfread(FLERR,&k3[1],sizeof(double),atom->nangletypes,fp,nullptr,error);
|
||||
utils::sfread(FLERR,&k4[1],sizeof(double),atom->nangletypes,fp,nullptr,error);
|
||||
utils::sfread(FLERR,&k5[1],sizeof(double),atom->nangletypes,fp,nullptr,error);
|
||||
utils::sfread(FLERR,&k6[1],sizeof(double),atom->nangletypes,fp,nullptr,error);
|
||||
|
||||
utils::sfread(FLERR,&ba_k1[1],sizeof(double),atom->nangletypes,
|
||||
fp,nullptr,error);
|
||||
utils::sfread(FLERR,&ba_k2[1],sizeof(double),atom->nangletypes,
|
||||
fp,nullptr,error);
|
||||
utils::sfread(FLERR,&ba_r1[1],sizeof(double),atom->nangletypes,
|
||||
fp,nullptr,error);
|
||||
utils::sfread(FLERR,&ba_r2[1],sizeof(double),atom->nangletypes,
|
||||
fp,nullptr,error);
|
||||
|
||||
utils::sfread(FLERR,&ub_k[1],sizeof(double),atom->nangletypes,
|
||||
fp,nullptr,error);
|
||||
utils::sfread(FLERR,&ub_r0[1],sizeof(double),atom->nangletypes,
|
||||
fp,nullptr,error);
|
||||
}
|
||||
|
||||
MPI_Bcast(&pflag[1],atom->nangletypes,MPI_INT,0,world);
|
||||
MPI_Bcast(&ubflag[1],atom->nangletypes,MPI_INT,0,world);
|
||||
MPI_Bcast(&theta0[1],atom->nangletypes,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&k2[1],atom->nangletypes,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&k3[1],atom->nangletypes,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&k4[1],atom->nangletypes,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&k5[1],atom->nangletypes,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&k6[1],atom->nangletypes,MPI_DOUBLE,0,world);
|
||||
|
||||
MPI_Bcast(&ba_k1[1],atom->nangletypes,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&ba_k2[1],atom->nangletypes,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&ba_r1[1],atom->nangletypes,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&ba_r2[1],atom->nangletypes,MPI_DOUBLE,0,world);
|
||||
|
||||
MPI_Bcast(&ub_k[1],atom->nangletypes,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&ub_r0[1],atom->nangletypes,MPI_DOUBLE,0,world);
|
||||
|
||||
for (int i = 1; i <= atom->nangletypes; i++) setflag[i] = 1;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 writes to data file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AngleAmoeba::write_data(FILE *fp)
|
||||
{
|
||||
for (int i = 1; i <= atom->nangletypes; i++)
|
||||
fprintf(fp,"%d %d %d %g %g %g %g %g %g\n",
|
||||
i,pflag[i],ubflag[i],theta0[i]/MY_PI*180.0,
|
||||
k2[i],k3[i],k4[i],k5[i],k6[i]);
|
||||
|
||||
fprintf(fp,"\nBondAngle Coeffs\n\n");
|
||||
for (int i = 1; i <= atom->nangletypes; i++)
|
||||
fprintf(fp,"%d %g %g %g %g\n",i,ba_k1[i],ba_k2[i],ba_r1[i],ba_r2[i]);
|
||||
|
||||
fprintf(fp,"\nUreyBradley Coeffs\n\n");
|
||||
for (int i = 1; i <= atom->nangletypes; i++)
|
||||
fprintf(fp,"%d %g %g\n",i,ub_k[i],ub_r0[i]);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
only computes tinker_angle() and tinker_bondangle()
|
||||
does not compute tinker_anglep() and tinker_urey_bradley()
|
||||
---------------------------------------------------------------------- */
|
||||
|
||||
double AngleAmoeba::single(int type, int i1, int i2, int i3)
|
||||
{
|
||||
double **x = atom->x;
|
||||
|
||||
double delx1 = x[i1][0] - x[i2][0];
|
||||
double dely1 = x[i1][1] - x[i2][1];
|
||||
double delz1 = x[i1][2] - x[i2][2];
|
||||
domain->minimum_image(delx1,dely1,delz1);
|
||||
double r1 = sqrt(delx1*delx1 + dely1*dely1 + delz1*delz1);
|
||||
|
||||
double delx2 = x[i3][0] - x[i2][0];
|
||||
double dely2 = x[i3][1] - x[i2][1];
|
||||
double delz2 = x[i3][2] - x[i2][2];
|
||||
domain->minimum_image(delx2,dely2,delz2);
|
||||
double r2 = sqrt(delx2*delx2 + dely2*dely2 + delz2*delz2);
|
||||
|
||||
double c = delx1*delx2 + dely1*dely2 + delz1*delz2;
|
||||
c /= r1*r2;
|
||||
if (c > 1.0) c = 1.0;
|
||||
if (c < -1.0) c = -1.0;
|
||||
|
||||
double s = sqrt(1.0 - c*c);
|
||||
if (s < SMALL) s = SMALL;
|
||||
s = 1.0/s;
|
||||
|
||||
double dtheta = acos(c) - theta0[type];
|
||||
double dtheta2 = dtheta*dtheta;
|
||||
double dtheta3 = dtheta2*dtheta;
|
||||
double dtheta4 = dtheta3*dtheta;
|
||||
double dtheta5 = dtheta4*dtheta;
|
||||
double dtheta6 = dtheta5*dtheta;
|
||||
|
||||
double energy = k2[type]*dtheta2 + k3[type]*dtheta3 + k4[type]*dtheta4
|
||||
+ k5[type]*dtheta5 + k6[type]*dtheta6;
|
||||
|
||||
double dr1 = r1 - ba_r1[type];
|
||||
double dr2 = r2 - ba_r2[type];
|
||||
energy += ba_k1[type]*dr1*dtheta + ba_k2[type]*dr2*dtheta;
|
||||
|
||||
return energy;
|
||||
}
|
||||
57
src/AMOEBA/angle_amoeba.h
Normal file
57
src/AMOEBA/angle_amoeba.h
Normal file
@ -0,0 +1,57 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ANGLE_CLASS
|
||||
// clang-format off
|
||||
AngleStyle(amoeba,AngleAmoeba);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_ANGLE_AMOEBA_H
|
||||
#define LMP_ANGLE_AMOEBA_H
|
||||
|
||||
#include "angle.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AngleAmoeba : public Angle {
|
||||
public:
|
||||
AngleAmoeba(class LAMMPS *);
|
||||
~AngleAmoeba() override;
|
||||
void compute(int, int) override;
|
||||
void coeff(int, char **) override;
|
||||
void init_style() override;
|
||||
double equilibrium_angle(int) override;
|
||||
void write_restart(FILE *) override;
|
||||
void read_restart(FILE *) override;
|
||||
void write_data(FILE *) override;
|
||||
double single(int, int, int, int) override;
|
||||
|
||||
protected:
|
||||
int *pflag, *ubflag;
|
||||
double *theta0, *k2, *k3, *k4, *k5, *k6;
|
||||
double *ba_k1, *ba_k2, *ba_r1, *ba_r2;
|
||||
double *ub_k, *ub_r0;
|
||||
int *setflag_a, *setflag_ba, *setflag_ub;
|
||||
|
||||
int enable_angle, enable_urey;
|
||||
|
||||
void tinker_angle(int, int, int, int, int);
|
||||
void tinker_anglep(int, int, int, int, int);
|
||||
void tinker_bondangle(int, int, int, int, int);
|
||||
void tinker_urey_bradley(int, int, int, int);
|
||||
void allocate();
|
||||
};
|
||||
} // namespace LAMMPS_NS
|
||||
#endif
|
||||
#endif
|
||||
226
src/AMOEBA/atom_vec_amoeba.cpp
Normal file
226
src/AMOEBA/atom_vec_amoeba.cpp
Normal file
@ -0,0 +1,226 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/ Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "atom_vec_amoeba.h"
|
||||
|
||||
#include "atom.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
AtomVecAmoeba::AtomVecAmoeba(LAMMPS *lmp) : AtomVec(lmp)
|
||||
{
|
||||
molecular = 1;
|
||||
bonds_allow = angles_allow = dihedrals_allow = impropers_allow = 1;
|
||||
mass_type = 1;
|
||||
|
||||
atom->molecule_flag = atom->q_flag = 1;
|
||||
atom->nspecial15_flag = 1;
|
||||
|
||||
// strings with peratom variables to include in each AtomVec method
|
||||
// strings cannot contain fields in corresponding AtomVec default strings
|
||||
// order of fields in a string does not matter
|
||||
// except: fields_data_atom & fields_data_vel must match data file
|
||||
|
||||
// clang-format off
|
||||
fields_grow = {"q", "molecule", "num_bond", "bond_type", "bond_atom", "num_angle", "angle_type",
|
||||
"angle_atom1", "angle_atom2", "angle_atom3", "num_dihedral", "dihedral_type", "dihedral_atom1",
|
||||
"dihedral_atom2", "dihedral_atom3", "dihedral_atom4", "num_improper", "improper_type",
|
||||
"improper_atom1", "improper_atom2", "improper_atom3", "improper_atom4", "nspecial", "special",
|
||||
"nspecial15", "special15"};
|
||||
fields_copy = {"q", "molecule", "num_bond", "bond_type", "bond_atom", "num_angle", "angle_type",
|
||||
"angle_atom1", "angle_atom2", "angle_atom3", "num_dihedral", "dihedral_type", "dihedral_atom1",
|
||||
"dihedral_atom2", "dihedral_atom3", "dihedral_atom4", "num_improper", "improper_type",
|
||||
"improper_atom1", "improper_atom2", "improper_atom3", "improper_atom4", "nspecial", "special",
|
||||
"nspecial15", "special15"};
|
||||
fields_border = {"q", "molecule"};
|
||||
fields_border_vel = {"q", "molecule"};
|
||||
fields_exchange = {"q", "molecule", "num_bond", "bond_type", "bond_atom", "num_angle",
|
||||
"angle_type", "angle_atom1", "angle_atom2", "angle_atom3", "num_dihedral", "dihedral_type",
|
||||
"dihedral_atom1", "dihedral_atom2", "dihedral_atom3", "dihedral_atom4", "num_improper",
|
||||
"improper_type", "improper_atom1", "improper_atom2", "improper_atom3", "improper_atom4",
|
||||
"nspecial", "special", "nspecial15", "special15"};
|
||||
fields_restart = {"q", "molecule", "num_bond", "bond_type", "bond_atom", "num_angle",
|
||||
"angle_type", "angle_atom1", "angle_atom2", "angle_atom3", "num_dihedral", "dihedral_type",
|
||||
"dihedral_atom1", "dihedral_atom2", "dihedral_atom3", "dihedral_atom4", "num_improper",
|
||||
"improper_type", "improper_atom1", "improper_atom2", "improper_atom3", "improper_atom4"};
|
||||
fields_create = {"q", "molecule", "num_bond", "num_angle", "num_dihedral", "num_improper",
|
||||
"nspecial", "nspecial15"};
|
||||
fields_data_atom = {"id", "molecule", "type", "q", "x"};
|
||||
fields_data_vel = {"id", "v"};
|
||||
// clang-format on
|
||||
setup_fields();
|
||||
|
||||
bond_per_atom = angle_per_atom = dihedral_per_atom = improper_per_atom = 0;
|
||||
bond_negative = angle_negative = dihedral_negative = improper_negative = nullptr;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
AtomVecAmoeba::~AtomVecAmoeba()
|
||||
{
|
||||
delete[] bond_negative;
|
||||
delete[] angle_negative;
|
||||
delete[] dihedral_negative;
|
||||
delete[] improper_negative;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set local copies of all grow ptrs used by this class, except defaults
|
||||
needed in replicate when 2 atom classes exist and it calls pack_restart()
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecAmoeba::grow_pointers()
|
||||
{
|
||||
num_bond = atom->num_bond;
|
||||
bond_type = atom->bond_type;
|
||||
num_angle = atom->num_angle;
|
||||
angle_type = atom->angle_type;
|
||||
num_dihedral = atom->num_dihedral;
|
||||
dihedral_type = atom->dihedral_type;
|
||||
num_improper = atom->num_improper;
|
||||
improper_type = atom->improper_type;
|
||||
nspecial = atom->nspecial;
|
||||
nspecial15 = atom->nspecial15;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
modify values for AtomVec::pack_restart() to pack
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecAmoeba::pack_restart_pre(int ilocal)
|
||||
{
|
||||
// insure negative vectors are needed length
|
||||
|
||||
if (bond_per_atom < atom->bond_per_atom) {
|
||||
delete[] bond_negative;
|
||||
bond_per_atom = atom->bond_per_atom;
|
||||
bond_negative = new int[bond_per_atom];
|
||||
}
|
||||
if (angle_per_atom < atom->angle_per_atom) {
|
||||
delete[] angle_negative;
|
||||
angle_per_atom = atom->angle_per_atom;
|
||||
angle_negative = new int[angle_per_atom];
|
||||
}
|
||||
if (dihedral_per_atom < atom->dihedral_per_atom) {
|
||||
delete[] dihedral_negative;
|
||||
dihedral_per_atom = atom->dihedral_per_atom;
|
||||
dihedral_negative = new int[dihedral_per_atom];
|
||||
}
|
||||
if (improper_per_atom < atom->improper_per_atom) {
|
||||
delete[] improper_negative;
|
||||
improper_per_atom = atom->improper_per_atom;
|
||||
improper_negative = new int[improper_per_atom];
|
||||
}
|
||||
|
||||
// flip any negative types to positive and flag which ones
|
||||
|
||||
any_bond_negative = 0;
|
||||
for (int m = 0; m < num_bond[ilocal]; m++) {
|
||||
if (bond_type[ilocal][m] < 0) {
|
||||
bond_negative[m] = 1;
|
||||
bond_type[ilocal][m] = -bond_type[ilocal][m];
|
||||
any_bond_negative = 1;
|
||||
} else
|
||||
bond_negative[m] = 0;
|
||||
}
|
||||
|
||||
any_angle_negative = 0;
|
||||
for (int m = 0; m < num_angle[ilocal]; m++) {
|
||||
if (angle_type[ilocal][m] < 0) {
|
||||
angle_negative[m] = 1;
|
||||
angle_type[ilocal][m] = -angle_type[ilocal][m];
|
||||
any_angle_negative = 1;
|
||||
} else
|
||||
angle_negative[m] = 0;
|
||||
}
|
||||
|
||||
any_dihedral_negative = 0;
|
||||
for (int m = 0; m < num_dihedral[ilocal]; m++) {
|
||||
if (dihedral_type[ilocal][m] < 0) {
|
||||
dihedral_negative[m] = 1;
|
||||
dihedral_type[ilocal][m] = -dihedral_type[ilocal][m];
|
||||
any_dihedral_negative = 1;
|
||||
} else
|
||||
dihedral_negative[m] = 0;
|
||||
}
|
||||
|
||||
any_improper_negative = 0;
|
||||
for (int m = 0; m < num_improper[ilocal]; m++) {
|
||||
if (improper_type[ilocal][m] < 0) {
|
||||
improper_negative[m] = 1;
|
||||
improper_type[ilocal][m] = -improper_type[ilocal][m];
|
||||
any_improper_negative = 1;
|
||||
} else
|
||||
improper_negative[m] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unmodify values packed by AtomVec::pack_restart()
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecAmoeba::pack_restart_post(int ilocal)
|
||||
{
|
||||
// restore the flagged types to their negative values
|
||||
|
||||
if (any_bond_negative) {
|
||||
for (int m = 0; m < num_bond[ilocal]; m++)
|
||||
if (bond_negative[m]) bond_type[ilocal][m] = -bond_type[ilocal][m];
|
||||
}
|
||||
|
||||
if (any_angle_negative) {
|
||||
for (int m = 0; m < num_angle[ilocal]; m++)
|
||||
if (angle_negative[m]) angle_type[ilocal][m] = -angle_type[ilocal][m];
|
||||
}
|
||||
|
||||
if (any_dihedral_negative) {
|
||||
for (int m = 0; m < num_dihedral[ilocal]; m++)
|
||||
if (dihedral_negative[m]) dihedral_type[ilocal][m] = -dihedral_type[ilocal][m];
|
||||
}
|
||||
|
||||
if (any_improper_negative) {
|
||||
for (int m = 0; m < num_improper[ilocal]; m++)
|
||||
if (improper_negative[m]) improper_type[ilocal][m] = -improper_type[ilocal][m];
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
initialize other atom quantities after AtomVec::unpack_restart()
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecAmoeba::unpack_restart_init(int ilocal)
|
||||
{
|
||||
nspecial[ilocal][0] = 0;
|
||||
nspecial[ilocal][1] = 0;
|
||||
nspecial[ilocal][2] = 0;
|
||||
nspecial15[ilocal] = 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
modify what AtomVec::data_atom() just unpacked
|
||||
or initialize other atom quantities
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecAmoeba::data_atom_post(int ilocal)
|
||||
{
|
||||
num_bond[ilocal] = 0;
|
||||
num_angle[ilocal] = 0;
|
||||
num_dihedral[ilocal] = 0;
|
||||
num_improper[ilocal] = 0;
|
||||
nspecial[ilocal][0] = 0;
|
||||
nspecial[ilocal][1] = 0;
|
||||
nspecial[ilocal][2] = 0;
|
||||
nspecial15[ilocal] = 0;
|
||||
}
|
||||
49
src/AMOEBA/atom_vec_amoeba.h
Normal file
49
src/AMOEBA/atom_vec_amoeba.h
Normal file
@ -0,0 +1,49 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/ Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ATOM_CLASS
|
||||
// clang-format off
|
||||
AtomStyle(amoeba,AtomVecAmoeba);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_ATOM_VEC_AMOEBA_H
|
||||
#define LMP_ATOM_VEC_AMOEBA_H
|
||||
|
||||
#include "atom_vec.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AtomVecAmoeba : public AtomVec {
|
||||
public:
|
||||
AtomVecAmoeba(class LAMMPS *);
|
||||
~AtomVecAmoeba() override;
|
||||
|
||||
void grow_pointers() override;
|
||||
void pack_restart_pre(int) override;
|
||||
void pack_restart_post(int) override;
|
||||
void unpack_restart_init(int) override;
|
||||
void data_atom_post(int) override;
|
||||
|
||||
private:
|
||||
int *num_bond, *num_angle, *num_dihedral, *num_improper;
|
||||
int **bond_type, **angle_type, **dihedral_type, **improper_type;
|
||||
int **nspecial, *nspecial15;
|
||||
|
||||
int any_bond_negative, any_angle_negative, any_dihedral_negative, any_improper_negative;
|
||||
int bond_per_atom, angle_per_atom, dihedral_per_atom, improper_per_atom;
|
||||
int *bond_negative, *angle_negative, *dihedral_negative, *improper_negative;
|
||||
};
|
||||
} // namespace LAMMPS_NS
|
||||
#endif
|
||||
#endif
|
||||
1849
src/AMOEBA/fix_amoeba_bitorsion.cpp
Normal file
1849
src/AMOEBA/fix_amoeba_bitorsion.cpp
Normal file
File diff suppressed because it is too large
Load Diff
126
src/AMOEBA/fix_amoeba_bitorsion.h
Normal file
126
src/AMOEBA/fix_amoeba_bitorsion.h
Normal file
@ -0,0 +1,126 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
// clang-format off
|
||||
FixStyle(amoeba/bitorsion,FixAmoebaBiTorsion);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_AMOEBA_BITORSION_H
|
||||
#define LMP_FIX_AMOEBA_BITORSION_H
|
||||
|
||||
#include "fix.h"
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixAmoebaBiTorsion : public Fix {
|
||||
public:
|
||||
FixAmoebaBiTorsion(class LAMMPS *, int, char **);
|
||||
~FixAmoebaBiTorsion() override;
|
||||
int setmask() override;
|
||||
void init() override;
|
||||
void setup(int) override;
|
||||
void setup_pre_neighbor() override;
|
||||
void setup_pre_reverse(int, int) override;
|
||||
void min_setup(int) override;
|
||||
void pre_neighbor() override;
|
||||
void pre_reverse(int, int) override;
|
||||
void post_force(int) override;
|
||||
void post_force_respa(int, int, int) override;
|
||||
void min_post_force(int) override;
|
||||
double compute_scalar() override;
|
||||
|
||||
void read_data_header(char *) override;
|
||||
void read_data_section(char *, int, char *, tagint) override;
|
||||
bigint read_data_skip_lines(char *) override;
|
||||
void write_data_header(FILE *, int) override;
|
||||
void write_data_section_size(int, int &, int &) override;
|
||||
void write_data_section_pack(int, double **) override;
|
||||
void write_data_section_keyword(int, FILE *) override;
|
||||
void write_data_section(int, FILE *, int, double **, int) override;
|
||||
|
||||
void write_restart(FILE *) override;
|
||||
void restart(char *) override;
|
||||
int pack_restart(int, double *) override;
|
||||
void unpack_restart(int, int) override;
|
||||
int size_restart(int) override;
|
||||
int maxsize_restart() override;
|
||||
|
||||
void grow_arrays(int) override;
|
||||
void copy_arrays(int, int, int) override;
|
||||
void set_arrays(int) override;
|
||||
int pack_border(int, int *, double *) override;
|
||||
int unpack_border(int, int, double *) override;
|
||||
int pack_exchange(int, double *) override;
|
||||
int unpack_exchange(int, double *) override;
|
||||
|
||||
double memory_usage() override;
|
||||
|
||||
private:
|
||||
int nprocs, me;
|
||||
int eflag_caller;
|
||||
int ilevel_respa;
|
||||
int disable;
|
||||
bigint nbitorsions; // total count of all bitorsions in system
|
||||
double ebitorsion;
|
||||
double onefifth;
|
||||
|
||||
// per-atom data for bitorsions stored with each owned atom
|
||||
|
||||
int *num_bitorsion;
|
||||
int **bitorsion_type;
|
||||
tagint **bitorsion_atom1, **bitorsion_atom2, **bitorsion_atom3;
|
||||
tagint **bitorsion_atom4, **bitorsion_atom5;
|
||||
|
||||
// previous max atoms on this proc before grow() is called
|
||||
|
||||
int nmax_previous;
|
||||
|
||||
// list of all bitorsions to compute on this proc
|
||||
|
||||
int nbitorsion_list;
|
||||
int max_bitorsion_list;
|
||||
int **bitorsion_list;
|
||||
|
||||
// BiTorsion grid and spline data
|
||||
|
||||
int nbitypes;
|
||||
int *nxgrid, *nygrid;
|
||||
double **ttx, **tty, **tbf;
|
||||
double **tbx, **tby, **tbxy;
|
||||
|
||||
// data from PairAmoeba
|
||||
|
||||
class Pair *pair;
|
||||
int *amtype, *atomic_num;
|
||||
|
||||
// local methods
|
||||
|
||||
void read_grid_data(char *);
|
||||
void create_splines();
|
||||
void nspline(int, double *, double *, double *, double *, double *, double *, double *, double *,
|
||||
double *);
|
||||
void cspline(int, double *, double *, double *, double *, double *, double *, double *, double *,
|
||||
double *, double *);
|
||||
void cytsy(int, double *, double *, double *, double *, double *, int &);
|
||||
void cytsyp(int, double *, double *, double *, int &);
|
||||
void cytsys(int, double *, double *, double *, double *, double *);
|
||||
|
||||
void chkttor(int, int, int, double &, double &, double &);
|
||||
void bcuint1(double *, double *, double *, double *, double, double, double, double, double,
|
||||
double, double &, double &, double &);
|
||||
void bcucof(double *, double *, double *, double *, double, double, double[][4]);
|
||||
};
|
||||
} // namespace LAMMPS_NS
|
||||
#endif
|
||||
#endif
|
||||
1114
src/AMOEBA/fix_amoeba_pitorsion.cpp
Normal file
1114
src/AMOEBA/fix_amoeba_pitorsion.cpp
Normal file
File diff suppressed because it is too large
Load Diff
98
src/AMOEBA/fix_amoeba_pitorsion.h
Normal file
98
src/AMOEBA/fix_amoeba_pitorsion.h
Normal file
@ -0,0 +1,98 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
// clang-format off
|
||||
FixStyle(amoeba/pitorsion,FixAmoebaPiTorsion);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_AMOEBA_PITORSION_H
|
||||
#define LMP_FIX_AMOEBA_PITORSION_H
|
||||
|
||||
#include "fix.h"
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixAmoebaPiTorsion : public Fix {
|
||||
public:
|
||||
FixAmoebaPiTorsion(class LAMMPS *, int, char **);
|
||||
~FixAmoebaPiTorsion() override;
|
||||
int setmask() override;
|
||||
void init() override;
|
||||
void setup(int) override;
|
||||
void setup_pre_neighbor() override;
|
||||
void setup_pre_reverse(int, int) override;
|
||||
void min_setup(int) override;
|
||||
void pre_neighbor() override;
|
||||
void pre_reverse(int, int) override;
|
||||
void post_force(int) override;
|
||||
void post_force_respa(int, int, int) override;
|
||||
void min_post_force(int) override;
|
||||
double compute_scalar() override;
|
||||
|
||||
void read_data_header(char *) override;
|
||||
void read_data_section(char *, int, char *, tagint) override;
|
||||
bigint read_data_skip_lines(char *) override;
|
||||
void write_data_header(FILE *, int) override;
|
||||
void write_data_section_size(int, int &, int &) override;
|
||||
void write_data_section_pack(int, double **) override;
|
||||
void write_data_section_keyword(int, FILE *) override;
|
||||
void write_data_section(int, FILE *, int, double **, int) override;
|
||||
|
||||
void write_restart(FILE *) override;
|
||||
void restart(char *) override;
|
||||
int pack_restart(int, double *) override;
|
||||
void unpack_restart(int, int) override;
|
||||
int size_restart(int) override;
|
||||
int maxsize_restart() override;
|
||||
|
||||
void grow_arrays(int) override;
|
||||
void copy_arrays(int, int, int) override;
|
||||
void set_arrays(int) override;
|
||||
int pack_exchange(int, double *) override;
|
||||
int unpack_exchange(int, double *) override;
|
||||
|
||||
double memory_usage() override;
|
||||
|
||||
private:
|
||||
int nprocs, me;
|
||||
int eflag_caller;
|
||||
int ilevel_respa;
|
||||
int disable;
|
||||
bigint npitorsions;
|
||||
int npitorsion_types;
|
||||
double epitorsion;
|
||||
double onesixth;
|
||||
|
||||
double *kpit;
|
||||
|
||||
// per-atom data for pitorsions stored with each owned atom
|
||||
|
||||
int *num_pitorsion;
|
||||
int **pitorsion_type;
|
||||
tagint **pitorsion_atom1, **pitorsion_atom2, **pitorsion_atom3;
|
||||
tagint **pitorsion_atom4, **pitorsion_atom5, **pitorsion_atom6;
|
||||
|
||||
// previous max atoms on this proc before grow() is called
|
||||
|
||||
int nmax_previous;
|
||||
|
||||
// list of all pitorsions to compute on this proc
|
||||
|
||||
int npitorsion_list;
|
||||
int max_pitorsion_list;
|
||||
int **pitorsion_list;
|
||||
};
|
||||
} // namespace LAMMPS_NS
|
||||
#endif
|
||||
#endif
|
||||
338
src/AMOEBA/improper_amoeba.cpp
Normal file
338
src/AMOEBA/improper_amoeba.cpp
Normal file
@ -0,0 +1,338 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "improper_amoeba.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "error.h"
|
||||
#include "force.h"
|
||||
#include "math_const.h"
|
||||
#include "memory.h"
|
||||
#include "neighbor.h"
|
||||
#include "pair.h"
|
||||
#include "update.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
#define TOLERANCE 0.05
|
||||
#define SMALL 0.001
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Construct the improper style.  All members owned by this class start in a
// defined state: the style is enabled, the higher-order opbend weights are
// zero and no coefficient array exists until allocate() is called.  The
// real values of disable and opbend_* are filled in by init_style().
ImproperAmoeba::ImproperAmoeba(LAMMPS *lmp) :
  Improper(lmp), disable(0), opbend_cubic(0.0), opbend_quartic(0.0),
  opbend_pentic(0.0), opbend_sextic(0.0), k(nullptr)
{
  // this style provides write_data()
  writedata = 1;
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Release the per-type arrays created by allocate().  Nothing is freed when
// allocate() never ran or when copymode is set.
ImproperAmoeba::~ImproperAmoeba()
{
  if (!allocated || copymode) return;

  memory->destroy(setflag);
  memory->destroy(k);
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void ImproperAmoeba::compute(int eflag, int vflag)
|
||||
{
|
||||
if (disable) return;
|
||||
|
||||
int ia,ib,ic,id,n,type;
|
||||
double xia,yia,zia,xib,yib,zib,xic,yic,zic,xid,yid,zid;
|
||||
double xab,yab,zab,xcb,ycb,zcb,xdb,ydb,zdb,xad,yad,zad,xcd,ycd,zcd;
|
||||
double rad2,rcd2,rdb2,dot,cc,ee;
|
||||
double sine,angle;
|
||||
double dt,dt2,dt3,dt4,e;
|
||||
double deddt,sign,dedcos,term;
|
||||
double dccdxia,dccdyia,dccdzia,dccdxic,dccdyic,dccdzic;
|
||||
double dccdxid,dccdyid,dccdzid;
|
||||
double deedxia,deedyia,deedzia,deedxic,deedyic,deedzic;
|
||||
double deedxid,deedyid,deedzid;
|
||||
double fa[3],fb[3],fc[3],fd[3];
|
||||
|
||||
ev_init(eflag,vflag);
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int **improperlist = neighbor->improperlist;
|
||||
int nimproperlist = neighbor->nimproperlist;
|
||||
int nlocal = atom->nlocal;
|
||||
int newton_bond = force->newton_bond;
|
||||
|
||||
// conversion factors for radians to degrees and vice versa
|
||||
|
||||
double rad2degree = 180.0/MY_PI;
|
||||
double eprefactor = 1.0 / (rad2degree*rad2degree);
|
||||
double fprefactor = 1.0 / rad2degree;
|
||||
|
||||
for (n = 0; n < nimproperlist; n++) {
|
||||
|
||||
// in Tinker code, atom1 = D, atom2 = B, atom3 = A, atom4 = C
|
||||
// for Alligner angle:
|
||||
// atoms A,C,D form a plane, B is out-of-plane
|
||||
// angle is between plane and the vector from D to B
|
||||
|
||||
id = improperlist[n][0];
|
||||
ib = improperlist[n][1];
|
||||
ia = improperlist[n][2];
|
||||
ic = improperlist[n][3];
|
||||
type = improperlist[n][4];
|
||||
|
||||
// coordinates of the atoms at trigonal center
|
||||
|
||||
xia = x[ia][0];
|
||||
yia = x[ia][1];
|
||||
zia = x[ia][2];
|
||||
xib = x[ib][0];
|
||||
yib = x[ib][1];
|
||||
zib = x[ib][2];
|
||||
xic = x[ic][0];
|
||||
yic = x[ic][1];
|
||||
zic = x[ic][2];
|
||||
xid = x[id][0];
|
||||
yid = x[id][1];
|
||||
zid = x[id][2];
|
||||
|
||||
// compute the out-of-plane bending angle
|
||||
|
||||
xab = xia - xib;
|
||||
yab = yia - yib;
|
||||
zab = zia - zib;
|
||||
xcb = xic - xib;
|
||||
ycb = yic - yib;
|
||||
zcb = zic - zib;
|
||||
xdb = xid - xib;
|
||||
ydb = yid - yib;
|
||||
zdb = zid - zib;
|
||||
xad = xia - xid;
|
||||
yad = yia - yid;
|
||||
zad = zia - zid;
|
||||
xcd = xic - xid;
|
||||
ycd = yic - yid;
|
||||
zcd = zic - zid;
|
||||
|
||||
// Allinger angle between A-C-D plane and D-B vector for D-B < AC
|
||||
|
||||
rad2 = xad*xad + yad*yad + zad*zad;
|
||||
rcd2 = xcd*xcd + ycd*ycd + zcd*zcd;
|
||||
dot = xad*xcd + yad*ycd + zad*zcd;
|
||||
cc = rad2*rcd2 - dot*dot;
|
||||
|
||||
// find the out-of-plane angle bending energy
|
||||
|
||||
ee = xdb*(yab*zcb-zab*ycb) + ydb*(zab*xcb-xab*zcb) + zdb*(xab*ycb-yab*xcb);
|
||||
rdb2 = xdb*xdb + ydb*ydb + zdb*zdb;
|
||||
if (rdb2 == 0.0 || cc == 0.0) continue;
|
||||
|
||||
sine = fabs(ee) / sqrt(cc*rdb2);
|
||||
sine = MIN(1.0,sine);
|
||||
|
||||
// angle needs to be in degrees for Tinker formulas
|
||||
// b/c opbend_3456 coeffs are in mixed units
|
||||
|
||||
angle = rad2degree * asin(sine);
|
||||
dt = angle;
|
||||
dt2 = dt * dt;
|
||||
dt3 = dt2 * dt;
|
||||
dt4 = dt2 * dt2;
|
||||
e = eprefactor * k[type] * dt2 *
|
||||
(1.0 + opbend_cubic*dt + opbend_quartic*dt2 +
|
||||
opbend_pentic*dt3 + opbend_sextic*dt4);
|
||||
|
||||
deddt = fprefactor * k[type] * dt *
|
||||
(2.0 + 3.0*opbend_cubic*dt + 4.0*opbend_quartic*dt2 +
|
||||
5.0*opbend_pentic*dt3 + 6.0*opbend_sextic*dt4);
|
||||
sign = (ee >= 0.0) ? 1.0 : -1.0;
|
||||
dedcos = -deddt * sign / sqrt(cc*rdb2 - ee*ee);
|
||||
|
||||
// chain rule terms for first derivative components
|
||||
|
||||
term = ee / cc;
|
||||
dccdxia = (xad*rcd2-xcd*dot) * term;
|
||||
dccdyia = (yad*rcd2-ycd*dot) * term;
|
||||
dccdzia = (zad*rcd2-zcd*dot) * term;
|
||||
dccdxic = (xcd*rad2-xad*dot) * term;
|
||||
dccdyic = (ycd*rad2-yad*dot) * term;
|
||||
dccdzic = (zcd*rad2-zad*dot) * term;
|
||||
dccdxid = -dccdxia - dccdxic;
|
||||
dccdyid = -dccdyia - dccdyic;
|
||||
dccdzid = -dccdzia - dccdzic;
|
||||
|
||||
term = ee / rdb2;
|
||||
deedxia = ydb*zcb - zdb*ycb;
|
||||
deedyia = zdb*xcb - xdb*zcb;
|
||||
deedzia = xdb*ycb - ydb*xcb;
|
||||
deedxic = yab*zdb - zab*ydb;
|
||||
deedyic = zab*xdb - xab*zdb;
|
||||
deedzic = xab*ydb - yab*xdb;
|
||||
deedxid = ycb*zab - zcb*yab + xdb*term;
|
||||
deedyid = zcb*xab - xcb*zab + ydb*term;
|
||||
deedzid = xcb*yab - ycb*xab + zdb*term;
|
||||
|
||||
// compute first derivative components for this angle
|
||||
|
||||
fa[0] = dedcos * (dccdxia+deedxia);
|
||||
fa[1] = dedcos * (dccdyia+deedyia);
|
||||
fa[2] = dedcos * (dccdzia+deedzia);
|
||||
fc[0] = dedcos * (dccdxic+deedxic);
|
||||
fc[1] = dedcos * (dccdyic+deedyic);
|
||||
fc[2] = dedcos * (dccdzic+deedzic);
|
||||
fd[0] = dedcos * (dccdxid+deedxid);
|
||||
fd[1] = dedcos * (dccdyid+deedyid);
|
||||
fd[2] = dedcos * (dccdzid+deedzid);
|
||||
fb[0] = -fa[0] - fc[0] - fd[0];
|
||||
fb[1] = -fa[1] - fc[1] - fd[1];
|
||||
fb[2] = -fa[2] - fc[2] - fd[2];
|
||||
|
||||
// apply force to each of 4 atoms
|
||||
|
||||
if (newton_bond || id < nlocal) {
|
||||
f[id][0] -= fd[0];
|
||||
f[id][1] -= fd[1];
|
||||
f[id][2] -= fd[2];
|
||||
}
|
||||
|
||||
if (newton_bond || ib < nlocal) {
|
||||
f[ib][0] -= fb[0];
|
||||
f[ib][1] -= fb[1];
|
||||
f[ib][2] -= fb[2];
|
||||
}
|
||||
|
||||
if (newton_bond || ia < nlocal) {
|
||||
f[ia][0] -= fa[0];
|
||||
f[ia][1] -= fa[1];
|
||||
f[ia][2] -= fa[2];
|
||||
}
|
||||
|
||||
if (newton_bond || ic < nlocal) {
|
||||
f[ic][0] -= fc[0];
|
||||
f[ic][1] -= fc[1];
|
||||
f[ic][2] -= fc[2];
|
||||
}
|
||||
|
||||
if (evflag) {
|
||||
fd[0] = -fd[0]; fd[1] = -fd[1]; fd[2] = -fd[2];
|
||||
fa[0] = -fa[0]; fa[1] = -fa[1]; fa[2] = -fa[2];
|
||||
fc[0] = -fc[0]; fc[1] = -fc[1]; fc[2] = -fc[2];
|
||||
ev_tally(id,ib,ia,ic,nlocal,newton_bond,e,fd,fa,fc,
|
||||
xdb,ydb,zdb,xab,yab,zab,xic-xia,yic-yia,zic-zia);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void ImproperAmoeba::allocate()
|
||||
{
|
||||
allocated = 1;
|
||||
int n = atom->nimpropertypes;
|
||||
|
||||
memory->create(k,n+1,"improper:k");
|
||||
|
||||
memory->create(setflag,n+1,"improper:setflag");
|
||||
for (int i = 1; i <= n; i++) setflag[i] = 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set coeffs for one type
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ImproperAmoeba::coeff(int narg, char **arg)
|
||||
{
|
||||
if (narg != 2) error->all(FLERR,"Incorrect args for improper coefficients");
|
||||
if (!allocated) allocate();
|
||||
|
||||
int ilo,ihi;
|
||||
utils::bounds(FLERR,arg[0],1,atom->nimpropertypes,ilo,ihi,error);
|
||||
|
||||
double k_one = utils::numeric(FLERR,arg[1],false,lmp);
|
||||
|
||||
// convert chi from degrees to radians
|
||||
|
||||
int count = 0;
|
||||
for (int i = ilo; i <= ihi; i++) {
|
||||
k[i] = k_one;
|
||||
setflag[i] = 1;
|
||||
count++;
|
||||
}
|
||||
|
||||
if (count == 0) error->all(FLERR,"Incorrect args for improper coefficients");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set opbend higher-order term weights from PairAmoeba
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ImproperAmoeba::init_style()
|
||||
{
|
||||
// check if PairAmoeba disabled improper terms
|
||||
|
||||
Pair *pair = nullptr;
|
||||
pair = force->pair_match("amoeba",1,0);
|
||||
if (!pair) pair = force->pair_match("hippo",1,0);
|
||||
if (!pair) error->all(FLERR,"Improper amoeba could not find pair amoeba/hippo");
|
||||
|
||||
int tmp;
|
||||
int flag = *((int *) pair->extract("improper_flag",tmp));
|
||||
disable = flag ? 0 : 1;
|
||||
|
||||
// also extract opbend params
|
||||
|
||||
int dim;
|
||||
opbend_cubic = *(double *) pair->extract("opbend_cubic",dim);
|
||||
opbend_quartic = *(double *) pair->extract("opbend_quartic",dim);
|
||||
opbend_pentic = *(double *) pair->extract("opbend_pentic",dim);
|
||||
opbend_sextic = *(double *) pair->extract("opbend_sextic",dim);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 writes out coeffs to restart file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ImproperAmoeba::write_restart(FILE *fp)
|
||||
{
|
||||
fwrite(&k[1],sizeof(double),atom->nimpropertypes,fp);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 reads coeffs from restart file, bcasts them
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ImproperAmoeba::read_restart(FILE *fp)
|
||||
{
|
||||
allocate();
|
||||
|
||||
if (comm->me == 0)
|
||||
utils::sfread(FLERR,&k[1],sizeof(double),atom->nimpropertypes,fp,nullptr,error);
|
||||
MPI_Bcast(&k[1],atom->nimpropertypes,MPI_DOUBLE,0,world);
|
||||
|
||||
for (int i = 1; i <= atom->nimpropertypes; i++) setflag[i] = 1;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
proc 0 writes to data file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void ImproperAmoeba::write_data(FILE *fp)
|
||||
{
|
||||
for (int i = 1; i <= atom->nimpropertypes; i++)
|
||||
fprintf(fp,"%d %g\n",i,k[i]);
|
||||
}
|
||||
47
src/AMOEBA/improper_amoeba.h
Normal file
47
src/AMOEBA/improper_amoeba.h
Normal file
@ -0,0 +1,47 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef IMPROPER_CLASS
|
||||
// clang-format off
|
||||
ImproperStyle(amoeba,ImproperAmoeba);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_IMPROPER_AMOEBA_H
|
||||
#define LMP_IMPROPER_AMOEBA_H
|
||||
|
||||
#include "improper.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class ImproperAmoeba : public Improper {
|
||||
public:
|
||||
ImproperAmoeba(class LAMMPS *);
|
||||
~ImproperAmoeba() override;
|
||||
void compute(int, int) override;
|
||||
void coeff(int, char **) override;
|
||||
void init_style() override;
|
||||
void write_restart(FILE *) override;
|
||||
void read_restart(FILE *) override;
|
||||
void write_data(FILE *) override;
|
||||
|
||||
protected:
|
||||
int disable;
|
||||
double opbend_cubic, opbend_quartic, opbend_pentic, opbend_sextic;
|
||||
double *k;
|
||||
|
||||
virtual void allocate();
|
||||
};
|
||||
} // namespace LAMMPS_NS
|
||||
#endif
|
||||
#endif
|
||||
2330
src/AMOEBA/pair_amoeba.cpp
Normal file
2330
src/AMOEBA/pair_amoeba.cpp
Normal file
File diff suppressed because it is too large
Load Diff
480
src/AMOEBA/pair_amoeba.h
Normal file
480
src/AMOEBA/pair_amoeba.h
Normal file
@ -0,0 +1,480 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/ Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
// clang-format off
|
||||
PairStyle(amoeba,PairAmoeba);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_AMOEBA_H
|
||||
#define LMP_PAIR_AMOEBA_H
|
||||
|
||||
#include "lmpfftsettings.h"
|
||||
#include "pair.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
#define SBBITS15 29
|
||||
#define NEIGHMASK15 0x1FFFFFFF
|
||||
|
||||
class PairAmoeba : public Pair {
|
||||
public:
|
||||
PairAmoeba(class LAMMPS *);
|
||||
~PairAmoeba() override;
|
||||
void compute(int, int) override;
|
||||
void settings(int, char **) override;
|
||||
void coeff(int, char **) override;
|
||||
void init_style() override;
|
||||
double init_one(int, int) override;
|
||||
void finish() override;
|
||||
|
||||
int pack_forward_comm(int, int *, double *, int, int *) override;
|
||||
void unpack_forward_comm(int, int, double *) override;
|
||||
int pack_reverse_comm(int, int, double *) override;
|
||||
void unpack_reverse_comm(int, int *, double *) override;
|
||||
|
||||
void pack_forward_grid(int, void *, int, int *) override;
|
||||
void unpack_forward_grid(int, void *, int, int *) override;
|
||||
void pack_reverse_grid(int, void *, int, int *) override;
|
||||
void unpack_reverse_grid(int, void *, int, int *) override;
|
||||
|
||||
void *extract(const char *, int &) override;
|
||||
double memory_usage() override;
|
||||
|
||||
protected:
|
||||
int nmax; // allocation for owned+ghost
|
||||
int cfstyle, crstyle; // style of forward/reverse comm operations
|
||||
int nualt;
|
||||
double electric;
|
||||
double rotate[3][3]; // rotation matrix
|
||||
|
||||
bool amoeba; // which force field: amoeba == true, hippo == false
|
||||
std::string mystyle; // text label for style
|
||||
int first_flag; // 1 before first init_style()
|
||||
int first_flag_compute; // 1 before first call to compute()
|
||||
int optlevel;
|
||||
|
||||
// turn on/off components of force field
|
||||
|
||||
int hal_flag, repulse_flag, qxfer_flag;
|
||||
int disp_rspace_flag, disp_kspace_flag;
|
||||
int polar_rspace_flag, polar_kspace_flag;
|
||||
int mpole_rspace_flag, mpole_kspace_flag;
|
||||
int bond_flag, angle_flag, dihedral_flag, improper_flag;
|
||||
int urey_flag, pitorsion_flag, bitorsion_flag;
|
||||
|
||||
// DEBUG timers
|
||||
|
||||
double time_init, time_hal, time_repulse, time_disp;
|
||||
double time_mpole, time_induce, time_polar, time_qxfer;
|
||||
|
||||
// energy/virial components
|
||||
|
||||
double ehal, erepulse, edisp, epolar, empole, eqxfer;
|
||||
double virhal[6], virrepulse[6], virdisp[6], virpolar[6], virmpole[6], virqxfer[6];
|
||||
|
||||
// scalar values defined in force-field file
|
||||
|
||||
char *forcefield; // FF name
|
||||
double am_dielectric;
|
||||
|
||||
int opbendtype, vdwtype;
|
||||
int radius_rule, radius_type, radius_size, epsilon_rule;
|
||||
|
||||
double bond_cubic, bond_quartic;
|
||||
double angle_cubic, angle_quartic, angle_pentic, angle_sextic;
|
||||
double opbend_cubic, opbend_quartic, opbend_pentic, opbend_sextic;
|
||||
double torsion_unit;
|
||||
|
||||
int poltyp;
|
||||
|
||||
double special_hal[5];
|
||||
double special_repel[5];
|
||||
double special_disp[5];
|
||||
double special_mpole[5];
|
||||
double special_polar_pscale[5];
|
||||
double special_polar_piscale[5];
|
||||
double special_polar_wscale[5];
|
||||
|
||||
double polar_dscale, polar_uscale;
|
||||
|
||||
// scalar values defined in keyfile
|
||||
|
||||
double dhal, ghal;
|
||||
|
||||
double vdwcut, vdwtaper;
|
||||
double repcut, reptaper;
|
||||
double dispcut, disptaper;
|
||||
double mpolecut, mpoletaper;
|
||||
double ctrncut, ctrntaper;
|
||||
|
||||
double ewaldcut;
|
||||
double dewaldcut;
|
||||
double usolvcut;
|
||||
|
||||
int use_ewald, use_dewald;
|
||||
|
||||
int use_pred;
|
||||
int politer, polpred;
|
||||
int pcgprec, pcgguess;
|
||||
double pcgpeek;
|
||||
int tcgnab, optorder;
|
||||
int maxualt;
|
||||
double poleps;
|
||||
double udiag;
|
||||
|
||||
int aeewald_key, apewald_key, adewald_key;
|
||||
int pmegrid_key, dpmegrid_key;
|
||||
|
||||
// types and classes
|
||||
|
||||
int n_amtype; // # of defined AMOEBA types, 1-N
|
||||
int n_amclass; // # of defined AMOEBA classes, 1-N
|
||||
int max_amtype; // allocation length of per-type data
|
||||
int max_amclass; // allocation length of per-class data
|
||||
|
||||
int *amtype_defined; // 1 if type was defined in FF file
|
||||
int *amclass_defined; // 1 if class was defined in FF file
|
||||
int *amtype2class; // amt2c[i] = class which type I belongs to
|
||||
|
||||
// static per-atom properties, must persist as atoms migrate
|
||||
|
||||
int index_amtype, index_amgroup, index_redID;
|
||||
int index_xyzaxis, index_polaxe, index_pval;
|
||||
|
||||
int *amtype; // AMOEBA type, 1 to N_amtype
|
||||
int *amgroup; // AMOEBA polarization group, 1 to Ngroup
|
||||
|
||||
char *id_pole, *id_udalt, *id_upalt;
|
||||
class FixStore *fixpole; // stores pole = multipole components
|
||||
class FixStore *fixudalt; // stores udalt = induced dipole history
|
||||
class FixStore *fixupalt; // stores upalt = induced dipole history
|
||||
|
||||
// static per-type properties defined in force-field file
|
||||
|
||||
int *atomic_num; // atomic number
|
||||
int *valence; // valence (# of possible bonds)
|
||||
double *am_mass; // atomic weight
|
||||
double *am_q; // charge
|
||||
double **am_mu; // dipole moment
|
||||
|
||||
double *polarity; // for polar
|
||||
double *pdamp; // for polar
|
||||
double *thole; // for polar
|
||||
double *dirdamp; // for polar
|
||||
int *npolgroup; // # of other types in polarization group, per-type
|
||||
int **polgroup; // list of other types in polarization group, per-type
|
||||
|
||||
double *sizpr, *dmppr, *elepr;
|
||||
|
||||
// multipole frame info for each amtype, read from PRM file
|
||||
|
||||
int *nmultiframe; // # of frames for each type
|
||||
int **mpaxis; // polaxe values
|
||||
int **xpole, **ypole, **zpole; // other types in xyz dirs for multipole frame
|
||||
double ***fpole; // 13 values from file
|
||||
// 0 = monopole, same as q
|
||||
// 1,2,3 = 3 dipole components
|
||||
// 4-12 = 9 quadrupole components
|
||||
|
||||
// static per-class properties defined in force-field file
|
||||
|
||||
double *vdwl_eps; // Vdwl epsilon for each class of atom
|
||||
double *vdwl_sigma; // Vdwl sigma for each class of atom
|
||||
double *kred; // fraction that H atoms move towards bonded atom
|
||||
// used in Vdwl, 0.0 if not H atom
|
||||
double *csix, *adisp; // used in dispersion
|
||||
double *chgct, *dmpct; // used in charge transfer
|
||||
double *pcore, *palpha; // for multipole
|
||||
|
||||
int **vdwl_class_pair; // Vdwl iclass/jclass for pair of classes
|
||||
double *vdwl_eps_pair; // Vdwl epsilon for pair of classes
|
||||
double *vdwl_sigma_pair; // Vdwl sigma for pair of classes
|
||||
int nvdwl_pair; // # of pairwise Vdwl entries in file
|
||||
int max_vdwl_pair; // size of allocated data for pairwise Vdwl
|
||||
|
||||
// vectors and arrays of small size
|
||||
|
||||
double *copt, *copm; // 0:optorder in length
|
||||
double *gear, *aspc;
|
||||
|
||||
double *a_ualt, *ap_ualt; // maxualt*(maxualt+1)/2 in length
|
||||
double *b_ualt, *bp_ualt; // maxualt in length
|
||||
double **c_ualt, **cp_ualt; // maxualt x maxualt in size
|
||||
// indices NOT flipped vs Fortran
|
||||
double *bpred, *bpredp, *bpreds, *bpredps; // maxualt in length
|
||||
|
||||
double vmsave[6]; // multipole virial saved to use in polar
|
||||
|
||||
double csixpr; // square of csix for all atoms
|
||||
|
||||
// params common to pairwise terms
|
||||
|
||||
double off2, cut2;
|
||||
double c0, c1, c2, c3, c4, c5;
|
||||
|
||||
// Vdwl hal params - only for AMOEBA
|
||||
|
||||
double **radmin, **epsilon;
|
||||
double **radmin4, **epsilon4;
|
||||
|
||||
// peratom values computed each step
|
||||
// none of them persist with atoms
|
||||
// some of them need communication to ghosts
|
||||
|
||||
double **rpole; // multipole, comm to ghosts
|
||||
|
||||
int *xaxis2local, *yaxis2local, *zaxis2local; // xyz axis IDs -> local indices
|
||||
// just for owned atoms
|
||||
// set to self if not defined
|
||||
|
||||
int *red2local; // local indices of ired IDs, computed for owned and ghost
|
||||
double **xred; // altered coords for H atoms for Vdwl, comm to ghosts
|
||||
|
||||
double **tq; // torque from pairwise multipole, reverse comm from ghosts
|
||||
|
||||
double **uind, **uinp; // computed by induce, comm to ghosts
|
||||
double **udirp;
|
||||
double **rsd, **rsdp; // used by induce, comm to ghosts
|
||||
|
||||
double **field, **fieldp; // used by induce, reverse comm from ghosts
|
||||
double ***uopt, ***uoptp; // Nlocal x Optorder+1 x 3 arrays
|
||||
|
||||
double **ufld, **dufld; // used by polar, reverse comm from ghosts
|
||||
double **zrsd, **zrsdp; // used by induce, reverse comm from ghosts
|
||||
|
||||
double ***uad, ***uap, ***ubd, ***ubp; // used by TCG (not for now)
|
||||
|
||||
double ***fopt, ***foptp; // computed in induce, used by polar, if OPT
|
||||
// Nlocal x optorder x 10
|
||||
|
||||
double *poli;
|
||||
double **conj, **conjp;
|
||||
double **vec, **vecp;
|
||||
double **udir, **usum, **usump;
|
||||
|
||||
double **fuind, **fuinp;
|
||||
double **fdip_phi1, **fdip_phi2, **fdip_sum_phi;
|
||||
double **dipfield1, **dipfield2;
|
||||
|
||||
double **fphid, **fphip;
|
||||
double **fphidp, **cphidp;
|
||||
|
||||
// derived local neighbor lists
|
||||
|
||||
int *numneigh_dipole; // number of dipole neighs for each atom
|
||||
int **firstneigh_dipole; // ptr to each atom's dipole neigh indices
|
||||
MyPage<int> *ipage_dipole; // pages of neighbor indices for dipole neighs
|
||||
|
||||
double **firstneigh_dipdip; // ptr to each atom's dip/dip values
|
||||
MyPage<double> *dpage_dipdip; // pages of dip/dip values for dipole neighs
|
||||
|
||||
int *numneigh_precond; // number of precond neighs for each atom
|
||||
int **firstneigh_precond; // ptr to each atom's precond neigh indices
|
||||
MyPage<int> *ipage_precond; // pages of neighbor indices for precond neighs
|
||||
|
||||
double **firstneigh_pcpc; // ptr to each atom's pc/pc values
|
||||
MyPage<double> *dpage_pcpc; // pages of pc/pc values for precond neighs
|
||||
|
||||
// KSpace data
|
||||
// in indices = owned portion of grid in spatial decomp
|
||||
// out indices = in + ghost grid cells
|
||||
// fft indices = owned portion of grid in FFT decomp
|
||||
|
||||
int nefft1, nefft2, nefft3; // for electrostatic PME operations
|
||||
int ndfft1, ndfft2, ndfft3; // for dispersion PME operations
|
||||
|
||||
int bseorder; // for electrostatics
|
||||
int bsporder; // for polarization
|
||||
int bsdorder; // for dispersion
|
||||
int bsordermax; // max of 3 bsorder values
|
||||
|
||||
double aewald; // current Ewald alpha
|
||||
double aeewald; // for electrostatics
|
||||
double apewald; // for polarization
|
||||
double adewald; // for dispersion
|
||||
|
||||
double *bsmod1, *bsmod2, *bsmod3; // B-spline module along abc axes
|
||||
// set to max of any nfft1,nfft2,nfft3
|
||||
|
||||
double ***thetai1, ***thetai2, ***thetai3; // B-spline coeffs along abc axes
|
||||
// Nlocal x max bsorder x 4
|
||||
|
||||
int **igrid; // grid indices for each owned particle, Nlocal x 3
|
||||
|
||||
double **bsbuild; // used internally in bsplgen, max-bsorder x max-bsorder
|
||||
// indices ARE flipped vs Fortran
|
||||
|
||||
// Kspace data for induce and polar
|
||||
|
||||
double *qfac; // convolution pre-factors
|
||||
double *gridfft1; // copy of p_kspace FFT grid
|
||||
|
||||
double **cmp, **fmp; // Cartesian and fractional multipoles
|
||||
double **cphi, **fphi;
|
||||
|
||||
// params for current KSpace solve and FFT being worked on
|
||||
|
||||
int nfft1, nfft2, nfft3; // size of FFT
|
||||
int bsorder; // stencil size
|
||||
double recip[3][3]; // indices NOT flipped vs Fortran
|
||||
double ctf[10][10]; // indices NOT flipped vs Fortran
|
||||
double ftc[10][10]; // indices NOT flipped vs Fortran
|
||||
|
||||
class AmoebaConvolution *m_kspace, *p_kspace, *pc_kspace, *d_kspace;
|
||||
class AmoebaConvolution *i_kspace, *ic_kspace;
|
||||
|
||||
// FFT grid size factors
|
||||
|
||||
int nfactors; // # of factors
|
||||
int *factors; // list of possible factors (2,3,5)
|
||||
|
||||
// components of force field
|
||||
|
||||
void hal();
|
||||
|
||||
void repulsion();
|
||||
void damprep(double, double, double, double, double, double, double, double, int, double, double,
|
||||
double *);
|
||||
|
||||
void dispersion();
|
||||
void dispersion_real();
|
||||
void dispersion_kspace();
|
||||
|
||||
void multipole();
|
||||
void multipole_real();
|
||||
void multipole_kspace();
|
||||
|
||||
void polar();
|
||||
void polar_energy();
|
||||
void polar_real();
|
||||
void polar_kspace();
|
||||
void damppole(double, int, double, double, double *, double *, double *);
|
||||
|
||||
void induce();
|
||||
void ulspred();
|
||||
void ufield0c(double **, double **);
|
||||
void uscale0b(int, double **, double **, double **, double **);
|
||||
void dfield0c(double **, double **);
|
||||
void umutual1(double **, double **);
|
||||
void umutual2b(double **, double **);
|
||||
void udirect1(double **);
|
||||
void udirect2b(double **, double **);
|
||||
void dampmut(double, double, double, double *);
|
||||
void dampdir(double, double, double, double *, double *);
|
||||
void cholesky(int, double *, double *);
|
||||
|
||||
void charge_transfer();
|
||||
|
||||
// KSpace methods
|
||||
|
||||
void lattice();
|
||||
void moduli();
|
||||
void bspline(double, int, double *);
|
||||
void dftmod(double *, double *, int, int);
|
||||
void bspline_fill();
|
||||
void bsplgen(double, double **);
|
||||
void cmp_to_fmp(double **, double **);
|
||||
void cart_to_frac();
|
||||
void fphi_to_cphi(double **, double **);
|
||||
void frac_to_cart();
|
||||
|
||||
void grid_mpole(double **, double ***);
|
||||
void fphi_mpole(double ***, double **);
|
||||
void grid_uind(double **, double **, double ****);
|
||||
void fphi_uind(double ****, double **, double **, double **);
|
||||
void grid_disp(double ***);
|
||||
|
||||
void kewald();
|
||||
void kewald_parallel(int, int, int, int, int &, int &, int &, int &, int &, int &, int &, int &,
|
||||
int &, int &, int &, int &, int &, int &, int &, int &, int &, int &);
|
||||
double ewaldcof(double);
|
||||
int factorable(int);
|
||||
|
||||
// debug methods
|
||||
|
||||
FILE *fp_uind;
|
||||
void dump6(FILE *, const char *, double, double **, double **);
|
||||
|
||||
// functions in pair_amoeba.cpp
|
||||
|
||||
void allocate();
|
||||
void print_settings();
|
||||
|
||||
void initialize_vdwl();
|
||||
void allocate_vdwl();
|
||||
void deallocate_vdwl();
|
||||
|
||||
void initialize_smallsize();
|
||||
void allocate_smallsize();
|
||||
void deallocate_smallsize();
|
||||
|
||||
void assign_groups();
|
||||
void pbc_xred();
|
||||
void precond_neigh();
|
||||
void choose(int);
|
||||
void mix();
|
||||
void zero_energy_force_virial();
|
||||
void grow_local();
|
||||
|
||||
// functions in amoeba_utils.cpp
|
||||
|
||||
void kmpole();
|
||||
|
||||
void chkpole(int);
|
||||
void rotmat(int);
|
||||
void rotsite(int);
|
||||
|
||||
void add_onefive_neighbors();
|
||||
void find_hydrogen_neighbors();
|
||||
void find_multipole_neighbors();
|
||||
|
||||
void torque2force(int, double *, double *, double *, double *, double **);
|
||||
|
||||
// functions in file_amoeba.cpp
|
||||
|
||||
void set_defaults();
|
||||
void read_prmfile(char *);
|
||||
void read_keyfile(char *);
|
||||
|
||||
void initialize_type_class();
|
||||
void allocate_type_class(int, int);
|
||||
void deallocate_type_class();
|
||||
|
||||
void file_ffield(const std::vector<std::string> &, int);
|
||||
void file_literature(const std::vector<std::string> &, int);
|
||||
void file_atomtype(const std::vector<std::string> &, int);
|
||||
void file_vdwl(const std::vector<std::string> &, int);
|
||||
void file_vdwl_pair(const std::vector<std::string> &, int);
|
||||
void file_bstretch(const std::vector<std::string> &, int);
|
||||
void file_sbend(const std::vector<std::string> &, int);
|
||||
void file_abend(const std::vector<std::string> &, int);
|
||||
void file_pauli(const std::vector<std::string> &, int);
|
||||
void file_dispersion(const std::vector<std::string> &, int);
|
||||
void file_ub(const std::vector<std::string> &, int);
|
||||
void file_outplane(const std::vector<std::string> &, int);
|
||||
void file_torsion(const std::vector<std::string> &, int);
|
||||
void file_pitorsion(const std::vector<std::string> &, int);
|
||||
void file_multipole(const std::vector<std::string> &, int);
|
||||
void file_charge_penetration(const std::vector<std::string> &, int);
|
||||
void file_dippolar(const std::vector<std::string> &, int);
|
||||
void file_charge_transfer(const std::vector<std::string> &, int);
|
||||
|
||||
// inline function for neighbor list unmasking
|
||||
|
||||
// shift a neighbor list entry right by SBBITS15 bits and keep the lowest 3 bits
inline int sbmask15(int j) const
{
  return (j >> SBBITS15) & 7;
}
|
||||
};
|
||||
} // namespace LAMMPS_NS
|
||||
#endif
|
||||
#endif
|
||||
24
src/AMOEBA/pair_hippo.cpp
Normal file
24
src/AMOEBA/pair_hippo.cpp
Normal file
@ -0,0 +1,24 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/ Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_hippo.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// construct via PairAmoeba, then override the two settings that differ for this style
PairHippo::PairHippo(LAMMPS *lmp) : PairAmoeba(lmp)
{
  // style name stored for this pair style
  mystyle = "hippo";

  // NOTE(review): presumably the base class uses this flag to choose between
  // its AMOEBA and HIPPO code paths -- confirm in pair_amoeba.h
  amoeba = false;
}
|
||||
33
src/AMOEBA/pair_hippo.h
Normal file
33
src/AMOEBA/pair_hippo.h
Normal file
@ -0,0 +1,33 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/ Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
// clang-format off
|
||||
PairStyle(hippo,PairHippo);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_HIPPO_H
|
||||
#define LMP_PAIR_HIPPO_H
|
||||
|
||||
#include "pair_amoeba.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// pair style registered as "hippo" via the PairStyle() macro in this header
// derives from PairAmoeba and declares nothing but its own constructor
class PairHippo : public PairAmoeba {
 public:
  PairHippo(class LAMMPS *lmp);
};
|
||||
} // namespace LAMMPS_NS
|
||||
#endif
|
||||
#endif
|
||||
@ -284,7 +284,7 @@ FixATC::FixATC(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg),
|
||||
int me = ATC::LammpsInterface::instance()->comm_rank();
|
||||
|
||||
string groupName(arg[1]);
|
||||
int igroup = group->find(groupName.c_str());
|
||||
int igroup = group->find(groupName);
|
||||
int atomCount = group->count(igroup);
|
||||
|
||||
try {
|
||||
|
||||
@ -24,10 +24,10 @@
|
||||
#include "neighbor.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "pair.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -117,7 +117,7 @@ void colvarproxy_lammps::init(const char *conf_file)
|
||||
if (_lmp->update->ntimestep != 0) {
|
||||
cvm::log("Setting initial step number from LAMMPS: "+
|
||||
cvm::to_str(_lmp->update->ntimestep)+"\n");
|
||||
colvars->it = colvars->it_restart =
|
||||
colvarmodule::it = colvarmodule::it_restart =
|
||||
static_cast<cvm::step_number>(_lmp->update->ntimestep);
|
||||
}
|
||||
|
||||
@ -174,7 +174,7 @@ double colvarproxy_lammps::compute()
|
||||
} else {
|
||||
// Use the time step number from LAMMPS Update object
|
||||
if (_lmp->update->ntimestep - previous_step == 1) {
|
||||
colvars->it++;
|
||||
colvarmodule::it++;
|
||||
b_simulation_continuing = false;
|
||||
} else {
|
||||
// Cases covered by this condition:
|
||||
@ -209,7 +209,7 @@ double colvarproxy_lammps::compute()
|
||||
|
||||
if (cvm::debug()) {
|
||||
cvm::log(std::string(cvm::line_marker)+
|
||||
"colvarproxy_lammps, step no. "+cvm::to_str(colvars->it)+"\n"+
|
||||
"colvarproxy_lammps, step no. "+cvm::to_str(colvarmodule::it)+"\n"+
|
||||
"Updating internal data.\n");
|
||||
}
|
||||
|
||||
@ -269,7 +269,7 @@ cvm::rvector colvarproxy_lammps::position_distance(cvm::atom_pos const &pos1,
|
||||
double ytmp = pos2.y - pos1.y;
|
||||
double ztmp = pos2.z - pos1.z;
|
||||
_lmp->domain->minimum_image(xtmp,ytmp,ztmp);
|
||||
return cvm::rvector(xtmp, ytmp, ztmp);
|
||||
return {xtmp, ytmp, ztmp};
|
||||
}
|
||||
|
||||
|
||||
|
||||
@ -135,8 +135,6 @@ static void rebuild_table_int(inthash_t *tptr) {
|
||||
|
||||
/* free memory used by old table */
|
||||
free(old_bucket);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -166,8 +164,6 @@ void inthash_init(inthash_t *tptr, int buckets) {
|
||||
|
||||
/* allocate memory for table */
|
||||
tptr->bucket=(inthash_node_t **) calloc(tptr->size, sizeof(inthash_node_t *));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -847,7 +843,6 @@ void FixColvars::post_force_respa(int vflag, int ilevel, int /*iloop*/)
|
||||
{
|
||||
/* only process colvar forces on the outmost RESPA level. */
|
||||
if (ilevel == nlevels_respa-1) post_force(vflag);
|
||||
return;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -939,7 +934,7 @@ void FixColvars::end_of_step()
|
||||
void FixColvars::write_restart(FILE *fp)
|
||||
{
|
||||
if (me == 0) {
|
||||
std::string rest_text("");
|
||||
std::string rest_text;
|
||||
proxy->serialize_status(rest_text);
|
||||
// TODO call write_output_files()
|
||||
const char *cvm_state = rest_text.c_str();
|
||||
|
||||
@ -76,7 +76,7 @@ void Ndx2Group::command(int narg, char **arg)
|
||||
int len;
|
||||
bigint num;
|
||||
FILE *fp;
|
||||
std::string name = "", next;
|
||||
std::string name, next;
|
||||
|
||||
if (narg < 1) error->all(FLERR,"Illegal ndx2group command");
|
||||
if (atom->tag_enable == 0)
|
||||
|
||||
@ -78,9 +78,9 @@ AtomVecDielectric::AtomVecDielectric(LAMMPS *_lmp) : AtomVec(_lmp)
|
||||
"mu", "area", "ed", "em", "epsilon", "curvature", "q_unscaled"};
|
||||
fields_create = {"q", "molecule", "num_bond", "num_angle", "num_dihedral", "num_improper",
|
||||
"nspecial", "mu", "area", "ed", "em", "epsilon", "curvature", "q_unscaled"};
|
||||
fields_data_atom = { "id", "molecule", "type", "q", "x", "mu3", "area", "ed", "em", "epsilon",
|
||||
fields_data_atom = {"id", "molecule", "type", "q", "x", "mu3", "area", "ed", "em", "epsilon",
|
||||
"curvature"};
|
||||
fields_data_vel = {"id v"};
|
||||
fields_data_vel = {"id", "v"};
|
||||
// clang-format on
|
||||
|
||||
setup_fields();
|
||||
|
||||
@ -58,6 +58,9 @@ PPPMDielectric::PPPMDielectric(LAMMPS *_lmp) : PPPM(_lmp)
|
||||
phi = nullptr;
|
||||
potflag = 0;
|
||||
|
||||
// no warnings about non-neutral systems from qsum_qsq()
|
||||
warn_nonneutral = 2;
|
||||
|
||||
avec = dynamic_cast<AtomVecDielectric *>( atom->style_match("dielectric"));
|
||||
if (!avec) error->all(FLERR,"pppm/dielectric requires atom style dielectric");
|
||||
}
|
||||
@ -463,25 +466,3 @@ void PPPMDielectric::slabcorr()
|
||||
efield[i][2] += ffact * eps[i]*(dipole_all - qsum*x[i][2]);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute qsum,qsqsum,q2 and ignore error/warning if not charge neutral
|
||||
called whenever charges are changed
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PPPMDielectric::qsum_qsq()
|
||||
{
|
||||
const double * const q = atom->q;
|
||||
const int nlocal = atom->nlocal;
|
||||
double qsum_local(0.0), qsqsum_local(0.0);
|
||||
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
qsum_local += q[i];
|
||||
qsqsum_local += q[i]*q[i];
|
||||
}
|
||||
|
||||
MPI_Allreduce(&qsum_local,&qsum,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
MPI_Allreduce(&qsqsum_local,&qsqsum,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
|
||||
q2 = qsqsum * force->qqrd2e;
|
||||
}
|
||||
|
||||
@ -34,8 +34,6 @@ class PPPMDielectric : public PPPM {
|
||||
double *phi;
|
||||
int potflag; // 1/0 if per-atom electrostatic potential phi is needed
|
||||
|
||||
void qsum_qsq();
|
||||
|
||||
protected:
|
||||
void slabcorr() override;
|
||||
|
||||
|
||||
@ -65,6 +65,9 @@ PPPMDispDielectric::PPPMDispDielectric(LAMMPS *_lmp) : PPPMDisp(_lmp)
|
||||
|
||||
mu_flag = 0;
|
||||
|
||||
// no warnings about non-neutral systems from qsum_qsq()
|
||||
warn_nonneutral = 2;
|
||||
|
||||
efield = nullptr;
|
||||
phi = nullptr;
|
||||
potflag = 0;
|
||||
@ -837,25 +840,3 @@ double PPPMDispDielectric::memory_usage()
|
||||
bytes += nmax * sizeof(double);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
compute qsum,qsqsum,q2 and ignore error/warning if not charge neutral
|
||||
called initially, when particle count changes, when charges are changed
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PPPMDispDielectric::qsum_qsq()
|
||||
{
|
||||
const double * const q = atom->q;
|
||||
const int nlocal = atom->nlocal;
|
||||
double qsum_local(0.0), qsqsum_local(0.0);
|
||||
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
qsum_local += q[i];
|
||||
qsqsum_local += q[i]*q[i];
|
||||
}
|
||||
|
||||
MPI_Allreduce(&qsum_local,&qsum,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
MPI_Allreduce(&qsqsum_local,&qsqsum,1,MPI_DOUBLE,MPI_SUM,world);
|
||||
|
||||
q2 = qsqsum * force->qqrd2e;
|
||||
}
|
||||
|
||||
@ -30,8 +30,7 @@ class PPPMDispDielectric : public PPPMDisp {
|
||||
~PPPMDispDielectric() override;
|
||||
double memory_usage() override;
|
||||
void compute(int, int) override;
|
||||
void qsum_qsq();
|
||||
void slabcorr(int);
|
||||
void slabcorr(int) override;
|
||||
|
||||
double **efield;
|
||||
double *phi;
|
||||
|
||||
@ -17,8 +17,8 @@
|
||||
modified velocity-Verlet (MVV) algorithm.
|
||||
Setting verlet = 0.5 recovers the standard velocity-Verlet algorithm.
|
||||
|
||||
Contributing author: Zhen Li (Brown University)
|
||||
Email: zhen_li@brown.edu
|
||||
Contributing author: Zhen Li (Clemson University)
|
||||
Email: zli7@clemson.edu
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "fix_mvv_dpd.h"
|
||||
|
||||
@ -17,8 +17,8 @@
|
||||
v and edpd_T) using the modified velocity-Verlet (MVV) algorithm.
|
||||
Setting verlet = 0.5 recovers the standard velocity-Verlet algorithm.
|
||||
|
||||
Contributing author: Zhen Li (Brown University)
|
||||
Email: zhen_li@brown.edu
|
||||
Contributing author: Zhen Li (Clemson University)
|
||||
Email: zli7@clemson.edu
|
||||
|
||||
Please cite the related publication:
|
||||
Z. Li, Y.-H. Tang, H. Lei, B. Caswell and G.E. Karniadakis. "Energy-
|
||||
|
||||
@ -17,8 +17,8 @@
|
||||
v and cc) using the modified velocity-Verlet (MVV) algorithm.
|
||||
Setting verlet = 0.5 recovers the standard velocity-Verlet algorithm.
|
||||
|
||||
Contributing author: Zhen Li (Brown University)
|
||||
Email: zhen_li@brown.edu
|
||||
Contributing author: Zhen Li (Clemson University)
|
||||
Email: zli7@clemson.edu
|
||||
|
||||
Please cite the related publication:
|
||||
Z. Li, A. Yazdani, A. Tartakovsky and G.E. Karniadakis. "Transport
|
||||
|
||||
@ -13,8 +13,8 @@
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Zhen Li (Brown University)
|
||||
Email: zhen_li@brown.edu
|
||||
Contributing author: Zhen Li (Clemson University)
|
||||
Email: zli7@clemson.edu
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_edpd.h"
|
||||
|
||||
@ -13,8 +13,8 @@
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Zhen Li (Brown University)
|
||||
Email: zhen_li@brown.edu
|
||||
Contributing author: Zhen Li (Clemson University)
|
||||
Email: zli7@clemson.edu
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_mdpd.h"
|
||||
|
||||
@ -17,7 +17,7 @@
|
||||
before the force calculation.
|
||||
The code uses 3D Lucy kernel, it can be modified for other kernels.
|
||||
|
||||
Contributing author: Zhen Li (Brown University)
|
||||
Contributing author: Zhen Li (Clemson University)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_mdpd_rhosum.h"
|
||||
|
||||
@ -13,8 +13,8 @@
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Zhen Li (Brown University)
|
||||
Email: zhen_li@brown.edu
|
||||
Contributing author: Zhen Li (Clemson University)
|
||||
Email: zli7@clemson.edu
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_tdpd.h"
|
||||
|
||||
@ -44,7 +44,7 @@ static const double sqrt_2_inv = std::sqrt(0.5);
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairSDPDTaitwaterIsothermal::PairSDPDTaitwaterIsothermal (LAMMPS *lmp)
|
||||
: Pair (lmp) {
|
||||
: Pair (lmp), random(nullptr) {
|
||||
restartinfo = 0;
|
||||
single_enable =0;
|
||||
}
|
||||
@ -61,6 +61,7 @@ PairSDPDTaitwaterIsothermal::~PairSDPDTaitwaterIsothermal () {
|
||||
memory->destroy (soundspeed);
|
||||
memory->destroy (B);
|
||||
}
|
||||
delete random;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -121,12 +121,17 @@ fi
|
||||
if (test $1 = "MANYBODY") then
|
||||
depend ATC
|
||||
depend GPU
|
||||
depend INTEL
|
||||
depend KOKKOS
|
||||
depend OPT
|
||||
depend QEQ
|
||||
depend OPENMP
|
||||
fi
|
||||
|
||||
if (test $1 = "MEAM") then
|
||||
depend KOKKOS
|
||||
fi
|
||||
|
||||
if (test $1 = "MOLECULE") then
|
||||
depend EXTRA-MOLECULE
|
||||
depend GPU
|
||||
|
||||
@ -40,7 +40,7 @@ using namespace MathConst;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
EwaldElectrode::EwaldElectrode(LAMMPS *lmp) : Ewald(lmp), ElectrodeKSpace()
|
||||
EwaldElectrode::EwaldElectrode(LAMMPS *lmp) : Ewald(lmp)
|
||||
{
|
||||
eikr_step = -1;
|
||||
}
|
||||
|
||||
@ -65,8 +65,7 @@ enum { FORWARD_IK, FORWARD_AD, FORWARD_IK_PERATOM, FORWARD_AD_PERATOM };
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PPPMElectrode::PPPMElectrode(LAMMPS *lmp) :
|
||||
PPPM(lmp), ElectrodeKSpace(), electrolyte_density_brick(nullptr),
|
||||
electrolyte_density_fft(nullptr)
|
||||
PPPM(lmp), electrolyte_density_brick(nullptr), electrolyte_density_fft(nullptr)
|
||||
{
|
||||
group_group_enable = 0;
|
||||
electrolyte_density_brick = nullptr;
|
||||
|
||||
@ -11,10 +11,15 @@
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "compute_ave_sphere_atom.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "domain.h"
|
||||
#include "error.h"
|
||||
#include "force.h"
|
||||
#include "math_const.h"
|
||||
@ -98,7 +103,10 @@ void ComputeAveSphereAtom::init()
|
||||
}
|
||||
|
||||
cutsq = cutoff * cutoff;
|
||||
sphere_vol = 4.0 / 3.0 * MY_PI * cutsq * cutoff;
|
||||
if (domain->dimension == 3)
|
||||
volume = 4.0 / 3.0 * MY_PI * cutsq * cutoff;
|
||||
else
|
||||
volume = MY_PI * cutsq;
|
||||
|
||||
// need an occasional full neighbor list
|
||||
|
||||
@ -121,7 +129,7 @@ void ComputeAveSphereAtom::compute_peratom()
|
||||
double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
|
||||
int *ilist, *jlist, *numneigh, **firstneigh;
|
||||
int count;
|
||||
double vsum[3], vavg[3], vnet[3];
|
||||
double p[3], vcom[3], vnet[3];
|
||||
|
||||
invoked_peratom = update->ntimestep;
|
||||
|
||||
@ -152,12 +160,26 @@ void ComputeAveSphereAtom::compute_peratom()
|
||||
|
||||
double **x = atom->x;
|
||||
double **v = atom->v;
|
||||
double *mass = atom->mass;
|
||||
double *rmass = atom->rmass;
|
||||
int *type = atom->type;
|
||||
int *mask = atom->mask;
|
||||
double massone_i, massone_j, totalmass;
|
||||
|
||||
double adof = domain->dimension;
|
||||
double mvv2e = force->mvv2e;
|
||||
double mv2d = force->mv2d;
|
||||
double boltz = force->boltz;
|
||||
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
|
||||
if (mask[i] & groupbit) {
|
||||
if (rmass)
|
||||
massone_i = rmass[i];
|
||||
else
|
||||
massone_i = mass[type[i]];
|
||||
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
@ -167,13 +189,18 @@ void ComputeAveSphereAtom::compute_peratom()
|
||||
// i atom contribution
|
||||
|
||||
count = 1;
|
||||
vsum[0] = v[i][0];
|
||||
vsum[1] = v[i][1];
|
||||
vsum[2] = v[i][2];
|
||||
totalmass = massone_i;
|
||||
p[0] = v[i][0] * massone_i;
|
||||
p[1] = v[i][1] * massone_i;
|
||||
p[2] = v[i][2] * massone_i;
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
j &= NEIGHMASK;
|
||||
if (rmass)
|
||||
massone_j = rmass[j];
|
||||
else
|
||||
massone_j = mass[type[j]];
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
@ -181,42 +208,45 @@ void ComputeAveSphereAtom::compute_peratom()
|
||||
rsq = delx * delx + dely * dely + delz * delz;
|
||||
if (rsq < cutsq) {
|
||||
count++;
|
||||
vsum[0] += v[j][0];
|
||||
vsum[1] += v[j][1];
|
||||
vsum[2] += v[j][2];
|
||||
totalmass += massone_j;
|
||||
p[0] += v[j][0] * massone_j;
|
||||
p[1] += v[j][1] * massone_j;
|
||||
p[2] += v[j][2] * massone_j;
|
||||
}
|
||||
}
|
||||
|
||||
vavg[0] = vsum[0] / count;
|
||||
vavg[1] = vsum[1] / count;
|
||||
vavg[2] = vsum[2] / count;
|
||||
vcom[0] = p[0] / totalmass;
|
||||
vcom[1] = p[1] / totalmass;
|
||||
vcom[2] = p[2] / totalmass;
|
||||
|
||||
// i atom contribution
|
||||
|
||||
count = 1;
|
||||
vnet[0] = v[i][0] - vavg[0];
|
||||
vnet[1] = v[i][1] - vavg[1];
|
||||
vnet[2] = v[i][2] - vavg[2];
|
||||
double ke_sum = vnet[0] * vnet[0] + vnet[1] * vnet[1] + vnet[2] * vnet[2];
|
||||
vnet[0] = v[i][0] - vcom[0];
|
||||
vnet[1] = v[i][1] - vcom[1];
|
||||
vnet[2] = v[i][2] - vcom[2];
|
||||
double ke_sum = massone_i * (vnet[0] * vnet[0] + vnet[1] * vnet[1] + vnet[2] * vnet[2]);
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
j &= NEIGHMASK;
|
||||
if (rmass)
|
||||
massone_j = rmass[j];
|
||||
else
|
||||
massone_j = mass[type[j]];
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx * delx + dely * dely + delz * delz;
|
||||
if (rsq < cutsq) {
|
||||
count++;
|
||||
vnet[0] = v[j][0] - vavg[0];
|
||||
vnet[1] = v[j][1] - vavg[1];
|
||||
vnet[2] = v[j][2] - vavg[2];
|
||||
ke_sum += vnet[0] * vnet[0] + vnet[1] * vnet[1] + vnet[2] * vnet[2];
|
||||
vnet[0] = v[j][0] - vcom[0];
|
||||
vnet[1] = v[j][1] - vcom[1];
|
||||
vnet[2] = v[j][2] - vcom[2];
|
||||
ke_sum += massone_j * (vnet[0] * vnet[0] + vnet[1] * vnet[1] + vnet[2] * vnet[2]);
|
||||
}
|
||||
}
|
||||
double density = count / sphere_vol;
|
||||
double temp = ke_sum / 3.0 / count;
|
||||
double density = mv2d * totalmass / volume;
|
||||
double temp = mvv2e * ke_sum / (adof * count * boltz);
|
||||
result[i][0] = density;
|
||||
result[i][1] = temp;
|
||||
}
|
||||
|
||||
@ -37,7 +37,7 @@ class ComputeAveSphereAtom : public Compute {
|
||||
|
||||
protected:
|
||||
int nmax;
|
||||
double cutoff, cutsq, sphere_vol;
|
||||
double cutoff, cutsq, volume;
|
||||
class NeighList *list;
|
||||
|
||||
double **result;
|
||||
|
||||
@ -80,21 +80,23 @@ ComputeStressCartesian::ComputeStressCartesian(LAMMPS *lmp, int narg, char **arg
|
||||
|
||||
dir2 = 0;
|
||||
bin_width1 = utils::numeric(FLERR, arg[4], false, lmp);
|
||||
bin_width2 = 0.0;
|
||||
bin_width2 = domain->boxhi[dir2] - domain->boxlo[dir2];
|
||||
nbins1 = (int) ((domain->boxhi[dir1] - domain->boxlo[dir1]) / bin_width1);
|
||||
nbins2 = 1;
|
||||
|
||||
// adjust bin width if not a perfect match
|
||||
invV = (domain->boxhi[dir1] - domain->boxlo[dir1]) / nbins1;
|
||||
if ((fabs(invV - bin_width1) > SMALL) && (comm->me == 0))
|
||||
utils::logmesg(lmp, "Adjusting first bin width for compute {} from {:.6f} to {:.6f}\n", style,
|
||||
bin_width1, invV);
|
||||
bin_width1 = invV;
|
||||
double tmp_binwidth = (domain->boxhi[dir1] - domain->boxlo[dir1]) / nbins1;
|
||||
if ((fabs(tmp_binwidth - bin_width1) > SMALL) && (comm->me == 0))
|
||||
utils::logmesg(lmp, "Adjusting second bin width for compute {} from {:.6f} to {:.6f}\n", style,
|
||||
bin_width1, tmp_binwidth);
|
||||
bin_width1 = tmp_binwidth;
|
||||
|
||||
if (bin_width1 <= 0.0)
|
||||
error->all(FLERR, "Illegal compute stress/cartesian command. Bin width must be > 0");
|
||||
else if (bin_width1 > domain->boxhi[dir1] - domain->boxlo[dir1])
|
||||
error->all(FLERR, "Illegal compute stress/cartesian command. Bin width larger than box.");
|
||||
|
||||
invV = bin_width1;
|
||||
if (dims == 2) {
|
||||
if (strcmp(arg[5], "x") == 0)
|
||||
dir2 = 0;
|
||||
@ -107,7 +109,9 @@ ComputeStressCartesian::ComputeStressCartesian(LAMMPS *lmp, int narg, char **arg
|
||||
|
||||
bin_width2 = utils::numeric(FLERR, arg[6], false, lmp);
|
||||
nbins2 = (int) ((domain->boxhi[dir2] - domain->boxlo[dir2]) / bin_width2);
|
||||
double tmp_binwidth = (domain->boxhi[dir2] - domain->boxlo[dir2]) / nbins2;
|
||||
|
||||
// adjust bin width if not a perfect match
|
||||
tmp_binwidth = (domain->boxhi[dir2] - domain->boxlo[dir2]) / nbins2;
|
||||
if ((fabs(tmp_binwidth - bin_width2) > SMALL) && (comm->me == 0))
|
||||
utils::logmesg(lmp, "Adjusting second bin width for compute {} from {:.6f} to {:.6f}\n",
|
||||
style, bin_width2, tmp_binwidth);
|
||||
@ -262,7 +266,7 @@ void ComputeStressCartesian::compute_array()
|
||||
Pair *pair = force->pair;
|
||||
double **cutsq = force->pair->cutsq;
|
||||
|
||||
double xi1, xi2, xj1, xj2;
|
||||
double xi1, xi2;
|
||||
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
@ -301,9 +305,6 @@ void ComputeStressCartesian::compute_array()
|
||||
}
|
||||
}
|
||||
}
|
||||
xj1 = x[j][dir1];
|
||||
xj2 = x[j][dir2];
|
||||
|
||||
delx = x[j][0] - xtmp;
|
||||
dely = x[j][1] - ytmp;
|
||||
delz = x[j][2] - ztmp;
|
||||
@ -314,8 +315,7 @@ void ComputeStressCartesian::compute_array()
|
||||
// Check if inside cut-off
|
||||
if (rsq >= cutsq[itype][jtype]) continue;
|
||||
pair->single(i, j, itype, jtype, rsq, factor_coul, factor_lj, fpair);
|
||||
if (dims == 1) compute_pressure_1d(fpair, xi1, xj1, delx, dely, delz);
|
||||
if (dims == 2) compute_pressure_2d(fpair, xi1, xi2, xj1, xj2, delx, dely, delz);
|
||||
compute_pressure(fpair, xi1, xi2, delx, dely, delz);
|
||||
}
|
||||
}
|
||||
|
||||
@ -353,107 +353,8 @@ void ComputeStressCartesian::compute_array()
|
||||
}
|
||||
}
|
||||
|
||||
void ComputeStressCartesian::compute_pressure_1d(double fpair, double xi, double xj, double delx,
|
||||
double dely, double delz)
|
||||
{
|
||||
int bin_s, bin_e, bin_step, bin, bin_limit;
|
||||
double xa, xb;
|
||||
|
||||
if (xi < domain->boxlo[dir1])
|
||||
xi += (domain->boxhi[dir1] - domain->boxlo[dir1]);
|
||||
else if (xi > domain->boxhi[dir1])
|
||||
xi -= (domain->boxhi[dir1] - domain->boxlo[dir1]);
|
||||
if (xj < domain->boxlo[dir1])
|
||||
xj += (domain->boxhi[dir1] - domain->boxlo[dir1]);
|
||||
else if (xj > domain->boxhi[dir1])
|
||||
xj -= (domain->boxhi[dir1] - domain->boxlo[dir1]);
|
||||
|
||||
// Integrating contour from bin_s to bin_e
|
||||
bin_s = ((int) lround((xi - domain->boxlo[dir1]) / bin_width1)) % nbins1;
|
||||
bin_e = ((int) lround((xj - domain->boxlo[dir1]) / bin_width1)) % nbins1;
|
||||
|
||||
// If not periodic in dir1
|
||||
if (domain->periodicity[dir1] == 0) {
|
||||
bin_s = ((int) lround((xi - domain->boxlo[dir1]) / bin_width1));
|
||||
bin_e = ((int) lround((xj - domain->boxlo[dir1]) / bin_width1));
|
||||
|
||||
if (bin_e == nbins1) bin_e--;
|
||||
if (bin_s == nbins1) bin_s--;
|
||||
}
|
||||
|
||||
bin_step = 1;
|
||||
if (domain->periodicity[dir1] == 1) {
|
||||
if (bin_e - bin_s > 0.5 * nbins1)
|
||||
bin_step = -1;
|
||||
else if (bin_s - bin_e > 0.5 * nbins1)
|
||||
bin_step = 1;
|
||||
else if (bin_s > bin_e)
|
||||
bin_step = -1;
|
||||
} else {
|
||||
if (bin_s > bin_e) bin_step = -1;
|
||||
}
|
||||
if (domain->periodicity[dir1] == 1)
|
||||
bin_limit = (bin_e + bin_step) % nbins1 < 0 ? (bin_e + bin_step) % nbins1 + nbins1
|
||||
: (bin_e + bin_step) % nbins1;
|
||||
else
|
||||
bin_limit = bin_e + bin_step;
|
||||
|
||||
bin = bin_s;
|
||||
// Integrate from bin_s to bin_e with step bin_step.
|
||||
while (bin < bin_limit) {
|
||||
|
||||
// Calculating exit and entry point (xa, xb). Checking if inside current bin.
|
||||
if (bin == bin_s) {
|
||||
if (domain->periodicity[dir1] == 1)
|
||||
xa = fmod(xi, domain->boxhi[dir1]) + domain->boxlo[dir1];
|
||||
else
|
||||
xa = xi;
|
||||
} else
|
||||
xa = (bin_step == 1) ? bin * bin_width1 : (bin + 1) * bin_width1;
|
||||
if (bin == bin_e) {
|
||||
if (domain->periodicity[dir1] == 1)
|
||||
xb = fmod(xj, domain->boxhi[dir1]) + domain->boxlo[dir1];
|
||||
else
|
||||
xb = xj;
|
||||
} else
|
||||
xb = (bin_step == 1) ? (bin + 1) * bin_width1 : bin * bin_width1;
|
||||
|
||||
if (bin < 0 || bin >= nbins1) error->all(FLERR, "ERROR: Bin outside simulation.");
|
||||
|
||||
if (bin_s != bin_e) {
|
||||
if (dir1 == 0) {
|
||||
tpcxx[bin] += (fpair * delx * delx) * (xb - xa) / delx;
|
||||
tpcyy[bin] += (fpair * dely * dely) * (xb - xa) / delx;
|
||||
tpczz[bin] += (fpair * delz * delz) * (xb - xa) / delx;
|
||||
} else if (dir1 == 1) {
|
||||
tpcxx[bin] += (fpair * delx * delx) * (xb - xa) / dely;
|
||||
tpcyy[bin] += (fpair * dely * dely) * (xb - xa) / dely;
|
||||
tpczz[bin] += (fpair * delz * delz) * (xb - xa) / dely;
|
||||
} else if (dir1 == 2) {
|
||||
tpcxx[bin] += (fpair * delx * delx) * (xb - xa) / delz;
|
||||
tpcyy[bin] += (fpair * dely * dely) * (xb - xa) / delz;
|
||||
tpczz[bin] += (fpair * delz * delz) * (xb - xa) / delz;
|
||||
}
|
||||
}
|
||||
// Particle i and j in same bin. Avoiding zero divided by zero.
|
||||
else {
|
||||
tpcxx[bin] += fpair * delx * delx;
|
||||
tpcyy[bin] += fpair * dely * dely;
|
||||
tpczz[bin] += fpair * delz * delz;
|
||||
}
|
||||
|
||||
// Stepping bin to next bin
|
||||
if (domain->periodicity[dir1] == 1)
|
||||
bin = (bin + bin_step) % nbins1 < 0 ? (bin + bin_step) % nbins1 + nbins1
|
||||
: (bin + bin_step) % nbins1;
|
||||
else
|
||||
bin = bin + bin_step;
|
||||
}
|
||||
}
|
||||
|
||||
void ComputeStressCartesian::compute_pressure_2d(double fpair, double xi, double yi, double /*xj*/,
|
||||
double /*yj*/, double delx, double dely,
|
||||
double delz)
|
||||
void ComputeStressCartesian::compute_pressure(double fpair, double xi, double yi, double delx,
|
||||
double dely, double delz)
|
||||
{
|
||||
int bin1, bin2, next_bin1, next_bin2;
|
||||
double la = 0.0, lb = 0.0, l_sum = 0.0;
|
||||
|
||||
@ -41,8 +41,7 @@ class ComputeStressCartesian : public Compute {
|
||||
double *dens, *pkxx, *pkyy, *pkzz, *pcxx, *pcyy, *pczz;
|
||||
double *tdens, *tpkxx, *tpkyy, *tpkzz, *tpcxx, *tpcyy, *tpczz;
|
||||
class NeighList *list;
|
||||
void compute_pressure_1d(double, double, double, double, double, double);
|
||||
void compute_pressure_2d(double, double, double, double, double, double, double, double);
|
||||
void compute_pressure(double, double, double, double, double, double);
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
@ -433,10 +433,10 @@ int xdropen(XDR *xdrs, const char *filename, const char *type)
|
||||
return 0;
|
||||
}
|
||||
if (*type == 'w' || *type == 'W') {
|
||||
type = (char *) "w+";
|
||||
type = (char *) "wb+";
|
||||
lmode = XDR_ENCODE;
|
||||
} else {
|
||||
type = (char *) "r";
|
||||
type = (char *) "rb";
|
||||
lmode = XDR_DECODE;
|
||||
}
|
||||
xdrfiles[xdrid] = fopen(filename, type);
|
||||
|
||||
@ -124,6 +124,12 @@ void DumpYAML::write_data(int n, double *mybuf)
|
||||
}
|
||||
fputs("]\n", fp);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void DumpYAML::write_footer()
|
||||
{
|
||||
fputs("...\n", fp);
|
||||
}
|
||||
|
||||
|
||||
@ -35,6 +35,7 @@ class DumpYAML : public DumpCustom {
|
||||
void write() override;
|
||||
void write_header(bigint) override;
|
||||
void write_data(int, double *) override;
|
||||
void write_footer() override;
|
||||
|
||||
int modify_param(int, char **) override;
|
||||
};
|
||||
|
||||
@ -26,6 +26,7 @@
|
||||
#include "math_const.h"
|
||||
#include "memory.h"
|
||||
#include "neighbor.h"
|
||||
#include "pair.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
@ -61,7 +62,6 @@ DihedralFourier::~DihedralFourier()
|
||||
delete [] shift;
|
||||
delete [] cos_shift;
|
||||
delete [] sin_shift;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -332,7 +332,6 @@ void DihedralFourier::coeff(int narg, char **arg)
|
||||
|
||||
void DihedralFourier::write_restart(FILE *fp)
|
||||
{
|
||||
|
||||
fwrite(&nterms[1],sizeof(int),atom->ndihedraltypes,fp);
|
||||
for (int i = 1; i <= atom->ndihedraltypes; i++) {
|
||||
fwrite(k[i],sizeof(double),nterms[i],fp);
|
||||
|
||||
@ -116,15 +116,15 @@ void PairCoulSlaterLong::compute(int eflag, int vflag)
|
||||
|
||||
if (rsq < cut_coulsq) {
|
||||
r2inv = 1.0/rsq;
|
||||
r = sqrt(rsq);
|
||||
grij = g_ewald * r;
|
||||
expm2 = exp(-grij*grij);
|
||||
t = 1.0 / (1.0 + EWALD_P*grij);
|
||||
erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
slater_term = exp(-2*r/lamda)*(1 + (2*r/lamda*(1+r/lamda)));
|
||||
prefactor = qqrd2e * scale[itype][jtype] * qtmp*q[j]/r;
|
||||
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - slater_term);
|
||||
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor;
|
||||
r = sqrt(rsq);
|
||||
grij = g_ewald * r;
|
||||
expm2 = exp(-grij*grij);
|
||||
t = 1.0 / (1.0 + EWALD_P*grij);
|
||||
erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
|
||||
slater_term = exp(-2*r/lamda)*(1 + (2*r/lamda*(1+r/lamda)));
|
||||
prefactor = qqrd2e * scale[itype][jtype] * qtmp*q[j]/r;
|
||||
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2 - slater_term);
|
||||
if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor*(1-slater_term);
|
||||
|
||||
fpair = forcecoul * r2inv;
|
||||
|
||||
@ -138,8 +138,8 @@ void PairCoulSlaterLong::compute(int eflag, int vflag)
|
||||
}
|
||||
|
||||
if (eflag) {
|
||||
ecoul = prefactor*(erfc - (1 + r/lamda)*exp(-2*r/lamda));
|
||||
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor;
|
||||
ecoul = prefactor*(erfc - (1 + r/lamda)*exp(-2*r/lamda));
|
||||
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor*(1.0-(1 + r/lamda)*exp(-2*r/lamda));
|
||||
}
|
||||
|
||||
if (evflag) ev_tally(i,j,nlocal,newton_pair,
|
||||
|
||||
44
src/INTEL/npair_halffull_newtoff_trim_intel.h
Normal file
44
src/INTEL/npair_halffull_newtoff_trim_intel.h
Normal file
@ -0,0 +1,44 @@
|
||||
// clang-format off
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
// Only used for hybrid to generate list for non-intel style. Use
|
||||
// standard routines.
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
// clang-format off
|
||||
NPairStyle(halffull/newtoff/trim/intel,
|
||||
NPairHalffullNewtoffTrim,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_TRIM | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/skip/trim/intel,
|
||||
NPairHalffullNewtoffTrim,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/ghost/trim/intel,
|
||||
NPairHalffullNewtoffTrim,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newtoff/skip/ghost/trim/intel,
|
||||
NPairHalffullNewtoffTrim,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_NSQ | NP_BIN | NP_MULTI | NP_HALF |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_GHOST | NP_TRIM | NP_INTEL);
|
||||
// clang-format on
|
||||
#endif
|
||||
258
src/INTEL/npair_halffull_newton_trim_intel.cpp
Normal file
258
src/INTEL/npair_halffull_newton_trim_intel.cpp
Normal file
@ -0,0 +1,258 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_halffull_newton_trim_intel.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "error.h"
|
||||
#include "modify.h"
|
||||
#include "my_page.h"
|
||||
#include "neigh_list.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Look up the fix with ID "package_intel" and store it in _fix.
// If no such fix exists, the failure is reported through error->all().
NPairHalffullNewtonTrimIntel::NPairHalffullNewtonTrimIntel(LAMMPS *lmp) : NPair(lmp)
{
  auto *package_fix = modify->get_fix_by_id("package_intel");
  _fix = static_cast<FixIntel *>(package_fix);
  if (_fix == nullptr)
    error->all(FLERR, "The 'package intel' command is required for /intel styles");
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
build half list from full list and trim to shorter cutoff
|
||||
pair stored once if i,j are both owned and i < j
|
||||
if j is ghost, only store if j coords are "above and to the right" of i
|
||||
works if full list is a skip list
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairHalffullNewtonTrimIntel::build_t(NeighList *list,
|
||||
IntelBuffers<flt_t,acc_t> *buffers)
|
||||
{
|
||||
const int inum_full = list->listfull->inum;
|
||||
const int nlocal = atom->nlocal;
|
||||
const int e_nall = nlocal + atom->nghost;
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias const numneigh = list->numneigh;
|
||||
int ** _noalias const firstneigh = list->firstneigh;
|
||||
const int * _noalias const ilist_full = list->listfull->ilist;
|
||||
const int * _noalias const numneigh_full = list->listfull->numneigh;
|
||||
const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT
|
||||
|
||||
const flt_t cutsq_custom = cutoff_custom * cutoff_custom;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
int tid, ifrom, ito;
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, comm->nthreads);
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
// loop over parent full list
|
||||
for (int ii = ifrom; ii < ito; ii++) {
|
||||
int n = 0;
|
||||
int *neighptr = ipage.vget();
|
||||
|
||||
const int i = ilist_full[ii];
|
||||
const flt_t xtmp = x[i].x;
|
||||
const flt_t ytmp = x[i].y;
|
||||
const flt_t ztmp = x[i].z;
|
||||
|
||||
// loop over full neighbor list
|
||||
|
||||
const int * _noalias const jlist = firstneigh_full[i];
|
||||
const int jnum = numneigh_full[i];
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int joriginal = jlist[jj];
|
||||
const int j = joriginal & NEIGHMASK;
|
||||
int addme = 1;
|
||||
if (j < nlocal) {
|
||||
if (i > j) addme = 0;
|
||||
} else {
|
||||
if (x[j].z < ztmp) addme = 0;
|
||||
if (x[j].z == ztmp) {
|
||||
if (x[j].y < ytmp) addme = 0;
|
||||
if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// trim to shorter cutoff
|
||||
|
||||
const flt_t delx = xtmp - x[j].x;
|
||||
const flt_t dely = ytmp - x[j].y;
|
||||
const flt_t delz = ztmp - x[j].z;
|
||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if (rsq > cutsq_custom) addme = 0;
|
||||
|
||||
if (addme)
|
||||
neighptr[n++] = joriginal;
|
||||
}
|
||||
|
||||
ilist[ii] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
|
||||
int pad_end = n;
|
||||
IP_PRE_neighbor_pad(pad_end, 0);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
|
||||
avg=INTEL_COMPILE_WIDTH/2
|
||||
#endif
|
||||
for ( ; n < pad_end; n++)
|
||||
neighptr[n] = e_nall;
|
||||
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
}
|
||||
list->inum = inum_full;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
build half list from full 3-body list and trim to shorter cutoff
|
||||
half list is already stored as first part of 3-body list
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairHalffullNewtonTrimIntel::build_t3(NeighList *list, int *numhalf,
|
||||
IntelBuffers<flt_t,acc_t> *buffers)
|
||||
{
|
||||
const int inum_full = list->listfull->inum;
|
||||
const int e_nall = atom->nlocal + atom->nghost;
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias const numneigh = list->numneigh;
|
||||
int ** _noalias const firstneigh = list->firstneigh;
|
||||
const int * _noalias const ilist_full = list->listfull->ilist;
|
||||
const int * _noalias const numneigh_full = numhalf;
|
||||
const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT
|
||||
|
||||
const flt_t cutsq_custom = cutoff_custom * cutoff_custom;
|
||||
|
||||
int packthreads = 1;
|
||||
if (comm->nthreads > INTEL_HTHREADS) packthreads = comm->nthreads;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel if (packthreads > 1)
|
||||
#endif
|
||||
{
|
||||
int tid, ifrom, ito;
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, inum_full, packthreads);
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
// loop over parent full list
|
||||
for (int ii = ifrom; ii < ito; ii++) {
|
||||
int n = 0;
|
||||
int *neighptr = ipage.vget();
|
||||
|
||||
const int i = ilist_full[ii];
|
||||
const flt_t xtmp = x[i].x;
|
||||
const flt_t ytmp = x[i].y;
|
||||
const flt_t ztmp = x[i].z;
|
||||
|
||||
// loop over full neighbor list
|
||||
|
||||
const int * _noalias const jlist = firstneigh_full[i];
|
||||
const int jnum = numneigh_full[ii];
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int joriginal = jlist[jj];
|
||||
const int j = joriginal & NEIGHMASK;
|
||||
int addme = 1;
|
||||
|
||||
// trim to shorter cutoff
|
||||
|
||||
const flt_t delx = xtmp - x[j].x;
|
||||
const flt_t dely = ytmp - x[j].y;
|
||||
const flt_t delz = ztmp - x[j].z;
|
||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if (rsq > cutsq_custom) addme = 0;
|
||||
|
||||
if (addme)
|
||||
neighptr[n++] = joriginal;
|
||||
}
|
||||
|
||||
ilist[ii] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
|
||||
int pad_end = n;
|
||||
IP_PRE_neighbor_pad(pad_end, 0);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
|
||||
avg=INTEL_COMPILE_WIDTH/2
|
||||
#endif
|
||||
for ( ; n < pad_end; n++)
|
||||
neighptr[n] = e_nall;
|
||||
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
}
|
||||
list->inum = inum_full;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Dispatch to the templated builder matching the precision mode reported by
// _fix. When _fix->three_body_neighbor() is zero, build_t() is used.
// Otherwise the half counts are fetched with get_list_data3() and handed
// to build_t3().
void NPairHalffullNewtonTrimIntel::build(NeighList *list)
{
  const bool pair_only = (_fix->three_body_neighbor() == 0);
  const auto mode = _fix->precision();

  if (mode == FixIntel::PREC_MODE_MIXED) {
    auto *buffers = _fix->get_mixed_buffers();
    if (pair_only) {
      build_t(list, buffers);
    } else {
      int *nhalf = nullptr;
      int *cnum = nullptr;
      buffers->get_list_data3(list->listfull, nhalf, cnum);
      build_t3<float>(list, nhalf, buffers);
    }
  } else if (mode == FixIntel::PREC_MODE_DOUBLE) {
    auto *buffers = _fix->get_double_buffers();
    if (pair_only) {
      build_t(list, buffers);
    } else {
      int *nhalf = nullptr;
      int *cnum = nullptr;
      buffers->get_list_data3(list->listfull, nhalf, cnum);
      build_t3<double>(list, nhalf, buffers);
    }
  } else {
    // any other mode uses the single precision buffers
    auto *buffers = _fix->get_single_buffers();
    if (pair_only) {
      build_t(list, buffers);
    } else {
      int *nhalf = nullptr;
      int *cnum = nullptr;
      buffers->get_list_data3(list->listfull, nhalf, cnum);
      build_t3<float>(list, nhalf, buffers);
    }
  }
}
|
||||
61
src/INTEL/npair_halffull_newton_trim_intel.h
Normal file
61
src/INTEL/npair_halffull_newton_trim_intel.h
Normal file
@ -0,0 +1,61 @@
|
||||
// clang-format off
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
// clang-format off
|
||||
NPairStyle(halffull/newton/trim/intel,
|
||||
NPairHalffullNewtonTrimIntel,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI| NP_TRIM | NP_INTEL);
|
||||
|
||||
NPairStyle(halffull/newton/skip/trim/intel,
|
||||
NPairHalffullNewtonTrimIntel,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_INTEL);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_HALFFULL_NEWTON_TRIM_INTEL_H
|
||||
#define LMP_NPAIR_HALFFULL_NEWTON_TRIM_INTEL_H
|
||||
|
||||
#include "fix_intel.h"
|
||||
#include "npair.h"
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairHalffullNewtonTrimIntel : public NPair {
|
||||
public:
|
||||
NPairHalffullNewtonTrimIntel(class LAMMPS *);
|
||||
void build(class NeighList *) override;
|
||||
|
||||
protected:
|
||||
FixIntel *_fix;
|
||||
|
||||
template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
|
||||
|
||||
template <class flt_t, class acc_t> void build_t3(NeighList *, int *, IntelBuffers<flt_t, acc_t> *);
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
#endif
|
||||
138
src/INTEL/npair_trim_intel.cpp
Normal file
138
src/INTEL/npair_trim_intel.cpp
Normal file
@ -0,0 +1,138 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_trim_intel.h"
|
||||
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "error.h"
|
||||
#include "modify.h"
|
||||
#include "my_page.h"
|
||||
#include "neigh_list.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Look up the fix with ID "package_intel" and store it in _fix.
// If no such fix exists, the failure is reported through error->all().
NPairTrimIntel::NPairTrimIntel(LAMMPS *lmp) : NPair(lmp)
{
  auto *package_fix = modify->get_fix_by_id("package_intel");
  _fix = static_cast<FixIntel *>(package_fix);
  if (_fix == nullptr)
    error->all(FLERR, "The 'package intel' command is required for /intel styles");
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
trim from copy list to shorter cutoff
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void NPairTrimIntel::build_t(NeighList *list,
|
||||
IntelBuffers<flt_t,acc_t> *buffers)
|
||||
{
|
||||
const int inum_copy = list->listcopy->inum;
|
||||
const int nlocal = atom->nlocal;
|
||||
const int e_nall = nlocal + atom->nghost;
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias const numneigh = list->numneigh;
|
||||
int ** _noalias const firstneigh = list->firstneigh;
|
||||
const int * _noalias const ilist_copy = list->listcopy->ilist;
|
||||
const int * _noalias const numneigh_copy = list->listcopy->numneigh;
|
||||
const int ** _noalias const firstneigh_copy = (const int ** const)list->listcopy->firstneigh; // NOLINT
|
||||
|
||||
const flt_t cutsq_custom = cutoff_custom * cutoff_custom;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
int tid, ifrom, ito;
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, inum_copy, comm->nthreads);
|
||||
|
||||
// each thread has its own page allocator
|
||||
MyPage<int> &ipage = list->ipage[tid];
|
||||
ipage.reset();
|
||||
|
||||
// loop over parent copy list
|
||||
for (int ii = ifrom; ii < ito; ii++) {
|
||||
int n = 0;
|
||||
int *neighptr = ipage.vget();
|
||||
|
||||
const int i = ilist_copy[ii];
|
||||
const flt_t xtmp = x[i].x;
|
||||
const flt_t ytmp = x[i].y;
|
||||
const flt_t ztmp = x[i].z;
|
||||
|
||||
// loop over copy neighbor list
|
||||
|
||||
const int * _noalias const jlist = firstneigh_copy[i];
|
||||
const int jnum = numneigh_copy[i];
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int joriginal = jlist[jj];
|
||||
const int j = joriginal & NEIGHMASK;
|
||||
int addme = 1;
|
||||
|
||||
// trim to shorter cutoff
|
||||
|
||||
const flt_t delx = xtmp - x[j].x;
|
||||
const flt_t dely = ytmp - x[j].y;
|
||||
const flt_t delz = ztmp - x[j].z;
|
||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if (rsq > cutsq_custom) addme = 0;
|
||||
|
||||
if (addme)
|
||||
neighptr[n++] = joriginal;
|
||||
}
|
||||
|
||||
ilist[ii] = i;
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
|
||||
int pad_end = n;
|
||||
IP_PRE_neighbor_pad(pad_end, 0);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma loop_count min=1, max=INTEL_COMPILE_WIDTH-1, \
|
||||
avg=INTEL_COMPILE_WIDTH/2
|
||||
#endif
|
||||
for ( ; n < pad_end; n++)
|
||||
neighptr[n] = e_nall;
|
||||
|
||||
ipage.vgot(n);
|
||||
if (ipage.status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
}
|
||||
list->inum = inum_copy;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Dispatch to the build_t() instantiation matching the precision mode
// reported by _fix; any mode other than mixed or double uses the single
// precision buffers.
void NPairTrimIntel::build(NeighList *list)
{
  const auto mode = _fix->precision();

  if (mode == FixIntel::PREC_MODE_MIXED) {
    build_t(list, _fix->get_mixed_buffers());
    return;
  }
  if (mode == FixIntel::PREC_MODE_DOUBLE) {
    build_t(list, _fix->get_double_buffers());
    return;
  }
  build_t(list, _fix->get_single_buffers());
}
|
||||
53
src/INTEL/npair_trim_intel.h
Normal file
53
src/INTEL/npair_trim_intel.h
Normal file
@ -0,0 +1,53 @@
|
||||
// clang-format off
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
// clang-format off
|
||||
NPairStyle(trim/intel,
|
||||
NPairTrimIntel,
|
||||
NP_COPY | NP_TRIM | NP_INTEL);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_TRIM_INTEL_H
|
||||
#define LMP_NPAIR_TRIM_INTEL_H
|
||||
|
||||
#include "fix_intel.h"
|
||||
#include "npair.h"
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairTrimIntel : public NPair {
|
||||
public:
|
||||
NPairTrimIntel(class LAMMPS *);
|
||||
void build(class NeighList *) override;
|
||||
|
||||
protected:
|
||||
FixIntel *_fix;
|
||||
|
||||
template <class flt_t, class acc_t> void build_t(NeighList *, IntelBuffers<flt_t, acc_t> *);
|
||||
};
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@ -1101,7 +1101,11 @@ void PairSWIntel::allocate()
|
||||
|
||||
void PairSWIntel::init_style()
|
||||
{
|
||||
// there is no support for skipping threebody loops (yet)
|
||||
bool tmp_threebody = skip_threebody_flag;
|
||||
skip_threebody_flag = false;
|
||||
PairSW::init_style();
|
||||
skip_threebody_flag = tmp_threebody;
|
||||
|
||||
map[0] = map[1];
|
||||
|
||||
|
||||
@ -483,7 +483,7 @@ void PairILPTMD::calc_normal()
|
||||
}
|
||||
}
|
||||
//############################ For the edge atoms of TMD ################################
|
||||
else if (cont > 1 && cont < Nnei) {
|
||||
else if (cont < Nnei) {
|
||||
if (strcmp(elements[itype], "Mo") == 0 || strcmp(elements[itype], "W") == 0 ||
|
||||
strcmp(elements[itype], "S") == 0 || strcmp(elements[itype], "Se") == 0) {
|
||||
// derivatives of Ni[l] respect to the cont neighbors
|
||||
|
||||
@ -182,6 +182,13 @@ action kokkos_base.h
|
||||
action kokkos_base_fft.h fft3d.h
|
||||
action kokkos_few.h
|
||||
action kokkos_type.h
|
||||
action meam_kokkos.h meam.h
|
||||
action meam_dens_final_kokkos.h meam_dens_final.cpp
|
||||
action meam_dens_init_kokkos.h meam_dens_init.cpp
|
||||
action meam_force_kokkos.h meam_force.cpp
|
||||
action meam_funcs_kokkos.h meam_funcs.cpp
|
||||
action meam_impl_kokkos.h meam_impl.cpp
|
||||
action meam_setup_done_kokkos.h meam_setup_done.cpp
|
||||
action memory_kokkos.h
|
||||
action modify_kokkos.cpp
|
||||
action modify_kokkos.h
|
||||
@ -197,6 +204,8 @@ action npair_halffull_kokkos.cpp
|
||||
action npair_halffull_kokkos.h
|
||||
action npair_skip_kokkos.cpp
|
||||
action npair_skip_kokkos.h
|
||||
action npair_trim_kokkos.cpp
|
||||
action npair_trim_kokkos.h
|
||||
action npair_kokkos.cpp
|
||||
action npair_kokkos.h
|
||||
action npair_ssa_kokkos.cpp npair_half_bin_newton_ssa.cpp
|
||||
@ -287,6 +296,8 @@ action pair_lj_gromacs_kokkos.cpp pair_lj_gromacs.cpp
|
||||
action pair_lj_gromacs_kokkos.h pair_lj_gromacs.h
|
||||
action pair_lj_sdk_kokkos.cpp pair_lj_sdk.cpp
|
||||
action pair_lj_sdk_kokkos.h pair_lj_sdk.h
|
||||
action pair_meam_kokkos.cpp pair_meam.cpp
|
||||
action pair_meam_kokkos.h pair_meam.h
|
||||
action pair_morse_kokkos.cpp
|
||||
action pair_morse_kokkos.h
|
||||
action pair_multi_lucy_rx_kokkos.cpp pair_multi_lucy_rx.cpp
|
||||
|
||||
@ -1391,6 +1391,9 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int n
|
||||
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
|
||||
ExecutionSpace space) {
|
||||
const size_t elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
|
||||
|
||||
while (nlocal + nrecv/elements >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecAngleKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
|
||||
@ -649,6 +649,8 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
|
||||
while (nlocal + nrecv/11 >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
|
||||
@ -845,6 +845,9 @@ int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
|
||||
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
|
||||
ExecutionSpace space) {
|
||||
const size_t elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom;
|
||||
|
||||
while (nlocal + nrecv/elements >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecBondKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
|
||||
@ -774,6 +774,8 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor {
|
||||
int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
|
||||
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
|
||||
ExecutionSpace space) {
|
||||
while (nlocal + nrecv/12 >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
|
||||
@ -1505,6 +1505,8 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
|
||||
while (nlocal + nrecv/17 >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecDPDKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
|
||||
@ -1186,6 +1186,9 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
|
||||
ExecutionSpace space) {
|
||||
const size_t elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
|
||||
while (nlocal + nrecv/elements >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecFullKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
|
||||
@ -1594,6 +1594,9 @@ int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,i
|
||||
ExecutionSpace space) {
|
||||
const size_t elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
|
||||
while (nlocal + nrecv/elements >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
|
||||
@ -2341,6 +2341,8 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
|
||||
while (nlocal + nrecv/16 >= nmax) grow(0);
|
||||
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
|
||||
@ -863,6 +863,8 @@ struct AtomVecSpinKokkos_UnpackExchangeFunctor {
|
||||
int AtomVecSpinKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
|
||||
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
|
||||
ExecutionSpace space) {
|
||||
while (nlocal + nrecv/15 >= nmax) grow(0);
|
||||
|
||||
if(space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecSpinKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
|
||||
@ -109,6 +109,7 @@ void CommKokkos::init()
|
||||
exchange_comm_classic = lmp->kokkos->exchange_comm_classic;
|
||||
forward_comm_classic = lmp->kokkos->forward_comm_classic;
|
||||
forward_pair_comm_classic = lmp->kokkos->forward_pair_comm_classic;
|
||||
reverse_pair_comm_classic = lmp->kokkos->reverse_pair_comm_classic;
|
||||
forward_fix_comm_classic = lmp->kokkos->forward_fix_comm_classic;
|
||||
reverse_comm_classic = lmp->kokkos->reverse_comm_classic;
|
||||
exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host;
|
||||
@ -478,12 +479,13 @@ void CommKokkos::forward_comm_device(Pair *pair)
|
||||
int nsize = pair->comm_forward;
|
||||
KokkosBase* pairKKBase = dynamic_cast<KokkosBase*>(pair);
|
||||
|
||||
int nmax = max_buf_pair;
|
||||
for (iswap = 0; iswap < nswap; iswap++) {
|
||||
int n = MAX(max_buf_pair,nsize*sendnum[iswap]);
|
||||
n = MAX(n,nsize*recvnum[iswap]);
|
||||
if (n > max_buf_pair)
|
||||
grow_buf_pair(n);
|
||||
nmax = MAX(nmax,nsize*sendnum[iswap]);
|
||||
nmax = MAX(nmax,nsize*recvnum[iswap]);
|
||||
}
|
||||
if (nmax > max_buf_pair)
|
||||
grow_buf_pair(nmax);
|
||||
|
||||
for (iswap = 0; iswap < nswap; iswap++) {
|
||||
|
||||
@ -545,8 +547,76 @@ void CommKokkos::grow_buf_fix(int n) {
|
||||
|
||||
// Reverse communication invoked by a Pair.
// The CommBrick (host) implementation is used when the pair executes on the
// Host, when the pair does not set reverse_comm_device, or when
// reverse_pair_comm_classic is set; otherwise the templated device
// implementation runs. Exactly one of the two paths is taken per call, so
// the reverse communication is never performed twice.
void CommKokkos::reverse_comm(Pair *pair)
{
  if (pair->execution_space == Host || !pair->reverse_comm_device || reverse_pair_comm_classic) {
    // host path: the send list must be current on the host first
    k_sendlist.sync<LMPHostType>();
    CommBrick::reverse_comm(pair);
  } else {
    // device path: the send list must be current on the device first
    k_sendlist.sync<LMPDeviceType>();
    reverse_comm_device<LMPDeviceType>(pair);
  }
}
|
||||
|
||||
template<class DeviceType>
|
||||
void CommKokkos::reverse_comm_device(Pair *pair)
|
||||
{
|
||||
int iswap,n;
|
||||
MPI_Request request;
|
||||
DAT::tdual_xfloat_1d k_buf_tmp;
|
||||
|
||||
KokkosBase* pairKKBase = dynamic_cast<KokkosBase*>(pair);
|
||||
|
||||
int nsize = MAX(pair->comm_reverse,pair->comm_reverse_off);
|
||||
|
||||
int nmax = max_buf_pair;
|
||||
for (iswap = 0; iswap < nswap; iswap++) {
|
||||
nmax = MAX(nmax,nsize*sendnum[iswap]);
|
||||
nmax = MAX(nmax,nsize*recvnum[iswap]);
|
||||
}
|
||||
if (nmax > max_buf_pair)
|
||||
grow_buf_pair(nmax);
|
||||
|
||||
for (iswap = nswap-1; iswap >= 0; iswap--) {
|
||||
|
||||
// pack buffer
|
||||
|
||||
n = pairKKBase->pack_reverse_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send_pair);
|
||||
DeviceType().fence();
|
||||
|
||||
// exchange with another proc
|
||||
// if self, set recv buffer to send buffer
|
||||
|
||||
double* buf_send_pair;
|
||||
double* buf_recv_pair;
|
||||
if (lmp->kokkos->gpu_aware_flag) {
|
||||
buf_send_pair = k_buf_send_pair.view<DeviceType>().data();
|
||||
buf_recv_pair = k_buf_recv_pair.view<DeviceType>().data();
|
||||
} else {
|
||||
k_buf_send_pair.modify<DeviceType>();
|
||||
k_buf_send_pair.sync<LMPHostType>();
|
||||
buf_send_pair = k_buf_send_pair.h_view.data();
|
||||
buf_recv_pair = k_buf_recv_pair.h_view.data();
|
||||
}
|
||||
|
||||
if (sendproc[iswap] != me) {
|
||||
if (sendnum[iswap])
|
||||
MPI_Irecv(buf_recv_pair,nsize*sendnum[iswap],MPI_DOUBLE,sendproc[iswap],0,world,&request);
|
||||
if (recvnum[iswap])
|
||||
MPI_Send(buf_send_pair,n,MPI_DOUBLE,recvproc[iswap],0,world);
|
||||
if (sendnum[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
|
||||
if (!lmp->kokkos->gpu_aware_flag) {
|
||||
k_buf_recv_pair.modify<LMPHostType>();
|
||||
k_buf_recv_pair.sync<DeviceType>();
|
||||
}
|
||||
k_buf_tmp = k_buf_recv_pair;
|
||||
} else k_buf_tmp = k_buf_send_pair;
|
||||
|
||||
// unpack buffer
|
||||
|
||||
pairKKBase->unpack_reverse_comm_kokkos(sendnum[iswap],k_sendlist,
|
||||
iswap,k_buf_tmp);
|
||||
DeviceType().fence();
|
||||
}
|
||||
}
|
||||
|
||||
void CommKokkos::forward_comm(Dump *dump)
|
||||
|
||||
@ -27,6 +27,7 @@ class CommKokkos : public CommBrick {
|
||||
bool exchange_comm_classic;
|
||||
bool forward_comm_classic;
|
||||
bool forward_pair_comm_classic;
|
||||
bool reverse_pair_comm_classic;
|
||||
bool forward_fix_comm_classic;
|
||||
bool reverse_comm_classic;
|
||||
bool exchange_comm_on_host;
|
||||
@ -58,6 +59,7 @@ class CommKokkos : public CommBrick {
|
||||
template<class DeviceType> void forward_comm_device(int dummy);
|
||||
template<class DeviceType> void reverse_comm_device();
|
||||
template<class DeviceType> void forward_comm_device(Pair *pair);
|
||||
template<class DeviceType> void reverse_comm_device(Pair *pair);
|
||||
template<class DeviceType> void forward_comm_device(Fix *fix, int size=0);
|
||||
template<class DeviceType> void exchange_device();
|
||||
template<class DeviceType> void borders_device();
|
||||
|
||||
@ -11,11 +11,16 @@
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "compute_ave_sphere_atom_kokkos.h"
|
||||
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "comm.h"
|
||||
#include "domain.h"
|
||||
#include "error.h"
|
||||
#include "force.h"
|
||||
#include "memory_kokkos.h"
|
||||
@ -105,11 +110,19 @@ void ComputeAveSphereAtomKokkos<DeviceType>::compute_peratom()
|
||||
// compute properties for each atom in group
|
||||
// use full neighbor list to count atoms less than cutoff
|
||||
|
||||
atomKK->sync(execution_space,X_MASK|V_MASK|TYPE_MASK|MASK_MASK);
|
||||
atomKK->sync(execution_space,X_MASK|V_MASK|RMASS_MASK|TYPE_MASK|MASK_MASK);
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
v = atomKK->k_v.view<DeviceType>();
|
||||
rmass = atomKK->k_rmass.view<DeviceType>();
|
||||
mass = atomKK->k_mass.view<DeviceType>();
|
||||
type = atomKK->k_type.view<DeviceType>();
|
||||
mask = atomKK->k_mask.view<DeviceType>();
|
||||
|
||||
adof = domain->dimension;
|
||||
mvv2e = force->mvv2e;
|
||||
mv2d = force->mv2d;
|
||||
boltz = force->boltz;
|
||||
|
||||
Kokkos::deep_copy(d_result,0.0);
|
||||
|
||||
copymode = 1;
|
||||
@ -125,8 +138,13 @@ template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ComputeAveSphereAtomKokkos<DeviceType>::operator()(TagComputeAveSphereAtom, const int &ii) const
|
||||
{
|
||||
double massone_i,massone_j;
|
||||
|
||||
const int i = d_ilist[ii];
|
||||
if (mask[i] & groupbit) {
|
||||
if (rmass.data()) massone_i = rmass[i];
|
||||
else massone_i = mass[type[i]];
|
||||
|
||||
const X_FLOAT xtmp = x(i,0);
|
||||
const X_FLOAT ytmp = x(i,1);
|
||||
const X_FLOAT ztmp = x(i,2);
|
||||
@ -135,14 +153,17 @@ void ComputeAveSphereAtomKokkos<DeviceType>::operator()(TagComputeAveSphereAtom,
|
||||
// i atom contribution
|
||||
|
||||
int count = 1;
|
||||
double vsum[3];
|
||||
vsum[0] = v(i,0);
|
||||
vsum[1] = v(i,1);
|
||||
vsum[2] = v(i,2);
|
||||
double totalmass = massone_i;
|
||||
double p[3];
|
||||
p[0] = v(i,0)*massone_i;
|
||||
p[1] = v(i,1)*massone_i;
|
||||
p[2] = v(i,2)*massone_i;
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = d_neighbors(i,jj);
|
||||
j &= NEIGHMASK;
|
||||
if (rmass.data()) massone_j = rmass[j];
|
||||
else massone_j = mass[type[j]];
|
||||
|
||||
const F_FLOAT delx = x(j,0) - xtmp;
|
||||
const F_FLOAT dely = x(j,1) - ytmp;
|
||||
@ -150,44 +171,45 @@ void ComputeAveSphereAtomKokkos<DeviceType>::operator()(TagComputeAveSphereAtom,
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
if (rsq < cutsq) {
|
||||
count++;
|
||||
vsum[0] += v(j,0);
|
||||
vsum[1] += v(j,1);
|
||||
vsum[2] += v(j,2);
|
||||
totalmass += massone_j;
|
||||
p[0] += v(j,0)*massone_j;
|
||||
p[1] += v(j,1)*massone_j;
|
||||
p[2] += v(j,2)*massone_j;
|
||||
}
|
||||
}
|
||||
|
||||
double vavg[3];
|
||||
vavg[0] = vsum[0]/count;
|
||||
vavg[1] = vsum[1]/count;
|
||||
vavg[2] = vsum[2]/count;
|
||||
double vcom[3];
|
||||
vcom[0] = p[0]/totalmass;
|
||||
vcom[1] = p[1]/totalmass;
|
||||
vcom[2] = p[2]/totalmass;
|
||||
|
||||
// i atom contribution
|
||||
|
||||
count = 1;
|
||||
double vnet[3];
|
||||
vnet[0] = v(i,0) - vavg[0];
|
||||
vnet[1] = v(i,1) - vavg[1];
|
||||
vnet[2] = v(i,2) - vavg[2];
|
||||
double ke_sum = vnet[0]*vnet[0] + vnet[1]*vnet[1] + vnet[2]*vnet[2];
|
||||
vnet[0] = v(i,0) - vcom[0];
|
||||
vnet[1] = v(i,1) - vcom[1];
|
||||
vnet[2] = v(i,2) - vcom[2];
|
||||
double ke_sum = massone_i * (vnet[0]*vnet[0] + vnet[1]*vnet[1] + vnet[2]*vnet[2]);
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = d_neighbors(i,jj);
|
||||
j &= NEIGHMASK;
|
||||
if (rmass.data()) massone_j = rmass[j];
|
||||
else massone_j = mass[type[j]];
|
||||
|
||||
const F_FLOAT delx = x(j,0) - xtmp;
|
||||
const F_FLOAT dely = x(j,1) - ytmp;
|
||||
const F_FLOAT delz = x(j,2) - ztmp;
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
if (rsq < cutsq) {
|
||||
count++;
|
||||
vnet[0] = v(j,0) - vavg[0];
|
||||
vnet[1] = v(j,1) - vavg[1];
|
||||
vnet[2] = v(j,2) - vavg[2];
|
||||
ke_sum += vnet[0]*vnet[0] + vnet[1]*vnet[1] + vnet[2]*vnet[2];
|
||||
vnet[0] = v(j,0) - vcom[0];
|
||||
vnet[1] = v(j,1) - vcom[1];
|
||||
vnet[2] = v(j,2) - vcom[2];
|
||||
ke_sum += massone_j * (vnet[0]*vnet[0] + vnet[1]*vnet[1] + vnet[2]*vnet[2]);
|
||||
}
|
||||
}
|
||||
double density = count/sphere_vol;
|
||||
double temp = ke_sum/3.0/count;
|
||||
double density = mv2d*totalmass/volume;
|
||||
double temp = mvv2e*ke_sum/(adof*count*boltz);
|
||||
d_result(i,0) = density;
|
||||
d_result(i,1) = temp;
|
||||
}
|
||||
|
||||
@ -46,13 +46,18 @@ template <class DeviceType> class ComputeAveSphereAtomKokkos : public ComputeAve
|
||||
void operator()(TagComputeAveSphereAtom, const int &) const;
|
||||
|
||||
private:
|
||||
typename AT::t_x_array_randomread x;
|
||||
typename AT::t_v_array_randomread v;
|
||||
double adof,mvv2e,mv2d,boltz;
|
||||
|
||||
typename AT::t_x_array x;
|
||||
typename AT::t_v_array v;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d rmass;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d mass;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d type;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d mask;
|
||||
|
||||
typename AT::t_neighbors_2d d_neighbors;
|
||||
typename AT::t_int_1d_randomread d_ilist;
|
||||
typename AT::t_int_1d_randomread d_numneigh;
|
||||
typename AT::t_int_1d d_ilist;
|
||||
typename AT::t_int_1d d_numneigh;
|
||||
|
||||
DAT::tdual_float_2d k_result;
|
||||
typename AT::t_float_2d d_result;
|
||||
|
||||
@ -91,6 +91,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
exchange_comm_changed = 0;
|
||||
forward_comm_changed = 0;
|
||||
forward_pair_comm_changed = 0;
|
||||
reverse_pair_comm_changed = 0;
|
||||
forward_fix_comm_changed = 0;
|
||||
reverse_comm_changed = 0;
|
||||
|
||||
@ -239,7 +240,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
newtonflag = 0;
|
||||
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 0;
|
||||
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 0;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
} else {
|
||||
@ -253,7 +254,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
newtonflag = 1;
|
||||
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
}
|
||||
@ -394,17 +395,17 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"no") == 0) {
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
} else if (strcmp(arg[iarg+1],"host") == 0) {
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 1;
|
||||
} else if (strcmp(arg[iarg+1],"device") == 0) {
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 0;
|
||||
forward_pair_comm_classic = reverse_pair_comm_classic = forward_fix_comm_classic = 0;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
} else error->all(FLERR,"Illegal package kokkos command");
|
||||
@ -441,6 +442,14 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
forward_pair_comm_changed = 0;
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"comm/pair/reverse") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"no") == 0) reverse_pair_comm_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"host") == 0) reverse_pair_comm_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"device") == 0) reverse_pair_comm_classic = 0;
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
reverse_pair_comm_changed = 0;
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"comm/fix/forward") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"no") == 0) forward_fix_comm_classic = 1;
|
||||
@ -515,6 +524,10 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
forward_pair_comm_classic = 1;
|
||||
forward_pair_comm_changed = 1;
|
||||
}
|
||||
if (reverse_pair_comm_classic == 0) {
|
||||
reverse_pair_comm_classic = 1;
|
||||
reverse_pair_comm_changed = 1;
|
||||
}
|
||||
if (forward_fix_comm_classic == 0) {
|
||||
forward_fix_comm_classic = 1;
|
||||
forward_fix_comm_changed = 1;
|
||||
@ -540,6 +553,10 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
forward_pair_comm_classic = 0;
|
||||
forward_pair_comm_changed = 0;
|
||||
}
|
||||
if (reverse_pair_comm_changed) {
|
||||
reverse_pair_comm_classic = 0;
|
||||
reverse_pair_comm_changed = 0;
|
||||
}
|
||||
if (forward_fix_comm_changed) {
|
||||
forward_fix_comm_classic = 0;
|
||||
forward_fix_comm_changed = 0;
|
||||
|
||||
@ -30,6 +30,7 @@ class KokkosLMP : protected Pointers {
|
||||
int exchange_comm_classic;
|
||||
int forward_comm_classic;
|
||||
int forward_pair_comm_classic;
|
||||
int reverse_pair_comm_classic;
|
||||
int forward_fix_comm_classic;
|
||||
int reverse_comm_classic;
|
||||
int exchange_comm_on_host;
|
||||
@ -38,6 +39,7 @@ class KokkosLMP : protected Pointers {
|
||||
int exchange_comm_changed;
|
||||
int forward_comm_changed;
|
||||
int forward_pair_comm_changed;
|
||||
int reverse_pair_comm_changed;
|
||||
int forward_fix_comm_changed;
|
||||
int reverse_comm_changed;
|
||||
int nthreads,ngpus;
|
||||
|
||||
@ -29,6 +29,10 @@ class KokkosBase {
|
||||
int, int *) {return 0;};
|
||||
virtual void unpack_forward_comm_kokkos(int, int, DAT::tdual_xfloat_1d &) {}
|
||||
|
||||
virtual int pack_reverse_comm_kokkos(int, int, DAT::tdual_xfloat_1d &) {return 0;};
|
||||
virtual void unpack_reverse_comm_kokkos(int, DAT::tdual_int_2d,
|
||||
int, DAT::tdual_xfloat_1d &) {}
|
||||
|
||||
// Fix
|
||||
virtual int pack_forward_comm_fix_kokkos(int, DAT::tdual_int_2d,
|
||||
int, DAT::tdual_xfloat_1d &,
|
||||
|
||||
@ -477,59 +477,3 @@ double MathSpecialKokkos::erfcx_y100(const double y100)
|
||||
return 1.0;
|
||||
} /* erfcx_y100 */
|
||||
|
||||
/* optimizer friendly implementation of exp2(x).
|
||||
*
|
||||
* strategy:
|
||||
*
|
||||
* split argument into an integer part and a fraction:
|
||||
* ipart = floor(x+0.5);
|
||||
* fpart = x - ipart;
|
||||
*
|
||||
* compute exp2(ipart) from setting the ieee754 exponent
|
||||
* compute exp2(fpart) using a pade' approximation for x in [-0.5;0.5[
|
||||
*
|
||||
* the result becomes: exp2(x) = exp2(ipart) * exp2(fpart)
|
||||
*/
|
||||
|
||||
/* IEEE 754 double precision floating point data manipulation */
|
||||
typedef union
|
||||
{
|
||||
double f;
|
||||
uint64_t u;
|
||||
struct {int32_t i0,i1;} s;
|
||||
} udi_t;
|
||||
|
||||
static const double fm_exp2_q[] = {
|
||||
/* 1.00000000000000000000e0, */
|
||||
2.33184211722314911771e2,
|
||||
4.36821166879210612817e3
|
||||
};
|
||||
static const double fm_exp2_p[] = {
|
||||
2.30933477057345225087e-2,
|
||||
2.02020656693165307700e1,
|
||||
1.51390680115615096133e3
|
||||
};
|
||||
|
||||
double MathSpecialKokkos::exp2_x86(double x)
|
||||
{
|
||||
double ipart, fpart, px, qx;
|
||||
udi_t epart;
|
||||
|
||||
ipart = floor(x+0.5);
|
||||
fpart = x - ipart;
|
||||
epart.s.i0 = 0;
|
||||
epart.s.i1 = (((int) ipart) + 1023) << 20;
|
||||
|
||||
x = fpart*fpart;
|
||||
|
||||
px = fm_exp2_p[0];
|
||||
px = px*x + fm_exp2_p[1];
|
||||
qx = x + fm_exp2_q[0];
|
||||
px = px*x + fm_exp2_p[2];
|
||||
qx = qx*x + fm_exp2_q[1];
|
||||
|
||||
px = px * fpart;
|
||||
|
||||
x = 1.0 + 2.0*(px/(qx-px));
|
||||
return epart.f*x;
|
||||
}
|
||||
|
||||
@ -22,79 +22,233 @@ namespace LAMMPS_NS {
|
||||
|
||||
namespace MathSpecialKokkos {
|
||||
|
||||
/*! Fast tabulated factorial function
|
||||
*
|
||||
* This function looks up pre-computed factorial values for arguments of n = 0
|
||||
* to a maximum of 167, which is the maximal value representable by a double
|
||||
* precision floating point number. For other values of n a NaN value is returned.
|
||||
*
|
||||
* \param n argument (valid: 0 <= n <= 167)
|
||||
* \return value of n! as double precision number or NaN */
|
||||
|
||||
extern double factorial(const int n);
|
||||
|
||||
/* optimizer friendly implementation of exp2(x).
|
||||
*
|
||||
* strategy:
|
||||
*
|
||||
* split argument into an integer part and a fraction:
|
||||
* ipart = floor(x+0.5);
|
||||
* fpart = x - ipart;
|
||||
*
|
||||
* compute exp2(ipart) from setting the ieee754 exponent
|
||||
* compute exp2(fpart) using a pade' approximation for x in [-0.5;0.5[
|
||||
*
|
||||
* the result becomes: exp2(x) = exp2(ipart) * exp2(fpart)
|
||||
*/
|
||||
|
||||
/* IEEE 754 double precision floating point data manipulation */
|
||||
typedef union
|
||||
{
|
||||
double f;
|
||||
uint64_t u;
|
||||
struct {int32_t i0,i1;} s;
|
||||
} udi_t;
|
||||
|
||||
/* double precision constants */
|
||||
#define FM_DOUBLE_LOG2OFE 1.4426950408889634074
|
||||
|
||||
/*! Fast implementation of 2^x without argument checks for little endian CPUs
|
||||
*
|
||||
* This function implements an optimized version of pow(2.0, x) that does not
|
||||
* check for valid arguments and thus may only be used where arguments are well
|
||||
* behaved. The implementation makes assumptions about the layout of double
|
||||
* precision floating point numbers in memory and thus will only work on little
|
||||
* endian CPUs. If little endian cannot be safely detected, the result of
|
||||
* calling pow(2.0, x) will be returned. This function also is the basis for
|
||||
* the fast exponential fm_exp(x).
|
||||
*
|
||||
* \param x argument
|
||||
* \return value of 2^x as double precision number */
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double exp2_x86(double x)
|
||||
{
|
||||
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
double ipart, fpart, px, qx;
|
||||
udi_t epart;
|
||||
|
||||
const double fm_exp2_q[2] = {
|
||||
/* 1.00000000000000000000e0, */
|
||||
2.33184211722314911771e2,
|
||||
4.36821166879210612817e3
|
||||
};
|
||||
const double fm_exp2_p[3] = {
|
||||
2.30933477057345225087e-2,
|
||||
2.02020656693165307700e1,
|
||||
1.51390680115615096133e3
|
||||
};
|
||||
|
||||
ipart = floor(x+0.5);
|
||||
fpart = x - ipart;
|
||||
epart.s.i0 = 0;
|
||||
epart.s.i1 = (((int) ipart) + 1023) << 20;
|
||||
|
||||
x = fpart*fpart;
|
||||
|
||||
px = fm_exp2_p[0];
|
||||
px = px*x + fm_exp2_p[1];
|
||||
qx = x + fm_exp2_q[0];
|
||||
px = px*x + fm_exp2_p[2];
|
||||
qx = qx*x + fm_exp2_q[1];
|
||||
|
||||
px = px * fpart;
|
||||
|
||||
x = 1.0 + 2.0*(px/(qx-px));
|
||||
return epart.f*x;
|
||||
#else
|
||||
return pow(2.0, x);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*! Fast implementation of exp(x) for little endian CPUs
|
||||
*
|
||||
* This function implements an optimized version of exp(x) for little endian CPUs.
|
||||
* It calls the exp2_x86(x) function with a suitable prefactor to x to return exp(x).
|
||||
* The implementation makes assumptions about the layout of double
|
||||
* precision floating point numbers in memory and thus will only work on little
|
||||
* endian CPUs. If little endian cannot be safely detected, the result of
|
||||
* calling the exp(x) implementation in the standard math library will be returned.
|
||||
*
|
||||
* \param x argument
|
||||
* \return value of e^x as double precision number */
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double fm_exp(double x)
|
||||
{
|
||||
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
if (x < -1022.0/FM_DOUBLE_LOG2OFE) return 0;
|
||||
if (x > 1023.0/FM_DOUBLE_LOG2OFE) return INFINITY;
|
||||
return exp2_x86(FM_DOUBLE_LOG2OFE * x);
|
||||
#else
|
||||
return ::exp(x);
|
||||
#endif
|
||||
}
|
||||
|
||||
// support function for scaled error function complement
|
||||
|
||||
extern double erfcx_y100(const double y100);
|
||||
|
||||
// fast 2**x function without argument checks for little endian CPUs
|
||||
extern double exp2_x86(double x);
|
||||
|
||||
// scaled error function complement exp(x*x)*erfc(x) for coul/long styles
|
||||
/*! Fast scaled error function complement exp(x*x)*erfc(x) for coul/long styles
|
||||
*
|
||||
* This is a portable fast implementation of exp(x*x)*erfc(x) that can be used
|
||||
* in coul/long pair styles as a replacement for the polynomial expansion that
|
||||
* is/was widely used. Unlike the polynomial expansion, that is only accurate
|
||||
* at the level of single precision floating point it provides full double precision
|
||||
* accuracy, but at comparable speed (unlike the erfc() implementation shipped
|
||||
* with GNU standard math library).
|
||||
*
|
||||
* \param x argument
|
||||
* \return value of e^(x*x)*erfc(x) */
|
||||
|
||||
static inline double my_erfcx(const double x)
|
||||
{
|
||||
if (x >= 0.0) return erfcx_y100(400.0/(4.0+x));
|
||||
else return 2.0*exp(x*x) - erfcx_y100(400.0/(4.0-x));
|
||||
if (x >= 0.0)
|
||||
return erfcx_y100(400.0 / (4.0 + x));
|
||||
else
|
||||
return 2.0 * exp(x * x) - erfcx_y100(400.0 / (4.0 - x));
|
||||
}
|
||||
|
||||
// exp(-x*x) for coul/long styles
|
||||
/*! Fast implementation of exp(-x*x) for little endian CPUs for coul/long styles
|
||||
*
|
||||
* This function implements an optimized version of exp(-x*x) based on exp2_x86()
|
||||
* for use with little endian CPUs. If little endian cannot be safely detected,
|
||||
* the result of calling the exp(-x*x) implementation in the standard math
|
||||
* library will be returned.
|
||||
*
|
||||
* \param x argument
|
||||
* \return value of e^(-x*x) as double precision number */
|
||||
|
||||
static inline double expmsq(double x)
|
||||
{
|
||||
x *= x;
|
||||
x *= 1.4426950408889634074; // log_2(e)
|
||||
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
return (x < 1023.0) ? exp2_x86(-x) : 0.0;
|
||||
#else
|
||||
return (x < 1023.0) ? exp2(-x) : 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
// x**2, use instead of pow(x,2.0)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double square(const double &x) { return x*x; }
|
||||
/*! Fast inline version of pow(x, 2.0)
|
||||
*
|
||||
* \param x argument
|
||||
* \return x*x */
|
||||
|
||||
// x**3, use instead of pow(x,3.0)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double cube(const double &x) { return x*x*x; }
|
||||
static double square(const double &x) { return x * x; }
|
||||
|
||||
/*! Fast inline version of pow(x, 3.0)
|
||||
*
|
||||
* \param x argument
|
||||
* \return x*x */
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double cube(const double &x) { return x * x * x; }
|
||||
|
||||
/* Fast inline version of pow(-1.0, n)
|
||||
*
|
||||
* \param n argument (integer)
|
||||
* \return -1 if n is odd, 1.0 if n is even */
|
||||
|
||||
// return -1.0 for odd n, 1.0 for even n, like pow(-1.0,n)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double powsign(const int n) { return (n & 1) ? -1.0 : 1.0; }
|
||||
|
||||
// optimized version of pow(x,n) with n being integer
|
||||
// up to 10x faster than pow(x,y)
|
||||
/* Fast inline version of pow(x,n) for integer n
|
||||
*
|
||||
* This is a version of pow(x,n) optimized for n being integer.
|
||||
* Speedups of up to 10x faster than pow(x,y) have been measured.
|
||||
*
|
||||
* \param n argument (integer)
|
||||
* \return value of x^n */
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double powint(const double &x, const int n) {
|
||||
double yy,ww;
|
||||
static double powint(const double &x, const int n)
|
||||
{
|
||||
double yy, ww;
|
||||
|
||||
if (x == 0.0) return 0.0;
|
||||
int nn = (n > 0) ? n : -n;
|
||||
ww = x;
|
||||
|
||||
for (yy = 1.0; nn != 0; nn >>= 1, ww *=ww)
|
||||
for (yy = 1.0; nn != 0; nn >>= 1, ww *= ww)
|
||||
if (nn & 1) yy *= ww;
|
||||
|
||||
return (n > 0) ? yy : 1.0/yy;
|
||||
return (n > 0) ? yy : 1.0 / yy;
|
||||
}
|
||||
|
||||
// optimized version of (sin(x)/x)**n with n being a _positive_ integer
|
||||
/* Fast inline version of (sin(x)/x)^n as used by PPPM kspace styles
|
||||
*
|
||||
* This is an optimized function to compute (sin(x)/x)^n as frequently used by PPPM.
|
||||
*
|
||||
* \param n argument (integer). Expected to be positive.
|
||||
* \return value of (sin(x)/x)^n */
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double powsinxx(const double &x, int n) {
|
||||
double yy,ww;
|
||||
static double powsinxx(const double &x, int n)
|
||||
{
|
||||
double yy, ww;
|
||||
|
||||
if (x == 0.0) return 1.0;
|
||||
|
||||
ww = sin(x)/x;
|
||||
ww = sin(x) / x;
|
||||
|
||||
for (yy = 1.0; n != 0; n >>= 1, ww *=ww)
|
||||
for (yy = 1.0; n != 0; n >>= 1, ww *= ww)
|
||||
if (n & 1) yy *= ww;
|
||||
|
||||
return yy;
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace MathSpecialKokkos
|
||||
} // namespace LAMMPS_NS
|
||||
|
||||
#endif
|
||||
|
||||
164
src/KOKKOS/meam_dens_final_kokkos.h
Normal file
164
src/KOKKOS/meam_dens_final_kokkos.h
Normal file
@ -0,0 +1,164 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "meam_kokkos.h"
|
||||
#include "math_special.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void
|
||||
MEAMKokkos<DeviceType>::meam_dens_final(int nlocal, int eflag_either, int eflag_global, int eflag_atom,
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d eatom, int ntype, typename AT::t_int_1d type, typename AT::t_int_1d d_map, typename AT::t_int_2d d_scale, int& errorflag, EV_FLOAT &ev_all)
|
||||
{
|
||||
EV_FLOAT ev;
|
||||
this->eflag_either = eflag_either;
|
||||
this->eflag_global = eflag_global;
|
||||
this->eflag_atom = eflag_atom;
|
||||
this->d_eatom = eatom;
|
||||
this->ntype = ntype;
|
||||
this->type = type;
|
||||
this->d_map = d_map;
|
||||
this->d_scale = d_scale;
|
||||
|
||||
Kokkos::deep_copy(d_errorflag,0);
|
||||
|
||||
// Complete the calculation of density
|
||||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMDensFinal>(0,nlocal),*this,ev);
|
||||
ev_all.evdwl += ev.evdwl;
|
||||
copymode = 0;
|
||||
|
||||
auto h_errorflag = Kokkos::create_mirror_view_and_copy(LMPHostType(),d_errorflag);
|
||||
errorflag = h_errorflag();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void MEAMKokkos<DeviceType>::operator()(TagMEAMDensFinal, const int &i, EV_FLOAT& ev) const {
|
||||
|
||||
F_FLOAT rhob, G, dG, Gbar, dGbar, gam, shp[3], Z;
|
||||
F_FLOAT denom, rho_bkgd, Fl;
|
||||
double scaleii;
|
||||
|
||||
int elti = d_map[type[i]];
|
||||
if (elti >= 0) {
|
||||
scaleii = d_scale(type[i],type[i]);
|
||||
d_rho1[i] = 0.0;
|
||||
d_rho2[i] = -1.0 / 3.0 * d_arho2b[i] * d_arho2b[i];
|
||||
d_rho3[i] = 0.0;
|
||||
for (int m = 0; m < 3; m++) {
|
||||
d_rho1[i] += d_arho1(i,m) * d_arho1(i,m);
|
||||
d_rho3[i] -= 3.0 / 5.0 * d_arho3b(i,m) * d_arho3b(i,m);
|
||||
}
|
||||
for (int m = 0; m < 6; m++)
|
||||
d_rho2[i] += v2D[m] * d_arho2(i,m) * d_arho2(i,m);
|
||||
for (int m = 0; m < 10; m++)
|
||||
d_rho3[i] += v3D[m] * d_arho3(i,m) * d_arho3(i,m);
|
||||
|
||||
if (d_rho0[i] > 0.0) {
|
||||
if (ialloy == 1) {
|
||||
d_t_ave(i,0) = fdiv_zero_kk(d_t_ave(i,0), d_tsq_ave(i,0));
|
||||
d_t_ave(i,1) = fdiv_zero_kk(d_t_ave(i,1), d_tsq_ave(i,1));
|
||||
d_t_ave(i,2) = fdiv_zero_kk(d_t_ave(i,2), d_tsq_ave(i,2));
|
||||
} else if (ialloy == 2) {
|
||||
d_t_ave(i,0) = t1_meam[elti];
|
||||
d_t_ave(i,1) = t2_meam[elti];
|
||||
d_t_ave(i,2) = t3_meam[elti];
|
||||
} else {
|
||||
d_t_ave(i,0) /= d_rho0[i];
|
||||
d_t_ave(i,1) /= d_rho0[i];
|
||||
d_t_ave(i,2) /= d_rho0[i];
|
||||
}
|
||||
}
|
||||
|
||||
d_gamma[i] = d_t_ave(i,0) * d_rho1[i] + d_t_ave(i,1) * d_rho2[i] + d_t_ave(i,2) * d_rho3[i];
|
||||
|
||||
if (d_rho0[i] > 0.0)
|
||||
d_gamma[i] /= (d_rho0[i] * d_rho0[i]);
|
||||
|
||||
Z = get_Zij(lattce_meam[elti][elti]);
|
||||
|
||||
G = G_gam(d_gamma[i], ibar_meam[elti], d_errorflag());
|
||||
if (d_errorflag() != 0)
|
||||
return;
|
||||
|
||||
get_shpfcn(lattce_meam[elti][elti], stheta_meam[elti][elti], ctheta_meam[elti][elti], shp);
|
||||
if (ibar_meam[elti] <= 0) {
|
||||
Gbar = 1.0;
|
||||
dGbar = 0.0;
|
||||
} else {
|
||||
if (mix_ref_t == 1)
|
||||
gam = (d_t_ave(i,0) * shp[0] + d_t_ave(i,1) * shp[1] + d_t_ave(i,2) * shp[2]) / (Z * Z);
|
||||
else
|
||||
gam = (t1_meam[elti] * shp[0] + t2_meam[elti] * shp[1] + t3_meam[elti] * shp[2]) /
|
||||
(Z * Z);
|
||||
Gbar = G_gam(gam, ibar_meam[elti], d_errorflag());
|
||||
}
|
||||
d_rho[i] = d_rho0[i] * G;
|
||||
|
||||
if (mix_ref_t == 1) {
|
||||
if (ibar_meam[elti] <= 0) {
|
||||
Gbar = 1.0;
|
||||
dGbar = 0.0;
|
||||
} else {
|
||||
gam = (d_t_ave(i,0) * shp[0] + d_t_ave(i,1) * shp[1] + d_t_ave(i,2) * shp[2]) / (Z * Z);
|
||||
Gbar = dG_gam(gam, ibar_meam[elti], dGbar);
|
||||
}
|
||||
rho_bkgd = rho0_meam[elti] * Z * Gbar;
|
||||
} else {
|
||||
if (bkgd_dyn == 1)
|
||||
rho_bkgd = rho0_meam[elti] * Z;
|
||||
else
|
||||
rho_bkgd = rho_ref_meam[elti];
|
||||
}
|
||||
rhob = d_rho[i] / rho_bkgd;
|
||||
denom = 1.0 / rho_bkgd;
|
||||
|
||||
G = dG_gam(d_gamma[i], ibar_meam[elti], dG);
|
||||
|
||||
d_dgamma1[i] = (G - 2 * dG * d_gamma[i]) * denom;
|
||||
|
||||
if (!iszero_kk(d_rho0[i]))
|
||||
d_dgamma2[i] = (dG / d_rho0[i]) * denom;
|
||||
else
|
||||
d_dgamma2[i] = 0.0;
|
||||
|
||||
// dgamma3 is nonzero only if we are using the "mixed" rule for
|
||||
// computing t in the reference system (which is not correct, but
|
||||
// included for backward compatibility
|
||||
if (mix_ref_t == 1)
|
||||
d_dgamma3[i] = d_rho0[i] * G * dGbar / (Gbar * Z * Z) * denom;
|
||||
else
|
||||
d_dgamma3[i] = 0.0;
|
||||
|
||||
Fl = embedding(A_meam[elti], Ec_meam[elti][elti], rhob, d_frhop[i]);
|
||||
|
||||
if (eflag_either) {
|
||||
Fl *= scaleii;
|
||||
if (eflag_global) {
|
||||
ev.evdwl += Fl;
|
||||
}
|
||||
if (eflag_atom) {
|
||||
d_eatom[i] += Fl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
602
src/KOKKOS/meam_dens_init_kokkos.h
Normal file
602
src/KOKKOS/meam_dens_init_kokkos.h
Normal file
@ -0,0 +1,602 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "meam_kokkos.h"
|
||||
#include "math_special_kokkos.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathSpecialKokkos;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void MEAMKokkos<DeviceType>::operator()(TagMEAMDensInit<NEIGHFLAG>, const int &i) const {
|
||||
int ii, offsetval;
|
||||
ii = d_ilist_half[i];
|
||||
offsetval = d_offset[i];
|
||||
// compute screening function and derivatives
|
||||
this->template getscreen<NEIGHFLAG>(ii, offsetval, x, d_numneigh_half,
|
||||
d_numneigh_full, ntype, type, d_map);
|
||||
|
||||
// calculate intermediate density terms to be communicated
|
||||
this->template calc_rho1<NEIGHFLAG>(ii, ntype, type, d_map, x, d_numneigh_half, offsetval);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void MEAMKokkos<DeviceType>::operator()(TagMEAMZero, const int &i) const {
|
||||
d_rho0[i] = 0.0;
|
||||
d_arho2b[i] = 0.0;
|
||||
d_arho1(i,0) = d_arho1(i,1) = d_arho1(i,2) = 0.0;
|
||||
for (int j = 0; j < 6; j++)
|
||||
d_arho2(i,j) = 0.0;
|
||||
for (int j = 0; j < 10; j++)
|
||||
d_arho3(i,j) = 0.0;
|
||||
d_arho3b(i,0) = d_arho3b(i,1) = d_arho3b(i,2) = 0.0;
|
||||
d_t_ave(i,0) = d_t_ave(i,1) = d_t_ave(i,2) = 0.0;
|
||||
d_tsq_ave(i,0) = d_tsq_ave(i,1) = d_tsq_ave(i,2) = 0.0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void
|
||||
MEAMKokkos<DeviceType>::meam_dens_setup(int atom_nmax, int nall, int n_neigh)
|
||||
{
|
||||
MemoryKokkos *memoryKK = (MemoryKokkos *)memory;
|
||||
|
||||
// grow local arrays if necessary
|
||||
|
||||
if (atom_nmax > nmax) {
|
||||
memoryKK->destroy_kokkos(k_rho,rho);
|
||||
memoryKK->destroy_kokkos(k_rho0,rho0);
|
||||
memoryKK->destroy_kokkos(k_rho1,rho1);
|
||||
memoryKK->destroy_kokkos(k_rho2,rho2);
|
||||
memoryKK->destroy_kokkos(k_rho3,rho3);
|
||||
memoryKK->destroy_kokkos(k_frhop,frhop);
|
||||
memoryKK->destroy_kokkos(k_gamma,gamma);
|
||||
memoryKK->destroy_kokkos(k_dgamma1,dgamma1);
|
||||
memoryKK->destroy_kokkos(k_dgamma2,dgamma2);
|
||||
memoryKK->destroy_kokkos(k_dgamma3,dgamma3);
|
||||
memoryKK->destroy_kokkos(k_arho2b,arho2b);
|
||||
memoryKK->destroy_kokkos(k_arho1,arho1);
|
||||
memoryKK->destroy_kokkos(k_arho2,arho2);
|
||||
memoryKK->destroy_kokkos(k_arho3,arho3);
|
||||
memoryKK->destroy_kokkos(k_arho3b,arho3b);
|
||||
memoryKK->destroy_kokkos(k_t_ave,t_ave);
|
||||
memoryKK->destroy_kokkos(k_tsq_ave,tsq_ave);
|
||||
|
||||
nmax = atom_nmax;
|
||||
// memory->create(rho, nmax, "pair:rho");
|
||||
k_rho = DAT::tdual_ffloat_1d("pair:rho",nmax);
|
||||
d_rho = k_rho.template view<DeviceType>();
|
||||
h_rho = k_rho.h_view;
|
||||
// memory->create(rho0, nmax, "pair:rho0");
|
||||
k_rho0 = DAT::tdual_ffloat_1d("pair:rho0",nmax);
|
||||
d_rho0 = k_rho0.template view<DeviceType>();
|
||||
h_rho0 = k_rho0.h_view;
|
||||
//memory->create(rho1, nmax, "pair:rho1");
|
||||
k_rho1 = DAT::tdual_ffloat_1d("pair:rho1",nmax);
|
||||
d_rho1 = k_rho1.template view<DeviceType>();
|
||||
h_rho1 = k_rho1.h_view;
|
||||
//memory->create(rho2, nmax, "pair:rho2");
|
||||
k_rho2 = DAT::tdual_ffloat_1d("pair:rho2",nmax);
|
||||
d_rho2 = k_rho2.template view<DeviceType>();
|
||||
h_rho2 = k_rho2.h_view;
|
||||
//memory->create(rho3, nmax, "pair:rho3");
|
||||
k_rho3 = DAT::tdual_ffloat_1d("pair:rho3",nmax);
|
||||
d_rho3 = k_rho3.template view<DeviceType>();
|
||||
h_rho3 = k_rho3.h_view;
|
||||
//memory->create(frhop, nmax, "pair:frhop");
|
||||
k_frhop = DAT::tdual_ffloat_1d("pair:frhop",nmax);
|
||||
d_frhop = k_frhop.template view<DeviceType>();
|
||||
h_frhop = k_frhop.h_view;
|
||||
//memory->create(gamma, nmax, "pair:gamma");
|
||||
k_gamma = DAT::tdual_ffloat_1d("pair:gamma",nmax);
|
||||
d_gamma = k_gamma.template view<DeviceType>();
|
||||
h_gamma = k_gamma.h_view;
|
||||
//memory->create(dgamma1, nmax, "pair:dgamma1");
|
||||
k_dgamma1 = DAT::tdual_ffloat_1d("pair:dgamma1",nmax);
|
||||
d_dgamma1 = k_dgamma1.template view<DeviceType>();
|
||||
h_dgamma1 = k_dgamma1.h_view;
|
||||
//memory->create(dgamma2, nmax, "pair:dgamma2");
|
||||
k_dgamma2 = DAT::tdual_ffloat_1d("pair:dgamma2",nmax);
|
||||
d_dgamma2 = k_dgamma2.template view<DeviceType>();
|
||||
h_dgamma2 = k_dgamma2.h_view;
|
||||
//memory->create(dgamma3, nmax, "pair:dgamma3");
|
||||
k_dgamma3 = DAT::tdual_ffloat_1d("pair:dgamma3",nmax);
|
||||
d_dgamma3 = k_dgamma3.template view<DeviceType>();
|
||||
h_dgamma3 = k_dgamma3.h_view;
|
||||
//memory->create(arho2b, nmax, "pair:arho2b");
|
||||
k_arho2b = DAT::tdual_ffloat_1d("pair:arho2b",nmax);
|
||||
d_arho2b = k_arho2b.template view<DeviceType>();
|
||||
h_arho2b = k_arho2b.h_view;
|
||||
//memory->create(arho1, nmax, 3, "pair:arho1");
|
||||
k_arho1 = DAT::tdual_ffloat_2d("pair:arho1",nmax, 3);
|
||||
d_arho1 = k_arho1.template view<DeviceType>();
|
||||
h_arho1 = k_arho1.h_view;
|
||||
//memory->create(arho2, nmax, 6, "pair:arho2");
|
||||
k_arho2 = DAT::tdual_ffloat_2d("pair:arho2",nmax, 6);
|
||||
d_arho2 = k_arho2.template view<DeviceType>();
|
||||
h_arho2 = k_arho2.h_view;
|
||||
//memory->create(arho3, nmax, 10, "pair:arho3");
|
||||
k_arho3 = DAT::tdual_ffloat_2d("pair:arho3",nmax, 10);
|
||||
d_arho3 = k_arho3.template view<DeviceType>();
|
||||
h_arho3 = k_arho3.h_view;
|
||||
//memory->create(arho3b, nmax, 3, "pair:arho3b");
|
||||
k_arho3b = DAT::tdual_ffloat_2d("pair:arho3b",nmax, 3);
|
||||
d_arho3b = k_arho3b.template view<DeviceType>();
|
||||
h_arho3b = k_arho3b.h_view;
|
||||
//memory->create(t_ave, nmax, 3, "pair:t_ave");
|
||||
k_t_ave = DAT::tdual_ffloat_2d("pair:t_ave",nmax, 3);
|
||||
d_t_ave = k_t_ave.template view<DeviceType>();
|
||||
h_t_ave = k_t_ave.h_view;
|
||||
//memory->create(tsq_ave, nmax, 3, "pair:tsq_ave");
|
||||
k_tsq_ave = DAT::tdual_ffloat_2d("pair:tsq_ave",nmax, 3);
|
||||
d_tsq_ave = k_tsq_ave.template view<DeviceType>();
|
||||
h_tsq_ave = k_tsq_ave.h_view;
|
||||
}
|
||||
|
||||
if (n_neigh > maxneigh) {
|
||||
memoryKK->destroy_kokkos(k_scrfcn,scrfcn);
|
||||
memoryKK->destroy_kokkos(k_dscrfcn,dscrfcn);
|
||||
memoryKK->destroy_kokkos(k_fcpair,fcpair);
|
||||
maxneigh = n_neigh;
|
||||
// memory->create(scrfcn, maxneigh, "pair:scrfcn");
|
||||
k_scrfcn = DAT::tdual_ffloat_1d("pair:scrfcn", maxneigh);
|
||||
d_scrfcn = k_scrfcn.template view<DeviceType>();
|
||||
h_scrfcn = k_scrfcn.h_view;
|
||||
//memory->create(dscrfcn, maxneigh, "pair:dscrfcn");
|
||||
k_dscrfcn = DAT::tdual_ffloat_1d("pair:dscrfcn", maxneigh);
|
||||
d_dscrfcn = k_dscrfcn.template view<DeviceType>();
|
||||
h_dscrfcn = k_dscrfcn.h_view;
|
||||
//memory->create(fcpair, maxneigh, "pair:fcpair");
|
||||
k_fcpair = DAT::tdual_ffloat_1d("pair:fcpair", maxneigh);
|
||||
d_fcpair = k_fcpair.template view<DeviceType>();
|
||||
h_fcpair = k_fcpair.h_view;
|
||||
}
|
||||
|
||||
// zero out local arrays
|
||||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagMEAMZero>(0, nall),*this);
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void
|
||||
MEAMKokkos<DeviceType>::meam_dens_init(int inum_half, int ntype, typename AT::t_int_1d type, typename AT::t_int_1d d_map, typename AT::t_x_array x, typename AT::t_int_1d d_numneigh_half, typename AT::t_int_1d d_numneigh_full,
|
||||
typename AT::t_int_1d d_ilist_half, typename AT::t_neighbors_2d d_neighbors_half, typename AT::t_neighbors_2d d_neighbors_full, typename AT::t_int_1d d_offset, int neighflag, int need_dup)
|
||||
{
|
||||
this->ntype = ntype;
|
||||
this->type = type;
|
||||
this->d_map = d_map;
|
||||
this->x = x;
|
||||
this->d_numneigh_half = d_numneigh_half;
|
||||
this->d_numneigh_full = d_numneigh_full;
|
||||
this->d_ilist_half = d_ilist_half;
|
||||
this->d_neighbors_half = d_neighbors_half;
|
||||
this->d_neighbors_full = d_neighbors_full;
|
||||
this->d_offset = d_offset;
|
||||
this->nlocal = nlocal;
|
||||
|
||||
if (need_dup) {
|
||||
dup_rho0 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_rho0);
|
||||
dup_arho2b = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho2b);
|
||||
dup_arho1 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho1);
|
||||
dup_arho2 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho2);
|
||||
dup_arho3 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho3);
|
||||
dup_arho3b = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_arho3b);
|
||||
dup_t_ave = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_t_ave);
|
||||
dup_tsq_ave = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_tsq_ave);
|
||||
} else {
|
||||
ndup_rho0 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_rho0);
|
||||
ndup_arho2b = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho2b);
|
||||
ndup_arho1 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho1);
|
||||
ndup_arho2 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho2);
|
||||
ndup_arho3 = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho3);
|
||||
ndup_arho3b = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_arho3b);
|
||||
ndup_t_ave = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_t_ave);
|
||||
ndup_tsq_ave = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_tsq_ave);
|
||||
}
|
||||
|
||||
copymode = 1;
|
||||
if (neighflag == HALF)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagMEAMDensInit<HALF> >(0,inum_half),*this);
|
||||
else if (neighflag == HALFTHREAD)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagMEAMDensInit<HALFTHREAD> >(0,inum_half),*this);
|
||||
copymode = 0;
|
||||
|
||||
if (need_dup) {
|
||||
Kokkos::Experimental::contribute(d_rho0, dup_rho0);
|
||||
Kokkos::Experimental::contribute(d_arho2b, dup_arho2b);
|
||||
Kokkos::Experimental::contribute(d_arho1, dup_arho1);
|
||||
Kokkos::Experimental::contribute(d_arho2, dup_arho2);
|
||||
Kokkos::Experimental::contribute(d_arho3, dup_arho3);
|
||||
Kokkos::Experimental::contribute(d_arho3b, dup_arho3b);
|
||||
Kokkos::Experimental::contribute(d_t_ave, dup_t_ave);
|
||||
Kokkos::Experimental::contribute(d_tsq_ave, dup_tsq_ave);
|
||||
|
||||
// free duplicated memory
|
||||
dup_rho0 = decltype(dup_rho0)();
|
||||
dup_arho2b = decltype(dup_arho2b)();
|
||||
dup_arho1 = decltype(dup_arho1)();
|
||||
dup_arho2 = decltype(dup_arho2)();
|
||||
dup_arho3 = decltype(dup_arho3)();
|
||||
dup_arho3b = decltype(dup_arho3b)();
|
||||
dup_t_ave = decltype(dup_t_ave)();
|
||||
dup_tsq_ave = decltype(dup_tsq_ave)();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void
|
||||
MEAMKokkos<DeviceType>::getscreen(int i, int offset, typename AT::t_x_array x, typename AT::t_int_1d d_numneigh_half,
|
||||
typename AT::t_int_1d d_numneigh_full, int /*ntype*/, typename AT::t_int_1d type, typename AT::t_int_1d d_map)
|
||||
const {
|
||||
const double drinv = 1.0 / delr_meam;
|
||||
const int elti = d_map[type[i]];
|
||||
if (elti < 0) return;
|
||||
|
||||
const double xitmp = x(i,0);
|
||||
const double yitmp = x(i,1);
|
||||
const double zitmp = x(i,2);
|
||||
|
||||
for (int jn = 0; jn < d_numneigh_half[i]; jn++) {
|
||||
const int j = d_neighbors_half(i,jn);
|
||||
|
||||
const int eltj = d_map[type[j]];
|
||||
if (eltj < 0) continue;
|
||||
|
||||
// First compute screening function itself, sij
|
||||
const double xjtmp = x(j,0);
|
||||
const double yjtmp = x(j,1);
|
||||
const double zjtmp = x(j,2);
|
||||
const double delxij = xjtmp - xitmp;
|
||||
const double delyij = yjtmp - yitmp;
|
||||
const double delzij = zjtmp - zitmp;
|
||||
|
||||
const double rij2 = delxij * delxij + delyij * delyij + delzij * delzij;
|
||||
|
||||
if (rij2 > cutforcesq) {
|
||||
d_dscrfcn[offset+jn] = 0.0;
|
||||
d_scrfcn[offset+jn] = 0.0;
|
||||
d_fcpair[offset+jn] = 0.0;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Now compute derivatives
|
||||
const double rbound = ebound_meam[elti][eltj] * rij2;
|
||||
const double rij = sqrt(rij2);
|
||||
const double rnorm = (cutforce - rij) * drinv;
|
||||
double sij = 1.0;
|
||||
|
||||
// if rjk2 > ebound*rijsq, atom k is definitely outside the ellipse
|
||||
for (int kn = 0; kn < d_numneigh_full[i]; kn++) {
|
||||
int k = d_neighbors_full(i,kn);
|
||||
if (k == j) continue;
|
||||
int eltk = d_map[type[k]];
|
||||
if (eltk < 0) continue;
|
||||
|
||||
const double xktmp = x(k,0);
|
||||
const double yktmp = x(k,1);
|
||||
const double zktmp = x(k,2);
|
||||
|
||||
const double delxjk = xktmp - xjtmp;
|
||||
const double delyjk = yktmp - yjtmp;
|
||||
const double delzjk = zktmp - zjtmp;
|
||||
const double rjk2 = delxjk * delxjk + delyjk * delyjk + delzjk * delzjk;
|
||||
if (rjk2 > rbound) continue;
|
||||
|
||||
const double delxik = xktmp - xitmp;
|
||||
const double delyik = yktmp - yitmp;
|
||||
const double delzik = zktmp - zitmp;
|
||||
const double rik2 = delxik * delxik + delyik * delyik + delzik * delzik;
|
||||
if (rik2 > rbound) continue;
|
||||
|
||||
const double xik = rik2 / rij2;
|
||||
const double xjk = rjk2 / rij2;
|
||||
const double a = 1 - (xik - xjk) * (xik - xjk);
|
||||
// if a < 0, then ellipse equation doesn't describe this case and
|
||||
// atom k can't possibly screen i-j
|
||||
if (a <= 0.0) continue;
|
||||
|
||||
double cikj = (2.0 * (xik + xjk) + a - 2.0) / a;
|
||||
const double Cmax = Cmax_meam[elti][eltj][eltk];
|
||||
const double Cmin = Cmin_meam[elti][eltj][eltk];
|
||||
double sikj;
|
||||
if (cikj >= Cmax) continue;
|
||||
// note that cikj may be slightly negative (within numerical
|
||||
// tolerance) if atoms are colinear, so don't reject that case here
|
||||
// (other negative cikj cases were handled by the test on "a" above)
|
||||
else if (cikj <= Cmin) {
|
||||
sij = 0.0;
|
||||
break;
|
||||
} else {
|
||||
const double delc = Cmax - Cmin;
|
||||
cikj = (cikj - Cmin) / delc;
|
||||
sikj = fcut(cikj);
|
||||
}
|
||||
sij *= sikj;
|
||||
}
|
||||
|
||||
double dfc;
|
||||
const double fc = dfcut(rnorm, dfc);
|
||||
const double fcij = fc;
|
||||
const double dfcij = dfc * drinv;
|
||||
|
||||
// Now compute derivatives
|
||||
d_dscrfcn[offset+jn] = 0.0;
|
||||
const double sfcij = sij * fcij;
|
||||
if (!iszero_kk(sfcij) && !isone_kk(sfcij)) {
|
||||
for (int kn = 0; kn < d_numneigh_full[i]; kn++) {
|
||||
const int k = d_neighbors_full(i,kn);
|
||||
if (k == j) continue;
|
||||
const int eltk = d_map[type[k]];
|
||||
if (eltk < 0) continue;
|
||||
|
||||
const double delxjk = x(k,0) - xjtmp;
|
||||
const double delyjk = x(k,1) - yjtmp;
|
||||
const double delzjk = x(k,2) - zjtmp;
|
||||
const double rjk2 = delxjk * delxjk + delyjk * delyjk + delzjk * delzjk;
|
||||
if (rjk2 > rbound) continue;
|
||||
|
||||
const double delxik = x(k,0) - xitmp;
|
||||
const double delyik = x(k,1) - yitmp;
|
||||
const double delzik = x(k,2) - zitmp;
|
||||
const double rik2 = delxik * delxik + delyik * delyik + delzik * delzik;
|
||||
if (rik2 > rbound) continue;
|
||||
|
||||
const double xik = rik2 / rij2;
|
||||
const double xjk = rjk2 / rij2;
|
||||
const double a = 1 - (xik - xjk) * (xik - xjk);
|
||||
// if a < 0, then ellipse equation doesn't describe this case and
|
||||
// atom k can't possibly screen i-j
|
||||
if (a <= 0.0) continue;
|
||||
|
||||
double cikj = (2.0 * (xik + xjk) + a - 2.0) / a;
|
||||
const double Cmax = Cmax_meam[elti][eltj][eltk];
|
||||
const double Cmin = Cmin_meam[elti][eltj][eltk];
|
||||
if (cikj >= Cmax) {
|
||||
continue;
|
||||
// Note that cikj may be slightly negative (within numerical
|
||||
// tolerance) if atoms are colinear, so don't reject that case
|
||||
// here
|
||||
// (other negative cikj cases were handled by the test on "a"
|
||||
// above)
|
||||
// Note that we never have 0<cikj<Cmin here, else sij=0
|
||||
// (rejected above)
|
||||
} else {
|
||||
const double delc = Cmax - Cmin;
|
||||
cikj = (cikj - Cmin) / delc;
|
||||
double dfikj;
|
||||
const double sikj = dfcut(cikj, dfikj);
|
||||
const double coef1 = dfikj / (delc * sikj);
|
||||
const double dCikj = dCfunc(rij2, rik2, rjk2);
|
||||
d_dscrfcn[offset+jn] += coef1 * dCikj;
|
||||
}
|
||||
}
|
||||
const double coef1 = sfcij;
|
||||
const double coef2 = sij * dfcij / rij;
|
||||
d_dscrfcn[offset+jn] = d_dscrfcn[offset+jn] * coef1 - coef2;
|
||||
}
|
||||
|
||||
d_scrfcn[offset+jn] = sij;
|
||||
d_fcpair[offset+jn] = fcij;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
template<int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void
|
||||
MEAMKokkos<DeviceType>::calc_rho1(int i, int /*ntype*/, typename AT::t_int_1d type, typename AT::t_int_1d d_map, typename AT::t_x_array x, typename AT::t_int_1d d_numneigh,
|
||||
int offset) const
|
||||
{
|
||||
// The rho0, etc. arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||
|
||||
auto v_rho0 = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_rho0),decltype(ndup_rho0)>::get(dup_rho0,ndup_rho0);
|
||||
auto a_rho0 = v_rho0.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
auto v_arho2b = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho2b),decltype(ndup_arho2b)>::get(dup_arho2b,ndup_arho2b);
|
||||
auto a_arho2b = v_arho2b.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
auto v_arho1 = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho1),decltype(ndup_arho1)>::get(dup_arho1,ndup_arho1);
|
||||
auto a_arho1 = v_arho1.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
auto v_arho2 = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho2),decltype(ndup_arho2)>::get(dup_arho2,ndup_arho2);
|
||||
auto a_arho2 = v_arho2.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
auto v_arho3 = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho3),decltype(ndup_arho3)>::get(dup_arho3,ndup_arho3);
|
||||
auto a_arho3 = v_arho3.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
auto v_arho3b = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_arho3b),decltype(ndup_arho3b)>::get(dup_arho3b,ndup_arho3b);
|
||||
auto a_arho3b = v_arho3b.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
auto v_t_ave = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_t_ave),decltype(ndup_t_ave)>::get(dup_t_ave,ndup_t_ave);
|
||||
auto a_t_ave = v_t_ave.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
auto v_tsq_ave = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_tsq_ave),decltype(ndup_tsq_ave)>::get(dup_tsq_ave,ndup_tsq_ave);
|
||||
auto a_tsq_ave = v_tsq_ave.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
|
||||
|
||||
const int elti = d_map[type[i]];
|
||||
const double xtmp = x(i,0);
|
||||
const double ytmp = x(i,1);
|
||||
const double ztmp = x(i,2);
|
||||
for (int jn = 0; jn < d_numneigh[i]; jn++) {
|
||||
if (!iszero_kk(d_scrfcn[offset+jn])) {
|
||||
const int j = d_neighbors_half(i,jn);
|
||||
const double sij = d_scrfcn[offset+jn] * d_fcpair[offset+jn];
|
||||
double delij[3];
|
||||
delij[0] = x(j,0) - xtmp;
|
||||
delij[1] = x(j,1) - ytmp;
|
||||
delij[2] = x(j,2) - ztmp;
|
||||
const double rij2 = delij[0] * delij[0] + delij[1] * delij[1] + delij[2] * delij[2];
|
||||
if (rij2 < cutforcesq) {
|
||||
const int eltj = d_map[type[j]];
|
||||
const double rij = sqrt(rij2);
|
||||
const double ai = rij / re_meam[elti][elti] - 1.0;
|
||||
const double aj = rij / re_meam[eltj][eltj] - 1.0;
|
||||
const double ro0i = rho0_meam[elti];
|
||||
const double ro0j = rho0_meam[eltj];
|
||||
const double rhoa0j = ro0j * MathSpecialKokkos::fm_exp(-beta0_meam[eltj] * aj) * sij;
|
||||
double rhoa1j = ro0j * MathSpecialKokkos::fm_exp(-beta1_meam[eltj] * aj) * sij;
|
||||
double rhoa2j = ro0j * MathSpecialKokkos::fm_exp(-beta2_meam[eltj] * aj) * sij;
|
||||
double rhoa3j = ro0j * MathSpecialKokkos::fm_exp(-beta3_meam[eltj] * aj) * sij;
|
||||
const double rhoa0i = ro0i * MathSpecialKokkos::fm_exp(-beta0_meam[elti] * ai) * sij;
|
||||
double rhoa1i = ro0i * MathSpecialKokkos::fm_exp(-beta1_meam[elti] * ai) * sij;
|
||||
double rhoa2i = ro0i * MathSpecialKokkos::fm_exp(-beta2_meam[elti] * ai) * sij;
|
||||
double rhoa3i = ro0i * MathSpecialKokkos::fm_exp(-beta3_meam[elti] * ai) * sij;
|
||||
if (ialloy == 1) {
|
||||
rhoa1j *= t1_meam[eltj];
|
||||
rhoa2j *= t2_meam[eltj];
|
||||
rhoa3j *= t3_meam[eltj];
|
||||
rhoa1i *= t1_meam[elti];
|
||||
rhoa2i *= t2_meam[elti];
|
||||
rhoa3i *= t3_meam[elti];
|
||||
}
|
||||
a_rho0[i] += rhoa0j;
|
||||
a_rho0[j] += rhoa0i;
|
||||
// For ialloy = 2, use single-element value (not average)
|
||||
if (ialloy != 2) {
|
||||
a_t_ave(i,0) += t1_meam[eltj] * rhoa0j;
|
||||
a_t_ave(i,1) += t2_meam[eltj] * rhoa0j;
|
||||
a_t_ave(i,2) += t3_meam[eltj] * rhoa0j;
|
||||
a_t_ave(j,0) += t1_meam[elti] * rhoa0i;
|
||||
a_t_ave(j,1) += t2_meam[elti] * rhoa0i;
|
||||
a_t_ave(j,2) += t3_meam[elti] * rhoa0i;
|
||||
}
|
||||
if (ialloy == 1) {
|
||||
a_tsq_ave(i,0) += t1_meam[eltj] * t1_meam[eltj] * rhoa0j;
|
||||
a_tsq_ave(i,1) += t2_meam[eltj] * t2_meam[eltj] * rhoa0j;
|
||||
a_tsq_ave(i,2) += t3_meam[eltj] * t3_meam[eltj] * rhoa0j;
|
||||
a_tsq_ave(j,0) += t1_meam[elti] * t1_meam[elti] * rhoa0i;
|
||||
a_tsq_ave(j,1) += t2_meam[elti] * t2_meam[elti] * rhoa0i;
|
||||
a_tsq_ave(j,2) += t3_meam[elti] * t3_meam[elti] * rhoa0i;
|
||||
}
|
||||
a_arho2b[i] += rhoa2j;
|
||||
a_arho2b[j] += rhoa2i;
|
||||
|
||||
const double A1j = rhoa1j / rij;
|
||||
const double A2j = rhoa2j / rij2;
|
||||
const double A3j = rhoa3j / (rij2 * rij);
|
||||
const double A1i = rhoa1i / rij;
|
||||
const double A2i = rhoa2i / rij2;
|
||||
const double A3i = rhoa3i / (rij2 * rij);
|
||||
int nv2 = 0;
|
||||
int nv3 = 0;
|
||||
for (int m = 0; m < 3; m++) {
|
||||
a_arho1(i,m) += A1j * delij[m];
|
||||
a_arho1(j,m) += -A1i * delij[m];
|
||||
a_arho3b(i,m) += rhoa3j * delij[m] / rij;
|
||||
a_arho3b(j,m) += -rhoa3i * delij[m] / rij;
|
||||
for (int n = m; n < 3; n++) {
|
||||
a_arho2(i,nv2) += A2j * delij[m] * delij[n];
|
||||
a_arho2(j,nv2) += A2i * delij[m] * delij[n];
|
||||
nv2++;
|
||||
for (int p = n; p < 3; p++) {
|
||||
a_arho3(i,nv3) += A3j * delij[m] * delij[n] * delij[p];
|
||||
a_arho3(j,nv3) += -A3i * delij[m] * delij[n] * delij[p];
|
||||
nv3++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
//Cutoff function and derivative
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::dfcut(const double xi, double& dfc) const
|
||||
{
|
||||
if (xi >= 1.0) {
|
||||
dfc = 0.0;
|
||||
return 1.0;
|
||||
} else if (xi <= 0.0) {
|
||||
dfc = 0.0;
|
||||
return 0.0;
|
||||
} else {
|
||||
const double a = 1.0 - xi;
|
||||
const double a3 = a * a * a;
|
||||
const double a4 = a * a3;
|
||||
const double a1m4 = 1.0 - a4;
|
||||
|
||||
dfc = 8 * a1m4 * a3;
|
||||
return a1m4*a1m4;
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Derivative of Cikj w.r.t. rij
|
||||
// Inputs: rij,rij2,rik2,rjk2
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::dCfunc(const double rij2, const double rik2, const double rjk2) const
|
||||
{
|
||||
const double rij4 = rij2 * rij2;
|
||||
const double a = rik2 - rjk2;
|
||||
const double b = rik2 + rjk2;
|
||||
const double asq = a*a;
|
||||
double denom = rij4 - asq;
|
||||
denom = denom * denom;
|
||||
return -4 * (-2 * rij2 * asq + rij4 * b + asq * b) / denom;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void MEAMKokkos<DeviceType>::dCfunc2(const double rij2, const double rik2, const double rjk2, double& dCikj1, double& dCikj2) const
|
||||
{
|
||||
const double rij4 = rij2 * rij2;
|
||||
const double rik4 = rik2 * rik2;
|
||||
const double rjk4 = rjk2 * rjk2;
|
||||
const double a = rik2 - rjk2;
|
||||
double denom = rij4 - a * a;
|
||||
denom = denom * denom;
|
||||
dCikj1 = 4 * rij2 * (rij4 + rik4 + 2 * rik2 * rjk2 - 3 * rjk4 - 2 * rij2 * a) / denom;
|
||||
dCikj2 = 4 * rij2 * (rij4 - 3 * rik4 + 2 * rik2 * rjk2 + rjk4 + 2 * rij2 * a) / denom;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::fcut(const double xi) const
|
||||
{
|
||||
double a;
|
||||
if (xi >= 1.0)
|
||||
return 1.0;
|
||||
else if (xi <= 0.0)
|
||||
return 0.0;
|
||||
else {
|
||||
// ( 1.d0 - (1.d0 - xi)**4 )**2, but with better codegen
|
||||
a = 1.0 - xi;
|
||||
a *= a; a *= a;
|
||||
a = 1.0 - a;
|
||||
return a * a;
|
||||
}
|
||||
}
|
||||
|
||||
613
src/KOKKOS/meam_force_kokkos.h
Normal file
613
src/KOKKOS/meam_force_kokkos.h
Normal file
@ -0,0 +1,613 @@
|
||||
#include "math_special_kokkos.h"
|
||||
#include "meam_kokkos.h"
|
||||
#include <algorithm>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathSpecialKokkos;
|
||||
|
||||
template <class DeviceType>
|
||||
void MEAMKokkos<DeviceType>::meam_force(
|
||||
int inum_half, int eflag_global, int eflag_atom, int vflag_global, int vflag_atom,
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d eatom, int ntype, typename AT::t_int_1d type,
|
||||
typename AT::t_int_1d d_map, typename AT::t_x_array x, typename AT::t_int_1d numneigh,
|
||||
typename AT::t_int_1d numneigh_full, typename AT::t_f_array f,
|
||||
typename ArrayTypes<DeviceType>::t_virial_array vatom, typename AT::t_int_1d d_ilist_half,
|
||||
typename AT::t_int_1d d_offset, typename AT::t_neighbors_2d d_neighbors_half,
|
||||
typename AT::t_neighbors_2d d_neighbors_full, int neighflag, int need_dup, EV_FLOAT &ev_all)
|
||||
{
|
||||
EV_FLOAT ev;
|
||||
|
||||
this->eflag_either = eflag_either;
|
||||
this->eflag_global = eflag_global;
|
||||
this->eflag_atom = eflag_atom;
|
||||
this->vflag_global = vflag_global;
|
||||
this->vflag_atom = vflag_atom;
|
||||
eflag_either = eflag_atom || eflag_global;
|
||||
vflag_either = vflag_atom || vflag_global;
|
||||
this->d_eatom = eatom;
|
||||
this->ntype = ntype;
|
||||
this->type = type;
|
||||
this->d_map = d_map;
|
||||
this->x = x;
|
||||
this->d_numneigh_half = numneigh;
|
||||
this->d_numneigh_full = numneigh_full;
|
||||
this->d_neighbors_half = d_neighbors_half;
|
||||
this->d_neighbors_full = d_neighbors_full;
|
||||
this->f = f;
|
||||
this->d_vatom = vatom;
|
||||
this->d_ilist_half = d_ilist_half;
|
||||
this->d_offset = d_offset;
|
||||
|
||||
if (need_dup) {
|
||||
dup_f = Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum,
|
||||
Kokkos::Experimental::ScatterDuplicated>(f);
|
||||
if (eflag_atom)
|
||||
dup_eatom = Kokkos::Experimental::create_scatter_view<
|
||||
Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_eatom);
|
||||
if (vflag_atom)
|
||||
dup_vatom = Kokkos::Experimental::create_scatter_view<
|
||||
Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterDuplicated>(d_vatom);
|
||||
} else {
|
||||
ndup_f =
|
||||
Kokkos::Experimental::create_scatter_view<Kokkos::Experimental::ScatterSum,
|
||||
Kokkos::Experimental::ScatterNonDuplicated>(f);
|
||||
if (eflag_atom)
|
||||
ndup_eatom = Kokkos::Experimental::create_scatter_view<
|
||||
Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_eatom);
|
||||
if (vflag_atom)
|
||||
ndup_vatom = Kokkos::Experimental::create_scatter_view<
|
||||
Kokkos::Experimental::ScatterSum, Kokkos::Experimental::ScatterNonDuplicated>(d_vatom);
|
||||
}
|
||||
|
||||
copymode = 1;
|
||||
if (neighflag == HALF)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMForce<HALF>>(0, inum_half),
|
||||
*this, ev);
|
||||
else if (neighflag == HALFTHREAD)
|
||||
Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagMEAMForce<HALFTHREAD>>(0, inum_half),
|
||||
*this, ev);
|
||||
ev_all += ev;
|
||||
copymode = 0;
|
||||
|
||||
if (need_dup) {
|
||||
Kokkos::Experimental::contribute(f, dup_f);
|
||||
if (eflag_atom) Kokkos::Experimental::contribute(d_eatom, dup_eatom);
|
||||
if (vflag_atom) Kokkos::Experimental::contribute(d_vatom, dup_vatom);
|
||||
|
||||
// free duplicated memory
|
||||
dup_f = decltype(dup_f)();
|
||||
if (eflag_atom) dup_eatom = decltype(dup_eatom)();
|
||||
if (vflag_atom) dup_vatom = decltype(dup_vatom)();
|
||||
}
|
||||
}
|
||||
|
||||
template <class DeviceType>
|
||||
template <int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION void MEAMKokkos<DeviceType>::operator()(TagMEAMForce<NEIGHFLAG>,
|
||||
const int &ii, EV_FLOAT &ev) const
|
||||
{
|
||||
int i, j, jn, k, kn, kk, m, n, p, q;
|
||||
int nv2, nv3, elti, eltj, eltk, ind;
|
||||
X_FLOAT xitmp, yitmp, zitmp, delij[3];
|
||||
double rij2, rij, rij3;
|
||||
double v[6], fi[3], fj[3];
|
||||
double third, sixth;
|
||||
double pp, dUdrij, dUdsij, dUdrijm[3], force, forcem;
|
||||
double recip, phi, phip;
|
||||
double sij;
|
||||
double a1, a1i, a1j, a2, a2i, a2j;
|
||||
double a3i, a3j;
|
||||
double shpi[3], shpj[3];
|
||||
double ai, aj, ro0i, ro0j, invrei, invrej;
|
||||
double rhoa0j, drhoa0j, rhoa0i, drhoa0i;
|
||||
double rhoa1j, drhoa1j, rhoa1i, drhoa1i;
|
||||
double rhoa2j, drhoa2j, rhoa2i, drhoa2i;
|
||||
double a3, a3a, rhoa3j, drhoa3j, rhoa3i, drhoa3i;
|
||||
double drho0dr1, drho0dr2, drho0ds1, drho0ds2;
|
||||
double drho1dr1, drho1dr2, drho1ds1, drho1ds2;
|
||||
double drho1drm1[3], drho1drm2[3];
|
||||
double drho2dr1, drho2dr2, drho2ds1, drho2ds2;
|
||||
double drho2drm1[3], drho2drm2[3];
|
||||
double drho3dr1, drho3dr2, drho3ds1, drho3ds2;
|
||||
double drho3drm1[3], drho3drm2[3];
|
||||
double dt1dr1, dt1dr2, dt1ds1, dt1ds2;
|
||||
double dt2dr1, dt2dr2, dt2ds1, dt2ds2;
|
||||
double dt3dr1, dt3dr2, dt3ds1, dt3ds2;
|
||||
double drhodr1, drhodr2, drhods1, drhods2, drhodrm1[3], drhodrm2[3];
|
||||
double arg;
|
||||
double arg1i1, arg1j1, arg1i2, arg1j2, arg1i3, arg1j3, arg3i3, arg3j3;
|
||||
double dsij1, dsij2, force1, force2;
|
||||
double t1i, t2i, t3i, t1j, t2j, t3j;
|
||||
int fnoffset;
|
||||
|
||||
// The f, etc. arrays are duplicated for OpenMP, atomic for CUDA, and neither for Serial
|
||||
|
||||
auto v_f =
|
||||
ScatterViewHelper<NeedDup_v<NEIGHFLAG, DeviceType>, decltype(dup_f), decltype(ndup_f)>::get(
|
||||
dup_f, ndup_f);
|
||||
auto a_f = v_f.template access<AtomicDup_v<NEIGHFLAG, DeviceType>>();
|
||||
auto v_eatom = ScatterViewHelper<NeedDup_v<NEIGHFLAG, DeviceType>, decltype(dup_eatom),
|
||||
decltype(ndup_eatom)>::get(dup_eatom, ndup_eatom);
|
||||
auto a_eatom = v_eatom.template access<AtomicDup_v<NEIGHFLAG, DeviceType>>();
|
||||
auto v_vatom = ScatterViewHelper<NeedDup_v<NEIGHFLAG, DeviceType>, decltype(dup_vatom),
|
||||
decltype(ndup_vatom)>::get(dup_vatom, ndup_vatom);
|
||||
auto a_vatom = v_vatom.template access<AtomicDup_v<NEIGHFLAG, DeviceType>>();
|
||||
|
||||
i = d_ilist_half[ii];
|
||||
fnoffset = d_offset[i];
|
||||
third = 1.0 / 3.0;
|
||||
sixth = 1.0 / 6.0;
|
||||
|
||||
elti = d_map[type[i]];
|
||||
if (elti < 0) return;
|
||||
|
||||
xitmp = x(i, 0);
|
||||
yitmp = x(i, 1);
|
||||
zitmp = x(i, 2);
|
||||
|
||||
// Treat each pair
|
||||
for (jn = 0; jn < d_numneigh_half[i]; jn++) {
|
||||
j = d_neighbors_half(i, jn);
|
||||
eltj = d_map[type[j]];
|
||||
|
||||
if (!iszero_kk(d_scrfcn[fnoffset + jn]) && eltj >= 0) {
|
||||
|
||||
sij = d_scrfcn[fnoffset + jn] * d_fcpair[fnoffset + jn];
|
||||
delij[0] = x(j, 0) - xitmp;
|
||||
delij[1] = x(j, 1) - yitmp;
|
||||
delij[2] = x(j, 2) - zitmp;
|
||||
rij2 = delij[0] * delij[0] + delij[1] * delij[1] + delij[2] * delij[2];
|
||||
if (rij2 < cutforcesq) {
|
||||
rij = sqrt(rij2);
|
||||
recip = 1.0 / rij;
|
||||
|
||||
// Compute phi and phip
|
||||
ind = eltind[elti][eltj];
|
||||
pp = rij * rdrar;
|
||||
kk = (int) pp;
|
||||
kk = (kk <= (nrar - 2)) ? kk : nrar - 2;
|
||||
pp = pp - kk;
|
||||
pp = (pp <= 1.0) ? pp : 1.0;
|
||||
phi = ((d_phirar3(ind, kk) * pp + d_phirar2(ind, kk)) * pp + d_phirar1(ind, kk)) * pp +
|
||||
d_phirar(ind, kk);
|
||||
phip = (d_phirar6(ind, kk) * pp + d_phirar5(ind, kk)) * pp + d_phirar4(ind, kk);
|
||||
|
||||
if (eflag_either) {
|
||||
double scaleij = d_scale(type[i], type[i]);
|
||||
double phi_sc = phi * scaleij;
|
||||
if (eflag_global) ev.evdwl += phi_sc * sij;
|
||||
if (eflag_atom) {
|
||||
a_eatom[i] += 0.5 * phi * sij;
|
||||
a_eatom[j] += 0.5 * phi * sij;
|
||||
}
|
||||
}
|
||||
|
||||
// write(1,*) "force_meamf: phi: ",phi
|
||||
// write(1,*) "force_meamf: phip: ",phip
|
||||
|
||||
// Compute pair densities and derivatives
|
||||
invrei = 1.0 / re_meam[elti][elti];
|
||||
ai = rij * invrei - 1.0;
|
||||
ro0i = rho0_meam[elti];
|
||||
rhoa0i = ro0i * MathSpecialKokkos::fm_exp(-beta0_meam[elti] * ai);
|
||||
drhoa0i = -beta0_meam[elti] * invrei * rhoa0i;
|
||||
rhoa1i = ro0i * MathSpecialKokkos::fm_exp(-beta1_meam[elti] * ai);
|
||||
drhoa1i = -beta1_meam[elti] * invrei * rhoa1i;
|
||||
rhoa2i = ro0i * MathSpecialKokkos::fm_exp(-beta2_meam[elti] * ai);
|
||||
drhoa2i = -beta2_meam[elti] * invrei * rhoa2i;
|
||||
rhoa3i = ro0i * MathSpecialKokkos::fm_exp(-beta3_meam[elti] * ai);
|
||||
drhoa3i = -beta3_meam[elti] * invrei * rhoa3i;
|
||||
|
||||
if (elti != eltj) {
|
||||
invrej = 1.0 / re_meam[eltj][eltj];
|
||||
aj = rij * invrej - 1.0;
|
||||
ro0j = rho0_meam[eltj];
|
||||
rhoa0j = ro0j * MathSpecialKokkos::fm_exp(-beta0_meam[eltj] * aj);
|
||||
drhoa0j = -beta0_meam[eltj] * invrej * rhoa0j;
|
||||
rhoa1j = ro0j * MathSpecialKokkos::fm_exp(-beta1_meam[eltj] * aj);
|
||||
drhoa1j = -beta1_meam[eltj] * invrej * rhoa1j;
|
||||
rhoa2j = ro0j * MathSpecialKokkos::fm_exp(-beta2_meam[eltj] * aj);
|
||||
drhoa2j = -beta2_meam[eltj] * invrej * rhoa2j;
|
||||
rhoa3j = ro0j * MathSpecialKokkos::fm_exp(-beta3_meam[eltj] * aj);
|
||||
drhoa3j = -beta3_meam[eltj] * invrej * rhoa3j;
|
||||
} else {
|
||||
rhoa0j = rhoa0i;
|
||||
drhoa0j = drhoa0i;
|
||||
rhoa1j = rhoa1i;
|
||||
drhoa1j = drhoa1i;
|
||||
rhoa2j = rhoa2i;
|
||||
drhoa2j = drhoa2i;
|
||||
rhoa3j = rhoa3i;
|
||||
drhoa3j = drhoa3i;
|
||||
}
|
||||
|
||||
const double t1mi = t1_meam[elti];
|
||||
const double t2mi = t2_meam[elti];
|
||||
const double t3mi = t3_meam[elti];
|
||||
const double t1mj = t1_meam[eltj];
|
||||
const double t2mj = t2_meam[eltj];
|
||||
const double t3mj = t3_meam[eltj];
|
||||
|
||||
if (ialloy == 1) {
|
||||
rhoa1j *= t1mj;
|
||||
rhoa2j *= t2mj;
|
||||
rhoa3j *= t3mj;
|
||||
rhoa1i *= t1mi;
|
||||
rhoa2i *= t2mi;
|
||||
rhoa3i *= t3mi;
|
||||
drhoa1j *= t1mj;
|
||||
drhoa2j *= t2mj;
|
||||
drhoa3j *= t3mj;
|
||||
drhoa1i *= t1mi;
|
||||
drhoa2i *= t2mi;
|
||||
drhoa3i *= t3mi;
|
||||
}
|
||||
|
||||
nv2 = 0;
|
||||
nv3 = 0;
|
||||
arg1i1 = 0.0;
|
||||
arg1j1 = 0.0;
|
||||
arg1i2 = 0.0;
|
||||
arg1j2 = 0.0;
|
||||
arg1i3 = 0.0;
|
||||
arg1j3 = 0.0;
|
||||
arg3i3 = 0.0;
|
||||
arg3j3 = 0.0;
|
||||
for (n = 0; n < 3; n++) {
|
||||
for (p = n; p < 3; p++) {
|
||||
for (q = p; q < 3; q++) {
|
||||
arg = delij[n] * delij[p] * delij[q] * v3D[nv3];
|
||||
arg1i3 = arg1i3 + d_arho3(i, nv3) * arg;
|
||||
arg1j3 = arg1j3 - d_arho3(j, nv3) * arg;
|
||||
nv3 = nv3 + 1;
|
||||
}
|
||||
arg = delij[n] * delij[p] * v2D[nv2];
|
||||
arg1i2 = arg1i2 + d_arho2(i, nv2) * arg;
|
||||
arg1j2 = arg1j2 + d_arho2(j, nv2) * arg;
|
||||
nv2 = nv2 + 1;
|
||||
}
|
||||
arg1i1 = arg1i1 + d_arho1(i, n) * delij[n];
|
||||
arg1j1 = arg1j1 - d_arho1(j, n) * delij[n];
|
||||
arg3i3 = arg3i3 + d_arho3b(i, n) * delij[n];
|
||||
arg3j3 = arg3j3 - d_arho3b(j, n) * delij[n];
|
||||
}
|
||||
|
||||
// rho0 terms
|
||||
drho0dr1 = drhoa0j * sij;
|
||||
drho0dr2 = drhoa0i * sij;
|
||||
|
||||
// rho1 terms
|
||||
a1 = 2 * sij / rij;
|
||||
drho1dr1 = a1 * (drhoa1j - rhoa1j / rij) * arg1i1;
|
||||
drho1dr2 = a1 * (drhoa1i - rhoa1i / rij) * arg1j1;
|
||||
a1 = 2.0 * sij / rij;
|
||||
for (m = 0; m < 3; m++) {
|
||||
drho1drm1[m] = a1 * rhoa1j * d_arho1(i, m);
|
||||
drho1drm2[m] = -a1 * rhoa1i * d_arho1(j, m);
|
||||
}
|
||||
|
||||
// rho2 terms
|
||||
a2 = 2 * sij / rij2;
|
||||
drho2dr1 =
|
||||
a2 * (drhoa2j - 2 * rhoa2j / rij) * arg1i2 - 2.0 / 3.0 * d_arho2b[i] * drhoa2j * sij;
|
||||
drho2dr2 =
|
||||
a2 * (drhoa2i - 2 * rhoa2i / rij) * arg1j2 - 2.0 / 3.0 * d_arho2b[j] * drhoa2i * sij;
|
||||
a2 = 4 * sij / rij2;
|
||||
for (m = 0; m < 3; m++) {
|
||||
drho2drm1[m] = 0.0;
|
||||
drho2drm2[m] = 0.0;
|
||||
for (n = 0; n < 3; n++) {
|
||||
drho2drm1[m] = drho2drm1[m] + d_arho2(i, vind2D[m][n]) * delij[n];
|
||||
drho2drm2[m] = drho2drm2[m] - d_arho2(j, vind2D[m][n]) * delij[n];
|
||||
}
|
||||
drho2drm1[m] = a2 * rhoa2j * drho2drm1[m];
|
||||
drho2drm2[m] = -a2 * rhoa2i * drho2drm2[m];
|
||||
}
|
||||
|
||||
// rho3 terms
|
||||
rij3 = rij * rij2;
|
||||
a3 = 2 * sij / rij3;
|
||||
a3a = 6.0 / 5.0 * sij / rij;
|
||||
drho3dr1 =
|
||||
a3 * (drhoa3j - 3 * rhoa3j / rij) * arg1i3 - a3a * (drhoa3j - rhoa3j / rij) * arg3i3;
|
||||
drho3dr2 =
|
||||
a3 * (drhoa3i - 3 * rhoa3i / rij) * arg1j3 - a3a * (drhoa3i - rhoa3i / rij) * arg3j3;
|
||||
a3 = 6 * sij / rij3;
|
||||
a3a = 6 * sij / (5 * rij);
|
||||
for (m = 0; m < 3; m++) {
|
||||
drho3drm1[m] = 0.0;
|
||||
drho3drm2[m] = 0.0;
|
||||
nv2 = 0;
|
||||
for (n = 0; n < 3; n++) {
|
||||
for (p = n; p < 3; p++) {
|
||||
arg = delij[n] * delij[p] * v2D[nv2];
|
||||
drho3drm1[m] = drho3drm1[m] + d_arho3(i, vind3D[m][n][p]) * arg;
|
||||
drho3drm2[m] = drho3drm2[m] + d_arho3(j, vind3D[m][n][p]) * arg;
|
||||
nv2 = nv2 + 1;
|
||||
}
|
||||
}
|
||||
drho3drm1[m] = (a3 * drho3drm1[m] - a3a * d_arho3b(i, m)) * rhoa3j;
|
||||
drho3drm2[m] = (-a3 * drho3drm2[m] + a3a * d_arho3b(j, m)) * rhoa3i;
|
||||
}
|
||||
|
||||
// Compute derivatives of weighting functions t wrt rij
|
||||
t1i = d_t_ave(i, 0);
|
||||
t2i = d_t_ave(i, 1);
|
||||
t3i = d_t_ave(i, 2);
|
||||
t1j = d_t_ave(j, 0);
|
||||
t2j = d_t_ave(j, 1);
|
||||
t3j = d_t_ave(j, 2);
|
||||
|
||||
if (ialloy == 1) {
|
||||
|
||||
a1i = fdiv_zero_kk(drhoa0j * sij, d_tsq_ave(i, 0));
|
||||
a1j = fdiv_zero_kk(drhoa0i * sij, d_tsq_ave(j, 0));
|
||||
a2i = fdiv_zero_kk(drhoa0j * sij, d_tsq_ave(i, 1));
|
||||
a2j = fdiv_zero_kk(drhoa0i * sij, d_tsq_ave(j, 1));
|
||||
a3i = fdiv_zero_kk(drhoa0j * sij, d_tsq_ave(i, 2));
|
||||
a3j = fdiv_zero_kk(drhoa0i * sij, d_tsq_ave(j, 2));
|
||||
|
||||
dt1dr1 = a1i * (t1mj - t1i * MathSpecialKokkos::square(t1mj));
|
||||
dt1dr2 = a1j * (t1mi - t1j * MathSpecialKokkos::square(t1mi));
|
||||
dt2dr1 = a2i * (t2mj - t2i * MathSpecialKokkos::square(t2mj));
|
||||
dt2dr2 = a2j * (t2mi - t2j * MathSpecialKokkos::square(t2mi));
|
||||
dt3dr1 = a3i * (t3mj - t3i * MathSpecialKokkos::square(t3mj));
|
||||
dt3dr2 = a3j * (t3mi - t3j * MathSpecialKokkos::square(t3mi));
|
||||
|
||||
} else if (ialloy == 2) {
|
||||
|
||||
dt1dr1 = 0.0;
|
||||
dt1dr2 = 0.0;
|
||||
dt2dr1 = 0.0;
|
||||
dt2dr2 = 0.0;
|
||||
dt3dr1 = 0.0;
|
||||
dt3dr2 = 0.0;
|
||||
|
||||
} else {
|
||||
|
||||
ai = 0.0;
|
||||
if (!iszero_kk(d_rho0[i])) ai = drhoa0j * sij / d_rho0[i];
|
||||
aj = 0.0;
|
||||
if (!iszero_kk(d_rho0[j])) aj = drhoa0i * sij / d_rho0[j];
|
||||
|
||||
dt1dr1 = ai * (t1mj - t1i);
|
||||
dt1dr2 = aj * (t1mi - t1j);
|
||||
dt2dr1 = ai * (t2mj - t2i);
|
||||
dt2dr2 = aj * (t2mi - t2j);
|
||||
dt3dr1 = ai * (t3mj - t3i);
|
||||
dt3dr2 = aj * (t3mi - t3j);
|
||||
}
|
||||
|
||||
// Compute derivatives of total density wrt rij, sij and rij(3)
|
||||
get_shpfcn(lattce_meam[elti][elti], stheta_meam[elti][elti], ctheta_meam[elti][elti], shpi);
|
||||
get_shpfcn(lattce_meam[eltj][eltj], stheta_meam[elti][elti], ctheta_meam[elti][elti], shpj);
|
||||
|
||||
drhodr1 = d_dgamma1[i] * drho0dr1 +
|
||||
d_dgamma2[i] *
|
||||
(dt1dr1 * d_rho1[i] + t1i * drho1dr1 + dt2dr1 * d_rho2[i] + t2i * drho2dr1 +
|
||||
dt3dr1 * d_rho3[i] + t3i * drho3dr1) -
|
||||
d_dgamma3[i] * (shpi[0] * dt1dr1 + shpi[1] * dt2dr1 + shpi[2] * dt3dr1);
|
||||
drhodr2 = d_dgamma1[j] * drho0dr2 +
|
||||
d_dgamma2[j] *
|
||||
(dt1dr2 * d_rho1[j] + t1j * drho1dr2 + dt2dr2 * d_rho2[j] + t2j * drho2dr2 +
|
||||
dt3dr2 * d_rho3[j] + t3j * drho3dr2) -
|
||||
d_dgamma3[j] * (shpj[0] * dt1dr2 + shpj[1] * dt2dr2 + shpj[2] * dt3dr2);
|
||||
for (m = 0; m < 3; m++) {
|
||||
drhodrm1[m] = 0.0;
|
||||
drhodrm2[m] = 0.0;
|
||||
drhodrm1[m] =
|
||||
d_dgamma2[i] * (t1i * drho1drm1[m] + t2i * drho2drm1[m] + t3i * drho3drm1[m]);
|
||||
drhodrm2[m] =
|
||||
d_dgamma2[j] * (t1j * drho1drm2[m] + t2j * drho2drm2[m] + t3j * drho3drm2[m]);
|
||||
}
|
||||
|
||||
// Compute derivatives wrt sij, but only if necessary
|
||||
if (!iszero_kk(d_dscrfcn[fnoffset + jn])) {
|
||||
drho0ds1 = rhoa0j;
|
||||
drho0ds2 = rhoa0i;
|
||||
a1 = 2.0 / rij;
|
||||
drho1ds1 = a1 * rhoa1j * arg1i1;
|
||||
drho1ds2 = a1 * rhoa1i * arg1j1;
|
||||
a2 = 2.0 / rij2;
|
||||
drho2ds1 = a2 * rhoa2j * arg1i2 - 2.0 / 3.0 * d_arho2b[i] * rhoa2j;
|
||||
drho2ds2 = a2 * rhoa2i * arg1j2 - 2.0 / 3.0 * d_arho2b[j] * rhoa2i;
|
||||
a3 = 2.0 / rij3;
|
||||
a3a = 6.0 / (5.0 * rij);
|
||||
drho3ds1 = a3 * rhoa3j * arg1i3 - a3a * rhoa3j * arg3i3;
|
||||
drho3ds2 = a3 * rhoa3i * arg1j3 - a3a * rhoa3i * arg3j3;
|
||||
|
||||
if (ialloy == 1) {
|
||||
a1i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i, 0));
|
||||
a1j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j, 0));
|
||||
a2i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i, 1));
|
||||
a2j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j, 1));
|
||||
a3i = fdiv_zero_kk(rhoa0j, d_tsq_ave(i, 2));
|
||||
a3j = fdiv_zero_kk(rhoa0i, d_tsq_ave(j, 2));
|
||||
|
||||
dt1ds1 = a1i * (t1mj - t1i * MathSpecialKokkos::square(t1mj));
|
||||
dt1ds2 = a1j * (t1mi - t1j * MathSpecialKokkos::square(t1mi));
|
||||
dt2ds1 = a2i * (t2mj - t2i * MathSpecialKokkos::square(t2mj));
|
||||
dt2ds2 = a2j * (t2mi - t2j * MathSpecialKokkos::square(t2mi));
|
||||
dt3ds1 = a3i * (t3mj - t3i * MathSpecialKokkos::square(t3mj));
|
||||
dt3ds2 = a3j * (t3mi - t3j * MathSpecialKokkos::square(t3mi));
|
||||
|
||||
} else if (ialloy == 2) {
|
||||
|
||||
dt1ds1 = 0.0;
|
||||
dt1ds2 = 0.0;
|
||||
dt2ds1 = 0.0;
|
||||
dt2ds2 = 0.0;
|
||||
dt3ds1 = 0.0;
|
||||
dt3ds2 = 0.0;
|
||||
|
||||
} else {
|
||||
|
||||
ai = 0.0;
|
||||
if (!iszero_kk(d_rho0[i])) ai = rhoa0j / d_rho0[i];
|
||||
aj = 0.0;
|
||||
if (!iszero_kk(d_rho0[j])) aj = rhoa0i / d_rho0[j];
|
||||
|
||||
dt1ds1 = ai * (t1mj - t1i);
|
||||
dt1ds2 = aj * (t1mi - t1j);
|
||||
dt2ds1 = ai * (t2mj - t2i);
|
||||
dt2ds2 = aj * (t2mi - t2j);
|
||||
dt3ds1 = ai * (t3mj - t3i);
|
||||
dt3ds2 = aj * (t3mi - t3j);
|
||||
}
|
||||
|
||||
drhods1 = d_dgamma1[i] * drho0ds1 +
|
||||
d_dgamma2[i] *
|
||||
(dt1ds1 * d_rho1[i] + t1i * drho1ds1 + dt2ds1 * d_rho2[i] + t2i * drho2ds1 +
|
||||
dt3ds1 * d_rho3[i] + t3i * drho3ds1) -
|
||||
d_dgamma3[i] * (shpi[0] * dt1ds1 + shpi[1] * dt2ds1 + shpi[2] * dt3ds1);
|
||||
drhods2 = d_dgamma1[j] * drho0ds2 +
|
||||
d_dgamma2[j] *
|
||||
(dt1ds2 * d_rho1[j] + t1j * drho1ds2 + dt2ds2 * d_rho2[j] + t2j * drho2ds2 +
|
||||
dt3ds2 * d_rho3[j] + t3j * drho3ds2) -
|
||||
d_dgamma3[j] * (shpj[0] * dt1ds2 + shpj[1] * dt2ds2 + shpj[2] * dt3ds2);
|
||||
}
|
||||
|
||||
// Compute derivatives of energy wrt rij, sij and rij[3]
|
||||
dUdrij = phip * sij + d_frhop[i] * drhodr1 + d_frhop[j] * drhodr2;
|
||||
dUdsij = 0.0;
|
||||
if (!iszero_kk(d_dscrfcn[fnoffset + jn])) {
|
||||
dUdsij = phi + d_frhop[i] * drhods1 + d_frhop[j] * drhods2;
|
||||
}
|
||||
for (m = 0; m < 3; m++) {
|
||||
dUdrijm[m] = d_frhop[i] * drhodrm1[m] + d_frhop[j] * drhodrm2[m];
|
||||
}
|
||||
|
||||
// Add the part of the force due to dUdrij and dUdsij
|
||||
force = dUdrij * recip + dUdsij * d_dscrfcn[fnoffset + jn];
|
||||
for (m = 0; m < 3; m++) {
|
||||
forcem = delij[m] * force + dUdrijm[m];
|
||||
a_f(i, m) += forcem;
|
||||
a_f(j, m) -= forcem;
|
||||
}
|
||||
|
||||
// Tabulate per-atom virial as symmetrized stress tensor
|
||||
|
||||
if (vflag_either) {
|
||||
fi[0] = delij[0] * force + dUdrijm[0];
|
||||
fi[1] = delij[1] * force + dUdrijm[1];
|
||||
fi[2] = delij[2] * force + dUdrijm[2];
|
||||
v[0] = -0.5 * (delij[0] * fi[0]);
|
||||
v[1] = -0.5 * (delij[1] * fi[1]);
|
||||
v[2] = -0.5 * (delij[2] * fi[2]);
|
||||
v[3] = -0.25 * (delij[0] * fi[1] + delij[1] * fi[0]);
|
||||
v[4] = -0.25 * (delij[0] * fi[2] + delij[2] * fi[0]);
|
||||
v[5] = -0.25 * (delij[1] * fi[2] + delij[2] * fi[1]);
|
||||
|
||||
if (vflag_global)
|
||||
for (m = 0; m < 6; m++) ev.v[m] += 2.0 * v[m];
|
||||
|
||||
if (vflag_atom) {
|
||||
for (m = 0; m < 6; m++) {
|
||||
a_vatom(i, m) += v[m];
|
||||
a_vatom(j, m) += v[m];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now compute forces on other atoms k due to change in sij
|
||||
|
||||
if (iszero_kk(sij) || isone_kk(sij)) continue; //: cont jn loop
|
||||
|
||||
double dxik(0), dyik(0), dzik(0);
|
||||
double dxjk(0), dyjk(0), dzjk(0);
|
||||
|
||||
for (kn = 0; kn < d_numneigh_full[i]; kn++) {
|
||||
k = d_neighbors_full(i, kn);
|
||||
eltk = d_map[type[k]];
|
||||
if (k != j && eltk >= 0) {
|
||||
double xik, xjk, cikj, sikj, dfc, a;
|
||||
double dCikj1, dCikj2;
|
||||
double delc, rik2, rjk2;
|
||||
|
||||
sij = d_scrfcn[jn + fnoffset] * d_fcpair[jn + fnoffset];
|
||||
const double Cmax = Cmax_meam[elti][eltj][eltk];
|
||||
const double Cmin = Cmin_meam[elti][eltj][eltk];
|
||||
|
||||
dsij1 = 0.0;
|
||||
dsij2 = 0.0;
|
||||
if (!iszero_kk(sij) && !isone_kk(sij)) {
|
||||
const double rbound = rij2 * ebound_meam[elti][eltj];
|
||||
delc = Cmax - Cmin;
|
||||
dxjk = x(k, 0) - x(j, 0);
|
||||
dyjk = x(k, 1) - x(j, 1);
|
||||
dzjk = x(k, 2) - x(j, 2);
|
||||
rjk2 = dxjk * dxjk + dyjk * dyjk + dzjk * dzjk;
|
||||
if (rjk2 <= rbound) {
|
||||
dxik = x(k, 0) - x(i, 0);
|
||||
dyik = x(k, 1) - x(i, 1);
|
||||
dzik = x(k, 2) - x(i, 2);
|
||||
rik2 = dxik * dxik + dyik * dyik + dzik * dzik;
|
||||
if (rik2 <= rbound) {
|
||||
xik = rik2 / rij2;
|
||||
xjk = rjk2 / rij2;
|
||||
a = 1 - (xik - xjk) * (xik - xjk);
|
||||
if (!iszero_kk(a)) {
|
||||
cikj = (2.0 * (xik + xjk) + a - 2.0) / a;
|
||||
if (cikj >= Cmin && cikj <= Cmax) {
|
||||
cikj = (cikj - Cmin) / delc;
|
||||
sikj = dfcut(cikj, dfc);
|
||||
dCfunc2(rij2, rik2, rjk2, dCikj1, dCikj2);
|
||||
a = sij / delc * dfc / sikj;
|
||||
dsij1 = a * dCikj1;
|
||||
dsij2 = a * dCikj2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!iszero_kk(dsij1) || !iszero_kk(dsij2)) {
|
||||
force1 = dUdsij * dsij1;
|
||||
force2 = dUdsij * dsij2;
|
||||
|
||||
a_f(i, 0) += force1 * dxik;
|
||||
a_f(i, 1) += force1 * dyik;
|
||||
a_f(i, 2) += force1 * dzik;
|
||||
a_f(j, 0) += force2 * dxjk;
|
||||
a_f(j, 1) += force2 * dyjk;
|
||||
a_f(j, 2) += force2 * dzjk;
|
||||
a_f(k, 0) -= force1 * dxik + force2 * dxjk;
|
||||
a_f(k, 1) -= force1 * dyik + force2 * dyjk;
|
||||
a_f(k, 2) -= force1 * dzik + force2 * dzjk;
|
||||
|
||||
// Tabulate per-atom virial as symmetrized stress tensor
|
||||
|
||||
if (vflag_either) {
|
||||
fi[0] = force1 * dxik;
|
||||
fi[1] = force1 * dyik;
|
||||
fi[2] = force1 * dzik;
|
||||
fj[0] = force2 * dxjk;
|
||||
fj[1] = force2 * dyjk;
|
||||
fj[2] = force2 * dzjk;
|
||||
v[0] = -third * (dxik * fi[0] + dxjk * fj[0]);
|
||||
v[1] = -third * (dyik * fi[1] + dyjk * fj[1]);
|
||||
v[2] = -third * (dzik * fi[2] + dzjk * fj[2]);
|
||||
v[3] = -sixth * (dxik * fi[1] + dxjk * fj[1] + dyik * fi[0] + dyjk * fj[0]);
|
||||
v[4] = -sixth * (dxik * fi[2] + dxjk * fj[2] + dzik * fi[0] + dzjk * fj[0]);
|
||||
v[5] = -sixth * (dyik * fi[2] + dyjk * fj[2] + dzik * fi[1] + dzjk * fj[1]);
|
||||
|
||||
if (vflag_global)
|
||||
for (m = 0; m < 6; m++) ev.v[m] += 3.0 * v[m];
|
||||
|
||||
if (vflag_atom) {
|
||||
for (m = 0; m < 6; m++) {
|
||||
a_vatom(i, m) += v[m];
|
||||
a_vatom(j, m) += v[m];
|
||||
a_vatom(k, m) += v[m];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// end of k loop
|
||||
}
|
||||
}
|
||||
}
|
||||
// end of j loop
|
||||
}
|
||||
}
|
||||
289
src/KOKKOS/meam_funcs_kokkos.h
Normal file
289
src/KOKKOS/meam_funcs_kokkos.h
Normal file
@ -0,0 +1,289 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Naga Vydyanathan (NVIDIA)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "math_special_kokkos.h"
|
||||
#include <cmath>
|
||||
#include "meam_kokkos.h"
|
||||
using namespace MathSpecialKokkos;
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Compute G(gamma) based on selection flag ibar:
|
||||
// 0 => G = sqrt(1+gamma)
|
||||
// 1 => G = exp(gamma/2)
|
||||
// 2 => not implemented
|
||||
// 3 => G = 2/(1+exp(-gamma))
|
||||
// 4 => G = sqrt(1+gamma)
|
||||
// -5 => G = +-sqrt(abs(1+gamma))
|
||||
//
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::G_gam(const double gamma, const int ibar, int &errorflag) const
|
||||
{
|
||||
double gsmooth_switchpoint;
|
||||
|
||||
switch (ibar) {
|
||||
case 0:
|
||||
case 4:
|
||||
gsmooth_switchpoint = -gsmooth_factor / (gsmooth_factor + 1);
|
||||
if (gamma < gsmooth_switchpoint) {
|
||||
// e.g. gsmooth_factor is 99, {:
|
||||
// gsmooth_switchpoint = -0.99
|
||||
// G = 0.01*(-0.99/gamma)**99
|
||||
double G = 1 / (gsmooth_factor + 1) * pow((gsmooth_switchpoint / gamma), gsmooth_factor);
|
||||
return sqrt(G);
|
||||
} else {
|
||||
return sqrt(1.0 + gamma);
|
||||
}
|
||||
case 1:
|
||||
return MathSpecialKokkos::fm_exp(gamma / 2.0);
|
||||
case 3:
|
||||
return 2.0 / (1.0 + MathSpecialKokkos::fm_exp(-gamma));
|
||||
case -5:
|
||||
if ((1.0 + gamma) >= 0) {
|
||||
return sqrt(1.0 + gamma);
|
||||
} else {
|
||||
return -sqrt(-1.0 - gamma);
|
||||
}
|
||||
}
|
||||
errorflag = 1;
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Compute G(gamma and dG(gamma) based on selection flag ibar:
|
||||
// 0 => G = sqrt(1+gamma)
|
||||
// 1 => G = exp(gamma/2)
|
||||
// 2 => not implemented
|
||||
// 3 => G = 2/(1+exp(-gamma))
|
||||
// 4 => G = sqrt(1+gamma)
|
||||
// -5 => G = +-sqrt(abs(1+gamma))
|
||||
//
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::dG_gam(const double gamma, const int ibar, double& dG) const
|
||||
{
|
||||
double gsmooth_switchpoint;
|
||||
double G;
|
||||
|
||||
switch (ibar) {
|
||||
case 0:
|
||||
case 4:
|
||||
gsmooth_switchpoint = -gsmooth_factor / (gsmooth_factor + 1);
|
||||
if (gamma < gsmooth_switchpoint) {
|
||||
// e.g. gsmooth_factor is 99, {:
|
||||
// gsmooth_switchpoint = -0.99
|
||||
// G = 0.01*(-0.99/gamma)**99
|
||||
G = 1 / (gsmooth_factor + 1) * pow((gsmooth_switchpoint / gamma), gsmooth_factor);
|
||||
G = sqrt(G);
|
||||
dG = -gsmooth_factor * G / (2.0 * gamma);
|
||||
return G;
|
||||
} else {
|
||||
G = sqrt(1.0 + gamma);
|
||||
dG = 1.0 / (2.0 * G);
|
||||
return G;
|
||||
}
|
||||
case 1:
|
||||
G = MathSpecialKokkos::fm_exp(gamma / 2.0);
|
||||
dG = G / 2.0;
|
||||
return G;
|
||||
case 3:
|
||||
G = 2.0 / (1.0 + MathSpecialKokkos::fm_exp(-gamma));
|
||||
dG = G * (2.0 - G) / 2;
|
||||
return G;
|
||||
case -5:
|
||||
if ((1.0 + gamma) >= 0) {
|
||||
G = sqrt(1.0 + gamma);
|
||||
dG = 1.0 / (2.0 * G);
|
||||
return G;
|
||||
} else {
|
||||
G = -sqrt(-1.0 - gamma);
|
||||
dG = -1.0 / (2.0 * G);
|
||||
return G;
|
||||
}
|
||||
}
|
||||
dG = 1.0;
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Compute ZBL potential
|
||||
//
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::zbl(const double r, const int z1, const int z2) const
|
||||
{
|
||||
int i;
|
||||
const double c[] = { 0.028171, 0.28022, 0.50986, 0.18175 };
|
||||
const double d[] = { 0.20162, 0.40290, 0.94229, 3.1998 };
|
||||
const double azero = 0.4685;
|
||||
const double cc = 14.3997;
|
||||
double a, x;
|
||||
// azero = (9pi^2/128)^1/3 (0.529) Angstroms
|
||||
a = azero / (pow(z1, 0.23) + pow(z2, 0.23));
|
||||
double result = 0.0;
|
||||
x = r / a;
|
||||
for (i = 0; i <= 3; i++) {
|
||||
result = result + c[i] * MathSpecialKokkos::fm_exp(-d[i] * x);
|
||||
}
|
||||
if (r > 0.0)
|
||||
result = result * z1 * z2 / r * cc;
|
||||
return result;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Compute embedding function F(rhobar) and derivative F'(rhobar), eqn I.5
|
||||
//
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::embedding(const double A, const double Ec, const double rhobar, double& dF) const
|
||||
{
|
||||
const double AEc = A * Ec;
|
||||
|
||||
if (rhobar > 0.0) {
|
||||
const double lrb = log(rhobar);
|
||||
dF = AEc * (1.0 + lrb);
|
||||
return AEc * rhobar * lrb;
|
||||
} else {
|
||||
if (emb_lin_neg == 0) {
|
||||
dF = 0.0;
|
||||
return 0.0;
|
||||
} else {
|
||||
dF = - AEc;
|
||||
return - AEc * rhobar;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Compute Rose energy function, I.16
|
||||
//
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double MEAMKokkos<DeviceType>::erose(const double r, const double re, const double alpha, const double Ec, const double repuls,
|
||||
const double attrac, const int form) const
|
||||
{
|
||||
double astar, a3;
|
||||
double result = 0.0;
|
||||
|
||||
if (r > 0.0) {
|
||||
astar = alpha * (r / re - 1.0);
|
||||
a3 = 0.0;
|
||||
if (astar >= 0)
|
||||
a3 = attrac;
|
||||
else if (astar < 0)
|
||||
a3 = repuls;
|
||||
|
||||
if (form == 1)
|
||||
result = -Ec * (1 + astar + (-attrac + repuls / r) * MathSpecialKokkos::cube(astar)) * MathSpecialKokkos::fm_exp(-astar);
|
||||
else if (form == 2)
|
||||
result = -Ec * (1 + astar + a3 * MathSpecialKokkos::cube(astar)) * MathSpecialKokkos::fm_exp(-astar);
|
||||
else
|
||||
result = -Ec * (1 + astar + a3 * MathSpecialKokkos::cube(astar) / (r / re)) * MathSpecialKokkos::fm_exp(-astar);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Shape factors for various configurations
|
||||
//
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void MEAMKokkos<DeviceType>::get_shpfcn(const lattice_t latt, const double sthe, const double cthe, double (&s)[3]) const
|
||||
{
|
||||
switch (latt) {
|
||||
case FCC:
|
||||
case BCC:
|
||||
case B1:
|
||||
case B2:
|
||||
s[0] = 0.0;
|
||||
s[1] = 0.0;
|
||||
s[2] = 0.0;
|
||||
break;
|
||||
case HCP:
|
||||
s[0] = 0.0;
|
||||
s[1] = 0.0;
|
||||
s[2] = 1.0 / 3.0;
|
||||
break;
|
||||
case CH4: // CH4 actually needs shape factor for diamond for C, dimer for H
|
||||
case DIA:
|
||||
case DIA3:
|
||||
s[0] = 0.0;
|
||||
s[1] = 0.0;
|
||||
s[2] = 32.0 / 9.0;
|
||||
break;
|
||||
case DIM:
|
||||
s[0] = 1.0;
|
||||
s[1] = 2.0 / 3.0;
|
||||
// s(4) = 1.d0 // this should be 0.4 unless (1-legendre) is multiplied in the density calc.
|
||||
s[2] = 0.40; // this is (1-legendre) where legendre = 0.6 in dynamo is accounted.
|
||||
break;
|
||||
case LIN: // linear, theta being 180
|
||||
s[0] = 0.0;
|
||||
s[1] = 8.0 / 3.0; // 4*(co**4 + si**4 - 1.0/3.0) in zig become 4*(1-1/3)
|
||||
s[2] = 0.0;
|
||||
break;
|
||||
case ZIG: //zig-zag
|
||||
case TRI: //trimer e.g. H2O
|
||||
s[0] = 4.0*pow(cthe,2);
|
||||
s[1] = 4.0*(pow(cthe,4) + pow(sthe,4) - 1.0/3.0);
|
||||
s[2] = 4.0*(pow(cthe,2) * (3*pow(sthe,4) + pow(cthe,4)));
|
||||
s[2] = s[2] - 0.6*s[0]; //legend in dyn, 0.6 is default value.
|
||||
break;
|
||||
default:
|
||||
s[0] = 0.0;
|
||||
// call error('Lattice not defined in get_shpfcn.')
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Number of neighbors for the reference structure
|
||||
//
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int MEAMKokkos<DeviceType>::get_Zij(const lattice_t latt) const
|
||||
{
|
||||
switch (latt) {
|
||||
case FCC:
|
||||
return 12;
|
||||
case BCC:
|
||||
return 8;
|
||||
case HCP:
|
||||
return 12;
|
||||
case DIA:
|
||||
case DIA3:
|
||||
return 4;
|
||||
case DIM:
|
||||
return 1;
|
||||
case B1:
|
||||
return 6;
|
||||
case C11:
|
||||
return 10;
|
||||
case L12:
|
||||
return 12;
|
||||
case B2:
|
||||
return 8;
|
||||
case CH4: // DYNAMO currently implemented this way while it needs two Z values, 4 and 1
|
||||
return 4;
|
||||
case LIN:
|
||||
case ZIG:
|
||||
case TRI:
|
||||
return 2;
|
||||
// call error('Lattice not defined in get_Zij.')
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
68
src/KOKKOS/meam_impl_kokkos.h
Normal file
68
src/KOKKOS/meam_impl_kokkos.h
Normal file
@ -0,0 +1,68 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Naga Vydyanathan (NVIDIA), Stan Moore (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "memory_kokkos.h"
|
||||
#include "meam_kokkos.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
MEAMKokkos<DeviceType>::MEAMKokkos(Memory *mem) : MEAM(mem)
|
||||
{
|
||||
d_errorflag = typename AT::t_int_scalar("meam:errorflag");
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
MEAMKokkos<DeviceType>::~MEAMKokkos()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
MemoryKokkos *memoryKK = (MemoryKokkos *)memory;
|
||||
|
||||
memoryKK->destroy_kokkos(k_rho,rho);
|
||||
memoryKK->destroy_kokkos(k_rho0,rho0);
|
||||
memoryKK->destroy_kokkos(k_rho1,rho1);
|
||||
memoryKK->destroy_kokkos(k_rho2,rho2);
|
||||
memoryKK->destroy_kokkos(k_rho3,rho3);
|
||||
memoryKK->destroy_kokkos(k_frhop,frhop);
|
||||
memoryKK->destroy_kokkos(k_gamma,gamma);
|
||||
memoryKK->destroy_kokkos(k_dgamma1,dgamma1);
|
||||
memoryKK->destroy_kokkos(k_dgamma2,dgamma2);
|
||||
memoryKK->destroy_kokkos(k_dgamma3,dgamma3);
|
||||
memoryKK->destroy_kokkos(k_arho2b,arho2b);
|
||||
|
||||
memoryKK->destroy_kokkos(k_arho1,arho1);
|
||||
memoryKK->destroy_kokkos(k_arho2,arho2);
|
||||
memoryKK->destroy_kokkos(k_arho3,arho3);
|
||||
memoryKK->destroy_kokkos(k_arho3b,arho3b);
|
||||
memoryKK->destroy_kokkos(k_t_ave,t_ave);
|
||||
memoryKK->destroy_kokkos(k_tsq_ave,tsq_ave);
|
||||
|
||||
memoryKK->destroy_kokkos(k_scrfcn,scrfcn);
|
||||
memoryKK->destroy_kokkos(k_dscrfcn,dscrfcn);
|
||||
memoryKK->destroy_kokkos(k_fcpair,fcpair);
|
||||
}
|
||||
|
||||
#include "meam_setup_done_kokkos.h"
|
||||
#include "meam_funcs_kokkos.h"
|
||||
#include "meam_dens_init_kokkos.h"
|
||||
#include "meam_dens_final_kokkos.h"
|
||||
#include "meam_force_kokkos.h"
|
||||
|
||||
224
src/KOKKOS/meam_kokkos.h
Normal file
224
src/KOKKOS/meam_kokkos.h
Normal file
@ -0,0 +1,224 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_MEAMKOKKOS_H
|
||||
#define LMP_MEAMKOKKOS_H
|
||||
|
||||
#include "kokkos.h"
|
||||
#include "meam.h"
|
||||
#include "memory_kokkos.h"
|
||||
#include "neigh_request.h"
|
||||
#include "neighbor_kokkos.h"
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// Empty tag types. Each one selects the MEAMKokkos::operator() overload
// that takes it as its first argument; the templated tags additionally
// carry the NEIGHFLAG value as a compile-time parameter.
struct TagMEAMDensFinal {};
template <int NEIGHFLAG> struct TagMEAMDensInit {};
struct TagMEAMZero {};
template <int NEIGHFLAG> struct TagMEAMForce {};
|
||||
|
||||
template <class DeviceType> class MEAMKokkos : public MEAM {
|
||||
public:
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
typedef EV_FLOAT value_type;
|
||||
MEAMKokkos(Memory *mem);
|
||||
~MEAMKokkos() override;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagMEAMDensFinal, const int &, EV_FLOAT &) const;
|
||||
|
||||
template <int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION void operator()(TagMEAMDensInit<NEIGHFLAG>, const int &) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagMEAMZero, const int &) const;
|
||||
|
||||
template <int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION void operator()(TagMEAMForce<NEIGHFLAG>, const int &, EV_FLOAT &) const;
|
||||
|
||||
private:
|
||||
// parameters to meam_dens_init
|
||||
|
||||
int ntype, nlocal;
|
||||
typename AT::t_int_1d type;
|
||||
typename AT::t_int_1d d_offset;
|
||||
typename AT::t_int_1d d_map;
|
||||
typename AT::t_int_2d d_scale;
|
||||
typename AT::t_x_array x;
|
||||
typename AT::t_int_1d d_numneigh_half;
|
||||
typename AT::t_int_1d d_numneigh_full;
|
||||
typename AT::t_neighbors_2d d_neighbors_half;
|
||||
typename AT::t_neighbors_2d d_neighbors_full;
|
||||
typename AT::t_int_1d d_ilist_half;
|
||||
typename AT::t_f_array f;
|
||||
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
|
||||
|
||||
// parameters to meam_dens_final
|
||||
|
||||
typename AT::t_int_scalar d_errorflag;
|
||||
int eflag_either, eflag_global, eflag_atom, vflag_either, vflag_global, vflag_atom;
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
|
||||
|
||||
public:
|
||||
void meam_dens_setup(int, int, int) override;
|
||||
void meam_setup_done(double *) override;
|
||||
void meam_dens_init(int, int, typename AT::t_int_1d, typename AT::t_int_1d,
|
||||
typename AT::t_x_array, typename AT::t_int_1d, typename AT::t_int_1d,
|
||||
typename AT::t_int_1d, typename AT::t_neighbors_2d,
|
||||
typename AT::t_neighbors_2d, typename AT::t_int_1d, int, int);
|
||||
void meam_dens_final(int, int, int, int, typename ArrayTypes<DeviceType>::t_efloat_1d, int,
|
||||
typename AT::t_int_1d, typename AT::t_int_1d, typename AT::t_int_2d, int &,
|
||||
EV_FLOAT &);
|
||||
void meam_force(int, int, int, int, int, typename ArrayTypes<DeviceType>::t_efloat_1d, int,
|
||||
typename AT::t_int_1d, typename AT::t_int_1d, typename AT::t_x_array,
|
||||
typename AT::t_int_1d, typename AT::t_int_1d, typename AT::t_f_array,
|
||||
typename ArrayTypes<DeviceType>::t_virial_array, typename AT::t_int_1d,
|
||||
typename AT::t_int_1d, typename AT::t_neighbors_2d, typename AT::t_neighbors_2d,
|
||||
int, int, EV_FLOAT &);
|
||||
template <int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION void getscreen(int, int, typename AT::t_x_array, typename AT::t_int_1d,
|
||||
typename AT::t_int_1d, int, typename AT::t_int_1d,
|
||||
typename AT::t_int_1d) const;
|
||||
template <int NEIGHFLAG>
|
||||
KOKKOS_INLINE_FUNCTION void calc_rho1(int, int, typename AT::t_int_1d, typename AT::t_int_1d,
|
||||
typename AT::t_x_array, typename AT::t_int_1d, int) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double fcut(const double xi) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double dfcut(const double xi, double &dfc) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double dCfunc(const double, const double, const double) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void dCfunc2(const double, const double, const double, double &, double &) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double G_gam(const double, const int, int &) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double dG_gam(const double, const int, double &) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double zbl(const double, const int, const int) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double embedding(const double, const double, const double, double &) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double erose(const double, const double, const double, const double, const double, const double,
|
||||
const int) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void get_shpfcn(const lattice_t latt, const double sthe, const double cthe, double (&s)[3]) const;
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int get_Zij(const lattice_t) const;
|
||||
|
||||
public:
|
||||
DAT::tdual_ffloat_1d k_rho, k_rho0, k_rho1, k_rho2, k_rho3, k_frhop;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d d_rho, d_rho0, d_rho1, d_rho2, d_rho3, d_frhop;
|
||||
HAT::t_ffloat_1d h_rho, h_rho0, h_rho1, h_rho2, h_rho3, h_frhop;
|
||||
DAT::tdual_ffloat_1d k_gamma, k_dgamma1, k_dgamma2, k_dgamma3, k_arho2b;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d d_gamma, d_dgamma1, d_dgamma2, d_dgamma3, d_arho2b;
|
||||
HAT::t_ffloat_1d h_gamma, h_dgamma1, h_dgamma2, h_dgamma3, h_arho2b;
|
||||
DAT::tdual_ffloat_2d k_arho1, k_arho2, k_arho3, k_arho3b, k_t_ave, k_tsq_ave;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_arho1, d_arho2, d_arho3, d_arho3b, d_t_ave,
|
||||
d_tsq_ave;
|
||||
HAT::t_ffloat_2d h_arho1, h_arho2, h_arho3, h_arho3b, h_t_ave, h_tsq_ave;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_2d d_phir, d_phirar, d_phirar1, d_phirar2, d_phirar3,
|
||||
d_phirar4, d_phirar5, d_phirar6;
|
||||
DAT::tdual_ffloat_1d k_scrfcn, k_dscrfcn, k_fcpair;
|
||||
typename ArrayTypes<DeviceType>::t_ffloat_1d d_scrfcn, d_dscrfcn, d_fcpair;
|
||||
HAT::t_ffloat_1d h_scrfcn, h_dscrfcn, h_fcpair;
|
||||
|
||||
protected:
|
||||
int need_dup;
|
||||
using KKDeviceType = typename KKDevice<DeviceType>::value;
|
||||
|
||||
template <typename DataType, typename Layout>
|
||||
using DupScatterView =
|
||||
KKScatterView<DataType, Layout, KKDeviceType, KKScatterSum, KKScatterDuplicated>;
|
||||
|
||||
template <typename DataType, typename Layout>
|
||||
using NonDupScatterView =
|
||||
KKScatterView<DataType, Layout, KKDeviceType, KKScatterSum, KKScatterNonDuplicated>;
|
||||
|
||||
DupScatterView<typename decltype(d_rho0)::data_type, typename decltype(d_rho0)::array_layout>
|
||||
dup_rho0;
|
||||
NonDupScatterView<typename decltype(d_rho0)::data_type, typename decltype(d_rho0)::array_layout>
|
||||
ndup_rho0;
|
||||
DupScatterView<typename decltype(d_arho2b)::data_type, typename decltype(d_arho2b)::array_layout>
|
||||
dup_arho2b;
|
||||
NonDupScatterView<typename decltype(d_arho2b)::data_type,
|
||||
typename decltype(d_arho2b)::array_layout>
|
||||
ndup_arho2b;
|
||||
DupScatterView<typename decltype(d_arho1)::data_type, typename decltype(d_arho1)::array_layout>
|
||||
dup_arho1;
|
||||
NonDupScatterView<typename decltype(d_arho1)::data_type, typename decltype(d_arho1)::array_layout>
|
||||
ndup_arho1;
|
||||
DupScatterView<typename decltype(d_arho2)::data_type, typename decltype(d_arho2)::array_layout>
|
||||
dup_arho2;
|
||||
NonDupScatterView<typename decltype(d_arho2)::data_type, typename decltype(d_arho2)::array_layout>
|
||||
ndup_arho2;
|
||||
DupScatterView<typename decltype(d_arho3)::data_type, typename decltype(d_arho3)::array_layout>
|
||||
dup_arho3;
|
||||
NonDupScatterView<typename decltype(d_arho3)::data_type, typename decltype(d_arho3)::array_layout>
|
||||
ndup_arho3;
|
||||
DupScatterView<typename decltype(d_arho3b)::data_type, typename decltype(d_arho3b)::array_layout>
|
||||
dup_arho3b;
|
||||
NonDupScatterView<typename decltype(d_arho3b)::data_type,
|
||||
typename decltype(d_arho3b)::array_layout>
|
||||
ndup_arho3b;
|
||||
DupScatterView<typename decltype(d_t_ave)::data_type, typename decltype(d_t_ave)::array_layout>
|
||||
dup_t_ave;
|
||||
NonDupScatterView<typename decltype(d_t_ave)::data_type, typename decltype(d_t_ave)::array_layout>
|
||||
ndup_t_ave;
|
||||
DupScatterView<typename decltype(d_tsq_ave)::data_type,
|
||||
typename decltype(d_tsq_ave)::array_layout>
|
||||
dup_tsq_ave;
|
||||
NonDupScatterView<typename decltype(d_tsq_ave)::data_type,
|
||||
typename decltype(d_tsq_ave)::array_layout>
|
||||
ndup_tsq_ave;
|
||||
DupScatterView<typename decltype(f)::data_type, typename decltype(f)::array_layout> dup_f;
|
||||
NonDupScatterView<typename decltype(f)::data_type, typename decltype(f)::array_layout> ndup_f;
|
||||
DupScatterView<typename decltype(d_eatom)::data_type, typename decltype(d_eatom)::array_layout>
|
||||
dup_eatom;
|
||||
NonDupScatterView<typename decltype(d_eatom)::data_type, typename decltype(d_eatom)::array_layout>
|
||||
ndup_eatom;
|
||||
DupScatterView<typename decltype(d_vatom)::data_type, typename decltype(d_vatom)::array_layout>
|
||||
dup_vatom;
|
||||
NonDupScatterView<typename decltype(d_vatom)::data_type, typename decltype(d_vatom)::array_layout>
|
||||
ndup_vatom;
|
||||
};
|
||||
|
||||
// return true if the magnitude of f is below the fixed 1e-20 threshold

KOKKOS_INLINE_FUNCTION
static bool iszero_kk(const double f)
{
  const double magnitude = fabs(f);
  return magnitude < 1e-20;
}
|
||||
|
||||
// return true if f differs from 1.0 by less than the fixed 1e-20 threshold

KOKKOS_INLINE_FUNCTION
static bool isone_kk(const double f)
{
  const double deviation = fabs(f - 1.0);
  return deviation < 1e-20;
}
|
||||
|
||||
// guarded division: yields 0.0 when iszero_kk(d) holds, otherwise n / d

KOKKOS_INLINE_FUNCTION
static double fdiv_zero_kk(const double n, const double d)
{
  return iszero_kk(d) ? 0.0 : n / d;
}
|
||||
|
||||
// Functions we need for compat
|
||||
|
||||
} // namespace LAMMPS_NS
|
||||
#include "meam_impl_kokkos.h"
|
||||
|
||||
#endif
|
||||
60
src/KOKKOS/meam_setup_done_kokkos.h
Normal file
60
src/KOKKOS/meam_setup_done_kokkos.h
Normal file
@ -0,0 +1,60 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "meam_kokkos.h"
|
||||
|
||||
template<class DeviceType>
|
||||
void MEAMKokkos<DeviceType>::meam_setup_done(double* cutmax)
|
||||
{
|
||||
MEAM::meam_setup_done(cutmax);
|
||||
|
||||
MemKK::realloc_kokkos(d_phir, "pair:phir", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
MemKK::realloc_kokkos(d_phirar, "pair:phirar", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
MemKK::realloc_kokkos(d_phirar1, "pair:phirar1", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
MemKK::realloc_kokkos(d_phirar2, "pair:phirar2", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
MemKK::realloc_kokkos(d_phirar3, "pair:phirar3", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
MemKK::realloc_kokkos(d_phirar4, "pair:phirar4", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
MemKK::realloc_kokkos(d_phirar5, "pair:phirar5", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
MemKK::realloc_kokkos(d_phirar6, "pair:phirar6", (neltypes * (neltypes + 1)) / 2, nr);
|
||||
|
||||
auto h_phir = Kokkos::create_mirror_view(d_phir);
|
||||
auto h_phirar = Kokkos::create_mirror_view(d_phirar);
|
||||
auto h_phirar1 = Kokkos::create_mirror_view(d_phirar1);
|
||||
auto h_phirar2 = Kokkos::create_mirror_view(d_phirar2);
|
||||
auto h_phirar3 = Kokkos::create_mirror_view(d_phirar3);
|
||||
auto h_phirar4 = Kokkos::create_mirror_view(d_phirar4);
|
||||
auto h_phirar5 = Kokkos::create_mirror_view(d_phirar5);
|
||||
auto h_phirar6 = Kokkos::create_mirror_view(d_phirar6);
|
||||
|
||||
for (int i = 0; i <(neltypes * (neltypes + 1)) / 2; i++)
|
||||
for(int j = 0; j < nr; j++) {
|
||||
h_phir(i,j) = phir[i][j];
|
||||
h_phirar(i,j) = phirar[i][j];
|
||||
h_phirar1(i,j) = phirar1[i][j];
|
||||
h_phirar2(i,j) = phirar2[i][j];
|
||||
h_phirar3(i,j) = phirar3[i][j];
|
||||
h_phirar4(i,j) = phirar4[i][j];
|
||||
h_phirar5(i,j) = phirar5[i][j];
|
||||
h_phirar6(i,j) = phirar6[i][j];
|
||||
}
|
||||
|
||||
Kokkos::deep_copy(d_phir,h_phir);
|
||||
Kokkos::deep_copy(d_phirar,h_phirar);
|
||||
Kokkos::deep_copy(d_phirar1,h_phirar1);
|
||||
Kokkos::deep_copy(d_phirar2,h_phirar2);
|
||||
Kokkos::deep_copy(d_phirar3,h_phirar3);
|
||||
Kokkos::deep_copy(d_phirar4,h_phirar4);
|
||||
Kokkos::deep_copy(d_phirar5,h_phirar5);
|
||||
Kokkos::deep_copy(d_phirar6,h_phirar6);
|
||||
}
|
||||
@ -20,6 +20,8 @@
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
typedef MemoryKokkos MemKK;
|
||||
|
||||
class MemoryKokkos : public Memory {
|
||||
public:
|
||||
MemoryKokkos(class LAMMPS *lmp) : Memory(lmp) {}
|
||||
@ -278,46 +280,11 @@ void destroy_kokkos(TYPE data, typename TYPE::value_type** &array)
|
||||
deallocate first to reduce memory use
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <typename TYPE>
|
||||
void realloc_kokkos(TYPE &data, const char *name, int n1)
|
||||
template <typename TYPE, typename... Indices>
|
||||
static void realloc_kokkos(TYPE &data, const char *name, Indices... ns)
|
||||
{
|
||||
data = TYPE();
|
||||
data = TYPE(Kokkos::NoInit(std::string(name)),n1);
|
||||
}
|
||||
|
||||
template <typename TYPE>
|
||||
void realloc_kokkos(TYPE &data, const char *name, int n1, int n2)
|
||||
{
|
||||
data = TYPE();
|
||||
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2);
|
||||
}
|
||||
|
||||
template <typename TYPE>
|
||||
void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3)
|
||||
{
|
||||
data = TYPE();
|
||||
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3);
|
||||
}
|
||||
|
||||
template <typename TYPE>
|
||||
void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4)
|
||||
{
|
||||
data = TYPE();
|
||||
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4);
|
||||
}
|
||||
|
||||
template <typename TYPE>
|
||||
void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5)
|
||||
{
|
||||
data = TYPE();
|
||||
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5);
|
||||
}
|
||||
|
||||
template <typename TYPE>
|
||||
void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4, int n5, int n6)
|
||||
{
|
||||
data = TYPE();
|
||||
data = TYPE(Kokkos::NoInit(std::string(name)),n1,n2,n3,n4,n5,n6);
|
||||
data = TYPE(Kokkos::NoInit(std::string(name)), ns...);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -325,7 +292,7 @@ void realloc_kokkos(TYPE &data, const char *name, int n1, int n2, int n3, int n4
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template <typename TYPE>
|
||||
double memory_usage(TYPE &data)
|
||||
static double memory_usage(TYPE &data)
|
||||
{
|
||||
return data.span() * sizeof(typename TYPE::value_type);
|
||||
}
|
||||
|
||||
@ -49,6 +49,8 @@ int MinCGKokkos::iterate(int maxiter)
|
||||
fix_minimize_kk->k_vectors.sync<LMPDeviceType>();
|
||||
fix_minimize_kk->k_vectors.modify<LMPDeviceType>();
|
||||
|
||||
atomKK->sync(Device,F_MASK);
|
||||
|
||||
// nlimit = max # of CG iterations before restarting
|
||||
// set to ndoftotal unless too big
|
||||
|
||||
|
||||
@ -79,6 +79,8 @@ void MinKokkos::setup(int flag)
|
||||
}
|
||||
update->setupflag = 1;
|
||||
|
||||
lmp->kokkos->auto_sync = 1;
|
||||
|
||||
// setup extra global dof due to fixes
|
||||
// cannot be done in init() b/c update init() is before modify init()
|
||||
|
||||
@ -170,7 +172,7 @@ void MinKokkos::setup(int flag)
|
||||
}
|
||||
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
||||
|
||||
if (atomKK->molecular) {
|
||||
if (atom->molecular != Atom::ATOMIC) {
|
||||
if (force->bond) {
|
||||
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
|
||||
force->bond->compute(eflag,vflag);
|
||||
@ -242,6 +244,8 @@ void MinKokkos::setup_minimal(int flag)
|
||||
// acquire ghosts
|
||||
// build neighbor lists
|
||||
|
||||
lmp->kokkos->auto_sync = 1;
|
||||
|
||||
if (flag) {
|
||||
modify->setup_pre_exchange();
|
||||
if (triclinic) domain->x2lamda(atom->nlocal);
|
||||
@ -277,7 +281,7 @@ void MinKokkos::setup_minimal(int flag)
|
||||
}
|
||||
else if (force->pair) force->pair->compute_dummy(eflag,vflag);
|
||||
|
||||
if (atomKK->molecular) {
|
||||
if (atom->molecular != Atom::ATOMIC) {
|
||||
if (force->bond) {
|
||||
atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
|
||||
force->bond->compute(eflag,vflag);
|
||||
@ -495,6 +499,7 @@ double MinKokkos::energy_force(int resetflag)
|
||||
if (force->newton) {
|
||||
comm->reverse_comm();
|
||||
timer->stamp(Timer::COMM);
|
||||
atomKK->sync(Device,F_MASK);
|
||||
}
|
||||
|
||||
// update per-atom minimization variables stored by pair styles
|
||||
@ -567,7 +572,7 @@ void MinKokkos::force_clear()
|
||||
}
|
||||
});
|
||||
}
|
||||
atomKK->modified(Device,F_MASK);
|
||||
atomKK->modified(Device,F_MASK|TORQUE_MASK);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -576,6 +581,7 @@ void MinKokkos::force_clear()
|
||||
|
||||
double MinKokkos::fnorm_sqr()
|
||||
{
|
||||
atomKK->sync(Device,F_MASK);
|
||||
|
||||
double local_norm2_sqr = 0.0;
|
||||
{
|
||||
@ -604,6 +610,7 @@ double MinKokkos::fnorm_sqr()
|
||||
|
||||
double MinKokkos::fnorm_inf()
|
||||
{
|
||||
atomKK->sync(Device,F_MASK);
|
||||
|
||||
double local_norm_inf = 0.0;
|
||||
{
|
||||
@ -632,6 +639,7 @@ double MinKokkos::fnorm_inf()
|
||||
|
||||
double MinKokkos::fnorm_max()
|
||||
{
|
||||
atomKK->sync(Device,F_MASK);
|
||||
|
||||
double local_norm_max = 0.0;
|
||||
{
|
||||
|
||||
@ -111,9 +111,6 @@ void MinLineSearchKokkos::reset_vectors()
|
||||
x0 = fix_minimize_kk->request_vector_kokkos(0);
|
||||
g = fix_minimize_kk->request_vector_kokkos(1);
|
||||
h = fix_minimize_kk->request_vector_kokkos(2);
|
||||
|
||||
auto h_fvec = Kokkos::create_mirror_view(fvec);
|
||||
Kokkos::deep_copy(h_fvec,fvec);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -181,6 +178,8 @@ int MinLineSearchKokkos::linemin_quadratic(double eoriginal, double &alpha)
|
||||
fix_minimize_kk->k_vectors.sync<LMPDeviceType>();
|
||||
fix_minimize_kk->k_vectors.modify<LMPDeviceType>();
|
||||
|
||||
atomKK->sync(Device,X_MASK|F_MASK);
|
||||
|
||||
// fdothall = projection of search dir along downhill gradient
|
||||
// if search direction is not downhill, exit with error
|
||||
|
||||
@ -364,8 +363,8 @@ double MinLineSearchKokkos::alpha_step(double alpha, int resetflag)
|
||||
// reset to starting point
|
||||
|
||||
if (nextra_global) modify->min_step(0.0,hextra);
|
||||
atomKK->k_x.clear_sync_state(); // ignore if host positions since device
|
||||
// positions will be reset below
|
||||
atomKK->k_x.clear_sync_state(); // ignore if host positions modified since
|
||||
// device positions will be reset below
|
||||
{
|
||||
// local variables for lambda capture
|
||||
|
||||
@ -409,6 +408,8 @@ double MinLineSearchKokkos::compute_dir_deriv(double &ff)
|
||||
double dot[2],dotall[2];
|
||||
double fh;
|
||||
|
||||
atomKK->sync(Device,F_MASK);
|
||||
|
||||
// compute new fh, alpha, delfh
|
||||
|
||||
s_double2 sdot;
|
||||
|
||||
@ -26,8 +26,8 @@ using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, int NEWTON>
|
||||
NPairHalffullKokkos<DeviceType,NEWTON>::NPairHalffullKokkos(LAMMPS *lmp) : NPair(lmp) {
|
||||
template<class DeviceType, int NEWTON, int TRIM>
|
||||
NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::NPairHalffullKokkos(LAMMPS *lmp) : NPair(lmp) {
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
}
|
||||
@ -41,15 +41,17 @@ NPairHalffullKokkos<DeviceType,NEWTON>::NPairHalffullKokkos(LAMMPS *lmp) : NPair
|
||||
if ghost, also store neighbors of ghost atoms & set inum,gnum correctly
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, int NEWTON>
|
||||
void NPairHalffullKokkos<DeviceType,NEWTON>::build(NeighList *list)
|
||||
template<class DeviceType, int NEWTON, int TRIM>
|
||||
void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::build(NeighList *list)
|
||||
{
|
||||
if (NEWTON) {
|
||||
if (NEWTON || TRIM) {
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK);
|
||||
}
|
||||
nlocal = atom->nlocal;
|
||||
|
||||
cutsq_custom = cutoff_custom*cutoff_custom;
|
||||
|
||||
NeighListKokkos<DeviceType>* k_list_full = static_cast<NeighListKokkos<DeviceType>*>(list->listfull);
|
||||
d_ilist_full = k_list_full->d_ilist;
|
||||
d_numneigh_full = k_list_full->d_numneigh;
|
||||
@ -76,14 +78,14 @@ void NPairHalffullKokkos<DeviceType,NEWTON>::build(NeighList *list)
|
||||
k_list->k_ilist.template modify<DeviceType>();
|
||||
}
|
||||
|
||||
template<class DeviceType, int NEWTON>
|
||||
template<class DeviceType, int NEWTON, int TRIM>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void NPairHalffullKokkos<DeviceType,NEWTON>::operator()(TagNPairHalffullCompute, const int &ii) const {
|
||||
void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::operator()(TagNPairHalffullCompute, const int &ii) const {
|
||||
int n = 0;
|
||||
|
||||
const int i = d_ilist_full(ii);
|
||||
F_FLOAT xtmp,ytmp,ztmp;
|
||||
if (NEWTON) {
|
||||
if (NEWTON || TRIM) {
|
||||
xtmp = x(i,0);
|
||||
ytmp = x(i,1);
|
||||
ztmp = x(i,2);
|
||||
@ -108,9 +110,29 @@ void NPairHalffullKokkos<DeviceType,NEWTON>::operator()(TagNPairHalffullCompute,
|
||||
if (x(j,1) == ytmp && x(j,0) < xtmp) continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (TRIM) {
|
||||
const double delx = xtmp - x(j,0);
|
||||
const double dely = ytmp - x(j,1);
|
||||
const double delz = ztmp - x(j,2);
|
||||
const double rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq > cutsq_custom) continue;
|
||||
}
|
||||
|
||||
neighbors_i(n++) = joriginal;
|
||||
} else if (j > i) {
|
||||
|
||||
if (TRIM) {
|
||||
const double delx = xtmp - x(j,0);
|
||||
const double dely = ytmp - x(j,1);
|
||||
const double delz = ztmp - x(j,2);
|
||||
const double rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq > cutsq_custom) continue;
|
||||
}
|
||||
|
||||
neighbors_i(n++) = joriginal;
|
||||
} else {
|
||||
if (j > i) neighbors_i(n++) = joriginal;
|
||||
}
|
||||
}
|
||||
|
||||
@ -119,10 +141,14 @@ void NPairHalffullKokkos<DeviceType,NEWTON>::operator()(TagNPairHalffullCompute,
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class NPairHalffullKokkos<LMPDeviceType,0>;
|
||||
template class NPairHalffullKokkos<LMPDeviceType,1>;
|
||||
template class NPairHalffullKokkos<LMPDeviceType,0,0>;
|
||||
template class NPairHalffullKokkos<LMPDeviceType,0,1>;
|
||||
template class NPairHalffullKokkos<LMPDeviceType,1,0>;
|
||||
template class NPairHalffullKokkos<LMPDeviceType,1,1>;
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
template class NPairHalffullKokkos<LMPHostType,0>;
|
||||
template class NPairHalffullKokkos<LMPHostType,1>;
|
||||
template class NPairHalffullKokkos<LMPHostType,0,0>;
|
||||
template class NPairHalffullKokkos<LMPHostType,0,1>;
|
||||
template class NPairHalffullKokkos<LMPHostType,1,0>;
|
||||
template class NPairHalffullKokkos<LMPHostType,1,1>;
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -13,27 +13,30 @@
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
// clang-format off
|
||||
|
||||
// Trim off
|
||||
|
||||
// Newton
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,1> NPairKokkosHalffullNewtonDevice;
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonDevice;
|
||||
NPairStyle(halffull/newton/kk/device,
|
||||
NPairKokkosHalffullNewtonDevice,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,1> NPairKokkosHalffullNewtonHost;
|
||||
typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
|
||||
NPairStyle(halffull/newton/kk/host,
|
||||
NPairKokkosHalffullNewtonHost,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_KOKKOS_HOST);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,1> NPairKokkosHalffullNewtonDevice;
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonDevice;
|
||||
NPairStyle(halffull/newton/skip/kk/device,
|
||||
NPairKokkosHalffullNewtonDevice,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,1> NPairKokkosHalffullNewtonHost;
|
||||
typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
|
||||
NPairStyle(halffull/newton/skip/kk/host,
|
||||
NPairKokkosHalffullNewtonHost,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
@ -41,25 +44,25 @@ NPairStyle(halffull/newton/skip/kk/host,
|
||||
|
||||
// Newtoff
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,0> NPairKokkosHalffullNewtoffDevice;
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffDevice;
|
||||
NPairStyle(halffull/newtoff/kk/device,
|
||||
NPairKokkosHalffullNewtoffDevice,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,0> NPairKokkosHalffullNewtoffHost;
|
||||
typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
|
||||
NPairStyle(halffull/newtoff/kk/host,
|
||||
NPairKokkosHalffullNewtoffHost,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_KOKKOS_HOST);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,0> NPairKokkosHalffullNewtoffDevice;
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffDevice;
|
||||
NPairStyle(halffull/newtoff/skip/kk/device,
|
||||
NPairKokkosHalffullNewtoffDevice,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,0> NPairKokkosHalffullNewtoffHost;
|
||||
typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
|
||||
NPairStyle(halffull/newtoff/skip/kk/host,
|
||||
NPairKokkosHalffullNewtoffHost,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
@ -69,25 +72,25 @@ NPairStyle(halffull/newtoff/skip/kk/host,
|
||||
|
||||
// Newton
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,1> NPairKokkosHalffullNewtonGhostDevice;
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonGhostDevice;
|
||||
NPairStyle(halffull/newton/ghost/kk/device,
|
||||
NPairKokkosHalffullNewtonGhostDevice,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,1> NPairKokkosHalffullNewtonHost;
|
||||
typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
|
||||
NPairStyle(halffull/newton/ghost/kk/host,
|
||||
NPairKokkosHalffullNewtonHost,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,1> NPairKokkosHalffullNewtonGhostDevice;
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonGhostDevice;
|
||||
NPairStyle(halffull/newton/skip/ghost/kk/device,
|
||||
NPairKokkosHalffullNewtonGhostDevice,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,1> NPairKokkosHalffullNewtonHost;
|
||||
typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
|
||||
NPairStyle(halffull/newton/skip/ghost/kk/host,
|
||||
NPairKokkosHalffullNewtonHost,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
@ -95,29 +98,138 @@ NPairStyle(halffull/newton/skip/ghost/kk/host,
|
||||
|
||||
// Newtoff
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,0> NPairKokkosHalffullNewtoffGhostDevice;
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffGhostDevice;
|
||||
NPairStyle(halffull/newtoff/ghost/kk/device,
|
||||
NPairKokkosHalffullNewtoffGhostDevice,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,0> NPairKokkosHalffullNewtoffHost;
|
||||
typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
|
||||
NPairStyle(halffull/newtoff/ghost/kk/host,
|
||||
NPairKokkosHalffullNewtoffHost,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,0> NPairKokkosHalffullNewtoffGhostDevice;
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffGhostDevice;
|
||||
NPairStyle(halffull/newtoff/skip/ghost/kk/device,
|
||||
NPairKokkosHalffullNewtoffGhostDevice,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,0> NPairKokkosHalffullNewtoffHost;
|
||||
typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
|
||||
NPairStyle(halffull/newtoff/skip/ghost/kk/host,
|
||||
NPairKokkosHalffullNewtoffHost,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_HOST);
|
||||
|
||||
|
||||
//************ Trim **************
|
||||
|
||||
// Newton
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonTrimDevice;
|
||||
NPairStyle(halffull/newton/trim/kk/device,
|
||||
NPairKokkosHalffullNewtonTrimDevice,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
|
||||
NPairStyle(halffull/newton/trim/kk/host,
|
||||
NPairKokkosHalffullNewtonTrimHost,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_HOST);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonTrimDevice;
|
||||
NPairStyle(halffull/newton/skip/trim/kk/device,
|
||||
NPairKokkosHalffullNewtonTrimDevice,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
|
||||
NPairStyle(halffull/newton/skip/trim/kk/host,
|
||||
NPairKokkosHalffullNewtonTrimHost,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
|
||||
|
||||
// Newtoff
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffTrimDevice;
|
||||
NPairStyle(halffull/newtoff/trim/kk/device,
|
||||
NPairKokkosHalffullNewtoffTrimDevice,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
|
||||
NPairStyle(halffull/newtoff/trim/kk/host,
|
||||
NPairKokkosHalffullNewtoffTrimHost,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_HOST);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffTrimDevice;
|
||||
NPairStyle(halffull/newtoff/skip/trim/kk/device,
|
||||
NPairKokkosHalffullNewtoffTrimDevice,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
|
||||
NPairStyle(halffull/newtoff/skip/trim/kk/host,
|
||||
NPairKokkosHalffullNewtoffTrimHost,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
|
||||
|
||||
//************ Ghost **************
|
||||
|
||||
// Newton
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonGhostTrimDevice;
|
||||
NPairStyle(halffull/newton/ghost/trim/kk/device,
|
||||
NPairKokkosHalffullNewtonGhostTrimDevice,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
|
||||
NPairStyle(halffull/newton/ghost/trim/kk/host,
|
||||
NPairKokkosHalffullNewtonTrimHost,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_HOST);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonGhostTrimDevice;
|
||||
NPairStyle(halffull/newton/skip/ghost/trim/kk/device,
|
||||
NPairKokkosHalffullNewtonGhostTrimDevice,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
|
||||
NPairStyle(halffull/newton/skip/ghost/trim/kk/host,
|
||||
NPairKokkosHalffullNewtonTrimHost,
|
||||
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
|
||||
|
||||
// Newtoff
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffGhostTrimDevice;
|
||||
NPairStyle(halffull/newtoff/ghost/trim/kk/device,
|
||||
NPairKokkosHalffullNewtoffGhostTrimDevice,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
|
||||
NPairStyle(halffull/newtoff/ghost/trim/kk/host,
|
||||
NPairKokkosHalffullNewtoffTrimHost,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_HOST);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffGhostTrimDevice;
|
||||
NPairStyle(halffull/newtoff/skip/ghost/trim/kk/device,
|
||||
NPairKokkosHalffullNewtoffGhostTrimDevice,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
|
||||
|
||||
typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
|
||||
NPairStyle(halffull/newtoff/skip/ghost/trim/kk/host,
|
||||
NPairKokkosHalffullNewtoffTrimHost,
|
||||
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
|
||||
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
@ -132,7 +244,7 @@ namespace LAMMPS_NS {
|
||||
|
||||
struct TagNPairHalffullCompute{};
|
||||
|
||||
template<class DeviceType, int NEWTON>
|
||||
template<class DeviceType, int NEWTON, int TRIM>
|
||||
class NPairHalffullKokkos : public NPair {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
@ -146,6 +258,7 @@ class NPairHalffullKokkos : public NPair {
|
||||
|
||||
private:
|
||||
int nlocal;
|
||||
double cutsq_custom;
|
||||
|
||||
typename AT::t_x_array_randomread x;
|
||||
|
||||
|
||||
@ -153,6 +153,9 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
|
||||
int nall = nlocal;
|
||||
if (GHOST)
|
||||
nall += atom->nghost;
|
||||
|
||||
if (nall == 0) return;
|
||||
|
||||
list->grow(nall);
|
||||
|
||||
NeighborKokkosExecute<DeviceType>
|
||||
|
||||
196
src/KOKKOS/npair_trim_kokkos.cpp
Normal file
196
src/KOKKOS/npair_trim_kokkos.cpp
Normal file
@ -0,0 +1,196 @@
|
||||
// clang-format off
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_trim_kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
// Constructor: forwards the LAMMPS instance pointer to the NPair base class.
// No members of NPairTrimKokkos itself are initialized here.
template<class DeviceType>
|
||||
NPairTrimKokkos<DeviceType>::NPairTrimKokkos(LAMMPS *lmp) : NPair(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create list which is a copy of parent list, trimmed to the custom cutoff
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NPairTrimKokkos<DeviceType>::build(NeighList *list)
|
||||
{
|
||||
NeighList *listcopy = list->listcopy;
|
||||
|
||||
cutsq_custom = cutoff_custom*cutoff_custom;
|
||||
|
||||
if (list->kokkos) {
|
||||
if (!listcopy->kokkos)
|
||||
error->all(FLERR,"Cannot trim non-Kokkos neighbor list to Kokkos neighbor list");
|
||||
trim_to_kokkos(list);
|
||||
} else {
|
||||
if (!listcopy->kokkos)
|
||||
error->all(FLERR,"Missing Kokkos neighbor list for trim");
|
||||
trim_to_cpu(list);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NPairTrimKokkos<DeviceType>::trim_to_kokkos(NeighList *list)
|
||||
{
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
atomKK->sync(execution_space,X_MASK);
|
||||
|
||||
cutsq_custom = cutoff_custom*cutoff_custom;
|
||||
|
||||
NeighListKokkos<DeviceType>* k_list_copy = static_cast<NeighListKokkos<DeviceType>*>(list->listcopy);
|
||||
d_ilist_copy = k_list_copy->d_ilist;
|
||||
d_numneigh_copy = k_list_copy->d_numneigh;
|
||||
d_neighbors_copy = k_list_copy->d_neighbors;
|
||||
int inum_copy = list->listcopy->inum;
|
||||
if (list->ghost) inum_copy += list->listcopy->gnum;
|
||||
|
||||
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
|
||||
k_list->maxneighs = k_list_copy->maxneighs; // simple, but could be made more memory efficient
|
||||
k_list->grow(atom->nmax);
|
||||
d_ilist = k_list->d_ilist;
|
||||
d_numneigh = k_list->d_numneigh;
|
||||
d_neighbors = k_list->d_neighbors;
|
||||
|
||||
// loop over parent list and trim
|
||||
|
||||
copymode = 1;
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagNPairTrim>(0,inum_copy),*this);
|
||||
copymode = 0;
|
||||
|
||||
list->inum = k_list_copy->inum;
|
||||
list->gnum = k_list_copy->gnum;
|
||||
|
||||
k_list->k_ilist.template modify<DeviceType>();
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void NPairTrimKokkos<DeviceType>::operator()(TagNPairTrim, const int &ii) const {
|
||||
int n = 0;
|
||||
|
||||
const int i = d_ilist_copy(ii);
|
||||
const double xtmp = x(i,0);
|
||||
const double ytmp = x(i,1);
|
||||
const double ztmp = x(i,2);
|
||||
|
||||
// loop over copy neighbor list
|
||||
|
||||
const int jnum = d_numneigh_copy(i);
|
||||
const AtomNeighbors neighbors_i = AtomNeighbors(&d_neighbors(i,0),d_numneigh(i),
|
||||
&d_neighbors(i,1)-&d_neighbors(i,0));
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int joriginal = d_neighbors_copy(i,jj);
|
||||
const int j = joriginal & NEIGHMASK;
|
||||
|
||||
const double delx = xtmp - x(j,0);
|
||||
const double dely = ytmp - x(j,1);
|
||||
const double delz = ztmp - x(j,2);
|
||||
const double rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq > cutsq_custom) continue;
|
||||
|
||||
neighbors_i(n++) = joriginal;
|
||||
}
|
||||
|
||||
d_numneigh(i) = n;
|
||||
d_ilist(ii) = i;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NPairTrimKokkos<DeviceType>::trim_to_cpu(NeighList *list)
|
||||
{
|
||||
NeighList *listcopy = list->listcopy;
|
||||
NeighListKokkos<DeviceType>* listcopy_kk = (NeighListKokkos<DeviceType>*) listcopy;
|
||||
|
||||
listcopy_kk->k_ilist.template sync<LMPHostType>();
|
||||
|
||||
double** x = atom->x;
|
||||
|
||||
int inum = listcopy->inum;
|
||||
int gnum = listcopy->gnum;
|
||||
int inum_all = inum;
|
||||
if (list->ghost) inum_all += gnum;
|
||||
auto h_ilist = listcopy_kk->k_ilist.h_view;
|
||||
auto h_numneigh = Kokkos::create_mirror_view_and_copy(LMPHostType(),listcopy_kk->d_numneigh);
|
||||
auto h_neighbors = Kokkos::create_mirror_view_and_copy(LMPHostType(),listcopy_kk->d_neighbors);
|
||||
|
||||
list->inum = inum;
|
||||
list->gnum = gnum;
|
||||
auto ilist = list->ilist;
|
||||
auto numneigh = list->numneigh;
|
||||
|
||||
// Kokkos neighbor data is stored differently than regular CPU,
|
||||
// must loop over lists
|
||||
|
||||
int *neighptr;
|
||||
int **firstneigh = list->firstneigh;
|
||||
MyPage<int> *ipage = list->ipage;
|
||||
ipage->reset();
|
||||
|
||||
for (int ii = 0; ii < inum_all; ii++) {
|
||||
int n = 0;
|
||||
neighptr = ipage->vget();
|
||||
|
||||
const int i = h_ilist[ii];
|
||||
ilist[ii] = i;
|
||||
const double xtmp = x[i][0];
|
||||
const double ytmp = x[i][1];
|
||||
const double ztmp = x[i][2];
|
||||
|
||||
// loop over Kokkos neighbor list
|
||||
|
||||
const int jnum = h_numneigh[i];
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int joriginal = h_neighbors(i,jj);
|
||||
|
||||
const int j = joriginal & NEIGHMASK;
|
||||
|
||||
const double delx = xtmp - x[j][0];
|
||||
const double dely = ytmp - x[j][1];
|
||||
const double delz = ztmp - x[j][2];
|
||||
const double rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq > cutsq_custom) continue;
|
||||
|
||||
neighptr[n++] = joriginal;
|
||||
}
|
||||
|
||||
firstneigh[i] = neighptr;
|
||||
numneigh[i] = n;
|
||||
ipage->vgot(n);
|
||||
if (ipage->status())
|
||||
error->one(FLERR,"Neighbor list overflow, boost neigh_modify one");
|
||||
}
|
||||
}
|
||||
|
||||
// Explicit template instantiations of NPairTrimKokkos.
// The LMPDeviceType version is always instantiated; the LMPHostType version
// only when LMP_KOKKOS_GPU is defined.
// NOTE(review): presumably the two types coincide in non-GPU builds, which
// would make a second instantiation a duplicate - confirm
namespace LAMMPS_NS {
|
||||
template class NPairTrimKokkos<LMPDeviceType>;
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
template class NPairTrimKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
||||
70
src/KOKKOS/npair_trim_kokkos.h
Normal file
70
src/KOKKOS/npair_trim_kokkos.h
Normal file
@ -0,0 +1,70 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
https://www.lammps.org/, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
// clang-format off
|
||||
// Style registrations, seen only when NPAIR_CLASS is defined:
// trim/kk/device -> NPairTrimKokkos<LMPDeviceType>
// trim/kk/host   -> NPairTrimKokkos<LMPHostType>
// Both carry the NP_COPY and NP_TRIM flags plus the matching
// NP_KOKKOS_DEVICE / NP_KOKKOS_HOST flag.
// NOTE(review): NPairStyle is a macro supplied by the including file;
// its expansion is not visible here
NPairStyle(trim/kk/device,
|
||||
NPairTrimKokkos<LMPDeviceType>,
|
||||
NP_COPY | NP_TRIM | NP_KOKKOS_DEVICE);
|
||||
|
||||
NPairStyle(trim/kk/host,
|
||||
NPairTrimKokkos<LMPHostType>,
|
||||
NP_COPY | NP_TRIM | NP_KOKKOS_HOST);
|
||||
// clang-format on
|
||||
#else
|
||||
|
||||
// clang-format off
|
||||
#ifndef LMP_NPAIR_TRIM_KOKKOS_H
|
||||
#define LMP_NPAIR_TRIM_KOKKOS_H
|
||||
|
||||
#include "npair.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
struct TagNPairTrim{};
|
||||
|
||||
template<class DeviceType>
|
||||
class NPairTrimKokkos : public NPair {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
NPairTrimKokkos(class LAMMPS *);
|
||||
void build(class NeighList *) override;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagNPairTrim, const int&) const;
|
||||
|
||||
private:
|
||||
double cutsq_custom;
|
||||
|
||||
typename AT::t_x_array_randomread x;
|
||||
|
||||
typename AT::t_neighbors_2d_const d_neighbors_copy;
|
||||
typename AT::t_int_1d_const d_ilist_copy;
|
||||
typename AT::t_int_1d_const d_numneigh_copy;
|
||||
|
||||
typename AT::t_neighbors_2d d_neighbors;
|
||||
typename AT::t_int_1d d_ilist;
|
||||
typename AT::t_int_1d d_numneigh;
|
||||
|
||||
void trim_to_kokkos(class NeighList *);
|
||||
void trim_to_cpu(class NeighList *);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user