Working hippo multipole real-space term, added helper functions in a separate file
This commit is contained in:
@ -143,7 +143,7 @@ class BaseAmoeba {
|
||||
double *charge, double *boxlo, double *prd);
|
||||
|
||||
/// Compute multipole real-space with device neighboring
|
||||
int** compute_multipole_real(const int ago, const int inum_full, const int nall,
|
||||
virtual int** compute_multipole_real(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, int *host_amtype,
|
||||
int *host_amgroup, double **host_rpole, double *sublo, double *subhi,
|
||||
tagint *tag, int **nspecial, tagint **special,
|
||||
@ -155,7 +155,7 @@ class BaseAmoeba {
|
||||
double *boxlo, double *prd, void **tep_ptr);
|
||||
|
||||
/// Compute the real space part of the permanent field (udirect2b) with device neighboring
|
||||
int** compute_udirect2b(const int ago, const int inum_full, const int nall,
|
||||
virtual int** compute_udirect2b(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, int *host_amtype,
|
||||
int *host_amgroup, double **host_rpole,
|
||||
double **host_uind, double **host_uinp,
|
||||
@ -169,7 +169,7 @@ class BaseAmoeba {
|
||||
double *boxlo, double *prd, void **fieldp_ptr);
|
||||
|
||||
/// Compute the real space part of the induced field (umutual2b) with device neighboring
|
||||
int** compute_umutual2b(const int ago, const int inum_full, const int nall,
|
||||
virtual int** compute_umutual2b(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, int *host_amtype,
|
||||
int *host_amgroup, double **host_rpole,
|
||||
double **host_uind, double **host_uinp,
|
||||
@ -183,7 +183,7 @@ class BaseAmoeba {
|
||||
double *boxlo, double *prd, void **fieldp_ptr);
|
||||
|
||||
/// Compute polar real-space with device neighboring
|
||||
int** compute_polar_real(const int ago, const int inum_full, const int nall,
|
||||
virtual int** compute_polar_real(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, int *host_amtype,
|
||||
int *host_amgroup, double **host_rpole, double **host_uind,
|
||||
double **host_uinp, double *sublo, double *subhi,
|
||||
|
||||
@ -56,6 +56,7 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||
const double *host_special_polar_piscale,
|
||||
const double *host_special_polar_pscale,
|
||||
const double *host_csix, const double *host_adisp,
|
||||
const double *host_pcore, const double *host_palpha,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const int maxspecial15,
|
||||
const double cell_size, const double gpu_split, FILE *_screen,
|
||||
@ -69,7 +70,9 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
// specific to HIPPO
|
||||
k_dispersion.set_function(*(this->pair_program),"k_hippo_dispersion");
|
||||
_pval.alloc(this->_max_tep_size,*(this->ucl_device),UCL_READ_ONLY,UCL_READ_ONLY);
|
||||
|
||||
// If atom type constants fit in shared memory use fast kernel
|
||||
int lj_types=ntypes;
|
||||
@ -98,8 +101,8 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
||||
for (int i = 0; i < max_amclass; i++) {
|
||||
host_write2[i].x = host_csix[i];
|
||||
host_write2[i].y = host_adisp[i];
|
||||
host_write2[i].z = (numtyp)0;
|
||||
host_write2[i].w = (numtyp)0;
|
||||
host_write2[i].z = host_pcore[i];
|
||||
host_write2[i].w = host_palpha[i];
|
||||
}
|
||||
|
||||
coeff_amclass.alloc(max_amclass,*(this->ucl_device), UCL_READ_ONLY);
|
||||
@ -262,6 +265,93 @@ int HippoT::dispersion_real(const int eflag, const int vflag) {
|
||||
return GX;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Reneighbor on GPU if necessary, and then compute multipole real-space
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
int** HippoT::compute_multipole_real(const int ago, const int inum_full,
|
||||
const int nall, double **host_x,
|
||||
int *host_type, int *host_amtype,
|
||||
int *host_amgroup, double **host_rpole,
|
||||
double *sublo, double *subhi, tagint *tag,
|
||||
int **nspecial, tagint **special,
|
||||
int *nspecial15, tagint **special15,
|
||||
const bool eflag_in, const bool vflag_in,
|
||||
const bool eatom, const bool vatom,
|
||||
int &host_start, int **ilist, int **jnum,
|
||||
const double cpu_time, bool &success,
|
||||
const double aewald, const double felec,
|
||||
const double off2_mpole, double *host_q,
|
||||
double *boxlo, double *prd, void **tep_ptr) {
|
||||
this->acc_timers();
|
||||
int eflag, vflag;
|
||||
if (eatom) eflag=2;
|
||||
else if (eflag_in) eflag=1;
|
||||
else eflag=0;
|
||||
if (vatom) vflag=2;
|
||||
else if (vflag_in) vflag=1;
|
||||
else vflag=0;
|
||||
|
||||
#ifdef LAL_NO_BLOCK_REDUCE
|
||||
if (eflag) eflag=2;
|
||||
if (vflag) vflag=2;
|
||||
#endif
|
||||
|
||||
this->set_kernel(eflag,vflag);
|
||||
|
||||
// reallocate per-atom arrays, transfer data from the host
|
||||
// and build the neighbor lists if needed
|
||||
// NOTE:
|
||||
// For now we invoke precompute() again here,
|
||||
// to be able to turn on/off the udirect2b kernel (which comes before this)
|
||||
// Once all the kernels are ready, precompute() is needed only once
|
||||
// in the first kernel in a time step.
|
||||
// We only need to cast uind and uinp from host to device here
|
||||
// if the neighbor lists are rebuilt and other per-atom arrays
|
||||
// (x, type, amtype, amgroup, rpole) are ready on the device.
|
||||
|
||||
int** firstneigh = nullptr;
|
||||
firstneigh = this->precompute(ago, inum_full, nall, host_x, host_type,
|
||||
host_amtype, host_amgroup, host_rpole,
|
||||
nullptr, nullptr, sublo, subhi, tag,
|
||||
nspecial, special, nspecial15, special15,
|
||||
eflag_in, vflag_in, eatom, vatom,
|
||||
host_start, ilist, jnum, cpu_time,
|
||||
success, host_q, boxlo, prd);
|
||||
|
||||
// ------------------- Resize _tep array ------------------------
|
||||
|
||||
if (inum_full>this->_max_tep_size) {
|
||||
this->_max_tep_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
|
||||
this->_tep.resize(this->_max_tep_size*4);
|
||||
}
|
||||
*tep_ptr=this->_tep.host.begin();
|
||||
|
||||
this->_off2_mpole = off2_mpole;
|
||||
this->_felec = felec;
|
||||
this->_aewald = aewald;
|
||||
const int red_blocks=multipole_real(eflag,vflag);
|
||||
|
||||
// leave the answers (forces, energies and virial) on the device,
|
||||
// only copy them back in the last kernel (polar_real)
|
||||
//ans->copy_answers(eflag_in,vflag_in,eatom,vatom,red_blocks);
|
||||
//device->add_ans_object(ans);
|
||||
|
||||
this->hd_balancer.stop_timer();
|
||||
|
||||
// copy tep from device to host
|
||||
|
||||
this->_tep.update_host(this->_max_tep_size*4,false);
|
||||
/*
|
||||
printf("GPU lib: tep size = %d: max tep size = %d\n", this->_tep.cols(), _max_tep_size);
|
||||
for (int i = 0; i < 10; i++) {
|
||||
numtyp4* p = (numtyp4*)(&this->_tep[4*i]);
|
||||
printf("i = %d; tep = %f %f %f\n", i, p->x, p->y, p->z);
|
||||
}
|
||||
*/
|
||||
return firstneigh; // nbor->host_jlist.begin()-host_start;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Calculate the multipole real-space term, returning tep
|
||||
// ---------------------------------------------------------------------------
|
||||
@ -290,13 +380,14 @@ int HippoT::multipole_real(const int eflag, const int vflag) {
|
||||
&nbor_pitch, &this->_threads_per_atom);
|
||||
|
||||
this->k_multipole.set_size(GX,BX);
|
||||
this->k_multipole.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->dev_short_nbor,
|
||||
&this->ans->force, &this->ans->engv, &this->_tep,
|
||||
&eflag, &vflag, &ainum, &_nall, &nbor_pitch,
|
||||
&this->_threads_per_atom, &this->_aewald, &this->_felec,
|
||||
&this->_off2_mpole, &_polar_dscale, &_polar_uscale);
|
||||
this->k_multipole.run(&this->atom->x, &this->atom->extra, &_pval,
|
||||
&coeff_amtype, &coeff_amclass, &sp_polar,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->dev_short_nbor,
|
||||
&this->ans->force, &this->ans->engv, &this->_tep,
|
||||
&eflag, &vflag, &ainum, &_nall, &nbor_pitch,
|
||||
&this->_threads_per_atom, &this->_aewald, &this->_felec,
|
||||
&this->_off2_mpole, &_polar_dscale, &_polar_uscale);
|
||||
this->time_pair.stop();
|
||||
|
||||
return GX;
|
||||
|
||||
@ -15,7 +15,8 @@
|
||||
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include <stdio.h>
|
||||
#include "lal_aux_fun1.h"
|
||||
#include "lal_hippo_extra.h"
|
||||
//#include "lal_aux_fun1.h"
|
||||
#ifdef LAMMPS_SMALLBIG
|
||||
#define tagint int
|
||||
#endif
|
||||
@ -404,6 +405,318 @@ _texture( q_tex,int2);
|
||||
#define MIN(A,B) ((A) < (B) ? (A) : (B))
|
||||
#define MY_PIS (acctyp)1.77245385090551602729
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
repulsion = Pauli repulsion interactions
|
||||
adapted from Tinker erepel1b() routine
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
__kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp *restrict extra,
|
||||
const __global numtyp4 *restrict coeff,
|
||||
const __global numtyp4 *restrict sp_nonpolar,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
const __global int *dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
__global acctyp4 *restrict tep,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nall, const int nbor_pitch,
|
||||
const int t_per_atom, const numtyp aewald,
|
||||
const numtyp off2, const numtyp cut2,
|
||||
const numtyp c0, const numtyp c1, const numtyp c2,
|
||||
const numtyp c3, const numtyp c4, const numtyp c5)
|
||||
{
|
||||
int tid, ii, offset, i;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
|
||||
int n_stride;
|
||||
local_allocate_store_charge();
|
||||
|
||||
acctyp4 f;
|
||||
f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
|
||||
acctyp energy, e_coul, virial[6];
|
||||
if (EVFLAG) {
|
||||
energy=(acctyp)0;
|
||||
e_coul=(acctyp)0;
|
||||
for (int l=0; l<6; l++) virial[l]=(acctyp)0;
|
||||
}
|
||||
|
||||
acctyp4 tq;
|
||||
tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
|
||||
|
||||
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
||||
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
||||
|
||||
if (ii<inum) {
|
||||
int numj, nbor, nbor_end;
|
||||
const __global int* nbor_mem=dev_packed;
|
||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||
n_stride,nbor_end,nbor);
|
||||
|
||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||
//numtyp qtmp; fetch(qtmp,i,q_tex);
|
||||
//int itype=ix.w;
|
||||
|
||||
// recalculate numj and nbor_end for use of the short nbor list
|
||||
if (dev_packed==dev_nbor) {
|
||||
numj = dev_short_nbor[nbor];
|
||||
nbor += n_stride;
|
||||
nbor_end = nbor+fast_mul(numj,n_stride);
|
||||
nbor_mem = dev_short_nbor;
|
||||
}
|
||||
|
||||
const numtyp4 pol1i = polar1[i];
|
||||
numtyp ci = pol1i.x; // rpole[i][0];
|
||||
numtyp dix = pol1i.y; // rpole[i][1];
|
||||
numtyp diy = pol1i.z; // rpole[i][2];
|
||||
numtyp diz = pol1i.w; // rpole[i][3];
|
||||
const numtyp4 pol2i = polar2[i];
|
||||
numtyp qixx = pol2i.x; // rpole[i][4];
|
||||
numtyp qixy = pol2i.y; // rpole[i][5];
|
||||
numtyp qixz = pol2i.z; // rpole[i][6];
|
||||
numtyp qiyy = pol2i.w; // rpole[i][8];
|
||||
const numtyp4 pol3i = polar3[i];
|
||||
numtyp qiyz = pol3i.x; // rpole[i][9];
|
||||
numtyp qizz = pol3i.y; // rpole[i][12];
|
||||
int itype = pol3i.z; // amtype[i];
|
||||
numtyp sizi = coeff[itype].x; // sizpr[itype];
|
||||
numtyp dmpi = coeff[itype].y; // dmppr[itype];
|
||||
numtyp vali = coeff[itype].z; // elepr[itype];
|
||||
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
|
||||
int jextra=nbor_mem[nbor];
|
||||
int j = jextra & NEIGHMASK15;
|
||||
|
||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||
//int jtype=jx.w;
|
||||
|
||||
// Compute r12
|
||||
numtyp xr = ix.x - jx.x;
|
||||
numtyp yr = ix.y - jx.y;
|
||||
numtyp zr = ix.z - jx.z;
|
||||
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
||||
|
||||
if (r2>off2) continue;
|
||||
|
||||
const numtyp4 pol1j = polar1[j];
|
||||
numtyp ck = pol1j.x; // rpole[j][0];
|
||||
numtyp dkx = pol1j.y; // rpole[j][1];
|
||||
numtyp dky = pol1j.z; // rpole[j][2];
|
||||
numtyp dkz = pol1j.w; // rpole[j][3];
|
||||
const numtyp4 pol2j = polar2[j];
|
||||
numtyp qkxx = pol2j.x; // rpole[j][4];
|
||||
numtyp qkxy = pol2j.y; // rpole[j][5];
|
||||
numtyp qkxz = pol2j.z; // rpole[j][6];
|
||||
numtyp qkyy = pol2j.w; // rpole[j][8];
|
||||
const numtyp4 pol3j = polar3[j];
|
||||
numtyp qkyz = pol3j.x; // rpole[j][9];
|
||||
numtyp qkzz = pol3j.y; // rpole[j][12];
|
||||
int jtype = pol3j.z; // amtype[j];
|
||||
|
||||
numtyp sizk = coeff[jtype].x; // sizpr[jtype];
|
||||
numtyp dmpk = coeff[jtype].y; // dmppr[jtype];
|
||||
numtyp valk = coeff[jtype].z; // elepr[jtype];
|
||||
|
||||
const numtyp4 sp_nonpol = sp_nonpolar[sbmask15(jextra)];
|
||||
numtyp factor_repel = sp_nonpol.y; // factor_repel = special_repel[sbmask15(j)];
|
||||
|
||||
// intermediates involving moments and separation distance
|
||||
|
||||
numtyp dir = dix*xr + diy*yr + diz*zr;
|
||||
numtyp qix = qixx*xr + qixy*yr + qixz*zr;
|
||||
numtyp qiy = qixy*xr + qiyy*yr + qiyz*zr;
|
||||
numtyp qiz = qixz*xr + qiyz*yr + qizz*zr;
|
||||
numtyp qir = qix*xr + qiy*yr + qiz*zr;
|
||||
numtyp dkr = dkx*xr + dky*yr + dkz*zr;
|
||||
numtyp qkx = qkxx*xr + qkxy*yr + qkxz*zr;
|
||||
numtyp qky = qkxy*xr + qkyy*yr + qkyz*zr;
|
||||
numtyp qkz = qkxz*xr + qkyz*yr + qkzz*zr;
|
||||
numtyp qkr = qkx*xr + qky*yr + qkz*zr;
|
||||
|
||||
numtyp dik = dix*dkx + diy*dky + diz*dkz;
|
||||
numtyp qik = qix*qkx + qiy*qky + qiz*qkz;
|
||||
numtyp diqk = dix*qkx + diy*qky + diz*qkz;
|
||||
numtyp dkqi = dkx*qix + dky*qiy + dkz*qiz;
|
||||
numtyp qiqk = (numtyp)2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) +
|
||||
qixx*qkxx + qiyy*qkyy + qizz*qkzz;
|
||||
|
||||
// additional intermediates involving moments and distance
|
||||
|
||||
numtyp dirx = diy*zr - diz*yr;
|
||||
numtyp diry = diz*xr - dix*zr;
|
||||
numtyp dirz = dix*yr - diy*xr;
|
||||
numtyp dkrx = dky*zr - dkz*yr;
|
||||
numtyp dkry = dkz*xr - dkx*zr;
|
||||
numtyp dkrz = dkx*yr - dky*xr;
|
||||
numtyp dikx = diy*dkz - diz*dky;
|
||||
numtyp diky = diz*dkx - dix*dkz;
|
||||
numtyp dikz = dix*dky - diy*dkx;
|
||||
numtyp qirx = qiz*yr - qiy*zr;
|
||||
numtyp qiry = qix*zr - qiz*xr;
|
||||
numtyp qirz = qiy*xr - qix*yr;
|
||||
numtyp qkrx = qkz*yr - qky*zr;
|
||||
numtyp qkry = qkx*zr - qkz*xr;
|
||||
numtyp qkrz = qky*xr - qkx*yr;
|
||||
numtyp qikx = qky*qiz - qkz*qiy;
|
||||
numtyp qiky = qkz*qix - qkx*qiz;
|
||||
numtyp qikz = qkx*qiy - qky*qix;
|
||||
numtyp qixk = qixx*qkx + qixy*qky + qixz*qkz;
|
||||
numtyp qiyk = qixy*qkx + qiyy*qky + qiyz*qkz;
|
||||
numtyp qizk = qixz*qkx + qiyz*qky + qizz*qkz;
|
||||
numtyp qkxi = qkxx*qix + qkxy*qiy + qkxz*qiz;
|
||||
numtyp qkyi = qkxy*qix + qkyy*qiy + qkyz*qiz;
|
||||
numtyp qkzi = qkxz*qix + qkyz*qiy + qkzz*qiz;
|
||||
numtyp qikrx = qizk*yr - qiyk*zr;
|
||||
numtyp qikry = qixk*zr - qizk*xr;
|
||||
numtyp qikrz = qiyk*xr - qixk*yr;
|
||||
numtyp qkirx = qkzi*yr - qkyi*zr;
|
||||
numtyp qkiry = qkxi*zr - qkzi*xr;
|
||||
numtyp qkirz = qkyi*xr - qkxi*yr;
|
||||
numtyp diqkx = dix*qkxx + diy*qkxy + diz*qkxz;
|
||||
numtyp diqky = dix*qkxy + diy*qkyy + diz*qkyz;
|
||||
numtyp diqkz = dix*qkxz + diy*qkyz + diz*qkzz;
|
||||
numtyp dkqix = dkx*qixx + dky*qixy + dkz*qixz;
|
||||
numtyp dkqiy = dkx*qixy + dky*qiyy + dkz*qiyz;
|
||||
numtyp dkqiz = dkx*qixz + dky*qiyz + dkz*qizz;
|
||||
numtyp diqkrx = diqkz*yr - diqky*zr;
|
||||
numtyp diqkry = diqkx*zr - diqkz*xr;
|
||||
numtyp diqkrz = diqky*xr - diqkx*yr;
|
||||
numtyp dkqirx = dkqiz*yr - dkqiy*zr;
|
||||
numtyp dkqiry = dkqix*zr - dkqiz*xr;
|
||||
numtyp dkqirz = dkqiy*xr - dkqix*yr;
|
||||
numtyp dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy -
|
||||
(numtyp)2.0*(qixy*qkxz+qiyy*qkyz+qiyz*qkzz - qixz*qkxy-qiyz*qkyy-qizz*qkyz);
|
||||
numtyp dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz -
|
||||
(numtyp)2.0*(qixz*qkxx+qiyz*qkxy+qizz*qkxz - qixx*qkxz-qixy*qkyz-qixz*qkzz);
|
||||
numtyp dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix -
|
||||
(numtyp)2.0*(qixx*qkxy+qixy*qkyy+qixz*qkyz - qixy*qkxx-qiyy*qkxy-qiyz*qkxz);
|
||||
|
||||
// get reciprocal distance terms for this interaction
|
||||
|
||||
numtyp r = ucl_sqrt(r2);
|
||||
numtyp rinv = ucl_recip(r);
|
||||
numtyp r2inv = rinv*rinv;
|
||||
numtyp rr1 = rinv;
|
||||
numtyp rr3 = rr1 * r2inv;
|
||||
numtyp rr5 = (numtyp)3.0 * rr3 * r2inv;
|
||||
numtyp rr7 = (numtyp)5.0 * rr5 * r2inv;
|
||||
numtyp rr9 = (numtyp)7.0 * rr7 * r2inv;
|
||||
numtyp rr11 = (numtyp)9.0 * rr9 * r2inv;
|
||||
|
||||
// get damping coefficients for the Pauli repulsion energy
|
||||
numtyp dmpik[11];
|
||||
damprep(r,r2,rr1,rr3,rr5,rr7,rr9,rr11,11,dmpi,dmpk,dmpik);
|
||||
|
||||
// calculate intermediate terms needed for the energy
|
||||
|
||||
numtyp term1 = vali*valk;
|
||||
numtyp term2 = valk*dir - vali*dkr + dik;
|
||||
numtyp term3 = vali*qkr + valk*qir - dir*dkr + (numtyp)2.0*(dkqi-diqk+qiqk);
|
||||
numtyp term4 = dir*qkr - dkr*qir - 4.0*qik;
|
||||
numtyp term5 = qir*qkr;
|
||||
numtyp eterm = term1*dmpik[0] + term2*dmpik[2] +
|
||||
term3*dmpik[4] + term4*dmpik[6] + term5*dmpik[8];
|
||||
|
||||
// compute the Pauli repulsion energy for this interaction
|
||||
|
||||
numtyp sizik = sizi * sizk * factor_repel;
|
||||
numtyp e = sizik * eterm * rr1;
|
||||
|
||||
// calculate intermediate terms for force and torque
|
||||
|
||||
numtyp de = term1*dmpik[2] + term2*dmpik[4] + term3*dmpik[6] +
|
||||
term4*dmpik[8] + term5*dmpik[10];
|
||||
term1 = -valk*dmpik[2] + dkr*dmpik[4] - qkr*dmpik[6];
|
||||
term2 = vali*dmpik[2] + dir*dmpik[4] + qir*dmpik[6];
|
||||
term3 = (numtyp)2.0 * dmpik[4];
|
||||
term4 = (numtyp)2.0 * (-valk*dmpik[4] + dkr*dmpik[6] - qkr*dmpik[8]);
|
||||
term5 = (numtyp)2.0 * (-vali*dmpik[4] - dir*dmpik[6] - qir*dmpik[8]);
|
||||
numtyp term6 = (numtyp)4.0 * dmpik[6];
|
||||
|
||||
// compute the force components for this interaction
|
||||
|
||||
numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) +
|
||||
term4*qix + term5*qkx + term6*(qixk+qkxi);
|
||||
numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) +
|
||||
term4*qiy + term5*qky + term6*(qiyk+qkyi);
|
||||
numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
|
||||
term4*qiz + term5*qkz + term6*(qizk+qkzi);
|
||||
frcx = frcx*rr1 + eterm*rr3*xr;
|
||||
frcy = frcy*rr1 + eterm*rr3*yr;
|
||||
frcz = frcz*rr1 + eterm*rr3*zr;
|
||||
|
||||
// compute the torque components for this interaction
|
||||
|
||||
numtyp ttmix = -dmpik[2]*dikx + term1*dirx + term3*(dqikx+dkqirx) -
|
||||
term4*qirx - term6*(qikrx+qikx);
|
||||
numtyp ttmiy = -dmpik[2]*diky + term1*diry + term3*(dqiky+dkqiry) -
|
||||
term4*qiry - term6*(qikry+qiky);
|
||||
numtyp ttmiz = -dmpik[2]*dikz + term1*dirz + term3*(dqikz+dkqirz) -
|
||||
term4*qirz - term6*(qikrz+qikz);
|
||||
ttmix = sizik * ttmix * rr1;
|
||||
ttmiy = sizik * ttmiy * rr1;
|
||||
ttmiz = sizik * ttmiz * rr1;
|
||||
|
||||
// use energy switching if near the cutoff distance
|
||||
|
||||
if (r2 > cut2) {
|
||||
numtyp r3 = r2 * r;
|
||||
numtyp r4 = r2 * r2;
|
||||
numtyp r5 = r2 * r3;
|
||||
numtyp taper = c5*r5 + c4*r4 + c3*r3 + c2*r2 + c1*r + c0;
|
||||
numtyp dtaper = (numtyp)5.0*c5*r4 + (numtyp).0*c4*r3 +
|
||||
(numtyp)3.0*c3*r2 + (numtyp)2.0*c2*r + c1;
|
||||
dtaper *= e * rr1;
|
||||
e *= taper;
|
||||
frcx = frcx*taper - dtaper*xr;
|
||||
frcy = frcy*taper - dtaper*yr;
|
||||
frcz = frcz*taper - dtaper*zr;
|
||||
ttmix *= taper;
|
||||
ttmiy *= taper;
|
||||
ttmiz *= taper;
|
||||
}
|
||||
|
||||
energy += e;
|
||||
|
||||
// increment force-based gradient and torque on atom I
|
||||
|
||||
f.x += frcx;
|
||||
f.y += frcy;
|
||||
f.z += frcz;
|
||||
tq.x += ttmix;
|
||||
tq.y += ttmiy;
|
||||
tq.z += ttmiz;
|
||||
|
||||
// increment the internal virial tensor components
|
||||
if (EVFLAG && vflag) {
|
||||
numtyp vxx = -xr * frcx;
|
||||
numtyp vxy = (numtyp)-0.5 * (yr*frcx+xr*frcy);
|
||||
numtyp vxz = (numtyp)-0.5 * (zr*frcx+xr*frcz);
|
||||
numtyp vyy = -yr * frcy;
|
||||
numtyp vyz = (numtyp)-0.5 * (zr*frcy+yr*frcz);
|
||||
numtyp vzz = -zr * frcz;
|
||||
|
||||
virial[0] += vxx;
|
||||
virial[1] += vyy;
|
||||
virial[2] += vzz;
|
||||
virial[3] += vxy;
|
||||
virial[4] += vxz;
|
||||
virial[5] += vyz;
|
||||
}
|
||||
} // nbor
|
||||
|
||||
} // ii<inum
|
||||
|
||||
// accumulate tq
|
||||
store_answers_hippo_tq(tq,ii,inum,tid,t_per_atom,offset,i,tep);
|
||||
// accumate force, energy and virial
|
||||
store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||
offset,eflag,vflag,ans,engv);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
dispersion = real-space portion of Ewald dispersion
|
||||
adapted from Tinker edreal1d() routine
|
||||
@ -594,20 +907,22 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
__kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp *restrict extra,
|
||||
const __global numtyp4 *restrict coeff,
|
||||
const __global numtyp4 *restrict sp_polar,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
const __global int *dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
__global acctyp4 *restrict tep,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nall, const int nbor_pitch,
|
||||
const int t_per_atom, const numtyp aewald,
|
||||
const numtyp felec, const numtyp off2,
|
||||
const numtyp polar_dscale, const numtyp polar_uscale)
|
||||
const __global numtyp *restrict extra,
|
||||
const __global numtyp *restrict pval,
|
||||
const __global numtyp4 *restrict coeff_amtype,
|
||||
const __global numtyp4 *restrict coeff_amclass,
|
||||
const __global numtyp4 *restrict sp_polar,
|
||||
const __global int *dev_nbor,
|
||||
const __global int *dev_packed,
|
||||
const __global int *dev_short_nbor,
|
||||
__global acctyp4 *restrict ans,
|
||||
__global acctyp *restrict engv,
|
||||
__global acctyp4 *restrict tep,
|
||||
const int eflag, const int vflag, const int inum,
|
||||
const int nall, const int nbor_pitch,
|
||||
const int t_per_atom, const numtyp aewald,
|
||||
const numtyp felec, const numtyp off2,
|
||||
const numtyp polar_dscale, const numtyp polar_uscale)
|
||||
{
|
||||
int tid, ii, offset, i;
|
||||
atom_info(t_per_atom,ii,tid,offset);
|
||||
@ -633,6 +948,7 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
||||
|
||||
if (ii<inum) {
|
||||
int m;
|
||||
int itype,iclass;
|
||||
numtyp bfac;
|
||||
numtyp term1,term2,term3;
|
||||
numtyp term4,term5,term6;
|
||||
@ -668,6 +984,12 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
||||
const numtyp4 pol3i = polar3[i];
|
||||
numtyp qiyz = pol3i.x; // rpole[i][9];
|
||||
numtyp qizz = pol3i.y; // rpole[i][12];
|
||||
itype = pol3i.z; // amtype[i];
|
||||
iclass = coeff_amtype[itype].w; // amtype2class[itype];
|
||||
|
||||
numtyp corei = coeff_amclass[itype].z; // pcore[iclass];
|
||||
numtyp alphai = coeff_amclass[itype].w; // palpha[iclass];
|
||||
numtyp vali = pval[i];
|
||||
|
||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||
|
||||
@ -701,10 +1023,15 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
||||
numtyp qkzz = pol3j.y; // rpole[j][12];
|
||||
int jtype = pol3j.z; // amtype[j];
|
||||
int jgroup = pol3j.w; // amgroup[j];
|
||||
int jclass = coeff_amtype[jtype].w; // amtype2class[jtype];
|
||||
|
||||
const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
|
||||
numtyp factor_mpole = sp_pol.w; // sp_mpole[sbmask15(jextra)];
|
||||
|
||||
numtyp corek = coeff_amclass[jtype].z; // pcore[jclass];
|
||||
numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass];
|
||||
numtyp valk = pval[j];
|
||||
|
||||
// intermediates involving moments and separation distance
|
||||
|
||||
numtyp dir = dix*xr + diy*yr + diz*zr;
|
||||
|
||||
@ -48,6 +48,7 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
|
||||
const double *host_special_polar_piscale,
|
||||
const double *host_special_polar_pscale,
|
||||
const double *host_csix, const double *host_adisp,
|
||||
const double *host_pcore, const double *host_palpha,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const int maxspecial15, const double cell_size,
|
||||
const double gpu_split, FILE *_screen,
|
||||
@ -65,6 +66,18 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
|
||||
const double aewald, const double off2_disp, double *charge,
|
||||
double *boxlo, double *prd);
|
||||
|
||||
/// Compute multipole real-space with device neighboring
|
||||
virtual int** compute_multipole_real(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, int *host_amtype,
|
||||
int *host_amgroup, double **host_rpole, double *sublo, double *subhi,
|
||||
tagint *tag, int **nspecial, tagint **special,
|
||||
int *nspecial15, tagint **special15,
|
||||
const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **numj, const double cpu_time, bool &success,
|
||||
const double aewald, const double felec, const double off2_mpole, double *charge,
|
||||
double *boxlo, double *prd, void **tep_ptr);
|
||||
|
||||
/// Clear all host and device data
|
||||
/** \note This is called at the beginning of the init() routine **/
|
||||
void clear();
|
||||
@ -105,6 +118,8 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
|
||||
|
||||
UCL_Kernel k_dispersion;
|
||||
|
||||
UCL_Vector<acctyp,acctyp> _pval;
|
||||
|
||||
protected:
|
||||
bool _allocated;
|
||||
int dispersion_real(const int eflag, const int vflag);
|
||||
|
||||
@ -38,6 +38,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
|
||||
const double *host_special_polar_piscale,
|
||||
const double *host_special_polar_pscale,
|
||||
const double *host_csix, const double *host_adisp,
|
||||
const double *host_pcore, const double *host_palpha,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const int maxspecial15,
|
||||
const double cell_size, int &gpu_mode, FILE *screen,
|
||||
@ -73,7 +74,8 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
|
||||
host_special_repel, host_special_disp,
|
||||
host_special_mpole, host_special_polar_wscale,
|
||||
host_special_polar_piscale, host_special_polar_pscale,
|
||||
host_csix, host_adisp, nlocal, nall, max_nbors,
|
||||
host_csix, host_adisp, host_pcore, host_palpha,
|
||||
nlocal, nall, max_nbors,
|
||||
maxspecial, maxspecial15, cell_size, gpu_split,
|
||||
screen, polar_dscale, polar_uscale);
|
||||
|
||||
@ -97,7 +99,8 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
|
||||
host_special_repel, host_special_disp,
|
||||
host_special_mpole, host_special_polar_wscale,
|
||||
host_special_polar_piscale, host_special_polar_pscale,
|
||||
host_csix, host_adisp, nlocal, nall, max_nbors,
|
||||
host_csix, host_adisp, host_pcore, host_palpha,
|
||||
nlocal, nall, max_nbors,
|
||||
maxspecial, maxspecial15, cell_size, gpu_split,
|
||||
screen, polar_dscale, polar_uscale);
|
||||
|
||||
|
||||
326
lib/gpu/lal_hippo_extra.h
Normal file
326
lib/gpu/lal_hippo_extra.h
Normal file
@ -0,0 +1,326 @@
|
||||
/// **************************************************************************
|
||||
// hippo_extra.h
|
||||
// -------------------
|
||||
// Trung Dac Nguyen
|
||||
//
|
||||
// Device code for hippo math routines
|
||||
//
|
||||
// __________________________________________________________________________
|
||||
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
// __________________________________________________________________________
|
||||
//
|
||||
// begin :
|
||||
// email : ndactrung@gmail.com
|
||||
// ***************************************************************************/*
|
||||
|
||||
#ifndef LAL_HIPPO_EXTRA_H
|
||||
#define LAL_HIPPO_EXTRA_H
|
||||
|
||||
#if defined(NV_KERNEL) || defined(USE_HIP)
|
||||
#include "lal_aux_fun1.h"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#define MY_PI2 (numtyp)1.57079632679489661923
|
||||
#define MY_PI4 (numtyp)0.78539816339744830962
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
damprep generates coefficients for the Pauli repulsion
|
||||
damping function for powers of the interatomic distance
|
||||
|
||||
literature reference:
|
||||
|
||||
J. A. Rackers and J. W. Ponder, "Classical Pauli Repulsion: An
|
||||
Anisotropic, Atomic Multipole Model", Journal of Chemical Physics,
|
||||
150, 084104 (2019)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
|
||||
const numtyp rr3, const numtyp rr5, const numtyp rr7,
|
||||
const numtyp rr9, const numtyp rr11, const int rorder,
|
||||
const numtyp dmpi, const numtyp dmpk, numtyp dmpik[11])
|
||||
{
|
||||
numtyp r3,r4;
|
||||
numtyp r5,r6,r7,r8;
|
||||
numtyp s,ds,d2s;
|
||||
numtyp d3s,d4s,d5s;
|
||||
numtyp dmpi2,dmpk2;
|
||||
numtyp dmpi22,dmpi23;
|
||||
numtyp dmpi24,dmpi25;
|
||||
numtyp dmpi26,dmpi27;
|
||||
numtyp dmpk22,dmpk23;
|
||||
numtyp dmpk24,dmpk25;
|
||||
numtyp dmpk26;
|
||||
numtyp eps,diff;
|
||||
numtyp expi,expk;
|
||||
numtyp dampi,dampk;
|
||||
numtyp pre,term,tmp;
|
||||
|
||||
// compute tolerance value for damping exponents
|
||||
|
||||
eps = (numtyp)0.001;
|
||||
diff = dmpi-dmpk;
|
||||
if (diff < (numtyp)0) diff = -diff;
|
||||
|
||||
// treat the case where alpha damping exponents are equal
|
||||
|
||||
if (diff < eps) {
|
||||
r3 = r2 * r;
|
||||
r4 = r3 * r;
|
||||
r5 = r4 * r;
|
||||
r6 = r5 * r;
|
||||
r7 = r6 * r;
|
||||
dmpi2 = (numtyp)0.5 * dmpi;
|
||||
dampi = dmpi2 * r;
|
||||
expi = ucl_exp(-dampi);
|
||||
dmpi22 = dmpi2 * dmpi2;
|
||||
dmpi23 = dmpi22 * dmpi2;
|
||||
dmpi24 = dmpi23 * dmpi2;
|
||||
dmpi25 = dmpi24 * dmpi2;
|
||||
dmpi26 = dmpi25 * dmpi2;
|
||||
pre = (numtyp)128.0;
|
||||
s = (r + dmpi2*r2 + dmpi22*r3/(numtyp)3.0) * expi;
|
||||
|
||||
ds = (dmpi22*r3 + dmpi23*r4) * expi / 3.0;
|
||||
d2s = dmpi24 * expi * r5 / 9.0;
|
||||
d3s = dmpi25 * expi * r6 / 45.0;
|
||||
d4s = (dmpi25*r6 + dmpi26*r7) * expi / 315.0;
|
||||
if (rorder >= 11) {
|
||||
r8 = r7 * r;
|
||||
dmpi27 = dmpi2 * dmpi26;
|
||||
d5s = (dmpi25*r6 + dmpi26*r7 + dmpi27*r8/3.0) * expi / 945.0;
|
||||
}
|
||||
|
||||
// treat the case where alpha damping exponents are unequal
|
||||
|
||||
} else {
|
||||
r3 = r2 * r;
|
||||
r4 = r3 * r;
|
||||
r5 = r4 * r;
|
||||
dmpi2 = 0.5 * dmpi;
|
||||
dmpk2 = 0.5 * dmpk;
|
||||
dampi = dmpi2 * r;
|
||||
dampk = dmpk2 * r;
|
||||
expi = exp(-dampi);
|
||||
expk = exp(-dampk);
|
||||
dmpi22 = dmpi2 * dmpi2;
|
||||
dmpi23 = dmpi22 * dmpi2;
|
||||
dmpi24 = dmpi23 * dmpi2;
|
||||
dmpi25 = dmpi24 * dmpi2;
|
||||
dmpk22 = dmpk2 * dmpk2;
|
||||
dmpk23 = dmpk22 * dmpk2;
|
||||
dmpk24 = dmpk23 * dmpk2;
|
||||
dmpk25 = dmpk24 * dmpk2;
|
||||
term = dmpi22 - dmpk22;
|
||||
pre = 8192.0 * dmpi23 * dmpk23 / pow(term,4.0);
|
||||
tmp = 4.0 * dmpi2 * dmpk2 / term;
|
||||
s = (dampi-tmp)*expk + (dampk+tmp)*expi;
|
||||
|
||||
ds = (dmpi2*dmpk2*r2 - 4.0*dmpi2*dmpk22*r/term -
|
||||
4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi2*dmpk2*r2 + 4.0*dmpi22*dmpk2*r/term + 4.0*dmpi2*dmpk2/term) * expi;
|
||||
d2s = (dmpi2*dmpk2*r2/3.0 + dmpi2*dmpk22*r3/3.0 -
|
||||
(4.0/3.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
|
||||
4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi2*dmpk2*r2/3.0 + dmpi22*dmpk2*r3/3.0 +
|
||||
(4.0/3.0)*dmpi23*dmpk2*r2/term + 4.0*dmpi22*dmpk2*r/term +
|
||||
4.0*dmpi2*dmpk2/term) * expi;
|
||||
d3s = (dmpi2*dmpk23*r4/15.0 + dmpi2*dmpk22*r3/5.0 + dmpi2*dmpk2*r2/5.0 -
|
||||
(4.0/15.0)*dmpi2*dmpk24*r3/term - (8.0/5.0)*dmpi2*dmpk23*r2/term -
|
||||
4.0*dmpi2*dmpk22*r/term - 4.0/term*dmpi2*dmpk2) * expk +
|
||||
(dmpi23*dmpk2*r4/15.0 + dmpi22*dmpk2*r3/5.0 + dmpi2*dmpk2*r2/5.0 +
|
||||
(4.0/15.0)*dmpi24*dmpk2*r3/term + (8.0/5.0)*dmpi23*dmpk2*r2/term +
|
||||
4.0*dmpi22*dmpk2*r/term + 4.0/term*dmpi2*dmpk2) * expi;
|
||||
d4s = (dmpi2*dmpk24*r5/105.0 + (2.0/35.0)*dmpi2*dmpk23*r4 +
|
||||
dmpi2*dmpk22*r3/7.0 + dmpi2*dmpk2*r2/7.0 -
|
||||
(4.0/105.0)*dmpi2*dmpk25*r4/term - (8.0/21.0)*dmpi2*dmpk24*r3/term -
|
||||
(12.0/7.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
|
||||
4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi24*dmpk2*r5/105.0 + (2.0/35.0)*dmpi23*dmpk2*r4 +
|
||||
dmpi22*dmpk2*r3/7.0 + dmpi2*dmpk2*r2/7.0 +
|
||||
(4.0/105.0)*dmpi25*dmpk2*r4/term + (8.0/21.0)*dmpi24*dmpk2*r3/term +
|
||||
(12.0/7.0)*dmpi23*dmpk2*r2/term + 4.0*dmpi22*dmpk2*r/term +
|
||||
4.0*dmpi2*dmpk2/term) * expi;
|
||||
|
||||
if (rorder >= 11) {
|
||||
r6 = r5 * r;
|
||||
dmpi26 = dmpi25 * dmpi2;
|
||||
dmpk26 = dmpk25 * dmpk2;
|
||||
d5s = (dmpi2*dmpk25*r6/945.0 + (2.0/189.0)*dmpi2*dmpk24*r5 +
|
||||
dmpi2*dmpk23*r4/21.0 + dmpi2*dmpk22*r3/9.0 + dmpi2*dmpk2*r2/9.0 -
|
||||
(4.0/945.0)*dmpi2*dmpk26*r5/term -
|
||||
(4.0/63.0)*dmpi2*dmpk25*r4/term - (4.0/9.0)*dmpi2*dmpk24*r3/term -
|
||||
(16.0/9.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
|
||||
4.0*dmpi2*dmpk2/term) * expk +
|
||||
(dmpi25*dmpk2*r6/945.0 + (2.0/189.0)*dmpi24*dmpk2*r5 +
|
||||
dmpi23*dmpk2*r4/21.0 + dmpi22*dmpk2*r3/9.0 + dmpi2*dmpk2*r2/9.0 +
|
||||
(4.0/945.0)*dmpi26*dmpk2*r5/term + (4.0/63.0)*dmpi25*dmpk2*r4/term +
|
||||
(4.0/9.0)*dmpi24*dmpk2*r3/term + (16.0/9.0)*dmpi23*dmpk2*r2/term +
|
||||
4.0*dmpi22*dmpk2*r/term + 4.0*dmpi2*dmpk2/term) * expi;
|
||||
}
|
||||
}
|
||||
|
||||
// convert partial derivatives into full derivatives
|
||||
|
||||
s = s * rr1;
|
||||
ds = ds * rr3;
|
||||
d2s = d2s * rr5;
|
||||
d3s = d3s * rr7;
|
||||
d4s = d4s * rr9;
|
||||
d5s = d5s * rr11;
|
||||
dmpik[0] = 0.5 * pre * s * s;
|
||||
dmpik[2] = pre * s * ds;
|
||||
dmpik[4] = pre * (s*d2s + ds*ds);
|
||||
dmpik[6] = pre * (s*d3s + 3.0*ds*d2s);
|
||||
dmpik[8] = pre * (s*d4s + 4.0*ds*d3s + 3.0*d2s*d2s);
|
||||
if (rorder >= 11) dmpik[10] = pre * (s*d5s + 5.0*ds*d4s + 10.0*d2s*d3s);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
damppole generates coefficients for the charge penetration
|
||||
damping function for powers of the interatomic distance
|
||||
|
||||
literature references:
|
||||
|
||||
L. V. Slipchenko and M. S. Gordon, "Electrostatic Energy in the
|
||||
Effective Fragment Potential Method: Theory and Application to
|
||||
the Benzene Dimer", Journal of Computational Chemistry, 28,
|
||||
276-291 (2007) [Gordon f1 and f2 models]
|
||||
|
||||
J. A. Rackers, Q. Wang, C. Liu, J.-P. Piquemal, P. Ren and
|
||||
J. W. Ponder, "An Optimized Charge Penetration Model for Use with
|
||||
the AMOEBA Force Field", Physical Chemistry Chemical Physics, 19,
|
||||
276-291 (2017)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
ucl_inline void damppole(const numtyp r, const int rorder,
|
||||
const numtyp alphai, const numtyp alphak,
|
||||
numtyp dmpi[9], numtyp dmpk[9], numtyp dmpik[11])
|
||||
{
|
||||
numtyp termi,termk;
|
||||
numtyp termi2,termk2;
|
||||
numtyp alphai2,alphak2;
|
||||
numtyp eps,diff;
|
||||
numtyp expi,expk;
|
||||
numtyp dampi,dampk;
|
||||
numtyp dampi2,dampi3;
|
||||
numtyp dampi4,dampi5;
|
||||
numtyp dampi6,dampi7;
|
||||
numtyp dampi8;
|
||||
numtyp dampk2,dampk3;
|
||||
numtyp dampk4,dampk5;
|
||||
numtyp dampk6;
|
||||
|
||||
// compute tolerance and exponential damping factors
|
||||
|
||||
eps = 0.001;
|
||||
diff = fabs(alphai-alphak);
|
||||
dampi = alphai * r;
|
||||
dampk = alphak * r;
|
||||
expi = exp(-dampi);
|
||||
expk = exp(-dampk);
|
||||
|
||||
// core-valence charge penetration damping for Gordon f1
|
||||
|
||||
dampi2 = dampi * dampi;
|
||||
dampi3 = dampi * dampi2;
|
||||
dampi4 = dampi2 * dampi2;
|
||||
dampi5 = dampi2 * dampi3;
|
||||
dmpi[0] = 1.0 - (1.0 + 0.5*dampi)*expi;
|
||||
dmpi[2] = 1.0 - (1.0 + dampi + 0.5*dampi2)*expi;
|
||||
dmpi[4] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0)*expi;
|
||||
dmpi[6] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 + dampi4/30.0)*expi;
|
||||
dmpi[8] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
4.0*dampi4/105.0 + dampi5/210.0)*expi;
|
||||
if (diff < eps) {
|
||||
dmpk[0] = dmpi[0];
|
||||
dmpk[2] = dmpi[2];
|
||||
dmpk[4] = dmpi[4];
|
||||
dmpk[6] = dmpi[6];
|
||||
dmpk[8] = dmpi[8];
|
||||
} else {
|
||||
dampk2 = dampk * dampk;
|
||||
dampk3 = dampk * dampk2;
|
||||
dampk4 = dampk2 * dampk2;
|
||||
dampk5 = dampk2 * dampk3;
|
||||
dmpk[0] = 1.0 - (1.0 + 0.5*dampk)*expk;
|
||||
dmpk[2] = 1.0 - (1.0 + dampk + 0.5*dampk2)*expk;
|
||||
dmpk[4] = 1.0 - (1.0 + dampk + 0.5*dampk2 + dampk3/6.0)*expk;
|
||||
dmpk[6] = 1.0 - (1.0 + dampk + 0.5*dampk2 + dampk3/6.0 + dampk4/30.0)*expk;
|
||||
dmpk[8] = 1.0 - (1.0 + dampk + 0.5*dampk2 + dampk3/6.0 +
|
||||
4.0*dampk4/105.0 + dampk5/210.0)*expk;
|
||||
}
|
||||
|
||||
// valence-valence charge penetration damping for Gordon f1
|
||||
|
||||
if (diff < eps) {
|
||||
dampi6 = dampi3 * dampi3;
|
||||
dampi7 = dampi3 * dampi4;
|
||||
dmpik[0] = 1.0 - (1.0 + 11.0*dampi/16.0 + 3.0*dampi2/16.0 +
|
||||
dampi3/48.0)*expi;
|
||||
dmpik[2] = 1.0 - (1.0 + dampi + 0.5*dampi2 +
|
||||
7.0*dampi3/48.0 + dampi4/48.0)*expi;
|
||||
dmpik[4] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
dampi4/24.0 + dampi5/144.0)*expi;
|
||||
dmpik[6] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
dampi4/24.0 + dampi5/120.0 + dampi6/720.0)*expi;
|
||||
dmpik[8] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
dampi4/24.0 + dampi5/120.0 + dampi6/720.0 +
|
||||
dampi7/5040.0)*expi;
|
||||
if (rorder >= 11) {
|
||||
dampi8 = dampi4 * dampi4;
|
||||
dmpik[10] = 1.0 - (1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
dampi4/24.0 + dampi5/120.0 + dampi6/720.0 +
|
||||
dampi7/5040.0 + dampi8/45360.0)*expi;
|
||||
}
|
||||
|
||||
} else {
|
||||
alphai2 = alphai * alphai;
|
||||
alphak2 = alphak * alphak;
|
||||
termi = alphak2 / (alphak2-alphai2);
|
||||
termk = alphai2 / (alphai2-alphak2);
|
||||
termi2 = termi * termi;
|
||||
termk2 = termk * termk;
|
||||
dmpik[0] = 1.0 - termi2*(1.0 + 2.0*termk + 0.5*dampi)*expi -
|
||||
termk2*(1.0 + 2.0*termi + 0.5*dampk)*expk;
|
||||
dmpik[2] = 1.0 - termi2*(1.0+dampi+0.5*dampi2)*expi -
|
||||
termk2*(1.0+dampk+0.5*dampk2)*expk -
|
||||
2.0*termi2*termk*(1.0+dampi)*expi -
|
||||
2.0*termk2*termi*(1.0+dampk)*expk;
|
||||
dmpik[4] = 1.0 - termi2*(1.0 + dampi + 0.5*dampi2 + dampi3/6.0)*expi -
|
||||
termk2*(1.0 + dampk + 0.5*dampk2 + dampk3/6.0)*expk -
|
||||
2.0*termi2*termk*(1.0 + dampi + dampi2/3.0)*expi -
|
||||
2.0*termk2*termi*(1.0 + dampk + dampk2/3.0)*expk;
|
||||
dmpik[6] = 1.0 - termi2*(1.0 + dampi + 0.5*dampi2 +
|
||||
dampi3/6.0 + dampi4/30.0)*expi -
|
||||
termk2*(1.0 + dampk + 0.5*dampk2 + dampk3/6.0 + dampk4/30.0)*expk -
|
||||
2.0*termi2*termk*(1.0 + dampi + 2.0*dampi2/5.0 + dampi3/15.0)*expi -
|
||||
2.0*termk2*termi*(1.0 + dampk + 2.0*dampk2/5.0 + dampk3/15.0)*expk;
|
||||
dmpik[8] = 1.0 - termi2*(1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
4.0*dampi4/105.0 + dampi5/210.0)*expi -
|
||||
termk2*(1.0 + dampk + 0.5*dampk2 + dampk3/6.0 +
|
||||
4.0*dampk4/105.0 + dampk5/210.0)*expk -
|
||||
2.0*termi2*termk*(1.0 + dampi + 3.0*dampi2/7.0 +
|
||||
2.0*dampi3/21.0 + dampi4/105.0)*expi -
|
||||
2.0*termk2*termi*(1.0 + dampk + 3.0*dampk2/7.0 +
|
||||
2.0*dampk3/21.0 + dampk4/105.0)*expk;
|
||||
|
||||
if (rorder >= 11) {
|
||||
dampi6 = dampi3 * dampi3;
|
||||
dampk6 = dampk3 * dampk3;
|
||||
dmpik[10] = 1.0 - termi2*(1.0 + dampi + 0.5*dampi2 + dampi3/6.0 +
|
||||
5.0*dampi4/126.0 + 2.0*dampi5/315.0 +
|
||||
dampi6/1890.0)*expi -
|
||||
termk2*(1.0 + dampk + 0.5*dampk2 + dampk3/6.0 + 5.0*dampk4/126.0 +
|
||||
2.0*dampk5/315.0 + dampk6/1890.0)*expk -
|
||||
2.0*termi2*termk*(1.0 + dampi + 4.0*dampi2/9.0 + dampi3/9.0 +
|
||||
dampi4/63.0 + dampi5/945.0)*expi -
|
||||
2.0*termk2*termi*(1.0 + dampk + 4.0*dampk2/9.0 + dampk3/9.0 +
|
||||
dampk4/63.0 + dampk5/945.0)*expk;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
@ -59,6 +59,7 @@ int hippo_gpu_init(const int ntypes, const int max_amtype, const int max_amclass
|
||||
const double *host_special_polar_piscale,
|
||||
const double *host_special_polar_pscale,
|
||||
const double *host_csix, const double *host_adisp,
|
||||
const double *host_pcore, const double *host_palpha,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const int maxspecial15,
|
||||
const double cell_size, int &gpu_mode, FILE *screen,
|
||||
@ -191,10 +192,10 @@ void PairHippoGPU::init_style()
|
||||
pdamp, thole, dirdamp, amtype2class, special_hal,
|
||||
special_repel, special_disp, special_mpole,
|
||||
special_polar_wscale, special_polar_piscale,
|
||||
special_polar_pscale, csix, adisp, atom->nlocal,
|
||||
atom->nlocal+atom->nghost, mnf, maxspecial,
|
||||
maxspecial15, cell_size, gpu_mode, screen,
|
||||
polar_dscale, polar_uscale, tq_size);
|
||||
special_polar_pscale, csix, adisp, pcore, palpha,
|
||||
atom->nlocal, atom->nlocal+atom->nghost, mnf,
|
||||
maxspecial, maxspecial15, cell_size, gpu_mode,
|
||||
screen, polar_dscale, polar_uscale, tq_size);
|
||||
GPU_EXTRA::check_flag(success,error,world);
|
||||
|
||||
if (gpu_mode == GPU_FORCE)
|
||||
|
||||
Reference in New Issue
Block a user