charge pointer and corrections
This commit is contained in:
@ -14,36 +14,36 @@
|
|||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
|
|
||||||
#if defined(USE_OPENCL)
|
#if defined(USE_OPENCL)
|
||||||
#include "dpd_cl.h"
|
#include "dpd_charged_cl.h"
|
||||||
#elif defined(USE_CUDART)
|
#elif defined(USE_CUDART)
|
||||||
const char *dpd=0;
|
const char *dpd=0;
|
||||||
#else
|
#else
|
||||||
#include "dpd_cubin.h"
|
#include "dpd_charged_cubin.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "lal_dpd.h"
|
#include "lal_dpd_charged.h"
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
namespace LAMMPS_AL {
|
namespace LAMMPS_AL {
|
||||||
#define DPDT DPD<numtyp, acctyp>
|
#define DPDChargedT DPDCharged<numtyp, acctyp>
|
||||||
|
|
||||||
extern Device<PRECISION,ACC_PRECISION> device;
|
extern Device<PRECISION,ACC_PRECISION> device;
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
DPDT::DPD() : BaseDPD<numtyp,acctyp>(), _allocated(false) {
|
DPDChargedT::DPDCharged() : BaseDPD<numtyp,acctyp>(), _allocated(false) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
DPDT::~DPD() {
|
DPDChargedT::~DPDCharged() {
|
||||||
clear();
|
clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int DPDT::bytes_per_atom(const int max_nbors) const {
|
int DPDChargedT::bytes_per_atom(const int max_nbors) const {
|
||||||
return this->bytes_per_atom_atomic(max_nbors);
|
return this->bytes_per_atom_atomic(max_nbors);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int DPDT::init(const int ntypes,
|
int DPDChargedT::init(const int ntypes,
|
||||||
double **host_cutsq, double **host_a0,
|
double **host_cutsq, double **host_a0,
|
||||||
double **host_gamma, double **host_sigma,
|
double **host_gamma, double **host_sigma,
|
||||||
double **host_cut, double *host_special_lj,
|
double **host_cut, double *host_special_lj,
|
||||||
@ -122,7 +122,7 @@ int DPDT::init(const int ntypes,
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void DPDT::clear() {
|
void DPDChargedT::clear() {
|
||||||
if (!_allocated)
|
if (!_allocated)
|
||||||
return;
|
return;
|
||||||
_allocated=false;
|
_allocated=false;
|
||||||
@ -135,7 +135,7 @@ void DPDT::clear() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
double DPDT::host_memory_usage() const {
|
double DPDChargedT::host_memory_usage() const {
|
||||||
return this->host_memory_usage_atomic()+sizeof(DPD<numtyp,acctyp>);
|
return this->host_memory_usage_atomic()+sizeof(DPD<numtyp,acctyp>);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -143,7 +143,28 @@ double DPDT::host_memory_usage() const {
|
|||||||
// Calculate energies, forces, and torques
|
// Calculate energies, forces, and torques
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int DPDT::loop(const int eflag, const int vflag) {
|
int DPDChargedT::loop(const int eflag, const int vflag) {
|
||||||
|
|
||||||
|
int nall = this->atom->nall();
|
||||||
|
// signal that we need to transfer extra data from the host
|
||||||
|
|
||||||
|
this->atom->extra_data_unavail();
|
||||||
|
|
||||||
|
numtyp4 *pextra=reinterpret_cast<numtyp4*>(&(this->atom->extra[0]));
|
||||||
|
|
||||||
|
int n = 0;
|
||||||
|
int nstride = 1;
|
||||||
|
for (int i = 0; i < nall; i++) {
|
||||||
|
int idx = n+i*nstride;
|
||||||
|
numtyp4 v;
|
||||||
|
v.x = q[i];
|
||||||
|
v.y = 0;
|
||||||
|
v.z = 0;
|
||||||
|
v.w = 0;
|
||||||
|
pextra[idx] = v;
|
||||||
|
}
|
||||||
|
this->atom->add_extra_data();
|
||||||
|
|
||||||
// Compute the block size and grid size to keep all cores busy
|
// Compute the block size and grid size to keep all cores busy
|
||||||
const int BX=this->block_size();
|
const int BX=this->block_size();
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
||||||
@ -154,7 +175,7 @@ int DPDT::loop(const int eflag, const int vflag) {
|
|||||||
this->time_pair.start();
|
this->time_pair.start();
|
||||||
if (shared_types) {
|
if (shared_types) {
|
||||||
this->k_pair_sel->set_size(GX,BX);
|
this->k_pair_sel->set_size(GX,BX);
|
||||||
this->k_pair_sel->run(&this->atom->x, &coeff, &sp_lj, &sp_sqrt,
|
this->k_pair_sel->run(&this->atom->x, &this->atom->extra, &coeff, &sp_lj, &sp_sqrt,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->ans->force, &this->ans->engv, &eflag,
|
&this->ans->force, &this->ans->engv, &eflag,
|
||||||
&vflag, &ainum, &nbor_pitch, &this->atom->v, &cutsq,
|
&vflag, &ainum, &nbor_pitch, &this->atom->v, &cutsq,
|
||||||
@ -162,7 +183,7 @@ int DPDT::loop(const int eflag, const int vflag) {
|
|||||||
&this->_tstat_only, &this->_threads_per_atom);
|
&this->_tstat_only, &this->_threads_per_atom);
|
||||||
} else {
|
} else {
|
||||||
this->k_pair.set_size(GX,BX);
|
this->k_pair.set_size(GX,BX);
|
||||||
this->k_pair.run(&this->atom->x, &coeff, &_lj_types, &sp_lj, &sp_sqrt,
|
this->k_pair.run(&this->atom->x, &this->atom->extra, &coeff, &_lj_types, &sp_lj, &sp_sqrt,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->ans->force, &this->ans->engv, &eflag, &vflag,
|
&this->ans->force, &this->ans->engv, &eflag, &vflag,
|
||||||
&ainum, &nbor_pitch, &this->atom->v, &cutsq, &this->_dtinvsqrt,
|
&ainum, &nbor_pitch, &this->atom->v, &cutsq, &this->_dtinvsqrt,
|
||||||
@ -174,7 +195,7 @@ int DPDT::loop(const int eflag, const int vflag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void DPDT::update_coeff(int ntypes, double **host_a0, double **host_gamma,
|
void DPDChargedT::update_coeff(int ntypes, double **host_a0, double **host_gamma,
|
||||||
double **host_sigma, double **host_cut)
|
double **host_sigma, double **host_cut)
|
||||||
{
|
{
|
||||||
UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
|
UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
|
||||||
@ -183,5 +204,16 @@ void DPDT::update_coeff(int ntypes, double **host_a0, double **host_gamma,
|
|||||||
host_sigma,host_cut);
|
host_sigma,host_cut);
|
||||||
}
|
}
|
||||||
|
|
||||||
template class DPD<PRECISION,ACC_PRECISION>;
|
// ---------------------------------------------------------------------------
|
||||||
|
// Get the extra data pointers from host
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template <class numtyp, class acctyp>
|
||||||
|
void DPDChargedT::get_extra_data(double *host_q) {
|
||||||
|
q = host_q;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template class DPDCharged<PRECISION,ACC_PRECISION>;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -161,7 +161,8 @@ _texture_2d( vel_tex,int4);
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
__kernel void k_dpd(const __global numtyp4 *restrict x_,
|
__kernel void k_dpd_charged(const __global numtyp4 *restrict x_,
|
||||||
|
const __global numtyp4 *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff,
|
const __global numtyp4 *restrict coeff,
|
||||||
const int lj_types,
|
const int lj_types,
|
||||||
const __global numtyp *restrict sp_lj,
|
const __global numtyp *restrict sp_lj,
|
||||||
@ -201,6 +202,8 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
|
|||||||
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
|
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
|
||||||
int itag=iv.w;
|
int itag=iv.w;
|
||||||
|
|
||||||
|
const numtyp qi = extra[i].x;
|
||||||
|
|
||||||
numtyp factor_dpd, factor_sqrt;
|
numtyp factor_dpd, factor_sqrt;
|
||||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
ucl_prefetch(dev_packed+nbor+n_stride);
|
ucl_prefetch(dev_packed+nbor+n_stride);
|
||||||
@ -233,6 +236,8 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
|
|||||||
numtyp dot = delx*delvx + dely*delvy + delz*delvz;
|
numtyp dot = delx*delvx + dely*delvy + delz*delvz;
|
||||||
numtyp wd = (numtyp)1.0 - r/coeff[mtype].w;
|
numtyp wd = (numtyp)1.0 - r/coeff[mtype].w;
|
||||||
|
|
||||||
|
const numtyp qj = extra[j].x;
|
||||||
|
|
||||||
unsigned int tag1=itag, tag2=jtag;
|
unsigned int tag1=itag, tag2=jtag;
|
||||||
if (tag1 > tag2) {
|
if (tag1 > tag2) {
|
||||||
tag1 = jtag; tag2 = itag;
|
tag1 = jtag; tag2 = itag;
|
||||||
@ -279,7 +284,8 @@ __kernel void k_dpd(const __global numtyp4 *restrict x_,
|
|||||||
ans,engv);
|
ans,engv);
|
||||||
}
|
}
|
||||||
|
|
||||||
__kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
|
__kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_,
|
||||||
|
const __global numtyp4 *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff_in,
|
const __global numtyp4 *restrict coeff_in,
|
||||||
const __global numtyp *restrict sp_lj_in,
|
const __global numtyp *restrict sp_lj_in,
|
||||||
const __global numtyp *restrict sp_sqrt_in,
|
const __global numtyp *restrict sp_sqrt_in,
|
||||||
@ -341,6 +347,8 @@ __kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
|
|||||||
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
|
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
|
||||||
int itag=iv.w;
|
int itag=iv.w;
|
||||||
|
|
||||||
|
const numtyp qi = extra[i].x;
|
||||||
|
|
||||||
#ifndef ONETYPE
|
#ifndef ONETYPE
|
||||||
numtyp factor_dpd, factor_sqrt;
|
numtyp factor_dpd, factor_sqrt;
|
||||||
#endif
|
#endif
|
||||||
@ -382,6 +390,8 @@ __kernel void k_dpd_fast(const __global numtyp4 *restrict x_,
|
|||||||
#endif
|
#endif
|
||||||
numtyp wd = (numtyp)1.0 - r/coeffw;
|
numtyp wd = (numtyp)1.0 - r/coeffw;
|
||||||
|
|
||||||
|
const numtyp qj = extra[j].x;
|
||||||
|
|
||||||
unsigned int tag1=itag, tag2=jtag;
|
unsigned int tag1=itag, tag2=jtag;
|
||||||
if (tag1 > tag2) {
|
if (tag1 > tag2) {
|
||||||
tag1 = jtag; tag2 = itag;
|
tag1 = jtag; tag2 = itag;
|
||||||
|
|||||||
@ -63,6 +63,10 @@ class DPDCharged : public BaseDPD<numtyp, acctyp> {
|
|||||||
UCL_D_Vec<numtyp4> coeff;
|
UCL_D_Vec<numtyp4> coeff;
|
||||||
|
|
||||||
UCL_D_Vec<numtyp> cutsq;
|
UCL_D_Vec<numtyp> cutsq;
|
||||||
|
UCL_D_Vec<numtyp> cut_dpd;
|
||||||
|
UCL_D_Vec<numtyp> cut_dpdsq;
|
||||||
|
UCL_D_Vec<numtyp> cut_slater;
|
||||||
|
UCL_D_Vec<numtyp> cut_slatersq;
|
||||||
|
|
||||||
/// Special LJ values
|
/// Special LJ values
|
||||||
UCL_D_Vec<numtyp> sp_lj, sp_sqrt;
|
UCL_D_Vec<numtyp> sp_lj, sp_sqrt;
|
||||||
@ -76,8 +80,8 @@ class DPDCharged : public BaseDPD<numtyp, acctyp> {
|
|||||||
/// Only used for thermostat
|
/// Only used for thermostat
|
||||||
int _tstat_only;
|
int _tstat_only;
|
||||||
|
|
||||||
/// pointer to host data of charge
|
/// pointer to host data for atom charge
|
||||||
double *
|
double *q;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
bool _allocated;
|
bool _allocated;
|
||||||
|
|||||||
@ -17,12 +17,12 @@
|
|||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
|
||||||
#include "lal_dpd.h"
|
#include "lal_dpd_charged.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace LAMMPS_AL;
|
using namespace LAMMPS_AL;
|
||||||
|
|
||||||
static DPD<PRECISION,ACC_PRECISION> DPDCMF;
|
static DPDCharged<PRECISION,ACC_PRECISION> DPDCMF;
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Allocate memory on host and device and copy constants to device
|
// Allocate memory on host and device and copy constants to device
|
||||||
|
|||||||
@ -277,6 +277,14 @@ void PairDPDChargedGPU::compute(int eflag, int vflag)
|
|||||||
void PairDPDChargedGPU::init_style()
|
void PairDPDChargedGPU::init_style()
|
||||||
{
|
{
|
||||||
|
|
||||||
|
if (comm->ghost_velocity == 0)
|
||||||
|
error->all(FLERR,"Pair dpd requires ghost atoms store velocity");
|
||||||
|
if (!atom->q_flag)
|
||||||
|
error->all(FLERR,"Pair style coul/slater/long requires atom attribute q");
|
||||||
|
// ensure use of KSpace long-range solver, set g_ewald
|
||||||
|
if (force->kspace == nullptr) error->all(FLERR, "Pair style requires a KSpace style");
|
||||||
|
g_ewald = force->kspace->g_ewald;
|
||||||
|
|
||||||
// Repeat cutsq calculation because done after call to init_style
|
// Repeat cutsq calculation because done after call to init_style
|
||||||
double maxcut = -1.0;
|
double maxcut = -1.0;
|
||||||
double mcut;
|
double mcut;
|
||||||
@ -319,13 +327,14 @@ void PairDPDChargedGPU::cpu_compute(int start, int inum, int eflag, int /* vflag
|
|||||||
int *numneigh, int **firstneigh)
|
int *numneigh, int **firstneigh)
|
||||||
{
|
{
|
||||||
int i, j, ii, jj, jnum, itype, jtype;
|
int i, j, ii, jj, jnum, itype, jtype;
|
||||||
double xtmp, ytmp, ztmp, delx, dely, delz, evdwl, fpair;
|
double qtmp, xtmp, ytmp, ztmp, delx, dely, delz, evdwl, ecoul, fpair;
|
||||||
double vxtmp, vytmp, vztmp, delvx, delvy, delvz;
|
double vxtmp, vytmp, vztmp, delvx, delvy, delvz;
|
||||||
double r2inv,forcedpd,forcecoul,factor_coul;
|
double r2inv,forcedpd,forcecoul,factor_coul;
|
||||||
double grij,expm2,prefactor,t,erfc;
|
double grij,expm2,prefactor,t,erfc;
|
||||||
double rsq,r,rinv,dot,wd,randnum,factor_dpd,factor_sqrt;
|
double rsq,r,rinv,dot,wd,randnum,factor_dpd,factor_sqrt;
|
||||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||||
double slater_term;
|
double slater_term;
|
||||||
|
|
||||||
int *jlist;
|
int *jlist;
|
||||||
tagint itag, jtag;
|
tagint itag, jtag;
|
||||||
|
|
||||||
@ -422,19 +431,26 @@ void PairDPDChargedGPU::cpu_compute(int start, int inum, int eflag, int /* vflag
|
|||||||
forcecoul *= r2inv;
|
forcecoul *= r2inv;
|
||||||
} else forcecoul = 0.0;
|
} else forcecoul = 0.0;
|
||||||
|
|
||||||
|
fpair = forcedpd + forcecoul;
|
||||||
|
|
||||||
f[i][0] += delx * fpair;
|
f[i][0] += delx * fpair;
|
||||||
f[i][1] += dely * fpair;
|
f[i][1] += dely * fpair;
|
||||||
f[i][2] += delz * fpair;
|
f[i][2] += delz * fpair;
|
||||||
|
|
||||||
if (eflag) {
|
if (eflag) {
|
||||||
// unshifted eng of conservative term:
|
if (rsq < cut_dpdsq[itype][jtype]) {
|
||||||
// evdwl = -a0[itype][jtype]*r * (1.0-0.5*r/cut[itype][jtype]);
|
// eng shifted to 0.0 at cutoff
|
||||||
// eng shifted to 0.0 at cutoff
|
evdwl = 0.5*a0[itype][jtype]*cut_dpd[itype][jtype] * wd*wd;
|
||||||
evdwl = 0.5 * a0[itype][jtype] * cut[itype][jtype] * wd * wd;
|
evdwl *= factor_dpd;
|
||||||
evdwl *= factor_dpd;
|
} else evdwl = 0.0;
|
||||||
|
|
||||||
|
if (cut_slater[itype][jtype] != 0.0 && rsq < cut_slatersq[itype][jtype]){
|
||||||
|
ecoul = prefactor*(erfc - (1 + r/lamda)*exp(-2*r/lamda));
|
||||||
|
if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor*(1.0-(1 + r/lamda)*exp(-2*r/lamda));
|
||||||
|
} else ecoul = 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (evflag) ev_tally_full(i, evdwl, 0.0, fpair, delx, dely, delz);
|
if (evflag) ev_tally_full(i, evdwl, ecoul, fpair, delx, dely, delz);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user