pack4 cutoffs

This commit is contained in:
Eddy Barraud
2024-06-05 11:40:24 +02:00
parent b187001f38
commit 9ea57acf54
5 changed files with 28 additions and 32 deletions

View File

@ -47,7 +47,7 @@ int DPDChargedT::init(const int ntypes,
double **host_cutsq, double **host_a0, double **host_cutsq, double **host_a0,
double **host_gamma, double **host_sigma, double **host_gamma, double **host_sigma,
double **host_cut, double **host_cut,
double **host_cut_dpd, **host_cut_slater, double **host_cut_dpd, double **host_cut_dpdsq, double **host_cut_slatersq,
double *host_special_lj, double *host_special_lj,
const bool tstat_only, const bool tstat_only,
const int nlocal, const int nall, const int nlocal, const int nall,
@ -94,21 +94,16 @@ int DPDChargedT::init(const int ntypes,
coeff.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); coeff.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_a0,host_gamma, this->atom->type_pack4(ntypes,lj_types,coeff,host_write,host_a0,host_gamma,
host_sigma,host_cut); host_sigma,host_cut_dpd);
UCL_H_Vec<numtyp> host_rsq(lj_types*lj_types,*(this->ucl_device), // Allocate a host write buffer for data initialization
UCL_H_Vec<numtyp> host_rsq(lj_types*lj_types*32,*(this->ucl_device),
UCL_WRITE_ONLY); UCL_WRITE_ONLY);
for (int i=0; i<lj_types*lj_types; i++)
host_rsq[i]=0.0;
cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY); cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq); this->atom->type_pack4(ntypes,lj_types,cutsq,host_rsq,host_cutsq,
host_cut_dpdsq, host_cut_dpd, host_cut_slatersq);
cut_dpd.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
this->atom->type_pack1(ntypes,lj_types,cut_dpdsq,host_rsq,host_cut_dpd);
cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
this->atom->type_pack1(ntypes,lj_types,cutsq,host_rsq,host_cutsq);
scale.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
this->atom->type_pack1(ntypes,lj_types,scale,host_write,host_scale);
double special_sqrt[4]; double special_sqrt[4];
special_sqrt[0] = sqrt(host_special_lj[0]); special_sqrt[0] = sqrt(host_special_lj[0]);
@ -207,12 +202,12 @@ int DPDChargedT::loop(const int eflag, const int vflag) {
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
void DPDChargedT::update_coeff(int ntypes, double **host_a0, double **host_gamma, void DPDChargedT::update_coeff(int ntypes, double **host_a0, double **host_gamma,
double **host_sigma, double **host_cut, double **host_cut_dpd, **host_cut_slater) double **host_sigma, double **host_cut_dpd)
{ {
UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device), UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
UCL_WRITE_ONLY); UCL_WRITE_ONLY);
this->atom->type_pack4(ntypes,_lj_types,coeff,host_write,host_a0,host_gamma, this->atom->type_pack4(ntypes,_lj_types,coeff,host_write,host_a0,host_gamma,
host_sigma,host_cut,host_cut_dpd, host_cut_slater); host_sigma,host_cut_dpd);
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------

View File

@ -174,7 +174,7 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_,
const int eflag, const int vflag, const int inum, const int eflag, const int vflag, const int inum,
const int nbor_pitch, const int nbor_pitch,
const __global numtyp4 *restrict v_, const __global numtyp4 *restrict v_,
const __global numtyp *restrict cutsq, const __global numtyp4 *restrict cutsq,
const numtyp dtinvsqrt, const int seed, const numtyp dtinvsqrt, const int seed,
const int timestep, const int tstat_only, const int timestep, const int tstat_only,
const int t_per_atom) { const int t_per_atom) {
@ -202,7 +202,7 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_,
numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i]; numtyp4 iv; fetch4(iv,i,vel_tex); //v_[i];
int itag=iv.w; int itag=iv.w;
const numtyp qi = extra[i].x; const numtyp qtmp = extra[i].x; // q[i]
numtyp factor_dpd, factor_sqrt; numtyp factor_dpd, factor_sqrt;
for ( ; nbor<nbor_end; nbor+=n_stride) { for ( ; nbor<nbor_end; nbor+=n_stride) {
@ -225,7 +225,7 @@ __kernel void k_dpd_charged(const __global numtyp4 *restrict x_,
numtyp rsq = delx*delx+dely*dely+delz*delz; numtyp rsq = delx*delx+dely*dely+delz*delz;
int mtype=itype*lj_types+jtype; int mtype=itype*lj_types+jtype;
if (rsq<cutsq[mtype]) { if (rsq<cutsq.x[mtype]) {
numtyp r=ucl_sqrt(rsq); numtyp r=ucl_sqrt(rsq);
if (r < EPSILON) continue; if (r < EPSILON) continue;
@ -296,7 +296,7 @@ __kernel void k_dpd_charged_fast(const __global numtyp4 *restrict x_,
const int eflag, const int vflag, const int inum, const int eflag, const int vflag, const int inum,
const int nbor_pitch, const int nbor_pitch,
const __global numtyp4 *restrict v_, const __global numtyp4 *restrict v_,
const __global numtyp *restrict cutsq, const __global numtyp4 *restrict cutsq,
const numtyp dtinvsqrt, const int seed, const numtyp dtinvsqrt, const int seed,
const int timestep, const int tstat_only, const int timestep, const int tstat_only,
const int t_per_atom) { const int t_per_atom) {

View File

@ -39,6 +39,7 @@ class DPDCharged : public BaseDPD<numtyp, acctyp> {
* - -5 Double precision is not supported on card **/ * - -5 Double precision is not supported on card **/
int init(const int ntypes, double **host_cutsq, double **host_a0, int init(const int ntypes, double **host_cutsq, double **host_a0,
double **host_gamma, double **host_sigma, double **host_cut, double **host_gamma, double **host_sigma, double **host_cut,
double **host_cut_dpd, double **host_cut_dpdsq, double **host_cut_slatersq,
double *host_special_lj, bool tstat_only, const int nlocal, double *host_special_lj, bool tstat_only, const int nlocal,
const int nall, const int max_nbors, const int maxspecial, const int nall, const int max_nbors, const int maxspecial,
const double cell_size, const double gpu_split, FILE *screen); const double cell_size, const double gpu_split, FILE *screen);
@ -55,18 +56,15 @@ class DPDCharged : public BaseDPD<numtyp, acctyp> {
/// Update coeff if needed (tstat only) /// Update coeff if needed (tstat only)
void update_coeff(int ntypes, double **host_a0, double **host_gamma, void update_coeff(int ntypes, double **host_a0, double **host_gamma,
double **host_sigma, double **host_cut); double **host_sigma, double **host_cut_dpd );
// --------------------------- TYPE DATA -------------------------- // --------------------------- TYPE DATA --------------------------
/// coeff.x = a0, coeff.y = gamma, coeff.z = sigma, coeff.w = cut /// coeff.x = a0, coeff.y = gamma, coeff.z = sigma, coeff.w = cut_dpd
UCL_D_Vec<numtyp4> coeff; UCL_D_Vec<numtyp4> coeff;
UCL_D_Vec<numtyp> cutsq; /// cutsq.x = cutsq, cutsq.y = cut_dpd_sq, cutsq.z = cut_dpd, cutsq.w = cut_slatersq
UCL_D_Vec<numtyp> cut_dpd; UCL_D_Vec<numtyp4> cutsq;
UCL_D_Vec<numtyp> cut_dpdsq;
UCL_D_Vec<numtyp> cut_slater;
UCL_D_Vec<numtyp> cut_slatersq;
/// Special LJ values /// Special LJ values
UCL_D_Vec<numtyp> sp_lj, sp_sqrt; UCL_D_Vec<numtyp> sp_lj, sp_sqrt;

View File

@ -56,7 +56,7 @@ int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0,
int init_ok=0; int init_ok=0;
if (world_me==0) if (world_me==0)
init_ok=DPDCMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma, init_ok=DPDCMF.init(ntypes, cutsq, host_a0, host_gamma, host_sigma,
host_cut, host_cut_dpd, host_cut_slater, special_lj, false, inum, nall, max_nbors, host_cut, host_cut_dpd, host_cut_dpdsq, host_cut_slatersq, special_lj, false, inum, nall, max_nbors,
maxspecial, cell_size, gpu_split, screen); maxspecial, cell_size, gpu_split, screen);
DPDCMF.device->world_barrier(); DPDCMF.device->world_barrier();
@ -122,9 +122,9 @@ void dpd_charged_gpu_compute(const int ago, const int inum_full, const int nall,
} }
void dpd_charged_gpu_update_coeff(int ntypes, double **host_a0, double **host_gamma, void dpd_charged_gpu_update_coeff(int ntypes, double **host_a0, double **host_gamma,
double **host_sigma, double **host_cut, double **host_cut_dpd, **host_cut_slater) double **host_sigma, double **host_cut, double **host_cut_dpd)
{ {
DPDCMF.update_coeff(ntypes,host_a0,host_gamma,host_sigma,host_cut, host_cut_dpd, host_cut_slater,); DPDCMF.update_coeff(ntypes,host_a0,host_gamma,host_sigma,host_cut, host_cut_dpd);
} }
void dpd_charged_gpu_get_extra_data(double *host_q) { void dpd_charged_gpu_get_extra_data(double *host_q) {

View File

@ -41,7 +41,8 @@ using namespace EwaldConst;
int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, double **host_gamma, int dpd_charged_gpu_init(const int ntypes, double **cutsq, double **host_a0, double **host_gamma,
double **host_sigma, double **host_cut, double **host_cut_dpd, double **host_cut_slater, double *special_lj, const int inum, double **host_sigma, double **host_cut, double **host_cut_dpd, double **host_cut_slater, double *special_lj, const int inum,
const int nall, const int max_nbors, const int maxspecial, const double cell_size, const int nall, const int max_nbors, const int maxspecial, const double cell_size,
int &gpu_mode, FILE *screen); int &gpu_mode, FILE *screen,
double *host_special_coul, const double qqrd2e, const double g_ewald, const double lamda);
void dpd_charged_gpu_clear(); void dpd_charged_gpu_clear();
int **dpd_charged_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x, int **dpd_charged_gpu_compute_n(const int ago, const int inum_full, const int nall, double **host_x,
int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial, int *host_type, double *sublo, double *subhi, tagint *tag, int **nspecial,
@ -312,8 +313,10 @@ void PairDPDChargedGPU::init_style()
if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial; if (atom->molecular != Atom::ATOMIC) maxspecial = atom->maxspecial;
int mnf = 5e-2 * neighbor->oneatom; int mnf = 5e-2 * neighbor->oneatom;
int success = int success =
dpd_charged_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma, cut, cut_dpd, cut_slater, force->special_lj, atom->nlocal, dpd_charged_gpu_init(atom->ntypes + 1, cutsq, a0, gamma, sigma, cut,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen); cut_dpd, cut_dpdsq, cut_slatersq, force->special_lj, atom->nlocal,
atom->nlocal + atom->nghost, mnf, maxspecial, cell_size, gpu_mode, screen,
force->special_coul, force->qqrd2e, g_ewald, lamda);
GPU_EXTRA::check_flag(success, error, world); GPU_EXTRA::check_flag(success, error, world);
if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL); if (gpu_mode == GPU_FORCE) neighbor->add_request(this, NeighConst::REQ_FULL);