Cosmetic changes and cleanup
This commit is contained in:
@ -185,7 +185,7 @@ int AmoebaT::multipole_real(const int eflag, const int vflag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Calculate the real-space permanent field, returning field and fieldp
|
// Launch the real-space permanent field kernel
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int AmoebaT::udirect2b(const int eflag, const int vflag) {
|
int AmoebaT::udirect2b(const int eflag, const int vflag) {
|
||||||
@ -202,7 +202,9 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) {
|
|||||||
(BX/this->_threads_per_atom)));
|
(BX/this->_threads_per_atom)));
|
||||||
this->time_pair.start();
|
this->time_pair.start();
|
||||||
|
|
||||||
// Build the short neighbor list if not done yet
|
// Build the short neighbor list for the cutoff _off2_polar, if not done yet
|
||||||
|
// this is the first kernel in a time step where _off2_polar is used
|
||||||
|
|
||||||
if (!this->short_nbor_polar_avail) {
|
if (!this->short_nbor_polar_avail) {
|
||||||
this->k_short_nbor.set_size(GX,BX);
|
this->k_short_nbor.set_size(GX,BX);
|
||||||
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
||||||
@ -225,7 +227,7 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Calculate the real-space induced field, returning field and fieldp
|
// Launch the real-space induced field kernel, returning field and fieldp
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int AmoebaT::umutual2b(const int eflag, const int vflag) {
|
int AmoebaT::umutual2b(const int eflag, const int vflag) {
|
||||||
@ -264,7 +266,7 @@ int AmoebaT::umutual2b(const int eflag, const int vflag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Calculate the polar real-space term, returning tep
|
// Launch the polar real-space kernel, returning tep
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int AmoebaT::polar_real(const int eflag, const int vflag) {
|
int AmoebaT::polar_real(const int eflag, const int vflag) {
|
||||||
|
|||||||
@ -447,7 +447,9 @@ int** BaseAmoebaT::precompute(const int ago, const int inum_full, const int nall
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Reneighbor on GPU if necessary, and then compute multipole real-space
|
// Compute multipole real-space part
|
||||||
|
// precompute() should be already invoked before mem (re)allocation
|
||||||
|
// this is the first part in a time step done on the GPU for AMOEBA for now
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
|
void BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
|
||||||
@ -464,21 +466,6 @@ void BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
|
|||||||
const double aewald, const double felec,
|
const double aewald, const double felec,
|
||||||
const double off2_mpole, double *host_q,
|
const double off2_mpole, double *host_q,
|
||||||
double *boxlo, double *prd, void **tep_ptr) {
|
double *boxlo, double *prd, void **tep_ptr) {
|
||||||
// reallocate per-atom arrays, transfer data from the host
|
|
||||||
// and build the neighbor lists if needed
|
|
||||||
// NOTE:
|
|
||||||
// Once all the kernels are ready, precompute() is needed only once
|
|
||||||
// in the first kernel in a time step.
|
|
||||||
/*
|
|
||||||
int** firstneigh = nullptr;
|
|
||||||
firstneigh = precompute(ago, inum_full, nall, host_x, host_type,
|
|
||||||
host_amtype, host_amgroup, host_rpole,
|
|
||||||
nullptr, nullptr, nullptr, sublo, subhi, tag,
|
|
||||||
nspecial, special, nspecial15, special15,
|
|
||||||
eflag_in, vflag_in, eatom, vatom,
|
|
||||||
host_start, ilist, jnum, cpu_time,
|
|
||||||
success, host_q, boxlo, prd);
|
|
||||||
*/
|
|
||||||
// ------------------- Resize _tep array ------------------------
|
// ------------------- Resize _tep array ------------------------
|
||||||
|
|
||||||
if (inum_full>_max_tep_size) {
|
if (inum_full>_max_tep_size) {
|
||||||
@ -502,8 +489,6 @@ void BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
|
|||||||
// copy tep from device to host
|
// copy tep from device to host
|
||||||
|
|
||||||
_tep.update_host(_max_tep_size*4,false);
|
_tep.update_host(_max_tep_size*4,false);
|
||||||
|
|
||||||
// return firstneigh; // nbor->host_jlist.begin()-host_start;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@ -842,22 +827,23 @@ double BaseAmoebaT::host_memory_usage_atomic() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Setup the FFT plan
|
// Setup the FFT plan: only placeholder for now
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void BaseAmoebaT::setup_fft(const int numel, const int element_type)
|
void BaseAmoebaT::setup_fft(const int numel, const int element_type)
|
||||||
{
|
{
|
||||||
|
// TODO: setting up FFT plan based on the backend (cuFFT or hipFFT)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Compute FFT on the device
|
// Compute FFT on the device: only placeholder for now
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void BaseAmoebaT::compute_fft1d(void* in, void* out, const int numel, const int mode)
|
void BaseAmoebaT::compute_fft1d(void* in, void* out, const int numel, const int mode)
|
||||||
{
|
{
|
||||||
|
// TODO: setting up FFT plan based on the backend (cuFFT or hipFFT)
|
||||||
#if !defined(USE_OPENCL) && !defined(USE_HIP)
|
#if !defined(USE_OPENCL) && !defined(USE_HIP)
|
||||||
if (fft_plan_created == false) {
|
if (fft_plan_created == false) {
|
||||||
int m = numel/2;
|
int m = numel/2;
|
||||||
|
|||||||
@ -143,8 +143,12 @@ int HippoT::init(const int ntypes, const int max_amtype, const int max_amclass,
|
|||||||
_polar_uscale = polar_uscale;
|
_polar_uscale = polar_uscale;
|
||||||
|
|
||||||
_allocated=true;
|
_allocated=true;
|
||||||
this->_max_bytes=coeff_amtype.row_bytes() + coeff_rep.row_bytes() + coeff_amclass.row_bytes() +
|
this->_max_bytes=coeff_amtype.row_bytes() + coeff_rep.row_bytes()
|
||||||
+ sp_polar.row_bytes() + sp_nonpolar.row_bytes() + this->_tep.row_bytes();
|
+ coeff_amclass.row_bytes() + sp_polar.row_bytes()
|
||||||
|
+ sp_nonpolar.row_bytes() + this->_tep.row_bytes()
|
||||||
|
+ this->_fieldp.row_bytes() + this->_thetai1.row_bytes()
|
||||||
|
+ this->_thetai2.row_bytes() + this->_thetai3.row_bytes()
|
||||||
|
+ this->_igrid.row_bytes() + this->_cgrid_brick.row_bytes();
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -169,7 +173,7 @@ double HippoT::host_memory_usage() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Reneighbor on GPU if necessary, and then compute repulsion
|
// Compute the repulsion term, returning tep
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void HippoT::compute_repulsion(const int ago, const int inum_full,
|
void HippoT::compute_repulsion(const int ago, const int inum_full,
|
||||||
@ -203,26 +207,6 @@ void HippoT::compute_repulsion(const int ago, const int inum_full,
|
|||||||
|
|
||||||
this->set_kernel(eflag,vflag);
|
this->set_kernel(eflag,vflag);
|
||||||
|
|
||||||
// reallocate per-atom arrays, transfer data from the host
|
|
||||||
// and build the neighbor lists if needed
|
|
||||||
// NOTE:
|
|
||||||
// For now we invoke precompute() again here,
|
|
||||||
// to be able to turn on/off the udirect2b kernel (which comes before this)
|
|
||||||
// Once all the kernels are ready, precompute() is needed only once
|
|
||||||
// in the first kernel in a time step.
|
|
||||||
// We only need to cast the necessary from host to device here
|
|
||||||
// if the neighbor lists are rebuilt and other per-atom arrays
|
|
||||||
// (x, type, amtype, amgroup, rpole) are ready on the device.
|
|
||||||
/*
|
|
||||||
int** firstneigh = nullptr;
|
|
||||||
firstneigh = this->precompute(ago, inum_full, nall, host_x, host_type,
|
|
||||||
host_amtype, host_amgroup, host_rpole,
|
|
||||||
nullptr, nullptr, nullptr, sublo, subhi, tag,
|
|
||||||
nspecial, special, nspecial15, special15,
|
|
||||||
eflag_in, vflag_in, eatom, vatom,
|
|
||||||
host_start, ilist, jnum, cpu_time,
|
|
||||||
success, host_q, boxlo, prd);
|
|
||||||
*/
|
|
||||||
// ------------------- Resize _tep array ------------------------
|
// ------------------- Resize _tep array ------------------------
|
||||||
|
|
||||||
if (inum_full>this->_max_tep_size) {
|
if (inum_full>this->_max_tep_size) {
|
||||||
@ -252,12 +236,10 @@ void HippoT::compute_repulsion(const int ago, const int inum_full,
|
|||||||
// copy tep from device to host
|
// copy tep from device to host
|
||||||
|
|
||||||
this->_tep.update_host(this->_max_tep_size*4,false);
|
this->_tep.update_host(this->_max_tep_size*4,false);
|
||||||
|
|
||||||
// return firstneigh; // nbor->host_jlist.begin()-host_start;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Calculate the repulsion term, returning tep
|
// Launch the repulsion kernel
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int HippoT::repulsion(const int eflag, const int vflag) {
|
int HippoT::repulsion(const int eflag, const int vflag) {
|
||||||
@ -299,7 +281,7 @@ int HippoT::repulsion(const int eflag, const int vflag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Reneighbor on GPU if necessary, and then compute dispersion real-space
|
// Compute dispersion real-space
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void HippoT::compute_dispersion_real(int *host_amtype, int *host_amgroup,
|
void HippoT::compute_dispersion_real(int *host_amtype, int *host_amgroup,
|
||||||
@ -323,12 +305,10 @@ void HippoT::compute_dispersion_real(int *host_amtype, int *host_amgroup,
|
|||||||
//this->device->add_ans_object(this->ans);
|
//this->device->add_ans_object(this->ans);
|
||||||
|
|
||||||
this->hd_balancer.stop_timer();
|
this->hd_balancer.stop_timer();
|
||||||
|
|
||||||
// return nullptr; // nbor->host_jlist.begin()-host_start;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Calculate the dispersion real-space term, returning tep
|
// Launch the dispersion real-space kernel
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int HippoT::dispersion_real(const int eflag, const int vflag) {
|
int HippoT::dispersion_real(const int eflag, const int vflag) {
|
||||||
@ -346,7 +326,7 @@ int HippoT::dispersion_real(const int eflag, const int vflag) {
|
|||||||
this->time_pair.start();
|
this->time_pair.start();
|
||||||
|
|
||||||
// Build the short neighbor list for the cutoff off2_disp,
|
// Build the short neighbor list for the cutoff off2_disp,
|
||||||
// at this point mpole is the first kernel in a time step
|
// at this point dispersion is the first kernel in a time step
|
||||||
|
|
||||||
this->k_short_nbor.set_size(GX,BX);
|
this->k_short_nbor.set_size(GX,BX);
|
||||||
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
||||||
@ -356,20 +336,20 @@ int HippoT::dispersion_real(const int eflag, const int vflag) {
|
|||||||
|
|
||||||
k_dispersion.set_size(GX,BX);
|
k_dispersion.set_size(GX,BX);
|
||||||
k_dispersion.run(&this->atom->x, &this->atom->extra,
|
k_dispersion.run(&this->atom->x, &this->atom->extra,
|
||||||
&coeff_amtype, &coeff_amclass, &sp_nonpolar,
|
&coeff_amtype, &coeff_amclass, &sp_nonpolar,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->dev_short_nbor,
|
&this->dev_short_nbor,
|
||||||
&this->ans->force, &this->ans->engv,
|
&this->ans->force, &this->ans->engv,
|
||||||
&eflag, &vflag, &ainum, &_nall, &nbor_pitch,
|
&eflag, &vflag, &ainum, &_nall, &nbor_pitch,
|
||||||
&this->_threads_per_atom, &this->_aewald,
|
&this->_threads_per_atom, &this->_aewald,
|
||||||
&this->_off2_disp);
|
&this->_off2_disp);
|
||||||
this->time_pair.stop();
|
this->time_pair.stop();
|
||||||
|
|
||||||
return GX;
|
return GX;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Reneighbor on GPU if necessary, and then compute multipole real-space
|
// Compute the multipole real-space term, returning tep
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void HippoT::compute_multipole_real(const int ago, const int inum_full,
|
void HippoT::compute_multipole_real(const int ago, const int inum_full,
|
||||||
@ -416,12 +396,10 @@ void HippoT::compute_multipole_real(const int ago, const int inum_full,
|
|||||||
// copy tep from device to host
|
// copy tep from device to host
|
||||||
|
|
||||||
this->_tep.update_host(this->_max_tep_size*4,false);
|
this->_tep.update_host(this->_max_tep_size*4,false);
|
||||||
|
|
||||||
//return nullptr; // nbor->host_jlist.begin()-host_start;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Calculate the multipole real-space term, returning tep
|
// Launch the multipole real-space kernel
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int HippoT::multipole_real(const int eflag, const int vflag) {
|
int HippoT::multipole_real(const int eflag, const int vflag) {
|
||||||
@ -438,8 +416,7 @@ int HippoT::multipole_real(const int eflag, const int vflag) {
|
|||||||
(BX/this->_threads_per_atom)));
|
(BX/this->_threads_per_atom)));
|
||||||
this->time_pair.start();
|
this->time_pair.start();
|
||||||
|
|
||||||
// Build the short neighbor list for the cutoff off2_mpole,
|
// Build the short neighbor list for the cutoff off2_mpole
|
||||||
// at this point mpole is the first kernel in a time step
|
|
||||||
|
|
||||||
this->k_short_nbor.set_size(GX,BX);
|
this->k_short_nbor.set_size(GX,BX);
|
||||||
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
||||||
@ -462,8 +439,8 @@ int HippoT::multipole_real(const int eflag, const int vflag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Reneighbor on GPU if necessary, and then compute the direct real space part
|
// Compute the direct real space part of the permanent field
|
||||||
// of the permanent field
|
// returning field and fieldp
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void HippoT::compute_udirect2b(int *host_amtype, int *host_amgroup, double **host_rpole,
|
void HippoT::compute_udirect2b(int *host_amtype, int *host_amgroup, double **host_rpole,
|
||||||
@ -488,7 +465,7 @@ void HippoT::compute_udirect2b(int *host_amtype, int *host_amgroup, double **hos
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Calculate the real-space permanent field, returning field and fieldp
|
// Launch the real-space permanent field kernel
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int HippoT::udirect2b(const int eflag, const int vflag) {
|
int HippoT::udirect2b(const int eflag, const int vflag) {
|
||||||
@ -505,7 +482,9 @@ int HippoT::udirect2b(const int eflag, const int vflag) {
|
|||||||
(BX/this->_threads_per_atom)));
|
(BX/this->_threads_per_atom)));
|
||||||
this->time_pair.start();
|
this->time_pair.start();
|
||||||
|
|
||||||
// Build the short neighbor list if not done yet
|
// Build the short neighbor list for the cutoff _off2_polar, if not done yet
|
||||||
|
// this is the first kernel in a time step where _off2_polar is used
|
||||||
|
|
||||||
if (!this->short_nbor_polar_avail) {
|
if (!this->short_nbor_polar_avail) {
|
||||||
this->k_short_nbor.set_size(GX,BX);
|
this->k_short_nbor.set_size(GX,BX);
|
||||||
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
||||||
@ -529,8 +508,8 @@ int HippoT::udirect2b(const int eflag, const int vflag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Reneighbor on GPU if necessary, and then compute the direct real space part
|
// Compute the direct real space term of the induced field
|
||||||
// of the induced field
|
// returning field and fieldp
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void HippoT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole,
|
void HippoT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **host_rpole,
|
||||||
@ -554,7 +533,7 @@ void HippoT::compute_umutual2b(int *host_amtype, int *host_amgroup, double **hos
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Calculate the real-space induced field, returning field and fieldp
|
// Launch the real-space induced field kernel
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int HippoT::umutual2b(const int eflag, const int vflag) {
|
int HippoT::umutual2b(const int eflag, const int vflag) {
|
||||||
@ -628,7 +607,7 @@ void HippoT::compute_polar_real(int *host_amtype, int *host_amgroup, double **ho
|
|||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Calculate the polar real-space term, returning tep
|
// Launch the polar real-space kernel
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int HippoT::polar_real(const int eflag, const int vflag) {
|
int HippoT::polar_real(const int eflag, const int vflag) {
|
||||||
|
|||||||
Reference in New Issue
Block a user