Attempted to resolve issues with switching from acctyp4 to acctyp3 for tep and fieldp since the changes in PR #3675, taking note of some changes from the Intel OCL PR #3663

This commit is contained in:
Trung Nguyen
2023-07-08 00:50:19 -05:00
parent 146245e6ae
commit d6412dc97b
5 changed files with 46 additions and 37 deletions

View File

@ -420,6 +420,11 @@ void BaseAmoebaT::compute_udirect2b(int *host_amtype, int *host_amgroup, double
cast_extra_data(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval); cast_extra_data(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval);
atom->add_extra_data(); atom->add_extra_data();
if (_max_tep_size>_max_fieldp_size) {
_max_fieldp_size = _max_tep_size;
_fieldp.resize(_max_fieldp_size*6);
}
*fieldp_ptr=_fieldp.host.begin(); *fieldp_ptr=_fieldp.host.begin();
// specify the correct cutoff and alpha values // specify the correct cutoff and alpha values

View File

@ -203,7 +203,7 @@ class BaseAmoeba {
virtual void update_fieldp(void **fieldp_ptr) { virtual void update_fieldp(void **fieldp_ptr) {
*fieldp_ptr=_fieldp.host.begin(); *fieldp_ptr=_fieldp.host.begin();
// _fieldp store both arrays, one after another // _fieldp store both arrays, one after another
_fieldp.update_host(_max_fieldp_size*8,false); _fieldp.update_host(_max_fieldp_size*6,false);
} }
/// setup a plan for FFT, where size is the number of elements /// setup a plan for FFT, where size is the number of elements

View File

@ -211,7 +211,7 @@ void HippoT::compute_repulsion(const int /*ago*/, const int inum_full,
if (inum_full>this->_max_tep_size) { if (inum_full>this->_max_tep_size) {
this->_max_tep_size=static_cast<int>(static_cast<double>(inum_full)*1.10); this->_max_tep_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
this->_tep.resize(this->_max_tep_size*4); this->_tep.resize(this->_max_tep_size*3);
} }
*tep_ptr=this->_tep.host.begin(); *tep_ptr=this->_tep.host.begin();
@ -226,7 +226,7 @@ void HippoT::compute_repulsion(const int /*ago*/, const int inum_full,
repulsion(this->_eflag,this->_vflag); repulsion(this->_eflag,this->_vflag);
// copy tep from device to host // copy tep from device to host
this->_tep.update_host(this->_max_tep_size*4,false); this->_tep.update_host(this->_max_tep_size*3,false);
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@ -366,7 +366,7 @@ void HippoT::compute_multipole_real(const int /*ago*/, const int inum_full,
if (inum_full>this->_max_tep_size) { if (inum_full>this->_max_tep_size) {
this->_max_tep_size=static_cast<int>(static_cast<double>(inum_full)*1.10); this->_max_tep_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
this->_tep.resize(this->_max_tep_size*4); this->_tep.resize(this->_max_tep_size*3);
} }
*tep_ptr=this->_tep.host.begin(); *tep_ptr=this->_tep.host.begin();
@ -376,7 +376,7 @@ void HippoT::compute_multipole_real(const int /*ago*/, const int inum_full,
multipole_real(this->_eflag,this->_vflag); multipole_real(this->_eflag,this->_vflag);
// copy tep from device to host // copy tep from device to host
this->_tep.update_host(this->_max_tep_size*4,false); this->_tep.update_host(this->_max_tep_size*3,false);
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@ -434,6 +434,10 @@ void HippoT::compute_udirect2b(int * /*host_amtype*/, int * /*host_amgroup*/, do
this->cast_extra_data(nullptr, nullptr, nullptr, host_uind, host_uinp, host_pval); this->cast_extra_data(nullptr, nullptr, nullptr, host_uind, host_uinp, host_pval);
this->atom->add_extra_data(); this->atom->add_extra_data();
if (this->_max_tep_size>this->_max_fieldp_size) {
this->_max_fieldp_size = this->_max_tep_size;
this->_fieldp.resize(this->_max_fieldp_size*6);
}
*fieldp_ptr=this->_fieldp.host.begin(); *fieldp_ptr=this->_fieldp.host.begin();
this->_off2_polar = off2_polar; this->_off2_polar = off2_polar;
@ -442,7 +446,7 @@ void HippoT::compute_udirect2b(int * /*host_amtype*/, int * /*host_amgroup*/, do
// copy field and fieldp from device to host (_fieldp store both arrays, one after another) // copy field and fieldp from device to host (_fieldp store both arrays, one after another)
this->_fieldp.update_host(this->_max_fieldp_size*8,false); this->_fieldp.update_host(this->_max_fieldp_size*6,false);
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
@ -580,7 +584,7 @@ void HippoT::compute_polar_real(int * /*host_amtype*/, int * /*host_amgroup*/, d
this->device->add_ans_object(this->ans); this->device->add_ans_object(this->ans);
// copy tep from device to host // copy tep from device to host
this->_tep.update_host(this->_max_tep_size*4,false); this->_tep.update_host(this->_max_tep_size*3,false);
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------

View File

@ -745,15 +745,15 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
auto field_ptr = (float *)fieldp_pinned; auto field_ptr = (float *)fieldp_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
field[i][0] += field_ptr[idx]; field[i][0] += field_ptr[idx];
field[i][1] += field_ptr[idx+1]; field[i][1] += field_ptr[idx+1];
field[i][2] += field_ptr[idx+2]; field[i][2] += field_ptr[idx+2];
} }
field_ptr += 4*inum; field_ptr += 3*inum;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
fieldp[i][0] += field_ptr[idx]; fieldp[i][0] += field_ptr[idx];
fieldp[i][1] += field_ptr[idx+1]; fieldp[i][1] += field_ptr[idx+1];
fieldp[i][2] += field_ptr[idx+2]; fieldp[i][2] += field_ptr[idx+2];
@ -762,15 +762,15 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
auto field_ptr = (double *)fieldp_pinned; auto field_ptr = (double *)fieldp_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
field[i][0] += field_ptr[idx]; field[i][0] += field_ptr[idx];
field[i][1] += field_ptr[idx+1]; field[i][1] += field_ptr[idx+1];
field[i][2] += field_ptr[idx+2]; field[i][2] += field_ptr[idx+2];
} }
field_ptr += 4*inum; field_ptr += 3*inum;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
fieldp[i][0] += field_ptr[idx]; fieldp[i][0] += field_ptr[idx];
fieldp[i][1] += field_ptr[idx+1]; fieldp[i][1] += field_ptr[idx+1];
fieldp[i][2] += field_ptr[idx+2]; fieldp[i][2] += field_ptr[idx+2];
@ -976,15 +976,15 @@ void PairAmoebaGPU::ufield0c(double **field, double **fieldp)
auto field_ptr = (float *)fieldp_pinned; auto field_ptr = (float *)fieldp_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
field[i][0] += field_ptr[idx]; field[i][0] += field_ptr[idx];
field[i][1] += field_ptr[idx+1]; field[i][1] += field_ptr[idx+1];
field[i][2] += field_ptr[idx+2]; field[i][2] += field_ptr[idx+2];
} }
field_ptr += 4*inum; field_ptr += 3*inum;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
fieldp[i][0] += field_ptr[idx]; fieldp[i][0] += field_ptr[idx];
fieldp[i][1] += field_ptr[idx+1]; fieldp[i][1] += field_ptr[idx+1];
fieldp[i][2] += field_ptr[idx+2]; fieldp[i][2] += field_ptr[idx+2];
@ -993,15 +993,15 @@ void PairAmoebaGPU::ufield0c(double **field, double **fieldp)
auto field_ptr = (double *)fieldp_pinned; auto field_ptr = (double *)fieldp_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
field[i][0] += field_ptr[idx]; field[i][0] += field_ptr[idx];
field[i][1] += field_ptr[idx+1]; field[i][1] += field_ptr[idx+1];
field[i][2] += field_ptr[idx+2]; field[i][2] += field_ptr[idx+2];
} }
field_ptr += 4*inum; field_ptr += 3*inum;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
fieldp[i][0] += field_ptr[idx]; fieldp[i][0] += field_ptr[idx];
fieldp[i][1] += field_ptr[idx+1]; fieldp[i][1] += field_ptr[idx+1];
fieldp[i][2] += field_ptr[idx+2]; fieldp[i][2] += field_ptr[idx+2];
@ -2029,9 +2029,9 @@ void PairAmoebaGPU::compute_force_from_torque(const numtyp* tq_ptr,
int nlocal = atom->nlocal; int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) { for (i = 0; i < nlocal; i++) {
_tq[0] = tq_ptr[4*i]; _tq[0] = tq_ptr[3*i];
_tq[1] = tq_ptr[4*i+1]; _tq[1] = tq_ptr[3*i+1];
_tq[2] = tq_ptr[4*i+2]; _tq[2] = tq_ptr[3*i+2];
torque2force(i,_tq,fix,fiy,fiz,force_comp); torque2force(i,_tq,fix,fiy,fiz,force_comp);
iz = zaxis2local[i]; iz = zaxis2local[i];

View File

@ -859,15 +859,15 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
auto field_ptr = (float *)fieldp_pinned; auto field_ptr = (float *)fieldp_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
field[i][0] += field_ptr[idx]; field[i][0] += field_ptr[idx];
field[i][1] += field_ptr[idx+1]; field[i][1] += field_ptr[idx+1];
field[i][2] += field_ptr[idx+2]; field[i][2] += field_ptr[idx+2];
} }
field_ptr += 4*inum; field_ptr += 3*inum;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
fieldp[i][0] += field_ptr[idx]; fieldp[i][0] += field_ptr[idx];
fieldp[i][1] += field_ptr[idx+1]; fieldp[i][1] += field_ptr[idx+1];
fieldp[i][2] += field_ptr[idx+2]; fieldp[i][2] += field_ptr[idx+2];
@ -877,15 +877,15 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
auto field_ptr = (double *)fieldp_pinned; auto field_ptr = (double *)fieldp_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
field[i][0] += field_ptr[idx]; field[i][0] += field_ptr[idx];
field[i][1] += field_ptr[idx+1]; field[i][1] += field_ptr[idx+1];
field[i][2] += field_ptr[idx+2]; field[i][2] += field_ptr[idx+2];
} }
field_ptr += 4*inum; field_ptr += 3*inum;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
fieldp[i][0] += field_ptr[idx]; fieldp[i][0] += field_ptr[idx];
fieldp[i][1] += field_ptr[idx+1]; fieldp[i][1] += field_ptr[idx+1];
fieldp[i][2] += field_ptr[idx+2]; fieldp[i][2] += field_ptr[idx+2];
@ -1087,15 +1087,15 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp)
auto *field_ptr = (float *)fieldp_pinned; auto *field_ptr = (float *)fieldp_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
field[i][0] += field_ptr[idx]; field[i][0] += field_ptr[idx];
field[i][1] += field_ptr[idx+1]; field[i][1] += field_ptr[idx+1];
field[i][2] += field_ptr[idx+2]; field[i][2] += field_ptr[idx+2];
} }
field_ptr += 4*inum; field_ptr += 3*inum;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
fieldp[i][0] += field_ptr[idx]; fieldp[i][0] += field_ptr[idx];
fieldp[i][1] += field_ptr[idx+1]; fieldp[i][1] += field_ptr[idx+1];
fieldp[i][2] += field_ptr[idx+2]; fieldp[i][2] += field_ptr[idx+2];
@ -1105,15 +1105,15 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp)
auto *field_ptr = (double *)fieldp_pinned; auto *field_ptr = (double *)fieldp_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
field[i][0] += field_ptr[idx]; field[i][0] += field_ptr[idx];
field[i][1] += field_ptr[idx+1]; field[i][1] += field_ptr[idx+1];
field[i][2] += field_ptr[idx+2]; field[i][2] += field_ptr[idx+2];
} }
field_ptr += 4*inum; field_ptr += 3*inum;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 3*i;
fieldp[i][0] += field_ptr[idx]; fieldp[i][0] += field_ptr[idx];
fieldp[i][1] += field_ptr[idx+1]; fieldp[i][1] += field_ptr[idx+1];
fieldp[i][2] += field_ptr[idx+2]; fieldp[i][2] += field_ptr[idx+2];
@ -1456,9 +1456,9 @@ void PairHippoGPU::compute_force_from_torque(const numtyp* tq_ptr,
int nlocal = atom->nlocal; int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) { for (i = 0; i < nlocal; i++) {
_tq[0] = tq_ptr[4*i]; _tq[0] = tq_ptr[3*i];
_tq[1] = tq_ptr[4*i+1]; _tq[1] = tq_ptr[3*i+1];
_tq[2] = tq_ptr[4*i+2]; _tq[2] = tq_ptr[3*i+2];
torque2force(i,_tq,fix,fiy,fiz,force_comp); torque2force(i,_tq,fix,fiy,fiz,force_comp);
iz = zaxis2local[i]; iz = zaxis2local[i];