Attempted to resolve issues, present since the changes in PR #3675, with switching tep and fieldp from acctyp4 to acctyp3; also noting some related changes from the Intel OCL PR #3663
This commit is contained in:
@ -420,6 +420,11 @@ void BaseAmoebaT::compute_udirect2b(int *host_amtype, int *host_amgroup, double
|
|||||||
cast_extra_data(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval);
|
cast_extra_data(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval);
|
||||||
atom->add_extra_data();
|
atom->add_extra_data();
|
||||||
|
|
||||||
|
if (_max_tep_size>_max_fieldp_size) {
|
||||||
|
_max_fieldp_size = _max_tep_size;
|
||||||
|
_fieldp.resize(_max_fieldp_size*6);
|
||||||
|
}
|
||||||
|
|
||||||
*fieldp_ptr=_fieldp.host.begin();
|
*fieldp_ptr=_fieldp.host.begin();
|
||||||
|
|
||||||
// specify the correct cutoff and alpha values
|
// specify the correct cutoff and alpha values
|
||||||
|
|||||||
@ -203,7 +203,7 @@ class BaseAmoeba {
|
|||||||
virtual void update_fieldp(void **fieldp_ptr) {
|
virtual void update_fieldp(void **fieldp_ptr) {
|
||||||
*fieldp_ptr=_fieldp.host.begin();
|
*fieldp_ptr=_fieldp.host.begin();
|
||||||
// _fieldp store both arrays, one after another
|
// _fieldp store both arrays, one after another
|
||||||
_fieldp.update_host(_max_fieldp_size*8,false);
|
_fieldp.update_host(_max_fieldp_size*6,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// setup a plan for FFT, where size is the number of elements
|
/// setup a plan for FFT, where size is the number of elements
|
||||||
|
|||||||
@ -211,7 +211,7 @@ void HippoT::compute_repulsion(const int /*ago*/, const int inum_full,
|
|||||||
|
|
||||||
if (inum_full>this->_max_tep_size) {
|
if (inum_full>this->_max_tep_size) {
|
||||||
this->_max_tep_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
|
this->_max_tep_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
|
||||||
this->_tep.resize(this->_max_tep_size*4);
|
this->_tep.resize(this->_max_tep_size*3);
|
||||||
}
|
}
|
||||||
*tep_ptr=this->_tep.host.begin();
|
*tep_ptr=this->_tep.host.begin();
|
||||||
|
|
||||||
@ -226,7 +226,7 @@ void HippoT::compute_repulsion(const int /*ago*/, const int inum_full,
|
|||||||
repulsion(this->_eflag,this->_vflag);
|
repulsion(this->_eflag,this->_vflag);
|
||||||
|
|
||||||
// copy tep from device to host
|
// copy tep from device to host
|
||||||
this->_tep.update_host(this->_max_tep_size*4,false);
|
this->_tep.update_host(this->_max_tep_size*3,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@ -366,7 +366,7 @@ void HippoT::compute_multipole_real(const int /*ago*/, const int inum_full,
|
|||||||
|
|
||||||
if (inum_full>this->_max_tep_size) {
|
if (inum_full>this->_max_tep_size) {
|
||||||
this->_max_tep_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
|
this->_max_tep_size=static_cast<int>(static_cast<double>(inum_full)*1.10);
|
||||||
this->_tep.resize(this->_max_tep_size*4);
|
this->_tep.resize(this->_max_tep_size*3);
|
||||||
}
|
}
|
||||||
*tep_ptr=this->_tep.host.begin();
|
*tep_ptr=this->_tep.host.begin();
|
||||||
|
|
||||||
@ -376,7 +376,7 @@ void HippoT::compute_multipole_real(const int /*ago*/, const int inum_full,
|
|||||||
multipole_real(this->_eflag,this->_vflag);
|
multipole_real(this->_eflag,this->_vflag);
|
||||||
|
|
||||||
// copy tep from device to host
|
// copy tep from device to host
|
||||||
this->_tep.update_host(this->_max_tep_size*4,false);
|
this->_tep.update_host(this->_max_tep_size*3,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@ -434,6 +434,10 @@ void HippoT::compute_udirect2b(int * /*host_amtype*/, int * /*host_amgroup*/, do
|
|||||||
this->cast_extra_data(nullptr, nullptr, nullptr, host_uind, host_uinp, host_pval);
|
this->cast_extra_data(nullptr, nullptr, nullptr, host_uind, host_uinp, host_pval);
|
||||||
this->atom->add_extra_data();
|
this->atom->add_extra_data();
|
||||||
|
|
||||||
|
if (this->_max_tep_size>this->_max_fieldp_size) {
|
||||||
|
this->_max_fieldp_size = this->_max_tep_size;
|
||||||
|
this->_fieldp.resize(this->_max_fieldp_size*6);
|
||||||
|
}
|
||||||
*fieldp_ptr=this->_fieldp.host.begin();
|
*fieldp_ptr=this->_fieldp.host.begin();
|
||||||
|
|
||||||
this->_off2_polar = off2_polar;
|
this->_off2_polar = off2_polar;
|
||||||
@ -442,7 +446,7 @@ void HippoT::compute_udirect2b(int * /*host_amtype*/, int * /*host_amgroup*/, do
|
|||||||
|
|
||||||
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
// copy field and fieldp from device to host (_fieldp store both arrays, one after another)
|
||||||
|
|
||||||
this->_fieldp.update_host(this->_max_fieldp_size*8,false);
|
this->_fieldp.update_host(this->_max_fieldp_size*6,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@ -580,7 +584,7 @@ void HippoT::compute_polar_real(int * /*host_amtype*/, int * /*host_amgroup*/, d
|
|||||||
this->device->add_ans_object(this->ans);
|
this->device->add_ans_object(this->ans);
|
||||||
|
|
||||||
// copy tep from device to host
|
// copy tep from device to host
|
||||||
this->_tep.update_host(this->_max_tep_size*4,false);
|
this->_tep.update_host(this->_max_tep_size*3,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|||||||
@ -745,15 +745,15 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
|
|||||||
auto field_ptr = (float *)fieldp_pinned;
|
auto field_ptr = (float *)fieldp_pinned;
|
||||||
|
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
field[i][0] += field_ptr[idx];
|
field[i][0] += field_ptr[idx];
|
||||||
field[i][1] += field_ptr[idx+1];
|
field[i][1] += field_ptr[idx+1];
|
||||||
field[i][2] += field_ptr[idx+2];
|
field[i][2] += field_ptr[idx+2];
|
||||||
}
|
}
|
||||||
|
|
||||||
field_ptr += 4*inum;
|
field_ptr += 3*inum;
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
fieldp[i][0] += field_ptr[idx];
|
fieldp[i][0] += field_ptr[idx];
|
||||||
fieldp[i][1] += field_ptr[idx+1];
|
fieldp[i][1] += field_ptr[idx+1];
|
||||||
fieldp[i][2] += field_ptr[idx+2];
|
fieldp[i][2] += field_ptr[idx+2];
|
||||||
@ -762,15 +762,15 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
|
|||||||
auto field_ptr = (double *)fieldp_pinned;
|
auto field_ptr = (double *)fieldp_pinned;
|
||||||
|
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
field[i][0] += field_ptr[idx];
|
field[i][0] += field_ptr[idx];
|
||||||
field[i][1] += field_ptr[idx+1];
|
field[i][1] += field_ptr[idx+1];
|
||||||
field[i][2] += field_ptr[idx+2];
|
field[i][2] += field_ptr[idx+2];
|
||||||
}
|
}
|
||||||
|
|
||||||
field_ptr += 4*inum;
|
field_ptr += 3*inum;
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
fieldp[i][0] += field_ptr[idx];
|
fieldp[i][0] += field_ptr[idx];
|
||||||
fieldp[i][1] += field_ptr[idx+1];
|
fieldp[i][1] += field_ptr[idx+1];
|
||||||
fieldp[i][2] += field_ptr[idx+2];
|
fieldp[i][2] += field_ptr[idx+2];
|
||||||
@ -976,15 +976,15 @@ void PairAmoebaGPU::ufield0c(double **field, double **fieldp)
|
|||||||
auto field_ptr = (float *)fieldp_pinned;
|
auto field_ptr = (float *)fieldp_pinned;
|
||||||
|
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
field[i][0] += field_ptr[idx];
|
field[i][0] += field_ptr[idx];
|
||||||
field[i][1] += field_ptr[idx+1];
|
field[i][1] += field_ptr[idx+1];
|
||||||
field[i][2] += field_ptr[idx+2];
|
field[i][2] += field_ptr[idx+2];
|
||||||
}
|
}
|
||||||
|
|
||||||
field_ptr += 4*inum;
|
field_ptr += 3*inum;
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
fieldp[i][0] += field_ptr[idx];
|
fieldp[i][0] += field_ptr[idx];
|
||||||
fieldp[i][1] += field_ptr[idx+1];
|
fieldp[i][1] += field_ptr[idx+1];
|
||||||
fieldp[i][2] += field_ptr[idx+2];
|
fieldp[i][2] += field_ptr[idx+2];
|
||||||
@ -993,15 +993,15 @@ void PairAmoebaGPU::ufield0c(double **field, double **fieldp)
|
|||||||
auto field_ptr = (double *)fieldp_pinned;
|
auto field_ptr = (double *)fieldp_pinned;
|
||||||
|
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
field[i][0] += field_ptr[idx];
|
field[i][0] += field_ptr[idx];
|
||||||
field[i][1] += field_ptr[idx+1];
|
field[i][1] += field_ptr[idx+1];
|
||||||
field[i][2] += field_ptr[idx+2];
|
field[i][2] += field_ptr[idx+2];
|
||||||
}
|
}
|
||||||
|
|
||||||
field_ptr += 4*inum;
|
field_ptr += 3*inum;
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
fieldp[i][0] += field_ptr[idx];
|
fieldp[i][0] += field_ptr[idx];
|
||||||
fieldp[i][1] += field_ptr[idx+1];
|
fieldp[i][1] += field_ptr[idx+1];
|
||||||
fieldp[i][2] += field_ptr[idx+2];
|
fieldp[i][2] += field_ptr[idx+2];
|
||||||
@ -2029,9 +2029,9 @@ void PairAmoebaGPU::compute_force_from_torque(const numtyp* tq_ptr,
|
|||||||
int nlocal = atom->nlocal;
|
int nlocal = atom->nlocal;
|
||||||
|
|
||||||
for (i = 0; i < nlocal; i++) {
|
for (i = 0; i < nlocal; i++) {
|
||||||
_tq[0] = tq_ptr[4*i];
|
_tq[0] = tq_ptr[3*i];
|
||||||
_tq[1] = tq_ptr[4*i+1];
|
_tq[1] = tq_ptr[3*i+1];
|
||||||
_tq[2] = tq_ptr[4*i+2];
|
_tq[2] = tq_ptr[3*i+2];
|
||||||
torque2force(i,_tq,fix,fiy,fiz,force_comp);
|
torque2force(i,_tq,fix,fiy,fiz,force_comp);
|
||||||
|
|
||||||
iz = zaxis2local[i];
|
iz = zaxis2local[i];
|
||||||
|
|||||||
@ -859,15 +859,15 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
|
|||||||
auto field_ptr = (float *)fieldp_pinned;
|
auto field_ptr = (float *)fieldp_pinned;
|
||||||
|
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
field[i][0] += field_ptr[idx];
|
field[i][0] += field_ptr[idx];
|
||||||
field[i][1] += field_ptr[idx+1];
|
field[i][1] += field_ptr[idx+1];
|
||||||
field[i][2] += field_ptr[idx+2];
|
field[i][2] += field_ptr[idx+2];
|
||||||
}
|
}
|
||||||
|
|
||||||
field_ptr += 4*inum;
|
field_ptr += 3*inum;
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
fieldp[i][0] += field_ptr[idx];
|
fieldp[i][0] += field_ptr[idx];
|
||||||
fieldp[i][1] += field_ptr[idx+1];
|
fieldp[i][1] += field_ptr[idx+1];
|
||||||
fieldp[i][2] += field_ptr[idx+2];
|
fieldp[i][2] += field_ptr[idx+2];
|
||||||
@ -877,15 +877,15 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
|
|||||||
|
|
||||||
auto field_ptr = (double *)fieldp_pinned;
|
auto field_ptr = (double *)fieldp_pinned;
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
field[i][0] += field_ptr[idx];
|
field[i][0] += field_ptr[idx];
|
||||||
field[i][1] += field_ptr[idx+1];
|
field[i][1] += field_ptr[idx+1];
|
||||||
field[i][2] += field_ptr[idx+2];
|
field[i][2] += field_ptr[idx+2];
|
||||||
}
|
}
|
||||||
|
|
||||||
field_ptr += 4*inum;
|
field_ptr += 3*inum;
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
fieldp[i][0] += field_ptr[idx];
|
fieldp[i][0] += field_ptr[idx];
|
||||||
fieldp[i][1] += field_ptr[idx+1];
|
fieldp[i][1] += field_ptr[idx+1];
|
||||||
fieldp[i][2] += field_ptr[idx+2];
|
fieldp[i][2] += field_ptr[idx+2];
|
||||||
@ -1087,15 +1087,15 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp)
|
|||||||
auto *field_ptr = (float *)fieldp_pinned;
|
auto *field_ptr = (float *)fieldp_pinned;
|
||||||
|
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
field[i][0] += field_ptr[idx];
|
field[i][0] += field_ptr[idx];
|
||||||
field[i][1] += field_ptr[idx+1];
|
field[i][1] += field_ptr[idx+1];
|
||||||
field[i][2] += field_ptr[idx+2];
|
field[i][2] += field_ptr[idx+2];
|
||||||
}
|
}
|
||||||
|
|
||||||
field_ptr += 4*inum;
|
field_ptr += 3*inum;
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
fieldp[i][0] += field_ptr[idx];
|
fieldp[i][0] += field_ptr[idx];
|
||||||
fieldp[i][1] += field_ptr[idx+1];
|
fieldp[i][1] += field_ptr[idx+1];
|
||||||
fieldp[i][2] += field_ptr[idx+2];
|
fieldp[i][2] += field_ptr[idx+2];
|
||||||
@ -1105,15 +1105,15 @@ void PairHippoGPU::ufield0c(double **field, double **fieldp)
|
|||||||
auto *field_ptr = (double *)fieldp_pinned;
|
auto *field_ptr = (double *)fieldp_pinned;
|
||||||
|
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
field[i][0] += field_ptr[idx];
|
field[i][0] += field_ptr[idx];
|
||||||
field[i][1] += field_ptr[idx+1];
|
field[i][1] += field_ptr[idx+1];
|
||||||
field[i][2] += field_ptr[idx+2];
|
field[i][2] += field_ptr[idx+2];
|
||||||
}
|
}
|
||||||
|
|
||||||
field_ptr += 4*inum;
|
field_ptr += 3*inum;
|
||||||
for (int i = 0; i < nlocal; i++) {
|
for (int i = 0; i < nlocal; i++) {
|
||||||
int idx = 4*i;
|
int idx = 3*i;
|
||||||
fieldp[i][0] += field_ptr[idx];
|
fieldp[i][0] += field_ptr[idx];
|
||||||
fieldp[i][1] += field_ptr[idx+1];
|
fieldp[i][1] += field_ptr[idx+1];
|
||||||
fieldp[i][2] += field_ptr[idx+2];
|
fieldp[i][2] += field_ptr[idx+2];
|
||||||
@ -1456,9 +1456,9 @@ void PairHippoGPU::compute_force_from_torque(const numtyp* tq_ptr,
|
|||||||
int nlocal = atom->nlocal;
|
int nlocal = atom->nlocal;
|
||||||
|
|
||||||
for (i = 0; i < nlocal; i++) {
|
for (i = 0; i < nlocal; i++) {
|
||||||
_tq[0] = tq_ptr[4*i];
|
_tq[0] = tq_ptr[3*i];
|
||||||
_tq[1] = tq_ptr[4*i+1];
|
_tq[1] = tq_ptr[3*i+1];
|
||||||
_tq[2] = tq_ptr[4*i+2];
|
_tq[2] = tq_ptr[3*i+2];
|
||||||
torque2force(i,_tq,fix,fiy,fiz,force_comp);
|
torque2force(i,_tq,fix,fiy,fiz,force_comp);
|
||||||
|
|
||||||
iz = zaxis2local[i];
|
iz = zaxis2local[i];
|
||||||
|
|||||||
Reference in New Issue
Block a user