Fixed bugs with hippo/gpu for single and mixed precision

This commit is contained in:
Trung Nguyen
2023-01-24 23:55:30 -06:00
parent 6c63d7dcb9
commit b206b4d1f6

View File

@ -849,24 +849,44 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
// field and fieldp may already have some nonzero values from kspace (udirect1) // field and fieldp may already have some nonzero values from kspace (udirect1)
int nlocal = atom->nlocal; int nlocal = atom->nlocal;
double *field_ptr = (double *)fieldp_pinned; if (tq_single) {
auto field_ptr = (float *)fieldp_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 4*i;
field[i][0] += field_ptr[idx]; field[i][0] += field_ptr[idx];
field[i][1] += field_ptr[idx+1]; field[i][1] += field_ptr[idx+1];
field[i][2] += field_ptr[idx+2]; field[i][2] += field_ptr[idx+2];
} }
double* fieldp_ptr = (double *)fieldp_pinned; auto fieldp_ptr = (float *)fieldp_pinned;
fieldp_ptr += 4*inum; fieldp_ptr += 4*inum;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int idx = 4*i; int idx = 4*i;
fieldp[i][0] += fieldp_ptr[idx]; fieldp[i][0] += fieldp_ptr[idx];
fieldp[i][1] += fieldp_ptr[idx+1]; fieldp[i][1] += fieldp_ptr[idx+1];
fieldp[i][2] += fieldp_ptr[idx+2]; fieldp[i][2] += fieldp_ptr[idx+2];
} }
} else {
auto field_ptr = (double *)fieldp_pinned;
for (int i = 0; i < nlocal; i++) {
int idx = 4*i;
field[i][0] += field_ptr[idx];
field[i][1] += field_ptr[idx+1];
field[i][2] += field_ptr[idx+2];
}
auto fieldp_ptr = (double *)fieldp_pinned;
fieldp_ptr += 4*inum;
for (int i = 0; i < nlocal; i++) {
int idx = 4*i;
fieldp[i][0] += fieldp_ptr[idx];
fieldp[i][1] += fieldp_ptr[idx+1];
fieldp[i][2] += fieldp_ptr[idx+2];
}
}
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
@ -1246,30 +1266,61 @@ void PairHippoGPU::fphi_uind(FFT_SCALAR ****grid, double **fdip_phi1,
&fdip_sum_phi_pinned); &fdip_sum_phi_pinned);
int nlocal = atom->nlocal; int nlocal = atom->nlocal;
double *_fdip_phi1_ptr = (double *)fdip_phi1_pinned; if (tq_single) {
for (int i = 0; i < nlocal; i++) { auto _fdip_phi1_ptr = (float *)fdip_phi1_pinned;
int n = i; for (int i = 0; i < nlocal; i++) {
for (int m = 0; m < 10; m++) { int n = i;
fdip_phi1[i][m] = _fdip_phi1_ptr[n]; for (int m = 0; m < 10; m++) {
n += nlocal; fdip_phi1[i][m] = _fdip_phi1_ptr[n];
n += nlocal;
}
} }
}
double *_fdip_phi2_ptr = (double *)fdip_phi2_pinned; auto _fdip_phi2_ptr = (float *)fdip_phi2_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int n = i; int n = i;
for (int m = 0; m < 10; m++) { for (int m = 0; m < 10; m++) {
fdip_phi2[i][m] = _fdip_phi2_ptr[n]; fdip_phi2[i][m] = _fdip_phi2_ptr[n];
n += nlocal; n += nlocal;
}
} }
}
double *_fdip_sum_phi_ptr = (double *)fdip_sum_phi_pinned; auto _fdip_sum_phi_ptr = (float *)fdip_sum_phi_pinned;
for (int i = 0; i < nlocal; i++) { for (int i = 0; i < nlocal; i++) {
int n = i; int n = i;
for (int m = 0; m < 20; m++) { for (int m = 0; m < 20; m++) {
fdip_sum_phi[i][m] = _fdip_sum_phi_ptr[n]; fdip_sum_phi[i][m] = _fdip_sum_phi_ptr[n];
n += nlocal; n += nlocal;
}
}
} else {
auto _fdip_phi1_ptr = (double *)fdip_phi1_pinned;
for (int i = 0; i < nlocal; i++) {
int n = i;
for (int m = 0; m < 10; m++) {
fdip_phi1[i][m] = _fdip_phi1_ptr[n];
n += nlocal;
}
}
auto _fdip_phi2_ptr = (double *)fdip_phi2_pinned;
for (int i = 0; i < nlocal; i++) {
int n = i;
for (int m = 0; m < 10; m++) {
fdip_phi2[i][m] = _fdip_phi2_ptr[n];
n += nlocal;
}
}
auto _fdip_sum_phi_ptr = (double *)fdip_sum_phi_pinned;
for (int i = 0; i < nlocal; i++) {
int n = i;
for (int m = 0; m < 20; m++) {
fdip_sum_phi[i][m] = _fdip_sum_phi_ptr[n];
n += nlocal;
}
} }
} }
} }