performance improvement through avoiding function call and dereference overhead

- make i_to_potl() and ij_to_potl() functions inline and const
- don't dereference inside the functions, but cache, if possible in external variables
=> up to 15% speedup.
This commit is contained in:
Axel Kohlmeyer
2017-05-09 15:38:10 -04:00
parent 9cd994f57c
commit 6c5edf6c70
3 changed files with 39 additions and 40 deletions

View File

@ -99,8 +99,9 @@ PairMEAMSpline::~PairMEAMSpline()
void PairMEAMSpline::compute(int eflag, int vflag)
{
double** const x = atom->x;
double** forces = atom->f;
const double* const * const x = atom->x;
double* const * const forces = atom->f;
const int ntypes = atom->ntypes;
if (eflag || vflag) {
ev_setup(eflag, vflag);
@ -144,6 +145,9 @@ void PairMEAMSpline::compute(int eflag, int vflag)
// compute charge density and numBonds
MEAM2Body* nextTwoBodyInfo = twoBodyInfo;
double rho_value = 0;
const int ntypes = atom->ntypes;
const int itype = atom->type[i];
for(int jj = 0; jj < listfull->numneigh[i]; jj++) {
int j = listfull->firstneigh[i][jj];
j &= NEIGHMASK;
@ -156,10 +160,11 @@ void PairMEAMSpline::compute(int eflag, int vflag)
if(rij_sq < cutoff*cutoff) {
double rij = sqrt(rij_sq);
double partial_sum = 0;
const int jtype = atom->type[j];
nextTwoBodyInfo->tag = j;
nextTwoBodyInfo->r = rij;
nextTwoBodyInfo->f = fs[i_to_potl(j)].eval(rij, nextTwoBodyInfo->fprime);
nextTwoBodyInfo->f = fs[i_to_potl(jtype)].eval(rij, nextTwoBodyInfo->fprime);
nextTwoBodyInfo->del[0] = jdelx / rij;
nextTwoBodyInfo->del[1] = jdely / rij;
nextTwoBodyInfo->del[2] = jdelz / rij;
@ -169,11 +174,11 @@ void PairMEAMSpline::compute(int eflag, int vflag)
double cos_theta = (nextTwoBodyInfo->del[0]*bondk.del[0] +
nextTwoBodyInfo->del[1]*bondk.del[1] +
nextTwoBodyInfo->del[2]*bondk.del[2]);
partial_sum += bondk.f * gs[ij_to_potl(j,bondk.tag)].eval(cos_theta);
partial_sum += bondk.f * gs[ij_to_potl(jtype,atom->type[bondk.tag],ntypes)].eval(cos_theta);
}
rho_value += nextTwoBodyInfo->f * partial_sum;
rho_value += rhos[i_to_potl(j)].eval(rij);
rho_value += rhos[i_to_potl(jtype)].eval(rij);
numBonds++;
nextTwoBodyInfo++;
@ -182,8 +187,8 @@ void PairMEAMSpline::compute(int eflag, int vflag)
// Compute embedding energy and its derivative
double Uprime_i;
double embeddingEnergy = Us[i_to_potl(i)].eval(rho_value, Uprime_i)
- zero_atom_energies[i_to_potl(i)];
double embeddingEnergy = Us[i_to_potl(itype)].eval(rho_value, Uprime_i)
- zero_atom_energies[i_to_potl(itype)];
Uprime_values[i] = Uprime_i;
if(eflag) {
@ -204,6 +209,7 @@ void PairMEAMSpline::compute(int eflag, int vflag)
double f_rij = bondj.f;
double forces_j[3] = {0, 0, 0};
const int jtype = atom->type[j];
MEAM2Body const* bondk = twoBodyInfo;
for(int kk = 0; kk < jj; kk++, ++bondk) {
@ -213,7 +219,7 @@ void PairMEAMSpline::compute(int eflag, int vflag)
bondj.del[1]*bondk->del[1] +
bondj.del[2]*bondk->del[2]);
double g_prime;
double g_value = gs[ij_to_potl(j,bondk->tag)].eval(cos_theta, g_prime);
double g_value = gs[ij_to_potl(jtype,atom->type[bondk->tag],ntypes)].eval(cos_theta, g_prime);
double f_rik_prime = bondk->fprime;
double f_rik = bondk->f;
@ -280,6 +286,7 @@ void PairMEAMSpline::compute(int eflag, int vflag)
// Compute two-body pair interactions
for(int ii = 0; ii < listhalf->inum; ii++) {
int i = listhalf->ilist[ii];
const int itype = atom->type[i];
for(int jj = 0; jj < listhalf->numneigh[i]; jj++) {
int j = listhalf->firstneigh[i][jj];
@ -293,13 +300,14 @@ void PairMEAMSpline::compute(int eflag, int vflag)
if(rij_sq < cutoff*cutoff) {
double rij = sqrt(rij_sq);
const int jtype = atom->type[j];
double rho_prime_i,rho_prime_j;
rhos[i_to_potl(i)].eval(rij,rho_prime_i);
rhos[i_to_potl(j)].eval(rij,rho_prime_j);
rhos[i_to_potl(itype)].eval(rij,rho_prime_i);
rhos[i_to_potl(jtype)].eval(rij,rho_prime_j);
double fpair = rho_prime_j * Uprime_values[i] + rho_prime_i*Uprime_values[j];
double pair_pot_deriv;
double pair_pot = phis[ij_to_potl(i,j)].eval(rij, pair_pot_deriv);
double pair_pot = phis[ij_to_potl(itype,jtype,ntypes)].eval(rij, pair_pot_deriv);
fpair += pair_pot_deriv;
@ -323,24 +331,6 @@ void PairMEAMSpline::compute(int eflag, int vflag)
virial_fdotr_compute();
}
/* ----------------------------------------------------------------------
helper functions to map atom types to potential array indices
------------------------------------------------------------------------- */
int PairMEAMSpline::ij_to_potl(int i, int j) {
int n = atom->ntypes;
int itype = atom->type[i];
int jtype = atom->type[j];
return jtype - 1 + (itype-1)*n - (itype-1)*itype/2;
}
int PairMEAMSpline::i_to_potl(int i) {
int itype = atom->type[i];
return itype - 1;
}
/* ---------------------------------------------------------------------- */
void PairMEAMSpline::allocate()

View File

@ -54,8 +54,11 @@ public:
// helper functions for compute()
int ij_to_potl(int i, int j);
int i_to_potl(int i);
int ij_to_potl(const int itype, const int jtype, const int ntypes) const {
return jtype - 1 + (itype-1)*ntypes - (itype-1)*itype/2;
}
int i_to_potl(const int itype) const { return itype-1; }
int pack_forward_comm(int, int *, double *, int, int *);
void unpack_forward_comm(int, int, double *);

View File

@ -110,6 +110,7 @@ void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr)
const int nthreads = comm->nthreads;
const int nlocal = atom->nlocal;
const int nall = nlocal + atom->nghost;
const int ntypes = atom->ntypes;
const double cutforcesq = cutoff*cutoff;
@ -135,12 +136,13 @@ void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr)
const double rij_sq = jdelx*jdelx + jdely*jdely + jdelz*jdelz;
if (rij_sq < cutforcesq) {
const int jtype = atom->type[j];
const double rij = sqrt(rij_sq);
double partial_sum = 0;
nextTwoBodyInfo->tag = j;
nextTwoBodyInfo->r = rij;
nextTwoBodyInfo->f = fs[i_to_potl(j)].eval(rij, nextTwoBodyInfo->fprime);
nextTwoBodyInfo->f = fs[i_to_potl(jtype)].eval(rij, nextTwoBodyInfo->fprime);
nextTwoBodyInfo->del[0] = jdelx / rij;
nextTwoBodyInfo->del[1] = jdely / rij;
nextTwoBodyInfo->del[2] = jdelz / rij;
@ -150,21 +152,22 @@ void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr)
double cos_theta = (nextTwoBodyInfo->del[0]*bondk.del[0] +
nextTwoBodyInfo->del[1]*bondk.del[1] +
nextTwoBodyInfo->del[2]*bondk.del[2]);
partial_sum += bondk.f * gs[ij_to_potl(j,bondk.tag)].eval(cos_theta);
partial_sum += bondk.f * gs[ij_to_potl(jtype,atom->type[bondk.tag],ntypes)].eval(cos_theta);
}
rho_value += nextTwoBodyInfo->f * partial_sum;
rho_value += rhos[i_to_potl(j)].eval(rij);
rho_value += rhos[i_to_potl(jtype)].eval(rij);
numBonds++;
nextTwoBodyInfo++;
}
}
const int itype = atom->type[i];
// Compute embedding energy and its derivative.
double Uprime_i;
double embeddingEnergy = Us[i_to_potl(i)].eval(rho_value, Uprime_i)
- zero_atom_energies[i_to_potl(i)];
double embeddingEnergy = Us[i_to_potl(itype)].eval(rho_value, Uprime_i)
- zero_atom_energies[i_to_potl(itype)];
Uprime_thr[i] = Uprime_i;
if (EFLAG)
e_tally_thr(this,i,i,nlocal,1/*newton_pair*/,embeddingEnergy,0.0,thr);
@ -176,6 +179,7 @@ void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr)
const MEAM2Body bondj = myTwoBodyInfo[jj];
const double rij = bondj.r;
const int j = bondj.tag;
const int jtype = atom->type[j];
const double f_rij_prime = bondj.fprime;
const double f_rij = bondj.f;
@ -190,7 +194,7 @@ void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr)
+ bondj.del[1]*bondk->del[1]
+ bondj.del[2]*bondk->del[2]);
double g_prime;
double g_value = gs[ij_to_potl(j,bondk->tag)].eval(cos_theta, g_prime);
double g_value = gs[ij_to_potl(jtype,atom->type[bondk->tag],ntypes)].eval(cos_theta, g_prime);
const double f_rik_prime = bondk->fprime;
const double f_rik = bondk->f;
@ -282,6 +286,7 @@ void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr)
const double ztmp = x[i][2];
const int* const jlist = firstneigh_half[i];
const int jnum = numneigh_half[i];
const int itype = atom->type[i];
for(int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj] & NEIGHMASK;
@ -294,14 +299,15 @@ void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr)
if(rij_sq < cutforcesq) {
double rij = sqrt(rij_sq);
const int jtype = atom->type[j];
double rho_prime_i,rho_prime_j;
rhos[i_to_potl(i)].eval(rij,rho_prime_i);
rhos[i_to_potl(j)].eval(rij,rho_prime_j);
rhos[i_to_potl(itype)].eval(rij,rho_prime_i);
rhos[i_to_potl(jtype)].eval(rij,rho_prime_j);
double fpair = rho_prime_j * Uprime_values[i] + rho_prime_i*Uprime_values[j];
double pair_pot_deriv;
double pair_pot = phis[ij_to_potl(i,j)].eval(rij, pair_pot_deriv);
double pair_pot = phis[ij_to_potl(itype,jtype,ntypes)].eval(rij, pair_pot_deriv);
fpair += pair_pot_deriv;