diff --git a/src/USER-OMP/pair_beck_omp.cpp b/src/USER-OMP/pair_beck_omp.cpp new file mode 100644 index 0000000000..fe279e6b96 --- /dev/null +++ b/src/USER-OMP/pair_beck_omp.cpp @@ -0,0 +1,173 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_beck_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairBeckOMP::PairBeckOMP(LAMMPS *lmp) : + PairBeck(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairBeckOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairBeckOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r5,force_beck,factor_lj; + double r,rinv; + double aaij,alphaij,betaij; + double term1,term1inv,term2,term3,term4,term5,term6; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + int *type = atom->type; + int nlocal = atom->nlocal; + double *special_lj = force->special_lj; + double fxtmp,fytmp,fztmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + r5 = rsq*rsq*r; + aaij = aa[itype][jtype]; + alphaij = alpha[itype][jtype]; + betaij = beta[itype][jtype]; + term1 = aaij*aaij + rsq; + term2 = 1.0/pow(term1,5); + term3 = 21.672 + 30.0*aaij*aaij + 6.0*rsq; + term4 = alphaij + r5*betaij; + term5 = alphaij + 6.0*r5*betaij; + rinv = 1.0/r; + force_beck = AA[itype][jtype]*exp(-1.0*r*term4)*term5; + force_beck -= BB[itype][jtype]*r*term2*term3; + + fpair = factor_lj*force_beck*rinv; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + if (NEWTON_PAIR || j < nlocal) { + f[j][0] -= delx*fpair; + f[j][1] -= dely*fpair; + f[j][2] -= delz*fpair; + } + + if (EFLAG) { + term6 = 1.0/pow(term1,3); + term1inv = 1.0/term1; + evdwl = AA[itype][jtype]*exp(-1.0*r*term4); + evdwl -= BB[itype][jtype]*term6*(1.0+(2.709+3.0*aaij*aaij)*term1inv); + } + + if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR, + evdwl,0.0,fpair,delx,dely,delz,thr); + } + } + f[i][0] += fxtmp; + f[i][1] += fytmp; + f[i][2] += fztmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBeckOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairBeck::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_beck_omp.h b/src/USER-OMP/pair_beck_omp.h new file mode 100644 index 0000000000..f079ff5ecb --- /dev/null +++ b/src/USER-OMP/pair_beck_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(beck/omp,PairBeckOMP) + +#else + +#ifndef LMP_PAIR_BECK_OMP_H +#define LMP_PAIR_BECK_OMP_H + +#include "pair_beck.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairBeckOMP : public PairBeck, public ThrOMP { + + public: + PairBeckOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_meam_spline_omp.cpp b/src/USER-OMP/pair_meam_spline_omp.cpp new file mode 100644 index 0000000000..33dfcd466c --- /dev/null +++ b/src/USER-OMP/pair_meam_spline_omp.cpp @@ -0,0 +1,324 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "string.h" + +#include "pair_meam_spline_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairMEAMSplineOMP::PairMEAMSplineOMP(LAMMPS *lmp) : + PairMEAMSpline(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairMEAMSplineOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = eflag_global = vflag_global = eflag_atom = vflag_atom = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = listfull->inum; + + if (listhalf->inum != inum) + error->warning(FLERR,"inconsistent half and full neighborlist"); + + // Grow per-atom array if necessary. + + if (atom->nmax > nmax) { + memory->destroy(Uprime_values); + nmax = atom->nmax; + memory->create(Uprime_values,nmax*nthreads,"pair:Uprime"); + } + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + thr->init_eam(nall,Uprime_values); + + if (evflag) { + if (eflag) { + eval<1,1>(ifrom, ito, thr); + } else { + eval<1,0>(ifrom, ito, thr); + } + } else { + eval<0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairMEAMSplineOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + const int* const ilist_full = listfull->ilist; + const int* const numneigh_full = listfull->numneigh; + const int* const * const firstneigh_full = listfull->firstneigh; + + // Determine the maximum number of neighbors a single atom has. + int myMaxNeighbors = 0; + for(int ii = iifrom; ii < iito; ii++) { + int jnum = numneigh_full[ilist_full[ii]]; + if(jnum > myMaxNeighbors) myMaxNeighbors = jnum; + } + + // Allocate array for temporary bond info. + MEAM2Body *myTwoBodyInfo = new MEAM2Body[myMaxNeighbors]; + + const double * const * const x = atom->x; + double * const * const forces = thr->get_f(); + double * const Uprime_thr = thr->get_rho(); + const int tid = thr->get_tid(); + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + const int nall = nlocal + atom->nghost; + + const double cutforcesq = cutoff*cutoff; + + // Sum three-body contributions to charge density and compute embedding energies. + for(int ii = iifrom; ii < iito; ii++) { + + const int i = ilist_full[ii]; + const double xtmp = x[i][0]; + const double ytmp = x[i][1]; + const double ztmp = x[i][2]; + const int* const jlist = firstneigh_full[i]; + const int jnum = numneigh_full[i]; + double rho_value = 0; + int numBonds = 0; + MEAM2Body* nextTwoBodyInfo = myTwoBodyInfo; + + for(int jj = 0; jj < jnum; jj++) { + const int j = jlist[jj] & NEIGHMASK; + + const double jdelx = x[j][0] - xtmp; + const double jdely = x[j][1] - ytmp; + const double jdelz = x[j][2] - ztmp; + const double rij_sq = jdelx*jdelx + jdely*jdely + jdelz*jdelz; + + if (rij_sq < cutforcesq) { + const double rij = sqrt(rij_sq); + double partial_sum = 0; + + nextTwoBodyInfo->tag = j; + nextTwoBodyInfo->r = rij; + nextTwoBodyInfo->f = f.eval(rij, nextTwoBodyInfo->fprime); + nextTwoBodyInfo->del[0] = jdelx / rij; + nextTwoBodyInfo->del[1] = jdely / rij; + nextTwoBodyInfo->del[2] = jdelz / rij; + + for(int kk = 0; kk < numBonds; kk++) { + const MEAM2Body& bondk = myTwoBodyInfo[kk]; + double cos_theta = (nextTwoBodyInfo->del[0]*bondk.del[0] + nextTwoBodyInfo->del[1]*bondk.del[1] + nextTwoBodyInfo->del[2]*bondk.del[2]); + partial_sum += bondk.f * g.eval(cos_theta); + } + + rho_value += nextTwoBodyInfo->f * partial_sum; + rho_value += rho.eval(rij); + + numBonds++; + nextTwoBodyInfo++; + } + } + + // Compute embedding energy and its derivative. + double Uprime_i; + double embeddingEnergy = U.eval(rho_value, Uprime_i) - zero_atom_energy; + Uprime_thr[i] = Uprime_i; + if (EFLAG) { + if (eflag_global) eng_vdwl += embeddingEnergy; + if (eflag_atom) eatom[i] += embeddingEnergy; + } + + double forces_i[3] = {0.0, 0.0, 0.0}; + + // Compute three-body contributions to force. + for(int jj = 0; jj < numBonds; jj++) { + const MEAM2Body bondj = myTwoBodyInfo[jj]; + const double rij = bondj.r; + const int j = bondj.tag; + + const double f_rij_prime = bondj.fprime; + const double f_rij = bondj.f; + + double forces_j[3] = {0.0, 0.0, 0.0}; + + MEAM2Body const* bondk = myTwoBodyInfo; + for(int kk = 0; kk < jj; kk++, ++bondk) { + const double rik = bondk->r; + + const double cos_theta = (bondj.del[0]*bondk->del[0] + bondj.del[1]*bondk->del[1] + bondj.del[2]*bondk->del[2]); + double g_prime; + double g_value = g.eval(cos_theta, g_prime); + const double f_rik_prime = bondk->fprime; + const double f_rik = bondk->f; + + double fij = -Uprime_i * g_value * f_rik * f_rij_prime; + double fik = -Uprime_i * g_value * f_rij * f_rik_prime; + + const double prefactor = Uprime_i * f_rij * f_rik * g_prime; + const double prefactor_ij = prefactor / rij; + const double prefactor_ik = prefactor / rik; + fij += prefactor_ij * cos_theta; + fik += prefactor_ik * cos_theta; + + double fj[3], fk[3]; + + fj[0] = bondj.del[0] * fij - bondk->del[0] * prefactor_ij; + fj[1] = bondj.del[1] * fij - bondk->del[1] * prefactor_ij; + fj[2] = bondj.del[2] * fij - bondk->del[2] * prefactor_ij; + forces_j[0] += fj[0]; + forces_j[1] += fj[1]; + forces_j[2] += fj[2]; + + fk[0] = bondk->del[0] * fik - bondj.del[0] * prefactor_ik; + fk[1] = bondk->del[1] * fik - bondj.del[1] * prefactor_ik; + fk[2] = bondk->del[2] * fik - bondj.del[2] * prefactor_ik; + forces_i[0] -= fk[0]; + forces_i[1] -= fk[1]; + forces_i[2] -= fk[2]; + + const int k = bondk->tag; + forces[k][0] += fk[0]; + forces[k][1] += fk[1]; + forces[k][2] += fk[2]; + + if(EVFLAG) { + double delta_ij[3]; + double delta_ik[3]; + delta_ij[0] = bondj.del[0] * rij; + delta_ij[1] = bondj.del[1] * rij; + delta_ij[2] = bondj.del[2] * rij; + delta_ik[0] = bondk->del[0] * rik; + delta_ik[1] = bondk->del[1] * rik; + delta_ik[2] = bondk->del[2] * rik; + ev_tally3_thr(this,i,j,k,0.0,0.0,fj,fk,delta_ij,delta_ik,thr); + } + } + + forces[i][0] -= forces_j[0]; + forces[i][1] -= forces_j[1]; + forces[i][2] -= forces_j[2]; + forces[j][0] += forces_j[0]; + forces[j][1] += forces_j[1]; + forces[j][2] += forces_j[2]; + } + + forces[i][0] += forces_i[0]; + forces[i][1] += forces_i[1]; + forces[i][2] += forces_i[2]; + } + + delete[] myTwoBodyInfo; + + sync_threads(); + + // reduce per thread density + data_reduce_thr(Uprime_values, nall, nthreads, 1, tid); + + // wait until reduction is complete so that master thread + // can communicate U'(rho) values. + sync_threads(); + +#if defined(_OPENMP) +#pragma omp master +#endif + { comm->forward_comm_pair(this); } + + // wait until master thread is done with communication + sync_threads(); + + const int* const ilist_half = listhalf->ilist; + const int* const numneigh_half = listhalf->numneigh; + const int* const * const firstneigh_half = listhalf->firstneigh; + + // Compute two-body pair interactions. + for(int ii = iifrom; ii < iito; ii++) { + const int i = ilist_half[ii]; + const double xtmp = x[i][0]; + const double ytmp = x[i][1]; + const double ztmp = x[i][2]; + const int* const jlist = firstneigh_half[i]; + const int jnum = numneigh_half[i]; + + for(int jj = 0; jj < jnum; jj++) { + const int j = jlist[jj] & NEIGHMASK; + + double jdel[3]; + jdel[0] = x[j][0] - xtmp; + jdel[1] = x[j][1] - ytmp; + jdel[2] = x[j][2] - ztmp; + double rij_sq = jdel[0]*jdel[0] + jdel[1]*jdel[1] + jdel[2]*jdel[2]; + + if(rij_sq < cutforcesq) { + double rij = sqrt(rij_sq); + + double rho_prime; + rho.eval(rij, rho_prime); + double fpair = rho_prime * (Uprime_values[i] + Uprime_values[j]); + + double pair_pot_deriv; + double pair_pot = phi.eval(rij, pair_pot_deriv); + fpair += pair_pot_deriv; + + // Divide by r_ij to get forces from gradient. + fpair /= rij; + + forces[i][0] += jdel[0]*fpair; + forces[i][1] += jdel[1]*fpair; + forces[i][2] += jdel[2]*fpair; + forces[j][0] -= jdel[0]*fpair; + forces[j][1] -= jdel[1]*fpair; + forces[j][2] -= jdel[2]*fpair; + if (EVFLAG) ev_tally_thr(this,i,j,nlocal, 1 /* newton_pair */, + pair_pot,0.0,-fpair,jdel[0],jdel[1],jdel[2],thr); + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +double PairMEAMSplineOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairMEAMSpline::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_meam_spline_omp.h b/src/USER-OMP/pair_meam_spline_omp.h new file mode 100644 index 0000000000..c8f92ce74c --- /dev/null +++ b/src/USER-OMP/pair_meam_spline_omp.h @@ -0,0 +1,48 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(meam/spline/omp,PairMEAMSplineOMP) + +#else + +#ifndef LMP_PAIR_MEAM_SPLINE_OMP_H +#define LMP_PAIR_MEAM_SPLINE_OMP_H + +#include "pair_meam_spline.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairMEAMSplineOMP : public PairMEAMSpline, public ThrOMP { + + public: + PairMEAMSplineOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int iifrom, int iito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pppm_tip4p_omp.cpp b/src/USER-OMP/pppm_tip4p_omp.cpp new file mode 100644 index 0000000000..efee0a0d99 --- /dev/null +++ b/src/USER-OMP/pppm_tip4p_omp.cpp @@ -0,0 +1,398 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "pppm_tip4p_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "memory.h" +#include "math_const.h" + +#include +#include + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define OFFSET 16384 + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPMTIP4POMP::PPPMTIP4POMP(LAMMPS *lmp, int narg, char **arg) : + PPPMOMP(lmp, narg, arg) {} + +/* ---------------------------------------------------------------------- */ + +// NOTE: special version of reduce_data for FFT_SCALAR data type. +// reduce per thread data into the first part of the data +// array that is used for the non-threaded parts and reset +// the temporary storage to 0.0. this routine depends on +// multi-dimensional arrays like force stored in this order +// x1,y1,z1,x2,y2,z2,... +// we need to post a barrier to wait until all threads are done +// writing to the array. +static void data_reduce_fft(FFT_SCALAR *dall, int nall, int nthreads, int ndim, int tid) +{ +#if defined(_OPENMP) + // NOOP in non-threaded execution. + if (nthreads == 1) return; +#pragma omp barrier + { + const int nvals = ndim*nall; + const int idelta = nvals/nthreads + 1; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta); + + // this if protects against having more threads than atoms + if (ifrom < nall) { + for (int m = ifrom; m < ito; ++m) { + for (int n = 1; n < nthreads; ++n) { + dall[m] += dall[n*nvals + m]; + dall[n*nvals + m] = 0.0; + } + } + } + } +#else + // NOOP in non-threaded execution. + return; +#endif +} + +/* ---------------------------------------------------------------------- */ + +void PPPMTIP4POMP::init() +{ + // TIP4P PPPM requires newton on, b/c it computes forces on ghost atoms + + if (force->newton == 0) + error->all(FLERR,"Kspace style pppm/tip4p/omp requires newton on"); + + PPPMOMP::init(); +} + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPMTIP4POMP::particle_map() +{ + const int * const type = atom->type; + const double * const * const x = atom->x; + const int nlocal = atom->nlocal; + + int i, flag = 0; +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static) +#endif + for (i = 0; i < nlocal; i++) { + int nx,ny,nz,iH1,iH2; + double xM[3]; + + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + } else { + xM[0] = x[i][0]; + xM[1] = x[i][1]; + xM[2] = x[i][2]; + } + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + nx = static_cast ((xM[0]-boxlo[0])*delxinv+shift) - OFFSET; + ny = static_cast ((xM[1]-boxlo[1])*delyinv+shift) - OFFSET; + nz = static_cast ((xM[2]-boxlo[2])*delzinv+shift) - OFFSET; + + part2grid[i][0] = nx; + part2grid[i][1] = ny; + part2grid[i][2] = nz; + + // check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out) flag++; + } + + int flag_all; + MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); + if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMTIP4POMP::make_rho() +{ + const double * const q = atom->q; + const double * const * const x = atom->x; + const int * const type = atom->type; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int tid = 0; + const int ifrom = 0; + const int ito = nlocal; +#endif + + int l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + double xM[3]; + + // set up clear 3d density array + const int nzoffs = (nzhi_out-nzlo_out+1)*tid; + FFT_SCALAR * const * const * const db = &(density_brick[nzoffs]); + memset(&(db[nzlo_out][nylo_out][nxlo_out]),0,ngrid*sizeof(FFT_SCALAR)); + + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + } else { + xM[0] = x[i][0]; + xM[1] = x[i][1]; + xM[2] = x[i][2]; + } + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv; + dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv; + dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + db[mz][my][mx] += x0*r1d[0][l]; + } + } + } + } + } +#if defined(_OPENMP) + // reduce 3d density array + if (nthreads > 1) { + data_reduce_fft(&(density_brick[nzlo_out][nylo_out][nxlo_out]),ngrid,nthreads,1,tid); + } +#endif + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPMTIP4POMP::fieldforce() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + const double * const q = atom->q; + const double * const * const x = atom->x; + const int * const type = atom->type; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + const double qqrd2e = force->qqrd2e; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + int iH1,iH2; + double xM[3], fx,fy,fz; + double ddotf, rOMx, rOMy, rOMz, f1x, f1y, f1z; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + } else { + xM[0] = x[i][0]; + xM[1] = x[i][1]; + xM[2] = x[i][2]; + } + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (xM[0]-boxlo[0])*delxinv; + dy = ny+shiftone - (xM[1]-boxlo[1])*delyinv; + dz = nz+shiftone - (xM[2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + const double qfactor = qqrd2e*scale*q[i]; + + if (type[i] != typeO) { + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + f[i][2] += qfactor*ekz; + + } else { + fx = qfactor * ekx; + fy = qfactor * eky; + fz = qfactor * ekz; + find_M(i,iH1,iH2,xM); + + rOMx = xM[0] - x[i][0]; + rOMy = xM[1] - x[i][1]; + rOMz = xM[2] - x[i][2]; + + ddotf = (rOMx * fx + rOMy * fy + rOMz * fz) / (qdist * qdist); + + f1x = ddotf * rOMx; + f1y = ddotf * rOMy; + f1z = ddotf * rOMz; + + f[i][0] += fx - alpha * (fx - f1x); + f[i][1] += fy - alpha * (fy - f1y); + f[i][2] += fz - alpha * (fz - f1z); + + f[iH1][0] += 0.5*alpha*(fx - f1x); + f[iH1][1] += 0.5*alpha*(fy - f1y); + f[iH1][2] += 0.5*alpha*(fz - f1z); + + f[iH2][0] += 0.5*alpha*(fx - f1x); + f[iH2][1] += 0.5*alpha*(fy - f1y); + f[iH2][2] += 0.5*alpha*(fz - f1z); + } + } + } + } +} + +/* ---------------------------------------------------------------------- + find 2 H atoms bonded to O atom i + compute position xM of fictitious charge site for O atom + also return local indices iH1,iH2 of H atoms +------------------------------------------------------------------------- */ + +void PPPMTIP4POMP::find_M(int i, int &iH1, int &iH2, double *xM) +{ + iH1 = atom->map(atom->tag[i] + 1); + iH2 = atom->map(atom->tag[i] + 2); + + if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); + if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) + error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + + const double * const * const x = atom->x; + + double delx1 = x[iH1][0] - x[i][0]; + double dely1 = x[iH1][1] - x[i][1]; + double delz1 = x[iH1][2] - x[i][2]; + domain->minimum_image(delx1,dely1,delz1); + + double delx2 = x[iH2][0] - x[i][0]; + double dely2 = x[iH2][1] - x[i][1]; + double delz2 = x[iH2][2] - x[i][2]; + domain->minimum_image(delx2,dely2,delz2); + + xM[0] = x[i][0] + alpha * 0.5 * (delx1 + delx2); + xM[1] = x[i][1] + alpha * 0.5 * (dely1 + dely2); + xM[2] = x[i][2] + alpha * 0.5 * (delz1 + delz2); +} diff --git a/src/USER-OMP/pppm_tip4p_omp.h b/src/USER-OMP/pppm_tip4p_omp.h new file mode 100644 index 0000000000..33e5b0f224 --- /dev/null +++ b/src/USER-OMP/pppm_tip4p_omp.h @@ -0,0 +1,56 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(pppm/tip4p/omp,PPPMTIP4POMP) + +#else + +#ifndef LMP_PPPM_TIP4P_OMP_H +#define LMP_PPPM_TIP4P_OMP_H + +#include "pppm_omp.h" + +namespace LAMMPS_NS { + +class PPPMTIP4POMP : public PPPMOMP { + public: + PPPMTIP4POMP(class LAMMPS *, int, char **); + virtual ~PPPMTIP4POMP () {}; + virtual void init(); + + protected: + virtual void particle_map(); + virtual void fieldforce(); + virtual void make_rho(); + + private: + void find_M(int, int &, int &, double *); + +// void slabcorr(int); + +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Kspace style pppm/tip4p/omp requires newton on + +Self-explanatory. + +*/