diff --git a/doc/Manual.html b/doc/Manual.html index ff260f26d5..f1ef175999 100644 --- a/doc/Manual.html +++ b/doc/Manual.html @@ -22,7 +22,7 @@

LAMMPS-ICMS Documentation

-

6 Jun 2013 version +

7 Jun 2013 version

Version info:

diff --git a/doc/Manual.pdf b/doc/Manual.pdf index a359740a04..ce0a8c8c74 100644 Binary files a/doc/Manual.pdf and b/doc/Manual.pdf differ diff --git a/doc/Manual.txt b/doc/Manual.txt index 54f59a04af..97679bf8ee 100644 --- a/doc/Manual.txt +++ b/doc/Manual.txt @@ -18,7 +18,7 @@

LAMMPS-ICMS Documentation :c,h3 -6 Jun 2013 version :c,h4 +7 Jun 2013 version :c,h4 Version info: :h4 diff --git a/lib/atc/ATC_TransferHardy.cpp b/lib/atc/ATC_TransferHardy.cpp index 1e91448b61..d819de261c 100644 --- a/lib/atc/ATC_TransferHardy.cpp +++ b/lib/atc/ATC_TransferHardy.cpp @@ -1589,6 +1589,7 @@ namespace ATC { //int tag_i = (lammpsInterface_->atom_tag())[lammps_i]; for (int j = 0; j < numneigh[lammps_i]; j++) { int lammps_j = firstneigh[lammps_i][j]; + lammps_j &= NEIGHMASK; //int tag_j = (lammpsInterface_->atom_tag())[lammps_j]; //if (lammps_i > lammps_j) continue; // skip a > b pairs //if (tag_i > tag_j) continue; // skip a > b pairs @@ -1741,6 +1742,7 @@ namespace ATC { int lammps_i = internalToAtom_(i); for (int k = 0; k < numneigh[lammps_i]; k++) { int j = firstneigh[lammps_i][k]; + j &= NEIGHMASK; pair_ij.first = lammps_i; pair_ij.second = j; pairMapIterator = pairMap_.find(pair_ij); @@ -1900,6 +1902,7 @@ namespace ATC { periodicity_correction(xaI); for (int k = 0; k < numneigh[lammps_j]; ++k) { int lammps_k = firstneigh[lammps_j][k]; + lammps_k &= NEIGHMASK; // first atom location xb.copy(xPointer_[lammps_k],3); double * xk = xatom[lammps_k]; @@ -1960,6 +1963,7 @@ namespace ATC { xa.copy(xPointer_[lammps_j],3); for (int k = 0; k < numneigh[lammps_j]; ++k) { int lammps_k = firstneigh[lammps_j][k]; + lammps_k &= NEIGHMASK; //if (lammps_k < lammps_j) continue; // full neighbor list // second (neighbor) atom location xb.copy(xPointer_[lammps_k],3); @@ -2070,6 +2074,7 @@ namespace ATC { periodicity_correction(xaI); for (int k = 0; k < numneigh[lammps_j]; ++k) { int lammps_k = firstneigh[lammps_j][k]; + lammps_k &= NEIGHMASK; // first atom location xb.copy(xPointer_[lammps_k],3); double * xk = xatom[lammps_k]; @@ -2121,6 +2126,7 @@ namespace ATC { xa.copy(xPointer_[lammps_j],3); for (int k = 0; k < numneigh[lammps_j]; ++k) { int lammps_k = firstneigh[lammps_j][k]; + lammps_k &= NEIGHMASK; //if (lammps_k < lammps_j) continue; // full neighbor list // second (neighbor) atom location xb.copy(xPointer_[lammps_k],3); diff --git a/src/DIPOLE/pair_lj_cut_dipole_cut.h b/src/DIPOLE/pair_lj_cut_dipole_cut.h index d3d58f9713..ab0e0855c6 100755 --- a/src/DIPOLE/pair_lj_cut_dipole_cut.h +++ b/src/DIPOLE/pair_lj_cut_dipole_cut.h @@ -1,74 +1,74 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/cut/dipole/cut,PairLJCutDipoleCut) - -#else - -#ifndef LMP_PAIR_LJ_CUT_DIPOLE_CUT_H -#define LMP_PAIR_LJ_CUT_DIPOLE_CUT_H - -#include "pair.h" - -namespace LAMMPS_NS { - -class PairLJCutDipoleCut : public Pair { - public: - PairLJCutDipoleCut(class LAMMPS *); - virtual ~PairLJCutDipoleCut(); - virtual void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_style(); - double init_one(int, int); - void write_restart(FILE *); - void read_restart(FILE *); - void write_restart_settings(FILE *); - void read_restart_settings(FILE *); - - protected: - double cut_lj_global,cut_coul_global; - double **cut_lj,**cut_ljsq; - double **cut_coul,**cut_coulsq; - double **epsilon,**sigma; - double **lj1,**lj2,**lj3,**lj4,**offset; - - void allocate(); -}; - -} - -#endif -#endif - -/* ERROR/WARNING messages: - -E: Incorrect args in pair_style command - -Self-explanatory. - -E: Cannot (yet) use 'electron' units with dipoles - -This feature is not yet supported. - -E: Incorrect args for pair coefficients - -Self-explanatory. Check the input script or data file. - -E: Pair dipole/cut requires atom attributes q, mu, torque - -The atom style defined does not have these attributes. - -*/ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/dipole/cut,PairLJCutDipoleCut) + +#else + +#ifndef LMP_PAIR_LJ_CUT_DIPOLE_CUT_H +#define LMP_PAIR_LJ_CUT_DIPOLE_CUT_H + +#include "pair.h" + +namespace LAMMPS_NS { + +class PairLJCutDipoleCut : public Pair { + public: + PairLJCutDipoleCut(class LAMMPS *); + virtual ~PairLJCutDipoleCut(); + virtual void compute(int, int); + void settings(int, char **); + void coeff(int, char **); + void init_style(); + double init_one(int, int); + void write_restart(FILE *); + void read_restart(FILE *); + void write_restart_settings(FILE *); + void read_restart_settings(FILE *); + + protected: + double cut_lj_global,cut_coul_global; + double **cut_lj,**cut_ljsq; + double **cut_coul,**cut_coulsq; + double **epsilon,**sigma; + double **lj1,**lj2,**lj3,**lj4,**offset; + + void allocate(); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Incorrect args in pair_style command + +Self-explanatory. + +E: Cannot (yet) use 'electron' units with dipoles + +This feature is not yet supported. + +E: Incorrect args for pair coefficients + +Self-explanatory. Check the input script or data file. + +E: Pair dipole/cut requires atom attributes q, mu, torque + +The atom style defined does not have these attributes. + +*/ diff --git a/src/GPU/pair_lj_cut_dipole_cut_gpu.cpp b/src/GPU/pair_lj_cut_dipole_cut_gpu.cpp index 120789f0d7..e70cc3af85 100755 --- a/src/GPU/pair_lj_cut_dipole_cut_gpu.cpp +++ b/src/GPU/pair_lj_cut_dipole_cut_gpu.cpp @@ -1,370 +1,370 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Trung Dac Nguyen (ORNL) -------------------------------------------------------------------------- */ - -#include "lmptype.h" -#include "math.h" -#include "stdio.h" -#include "stdlib.h" -#include "pair_lj_cut_dipole_cut_gpu.h" -#include "atom.h" -#include "atom_vec.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "integrate.h" -#include "memory.h" -#include "error.h" -#include "neigh_request.h" -#include "universe.h" -#include "update.h" -#include "domain.h" -#include "string.h" -#include "gpu_extra.h" - -// External functions from cuda library for atom decomposition - -int dpl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double **offset, double *special_lj, const int nlocal, - const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen, - double **host_cut_ljsq, double **host_cut_coulsq, - double *host_special_coul, const double qqrd2e); -void dpl_gpu_clear(); -int ** dpl_gpu_compute_n(const int ago, const int inum, - const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, - int **nspecial, int **special, const bool eflag, - const bool vflag, const bool eatom, const bool vatom, - int &host_start, int **ilist, int **jnum, - const double cpu_time, bool &success, - double *host_q, double **host_mu, - double *boxlo, double *prd); -void dpl_gpu_compute(const int ago, const int inum, - const int nall, double **host_x, int *host_type, - int *ilist, int *numj, int **firstneigh, - const bool eflag, const bool vflag, const bool eatom, - const bool vatom, int &host_start, const double cpu_time, - bool &success, double *host_q, double **host_mu, - const int nlocal, double *boxlo, double *prd); -double dpl_gpu_bytes(); - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJCutDipoleCutGPU::PairLJCutDipoleCutGPU(LAMMPS *lmp) : PairLJCutDipoleCut(lmp), - gpu_mode(GPU_FORCE) -{ - respa_enable = 0; - cpu_time = 0.0; - GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); -} - -/* ---------------------------------------------------------------------- - free all arrays -------------------------------------------------------------------------- */ - -PairLJCutDipoleCutGPU::~PairLJCutDipoleCutGPU() -{ - dpl_gpu_clear(); -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutDipoleCutGPU::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = vflag_fdotr = 0; - - int nall = atom->nlocal + atom->nghost; - int inum, host_start; - - bool success = true; - int *ilist, *numneigh, **firstneigh; - if (gpu_mode != GPU_FORCE) { - inum = atom->nlocal; - firstneigh = dpl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, - atom->type, domain->sublo, domain->subhi, - atom->tag, atom->nspecial, atom->special, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, atom->q, atom->mu, domain->boxlo, - domain->prd); - } else { - inum = list->inum; - ilist = list->ilist; - numneigh = list->numneigh; - firstneigh = list->firstneigh; - dpl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, - ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, - vflag_atom, host_start, cpu_time, success, atom->q, - atom->mu, atom->nlocal, domain->boxlo, domain->prd); - } - if (!success) - error->one(FLERR,"Insufficient memory on accelerator"); - - if (host_startq_flag || !atom->mu_flag || !atom->torque_flag) - error->all(FLERR,"Pair dipole/cut/gpu requires atom attributes q, mu, torque"); - - if (force->newton_pair) - error->all(FLERR,"Cannot use newton pair with dipole/cut/gpu pair style"); - - if (strcmp(update->unit_style,"electron") == 0) - error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles"); - - // Repeat cutsq calculation because done after call to init_style - double maxcut = -1.0; - double cut; - for (int i = 1; i <= atom->ntypes; i++) { - for (int j = i; j <= atom->ntypes; j++) { - if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { - cut = init_one(i,j); - cut *= cut; - if (cut > maxcut) - maxcut = cut; - cutsq[i][j] = cutsq[j][i] = cut; - } else - cutsq[i][j] = cutsq[j][i] = 0.0; - } - } - double cell_size = sqrt(maxcut) + neighbor->skin; - - int maxspecial=0; - if (atom->molecular) - maxspecial=atom->maxspecial; - int success = dpl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, - offset, force->special_lj, atom->nlocal, - atom->nlocal+atom->nghost, 300, maxspecial, - cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq, - force->special_coul, force->qqrd2e); - GPU_EXTRA::check_flag(success,error,world); - - if (gpu_mode == GPU_FORCE) { - int irequest = neighbor->request(this); - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full = 1; - } -} - -/* ---------------------------------------------------------------------- */ - -double PairLJCutDipoleCutGPU::memory_usage() -{ - double bytes = Pair::memory_usage(); - return bytes + dpl_gpu_bytes(); -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vflag, - int *ilist, int *numneigh, - int **firstneigh) -{ - int i,j,ii,jj,jnum,itype,jtype; - double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz; - double rsq,rinv,r2inv,r6inv,r3inv,r5inv,r7inv; - double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz; - double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul; - double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4; - double forcelj,factor_coul,factor_lj; - int *jlist; - - evdwl = ecoul = 0.0; - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = vflag_fdotr = 0; - - double **x = atom->x; - double **f = atom->f; - double *q = atom->q; - double **mu = atom->mu; - double **torque = atom->torque; - int *type = atom->type; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; - - - // loop over neighbors of my atoms - - for (ii = start; ii < inum; ii++) { - i = ilist[ii]; - qtmp = q[i]; - xtmp = x[i][0]; - ytmp = x[i][1]; - ztmp = x[i][2]; - itype = type[i]; - jlist = firstneigh[i]; - jnum = numneigh[i]; - - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - factor_lj = special_lj[sbmask(j)]; - factor_coul = special_coul[sbmask(j)]; - j &= NEIGHMASK; - - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; - jtype = type[j]; - - if (rsq < cutsq[itype][jtype]) { - r2inv = 1.0/rsq; - rinv = sqrt(r2inv); - - // atom can have both a charge and dipole - // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole - - forcecoulx = forcecouly = forcecoulz = 0.0; - tixcoul = tiycoul = tizcoul = 0.0; - tjxcoul = tjycoul = tjzcoul = 0.0; - - if (rsq < cut_coulsq[itype][jtype]) { - - if (qtmp != 0.0 && q[j] != 0.0) { - r3inv = r2inv*rinv; - pre1 = qtmp*q[j]*r3inv; - - forcecoulx += pre1*delx; - forcecouly += pre1*dely; - forcecoulz += pre1*delz; - } - - if (mu[i][3] > 0.0 && mu[j][3] > 0.0) { - r3inv = r2inv*rinv; - r5inv = r3inv*r2inv; - r7inv = r5inv*r2inv; - - pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2]; - pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; - pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; - - pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr; - pre2 = 3.0*r5inv*pjdotr; - pre3 = 3.0*r5inv*pidotr; - pre4 = -1.0*r3inv; - - forcecoulx += pre1*delx + pre2*mu[i][0] + pre3*mu[j][0]; - forcecouly += pre1*dely + pre2*mu[i][1] + pre3*mu[j][1]; - forcecoulz += pre1*delz + pre2*mu[i][2] + pre3*mu[j][2]; - - crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]); - crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]); - crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]); - - tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely); - tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz); - tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx); - tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely); - tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz); - tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx); - } - - if (mu[i][3] > 0.0 && q[j] != 0.0) { - r3inv = r2inv*rinv; - r5inv = r3inv*r2inv; - pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; - pre1 = 3.0*q[j]*r5inv * pidotr; - pre2 = q[j]*r3inv; - - forcecoulx += pre2*mu[i][0] - pre1*delx; - forcecouly += pre2*mu[i][1] - pre1*dely; - forcecoulz += pre2*mu[i][2] - pre1*delz; - tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely); - tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz); - tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx); - } - - if (mu[j][3] > 0.0 && qtmp != 0.0) { - r3inv = r2inv*rinv; - r5inv = r3inv*r2inv; - pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; - pre1 = 3.0*qtmp*r5inv * pjdotr; - pre2 = qtmp*r3inv; - - forcecoulx += pre1*delx - pre2*mu[j][0]; - forcecouly += pre1*dely - pre2*mu[j][1]; - forcecoulz += pre1*delz - pre2*mu[j][2]; - tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely); - tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz); - tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx); - } - } - - // LJ interaction - - if (rsq < cut_ljsq[itype][jtype]) { - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - forcelj *= factor_lj * r2inv; - } else forcelj = 0.0; - - // total force - - fq = factor_coul*qqrd2e; - fx = fq*forcecoulx + delx*forcelj; - fy = fq*forcecouly + dely*forcelj; - fz = fq*forcecoulz + delz*forcelj; - - // force & torque accumulation - - f[i][0] += fx; - f[i][1] += fy; - f[i][2] += fz; - torque[i][0] += fq*tixcoul; - torque[i][1] += fq*tiycoul; - torque[i][2] += fq*tizcoul; - - if (eflag) { - if (rsq < cut_coulsq[itype][jtype]) { - ecoul = qtmp*q[j]*rinv; - if (mu[i][3] > 0.0 && mu[j][3] > 0.0) - ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr; - if (mu[i][3] > 0.0 && q[j] != 0.0) - ecoul += -q[j]*r3inv*pidotr; - if (mu[j][3] > 0.0 && qtmp != 0.0) - ecoul += qtmp*r3inv*pjdotr; - ecoul *= factor_coul*qqrd2e; - } else ecoul = 0.0; - - if (rsq < cut_ljsq[itype][jtype]) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; - } else evdwl = 0.0; - } - - if (evflag) ev_tally_xyz_full(i,evdwl,ecoul,fx,fy,fz,delx,dely,delz); - } - } - } -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Trung Dac Nguyen (ORNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "pair_lj_cut_dipole_cut_gpu.h" +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" +#include "update.h" +#include "domain.h" +#include "string.h" +#include "gpu_extra.h" + +// External functions from cuda library for atom decomposition + +int dpl_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **offset, double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, + double **host_cut_ljsq, double **host_cut_coulsq, + double *host_special_coul, const double qqrd2e); +void dpl_gpu_clear(); +int ** dpl_gpu_compute_n(const int ago, const int inum, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, int *tag, + int **nspecial, int **special, const bool eflag, + const bool vflag, const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, + const double cpu_time, bool &success, + double *host_q, double **host_mu, + double *boxlo, double *prd); +void dpl_gpu_compute(const int ago, const int inum, + const int nall, double **host_x, int *host_type, + int *ilist, int *numj, int **firstneigh, + const bool eflag, const bool vflag, const bool eatom, + const bool vatom, int &host_start, const double cpu_time, + bool &success, double *host_q, double **host_mu, + const int nlocal, double *boxlo, double *prd); +double dpl_gpu_bytes(); + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJCutDipoleCutGPU::PairLJCutDipoleCutGPU(LAMMPS *lmp) : PairLJCutDipoleCut(lmp), + gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairLJCutDipoleCutGPU::~PairLJCutDipoleCutGPU() +{ + dpl_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutDipoleCutGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = dpl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, atom->q, atom->mu, domain->boxlo, + domain->prd); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + dpl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success, atom->q, + atom->mu, atom->nlocal, domain->boxlo, domain->prd); + } + if (!success) + error->one(FLERR,"Insufficient memory on accelerator"); + + if (host_startq_flag || !atom->mu_flag || !atom->torque_flag) + error->all(FLERR,"Pair dipole/cut/gpu requires atom attributes q, mu, torque"); + + if (force->newton_pair) + error->all(FLERR,"Cannot use newton pair with dipole/cut/gpu pair style"); + + if (strcmp(update->unit_style,"electron") == 0) + error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles"); + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int success = dpl_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, + offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq, + force->special_coul, force->qqrd2e); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCutDipoleCutGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + dpl_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutDipoleCutGPU::cpu_compute(int start, int inum, int eflag, int vflag, + int *ilist, int *numneigh, + int **firstneigh) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz; + double rsq,rinv,r2inv,r6inv,r3inv,r5inv,r7inv; + double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz; + double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul; + double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4; + double forcelj,factor_coul,factor_lj; + int *jlist; + + evdwl = ecoul = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + double **x = atom->x; + double **f = atom->f; + double *q = atom->q; + double **mu = atom->mu; + double **torque = atom->torque; + int *type = atom->type; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + + + // loop over neighbors of my atoms + + for (ii = start; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + rinv = sqrt(r2inv); + + // atom can have both a charge and dipole + // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole + + forcecoulx = forcecouly = forcecoulz = 0.0; + tixcoul = tiycoul = tizcoul = 0.0; + tjxcoul = tjycoul = tjzcoul = 0.0; + + if (rsq < cut_coulsq[itype][jtype]) { + + if (qtmp != 0.0 && q[j] != 0.0) { + r3inv = r2inv*rinv; + pre1 = qtmp*q[j]*r3inv; + + forcecoulx += pre1*delx; + forcecouly += pre1*dely; + forcecoulz += pre1*delz; + } + + if (mu[i][3] > 0.0 && mu[j][3] > 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + r7inv = r5inv*r2inv; + + pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2]; + pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; + pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; + + pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr; + pre2 = 3.0*r5inv*pjdotr; + pre3 = 3.0*r5inv*pidotr; + pre4 = -1.0*r3inv; + + forcecoulx += pre1*delx + pre2*mu[i][0] + pre3*mu[j][0]; + forcecouly += pre1*dely + pre2*mu[i][1] + pre3*mu[j][1]; + forcecoulz += pre1*delz + pre2*mu[i][2] + pre3*mu[j][2]; + + crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]); + crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]); + crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]); + + tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely); + tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz); + tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx); + tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely); + tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz); + tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx); + } + + if (mu[i][3] > 0.0 && q[j] != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; + pre1 = 3.0*q[j]*r5inv * pidotr; + pre2 = q[j]*r3inv; + + forcecoulx += pre2*mu[i][0] - pre1*delx; + forcecouly += pre2*mu[i][1] - pre1*dely; + forcecoulz += pre2*mu[i][2] - pre1*delz; + tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely); + tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz); + tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx); + } + + if (mu[j][3] > 0.0 && qtmp != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; + pre1 = 3.0*qtmp*r5inv * pjdotr; + pre2 = qtmp*r3inv; + + forcecoulx += pre1*delx - pre2*mu[j][0]; + forcecouly += pre1*dely - pre2*mu[j][1]; + forcecoulz += pre1*delz - pre2*mu[j][2]; + tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely); + tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz); + tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx); + } + } + + // LJ interaction + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj * r2inv; + } else forcelj = 0.0; + + // total force + + fq = factor_coul*qqrd2e; + fx = fq*forcecoulx + delx*forcelj; + fy = fq*forcecouly + dely*forcelj; + fz = fq*forcecoulz + delz*forcelj; + + // force & torque accumulation + + f[i][0] += fx; + f[i][1] += fy; + f[i][2] += fz; + torque[i][0] += fq*tixcoul; + torque[i][1] += fq*tiycoul; + torque[i][2] += fq*tizcoul; + + if (eflag) { + if (rsq < cut_coulsq[itype][jtype]) { + ecoul = qtmp*q[j]*rinv; + if (mu[i][3] > 0.0 && mu[j][3] > 0.0) + ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr; + if (mu[i][3] > 0.0 && q[j] != 0.0) + ecoul += -q[j]*r3inv*pidotr; + if (mu[j][3] > 0.0 && qtmp != 0.0) + ecoul += qtmp*r3inv*pjdotr; + ecoul *= factor_coul*qqrd2e; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (evflag) ev_tally_xyz_full(i,evdwl,ecoul,fx,fy,fz,delx,dely,delz); + } + } + } +} diff --git a/src/GPU/pair_lj_cut_dipole_cut_gpu.h b/src/GPU/pair_lj_cut_dipole_cut_gpu.h index 069f454d34..ba0827b23f 100755 --- a/src/GPU/pair_lj_cut_dipole_cut_gpu.h +++ b/src/GPU/pair_lj_cut_dipole_cut_gpu.h @@ -1,67 +1,67 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/cut/dipole/cut/gpu,PairLJCutDipoleCutGPU) - -#else - -#ifndef LMP_PAIR_LJ_CUT_DIPOLE_CUT_GPU_H -#define LMP_PAIR_LJ_CUT_DIPOLE_CUT_GPU_H - -#include "pair_lj_cut_dipole_cut.h" - -namespace LAMMPS_NS { - -class PairLJCutDipoleCutGPU : public PairLJCutDipoleCut { - public: - PairLJCutDipoleCutGPU(LAMMPS *lmp); - ~PairLJCutDipoleCutGPU(); - void cpu_compute(int, int, int, int, int *, int *, int **); - void compute(int, int); - void init_style(); - double memory_usage(); - - enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; - - private: - int gpu_mode; - double cpu_time; - int *gpulist; -}; - -} -#endif -#endif - -/* ERROR/WARNING messages: - -E: Insufficient memory on accelerator - -There is insufficient memory on one of the devices specified for the gpu -package - -E: Pair dipole/cut/gpu requires atom attributes q, mu, torque - -The atom style defined does not have this attribute. - -E: Cannot use newton pair with dipole/cut/gpu pair style - -Self-explanatory. - -E: Cannot (yet) use 'electron' units with dipoles - -This feature is not yet supported. - -*/ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/dipole/cut/gpu,PairLJCutDipoleCutGPU) + +#else + +#ifndef LMP_PAIR_LJ_CUT_DIPOLE_CUT_GPU_H +#define LMP_PAIR_LJ_CUT_DIPOLE_CUT_GPU_H + +#include "pair_lj_cut_dipole_cut.h" + +namespace LAMMPS_NS { + +class PairLJCutDipoleCutGPU : public PairLJCutDipoleCut { + public: + PairLJCutDipoleCutGPU(LAMMPS *lmp); + ~PairLJCutDipoleCutGPU(); + void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int); + void init_style(); + double memory_usage(); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + private: + int gpu_mode; + double cpu_time; + int *gpulist; +}; + +} +#endif +#endif + +/* ERROR/WARNING messages: + +E: Insufficient memory on accelerator + +There is insufficient memory on one of the devices specified for the gpu +package + +E: Pair dipole/cut/gpu requires atom attributes q, mu, torque + +The atom style defined does not have this attribute. + +E: Cannot use newton pair with dipole/cut/gpu pair style + +Self-explanatory. + +E: Cannot (yet) use 'electron' units with dipoles + +This feature is not yet supported. + +*/ diff --git a/src/GPU/pair_lj_sf_dipole_sf_gpu.cpp b/src/GPU/pair_lj_sf_dipole_sf_gpu.cpp index dd0e018ebb..dfe482b82b 100755 --- a/src/GPU/pair_lj_sf_dipole_sf_gpu.cpp +++ b/src/GPU/pair_lj_sf_dipole_sf_gpu.cpp @@ -1,400 +1,400 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Trung Dac Nguyen (ORNL) -------------------------------------------------------------------------- */ - -#include "lmptype.h" -#include "math.h" -#include "stdio.h" -#include "stdlib.h" -#include "pair_lj_sf_dipole_sf_gpu.h" -#include "atom.h" -#include "atom_vec.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" -#include "integrate.h" -#include "memory.h" -#include "error.h" -#include "neigh_request.h" -#include "universe.h" -#include "update.h" -#include "domain.h" -#include "string.h" -#include "gpu_extra.h" - -// External functions from cuda library for atom decomposition - -int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1, - double **host_lj2, double **host_lj3, double **host_lj4, - double *special_lj, const int nlocal, - const int nall, const int max_nbors, const int maxspecial, - const double cell_size, int &gpu_mode, FILE *screen, - double **host_cut_ljsq, double **host_cut_coulsq, - double *host_special_coul, const double qqrd2e); -void dplsf_gpu_clear(); -int ** dplsf_gpu_compute_n(const int ago, const int inum, - const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, int *tag, int **nspecial, - int **special, const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **jnum, const double cpu_time, - bool &success, double *host_q, double **host_mu, - double *boxlo, double *prd); -void dplsf_gpu_compute(const int ago, const int inum, - const int nall, double **host_x, int *host_type, - int *ilist, int *numj, int **firstneigh, - const bool eflag, const bool vflag, const bool eatom, - const bool vatom, int &host_start, const double cpu_time, - bool &success, double *host_q, double **host_mu, const int nlocal, - double *boxlo, double *prd); -double dplsf_gpu_bytes(); - -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJSFDipoleSFGPU::PairLJSFDipoleSFGPU(LAMMPS *lmp) : PairLJSFDipoleSF(lmp), - gpu_mode(GPU_FORCE) -{ - respa_enable = 0; - cpu_time = 0.0; - GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); -} - -/* ---------------------------------------------------------------------- - free all arrays -------------------------------------------------------------------------- */ - -PairLJSFDipoleSFGPU::~PairLJSFDipoleSFGPU() -{ - dplsf_gpu_clear(); -} - -/* ---------------------------------------------------------------------- */ - -void PairLJSFDipoleSFGPU::compute(int eflag, int vflag) -{ - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = vflag_fdotr = 0; - - int nall = atom->nlocal + atom->nghost; - int inum, host_start; - - bool success = true; - int *ilist, *numneigh, **firstneigh; - if (gpu_mode != GPU_FORCE) { - inum = atom->nlocal; - firstneigh = dplsf_gpu_compute_n(neighbor->ago, inum, nall, atom->x, - atom->type, domain->sublo, domain->subhi, - atom->tag, atom->nspecial, atom->special, - eflag, vflag, eflag_atom, vflag_atom, - host_start, &ilist, &numneigh, cpu_time, - success, atom->q, atom->mu, domain->boxlo, - domain->prd); - } else { - inum = list->inum; - ilist = list->ilist; - numneigh = list->numneigh; - firstneigh = list->firstneigh; - dplsf_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, - ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, - vflag_atom, host_start, cpu_time, success, atom->q, - atom->mu, atom->nlocal, domain->boxlo, domain->prd); - } - if (!success) - error->one(FLERR,"Insufficient memory on accelerator"); - - if (host_startq_flag || !atom->mu_flag || !atom->torque_flag) - error->all(FLERR,"Pair dipole/sf/gpu requires atom attributes q, mu, torque"); - - if (force->newton_pair) - error->all(FLERR,"Cannot use newton pair with dipole/sf/gpu pair style"); - - if (strcmp(update->unit_style,"electron") == 0) - error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles"); - - // Repeat cutsq calculation because done after call to init_style - double maxcut = -1.0; - double cut; - for (int i = 1; i <= atom->ntypes; i++) { - for (int j = i; j <= atom->ntypes; j++) { - if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { - cut = init_one(i,j); - cut *= cut; - if (cut > maxcut) - maxcut = cut; - cutsq[i][j] = cutsq[j][i] = cut; - } else - cutsq[i][j] = cutsq[j][i] = 0.0; - } - } - double cell_size = sqrt(maxcut) + neighbor->skin; - - int maxspecial=0; - if (atom->molecular) - maxspecial=atom->maxspecial; - int success = dplsf_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, - force->special_lj, atom->nlocal, - atom->nlocal+atom->nghost, 300, maxspecial, - cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq, - force->special_coul, force->qqrd2e); - GPU_EXTRA::check_flag(success,error,world); - - if (gpu_mode == GPU_FORCE) { - int irequest = neighbor->request(this); - neighbor->requests[irequest]->half = 0; - neighbor->requests[irequest]->full = 1; - } -} - -/* ---------------------------------------------------------------------- */ - -double PairLJSFDipoleSFGPU::memory_usage() -{ - double bytes = Pair::memory_usage(); - return bytes + dplsf_gpu_bytes(); -} - -/* ---------------------------------------------------------------------- */ - -void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag, - int *ilist, int *numneigh, - int **firstneigh) -{ - int i,j,ii,jj,jnum,itype,jtype; - double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz; - double rsq,rinv,r2inv,r6inv,r3inv,r5inv; - double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz; - double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul; - double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4; - double forcelj,factor_coul,factor_lj; - double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf; - double aforcecoulx,aforcecouly,aforcecoulz; - double bforcecoulx,bforcecouly,bforcecoulz; - double rcutlj2inv, rcutcoul2inv,rcutlj6inv; - int *jlist; - - evdwl = ecoul = 0.0; - if (eflag || vflag) ev_setup(eflag,vflag); - else evflag = vflag_fdotr = 0; - - double **x = atom->x; - double **f = atom->f; - double *q = atom->q; - double **mu = atom->mu; - double **torque = atom->torque; - int *type = atom->type; - double *special_coul = force->special_coul; - double *special_lj = force->special_lj; - double qqrd2e = force->qqrd2e; - - - // loop over neighbors of my atoms - - for (ii = start; ii < inum; ii++) { - i = ilist[ii]; - qtmp = q[i]; - xtmp = x[i][0]; - ytmp = x[i][1]; - ztmp = x[i][2]; - itype = type[i]; - jlist = firstneigh[i]; - jnum = numneigh[i]; - - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - factor_lj = special_lj[sbmask(j)]; - factor_coul = special_coul[sbmask(j)]; - j &= NEIGHMASK; - - delx = xtmp - x[j][0]; - dely = ytmp - x[j][1]; - delz = ztmp - x[j][2]; - rsq = delx*delx + dely*dely + delz*delz; - jtype = type[j]; - - if (rsq < cutsq[itype][jtype]) { - r2inv = 1.0/rsq; - rinv = sqrt(r2inv); - - // atom can have both a charge and dipole - // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole - - forcecoulx = forcecouly = forcecoulz = 0.0; - tixcoul = tiycoul = tizcoul = 0.0; - tjxcoul = tjycoul = tjzcoul = 0.0; - - if (rsq < cut_coulsq[itype][jtype]) { - - if (qtmp != 0.0 && q[j] != 0.0) { - pre1 = qtmp*q[j]*rinv*(r2inv-1.0/cut_coulsq[itype][jtype]); - - forcecoulx += pre1*delx; - forcecouly += pre1*dely; - forcecoulz += pre1*delz; - } - - if (mu[i][3] > 0.0 && mu[j][3] > 0.0) { - r3inv = r2inv*rinv; - r5inv = r3inv*r2inv; - rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; - - pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2]; - pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; - pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; - - afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv; - pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr ); - aforcecoulx = pre1*delx; - aforcecouly = pre1*dely; - aforcecoulz = pre1*delz; - - bfac = 1.0 - 4.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv) + - 3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv; - presf = 2.0 * r2inv * pidotr * pjdotr; - bforcecoulx = bfac * (pjdotr*mu[i][0]+pidotr*mu[j][0]-presf*delx); - bforcecouly = bfac * (pjdotr*mu[i][1]+pidotr*mu[j][1]-presf*dely); - bforcecoulz = bfac * (pjdotr*mu[i][2]+pidotr*mu[j][2]-presf*delz); - - forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx ); - forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly ); - forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz ); - - pre2 = 3.0 * bfac * r5inv * pjdotr; - pre3 = 3.0 * bfac * r5inv * pidotr; - pre4 = -bfac * r3inv; - - crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]); - crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]); - crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]); - - tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely); - tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz); - tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx); - tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely); - tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz); - tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx); - } - - if (mu[i][3] > 0.0 && q[j] != 0.0) { - r3inv = r2inv*rinv; - r5inv = r3inv*r2inv; - pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; - rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; - pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv); - pqfac = 1.0 - 3.0*rsq*rcutcoul2inv + - 2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv); - pre2 = q[j] * r3inv * pqfac; - - forcecoulx += pre2*mu[i][0] - pre1*delx; - forcecouly += pre2*mu[i][1] - pre1*dely; - forcecoulz += pre2*mu[i][2] - pre1*delz; - tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely); - tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz); - tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx); - } - - if (mu[j][3] > 0.0 && qtmp != 0.0) { - r3inv = r2inv*rinv; - r5inv = r3inv*r2inv; - pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; - rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; - pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv); - qpfac = 1.0 - 3.0*rsq*rcutcoul2inv + - 2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv); - pre2 = qtmp * r3inv * qpfac; - - forcecoulx += pre1*delx - pre2*mu[j][0]; - forcecouly += pre1*dely - pre2*mu[j][1]; - forcecoulz += pre1*delz - pre2*mu[j][2]; - tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely); - tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz); - tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx); - } - } - - // LJ interaction - - if (rsq < cut_ljsq[itype][jtype]) { - r6inv = r2inv*r2inv*r2inv; - forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv; - - rcutlj2inv = 1.0 / cut_ljsq[itype][jtype]; - rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv; - forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) * - rcutlj6inv * rcutlj2inv; - - forcelj = factor_lj * (forceljcut - forceljsf); - } else forcelj = 0.0; - - // total force - - fq = factor_coul*qqrd2e; - fx = fq*forcecoulx + delx*forcelj; - fy = fq*forcecouly + dely*forcelj; - fz = fq*forcecoulz + delz*forcelj; - - // force & torque accumulation - - f[i][0] += fx; - f[i][1] += fy; - f[i][2] += fz; - torque[i][0] += fq*tixcoul; - torque[i][1] += fq*tiycoul; - torque[i][2] += fq*tizcoul; - - if (eflag) { - if (rsq < cut_coulsq[itype][jtype]) { - ecoul = qtmp*q[j]*rinv* - pow((1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype])),2); - if (mu[i][3] > 0.0 && mu[j][3] > 0.0) - ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr); - if (mu[i][3] > 0.0 && q[j] != 0.0) - ecoul += -q[j]*r3inv * pqfac * pidotr; - if (mu[j][3] > 0.0 && qtmp != 0.0) - ecoul += qtmp*r3inv * qpfac * pjdotr; - ecoul *= factor_coul*qqrd2e; - } else ecoul = 0.0; - - if (rsq < cut_ljsq[itype][jtype]) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) + - rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])* - rsq*rcutlj2inv + - rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]); - evdwl *= factor_lj; - } else evdwl = 0.0; - } - - if (evflag) ev_tally_xyz_full(i,evdwl,ecoul, - fx,fy,fz,delx,dely,delz); - } - } - } -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Trung Dac Nguyen (ORNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "pair_lj_sf_dipole_sf_gpu.h" +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" +#include "update.h" +#include "domain.h" +#include "string.h" +#include "gpu_extra.h" + +// External functions from cuda library for atom decomposition + +int dplsf_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, + double **host_cut_ljsq, double **host_cut_coulsq, + double *host_special_coul, const double qqrd2e); +void dplsf_gpu_clear(); +int ** dplsf_gpu_compute_n(const int ago, const int inum, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, int *tag, int **nspecial, + int **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q, double **host_mu, + double *boxlo, double *prd); +void dplsf_gpu_compute(const int ago, const int inum, + const int nall, double **host_x, int *host_type, + int *ilist, int *numj, int **firstneigh, + const bool eflag, const bool vflag, const bool eatom, + const bool vatom, int &host_start, const double cpu_time, + bool &success, double *host_q, double **host_mu, const int nlocal, + double *boxlo, double *prd); +double dplsf_gpu_bytes(); + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJSFDipoleSFGPU::PairLJSFDipoleSFGPU(LAMMPS *lmp) : PairLJSFDipoleSF(lmp), + gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairLJSFDipoleSFGPU::~PairLJSFDipoleSFGPU() +{ + dplsf_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJSFDipoleSFGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = dplsf_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, atom->q, atom->mu, domain->boxlo, + domain->prd); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + dplsf_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success, atom->q, + atom->mu, atom->nlocal, domain->boxlo, domain->prd); + } + if (!success) + error->one(FLERR,"Insufficient memory on accelerator"); + + if (host_startq_flag || !atom->mu_flag || !atom->torque_flag) + error->all(FLERR,"Pair dipole/sf/gpu requires atom attributes q, mu, torque"); + + if (force->newton_pair) + error->all(FLERR,"Cannot use newton pair with dipole/sf/gpu pair style"); + + if (strcmp(update->unit_style,"electron") == 0) + error->all(FLERR,"Cannot (yet) use 'electron' units with dipoles"); + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int success = dplsf_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, + force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen, cut_ljsq, cut_coulsq, + force->special_coul, force->qqrd2e); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJSFDipoleSFGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + dplsf_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJSFDipoleSFGPU::cpu_compute(int start, int inum, int eflag, int vflag, + int *ilist, int *numneigh, + int **firstneigh) +{ + int i,j,ii,jj,jnum,itype,jtype; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fx,fy,fz; + double rsq,rinv,r2inv,r6inv,r3inv,r5inv; + double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz; + double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul; + double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4; + double forcelj,factor_coul,factor_lj; + double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf; + double aforcecoulx,aforcecouly,aforcecoulz; + double bforcecoulx,bforcecouly,bforcecoulz; + double rcutlj2inv, rcutcoul2inv,rcutlj6inv; + int *jlist; + + evdwl = ecoul = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + double **x = atom->x; + double **f = atom->f; + double *q = atom->q; + double **mu = atom->mu; + double **torque = atom->torque; + int *type = atom->type; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + + + // loop over neighbors of my atoms + + for (ii = start; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + rinv = sqrt(r2inv); + + // atom can have both a charge and dipole + // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole + + forcecoulx = forcecouly = forcecoulz = 0.0; + tixcoul = tiycoul = tizcoul = 0.0; + tjxcoul = tjycoul = tjzcoul = 0.0; + + if (rsq < cut_coulsq[itype][jtype]) { + + if (qtmp != 0.0 && q[j] != 0.0) { + pre1 = qtmp*q[j]*rinv*(r2inv-1.0/cut_coulsq[itype][jtype]); + + forcecoulx += pre1*delx; + forcecouly += pre1*dely; + forcecoulz += pre1*delz; + } + + if (mu[i][3] > 0.0 && mu[j][3] > 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; + + pdotp = mu[i][0]*mu[j][0] + mu[i][1]*mu[j][1] + mu[i][2]*mu[j][2]; + pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; + pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; + + afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv; + pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr ); + aforcecoulx = pre1*delx; + aforcecouly = pre1*dely; + aforcecoulz = pre1*delz; + + bfac = 1.0 - 4.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv) + + 3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv; + presf = 2.0 * r2inv * pidotr * pjdotr; + bforcecoulx = bfac * (pjdotr*mu[i][0]+pidotr*mu[j][0]-presf*delx); + bforcecouly = bfac * (pjdotr*mu[i][1]+pidotr*mu[j][1]-presf*dely); + bforcecoulz = bfac * (pjdotr*mu[i][2]+pidotr*mu[j][2]-presf*delz); + + forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx ); + forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly ); + forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz ); + + pre2 = 3.0 * bfac * r5inv * pjdotr; + pre3 = 3.0 * bfac * r5inv * pidotr; + pre4 = -bfac * r3inv; + + crossx = pre4 * (mu[i][1]*mu[j][2] - mu[i][2]*mu[j][1]); + crossy = pre4 * (mu[i][2]*mu[j][0] - mu[i][0]*mu[j][2]); + crossz = pre4 * (mu[i][0]*mu[j][1] - mu[i][1]*mu[j][0]); + + tixcoul += crossx + pre2 * (mu[i][1]*delz - mu[i][2]*dely); + tiycoul += crossy + pre2 * (mu[i][2]*delx - mu[i][0]*delz); + tizcoul += crossz + pre2 * (mu[i][0]*dely - mu[i][1]*delx); + tjxcoul += -crossx + pre3 * (mu[j][1]*delz - mu[j][2]*dely); + tjycoul += -crossy + pre3 * (mu[j][2]*delx - mu[j][0]*delz); + tjzcoul += -crossz + pre3 * (mu[j][0]*dely - mu[j][1]*delx); + } + + if (mu[i][3] > 0.0 && q[j] != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pidotr = mu[i][0]*delx + mu[i][1]*dely + mu[i][2]*delz; + rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; + pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv); + pqfac = 1.0 - 3.0*rsq*rcutcoul2inv + + 2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv); + pre2 = q[j] * r3inv * pqfac; + + forcecoulx += pre2*mu[i][0] - pre1*delx; + forcecouly += pre2*mu[i][1] - pre1*dely; + forcecoulz += pre2*mu[i][2] - pre1*delz; + tixcoul += pre2 * (mu[i][1]*delz - mu[i][2]*dely); + tiycoul += pre2 * (mu[i][2]*delx - mu[i][0]*delz); + tizcoul += pre2 * (mu[i][0]*dely - mu[i][1]*delx); + } + + if (mu[j][3] > 0.0 && qtmp != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pjdotr = mu[j][0]*delx + mu[j][1]*dely + mu[j][2]*delz; + rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; + pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv); + qpfac = 1.0 - 3.0*rsq*rcutcoul2inv + + 2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv); + pre2 = qtmp * r3inv * qpfac; + + forcecoulx += pre1*delx - pre2*mu[j][0]; + forcecouly += pre1*dely - pre2*mu[j][1]; + forcecoulz += pre1*delz - pre2*mu[j][2]; + tjxcoul += -pre2 * (mu[j][1]*delz - mu[j][2]*dely); + tjycoul += -pre2 * (mu[j][2]*delx - mu[j][0]*delz); + tjzcoul += -pre2 * (mu[j][0]*dely - mu[j][1]*delx); + } + } + + // LJ interaction + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv; + + rcutlj2inv = 1.0 / cut_ljsq[itype][jtype]; + rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv; + forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) * + rcutlj6inv * rcutlj2inv; + + forcelj = factor_lj * (forceljcut - forceljsf); + } else forcelj = 0.0; + + // total force + + fq = factor_coul*qqrd2e; + fx = fq*forcecoulx + delx*forcelj; + fy = fq*forcecouly + dely*forcelj; + fz = fq*forcecoulz + delz*forcelj; + + // force & torque accumulation + + f[i][0] += fx; + f[i][1] += fy; + f[i][2] += fz; + torque[i][0] += fq*tixcoul; + torque[i][1] += fq*tiycoul; + torque[i][2] += fq*tizcoul; + + if (eflag) { + if (rsq < cut_coulsq[itype][jtype]) { + ecoul = qtmp*q[j]*rinv* + pow((1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype])),2); + if (mu[i][3] > 0.0 && mu[j][3] > 0.0) + ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr); + if (mu[i][3] > 0.0 && q[j] != 0.0) + ecoul += -q[j]*r3inv * pqfac * pidotr; + if (mu[j][3] > 0.0 && qtmp != 0.0) + ecoul += qtmp*r3inv * qpfac * pjdotr; + ecoul *= factor_coul*qqrd2e; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) + + rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])* + rsq*rcutlj2inv + + rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]); + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (evflag) ev_tally_xyz_full(i,evdwl,ecoul, + fx,fy,fz,delx,dely,delz); + } + } + } +} diff --git a/src/GPU/pair_lj_sf_dipole_sf_gpu.h b/src/GPU/pair_lj_sf_dipole_sf_gpu.h index 5c53fec8f8..21452f8f85 100755 --- a/src/GPU/pair_lj_sf_dipole_sf_gpu.h +++ b/src/GPU/pair_lj_sf_dipole_sf_gpu.h @@ -1,67 +1,67 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/sf/dipole/sf/gpu,PairLJSFDipoleSFGPU) - -#else - -#ifndef LMP_PAIR_LJ_SF_DIPOLE_SF_GPU_H -#define LMP_PAIR_LJ_SF_DIPOLE_SF_GPU_H - -#include "pair_lj_sf_dipole_sf.h" - -namespace LAMMPS_NS { - -class PairLJSFDipoleSFGPU : public PairLJSFDipoleSF { - public: - PairLJSFDipoleSFGPU(LAMMPS *lmp); - ~PairLJSFDipoleSFGPU(); - void cpu_compute(int, int, int, int, int *, int *, int **); - void compute(int, int); - void init_style(); - double memory_usage(); - - enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; - - private: - int gpu_mode; - double cpu_time; - int *gpulist; -}; - -} -#endif -#endif - -/* ERROR/WARNING messages: - -E: Insufficient memory on accelerator - -There is insufficient memory on one of the devices specified for the gpu -package - -E: Pair style dipole/cut/gpu requires atom attribute q - -The atom style defined does not have this attribute. - -E: Cannot use newton pair with dipole/cut/gpu pair style - -Self-explanatory. - -E: Cannot (yet) use 'electron' units with dipoles - -This feature is not yet supported. - -*/ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/sf/dipole/sf/gpu,PairLJSFDipoleSFGPU) + +#else + +#ifndef LMP_PAIR_LJ_SF_DIPOLE_SF_GPU_H +#define LMP_PAIR_LJ_SF_DIPOLE_SF_GPU_H + +#include "pair_lj_sf_dipole_sf.h" + +namespace LAMMPS_NS { + +class PairLJSFDipoleSFGPU : public PairLJSFDipoleSF { + public: + PairLJSFDipoleSFGPU(LAMMPS *lmp); + ~PairLJSFDipoleSFGPU(); + void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int); + void init_style(); + double memory_usage(); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + private: + int gpu_mode; + double cpu_time; + int *gpulist; +}; + +} +#endif +#endif + +/* ERROR/WARNING messages: + +E: Insufficient memory on accelerator + +There is insufficient memory on one of the devices specified for the gpu +package + +E: Pair style dipole/cut/gpu requires atom attribute q + +The atom style defined does not have this attribute. + +E: Cannot use newton pair with dipole/cut/gpu pair style + +Self-explanatory. + +E: Cannot (yet) use 'electron' units with dipoles + +This feature is not yet supported. + +*/ diff --git a/src/KSPACE/pppm_disp_tip4p.cpp b/src/KSPACE/pppm_disp_tip4p.cpp index 72d2020e00..d3c2484b00 100644 --- a/src/KSPACE/pppm_disp_tip4p.cpp +++ b/src/KSPACE/pppm_disp_tip4p.cpp @@ -1,519 +1,519 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing authors: Amalie Frischknecht and Ahmed Ismail (SNL) - Rolf Isele-Holder (Aachen University) -------------------------------------------------------------------------- */ - -#include "math.h" -#include "pppm_disp_tip4p.h" -#include "pppm_disp.h" -#include "atom.h" -#include "domain.h" -#include "force.h" -#include "memory.h" -#include "error.h" -#include "math_const.h" - -using namespace LAMMPS_NS; -using namespace MathConst; - -#define OFFSET 16384 - -#ifdef FFT_SINGLE -#define ZEROF 0.0f -#define ONEF 1.0f -#else -#define ZEROF 0.0 -#define ONEF 1.0 -#endif - -/* ---------------------------------------------------------------------- */ - -PPPMDispTIP4P::PPPMDispTIP4P(LAMMPS *lmp, int narg, char **arg) : - PPPMDisp(lmp, narg, arg) -{ - triclinic_support = 0; - tip4pflag = 1; -} - -/* ---------------------------------------------------------------------- */ - -void PPPMDispTIP4P::init() -{ - // TIP4P PPPM requires newton on, b/c it computes forces on ghost atoms - - if (force->newton == 0) - error->all(FLERR,"Kspace style pppm/disp/tip4p requires newton on"); - - PPPMDisp::init(); -} - -/* ---------------------------------------------------------------------- - find center grid pt for each of my particles - check that full stencil for the particle will fit in my 3d brick - store central grid pt indices in part2grid array -------------------------------------------------------------------------- */ - -void PPPMDispTIP4P::particle_map_c(double delx, double dely, double delz, - double sft, int** p2g, int nup, int nlow, - int nxlo, int nylo, int nzlo, - int nxhi, int nyhi, int nzhi) -{ - int nx,ny,nz,iH1,iH2; - double *xi,xM[3]; - - int *type = atom->type; - double **x = atom->x; - int nlocal = atom->nlocal; - - int flag = 0; - for (int i = 0; i < nlocal; i++) { - if (type[i] == typeO) { - find_M(i,iH1,iH2,xM); - xi = xM; - } else xi = x[i]; - - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // current particle coord can be outside global and local box - // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 - - nx = static_cast ((xi[0]-boxlo[0])*delx+sft) - OFFSET; - ny = static_cast ((xi[1]-boxlo[1])*dely+sft) - OFFSET; - nz = static_cast ((xi[2]-boxlo[2])*delz+sft) - OFFSET; - - p2g[i][0] = nx; - p2g[i][1] = ny; - p2g[i][2] = nz; - - - // check that entire stencil around nx,ny,nz will fit in my 3d brick - - if (nx+nlow < nxlo || nx+nup > nxhi || - ny+nlow < nylo || ny+nup > nyhi || - nz+nlow < nzlo || nz+nup > nzhi) - flag = 1; - } - - if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); -} - -/* ---------------------------------------------------------------------- - create discretized "density" on section of global grid due to my particles - density(x,y,z) = charge "density" at grid points of my 3d brick - (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) - in global grid -------------------------------------------------------------------------- */ - -void PPPMDispTIP4P::make_rho_c() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - double *xi,xM[3]; - - // clear 3d density array - - FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (i = 0; i < ngrid; i++) vec[i] = ZEROF; - - // loop over my charges, add their contribution to nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - - int *type = atom->type; - double *q = atom->q; - double **x = atom->x; - int nlocal = atom->nlocal; - - for (int i = 0; i < nlocal; i++) { - if (type[i] == typeO) { - find_M(i,iH1,iH2,xM); - xi = xM; - } else xi = x[i]; - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv; - dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv; - dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - - z0 = delvolinv * q[i]; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - y0 = z0*rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - x0 = y0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - density_brick[mz][my][mx] += x0*rho1d[0][l]; - } - } - } - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles - for ik differentiation -------------------------------------------------------------------------- */ - -void PPPMDispTIP4P::fieldforce_c_ik() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - FFT_SCALAR ekx,eky,ekz; - double *xi; - int iH1,iH2; - double xM[3]; - double fx,fy,fz; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int *type = atom->type; - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - if (type[i] == typeO) { - find_M(i,iH1,iH2,xM); - xi = xM; - } else xi = x[i]; - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv; - dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv; - dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - ekx -= x0*vdx_brick[mz][my][mx]; - eky -= x0*vdy_brick[mz][my][mx]; - ekz -= x0*vdz_brick[mz][my][mx]; - } - } - } - - // convert E-field to force - - const double qfactor = force->qqrd2e * scale * q[i]; - if (type[i] != typeO) { - f[i][0] += qfactor*ekx; - f[i][1] += qfactor*eky; - if (slabflag != 2) f[i][2] += qfactor*ekz; - - } else { - fx = qfactor * ekx; - fy = qfactor * eky; - fz = qfactor * ekz; - find_M(i,iH1,iH2,xM); - - f[i][0] += fx*(1 - alpha); - f[i][1] += fy*(1 - alpha); - if (slabflag != 2) f[i][2] += fz*(1 - alpha); - - f[iH1][0] += 0.5*alpha*fx; - f[iH1][1] += 0.5*alpha*fy; - if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz; - - f[iH2][0] += 0.5*alpha*fx; - f[iH2][1] += 0.5*alpha*fy; - if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz; - } - } -} - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles - for ad scheme -------------------------------------------------------------------------- */ - -void PPPMDispTIP4P::fieldforce_c_ad() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0,dx0,dy0,dz0; - FFT_SCALAR ekx,eky,ekz; - double *xi; - int iH1,iH2; - double xM[3]; - double s1,s2,s3; - double *prd; - double fx,fy,fz; - double sf; - - if (triclinic == 0) prd = domain->prd; - else prd = domain->prd_lamda; - - double xprd = prd[0]; - double yprd = prd[1]; - double zprd = prd[2]; - double zprd_slab = zprd*slab_volfactor; - - double hx_inv = nx_pppm/xprd; - double hy_inv = ny_pppm/yprd; - double hz_inv = nz_pppm/zprd_slab; - - - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int *type = atom->type; - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - if (type[i] == typeO) { - find_M(i,iH1,iH2,xM); - xi = xM; - } else xi = x[i]; - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv; - dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv; - dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d); - - ekx = eky = ekz = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; - ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; - } - } - } - - ekx *= hx_inv; - eky *= hy_inv; - ekz *= hz_inv; - - // convert E-field to force and substract self forces - const double qfactor = force->qqrd2e * scale; - - s1 = x[i][0]*hx_inv; - s2 = x[i][1]*hy_inv; - s3 = x[i][2]*hz_inv; - sf = sf_coeff[0]*sin(2*MY_PI*s1); - sf += sf_coeff[1]*sin(4*MY_PI*s1); - sf *= 2*q[i]*q[i]; - fx = qfactor*(ekx*q[i] - sf); - - sf = sf_coeff[2]*sin(2*MY_PI*s2); - sf += sf_coeff[3]*sin(4*MY_PI*s2); - sf *= 2*q[i]*q[i]; - fy = qfactor*(eky*q[i] - sf); - - sf = sf_coeff[4]*sin(2*MY_PI*s3); - sf += sf_coeff[5]*sin(4*MY_PI*s3); - sf *= 2*q[i]*q[i]; - fz = qfactor*(ekz*q[i] - sf); - - if (type[i] != typeO) { - f[i][0] += fx; - f[i][1] += fy; - if (slabflag != 2) f[i][2] += fz; - - } else { - find_M(i,iH1,iH2,xM); - - f[i][0] += fx*(1 - alpha); - f[i][1] += fy*(1 - alpha); - if (slabflag != 2) f[i][2] += fz*(1 - alpha); - - f[iH1][0] += 0.5*alpha*fx; - f[iH1][1] += 0.5*alpha*fy; - if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz; - - f[iH2][0] += 0.5*alpha*fx; - f[iH2][1] += 0.5*alpha*fy; - if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz; - } - } -} - - -/* ---------------------------------------------------------------------- - interpolate from grid to get electric field & force on my particles -------------------------------------------------------------------------- */ - -void PPPMDispTIP4P::fieldforce_c_peratom() -{ - int i,l,m,n,nx,ny,nz,mx,my,mz; - FFT_SCALAR dx,dy,dz,x0,y0,z0; - double *xi; - int iH1,iH2; - double xM[3]; - FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5; - - // loop over my charges, interpolate electric field from nearby grid points - // (nx,ny,nz) = global coords of grid pt to "lower left" of charge - // (dx,dy,dz) = distance to "lower left" grid pt - // (mx,my,mz) = global coords of moving stencil pt - // ek = 3 components of E-field on particle - double *q = atom->q; - double **x = atom->x; - double **f = atom->f; - - int *type = atom->type; - int nlocal = atom->nlocal; - - for (i = 0; i < nlocal; i++) { - if (type[i] == typeO) { - find_M(i,iH1,iH2,xM); - xi = xM; - } else xi = x[i]; - - nx = part2grid[i][0]; - ny = part2grid[i][1]; - nz = part2grid[i][2]; - dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv; - dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv; - dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv; - - compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); - - u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; - for (n = nlower; n <= nupper; n++) { - mz = n+nz; - z0 = rho1d[2][n]; - for (m = nlower; m <= nupper; m++) { - my = m+ny; - y0 = z0*rho1d[1][m]; - for (l = nlower; l <= nupper; l++) { - mx = l+nx; - x0 = y0*rho1d[0][l]; - if (eflag_atom) u_pa += x0*u_brick[mz][my][mx]; - if (vflag_atom) { - v0 += x0*v0_brick[mz][my][mx]; - v1 += x0*v1_brick[mz][my][mx]; - v2 += x0*v2_brick[mz][my][mx]; - v3 += x0*v3_brick[mz][my][mx]; - v4 += x0*v4_brick[mz][my][mx]; - v5 += x0*v5_brick[mz][my][mx]; - } - } - } - } - - const double qfactor = 0.5*force->qqrd2e * scale * q[i]; - - if (eflag_atom) { - if (type[i] != typeO) { - eatom[i] += qfactor*u_pa; - } else { - eatom[i] += qfactor*u_pa*(1-alpha); - eatom[iH1] += qfactor*u_pa*alpha*0.5; - eatom[iH2] += qfactor*u_pa*alpha*0.5; - } - } - if (vflag_atom) { - if (type[i] != typeO) { - vatom[i][0] += v0*qfactor; - vatom[i][1] += v1*qfactor; - vatom[i][2] += v2*qfactor; - vatom[i][3] += v3*qfactor; - vatom[i][4] += v4*qfactor; - vatom[i][5] += v5*qfactor; - } else { - vatom[i][0] += v0*(1-alpha)*qfactor; - vatom[i][1] += v1*(1-alpha)*qfactor; - vatom[i][2] += v2*(1-alpha)*qfactor; - vatom[i][3] += v3*(1-alpha)*qfactor; - vatom[i][4] += v4*(1-alpha)*qfactor; - vatom[i][5] += v5*(1-alpha)*qfactor; - vatom[iH1][0] += v0*alpha*0.5*qfactor; - vatom[iH1][1] += v1*alpha*0.5*qfactor; - vatom[iH1][2] += v2*alpha*0.5*qfactor; - vatom[iH1][3] += v3*alpha*0.5*qfactor; - vatom[iH1][4] += v4*alpha*0.5*qfactor; - vatom[iH1][5] += v5*alpha*0.5*qfactor; - vatom[iH2][0] += v0*alpha*0.5*qfactor; - vatom[iH2][1] += v1*alpha*0.5*qfactor; - vatom[iH2][2] += v2*alpha*0.5*qfactor; - vatom[iH2][3] += v3*alpha*0.5*qfactor; - vatom[iH2][4] += v4*alpha*0.5*qfactor; - vatom[iH2][5] += v5*alpha*0.5*qfactor; - } - } - } -} - -/* ---------------------------------------------------------------------- - find 2 H atoms bonded to O atom i - compute position xM of fictitious charge site for O atom - also return local indices iH1,iH2 of H atoms -------------------------------------------------------------------------- */ - -void PPPMDispTIP4P::find_M(int i, int &iH1, int &iH2, double *xM) -{ - iH1 = atom->map(atom->tag[i] + 1); - iH2 = atom->map(atom->tag[i] + 2); - - if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); - if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) - error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); - - double **x = atom->x; - - double delx1 = x[iH1][0] - x[i][0]; - double dely1 = x[iH1][1] - x[i][1]; - double delz1 = x[iH1][2] - x[i][2]; - domain->minimum_image(delx1,dely1,delz1); - - double delx2 = x[iH2][0] - x[i][0]; - double dely2 = x[iH2][1] - x[i][1]; - double delz2 = x[iH2][2] - x[i][2]; - domain->minimum_image(delx2,dely2,delz2); - - xM[0] = x[i][0] + alpha * 0.5 * (delx1 + delx2); - xM[1] = x[i][1] + alpha * 0.5 * (dely1 + dely2); - xM[2] = x[i][2] + alpha * 0.5 * (delz1 + delz2); -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Amalie Frischknecht and Ahmed Ismail (SNL) + Rolf Isele-Holder (Aachen University) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pppm_disp_tip4p.h" +#include "pppm_disp.h" +#include "atom.h" +#include "domain.h" +#include "force.h" +#include "memory.h" +#include "error.h" +#include "math_const.h" + +using namespace LAMMPS_NS; +using namespace MathConst; + +#define OFFSET 16384 + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +/* ---------------------------------------------------------------------- */ + +PPPMDispTIP4P::PPPMDispTIP4P(LAMMPS *lmp, int narg, char **arg) : + PPPMDisp(lmp, narg, arg) +{ + triclinic_support = 0; + tip4pflag = 1; +} + +/* ---------------------------------------------------------------------- */ + +void PPPMDispTIP4P::init() +{ + // TIP4P PPPM requires newton on, b/c it computes forces on ghost atoms + + if (force->newton == 0) + error->all(FLERR,"Kspace style pppm/disp/tip4p requires newton on"); + + PPPMDisp::init(); +} + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPMDispTIP4P::particle_map_c(double delx, double dely, double delz, + double sft, int** p2g, int nup, int nlow, + int nxlo, int nylo, int nzlo, + int nxhi, int nyhi, int nzhi) +{ + int nx,ny,nz,iH1,iH2; + double *xi,xM[3]; + + int *type = atom->type; + double **x = atom->x; + int nlocal = atom->nlocal; + + int flag = 0; + for (int i = 0; i < nlocal; i++) { + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + xi = xM; + } else xi = x[i]; + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + nx = static_cast ((xi[0]-boxlo[0])*delx+sft) - OFFSET; + ny = static_cast ((xi[1]-boxlo[1])*dely+sft) - OFFSET; + nz = static_cast ((xi[2]-boxlo[2])*delz+sft) - OFFSET; + + p2g[i][0] = nx; + p2g[i][1] = ny; + p2g[i][2] = nz; + + + // check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlow < nxlo || nx+nup > nxhi || + ny+nlow < nylo || ny+nup > nyhi || + nz+nlow < nzlo || nz+nup > nzhi) + flag = 1; + } + + if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMDispTIP4P::make_rho_c() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + double *xi,xM[3]; + + // clear 3d density array + + FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (i = 0; i < ngrid; i++) vec[i] = ZEROF; + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + int *type = atom->type; + double *q = atom->q; + double **x = atom->x; + int nlocal = atom->nlocal; + + for (int i = 0; i < nlocal; i++) { + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + xi = xM; + } else xi = x[i]; + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv; + dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv; + dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + + z0 = delvolinv * q[i]; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + y0 = z0*rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + x0 = y0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + density_brick[mz][my][mx] += x0*rho1d[0][l]; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles + for ik differentiation +------------------------------------------------------------------------- */ + +void PPPMDispTIP4P::fieldforce_c_ik() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + double *xi; + int iH1,iH2; + double xM[3]; + double fx,fy,fz; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int *type = atom->type; + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + xi = xM; + } else xi = x[i]; + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv; + dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv; + dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + + const double qfactor = force->qqrd2e * scale * q[i]; + if (type[i] != typeO) { + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + if (slabflag != 2) f[i][2] += qfactor*ekz; + + } else { + fx = qfactor * ekx; + fy = qfactor * eky; + fz = qfactor * ekz; + find_M(i,iH1,iH2,xM); + + f[i][0] += fx*(1 - alpha); + f[i][1] += fy*(1 - alpha); + if (slabflag != 2) f[i][2] += fz*(1 - alpha); + + f[iH1][0] += 0.5*alpha*fx; + f[iH1][1] += 0.5*alpha*fy; + if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz; + + f[iH2][0] += 0.5*alpha*fx; + f[iH2][1] += 0.5*alpha*fy; + if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz; + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles + for ad scheme +------------------------------------------------------------------------- */ + +void PPPMDispTIP4P::fieldforce_c_ad() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0,dx0,dy0,dz0; + FFT_SCALAR ekx,eky,ekz; + double *xi; + int iH1,iH2; + double xM[3]; + double s1,s2,s3; + double *prd; + double fx,fy,fz; + double sf; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double hx_inv = nx_pppm/xprd; + double hy_inv = ny_pppm/yprd; + double hz_inv = nz_pppm/zprd_slab; + + + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int *type = atom->type; + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + xi = xM; + } else xi = x[i]; + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv; + dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv; + dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + compute_drho1d(dx,dy,dz, order, drho_coeff, drho1d); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx]; + ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx]; + } + } + } + + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; + + // convert E-field to force and substract self forces + const double qfactor = force->qqrd2e * scale; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + sf = sf_coeff[0]*sin(2*MY_PI*s1); + sf += sf_coeff[1]*sin(4*MY_PI*s1); + sf *= 2*q[i]*q[i]; + fx = qfactor*(ekx*q[i] - sf); + + sf = sf_coeff[2]*sin(2*MY_PI*s2); + sf += sf_coeff[3]*sin(4*MY_PI*s2); + sf *= 2*q[i]*q[i]; + fy = qfactor*(eky*q[i] - sf); + + sf = sf_coeff[4]*sin(2*MY_PI*s3); + sf += sf_coeff[5]*sin(4*MY_PI*s3); + sf *= 2*q[i]*q[i]; + fz = qfactor*(ekz*q[i] - sf); + + if (type[i] != typeO) { + f[i][0] += fx; + f[i][1] += fy; + if (slabflag != 2) f[i][2] += fz; + + } else { + find_M(i,iH1,iH2,xM); + + f[i][0] += fx*(1 - alpha); + f[i][1] += fy*(1 - alpha); + if (slabflag != 2) f[i][2] += fz*(1 - alpha); + + f[iH1][0] += 0.5*alpha*fx; + f[iH1][1] += 0.5*alpha*fy; + if (slabflag != 2) f[iH1][2] += 0.5*alpha*fz; + + f[iH2][0] += 0.5*alpha*fx; + f[iH2][1] += 0.5*alpha*fy; + if (slabflag != 2) f[iH2][2] += 0.5*alpha*fz; + } + } +} + + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles +------------------------------------------------------------------------- */ + +void PPPMDispTIP4P::fieldforce_c_peratom() +{ + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + double *xi; + int iH1,iH2; + double xM[3]; + FFT_SCALAR u_pa,v0,v1,v2,v3,v4,v5; + + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + double *q = atom->q; + double **x = atom->x; + double **f = atom->f; + + int *type = atom->type; + int nlocal = atom->nlocal; + + for (i = 0; i < nlocal; i++) { + if (type[i] == typeO) { + find_M(i,iH1,iH2,xM); + xi = xM; + } else xi = x[i]; + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (xi[0]-boxlo[0])*delxinv; + dy = ny+shiftone - (xi[1]-boxlo[1])*delyinv; + dz = nz+shiftone - (xi[2]-boxlo[2])*delzinv; + + compute_rho1d(dx,dy,dz, order, rho_coeff, rho1d); + + u_pa = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = rho1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*rho1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*rho1d[0][l]; + if (eflag_atom) u_pa += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + const double qfactor = 0.5*force->qqrd2e * scale * q[i]; + + if (eflag_atom) { + if (type[i] != typeO) { + eatom[i] += qfactor*u_pa; + } else { + eatom[i] += qfactor*u_pa*(1-alpha); + eatom[iH1] += qfactor*u_pa*alpha*0.5; + eatom[iH2] += qfactor*u_pa*alpha*0.5; + } + } + if (vflag_atom) { + if (type[i] != typeO) { + vatom[i][0] += v0*qfactor; + vatom[i][1] += v1*qfactor; + vatom[i][2] += v2*qfactor; + vatom[i][3] += v3*qfactor; + vatom[i][4] += v4*qfactor; + vatom[i][5] += v5*qfactor; + } else { + vatom[i][0] += v0*(1-alpha)*qfactor; + vatom[i][1] += v1*(1-alpha)*qfactor; + vatom[i][2] += v2*(1-alpha)*qfactor; + vatom[i][3] += v3*(1-alpha)*qfactor; + vatom[i][4] += v4*(1-alpha)*qfactor; + vatom[i][5] += v5*(1-alpha)*qfactor; + vatom[iH1][0] += v0*alpha*0.5*qfactor; + vatom[iH1][1] += v1*alpha*0.5*qfactor; + vatom[iH1][2] += v2*alpha*0.5*qfactor; + vatom[iH1][3] += v3*alpha*0.5*qfactor; + vatom[iH1][4] += v4*alpha*0.5*qfactor; + vatom[iH1][5] += v5*alpha*0.5*qfactor; + vatom[iH2][0] += v0*alpha*0.5*qfactor; + vatom[iH2][1] += v1*alpha*0.5*qfactor; + vatom[iH2][2] += v2*alpha*0.5*qfactor; + vatom[iH2][3] += v3*alpha*0.5*qfactor; + vatom[iH2][4] += v4*alpha*0.5*qfactor; + vatom[iH2][5] += v5*alpha*0.5*qfactor; + } + } + } +} + +/* ---------------------------------------------------------------------- + find 2 H atoms bonded to O atom i + compute position xM of fictitious charge site for O atom + also return local indices iH1,iH2 of H atoms +------------------------------------------------------------------------- */ + +void PPPMDispTIP4P::find_M(int i, int &iH1, int &iH2, double *xM) +{ + iH1 = atom->map(atom->tag[i] + 1); + iH2 = atom->map(atom->tag[i] + 2); + + if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); + if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) + error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + + double **x = atom->x; + + double delx1 = x[iH1][0] - x[i][0]; + double dely1 = x[iH1][1] - x[i][1]; + double delz1 = x[iH1][2] - x[i][2]; + domain->minimum_image(delx1,dely1,delz1); + + double delx2 = x[iH2][0] - x[i][0]; + double dely2 = x[iH2][1] - x[i][1]; + double delz2 = x[iH2][2] - x[i][2]; + domain->minimum_image(delx2,dely2,delz2); + + xM[0] = x[i][0] + alpha * 0.5 * (delx1 + delx2); + xM[1] = x[i][1] + alpha * 0.5 * (dely1 + dely2); + xM[2] = x[i][2] + alpha * 0.5 * (delz1 + delz2); +} diff --git a/src/USER-MISC/dihedral_nharmonic.cpp b/src/USER-MISC/dihedral_nharmonic.cpp index 5c7a2643ae..d06ef08a9f 100644 --- a/src/USER-MISC/dihedral_nharmonic.cpp +++ b/src/USER-MISC/dihedral_nharmonic.cpp @@ -59,7 +59,7 @@ void DihedralNHarmonic::compute(int eflag, int vflag) double vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z,vb2xm,vb2ym,vb2zm; double edihedral,f1[3],f2[3],f3[3],f4[3]; double sb1,sb2,sb3,rb1,rb3,c0,b1mag2,b1mag,b2mag2; - double b2mag,b3mag2,b3mag,ctmp,r12c1,c1mag,r12c2; + double b2mag,b3mag2,b3mag,ctmp,c_,r12c1,c1mag,r12c2; double c2mag,sc1,sc2,s1,s12,c,p,pd,a11,a22; double a33,a12,a13,a23,sx2,sy2,sz2; double s2,sin2; @@ -177,14 +177,15 @@ void DihedralNHarmonic::compute(int eflag, int vflag) // force & energy // p = sum (i=1,n) a_i * c**(i-1) // pd = dp/dc - p = a[type][0]; - pd = a[type][1]; + c_ = c; + p = this->a[type][0]; + pd = this->a[type][1]; for (int i = 1; i < nterms[type]-1; i++) { - p += c * a[type][i]; - pd += c * static_cast(i+1) * a[type][i+1]; - c *= c; + p += c_ * this->a[type][i]; + pd += c_ * static_cast(i+1) * this->a[type][i+1]; + c_ *= c; } - p += c * a[type][nterms[type]-1]; + p += c_ * this->a[type][nterms[type]-1]; if (eflag) edihedral = p; @@ -284,7 +285,7 @@ void DihedralNHarmonic::coeff(int narg, char **arg) a[i] = new double [n]; nterms[i] = n; for (int j = 0; j < n; j++ ) { - a[i][j] = force->numeric(FLERR,arg[2+i]); + a[i][j] = force->numeric(FLERR,arg[2+j]); setflag[i] = 1; } count++; diff --git a/src/USER-MISC/pair_lj_sf_dipole_sf.h b/src/USER-MISC/pair_lj_sf_dipole_sf.h index c59fe5a4c1..44cf009b5d 100755 --- a/src/USER-MISC/pair_lj_sf_dipole_sf.h +++ b/src/USER-MISC/pair_lj_sf_dipole_sf.h @@ -1,54 +1,54 @@ -/* -*- c++ -*------------------------------------------------------------------ - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/sf/dipole/sf,PairLJSFDipoleSF) - -#else - -#ifndef LMP_PAIR_LJ_SF_DIPOLE_SF_H -#define LMP_PAIR_LJ_SF_DIPOLE_SF_H - -#include "pair.h" - -namespace LAMMPS_NS { - -class PairLJSFDipoleSF : public Pair { - public: - PairLJSFDipoleSF(class LAMMPS *); - virtual ~PairLJSFDipoleSF(); - virtual void compute(int, int); - void settings(int, char **); - void coeff(int, char **); - void init_style(); - double init_one(int, int); - void write_restart(FILE *); - void read_restart(FILE *); - void write_restart_settings(FILE *); - void read_restart_settings(FILE *); - - protected: - double cut_lj_global,cut_coul_global; - double **cut_lj,**cut_ljsq; - double **cut_coul,**cut_coulsq; - double **epsilon,**sigma; - double **lj1,**lj2,**lj3,**lj4; - - void allocate(); -}; - -} - -#endif +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/sf/dipole/sf,PairLJSFDipoleSF) + +#else + +#ifndef LMP_PAIR_LJ_SF_DIPOLE_SF_H +#define LMP_PAIR_LJ_SF_DIPOLE_SF_H + +#include "pair.h" + +namespace LAMMPS_NS { + +class PairLJSFDipoleSF : public Pair { + public: + PairLJSFDipoleSF(class LAMMPS *); + virtual ~PairLJSFDipoleSF(); + virtual void compute(int, int); + void settings(int, char **); + void coeff(int, char **); + void init_style(); + double init_one(int, int); + void write_restart(FILE *); + void read_restart(FILE *); + void write_restart_settings(FILE *); + void read_restart_settings(FILE *); + + protected: + double cut_lj_global,cut_coul_global; + double **cut_lj,**cut_ljsq; + double **cut_coul,**cut_coulsq; + double **epsilon,**sigma; + double **lj1,**lj2,**lj3,**lj4; + + void allocate(); +}; + +} + +#endif #endif diff --git a/src/USER-OMP/msm_omp.h b/src/USER-OMP/msm_omp.h index 0a41c57ac3..a086b7af92 100644 --- a/src/USER-OMP/msm_omp.h +++ b/src/USER-OMP/msm_omp.h @@ -1,46 +1,46 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#ifdef KSPACE_CLASS - -KSpaceStyle(msm/omp,MSMOMP) - -#else - -#ifndef LMP_MSM_OMP_H -#define LMP_MSM_OMP_H - -#include "msm.h" -#include "thr_omp.h" - -namespace LAMMPS_NS { - - class MSMOMP : public MSM, public ThrOMP { - public: - MSMOMP(class LAMMPS *, int, char **); - virtual ~MSMOMP () {}; - - protected: - virtual void direct(int); - virtual void compute(int,int); - - private: - template void direct_eval(int); - template void direct_peratom(int); - -}; - -} - -#endif -#endif +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(msm/omp,MSMOMP) + +#else + +#ifndef LMP_MSM_OMP_H +#define LMP_MSM_OMP_H + +#include "msm.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + + class MSMOMP : public MSM, public ThrOMP { + public: + MSMOMP(class LAMMPS *, int, char **); + virtual ~MSMOMP () {}; + + protected: + virtual void direct(int); + virtual void compute(int,int); + + private: + template void direct_eval(int); + template void direct_peratom(int); + +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp b/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp index b1c4defa4d..94900cca51 100755 --- a/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp +++ b/src/USER-OMP/pair_lj_cut_dipole_cut_omp.cpp @@ -1,286 +1,286 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - This software is distributed under the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Axel Kohlmeyer (Temple U) -------------------------------------------------------------------------- */ - -#include "math.h" -#include "pair_lj_cut_dipole_cut_omp.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" - -#include "suffix.h" -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJCutDipoleCutOMP::PairLJCutDipoleCutOMP(LAMMPS *lmp) : - PairLJCutDipoleCut(lmp), ThrOMP(lmp, THR_PAIR) -{ - suffix_flag |= Suffix::OMP; - respa_enable = 0; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJCutDipoleCutOMP::compute(int eflag, int vflag) -{ - if (eflag || vflag) { - ev_setup(eflag,vflag); - } else evflag = vflag_fdotr = 0; - - const int nall = atom->nlocal + atom->nghost; - const int nthreads = comm->nthreads; - const int inum = list->inum; - -#if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) -#endif - { - int ifrom, ito, tid; - - loop_setup_thr(ifrom, ito, tid, inum, nthreads); - ThrData *thr = fix->get_thr(tid); - ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); - - if (evflag) { - if (eflag) { - if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); - else eval<1,1,0>(ifrom, ito, thr); - } else { - if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); - else eval<1,0,0>(ifrom, ito, thr); - } - } else { - if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); - else eval<0,0,0>(ifrom, ito, thr); - } - - reduce_thr(this, eflag, vflag, thr); - } // end of omp parallel region -} - -template -void PairLJCutDipoleCutOMP::eval(int iifrom, int iito, ThrData * const thr) -{ - int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul; - double rsq,rinv,r2inv,r6inv,r3inv,r5inv,r7inv,fx,fy,fz; - double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz; - double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul; - double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4; - double forcelj,factor_coul,factor_lj; - int *ilist,*jlist,*numneigh,**firstneigh; - - evdwl = 0.0; - - const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; - dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; - double * const * const torque = thr->get_torque(); - const double * _noalias const q = atom->q; - const dbl4_t * _noalias const mu = (dbl4_t *) atom->mu[0]; - const int * _noalias const type = atom->type; - const int nlocal = atom->nlocal; - const double * _noalias const special_coul = force->special_coul; - const double * _noalias const special_lj = force->special_lj; - const double qqrd2e = force->qqrd2e; - double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; - - ilist = list->ilist; - numneigh = list->numneigh; - firstneigh = list->firstneigh; - - // loop over neighbors of my atoms - - for (ii = iifrom; ii < iito; ++ii) { - - i = ilist[ii]; - xtmp = x[i].x; - ytmp = x[i].y; - ztmp = x[i].z; - qtmp = q[i]; - itype = type[i]; - jlist = firstneigh[i]; - jnum = numneigh[i]; - fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; - - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - factor_coul = special_coul[sbmask(j)]; - factor_lj = special_lj[sbmask(j)]; - j &= NEIGHMASK; - - delx = xtmp - x[j].x; - dely = ytmp - x[j].y; - delz = ztmp - x[j].z; - rsq = delx*delx + dely*dely + delz*delz; - jtype = type[j]; - - if (rsq < cutsq[itype][jtype]) { - r2inv = 1.0/rsq; - rinv = sqrt(r2inv); - - // atom can have both a charge and dipole - // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole - - forcecoulx = forcecouly = forcecoulz = 0.0; - tixcoul = tiycoul = tizcoul = 0.0; - tjxcoul = tjycoul = tjzcoul = 0.0; - - if (rsq < cut_coulsq[itype][jtype]) { - - if (qtmp != 0.0 && q[j] != 0.0) { - r3inv = r2inv*rinv; - pre1 = qtmp*q[j]*r3inv; - - forcecoulx += pre1*delx; - forcecouly += pre1*dely; - forcecoulz += pre1*delz; - } - - if (mu[i].w > 0.0 && mu[j].w > 0.0) { - r3inv = r2inv*rinv; - r5inv = r3inv*r2inv; - r7inv = r5inv*r2inv; - - pdotp = mu[i].x*mu[j].x + mu[i].y*mu[j].y + mu[i].z*mu[j].z; - pidotr = mu[i].x*delx + mu[i].y*dely + mu[i].z*delz; - pjdotr = mu[j].x*delx + mu[j].y*dely + mu[j].z*delz; - - pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr; - pre2 = 3.0*r5inv*pjdotr; - pre3 = 3.0*r5inv*pidotr; - pre4 = -1.0*r3inv; - - forcecoulx += pre1*delx + pre2*mu[i].x + pre3*mu[j].x; - forcecouly += pre1*dely + pre2*mu[i].y + pre3*mu[j].y; - forcecoulz += pre1*delz + pre2*mu[i].z + pre3*mu[j].z; - - crossx = pre4 * (mu[i].y*mu[j].z - mu[i].z*mu[j].y); - crossy = pre4 * (mu[i].z*mu[j].x - mu[i].x*mu[j].z); - crossz = pre4 * (mu[i].x*mu[j].y - mu[i].y*mu[j].x); - - tixcoul += crossx + pre2 * (mu[i].y*delz - mu[i].z*dely); - tiycoul += crossy + pre2 * (mu[i].z*delx - mu[i].x*delz); - tizcoul += crossz + pre2 * (mu[i].x*dely - mu[i].y*delx); - tjxcoul += -crossx + pre3 * (mu[j].y*delz - mu[j].z*dely); - tjycoul += -crossy + pre3 * (mu[j].z*delx - mu[j].x*delz); - tjzcoul += -crossz + pre3 * (mu[j].x*dely - mu[j].y*delx); - } - - if (mu[i].w > 0.0 && q[j] != 0.0) { - r3inv = r2inv*rinv; - r5inv = r3inv*r2inv; - pidotr = mu[i].x*delx + mu[i].y*dely + mu[i].z*delz; - pre1 = 3.0*q[j]*r5inv * pidotr; - pre2 = q[j]*r3inv; - - forcecoulx += pre2*mu[i].x - pre1*delx; - forcecouly += pre2*mu[i].y - pre1*dely; - forcecoulz += pre2*mu[i].z - pre1*delz; - tixcoul += pre2 * (mu[i].y*delz - mu[i].z*dely); - tiycoul += pre2 * (mu[i].z*delx - mu[i].x*delz); - tizcoul += pre2 * (mu[i].x*dely - mu[i].y*delx); - } - - if (mu[j].w > 0.0 && qtmp != 0.0) { - r3inv = r2inv*rinv; - r5inv = r3inv*r2inv; - pjdotr = mu[j].x*delx + mu[j].y*dely + mu[j].z*delz; - pre1 = 3.0*qtmp*r5inv * pjdotr; - pre2 = qtmp*r3inv; - - forcecoulx += pre1*delx - pre2*mu[j].x; - forcecouly += pre1*dely - pre2*mu[j].y; - forcecoulz += pre1*delz - pre2*mu[j].z; - tjxcoul += -pre2 * (mu[j].y*delz - mu[j].z*dely); - tjycoul += -pre2 * (mu[j].z*delx - mu[j].x*delz); - tjzcoul += -pre2 * (mu[j].x*dely - mu[j].y*delx); - } - } - - // LJ interaction - - if (rsq < cut_ljsq[itype][jtype]) { - r6inv = r2inv*r2inv*r2inv; - forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); - forcelj *= factor_lj * r2inv; - } else forcelj = 0.0; - - // total force - - fq = factor_coul*qqrd2e; - fx = fq*forcecoulx + delx*forcelj; - fy = fq*forcecouly + dely*forcelj; - fz = fq*forcecoulz + delz*forcelj; - - // force & torque accumulation - - fxtmp += fx; - fytmp += fy; - fztmp += fz; - t1tmp += fq*tixcoul; - t2tmp += fq*tiycoul; - t3tmp += fq*tizcoul; - - if (NEWTON_PAIR || j < nlocal) { - f[j].x -= fx; - f[j].y -= fy; - f[j].z -= fz; - torque[j][0] += fq*tjxcoul; - torque[j][1] += fq*tjycoul; - torque[j][2] += fq*tjzcoul; - } - - if (EFLAG) { - if (rsq < cut_coulsq[itype][jtype]) { - ecoul = qtmp*q[j]*rinv; - if (mu[i].w > 0.0 && mu[j].w > 0.0) - ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr; - if (mu[i].w > 0.0 && q[j] != 0.0) - ecoul += -q[j]*r3inv*pidotr; - if (mu[j].w > 0.0 && qtmp != 0.0) - ecoul += qtmp*r3inv*pjdotr; - ecoul *= factor_coul*qqrd2e; - } else ecoul = 0.0; - - if (rsq < cut_ljsq[itype][jtype]) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - - offset[itype][jtype]; - evdwl *= factor_lj; - } else evdwl = 0.0; - } - - if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr); - } - } - f[i].x += fxtmp; - f[i].y += fytmp; - f[i].z += fztmp; - torque[i][0] += t1tmp; - torque[i][1] += t2tmp; - torque[i][2] += t3tmp; - } -} - -/* ---------------------------------------------------------------------- */ - -double PairLJCutDipoleCutOMP::memory_usage() -{ - double bytes = memory_usage_thr(); - bytes += PairLJCutDipoleCut::memory_usage(); - - return bytes; -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_cut_dipole_cut_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJCutDipoleCutOMP::PairLJCutDipoleCutOMP(LAMMPS *lmp) : + PairLJCutDipoleCut(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutDipoleCutOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLJCutDipoleCutOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul; + double rsq,rinv,r2inv,r6inv,r3inv,r5inv,r7inv,fx,fy,fz; + double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz; + double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul; + double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4; + double forcelj,factor_coul,factor_lj; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; + double * const * const torque = thr->get_torque(); + const double * _noalias const q = atom->q; + const dbl4_t * _noalias const mu = (dbl4_t *) atom->mu[0]; + const int * _noalias const type = atom->type; + const int nlocal = atom->nlocal; + const double * _noalias const special_coul = force->special_coul; + const double * _noalias const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i].x; + ytmp = x[i].y; + ztmp = x[i].z; + qtmp = q[i]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_coul = special_coul[sbmask(j)]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j].x; + dely = ytmp - x[j].y; + delz = ztmp - x[j].z; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + rinv = sqrt(r2inv); + + // atom can have both a charge and dipole + // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole + + forcecoulx = forcecouly = forcecoulz = 0.0; + tixcoul = tiycoul = tizcoul = 0.0; + tjxcoul = tjycoul = tjzcoul = 0.0; + + if (rsq < cut_coulsq[itype][jtype]) { + + if (qtmp != 0.0 && q[j] != 0.0) { + r3inv = r2inv*rinv; + pre1 = qtmp*q[j]*r3inv; + + forcecoulx += pre1*delx; + forcecouly += pre1*dely; + forcecoulz += pre1*delz; + } + + if (mu[i].w > 0.0 && mu[j].w > 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + r7inv = r5inv*r2inv; + + pdotp = mu[i].x*mu[j].x + mu[i].y*mu[j].y + mu[i].z*mu[j].z; + pidotr = mu[i].x*delx + mu[i].y*dely + mu[i].z*delz; + pjdotr = mu[j].x*delx + mu[j].y*dely + mu[j].z*delz; + + pre1 = 3.0*r5inv*pdotp - 15.0*r7inv*pidotr*pjdotr; + pre2 = 3.0*r5inv*pjdotr; + pre3 = 3.0*r5inv*pidotr; + pre4 = -1.0*r3inv; + + forcecoulx += pre1*delx + pre2*mu[i].x + pre3*mu[j].x; + forcecouly += pre1*dely + pre2*mu[i].y + pre3*mu[j].y; + forcecoulz += pre1*delz + pre2*mu[i].z + pre3*mu[j].z; + + crossx = pre4 * (mu[i].y*mu[j].z - mu[i].z*mu[j].y); + crossy = pre4 * (mu[i].z*mu[j].x - mu[i].x*mu[j].z); + crossz = pre4 * (mu[i].x*mu[j].y - mu[i].y*mu[j].x); + + tixcoul += crossx + pre2 * (mu[i].y*delz - mu[i].z*dely); + tiycoul += crossy + pre2 * (mu[i].z*delx - mu[i].x*delz); + tizcoul += crossz + pre2 * (mu[i].x*dely - mu[i].y*delx); + tjxcoul += -crossx + pre3 * (mu[j].y*delz - mu[j].z*dely); + tjycoul += -crossy + pre3 * (mu[j].z*delx - mu[j].x*delz); + tjzcoul += -crossz + pre3 * (mu[j].x*dely - mu[j].y*delx); + } + + if (mu[i].w > 0.0 && q[j] != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pidotr = mu[i].x*delx + mu[i].y*dely + mu[i].z*delz; + pre1 = 3.0*q[j]*r5inv * pidotr; + pre2 = q[j]*r3inv; + + forcecoulx += pre2*mu[i].x - pre1*delx; + forcecouly += pre2*mu[i].y - pre1*dely; + forcecoulz += pre2*mu[i].z - pre1*delz; + tixcoul += pre2 * (mu[i].y*delz - mu[i].z*dely); + tiycoul += pre2 * (mu[i].z*delx - mu[i].x*delz); + tizcoul += pre2 * (mu[i].x*dely - mu[i].y*delx); + } + + if (mu[j].w > 0.0 && qtmp != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pjdotr = mu[j].x*delx + mu[j].y*dely + mu[j].z*delz; + pre1 = 3.0*qtmp*r5inv * pjdotr; + pre2 = qtmp*r3inv; + + forcecoulx += pre1*delx - pre2*mu[j].x; + forcecouly += pre1*dely - pre2*mu[j].y; + forcecoulz += pre1*delz - pre2*mu[j].z; + tjxcoul += -pre2 * (mu[j].y*delz - mu[j].z*dely); + tjycoul += -pre2 * (mu[j].z*delx - mu[j].x*delz); + tjzcoul += -pre2 * (mu[j].x*dely - mu[j].y*delx); + } + } + + // LJ interaction + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + forcelj *= factor_lj * r2inv; + } else forcelj = 0.0; + + // total force + + fq = factor_coul*qqrd2e; + fx = fq*forcecoulx + delx*forcelj; + fy = fq*forcecouly + dely*forcelj; + fz = fq*forcecoulz + delz*forcelj; + + // force & torque accumulation + + fxtmp += fx; + fytmp += fy; + fztmp += fz; + t1tmp += fq*tixcoul; + t2tmp += fq*tiycoul; + t3tmp += fq*tizcoul; + + if (NEWTON_PAIR || j < nlocal) { + f[j].x -= fx; + f[j].y -= fy; + f[j].z -= fz; + torque[j][0] += fq*tjxcoul; + torque[j][1] += fq*tjycoul; + torque[j][2] += fq*tjzcoul; + } + + if (EFLAG) { + if (rsq < cut_coulsq[itype][jtype]) { + ecoul = qtmp*q[j]*rinv; + if (mu[i].w > 0.0 && mu[j].w > 0.0) + ecoul += r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr; + if (mu[i].w > 0.0 && q[j] != 0.0) + ecoul += -q[j]*r3inv*pidotr; + if (mu[j].w > 0.0 && qtmp != 0.0) + ecoul += qtmp*r3inv*pjdotr; + ecoul *= factor_coul*qqrd2e; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr); + } + } + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; + torque[i][0] += t1tmp; + torque[i][1] += t2tmp; + torque[i][2] += t3tmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCutDipoleCutOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJCutDipoleCut::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_cut_dipole_cut_omp.h b/src/USER-OMP/pair_lj_cut_dipole_cut_omp.h index 53bd592d18..cebacac00f 100755 --- a/src/USER-OMP/pair_lj_cut_dipole_cut_omp.h +++ b/src/USER-OMP/pair_lj_cut_dipole_cut_omp.h @@ -1,48 +1,48 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Axel Kohlmeyer (Temple U) -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/cut/dipole/cut/omp,PairLJCutDipoleCutOMP) - -#else - -#ifndef LMP_PAIR_LJ_CUT_DIPOLE_CUT_OMP_H -#define LMP_PAIR_LJ_CUT_DIPOLE_CUT_OMP_H - -#include "pair_lj_cut_dipole_cut.h" -#include "thr_omp.h" - -namespace LAMMPS_NS { - -class PairLJCutDipoleCutOMP : public PairLJCutDipoleCut, public ThrOMP { - - public: - PairLJCutDipoleCutOMP(class LAMMPS *); - - virtual void compute(int, int); - virtual double memory_usage(); - - private: - template - void eval(int ifrom, int ito, ThrData * const thr); -}; - -} - -#endif -#endif +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/dipole/cut/omp,PairLJCutDipoleCutOMP) + +#else + +#ifndef LMP_PAIR_LJ_CUT_DIPOLE_CUT_OMP_H +#define LMP_PAIR_LJ_CUT_DIPOLE_CUT_OMP_H + +#include "pair_lj_cut_dipole_cut.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJCutDipoleCutOMP : public PairLJCutDipoleCut, public ThrOMP { + + public: + PairLJCutDipoleCutOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp b/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp index 1eb57d9f21..2024a80dca 100755 --- a/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp +++ b/src/USER-OMP/pair_lj_sf_dipole_sf_omp.cpp @@ -1,319 +1,319 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - This software is distributed under the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Axel Kohlmeyer (Temple U) -------------------------------------------------------------------------- */ - -#include "math.h" -#include "pair_lj_sf_dipole_sf_omp.h" -#include "atom.h" -#include "comm.h" -#include "force.h" -#include "neighbor.h" -#include "neigh_list.h" - -#include "suffix.h" -using namespace LAMMPS_NS; - -/* ---------------------------------------------------------------------- */ - -PairLJSFDipoleSFOMP::PairLJSFDipoleSFOMP(LAMMPS *lmp) : - PairLJSFDipoleSF(lmp), ThrOMP(lmp, THR_PAIR) -{ - suffix_flag |= Suffix::OMP; - respa_enable = 0; -} - -/* ---------------------------------------------------------------------- */ - -void PairLJSFDipoleSFOMP::compute(int eflag, int vflag) -{ - if (eflag || vflag) { - ev_setup(eflag,vflag); - } else evflag = vflag_fdotr = 0; - - const int nall = atom->nlocal + atom->nghost; - const int nthreads = comm->nthreads; - const int inum = list->inum; - -#if defined(_OPENMP) -#pragma omp parallel default(none) shared(eflag,vflag) -#endif - { - int ifrom, ito, tid; - - loop_setup_thr(ifrom, ito, tid, inum, nthreads); - ThrData *thr = fix->get_thr(tid); - ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); - - if (evflag) { - if (eflag) { - if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); - else eval<1,1,0>(ifrom, ito, thr); - } else { - if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); - else eval<1,0,0>(ifrom, ito, thr); - } - } else { - if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); - else eval<0,0,0>(ifrom, ito, thr); - } - - reduce_thr(this, eflag, vflag, thr); - } // end of omp parallel region -} - -template -void PairLJSFDipoleSFOMP::eval(int iifrom, int iito, ThrData * const thr) -{ - int i,j,ii,jj,jnum,itype,jtype; - double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul; - double rsq,rinv,r2inv,r6inv,r3inv,r5inv,fx,fy,fz; - double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz; - double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul; - double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4; - double forcelj,factor_coul,factor_lj; - double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf; - double aforcecoulx,aforcecouly,aforcecoulz; - double bforcecoulx,bforcecouly,bforcecoulz; - double rcutlj2inv, rcutcoul2inv,rcutlj6inv; - int *ilist,*jlist,*numneigh,**firstneigh; - - evdwl = 0.0; - - const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; - dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; - double * const * const torque = thr->get_torque(); - const double * _noalias const q = atom->q; - const dbl4_t * _noalias const mu = (dbl4_t *) atom->mu[0]; - const int * _noalias const type = atom->type; - const int nlocal = atom->nlocal; - const double * _noalias const special_coul = force->special_coul; - const double * _noalias const special_lj = force->special_lj; - const double qqrd2e = force->qqrd2e; - double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; - - ilist = list->ilist; - numneigh = list->numneigh; - firstneigh = list->firstneigh; - - // loop over neighbors of my atoms - - for (ii = iifrom; ii < iito; ++ii) { - - i = ilist[ii]; - xtmp = x[i].x; - ytmp = x[i].y; - ztmp = x[i].z; - qtmp = q[i]; - itype = type[i]; - jlist = firstneigh[i]; - jnum = numneigh[i]; - fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; - - for (jj = 0; jj < jnum; jj++) { - j = jlist[jj]; - factor_coul = special_coul[sbmask(j)]; - factor_lj = special_lj[sbmask(j)]; - j &= NEIGHMASK; - - delx = xtmp - x[j].x; - dely = ytmp - x[j].y; - delz = ztmp - x[j].z; - rsq = delx*delx + dely*dely + delz*delz; - jtype = type[j]; - - if (rsq < cutsq[itype][jtype]) { - r2inv = 1.0/rsq; - rinv = sqrt(r2inv); - - // atom can have both a charge and dipole - // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole - // atom can have both a charge and dipole - // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole - - forcecoulx = forcecouly = forcecoulz = 0.0; - tixcoul = tiycoul = tizcoul = 0.0; - tjxcoul = tjycoul = tjzcoul = 0.0; - - if (rsq < cut_coulsq[itype][jtype]) { - - if (qtmp != 0.0 && q[j] != 0.0) { - pre1 = qtmp*q[j]*rinv*(r2inv-1.0/cut_coulsq[itype][jtype]); - - forcecoulx += pre1*delx; - forcecouly += pre1*dely; - forcecoulz += pre1*delz; - } - - if (mu[i].w > 0.0 && mu[j].w > 0.0) { - r3inv = r2inv*rinv; - r5inv = r3inv*r2inv; - rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; - - pdotp = mu[i].x*mu[j].x + mu[i].y*mu[j].y + mu[i].z*mu[j].z; - pidotr = mu[i].x*delx + mu[i].y*dely + mu[i].z*delz; - pjdotr = mu[j].x*delx + mu[j].y*dely + mu[j].z*delz; - - afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv; - pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr ); - aforcecoulx = pre1*delx; - aforcecouly = pre1*dely; - aforcecoulz = pre1*delz; - - bfac = 1.0 - 4.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv) + - 3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv; - presf = 2.0 * r2inv * pidotr * pjdotr; - bforcecoulx = bfac * (pjdotr*mu[i].x+pidotr*mu[j].x-presf*delx); - bforcecouly = bfac * (pjdotr*mu[i].y+pidotr*mu[j].y-presf*dely); - bforcecoulz = bfac * (pjdotr*mu[i].z+pidotr*mu[j].z-presf*delz); - - forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx ); - forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly ); - forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz ); - - pre2 = 3.0 * bfac * r5inv * pjdotr; - pre3 = 3.0 * bfac * r5inv * pidotr; - pre4 = -bfac * r3inv; - - crossx = pre4 * (mu[i].y*mu[j].z - mu[i].z*mu[j].y); - crossy = pre4 * (mu[i].z*mu[j].x - mu[i].x*mu[j].z); - crossz = pre4 * (mu[i].x*mu[j].y - mu[i].y*mu[j].x); - - tixcoul += crossx + pre2 * (mu[i].y*delz - mu[i].z*dely); - tiycoul += crossy + pre2 * (mu[i].z*delx - mu[i].x*delz); - tizcoul += crossz + pre2 * (mu[i].x*dely - mu[i].y*delx); - tjxcoul += -crossx + pre3 * (mu[j].y*delz - mu[j].z*dely); - tjycoul += -crossy + pre3 * (mu[j].z*delx - mu[j].x*delz); - tjzcoul += -crossz + pre3 * (mu[j].x*dely - mu[j].y*delx); - } - - if (mu[i].w > 0.0 && q[j] != 0.0) { - r3inv = r2inv*rinv; - r5inv = r3inv*r2inv; - pidotr = mu[i].x*delx + mu[i].y*dely + mu[i].z*delz; - rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; - pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv); - pqfac = 1.0 - 3.0*rsq*rcutcoul2inv + - 2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv); - pre2 = q[j] * r3inv * pqfac; - - forcecoulx += pre2*mu[i].x - pre1*delx; - forcecouly += pre2*mu[i].y - pre1*dely; - forcecoulz += pre2*mu[i].z - pre1*delz; - tixcoul += pre2 * (mu[i].y*delz - mu[i].z*dely); - tiycoul += pre2 * (mu[i].z*delx - mu[i].x*delz); - tizcoul += pre2 * (mu[i].x*dely - mu[i].y*delx); - } - - if (mu[j].w > 0.0 && qtmp != 0.0) { - r3inv = r2inv*rinv; - r5inv = r3inv*r2inv; - pjdotr = mu[j].x*delx + mu[j].y*dely + mu[j].z*delz; - rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; - pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv); - qpfac = 1.0 - 3.0*rsq*rcutcoul2inv + - 2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv); - pre2 = qtmp * r3inv * qpfac; - - forcecoulx += pre1*delx - pre2*mu[j].x; - forcecouly += pre1*dely - pre2*mu[j].y; - forcecoulz += pre1*delz - pre2*mu[j].z; - tjxcoul += -pre2 * (mu[j].y*delz - mu[j].z*dely); - tjycoul += -pre2 * (mu[j].z*delx - mu[j].x*delz); - tjzcoul += -pre2 * (mu[j].x*dely - mu[j].y*delx); - } - } - - // LJ interaction - - if (rsq < cut_ljsq[itype][jtype]) { - r6inv = r2inv*r2inv*r2inv; - forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv; - - rcutlj2inv = 1.0 / cut_ljsq[itype][jtype]; - rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv; - forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) * - rcutlj6inv * rcutlj2inv; - - forcelj = factor_lj * (forceljcut - forceljsf); - } else forcelj = 0.0; - - // total force - - fq = factor_coul*qqrd2e; - fx = fq*forcecoulx + delx*forcelj; - fy = fq*forcecouly + dely*forcelj; - fz = fq*forcecoulz + delz*forcelj; - - // force & torque accumulation - - fxtmp += fx; - fytmp += fy; - fztmp += fz; - t1tmp += fq*tixcoul; - t2tmp += fq*tiycoul; - t3tmp += fq*tizcoul; - - if (NEWTON_PAIR || j < nlocal) { - f[j].x -= fx; - f[j].y -= fy; - f[j].z -= fz; - torque[j][0] += fq*tjxcoul; - torque[j][1] += fq*tjycoul; - torque[j][2] += fq*tjzcoul; - } - - if (EFLAG) { - if (rsq < cut_coulsq[itype][jtype]) { - ecoul = (1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype])); - ecoul *= ecoul; - ecoul *= qtmp * q[j] * rinv; - if (mu[i].w > 0.0 && mu[j].w > 0.0) - ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr); - if (mu[i].w > 0.0 && q[j] != 0.0) - ecoul += -q[j] * r3inv * pqfac * pidotr; - if (mu[j].w > 0.0 && qtmp != 0.0) - ecoul += qtmp * r3inv * qpfac * pjdotr; - ecoul *= factor_coul*qqrd2e; - } else ecoul = 0.0; - - if (rsq < cut_ljsq[itype][jtype]) { - evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])+ - rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])* - rsq*rcutlj2inv+ - rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]); - evdwl *= factor_lj; - } else evdwl = 0.0; - } - - if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, - evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr); - } - } - f[i].x += fxtmp; - f[i].y += fytmp; - f[i].z += fztmp; - torque[i][0] += t1tmp; - torque[i][1] += t2tmp; - torque[i][2] += t3tmp; - } -} - -/* ---------------------------------------------------------------------- */ - -double PairLJSFDipoleSFOMP::memory_usage() -{ - double bytes = memory_usage_thr(); - bytes += PairLJSFDipoleSF::memory_usage(); - - return bytes; -} +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "pair_lj_sf_dipole_sf_omp.h" +#include "atom.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" + +#include "suffix.h" +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJSFDipoleSFOMP::PairLJSFDipoleSFOMP(LAMMPS *lmp) : + PairLJSFDipoleSF(lmp), ThrOMP(lmp, THR_PAIR) +{ + suffix_flag |= Suffix::OMP; + respa_enable = 0; +} + +/* ---------------------------------------------------------------------- */ + +void PairLJSFDipoleSFOMP::compute(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = list->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); + else eval<1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); + else eval<1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); + else eval<0,0,0>(ifrom, ito, thr); + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +template +void PairLJSFDipoleSFOMP::eval(int iifrom, int iito, ThrData * const thr) +{ + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul; + double rsq,rinv,r2inv,r6inv,r3inv,r5inv,fx,fy,fz; + double forcecoulx,forcecouly,forcecoulz,crossx,crossy,crossz; + double tixcoul,tiycoul,tizcoul,tjxcoul,tjycoul,tjzcoul; + double fq,pdotp,pidotr,pjdotr,pre1,pre2,pre3,pre4; + double forcelj,factor_coul,factor_lj; + double presf,afac,bfac,pqfac,qpfac,forceljcut,forceljsf; + double aforcecoulx,aforcecouly,aforcecoulz; + double bforcecoulx,bforcecouly,bforcecoulz; + double rcutlj2inv, rcutcoul2inv,rcutlj6inv; + int *ilist,*jlist,*numneigh,**firstneigh; + + evdwl = 0.0; + + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; + double * const * const torque = thr->get_torque(); + const double * _noalias const q = atom->q; + const dbl4_t * _noalias const mu = (dbl4_t *) atom->mu[0]; + const int * _noalias const type = atom->type; + const int nlocal = atom->nlocal; + const double * _noalias const special_coul = force->special_coul; + const double * _noalias const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + double fxtmp,fytmp,fztmp,t1tmp,t2tmp,t3tmp; + + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + // loop over neighbors of my atoms + + for (ii = iifrom; ii < iito; ++ii) { + + i = ilist[ii]; + xtmp = x[i].x; + ytmp = x[i].y; + ztmp = x[i].z; + qtmp = q[i]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + fxtmp=fytmp=fztmp=t1tmp=t2tmp=t3tmp=0.0; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_coul = special_coul[sbmask(j)]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j].x; + dely = ytmp - x[j].y; + delz = ztmp - x[j].z; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + rinv = sqrt(r2inv); + + // atom can have both a charge and dipole + // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole + // atom can have both a charge and dipole + // i,j = charge-charge, dipole-dipole, dipole-charge, or charge-dipole + + forcecoulx = forcecouly = forcecoulz = 0.0; + tixcoul = tiycoul = tizcoul = 0.0; + tjxcoul = tjycoul = tjzcoul = 0.0; + + if (rsq < cut_coulsq[itype][jtype]) { + + if (qtmp != 0.0 && q[j] != 0.0) { + pre1 = qtmp*q[j]*rinv*(r2inv-1.0/cut_coulsq[itype][jtype]); + + forcecoulx += pre1*delx; + forcecouly += pre1*dely; + forcecoulz += pre1*delz; + } + + if (mu[i].w > 0.0 && mu[j].w > 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; + + pdotp = mu[i].x*mu[j].x + mu[i].y*mu[j].y + mu[i].z*mu[j].z; + pidotr = mu[i].x*delx + mu[i].y*dely + mu[i].z*delz; + pjdotr = mu[j].x*delx + mu[j].y*dely + mu[j].z*delz; + + afac = 1.0 - rsq*rsq * rcutcoul2inv*rcutcoul2inv; + pre1 = afac * ( pdotp - 3.0 * r2inv * pidotr * pjdotr ); + aforcecoulx = pre1*delx; + aforcecouly = pre1*dely; + aforcecoulz = pre1*delz; + + bfac = 1.0 - 4.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv) + + 3.0*rsq*rsq*rcutcoul2inv*rcutcoul2inv; + presf = 2.0 * r2inv * pidotr * pjdotr; + bforcecoulx = bfac * (pjdotr*mu[i].x+pidotr*mu[j].x-presf*delx); + bforcecouly = bfac * (pjdotr*mu[i].y+pidotr*mu[j].y-presf*dely); + bforcecoulz = bfac * (pjdotr*mu[i].z+pidotr*mu[j].z-presf*delz); + + forcecoulx += 3.0 * r5inv * ( aforcecoulx + bforcecoulx ); + forcecouly += 3.0 * r5inv * ( aforcecouly + bforcecouly ); + forcecoulz += 3.0 * r5inv * ( aforcecoulz + bforcecoulz ); + + pre2 = 3.0 * bfac * r5inv * pjdotr; + pre3 = 3.0 * bfac * r5inv * pidotr; + pre4 = -bfac * r3inv; + + crossx = pre4 * (mu[i].y*mu[j].z - mu[i].z*mu[j].y); + crossy = pre4 * (mu[i].z*mu[j].x - mu[i].x*mu[j].z); + crossz = pre4 * (mu[i].x*mu[j].y - mu[i].y*mu[j].x); + + tixcoul += crossx + pre2 * (mu[i].y*delz - mu[i].z*dely); + tiycoul += crossy + pre2 * (mu[i].z*delx - mu[i].x*delz); + tizcoul += crossz + pre2 * (mu[i].x*dely - mu[i].y*delx); + tjxcoul += -crossx + pre3 * (mu[j].y*delz - mu[j].z*dely); + tjycoul += -crossy + pre3 * (mu[j].z*delx - mu[j].x*delz); + tjzcoul += -crossz + pre3 * (mu[j].x*dely - mu[j].y*delx); + } + + if (mu[i].w > 0.0 && q[j] != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pidotr = mu[i].x*delx + mu[i].y*dely + mu[i].z*delz; + rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; + pre1 = 3.0 * q[j] * r5inv * pidotr * (1-rsq*rcutcoul2inv); + pqfac = 1.0 - 3.0*rsq*rcutcoul2inv + + 2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv); + pre2 = q[j] * r3inv * pqfac; + + forcecoulx += pre2*mu[i].x - pre1*delx; + forcecouly += pre2*mu[i].y - pre1*dely; + forcecoulz += pre2*mu[i].z - pre1*delz; + tixcoul += pre2 * (mu[i].y*delz - mu[i].z*dely); + tiycoul += pre2 * (mu[i].z*delx - mu[i].x*delz); + tizcoul += pre2 * (mu[i].x*dely - mu[i].y*delx); + } + + if (mu[j].w > 0.0 && qtmp != 0.0) { + r3inv = r2inv*rinv; + r5inv = r3inv*r2inv; + pjdotr = mu[j].x*delx + mu[j].y*dely + mu[j].z*delz; + rcutcoul2inv=1.0/cut_coulsq[itype][jtype]; + pre1 = 3.0 * qtmp * r5inv * pjdotr * (1-rsq*rcutcoul2inv); + qpfac = 1.0 - 3.0*rsq*rcutcoul2inv + + 2.0*rsq*sqrt(rsq)*rcutcoul2inv*sqrt(rcutcoul2inv); + pre2 = qtmp * r3inv * qpfac; + + forcecoulx += pre1*delx - pre2*mu[j].x; + forcecouly += pre1*dely - pre2*mu[j].y; + forcecoulz += pre1*delz - pre2*mu[j].z; + tjxcoul += -pre2 * (mu[j].y*delz - mu[j].z*dely); + tjycoul += -pre2 * (mu[j].z*delx - mu[j].x*delz); + tjzcoul += -pre2 * (mu[j].x*dely - mu[j].y*delx); + } + } + + // LJ interaction + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forceljcut = r6inv*(lj1[itype][jtype]*r6inv-lj2[itype][jtype])*r2inv; + + rcutlj2inv = 1.0 / cut_ljsq[itype][jtype]; + rcutlj6inv = rcutlj2inv * rcutlj2inv * rcutlj2inv; + forceljsf = (lj1[itype][jtype]*rcutlj6inv - lj2[itype][jtype]) * + rcutlj6inv * rcutlj2inv; + + forcelj = factor_lj * (forceljcut - forceljsf); + } else forcelj = 0.0; + + // total force + + fq = factor_coul*qqrd2e; + fx = fq*forcecoulx + delx*forcelj; + fy = fq*forcecouly + dely*forcelj; + fz = fq*forcecoulz + delz*forcelj; + + // force & torque accumulation + + fxtmp += fx; + fytmp += fy; + fztmp += fz; + t1tmp += fq*tixcoul; + t2tmp += fq*tiycoul; + t3tmp += fq*tizcoul; + + if (NEWTON_PAIR || j < nlocal) { + f[j].x -= fx; + f[j].y -= fy; + f[j].z -= fz; + torque[j][0] += fq*tjxcoul; + torque[j][1] += fq*tjycoul; + torque[j][2] += fq*tjzcoul; + } + + if (EFLAG) { + if (rsq < cut_coulsq[itype][jtype]) { + ecoul = (1.0-sqrt(rsq)/sqrt(cut_coulsq[itype][jtype])); + ecoul *= ecoul; + ecoul *= qtmp * q[j] * rinv; + if (mu[i].w > 0.0 && mu[j].w > 0.0) + ecoul += bfac * (r3inv*pdotp - 3.0*r5inv*pidotr*pjdotr); + if (mu[i].w > 0.0 && q[j] != 0.0) + ecoul += -q[j] * r3inv * pqfac * pidotr; + if (mu[j].w > 0.0 && qtmp != 0.0) + ecoul += qtmp * r3inv * qpfac * pjdotr; + ecoul *= factor_coul*qqrd2e; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype])+ + rcutlj6inv*(6*lj3[itype][jtype]*rcutlj6inv-3*lj4[itype][jtype])* + rsq*rcutlj2inv+ + rcutlj6inv*(-7*lj3[itype][jtype]*rcutlj6inv+4*lj4[itype][jtype]); + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (EVFLAG) ev_tally_xyz_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fx,fy,fz,delx,dely,delz,thr); + } + } + f[i].x += fxtmp; + f[i].y += fytmp; + f[i].z += fztmp; + torque[i][0] += t1tmp; + torque[i][1] += t2tmp; + torque[i][2] += t3tmp; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJSFDipoleSFOMP::memory_usage() +{ + double bytes = memory_usage_thr(); + bytes += PairLJSFDipoleSF::memory_usage(); + + return bytes; +} diff --git a/src/USER-OMP/pair_lj_sf_dipole_sf_omp.h b/src/USER-OMP/pair_lj_sf_dipole_sf_omp.h index b593c70bc2..b8f3e705a6 100755 --- a/src/USER-OMP/pair_lj_sf_dipole_sf_omp.h +++ b/src/USER-OMP/pair_lj_sf_dipole_sf_omp.h @@ -1,48 +1,48 @@ -/* -*- c++ -*- ---------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - Contributing author: Axel Kohlmeyer (Temple U) -------------------------------------------------------------------------- */ - -#ifdef PAIR_CLASS - -PairStyle(lj/sf/dipole/sf/omp,PairLJSFDipoleSFOMP) - -#else - -#ifndef LMP_PAIR_LJ_SF_DIPOLE_SF_OMP_H -#define LMP_PAIR_LJ_SF_DIPOLE_SF_OMP_H - -#include "pair_lj_sf_dipole_sf.h" -#include "thr_omp.h" - -namespace LAMMPS_NS { - -class PairLJSFDipoleSFOMP : public PairLJSFDipoleSF, public ThrOMP { - - public: - PairLJSFDipoleSFOMP(class LAMMPS *); - - virtual void compute(int, int); - virtual double memory_usage(); - - private: - template - void eval(int ifrom, int ito, ThrData * const thr); -}; - -} - -#endif -#endif +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/sf/dipole/sf/omp,PairLJSFDipoleSFOMP) + +#else + +#ifndef LMP_PAIR_LJ_SF_DIPOLE_SF_OMP_H +#define LMP_PAIR_LJ_SF_DIPOLE_SF_OMP_H + +#include "pair_lj_sf_dipole_sf.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairLJSFDipoleSFOMP : public PairLJSFDipoleSF, public ThrOMP { + + public: + PairLJSFDipoleSFOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/KSPACE/math_complex.h b/src/math_complex.h similarity index 100% rename from src/KSPACE/math_complex.h rename to src/math_complex.h diff --git a/src/KSPACE/math_vector.h b/src/math_vector.h similarity index 100% rename from src/KSPACE/math_vector.h rename to src/math_vector.h diff --git a/src/version.h b/src/version.h index fe924ead2e..baa777b964 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define LAMMPS_VERSION "6 Jun 2013" +#define LAMMPS_VERSION "7 Jun 2013"