diff --git a/src/USER-OMP/fix_nphug_omp.cpp b/src/USER-OMP/fix_nphug_omp.cpp new file mode 100644 index 0000000000..273ba4c450 --- /dev/null +++ b/src/USER-OMP/fix_nphug_omp.cpp @@ -0,0 +1,461 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "string.h" +#include "stdlib.h" +#include "fix_nphug_omp.h" +#include "modify.h" +#include "error.h" +#include "update.h" +#include "compute.h" +#include "force.h" +#include "domain.h" +#include "group.h" +#include "math.h" +#include "memory.h" +#include "comm.h" +#include "math.h" + +using namespace LAMMPS_NS; +using namespace FixConst; + +enum{ISO,ANISO,TRICLINIC}; // same as fix_nh.cpp + +/* ---------------------------------------------------------------------- */ + +FixNPHugOMP::FixNPHugOMP(LAMMPS *lmp, int narg, char **arg) : + FixNHOMP(lmp, narg, arg) +{ + + // Prevent masses from being updated every timestep + + eta_mass_flag = 0; + omega_mass_flag = 0; + etap_mass_flag = 0; + + // extend vector of base-class computes + + size_vector += 3; + + // turn off deviatoric flag and remove strain energy from vector + + deviatoric_flag = 0; + size_vector -= 1; + + // use initial state as reference state + + v0_set = p0_set = e0_set = 0; + + // check pressure settings + + if (p_start[0] != p_stop[0] || + p_start[1] != p_stop[1] || + p_start[2] != p_stop[2]) + error->all(FLERR,"Pstart and Pstop must have the same value"); + + // uniaxial = 0 means hydrostatic compression + // uniaxial = 1 means uniaxial compression + // in x, y, or z (idir = 0, 1, or 2) + + // isotropic hydrostatic compression + + if (pstyle == ISO) { + uniaxial = 0; + + // anisotropic compression + + } else if (pstyle == ANISO) { + + // anisotropic hydrostatic compression + + if (p_start[0] == p_start[1] && + p_start[0] == p_start[2] ) + uniaxial = 0; + + // uniaxial compression + + else if (p_flag[0] == 1 && p_flag[1] == 0 + && p_flag[2] == 0) { + uniaxial = 1; + idir = 0; + } else if (p_flag[0] == 0 && p_flag[1] == 1 + && p_flag[2] == 0) { + uniaxial = 1; + idir = 1; + } else if (p_flag[0] == 0 && p_flag[1] == 0 + && p_flag[2] == 1) { + uniaxial = 1; + idir = 2; + + } else error->all(FLERR,"Specified target stress must be uniaxial or hydrostatic"); + + // triclinic hydrostatic compression + + } else if (pstyle == TRICLINIC) { + + if (p_start[0] == p_start[1] && + p_start[0] == p_start[2] && + p_start[3] == 0.0 && + p_start[4] == 0.0 && + p_start[5] == 0.0 ) + uniaxial = 0; + + else error->all(FLERR,"For triclinic deformation, specified target stress must be hydrostatic"); + } + + if (!tstat_flag) + error->all(FLERR,"Temperature control must be used with fix nphug/omp"); + if (!pstat_flag) + error->all(FLERR,"Pressure control must be used with fix nphug/omp"); + + // create a new compute temp style + // id = fix-ID + temp + // compute group = all since pressure is always global (group all) + // and thus its KE/temperature contribution should use group all + + int n = strlen(id) + 6; + id_temp = new char[n]; + 
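  // the calls below amount to the input-script command
  //     compute <fix-ID>_temp all temp
  // the same fix-ID + suffix pattern is reused further down for the
  // "_press" and "_pe" computes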
strcpy(id_temp,id); + strcat(id_temp,"_temp"); + + char **newarg = new char*[3]; + newarg[0] = id_temp; + newarg[1] = (char *) "all"; + newarg[2] = (char *) "temp"; + + modify->add_compute(3,newarg); + delete [] newarg; + tflag = 1; + + // create a new compute pressure style + // id = fix-ID + press, compute group = all + // pass id_temp as 4th arg to pressure constructor + + n = strlen(id) + 7; + id_press = new char[n]; + strcpy(id_press,id); + strcat(id_press,"_press"); + + newarg = new char*[4]; + newarg[0] = id_press; + newarg[1] = (char *) "all"; + newarg[2] = (char *) "pressure"; + newarg[3] = id_temp; + modify->add_compute(4,newarg); + delete [] newarg; + pflag = 1; + + // create a new compute potential energy compute + + n = strlen(id) + 3; + id_pe = new char[n]; + strcpy(id_pe,id); + strcat(id_pe,"_pe"); + + newarg = new char*[3]; + newarg[0] = id_pe; + newarg[1] = (char*)"all"; + newarg[2] = (char*)"pe"; + modify->add_compute(3,newarg); + delete [] newarg; + peflag = 1; +} + +/* ---------------------------------------------------------------------- */ + +FixNPHugOMP::~FixNPHugOMP() +{ + + // temp and press computes handled by base class + // delete pe compute + + if (peflag) modify->delete_compute(id_pe); + delete [] id_pe; + +} + +/* ---------------------------------------------------------------------- */ + +void FixNPHugOMP::init() +{ + // Call base class init() + + FixNHOMP::init(); + + // set pe ptr + + int icompute = modify->find_compute(id_pe); + if (icompute < 0) + error->all(FLERR,"Potential energy ID for fix nvt/nph/npt does not exist"); + pe = modify->compute[icompute]; +} + + +/* ---------------------------------------------------------------------- + compute initial state before integrator starts +------------------------------------------------------------------------- */ + +void FixNPHugOMP::setup(int vflag) +{ + FixNHOMP::setup(vflag); + + if ( v0_set == 0 ) { + v0 = compute_vol(); + v0_set = 1; + } + + if ( p0_set == 0 ) { + p0_set = 1; + if (uniaxial == 1) + p0 = p_current[idir]; + else + p0 = (p_current[0]+p_current[1]+p_current[2])/3.0; + } + + if ( e0_set == 0 ) { + e0 = compute_etotal(); + e0_set = 1; + } + + double masstot = group->mass(igroup); + rho0 = nktv2p*force->mvv2e*masstot/v0; + + t_target = 0.01; + + pe->addstep(update->ntimestep+1); +} + +/* ---------------------------------------------------------------------- + compute target temperature and kinetic energy +-----------------------------------------------------------------------*/ + +void FixNPHugOMP::compute_temp_target() +{ + t_target = t_current + compute_hugoniot(); + ke_target = tdof * boltz * t_target; + pe->addstep(update->ntimestep+1); +} + +/* ---------------------------------------------------------------------- */ + +double FixNPHugOMP::compute_etotal() +{ + double epot,ekin,etot; + epot = pe->compute_scalar(); + if (thermo_energy) epot -= compute_scalar(); + ekin = temperature->compute_scalar(); + ekin *= 0.5 * tdof * force->boltz; + etot = epot+ekin; + return etot; +} + +/* ---------------------------------------------------------------------- */ + +double FixNPHugOMP::compute_vol() +{ + if (domain->dimension == 3) + return domain->xprd * domain->yprd * domain->zprd; + else + return domain->xprd * domain->yprd; +} + +/* ---------------------------------------------------------------------- + Computes the deviation of the current point + from the Hugoniot in temperature units. 
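   In the variables used below, the Hugoniot energy condition reads
       e - e0 = 0.5*(p + p0)*(v0 - v) / nktv2p
   (the nktv2p factor converts the P*V product to energy units), and this
   routine returns the residual of that relation divided by tdof*boltz,
   i.e. expressed as an equivalent temperature.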
+------------------------------------------------------------------------- */ + +double FixNPHugOMP::compute_hugoniot() +{ + double v,e,p; + double dhugo; + + e = compute_etotal(); + + temperature->compute_vector(); + + + if (uniaxial == 1) { + pressure->compute_vector(); + p = pressure->vector[idir]; + } else + p = pressure->compute_scalar(); + + v = compute_vol(); + + dhugo = (0.5 * (p + p0 ) * ( v0 - v)) / + force->nktv2p + e0 - e; + + dhugo /= tdof * boltz; + + return dhugo; +} + +/* ---------------------------------------------------------------------- + Compute shock velocity is distance/time units +------------------------------------------------------------------------- */ + +double FixNPHugOMP::compute_us() +{ + double v,p; + double eps,us; + + temperature->compute_vector(); + + if (uniaxial == 1) { + pressure->compute_vector(); + p = pressure->vector[idir]; + } else + p = pressure->compute_scalar(); + + v = compute_vol(); + + // Us^2 = (p-p0)/(rho0*eps) + + eps = 1.0 - v/v0; + if (eps < 1.0e-10) us = 0.0; + else if (p < p0) us = 0.0; + else us = sqrt((p-p0)/(rho0*eps)); + + return us; +} + +/* ---------------------------------------------------------------------- + Compute particle velocity is distance/time units +------------------------------------------------------------------------- */ + +double FixNPHugOMP::compute_up() +{ + double v; + double eps,us,up; + + v = compute_vol(); + us = compute_us(); + + // u = eps*Us + + eps = 1.0 - v/v0; + up = us*eps; + + return up; +} + +// look for index in local class +// if index not found, look in base class + +double FixNPHugOMP::compute_vector(int n) +{ + int ilen; + + // n = 0: Hugoniot energy difference (temperature units) + + ilen = 1; + if (n < ilen) return compute_hugoniot(); + n -= ilen; + + // n = 1: Shock velocity + + ilen = 1; + if (n < ilen) return compute_us(); + n -= ilen; + + // n = 2: Particle velocity + + ilen = 1; + if (n < ilen) return compute_up(); + n -= ilen; + + // index not found, look in base class + + return FixNHOMP::compute_vector(n); +} + +/* ---------------------------------------------------------------------- + pack restart data +------------------------------------------------------------------------- */ + +int FixNPHugOMP::pack_restart_data(double *list) +{ + int n = 0; + + list[n++] = e0; + list[n++] = v0; + list[n++] = p0; + + // call the base class function + + n += FixNHOMP::pack_restart_data(list+n); + + return n; +} + +/* ---------------------------------------------------------------------- + calculate the number of data to be packed +------------------------------------------------------------------------- */ + +int FixNPHugOMP::size_restart_global() +{ + int nsize = 3; + + // call the base class function + + nsize += FixNHOMP::size_restart_global(); + + return nsize; +} + +/* ---------------------------------------------------------------------- + use state info from restart file to restart the Fix +------------------------------------------------------------------------- */ + +void FixNPHugOMP::restart(char *buf) +{ + int n = 0; + double *list = (double *) buf; + e0 = list[n++]; + v0 = list[n++]; + p0 = list[n++]; + + e0_set = 1; + v0_set = 1; + p0_set = 1; + + // call the base class function + + buf += n*sizeof(double); + FixNHOMP::restart(buf); + +} + +/* ---------------------------------------------------------------------- */ + +int FixNPHugOMP::modify_param(int narg, char **arg) +{ + if (strcmp(arg[0],"e0") == 0) { + if (narg < 2) error->all(FLERR,"Illegal fix nphug/omp command"); + e0 = 
atof(arg[1]); + e0_set = 1; + return 2; + } else if (strcmp(arg[0],"v0") == 0) { + if (narg < 2) error->all(FLERR,"Illegal fix nphug/omp command"); + v0 = atof(arg[1]); + v0_set = 1; + return 2; + } else if (strcmp(arg[0],"p0") == 0) { + if (narg < 2) error->all(FLERR,"Illegal fix nphug/omp command"); + p0 = atof(arg[1]); + p0_set = 1; + return 2; + } + + return 0; +} diff --git a/src/USER-OMP/fix_nphug_omp.h b/src/USER-OMP/fix_nphug_omp.h new file mode 100644 index 0000000000..4a4db63440 --- /dev/null +++ b/src/USER-OMP/fix_nphug_omp.h @@ -0,0 +1,98 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(nphug/omp,FixNPHugOMP) + +#else + +#ifndef LMP_FIX_NPHUG_OMP_H +#define LMP_FIX_NPHUG_OMP_H + +#include "fix_nh_omp.h" + +namespace LAMMPS_NS { + +class FixNPHugOMP : public FixNHOMP { + public: + FixNPHugOMP(class LAMMPS *, int, char **); + virtual ~FixNPHugOMP(); + + virtual void init(); + virtual void setup(int); + virtual int modify_param(int, char **); + virtual int pack_restart_data(double *); // pack restart data + virtual void restart(char *); + + private: + class Compute *pe; // PE compute pointer + + void compute_temp_target(); + double compute_vector(int); + double compute_etotal(); + double compute_vol(); + double compute_hugoniot(); + double compute_us(); + double compute_up(); + + char *id_pe; + int peflag; + int v0_set,p0_set,e0_set; + double v0,p0,e0,rho0; + int idir; + int uniaxial; + + int size_restart_global(); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Pstart and Pstop must have the same value + +Self-explanatory. + +E: Specified target stress must be uniaxial or hydrostatic + +Self-explanatory. + +E: For triclinic deformation, specified target stress must be hydrostatic + +Triclinic pressure control is allowed using the tri keyword, but +non-hydrostatic pressure control can not be used in this case. + +E: Temperature control must be used with fix nphug + +The temp keyword must be provided. + +E: Pressure control must be used with fix nphug + +A pressure control keyword (iso, aniso, tri, x, y, or z) must be +provided. + +E: Potential energy ID for fix nvt/nph/npt does not exist + +A compute for potential energy must be defined. + +E: Illegal ... command + +Self-explanatory. Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +*/ diff --git a/src/USER-OMP/fix_rigid_small_omp.cpp b/src/USER-OMP/fix_rigid_small_omp.cpp new file mode 100644 index 0000000000..b5d20655e8 --- /dev/null +++ b/src/USER-OMP/fix_rigid_small_omp.cpp @@ -0,0 +1,620 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. 
Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "fix_rigid_small_omp.h" + +#include "atom.h" +#include "atom_vec_ellipsoid.h" +#include "atom_vec_line.h" +#include "atom_vec_tri.h" +#include "comm.h" +#include "domain.h" + +#include + +#if defined(_OPENMP) +#include +#endif + +#include "math_extra.h" +#include "math_const.h" + +using namespace LAMMPS_NS; +using namespace FixConst; +using namespace MathConst; + +#define EINERTIA 0.4 // moment of inertia prefactor for ellipsoid + +enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF}; + +typedef struct { double x,y,z; } dbl3_t; +#if defined(__GNUC__) +#define _noalias __restrict +#else +#define _noalias +#endif + +/* ---------------------------------------------------------------------- */ + +void FixRigidSmallOMP::initial_integrate(int vflag) +{ + int ibody; + +#if defined(_OPENMP) +#pragma omp parallel for default(none) private(ibody) schedule(static) +#endif + for (ibody = 0; ibody < nlocal_body; ibody++) { + + Body &b = body[ibody]; + + // update vcm by 1/2 step + + const double dtfm = dtf / b.mass; + b.vcm[0] += dtfm * b.fcm[0]; + b.vcm[1] += dtfm * b.fcm[1]; + b.vcm[2] += dtfm * b.fcm[2]; + + // update xcm by full step + + b.xcm[0] += dtv * b.vcm[0]; + b.xcm[1] += dtv * b.vcm[1]; + b.xcm[2] += dtv * b.vcm[2]; + + // update angular momentum by 1/2 step + + b.angmom[0] += dtf * b.torque[0]; + b.angmom[1] += dtf * b.torque[1]; + b.angmom[2] += dtf * b.torque[2]; + + // compute omega at 1/2 step from angmom at 1/2 step and current q + // update quaternion a full step via Richardson iteration + // returns new normalized quaternion, also updated omega at 1/2 step + // update ex,ey,ez to reflect new quaternion + + MathExtra::angmom_to_omega(b.angmom,b.ex_space,b.ey_space, + b.ez_space,b.inertia,b.omega); + MathExtra::richardson(b.quat,b.angmom,b.omega,b.inertia,dtq); + MathExtra::q_to_exyz(b.quat,b.ex_space,b.ey_space,b.ez_space); + } // end of omp parallel for + + // virial setup before call to set_xv + + if (vflag) v_setup(vflag); + else evflag = 0; + + // forward communicate updated info of all bodies + + commflag = INITIAL; + comm->forward_comm_variable_fix(this); + + // set coords/orient and velocity/rotation of atoms in rigid bodies + + if (triclinic) + if (evflag) + set_xv_thr<1,1>(); + else + set_xv_thr<1,0>(); + else + if (evflag) + set_xv_thr<0,1>(); + else + set_xv_thr<0,0>(); +} + +/* ---------------------------------------------------------------------- */ + +void FixRigidSmallOMP::final_integrate() +{ + double * const * _noalias const x = atom->x; + const dbl3_t * _noalias const f = (dbl3_t *) atom->f[0]; + const double * const * const torque_one = atom->torque; + const tagint * _noalias const image = atom->image; + const int nlocal = atom->nlocal; + const int nthreads=comm->nthreads; + int i, ibody; + +#if defined(_OPENMP) +#pragma omp parallel for default(none) private(ibody) schedule(static) +#endif + for (ibody = 0; ibody < nlocal_body+nghost_body; ibody++) { + double * _noalias const fcm = 
body[ibody].fcm; + fcm[0] = fcm[1] = fcm[2] = 0.0; + double * _noalias const tcm = body[ibody].torque; + tcm[0] = tcm[1] = tcm[2] = 0.0; + } + + // sum over atoms to get force and torque on rigid body + // we likely have a large number of rigid objects with only a + // a few atoms each. so we loop over all atoms for all threads + // and then each thread only processes some bodies. + +#if defined(_OPENMP) +#pragma omp parallel default(none) private(i,ibody) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + for (i = 0; i < nlocal; i++) { + ibody = atom2body[i]; + if ((ibody < 0) || (ibody % nthreads != tid)) continue; + + Body &b = body[ibody]; + + double unwrap[3]; + domain->unmap(x[i],image[i],unwrap); + + double * _noalias const fcm = b.fcm; + double * _noalias const xcm = b.xcm; + double * _noalias const tcm = b.torque; + + const double dx = unwrap[0] - xcm[0]; + const double dy = unwrap[1] - xcm[1]; + const double dz = unwrap[2] - xcm[2]; + + fcm[0] += f[i].x; + fcm[1] += f[i].y; + fcm[2] += f[i].z; + + tcm[0] += dy*f[i].z - dz*f[i].y; + tcm[1] += dz*f[i].x - dx*f[i].z; + tcm[2] += dx*f[i].y - dy*f[i].x; + + if (extended && (eflags[i] & TORQUE)) { + tcm[0] += torque_one[i][0]; + tcm[1] += torque_one[i][1]; + tcm[2] += torque_one[i][2]; + } + } + } // end of omp parallel region + + // reverse communicate fcm, torque of all bodies + + commflag = FORCE_TORQUE; + comm->reverse_comm_variable_fix(this); + + // include Langevin thermostat forces and torques + + if (langflag) { +#if defined(_OPENMP) +#pragma omp parallel for default(none) private(ibody) schedule(static) +#endif + for (ibody = 0; ibody < nlocal_body; ibody++) { + double * _noalias const fcm = body[ibody].fcm; + fcm[0] += langextra[ibody][0]; + fcm[1] += langextra[ibody][1]; + fcm[2] += langextra[ibody][2]; + double * _noalias const tcm = body[ibody].torque; + tcm[0] += langextra[ibody][3]; + tcm[1] += langextra[ibody][4]; + tcm[2] += langextra[ibody][5]; + } + } + + // update vcm and angmom, recompute omega + +#if defined(_OPENMP) +#pragma omp parallel for default(none) private(ibody) schedule(static) +#endif + for (ibody = 0; ibody < nlocal_body; ibody++) { + Body &b = body[ibody]; + + // update vcm by 1/2 step + + const double dtfm = dtf / b.mass; + b.vcm[0] += dtfm * b.fcm[0]; + b.vcm[1] += dtfm * b.fcm[1]; + b.vcm[2] += dtfm * b.fcm[2]; + + // update angular momentum by 1/2 step + + b.angmom[0] += dtf * b.torque[0]; + b.angmom[1] += dtf * b.torque[1]; + b.angmom[2] += dtf * b.torque[2]; + + MathExtra::angmom_to_omega(b.angmom,b.ex_space,b.ey_space, + b.ez_space,b.inertia,b.omega); + } + + // forward communicate updated info of all bodies + + commflag = FINAL; + comm->forward_comm_variable_fix(this); + + // set velocity/rotation of atoms in rigid bodies + // virial is already setup from initial_integrate + // triclinic only matters for virial calculation. 
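  // TRICLINIC and EVFLAG below are compile-time template parameters of
  // set_v_thr(); selecting the specialization here avoids per-atom branching
  // inside the threaded loops. A minimal sketch of the same pattern
  // (hypothetical kernel() stands in for set_v_thr()/set_xv_thr()):
  //
  //   template <int TRICLINIC, int EVFLAG> void kernel() {
  //     for (int i = 0; i < n; ++i) {
  //       if (TRICLINIC) { /* add box tilt terms */ }
  //       if (EVFLAG)    { /* tally virial */ }
  //     }
  //   }
  //   if (evflag) kernel<0,1>(); else kernel<0,0>();   // constants fold away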
+ + if (evflag) + if (triclinic) + set_v_thr<1,1>(); + else + set_v_thr<0,1>(); + else + set_v_thr<0,0>(); +} + + +/* ---------------------------------------------------------------------- + set space-frame coords and velocity of each atom in each rigid body + set orientation and rotation of extended particles + x = Q displace + Xcm, mapped back to periodic box + v = Vcm + (W cross (x - Xcm)) +------------------------------------------------------------------------- */ + +template +void FixRigidSmallOMP::set_xv_thr() +{ + dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + dbl3_t * _noalias const v = (dbl3_t *) atom->v[0]; + const dbl3_t * _noalias const f = (dbl3_t *) atom->f[0]; + const double * _noalias const rmass = atom->rmass; + const double * _noalias const mass = atom->mass; + const int * _noalias const type = atom->type; + const tagint * _noalias const image = atom->image; + + double v0=0.0,v1=0.0,v2=0.0,v3=0.0,v4=0.0,v5=0.0; + + const double xprd = domain->xprd; + const double yprd = domain->yprd; + const double zprd = domain->zprd; + const double xy = domain->xy; + const double xz = domain->xz; + const double yz = domain->yz; + + // set x and v of each atom + + const int nlocal = atom->nlocal; + int i; + +#if defined(_OPENMP) +#pragma omp parallel for default(none) private(i) reduction(+:v0,v1,v2,v3,v4,v5) +#endif + for (i = 0; i < nlocal; i++) { + const int ibody = atom2body[i]; + if (ibody < 0) continue; + + Body &b = body[ibody]; + + const int xbox = (image[i] & IMGMASK) - IMGMAX; + const int ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX; + const int zbox = (image[i] >> IMG2BITS) - IMGMAX; + const double deltax = xbox*xprd + (TRICLINIC ? ybox*xy + zbox*xz : 0.0); + const double deltay = ybox*yprd + (TRICLINIC ? zbox*yz : 0.0); + const double deltaz = zbox*zprd; + + // save old positions and velocities for virial + double x0,x1,x2,vx,vy,vz; + if (EVFLAG) { + x0 = x[i].x + deltax; + x1 = x[i].y + deltay; + x2 = x[i].z + deltaz; + vx = v[i].x; + vy = v[i].y; + vz = v[i].z; + } + + // x = displacement from center-of-mass, based on body orientation + // v = vcm + omega around center-of-mass + + MathExtra::matvec(b.ex_space,b.ey_space,b.ez_space,displace[i],&x[i].x); + + v[i].x = b.omega[1]*x[i].z - b.omega[2]*x[i].y + b.vcm[0]; + v[i].y = b.omega[2]*x[i].x - b.omega[0]*x[i].z + b.vcm[1]; + v[i].z = b.omega[0]*x[i].y - b.omega[1]*x[i].x + b.vcm[2]; + + // add center of mass to displacement + // map back into periodic box via xbox,ybox,zbox + // for triclinic, add in box tilt factors as well + + x[i].x += b.xcm[0] - deltax; + x[i].y += b.xcm[1] - deltay; + x[i].z += b.xcm[2] - deltaz; + + // virial = unwrapped coords dotted into body constraint force + // body constraint force = implied force due to v change minus f external + // assume f does not include forces internal to body + // 1/2 factor b/c final_integrate contributes other half + // assume per-atom contribution is due to constraint force on that atom + + if (EVFLAG) { + double massone,vr[6]; + + if (rmass) massone = rmass[i]; + else massone = mass[type[i]]; + + const double fc0 = 0.5*(massone*(v[i].x - vx)/dtf - f[i].x); + const double fc1 = 0.5*(massone*(v[i].y - vy)/dtf - f[i].y); + const double fc2 = 0.5*(massone*(v[i].z - vz)/dtf - f[i].z); + + vr[0] = x0*fc0; vr[1] = x1*fc1; vr[2] = x2*fc2; + vr[3] = x0*fc1; vr[4] = x0*fc2; vr[5] = x1*fc2; + + // Fix::v_tally() is not thread safe, so we do this manually here + // accumulate global virial into thread-local variables for reduction + if (vflag_global) { + v0 += vr[0]; 
+ v1 += vr[1]; + v2 += vr[2]; + v3 += vr[3]; + v4 += vr[4]; + v5 += vr[5]; + } + + // accumulate per atom virial directly since we parallelize over atoms. + if (vflag_atom) { + vatom[i][0] += vr[0]; + vatom[i][1] += vr[1]; + vatom[i][2] += vr[2]; + vatom[i][3] += vr[3]; + vatom[i][4] += vr[4]; + vatom[i][5] += vr[5]; + } + } + } + + // second part of thread safe virial accumulation + // add global virial component after it was reduced across all threads + if (EVFLAG) { + if (vflag_global) { + virial[0] += v0; + virial[1] += v1; + virial[2] += v2; + virial[3] += v3; + virial[4] += v4; + virial[5] += v5; + } + } + + // set orientation, omega, angmom of each extended particle + // XXX: extended particle info not yet multi-threaded + + if (extended) { + double ione[3],exone[3],eyone[3],ezone[3],p[3][3]; + double theta_body,theta; + double *shape,*quatatom,*inertiaatom; + + AtomVecEllipsoid::Bonus *ebonus; + if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus; + AtomVecLine::Bonus *lbonus; + if (avec_line) lbonus = avec_line->bonus; + AtomVecTri::Bonus *tbonus; + if (avec_tri) tbonus = avec_tri->bonus; + double **omega = atom->omega; + double **angmom = atom->angmom; + double **mu = atom->mu; + int *ellipsoid = atom->ellipsoid; + int *line = atom->line; + int *tri = atom->tri; + + for (int i = 0; i < nlocal; i++) { + if (atom2body[i] < 0) continue; + Body &b = body[atom2body[i]]; + + if (eflags[i] & SPHERE) { + omega[i][0] = b.omega[0]; + omega[i][1] = b.omega[1]; + omega[i][2] = b.omega[2]; + } else if (eflags[i] & ELLIPSOID) { + shape = ebonus[ellipsoid[i]].shape; + quatatom = ebonus[ellipsoid[i]].quat; + MathExtra::quatquat(b.quat,orient[i],quatatom); + MathExtra::qnormalize(quatatom); + ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]); + ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]); + ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]); + MathExtra::q_to_exyz(quatatom,exone,eyone,ezone); + MathExtra::omega_to_angmom(b.omega,exone,eyone,ezone,ione,angmom[i]); + } else if (eflags[i] & LINE) { + if (b.quat[3] >= 0.0) theta_body = 2.0*acos(b.quat[0]); + else theta_body = -2.0*acos(b.quat[0]); + theta = orient[i][0] + theta_body; + while (theta <= MINUSPI) theta += TWOPI; + while (theta > MY_PI) theta -= TWOPI; + lbonus[line[i]].theta = theta; + omega[i][0] = b.omega[0]; + omega[i][1] = b.omega[1]; + omega[i][2] = b.omega[2]; + } else if (eflags[i] & TRIANGLE) { + inertiaatom = tbonus[tri[i]].inertia; + quatatom = tbonus[tri[i]].quat; + MathExtra::quatquat(b.quat,orient[i],quatatom); + MathExtra::qnormalize(quatatom); + MathExtra::q_to_exyz(quatatom,exone,eyone,ezone); + MathExtra::omega_to_angmom(b.omega,exone,eyone,ezone, + inertiaatom,angmom[i]); + } + if (eflags[i] & DIPOLE) { + MathExtra::quat_to_mat(b.quat,p); + MathExtra::matvec(p,dorient[i],mu[i]); + MathExtra::snormalize3(mu[i][3],mu[i],mu[i]); + } + } + } +} + +/* ---------------------------------------------------------------------- + set space-frame velocity of each atom in a rigid body + set omega and angmom of extended particles + v = Vcm + (W cross (x - Xcm)) +------------------------------------------------------------------------- */ + +template +void FixRigidSmallOMP::set_v_thr() +{ + dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + dbl3_t * _noalias const v = (dbl3_t *) atom->v[0]; + const dbl3_t * _noalias const f = (dbl3_t *) atom->f[0]; + const double * _noalias const rmass = atom->rmass; + const double * _noalias const mass = atom->mass; + const int * 
_noalias const type = atom->type; + const tagint * _noalias const image = atom->image; + + const double xprd = domain->xprd; + const double yprd = domain->yprd; + const double zprd = domain->zprd; + const double xy = domain->xy; + const double xz = domain->xz; + const double yz = domain->yz; + + double v0=0.0,v1=0.0,v2=0.0,v3=0.0,v4=0.0,v5=0.0; + + // set v of each atom + + const int nlocal = atom->nlocal; + int i; + +#if defined(_OPENMP) +#pragma omp parallel for default(none) private(i) reduction(+:v0,v1,v2,v3,v4,v5) +#endif + for (i = 0; i < nlocal; i++) { + const int ibody = atom2body[i]; + if (ibody < 0) continue; + + Body &b = body[atom2body[i]]; + double delta[3],vx,vy,vz; + + MathExtra::matvec(b.ex_space,b.ey_space,b.ez_space,displace[i],delta); + + // save old velocities for virial + + if (EVFLAG) { + vx = v[i].x; + vy = v[i].y; + vz = v[i].z; + } + + v[i].x = b.omega[1]*delta[2] - b.omega[2]*delta[1] + b.vcm[0]; + v[i].y = b.omega[2]*delta[0] - b.omega[0]*delta[2] + b.vcm[1]; + v[i].z = b.omega[0]*delta[1] - b.omega[1]*delta[0] + b.vcm[2]; + + // virial = unwrapped coords dotted into body constraint force + // body constraint force = implied force due to v change minus f external + // assume f does not include forces internal to body + // 1/2 factor b/c initial_integrate contributes other half + // assume per-atom contribution is due to constraint force on that atom + + if (EVFLAG) { + double massone, vr[6]; + if (rmass) massone = rmass[i]; + else massone = mass[type[i]]; + + const int xbox = (image[i] & IMGMASK) - IMGMAX; + const int ybox = (image[i] >> IMGBITS & IMGMASK) - IMGMAX; + const int zbox = (image[i] >> IMG2BITS) - IMGMAX; + const double deltax = xbox*xprd + (TRICLINIC ? ybox*xy + zbox*xz : 0.0); + const double deltay = ybox*yprd + (TRICLINIC ? zbox*yz : 0.0); + const double deltaz = zbox*zprd; + + const double fc0 = 0.5*(massone*(v[i].x - vx)/dtf - f[i].x); + const double fc1 = 0.5*(massone*(v[i].y - vy)/dtf - f[i].y); + const double fc2 = 0.5*(massone*(v[i].z - vz)/dtf - f[i].z); + + const double x0 = x[i].x + deltax; + const double x1 = x[i].y + deltay; + const double x2 = x[i].z + deltaz; + + vr[0] = x0*fc0; vr[1] = x1*fc1; vr[2] = x2*fc2; + vr[3] = x0*fc1; vr[4] = x0*fc2; vr[5] = x1*fc2; + + // Fix::v_tally() is not thread safe, so we do this manually here + // accumulate global virial into thread-local variables and reduce them later + if (vflag_global) { + v0 += vr[0]; + v1 += vr[1]; + v2 += vr[2]; + v3 += vr[3]; + v4 += vr[4]; + v5 += vr[5]; + } + + // accumulate per atom virial directly since we parallelize over atoms. 
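        // (the global virial, by contrast, goes into the thread-private
        //  v0..v5 scalars named in the reduction(+:...) clause and is
        //  folded into virial[0..5] once after the parallel loop, since
        //  Fix::v_tally() is not thread safe)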
+ if (vflag_atom) { + vatom[i][0] += vr[0]; + vatom[i][1] += vr[1]; + vatom[i][2] += vr[2]; + vatom[i][3] += vr[3]; + vatom[i][4] += vr[4]; + vatom[i][5] += vr[5]; + } + } + } // end of parallel for + + // second part of thread safe virial accumulation + // add global virial component after it was reduced across all threads + if (EVFLAG) { + if (vflag_global) { + virial[0] += v0; + virial[1] += v1; + virial[2] += v2; + virial[3] += v3; + virial[4] += v4; + virial[5] += v5; + } + } + + // set omega, angmom of each extended particle + // XXX: extended particle info not yet multi-threaded + + if (extended) { + double ione[3],exone[3],eyone[3],ezone[3]; + double *shape,*quatatom,*inertiaatom; + + AtomVecEllipsoid::Bonus *ebonus; + if (avec_ellipsoid) ebonus = avec_ellipsoid->bonus; + AtomVecTri::Bonus *tbonus; + if (avec_tri) tbonus = avec_tri->bonus; + double **omega = atom->omega; + double **angmom = atom->angmom; + int *ellipsoid = atom->ellipsoid; + int *tri = atom->tri; + + for (int i = 0; i < nlocal; i++) { + if (atom2body[i] < 0) continue; + Body &b = body[atom2body[i]]; + + if (eflags[i] & SPHERE) { + omega[i][0] = b.omega[0]; + omega[i][1] = b.omega[1]; + omega[i][2] = b.omega[2]; + } else if (eflags[i] & ELLIPSOID) { + shape = ebonus[ellipsoid[i]].shape; + quatatom = ebonus[ellipsoid[i]].quat; + ione[0] = EINERTIA*rmass[i] * (shape[1]*shape[1] + shape[2]*shape[2]); + ione[1] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[2]*shape[2]); + ione[2] = EINERTIA*rmass[i] * (shape[0]*shape[0] + shape[1]*shape[1]); + MathExtra::q_to_exyz(quatatom,exone,eyone,ezone); + MathExtra::omega_to_angmom(b.omega,exone,eyone,ezone,ione, + angmom[i]); + } else if (eflags[i] & LINE) { + omega[i][0] = b.omega[0]; + omega[i][1] = b.omega[1]; + omega[i][2] = b.omega[2]; + } else if (eflags[i] & TRIANGLE) { + inertiaatom = tbonus[tri[i]].inertia; + quatatom = tbonus[tri[i]].quat; + MathExtra::q_to_exyz(quatatom,exone,eyone,ezone); + MathExtra::omega_to_angmom(b.omega,exone,eyone,ezone, + inertiaatom,angmom[i]); + } + } + } +} + diff --git a/src/USER-OMP/fix_rigid_small_omp.h b/src/USER-OMP/fix_rigid_small_omp.h new file mode 100644 index 0000000000..ffc4bee660 --- /dev/null +++ b/src/USER-OMP/fix_rigid_small_omp.h @@ -0,0 +1,134 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef FIX_CLASS + +FixStyle(rigid/small/omp,FixRigidSmallOMP) + +#else + +#ifndef LMP_FIX_RIGID_SMALL_OMP_H +#define LMP_FIX_RIGID_SMALL_OMP_H + +#include "fix_rigid_small.h" + +namespace LAMMPS_NS { + +class FixRigidSmallOMP : public FixRigidSmall { + public: + FixRigidSmallOMP(class LAMMPS *lmp, int narg, char **args) + : FixRigidSmall(lmp,narg,args) {}; + virtual ~FixRigidSmallOMP() {}; + + virtual void initial_integrate(int); + virtual void final_integrate(); + + private: + template void set_xv_thr(); + template void set_v_thr(); +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Illegal ... command + +Self-explanatory. 
Check the input script syntax and compare to the +documentation for the command. You can use -echo screen as a +command-line option when running LAMMPS to see the offending line. + +E: Fix rigid molecule requires atom attribute molecule + +Self-explanatory. + +E: Could not find fix rigid group ID + +A group ID used in the fix rigid command does not exist. + +E: One or more atoms belong to multiple rigid bodies + +Two or more rigid bodies defined by the fix rigid command cannot +contain the same atom. + +E: No rigid bodies defined + +The fix specification did not end up defining any rigid bodies. + +E: Fix rigid z force cannot be on for 2d simulation + +Self-explanatory. + +E: Fix rigid xy torque cannot be on for 2d simulation + +Self-explanatory. + +E: Fix rigid langevin period must be > 0.0 + +Self-explanatory. + +E: One or zero atoms in rigid body + +Any rigid body defined by the fix rigid command must contain 2 or more +atoms. + +W: More than one fix rigid + +It is not efficient to use fix rigid more than once. + +E: Rigid fix must come before NPT/NPH fix + +NPT/NPH fix must be defined in input script after all rigid fixes, +else the rigid fix contribution to the pressure virial is +incorrect. + +W: Computing temperature of portions of rigid bodies + +The group defined by the temperature compute does not encompass all +the atoms in one or more rigid bodies, so the change in +degrees-of-freedom for the atoms in those partial rigid bodies will +not be accounted for. + +E: Fix rigid atom has non-zero image flag in a non-periodic dimension + +You cannot set image flags for non-periodic dimensions. + +E: Insufficient Jacobi rotations for rigid body + +Eigensolve for rigid body was not sufficiently accurate. + +E: Fix rigid: Bad principal moments + +The principal moments of inertia computed for a rigid body +are not within the required tolerances. + +E: Cannot open fix rigid infile %s + +UNDOCUMENTED + +E: Unexpected end of fix rigid file + +UNDOCUMENTED + +E: Incorrect rigid body format in fix rigid file + +UNDOCUMENTED + +E: Invalid rigid body ID in fix rigid file + +UNDOCUMENTED + +*/ diff --git a/src/USER-OMP/pppm_disp_omp.cpp b/src/USER-OMP/pppm_disp_omp.cpp new file mode 100644 index 0000000000..6d6ab70a0a --- /dev/null +++ b/src/USER-OMP/pppm_disp_omp.cpp @@ -0,0 +1,1779 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U), Rolf Isele-Holder (RWTH Aachen University) +------------------------------------------------------------------------- */ + +#include "pppm_disp_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "force.h" +#include "memory.h" +#include "math_const.h" + +#include +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + +#define OFFSET 16384 + + +/* ---------------------------------------------------------------------- */ + +PPPMDispOMP::PPPMDispOMP(LAMMPS *lmp, int narg, char **arg) : + PPPMDisp(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE) +{ + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMDispOMP::allocate() +{ + PPPMDisp::allocate(); + + const int nthreads = comm->nthreads; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + if (function[0]) { + ThrData *thr = fix->get_thr(tid); + thr->init_pppm(order,memory); + } + if (function[1] + function[2]) { + ThrData * thr = fix->get_thr(tid); + thr->init_pppm_disp(order_6,memory); + } + } +} + +/* ---------------------------------------------------------------------- + free memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMDispOMP::deallocate() +{ + PPPMDisp::deallocate(); + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + if (function[0]) { + ThrData * thr = fix->get_thr(tid); + thr->init_pppm(-order,memory); + } + if (function[1] + function[2]) { + ThrData * thr = fix->get_thr(tid); + thr->init_pppm_disp(-order_6,memory); + } + } +} + +/* ---------------------------------------------------------------------- + Compute the modified (hockney-eastwood) coulomb green function +------------------------------------------------------------------------- */ + +void PPPMDispOMP::compute_gf() +{ +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + + double *prd; + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int tid,nn,nnfrom,nnto,nx,ny,nz,k,l,m; + int kper,lper,mper; + double snx,sny,snz,snx2,sny2,snz2; + double sqk; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,dot1,dot2; + double numerator,denominator; + + const int nnx = nxhi_fft-nxlo_fft+1; + const int nny = nyhi_fft-nylo_fft+1; + + loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads); + + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + qz = unitkz*mper; + snz = sin(0.5*qz*zprd_slab/nz_pppm); + snz2 = snz*snz; + sz = exp(-0.25*pow(qz/g_ewald,2.0)); + wz = 1.0; + argz = 
0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,order); + wz *= wz; + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + qy = unitky*lper; + sny = sin(0.5*qy*yprd/ny_pppm); + sny2 = sny*sny; + sy = exp(-0.25*pow(qy/g_ewald,2.0)); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,order); + wy *= wy; + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + + /* only compute the part designated to this thread */ + nn = k-nxlo_fft + nnx*(l-nylo_fft + nny*(m-nzlo_fft)); + if ((nn < nnfrom) || (nn >=nnto)) continue; + + kper = k - nx_pppm*(2*k/nx_pppm); + qx = unitkx*kper; + snx = sin(0.5*qx*xprd/nx_pppm); + snx2 = snx*snx; + sx = exp(-0.25*pow(qx/g_ewald,2.0)); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,order); + wx *= wx; + + sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); + + if (sqk != 0.0) { + numerator = 4.0*MY_PI/sqk; + denominator = gf_denom(snx2,sny2,snz2, gf_b, order); + greensfn[nn] = numerator*sx*sy*sz*wx*wy*wz/denominator; + } else greensfn[nn] = 0.0; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + Compyute the modified (hockney-eastwood) dispersion green function +------------------------------------------------------------------------- */ + +void PPPMDispOMP::compute_gf_6() +{ +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + double *prd; + int k,l,m,nn; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int kper,lper,mper; + double sqk; + double snx,sny,snz,snx2,sny2,snz2; + double argx,argy,argz,wx,wy,wz,sx,sy,sz; + double qx,qy,qz; + double rtsqk, term; + double numerator,denominator; + double inv2ew = 2*g_ewald_6; + inv2ew = 1/inv2ew; + double rtpi = sqrt(MY_PI); + int nnfrom, nnto, tid; + + numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0); + + const int nnx = nxhi_fft_6-nxlo_fft_6+1; + const int nny = nyhi_fft_6-nylo_fft_6+1; + + loop_setup_thr(nnfrom, nnto, tid, nfft_6, comm->nthreads); + + for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { + mper = m - nz_pppm_6*(2*m/nz_pppm_6); + qz = unitkz*mper; + snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6); + snz2 = snz*snz; + sz = exp(-qz*qz*inv2ew*inv2ew); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm_6; + if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); + wz *= wz; + + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { + lper = l - ny_pppm_6*(2*l/ny_pppm_6); + qy = unitky*lper; + sny = sin(0.5*unitky*lper*yprd/ny_pppm_6); + sny2 = sny*sny; + sy = exp(-qy*qy*inv2ew*inv2ew); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm_6; + if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); + wy *= wy; + + for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { + + /* only compute the part designated to this thread */ + nn = k-nxlo_fft_6 + nnx*(l-nylo_fft_6 + nny*(m-nzlo_fft_6)); + if ((nn < nnfrom) || (nn >=nnto)) continue; + + kper = k - nx_pppm_6*(2*k/nx_pppm_6); + qx = unitkx*kper; + snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6); + snx2 = snx*snx; + sx = exp(-qx*qx*inv2ew*inv2ew); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm_6; + if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); + wx *= wx; + 
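          // writing b = 1/(2*g_ewald_6) and k^2 = sqk, the lines below
          // assemble the dispersion (1/r^6) analog of the optimized
          // Green's function:
          //   greensfn_6(k) = -pi^(3/2)*g_ewald_6^3/3
          //                   * [ (1 - 2*k^2*b^2)*exp(-k^2*b^2)
          //                       + 2*k^3*b^3*sqrt(pi)*erfc(k*b) ]
          //                   * wx*wy*wz / gf_denom(snx2,sny2,snz2,gf_b_6,order_6)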
+ sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); + + denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6); + rtsqk = sqrt(sqk); + term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz + + 2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew); + greensfn_6[nn] = numerator*term*wx*wy*wz/denominator; + } + } + } + } +} +/* ---------------------------------------------------------------------- + run the regular toplevel compute method from plain PPPPM + which will have individual methods replaced by our threaded + versions and then call the obligatory force reduction. +------------------------------------------------------------------------- */ + +void PPPMDispOMP::compute(int eflag, int vflag) +{ + + PPPMDisp::compute(eflag,vflag); +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + ThrData *thr = fix->get_thr(tid); + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPMDispOMP::particle_map(double dxinv, double dyinv, + double dzinv, double sft, + int ** part2grid, int nup, + int nlw, int nxlo_o, + int nylo_o, int nzlo_o, + int nxhi_o, int nyhi_o, + int nzhi_o) +{ + const int * _noalias const type = atom->type; + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + int3_t * _noalias const p2g = (int3_t *) part2grid[0]; + const double boxlox = boxlo[0]; + const double boxloy = boxlo[1]; + const double boxloz = boxlo[2]; + const int nlocal = atom->nlocal; + + const double delxinv = dxinv; + const double delyinv = dyinv; + const double delzinv = dzinv; + const double shift = sft; + const int nupper = nup; + const int nlower = nlw; + const int nxlo_out = nxlo_o; + const int nylo_out = nylo_o; + const int nzlo_out = nzlo_o; + const int nxhi_out = nxhi_o; + const int nyhi_out = nyhi_o; + const int nzhi_out = nzhi_o; + + int i, flag = 0; +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static) +#endif + for (i = 0; i < nlocal; i++) { + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + const int nx = static_cast ((x[i].x-boxlox)*delxinv+shift) - OFFSET; + const int ny = static_cast ((x[i].y-boxloy)*delyinv+shift) - OFFSET; + const int nz = static_cast ((x[i].z-boxloz)*delzinv+shift) - OFFSET; + + p2g[i].a = nx; + p2g[i].b = ny; + p2g[i].t = nz; + + // check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out) + flag++; + } + + int flag_all; + MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); + if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick 
(including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMDispOMP::make_rho_c() +{ + + // clear 3d density array + + FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]); + memset(d,0,ngrid*sizeof(FFT_SCALAR)); + + const int ix = nxhi_out - nxlo_out + 1; + const int iy = nyhi_out - nylo_out + 1; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + const double * _noalias const q = atom->q; + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + const int3_t * _noalias const p2g = (int3_t *) part2grid[0]; + + const double boxlox = boxlo[0]; + const double boxloy = boxlo[1]; + const double boxloz = boxlo[2]; + + // determine range of grid points handled by this thread + int i,jfrom,jto,tid; + loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads); + + // get per thread data + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + + // loop over all local atoms for all threads + const int nlocal = atom->nlocal; + for (i = 0; i < nlocal; i++) { + + const int nx = p2g[i].a; + const int ny = p2g[i].b; + const int nz = p2g[i].t; + + // pre-screen whether this atom will ever come within + // reach of the data segement this thread is updating. + if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) + || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; + + const FFT_SCALAR dx = nx+shiftone - (x[i].x-boxlox)*delxinv; + const FFT_SCALAR dy = ny+shiftone - (x[i].y-boxloy)*delyinv; + const FFT_SCALAR dz = nz+shiftone - (x[i].z-boxloz)*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz,order,rho_coeff); + + const FFT_SCALAR z0 = delvolinv * q[i]; + + for (int n = nlower; n <= nupper; ++n) { + const int jn = (nz+n-nzlo_out)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower; m <= nupper; ++m) { + const int jm = jn+(ny+m-nylo_out)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower; l <= nupper; ++l) { + const int jl = jm+nx+l-nxlo_out; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + d[jl] += x0*r1d[0][l]; + } + } + } + } + } +} + + +/* ---------------------------------------------------------------------- + same as above for dispersion interaction with geometric mixing rule +------------------------------------------------------------------------- */ + +void PPPMDispOMP::make_rho_g() +{ + + // clear 3d density array + + FFT_SCALAR * _noalias const d = &(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]); + memset(d,0,ngrid_6*sizeof(FFT_SCALAR)); + + const int ix = nxhi_out_6 - nxlo_out_6 + 1; + const int iy = nyhi_out_6 - nylo_out_6 + 1; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + const int3_t * _noalias const p2g = (int3_t *) part2grid_6[0]; + + const double boxlox = boxlo[0]; + const double boxloy = boxlo[1]; + const double boxloz = boxlo[2]; + + // determine range of grid points handled by this thread + int i,jfrom,jto,tid; + loop_setup_thr(jfrom,jto,tid,ngrid_6,comm->nthreads); + + // get per thread data + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); + + // loop over my charges, add their contribution to 
nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + + // loop over all local atoms for all threads + const int nlocal = atom->nlocal; + for (i = 0; i < nlocal; i++) { + + const int nx = p2g[i].a; + const int ny = p2g[i].b; + const int nz = p2g[i].t; + + // pre-screen whether this atom will ever come within + // reach of the data segement this thread is updating. + if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto) + || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; + + const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6; + const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6; + const FFT_SCALAR dz = nz+shiftone_6 - (x[i].z-boxloz)*delzinv_6; + + compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6); + + const int type = atom->type[i]; + const double lj = B[type]; + const FFT_SCALAR z0 = delvolinv_6 * lj; + + for (int n = nlower_6; n <= nupper_6; ++n) { + const int jn = (nz+n-nzlo_out_6)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower_6; m <= nupper_6; ++m) { + const int jm = jn+(ny+m-nylo_out_6)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower_6; l <= nupper_6; ++l) { + const int jl = jm+nx+l-nxlo_out_6; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + d[jl] += x0*r1d[0][l]; + } + } + } + } + } +} + + +/* ---------------------------------------------------------------------- + same as above for dispersion interaction with arithmetic mixing rule +------------------------------------------------------------------------- */ + +void PPPMDispOMP::make_rho_a() +{ + + // clear 3d density array + + FFT_SCALAR * _noalias const d0 = &(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]); + FFT_SCALAR * _noalias const d1 = &(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]); + FFT_SCALAR * _noalias const d2 = &(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]); + FFT_SCALAR * _noalias const d3 = &(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]); + FFT_SCALAR * _noalias const d4 = &(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]); + FFT_SCALAR * _noalias const d5 = &(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]); + FFT_SCALAR * _noalias const d6 = &(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]); + + memset(d0,0,ngrid_6*sizeof(FFT_SCALAR)); + memset(d1,0,ngrid_6*sizeof(FFT_SCALAR)); + memset(d2,0,ngrid_6*sizeof(FFT_SCALAR)); + memset(d3,0,ngrid_6*sizeof(FFT_SCALAR)); + memset(d4,0,ngrid_6*sizeof(FFT_SCALAR)); + memset(d5,0,ngrid_6*sizeof(FFT_SCALAR)); + memset(d6,0,ngrid_6*sizeof(FFT_SCALAR)); + + const int ix = nxhi_out_6 - nxlo_out_6 + 1; + const int iy = nyhi_out_6 - nylo_out_6 + 1; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + const int3_t * _noalias const p2g = (int3_t *) part2grid_6[0]; + + const double boxlox = boxlo[0]; + const double boxloy = boxlo[1]; + const double boxloz = boxlo[2]; + + // determine range of grid points handled by this thread + int i,jfrom,jto,tid; + loop_setup_thr(jfrom,jto,tid,ngrid_6,comm->nthreads); + + // get per thread data + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid 
pt + + // loop over all local atoms for all threads + const int nlocal = atom->nlocal; + for (i = 0; i < nlocal; i++) { + + const int nx = p2g[i].a; + const int ny = p2g[i].b; + const int nz = p2g[i].t; + + // pre-screen whether this atom will ever come within + // reach of the data segement this thread is updating. + if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto) + || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; + + const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6; + const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6; + const FFT_SCALAR dz = nz+shiftone_6 - (x[i].z-boxloz)*delzinv_6; + + compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6); + + const int type = atom->type[i]; + const double lj0 = B[7*type]; + const double lj1 = B[7*type+1]; + const double lj2 = B[7*type+2]; + const double lj3 = B[7*type+3]; + const double lj4 = B[7*type+4]; + const double lj5 = B[7*type+5]; + const double lj6 = B[7*type+6]; + + const FFT_SCALAR z0 = delvolinv_6; + + for (int n = nlower_6; n <= nupper_6; ++n) { + const int jn = (nz+n-nzlo_out_6)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower_6; m <= nupper_6; ++m) { + const int jm = jn+(ny+m-nylo_out_6)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower_6; l <= nupper_6; ++l) { + const int jl = jm+nx+l-nxlo_out_6; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + const double w = x0*r1d[0][l]; + + d0[jl] += w*lj0; + d1[jl] += w*lj1; + d2[jl] += w*lj2; + d3[jl] += w*lj3; + d4[jl] += w*lj4; + d5[jl] += w*lj5; + d6[jl] += w*lj6; + } + } + } + } + } +} + + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles + for ik scheme +------------------------------------------------------------------------- */ + +void PPPMDispOMP::fieldforce_c_ik() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + const double * const q = atom->q; + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + const double qqrd2e = force->qqrd2e; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz, order, rho_coeff); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + } + + // convert E-field to force + const double qfactor = qqrd2e*scale*q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + f[i][2] += qfactor*ekz; + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles + for ad scheme +------------------------------------------------------------------------- */ + +void PPPMDispOMP::fieldforce_c_ad() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + const double * const q = atom->q; + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + const double qqrd2e = force->qqrd2e; + //const double * const sf_c = sf_coeff; + + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + const double hx_inv = nx_pppm/xprd; + const double hy_inv = ny_pppm/yprd; + const double hz_inv = nz_pppm/zprd_slab; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + FFT_SCALAR * const * const dr1d = static_cast(thr->get_drho1d()); + + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + double sf = 0.0; + double s1,s2,s3; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz, order, rho_coeff); + compute_drho1d_thr(dr1d,dx,dy,dz, order, drho_coeff); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + ekx += dr1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx]; + eky += r1d[0][l]*dr1d[1][m]*r1d[2][n]*u_brick[mz][my][mx]; + ekz += r1d[0][l]*r1d[1][m]*dr1d[2][n]*u_brick[mz][my][mx]; + } + } + } + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; + + // convert E-field to force + const double qfactor = qqrd2e*scale; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + sf = sf_coeff[0]*sin(2*MY_PI*s1); + sf += sf_coeff[1]*sin(4*MY_PI*s1); + sf *= 2*q[i]*q[i]; + f[i][0] += qfactor*(ekx*q[i] - sf); + + sf = sf_coeff[2]*sin(2*MY_PI*s2); + sf += sf_coeff[3]*sin(4*MY_PI*s2); + sf *= 2*q[i]*q[i]; + f[i][1] += qfactor*(eky*q[i] - sf); + + + sf = sf_coeff[4]*sin(2*MY_PI*s3); + sf += sf_coeff[5]*sin(4*MY_PI*s3); + sf *= 2*q[i]*q[i]; + if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf); + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial + ------------------------------------------------------------------------- */ + +void PPPMDispOMP::fieldforce_c_peratom() +{ + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + const double * const q = atom->q; + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid[i][0]; + ny = part2grid[i][1]; + nz = part2grid[i][2]; + dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv; + dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv; + dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz, order, rho_coeff); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + if (eflag_atom) u += x0*u_brick[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick[mz][my][mx]; + v1 += x0*v1_brick[mz][my][mx]; + v2 += x0*v2_brick[mz][my][mx]; + v3 += x0*v3_brick[mz][my][mx]; + v4 += x0*v4_brick[mz][my][mx]; + v5 += x0*v5_brick[mz][my][mx]; + } + } + } + } + + const double qfactor = 0.5*force->qqrd2e * scale * q[i]; + + if (eflag_atom) eatom[i] += u*qfactor; + if (vflag_atom) { + vatom[i][0] += v0*qfactor; + vatom[i][1] += v1*qfactor; + vatom[i][2] += v2*qfactor; + vatom[i][3] += v3*qfactor; + vatom[i][4] += v4*qfactor; + vatom[i][5] += v5*qfactor; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for ik scheme and geometric mixing rule +------------------------------------------------------------------------- */ + +void PPPMDispOMP::fieldforce_g_ik() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + const double qqrd2e = force->qqrd2e; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); + + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + int type; + double lj; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); + + ekx = eky = ekz = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + ekx -= x0*vdx_brick_g[mz][my][mx]; + eky -= x0*vdy_brick_g[mz][my][mx]; + ekz -= x0*vdz_brick_g[mz][my][mx]; + } + } + } + + // convert E-field to force + type = atom->type[i]; + lj = B[type]; + f[i][0] += lj*ekx; + f[i][1] += lj*eky; + f[i][2] += lj*ekz; + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for ad scheme and geometric mixing rule +------------------------------------------------------------------------- */ + +void PPPMDispOMP::fieldforce_g_ad() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + const double hx_inv = nx_pppm_6/xprd; + const double hy_inv = ny_pppm_6/yprd; + const double hz_inv = nz_pppm_6/zprd_slab; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); + FFT_SCALAR * const * const dr1d = static_cast(thr->get_drho1d_6()); + + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + int type; + double lj; + double sf = 0.0; + double s1,s2,s3; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); + compute_drho1d_thr(dr1d,dx,dy,dz, order_6, drho_coeff_6); + + ekx = eky = ekz = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + ekx += dr1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick_g[mz][my][mx]; + eky += r1d[0][l]*dr1d[1][m]*r1d[2][n]*u_brick_g[mz][my][mx]; + ekz += r1d[0][l]*r1d[1][m]*dr1d[2][n]*u_brick_g[mz][my][mx]; + } + } + } + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; + + // convert E-field to force + type = atom->type[i]; + lj = B[type]; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + + sf = sf_coeff_6[0]*sin(2*MY_PI*s1); + sf += sf_coeff_6[1]*sin(4*MY_PI*s1); + sf *= 2*lj*lj; + f[i][0] += ekx*lj - sf; + + sf = sf_coeff_6[2]*sin(2*MY_PI*s2); + sf += sf_coeff_6[3]*sin(4*MY_PI*s2); + sf *= 2*lj*lj; + f[i][1] += eky*lj - sf; + + sf = sf_coeff_6[4]*sin(2*MY_PI*s3); + sf += sf_coeff_6[5]*sin(4*MY_PI*s3); + sf *= 2*lj*lj; + if (slabflag != 2) f[i][2] += ekz*lj - sf; + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial for dispersion + interaction and geometric mixing rule + ------------------------------------------------------------------------- */ + +void PPPMDispOMP::fieldforce_g_peratom() +{ + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); + + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + int type; + double lj; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); + + u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + if (eflag_atom) u += x0*u_brick_g[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick_g[mz][my][mx]; + v1 += x0*v1_brick_g[mz][my][mx]; + v2 += x0*v2_brick_g[mz][my][mx]; + v3 += x0*v3_brick_g[mz][my][mx]; + v4 += x0*v4_brick_g[mz][my][mx]; + v5 += x0*v5_brick_g[mz][my][mx]; + } + } + } + } + + type = atom->type[i]; + lj = B[type]*0.5; + + if (eflag_atom) eatom[i] += u*lj; + if (vflag_atom) { + vatom[i][0] += v0*lj; + vatom[i][1] += v1*lj; + vatom[i][2] += v2*lj; + vatom[i][3] += v3*lj; + vatom[i][4] += v4*lj; + vatom[i][5] += v5*lj; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for ik scheme and arithmetic mixing rule +------------------------------------------------------------------------- */ + +void PPPMDispOMP::fieldforce_a_ik() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + const double qqrd2e = force->qqrd2e; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); + + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; + FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; + FFT_SCALAR ekx6, eky6, ekz6; + int type; + double lj0,lj1,lj2,lj3,lj4,lj5,lj6; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); + + ekx0 = eky0 = ekz0 = ZEROF; + ekx1 = eky1 = ekz1 = ZEROF; + ekx2 = eky2 = ekz2 = ZEROF; + ekx3 = eky3 = ekz3 = ZEROF; + ekx4 = eky4 = ekz4 = ZEROF; + ekx5 = eky5 = ekz5 = ZEROF; + ekx6 = eky6 = ekz6 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + ekx0 -= x0*vdx_brick_a0[mz][my][mx]; + eky0 -= x0*vdy_brick_a0[mz][my][mx]; + ekz0 -= x0*vdz_brick_a0[mz][my][mx]; + ekx1 -= x0*vdx_brick_a1[mz][my][mx]; + eky1 -= x0*vdy_brick_a1[mz][my][mx]; + ekz1 -= x0*vdz_brick_a1[mz][my][mx]; + ekx2 -= x0*vdx_brick_a2[mz][my][mx]; + eky2 -= x0*vdy_brick_a2[mz][my][mx]; + ekz2 -= x0*vdz_brick_a2[mz][my][mx]; + ekx3 -= x0*vdx_brick_a3[mz][my][mx]; + eky3 -= x0*vdy_brick_a3[mz][my][mx]; + ekz3 -= x0*vdz_brick_a3[mz][my][mx]; + ekx4 -= x0*vdx_brick_a4[mz][my][mx]; + eky4 -= x0*vdy_brick_a4[mz][my][mx]; + ekz4 -= x0*vdz_brick_a4[mz][my][mx]; + ekx5 -= x0*vdx_brick_a5[mz][my][mx]; + eky5 -= x0*vdy_brick_a5[mz][my][mx]; + ekz5 -= x0*vdz_brick_a5[mz][my][mx]; + ekx6 -= x0*vdx_brick_a6[mz][my][mx]; + eky6 -= x0*vdy_brick_a6[mz][my][mx]; + ekz6 -= x0*vdz_brick_a6[mz][my][mx]; + } + } + } + + // convert D-field to force + type = atom->type[i]; + lj0 = B[7*type+6]; + lj1 = B[7*type+5]; + lj2 = B[7*type+4]; + lj3 = B[7*type+3]; + lj4 = B[7*type+2]; + lj5 = B[7*type+1]; + lj6 = B[7*type]; + f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6; + f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6; + f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6; + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for ad scheme and arithmetic mixing rule +------------------------------------------------------------------------- */ + +void PPPMDispOMP::fieldforce_a_ad() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + + double *prd; + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd 
= prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + const double hx_inv = nx_pppm_6/xprd; + const double hy_inv = ny_pppm_6/yprd; + const double hz_inv = nz_pppm_6/zprd_slab; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); + FFT_SCALAR * const * const dr1d = static_cast(thr->get_drho1d_6()); + + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; + FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; + FFT_SCALAR ekx6, eky6, ekz6; + int type; + double lj0,lj1,lj2,lj3,lj4,lj5,lj6; + double sf = 0.0; + double s1,s2,s3; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); + compute_drho1d_thr(dr1d,dx,dy,dz, order_6, drho_coeff_6); + + ekx0 = eky0 = ekz0 = ZEROF; + ekx1 = eky1 = ekz1 = ZEROF; + ekx2 = eky2 = ekz2 = ZEROF; + ekx3 = eky3 = ekz3 = ZEROF; + ekx4 = eky4 = ekz4 = ZEROF; + ekx5 = eky5 = ekz5 = ZEROF; + ekx6 = eky6 = ekz6 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = dr1d[0][l]*r1d[1][m]*r1d[2][n]; + y0 = r1d[0][l]*dr1d[1][m]*r1d[2][n]; + z0 = r1d[0][l]*r1d[1][m]*dr1d[2][n]; + + ekx0 += x0*u_brick_a0[mz][my][mx]; + eky0 += y0*u_brick_a0[mz][my][mx]; + ekz0 += z0*u_brick_a0[mz][my][mx]; + + ekx1 += x0*u_brick_a1[mz][my][mx]; + eky1 += y0*u_brick_a1[mz][my][mx]; + ekz1 += z0*u_brick_a1[mz][my][mx]; + + ekx2 += x0*u_brick_a2[mz][my][mx]; + eky2 += y0*u_brick_a2[mz][my][mx]; + ekz2 += z0*u_brick_a2[mz][my][mx]; + + ekx3 += x0*u_brick_a3[mz][my][mx]; + eky3 += y0*u_brick_a3[mz][my][mx]; + ekz3 += z0*u_brick_a3[mz][my][mx]; + + ekx4 += x0*u_brick_a4[mz][my][mx]; + eky4 += y0*u_brick_a4[mz][my][mx]; + ekz4 += z0*u_brick_a4[mz][my][mx]; + + ekx5 += x0*u_brick_a5[mz][my][mx]; + eky5 += y0*u_brick_a5[mz][my][mx]; + ekz5 += z0*u_brick_a5[mz][my][mx]; + + ekx6 += x0*u_brick_a6[mz][my][mx]; + eky6 += y0*u_brick_a6[mz][my][mx]; + ekz6 += z0*u_brick_a6[mz][my][mx]; + } + } + } + + ekx0 *= hx_inv; + eky0 *= hy_inv; + ekz0 *= hz_inv; + + ekx1 *= hx_inv; + eky1 *= hy_inv; + ekz1 *= hz_inv; + + ekx2 *= hx_inv; + eky2 *= hy_inv; + ekz2 *= hz_inv; + + ekx3 *= hx_inv; + eky3 *= hy_inv; + ekz3 *= hz_inv; + + ekx4 *= hx_inv; + eky4 *= hy_inv; + ekz4 *= hz_inv; + + ekx5 *= hx_inv; + eky5 *= hy_inv; + ekz5 *= hz_inv; + + ekx6 *= hx_inv; + eky6 *= hy_inv; + ekz6 *= hz_inv; + + // convert D-field to force + type = atom->type[i]; + lj0 = B[7*type+6]; + lj1 = B[7*type+5]; + lj2 = B[7*type+4]; + lj3 = B[7*type+3]; + lj4 = B[7*type+2]; + lj5 = 
B[7*type+1]; + lj6 = B[7*type]; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + + sf = sf_coeff_6[0]*sin(2*MY_PI*s1); + sf += sf_coeff_6[1]*sin(4*MY_PI*s1); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf; + + sf = sf_coeff_6[2]*sin(2*MY_PI*s2); + sf += sf_coeff_6[3]*sin(4*MY_PI*s2); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf; + + sf = sf_coeff_6[4]*sin(2*MY_PI*s3); + sf += sf_coeff_6[5]*sin(4*MY_PI*s3); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf; + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial for dispersion + interaction and arithmetic mixing rule + ------------------------------------------------------------------------- */ + +void PPPMDispOMP::fieldforce_a_peratom() +{ + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); + + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u0,v00,v10,v20,v30,v40,v50; + FFT_SCALAR u1,v01,v11,v21,v31,v41,v51; + FFT_SCALAR u2,v02,v12,v22,v32,v42,v52; + FFT_SCALAR u3,v03,v13,v23,v33,v43,v53; + FFT_SCALAR u4,v04,v14,v24,v34,v44,v54; + FFT_SCALAR u5,v05,v15,v25,v35,v45,v55; + FFT_SCALAR u6,v06,v16,v26,v36,v46,v56; + int type; + double lj0,lj1,lj2,lj3,lj4,lj5,lj6; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); + + u0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF; + u1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF; + u2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF; + u3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF; + u4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF; + u5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF; + u6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + if (eflag_atom) { + u0 += x0*u_brick_a0[mz][my][mx]; + u1 += x0*u_brick_a1[mz][my][mx]; + u2 += x0*u_brick_a2[mz][my][mx]; + u3 += x0*u_brick_a3[mz][my][mx]; + u4 += x0*u_brick_a4[mz][my][mx]; + u5 += x0*u_brick_a5[mz][my][mx]; + u6 += x0*u_brick_a6[mz][my][mx]; + } + if (vflag_atom) { + v00 += x0*v0_brick_a0[mz][my][mx]; + v10 += x0*v1_brick_a0[mz][my][mx]; + v20 += x0*v2_brick_a0[mz][my][mx]; + v30 += x0*v3_brick_a0[mz][my][mx]; + v40 += x0*v4_brick_a0[mz][my][mx]; + v50 += x0*v5_brick_a0[mz][my][mx]; + v01 += x0*v0_brick_a1[mz][my][mx]; + v11 += x0*v1_brick_a1[mz][my][mx]; + v21 += x0*v2_brick_a1[mz][my][mx]; + v31 += x0*v3_brick_a1[mz][my][mx]; + v41 += x0*v4_brick_a1[mz][my][mx]; + v51 += x0*v5_brick_a1[mz][my][mx]; + v02 += x0*v0_brick_a2[mz][my][mx]; + v12 += x0*v1_brick_a2[mz][my][mx]; + v22 += x0*v2_brick_a2[mz][my][mx]; + v32 += x0*v3_brick_a2[mz][my][mx]; + v42 += x0*v4_brick_a2[mz][my][mx]; + v52 += x0*v5_brick_a2[mz][my][mx]; + v03 += x0*v0_brick_a3[mz][my][mx]; + v13 += x0*v1_brick_a3[mz][my][mx]; + v23 += x0*v2_brick_a3[mz][my][mx]; + v33 += x0*v3_brick_a3[mz][my][mx]; + v43 += x0*v4_brick_a3[mz][my][mx]; + v53 += x0*v5_brick_a3[mz][my][mx]; + v04 += x0*v0_brick_a4[mz][my][mx]; + v14 += x0*v1_brick_a4[mz][my][mx]; + v24 += x0*v2_brick_a4[mz][my][mx]; + v34 += x0*v3_brick_a4[mz][my][mx]; + v44 += x0*v4_brick_a4[mz][my][mx]; + v54 += x0*v5_brick_a4[mz][my][mx]; + v05 += x0*v0_brick_a5[mz][my][mx]; + v15 += x0*v1_brick_a5[mz][my][mx]; + v25 += x0*v2_brick_a5[mz][my][mx]; + v35 += x0*v3_brick_a5[mz][my][mx]; + v45 += x0*v4_brick_a5[mz][my][mx]; + v55 += x0*v5_brick_a5[mz][my][mx]; + v06 += x0*v0_brick_a6[mz][my][mx]; + v16 += x0*v1_brick_a6[mz][my][mx]; + v26 += x0*v2_brick_a6[mz][my][mx]; + v36 += x0*v3_brick_a6[mz][my][mx]; + v46 += x0*v4_brick_a6[mz][my][mx]; + v56 += x0*v5_brick_a6[mz][my][mx]; + } + } + } + } + + // convert D-field to force + type = atom->type[i]; + lj0 = 
B[7*type+6]*0.5; + lj1 = B[7*type+5]*0.5; + lj2 = B[7*type+4]*0.5; + lj3 = B[7*type+3]*0.5; + lj4 = B[7*type+2]*0.5; + lj5 = B[7*type+1]*0.5; + lj6 = B[7*type]*0.5; + + if (eflag_atom) + eatom[i] += u0*lj0 + u1*lj1 + u2*lj2 + + u3*lj3 + u4*lj4 + u5*lj5 + u6*lj6; + if (vflag_atom) { + vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + + v04*lj4 + v05*lj5 + v06*lj6; + vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + + v14*lj4 + v15*lj5 + v16*lj6; + vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + + v24*lj4 + v25*lj5 + v26*lj6; + vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + + v34*lj4 + v35*lj5 + v36*lj6; + vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 + + v44*lj4 + v45*lj5 + v46*lj6; + vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + + v54*lj4 + v55*lj5 + v56*lj6; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + charge assignment into rho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ +void PPPMDispOMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx, + const FFT_SCALAR &dy, const FFT_SCALAR &dz, + const int ord, FFT_SCALAR * const * const rho_c) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-ord)/2; k <= ord/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = ord-1; l >= 0; l--) { + r1 = rho_c[l][k] + r1*dx; + r2 = rho_c[l][k] + r2*dy; + r3 = rho_c[l][k] + r3*dz; + } + r1d[0][k] = r1; + r1d[1][k] = r2; + r1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + charge assignment into drho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPMDispOMP::compute_drho1d_thr(FFT_SCALAR * const * const dr1d, const FFT_SCALAR &dx, + const FFT_SCALAR &dy, const FFT_SCALAR &dz, + const int ord, FFT_SCALAR * const * const drho_c) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-ord)/2; k <= ord/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = ord-2; l >= 0; l--) { + r1 = drho_c[l][k] + r1*dx; + r2 = drho_c[l][k] + r2*dy; + r3 = drho_c[l][k] + r3*dz; + } + dr1d[0][k] = r1; + dr1d[1][k] = r2; + dr1d[2][k] = r3; + } +} diff --git a/src/USER-OMP/pppm_disp_omp.h b/src/USER-OMP/pppm_disp_omp.h new file mode 100644 index 0000000000..e2f588d169 --- /dev/null +++ b/src/USER-OMP/pppm_disp_omp.h @@ -0,0 +1,74 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifdef KSPACE_CLASS + +KSpaceStyle(pppm/disp/omp,PPPMDispOMP) + +#else + +#ifndef LMP_PPPM_DISP_OMP_H +#define LMP_PPPM_DISP_OMP_H + +#include "pppm_disp.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + + class PPPMDispOMP : public PPPMDisp, public ThrOMP { + public: + PPPMDispOMP(class LAMMPS *, int, char **); + virtual ~PPPMDispOMP () {}; + virtual void compute(int, int); + + protected: + virtual void allocate(); + virtual void deallocate(); + + virtual void compute_gf(); + virtual void compute_gf_6(); + + virtual void particle_map(double,double,double, + double,int**,int,int, + int,int,int,int,int,int); + + + virtual void fieldforce_c_ik(); + virtual void fieldforce_c_ad(); + virtual void fieldforce_c_peratom(); + virtual void fieldforce_g_ik(); + virtual void fieldforce_g_ad(); + virtual void fieldforce_g_peratom(); + virtual void fieldforce_a_ik(); + virtual void fieldforce_a_ad(); + virtual void fieldforce_a_peratom(); + + virtual void make_rho_c(); + virtual void make_rho_g(); + virtual void make_rho_a(); + + void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &, + const FFT_SCALAR &, const FFT_SCALAR &, + const int, FFT_SCALAR * const * const); + void compute_drho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &, + const FFT_SCALAR &, const FFT_SCALAR &, + const int, FFT_SCALAR * const * const); +// void compute_rho_coeff(); +// void slabcorr(int); + +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pppm_disp_tip4p_omp.cpp b/src/USER-OMP/pppm_disp_tip4p_omp.cpp new file mode 100644 index 0000000000..bbb96228a3 --- /dev/null +++ b/src/USER-OMP/pppm_disp_tip4p_omp.cpp @@ -0,0 +1,1826 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#include "pppm_disp_tip4p_omp.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "error.h" +#include "fix_omp.h" +#include "force.h" +#include "memory.h" +#include "math_const.h" +#include "math_special.h" + +#include +#include + +#include "suffix.h" +using namespace LAMMPS_NS; +using namespace MathConst; +using namespace MathSpecial; + +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#else +#define ZEROF 0.0 +#endif + +#define OFFSET 16384 + +/* ---------------------------------------------------------------------- */ + +PPPMDispTIP4POMP::PPPMDispTIP4POMP(LAMMPS *lmp, int narg, char **arg) : + PPPMDispTIP4P(lmp, narg, arg), ThrOMP(lmp, THR_KSPACE) +{ + tip4pflag = 1; + suffix_flag |= Suffix::OMP; +} + +/* ---------------------------------------------------------------------- + allocate memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::allocate() +{ + PPPMDispTIP4P::allocate(); + + const int nthreads = comm->nthreads; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + + if (function[0]) { + ThrData *thr = fix->get_thr(tid); + thr->init_pppm(order,memory); + } + if (function[1] + function[2]) { + ThrData * thr = fix->get_thr(tid); + thr->init_pppm_disp(order_6,memory); + } + } +} + +/* ---------------------------------------------------------------------- + free memory that depends on # of K-vectors and order +------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::deallocate() +{ + PPPMDispTIP4P::deallocate(); + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + if (function[0]) { + ThrData * thr = fix->get_thr(tid); + thr->init_pppm(-order,memory); + } + if (function[1] + function[2]) { + ThrData * thr = fix->get_thr(tid); + thr->init_pppm_disp(-order_6,memory); + } + } +} + + +/* ---------------------------------------------------------------------- + Compute the modified (hockney-eastwood) coulomb green function +------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::compute_gf() +{ +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + + double *prd; + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int tid,nn,nnfrom,nnto,nx,ny,nz,k,l,m; + int kper,lper,mper; + double snx,sny,snz,snx2,sny2,snz2; + double sqk; + double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz; + double sum1,dot1,dot2; + double numerator,denominator; + + const int nnx = nxhi_fft-nxlo_fft+1; + const int nny = nyhi_fft-nylo_fft+1; + + loop_setup_thr(nnfrom, nnto, tid, nfft, comm->nthreads); + + for (m = nzlo_fft; m <= nzhi_fft; m++) { + mper = m - nz_pppm*(2*m/nz_pppm); + qz = unitkz*mper; + snz = sin(0.5*qz*zprd_slab/nz_pppm); + 
snz2 = snz*snz; + sz = exp(-0.25*pow(qz/g_ewald,2.0)); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm; + if (argz != 0.0) wz = pow(sin(argz)/argz,order); + wz *= wz; + + for (l = nylo_fft; l <= nyhi_fft; l++) { + lper = l - ny_pppm*(2*l/ny_pppm); + qy = unitky*lper; + sny = sin(0.5*qy*yprd/ny_pppm); + sny2 = sny*sny; + sy = exp(-0.25*pow(qy/g_ewald,2.0)); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm; + if (argy != 0.0) wy = pow(sin(argy)/argy,order); + wy *= wy; + + for (k = nxlo_fft; k <= nxhi_fft; k++) { + + /* only compute the part designated to this thread */ + nn = k-nxlo_fft + nnx*(l-nylo_fft + nny*(m-nzlo_fft)); + if ((nn < nnfrom) || (nn >=nnto)) continue; + + kper = k - nx_pppm*(2*k/nx_pppm); + qx = unitkx*kper; + snx = sin(0.5*qx*xprd/nx_pppm); + snx2 = snx*snx; + sx = exp(-0.25*pow(qx/g_ewald,2.0)); + wx = 1.0; + argx = 0.5*qx*xprd/nx_pppm; + if (argx != 0.0) wx = pow(sin(argx)/argx,order); + wx *= wx; + + sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); + + if (sqk != 0.0) { + numerator = 4.0*MY_PI/sqk; + denominator = gf_denom(snx2,sny2,snz2, gf_b, order); + greensfn[nn] = numerator*sx*sy*sz*wx*wy*wz/denominator; + } else greensfn[nn] = 0.0; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + Compyute the modified (hockney-eastwood) dispersion green function +------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::compute_gf_6() +{ +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + double *prd; + int k,l,m,nn; + + // volume-dependent factors + // adjust z dimension for 2d slab PPPM + // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0 + + if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; + + double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + double unitkx = (2.0*MY_PI/xprd); + double unitky = (2.0*MY_PI/yprd); + double unitkz = (2.0*MY_PI/zprd_slab); + + int kper,lper,mper; + double sqk; + double snx,sny,snz,snx2,sny2,snz2; + double argx,argy,argz,wx,wy,wz,sx,sy,sz; + double qx,qy,qz; + double rtsqk, term; + double numerator,denominator; + double inv2ew = 2*g_ewald_6; + inv2ew = 1/inv2ew; + double rtpi = sqrt(MY_PI); + int nnfrom, nnto, tid; + + numerator = -MY_PI*rtpi*g_ewald_6*g_ewald_6*g_ewald_6/(3.0); + + const int nnx = nxhi_fft_6-nxlo_fft_6+1; + const int nny = nyhi_fft_6-nylo_fft_6+1; + + loop_setup_thr(nnfrom, nnto, tid, nfft_6, comm->nthreads); + + for (m = nzlo_fft_6; m <= nzhi_fft_6; m++) { + mper = m - nz_pppm_6*(2*m/nz_pppm_6); + qz = unitkz*mper; + snz = sin(0.5*unitkz*mper*zprd_slab/nz_pppm_6); + snz2 = snz*snz; + sz = exp(-qz*qz*inv2ew*inv2ew); + wz = 1.0; + argz = 0.5*qz*zprd_slab/nz_pppm_6; + if (argz != 0.0) wz = pow(sin(argz)/argz,order_6); + wz *= wz; + + for (l = nylo_fft_6; l <= nyhi_fft_6; l++) { + lper = l - ny_pppm_6*(2*l/ny_pppm_6); + qy = unitky*lper; + sny = sin(0.5*unitky*lper*yprd/ny_pppm_6); + sny2 = sny*sny; + sy = exp(-qy*qy*inv2ew*inv2ew); + wy = 1.0; + argy = 0.5*qy*yprd/ny_pppm_6; + if (argy != 0.0) wy = pow(sin(argy)/argy,order_6); + wy *= wy; + + for (k = nxlo_fft_6; k <= nxhi_fft_6; k++) { + + /* only compute the part designated to this thread */ + nn = k-nxlo_fft_6 + nnx*(l-nylo_fft_6 + nny*(m-nzlo_fft_6)); + if ((nn < nnfrom) || (nn >=nnto)) continue; + + kper = k - nx_pppm_6*(2*k/nx_pppm_6); + qx = unitkx*kper; + snx = sin(0.5*unitkx*kper*xprd/nx_pppm_6); + snx2 = snx*snx; + sx = exp(-qx*qx*inv2ew*inv2ew); + wx = 1.0; + argx = 
0.5*qx*xprd/nx_pppm_6; + if (argx != 0.0) wx = pow(sin(argx)/argx,order_6); + wx *= wx; + + sqk = pow(qx,2.0) + pow(qy,2.0) + pow(qz,2.0); + + denominator = gf_denom(snx2,sny2,snz2, gf_b_6, order_6); + rtsqk = sqrt(sqk); + term = (1-2*sqk*inv2ew*inv2ew)*sx*sy*sz + + 2*sqk*rtsqk*inv2ew*inv2ew*inv2ew*rtpi*erfc(rtsqk*inv2ew); + greensfn_6[nn] = numerator*term*wx*wy*wz/denominator; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + run the regular toplevel compute method from plain PPPM + which will have individual methods replaced by our threaded + versions and then call the obligatory force reduction. +------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::compute(int eflag, int vflag) +{ + + PPPMDispTIP4P::compute(eflag,vflag); + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { +#if defined(_OPENMP) + const int tid = omp_get_thread_num(); +#else + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::particle_map_c(double dxinv, double dyinv, + double dzinv, double sft, + int ** part2grid, int nup, + int nlw, int nxlo_o, + int nylo_o, int nzlo_o, + int nxhi_o, int nyhi_o, + int nzhi_o) +{ + const int * _noalias const type = atom->type; + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + int3_t * _noalias const p2g = (int3_t *) part2grid[0]; + const double boxlox = boxlo[0]; + const double boxloy = boxlo[1]; + const double boxloz = boxlo[2]; + const int nlocal = atom->nlocal; + + const double delxinv = dxinv; + const double delyinv = dyinv; + const double delzinv = dzinv; + const double shift = sft; + const int nupper = nup; + const int nlower = nlw; + const int nxlo_out = nxlo_o; + const int nylo_out = nylo_o; + const int nzlo_out = nzlo_o; + const int nxhi_out = nxhi_o; + const int nyhi_out = nyhi_o; + const int nzhi_out = nzhi_o; + + int i, flag = 0; +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static) +#endif + for (i = 0; i < nlocal; i++) { + dbl3_t xM; + int iH1,iH2; + + if (type[i] == typeO) { + find_M_thr(i,iH1,iH2,xM); + } else { + xM = x[i]; + } + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + const int nx = static_cast ((xM.x-boxlox)*delxinv+shift) - OFFSET; + const int ny = static_cast ((xM.y-boxloy)*delyinv+shift) - OFFSET; + const int nz = static_cast ((xM.z-boxloz)*delzinv+shift) - OFFSET; + + p2g[i].a = nx; + p2g[i].b = ny; + p2g[i].t = nz; + + // check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out) + flag++; + } + + int flag_all; + MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); + if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM"); +} + +/* 
---------------------------------------------------------------------- + find center grid pt for each of my particles + check that full stencil for the particle will fit in my 3d brick + store central grid pt indices in part2grid array +------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::particle_map(double dxinv, double dyinv, + double dzinv, double sft, + int ** part2grid, int nup, + int nlw, int nxlo_o, + int nylo_o, int nzlo_o, + int nxhi_o, int nyhi_o, + int nzhi_o) +{ + const int * _noalias const type = atom->type; + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + int3_t * _noalias const p2g = (int3_t *) part2grid[0]; + const double boxlox = boxlo[0]; + const double boxloy = boxlo[1]; + const double boxloz = boxlo[2]; + const int nlocal = atom->nlocal; + + const double delxinv = dxinv; + const double delyinv = dyinv; + const double delzinv = dzinv; + const double shift = sft; + const int nupper = nup; + const int nlower = nlw; + const int nxlo_out = nxlo_o; + const int nylo_out = nylo_o; + const int nzlo_out = nzlo_o; + const int nxhi_out = nxhi_o; + const int nyhi_out = nyhi_o; + const int nzhi_out = nzhi_o; + + int i, flag = 0; +#if defined(_OPENMP) +#pragma omp parallel for private(i) default(none) reduction(+:flag) schedule(static) +#endif + for (i = 0; i < nlocal; i++) { + + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // current particle coord can be outside global and local box + // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1 + + const int nx = static_cast ((x[i].x-boxlox)*delxinv+shift) - OFFSET; + const int ny = static_cast ((x[i].y-boxloy)*delyinv+shift) - OFFSET; + const int nz = static_cast ((x[i].z-boxloz)*delzinv+shift) - OFFSET; + + p2g[i].a = nx; + p2g[i].b = ny; + p2g[i].t = nz; + + // check that entire stencil around nx,ny,nz will fit in my 3d brick + + if (nx+nlower < nxlo_out || nx+nupper > nxhi_out || + ny+nlower < nylo_out || ny+nupper > nyhi_out || + nz+nlower < nzlo_out || nz+nupper > nzhi_out) + flag++; + } + + int flag_all; + MPI_Allreduce(&flag,&flag_all,1,MPI_INT,MPI_SUM,world); + if (flag_all) error->all(FLERR,"Out of range atoms - cannot compute PPPM"); +} + +/* ---------------------------------------------------------------------- + create discretized "density" on section of global grid due to my particles + density(x,y,z) = charge "density" at grid points of my 3d brick + (nxlo:nxhi,nylo:nyhi,nzlo:nzhi) is extent of my brick (including ghosts) + in global grid +------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::make_rho_c() +{ + + // clear 3d density array + + FFT_SCALAR * _noalias const d = &(density_brick[nzlo_out][nylo_out][nxlo_out]); + memset(d,0,ngrid*sizeof(FFT_SCALAR)); + + const int ix = nxhi_out - nxlo_out + 1; + const int iy = nyhi_out - nylo_out + 1; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + const double * _noalias const q = atom->q; + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + const int3_t * _noalias const p2g = (int3_t *) part2grid[0]; + const int * _noalias const type = atom->type; + dbl3_t xM; + + const double boxlox = boxlo[0]; + const double boxloy = boxlo[1]; + const double boxloz = boxlo[2]; + + // determine range of grid points handled by this thread + int i,jfrom,jto,tid,iH1,iH2; + loop_setup_thr(jfrom,jto,tid,ngrid,comm->nthreads); + + // get per thread data + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d 
= static_cast(thr->get_rho1d()); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + + // loop over all local atoms for all threads + const int nlocal = atom->nlocal; + for (i = 0; i < nlocal; i++) { + + const int nx = p2g[i].a; + const int ny = p2g[i].b; + const int nz = p2g[i].t; + + // pre-screen whether this atom will ever come within + // reach of the data segement this thread is updating. + if ( ((nz+nlower-nzlo_out)*ix*iy >= jto) + || ((nz+nupper-nzlo_out+1)*ix*iy < jfrom) ) continue; + + if (type[i] == typeO) { + find_M_thr(i,iH1,iH2,xM); + } else { + xM = x[i]; + } + const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv; + const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv; + const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz,order,rho_coeff); + + const FFT_SCALAR z0 = delvolinv * q[i]; + + for (int n = nlower; n <= nupper; ++n) { + const int jn = (nz+n-nzlo_out)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower; m <= nupper; ++m) { + const int jm = jn+(ny+m-nylo_out)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower; l <= nupper; ++l) { + const int jl = jm+nx+l-nxlo_out; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + d[jl] += x0*r1d[0][l]; + } + } + } + } + } +} + + +/* ---------------------------------------------------------------------- + same as above for dispersion interaction with geometric mixing rule +------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::make_rho_g() +{ + + // clear 3d density array + + FFT_SCALAR * _noalias const d = &(density_brick_g[nzlo_out_6][nylo_out_6][nxlo_out_6]); + memset(d,0,ngrid_6*sizeof(FFT_SCALAR)); + + const int ix = nxhi_out_6 - nxlo_out_6 + 1; + const int iy = nyhi_out_6 - nylo_out_6 + 1; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + const int3_t * _noalias const p2g = (int3_t *) part2grid_6[0]; + + const double boxlox = boxlo[0]; + const double boxloy = boxlo[1]; + const double boxloz = boxlo[2]; + + // determine range of grid points handled by this thread + int i,jfrom,jto,tid; + loop_setup_thr(jfrom,jto,tid,ngrid_6,comm->nthreads); + + // get per thread data + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + + // loop over all local atoms for all threads + const int nlocal = atom->nlocal; + for (i = 0; i < nlocal; i++) { + + const int nx = p2g[i].a; + const int ny = p2g[i].b; + const int nz = p2g[i].t; + + // pre-screen whether this atom will ever come within + // reach of the data segement this thread is updating. 
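+      // jfrom and jto (from loop_setup_thr) bound this thread's contiguous
+      // chunk of the flattened density brick (x fastest, then y, then z);
+      // the check below skips the atom if the z-planes its stencil touches
+      // lie entirely outside the [jfrom,jto) range.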
+ if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto) + || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; + + const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6; + const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6; + const FFT_SCALAR dz = nz+shiftone_6 - (x[i].z-boxloz)*delzinv_6; + + compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6); + + const int type = atom->type[i]; + const double lj = B[type]; + const FFT_SCALAR z0 = delvolinv_6 * lj; + + for (int n = nlower_6; n <= nupper_6; ++n) { + const int jn = (nz+n-nzlo_out_6)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower_6; m <= nupper_6; ++m) { + const int jm = jn+(ny+m-nylo_out_6)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower_6; l <= nupper_6; ++l) { + const int jl = jm+nx+l-nxlo_out_6; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + d[jl] += x0*r1d[0][l]; + } + } + } + } + } +} + + +/* ---------------------------------------------------------------------- + same as above for dispersion interaction with arithmetic mixing rule +------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::make_rho_a() +{ + + // clear 3d density array + + FFT_SCALAR * _noalias const d0 = &(density_brick_a0[nzlo_out_6][nylo_out_6][nxlo_out_6]); + FFT_SCALAR * _noalias const d1 = &(density_brick_a1[nzlo_out_6][nylo_out_6][nxlo_out_6]); + FFT_SCALAR * _noalias const d2 = &(density_brick_a2[nzlo_out_6][nylo_out_6][nxlo_out_6]); + FFT_SCALAR * _noalias const d3 = &(density_brick_a3[nzlo_out_6][nylo_out_6][nxlo_out_6]); + FFT_SCALAR * _noalias const d4 = &(density_brick_a4[nzlo_out_6][nylo_out_6][nxlo_out_6]); + FFT_SCALAR * _noalias const d5 = &(density_brick_a5[nzlo_out_6][nylo_out_6][nxlo_out_6]); + FFT_SCALAR * _noalias const d6 = &(density_brick_a6[nzlo_out_6][nylo_out_6][nxlo_out_6]); + + memset(d0,0,ngrid_6*sizeof(FFT_SCALAR)); + memset(d1,0,ngrid_6*sizeof(FFT_SCALAR)); + memset(d2,0,ngrid_6*sizeof(FFT_SCALAR)); + memset(d3,0,ngrid_6*sizeof(FFT_SCALAR)); + memset(d4,0,ngrid_6*sizeof(FFT_SCALAR)); + memset(d5,0,ngrid_6*sizeof(FFT_SCALAR)); + memset(d6,0,ngrid_6*sizeof(FFT_SCALAR)); + + const int ix = nxhi_out_6 - nxlo_out_6 + 1; + const int iy = nyhi_out_6 - nylo_out_6 + 1; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + const int3_t * _noalias const p2g = (int3_t *) part2grid_6[0]; + + const double boxlox = boxlo[0]; + const double boxloy = boxlo[1]; + const double boxloz = boxlo[2]; + + // determine range of grid points handled by this thread + int i,jfrom,jto,tid; + loop_setup_thr(jfrom,jto,tid,ngrid_6,comm->nthreads); + + // get per thread data + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); + + // loop over my charges, add their contribution to nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + + // loop over all local atoms for all threads + const int nlocal = atom->nlocal; + for (i = 0; i < nlocal; i++) { + + const int nx = p2g[i].a; + const int ny = p2g[i].b; + const int nz = p2g[i].t; + + // pre-screen whether this atom will ever come within + // reach of the data segement this thread is updating. 
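+      // same per-thread ownership test as in make_rho_g(): all seven
+      // arithmetic-mixing density bricks share one grid layout, so a single
+      // range check on the flattened index covers d0 through d6.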
+ if ( ((nz+nlower_6-nzlo_out_6)*ix*iy >= jto) + || ((nz+nupper_6-nzlo_out_6+1)*ix*iy < jfrom) ) continue; + + const FFT_SCALAR dx = nx+shiftone_6 - (x[i].x-boxlox)*delxinv_6; + const FFT_SCALAR dy = ny+shiftone_6 - (x[i].y-boxloy)*delyinv_6; + const FFT_SCALAR dz = nz+shiftone_6 - (x[i].z-boxloz)*delzinv_6; + + compute_rho1d_thr(r1d,dx,dy,dz,order_6,rho_coeff_6); + + const int type = atom->type[i]; + const double lj0 = B[7*type]; + const double lj1 = B[7*type+1]; + const double lj2 = B[7*type+2]; + const double lj3 = B[7*type+3]; + const double lj4 = B[7*type+4]; + const double lj5 = B[7*type+5]; + const double lj6 = B[7*type+6]; + + const FFT_SCALAR z0 = delvolinv_6; + + for (int n = nlower_6; n <= nupper_6; ++n) { + const int jn = (nz+n-nzlo_out_6)*ix*iy; + const FFT_SCALAR y0 = z0*r1d[2][n]; + + for (int m = nlower_6; m <= nupper_6; ++m) { + const int jm = jn+(ny+m-nylo_out_6)*ix; + const FFT_SCALAR x0 = y0*r1d[1][m]; + + for (int l = nlower_6; l <= nupper_6; ++l) { + const int jl = jm+nx+l-nxlo_out_6; + // make sure each thread only updates + // "his" elements of the density grid + if (jl >= jto) break; + if (jl < jfrom) continue; + + const double w = x0*r1d[0][l]; + + d0[jl] += w*lj0; + d1[jl] += w*lj1; + d2[jl] += w*lj2; + d3[jl] += w*lj3; + d4[jl] += w*lj4; + d5[jl] += w*lj5; + d6[jl] += w*lj6; + } + } + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles for ik +------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::fieldforce_c_ik() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + const double * _noalias const q = atom->q; + const int3_t * _noalias const p2g = (int3_t *) part2grid[0]; + const int * _noalias const type = atom->type; + + const double qqrd2e = force->qqrd2e; + const double boxlox = boxlo[0]; + const double boxloy = boxlo[1]; + const double boxloz = boxlo[2]; + + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + dbl3_t xM; + FFT_SCALAR x0,y0,z0,ekx,eky,ekz; + int i,ifrom,ito,tid,iH1,iH2,l,m,n,mx,my,mz; + + loop_setup_thr(ifrom,ito,tid,nlocal,nthreads); + + // get per thread data + ThrData *thr = fix->get_thr(tid); + dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d()); + + for (i = ifrom; i < ito; ++i) { + if (type[i] == typeO) { + find_M_thr(i,iH1,iH2,xM); + } else xM = x[i]; + + const int nx = p2g[i].a; + const int ny = p2g[i].b; + const int nz = p2g[i].t; + const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv; + const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv; + const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv; + + compute_rho1d_thr(r1d,dx,dy,dz, order, rho_coeff); + + ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; + } + } + 
} +
+ // convert E-field to force +
+ const double qfactor = qqrd2e * scale * q[i]; + if (type[i] != typeO) { + f[i].x += qfactor*ekx; + f[i].y += qfactor*eky; + if (slabflag != 2) f[i].z += qfactor*ekz; +
+ } else { + const double fx = qfactor * ekx; + const double fy = qfactor * eky; + const double fz = qfactor * ekz; +
+ f[i].x += fx*(1 - alpha); + f[i].y += fy*(1 - alpha); + if (slabflag != 2) f[i].z += fz*(1 - alpha); +
+ f[iH1].x += 0.5*alpha*fx; + f[iH1].y += 0.5*alpha*fy; + if (slabflag != 2) f[iH1].z += 0.5*alpha*fz; +
+ f[iH2].x += 0.5*alpha*fx; + f[iH2].y += 0.5*alpha*fy; + if (slabflag != 2) f[iH2].z += 0.5*alpha*fz; + } + } + } // end of parallel region +} +
+/* ---------------------------------------------------------------------- + interpolate from grid to get electric field & force on my particles for ad +------------------------------------------------------------------------- */ +
+void PPPMDispTIP4POMP::fieldforce_c_ad() +{ + const double *prd = (triclinic == 0) ? domain->prd : domain->prd_lamda; + const double hx_inv = nx_pppm/prd[0]; + const double hy_inv = ny_pppm/prd[1]; + const double hz_inv = nz_pppm/prd[2]; +
+ // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle +
+ const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + const double * _noalias const q = atom->q; + const int3_t * _noalias const p2g = (int3_t *) part2grid[0]; + const int * _noalias const type = atom->type; +
+ const double qqrd2e = force->qqrd2e; + const double boxlox = boxlo[0]; + const double boxloy = boxlo[1]; + const double boxloz = boxlo[2]; +
+ const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; +
+#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + double s1,s2,s3,sf; + dbl3_t xM; + FFT_SCALAR ekx,eky,ekz; + int i,ifrom,ito,tid,iH1,iH2,l,m,n,mx,my,mz; +
+ loop_setup_thr(ifrom,ito,tid,nlocal,nthreads); +
+ // get per thread data + ThrData *thr = fix->get_thr(tid); + dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0]; + FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d()); + FFT_SCALAR * const * const d1d = static_cast<FFT_SCALAR **>(thr->get_drho1d()); +
+ for (i = ifrom; i < ito; ++i) { + if (type[i] == typeO) { + find_M_thr(i,iH1,iH2,xM); + } else xM = x[i]; +
+ const int nx = p2g[i].a; + const int ny = p2g[i].b; + const int nz = p2g[i].t; + const FFT_SCALAR dx = nx+shiftone - (xM.x-boxlox)*delxinv; + const FFT_SCALAR dy = ny+shiftone - (xM.y-boxloy)*delyinv; + const FFT_SCALAR dz = nz+shiftone - (xM.z-boxloz)*delzinv; +
+ compute_rho1d_thr(r1d,dx,dy,dz,order,rho_coeff); + compute_drho1d_thr(d1d,dx,dy,dz,order,drho_coeff); +
+ ekx = eky = ekz = ZEROF; + for (n = nlower; n <= nupper; n++) { + mz = n+nz; + for (m = nlower; m <= nupper; m++) { + my = m+ny; + for (l = nlower; l <= nupper; l++) { + mx = l+nx; + ekx += d1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick[mz][my][mx]; + eky += r1d[0][l]*d1d[1][m]*r1d[2][n]*u_brick[mz][my][mx]; + ekz += r1d[0][l]*r1d[1][m]*d1d[2][n]*u_brick[mz][my][mx]; + } + } + } + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; +
+ // convert E-field to force and subtract self forces +
+ const double qi = q[i]; + const double qfactor = qqrd2e * scale * qi; +
+ s1 = x[i].x*hx_inv; + sf = sf_coeff[0]*sin(MY_2PI*s1); + sf += sf_coeff[1]*sin(MY_4PI*s1); + sf *= 2.0*qi; + const double fx =
qfactor*(ekx - sf); + + s2 = x[i].y*hy_inv; + sf = sf_coeff[2]*sin(MY_2PI*s2); + sf += sf_coeff[3]*sin(MY_4PI*s2); + sf *= 2.0*qi; + const double fy = qfactor*(eky - sf); + + s3 = x[i].z*hz_inv; + sf = sf_coeff[4]*sin(MY_2PI*s3); + sf += sf_coeff[5]*sin(MY_4PI*s3); + sf *= 2.0*qi; + const double fz = qfactor*(ekz - sf); + + if (type[i] != typeO) { + f[i].x += fx; + f[i].y += fy; + if (slabflag != 2) f[i].z += fz; + + } else { + f[i].x += fx*(1 - alpha); + f[i].y += fy*(1 - alpha); + if (slabflag != 2) f[i].z += fz*(1 - alpha); + + f[iH1].x += 0.5*alpha*fx; + f[iH1].y += 0.5*alpha*fy; + if (slabflag != 2) f[iH1].z += 0.5*alpha*fz; + + f[iH2].x += 0.5*alpha*fx; + f[iH2].y += 0.5*alpha*fy; + if (slabflag != 2) f[iH2].z += 0.5*alpha*fz; + } + } + } // end of parallel region +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for ik scheme and geometric mixing rule +------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::fieldforce_g_ik() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle + + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + const double qqrd2e = force->qqrd2e; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6()); +
+ int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + int type; + double lj; +
+ // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { +
+ nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; +
+ compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); +
+ ekx = eky = ekz = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + ekx -= x0*vdx_brick_g[mz][my][mx]; + eky -= x0*vdy_brick_g[mz][my][mx]; + ekz -= x0*vdz_brick_g[mz][my][mx]; + } + } + } +
+ // convert E-field to force + type = atom->type[i]; + lj = B[type]; + f[i][0] += lj*ekx; + f[i][1] += lj*eky; + f[i][2] += lj*ekz; + } + } + } +} +
+/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for ad scheme and geometric mixing rule +------------------------------------------------------------------------- */ +
+void PPPMDispTIP4POMP::fieldforce_g_ad() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle +
+ const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; +
+ double *prd; +
+ if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; +
+ double xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; +
+ const double hx_inv = nx_pppm_6/xprd; + const double hy_inv = ny_pppm_6/yprd; + const double hz_inv = nz_pppm_6/zprd_slab; +
+#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ?
inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6()); + FFT_SCALAR * const * const dr1d = static_cast<FFT_SCALAR **>(thr->get_drho1d_6()); +
+ int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; + int type; + double lj; + double sf = 0.0; + double s1,s2,s3; +
+ // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { +
+ nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; +
+ compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); + compute_drho1d_thr(dr1d,dx,dy,dz, order_6, drho_coeff_6); +
+ ekx = eky = ekz = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + ekx += dr1d[0][l]*r1d[1][m]*r1d[2][n]*u_brick_g[mz][my][mx]; + eky += r1d[0][l]*dr1d[1][m]*r1d[2][n]*u_brick_g[mz][my][mx]; + ekz += r1d[0][l]*r1d[1][m]*dr1d[2][n]*u_brick_g[mz][my][mx]; + } + } + } + ekx *= hx_inv; + eky *= hy_inv; + ekz *= hz_inv; +
+ // convert E-field to force + type = atom->type[i]; + lj = B[type]; +
+ s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; +
+ sf = sf_coeff_6[0]*sin(2*MY_PI*s1); + sf += sf_coeff_6[1]*sin(4*MY_PI*s1); + sf *= 2*lj*lj; + f[i][0] += ekx*lj - sf; +
+ sf = sf_coeff_6[2]*sin(2*MY_PI*s2); + sf += sf_coeff_6[3]*sin(4*MY_PI*s2); + sf *= 2*lj*lj; + f[i][1] += eky*lj - sf; +
+ sf = sf_coeff_6[4]*sin(2*MY_PI*s3); + sf += sf_coeff_6[5]*sin(4*MY_PI*s3); + sf *= 2*lj*lj; + if (slabflag != 2) f[i][2] += ekz*lj - sf; + } + } + } +} +
+/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial for dispersion + interaction and geometric mixing rule + ------------------------------------------------------------------------- */ +
+void PPPMDispTIP4POMP::fieldforce_g_peratom() +{ + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt +
+ const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; +
+#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ?
inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6()); +
+ int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u,v0,v1,v2,v3,v4,v5; + int type; + double lj; +
+ // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { +
+ nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; +
+ compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); +
+ u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + if (eflag_atom) u += x0*u_brick_g[mz][my][mx]; + if (vflag_atom) { + v0 += x0*v0_brick_g[mz][my][mx]; + v1 += x0*v1_brick_g[mz][my][mx]; + v2 += x0*v2_brick_g[mz][my][mx]; + v3 += x0*v3_brick_g[mz][my][mx]; + v4 += x0*v4_brick_g[mz][my][mx]; + v5 += x0*v5_brick_g[mz][my][mx]; + } + } + } + } +
+ type = atom->type[i]; + lj = B[type]*0.5; +
+ if (eflag_atom) eatom[i] += u*lj; + if (vflag_atom) { + vatom[i][0] += v0*lj; + vatom[i][1] += v1*lj; + vatom[i][2] += v2*lj; + vatom[i][3] += v3*lj; + vatom[i][4] += v4*lj; + vatom[i][5] += v5*lj; + } + } + } + } +} +
+/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for ik scheme and arithmetic mixing rule +------------------------------------------------------------------------- */ +
+void PPPMDispTIP4POMP::fieldforce_a_ik() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle +
+ const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + const double qqrd2e = force->qqrd2e; +
+#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ?
inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast<FFT_SCALAR **>(thr->get_rho1d_6()); +
+ int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; + FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; + FFT_SCALAR ekx6, eky6, ekz6; + int type; + double lj0,lj1,lj2,lj3,lj4,lj5,lj6; +
+ // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { +
+ nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; +
+ compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); +
+ ekx0 = eky0 = ekz0 = ZEROF; + ekx1 = eky1 = ekz1 = ZEROF; + ekx2 = eky2 = ekz2 = ZEROF; + ekx3 = eky3 = ekz3 = ZEROF; + ekx4 = eky4 = ekz4 = ZEROF; + ekx5 = eky5 = ekz5 = ZEROF; + ekx6 = eky6 = ekz6 = ZEROF; +
+ for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + ekx0 -= x0*vdx_brick_a0[mz][my][mx]; + eky0 -= x0*vdy_brick_a0[mz][my][mx]; + ekz0 -= x0*vdz_brick_a0[mz][my][mx]; + ekx1 -= x0*vdx_brick_a1[mz][my][mx]; + eky1 -= x0*vdy_brick_a1[mz][my][mx]; + ekz1 -= x0*vdz_brick_a1[mz][my][mx]; + ekx2 -= x0*vdx_brick_a2[mz][my][mx]; + eky2 -= x0*vdy_brick_a2[mz][my][mx]; + ekz2 -= x0*vdz_brick_a2[mz][my][mx]; + ekx3 -= x0*vdx_brick_a3[mz][my][mx]; + eky3 -= x0*vdy_brick_a3[mz][my][mx]; + ekz3 -= x0*vdz_brick_a3[mz][my][mx]; + ekx4 -= x0*vdx_brick_a4[mz][my][mx]; + eky4 -= x0*vdy_brick_a4[mz][my][mx]; + ekz4 -= x0*vdz_brick_a4[mz][my][mx]; + ekx5 -= x0*vdx_brick_a5[mz][my][mx]; + eky5 -= x0*vdy_brick_a5[mz][my][mx]; + ekz5 -= x0*vdz_brick_a5[mz][my][mx]; + ekx6 -= x0*vdx_brick_a6[mz][my][mx]; + eky6 -= x0*vdy_brick_a6[mz][my][mx]; + ekz6 -= x0*vdz_brick_a6[mz][my][mx]; + } + } + } +
+ // convert D-field to force + type = atom->type[i]; + lj0 = B[7*type+6]; + lj1 = B[7*type+5]; + lj2 = B[7*type+4]; + lj3 = B[7*type+3]; + lj4 = B[7*type+2]; + lj5 = B[7*type+1]; + lj6 = B[7*type]; + f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6; + f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6; + f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6; + } + } + } +} +
+/* ---------------------------------------------------------------------- + interpolate from grid to get dispersion field & force on my particles + for ad scheme and arithmetic mixing rule +------------------------------------------------------------------------- */ +
+void PPPMDispTIP4POMP::fieldforce_a_ad() +{ + // loop over my charges, interpolate electric field from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + // ek = 3 components of E-field on particle +
+ const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; +
+ double *prd; +
+ if (triclinic == 0) prd = domain->prd; + else prd = domain->prd_lamda; +
+ double
xprd = prd[0]; + double yprd = prd[1]; + double zprd = prd[2]; + double zprd_slab = zprd*slab_volfactor; + + const double hx_inv = nx_pppm_6/xprd; + const double hy_inv = ny_pppm_6/yprd; + const double hz_inv = nz_pppm_6/zprd_slab; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + double * const * const f = thr->get_f(); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); + FFT_SCALAR * const * const dr1d = static_cast(thr->get_drho1d_6()); + + int l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx0, eky0, ekz0, ekx1, eky1, ekz1, ekx2, eky2, ekz2; + FFT_SCALAR ekx3, eky3, ekz3, ekx4, eky4, ekz4, ekx5, eky5, ekz5; + FFT_SCALAR ekx6, eky6, ekz6; + int type; + double lj0,lj1,lj2,lj3,lj4,lj5,lj6; + double sf = 0.0; + double s1,s2,s3; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); + compute_drho1d_thr(dr1d,dx,dy,dz, order_6, drho_coeff_6); + + ekx0 = eky0 = ekz0 = ZEROF; + ekx1 = eky1 = ekz1 = ZEROF; + ekx2 = eky2 = ekz2 = ZEROF; + ekx3 = eky3 = ekz3 = ZEROF; + ekx4 = eky4 = ekz4 = ZEROF; + ekx5 = eky5 = ekz5 = ZEROF; + ekx6 = eky6 = ekz6 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = dr1d[0][l]*r1d[1][m]*r1d[2][n]; + y0 = r1d[0][l]*dr1d[1][m]*r1d[2][n]; + z0 = r1d[0][l]*r1d[1][m]*dr1d[2][n]; + + ekx0 += x0*u_brick_a0[mz][my][mx]; + eky0 += y0*u_brick_a0[mz][my][mx]; + ekz0 += z0*u_brick_a0[mz][my][mx]; + + ekx1 += x0*u_brick_a1[mz][my][mx]; + eky1 += y0*u_brick_a1[mz][my][mx]; + ekz1 += z0*u_brick_a1[mz][my][mx]; + + ekx2 += x0*u_brick_a2[mz][my][mx]; + eky2 += y0*u_brick_a2[mz][my][mx]; + ekz2 += z0*u_brick_a2[mz][my][mx]; + + ekx3 += x0*u_brick_a3[mz][my][mx]; + eky3 += y0*u_brick_a3[mz][my][mx]; + ekz3 += z0*u_brick_a3[mz][my][mx]; + + ekx4 += x0*u_brick_a4[mz][my][mx]; + eky4 += y0*u_brick_a4[mz][my][mx]; + ekz4 += z0*u_brick_a4[mz][my][mx]; + + ekx5 += x0*u_brick_a5[mz][my][mx]; + eky5 += y0*u_brick_a5[mz][my][mx]; + ekz5 += z0*u_brick_a5[mz][my][mx]; + + ekx6 += x0*u_brick_a6[mz][my][mx]; + eky6 += y0*u_brick_a6[mz][my][mx]; + ekz6 += z0*u_brick_a6[mz][my][mx]; + } + } + } + + ekx0 *= hx_inv; + eky0 *= hy_inv; + ekz0 *= hz_inv; + + ekx1 *= hx_inv; + eky1 *= hy_inv; + ekz1 *= hz_inv; + + ekx2 *= hx_inv; + eky2 *= hy_inv; + ekz2 *= hz_inv; + + ekx3 *= hx_inv; + eky3 *= hy_inv; + ekz3 *= hz_inv; + + ekx4 *= hx_inv; + eky4 *= hy_inv; + ekz4 *= hz_inv; + + ekx5 *= hx_inv; + eky5 *= hy_inv; + ekz5 *= hz_inv; + + ekx6 *= hx_inv; + eky6 *= hy_inv; + ekz6 *= hz_inv; + + // convert D-field to force + type = atom->type[i]; + lj0 = B[7*type+6]; + lj1 = B[7*type+5]; + lj2 = B[7*type+4]; + lj3 = B[7*type+3]; + lj4 = B[7*type+2]; + lj5 = 
B[7*type+1]; + lj6 = B[7*type]; + + s1 = x[i][0]*hx_inv; + s2 = x[i][1]*hy_inv; + s3 = x[i][2]*hz_inv; + + sf = sf_coeff_6[0]*sin(2*MY_PI*s1); + sf += sf_coeff_6[1]*sin(4*MY_PI*s1); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + f[i][0] += lj0*ekx0 + lj1*ekx1 + lj2*ekx2 + lj3*ekx3 + lj4*ekx4 + lj5*ekx5 + lj6*ekx6 - sf; + + sf = sf_coeff_6[2]*sin(2*MY_PI*s2); + sf += sf_coeff_6[3]*sin(4*MY_PI*s2); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + f[i][1] += lj0*eky0 + lj1*eky1 + lj2*eky2 + lj3*eky3 + lj4*eky4 + lj5*eky5 + lj6*eky6 - sf; + + sf = sf_coeff_6[4]*sin(2*MY_PI*s3); + sf += sf_coeff_6[5]*sin(4*MY_PI*s3); + sf *= 4*lj0*lj6 + 4*lj1*lj5 + 4*lj2*lj4 + 2*lj3*lj3; + if (slabflag != 2) f[i][2] += lj0*ekz0 + lj1*ekz1 + lj2*ekz2 + lj3*ekz3 + lj4*ekz4 + lj5*ekz5 + lj6*ekz6 - sf; + } + } + } +} + +/* ---------------------------------------------------------------------- + interpolate from grid to get per-atom energy/virial for dispersion + interaction and arithmetic mixing rule + ------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::fieldforce_a_peratom() +{ + // loop over my charges, interpolate from nearby grid points + // (nx,ny,nz) = global coords of grid pt to "lower left" of charge + // (dx,dy,dz) = distance to "lower left" grid pt + // (mx,my,mz) = global coords of moving stencil pt + + const double * const * const x = atom->x; + const int nthreads = comm->nthreads; + const int nlocal = atom->nlocal; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { +#if defined(_OPENMP) + // each thread works on a fixed chunk of atoms. + const int tid = omp_get_thread_num(); + const int inum = nlocal; + const int idelta = 1 + inum/nthreads; + const int ifrom = tid*idelta; + const int ito = ((ifrom + idelta) > inum) ? 
inum : ifrom + idelta; +#else + const int ifrom = 0; + const int ito = nlocal; + const int tid = 0; +#endif + ThrData *thr = fix->get_thr(tid); + FFT_SCALAR * const * const r1d = static_cast(thr->get_rho1d_6()); + + int i,l,m,n,nx,ny,nz,mx,my,mz; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR u0,v00,v10,v20,v30,v40,v50; + FFT_SCALAR u1,v01,v11,v21,v31,v41,v51; + FFT_SCALAR u2,v02,v12,v22,v32,v42,v52; + FFT_SCALAR u3,v03,v13,v23,v33,v43,v53; + FFT_SCALAR u4,v04,v14,v24,v34,v44,v54; + FFT_SCALAR u5,v05,v15,v25,v35,v45,v55; + FFT_SCALAR u6,v06,v16,v26,v36,v46,v56; + int type; + double lj0,lj1,lj2,lj3,lj4,lj5,lj6; + + // this if protects against having more threads than local atoms + if (ifrom < nlocal) { + for (int i = ifrom; i < ito; i++) { + + nx = part2grid_6[i][0]; + ny = part2grid_6[i][1]; + nz = part2grid_6[i][2]; + dx = nx+shiftone_6 - (x[i][0]-boxlo[0])*delxinv_6; + dy = ny+shiftone_6 - (x[i][1]-boxlo[1])*delyinv_6; + dz = nz+shiftone_6 - (x[i][2]-boxlo[2])*delzinv_6; + + compute_rho1d_thr(r1d,dx,dy,dz, order_6, rho_coeff_6); + + u0 = v00 = v10 = v20 = v30 = v40 = v50 = ZEROF; + u1 = v01 = v11 = v21 = v31 = v41 = v51 = ZEROF; + u2 = v02 = v12 = v22 = v32 = v42 = v52 = ZEROF; + u3 = v03 = v13 = v23 = v33 = v43 = v53 = ZEROF; + u4 = v04 = v14 = v24 = v34 = v44 = v54 = ZEROF; + u5 = v05 = v15 = v25 = v35 = v45 = v55 = ZEROF; + u6 = v06 = v16 = v26 = v36 = v46 = v56 = ZEROF; + for (n = nlower_6; n <= nupper_6; n++) { + mz = n+nz; + z0 = r1d[2][n]; + for (m = nlower_6; m <= nupper_6; m++) { + my = m+ny; + y0 = z0*r1d[1][m]; + for (l = nlower_6; l <= nupper_6; l++) { + mx = l+nx; + x0 = y0*r1d[0][l]; + if (eflag_atom) { + u0 += x0*u_brick_a0[mz][my][mx]; + u1 += x0*u_brick_a1[mz][my][mx]; + u2 += x0*u_brick_a2[mz][my][mx]; + u3 += x0*u_brick_a3[mz][my][mx]; + u4 += x0*u_brick_a4[mz][my][mx]; + u5 += x0*u_brick_a5[mz][my][mx]; + u6 += x0*u_brick_a6[mz][my][mx]; + } + if (vflag_atom) { + v00 += x0*v0_brick_a0[mz][my][mx]; + v10 += x0*v1_brick_a0[mz][my][mx]; + v20 += x0*v2_brick_a0[mz][my][mx]; + v30 += x0*v3_brick_a0[mz][my][mx]; + v40 += x0*v4_brick_a0[mz][my][mx]; + v50 += x0*v5_brick_a0[mz][my][mx]; + v01 += x0*v0_brick_a1[mz][my][mx]; + v11 += x0*v1_brick_a1[mz][my][mx]; + v21 += x0*v2_brick_a1[mz][my][mx]; + v31 += x0*v3_brick_a1[mz][my][mx]; + v41 += x0*v4_brick_a1[mz][my][mx]; + v51 += x0*v5_brick_a1[mz][my][mx]; + v02 += x0*v0_brick_a2[mz][my][mx]; + v12 += x0*v1_brick_a2[mz][my][mx]; + v22 += x0*v2_brick_a2[mz][my][mx]; + v32 += x0*v3_brick_a2[mz][my][mx]; + v42 += x0*v4_brick_a2[mz][my][mx]; + v52 += x0*v5_brick_a2[mz][my][mx]; + v03 += x0*v0_brick_a3[mz][my][mx]; + v13 += x0*v1_brick_a3[mz][my][mx]; + v23 += x0*v2_brick_a3[mz][my][mx]; + v33 += x0*v3_brick_a3[mz][my][mx]; + v43 += x0*v4_brick_a3[mz][my][mx]; + v53 += x0*v5_brick_a3[mz][my][mx]; + v04 += x0*v0_brick_a4[mz][my][mx]; + v14 += x0*v1_brick_a4[mz][my][mx]; + v24 += x0*v2_brick_a4[mz][my][mx]; + v34 += x0*v3_brick_a4[mz][my][mx]; + v44 += x0*v4_brick_a4[mz][my][mx]; + v54 += x0*v5_brick_a4[mz][my][mx]; + v05 += x0*v0_brick_a5[mz][my][mx]; + v15 += x0*v1_brick_a5[mz][my][mx]; + v25 += x0*v2_brick_a5[mz][my][mx]; + v35 += x0*v3_brick_a5[mz][my][mx]; + v45 += x0*v4_brick_a5[mz][my][mx]; + v55 += x0*v5_brick_a5[mz][my][mx]; + v06 += x0*v0_brick_a6[mz][my][mx]; + v16 += x0*v1_brick_a6[mz][my][mx]; + v26 += x0*v2_brick_a6[mz][my][mx]; + v36 += x0*v3_brick_a6[mz][my][mx]; + v46 += x0*v4_brick_a6[mz][my][mx]; + v56 += x0*v5_brick_a6[mz][my][mx]; + } + } + } + } + + // convert D-field to force + type = atom->type[i]; + lj0 = 
B[7*type+6]*0.5; + lj1 = B[7*type+5]*0.5; + lj2 = B[7*type+4]*0.5; + lj3 = B[7*type+3]*0.5; + lj4 = B[7*type+2]*0.5; + lj5 = B[7*type+1]*0.5; + lj6 = B[7*type]*0.5; + + if (eflag_atom) + eatom[i] += u0*lj0 + u1*lj1 + u2*lj2 + + u3*lj3 + u4*lj4 + u5*lj5 + u6*lj6; + if (vflag_atom) { + vatom[i][0] += v00*lj0 + v01*lj1 + v02*lj2 + v03*lj3 + + v04*lj4 + v05*lj5 + v06*lj6; + vatom[i][1] += v10*lj0 + v11*lj1 + v12*lj2 + v13*lj3 + + v14*lj4 + v15*lj5 + v16*lj6; + vatom[i][2] += v20*lj0 + v21*lj1 + v22*lj2 + v23*lj3 + + v24*lj4 + v25*lj5 + v26*lj6; + vatom[i][3] += v30*lj0 + v31*lj1 + v32*lj2 + v33*lj3 + + v34*lj4 + v35*lj5 + v36*lj6; + vatom[i][4] += v40*lj0 + v41*lj1 + v42*lj2 + v43*lj3 + + v44*lj4 + v45*lj5 + v46*lj6; + vatom[i][5] += v50*lj0 + v51*lj1 + v52*lj2 + v53*lj3 + + v54*lj4 + v55*lj5 + v56*lj6; + } + } + } + } +} + +/* ---------------------------------------------------------------------- + charge assignment into rho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ +void PPPMDispTIP4POMP::compute_rho1d_thr(FFT_SCALAR * const * const r1d, const FFT_SCALAR &dx, + const FFT_SCALAR &dy, const FFT_SCALAR &dz, + const int ord, FFT_SCALAR * const * const rho_c) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-ord)/2; k <= ord/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = ord-1; l >= 0; l--) { + r1 = rho_c[l][k] + r1*dx; + r2 = rho_c[l][k] + r2*dy; + r3 = rho_c[l][k] + r3*dz; + } + r1d[0][k] = r1; + r1d[1][k] = r2; + r1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + charge assignment into drho1d + dx,dy,dz = distance of particle from "lower left" grid point +------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::compute_drho1d_thr(FFT_SCALAR * const * const dr1d, const FFT_SCALAR &dx, + const FFT_SCALAR &dy, const FFT_SCALAR &dz, + const int ord, FFT_SCALAR * const * const drho_c) +{ + int k,l; + FFT_SCALAR r1,r2,r3; + + for (k = (1-ord)/2; k <= ord/2; k++) { + r1 = r2 = r3 = ZEROF; + + for (l = ord-2; l >= 0; l--) { + r1 = drho_c[l][k] + r1*dx; + r2 = drho_c[l][k] + r2*dy; + r3 = drho_c[l][k] + r3*dz; + } + dr1d[0][k] = r1; + dr1d[1][k] = r2; + dr1d[2][k] = r3; + } +} + +/* ---------------------------------------------------------------------- + find 2 H atoms bonded to O atom i + compute position xM of fictitious charge site for O atom + also return local indices iH1,iH2 of H atoms +------------------------------------------------------------------------- */ + +void PPPMDispTIP4POMP::find_M_thr(int i, int &iH1, int &iH2, dbl3_t &xM) +{ + iH1 = atom->map(atom->tag[i] + 1); + iH2 = atom->map(atom->tag[i] + 2); + + if (iH1 == -1 || iH2 == -1) error->one(FLERR,"TIP4P hydrogen is missing"); + if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) + error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + + const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0]; + + double delx1 = x[iH1].x - x[i].x; + double dely1 = x[iH1].y - x[i].y; + double delz1 = x[iH1].z - x[i].z; + domain->minimum_image(delx1,dely1,delz1); + + double delx2 = x[iH2].x - x[i].x; + double dely2 = x[iH2].y - x[i].y; + double delz2 = x[iH2].z - x[i].z; + domain->minimum_image(delx2,dely2,delz2); + + xM.x = x[i].x + alpha * 0.5 * (delx1 + delx2); + xM.y = x[i].y + alpha * 0.5 * (dely1 + dely2); + xM.z = x[i].z + alpha * 0.5 * (delz1 + delz2); +} diff --git a/src/USER-OMP/pppm_disp_tip4p_omp.h 
b/src/USER-OMP/pppm_disp_tip4p_omp.h new file mode 100644 index 0000000000..e05a52ac80 --- /dev/null +++ b/src/USER-OMP/pppm_disp_tip4p_omp.h @@ -0,0 +1,84 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov +
+ Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. +
+ See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ +
+#ifdef KSPACE_CLASS +
+KSpaceStyle(pppm/disp/tip4p/omp,PPPMDispTIP4POMP) +
+#else +
+#ifndef LMP_PPPM_DISP_TIP4P_OMP_H +#define LMP_PPPM_DISP_TIP4P_OMP_H +
+#include "pppm_disp_tip4p.h" +#include "thr_omp.h" +
+namespace LAMMPS_NS { +
+ class PPPMDispTIP4POMP : public PPPMDispTIP4P, public ThrOMP { + public: + PPPMDispTIP4POMP(class LAMMPS *, int, char **); + virtual ~PPPMDispTIP4POMP () {}; +
+ protected: + virtual void allocate(); + virtual void deallocate(); +
+ virtual void compute_gf(); + virtual void compute_gf_6(); +
+ virtual void compute(int,int); +
+ virtual void particle_map(double, double, double, + double, int **, int, int, + int, int, int, int, int, int); + virtual void particle_map_c(double, double, double, + double, int **, int, int, + int, int, int, int, int, int); + virtual void make_rho_c(); // XXX: not (yet) multi-threaded + virtual void make_rho_g(); + virtual void make_rho_a(); +
+ virtual void fieldforce_c_ik(); + virtual void fieldforce_c_ad(); + // virtual void fieldforce_peratom(); XXX: need to benchmark first. + virtual void fieldforce_g_ik(); + virtual void fieldforce_g_ad(); + virtual void fieldforce_g_peratom(); + virtual void fieldforce_a_ik(); + virtual void fieldforce_a_ad(); + virtual void fieldforce_a_peratom(); +
+ private: + void compute_rho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &, + const FFT_SCALAR &, const FFT_SCALAR &, + const int, FFT_SCALAR * const * const); + void compute_drho1d_thr(FFT_SCALAR * const * const, const FFT_SCALAR &, + const FFT_SCALAR &, const FFT_SCALAR &, + const int, FFT_SCALAR * const * const); + virtual void find_M_thr(int, int &, int &, dbl3_t &); +
+}; +
+} +
+#endif +#endif +
+/* ERROR/WARNING messages: +
+E: Kspace style pppm/disp/tip4p/omp requires newton on +
+Self-explanatory. +
+*/
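
For reference, a minimal standalone sketch (not part of the patch) of the static per-thread atom partitioning used in the dispersion fieldforce_g_* / fieldforce_a_* routines above: each thread takes one contiguous chunk of the nlocal atoms and the last chunk is clamped. The atom and thread counts below are made-up example values; only the idelta/ifrom/ito arithmetic mirrors the code.

// illustrative only: mirrors the chunking arithmetic of the parallel regions above
#include <cstdio>

int main()
{
  const int nlocal = 10;     // example: number of atoms owned by this MPI rank
  const int nthreads = 4;    // example: number of OpenMP threads

  for (int tid = 0; tid < nthreads; ++tid) {
    const int idelta = 1 + nlocal/nthreads;                                  // chunk size per thread
    const int ifrom  = tid*idelta;                                           // first atom of this chunk
    const int ito    = (ifrom + idelta > nlocal) ? nlocal : ifrom + idelta;  // clamped end of chunk
    // a thread whose chunk starts at or beyond nlocal does no work
    // (cf. the "if (ifrom < nlocal)" guard in the routines above)
    printf("thread %d handles atoms [%d,%d)\n", tid, ifrom, ito);
  }
  return 0;
}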
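
Likewise, a hedged sketch (also not part of the patch) of the TIP4P force redistribution performed in fieldforce_c_ik() and fieldforce_c_ad(): the electric field is evaluated at the massless M site located by find_M_thr(), and the resulting force is split back onto the real atoms, a (1 - alpha) fraction to the oxygen and alpha/2 to each hydrogen. The Vec3 type and the helper name are hypothetical, and the slabflag == 2 special case for the z component is omitted.

// illustrative only: the O/H split applied to a force computed at the TIP4P M site
struct Vec3 { double x, y, z; };

void redistribute_msite_force(const Vec3 &fM, double alpha,
                              Vec3 &fO, Vec3 &fH1, Vec3 &fH2)
{
  // the oxygen keeps the (1 - alpha) fraction of the M-site force
  fO.x  += (1.0 - alpha)*fM.x;
  fO.y  += (1.0 - alpha)*fM.y;
  fO.z  += (1.0 - alpha)*fM.z;

  // each hydrogen receives half of the remaining alpha fraction
  fH1.x += 0.5*alpha*fM.x;
  fH1.y += 0.5*alpha*fM.y;
  fH1.z += 0.5*alpha*fM.z;

  fH2.x += 0.5*alpha*fM.x;
  fH2.y += 0.5*alpha*fM.y;
  fH2.z += 0.5*alpha*fM.z;
}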