Feb2021 GPU Package Update - GPU Package Files

2021-02-15 08:20:50 -08:00
parent 16004e8f45
commit e7e2d2323b
345 changed files with 13424 additions and 7708 deletions
--- a/src/GPU/fix_nh_gpu.cpp
+++ b/src/GPU/fix_nh_gpu.cpp
@ -0,0 +1,552 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://lammps.sandia.gov/, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: W. Michael Brown (Intel)
+------------------------------------------------------------------------- */
+
+#include "fix_nh_gpu.h"
+
+#include "atom.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "gpu_extra.h"
+#include "memory.h"
+#include "modify.h"
+#include "neighbor.h"
+#include "update.h"
+
+#include <cstring>
+#include <cmath>
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+#define TILTMAX 1.5
+
+enum{NOBIAS,BIAS};
+enum{ISO,ANISO,TRICLINIC};
+
+typedef struct { double x,y,z; } dbl3_t;
+
+/* ----------------------------------------------------------------------
+   NVT,NPH,NPT integrators for improved Nose-Hoover equations of motion
+ ---------------------------------------------------------------------- */
+
+FixNHGPU::FixNHGPU(LAMMPS *lmp, int narg, char **arg) :
+  FixNH(lmp, narg, arg)
+{
+  _dtfm = 0;
+  _nlocal3 = 0;
+  _nlocal_max = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixNHGPU::~FixNHGPU()
+{
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNHGPU::setup(int vflag)
+{
+  FixNH::setup(vflag);
+  if (strstr(update->integrate_style,"respa"))
+    _respa_on = 1;
+  else
+    _respa_on = 0;
+  reset_dt();
+}
+
+/* ----------------------------------------------------------------------
+   change box size
+   remap all atoms or dilate group atoms depending on allremap flag
+   if rigid bodies exist, scale rigid body centers-of-mass
+------------------------------------------------------------------------- */
+
+void FixNHGPU::remap()
+{
+  if (_respa_on) { FixNH::remap(); return; }
+
+  double oldlo,oldhi;
+  double expfac;
+
+  dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  double *h = domain->h;
+
+  // omega is not used, except for book-keeping
+
+  for (int i = 0; i < 6; i++) omega[i] += dto*omega_dot[i];
+
+  // convert pertinent atoms and rigid bodies to lamda coords
+  const double hi0 = domain->h_inv[0];
+  const double hi1 = domain->h_inv[1];
+  const double hi2 = domain->h_inv[2];
+  const double hi3 = domain->h_inv[3];
+  const double hi4 = domain->h_inv[4];
+  const double hi5 = domain->h_inv[5];
+  const double b0 = domain->boxlo[0];
+  const double b1 = domain->boxlo[1];
+  const double b2 = domain->boxlo[2];
+
+  if (allremap) {
+    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
+    #pragma omp parallel for simd schedule(static)
+    #elif (LAL_USE_OMP_SIMD == 1)
+    #pragma omp simd
+    #endif
+    for (int i = 0; i < nlocal; i++) {
+      const double d0 = x[i].x - b0;
+      const double d1 = x[i].y - b1;
+      const double d2 = x[i].z - b2;
+      x[i].x = hi0*d0 + hi5*d1 + hi4*d2;
+      x[i].y = hi1*d1 + hi3*d2;
+      x[i].z = hi2*d2;
+    }
+  } else {
+    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
+    #pragma omp parallel for simd schedule(static)
+    #elif (LAL_USE_OMP_SIMD == 1)
+    #pragma omp simd
+    #endif
+    for (int i = 0; i < nlocal; i++) {
+      if (mask[i] & dilate_group_bit) {
+        const double d0 = x[i].x - b0;
+        const double d1 = x[i].y - b1;
+        const double d2 = x[i].z - b2;
+        x[i].x = hi0*d0 + hi5*d1 + hi4*d2;
+        x[i].y = hi1*d1 + hi3*d2;
+        x[i].z = hi2*d2;
+      }
+    }
+  }
+
+  if (nrigid)
+    for (int i = 0; i < nrigid; i++)
+      modify->fix[rfix[i]]->deform(0);
+
+  // reset global and local box to new size/shape
+
+  // this operation corresponds to applying the
+  // translate and scale operations
+  // corresponding to the solution of the following ODE:
+  //
+  // h_dot = omega_dot * h
+  //
+  // where h_dot, omega_dot and h are all upper-triangular
+  // 3x3 tensors. In Voigt notation, the elements of the
+  // RHS product tensor are:
+  // h_dot = [0*0, 1*1, 2*2, 1*3+3*2, 0*4+5*3+4*2, 0*5+5*1]
+  //
+  // Ordering of operations preserves time symmetry.
+
+  double dto2 = dto/2.0;
+  double dto4 = dto/4.0;
+  double dto8 = dto/8.0;
+
+  // off-diagonal components, first half
+
+  if (pstyle == TRICLINIC) {
+
+    if (p_flag[4]) {
+      expfac = exp(dto8*omega_dot[0]);
+      h[4] *= expfac;
+      h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
+      h[4] *= expfac;
+    }
+
+    if (p_flag[3]) {
+      expfac = exp(dto4*omega_dot[1]);
+      h[3] *= expfac;
+      h[3] += dto2*(omega_dot[3]*h[2]);
+      h[3] *= expfac;
+    }
+
+    if (p_flag[5]) {
+      expfac = exp(dto4*omega_dot[0]);
+      h[5] *= expfac;
+      h[5] += dto2*(omega_dot[5]*h[1]);
+      h[5] *= expfac;
+    }
+
+    if (p_flag[4]) {
+      expfac = exp(dto8*omega_dot[0]);
+      h[4] *= expfac;
+      h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
+      h[4] *= expfac;
+    }
+  }
+
+  // scale diagonal components
+  // scale tilt factors with cell, if set
+
+  if (p_flag[0]) {
+    oldlo = domain->boxlo[0];
+    oldhi = domain->boxhi[0];
+    expfac = exp(dto*omega_dot[0]);
+    domain->boxlo[0] = (oldlo-fixedpoint[0])*expfac + fixedpoint[0];
+    domain->boxhi[0] = (oldhi-fixedpoint[0])*expfac + fixedpoint[0];
+  }
+
+  if (p_flag[1]) {
+    oldlo = domain->boxlo[1];
+    oldhi = domain->boxhi[1];
+    expfac = exp(dto*omega_dot[1]);
+    domain->boxlo[1] = (oldlo-fixedpoint[1])*expfac + fixedpoint[1];
+    domain->boxhi[1] = (oldhi-fixedpoint[1])*expfac + fixedpoint[1];
+    if (scalexy) h[5] *= expfac;
+  }
+
+  if (p_flag[2]) {
+    oldlo = domain->boxlo[2];
+    oldhi = domain->boxhi[2];
+    expfac = exp(dto*omega_dot[2]);
+    domain->boxlo[2] = (oldlo-fixedpoint[2])*expfac + fixedpoint[2];
+    domain->boxhi[2] = (oldhi-fixedpoint[2])*expfac + fixedpoint[2];
+    if (scalexz) h[4] *= expfac;
+    if (scaleyz) h[3] *= expfac;
+  }
+
+  // off-diagonal components, second half
+
+  if (pstyle == TRICLINIC) {
+
+    if (p_flag[4]) {
+      expfac = exp(dto8*omega_dot[0]);
+      h[4] *= expfac;
+      h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
+      h[4] *= expfac;
+    }
+
+    if (p_flag[3]) {
+      expfac = exp(dto4*omega_dot[1]);
+      h[3] *= expfac;
+      h[3] += dto2*(omega_dot[3]*h[2]);
+      h[3] *= expfac;
+    }
+
+    if (p_flag[5]) {
+      expfac = exp(dto4*omega_dot[0]);
+      h[5] *= expfac;
+      h[5] += dto2*(omega_dot[5]*h[1]);
+      h[5] *= expfac;
+    }
+
+    if (p_flag[4]) {
+      expfac = exp(dto8*omega_dot[0]);
+      h[4] *= expfac;
+      h[4] += dto4*(omega_dot[5]*h[3]+omega_dot[4]*h[2]);
+      h[4] *= expfac;
+    }
+
+  }
+
+  domain->yz = h[3];
+  domain->xz = h[4];
+  domain->xy = h[5];
+
+  // tilt factor to cell length ratio can not exceed TILTMAX in one step
+
+  if (domain->yz < -TILTMAX*domain->yprd ||
+      domain->yz > TILTMAX*domain->yprd ||
+      domain->xz < -TILTMAX*domain->xprd ||
+      domain->xz > TILTMAX*domain->xprd ||
+      domain->xy < -TILTMAX*domain->xprd ||
+      domain->xy > TILTMAX*domain->xprd)
+    error->all(FLERR,"Fix npt/nph has tilted box too far in one step - "
+               "periodic cell is too far from equilibrium state");
+
+  domain->set_global_box();
+  domain->set_local_box();
+
+  // convert pertinent atoms and rigid bodies back to box coords
+  const double h0 = domain->h[0];
+  const double h1 = domain->h[1];
+  const double h2 = domain->h[2];
+  const double h3 = domain->h[3];
+  const double h4 = domain->h[4];
+  const double h5 = domain->h[5];
+  const double nb0 = domain->boxlo[0];
+  const double nb1 = domain->boxlo[1];
+  const double nb2 = domain->boxlo[2];
+
+  if (allremap) {
+    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
+    #pragma omp parallel for simd schedule(static)
+    #elif (LAL_USE_OMP_SIMD == 1)
+    #pragma omp simd
+    #endif
+    for (int i = 0; i < nlocal; i++) {
+      x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0;
+      x[i].y = h1*x[i].y + h3*x[i].z + nb1;
+      x[i].z = h2*x[i].z + nb2;
+    }
+  } else {
+    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
+    #pragma omp parallel for simd schedule(static)
+    #elif (LAL_USE_OMP_SIMD == 1)
+    #pragma omp simd
+    #endif
+    for (int i = 0; i < nlocal; i++) {
+      if (mask[i] & dilate_group_bit) {
+        x[i].x = h0*x[i].x + h5*x[i].y + h4*x[i].z + nb0;
+        x[i].y = h1*x[i].y + h3*x[i].z + nb1;
+        x[i].z = h2*x[i].z + nb2;
+      }
+    }
+  }
+
+  if (nrigid)
+    for (int i = 0; i < nrigid; i++)
+      modify->fix[rfix[i]]->deform(1);
+}
+
+/* ----------------------------------------------------------------------
+   2nd half of Verlet update
+------------------------------------------------------------------------- */
+
+void FixNHGPU::final_integrate() {
+  if (neighbor->ago == 0 && _respa_on == 0) reset_dt();
+  FixNH::final_integrate();
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixNHGPU::reset_dt()
+{
+  if (_respa_on) { FixNH::reset_dt(); return; }
+  dtv = update->dt;
+  dtf = 0.5 * update->dt * force->ftm2v;
+  dthalf = 0.5 * update->dt;
+  dt4 = 0.25 * update->dt;
+  dt8 = 0.125 * update->dt;
+  dto = dthalf;
+
+  if (pstat_flag)
+    pdrag_factor = 1.0 - (update->dt * p_freq_max * drag / nc_pchain);
+
+  if (tstat_flag)
+    tdrag_factor = 1.0 - (update->dt * t_freq * drag / nc_tchain);
+
+  const int * const mask = atom->mask;
+  const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst :
+    atom->nlocal;
+
+  if (nlocal > _nlocal_max) {
+    if (_nlocal_max) memory->destroy(_dtfm);
+    _nlocal_max = static_cast<int>(1.20 * nlocal);
+    memory->create(_dtfm, _nlocal_max * 3, "fix_nh_gpu:dtfm");
+  }
+
+  _nlocal3 = nlocal * 3;
+
+  if (igroup == 0) {
+    if (atom->rmass) {
+      const double * const rmass = atom->rmass;
+      int n = 0;
+      for (int i = 0; i < nlocal; i++) {
+        const double dtfir = dtf / rmass[i];
+        _dtfm[n++] = dtfir;
+        _dtfm[n++] = dtfir;
+        _dtfm[n++] = dtfir;
+      }
+    } else {
+      const double * const mass = atom->mass;
+      const int * const type = atom->type;
+      int n = 0;
+      for (int i = 0; i < nlocal; i++) {
+        const double dtfim = dtf / mass[type[i]];
+        _dtfm[n++] = dtfim;
+        _dtfm[n++] = dtfim;
+        _dtfm[n++] = dtfim;
+      }
+    }
+  } else {
+    if (atom->rmass) {
+      const double * const rmass = atom->rmass;
+      int n = 0;
+      for (int i = 0; i < nlocal; i++)
+        if (mask[i] & groupbit) {
+          const double dtfir = dtf / rmass[i];
+          _dtfm[n++] = dtfir;
+          _dtfm[n++] = dtfir;
+          _dtfm[n++] = dtfir;
+        } else {
+          _dtfm[n++] = 0.0;
+          _dtfm[n++] = 0.0;
+          _dtfm[n++] = 0.0;
+        }
+    } else {
+      const double * const mass = atom->mass;
+      const int * const type = atom->type;
+      int n = 0;
+      for (int i = 0; i < nlocal; i++)
+        if (mask[i] & groupbit) {
+          const double dtfim = dtf / mass[type[i]];
+          _dtfm[n++] = dtfim;
+          _dtfm[n++] = dtfim;
+          _dtfm[n++] = dtfim;
+        } else {
+          _dtfm[n++] = 0.0;
+          _dtfm[n++] = 0.0;
+          _dtfm[n++] = 0.0;
+        }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   perform half-step barostat scaling of velocities
+-----------------------------------------------------------------------*/
+
+void FixNHGPU::nh_v_press()
+{
+  if (pstyle == TRICLINIC || which == BIAS || _respa_on) {
+    FixNH::nh_v_press();
+    return;
+  }
+
+  dbl3_t * _noalias const v = (dbl3_t *)atom->v[0];
+  int *mask = atom->mask;
+  int nlocal = atom->nlocal;
+  if (igroup == atom->firstgroup) nlocal = atom->nfirst;
+
+  double f0 = exp(-dt4*(omega_dot[0]+mtk_term2));
+  double f1 = exp(-dt4*(omega_dot[1]+mtk_term2));
+  double f2 = exp(-dt4*(omega_dot[2]+mtk_term2));
+  f0 *= f0;
+  f1 *= f1;
+  f2 *= f2;
+
+  if (igroup == 0) {
+    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
+    #pragma omp parallel for simd schedule(static)
+    #elif (LAL_USE_OMP_SIMD == 1)
+    #pragma omp simd
+    #endif
+    for (int i = 0; i < nlocal; i++) {
+      v[i].x *= f0;
+      v[i].y *= f1;
+      v[i].z *= f2;
+    }
+  } else {
+    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
+    #pragma omp parallel for simd schedule(static)
+    #elif (LAL_USE_OMP_SIMD == 1)
+    #pragma omp simd
+    #endif
+    for (int i = 0; i < nlocal; i++) {
+      if (mask[i] & groupbit) {
+        v[i].x *= f0;
+        v[i].y *= f1;
+        v[i].z *= f2;
+      }
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   perform half-step update of velocities
+-----------------------------------------------------------------------*/
+
+void FixNHGPU::nve_v()
+{
+  if (_respa_on) { FixNH::nve_v(); return; }
+
+  double * _noalias const v = atom->v[0];
+  const double * _noalias const f = atom->f[0];
+  #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
+  #pragma omp parallel for simd schedule(static)
+  #elif (LAL_USE_OMP_SIMD == 1)
+  #pragma omp simd
+  #endif
+  for (int i = 0; i < _nlocal3; i++)
+    v[i] += _dtfm[i] * f[i];
+}
+
+/* ----------------------------------------------------------------------
+   perform full-step update of positions
+-----------------------------------------------------------------------*/
+
+void FixNHGPU::nve_x()
+{
+  if (_respa_on) { FixNH::nve_x(); return; }
+
+  double * _noalias const x = atom->x[0];
+  double * _noalias const v = atom->v[0];
+
+  // x update by full step only for atoms in group
+
+  if (igroup == 0) {
+    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
+    #pragma omp parallel for simd schedule(static)
+    #elif (LAL_USE_OMP_SIMD == 1)
+    #pragma omp simd
+    #endif
+    for (int i = 0; i < _nlocal3; i++)
+      x[i] += dtv * v[i];
+  } else {
+    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
+    #pragma omp parallel for simd schedule(static)
+    #elif (LAL_USE_OMP_SIMD == 1)
+    #pragma omp simd
+    #endif
+    for (int i = 0; i < _nlocal3; i++) {
+      if (_dtfm[i] != 0.0)
+        x[i] += dtv * v[i];
+    }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   perform half-step thermostat scaling of velocities
+-----------------------------------------------------------------------*/
+
+void FixNHGPU::nh_v_temp()
+{
+  if (which == BIAS || _respa_on) {
+    FixNH::nh_v_temp();
+    return;
+  }
+
+  double * _noalias const v = atom->v[0];
+
+  if (igroup == 0) {
+    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
+    #pragma omp parallel for simd schedule(static)
+    #elif (LAL_USE_OMP_SIMD == 1)
+    #pragma omp simd
+    #endif
+    for (int i = 0; i < _nlocal3; i++)
+        v[i] *= factor_eta;
+  } else {
+    #if (LAL_USE_OMP == 1) && (LAL_USE_OMP_SIMD == 1)
+    #pragma omp parallel for simd schedule(static)
+    #elif (LAL_USE_OMP_SIMD == 1)
+    #pragma omp simd
+    #endif
+    for (int i = 0; i < _nlocal3; i++) {
+      if (_dtfm[i] != 0.0)
+        v[i] *= factor_eta;
+    }
+  }
+}
+
+double FixNHGPU::memory_usage()
+{
+  return FixNH::memory_usage() + _nlocal_max * 3 * sizeof(double);
+}