Files
lammps/src/GPU/fix_nve_gpu.cpp
2022-10-24 11:08:26 -04:00

297 lines
8.3 KiB
C++

// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "fix_nve_gpu.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "gpu_extra.h"
#include "memory.h"
#include "neighbor.h"
#include "update.h"
#if (LAL_USE_OMP == 1)
#include <omp.h>
#endif
using namespace LAMMPS_NS;
using namespace FixConst;
/* ---------------------------------------------------------------------- */
FixNVEGPU::FixNVEGPU(LAMMPS *lmp, int narg, char **arg) :
FixNVE(lmp, narg, arg)
{
_dtfm = nullptr;
_nlocal_max = 0;
}
/* ---------------------------------------------------------------------- */
FixNVEGPU::~FixNVEGPU()
{
memory->destroy(_dtfm);
}
/* ---------------------------------------------------------------------- */
void FixNVEGPU::setup(int vflag)
{
FixNVE::setup(vflag);
if (utils::strmatch(update->integrate_style,"^respa"))
_respa_on = 1;
else
_respa_on = 0;
// ensure that _dtfm array is initialized if the group is not "all"
// or there is more than one atom type as that re-ordeted array is used for
// per-type/per-atom masses and group membership detection.
if ((igroup != 0) || (atom->ntypes > 1)) reset_dt();
}
/* ----------------------------------------------------------------------
allow for both per-type and per-atom mass
------------------------------------------------------------------------- */
void FixNVEGPU::initial_integrate(int vflag)
{
if (_respa_on) { FixNVE::initial_integrate(vflag); return; }
// update v and x of atoms in group
double * _noalias const x = atom->x[0];
double * _noalias const v = atom->v[0];
const double * _noalias const f = atom->f[0];
const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst :
atom->nlocal;
const int nlocal3 = nlocal * 3;
#if (LAL_USE_OMP == 1)
#pragma omp parallel
#endif
{
#if (LAL_USE_OMP == 1)
const int nthreads = comm->nthreads;
const int idelta = nlocal3 / nthreads + 1;
const int ifrom3 = omp_get_thread_num() * idelta;
const int ito3 = MIN(ifrom3 + idelta, nlocal3);
#else
const int ifrom3 = 0;
const int ito3 = nlocal3;
#endif
if (igroup == 0 && atom->ntypes == 1 && !atom->rmass) {
const double dtfm = dtf / atom->mass[1];
#if (LAL_USE_OMP_SIMD == 1)
#pragma omp simd
#endif
for (int i = ifrom3; i < ito3; i++) {
v[i] += dtfm * f[i];
x[i] += dtv * v[i];
}
} else if (igroup == 0) {
#if (LAL_USE_OMP_SIMD == 1)
#pragma omp simd
#endif
for (int i = ifrom3; i < ito3; i++) {
v[i] += _dtfm[i] * f[i];
x[i] += dtv * v[i];
}
} else {
#if (LAL_USE_OMP_SIMD == 1)
#pragma omp simd
#endif
for (int i = ifrom3; i < ito3; i++) {
if (_dtfm[i] != 0.0) {
v[i] += _dtfm[i] * f[i];
x[i] += dtv * v[i];
}
}
}
}
}
/* ---------------------------------------------------------------------- */
void FixNVEGPU::final_integrate()
{
if (_respa_on) { FixNVE::final_integrate(); return; }
// update v of atoms in group
double * _noalias const v = atom->v[0];
const double * _noalias const f = atom->f[0];
const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst :
atom->nlocal;
if (neighbor->ago == 0) {
if (igroup != 0 || atom->ntypes != 1 || atom->rmass) {
if (nlocal > _nlocal_max) {
if (_nlocal_max) memory->destroy(_dtfm);
_nlocal_max = static_cast<int>(1.20 * nlocal);
memory->create(_dtfm, _nlocal_max * 3, "fix_nve_gpu:dtfm");
}
}
}
#if (LAL_USE_OMP == 1)
#pragma omp parallel
#endif
{
#if (LAL_USE_OMP == 1)
const int nthreads = comm->nthreads;
const int tid = omp_get_thread_num();
const int idelta = nlocal / nthreads + 1;
const int ifrom = tid * idelta;
const int ito = MIN(ifrom + idelta, nlocal);
const int ifrom3 = ifrom * 3;
const int ito3 = ito * 3;
#else
const int tid = 0;
const int ifrom = 0;
const int ifrom3 = 0;
const int ito = nlocal;
const int ito3 = nlocal * 3;
#endif
if (igroup == 0 && atom->ntypes == 1 && !atom->rmass) {
const double dtfm = dtf / atom->mass[1];
#if (LAL_USE_OMP_SIMD == 1)
#pragma omp simd
#endif
for (int i = ifrom3; i < ito3; i++)
v[i] += dtfm * f[i];
} else if (igroup == 0) {
if (neighbor->ago == 0) reset_dt_omp(ifrom,ito,tid);
#if (LAL_USE_OMP_SIMD == 1)
#pragma omp simd
#endif
for (int i = ifrom3; i < ito3; i++)
v[i] += _dtfm[i] * f[i];
} else {
if (neighbor->ago == 0) reset_dt_omp(ifrom,ito,tid);
#if (LAL_USE_OMP_SIMD == 1)
#pragma omp simd
#endif
for (int i = ifrom3; i < ito3; i++)
v[i] += _dtfm[i] * f[i];
}
}
}
void FixNVEGPU::reset_dt() {
if (_respa_on) { FixNVE::reset_dt(); return; }
if (igroup == 0 && atom->ntypes == 1 && !atom->rmass) {
dtv = update->dt;
dtf = 0.5 * update->dt * force->ftm2v;
} else {
const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst :
atom->nlocal;
if (nlocal > _nlocal_max) {
if (_nlocal_max) memory->destroy(_dtfm);
_nlocal_max = static_cast<int>(1.20 * nlocal);
memory->create(_dtfm, _nlocal_max * 3, "fix_nve_gpu:dtfm");
}
#if (LAL_USE_OMP == 1)
#pragma omp parallel
#endif
{
#if (LAL_USE_OMP == 1)
const int nthreads = comm->nthreads;
const int tid = omp_get_thread_num();
const int idelta = nlocal / nthreads + 1;
const int ifrom = tid * idelta;
const int ito = MIN(ifrom + idelta, nlocal);
#else
const int tid = 0;
const int ifrom = 0;
const int ito = nlocal;
#endif
reset_dt_omp(ifrom, ito, tid);
}
}
}
void FixNVEGPU::reset_dt_omp(const int ifrom, const int ito, const int tid) {
const double dtfo = 0.5 * update->dt * force->ftm2v;
if (tid == 0) {
dtv = update->dt;
dtf = dtfo;
}
const int * const mask = atom->mask;
if (igroup == 0) {
if (atom->rmass) {
const double * const rmass = atom->rmass;
int n = ifrom * 3;
for (int i = ifrom; i < ito; i++) {
const double dtfir = dtfo / rmass[i];
_dtfm[n++] = dtfir;
_dtfm[n++] = dtfir;
_dtfm[n++] = dtfir;
}
} else {
const double * const mass = atom->mass;
const int * const type = atom->type;
int n = ifrom * 3;
for (int i = ifrom; i < ito; i++) {
const double dtfim = dtfo / mass[type[i]];
_dtfm[n++] = dtfim;
_dtfm[n++] = dtfim;
_dtfm[n++] = dtfim;
}
}
} else {
if (atom->rmass) {
const double * const rmass = atom->rmass;
int n = ifrom * 3;
for (int i = ifrom; i < ito; i++)
if (mask[i] & groupbit) {
const double dtfir = dtfo / rmass[i];
_dtfm[n++] = dtfir;
_dtfm[n++] = dtfir;
_dtfm[n++] = dtfir;
} else {
_dtfm[n++] = 0.0;
_dtfm[n++] = 0.0;
_dtfm[n++] = 0.0;
}
} else {
const double * const mass = atom->mass;
const int * const type = atom->type;
int n = ifrom * 3;
for (int i = ifrom; i < ito; i++)
if (mask[i] & groupbit) {
const double dtfim = dtfo / mass[type[i]];
_dtfm[n++] = dtfim;
_dtfm[n++] = dtfim;
_dtfm[n++] = dtfim;
} else {
_dtfm[n++] = 0.0;
_dtfm[n++] = 0.0;
_dtfm[n++] = 0.0;
}
}
}
}
double FixNVEGPU::memory_usage()
{
const int nlocal = (igroup == atom->firstgroup) ? atom->nfirst :
atom->nlocal;
return FixNVE::memory_usage() + nlocal * 3 * sizeof(double);
}