298 lines
9.9 KiB
C++
298 lines
9.9 KiB
C++
/* ----------------------------------------------------------------------
|
||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||
http://lammps.sandia.gov, Sandia National Laboratories
|
||
Steve Plimpton, sjplimp@sandia.gov
|
||
|
||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||
certain rights in this software. This software is distributed under
|
||
the GNU General Public License.
|
||
|
||
See the README file in the top-level LAMMPS directory.
|
||
------------------------------------------------------------------------- */
|
||
|
||
#include "string.h"
|
||
#include "stdlib.h"
|
||
#include "fix_gpu.h"
|
||
#include "atom.h"
|
||
#include "force.h"
|
||
#include "pair.h"
|
||
#include "pair_hybrid.h"
|
||
#include "pair_hybrid_overlay.h"
|
||
#include "respa.h"
|
||
#include "input.h"
|
||
#include "timer.h"
|
||
#include "modify.h"
|
||
#include "update.h"
|
||
#include "domain.h"
|
||
#include "universe.h"
|
||
#include "gpu_extra.h"
|
||
#include "neighbor.h"
|
||
#include "citeme.h"
|
||
#include "error.h"
|
||
|
||
using namespace LAMMPS_NS;
|
||
using namespace FixConst;
|
||
|
||
enum{GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH};
|
||
|
||
extern int lmp_init_device(MPI_Comm world, MPI_Comm replica,
|
||
const int first_gpu, const int last_gpu,
|
||
const int gpu_mode, const double particle_split,
|
||
const int nthreads, const int t_per_atom,
|
||
const double cell_size, char *opencl_flags);
|
||
extern void lmp_clear_device();
|
||
extern double lmp_gpu_forces(double **f, double **tor, double *eatom,
|
||
double **vatom, double *virial, double &ecoul);
|
||
|
||
static const char cite_gpu_package[] =
|
||
"GPU package (short-range, long-range and three-body potentials):\n\n"
|
||
"@Article{Brown11,\n"
|
||
" author = {W. M. Brown, P. Wang, S. J. Plimpton, A. N. Tharrington},\n"
|
||
" title = {Implementing Molecular Dynamics on Hybrid High Performance Computers - Short Range Forces},\n"
|
||
" journal = {Comp.~Phys.~Comm.},\n"
|
||
" year = 2011,\n"
|
||
" volume = 182,\n"
|
||
" pages = {898--911}\n"
|
||
"}\n\n"
|
||
"@Article{Brown12,\n"
|
||
" author = {W. M. Brown, A. Kohlmeyer, S. J. Plimpton, A. N. Tharrington},\n"
|
||
" title = {Implementing Molecular Dynamics on Hybrid High Performance Computers - Particle-Particle Particle-Mesh},\n"
|
||
" journal = {Comp.~Phys.~Comm.},\n"
|
||
" year = 2012,\n"
|
||
" volume = 183,\n"
|
||
" pages = {449--459}\n"
|
||
"}\n\n"
|
||
"@Article{Brown13,\n"
|
||
" author = {W. M. Brown, Y. Masako},\n"
|
||
" title = {Implementing Molecular Dynamics on Hybrid High Performance Computers – Three-Body Potentials},\n"
|
||
" journal = {Comp.~Phys.~Comm.},\n"
|
||
" year = 2013,\n"
|
||
" volume = 184,\n"
|
||
" pages = {2785--2793}\n"
|
||
"}\n\n";
|
||
|
||
/* ---------------------------------------------------------------------- */
|
||
|
||
FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) :
|
||
Fix(lmp, narg, arg)
|
||
{
|
||
if (lmp->citeme) lmp->citeme->add(cite_gpu_package);
|
||
|
||
if (lmp->cuda)
|
||
error->all(FLERR,"Cannot use GPU package with USER-CUDA package enabled");
|
||
|
||
if (narg < 4) error->all(FLERR,"Illegal package gpu command");
|
||
|
||
int ngpu = atoi(arg[3]);
|
||
if (ngpu <= 0) error->all(FLERR,"Illegal package gpu command");
|
||
int first_gpu = 0;
|
||
int last_gpu = ngpu-1;
|
||
|
||
// options
|
||
|
||
_gpu_mode = GPU_NEIGH;
|
||
_particle_split = 1.0;
|
||
int nthreads = 1;
|
||
int threads_per_atom = -1;
|
||
double binsize = -1;
|
||
char *opencl_flags = NULL;
|
||
|
||
int iarg = 4;
|
||
while (iarg < narg) {
|
||
if (strcmp(arg[iarg],"neigh") == 0) {
|
||
if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command");
|
||
if (strcmp(arg[iarg]+1,"yes") == 0) _gpu_mode = GPU_NEIGH;
|
||
else if (strcmp(arg[iarg]+1,"no") == 0) _gpu_mode = GPU_FORCE;
|
||
else if (strcmp(arg[iarg]+1,"hybrid") == 0) _gpu_mode = GPU_HYB_NEIGH;
|
||
else error->all(FLERR,"Illegal package gpu command");
|
||
iarg += 2;
|
||
} else if (strcmp(arg[iarg],"split") == 0) {
|
||
if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command");
|
||
_particle_split = force->numeric(FLERR,arg[iarg+1]);
|
||
if (_particle_split == 0.0 || _particle_split > 1.0)
|
||
error->all(FLERR,"Illegal package GPU command");
|
||
iarg += 2;
|
||
} else if (strcmp(arg[iarg],"gpuID") == 0) {
|
||
if (iarg+3 > narg) error->all(FLERR,"Illegal package gpu command");
|
||
first_gpu = force->inumeric(FLERR,arg[iarg+1]);
|
||
last_gpu = force->inumeric(FLERR,arg[iarg+2]);
|
||
iarg += 3;
|
||
} else if (strcmp(arg[iarg],"tpa") == 0) {
|
||
if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command");
|
||
threads_per_atom = force->inumeric(FLERR,arg[iarg+1]);
|
||
iarg += 2;
|
||
} else if (strcmp(arg[iarg],"nthreads") == 0) {
|
||
if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command");
|
||
nthreads = force->inumeric(FLERR,arg[iarg+1]);
|
||
if (nthreads < 1) error->all(FLERR,"Illegal fix GPU command");
|
||
iarg += 2;
|
||
} else if (strcmp(arg[iarg],"binsize") == 0) {
|
||
if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command");
|
||
binsize = force->numeric(FLERR,arg[iarg+1]);
|
||
if (binsize <= 0.0) error->all(FLERR,"Illegal fix GPU command");
|
||
iarg += 2;
|
||
} else if (strcmp(arg[iarg],"device") == 0) {
|
||
if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command");
|
||
opencl_flags = arg[iarg+1];
|
||
iarg += 2;
|
||
} else error->all(FLERR,"Illegal package gpu command");
|
||
}
|
||
|
||
// error check
|
||
|
||
if ((_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH) &&
|
||
domain->triclinic)
|
||
error->all(FLERR,"Cannot use package gpu neigh yes with triclinic box");
|
||
|
||
#ifndef _OPENMP
|
||
if (nthreads > 1)
|
||
error->all(FLERR,"No OpenMP support compiled in");
|
||
#endif
|
||
|
||
// set newton_pair = 0 since required by all GPU pair styles
|
||
|
||
force->newton_pair = 0;
|
||
if (force->newton_pair || force->newton_bond) force->newton = 1;
|
||
else force->newton = 0;
|
||
|
||
// pass params to GPU library
|
||
|
||
int gpu_flag = lmp_init_device(universe->uworld, world, first_gpu, last_gpu,
|
||
_gpu_mode, _particle_split, nthreads,
|
||
threads_per_atom, binsize, opencl_flags);
|
||
GPU_EXTRA::check_flag(gpu_flag,error,world);
|
||
}
|
||
|
||
/* ---------------------------------------------------------------------- */
|
||
|
||
FixGPU::~FixGPU()
|
||
{
|
||
lmp_clear_device();
|
||
}
|
||
|
||
/* ---------------------------------------------------------------------- */
|
||
|
||
int FixGPU::setmask()
|
||
{
|
||
int mask = 0;
|
||
mask |= POST_FORCE;
|
||
mask |= MIN_POST_FORCE;
|
||
mask |= POST_FORCE_RESPA;
|
||
return mask;
|
||
}
|
||
|
||
/* ---------------------------------------------------------------------- */
|
||
|
||
void FixGPU::init()
|
||
{
|
||
// GPU package cannot be used with atom_style template
|
||
|
||
if (atom->molecular == 2)
|
||
error->all(FLERR,"GPU package does not (yet) work with "
|
||
"atom_style template");
|
||
|
||
// hybrid cannot be used with force/neigh option
|
||
|
||
if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
|
||
if (force->pair_match("hybrid",1) != NULL ||
|
||
force->pair_match("hybrid/overlay",1) != NULL)
|
||
error->all(FLERR,"Cannot use pair hybrid with GPU neighbor list builds");
|
||
if (_particle_split < 0)
|
||
if (force->pair_match("hybrid",1) != NULL ||
|
||
force->pair_match("hybrid/overlay",1) != NULL)
|
||
error->all(FLERR,"GPU split param must be positive "
|
||
"for hybrid pair styles");
|
||
|
||
// make sure fdotr virial is not accumulated multiple times
|
||
|
||
if (force->pair_match("hybrid",1) != NULL) {
|
||
PairHybrid *hybrid = (PairHybrid *) force->pair;
|
||
for (int i = 0; i < hybrid->nstyles; i++)
|
||
if (strstr(hybrid->keywords[i],"/gpu")==NULL)
|
||
force->pair->no_virial_fdotr_compute = 1;
|
||
} else if (force->pair_match("hybrid/overlay",1) != NULL) {
|
||
PairHybridOverlay *hybrid = (PairHybridOverlay *) force->pair;
|
||
for (int i = 0; i < hybrid->nstyles; i++)
|
||
if (strstr(hybrid->keywords[i],"/gpu")==NULL)
|
||
force->pair->no_virial_fdotr_compute = 1;
|
||
}
|
||
|
||
// rRESPA support
|
||
|
||
if (strstr(update->integrate_style,"respa"))
|
||
_nlevels_respa = ((Respa *) update->integrate)->nlevels;
|
||
}
|
||
|
||
/* ---------------------------------------------------------------------- */
|
||
|
||
void FixGPU::setup(int vflag)
|
||
{
|
||
if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
|
||
if (neighbor->exclude_setting()!=0)
|
||
error->all(FLERR,
|
||
"Cannot use neigh_modify exclude with GPU neighbor builds");
|
||
|
||
if (strstr(update->integrate_style,"verlet")) post_force(vflag);
|
||
else {
|
||
// in setup only, all forces calculated on GPU are put in the outer level
|
||
((Respa *) update->integrate)->copy_flevel_f(_nlevels_respa-1);
|
||
post_force(vflag);
|
||
((Respa *) update->integrate)->copy_f_flevel(_nlevels_respa-1);
|
||
}
|
||
}
|
||
|
||
/* ---------------------------------------------------------------------- */
|
||
|
||
void FixGPU::min_setup(int vflag)
|
||
{
|
||
post_force(vflag);
|
||
}
|
||
|
||
/* ---------------------------------------------------------------------- */
|
||
|
||
void FixGPU::post_force(int vflag)
|
||
{
|
||
timer->stamp();
|
||
double lvirial[6];
|
||
for (int i = 0; i < 6; i++) lvirial[i] = 0.0;
|
||
double my_eng = lmp_gpu_forces(atom->f, atom->torque, force->pair->eatom,
|
||
force->pair->vatom, lvirial,
|
||
force->pair->eng_coul);
|
||
|
||
force->pair->eng_vdwl += my_eng;
|
||
force->pair->virial[0] += lvirial[0];
|
||
force->pair->virial[1] += lvirial[1];
|
||
force->pair->virial[2] += lvirial[2];
|
||
force->pair->virial[3] += lvirial[3];
|
||
force->pair->virial[4] += lvirial[4];
|
||
force->pair->virial[5] += lvirial[5];
|
||
|
||
if (force->pair->vflag_fdotr) force->pair->virial_fdotr_compute();
|
||
timer->stamp(TIME_PAIR);
|
||
}
|
||
|
||
/* ---------------------------------------------------------------------- */
|
||
|
||
void FixGPU::min_post_force(int vflag)
|
||
{
|
||
post_force(vflag);
|
||
}
|
||
|
||
/* ---------------------------------------------------------------------- */
|
||
|
||
void FixGPU::post_force_respa(int vflag, int ilevel, int iloop)
|
||
{
|
||
post_force(vflag);
|
||
}
|
||
|
||
/* ---------------------------------------------------------------------- */
|
||
|
||
double FixGPU::memory_usage()
|
||
{
|
||
double bytes = 0.0;
|
||
// memory usage currently returned by pair routine
|
||
return bytes;
|
||
}
|
||
|