// clang-format off /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov Copyright (2003) Sandia Corporation. Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains certain rights in this software. This software is distributed under the GNU General Public License. See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ #include "fix_gpu.h" #include "atom.h" #include "citeme.h" #include "comm.h" #include "error.h" #include "force.h" #include "gpu_extra.h" #include "modify.h" #include "neighbor.h" #include "pair.h" #include "pair_hybrid.h" #include "respa.h" #include "timer.h" #include "universe.h" #include "update.h" #include #if (LAL_USE_OMP == 1) #include #endif using namespace LAMMPS_NS; using namespace FixConst; enum{GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH}; extern int lmp_init_device(MPI_Comm world, MPI_Comm replica, const int ngpu, const int first_gpu_id, const int gpu_mode, const double particle_split, const int t_per_atom, const double cell_size, char *opencl_args, const int ocl_platform, char *device_type_flags, const int block_pair); extern void lmp_clear_device(); extern double lmp_gpu_forces(double **f, double **tor, double *eatom, double **vatom, double *virial, double &ecoul, int &err_flag); extern double lmp_gpu_update_bin_size(const double subx, const double suby, const double subz, const int nlocal, const double cut); static const char cite_gpu_package[] = "GPU package (short-range, long-range and three-body potentials):\n\n" "@Article{Brown11,\n" " author = {W. M. Brown, P. Wang, S. J. Plimpton, A. N. Tharrington},\n" " title = {Implementing Molecular Dynamics on Hybrid High Performance Computers - Short Range Forces},\n" " journal = {Comp.~Phys.~Comm.},\n" " year = 2011,\n" " volume = 182,\n" " pages = {898--911}\n" "}\n\n" "@Article{Brown12,\n" " author = {W. M. Brown, A. Kohlmeyer, S. J. Plimpton, A. N. Tharrington},\n" " title = {Implementing Molecular Dynamics on Hybrid High Performance Computers - Particle-Particle Particle-Mesh},\n" " journal = {Comp.~Phys.~Comm.},\n" " year = 2012,\n" " volume = 183,\n" " pages = {449--459}\n" "}\n\n" "@Article{Brown13,\n" " author = {W. M. Brown, Y. Masako},\n" " title = {Implementing Molecular Dynamics on Hybrid High Performance Computers – Three-Body Potentials},\n" " journal = {Comp.~Phys.~Comm.},\n" " year = 2013,\n" " volume = 184,\n" " pages = {2785--2793}\n" "}\n\n" "@Article{Trung15,\n" " author = {T. D. Nguyen, S. J. Plimpton},\n" " title = {Accelerating dissipative particle dynamics simulations for soft matter systems},\n" " journal = {Comput.~Mater.~Sci.},\n" " year = 2015,\n" " volume = 100,\n" " pages = {173--180}\n" "}\n\n" "@Article{Trung17,\n" " author = {T. D. Nguyen},\n" " title = {GPU-accelerated Tersoff potentials for massively parallel Molecular Dynamics simulations},\n" " journal = {Comp.~Phys.~Comm.},\n" " year = 2017,\n" " volume = 212,\n" " pages = {113--122}\n" "}\n\n" "@Article{Nikolskiy19,\n" " author = {V. Nikolskiy, V. Stegailov},\n" " title = {GPU acceleration of four-site water models in LAMMPS},\n" " journal = {Proceeding of the International Conference on Parallel Computing (ParCo 2019), Prague, Czech Republic},\n" " year = 2019\n" "}\n\n"; /* ---------------------------------------------------------------------- */ FixGPU::FixGPU(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg) { if (lmp->citeme) lmp->citeme->add(cite_gpu_package); if (narg < 4) error->all(FLERR,"Illegal package gpu command"); // If ngpu is 0, autoset ngpu to the number of devices per node matching // best device int ngpu = atoi(arg[3]); if (ngpu < 0) error->all(FLERR,"Illegal package gpu command"); // Negative value indicate GPU package should find the best device ID int first_gpu_id = -1; // options _gpu_mode = GPU_NEIGH; _particle_split = 1.0; int nthreads = 0; int newtonflag = 0; int threads_per_atom = -1; double binsize = 0.0; char *opencl_args = nullptr; int block_pair = -1; int pair_only_flag = 0; int ocl_platform = -1; char *device_type_flags = nullptr; int iarg = 4; while (iarg < narg) { if (strcmp(arg[iarg],"neigh") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); if (strcmp(arg[iarg+1],"yes") == 0) _gpu_mode = GPU_NEIGH; else if (strcmp(arg[iarg+1],"no") == 0) _gpu_mode = GPU_FORCE; else if (strcmp(arg[iarg+1],"on") == 0) _gpu_mode = GPU_NEIGH; else if (strcmp(arg[iarg+1],"off") == 0) _gpu_mode = GPU_FORCE; else if (strcmp(arg[iarg+1],"true") == 0) _gpu_mode = GPU_NEIGH; else if (strcmp(arg[iarg+1],"false") == 0) _gpu_mode = GPU_FORCE; else if (strcmp(arg[iarg+1],"hybrid") == 0) _gpu_mode = GPU_HYB_NEIGH; else error->all(FLERR,"Illegal package gpu command"); iarg += 2; } else if (strcmp(arg[iarg],"newton") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); newtonflag = utils::logical(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"binsize") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); binsize = utils::numeric(FLERR,arg[iarg+1],false,lmp); if (binsize <= 0.0) error->all(FLERR,"Illegal fix GPU command"); iarg += 2; } else if (strcmp(arg[iarg],"split") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); _particle_split = utils::numeric(FLERR,arg[iarg+1],false,lmp); if (_particle_split == 0.0 || _particle_split > 1.0) error->all(FLERR,"Illegal package GPU command"); iarg += 2; } else if (strcmp(arg[iarg],"gpuID") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); first_gpu_id = utils::inumeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"tpa") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); threads_per_atom = utils::inumeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"omp") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); nthreads = utils::inumeric(FLERR,arg[iarg+1],false,lmp); if (nthreads < 0) error->all(FLERR,"Illegal fix GPU command"); iarg += 2; } else if (strcmp(arg[iarg],"platform") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); ocl_platform = utils::inumeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"device_type") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); device_type_flags = arg[iarg+1]; iarg += 2; } else if (strcmp(arg[iarg],"blocksize") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); block_pair = utils::inumeric(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"pair/only") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); pair_only_flag = utils::logical(FLERR,arg[iarg+1],false,lmp); iarg += 2; } else if (strcmp(arg[iarg],"ocl_args") == 0) { if (iarg+2 > narg) error->all(FLERR,"Illegal package gpu command"); opencl_args = arg[iarg+1]; iarg += 2; } else error->all(FLERR,"Illegal package gpu command"); } #if (LAL_USE_OMP == 0) if (nthreads > 1) error->all(FLERR,"No OpenMP support compiled into the GPU package"); #else if (nthreads > 0) { omp_set_num_threads(nthreads); comm->nthreads = nthreads; } #endif // set newton pair flag // require newtonflag = 0 since currently required by all GPU pair styles if (newtonflag == 1) error->all(FLERR,"Illegal package gpu command"); force->newton_pair = newtonflag; if (force->newton_pair || force->newton_bond) force->newton = 1; else force->newton = 0; if (pair_only_flag) { lmp->suffixp = lmp->suffix; lmp->suffix = nullptr; } else { if (lmp->suffixp) { lmp->suffix = lmp->suffixp; lmp->suffixp = nullptr; } } // pass params to GPU library // change binsize default (0.0) to -1.0 used by GPU lib if (binsize == 0.0) binsize = -1.0; _binsize = binsize; int gpu_flag = lmp_init_device(universe->uworld, world, ngpu, first_gpu_id, _gpu_mode, _particle_split, threads_per_atom, binsize, opencl_args, ocl_platform, device_type_flags, block_pair); GPU_EXTRA::check_flag(gpu_flag,error,world); } /* ---------------------------------------------------------------------- */ FixGPU::~FixGPU() { lmp_clear_device(); } /* ---------------------------------------------------------------------- */ int FixGPU::setmask() { int mask = 0; mask |= POST_FORCE; mask |= MIN_POST_FORCE; mask |= POST_FORCE_RESPA; return mask; } /* ---------------------------------------------------------------------- */ void FixGPU::init() { // GPU package cannot be used with atom_style template if (atom->molecular == Atom::TEMPLATE) error->all(FLERR,"GPU package does not (yet) work with " "atom_style template"); // give a warning if no pair style is defined if (!force->pair && (comm->me == 0)) error->warning(FLERR,"Using package gpu without any pair style defined"); // make sure fdotr virial is not accumulated multiple times // also disallow GPU neighbor lists for hybrid styles if (force->pair_match("^hybrid",0) != nullptr) { PairHybrid *hybrid = (PairHybrid *) force->pair; for (int i = 0; i < hybrid->nstyles; i++) if (!utils::strmatch(hybrid->keywords[i],"/gpu$")) force->pair->no_virial_fdotr_compute = 1; if (_gpu_mode != GPU_FORCE) error->all(FLERR, "Must not use GPU neighbor lists with hybrid pair style"); } // rRESPA support if (utils::strmatch(update->integrate_style,"^respa")) _nlevels_respa = ((Respa *) update->integrate)->nlevels; } /* ---------------------------------------------------------------------- */ void FixGPU::setup(int vflag) { if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH) if (neighbor->exclude_setting() != 0) error->all(FLERR, "Cannot use neigh_modify exclude with GPU neighbor builds"); if (utils::strmatch(update->integrate_style,"^verlet")) post_force(vflag); else { // In setup only, all forces calculated on GPU are put in the outer level ((Respa *) update->integrate)->copy_flevel_f(_nlevels_respa-1); post_force(vflag); ((Respa *) update->integrate)->copy_f_flevel(_nlevels_respa-1); } } /* ---------------------------------------------------------------------- */ void FixGPU::min_setup(int vflag) { post_force(vflag); } /* ---------------------------------------------------------------------- */ void FixGPU::post_force(int /* vflag */) { if (!force->pair) return; timer->stamp(); double lvirial[6]; for (int i = 0; i < 6; i++) lvirial[i] = 0.0; int err_flag = 0; double my_eng = lmp_gpu_forces(atom->f, atom->torque, force->pair->eatom, force->pair->vatom, lvirial, force->pair->eng_coul, err_flag); if (err_flag==1) error->one(FLERR,"Neighbor list problem on the GPU. Try increasing the value of 'neigh_modify one' " "or the GPU neighbor list 'binsize'."); force->pair->eng_vdwl += my_eng; force->pair->virial[0] += lvirial[0]; force->pair->virial[1] += lvirial[1]; force->pair->virial[2] += lvirial[2]; force->pair->virial[3] += lvirial[3]; force->pair->virial[4] += lvirial[4]; force->pair->virial[5] += lvirial[5]; if (force->pair->vflag_fdotr) force->pair->virial_fdotr_compute(); timer->stamp(Timer::PAIR); } /* ---------------------------------------------------------------------- */ void FixGPU::min_post_force(int vflag) { post_force(vflag); } /* ---------------------------------------------------------------------- */ void FixGPU::post_force_respa(int vflag, int /* ilevel */, int /* iloop */) { post_force(vflag); } /* ---------------------------------------------------------------------- */ double FixGPU::memory_usage() { double bytes = 0.0; // memory usage currently returned by pair routine return bytes; } double FixGPU::binsize(const double subx, const double suby, const double subz, const int nlocal, const double cut) { if (_binsize > 0.0) return _binsize; else if (_gpu_mode == GPU_FORCE || comm->cutghostuser) return cut * 0.5; else return lmp_gpu_update_bin_size(subx, suby, subz, nlocal, cut); }