diff --git a/src/GPU/pair_beck_gpu.cpp b/src/GPU/pair_beck_gpu.cpp new file mode 100644 index 0000000000..b5babba008 --- /dev/null +++ b/src/GPU/pair_beck_gpu.cpp @@ -0,0 +1,244 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Trung Dac Nguyen (ORNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "pair_beck_gpu.h" +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" +#include "update.h" +#include "domain.h" +#include "string.h" +#include "gpu_extra.h" +#include "math_special.h" + +// External functions from cuda library for atom decomposition + +int beck_gpu_init(const int ntypes, double **cutsq, double **host_aa, + double **alpha, double **beta, double **AA, double **BB, + double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen); +void beck_gpu_clear(); +int ** beck_gpu_compute_n(const int ago, const int inum, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, int *tag, int **nspecial, + int **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, + const double cpu_time, bool &success); +void beck_gpu_compute(const int ago, const int inum, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success); +double beck_gpu_bytes(); + +using namespace LAMMPS_NS; +using namespace MathSpecial; + +/* ---------------------------------------------------------------------- */ + +PairBeckGPU::PairBeckGPU(LAMMPS *lmp) : PairBeck(lmp), gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairBeckGPU::~PairBeckGPU() +{ + beck_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +void PairBeckGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = beck_gpu_compute_n(neighbor->ago, inum, nall, + atom->x, atom->type, domain->sublo, + domain->subhi, atom->tag, atom->nspecial, + atom->special, eflag, vflag, eflag_atom, + vflag_atom, host_start, + &ilist, &numneigh, cpu_time, success); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + beck_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success); + } + if (!success) + error->one(FLERR,"Insufficient memory on accelerator"); + + if (host_startnewton_pair) + error->all(FLERR,"Cannot use newton pair with beck/gpu pair style"); + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int success = beck_gpu_init(atom->ntypes+1, cutsq, aa, alpha, beta, + AA, BB, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBeckGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + beck_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairBeckGPU::cpu_compute(int start, int inum, int eflag, int vflag, + int *ilist, int *numneigh, int **firstneigh) { + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r5,force_beck,factor_lj; + double r,rinv; + double aaij,alphaij,betaij; + double term1,term1inv,term2,term3,term4,term5,term6; + int *jlist; + + double **x = atom->x; + double **f = atom->f; + int *type = atom->type; + double *special_lj = force->special_lj; + + // loop over neighbors of my atoms + + for (ii = start; ii < inum; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + r5 = rsq*rsq*r; + aaij = aa[itype][jtype]; + alphaij = alpha[itype][jtype]; + betaij = beta[itype][jtype]; + term1 = aaij*aaij + rsq; + term2 = powint(term1,-5); + term3 = 21.672 + 30.0*aaij*aaij + 6.0*rsq; + term4 = alphaij + r5*betaij; + term5 = alphaij + 6.0*r5*betaij; + rinv = 1.0/r; + force_beck = AA[itype][jtype]*exp(-1.0*r*term4)*term5; + force_beck -= BB[itype][jtype]*r*term2*term3; + + fpair = factor_lj*force_beck*rinv; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + + if (eflag) { + term6 = powint(term1,-3); + term1inv = 1.0/term1; + evdwl = AA[itype][jtype]*exp(-1.0*r*term4); + evdwl -= BB[itype][jtype]*term6*(1.0+(2.709+3.0*aaij*aaij)*term1inv); + evdwl *= factor_lj; + } + + if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz); + } + } + } +} diff --git a/src/GPU/pair_beck_gpu.h b/src/GPU/pair_beck_gpu.h new file mode 100644 index 0000000000..87c4cd57b2 --- /dev/null +++ b/src/GPU/pair_beck_gpu.h @@ -0,0 +1,59 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(beck/gpu,PairBeckGPU) + +#else + +#ifndef LMP_PAIR_BECK_GPU_H +#define LMP_PAIR_BECK_GPU_H + +#include "pair_beck.h" + +namespace LAMMPS_NS { + +class PairBeckGPU : public PairBeck { + public: + PairBeckGPU(LAMMPS *lmp); + ~PairBeckGPU(); + void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int); + void init_style(); + double memory_usage(); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + private: + int gpu_mode; + double cpu_time; + int *gpulist; +}; + +} +#endif +#endif + +/* ERROR/WARNING messages: + +E: Insufficient memory on accelerator + +There is insufficient memory on one of the devices specified for the gpu +package + +E: Cannot use newton pair with beck/gpu pair style + +Self-explanatory. + +*/ diff --git a/src/GPU/pair_lj_cut_coul_msm_gpu.cpp b/src/GPU/pair_lj_cut_coul_msm_gpu.cpp new file mode 100644 index 0000000000..c9fad77f61 --- /dev/null +++ b/src/GPU/pair_lj_cut_coul_msm_gpu.cpp @@ -0,0 +1,294 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Trung Dac Nguyen (ORNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "pair_lj_cut_coul_msm_gpu.h" +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" +#include "update.h" +#include "domain.h" +#include "kspace.h" +#include "string.h" +#include "gpu_extra.h" + +// External functions from cuda library for atom decomposition + +int ljcm_gpu_init(const int ntypes, double **cutsq, double **host_lj1, + double **host_lj2, double **host_lj3, double **host_lj4, + double **host_gcons, double **host_dgcons, + double **offset, double *special_lj, const int inum, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, + double **host_cut_ljsq, double host_cut_coulsq, + double *host_special_coul, const int order, const double qqrd2e); +void ljcm_gpu_clear(); +int ** ljcm_gpu_compute_n(const int ago, const int inum, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, int *tag, int **nspecial, + int **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q, double *boxlo, double *prd); +void ljcm_gpu_compute(const int ago, const int inum, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, double *host_q, + const int nlocal, double *boxlo, double *prd); +double ljcm_gpu_bytes(); + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairLJCutCoulMSMGPU::PairLJCutCoulMSMGPU(LAMMPS *lmp) : + PairLJCutCoulMSM(lmp), gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairLJCutCoulMSMGPU::~PairLJCutCoulMSMGPU() +{ + ljcm_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulMSMGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = ljcm_gpu_compute_n(neighbor->ago, inum, nall, + atom->x, atom->type, domain->sublo, + domain->subhi, atom->tag, atom->nspecial, + atom->special, eflag, vflag, eflag_atom, + vflag_atom, host_start, + &ilist, &numneigh, cpu_time, success, + atom->q, domain->boxlo, domain->prd); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + ljcm_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success, atom->q, + atom->nlocal, domain->boxlo, domain->prd); + } + if (!success) + error->one(FLERR,"Insufficient memory on accelerator"); + + if (host_startnewton_pair) + error->all(FLERR,"Cannot use newton pair with lj/cut/coul/msm/gpu pair style"); + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + cut_coulsq = cut_coul * cut_coul; + + // setup force tables + + if (ncoultablebits) init_tables(cut_coul,cut_respa); + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int success = ljcm_gpu_init(atom->ntypes+1, cutsq, lj1, lj2, lj3, lj4, + force->kspace->get_gcons(), + force->kspace->get_dgcons(), + offset, force->special_lj, + atom->nlocal, atom->nlocal+atom->nghost, + 300, maxspecial, cell_size, gpu_mode, screen, + cut_ljsq, cut_coulsq, force->special_coul, + force->kspace->order, force->qqrd2e); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairLJCutCoulMSMGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + ljcm_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairLJCutCoulMSMGPU::cpu_compute(int start, int inum, int eflag, int vflag, + int *ilist, int *numneigh, int **firstneigh) { + int i,j,ii,jj,jnum,itype,jtype,itable; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double fraction,table; + double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj; + double egamma,fgamma,prefactor; + int *jlist; + double rsq; + + double **x = atom->x; + double **f = atom->f; + double *q = atom->q; + int *type = atom->type; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + + // loop over neighbors of my atoms + + for (ii = start; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) { + r = sqrt(rsq); + prefactor = qqrd2e * qtmp*q[j]/r; + egamma = 1.0 - (r/cut_coul)*force->kspace->gamma(r/cut_coul); + fgamma = 1.0 + (rsq/cut_coulsq)*force->kspace->dgamma(r/cut_coul); + forcecoul = prefactor * fgamma; + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else { + union_int_float_t rsq_lookup; + rsq_lookup.f = rsq; + itable = rsq_lookup.i & ncoulmask; + itable >>= ncoulshiftbits; + fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable]; + table = ftable[itable] + fraction*dftable[itable]; + forcecoul = qtmp*q[j] * table; + if (factor_coul < 1.0) { + table = ctable[itable] + fraction*dctable[itable]; + prefactor = qtmp*q[j] * table; + forcecoul -= (1.0-factor_coul)*prefactor; + } + } + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]); + } else forcelj = 0.0; + + fpair = (forcecoul + forcelj) * r2inv; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + + if (eflag) { + if (rsq < cut_coulsq) { + if (!ncoultablebits || rsq <= tabinnersq) + ecoul = prefactor*egamma; + else { + table = etable[itable] + fraction*detable[itable]; + ecoul = qtmp*q[j] * table; + } + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz); + } + } + } +} diff --git a/src/GPU/pair_lj_cut_coul_msm_gpu.h b/src/GPU/pair_lj_cut_coul_msm_gpu.h new file mode 100644 index 0000000000..f77a5abca7 --- /dev/null +++ b/src/GPU/pair_lj_cut_coul_msm_gpu.h @@ -0,0 +1,59 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(lj/cut/coul/msm/gpu,PairLJCutCoulMSMGPU) + +#else + +#ifndef LMP_PAIR_LJ_CUT_COUL_MSM_GPU_H +#define LMP_PAIR_LJ_CUT_COUL_MSM_GPU_H + +#include "pair_lj_cut_coul_msm.h" + +namespace LAMMPS_NS { + +class PairLJCutCoulMSMGPU : public PairLJCutCoulMSM { + public: + PairLJCutCoulMSMGPU(LAMMPS *lmp); + ~PairLJCutCoulMSMGPU(); + void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int); + void init_style(); + double memory_usage(); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + private: + int gpu_mode; + double cpu_time; + int *gpulist; +}; + +} +#endif +#endif + +/* ERROR/WARNING messages: + +E: Insufficient memory on accelerator + +There is insufficient memory on one of the devices specified for the gpu +package + +E: Cannot use newton pair with lj/cut/coul/msm/gpu pair style + +Self-explanatory. + +*/ diff --git a/src/GPU/pair_mie_cut_gpu.cpp b/src/GPU/pair_mie_cut_gpu.cpp new file mode 100644 index 0000000000..b4771d5956 --- /dev/null +++ b/src/GPU/pair_mie_cut_gpu.cpp @@ -0,0 +1,230 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Trung Dac Nguyen (ORNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "pair_mie_cut_gpu.h" +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" +#include "update.h" +#include "domain.h" +#include "string.h" +#include "gpu_extra.h" + +// External functions from cuda library for atom decomposition + +int mie_gpu_init(const int ntypes, double **cutsq, double **host_mie1, + double **host_mie2, double **host_mie3, double **host_mie4, + double **host_gamA, double **host_gamR, double **offset, + double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen); +void mie_gpu_clear(); +int ** mie_gpu_compute_n(const int ago, const int inum, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, int *tag, int **nspecial, + int **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, + const double cpu_time, bool &success); +void mie_gpu_compute(const int ago, const int inum, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success); +double mie_gpu_bytes(); + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairMIECutGPU::PairMIECutGPU(LAMMPS *lmp) : PairMIECut(lmp), gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairMIECutGPU::~PairMIECutGPU() +{ + mie_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +void PairMIECutGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = mie_gpu_compute_n(neighbor->ago, inum, nall, + atom->x, atom->type, domain->sublo, + domain->subhi, atom->tag, atom->nspecial, + atom->special, eflag, vflag, eflag_atom, + vflag_atom, host_start, + &ilist, &numneigh, cpu_time, success); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + mie_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success); + } + if (!success) + error->one(FLERR,"Insufficient memory on accelerator"); + + if (host_startnewton_pair) + error->all(FLERR,"Cannot use newton pair with mie/cut/gpu pair style"); + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int success = mie_gpu_init(atom->ntypes+1, cutsq, mie1, mie2, mie3, mie4, + gamA, gamR, offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairMIECutGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + mie_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairMIECutGPU::cpu_compute(int start, int inum, int eflag, int vflag, + int *ilist, int *numneigh, int **firstneigh) { + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,rgamR,rgamA,forcemie,factor_mie; + int *jlist; + + double **x = atom->x; + double **f = atom->f; + int *type = atom->type; + double *special_mie = force->special_lj; + + // loop over neighbors of my atoms + + for (ii = start; ii < inum; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_mie = special_mie[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + rgamA = pow(r2inv,(gamA[itype][jtype]/2.0)); + rgamR = pow(r2inv,(gamR[itype][jtype]/2.0)); + forcemie = (mie1[itype][jtype]*rgamR - mie2[itype][jtype]*rgamA); + fpair = factor_mie*forcemie*r2inv; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + + if (eflag) { + evdwl = (mie3[itype][jtype]*rgamR - mie4[itype][jtype]*rgamA) - + offset[itype][jtype]; + evdwl *= factor_mie; + } + + if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz); + } + } + } +} diff --git a/src/GPU/pair_mie_cut_gpu.h b/src/GPU/pair_mie_cut_gpu.h new file mode 100644 index 0000000000..8ee084ccdd --- /dev/null +++ b/src/GPU/pair_mie_cut_gpu.h @@ -0,0 +1,59 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(mie/cut/gpu,PairMIECutGPU) + +#else + +#ifndef LMP_PAIR_MIE_CUT_GPU_H +#define LMP_PAIR_MIE_CUT_GPU_H + +#include "pair_mie_cut.h" + +namespace LAMMPS_NS { + +class PairMIECutGPU : public PairMIECut { + public: + PairMIECutGPU(LAMMPS *lmp); + ~PairMIECutGPU(); + void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int); + void init_style(); + double memory_usage(); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + private: + int gpu_mode; + double cpu_time; + int *gpulist; +}; + +} +#endif +#endif + +/* ERROR/WARNING messages: + +E: Insufficient memory on accelerator + +There is insufficient memory on one of the devices specified for the gpu +package + +E: Cannot use newton pair with mie/cut/gpu pair style + +Self-explanatory. + +*/ diff --git a/src/GPU/pair_soft_gpu.cpp b/src/GPU/pair_soft_gpu.cpp new file mode 100644 index 0000000000..f6efaf2feb --- /dev/null +++ b/src/GPU/pair_soft_gpu.cpp @@ -0,0 +1,225 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Trung Dac Nguyen (ORNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "pair_soft_gpu.h" +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" +#include "update.h" +#include "domain.h" +#include "string.h" +#include "gpu_extra.h" +#include "math_const.h" + +// External functions from cuda library for atom decomposition + +int soft_gpu_init(const int ntypes, double **cutsq, double **prefactor, + double **cut, double *special_lj, const int nlocal, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen); +void soft_gpu_clear(); +int ** soft_gpu_compute_n(const int ago, const int inum, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, int *tag, int **nspecial, + int **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, + const double cpu_time, bool &success); +void soft_gpu_compute(const int ago, const int inum, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success); +double soft_gpu_bytes(); + +using namespace LAMMPS_NS; +using namespace MathConst; + +/* ---------------------------------------------------------------------- */ + +PairSoftGPU::PairSoftGPU(LAMMPS *lmp) : PairSoft(lmp), gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairSoftGPU::~PairSoftGPU() +{ + soft_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +void PairSoftGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = soft_gpu_compute_n(neighbor->ago, inum, nall, + atom->x, atom->type, domain->sublo, + domain->subhi, atom->tag, atom->nspecial, + atom->special, eflag, vflag, eflag_atom, + vflag_atom, host_start, + &ilist, &numneigh, cpu_time, success); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + soft_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success); + } + if (!success) + error->one(FLERR,"Insufficient memory on accelerator"); + + if (host_startnewton_pair) + error->all(FLERR,"Cannot use newton pair with soft/gpu pair style"); + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double mcut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + mcut = init_one(i,j); + mcut *= mcut; + if (mcut > maxcut) + maxcut = mcut; + cutsq[i][j] = cutsq[j][i] = mcut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int success = soft_gpu_init(atom->ntypes+1, cutsq, prefactor, cut, + force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairSoftGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + soft_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairSoftGPU::cpu_compute(int start, int inum, int eflag, int vflag, + int *ilist, int *numneigh, int **firstneigh) { + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double r,rsq,arg,factor_lj; + int *jlist; + + double **x = atom->x; + double **f = atom->f; + int *type = atom->type; + double *special_lj = force->special_lj; + + // loop over neighbors of my atoms + + for (ii = start; ii < inum; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r = sqrt(rsq); + arg = MY_PI*r/cut[itype][jtype]; + if (r > 0.0) fpair = factor_lj * prefactor[itype][jtype] * + sin(arg) * MY_PI/cut[itype][jtype]/r; + else fpair = 0.0; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + + if (eflag) + evdwl = factor_lj * prefactor[itype][jtype] * (1.0+cos(arg)); + + if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz); + } + } + } +} diff --git a/src/GPU/pair_soft_gpu.h b/src/GPU/pair_soft_gpu.h new file mode 100644 index 0000000000..ccb800990c --- /dev/null +++ b/src/GPU/pair_soft_gpu.h @@ -0,0 +1,59 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(soft/gpu,PairSoftGPU) + +#else + +#ifndef LMP_PAIR_SOFT_GPU_H +#define LMP_PAIR_SOFT_GPU_H + +#include "pair_soft.h" + +namespace LAMMPS_NS { + +class PairSoftGPU : public PairSoft { + public: + PairSoftGPU(LAMMPS *lmp); + ~PairSoftGPU(); + void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int); + void init_style(); + double memory_usage(); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + private: + int gpu_mode; + double cpu_time; + int *gpulist; +}; + +} +#endif +#endif + +/* ERROR/WARNING messages: + +E: Insufficient memory on accelerator + +There is insufficient memory on one of the devices specified for the gpu +package + +E: Cannot use newton pair with soft/gpu pair style + +Self-explanatory. + +*/ diff --git a/src/GPU/pair_sw_gpu.cpp b/src/GPU/pair_sw_gpu.cpp new file mode 100644 index 0000000000..672b3e517e --- /dev/null +++ b/src/GPU/pair_sw_gpu.cpp @@ -0,0 +1,178 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Mike Brown (ORNL) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_sw_gpu.h" +#include "atom.h" +#include "neighbor.h" +#include "neigh_request.h" +#include "force.h" +#include "comm.h" +#include "memory.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "memory.h" +#include "error.h" +#include "domain.h" +#include "gpu_extra.h" + +// External functions from cuda library for atom decomposition + +int sw_gpu_init(const int nlocal, const int nall, const int max_nbors, + const double cell_size, int &gpu_mode, FILE *screen, + const double, const double, const double, const double, + const double, const double, const double, const double, + const double, const double, const double); +void sw_gpu_clear(); +int ** sw_gpu_compute_n(const int ago, const int inum, + const int nall, double **host_x, int *host_type, + double *sublo, double *subhi, int *tag, int **nspecial, + int **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, + const double cpu_time, bool &success); +void sw_gpu_compute(const int ago, const int nloc, const int nall, const int ln, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success); +double sw_gpu_bytes(); +extern double lmp_gpu_forces(double **f, double **tor, double *eatom, + double **vatom, double *virial, double &ecoul); + +using namespace LAMMPS_NS; + +#define MAXLINE 1024 +#define DELTA 4 + +/* ---------------------------------------------------------------------- */ + +PairSWGPU::PairSWGPU(LAMMPS *lmp) : PairSW(lmp), gpu_mode(GPU_FORCE) +{ + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); + + cutghost = NULL; + ghostneigh = 1; +} + +/* ---------------------------------------------------------------------- + check if allocated, since class can be destructed when incomplete +------------------------------------------------------------------------- */ + +PairSWGPU::~PairSWGPU() +{ + sw_gpu_clear(); + if (allocated) + memory->destroy(cutghost); +} + +/* ---------------------------------------------------------------------- */ + +void PairSWGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = sw_gpu_compute_n(neighbor->ago, inum, nall, + atom->x, atom->type, domain->sublo, + domain->subhi, atom->tag, atom->nspecial, + atom->special, eflag, vflag, eflag_atom, + vflag_atom, host_start, + &ilist, &numneigh, cpu_time, success); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + + sw_gpu_compute(neighbor->ago, atom->nlocal, nall, inum+list->gnum, + atom->x, atom->type, ilist, numneigh, firstneigh, eflag, + vflag, eflag_atom, vflag_atom, host_start, cpu_time, + success); + } + if (!success) + error->one(FLERR,"Insufficient memory on accelerator"); +} + +/* ---------------------------------------------------------------------- */ + +void PairSWGPU::allocate() +{ + PairSW::allocate(); + int n = atom->ntypes; + + memory->create(cutghost,n+1,n+1,"pair:cutghost"); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairSWGPU::init_style() +{ + double cell_size = sqrt(params[0].cutsq) + neighbor->skin; + + if (atom->tag_enable == 0) + error->all(FLERR,"Pair style sw/gpu requires atom IDs"); + if (force->newton_pair != 0) + error->all(FLERR,"Pair style sw/gpu requires newton pair off"); + if (nparams > 1) + error->all(FLERR,"Pair style sw/gpu is currently limited to one element."); + + int success = sw_gpu_init(atom->nlocal, atom->nlocal+atom->nghost, 300, + cell_size, gpu_mode, screen,params[0].epsilon, + params[0].sigma, params[0].lambda, params[0].gamma, + params[0].costheta, params[0].biga, params[0].bigb, + params[0].powerp, params[0].powerq, params[0].cut, + params[0].cutsq); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + neighbor->requests[irequest]->ghost = 1; + } + + if (comm->cutghostuser < (2.0*cutmax + neighbor->skin) ) + comm->cutghostuser=2.0*cutmax + neighbor->skin; +} + +/* ---------------------------------------------------------------------- + init for one type pair i,j and corresponding j,i +------------------------------------------------------------------------- */ + +double PairSWGPU::init_one(int i, int j) +{ + if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set"); + cutghost[i][j] = cutmax; + cutghost[j][i] = cutmax; + + return cutmax; +} + diff --git a/src/GPU/pair_sw_gpu.h b/src/GPU/pair_sw_gpu.h new file mode 100644 index 0000000000..ab82359a67 --- /dev/null +++ b/src/GPU/pair_sw_gpu.h @@ -0,0 +1,67 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(sw/gpu,PairSWGPU) + +#else + +#ifndef LMP_PAIR_SW_GPU_H +#define LMP_PAIR_SW_GPU_H + +#include "pair_sw.h" + +namespace LAMMPS_NS { + +class PairSWGPU : public PairSW { + public: + PairSWGPU(class LAMMPS *); + ~PairSWGPU(); + void compute(int, int); + double init_one(int, int); + void init_style(); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + protected: + void allocate(); + + int gpu_mode; + double cpu_time; + int *gpulist; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Insufficient memory on accelerator + +There is insufficient memory on one of the devices specified for the gpu +package + +E: Pair style sw/gpu requires newton pair on + +See the newton command. This is a restriction to use the SW +potential. + +E: Pair style sw/gpu is currently limited to one element. + +Self-explanatory. + +*/ +