diff --git a/src/GPU/Install.sh b/src/GPU/Install.sh index 81cb0a73fa..bd50caaa58 100644 --- a/src/GPU/Install.sh +++ b/src/GPU/Install.sh @@ -17,6 +17,40 @@ if (test $1 = 1) then sed -i -e '/^include.*gpu.*$/d' ../Makefile.package.settings sed -i '4 i include ..\/..\/lib\/gpu\/Makefile.lammps' ../Makefile.package.settings fi + + if (test -e ../pair_yukawa.cpp) then + cp pair_yukawa_gpu.cpp .. + cp pair_yukawa_gpu.h .. + fi + + if (test -e ../pair_table.cpp) then + cp pair_table_gpu.cpp .. + cp pair_table_gpu.h .. + fi + + if (test -e ../pair_buck.cpp) then + cp pair_buck_gpu.cpp .. + cp pair_buck_gpu.h .. + fi + + if (test -e ../pair_buck_coul_cut.cpp) then + cp pair_buck_coul_cut_gpu.cpp .. + cp pair_buck_coul_cut_gpu.h .. + fi + + if (test -e ../pair_buck_coul_long.cpp) then + cp pair_buck_coul_long_gpu.cpp .. + cp pair_buck_coul_long_gpu.h .. + fi + + if (test -e ../pair_eam.cpp) then + cp pair_eam_gpu.cpp .. + cp pair_eam_gpu.h .. + cp pair_eam_alloy_gpu.cpp .. + cp pair_eam_alloy_gpu.h .. + cp pair_eam_fs_gpu.cpp .. + cp pair_eam_fs_gpu.h .. + fi if (test -e ../pair_gayberne.cpp) then cp pair_gayberne_gpu.cpp .. @@ -93,6 +127,7 @@ elif (test $1 = 0) then fi rm -f ../pppm_gpu.cpp + rm -f ../pair_eam_gpu.cpp rm -f ../pair_gayberne_gpu.cpp rm -f ../pair_resquared_gpu.cpp rm -f ../pair_lj_cut_gpu.cpp @@ -111,6 +146,7 @@ elif (test $1 = 0) then rm -f ../fix_gpu.cpp rm -f ../pppm_gpu.h + rm -f ../pair_eam_gpu.h rm -f ../pair_gayberne_gpu.h rm -f ../pair_resquared_gpu.h rm -f ../pair_lj_cut_gpu.h diff --git a/src/GPU/gpu_extra.h b/src/GPU/gpu_extra.h index abae643e54..e3bba97644 100644 --- a/src/GPU/gpu_extra.h +++ b/src/GPU/gpu_extra.h @@ -45,6 +45,9 @@ namespace GPU_EXTRA { else if (all_success == -7) error->all(FLERR, "Accelerator sharing is not currently supported on system"); + else if (all_success == -8) + error->all(FLERR, + "GPU particle split must be set to 1 for this pair style."); else error->all(FLERR,"Unknown error in GPU library"); } diff --git a/src/GPU/pair_buck_coul_cut_gpu.cpp b/src/GPU/pair_buck_coul_cut_gpu.cpp new file mode 100644 index 0000000000..d4e00811cf --- /dev/null +++ b/src/GPU/pair_buck_coul_cut_gpu.cpp @@ -0,0 +1,260 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Trung Dac Nguyen (ORNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "pair_buck_coul_cut_gpu.h" +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" +#include "update.h" +#include "domain.h" +#include "string.h" +#include "gpu_extra.h" + +// External functions from cuda library for atom decomposition + +int buckc_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, + double **host_buck1, double **host_buck2, double **host_a, + double **host_c, double **offset, double *special_lj, + const int inum, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + int &gpu_mode, FILE *screen, double **host_cut_ljsq, + double **host_cut_coulsq, double *host_special_coul, + const double qqrd2e); +void buckc_gpu_clear(); +int ** buckc_gpu_compute_n(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, int *tag, int **nspecial, + int **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q, double *boxlo, + double *prd); +void buckc_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, double *host_q, + const int nlocal, double *boxlo, double *prd); +double buckc_gpu_bytes(); + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairBuckCoulCutGPU::PairBuckCoulCutGPU(LAMMPS *lmp) : PairBuckCoulCut(lmp), + gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairBuckCoulCutGPU::~PairBuckCoulCutGPU() +{ + buckc_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +void PairBuckCoulCutGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = buckc_gpu_compute_n(neighbor->ago, inum, nall, + atom->x, atom->type, domain->sublo, + domain->subhi, atom->tag, atom->nspecial, + atom->special, eflag, vflag, eflag_atom, + vflag_atom, host_start, + &ilist, &numneigh, cpu_time, success, + atom->q, domain->boxlo, domain->prd); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + buckc_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success, atom->q, + atom->nlocal, domain->boxlo, domain->prd); + } + if (!success) + error->one(FLERR,"Out of memory on GPGPU"); + + if (host_startnewton_pair) + error->all(FLERR, + "Cannot use newton pair with buck/coul/cut/gpu pair style"); + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int success = buckc_gpu_init(atom->ntypes+1, cutsq, rhoinv, buck1, buck2, + a, c, offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen, cut_ljsq, + cut_coulsq, force->special_coul, force->qqrd2e); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBuckCoulCutGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + buckc_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairBuckCoulCutGPU::cpu_compute(int start, int inum, int eflag, int vflag, + int *ilist, int *numneigh, + int **firstneigh) { + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,qtmp,delx,dely,delz,evdwl,ecoul,fpair; + double rsq,r2inv,r6inv,forcecoul,forcebuck,factor_coul,factor_lj; + double r,rexp; + int *jlist; + + evdwl = ecoul = 0.0; + + double **x = atom->x; + double **f = atom->f; + double *q = atom->q; + int *type = atom->type; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + + // loop over neighbors of my atoms + + for (ii = start; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r = sqrt(rsq); + + if (rsq < cut_coulsq[itype][jtype]) + forcecoul = qqrd2e * qtmp*q[j]/r; + else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + rexp = exp(-r*rhoinv[itype][jtype]); + forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv; + } else forcebuck = 0.0; + + fpair = (factor_coul*forcecoul + factor_lj*forcebuck) * r2inv; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + + if (eflag) { + if (rsq < cut_coulsq[itype][jtype]) + ecoul = factor_coul * qqrd2e * qtmp*q[j]/r; + else ecoul = 0.0; + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz); + } + } + } +} diff --git a/src/GPU/pair_buck_coul_cut_gpu.h b/src/GPU/pair_buck_coul_cut_gpu.h new file mode 100644 index 0000000000..fbbe3676a0 --- /dev/null +++ b/src/GPU/pair_buck_coul_cut_gpu.h @@ -0,0 +1,62 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(buck/coul/cut/gpu,PairBuckCoulCutGPU) + +#else + +#ifndef LMP_PAIR_BUCK_COUL_CUT_GPU_H +#define LMP_PAIR_BUCK_COUL_CUT_GPU_H + +#include "pair_buck_coul_cut.h" + +namespace LAMMPS_NS { + +class PairBuckCoulCutGPU : public PairBuckCoulCut { + public: + PairBuckCoulCutGPU(LAMMPS *lmp); + ~PairBuckCoulCutGPU(); + void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int); + void init_style(); + double memory_usage(); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + private: + int gpu_mode; + double cpu_time; + int *gpulist; +}; + +} +#endif +#endif + +/* ERROR/WARNING messages: + +E: Out of memory on GPGPU + +UNDOCUMENTED + +E: Cannot use newton pair with buck/coul/cut/gpu pair style + +UNDOCUMENTED + +E: Pair style buck/coul/cut/gpu requires atom attribute q + +The atom style defined does not have this attribute. + +*/ \ No newline at end of file diff --git a/src/GPU/pair_buck_coul_long_gpu.cpp b/src/GPU/pair_buck_coul_long_gpu.cpp new file mode 100644 index 0000000000..a5f6401117 --- /dev/null +++ b/src/GPU/pair_buck_coul_long_gpu.cpp @@ -0,0 +1,291 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Trung Dac Nguyen (ORNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "pair_buck_coul_long_gpu.h" +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" +#include "update.h" +#include "domain.h" +#include "string.h" +#include "kspace.h" +#include "gpu_extra.h" + +#define EWALD_F 1.12837917 +#define EWALD_P 0.3275911 +#define A1 0.254829592 +#define A2 -0.284496736 +#define A3 1.421413741 +#define A4 -1.453152027 +#define A5 1.061405429 + +// External functions from cuda library for atom decomposition + +int buckcl_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, + double **host_buck1, double **host_buck2, double **host_a, + double **host_c, double **offset, double *special_lj, + const int inum, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + int &gpu_mode, FILE *screen, double **host_cut_ljsq, + double host_cut_coulsq, double *host_special_coul, + const double qqrd2e, const double g_ewald); +void buckcl_gpu_clear(); +int** buckcl_gpu_compute_n(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, int *tag, int **nspecial, + int **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success, double *host_q, double *boxlo, + double *prd); +void buckcl_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, double *host_q, + const int nlocal, double *boxlo, double *prd); +double buckcl_gpu_bytes(); + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairBuckCoulLongGPU::PairBuckCoulLongGPU(LAMMPS *lmp) : + PairBuckCoulLong(lmp), gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairBuckCoulLongGPU::~PairBuckCoulLongGPU() +{ + buckcl_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +void PairBuckCoulLongGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = buckcl_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, atom->q, domain->boxlo, + domain->prd); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + buckcl_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success, atom->q, + atom->nlocal, domain->boxlo, domain->prd); + } + if (!success) + error->one(FLERR,"Out of memory on GPGPU"); + + if (host_startq_flag) + error->all(FLERR, + "Pair style buck/coul/long/gpu requires atom attribute q"); + if (force->newton_pair) + error->all(FLERR, + "Cannot use newton pair with buck/coul/long/gpu pair style"); + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + cut_coulsq = cut_coul * cut_coul; + + // insure use of KSpace long-range solver, set g_ewald + + if (force->kspace == NULL) + error->all(FLERR,"Pair style is incompatible with KSpace style"); + g_ewald = force->kspace->g_ewald; + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int success = buckcl_gpu_init(atom->ntypes+1, cutsq, rhoinv, buck1, buck2, + a, c, offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen, cut_ljsq, + cut_coulsq, force->special_coul, force->qqrd2e, + g_ewald); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBuckCoulLongGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + buckcl_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairBuckCoulLongGPU::cpu_compute(int start, int inum, int eflag, + int vflag, int *ilist, int *numneigh, + int **firstneigh) +{ + int i,j,ii,jj,jnum,itype,jtype,itable; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair; + double fraction,table; + double r,rexp,r2inv,r6inv,forcecoul,forcebuck,factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + int *jlist; + double rsq; + + evdwl = ecoul = 0.0; + + double **x = atom->x; + double **f = atom->f; + double *q = atom->q; + int *type = atom->type; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + double qqrd2e = force->qqrd2e; + + // loop over neighbors of my atoms + + for (ii = start; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + + if (rsq < cut_coulsq) { + grij = g_ewald * r; + expm2 = exp(-grij*grij); + t = 1.0 / (1.0 + EWALD_P*grij); + erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2; + prefactor = qqrd2e * qtmp*q[j]/r; + forcecoul = prefactor * (erfc + EWALD_F*grij*expm2); + if (factor_coul < 1.0) forcecoul -= (1.0-factor_coul)*prefactor; + } else forcecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + r6inv = r2inv*r2inv*r2inv; + rexp = exp(-r*rhoinv[itype][jtype]); + forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv; + } else forcebuck = 0.0; + + fpair = (forcecoul + factor_lj*forcebuck) * r2inv; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + + if (eflag) { + if (rsq < cut_coulsq) { + ecoul = prefactor*erfc; + if (factor_coul < 1.0) ecoul -= (1.0-factor_coul)*prefactor; + } else ecoul = 0.0; + + if (rsq < cut_ljsq[itype][jtype]) { + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv - + offset[itype][jtype]; + evdwl *= factor_lj; + } else evdwl = 0.0; + } + + if (evflag) ev_tally_full(i,evdwl,ecoul,fpair,delx,dely,delz); + } + } + } +} diff --git a/src/GPU/pair_buck_coul_long_gpu.h b/src/GPU/pair_buck_coul_long_gpu.h new file mode 100644 index 0000000000..179a6b078b --- /dev/null +++ b/src/GPU/pair_buck_coul_long_gpu.h @@ -0,0 +1,62 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(buck/coul/long/gpu,PairBuckCoulLongGPU) + +#else + +#ifndef LMP_PAIR_BUCK_COUL_LONG_GPU_H +#define LMP_PAIR_BUCK_COUL_LONG_GPU_H + +#include "pair_buck_coul_long.h" + +namespace LAMMPS_NS { + +class PairBuckCoulLongGPU : public PairBuckCoulLong { + public: + PairBuckCoulLongGPU(LAMMPS *lmp); + ~PairBuckCoulLongGPU(); + void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int); + void init_style(); + double memory_usage(); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + private: + int gpu_mode; + double cpu_time; + int *gpulist; +}; + +} +#endif +#endif + +/* ERROR/WARNING messages: + +E: Out of memory on GPGPU + +UNDOCUMENTED + +E: Cannot use newton pair with buck/coul/long/gpu pair style + +UNDOCUMENTED + +E: Pair style buck/coul/long/gpu requires atom attribute q + +The atom style defined does not have this attribute. + +*/ \ No newline at end of file diff --git a/src/GPU/pair_buck_gpu.cpp b/src/GPU/pair_buck_gpu.cpp new file mode 100644 index 0000000000..67982202cb --- /dev/null +++ b/src/GPU/pair_buck_gpu.cpp @@ -0,0 +1,230 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Trung Dac Nguyen (ORNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "pair_buck_gpu.h" +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" +#include "update.h" +#include "domain.h" +#include "string.h" +#include "gpu_extra.h" + +// External functions from cuda library for atom decomposition + +int buck_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv, + double **host_buck1, double **host_buck2, + double **host_a, double **host_c, + double **offset, double *special_lj, const int inum, + const int nall, const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen); +void buck_gpu_clear(); +int ** buck_gpu_compute_n(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, int *tag, int **nspecial, + int **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success); +void buck_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success); +double buck_gpu_bytes(); + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairBuckGPU::PairBuckGPU(LAMMPS *lmp) : PairBuck(lmp), gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairBuckGPU::~PairBuckGPU() +{ + buck_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +void PairBuckGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = buck_gpu_compute_n(neighbor->ago, inum, nall, + atom->x, atom->type, domain->sublo, + domain->subhi, atom->tag, atom->nspecial, + atom->special, eflag, vflag, eflag_atom, + vflag_atom, host_start, + &ilist, &numneigh, cpu_time, success); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + buck_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success); + } + if (!success) + error->one(FLERR,"Out of memory on GPGPU"); + + if (host_startnewton_pair) + error->all(FLERR,"Cannot use newton pair with buck/gpu pair style"); + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int success = buck_gpu_init(atom->ntypes+1, cutsq, rhoinv, buck1, buck2, + a, c, offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairBuckGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + buck_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairBuckGPU::cpu_compute(int start, int inum, int eflag, int vflag, + int *ilist, int *numneigh, int **firstneigh) { + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r6inv,forcebuck,factor_lj; + double r,rexp; + int *jlist; + + double **x = atom->x; + double **f = atom->f; + int *type = atom->type; + double *special_lj = force->special_lj; + + // loop over neighbors of my atoms + + for (ii = start; ii < inum; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r6inv = r2inv*r2inv*r2inv; + r = sqrt(rsq); + rexp = exp(-r*rhoinv[itype][jtype]); + forcebuck = buck1[itype][jtype]*r*rexp - buck2[itype][jtype]*r6inv; + fpair = factor_lj*forcebuck*r2inv; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + + if (eflag) { + evdwl = a[itype][jtype]*rexp - c[itype][jtype]*r6inv - + offset[itype][jtype]; + evdwl *= factor_lj; + } + + if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz); + } + } + } +} diff --git a/src/GPU/pair_buck_gpu.h b/src/GPU/pair_buck_gpu.h new file mode 100644 index 0000000000..2c74660448 --- /dev/null +++ b/src/GPU/pair_buck_gpu.h @@ -0,0 +1,58 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(buck/gpu,PairBuckGPU) + +#else + +#ifndef LMP_PAIR_BUCK_GPU_H +#define LMP_PAIR_BUCK_GPU_H + +#include "pair_buck.h" + +namespace LAMMPS_NS { + +class PairBuckGPU : public PairBuck { + public: + PairBuckGPU(LAMMPS *lmp); + ~PairBuckGPU(); + void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int); + void init_style(); + double memory_usage(); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + private: + int gpu_mode; + double cpu_time; + int *gpulist; +}; + +} +#endif +#endif + +/* ERROR/WARNING messages: + +E: Out of memory on GPGPU + +UNDOCUMENTED + +E: Cannot use newton pair with buck/gpu pair style + +UNDOCUMENTED + +*/ \ No newline at end of file diff --git a/src/GPU/pair_eam_alloy_gpu.cpp b/src/GPU/pair_eam_alloy_gpu.cpp new file mode 100644 index 0000000000..2cb80a34d7 --- /dev/null +++ b/src/GPU/pair_eam_alloy_gpu.cpp @@ -0,0 +1,323 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Trung Dac Nguyen (ORNL), W. Michael Brown (ORNL) +------------------------------------------------------------------------- */ + +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_eam_alloy_gpu.h" +#include "atom.h" +#include "comm.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define MAXLINE 1024 + +/* ---------------------------------------------------------------------- */ + +PairEAMAlloyGPU::PairEAMAlloyGPU(LAMMPS *lmp) : PairEAMGPU(lmp) +{ + one_coeff = 1; +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs + read DYNAMO setfl file +------------------------------------------------------------------------- */ + +void PairEAMAlloyGPU::coeff(int narg, char **arg) +{ + int i,j; + + if (!allocated) allocate(); + + if (narg != 3 + atom->ntypes) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // insure I,J args are * * + + if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // read EAM setfl file + + if (setfl) { + for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i]; + delete [] setfl->elements; + delete [] setfl->mass; + memory->destroy(setfl->frho); + memory->destroy(setfl->rhor); + memory->destroy(setfl->z2r); + delete setfl; + } + setfl = new Setfl(); + read_file(arg[2]); + + // read args that map atom types to elements in potential file + // map[i] = which element the Ith atom type is, -1 if NULL + + for (i = 3; i < narg; i++) { + if (strcmp(arg[i],"NULL") == 0) { + map[i-2] = -1; + continue; + } + for (j = 0; j < setfl->nelements; j++) + if (strcmp(arg[i],setfl->elements[j]) == 0) break; + if (j < setfl->nelements) map[i-2] = j; + else error->all(FLERR,"No matching element in EAM potential file"); + } + + // clear setflag since coeff() called once with I,J = * * + + int n = atom->ntypes; + for (i = 1; i <= n; i++) + for (j = i; j <= n; j++) + setflag[i][j] = 0; + + // set setflag i,j for type pairs where both are mapped to elements + // set mass of atom type if i = j + + int count = 0; + for (i = 1; i <= n; i++) { + for (j = i; j <= n; j++) { + if (map[i] >= 0 && map[j] >= 0) { + setflag[i][j] = 1; + if (i == j) atom->set_mass(i,setfl->mass[map[i]]); + count++; + } + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + read a multi-element DYNAMO setfl file +------------------------------------------------------------------------- */ + +void PairEAMAlloyGPU::read_file(char *filename) +{ + Setfl *file = setfl; + + // open potential file + + int me = comm->me; + FILE *fptr; + char line[MAXLINE]; + + if (me == 0) { + fptr = fopen(filename,"r"); + if (fptr == NULL) { + char str[128]; + sprintf(str,"Cannot open EAM potential file %s",filename); + error->one(FLERR,str); + } + } + + // read and broadcast header + // extract element names from nelements line + + int n; + if (me == 0) { + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + n = strlen(line) + 1; + } + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + sscanf(line,"%d",&file->nelements); + int nwords = atom->count_words(line); + if (nwords != file->nelements + 1) + error->all(FLERR,"Incorrect element names in EAM potential file"); + + char **words = new char*[file->nelements+1]; + nwords = 0; + strtok(line," \t\n\r\f"); + while (words[nwords++] = strtok(NULL," \t\n\r\f")) continue; + + file->elements = new char*[file->nelements]; + for (int i = 0; i < file->nelements; i++) { + n = strlen(words[i]) + 1; + file->elements[i] = new char[n]; + strcpy(file->elements[i],words[i]); + } + delete [] words; + + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg %d %lg %lg", + &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut); + } + + MPI_Bcast(&file->nrho,1,MPI_INT,0,world); + MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->nr,1,MPI_INT,0,world); + MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world); + + file->mass = new double[file->nelements]; + memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho"); + memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor"); + memory->create(file->z2r,file->nelements,file->nelements,file->nr+1, + "pair:z2r"); + + int i,j,tmp; + for (i = 0; i < file->nelements; i++) { + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg",&tmp,&file->mass[i]); + } + MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world); + + if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]); + MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world); + if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]); + MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world); + } + + for (i = 0; i < file->nelements; i++) + for (j = 0; j <= i; j++) { + if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]); + MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world); + } + + // close the potential file + + if (me == 0) fclose(fptr); +} + +/* ---------------------------------------------------------------------- + copy read-in setfl potential to standard array format +------------------------------------------------------------------------- */ + +void PairEAMAlloyGPU::file2array() +{ + int i,j,m,n; + int ntypes = atom->ntypes; + + // set function params directly from setfl file + + nrho = setfl->nrho; + nr = setfl->nr; + drho = setfl->drho; + dr = setfl->dr; + + // ------------------------------------------------------------------ + // setup frho arrays + // ------------------------------------------------------------------ + + // allocate frho arrays + // nfrho = # of setfl elements + 1 for zero array + + nfrho = setfl->nelements + 1; + memory->destroy(frho); + memory->create(frho,nfrho,nrho+1,"pair:frho"); + + // copy each element's frho to global frho + + for (i = 0; i < setfl->nelements; i++) + for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m]; + + // add extra frho of zeroes for non-EAM types to point to (pair hybrid) + // this is necessary b/c fp is still computed for non-EAM atoms + + for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0; + + // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to + // if atom type doesn't point to element (non-EAM atom in pair hybrid) + // then map it to last frho array of zeroes + + for (i = 1; i <= ntypes; i++) + if (map[i] >= 0) type2frho[i] = map[i]; + else type2frho[i] = nfrho-1; + + // ------------------------------------------------------------------ + // setup rhor arrays + // ------------------------------------------------------------------ + + // allocate rhor arrays + // nrhor = # of setfl elements + + nrhor = setfl->nelements; + memory->destroy(rhor); + memory->create(rhor,nrhor,nr+1,"pair:rhor"); + + // copy each element's rhor to global rhor + + for (i = 0; i < setfl->nelements; i++) + for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m]; + + // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to + // for setfl files, I,J mapping only depends on I + // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used + + for (i = 1; i <= ntypes; i++) + for (j = 1; j <= ntypes; j++) + type2rhor[i][j] = map[i]; + + // ------------------------------------------------------------------ + // setup z2r arrays + // ------------------------------------------------------------------ + + // allocate z2r arrays + // nz2r = N*(N+1)/2 where N = # of setfl elements + + nz2r = setfl->nelements * (setfl->nelements+1) / 2; + memory->destroy(z2r); + memory->create(z2r,nz2r,nr+1,"pair:z2r"); + + // copy each element pair z2r to global z2r, only for I >= J + + n = 0; + for (i = 0; i < setfl->nelements; i++) + for (j = 0; j <= i; j++) { + for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m]; + n++; + } + + // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to + // set of z2r arrays only fill lower triangular Nelement matrix + // value = n = sum over rows of lower-triangular matrix until reach irow,icol + // swap indices when irow < icol to stay lower triangular + // if map = -1 (non-EAM atom in pair hybrid): + // type2z2r is not used by non-opt + // but set type2z2r to 0 since accessed by opt + + int irow,icol; + for (i = 1; i <= ntypes; i++) { + for (j = 1; j <= ntypes; j++) { + irow = map[i]; + icol = map[j]; + if (irow == -1 || icol == -1) { + type2z2r[i][j] = 0; + continue; + } + if (irow < icol) { + irow = map[j]; + icol = map[i]; + } + n = 0; + for (m = 0; m < irow; m++) n += m + 1; + n += icol; + type2z2r[i][j] = n; + } + } +} diff --git a/src/GPU/pair_eam_alloy_gpu.h b/src/GPU/pair_eam_alloy_gpu.h new file mode 100644 index 0000000000..1606d81a93 --- /dev/null +++ b/src/GPU/pair_eam_alloy_gpu.h @@ -0,0 +1,41 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(eam/alloy/gpu,PairEAMAlloyGPU) + +#else + +#ifndef LMP_PAIR_EAM_ALLOY_GPU_H +#define LMP_PAIR_EAM_ALLOY_GPU_H + +#include "pair_eam_gpu.h" + +namespace LAMMPS_NS { + +class PairEAMAlloyGPU : public PairEAMGPU { +public: + PairEAMAlloyGPU(class LAMMPS *); + virtual ~PairEAMAlloyGPU() {} + void coeff(int, char **); + + protected: + void read_file(char *); + void file2array(); +}; + +} + +#endif +#endif diff --git a/src/GPU/pair_eam_fs_gpu.cpp b/src/GPU/pair_eam_fs_gpu.cpp new file mode 100644 index 0000000000..1b52920e30 --- /dev/null +++ b/src/GPU/pair_eam_fs_gpu.cpp @@ -0,0 +1,332 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Trung Dac Nguyen (ORNL), W. Michael Brown (ORNL) +------------------------------------------------------------------------- */ + +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_eam_fs_gpu.h" +#include "atom.h" +#include "comm.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define MAXLINE 1024 + +/* ---------------------------------------------------------------------- */ + +PairEAMFSGPU::PairEAMFSGPU(LAMMPS *lmp) : PairEAMGPU(lmp) +{ + one_coeff = 1; +} + +/* ---------------------------------------------------------------------- + set coeffs for one or more type pairs + read EAM Finnis-Sinclair file +------------------------------------------------------------------------- */ + +void PairEAMFSGPU::coeff(int narg, char **arg) +{ + int i,j; + + if (!allocated) allocate(); + + if (narg != 3 + atom->ntypes) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // insure I,J args are * * + + if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0) + error->all(FLERR,"Incorrect args for pair coefficients"); + + // read EAM Finnis-Sinclair file + + if (fs) { + for (i = 0; i < fs->nelements; i++) delete [] fs->elements[i]; + delete [] fs->elements; + delete [] fs->mass; + memory->destroy(fs->frho); + memory->destroy(fs->rhor); + memory->destroy(fs->z2r); + delete fs; + } + fs = new Fs(); + read_file(arg[2]); + + // read args that map atom types to elements in potential file + // map[i] = which element the Ith atom type is, -1 if NULL + + for (i = 3; i < narg; i++) { + if (strcmp(arg[i],"NULL") == 0) { + map[i-2] = -1; + continue; + } + for (j = 0; j < fs->nelements; j++) + if (strcmp(arg[i],fs->elements[j]) == 0) break; + if (j < fs->nelements) map[i-2] = j; + else error->all(FLERR,"No matching element in EAM potential file"); + } + + // clear setflag since coeff() called once with I,J = * * + + int n = atom->ntypes; + for (i = 1; i <= n; i++) + for (j = i; j <= n; j++) + setflag[i][j] = 0; + + // set setflag i,j for type pairs where both are mapped to elements + // set mass of atom type if i = j + + int count = 0; + for (i = 1; i <= n; i++) { + for (j = i; j <= n; j++) { + if (map[i] >= 0 && map[j] >= 0) { + setflag[i][j] = 1; + if (i == j) atom->set_mass(i,fs->mass[map[i]]); + count++; + } + } + } + + if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients"); +} + +/* ---------------------------------------------------------------------- + read a multi-element DYNAMO setfl file +------------------------------------------------------------------------- */ + +void PairEAMFSGPU::read_file(char *filename) +{ + Fs *file = fs; + + // open potential file + + int me = comm->me; + FILE *fptr; + char line[MAXLINE]; + + if (me == 0) { + fptr = fopen(filename,"r"); + if (fptr == NULL) { + char str[128]; + sprintf(str,"Cannot open EAM potential file %s",filename); + error->one(FLERR,str); + } + } + + // read and broadcast header + // extract element names from nelements line + + int n; + if (me == 0) { + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + fgets(line,MAXLINE,fptr); + n = strlen(line) + 1; + } + MPI_Bcast(&n,1,MPI_INT,0,world); + MPI_Bcast(line,n,MPI_CHAR,0,world); + + sscanf(line,"%d",&file->nelements); + int nwords = atom->count_words(line); + if (nwords != file->nelements + 1) + error->all(FLERR,"Incorrect element names in EAM potential file"); + + char **words = new char*[file->nelements+1]; + nwords = 0; + strtok(line," \t\n\r\f"); + while (words[nwords++] = strtok(NULL," \t\n\r\f")) continue; + + file->elements = new char*[file->nelements]; + for (int i = 0; i < file->nelements; i++) { + n = strlen(words[i]) + 1; + file->elements[i] = new char[n]; + strcpy(file->elements[i],words[i]); + } + delete [] words; + + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg %d %lg %lg", + &file->nrho,&file->drho,&file->nr,&file->dr,&file->cut); + } + + MPI_Bcast(&file->nrho,1,MPI_INT,0,world); + MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->nr,1,MPI_INT,0,world); + MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world); + MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world); + + file->mass = new double[file->nelements]; + memory->create(file->frho,file->nelements,file->nrho+1, + "pair:frho"); + memory->create(file->rhor,file->nelements,file->nelements, + file->nr+1,"pair:rhor"); + memory->create(file->z2r,file->nelements,file->nelements, + file->nr+1,"pair:z2r"); + + int i,j,tmp; + for (i = 0; i < file->nelements; i++) { + if (me == 0) { + fgets(line,MAXLINE,fptr); + sscanf(line,"%d %lg",&tmp,&file->mass[i]); + } + MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world); + + if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]); + MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world); + + for (j = 0; j < file->nelements; j++) { + if (me == 0) grab(fptr,file->nr,&file->rhor[i][j][1]); + MPI_Bcast(&file->rhor[i][j][1],file->nr,MPI_DOUBLE,0,world); + } + } + + for (i = 0; i < file->nelements; i++) + for (j = 0; j <= i; j++) { + if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]); + MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world); + } + + // close the potential file + + if (me == 0) fclose(fptr); +} + +/* ---------------------------------------------------------------------- + copy read-in setfl potential to standard array format +------------------------------------------------------------------------- */ + +void PairEAMFSGPU::file2array() +{ + int i,j,m,n; + int ntypes = atom->ntypes; + + // set function params directly from fs file + + nrho = fs->nrho; + nr = fs->nr; + drho = fs->drho; + dr = fs->dr; + + // ------------------------------------------------------------------ + // setup frho arrays + // ------------------------------------------------------------------ + + // allocate frho arrays + // nfrho = # of fs elements + 1 for zero array + + nfrho = fs->nelements + 1; + memory->destroy(frho); + memory->create(frho,nfrho,nrho+1,"pair:frho"); + + // copy each element's frho to global frho + + for (i = 0; i < fs->nelements; i++) + for (m = 1; m <= nrho; m++) frho[i][m] = fs->frho[i][m]; + + // add extra frho of zeroes for non-EAM types to point to (pair hybrid) + // this is necessary b/c fp is still computed for non-EAM atoms + + for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0; + + // type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to + // if atom type doesn't point to element (non-EAM atom in pair hybrid) + // then map it to last frho array of zeroes + + for (i = 1; i <= ntypes; i++) + if (map[i] >= 0) type2frho[i] = map[i]; + else type2frho[i] = nfrho-1; + + // ------------------------------------------------------------------ + // setup rhor arrays + // ------------------------------------------------------------------ + + // allocate rhor arrays + // nrhor = square of # of fs elements + + nrhor = fs->nelements * fs->nelements; + memory->destroy(rhor); + memory->create(rhor,nrhor,nr+1,"pair:rhor"); + + // copy each element pair rhor to global rhor + + n = 0; + for (i = 0; i < fs->nelements; i++) + for (j = 0; j < fs->nelements; j++) { + for (m = 1; m <= nr; m++) rhor[n][m] = fs->rhor[i][j][m]; + n++; + } + + // type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to + // for fs files, there is a full NxN set of rhor arrays + // OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used + + for (i = 1; i <= ntypes; i++) + for (j = 1; j <= ntypes; j++) + type2rhor[i][j] = map[i] * fs->nelements + map[j]; + + // ------------------------------------------------------------------ + // setup z2r arrays + // ------------------------------------------------------------------ + + // allocate z2r arrays + // nz2r = N*(N+1)/2 where N = # of fs elements + + nz2r = fs->nelements * (fs->nelements+1) / 2; + memory->destroy(z2r); + memory->create(z2r,nz2r,nr+1,"pair:z2r"); + + // copy each element pair z2r to global z2r, only for I >= J + + n = 0; + for (i = 0; i < fs->nelements; i++) + for (j = 0; j <= i; j++) { + for (m = 1; m <= nr; m++) z2r[n][m] = fs->z2r[i][j][m]; + n++; + } + + // type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to + // set of z2r arrays only fill lower triangular Nelement matrix + // value = n = sum over rows of lower-triangular matrix until reach irow,icol + // swap indices when irow < icol to stay lower triangular + // if map = -1 (non-EAM atom in pair hybrid): + // type2z2r is not used by non-opt + // but set type2z2r to 0 since accessed by opt + + int irow,icol; + for (i = 1; i <= ntypes; i++) { + for (j = 1; j <= ntypes; j++) { + irow = map[i]; + icol = map[j]; + if (irow == -1 || icol == -1) { + type2z2r[i][j] = 0; + continue; + } + if (irow < icol) { + irow = map[j]; + icol = map[i]; + } + n = 0; + for (m = 0; m < irow; m++) n += m + 1; + n += icol; + type2z2r[i][j] = n; + } + } +} diff --git a/src/GPU/pair_eam_fs_gpu.h b/src/GPU/pair_eam_fs_gpu.h new file mode 100644 index 0000000000..cef88fa18b --- /dev/null +++ b/src/GPU/pair_eam_fs_gpu.h @@ -0,0 +1,41 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(eam/fs/gpu,PairEAMFSGPU) + +#else + +#ifndef LMP_PAIR_EAM_FS_GPU_H +#define LMP_PAIR_EAM_FS_GPU_H + +#include "pair_eam_gpu.h" + +namespace LAMMPS_NS { + +class PairEAMFSGPU : public PairEAMGPU { +public: + PairEAMFSGPU(class LAMMPS *); + virtual ~PairEAMFSGPU() {} + void coeff(int, char **); + + protected: + void read_file(char *); + void file2array(); +}; + +} + +#endif +#endif diff --git a/src/GPU/pair_eam_gpu.cpp b/src/GPU/pair_eam_gpu.cpp new file mode 100644 index 0000000000..68a9e9c3c3 --- /dev/null +++ b/src/GPU/pair_eam_gpu.cpp @@ -0,0 +1,243 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Trung Dac Nguyen (ORNL), W. Michael Brown (ORNL) +------------------------------------------------------------------------- */ + +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "pair_eam_gpu.h" +#include "atom.h" +#include "force.h" +#include "comm.h" +#include "domain.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "gpu_extra.h" + +using namespace LAMMPS_NS; + +#define MAXLINE 1024 + +// External functions from cuda library for atom decomposition + +int eam_gpu_init(const int ntypes, double host_cutforcesq, + int **host_type2rhor, int **host_type2z2r, + int *host_type2frho, double ***host_rhor_spline, + double ***host_z2r_spline, double ***host_frho_spline, + double rdr, double rdrho, int nrhor, int nrho, int nz2r, + int nfrho, int nr, const int nlocal, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, + int &fp_size); +void eam_gpu_clear(); +int** eam_gpu_compute_n(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, int *tag, int **nspecial, int **special, + const bool eflag, const bool vflag, const bool eatom, + const bool vatom, int &host_start, int **ilist, + int **jnum, const double cpu_time, bool &success, + int &inum, void **fp_ptr); +void eam_gpu_compute(const int ago, const int inum_full, const int nlocal, + const int nall,double **host_x, int *host_type, + int *ilist, int *numj, int **firstneigh, + const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success, void **fp_ptr); +void eam_gpu_compute_force(int *ilist, const bool eflag, const bool vflag, + const bool eatom, const bool vatom); +double eam_gpu_bytes(); + +/* ---------------------------------------------------------------------- */ + +PairEAMGPU::PairEAMGPU(LAMMPS *lmp) : PairEAM(lmp), gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- + check if allocated, since class can be destructed when incomplete +------------------------------------------------------------------------- */ + +PairEAMGPU::~PairEAMGPU() +{ + eam_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +double PairEAMGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + eam_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairEAMGPU::compute(int eflag, int vflag) +{ + int i,j,ii,jj,m,jnum,itype,jtype; + double evdwl,*coeff; + + evdwl = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + + int nlocal = atom->nlocal; + int newton_pair = force->newton_pair; + + // compute density on each atom on GPU + + int nall = atom->nlocal + atom->nghost; + int inum, host_start, inum_dev; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = eam_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success, inum_dev, &fp_pinned); + } else { // gpu_mode == GPU_FORCE + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + eam_gpu_compute(neighbor->ago, inum, nlocal, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success, &fp_pinned); + } + + if (!success) + error->one(FLERR,"Out of memory on GPGPU"); + + // communicate derivative of embedding function + + comm->forward_comm_pair(this); + + // compute forces on each atom on GPU + if (gpu_mode != GPU_FORCE) + eam_gpu_compute_force(NULL, eflag, vflag, eflag_atom, vflag_atom); + else + eam_gpu_compute_force(ilist, eflag, vflag, eflag_atom, vflag_atom); +} + +/* ---------------------------------------------------------------------- + init specific to this pair style +------------------------------------------------------------------------- */ + +void PairEAMGPU::init_style() +{ + if (force->newton_pair) + error->all(FLERR,"Cannot use newton pair with eam/gpu pair style"); + + if (!allocated) error->all(FLERR,"Not allocate memory eam/gpu pair style"); + + // convert read-in file(s) to arrays and spline them + + file2array(); + array2spline(); + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int fp_size; + int success = eam_gpu_init(atom->ntypes+1, cutforcesq, type2rhor, type2z2r, + type2frho, rhor_spline, z2r_spline, frho_spline, + rdr, rdrho, nrhor, nrho, nz2r, nfrho, nr, + atom->nlocal, atom->nlocal+atom->nghost, 300, + maxspecial, cell_size, gpu_mode, screen, fp_size); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } + + if (fp_size == sizeof(double)) + fp_single = false; + else + fp_single = true; +} + +/* ---------------------------------------------------------------------- */ + +int PairEAMGPU::pack_comm(int n, int *list, double *buf, int pbc_flag, + int *pbc) +{ + int i,j,m; + + m = 0; + + if (fp_single) { + float *fp_ptr = (float *)fp_pinned; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = static_cast(fp_ptr[j]); + } + } else { + double *fp_ptr = (double *)fp_pinned; + for (i = 0; i < n; i++) { + j = list[i]; + buf[m++] = fp_ptr[j]; + } + } + + return 1; +} + +/* ---------------------------------------------------------------------- */ + +void PairEAMGPU::unpack_comm(int n, int first, double *buf) +{ + int i,m,last; + + m = 0; + last = first + n; + if (fp_single) { + float *fp_ptr = (float *)fp_pinned; + for (i = first; i < last; i++) fp_ptr[i] = buf[m++]; + } else { + double *fp_ptr = (double *)fp_pinned; + for (i = first; i < last; i++) fp_ptr[i] = buf[m++]; + } +} diff --git a/src/GPU/pair_eam_gpu.h b/src/GPU/pair_eam_gpu.h new file mode 100644 index 0000000000..4ed40c3e19 --- /dev/null +++ b/src/GPU/pair_eam_gpu.h @@ -0,0 +1,69 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(eam/gpu,PairEAMGPU) + +#else + +#ifndef LMP_PAIR_EAM_GPU_H +#define LMP_PAIR_EAM_GPU_H + +#include "stdio.h" +#include "pair_eam.h" + +namespace LAMMPS_NS { + +class PairEAMGPU : public PairEAM { + public: + + PairEAMGPU(class LAMMPS *); + virtual ~PairEAMGPU(); + void compute(int, int); + void init_style(); + double memory_usage(); + + int pack_comm(int, int *, double *, int, int *); + void unpack_comm(int, int, double *); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + protected: + int gpu_mode; + double cpu_time; + int *gpulist; + void *fp_pinned; + bool fp_single; +}; + +} + +#endif +#endif + +/* ERROR/WARNING messages: + +E: Out of memory on GPGPU + +UNDOCUMENTED + +E: Cannot use newton pair with eam/gpu pair style + +UNDOCUMENTED + +E: Not allocate memory eam/gpu pair style + +UNDOCUMENTED + +*/ \ No newline at end of file diff --git a/src/GPU/pair_table_gpu.cpp b/src/GPU/pair_table_gpu.cpp new file mode 100644 index 0000000000..c426fca8fc --- /dev/null +++ b/src/GPU/pair_table_gpu.cpp @@ -0,0 +1,343 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Trung Dac Nguyen (ORNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "pair_table_gpu.h" +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" +#include "update.h" +#include "domain.h" +#include "string.h" +#include "gpu_extra.h" + +#define LOOKUP 0 +#define LINEAR 1 +#define SPLINE 2 +#define BITMAP 3 + +// External functions from cuda library for atom decomposition + +int table_gpu_init(const int ntypes, double **cutsq, + double ***host_table_coeffs, double **host_table_data, + double *special_lj, const int nlocal, const int nall, + const int max_nbors, const int maxspecial, + const double cell_size, int &gpu_mode, FILE *screen, + int tabstyle, int ntables, int tablength); +void table_gpu_clear(); +int ** table_gpu_compute_n(const int ago, const int inum, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, int *tag, int **nspecial, + int **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + int **ilist, int **jnum, const double cpu_time, + bool &success); +void table_gpu_compute(const int ago, const int inum, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success); +double table_gpu_bytes(); + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairTableGPU::PairTableGPU(LAMMPS *lmp) : PairTable(lmp), + gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairTableGPU::~PairTableGPU() +{ + table_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +void PairTableGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = table_gpu_compute_n(neighbor->ago, inum, nall, atom->x, + atom->type, domain->sublo, domain->subhi, + atom->tag, atom->nspecial, atom->special, + eflag, vflag, eflag_atom, vflag_atom, + host_start, &ilist, &numneigh, cpu_time, + success); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + table_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success); + } + if (!success) + error->one(FLERR,"Out of memory on GPGPU"); + + if (host_startnewton_pair) + error->all(FLERR,"Cannot use newton pair with table/gpu pair style"); + + int ntypes = atom->ntypes; + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + // pack tables and send them to device + double ***table_coeffs = NULL; + double **table_data = NULL; + memory->create(table_coeffs, ntypes+1, ntypes+1, 6, "table:coeffs"); + + Table *tb; + for (int i = 1; i <= atom->ntypes; i++) + for (int j = 1; j <= atom->ntypes; j++) { + int n = tabindex[i][j]; + tb = &tables[n]; + table_coeffs[i][j][0] = n; + table_coeffs[i][j][1] = tb->nshiftbits; + table_coeffs[i][j][2] = tb->nmask; + table_coeffs[i][j][3] = tb->innersq; + table_coeffs[i][j][4] = tb->invdelta; + table_coeffs[i][j][5] = tb->deltasq6; + } + + if (tabstyle != BITMAP) { + memory->create(table_data, ntables, 6*tablength, "table:data"); + for (int n = 0; n < ntables; n++) { + tb = &tables[n]; + if (tabstyle == LOOKUP) { + for (int k = 0; ke[k]; + table_data[n][6*k+2] = tb->f[k]; + } + } else if (tabstyle == LINEAR) { + for (int k = 0; krsq[k]; + table_data[n][6*k+1] = tb->e[k]; + table_data[n][6*k+2] = tb->f[k]; + if (kde[k]; + table_data[n][6*k+4] = tb->df[k]; + } + } + } else if (tabstyle == SPLINE) { + for (int k = 0; krsq[k]; + table_data[n][6*k+1] = tb->e[k]; + table_data[n][6*k+2] = tb->f[k]; + table_data[n][6*k+3] = tb->e2[k]; + table_data[n][6*k+4] = tb->f2[k]; + } + } + } + } else { + int ntable = 1 << tablength; + memory->create(table_data, ntables, 6*ntable, "table:data"); + + for (int n = 0; n < ntables; n++) { + tb = &tables[n]; + for (int k = 0; krsq[k]; + table_data[n][6*k+1] = tb->e[k]; + table_data[n][6*k+2] = tb->f[k]; + table_data[n][6*k+3] = tb->de[k]; + table_data[n][6*k+4] = tb->df[k]; + table_data[n][6*k+5] = tb->drsq[k]; + } + } + } + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int success = table_gpu_init(atom->ntypes+1, cutsq, table_coeffs, table_data, + force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen, tabstyle, ntables, + tablength); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } + + memory->destroy(table_coeffs); + memory->destroy(table_data); +} + +/* ---------------------------------------------------------------------- */ + +double PairTableGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + table_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairTableGPU::cpu_compute(int start, int inum, int eflag, int vflag, + int *ilist, int *numneigh, int **firstneigh) { + int i,j,ii,jj,jnum,itype,jtype,itable; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,factor_lj,fraction,value,a,b; + int *jlist; + Table *tb; + + union_int_float_t rsq_lookup; + int tlm1 = tablength - 1; + + double **x = atom->x; + double **f = atom->f; + int *type = atom->type; + double *special_lj = force->special_lj; + + // loop over neighbors of my atoms + + for (ii = start; ii < inum; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor_lj = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + tb = &tables[tabindex[itype][jtype]]; + if (rsq < tb->innersq) + error->one(FLERR,"Pair distance < table inner cutoff"); + + if (tabstyle == LOOKUP) { + itable = static_cast ((rsq - tb->innersq) * tb->invdelta); + if (itable >= tlm1) + error->one(FLERR,"Pair distance > table outer cutoff"); + fpair = factor_lj * tb->f[itable]; + } else if (tabstyle == LINEAR) { + itable = static_cast ((rsq - tb->innersq) * tb->invdelta); + if (itable >= tlm1) + error->one(FLERR,"Pair distance > table outer cutoff"); + fraction = (rsq - tb->rsq[itable]) * tb->invdelta; + value = tb->f[itable] + fraction*tb->df[itable]; + fpair = factor_lj * value; + } else if (tabstyle == SPLINE) { + itable = static_cast ((rsq - tb->innersq) * tb->invdelta); + if (itable >= tlm1) + error->one(FLERR,"Pair distance > table outer cutoff"); + b = (rsq - tb->rsq[itable]) * tb->invdelta; + a = 1.0 - b; + value = a * tb->f[itable] + b * tb->f[itable+1] + + ((a*a*a-a)*tb->f2[itable] + (b*b*b-b)*tb->f2[itable+1]) * + tb->deltasq6; + fpair = factor_lj * value; + } else { + rsq_lookup.f = rsq; + itable = rsq_lookup.i & tb->nmask; + itable >>= tb->nshiftbits; + fraction = (rsq_lookup.f - tb->rsq[itable]) * tb->drsq[itable]; + value = tb->f[itable] + fraction*tb->df[itable]; + fpair = factor_lj * value; + } + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + + if (eflag) { + if (tabstyle == LOOKUP) + evdwl = tb->e[itable]; + else if (tabstyle == LINEAR || tabstyle == BITMAP) + evdwl = tb->e[itable] + fraction*tb->de[itable]; + else + evdwl = a * tb->e[itable] + b * tb->e[itable+1] + + ((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) * + tb->deltasq6; + evdwl *= factor_lj; + } + + if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz); + } + } + } +} diff --git a/src/GPU/pair_table_gpu.h b/src/GPU/pair_table_gpu.h new file mode 100644 index 0000000000..dd393b447d --- /dev/null +++ b/src/GPU/pair_table_gpu.h @@ -0,0 +1,62 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(table/gpu,PairTableGPU) + +#else + +#ifndef LMP_PAIR_TABLE_GPU_H +#define LMP_PAIR_TABLE_GPU_H + +#include "pair_table.h" + +namespace LAMMPS_NS { + +class PairTableGPU : public PairTable { + public: + PairTableGPU(LAMMPS *lmp); + ~PairTableGPU(); + void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int); + void init_style(); + double memory_usage(); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + private: + int gpu_mode; + double cpu_time; + int *gpulist; +}; + +} +#endif +#endif + +/* ERROR/WARNING messages: + +E: Out of memory on GPGPU + +UNDOCUMENTED + +E: Cannot use newton pair with table/gpu pair style + +UNDOCUMENTED + +E: Pair distance > table outer cutoff + +Two atoms are further apart than the pairwise table allows. + +*/ \ No newline at end of file diff --git a/src/GPU/pair_yukawa_gpu.cpp b/src/GPU/pair_yukawa_gpu.cpp new file mode 100644 index 0000000000..f03687d028 --- /dev/null +++ b/src/GPU/pair_yukawa_gpu.cpp @@ -0,0 +1,229 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Trung Dac Nguyen (ORNL) +------------------------------------------------------------------------- */ + +#include "lmptype.h" +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "pair_yukawa_gpu.h" +#include "atom.h" +#include "atom_vec.h" +#include "comm.h" +#include "force.h" +#include "neighbor.h" +#include "neigh_list.h" +#include "integrate.h" +#include "memory.h" +#include "error.h" +#include "neigh_request.h" +#include "universe.h" +#include "update.h" +#include "domain.h" +#include "string.h" +#include "gpu_extra.h" + +// External functions from cuda library for atom decomposition + +int yukawa_gpu_init(const int ntypes, double **cutsq, double kappa, + double **host_a, double **offset, double *special_lj, + const int inum, const int nall, const int max_nbors, + const int maxspecial, const double cell_size, + int &gpu_mode, FILE *screen); +void yukawa_gpu_clear(); +int ** yukawa_gpu_compute_n(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, double *sublo, + double *subhi, int *tag, int **nspecial, + int **special, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, + int &host_start, int **ilist, int **jnum, + const double cpu_time, bool &success); +void yukawa_gpu_compute(const int ago, const int inum_full, const int nall, + double **host_x, int *host_type, int *ilist, int *numj, + int **firstneigh, const bool eflag, const bool vflag, + const bool eatom, const bool vatom, int &host_start, + const double cpu_time, bool &success); +double yukawa_gpu_bytes(); + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +PairYukawaGPU::PairYukawaGPU(LAMMPS *lmp) : PairYukawa(lmp), + gpu_mode(GPU_FORCE) +{ + respa_enable = 0; + cpu_time = 0.0; + GPU_EXTRA::gpu_ready(lmp->modify, lmp->error); +} + +/* ---------------------------------------------------------------------- + free all arrays +------------------------------------------------------------------------- */ + +PairYukawaGPU::~PairYukawaGPU() +{ + yukawa_gpu_clear(); +} + +/* ---------------------------------------------------------------------- */ + +void PairYukawaGPU::compute(int eflag, int vflag) +{ + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + int nall = atom->nlocal + atom->nghost; + int inum, host_start; + + bool success = true; + int *ilist, *numneigh, **firstneigh; + if (gpu_mode != GPU_FORCE) { + inum = atom->nlocal; + firstneigh = yukawa_gpu_compute_n(neighbor->ago, inum, nall, + atom->x, atom->type, domain->sublo, + domain->subhi, atom->tag, atom->nspecial, + atom->special, eflag, vflag, eflag_atom, + vflag_atom, host_start, + &ilist, &numneigh, cpu_time, success); + } else { + inum = list->inum; + ilist = list->ilist; + numneigh = list->numneigh; + firstneigh = list->firstneigh; + yukawa_gpu_compute(neighbor->ago, inum, nall, atom->x, atom->type, + ilist, numneigh, firstneigh, eflag, vflag, eflag_atom, + vflag_atom, host_start, cpu_time, success); + } + if (!success) + error->one(FLERR,"Out of memory on GPGPU"); + + if (host_startnewton_pair) + error->all(FLERR,"Cannot use newton pair with yukawa/gpu pair style"); + + // Repeat cutsq calculation because done after call to init_style + double maxcut = -1.0; + double cut; + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) { + cut = init_one(i,j); + cut *= cut; + if (cut > maxcut) + maxcut = cut; + cutsq[i][j] = cutsq[j][i] = cut; + } else + cutsq[i][j] = cutsq[j][i] = 0.0; + } + } + double cell_size = sqrt(maxcut) + neighbor->skin; + + int maxspecial=0; + if (atom->molecular) + maxspecial=atom->maxspecial; + int success = yukawa_gpu_init(atom->ntypes+1, cutsq, kappa, a, + offset, force->special_lj, atom->nlocal, + atom->nlocal+atom->nghost, 300, maxspecial, + cell_size, gpu_mode, screen); + GPU_EXTRA::check_flag(success,error,world); + + if (gpu_mode == GPU_FORCE) { + int irequest = neighbor->request(this); + neighbor->requests[irequest]->half = 0; + neighbor->requests[irequest]->full = 1; + } +} + +/* ---------------------------------------------------------------------- */ + +double PairYukawaGPU::memory_usage() +{ + double bytes = Pair::memory_usage(); + return bytes + yukawa_gpu_bytes(); +} + +/* ---------------------------------------------------------------------- */ + +void PairYukawaGPU::cpu_compute(int start, int inum, int eflag, int vflag, + int *ilist, int *numneigh, int **firstneigh) { + int i,j,ii,jj,jnum,itype,jtype; + double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair; + double rsq,r2inv,r,rinv,screening,forceyukawa,factor; + int *jlist; + + double **x = atom->x; + double **f = atom->f; + int *type = atom->type; + double *special_lj = force->special_lj; + + // loop over neighbors of my atoms + + for (ii = start; ii < inum; ii++) { + i = ilist[ii]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + jlist = firstneigh[i]; + jnum = numneigh[i]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + factor = special_lj[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cutsq[itype][jtype]) { + r2inv = 1.0/rsq; + r = sqrt(rsq); + rinv = 1.0/r; + screening = exp(-kappa*r); + forceyukawa = a[itype][jtype] * screening * (kappa + rinv); + + fpair = factor*forceyukawa * r2inv; + + f[i][0] += delx*fpair; + f[i][1] += dely*fpair; + f[i][2] += delz*fpair; + + if (eflag) { + evdwl = a[itype][jtype] * screening * rinv - offset[itype][jtype]; + evdwl *= factor; + } + + if (evflag) ev_tally_full(i,evdwl,0.0,fpair,delx,dely,delz); + } + } + } +} diff --git a/src/GPU/pair_yukawa_gpu.h b/src/GPU/pair_yukawa_gpu.h new file mode 100644 index 0000000000..deaa179393 --- /dev/null +++ b/src/GPU/pair_yukawa_gpu.h @@ -0,0 +1,58 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(yukawa/gpu,PairYukawaGPU) + +#else + +#ifndef LMP_PAIR_YUKAWA_GPU_H +#define LMP_PAIR_YUKAWA_GPU_H + +#include "pair_yukawa.h" + +namespace LAMMPS_NS { + +class PairYukawaGPU : public PairYukawa { + public: + PairYukawaGPU(LAMMPS *lmp); + ~PairYukawaGPU(); + void cpu_compute(int, int, int, int, int *, int *, int **); + void compute(int, int); + void init_style(); + double memory_usage(); + + enum { GPU_FORCE, GPU_NEIGH, GPU_HYB_NEIGH }; + + private: + int gpu_mode; + double cpu_time; + int *gpulist; +}; + +} +#endif +#endif + +/* ERROR/WARNING messages: + +E: Out of memory on GPGPU + +UNDOCUMENTED + +E: Cannot use newton pair with yukawa/gpu pair style + +UNDOCUMENTED + +*/ \ No newline at end of file