217 lines
6.4 KiB
C++
217 lines
6.4 KiB
C++
/* ----------------------------------------------------------------------
|
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
|
http://lammps.sandia.gov, Sandia National Laboratories
|
|
Steve Plimpton, sjplimp@sandia.gov
|
|
|
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
|
certain rights in this software. This software is distributed under
|
|
the GNU General Public License.
|
|
|
|
See the README file in the top-level LAMMPS directory.
|
|
------------------------------------------------------------------------- */
|
|
|
|
/* ----------------------------------------------------------------------
|
|
Contributing authors: Mike Brown (ORNL)
|
|
------------------------------------------------------------------------- */
|
|
|
|
#include "mpi.h"
|
|
#include "string.h"
|
|
#include "stdio.h"
|
|
#include "stdlib.h"
|
|
#include "math.h"
|
|
#include "pppm_gpu_single.h"
|
|
#include "lmptype.h"
|
|
#include "atom.h"
|
|
#include "comm.h"
|
|
#include "neighbor.h"
|
|
#include "force.h"
|
|
#include "pair.h"
|
|
#include "bond.h"
|
|
#include "angle.h"
|
|
#include "domain.h"
|
|
#include "fft3d_wrap.h"
|
|
#include "remap_wrap.h"
|
|
#include "memory.h"
|
|
#include "error.h"
|
|
#include "gpu_extra.h"
|
|
|
|
#define grdtyp float
|
|
|
|
// External functions from cuda library for atom decomposition
|
|
|
|
grdtyp* pppm_gpu_init_f(const int nlocal, const int nall, FILE *screen,
|
|
const int order, const int nxlo_out,
|
|
const int nylo_out, const int nzlo_out,
|
|
const int nxhi_out, const int nyhi_out,
|
|
const int nzhi_out, double **rho_coeff,
|
|
grdtyp **_vd_brick, const double slab_volfactor,
|
|
const int nx_pppm, const int ny_pppm,
|
|
const int nz_pppm, int &success);
|
|
void pppm_gpu_clear_f(const double poisson_time);
|
|
int pppm_gpu_spread_f(const int ago, const int nlocal, const int nall,
|
|
double **host_x, int *host_type, bool &success,
|
|
double *host_q, double *boxlo, const double delxinv,
|
|
const double delyinv, const double delzinv);
|
|
void pppm_gpu_interp_f(const grdtyp qqrd2e_scale);
|
|
double pppm_gpu_bytes_f();
|
|
|
|
using namespace LAMMPS_NS;
|
|
|
|
#define MAXORDER 7
|
|
#define OFFSET 16384
|
|
#define SMALL 0.00001
|
|
#define LARGE 10000.0
|
|
#define EPS_HOC 1.0e-7
|
|
|
|
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
|
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
PPPMGPUSingle::PPPMGPUSingle(LAMMPS *lmp, int narg, char **arg) :
|
|
PPPMGPU<grdtyp>(lmp, narg, arg)
|
|
{
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
free all memory
|
|
------------------------------------------------------------------------- */
|
|
|
|
PPPMGPUSingle::~PPPMGPUSingle()
|
|
{
|
|
pppm_gpu_clear_f(poisson_time);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
called once before run
|
|
------------------------------------------------------------------------- */
|
|
|
|
void PPPMGPUSingle::init()
|
|
{
|
|
base_init();
|
|
|
|
if (order>8)
|
|
error->all("Cannot use order greater than 8 with pppm/gpu.");
|
|
pppm_gpu_clear_f(poisson_time);
|
|
|
|
int success;
|
|
grdtyp *data, *h_brick;
|
|
h_brick = pppm_gpu_init_f(atom->nlocal, atom->nlocal+atom->nghost, screen,
|
|
order, nxlo_out, nylo_out, nzlo_out, nxhi_out,
|
|
nyhi_out, nzhi_out, rho_coeff, &data,
|
|
slab_volfactor,nx_pppm,ny_pppm,nz_pppm,success);
|
|
|
|
GPU_EXTRA::check_flag(success,error,world);
|
|
|
|
density_brick =
|
|
create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
|
|
nxlo_out,nxhi_out,"pppm:density_brick",h_brick,1);
|
|
vd_brick =
|
|
create_3d_offset(nzlo_out,nzhi_out,nylo_out,nyhi_out,
|
|
nxlo_out,nxhi_out,"pppm:vd_brick",data,4);
|
|
|
|
poisson_time=0;
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
compute the PPPMGPU long-range force, energy, virial
|
|
------------------------------------------------------------------------- */
|
|
|
|
void PPPMGPUSingle::compute(int eflag, int vflag)
|
|
{
|
|
bool success = true;
|
|
int flag=pppm_gpu_spread_f(neighbor->ago, atom->nlocal, atom->nlocal +
|
|
atom->nghost, atom->x, atom->type, success,
|
|
atom->q, domain->boxlo, delxinv, delyinv,
|
|
delzinv);
|
|
if (!success)
|
|
error->one("Out of memory on GPGPU");
|
|
if (flag != 0)
|
|
error->one("Out of range atoms - cannot compute PPPM");
|
|
|
|
int i;
|
|
|
|
// convert atoms from box to lamda coords
|
|
|
|
if (triclinic == 0) boxlo = domain->boxlo;
|
|
else {
|
|
boxlo = domain->boxlo_lamda;
|
|
domain->x2lamda(atom->nlocal);
|
|
}
|
|
|
|
energy = 0.0;
|
|
if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0;
|
|
|
|
double t3=MPI_Wtime();
|
|
|
|
// all procs communicate density values from their ghost cells
|
|
// to fully sum contribution in their 3d bricks
|
|
// remap from 3d decomposition to FFT decomposition
|
|
|
|
brick2fft();
|
|
|
|
// compute potential gradient on my FFT grid and
|
|
// portion of e_long on this proc's FFT grid
|
|
// return gradients (electric fields) in 3d brick decomposition
|
|
|
|
poisson(eflag,vflag);
|
|
|
|
// all procs communicate E-field values to fill ghost cells
|
|
// surrounding their 3d bricks
|
|
|
|
fillbrick();
|
|
|
|
poisson_time+=MPI_Wtime()-t3;
|
|
|
|
// calculate the force on my particles
|
|
|
|
grdtyp qqrd2e_scale=qqrd2e*scale;
|
|
pppm_gpu_interp_f(qqrd2e_scale);
|
|
|
|
// sum energy across procs and add in volume-dependent term
|
|
|
|
if (eflag) {
|
|
double energy_all;
|
|
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
|
|
energy = energy_all;
|
|
|
|
energy *= 0.5*volume;
|
|
energy -= g_ewald*qsqsum/1.772453851 +
|
|
0.5*PI*qsum*qsum / (g_ewald*g_ewald*volume);
|
|
energy *= qqrd2e*scale;
|
|
}
|
|
|
|
// sum virial across procs
|
|
|
|
if (vflag) {
|
|
double virial_all[6];
|
|
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
|
|
for (i = 0; i < 6; i++) virial[i] = 0.5*qqrd2e*scale*volume*virial_all[i];
|
|
}
|
|
|
|
// 2d slab correction
|
|
|
|
if (slabflag) slabcorr(eflag);
|
|
|
|
// convert atoms back from lamda to box coords
|
|
|
|
if (triclinic) domain->lamda2x(atom->nlocal);
|
|
}
|
|
|
|
/* ----------------------------------------------------------------------
|
|
memory usage of local arrays
|
|
------------------------------------------------------------------------- */
|
|
|
|
double PPPMGPUSingle::memory_usage()
|
|
{
|
|
double bytes = nmax*3 * sizeof(double);
|
|
int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
|
|
(nzhi_out-nzlo_out+1);
|
|
bytes += 4 * nbrick * sizeof(grdtyp);
|
|
bytes += 6 * nfft_both * sizeof(double);
|
|
bytes += nfft_both*6 * sizeof(double);
|
|
bytes += 2 * nbuf * sizeof(double);
|
|
return bytes + pppm_gpu_bytes_f();
|
|
}
|