From 0ffe33f6c8bd1287a5f0f0e7a26be9477f8c964e Mon Sep 17 00:00:00 2001 From: sjplimp Date: Tue, 14 Feb 2012 22:55:38 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@7790 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/GPU/pppm_gpu.cpp | 48 ++++++++++++++++++++++++++------------------ src/GPU/pppm_gpu.h | 6 +++--- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/src/GPU/pppm_gpu.cpp b/src/GPU/pppm_gpu.cpp index fc0d05727c..fd643fa479 100644 --- a/src/GPU/pppm_gpu.cpp +++ b/src/GPU/pppm_gpu.cpp @@ -121,7 +121,8 @@ void PPPMGPU::init() return; } - // GPU precision specific init. + // GPU precision specific init + if (order>8) error->all(FLERR,"Cannot use order greater than 8 with pppm/gpu."); PPPM_GPU_API(clear)(poisson_time); @@ -152,8 +153,18 @@ void PPPMGPU::init() void PPPMGPU::compute(int eflag, int vflag) { - if (im_real_space) - return; + if (im_real_space) return; + + // set energy/virial flags + // invoke allocate_peratom() if needed for first time + + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = eflag_global = vflag_global = eflag_atom = vflag_atom = 0; + + if (!peratom_allocate_flag && (eflag_atom || vflag_atom)) { + allocate_peratom(); + peratom_allocate_flag = 1; + } bool success = true; int flag=PPPM_GPU_API(spread)(neighbor->ago, atom->nlocal, atom->nlocal + @@ -175,9 +186,6 @@ void PPPMGPU::compute(int eflag, int vflag) domain->x2lamda(atom->nlocal); } - energy = 0.0; - if (vflag) for (i = 0; i < 6; i++) virial[i] = 0.0; - double t3=MPI_Wtime(); // all procs communicate density values from their ghost cells @@ -190,7 +198,7 @@ void PPPMGPU::compute(int eflag, int vflag) // portion of e_long on this proc's FFT grid // return gradients (electric fields) in 3d brick decomposition - poisson(eflag,vflag); + poisson(); // all procs communicate E-field values to fill ghost cells // surrounding their 3d bricks @@ -206,7 +214,7 @@ void PPPMGPU::compute(int eflag, int vflag) // sum energy across procs and add in volume-dependent term - if (eflag) { + if (eflag_global) { double energy_all; MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world); energy = energy_all; @@ -219,7 +227,7 @@ void PPPMGPU::compute(int eflag, int vflag) // sum virial across procs - if (vflag) { + if (vflag_global) { double virial_all[6]; MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world); for (i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i]; @@ -227,14 +235,13 @@ void PPPMGPU::compute(int eflag, int vflag) // 2d slab correction - if (slabflag) slabcorr(eflag); + if (slabflag) slabcorr(); // convert atoms back from lamda to box coords if (triclinic) domain->lamda2x(atom->nlocal); - if (kspace_split) - PPPM_GPU_API(forces)(atom->f); + if (kspace_split) PPPM_GPU_API(forces)(atom->f); } /* ---------------------------------------------------------------------- @@ -690,7 +697,7 @@ void PPPMGPU::fillbrick() FFT-based Poisson solver ------------------------------------------------------------------------- */ -void PPPMGPU::poisson(int eflag, int vflag) +void PPPMGPU::poisson() { int i,j,k,n; double eng; @@ -710,13 +717,13 @@ void PPPMGPU::poisson(int eflag, int vflag) double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm); double s2 = scaleinv*scaleinv; - if (eflag || vflag) { - if (vflag) { + if (eflag_global || vflag_global) { + if (vflag_global) { n = 0; for (i = 0; i < nfft; i++) { eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]); for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j]; - energy += eng; + if (eflag_global) energy += eng; n += 2; } } else { @@ -820,8 +827,10 @@ FFT_SCALAR ***PPPMGPU::create_3d_offset(int n1lo, int n1hi, int n2lo, int n2hi, int n2 = n2hi - n2lo + 1; int n3 = n3hi - n3lo + 1; - FFT_SCALAR **plane = (FFT_SCALAR **)memory->smalloc(n1*n2*sizeof(FFT_SCALAR *),name); - FFT_SCALAR ***array = (FFT_SCALAR ***)memory->smalloc(n1*sizeof(FFT_SCALAR **),name); + FFT_SCALAR **plane = (FFT_SCALAR **) + memory->smalloc(n1*n2*sizeof(FFT_SCALAR *),name); + FFT_SCALAR ***array = (FFT_SCALAR ***) + memory->smalloc(n1*sizeof(FFT_SCALAR **),name); int n = 0; for (i = 0; i < n1; i++) { @@ -886,7 +895,6 @@ void PPPMGPU::timing(int n, double &time3d, double &time1d) { void PPPMGPU::setup() { - if (im_real_space) - return; + if (im_real_space) return; PPPM::setup(); } diff --git a/src/GPU/pppm_gpu.h b/src/GPU/pppm_gpu.h index aec92bc60d..d8bed15bc8 100644 --- a/src/GPU/pppm_gpu.h +++ b/src/GPU/pppm_gpu.h @@ -1,4 +1,4 @@ -ndoc/* ---------------------------------------------------------------------- +/* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -43,12 +43,12 @@ class PPPMGPU : public PPPM { virtual void deallocate(); virtual void brick2fft(); virtual void fillbrick(); - virtual void poisson(int, int); + virtual void poisson(); double poisson_time; FFT_SCALAR ***create_3d_offset(int, int, int, int, int, int, const char *, - FFT_SCALAR *, int); + FFT_SCALAR *, int); void destroy_3d_offset(FFT_SCALAR ***, int, int); };