git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@9680 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -112,12 +112,16 @@ void MSMOMP::direct(int n)
|
||||
direct_eval<0,0,0>(n);
|
||||
}
|
||||
}
|
||||
|
||||
if (vflag_atom)
|
||||
direct_peratom<1>(n);
|
||||
else
|
||||
direct_peratom<0>(n);
|
||||
}
|
||||
|
||||
template <int EFLAG_GLOBAL, int VFLAG_GLOBAL, int VFLAG_ATOM>
|
||||
void MSMOMP::direct_eval(const int nn)
|
||||
{
|
||||
double * _noalias const * _noalias const * _noalias const egridn = egrid[nn];
|
||||
const double * _noalias const * _noalias const * _noalias const qgridn = qgrid[nn];
|
||||
const double * _noalias const g_directn = g_direct[nn];
|
||||
const double * _noalias const v0_directn = v0_direct[nn];
|
||||
@ -157,14 +161,14 @@ void MSMOMP::direct_eval(const int nn)
|
||||
#pragma omp parallel default(none) reduction(+:v0,v1,v2,v3,v4,v5,emsm)
|
||||
#endif
|
||||
{
|
||||
double qtmp,esum,v0sum,v1sum,v2sum,v3sum,v4sum,v5sum;
|
||||
double esum,v0sum,v1sum,v2sum,v3sum,v4sum,v5sum;
|
||||
int i,ifrom,ito,tid,icx,icy,icz,ix,iy,iz,k;
|
||||
|
||||
loop_setup_thr(ifrom, ito, tid, inum, comm->nthreads);
|
||||
|
||||
for (i = ifrom; i < ito; ++i) {
|
||||
|
||||
// infer outer loop indices icx, icy, icz from master loop index
|
||||
// infer outer loop indices icx, icy, icz from master loop index i
|
||||
|
||||
icz = i/(numy*numx);
|
||||
icy = (i - icz*numy*numx) / numx;
|
||||
@ -173,41 +177,102 @@ void MSMOMP::direct_eval(const int nn)
|
||||
icy += nylo_inn;
|
||||
icx += nxlo_inn;
|
||||
|
||||
const int kmin = zper ? nzlo_direct : MAX(nzlo_direct,alphan - icz);
|
||||
const int kmax = zper ? nzhi_direct : MIN(nzhi_direct,betazn - icz);
|
||||
const int jmin = yper ? nylo_direct : MAX(nylo_direct,alphan - icy);
|
||||
const int jmax = yper ? nyhi_direct : MIN(nyhi_direct,betayn - icy);
|
||||
const int imin = xper ? nxlo_direct : MAX(nxlo_direct,alphan - icx);
|
||||
const int imax = xper ? nxhi_direct : MIN(nxhi_direct,betaxn - icx);
|
||||
|
||||
const double qtmp = qgridn[icz][icy][icx]; // charge on center grid point
|
||||
|
||||
esum = 0.0;
|
||||
if (VFLAG_GLOBAL || VFLAG_ATOM)
|
||||
v0sum = v1sum = v2sum = v3sum = v4sum = v5sum = 0.0;
|
||||
|
||||
for (iz = kmin; iz <= kmax; iz++) {
|
||||
// use hemisphere to avoid double computation of pair-wise
|
||||
// interactions in direct sum (no computations in -z direction)
|
||||
|
||||
for (iz = 1; iz <= kmax; iz++) {
|
||||
const int kk = icz+iz;
|
||||
const int zk = (iz + nzhi_direct)*ny;
|
||||
for (iy = jmin; iy <= jmax; iy++) {
|
||||
const int jj = icy+iy;
|
||||
const int zyk = (zk + iy + nyhi_direct)*nx;
|
||||
const double * _noalias const qgridnkj = & qgridn[kk][jj][icx];
|
||||
const double * _noalias const qgridnkj = &qgridn[kk][jj][icx];
|
||||
for (ix = imin; ix <= imax; ix++) {
|
||||
qtmp = qgridnkj[ix];
|
||||
const double qtmp2 = qgridnkj[ix];
|
||||
k = zyk + ix + nxhi_direct;
|
||||
esum += g_directn[k] * qtmp;
|
||||
const double gtmp = g_directn[k];
|
||||
esum += gtmp * qtmp2;
|
||||
|
||||
if (VFLAG_GLOBAL || VFLAG_ATOM) {
|
||||
v0sum += v0_directn[k] * qtmp;
|
||||
v1sum += v1_directn[k] * qtmp;
|
||||
v2sum += v2_directn[k] * qtmp;
|
||||
v3sum += v3_directn[k] * qtmp;
|
||||
v4sum += v4_directn[k] * qtmp;
|
||||
v5sum += v5_directn[k] * qtmp;
|
||||
v0sum += v0_directn[k] * qtmp2;
|
||||
v1sum += v1_directn[k] * qtmp2;
|
||||
v2sum += v2_directn[k] * qtmp2;
|
||||
v3sum += v3_directn[k] * qtmp2;
|
||||
v4sum += v4_directn[k] * qtmp2;
|
||||
v5sum += v5_directn[k] * qtmp2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
egridn[icz][icy][icx] = esum;
|
||||
|
||||
// iz=0
|
||||
|
||||
const int zk = nzhi_direct*ny;
|
||||
for (iy = 1; iy <= jmax; iy++) {
|
||||
const int jj = icy+iy;
|
||||
const int zyk = (zk + iy + nyhi_direct)*nx;
|
||||
const double * _noalias const qgridnkj = &qgridn[icz][jj][icx];
|
||||
for (ix = imin; ix <= imax; ix++) {
|
||||
const double qtmp2 = qgridnkj[ix];
|
||||
k = zyk + ix + nxhi_direct;
|
||||
const double gtmp = g_directn[k];
|
||||
esum += gtmp * qtmp2;
|
||||
|
||||
if (VFLAG_GLOBAL || VFLAG_ATOM) {
|
||||
v0sum += v0_directn[k] * qtmp2;
|
||||
v1sum += v1_directn[k] * qtmp2;
|
||||
v2sum += v2_directn[k] * qtmp2;
|
||||
v3sum += v3_directn[k] * qtmp2;
|
||||
v4sum += v4_directn[k] * qtmp2;
|
||||
v5sum += v5_directn[k] * qtmp2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// iz=0, iy=0
|
||||
|
||||
const int zyk = (zk + nyhi_direct)*nx;
|
||||
const double * _noalias const qgridnkj = &qgridn[icz][icy][icx];
|
||||
for (ix = 1; ix <= imax; ix++) {
|
||||
const double qtmp2 = qgridnkj[ix];
|
||||
k = zyk + ix + nxhi_direct;
|
||||
const double gtmp = g_directn[k];
|
||||
esum += gtmp * qtmp2;
|
||||
|
||||
if (VFLAG_GLOBAL || VFLAG_ATOM) {
|
||||
v0sum += v0_directn[k] * qtmp2;
|
||||
v1sum += v1_directn[k] * qtmp2;
|
||||
v2sum += v2_directn[k] * qtmp2;
|
||||
v3sum += v3_directn[k] * qtmp2;
|
||||
v4sum += v4_directn[k] * qtmp2;
|
||||
v5sum += v5_directn[k] * qtmp2;
|
||||
}
|
||||
}
|
||||
|
||||
// iz=0, iy=0, ix=0
|
||||
|
||||
const double qtmp2 = qgridnkj[0];
|
||||
k = zyk + nxhi_direct;
|
||||
const double gtmp = g_directn[k];
|
||||
esum += 0.5 * gtmp * qtmp2;
|
||||
|
||||
// virial is zero for iz=0, iy=0, ix=0
|
||||
|
||||
// accumulate per-atom energy/virial
|
||||
|
||||
egrid[n][icz][icy][icx] = esum;
|
||||
|
||||
if (VFLAG_ATOM) {
|
||||
v0grid[n][icz][icy][icx] = v0sum;
|
||||
@ -219,15 +284,15 @@ void MSMOMP::direct_eval(const int nn)
|
||||
}
|
||||
|
||||
if (EFLAG_GLOBAL || VFLAG_GLOBAL) {
|
||||
qtmp = qgridn[icz][icy][icx];
|
||||
if (EFLAG_GLOBAL) emsm += esum * qtmp;
|
||||
const double qtmp3 = qgridn[icz][icy][icx];
|
||||
if (EFLAG_GLOBAL) emsm += 2.0 * esum * qtmp3;
|
||||
if (VFLAG_GLOBAL) {
|
||||
v0 += v0sum * qtmp;
|
||||
v1 += v1sum * qtmp;
|
||||
v2 += v2sum * qtmp;
|
||||
v3 += v3sum * qtmp;
|
||||
v4 += v4sum * qtmp;
|
||||
v5 += v5sum * qtmp;
|
||||
v0 += 2.0 * v0sum * qtmp3;
|
||||
v1 += 2.0 * v1sum * qtmp3;
|
||||
v2 += 2.0 * v2sum * qtmp3;
|
||||
v3 += 2.0 * v3sum * qtmp3;
|
||||
v4 += 2.0 * v4sum * qtmp3;
|
||||
v5 += 2.0 * v5sum * qtmp3;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -245,3 +310,155 @@ void MSMOMP::direct_eval(const int nn)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <int VFLAG_ATOM>
|
||||
void MSMOMP::direct_peratom(const int nn)
|
||||
{
|
||||
double * _noalias const * _noalias const * _noalias const egridn = egrid[nn];
|
||||
double * _noalias const * _noalias const * _noalias const v0gridn = v0grid[nn];
|
||||
double * _noalias const * _noalias const * _noalias const v1gridn = v1grid[nn];
|
||||
double * _noalias const * _noalias const * _noalias const v2gridn = v2grid[nn];
|
||||
double * _noalias const * _noalias const * _noalias const v3gridn = v3grid[nn];
|
||||
double * _noalias const * _noalias const * _noalias const v4gridn = v4grid[nn];
|
||||
double * _noalias const * _noalias const * _noalias const v5gridn = v5grid[nn];
|
||||
const double * _noalias const * _noalias const * _noalias const qgridn = qgrid[nn];
|
||||
const double * _noalias const g_directn = g_direct[nn];
|
||||
const double * _noalias const v0_directn = v0_direct[nn];
|
||||
const double * _noalias const v1_directn = v1_direct[nn];
|
||||
const double * _noalias const v2_directn = v2_direct[nn];
|
||||
const double * _noalias const v3_directn = v3_direct[nn];
|
||||
const double * _noalias const v4_directn = v4_direct[nn];
|
||||
const double * _noalias const v5_directn = v5_direct[nn];
|
||||
|
||||
|
||||
const int alphan = alpha[nn];
|
||||
const int betaxn = betax[nn];
|
||||
const int betayn = betay[nn];
|
||||
const int betazn = betaz[nn];
|
||||
|
||||
const int nx = nxhi_direct - nxlo_direct + 1;
|
||||
const int ny = nyhi_direct - nylo_direct + 1;
|
||||
|
||||
// merge three outer loops into one
|
||||
|
||||
const int nzlo_inn = nzlo_in[nn];
|
||||
const int nylo_inn = nylo_in[nn];
|
||||
const int nxlo_inn = nxlo_in[nn];
|
||||
const int numz = nzhi_in[nn] - nzlo_inn + 1;
|
||||
const int numy = nyhi_in[nn] - nylo_inn + 1;
|
||||
const int numx = nxhi_in[nn] - nxlo_inn + 1;
|
||||
const int inum = numz*numy*numx;
|
||||
|
||||
const int zper = domain->zperiodic;
|
||||
const int yper = domain->yperiodic;
|
||||
const int xper = domain->xperiodic;
|
||||
|
||||
const int n=nn;
|
||||
int i,ifrom,ito,tid,icx,icy,icz,ix,iy,iz,k;
|
||||
|
||||
|
||||
for (i = 0; i < inum; ++i) {
|
||||
|
||||
// infer outer loop indices icx, icy, icz from master loop index i
|
||||
|
||||
icz = i/(numy*numx);
|
||||
icy = (i - icz*numy*numx) / numx;
|
||||
icx = i - icz*numy*numx - icy*numx;
|
||||
icz += nzlo_inn;
|
||||
icy += nylo_inn;
|
||||
icx += nxlo_inn;
|
||||
|
||||
const int kmax = zper ? nzhi_direct : MIN(nzhi_direct,betazn - icz);
|
||||
const int jmin = yper ? nylo_direct : MAX(nylo_direct,alphan - icy);
|
||||
const int jmax = yper ? nyhi_direct : MIN(nyhi_direct,betayn - icy);
|
||||
const int imin = xper ? nxlo_direct : MAX(nxlo_direct,alphan - icx);
|
||||
const int imax = xper ? nxhi_direct : MIN(nxhi_direct,betaxn - icx);
|
||||
|
||||
const double qtmp = qgridn[icz][icy][icx]; // charge on center grid point
|
||||
|
||||
|
||||
// use hemisphere to avoid double computation of pair-wise
|
||||
// interactions in direct sum (no computations in -z direction)
|
||||
|
||||
for (iz = 1; iz <= kmax; iz++) {
|
||||
const int kk = icz+iz;
|
||||
const int zk = (iz + nzhi_direct)*ny;
|
||||
for (iy = jmin; iy <= jmax; iy++) {
|
||||
const int jj = icy+iy;
|
||||
const int zyk = (zk + iy + nyhi_direct)*nx;
|
||||
double * _noalias const egridnkj = &egridn[kk][jj][icx];
|
||||
for (ix = imin; ix <= imax; ix++) {
|
||||
k = zyk + ix + nxhi_direct;
|
||||
const int ii = icx+ix;
|
||||
const double gtmp = g_directn[k];
|
||||
|
||||
egridnkj[ix] += gtmp * qtmp;
|
||||
|
||||
if (VFLAG_ATOM) {
|
||||
v0gridn[kk][jj][ii] += v0_directn[k] * qtmp;
|
||||
v1gridn[kk][jj][ii] += v1_directn[k] * qtmp;
|
||||
v2gridn[kk][jj][ii] += v2_directn[k] * qtmp;
|
||||
v3gridn[kk][jj][ii] += v3_directn[k] * qtmp;
|
||||
v4gridn[kk][jj][ii] += v4_directn[k] * qtmp;
|
||||
v5gridn[kk][jj][ii] += v5_directn[k] * qtmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// iz=0
|
||||
|
||||
const int zk = nzhi_direct*ny;
|
||||
for (iy = 1; iy <= jmax; iy++) {
|
||||
const int jj = icy+iy;
|
||||
const int zyk = (zk + iy + nyhi_direct)*nx;
|
||||
double * _noalias const egridnkj = &egridn[icz][jj][icx];
|
||||
for (ix = imin; ix <= imax; ix++) {
|
||||
k = zyk + ix + nxhi_direct;
|
||||
const int ii = icx+ix;
|
||||
const double gtmp = g_directn[k];
|
||||
|
||||
egridnkj[ix] += gtmp * qtmp;
|
||||
|
||||
if (VFLAG_ATOM) {
|
||||
v0gridn[icz][jj][ii] += v0_directn[k] * qtmp;
|
||||
v1gridn[icz][jj][ii] += v1_directn[k] * qtmp;
|
||||
v2gridn[icz][jj][ii] += v2_directn[k] * qtmp;
|
||||
v3gridn[icz][jj][ii] += v3_directn[k] * qtmp;
|
||||
v4gridn[icz][jj][ii] += v4_directn[k] * qtmp;
|
||||
v5gridn[icz][jj][ii] += v5_directn[k] * qtmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// iz=0, iy=0
|
||||
|
||||
const int zyk = (zk + nyhi_direct)*nx;
|
||||
double * _noalias const egridnkj = &egridn[icz][icy][icx];
|
||||
for (ix = 1; ix <= imax; ix++) {
|
||||
k = zyk + ix + nxhi_direct;
|
||||
const int ii = icx+ix;
|
||||
const double gtmp = g_directn[k];
|
||||
|
||||
egridnkj[ix] += gtmp * qtmp;
|
||||
|
||||
if (VFLAG_ATOM) {
|
||||
v0gridn[icz][icy][ii] += v0_directn[k] * qtmp;
|
||||
v1gridn[icz][icy][ii] += v1_directn[k] * qtmp;
|
||||
v2gridn[icz][icy][ii] += v2_directn[k] * qtmp;
|
||||
v3gridn[icz][icy][ii] += v3_directn[k] * qtmp;
|
||||
v4gridn[icz][icy][ii] += v4_directn[k] * qtmp;
|
||||
v5gridn[icz][icy][ii] += v5_directn[k] * qtmp;
|
||||
}
|
||||
}
|
||||
|
||||
// iz=0, iy=0, ix=0
|
||||
|
||||
k = zyk + nxhi_direct;
|
||||
const double gtmp = g_directn[k];
|
||||
egridnkj[0] += 0.5 * gtmp * qtmp;
|
||||
|
||||
// virial is zero for iz=0, iy=0, ix=0
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,45 +1,46 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef KSPACE_CLASS
|
||||
|
||||
KSpaceStyle(msm/omp,MSMOMP)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_MSM_OMP_H
|
||||
#define LMP_MSM_OMP_H
|
||||
|
||||
#include "msm.h"
|
||||
#include "thr_omp.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class MSMOMP : public MSM, public ThrOMP {
|
||||
public:
|
||||
MSMOMP(class LAMMPS *, int, char **);
|
||||
virtual ~MSMOMP () {};
|
||||
|
||||
protected:
|
||||
virtual void direct(int);
|
||||
virtual void compute(int,int);
|
||||
|
||||
private:
|
||||
template <int, int, int> void direct_eval(int);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef KSPACE_CLASS
|
||||
|
||||
KSpaceStyle(msm/omp,MSMOMP)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_MSM_OMP_H
|
||||
#define LMP_MSM_OMP_H
|
||||
|
||||
#include "msm.h"
|
||||
#include "thr_omp.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class MSMOMP : public MSM, public ThrOMP {
|
||||
public:
|
||||
MSMOMP(class LAMMPS *, int, char **);
|
||||
virtual ~MSMOMP () {};
|
||||
|
||||
protected:
|
||||
virtual void direct(int);
|
||||
virtual void compute(int,int);
|
||||
|
||||
private:
|
||||
template <int, int, int> void direct_eval(int);
|
||||
template <int> void direct_peratom(int);
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user