git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6622 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2011-08-08 19:40:03 +00:00
parent a54f7e6b5b
commit 8745580d5d
10 changed files with 944 additions and 948 deletions

View File

@ -43,6 +43,14 @@ using namespace LAMMPS_NS;
#define LARGE 10000.0 #define LARGE 10000.0
#define EPS_HOC 1.0e-7 #define EPS_HOC 1.0e-7
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#define ONEF 1.0f
#else
#define ZEROF 0.0
#define ONEF 1.0
#endif
#define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b))
@ -50,7 +58,7 @@ using namespace LAMMPS_NS;
PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
{ {
if (narg != 1) error->all("Illegal kspace_style pppm command"); if (narg < 1) error->all("Illegal kspace_style pppm command");
precision = atof(arg[0]); precision = atof(arg[0]);
PI = 4.0*atan(1.0); PI = 4.0*atan(1.0);
@ -754,7 +762,7 @@ void PPPM::allocate()
// summation coeffs // summation coeffs
gf_b = new double[order]; memory->create(gf_b,order,"pppm:gf_b");
memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
@ -778,7 +786,7 @@ void PPPM::allocate()
remap = new Remap(lmp,world, remap = new Remap(lmp,world,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
1,0,0,2); 1,0,0,FFT_PRECISION);
} }
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------
@ -805,7 +813,7 @@ void PPPM::deallocate()
memory->destroy(buf1); memory->destroy(buf1);
memory->destroy(buf2); memory->destroy(buf2);
delete [] gf_b; memory->destroy(gf_b);
memory->destroy2d_offset(rho1d,-order/2); memory->destroy2d_offset(rho1d,-order/2);
memory->destroy2d_offset(rho_coeff,(1-order)/2); memory->destroy2d_offset(rho_coeff,(1-order)/2);
@ -967,17 +975,24 @@ void PPPM::set_grid()
// print info // print info
if (me == 0) { if (me == 0) {
#ifdef FFT_SINGLE
const char fft_prec[] = "single";
#else
const char fft_prec[] = "double";
#endif
if (screen) { if (screen) {
fprintf(screen," G vector = %g\n",g_ewald); fprintf(screen," G vector = %g\n",g_ewald);
fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
fprintf(screen," stencil order = %d\n",order); fprintf(screen," stencil order = %d\n",order);
fprintf(screen," RMS precision = %g\n",MAX(lpr,spr)); fprintf(screen," RMS precision = %g\n",MAX(lpr,spr));
fprintf(screen," using %s precision FFTs\n",fft_prec);
} }
if (logfile) { if (logfile) {
fprintf(logfile," G vector = %g\n",g_ewald); fprintf(logfile," G vector = %g\n",g_ewald);
fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
fprintf(logfile," stencil order = %d\n",order); fprintf(logfile," stencil order = %d\n",order);
fprintf(logfile," RMS precision = %g\n",MAX(lpr,spr)); fprintf(logfile," RMS precision = %g\n",MAX(lpr,spr));
fprintf(logfile," using %s precision FFTs\n",fft_prec);
} }
} }
} }
@ -1036,7 +1051,7 @@ double PPPM::diffpr(double hx, double hy, double hz, double q2, double **acons)
lprz = rms(hz,zprd*slab_volfactor,natoms,q2,acons); lprz = rms(hz,zprd*slab_volfactor,natoms,q2,acons);
kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) /
sqrt(natoms*cutoff*xprd*yprd*zprd); sqrt(static_cast<double>(natoms)*cutoff*xprd*yprd*zprd);
double value = kspace_prec - real_prec; double value = kspace_prec - real_prec;
return value; return value;
} }
@ -1113,8 +1128,8 @@ void PPPM::brick2fft()
if (comm->procneigh[0][1] == me) if (comm->procneigh[0][1] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i]; for (i = 0; i < n; i++) buf2[i] = buf1[i];
else { else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][0],0,world,&request); MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][1],0,world); MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
MPI_Wait(&request,&status); MPI_Wait(&request,&status);
} }
@ -1137,8 +1152,8 @@ void PPPM::brick2fft()
if (comm->procneigh[0][0] == me) if (comm->procneigh[0][0] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i]; for (i = 0; i < n; i++) buf2[i] = buf1[i];
else { else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][1],0,world,&request); MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][0],0,world); MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
MPI_Wait(&request,&status); MPI_Wait(&request,&status);
} }
@ -1161,8 +1176,8 @@ void PPPM::brick2fft()
if (comm->procneigh[1][1] == me) if (comm->procneigh[1][1] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i]; for (i = 0; i < n; i++) buf2[i] = buf1[i];
else { else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][0],0,world,&request); MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][1],0,world); MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
MPI_Wait(&request,&status); MPI_Wait(&request,&status);
} }
@ -1185,8 +1200,8 @@ void PPPM::brick2fft()
if (comm->procneigh[1][0] == me) if (comm->procneigh[1][0] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i]; for (i = 0; i < n; i++) buf2[i] = buf1[i];
else { else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][1],0,world,&request); MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][0],0,world); MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
MPI_Wait(&request,&status); MPI_Wait(&request,&status);
} }
@ -1209,8 +1224,8 @@ void PPPM::brick2fft()
if (comm->procneigh[2][1] == me) if (comm->procneigh[2][1] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i]; for (i = 0; i < n; i++) buf2[i] = buf1[i];
else { else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][0],0,world,&request); MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][1],0,world); MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
MPI_Wait(&request,&status); MPI_Wait(&request,&status);
} }
@ -1233,8 +1248,8 @@ void PPPM::brick2fft()
if (comm->procneigh[2][0] == me) if (comm->procneigh[2][0] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i]; for (i = 0; i < n; i++) buf2[i] = buf1[i];
else { else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][1],0,world,&request); MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][0],0,world); MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
MPI_Wait(&request,&status); MPI_Wait(&request,&status);
} }
@ -1284,8 +1299,8 @@ void PPPM::fillbrick()
if (comm->procneigh[2][1] == me) if (comm->procneigh[2][1] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i]; for (i = 0; i < n; i++) buf2[i] = buf1[i];
else { else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][0],0,world,&request); MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][1],0,world); MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
MPI_Wait(&request,&status); MPI_Wait(&request,&status);
} }
@ -1314,8 +1329,8 @@ void PPPM::fillbrick()
if (comm->procneigh[2][0] == me) if (comm->procneigh[2][0] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i]; for (i = 0; i < n; i++) buf2[i] = buf1[i];
else { else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][1],0,world,&request); MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][0],0,world); MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
MPI_Wait(&request,&status); MPI_Wait(&request,&status);
} }
@ -1344,8 +1359,8 @@ void PPPM::fillbrick()
if (comm->procneigh[1][1] == me) if (comm->procneigh[1][1] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i]; for (i = 0; i < n; i++) buf2[i] = buf1[i];
else { else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][0],0,world,&request); MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][1],0,world); MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
MPI_Wait(&request,&status); MPI_Wait(&request,&status);
} }
@ -1374,8 +1389,8 @@ void PPPM::fillbrick()
if (comm->procneigh[1][0] == me) if (comm->procneigh[1][0] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i]; for (i = 0; i < n; i++) buf2[i] = buf1[i];
else { else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][1],0,world,&request); MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][0],0,world); MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
MPI_Wait(&request,&status); MPI_Wait(&request,&status);
} }
@ -1404,8 +1419,8 @@ void PPPM::fillbrick()
if (comm->procneigh[0][1] == me) if (comm->procneigh[0][1] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i]; for (i = 0; i < n; i++) buf2[i] = buf1[i];
else { else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][0],0,world,&request); MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][1],0,world); MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
MPI_Wait(&request,&status); MPI_Wait(&request,&status);
} }
@ -1434,8 +1449,8 @@ void PPPM::fillbrick()
if (comm->procneigh[0][0] == me) if (comm->procneigh[0][0] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i]; for (i = 0; i < n; i++) buf2[i] = buf1[i];
else { else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][1],0,world,&request); MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][0],0,world); MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
MPI_Wait(&request,&status); MPI_Wait(&request,&status);
} }
@ -1497,12 +1512,12 @@ void PPPM::particle_map()
void PPPM::make_rho() void PPPM::make_rho()
{ {
int i,l,m,n,nx,ny,nz,mx,my,mz; int i,l,m,n,nx,ny,nz,mx,my,mz;
double dx,dy,dz,x0,y0,z0; FFT_SCALAR dx,dy,dz,x0,y0,z0;
// clear 3d density array // clear 3d density array
double *vec = &density_brick[nzlo_out][nylo_out][nxlo_out]; FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (i = 0; i < ngrid; i++) vec[i] = 0.0; for (i = 0; i < ngrid; i++) vec[i] = ZEROF;
// loop over my charges, add their contribution to nearby grid points // loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
@ -1554,7 +1569,7 @@ void PPPM::poisson(int eflag, int vflag)
n = 0; n = 0;
for (i = 0; i < nfft; i++) { for (i = 0; i < nfft; i++) {
work1[n++] = density_fft[i]; work1[n++] = density_fft[i];
work1[n++] = 0.0; work1[n++] = ZEROF;
} }
fft1->compute(work1,work1,1); fft1->compute(work1,work1,1);
@ -1667,8 +1682,8 @@ void PPPM::poisson(int eflag, int vflag)
void PPPM::fieldforce() void PPPM::fieldforce()
{ {
int i,l,m,n,nx,ny,nz,mx,my,mz; int i,l,m,n,nx,ny,nz,mx,my,mz;
double dx,dy,dz,x0,y0,z0; FFT_SCALAR dx,dy,dz,x0,y0,z0;
double ek[3]; FFT_SCALAR ekx,eky,ekz;
// loop over my charges, interpolate electric field from nearby grid points // loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
@ -1679,6 +1694,7 @@ void PPPM::fieldforce()
double *q = atom->q; double *q = atom->q;
double **x = atom->x; double **x = atom->x;
double **f = atom->f; double **f = atom->f;
int nlocal = atom->nlocal; int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) { for (i = 0; i < nlocal; i++) {
@ -1691,7 +1707,7 @@ void PPPM::fieldforce()
compute_rho1d(dx,dy,dz); compute_rho1d(dx,dy,dz);
ek[0] = ek[1] = ek[2] = 0.0; ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) { for (n = nlower; n <= nupper; n++) {
mz = n+nz; mz = n+nz;
z0 = rho1d[2][n]; z0 = rho1d[2][n];
@ -1701,18 +1717,18 @@ void PPPM::fieldforce()
for (l = nlower; l <= nupper; l++) { for (l = nlower; l <= nupper; l++) {
mx = l+nx; mx = l+nx;
x0 = y0*rho1d[0][l]; x0 = y0*rho1d[0][l];
ek[0] -= x0*vdx_brick[mz][my][mx];; ekx -= x0*vdx_brick[mz][my][mx];
ek[1] -= x0*vdy_brick[mz][my][mx];; eky -= x0*vdy_brick[mz][my][mx];
ek[2] -= x0*vdz_brick[mz][my][mx];; ekz -= x0*vdz_brick[mz][my][mx];
} }
} }
} }
// convert E-field to force // convert E-field to force
const double qfactor = qqrd2e*scale*q[i];
f[i][0] += qqrd2e*scale * q[i]*ek[0]; f[i][0] += qfactor*ekx;
f[i][1] += qqrd2e*scale * q[i]*ek[1]; f[i][1] += qfactor*eky;
f[i][2] += qqrd2e*scale * q[i]*ek[2]; f[i][2] += qfactor*ekz;
} }
} }
@ -1758,15 +1774,16 @@ void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
charge assignment into rho1d charge assignment into rho1d
dx,dy,dz = distance of particle from "lower left" grid point dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
void PPPM::compute_rho1d(double dx, double dy, double dz) const FFT_SCALAR &dz)
{ {
int k,l; int k,l;
for (k = (1-order)/2; k <= order/2; k++) { for (k = (1-order)/2; k <= order/2; k++) {
rho1d[0][k] = 0.0; rho1d[0][k] = ZEROF;
rho1d[1][k] = 0.0; rho1d[1][k] = ZEROF;
rho1d[2][k] = 0.0; rho1d[2][k] = ZEROF;
for (l = order-1; l >= 0; l--) { for (l = order-1; l >= 0; l--) {
rho1d[0][k] = rho_coeff[l][k] + rho1d[0][k]*dx; rho1d[0][k] = rho_coeff[l][k] + rho1d[0][k]*dx;
rho1d[1][k] = rho_coeff[l][k] + rho1d[1][k]*dy; rho1d[1][k] = rho_coeff[l][k] + rho1d[1][k]*dy;
@ -1797,9 +1814,9 @@ void PPPM::compute_rho1d(double dx, double dy, double dz)
void PPPM::compute_rho_coeff() void PPPM::compute_rho_coeff()
{ {
int j,k,l,m; int j,k,l,m;
double s; FFT_SCALAR s;
double **a; FFT_SCALAR **a;
memory->create2d_offset(a,order,-order,order,"pppm:a"); memory->create2d_offset(a,order,-order,order,"pppm:a");
for (k = -order; k <= order; k++) for (k = -order; k <= order; k++)
@ -1812,8 +1829,13 @@ void PPPM::compute_rho_coeff()
s = 0.0; s = 0.0;
for (l = 0; l < j; l++) { for (l = 0; l < j; l++) {
a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
#ifdef FFT_SINGLE
s += powf(0.5,(float) l+1) *
(a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
#else
s += pow(0.5,(double) l+1) * s += pow(0.5,(double) l+1) *
(a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
#endif
} }
a[0][k] = s; a[0][k] = s;
} }
@ -1874,7 +1896,7 @@ void PPPM::timing(int n, double &time3d, double &time1d)
{ {
double time1,time2; double time1,time2;
for (int i = 0; i < 2*nfft_both; i++) work1[i] = 0.0; for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
MPI_Barrier(world); MPI_Barrier(world);
time1 = MPI_Wtime(); time1 = MPI_Wtime();
@ -1914,9 +1936,10 @@ double PPPM::memory_usage()
double bytes = nmax*3 * sizeof(double); double bytes = nmax*3 * sizeof(double);
int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1); (nzhi_out-nzlo_out+1);
bytes += 4 * nbrick * sizeof(double); bytes += 4 * nbrick * sizeof(FFT_SCALAR);
bytes += 6 * nfft_both * sizeof(double); bytes += 6 * nfft_both * sizeof(double);
bytes += nfft_both*6 * sizeof(double); bytes += nfft_both * sizeof(double);
bytes += 2 * nbuf * sizeof(double); bytes += nfft_both*5 * sizeof(FFT_SCALAR);
bytes += 2 * nbuf * sizeof(FFT_SCALAR);
return bytes; return bytes;
} }

View File

@ -1,4 +1,4 @@
/* ---------------------------------------------------------------------- /* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov Steve Plimpton, sjplimp@sandia.gov
@ -20,6 +20,17 @@ KSpaceStyle(pppm,PPPM)
#ifndef LMP_PPPM_H #ifndef LMP_PPPM_H
#define LMP_PPPM_H #define LMP_PPPM_H
#include "lmptype.h"
#include "mpi.h"
#ifdef FFT_SINGLE
typedef float FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_FLOAT
#else
typedef double FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_DOUBLE
#endif
#include "kspace.h" #include "kspace.h"
namespace LAMMPS_NS { namespace LAMMPS_NS {
@ -28,11 +39,11 @@ class PPPM : public KSpace {
public: public:
PPPM(class LAMMPS *, int, char **); PPPM(class LAMMPS *, int, char **);
virtual ~PPPM(); virtual ~PPPM();
void init(); virtual void init();
void setup(); virtual void setup();
void compute(int, int); virtual void compute(int, int);
void timing(int, double &, double &); virtual void timing(int, double &, double &);
double memory_usage(); virtual double memory_usage();
protected: protected:
int me,nprocs; int me,nprocs;
@ -54,17 +65,17 @@ class PPPM : public KSpace {
int nlower,nupper; int nlower,nupper;
int ngrid,nfft,nbuf,nfft_both; int ngrid,nfft,nbuf,nfft_both;
double ***density_brick; FFT_SCALAR ***density_brick;
double ***vdx_brick,***vdy_brick,***vdz_brick; FFT_SCALAR ***vdx_brick,***vdy_brick,***vdz_brick;
double *greensfn; double *greensfn;
double **vg; double **vg;
double *fkx,*fky,*fkz; double *fkx,*fky,*fkz;
double *density_fft; FFT_SCALAR *density_fft;
double *work1,*work2; FFT_SCALAR *work1,*work2;
double *buf1,*buf2; FFT_SCALAR *buf1,*buf2;
double *gf_b; double *gf_b;
double **rho1d,**rho_coeff; FFT_SCALAR **rho1d,**rho_coeff;
class FFT3d *fft1,*fft2; class FFT3d *fft1,*fft2;
class Remap *remap; class Remap *remap;
@ -80,8 +91,8 @@ class PPPM : public KSpace {
double alpha; // geometric factor double alpha; // geometric factor
void set_grid(); void set_grid();
void allocate(); virtual void allocate();
void deallocate(); virtual void deallocate();
int factorable(int); int factorable(int);
double rms(double, double, bigint, double, double **); double rms(double, double, bigint, double, double **);
double diffpr(double, double, double, double, double **); double diffpr(double, double, double, double, double **);
@ -89,12 +100,13 @@ class PPPM : public KSpace {
double gf_denom(double, double, double); double gf_denom(double, double, double);
virtual void particle_map(); virtual void particle_map();
virtual void make_rho(); virtual void make_rho();
void brick2fft(); virtual void brick2fft();
void fillbrick(); virtual void fillbrick();
void poisson(int, int); virtual void poisson(int, int);
virtual void fieldforce(); virtual void fieldforce();
void procs2grid2d(int,int,int,int *, int*); void procs2grid2d(int,int,int,int *, int*);
void compute_rho1d(double, double, double); void compute_rho1d(const FFT_SCALAR &, const FFT_SCALAR &,
const FFT_SCALAR &);
void compute_rho_coeff(); void compute_rho_coeff();
void slabcorr(int); void slabcorr(int);
}; };

View File

@ -26,6 +26,14 @@ using namespace LAMMPS_NS;
#define OFFSET 16384 #define OFFSET 16384
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#define ONEF 1.0f
#else
#define ZEROF 0.0
#define ONEF 1.0
#endif
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
PPPMTIP4P::PPPMTIP4P(LAMMPS *lmp, int narg, char **arg) : PPPMTIP4P::PPPMTIP4P(LAMMPS *lmp, int narg, char **arg) :
@ -87,13 +95,13 @@ void PPPMTIP4P::particle_map()
void PPPMTIP4P::make_rho() void PPPMTIP4P::make_rho()
{ {
int i,l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2; int i,l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2;
double dx,dy,dz,x0,y0,z0; FFT_SCALAR dx,dy,dz,x0,y0,z0;
double *xi,xM[3]; double *xi,xM[3];
// clear 3d density array // clear 3d density array
double *vec = &density_brick[nzlo_out][nylo_out][nxlo_out]; FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (i = 0; i < ngrid; i++) vec[i] = 0.0; for (i = 0; i < ngrid; i++) vec[i] = ZEROF;
// loop over my charges, add their contribution to nearby grid points // loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
@ -143,13 +151,13 @@ void PPPMTIP4P::make_rho()
void PPPMTIP4P::fieldforce() void PPPMTIP4P::fieldforce()
{ {
int i,l,m,n,nx,ny,nz,mx,my,mz; int i,l,m,n,nx,ny,nz,mx,my,mz;
double dx,dy,dz,x0,y0,z0; FFT_SCALAR dx,dy,dz,x0,y0,z0;
double ek[3]; FFT_SCALAR ekx,eky,ekz;
double *xi; double *xi;
int iH1,iH2; int iH1,iH2;
double xM[3]; double xM[3];
double fx,fy,fz; double fx,fy,fz;
double ddotf, rOM[3], f1[3]; double ddotf, rOMx, rOMy, rOMz, f1x, f1y, f1z;
// loop over my charges, interpolate electric field from nearby grid points // loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
@ -160,6 +168,7 @@ void PPPMTIP4P::fieldforce()
double *q = atom->q; double *q = atom->q;
double **x = atom->x; double **x = atom->x;
double **f = atom->f; double **f = atom->f;
int *type = atom->type; int *type = atom->type;
int nlocal = atom->nlocal; int nlocal = atom->nlocal;
@ -178,7 +187,7 @@ void PPPMTIP4P::fieldforce()
compute_rho1d(dx,dy,dz); compute_rho1d(dx,dy,dz);
ek[0] = ek[1] = ek[2] = 0.0; ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) { for (n = nlower; n <= nupper; n++) {
mz = n+nz; mz = n+nz;
z0 = rho1d[2][n]; z0 = rho1d[2][n];
@ -188,47 +197,47 @@ void PPPMTIP4P::fieldforce()
for (l = nlower; l <= nupper; l++) { for (l = nlower; l <= nupper; l++) {
mx = l+nx; mx = l+nx;
x0 = y0*rho1d[0][l]; x0 = y0*rho1d[0][l];
ek[0] -= x0*vdx_brick[mz][my][mx]; ekx -= x0*vdx_brick[mz][my][mx];
ek[1] -= x0*vdy_brick[mz][my][mx]; eky -= x0*vdy_brick[mz][my][mx];
ek[2] -= x0*vdz_brick[mz][my][mx]; ekz -= x0*vdz_brick[mz][my][mx];
} }
} }
} }
// convert E-field to force // convert E-field to force
const double qfactor = qqrd2e*scale*q[i];
if (type[i] != typeO) { if (type[i] != typeO) {
f[i][0] += qqrd2e*q[i]*ek[0]; f[i][0] += qfactor*ekx;
f[i][1] += qqrd2e*q[i]*ek[1]; f[i][1] += qfactor*eky;
f[i][2] += qqrd2e*q[i]*ek[2]; f[i][2] += qfactor*ekz;
} else { } else {
fx = qqrd2e * q[i] * ek[0]; fx = qfactor * ekx;
fy = qqrd2e * q[i] * ek[1]; fy = qfactor * eky;
fz = qqrd2e * q[i] * ek[2]; fz = qfactor * ekz;
find_M(i,iH1,iH2,xM); find_M(i,iH1,iH2,xM);
rOM[0] = xM[0] - x[i][0]; rOMx = xM[0] - x[i][0];
rOM[1] = xM[1] - x[i][1]; rOMy = xM[1] - x[i][1];
rOM[2] = xM[2] - x[i][2]; rOMz = xM[2] - x[i][2];
ddotf = (rOM[0] * fx + rOM[1] * fy + rOM[2] * fz) / (qdist * qdist); ddotf = (rOMx * fx + rOMy * fy + rOMz * fz) / (qdist * qdist);
f1[0] = ddotf * rOM[0]; f1x = ddotf * rOMx;
f1[1] = ddotf * rOM[1]; f1y = ddotf * rOMy;
f1[2] = ddotf * rOM[2]; f1z = ddotf * rOMz;
f[i][0] += fx - alpha * (fx - f1[0]); f[i][0] += fx - alpha * (fx - f1x);
f[i][1] += fy - alpha * (fy - f1[1]); f[i][1] += fy - alpha * (fy - f1y);
f[i][2] += fz - alpha * (fz - f1[2]); f[i][2] += fz - alpha * (fz - f1z);
f[iH1][0] += 0.5*alpha*(fx - f1[0]); f[iH1][0] += 0.5*alpha*(fx - f1x);
f[iH1][1] += 0.5*alpha*(fy - f1[1]); f[iH1][1] += 0.5*alpha*(fy - f1y);
f[iH1][2] += 0.5*alpha*(fz - f1[2]); f[iH1][2] += 0.5*alpha*(fz - f1z);
f[iH2][0] += 0.5*alpha*(fx - f1[0]); f[iH2][0] += 0.5*alpha*(fx - f1x);
f[iH2][1] += 0.5*alpha*(fy - f1[1]); f[iH2][1] += 0.5*alpha*(fy - f1y);
f[iH2][2] += 0.5*alpha*(fz - f1[2]); f[iH2][2] += 0.5*alpha*(fz - f1z);
} }
} }
} }

View File

@ -27,12 +27,14 @@ namespace LAMMPS_NS {
class PPPMTIP4P : public PPPM { class PPPMTIP4P : public PPPM {
public: public:
PPPMTIP4P(class LAMMPS *, int, char **); PPPMTIP4P(class LAMMPS *, int, char **);
virtual ~PPPMTIP4P () {};
protected:
virtual void particle_map();
virtual void make_rho();
virtual void fieldforce();
private: private:
void particle_map();
void make_rho();
void fieldforce();
void find_M(int, int &, int &, double *); void find_M(int, int &, int &, double *);
}; };

View File

@ -11,10 +11,12 @@
See the README file in the top-level LAMMPS directory. See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
#include "mpi.h"
#include "stdio.h" #include "stdio.h"
#include "stdlib.h" #include "stdlib.h"
#include "remap.h" #include "remap.h"
#define PACK_DATA FFT_SCALAR
#include "pack.h" #include "pack.h"
#define MIN(A,B) ((A) < (B)) ? (A) : (B) #define MIN(A,B) ((A) < (B)) ? (A) : (B)
@ -57,13 +59,13 @@
plan plan returned by previous call to remap_3d_create_plan plan plan returned by previous call to remap_3d_create_plan
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
void remap_3d(double *in, double *out, double *buf, void remap_3d(FFT_SCALAR *in, FFT_SCALAR *out, FFT_SCALAR *buf,
struct remap_plan_3d *plan) struct remap_plan_3d *plan)
{ {
MPI_Status status; MPI_Status status;
int i,isend,irecv; int i,isend,irecv;
double *scratch; FFT_SCALAR *scratch;
if (plan->memory == 0) if (plan->memory == 0)
scratch = buf; scratch = buf;
@ -74,7 +76,7 @@ void remap_3d(double *in, double *out, double *buf,
for (irecv = 0; irecv < plan->nrecv; irecv++) for (irecv = 0; irecv < plan->nrecv; irecv++)
MPI_Irecv(&scratch[plan->recv_bufloc[irecv]],plan->recv_size[irecv], MPI_Irecv(&scratch[plan->recv_bufloc[irecv]],plan->recv_size[irecv],
MPI_DOUBLE,plan->recv_proc[irecv],0, MPI_FFT_SCALAR,plan->recv_proc[irecv],0,
plan->comm,&plan->request[irecv]); plan->comm,&plan->request[irecv]);
// send all messages to other procs // send all messages to other procs
@ -82,7 +84,7 @@ void remap_3d(double *in, double *out, double *buf,
for (isend = 0; isend < plan->nsend; isend++) { for (isend = 0; isend < plan->nsend; isend++) {
plan->pack(&in[plan->send_offset[isend]], plan->pack(&in[plan->send_offset[isend]],
plan->sendbuf,&plan->packplan[isend]); plan->sendbuf,&plan->packplan[isend]);
MPI_Send(plan->sendbuf,plan->send_size[isend],MPI_DOUBLE, MPI_Send(plan->sendbuf,plan->send_size[isend],MPI_FFT_SCALAR,
plan->send_proc[isend],0,plan->comm); plan->send_proc[isend],0,plan->comm);
} }
@ -150,13 +152,6 @@ struct remap_plan_3d *remap_3d_create_plan(
MPI_Comm_rank(comm,&me); MPI_Comm_rank(comm,&me);
MPI_Comm_size(comm,&nprocs); MPI_Comm_size(comm,&nprocs);
// single precision not yet supported
if (precision == 1) {
if (me == 0) printf("Single precision not supported\n");
return NULL;
}
// allocate memory for plan data struct // allocate memory for plan data struct
plan = (struct remap_plan_3d *) malloc(sizeof(struct remap_plan_3d)); plan = (struct remap_plan_3d *) malloc(sizeof(struct remap_plan_3d));
@ -209,10 +204,7 @@ struct remap_plan_3d *remap_3d_create_plan(
// malloc space for send info // malloc space for send info
if (nsend) { if (nsend) {
if (precision == 1) plan->pack = pack_3d;
plan->pack = NULL;
else
plan->pack = pack_3d;
plan->send_offset = (int *) malloc(nsend*sizeof(int)); plan->send_offset = (int *) malloc(nsend*sizeof(int));
plan->send_size = (int *) malloc(nsend*sizeof(int)); plan->send_size = (int *) malloc(nsend*sizeof(int));
@ -272,45 +264,23 @@ struct remap_plan_3d *remap_3d_create_plan(
// malloc space for recv info // malloc space for recv info
if (nrecv) { if (nrecv) {
if (precision == 1) { if (permute == 0)
if (permute == 0) plan->unpack = unpack_3d;
plan->unpack = NULL; else if (permute == 1) {
else if (permute == 1) { if (nqty == 1)
if (nqty == 1) plan->unpack = unpack_3d_permute1_1;
plan->unpack = NULL; else if (nqty == 2)
else if (nqty == 2) plan->unpack = unpack_3d_permute1_2;
plan->unpack = NULL; else
else plan->unpack = unpack_3d_permute1_n;
plan->unpack = NULL;
}
else if (permute == 2) {
if (nqty == 1)
plan->unpack = NULL;
else if (nqty == 2)
plan->unpack = NULL;
else
plan->unpack = NULL;
}
} }
else if (precision == 2) { else if (permute == 2) {
if (permute == 0) if (nqty == 1)
plan->unpack = unpack_3d; plan->unpack = unpack_3d_permute2_1;
else if (permute == 1) { else if (nqty == 2)
if (nqty == 1) plan->unpack = unpack_3d_permute2_2;
plan->unpack = unpack_3d_permute1_1; else
else if (nqty == 2) plan->unpack = unpack_3d_permute2_n;
plan->unpack = unpack_3d_permute1_2;
else
plan->unpack = unpack_3d_permute1_n;
}
else if (permute == 2) {
if (nqty == 1)
plan->unpack = unpack_3d_permute2_1;
else if (nqty == 2)
plan->unpack = unpack_3d_permute2_2;
else
plan->unpack = unpack_3d_permute2_n;
}
} }
plan->recv_offset = (int *) malloc(nrecv*sizeof(int)); plan->recv_offset = (int *) malloc(nrecv*sizeof(int));
@ -408,10 +378,7 @@ struct remap_plan_3d *remap_3d_create_plan(
size = MAX(size,plan->send_size[nsend]); size = MAX(size,plan->send_size[nsend]);
if (size) { if (size) {
if (precision == 1) plan->sendbuf = (FFT_SCALAR *) malloc(size*sizeof(FFT_SCALAR));
plan->sendbuf = NULL;
else
plan->sendbuf = (double *) malloc(size*sizeof(double));
if (plan->sendbuf == NULL) return NULL; if (plan->sendbuf == NULL) return NULL;
} }
@ -422,11 +389,8 @@ struct remap_plan_3d *remap_3d_create_plan(
if (memory == 1) { if (memory == 1) {
if (nrecv > 0) { if (nrecv > 0) {
if (precision == 1) plan->scratch =
plan->scratch = NULL; (FFT_SCALAR *) malloc(nqty*out.isize*out.jsize*out.ksize*sizeof(FFT_SCALAR));
else
plan->scratch =
(double *) malloc(nqty*out.isize*out.jsize*out.ksize*sizeof(double));
if (plan->scratch == NULL) return NULL; if (plan->scratch == NULL) return NULL;
} }
} }

View File

@ -1,4 +1,4 @@
/* ---------------------------------------------------------------------- /* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov Steve Plimpton, sjplimp@sandia.gov
@ -11,14 +11,24 @@
See the README file in the top-level LAMMPS directory. See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
#include <mpi.h>
#ifdef FFT_SINGLE
typedef float FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_FLOAT
#else
typedef double FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_DOUBLE
#endif
// details of how to do a 3d remap // details of how to do a 3d remap
struct remap_plan_3d { struct remap_plan_3d {
double *sendbuf; // buffer for MPI sends FFT_SCALAR *sendbuf; // buffer for MPI sends
double *scratch; // scratch buffer for MPI recvs FFT_SCALAR *scratch; // scratch buffer for MPI recvs
void (*pack)(double *, double *, struct pack_plan_3d *); void (*pack)(FFT_SCALAR *, FFT_SCALAR *, struct pack_plan_3d *);
// which pack function to use // which pack function to use
void (*unpack)(double *, double *, struct pack_plan_3d *); void (*unpack)(FFT_SCALAR *, FFT_SCALAR *, struct pack_plan_3d *);
// which unpack function to use // which unpack function to use
int *send_offset; // extraction loc for each send int *send_offset; // extraction loc for each send
int *send_size; // size of each send message int *send_size; // size of each send message
@ -47,7 +57,7 @@ struct extent_3d {
// function prototypes // function prototypes
void remap_3d(double *, double *, double *, struct remap_plan_3d *); void remap_3d(FFT_SCALAR *, FFT_SCALAR *, FFT_SCALAR *, struct remap_plan_3d *);
struct remap_plan_3d *remap_3d_create_plan(MPI_Comm, struct remap_plan_3d *remap_3d_create_plan(MPI_Comm,
int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int,
int, int, int, int); int, int, int, int);

View File

@ -42,7 +42,7 @@ Remap::~Remap()
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
void Remap::perform(double *in, double *out, double *buf) void Remap::perform(FFT_SCALAR *in, FFT_SCALAR *out, FFT_SCALAR *buf)
{ {
remap_3d(in,out,buf,plan); remap_3d(in,out,buf,plan);
} }

View File

@ -1,4 +1,4 @@
/* ---------------------------------------------------------------------- /* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov Steve Plimpton, sjplimp@sandia.gov
@ -24,7 +24,7 @@ class Remap : protected Pointers {
Remap(class LAMMPS *, MPI_Comm,int,int,int,int,int,int, Remap(class LAMMPS *, MPI_Comm,int,int,int,int,int,int,
int,int,int,int,int,int,int,int,int,int); int,int,int,int,int,int,int,int,int,int);
~Remap(); ~Remap();
void perform(double *, double *, double *); void perform(FFT_SCALAR *, FFT_SCALAR *, FFT_SCALAR *);
private: private:
struct remap_plan_3d *plan; struct remap_plan_3d *plan;

View File

@ -1,757 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "pack.h"
#if !defined(PACK_POINTER) && !defined(PACK_MEMCPY)
#define PACK_ARRAY
#endif
/* ----------------------------------------------------------------------
Pack and unpack functions:
pack routines copy strided values from data into contiguous locs in buf
unpack routines copy contiguous values from buf into strided locs in data
different versions of unpack depending on permutation
and # of values/element
PACK_ARRAY routines work via array indices (default)
PACK_POINTER routines work via pointers
PACK_MEMCPY routines work via pointers and memcpy function
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
pack/unpack with array indices
------------------------------------------------------------------------- */
#ifdef PACK_ARRAY
/* ----------------------------------------------------------------------
pack from data -> buf
------------------------------------------------------------------------- */
void pack_3d(double *data, double *buf, struct pack_plan_3d *plan)
{
  // gather nslow x nmid lines of nfast values each from strided
  //   locations in data into consecutive slots of buf
  // data = strided source, buf = contiguous destination
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                      // next free slot in buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + mid*nstride_line;   // start of this line in data
      for (int fast = 0; fast < nfast; fast++)
        buf[ibuf++] = data[idata++];
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data
------------------------------------------------------------------------- */
void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan)
{
  // scatter consecutive values of buf into nslow x nmid lines of
  //   nfast values each at strided locations in data
  // buf = contiguous source, data = strided destination
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                      // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + mid*nstride_line;   // start of this line in data
      for (int fast = 0; fast < nfast; fast++)
        data[idata++] = buf[ibuf++];
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 1 value/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan)
{
  // scatter buf into data with one value per element
  // slow index advances by nstride_line, mid index by 1,
  //   fast index by nstride_plane
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                      // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + mid;
      for (int fast = 0; fast < nfast; fast++) {
        data[idata] = buf[ibuf++];
        idata += nstride_plane;
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 2 values/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan)
{
  // scatter buf into data with two adjacent values per element
  // slow index advances by nstride_line, mid index by 2,
  //   fast index by nstride_plane
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                      // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + 2*mid;
      for (int fast = 0; fast < nfast; fast++) {
        data[idata] = buf[ibuf++];
        data[idata+1] = buf[ibuf++];
        idata += nstride_plane;
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, nqty values/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan)
{
  // scatter buf into data with nqty adjacent values per element
  // slow index advances by nstride_line, mid index by nqty,
  //   fast index by nstride_plane
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;
  const int nqty = plan->nqty;

  int ibuf = 0;                      // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      int ielem = plane + nqty*mid;  // first value of current element
      for (int fast = 0; fast < nfast; fast++) {
        int idata = ielem;
        for (int iqty = 0; iqty < nqty; iqty++) data[idata++] = buf[ibuf++];
        ielem += nstride_plane;
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 1 value/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan)
{
  // scatter buf into data with one value per element
  // slow index advances by 1, mid index by nstride_plane,
  //   fast index by nstride_line
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                      // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      int idata = slow + mid*nstride_plane;
      for (int fast = 0; fast < nfast; fast++) {
        data[idata] = buf[ibuf++];
        idata += nstride_line;
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 2 values/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan)
{
  // scatter buf into data with two adjacent values per element
  // slow index advances by 2, mid index by nstride_plane,
  //   fast index by nstride_line
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                      // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      int idata = 2*slow + mid*nstride_plane;
      for (int fast = 0; fast < nfast; fast++) {
        data[idata] = buf[ibuf++];
        data[idata+1] = buf[ibuf++];
        idata += nstride_line;
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, nqty values/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan)
{
  // scatter buf into data with nqty adjacent values per element
  // slow index advances by nqty, mid index by nstride_plane,
  //   fast index by nstride_line
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;
  const int nqty = plan->nqty;

  int ibuf = 0;                      // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      int ielem = nqty*slow + mid*nstride_plane;  // first value of element
      for (int fast = 0; fast < nfast; fast++) {
        int idata = ielem;
        for (int iqty = 0; iqty < nqty; iqty++) data[idata++] = buf[ibuf++];
        ielem += nstride_line;
      }
    }
  }
}
#endif
/* ----------------------------------------------------------------------
pack/unpack with pointers
------------------------------------------------------------------------- */
#ifdef PACK_POINTER
/* ----------------------------------------------------------------------
pack from data -> buf
------------------------------------------------------------------------- */
void pack_3d(double *data, double *buf, struct pack_plan_3d *plan)
{
  // pointer-walking version
  // gather nslow x nmid lines of nfast values each from strided
  //   locations in data into consecutive slots of buf
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  double *dst = buf;                 // next free slot in buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;
    for (int mid = 0; mid < nmid; mid++) {
      double *src = &(data[plane+mid*nstride_line]);
      double * const stop = src + nfast;      // one past end of this line
      while (src < stop) *(dst++) = *(src++);
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data
------------------------------------------------------------------------- */
void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan)
{
  // pointer-walking version
  // scatter consecutive values of buf into nslow x nmid lines of
  //   nfast values each at strided locations in data
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  double *src = buf;                 // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;
    for (int mid = 0; mid < nmid; mid++) {
      double *dst = &(data[plane+mid*nstride_line]);
      double * const stop = dst + nfast;      // one past end of this line
      while (dst < stop) *(dst++) = *(src++);
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 1 value/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan)
{
  // pointer-walking version, one value per element
  // slow index advances by nstride_line, mid index by 1,
  //   fast index by nstride_plane
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  double *src = buf;                 // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      double *dst = &(data[plane+mid]);
      // stop is only a loop bound, it is never dereferenced
      double * const stop = dst + nfast*nstride_plane;
      for ( ; dst < stop; dst += nstride_plane)
        *dst = *(src++);
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 2 values/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan)
{
  // pointer-walking version, two adjacent values per element
  // slow index advances by nstride_line, mid index by 2,
  //   fast index by nstride_plane
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  double *src = buf;                 // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      double *dst = &(data[plane+2*mid]);
      // stop is only a loop bound, it is never dereferenced
      double * const stop = dst + nfast*nstride_plane;
      for ( ; dst < stop; dst += nstride_plane) {
        dst[0] = *(src++);
        dst[1] = *(src++);
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, nqty values/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan)
{
  // pointer-walking version, nqty adjacent values per element
  // slow index advances by nstride_line, mid index by nqty,
  //   fast index by nstride_plane
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;
  const int nqty = plan->nqty;

  double *src = buf;                 // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      double *elem = &(data[plane+nqty*mid]);
      // stop is only a loop bound, it is never dereferenced
      double * const stop = elem + nfast*nstride_plane;
      for ( ; elem < stop; elem += nstride_plane) {
        double *dst = elem;
        for (int iqty = 0; iqty < nqty; iqty++) *(dst++) = *(src++);
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 1 value/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan)
{
  // pointer-walking version, one value per element
  // slow index advances by 1, mid index by nstride_plane,
  //   fast index by nstride_line
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  double *src = buf;                 // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      double *dst = &(data[slow+mid*nstride_plane]);
      // stop is only a loop bound, it is never dereferenced
      double * const stop = dst + nfast*nstride_line;
      for ( ; dst < stop; dst += nstride_line)
        *dst = *(src++);
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 2 values/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan)
{
  // pointer-walking version, two adjacent values per element
  // slow index advances by 2, mid index by nstride_plane,
  //   fast index by nstride_line
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  double *src = buf;                 // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      double *dst = &(data[2*slow+mid*nstride_plane]);
      // stop is only a loop bound, it is never dereferenced
      double * const stop = dst + nfast*nstride_line;
      for ( ; dst < stop; dst += nstride_line) {
        dst[0] = *(src++);
        dst[1] = *(src++);
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, nqty values/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan)
{
  // pointer-walking version, nqty adjacent values per element
  // slow index advances by nqty, mid index by nstride_plane,
  //   fast index by nstride_line
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;
  const int nqty = plan->nqty;

  double *src = buf;                 // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      double *elem = &(data[nqty*slow+mid*nstride_plane]);
      // stop is only a loop bound, it is never dereferenced
      double * const stop = elem + nfast*nstride_line;
      for ( ; elem < stop; elem += nstride_line) {
        double *dst = elem;
        for (int iqty = 0; iqty < nqty; iqty++) *(dst++) = *(src++);
      }
    }
  }
}
#endif
/* ----------------------------------------------------------------------
pack/unpack with pointers and memcpy function
no memcpy version of unpack_permute routines,
just use PACK_POINTER versions
------------------------------------------------------------------------- */
#ifdef PACK_MEMCPY
/* ----------------------------------------------------------------------
pack from data -> buf
------------------------------------------------------------------------- */
void pack_3d(double *data, double *buf, struct pack_plan_3d *plan)
{
  // memcpy version
  // copy nslow x nmid lines of nfast values each from strided
  //   locations in data into consecutive lines of buf
  // plain locals: the register specifier was removed in C++17
  // NOTE(review): memcpy needs <string.h>; confirm this file includes it

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;
    const int upto = slow*nmid*nfast;         // values already packed
    for (int mid = 0; mid < nmid; mid++) {
      double *dst = &(buf[upto+mid*nfast]);
      double *src = &(data[plane+mid*nstride_line]);
      // byte count stays in size_t, not narrowed to int
      memcpy(dst,src,nfast*sizeof(double));
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data
------------------------------------------------------------------------- */
void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan)
{
  // memcpy version
  // copy consecutive lines of nfast values each from buf into
  //   nslow x nmid strided lines of data
  // plain locals: the register specifier was removed in C++17
  // NOTE(review): memcpy needs <string.h>; confirm this file includes it

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;
    const int upto = slow*nmid*nfast;         // values already unpacked
    for (int mid = 0; mid < nmid; mid++) {
      double *dst = &(data[plane+mid*nstride_line]);
      double *src = &(buf[upto+mid*nfast]);
      // byte count stays in size_t, not narrowed to int
      memcpy(dst,src,nfast*sizeof(double));
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 1 value/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan)
{
  // pointer-walking copy, one value per element
  // slow index advances by nstride_line, mid index by 1,
  //   fast index by nstride_plane
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  double *from = buf;                // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      double *to = &(data[plane+mid]);
      // last is only a loop bound, it is never dereferenced
      double * const last = to + nfast*nstride_plane;
      for ( ; to < last; to += nstride_plane)
        *to = *(from++);
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 2 values/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan)
{
  // pointer-walking copy, two adjacent values per element
  // slow index advances by nstride_line, mid index by 2,
  //   fast index by nstride_plane
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  double *from = buf;                // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      double *to = &(data[plane+2*mid]);
      // last is only a loop bound, it is never dereferenced
      double * const last = to + nfast*nstride_plane;
      for ( ; to < last; to += nstride_plane) {
        to[0] = *(from++);
        to[1] = *(from++);
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, nqty values/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan)
{
  // pointer-walking copy, nqty adjacent values per element
  // slow index advances by nstride_line, mid index by nqty,
  //   fast index by nstride_plane
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;
  const int nqty = plan->nqty;

  double *from = buf;                // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      double *elem = &(data[plane+nqty*mid]);
      // last is only a loop bound, it is never dereferenced
      double * const last = elem + nfast*nstride_plane;
      for ( ; elem < last; elem += nstride_plane) {
        double *to = elem;
        for (int iqty = 0; iqty < nqty; iqty++) *(to++) = *(from++);
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 1 value/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan)
{
  // pointer-walking copy, one value per element
  // slow index advances by 1, mid index by nstride_plane,
  //   fast index by nstride_line
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  double *from = buf;                // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      double *to = &(data[slow+mid*nstride_plane]);
      // last is only a loop bound, it is never dereferenced
      double * const last = to + nfast*nstride_line;
      for ( ; to < last; to += nstride_line)
        *to = *(from++);
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 2 values/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan)
{
  // pointer-walking copy, two adjacent values per element
  // slow index advances by 2, mid index by nstride_plane,
  //   fast index by nstride_line
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  double *from = buf;                // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      double *to = &(data[2*slow+mid*nstride_plane]);
      // last is only a loop bound, it is never dereferenced
      double * const last = to + nfast*nstride_line;
      for ( ; to < last; to += nstride_line) {
        to[0] = *(from++);
        to[1] = *(from++);
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, nqty values/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan)
{
  // pointer-walking copy, nqty adjacent values per element
  // slow index advances by nqty, mid index by nstride_plane,
  //   fast index by nstride_line
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;
  const int nqty = plan->nqty;

  double *from = buf;                // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      double *elem = &(data[nqty*slow+mid*nstride_plane]);
      // last is only a loop bound, it is never dereferenced
      double * const last = elem + nfast*nstride_line;
      for ( ; elem < last; elem += nstride_line) {
        double *to = elem;
        for (int iqty = 0; iqty < nqty; iqty++) *(to++) = *(from++);
      }
    }
  }
}
#endif

View File

@ -1,4 +1,4 @@
/* ---------------------------------------------------------------------- /* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov Steve Plimpton, sjplimp@sandia.gov
@ -22,13 +22,746 @@ struct pack_plan_3d {
int nqty; // # of values/element int nqty; // # of values/element
}; };
// function prototypes
void pack_3d(double *, double *, struct pack_plan_3d *); #if !defined(PACK_POINTER) && !defined(PACK_MEMCPY)
void unpack_3d(double *, double *, struct pack_plan_3d *); #define PACK_ARRAY
void unpack_3d_permute1_1(double *, double *, struct pack_plan_3d *); #endif
void unpack_3d_permute1_2(double *, double *, struct pack_plan_3d *);
void unpack_3d_permute1_n(double *, double *, struct pack_plan_3d *); #ifndef PACK_DATA
void unpack_3d_permute2_1(double *, double *, struct pack_plan_3d *); #define PACK_DATA double
void unpack_3d_permute2_2(double *, double *, struct pack_plan_3d *); #endif
void unpack_3d_permute2_n(double *, double *, struct pack_plan_3d *);
/* ----------------------------------------------------------------------
Pack and unpack functions:
pack routines copy strided values from data into contiguous locs in buf
unpack routines copy contiguous values from buf into strided locs in data
different versions of unpack depending on permutation
and # of values/element
PACK_ARRAY routines work via array indices (default)
PACK_POINTER routines work via pointers
PACK_MEMCPY routines work via pointers and memcpy function
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
pack/unpack with array indices
------------------------------------------------------------------------- */
#ifdef PACK_ARRAY
/* ----------------------------------------------------------------------
pack from data -> buf
------------------------------------------------------------------------- */
static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan)
{
  // gather nslow x nmid lines of nfast values each from strided
  //   locations in data into consecutive slots of buf
  // data = strided source, buf = contiguous destination
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                      // next free slot in buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + mid*nstride_line;   // start of this line in data
      for (int fast = 0; fast < nfast; fast++)
        buf[ibuf++] = data[idata++];
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data
------------------------------------------------------------------------- */
static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  // scatter consecutive values of buf into nslow x nmid lines of
  //   nfast values each at strided locations in data
  // buf = contiguous source, data = strided destination
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                      // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + mid*nstride_line;   // start of this line in data
      for (int fast = 0; fast < nfast; fast++)
        data[idata++] = buf[ibuf++];
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 1 value/element
------------------------------------------------------------------------- */
static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  // scatter buf into data with one value per element
  // slow index advances by nstride_line, mid index by 1,
  //   fast index by nstride_plane
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                      // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + mid;
      for (int fast = 0; fast < nfast; fast++) {
        data[idata] = buf[ibuf++];
        idata += nstride_plane;
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 2 values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  // scatter buf into data with two adjacent values per element
  // slow index advances by nstride_line, mid index by 2,
  //   fast index by nstride_plane
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                      // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + 2*mid;
      for (int fast = 0; fast < nfast; fast++) {
        data[idata] = buf[ibuf++];
        data[idata+1] = buf[ibuf++];
        idata += nstride_plane;
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, nqty values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  // scatter buf into data with nqty adjacent values per element
  // slow index advances by nstride_line, mid index by nqty,
  //   fast index by nstride_plane
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;
  const int nqty = plan->nqty;

  int ibuf = 0;                      // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      int ielem = plane + nqty*mid;  // first value of current element
      for (int fast = 0; fast < nfast; fast++) {
        int idata = ielem;
        for (int iqty = 0; iqty < nqty; iqty++) data[idata++] = buf[ibuf++];
        ielem += nstride_plane;
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 1 value/element
------------------------------------------------------------------------- */
static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  // scatter buf into data with one value per element
  // slow index advances by 1, mid index by nstride_plane,
  //   fast index by nstride_line
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                      // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      int idata = slow + mid*nstride_plane;
      for (int fast = 0; fast < nfast; fast++) {
        data[idata] = buf[ibuf++];
        idata += nstride_line;
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 2 values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  // scatter buf into data with two adjacent values per element
  // slow index advances by 2, mid index by nstride_plane,
  //   fast index by nstride_line
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                      // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      int idata = 2*slow + mid*nstride_plane;
      for (int fast = 0; fast < nfast; fast++) {
        data[idata] = buf[ibuf++];
        data[idata+1] = buf[ibuf++];
        idata += nstride_line;
      }
    }
  }
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, nqty values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  // scatter buf into data with nqty adjacent values per element
  // slow index advances by nqty, mid index by nstride_plane,
  //   fast index by nstride_line
  // plain locals: the register specifier was removed in C++17

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;
  const int nqty = plan->nqty;

  int ibuf = 0;                      // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      int ielem = nqty*slow + mid*nstride_plane;  // first value of element
      for (int fast = 0; fast < nfast; fast++) {
        int idata = ielem;
        for (int iqty = 0; iqty < nqty; iqty++) data[idata++] = buf[ibuf++];
        ielem += nstride_line;
      }
    }
  }
}
#endif
/* ----------------------------------------------------------------------
pack/unpack with pointers
------------------------------------------------------------------------- */
#ifdef PACK_POINTER
/* ----------------------------------------------------------------------
   pack from data -> buf
   copies nslow*nmid rows of nfast values each
   the row for (slow,mid) starts at
     data[slow*nstride_plane + mid*nstride_line]
   rows are written back-to-back into buf
------------------------------------------------------------------------- */
static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_stride = plan->nstride_line;
  const int plane_stride = plan->nstride_plane;

  // running write position in buf
  PACK_DATA *dst = buf;

  for (int islow = 0; islow < nslow; islow++) {
    const int plane_offset = islow*plane_stride;
    for (int imid = 0; imid < nmid; imid++) {
      const PACK_DATA *row = data + (plane_offset + imid*line_stride);
      for (int k = 0; k < nfast; k++) *dst++ = row[k];
    }
  }
}
/* ----------------------------------------------------------------------
   unpack from buf -> data
   buf is consumed sequentially as nslow*nmid rows of nfast values each
   the row for (slow,mid) is stored starting at
     data[slow*nstride_plane + mid*nstride_line]
------------------------------------------------------------------------- */
static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_stride = plan->nstride_line;
  const int plane_stride = plan->nstride_plane;

  // running read position in buf
  const PACK_DATA *src = buf;

  for (int islow = 0; islow < nslow; islow++) {
    const int plane_offset = islow*plane_stride;
    for (int imid = 0; imid < nmid; imid++) {
      PACK_DATA *row = data + (plane_offset + imid*line_stride);
      for (int k = 0; k < nfast; k++) row[k] = *src++;
    }
  }
}
/* ----------------------------------------------------------------------
   unpack from buf -> data, one axis permutation, 1 value/element
   buf is consumed sequentially, one value per (slow,mid,fast) triple
   for each (slow,mid) the destination starts at
     data[slow*nstride_line + mid]
   and advances by nstride_plane for each successive fast index
------------------------------------------------------------------------- */
static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_stride = plan->nstride_line;
  const int plane_stride = plan->nstride_plane;

  // running read position in buf
  PACK_DATA *src = buf;

  for (int islow = 0; islow < nslow; islow++) {
    const int slow_offset = islow*line_stride;
    for (int imid = 0; imid < nmid; imid++) {
      PACK_DATA *dst = data + (slow_offset + imid);
      PACK_DATA * const stop = dst + nfast*plane_stride;
      while (dst < stop) {
        *dst = *src++;
        dst += plane_stride;
      }
    }
  }
}
/* ----------------------------------------------------------------------
   unpack from buf -> data, one axis permutation, 2 values/element
   buf is consumed sequentially, one pair per (slow,mid,fast) triple
   for each (slow,mid) the destination starts at
     data[slow*nstride_line + 2*mid]
   and advances by nstride_plane for each successive fast index
   the 2 values of a pair land in adjacent slots
------------------------------------------------------------------------- */
static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_stride = plan->nstride_line;
  const int plane_stride = plan->nstride_plane;

  // running read position in buf
  PACK_DATA *src = buf;

  for (int islow = 0; islow < nslow; islow++) {
    const int slow_offset = islow*line_stride;
    for (int imid = 0; imid < nmid; imid++) {
      PACK_DATA *dst = data + (slow_offset + 2*imid);
      PACK_DATA * const stop = dst + nfast*plane_stride;
      while (dst < stop) {
        dst[0] = src[0];
        dst[1] = src[1];
        src += 2;
        dst += plane_stride;
      }
    }
  }
}
/* ----------------------------------------------------------------------
   unpack from buf -> data, one axis permutation, nqty values/element
   buf is consumed sequentially, plan->nqty values per (slow,mid,fast) triple
   for each (slow,mid) the destination starts at
     data[slow*nstride_line + nqty*mid]
   and advances by nstride_plane for each successive fast index
   the nqty values of a group land in adjacent slots
------------------------------------------------------------------------- */
static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_stride = plan->nstride_line;
  const int plane_stride = plan->nstride_plane;
  const int nvalues = plan->nqty;

  // running read position in buf
  PACK_DATA *src = buf;

  for (int islow = 0; islow < nslow; islow++) {
    const int slow_offset = islow*line_stride;
    for (int imid = 0; imid < nmid; imid++) {
      PACK_DATA *elem = data + (slow_offset + nvalues*imid);
      PACK_DATA * const stop = elem + nfast*plane_stride;
      while (elem < stop) {
        for (int q = 0; q < nvalues; q++) elem[q] = *src++;
        elem += plane_stride;
      }
    }
  }
}
/* ----------------------------------------------------------------------
   unpack from buf -> data, two axis permutation, 1 value/element
   buf is consumed sequentially, one value per (slow,mid,fast) triple
   for each (slow,mid) the destination starts at
     data[slow + mid*nstride_plane]
   and advances by nstride_line for each successive fast index
------------------------------------------------------------------------- */
static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_stride = plan->nstride_line;
  const int plane_stride = plan->nstride_plane;

  // running read position in buf
  PACK_DATA *src = buf;

  for (int islow = 0; islow < nslow; islow++) {
    for (int imid = 0; imid < nmid; imid++) {
      PACK_DATA *dst = data + (islow + imid*plane_stride);
      PACK_DATA * const stop = dst + nfast*line_stride;
      while (dst < stop) {
        *dst = *src++;
        dst += line_stride;
      }
    }
  }
}
/* ----------------------------------------------------------------------
   unpack from buf -> data, two axis permutation, 2 values/element
   buf is consumed sequentially, one pair per (slow,mid,fast) triple
   for each (slow,mid) the destination starts at
     data[2*slow + mid*nstride_plane]
   and advances by nstride_line for each successive fast index
   the 2 values of a pair land in adjacent slots
------------------------------------------------------------------------- */
static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_stride = plan->nstride_line;
  const int plane_stride = plan->nstride_plane;

  // running read position in buf
  PACK_DATA *src = buf;

  for (int islow = 0; islow < nslow; islow++) {
    for (int imid = 0; imid < nmid; imid++) {
      PACK_DATA *dst = data + (2*islow + imid*plane_stride);
      PACK_DATA * const stop = dst + nfast*line_stride;
      while (dst < stop) {
        dst[0] = src[0];
        dst[1] = src[1];
        src += 2;
        dst += line_stride;
      }
    }
  }
}
/* ----------------------------------------------------------------------
   unpack from buf -> data, two axis permutation, nqty values/element
   buf is consumed sequentially, plan->nqty values per (slow,mid,fast) triple
   for each (slow,mid) the destination starts at
     data[nqty*slow + mid*nstride_plane]
   and advances by nstride_line for each successive fast index
   the nqty values of a group land in adjacent slots
------------------------------------------------------------------------- */
static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_stride = plan->nstride_line;
  const int plane_stride = plan->nstride_plane;
  const int nvalues = plan->nqty;

  // running read position in buf
  PACK_DATA *src = buf;

  for (int islow = 0; islow < nslow; islow++) {
    for (int imid = 0; imid < nmid; imid++) {
      PACK_DATA *elem = data + (nvalues*islow + imid*plane_stride);
      PACK_DATA * const stop = elem + nfast*line_stride;
      while (elem < stop) {
        for (int q = 0; q < nvalues; q++) elem[q] = *src++;
        elem += line_stride;
      }
    }
  }
}
#endif
/* ----------------------------------------------------------------------
   pack/unpack with pointers and memcpy function
   only pack_3d and unpack_3d have memcpy implementations
   the unpack_permute routines in this section are copies of
     the PACK_POINTER versions
------------------------------------------------------------------------- */
#ifdef PACK_MEMCPY
/* ----------------------------------------------------------------------
   pack from data -> buf
   one memcpy of nfast values per (slow,mid) row
   source row starts at data[slow*nstride_plane + mid*nstride_line]
   destination row starts at buf[slow*nmid*nfast + mid*nfast]
------------------------------------------------------------------------- */
static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_stride = plan->nstride_line;
  const int plane_stride = plan->nstride_plane;

  // bytes in one row, kept as int as in the original computation
  const int row_bytes = nfast*sizeof(PACK_DATA);

  for (int islow = 0; islow < nslow; islow++) {
    const int data_plane = islow*plane_stride;
    const int buf_plane = islow*nmid*nfast;
    for (int imid = 0; imid < nmid; imid++) {
      PACK_DATA *dst = buf + (buf_plane + imid*nfast);
      PACK_DATA *src = data + (data_plane + imid*line_stride);
      memcpy(dst,src,row_bytes);
    }
  }
}
/* ----------------------------------------------------------------------
   unpack from buf -> data
   one memcpy of nfast values per (slow,mid) row
   source row starts at buf[slow*nmid*nfast + mid*nfast]
   destination row starts at data[slow*nstride_plane + mid*nstride_line]
------------------------------------------------------------------------- */
static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_stride = plan->nstride_line;
  const int plane_stride = plan->nstride_plane;

  // bytes in one row, kept as int as in the original computation
  const int row_bytes = nfast*sizeof(PACK_DATA);

  for (int islow = 0; islow < nslow; islow++) {
    const int data_plane = islow*plane_stride;
    const int buf_plane = islow*nmid*nfast;
    for (int imid = 0; imid < nmid; imid++) {
      PACK_DATA *dst = data + (data_plane + imid*line_stride);
      PACK_DATA *src = buf + (buf_plane + imid*nfast);
      memcpy(dst,src,row_bytes);
    }
  }
}
/* ----------------------------------------------------------------------
   unpack from buf -> data, one axis permutation, 1 value/element
   element-by-element copy, no memcpy is used
   buf is read front to back
   for each (slow,mid) writes begin at data[slow*nstride_line + mid]
   and step by nstride_plane per fast index
------------------------------------------------------------------------- */
static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_step = plan->nstride_line;
  const int plane_step = plan->nstride_plane;

  PACK_DATA *next = buf;   // next unread value in buf

  for (int s = 0; s < nslow; s++) {
    const int base = s*line_step;
    for (int m = 0; m < nmid; m++) {
      PACK_DATA * const first = data + (base + m);
      PACK_DATA * const last = first + nfast*plane_step;
      for (PACK_DATA *slot = first; slot < last; slot += plane_step) {
        *slot = *next;
        next++;
      }
    }
  }
}
/* ----------------------------------------------------------------------
   unpack from buf -> data, one axis permutation, 2 values/element
   element-by-element copy, no memcpy is used
   buf is read front to back, 2 values at a time
   for each (slow,mid) writes begin at data[slow*nstride_line + 2*mid]
   and step by nstride_plane per fast index
   both values of a pair go to adjacent slots
------------------------------------------------------------------------- */
static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_step = plan->nstride_line;
  const int plane_step = plan->nstride_plane;

  PACK_DATA *next = buf;   // next unread value in buf

  for (int s = 0; s < nslow; s++) {
    const int base = s*line_step;
    for (int m = 0; m < nmid; m++) {
      PACK_DATA * const first = data + (base + 2*m);
      PACK_DATA * const last = first + nfast*plane_step;
      for (PACK_DATA *slot = first; slot < last; slot += plane_step) {
        slot[0] = next[0];
        slot[1] = next[1];
        next += 2;
      }
    }
  }
}
/* ----------------------------------------------------------------------
   unpack from buf -> data, one axis permutation, nqty values/element
   element-by-element copy, no memcpy is used
   buf is read front to back, plan->nqty values at a time
   for each (slow,mid) writes begin at data[slow*nstride_line + nqty*mid]
   and step by nstride_plane per fast index
   all nqty values of a group go to adjacent slots
------------------------------------------------------------------------- */
static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_step = plan->nstride_line;
  const int plane_step = plan->nstride_plane;
  const int group = plan->nqty;

  PACK_DATA *next = buf;   // next unread value in buf

  for (int s = 0; s < nslow; s++) {
    const int base = s*line_step;
    for (int m = 0; m < nmid; m++) {
      PACK_DATA * const first = data + (base + group*m);
      PACK_DATA * const last = first + nfast*plane_step;
      for (PACK_DATA *slot = first; slot < last; slot += plane_step) {
        for (int q = 0; q < group; q++) {
          slot[q] = *next;
          next++;
        }
      }
    }
  }
}
/* ----------------------------------------------------------------------
   unpack from buf -> data, two axis permutation, 1 value/element
   element-by-element copy, no memcpy is used
   buf is read front to back
   for each (slow,mid) writes begin at data[slow + mid*nstride_plane]
   and step by nstride_line per fast index
------------------------------------------------------------------------- */
static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_step = plan->nstride_line;
  const int plane_step = plan->nstride_plane;

  PACK_DATA *next = buf;   // next unread value in buf

  for (int s = 0; s < nslow; s++) {
    for (int m = 0; m < nmid; m++) {
      PACK_DATA * const first = data + (s + m*plane_step);
      PACK_DATA * const last = first + nfast*line_step;
      for (PACK_DATA *slot = first; slot < last; slot += line_step) {
        *slot = *next;
        next++;
      }
    }
  }
}
/* ----------------------------------------------------------------------
   unpack from buf -> data, two axis permutation, 2 values/element
   element-by-element copy, no memcpy is used
   buf is read front to back, 2 values at a time
   for each (slow,mid) writes begin at data[2*slow + mid*nstride_plane]
   and step by nstride_line per fast index
   both values of a pair go to adjacent slots
------------------------------------------------------------------------- */
static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_step = plan->nstride_line;
  const int plane_step = plan->nstride_plane;

  PACK_DATA *next = buf;   // next unread value in buf

  for (int s = 0; s < nslow; s++) {
    for (int m = 0; m < nmid; m++) {
      PACK_DATA * const first = data + (2*s + m*plane_step);
      PACK_DATA * const last = first + nfast*line_step;
      for (PACK_DATA *slot = first; slot < last; slot += line_step) {
        slot[0] = next[0];
        slot[1] = next[1];
        next += 2;
      }
    }
  }
}
/* ----------------------------------------------------------------------
   unpack from buf -> data, two axis permutation, nqty values/element
   element-by-element copy, no memcpy is used
   buf is read front to back, plan->nqty values at a time
   for each (slow,mid) writes begin at data[nqty*slow + mid*nstride_plane]
   and step by nstride_line per fast index
   all nqty values of a group go to adjacent slots
------------------------------------------------------------------------- */
static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int line_step = plan->nstride_line;
  const int plane_step = plan->nstride_plane;
  const int group = plan->nqty;

  PACK_DATA *next = buf;   // next unread value in buf

  for (int s = 0; s < nslow; s++) {
    for (int m = 0; m < nmid; m++) {
      PACK_DATA * const first = data + (group*s + m*plane_step);
      PACK_DATA * const last = first + nfast*line_step;
      for (PACK_DATA *slot = first; slot < last; slot += line_step) {
        for (int q = 0; q < group; q++) {
          slot[q] = *next;
          next++;
        }
      }
    }
  }
}
#endif