From 8745580d5de6992ef8a55fe2c14c9f50f4379e63 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Mon, 8 Aug 2011 19:40:03 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6622 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- src/KSPACE/pppm.cpp | 131 ++++--- src/KSPACE/pppm.h | 48 ++- src/KSPACE/pppm_tip4p.cpp | 75 ++-- src/KSPACE/pppm_tip4p.h | 10 +- src/KSPACE/remap.cpp | 90 ++--- src/KSPACE/remap.h | 22 +- src/KSPACE/remap_wrap.cpp | 2 +- src/KSPACE/remap_wrap.h | 4 +- src/pack.cpp | 757 -------------------------------------- src/pack.h | 753 ++++++++++++++++++++++++++++++++++++- 10 files changed, 944 insertions(+), 948 deletions(-) delete mode 100644 src/pack.cpp diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp index e6025d8035..7c671d0a3c 100644 --- a/src/KSPACE/pppm.cpp +++ b/src/KSPACE/pppm.cpp @@ -43,6 +43,14 @@ using namespace LAMMPS_NS; #define LARGE 10000.0 #define EPS_HOC 1.0e-7 +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define MAX(a,b) ((a) > (b) ? (a) : (b)) @@ -50,7 +58,7 @@ using namespace LAMMPS_NS; PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) { - if (narg != 1) error->all("Illegal kspace_style pppm command"); + if (narg < 1) error->all("Illegal kspace_style pppm command"); precision = atof(arg[0]); PI = 4.0*atan(1.0); @@ -754,7 +762,7 @@ void PPPM::allocate() // summation coeffs - gf_b = new double[order]; + memory->create(gf_b,order,"pppm:gf_b"); memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d"); memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff"); @@ -778,7 +786,7 @@ void PPPM::allocate() remap = new Remap(lmp,world, nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in, nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft, - 1,0,0,2); + 1,0,0,FFT_PRECISION); } /* ---------------------------------------------------------------------- @@ -805,7 +813,7 @@ void PPPM::deallocate() memory->destroy(buf1); memory->destroy(buf2); - delete [] gf_b; + memory->destroy(gf_b); memory->destroy2d_offset(rho1d,-order/2); memory->destroy2d_offset(rho_coeff,(1-order)/2); @@ -967,17 +975,24 @@ void PPPM::set_grid() // print info if (me == 0) { +#ifdef FFT_SINGLE + const char fft_prec[] = "single"; +#else + const char fft_prec[] = "double"; +#endif if (screen) { fprintf(screen," G vector = %g\n",g_ewald); fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); fprintf(screen," stencil order = %d\n",order); fprintf(screen," RMS precision = %g\n",MAX(lpr,spr)); + fprintf(screen," using %s precision FFTs\n",fft_prec); } if (logfile) { fprintf(logfile," G vector = %g\n",g_ewald); fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm); fprintf(logfile," stencil order = %d\n",order); fprintf(logfile," RMS precision = %g\n",MAX(lpr,spr)); + fprintf(logfile," using %s precision FFTs\n",fft_prec); } } } @@ -1036,7 +1051,7 @@ double PPPM::diffpr(double hx, double hy, double hz, double q2, double **acons) lprz = rms(hz,zprd*slab_volfactor,natoms,q2,acons); kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0); real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) / - sqrt(natoms*cutoff*xprd*yprd*zprd); + sqrt(static_cast(natoms)*cutoff*xprd*yprd*zprd); double value = kspace_prec - real_prec; return value; } @@ -1113,8 +1128,8 @@ void PPPM::brick2fft() if (comm->procneigh[0][1] == me) for (i = 0; i < n; i++) buf2[i] = buf1[i]; else { - MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][0],0,world,&request); - MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][1],0,world); + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); MPI_Wait(&request,&status); } @@ -1137,8 +1152,8 @@ void PPPM::brick2fft() if (comm->procneigh[0][0] == me) for (i = 0; i < n; i++) buf2[i] = buf1[i]; else { - MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][1],0,world,&request); - MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][0],0,world); + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); MPI_Wait(&request,&status); } @@ -1161,8 +1176,8 @@ void PPPM::brick2fft() if (comm->procneigh[1][1] == me) for (i = 0; i < n; i++) buf2[i] = buf1[i]; else { - MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][0],0,world,&request); - MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][1],0,world); + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); MPI_Wait(&request,&status); } @@ -1185,8 +1200,8 @@ void PPPM::brick2fft() if (comm->procneigh[1][0] == me) for (i = 0; i < n; i++) buf2[i] = buf1[i]; else { - MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][1],0,world,&request); - MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][0],0,world); + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); MPI_Wait(&request,&status); } @@ -1209,8 +1224,8 @@ void PPPM::brick2fft() if (comm->procneigh[2][1] == me) for (i = 0; i < n; i++) buf2[i] = buf1[i]; else { - MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][0],0,world,&request); - MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][1],0,world); + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); MPI_Wait(&request,&status); } @@ -1233,8 +1248,8 @@ void PPPM::brick2fft() if (comm->procneigh[2][0] == me) for (i = 0; i < n; i++) buf2[i] = buf1[i]; else { - MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][1],0,world,&request); - MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][0],0,world); + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); MPI_Wait(&request,&status); } @@ -1284,8 +1299,8 @@ void PPPM::fillbrick() if (comm->procneigh[2][1] == me) for (i = 0; i < n; i++) buf2[i] = buf1[i]; else { - MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][0],0,world,&request); - MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][1],0,world); + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world); MPI_Wait(&request,&status); } @@ -1314,8 +1329,8 @@ void PPPM::fillbrick() if (comm->procneigh[2][0] == me) for (i = 0; i < n; i++) buf2[i] = buf1[i]; else { - MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][1],0,world,&request); - MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][0],0,world); + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world); MPI_Wait(&request,&status); } @@ -1344,8 +1359,8 @@ void PPPM::fillbrick() if (comm->procneigh[1][1] == me) for (i = 0; i < n; i++) buf2[i] = buf1[i]; else { - MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][0],0,world,&request); - MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][1],0,world); + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world); MPI_Wait(&request,&status); } @@ -1374,8 +1389,8 @@ void PPPM::fillbrick() if (comm->procneigh[1][0] == me) for (i = 0; i < n; i++) buf2[i] = buf1[i]; else { - MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][1],0,world,&request); - MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][0],0,world); + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world); MPI_Wait(&request,&status); } @@ -1404,8 +1419,8 @@ void PPPM::fillbrick() if (comm->procneigh[0][1] == me) for (i = 0; i < n; i++) buf2[i] = buf1[i]; else { - MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][0],0,world,&request); - MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][1],0,world); + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world); MPI_Wait(&request,&status); } @@ -1434,8 +1449,8 @@ void PPPM::fillbrick() if (comm->procneigh[0][0] == me) for (i = 0; i < n; i++) buf2[i] = buf1[i]; else { - MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][1],0,world,&request); - MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][0],0,world); + MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request); + MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world); MPI_Wait(&request,&status); } @@ -1497,12 +1512,12 @@ void PPPM::particle_map() void PPPM::make_rho() { int i,l,m,n,nx,ny,nz,mx,my,mz; - double dx,dy,dz,x0,y0,z0; + FFT_SCALAR dx,dy,dz,x0,y0,z0; // clear 3d density array - double *vec = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (i = 0; i < ngrid; i++) vec[i] = 0.0; + FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (i = 0; i < ngrid; i++) vec[i] = ZEROF; // loop over my charges, add their contribution to nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge @@ -1554,7 +1569,7 @@ void PPPM::poisson(int eflag, int vflag) n = 0; for (i = 0; i < nfft; i++) { work1[n++] = density_fft[i]; - work1[n++] = 0.0; + work1[n++] = ZEROF; } fft1->compute(work1,work1,1); @@ -1667,8 +1682,8 @@ void PPPM::poisson(int eflag, int vflag) void PPPM::fieldforce() { int i,l,m,n,nx,ny,nz,mx,my,mz; - double dx,dy,dz,x0,y0,z0; - double ek[3]; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge @@ -1679,6 +1694,7 @@ void PPPM::fieldforce() double *q = atom->q; double **x = atom->x; double **f = atom->f; + int nlocal = atom->nlocal; for (i = 0; i < nlocal; i++) { @@ -1691,7 +1707,7 @@ void PPPM::fieldforce() compute_rho1d(dx,dy,dz); - ek[0] = ek[1] = ek[2] = 0.0; + ekx = eky = ekz = ZEROF; for (n = nlower; n <= nupper; n++) { mz = n+nz; z0 = rho1d[2][n]; @@ -1701,18 +1717,18 @@ void PPPM::fieldforce() for (l = nlower; l <= nupper; l++) { mx = l+nx; x0 = y0*rho1d[0][l]; - ek[0] -= x0*vdx_brick[mz][my][mx];; - ek[1] -= x0*vdy_brick[mz][my][mx];; - ek[2] -= x0*vdz_brick[mz][my][mx];; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; } } } // convert E-field to force - - f[i][0] += qqrd2e*scale * q[i]*ek[0]; - f[i][1] += qqrd2e*scale * q[i]*ek[1]; - f[i][2] += qqrd2e*scale * q[i]*ek[2]; + const double qfactor = qqrd2e*scale*q[i]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + f[i][2] += qfactor*ekz; } } @@ -1758,15 +1774,16 @@ void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py) charge assignment into rho1d dx,dy,dz = distance of particle from "lower left" grid point ------------------------------------------------------------------------- */ - -void PPPM::compute_rho1d(double dx, double dy, double dz) +void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, + const FFT_SCALAR &dz) { int k,l; for (k = (1-order)/2; k <= order/2; k++) { - rho1d[0][k] = 0.0; - rho1d[1][k] = 0.0; - rho1d[2][k] = 0.0; + rho1d[0][k] = ZEROF; + rho1d[1][k] = ZEROF; + rho1d[2][k] = ZEROF; + for (l = order-1; l >= 0; l--) { rho1d[0][k] = rho_coeff[l][k] + rho1d[0][k]*dx; rho1d[1][k] = rho_coeff[l][k] + rho1d[1][k]*dy; @@ -1797,9 +1814,9 @@ void PPPM::compute_rho1d(double dx, double dy, double dz) void PPPM::compute_rho_coeff() { int j,k,l,m; - double s; + FFT_SCALAR s; - double **a; + FFT_SCALAR **a; memory->create2d_offset(a,order,-order,order,"pppm:a"); for (k = -order; k <= order; k++) @@ -1812,8 +1829,13 @@ void PPPM::compute_rho_coeff() s = 0.0; for (l = 0; l < j; l++) { a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1); +#ifdef FFT_SINGLE + s += powf(0.5,(float) l+1) * + (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1); +#else s += pow(0.5,(double) l+1) * (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1); +#endif } a[0][k] = s; } @@ -1874,7 +1896,7 @@ void PPPM::timing(int n, double &time3d, double &time1d) { double time1,time2; - for (int i = 0; i < 2*nfft_both; i++) work1[i] = 0.0; + for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF; MPI_Barrier(world); time1 = MPI_Wtime(); @@ -1914,9 +1936,10 @@ double PPPM::memory_usage() double bytes = nmax*3 * sizeof(double); int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) * (nzhi_out-nzlo_out+1); - bytes += 4 * nbrick * sizeof(double); + bytes += 4 * nbrick * sizeof(FFT_SCALAR); bytes += 6 * nfft_both * sizeof(double); - bytes += nfft_both*6 * sizeof(double); - bytes += 2 * nbuf * sizeof(double); + bytes += nfft_both * sizeof(double); + bytes += nfft_both*5 * sizeof(FFT_SCALAR); + bytes += 2 * nbuf * sizeof(FFT_SCALAR); return bytes; } diff --git a/src/KSPACE/pppm.h b/src/KSPACE/pppm.h index f0ee75d12a..982b8f7c9e 100644 --- a/src/KSPACE/pppm.h +++ b/src/KSPACE/pppm.h @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------- +/* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -20,6 +20,17 @@ KSpaceStyle(pppm,PPPM) #ifndef LMP_PPPM_H #define LMP_PPPM_H +#include "lmptype.h" +#include "mpi.h" + +#ifdef FFT_SINGLE +typedef float FFT_SCALAR; +#define MPI_FFT_SCALAR MPI_FLOAT +#else +typedef double FFT_SCALAR; +#define MPI_FFT_SCALAR MPI_DOUBLE +#endif + #include "kspace.h" namespace LAMMPS_NS { @@ -28,11 +39,11 @@ class PPPM : public KSpace { public: PPPM(class LAMMPS *, int, char **); virtual ~PPPM(); - void init(); - void setup(); - void compute(int, int); - void timing(int, double &, double &); - double memory_usage(); + virtual void init(); + virtual void setup(); + virtual void compute(int, int); + virtual void timing(int, double &, double &); + virtual double memory_usage(); protected: int me,nprocs; @@ -54,17 +65,17 @@ class PPPM : public KSpace { int nlower,nupper; int ngrid,nfft,nbuf,nfft_both; - double ***density_brick; - double ***vdx_brick,***vdy_brick,***vdz_brick; + FFT_SCALAR ***density_brick; + FFT_SCALAR ***vdx_brick,***vdy_brick,***vdz_brick; double *greensfn; double **vg; double *fkx,*fky,*fkz; - double *density_fft; - double *work1,*work2; - double *buf1,*buf2; + FFT_SCALAR *density_fft; + FFT_SCALAR *work1,*work2; + FFT_SCALAR *buf1,*buf2; double *gf_b; - double **rho1d,**rho_coeff; + FFT_SCALAR **rho1d,**rho_coeff; class FFT3d *fft1,*fft2; class Remap *remap; @@ -80,8 +91,8 @@ class PPPM : public KSpace { double alpha; // geometric factor void set_grid(); - void allocate(); - void deallocate(); + virtual void allocate(); + virtual void deallocate(); int factorable(int); double rms(double, double, bigint, double, double **); double diffpr(double, double, double, double, double **); @@ -89,12 +100,13 @@ class PPPM : public KSpace { double gf_denom(double, double, double); virtual void particle_map(); virtual void make_rho(); - void brick2fft(); - void fillbrick(); - void poisson(int, int); + virtual void brick2fft(); + virtual void fillbrick(); + virtual void poisson(int, int); virtual void fieldforce(); void procs2grid2d(int,int,int,int *, int*); - void compute_rho1d(double, double, double); + void compute_rho1d(const FFT_SCALAR &, const FFT_SCALAR &, + const FFT_SCALAR &); void compute_rho_coeff(); void slabcorr(int); }; diff --git a/src/KSPACE/pppm_tip4p.cpp b/src/KSPACE/pppm_tip4p.cpp index 08ade46a07..af0af92fad 100644 --- a/src/KSPACE/pppm_tip4p.cpp +++ b/src/KSPACE/pppm_tip4p.cpp @@ -26,6 +26,14 @@ using namespace LAMMPS_NS; #define OFFSET 16384 +#ifdef FFT_SINGLE +#define ZEROF 0.0f +#define ONEF 1.0f +#else +#define ZEROF 0.0 +#define ONEF 1.0 +#endif + /* ---------------------------------------------------------------------- */ PPPMTIP4P::PPPMTIP4P(LAMMPS *lmp, int narg, char **arg) : @@ -87,13 +95,13 @@ void PPPMTIP4P::particle_map() void PPPMTIP4P::make_rho() { int i,l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2; - double dx,dy,dz,x0,y0,z0; + FFT_SCALAR dx,dy,dz,x0,y0,z0; double *xi,xM[3]; // clear 3d density array - double *vec = &density_brick[nzlo_out][nylo_out][nxlo_out]; - for (i = 0; i < ngrid; i++) vec[i] = 0.0; + FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out]; + for (i = 0; i < ngrid; i++) vec[i] = ZEROF; // loop over my charges, add their contribution to nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge @@ -143,13 +151,13 @@ void PPPMTIP4P::make_rho() void PPPMTIP4P::fieldforce() { int i,l,m,n,nx,ny,nz,mx,my,mz; - double dx,dy,dz,x0,y0,z0; - double ek[3]; + FFT_SCALAR dx,dy,dz,x0,y0,z0; + FFT_SCALAR ekx,eky,ekz; double *xi; int iH1,iH2; double xM[3]; double fx,fy,fz; - double ddotf, rOM[3], f1[3]; + double ddotf, rOMx, rOMy, rOMz, f1x, f1y, f1z; // loop over my charges, interpolate electric field from nearby grid points // (nx,ny,nz) = global coords of grid pt to "lower left" of charge @@ -160,6 +168,7 @@ void PPPMTIP4P::fieldforce() double *q = atom->q; double **x = atom->x; double **f = atom->f; + int *type = atom->type; int nlocal = atom->nlocal; @@ -178,7 +187,7 @@ void PPPMTIP4P::fieldforce() compute_rho1d(dx,dy,dz); - ek[0] = ek[1] = ek[2] = 0.0; + ekx = eky = ekz = ZEROF; for (n = nlower; n <= nupper; n++) { mz = n+nz; z0 = rho1d[2][n]; @@ -188,47 +197,47 @@ void PPPMTIP4P::fieldforce() for (l = nlower; l <= nupper; l++) { mx = l+nx; x0 = y0*rho1d[0][l]; - ek[0] -= x0*vdx_brick[mz][my][mx]; - ek[1] -= x0*vdy_brick[mz][my][mx]; - ek[2] -= x0*vdz_brick[mz][my][mx]; + ekx -= x0*vdx_brick[mz][my][mx]; + eky -= x0*vdy_brick[mz][my][mx]; + ekz -= x0*vdz_brick[mz][my][mx]; } } } // convert E-field to force - + const double qfactor = qqrd2e*scale*q[i]; if (type[i] != typeO) { - f[i][0] += qqrd2e*q[i]*ek[0]; - f[i][1] += qqrd2e*q[i]*ek[1]; - f[i][2] += qqrd2e*q[i]*ek[2]; + f[i][0] += qfactor*ekx; + f[i][1] += qfactor*eky; + f[i][2] += qfactor*ekz; } else { - fx = qqrd2e * q[i] * ek[0]; - fy = qqrd2e * q[i] * ek[1]; - fz = qqrd2e * q[i] * ek[2]; + fx = qfactor * ekx; + fy = qfactor * eky; + fz = qfactor * ekz; find_M(i,iH1,iH2,xM); - rOM[0] = xM[0] - x[i][0]; - rOM[1] = xM[1] - x[i][1]; - rOM[2] = xM[2] - x[i][2]; + rOMx = xM[0] - x[i][0]; + rOMy = xM[1] - x[i][1]; + rOMz = xM[2] - x[i][2]; - ddotf = (rOM[0] * fx + rOM[1] * fy + rOM[2] * fz) / (qdist * qdist); + ddotf = (rOMx * fx + rOMy * fy + rOMz * fz) / (qdist * qdist); - f1[0] = ddotf * rOM[0]; - f1[1] = ddotf * rOM[1]; - f1[2] = ddotf * rOM[2]; + f1x = ddotf * rOMx; + f1y = ddotf * rOMy; + f1z = ddotf * rOMz; - f[i][0] += fx - alpha * (fx - f1[0]); - f[i][1] += fy - alpha * (fy - f1[1]); - f[i][2] += fz - alpha * (fz - f1[2]); + f[i][0] += fx - alpha * (fx - f1x); + f[i][1] += fy - alpha * (fy - f1y); + f[i][2] += fz - alpha * (fz - f1z); - f[iH1][0] += 0.5*alpha*(fx - f1[0]); - f[iH1][1] += 0.5*alpha*(fy - f1[1]); - f[iH1][2] += 0.5*alpha*(fz - f1[2]); + f[iH1][0] += 0.5*alpha*(fx - f1x); + f[iH1][1] += 0.5*alpha*(fy - f1y); + f[iH1][2] += 0.5*alpha*(fz - f1z); - f[iH2][0] += 0.5*alpha*(fx - f1[0]); - f[iH2][1] += 0.5*alpha*(fy - f1[1]); - f[iH2][2] += 0.5*alpha*(fz - f1[2]); + f[iH2][0] += 0.5*alpha*(fx - f1x); + f[iH2][1] += 0.5*alpha*(fy - f1y); + f[iH2][2] += 0.5*alpha*(fz - f1z); } } } diff --git a/src/KSPACE/pppm_tip4p.h b/src/KSPACE/pppm_tip4p.h index 4a56a0270b..764db68edd 100644 --- a/src/KSPACE/pppm_tip4p.h +++ b/src/KSPACE/pppm_tip4p.h @@ -27,12 +27,14 @@ namespace LAMMPS_NS { class PPPMTIP4P : public PPPM { public: PPPMTIP4P(class LAMMPS *, int, char **); + virtual ~PPPMTIP4P () {}; + + protected: + virtual void particle_map(); + virtual void make_rho(); + virtual void fieldforce(); private: - void particle_map(); - void make_rho(); - void fieldforce(); - void find_M(int, int &, int &, double *); }; diff --git a/src/KSPACE/remap.cpp b/src/KSPACE/remap.cpp index f30a2d97f2..dbf9476a80 100644 --- a/src/KSPACE/remap.cpp +++ b/src/KSPACE/remap.cpp @@ -11,10 +11,12 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -#include "mpi.h" #include "stdio.h" #include "stdlib.h" #include "remap.h" + +#define PACK_DATA FFT_SCALAR + #include "pack.h" #define MIN(A,B) ((A) < (B)) ? (A) : (B) @@ -57,13 +59,13 @@ plan plan returned by previous call to remap_3d_create_plan ------------------------------------------------------------------------- */ -void remap_3d(double *in, double *out, double *buf, +void remap_3d(FFT_SCALAR *in, FFT_SCALAR *out, FFT_SCALAR *buf, struct remap_plan_3d *plan) { MPI_Status status; int i,isend,irecv; - double *scratch; + FFT_SCALAR *scratch; if (plan->memory == 0) scratch = buf; @@ -74,7 +76,7 @@ void remap_3d(double *in, double *out, double *buf, for (irecv = 0; irecv < plan->nrecv; irecv++) MPI_Irecv(&scratch[plan->recv_bufloc[irecv]],plan->recv_size[irecv], - MPI_DOUBLE,plan->recv_proc[irecv],0, + MPI_FFT_SCALAR,plan->recv_proc[irecv],0, plan->comm,&plan->request[irecv]); // send all messages to other procs @@ -82,7 +84,7 @@ void remap_3d(double *in, double *out, double *buf, for (isend = 0; isend < plan->nsend; isend++) { plan->pack(&in[plan->send_offset[isend]], plan->sendbuf,&plan->packplan[isend]); - MPI_Send(plan->sendbuf,plan->send_size[isend],MPI_DOUBLE, + MPI_Send(plan->sendbuf,plan->send_size[isend],MPI_FFT_SCALAR, plan->send_proc[isend],0,plan->comm); } @@ -150,13 +152,6 @@ struct remap_plan_3d *remap_3d_create_plan( MPI_Comm_rank(comm,&me); MPI_Comm_size(comm,&nprocs); - // single precision not yet supported - - if (precision == 1) { - if (me == 0) printf("Single precision not supported\n"); - return NULL; - } - // allocate memory for plan data struct plan = (struct remap_plan_3d *) malloc(sizeof(struct remap_plan_3d)); @@ -209,10 +204,7 @@ struct remap_plan_3d *remap_3d_create_plan( // malloc space for send info if (nsend) { - if (precision == 1) - plan->pack = NULL; - else - plan->pack = pack_3d; + plan->pack = pack_3d; plan->send_offset = (int *) malloc(nsend*sizeof(int)); plan->send_size = (int *) malloc(nsend*sizeof(int)); @@ -272,45 +264,23 @@ struct remap_plan_3d *remap_3d_create_plan( // malloc space for recv info if (nrecv) { - if (precision == 1) { - if (permute == 0) - plan->unpack = NULL; - else if (permute == 1) { - if (nqty == 1) - plan->unpack = NULL; - else if (nqty == 2) - plan->unpack = NULL; - else - plan->unpack = NULL; - } - else if (permute == 2) { - if (nqty == 1) - plan->unpack = NULL; - else if (nqty == 2) - plan->unpack = NULL; - else - plan->unpack = NULL; - } + if (permute == 0) + plan->unpack = unpack_3d; + else if (permute == 1) { + if (nqty == 1) + plan->unpack = unpack_3d_permute1_1; + else if (nqty == 2) + plan->unpack = unpack_3d_permute1_2; + else + plan->unpack = unpack_3d_permute1_n; } - else if (precision == 2) { - if (permute == 0) - plan->unpack = unpack_3d; - else if (permute == 1) { - if (nqty == 1) - plan->unpack = unpack_3d_permute1_1; - else if (nqty == 2) - plan->unpack = unpack_3d_permute1_2; - else - plan->unpack = unpack_3d_permute1_n; - } - else if (permute == 2) { - if (nqty == 1) - plan->unpack = unpack_3d_permute2_1; - else if (nqty == 2) - plan->unpack = unpack_3d_permute2_2; - else - plan->unpack = unpack_3d_permute2_n; - } + else if (permute == 2) { + if (nqty == 1) + plan->unpack = unpack_3d_permute2_1; + else if (nqty == 2) + plan->unpack = unpack_3d_permute2_2; + else + plan->unpack = unpack_3d_permute2_n; } plan->recv_offset = (int *) malloc(nrecv*sizeof(int)); @@ -408,10 +378,7 @@ struct remap_plan_3d *remap_3d_create_plan( size = MAX(size,plan->send_size[nsend]); if (size) { - if (precision == 1) - plan->sendbuf = NULL; - else - plan->sendbuf = (double *) malloc(size*sizeof(double)); + plan->sendbuf = (FFT_SCALAR *) malloc(size*sizeof(FFT_SCALAR)); if (plan->sendbuf == NULL) return NULL; } @@ -422,11 +389,8 @@ struct remap_plan_3d *remap_3d_create_plan( if (memory == 1) { if (nrecv > 0) { - if (precision == 1) - plan->scratch = NULL; - else - plan->scratch = - (double *) malloc(nqty*out.isize*out.jsize*out.ksize*sizeof(double)); + plan->scratch = + (FFT_SCALAR *) malloc(nqty*out.isize*out.jsize*out.ksize*sizeof(FFT_SCALAR)); if (plan->scratch == NULL) return NULL; } } diff --git a/src/KSPACE/remap.h b/src/KSPACE/remap.h index 6788354c69..648b3a0d16 100644 --- a/src/KSPACE/remap.h +++ b/src/KSPACE/remap.h @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------- +/* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -11,14 +11,24 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include + +#ifdef FFT_SINGLE +typedef float FFT_SCALAR; +#define MPI_FFT_SCALAR MPI_FLOAT +#else +typedef double FFT_SCALAR; +#define MPI_FFT_SCALAR MPI_DOUBLE +#endif + // details of how to do a 3d remap struct remap_plan_3d { - double *sendbuf; // buffer for MPI sends - double *scratch; // scratch buffer for MPI recvs - void (*pack)(double *, double *, struct pack_plan_3d *); + FFT_SCALAR *sendbuf; // buffer for MPI sends + FFT_SCALAR *scratch; // scratch buffer for MPI recvs + void (*pack)(FFT_SCALAR *, FFT_SCALAR *, struct pack_plan_3d *); // which pack function to use - void (*unpack)(double *, double *, struct pack_plan_3d *); + void (*unpack)(FFT_SCALAR *, FFT_SCALAR *, struct pack_plan_3d *); // which unpack function to use int *send_offset; // extraction loc for each send int *send_size; // size of each send message @@ -47,7 +57,7 @@ struct extent_3d { // function prototypes -void remap_3d(double *, double *, double *, struct remap_plan_3d *); +void remap_3d(FFT_SCALAR *, FFT_SCALAR *, FFT_SCALAR *, struct remap_plan_3d *); struct remap_plan_3d *remap_3d_create_plan(MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int, int); diff --git a/src/KSPACE/remap_wrap.cpp b/src/KSPACE/remap_wrap.cpp index d46df6be48..83631defe4 100644 --- a/src/KSPACE/remap_wrap.cpp +++ b/src/KSPACE/remap_wrap.cpp @@ -42,7 +42,7 @@ Remap::~Remap() /* ---------------------------------------------------------------------- */ -void Remap::perform(double *in, double *out, double *buf) +void Remap::perform(FFT_SCALAR *in, FFT_SCALAR *out, FFT_SCALAR *buf) { remap_3d(in,out,buf,plan); } diff --git a/src/KSPACE/remap_wrap.h b/src/KSPACE/remap_wrap.h index 7145769d4c..dd57e18d17 100644 --- a/src/KSPACE/remap_wrap.h +++ b/src/KSPACE/remap_wrap.h @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------- +/* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -24,7 +24,7 @@ class Remap : protected Pointers { Remap(class LAMMPS *, MPI_Comm,int,int,int,int,int,int, int,int,int,int,int,int,int,int,int,int); ~Remap(); - void perform(double *, double *, double *); + void perform(FFT_SCALAR *, FFT_SCALAR *, FFT_SCALAR *); private: struct remap_plan_3d *plan; diff --git a/src/pack.cpp b/src/pack.cpp deleted file mode 100644 index 0b6ba908a9..0000000000 --- a/src/pack.cpp +++ /dev/null @@ -1,757 +0,0 @@ -/* ---------------------------------------------------------------------- - LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator - http://lammps.sandia.gov, Sandia National Laboratories - Steve Plimpton, sjplimp@sandia.gov - - Copyright (2003) Sandia Corporation. Under the terms of Contract - DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains - certain rights in this software. This software is distributed under - the GNU General Public License. - - See the README file in the top-level LAMMPS directory. -------------------------------------------------------------------------- */ - -#include "pack.h" - -#if !defined(PACK_POINTER) && !defined(PACK_MEMCPY) -#define PACK_ARRAY -#endif - -/* ---------------------------------------------------------------------- - Pack and unpack functions: - - pack routines copy strided values from data into contiguous locs in buf - unpack routines copy contiguous values from buf into strided locs in data - different versions of unpack depending on permutation - and # of values/element - PACK_ARRAY routines work via array indices (default) - PACK_POINTER routines work via pointers - PACK_MEMCPY routines work via pointers and memcpy function -------------------------------------------------------------------------- */ - -/* ---------------------------------------------------------------------- - pack/unpack with array indices -------------------------------------------------------------------------- */ - -#ifdef PACK_ARRAY - -/* ---------------------------------------------------------------------- - pack from data -> buf -------------------------------------------------------------------------- */ - -void pack_3d(double *data, double *buf, struct pack_plan_3d *plan) - -{ - register int in,out,fast,mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - in = 0; - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_plane; - for (mid = 0; mid < nmid; mid++) { - out = plane + mid*nstride_line; - for (fast = 0; fast < nfast; fast++) - buf[in++] = data[out++]; - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data -------------------------------------------------------------------------- */ - -void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register int in,out,fast,mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - out = 0; - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_plane; - for (mid = 0; mid < nmid; mid++) { - in = plane + mid*nstride_line; - for (fast = 0; fast < nfast; fast++) - data[in++] = buf[out++]; - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, one axis permutation, 1 value/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register int in,out,fast,mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - out = 0; - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_line; - for (mid = 0; mid < nmid; mid++) { - in = plane + mid; - for (fast = 0; fast < nfast; fast++, in += nstride_plane) - data[in] = buf[out++]; - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, one axis permutation, 2 values/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register int in,out,fast,mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - out = 0; - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_line; - for (mid = 0; mid < nmid; mid++) { - in = plane + 2*mid; - for (fast = 0; fast < nfast; fast++, in += nstride_plane) { - data[in] = buf[out++]; - data[in+1] = buf[out++]; - } - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, one axis permutation, nqty values/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register int in,out,iqty,instart,fast,mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - nqty = plan->nqty; - - out = 0; - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_line; - for (mid = 0; mid < nmid; mid++) { - instart = plane + nqty*mid; - for (fast = 0; fast < nfast; fast++, instart += nstride_plane) { - in = instart; - for (iqty = 0; iqty < nqty; iqty++) data[in++] = buf[out++]; - } - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, two axis permutation, 1 value/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register int in,out,fast,mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - out = 0; - for (slow = 0; slow < nslow; slow++) { - for (mid = 0; mid < nmid; mid++) { - in = slow + mid*nstride_plane; - for (fast = 0; fast < nfast; fast++, in += nstride_line) - data[in] = buf[out++]; - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, two axis permutation, 2 values/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register int in,out,fast,mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - out = 0; - for (slow = 0; slow < nslow; slow++) { - for (mid = 0; mid < nmid; mid++) { - in = 2*slow + mid*nstride_plane; - for (fast = 0; fast < nfast; fast++, in += nstride_line) { - data[in] = buf[out++]; - data[in+1] = buf[out++]; - } - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, two axis permutation, nqty values/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register int in,out,iqty,instart,fast,mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - nqty = plan->nqty; - - out = 0; - for (slow = 0; slow < nslow; slow++) { - for (mid = 0; mid < nmid; mid++) { - instart = nqty*slow + mid*nstride_plane; - for (fast = 0; fast < nfast; fast++, instart += nstride_line) { - in = instart; - for (iqty = 0; iqty < nqty; iqty++) data[in++] = buf[out++]; - } - } - } -} - -#endif - -/* ---------------------------------------------------------------------- - pack/unpack with pointers -------------------------------------------------------------------------- */ - -#ifdef PACK_POINTER - -/* ---------------------------------------------------------------------- - pack from data -> buf -------------------------------------------------------------------------- */ - -void pack_3d(double *data, double *buf, struct pack_plan_3d *plan) - -{ - register double *in,*out,*begin,*end; - register int mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - in = buf; - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_plane; - for (mid = 0; mid < nmid; mid++) { - begin = &(data[plane+mid*nstride_line]); - end = begin + nfast; - for (out = begin; out < end; out++) - *(in++) = *out; - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data -------------------------------------------------------------------------- */ - -void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register double *in,*out,*begin,*end; - register int mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - out = buf; - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_plane; - for (mid = 0; mid < nmid; mid++) { - begin = &(data[plane+mid*nstride_line]); - end = begin + nfast; - for (in = begin; in < end; in++) - *in = *(out++); - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, one axis permutation, 1 value/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register double *in,*out,*begin,*end; - register int mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - out = buf; - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_line; - for (mid = 0; mid < nmid; mid++) { - begin = &(data[plane+mid]); - end = begin + nfast*nstride_plane; - for (in = begin; in < end; in += nstride_plane) - *in = *(out++); - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, one axis permutation, 2 values/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register double *in,*out,*begin,*end; - register int mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - out = buf; - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_line; - for (mid = 0; mid < nmid; mid++) { - begin = &(data[plane+2*mid]); - end = begin + nfast*nstride_plane; - for (in = begin; in < end; in += nstride_plane) { - *in = *(out++); - *(in+1) = *(out++); - } - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, one axis permutation, nqty values/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register double *in,*out,*instart,*begin,*end; - register int iqty,mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - nqty = plan->nqty; - - out = buf; - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_line; - for (mid = 0; mid < nmid; mid++) { - begin = &(data[plane+nqty*mid]); - end = begin + nfast*nstride_plane; - for (instart = begin; instart < end; instart += nstride_plane) { - in = instart; - for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++); - } - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, two axis permutation, 1 value/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register double *in,*out,*begin,*end; - register int mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - out = buf; - for (slow = 0; slow < nslow; slow++) { - for (mid = 0; mid < nmid; mid++) { - begin = &(data[slow+mid*nstride_plane]); - end = begin + nfast*nstride_line; - for (in = begin; in < end; in += nstride_line) - *in = *(out++); - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, two axis permutation, 2 values/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register double *in,*out,*begin,*end; - register int mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - out = buf; - for (slow = 0; slow < nslow; slow++) { - for (mid = 0; mid < nmid; mid++) { - begin = &(data[2*slow+mid*nstride_plane]); - end = begin + nfast*nstride_line; - for (in = begin; in < end; in += nstride_line) { - *in = *(out++); - *(in+1) = *(out++); - } - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, two axis permutation, nqty values/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register double *in,*out,*instart,*begin,*end; - register int iqty,mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - nqty = plan->nqty; - - out = buf; - for (slow = 0; slow < nslow; slow++) { - for (mid = 0; mid < nmid; mid++) { - begin = &(data[nqty*slow+mid*nstride_plane]); - end = begin + nfast*nstride_line; - for (instart = begin; instart < end; instart += nstride_line) { - in = instart; - for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++); - } - } - } -} - -#endif - -/* ---------------------------------------------------------------------- - pack/unpack with pointers and memcpy function - no memcpy version of unpack_permute routines, - just use PACK_POINTER versions -------------------------------------------------------------------------- */ - -#ifdef PACK_MEMCPY - -/* ---------------------------------------------------------------------- - pack from data -> buf -------------------------------------------------------------------------- */ - -void pack_3d(double *data, double *buf, struct pack_plan_3d *plan) - -{ - register double *in,*out; - register int mid,slow,size; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,upto; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - size = nfast*sizeof(double); - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_plane; - upto = slow*nmid*nfast; - for (mid = 0; mid < nmid; mid++) { - in = &(buf[upto+mid*nfast]); - out = &(data[plane+mid*nstride_line]); - memcpy(in,out,size); - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data -------------------------------------------------------------------------- */ - -void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register double *in,*out; - register int mid,slow,size; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,upto; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - size = nfast*sizeof(double); - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_plane; - upto = slow*nmid*nfast; - for (mid = 0; mid < nmid; mid++) { - in = &(data[plane+mid*nstride_line]); - out = &(buf[upto+mid*nfast]); - memcpy(in,out,size); - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, one axis permutation, 1 value/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register double *in,*out,*begin,*end; - register int mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - out = buf; - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_line; - for (mid = 0; mid < nmid; mid++) { - begin = &(data[plane+mid]); - end = begin + nfast*nstride_plane; - for (in = begin; in < end; in += nstride_plane) - *in = *(out++); - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, one axis permutation, 2 values/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register double *in,*out,*begin,*end; - register int mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - out = buf; - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_line; - for (mid = 0; mid < nmid; mid++) { - begin = &(data[plane+2*mid]); - end = begin + nfast*nstride_plane; - for (in = begin; in < end; in += nstride_plane) { - *in = *(out++); - *(in+1) = *(out++); - } - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, one axis permutation, nqty values/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register double *in,*out,*instart,*begin,*end; - register int iqty,mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - nqty = plan->nqty; - - out = buf; - for (slow = 0; slow < nslow; slow++) { - plane = slow*nstride_line; - for (mid = 0; mid < nmid; mid++) { - begin = &(data[plane+nqty*mid]); - end = begin + nfast*nstride_plane; - for (instart = begin; instart < end; instart += nstride_plane) { - in = instart; - for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++); - } - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, two axis permutation, 1 value/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register double *in,*out,*begin,*end; - register int mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - out = buf; - for (slow = 0; slow < nslow; slow++) { - for (mid = 0; mid < nmid; mid++) { - begin = &(data[slow+mid*nstride_plane]); - end = begin + nfast*nstride_line; - for (in = begin; in < end; in += nstride_line) - *in = *(out++); - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, two axis permutation, 2 values/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register double *in,*out,*begin,*end; - register int mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - - out = buf; - for (slow = 0; slow < nslow; slow++) { - for (mid = 0; mid < nmid; mid++) { - begin = &(data[2*slow+mid*nstride_plane]); - end = begin + nfast*nstride_line; - for (in = begin; in < end; in += nstride_line) { - *in = *(out++); - *(in+1) = *(out++); - } - } - } -} - -/* ---------------------------------------------------------------------- - unpack from buf -> data, two axis permutation, nqty values/element -------------------------------------------------------------------------- */ - -void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan) - -{ - register double *in,*out,*instart,*begin,*end; - register int iqty,mid,slow; - register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty; - - nfast = plan->nfast; - nmid = plan->nmid; - nslow = plan->nslow; - nstride_line = plan->nstride_line; - nstride_plane = plan->nstride_plane; - nqty = plan->nqty; - - out = buf; - for (slow = 0; slow < nslow; slow++) { - for (mid = 0; mid < nmid; mid++) { - begin = &(data[nqty*slow+mid*nstride_plane]); - end = begin + nfast*nstride_line; - for (instart = begin; instart < end; instart += nstride_line) { - in = instart; - for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++); - } - } - } -} - -#endif diff --git a/src/pack.h b/src/pack.h index bcb6208b2e..5f032e68c7 100644 --- a/src/pack.h +++ b/src/pack.h @@ -1,4 +1,4 @@ -/* ---------------------------------------------------------------------- +/* -*- c++ -*- ---------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator http://lammps.sandia.gov, Sandia National Laboratories Steve Plimpton, sjplimp@sandia.gov @@ -22,13 +22,746 @@ struct pack_plan_3d { int nqty; // # of values/element }; -// function prototypes -void pack_3d(double *, double *, struct pack_plan_3d *); -void unpack_3d(double *, double *, struct pack_plan_3d *); -void unpack_3d_permute1_1(double *, double *, struct pack_plan_3d *); -void unpack_3d_permute1_2(double *, double *, struct pack_plan_3d *); -void unpack_3d_permute1_n(double *, double *, struct pack_plan_3d *); -void unpack_3d_permute2_1(double *, double *, struct pack_plan_3d *); -void unpack_3d_permute2_2(double *, double *, struct pack_plan_3d *); -void unpack_3d_permute2_n(double *, double *, struct pack_plan_3d *); +#if !defined(PACK_POINTER) && !defined(PACK_MEMCPY) +#define PACK_ARRAY +#endif + +#ifndef PACK_DATA +#define PACK_DATA double +#endif + +/* ---------------------------------------------------------------------- + Pack and unpack functions: + + pack routines copy strided values from data into contiguous locs in buf + unpack routines copy contiguous values from buf into strided locs in data + different versions of unpack depending on permutation + and # of values/element + PACK_ARRAY routines work via array indices (default) + PACK_POINTER routines work via pointers + PACK_MEMCPY routines work via pointers and memcpy function +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + pack/unpack with array indices +------------------------------------------------------------------------- */ + +#ifdef PACK_ARRAY + +/* ---------------------------------------------------------------------- + pack from data -> buf +------------------------------------------------------------------------- */ + +static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan) +{ + register int in,out,fast,mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + in = 0; + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_plane; + for (mid = 0; mid < nmid; mid++) { + out = plane + mid*nstride_line; + for (fast = 0; fast < nfast; fast++) + buf[in++] = data[out++]; + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data +------------------------------------------------------------------------- */ + +static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) +{ + register int in,out,fast,mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + out = 0; + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_plane; + for (mid = 0; mid < nmid; mid++) { + in = plane + mid*nstride_line; + for (fast = 0; fast < nfast; fast++) + data[in++] = buf[out++]; + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, one axis permutation, 1 value/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) +{ + register int in,out,fast,mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + out = 0; + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_line; + for (mid = 0; mid < nmid; mid++) { + in = plane + mid; + for (fast = 0; fast < nfast; fast++, in += nstride_plane) + data[in] = buf[out++]; + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, one axis permutation, 2 values/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) +{ + register int in,out,fast,mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + out = 0; + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_line; + for (mid = 0; mid < nmid; mid++) { + in = plane + 2*mid; + for (fast = 0; fast < nfast; fast++, in += nstride_plane) { + data[in] = buf[out++]; + data[in+1] = buf[out++]; + } + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, one axis permutation, nqty values/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register int in,out,iqty,instart,fast,mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + nqty = plan->nqty; + + out = 0; + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_line; + for (mid = 0; mid < nmid; mid++) { + instart = plane + nqty*mid; + for (fast = 0; fast < nfast; fast++, instart += nstride_plane) { + in = instart; + for (iqty = 0; iqty < nqty; iqty++) data[in++] = buf[out++]; + } + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, two axis permutation, 1 value/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register int in,out,fast,mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + out = 0; + for (slow = 0; slow < nslow; slow++) { + for (mid = 0; mid < nmid; mid++) { + in = slow + mid*nstride_plane; + for (fast = 0; fast < nfast; fast++, in += nstride_line) + data[in] = buf[out++]; + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, two axis permutation, 2 values/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register int in,out,fast,mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + out = 0; + for (slow = 0; slow < nslow; slow++) { + for (mid = 0; mid < nmid; mid++) { + in = 2*slow + mid*nstride_plane; + for (fast = 0; fast < nfast; fast++, in += nstride_line) { + data[in] = buf[out++]; + data[in+1] = buf[out++]; + } + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, two axis permutation, nqty values/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register int in,out,iqty,instart,fast,mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + nqty = plan->nqty; + + out = 0; + for (slow = 0; slow < nslow; slow++) { + for (mid = 0; mid < nmid; mid++) { + instart = nqty*slow + mid*nstride_plane; + for (fast = 0; fast < nfast; fast++, instart += nstride_line) { + in = instart; + for (iqty = 0; iqty < nqty; iqty++) data[in++] = buf[out++]; + } + } + } +} + +#endif + +/* ---------------------------------------------------------------------- + pack/unpack with pointers +------------------------------------------------------------------------- */ + +#ifdef PACK_POINTER + +/* ---------------------------------------------------------------------- + pack from data -> buf +------------------------------------------------------------------------- */ + +static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out,*begin,*end; + register int mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + in = buf; + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_plane; + for (mid = 0; mid < nmid; mid++) { + begin = &(data[plane+mid*nstride_line]); + end = begin + nfast; + for (out = begin; out < end; out++) + *(in++) = *out; + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data +------------------------------------------------------------------------- */ + +static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out,*begin,*end; + register int mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + out = buf; + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_plane; + for (mid = 0; mid < nmid; mid++) { + begin = &(data[plane+mid*nstride_line]); + end = begin + nfast; + for (in = begin; in < end; in++) + *in = *(out++); + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, one axis permutation, 1 value/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out,*begin,*end; + register int mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + out = buf; + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_line; + for (mid = 0; mid < nmid; mid++) { + begin = &(data[plane+mid]); + end = begin + nfast*nstride_plane; + for (in = begin; in < end; in += nstride_plane) + *in = *(out++); + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, one axis permutation, 2 values/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out,*begin,*end; + register int mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + out = buf; + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_line; + for (mid = 0; mid < nmid; mid++) { + begin = &(data[plane+2*mid]); + end = begin + nfast*nstride_plane; + for (in = begin; in < end; in += nstride_plane) { + *in = *(out++); + *(in+1) = *(out++); + } + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, one axis permutation, nqty values/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out,*instart,*begin,*end; + register int iqty,mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + nqty = plan->nqty; + + out = buf; + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_line; + for (mid = 0; mid < nmid; mid++) { + begin = &(data[plane+nqty*mid]); + end = begin + nfast*nstride_plane; + for (instart = begin; instart < end; instart += nstride_plane) { + in = instart; + for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++); + } + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, two axis permutation, 1 value/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out,*begin,*end; + register int mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + out = buf; + for (slow = 0; slow < nslow; slow++) { + for (mid = 0; mid < nmid; mid++) { + begin = &(data[slow+mid*nstride_plane]); + end = begin + nfast*nstride_line; + for (in = begin; in < end; in += nstride_line) + *in = *(out++); + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, two axis permutation, 2 values/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out,*begin,*end; + register int mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + out = buf; + for (slow = 0; slow < nslow; slow++) { + for (mid = 0; mid < nmid; mid++) { + begin = &(data[2*slow+mid*nstride_plane]); + end = begin + nfast*nstride_line; + for (in = begin; in < end; in += nstride_line) { + *in = *(out++); + *(in+1) = *(out++); + } + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, two axis permutation, nqty values/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out,*instart,*begin,*end; + register int iqty,mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + nqty = plan->nqty; + + out = buf; + for (slow = 0; slow < nslow; slow++) { + for (mid = 0; mid < nmid; mid++) { + begin = &(data[nqty*slow+mid*nstride_plane]); + end = begin + nfast*nstride_line; + for (instart = begin; instart < end; instart += nstride_line) { + in = instart; + for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++); + } + } + } +} + +#endif + +/* ---------------------------------------------------------------------- + pack/unpack with pointers and memcpy function + no memcpy version of unpack_permute routines, + just use PACK_POINTER versions +------------------------------------------------------------------------- */ + +#ifdef PACK_MEMCPY + +/* ---------------------------------------------------------------------- + pack from data -> buf +------------------------------------------------------------------------- */ + +static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out; + register int mid,slow,size; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,upto; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + size = nfast*sizeof(PACK_DATA); + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_plane; + upto = slow*nmid*nfast; + for (mid = 0; mid < nmid; mid++) { + in = &(buf[upto+mid*nfast]); + out = &(data[plane+mid*nstride_line]); + memcpy(in,out,size); + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data +------------------------------------------------------------------------- */ + +static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out; + register int mid,slow,size; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,upto; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + size = nfast*sizeof(PACK_DATA); + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_plane; + upto = slow*nmid*nfast; + for (mid = 0; mid < nmid; mid++) { + in = &(data[plane+mid*nstride_line]); + out = &(buf[upto+mid*nfast]); + memcpy(in,out,size); + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, one axis permutation, 1 value/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out,*begin,*end; + register int mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + out = buf; + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_line; + for (mid = 0; mid < nmid; mid++) { + begin = &(data[plane+mid]); + end = begin + nfast*nstride_plane; + for (in = begin; in < end; in += nstride_plane) + *in = *(out++); + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, one axis permutation, 2 values/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out,*begin,*end; + register int mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + out = buf; + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_line; + for (mid = 0; mid < nmid; mid++) { + begin = &(data[plane+2*mid]); + end = begin + nfast*nstride_plane; + for (in = begin; in < end; in += nstride_plane) { + *in = *(out++); + *(in+1) = *(out++); + } + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, one axis permutation, nqty values/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out,*instart,*begin,*end; + register int iqty,mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + nqty = plan->nqty; + + out = buf; + for (slow = 0; slow < nslow; slow++) { + plane = slow*nstride_line; + for (mid = 0; mid < nmid; mid++) { + begin = &(data[plane+nqty*mid]); + end = begin + nfast*nstride_plane; + for (instart = begin; instart < end; instart += nstride_plane) { + in = instart; + for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++); + } + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, two axis permutation, 1 value/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out,*begin,*end; + register int mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + out = buf; + for (slow = 0; slow < nslow; slow++) { + for (mid = 0; mid < nmid; mid++) { + begin = &(data[slow+mid*nstride_plane]); + end = begin + nfast*nstride_line; + for (in = begin; in < end; in += nstride_line) + *in = *(out++); + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, two axis permutation, 2 values/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out,*begin,*end; + register int mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + + out = buf; + for (slow = 0; slow < nslow; slow++) { + for (mid = 0; mid < nmid; mid++) { + begin = &(data[2*slow+mid*nstride_plane]); + end = begin + nfast*nstride_line; + for (in = begin; in < end; in += nstride_line) { + *in = *(out++); + *(in+1) = *(out++); + } + } + } +} + +/* ---------------------------------------------------------------------- + unpack from buf -> data, two axis permutation, nqty values/element +------------------------------------------------------------------------- */ + +static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan) + +{ + register PACK_DATA *in,*out,*instart,*begin,*end; + register int iqty,mid,slow; + register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty; + + nfast = plan->nfast; + nmid = plan->nmid; + nslow = plan->nslow; + nstride_line = plan->nstride_line; + nstride_plane = plan->nstride_plane; + nqty = plan->nqty; + + out = buf; + for (slow = 0; slow < nslow; slow++) { + for (mid = 0; mid < nmid; mid++) { + begin = &(data[nqty*slow+mid*nstride_plane]); + end = begin + nfast*nstride_line; + for (instart = begin; instart < end; instart += nstride_line) { + in = instart; + for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++); + } + } + } +} + +#endif