git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6622 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -43,6 +43,14 @@ using namespace LAMMPS_NS;
|
|||||||
#define LARGE 10000.0
|
#define LARGE 10000.0
|
||||||
#define EPS_HOC 1.0e-7
|
#define EPS_HOC 1.0e-7
|
||||||
|
|
||||||
|
#ifdef FFT_SINGLE
|
||||||
|
#define ZEROF 0.0f
|
||||||
|
#define ONEF 1.0f
|
||||||
|
#else
|
||||||
|
#define ZEROF 0.0
|
||||||
|
#define ONEF 1.0
|
||||||
|
#endif
|
||||||
|
|
||||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||||
|
|
||||||
@ -50,7 +58,7 @@ using namespace LAMMPS_NS;
|
|||||||
|
|
||||||
PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
|
PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
|
||||||
{
|
{
|
||||||
if (narg != 1) error->all("Illegal kspace_style pppm command");
|
if (narg < 1) error->all("Illegal kspace_style pppm command");
|
||||||
|
|
||||||
precision = atof(arg[0]);
|
precision = atof(arg[0]);
|
||||||
PI = 4.0*atan(1.0);
|
PI = 4.0*atan(1.0);
|
||||||
@ -754,7 +762,7 @@ void PPPM::allocate()
|
|||||||
|
|
||||||
// summation coeffs
|
// summation coeffs
|
||||||
|
|
||||||
gf_b = new double[order];
|
memory->create(gf_b,order,"pppm:gf_b");
|
||||||
memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
|
memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
|
||||||
memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
|
memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
|
||||||
|
|
||||||
@ -778,7 +786,7 @@ void PPPM::allocate()
|
|||||||
remap = new Remap(lmp,world,
|
remap = new Remap(lmp,world,
|
||||||
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
|
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
|
||||||
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
|
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
|
||||||
1,0,0,2);
|
1,0,0,FFT_PRECISION);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
@ -805,7 +813,7 @@ void PPPM::deallocate()
|
|||||||
memory->destroy(buf1);
|
memory->destroy(buf1);
|
||||||
memory->destroy(buf2);
|
memory->destroy(buf2);
|
||||||
|
|
||||||
delete [] gf_b;
|
memory->destroy(gf_b);
|
||||||
memory->destroy2d_offset(rho1d,-order/2);
|
memory->destroy2d_offset(rho1d,-order/2);
|
||||||
memory->destroy2d_offset(rho_coeff,(1-order)/2);
|
memory->destroy2d_offset(rho_coeff,(1-order)/2);
|
||||||
|
|
||||||
@ -967,17 +975,24 @@ void PPPM::set_grid()
|
|||||||
// print info
|
// print info
|
||||||
|
|
||||||
if (me == 0) {
|
if (me == 0) {
|
||||||
|
#ifdef FFT_SINGLE
|
||||||
|
const char fft_prec[] = "single";
|
||||||
|
#else
|
||||||
|
const char fft_prec[] = "double";
|
||||||
|
#endif
|
||||||
if (screen) {
|
if (screen) {
|
||||||
fprintf(screen," G vector = %g\n",g_ewald);
|
fprintf(screen," G vector = %g\n",g_ewald);
|
||||||
fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
|
fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
|
||||||
fprintf(screen," stencil order = %d\n",order);
|
fprintf(screen," stencil order = %d\n",order);
|
||||||
fprintf(screen," RMS precision = %g\n",MAX(lpr,spr));
|
fprintf(screen," RMS precision = %g\n",MAX(lpr,spr));
|
||||||
|
fprintf(screen," using %s precision FFTs\n",fft_prec);
|
||||||
}
|
}
|
||||||
if (logfile) {
|
if (logfile) {
|
||||||
fprintf(logfile," G vector = %g\n",g_ewald);
|
fprintf(logfile," G vector = %g\n",g_ewald);
|
||||||
fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
|
fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
|
||||||
fprintf(logfile," stencil order = %d\n",order);
|
fprintf(logfile," stencil order = %d\n",order);
|
||||||
fprintf(logfile," RMS precision = %g\n",MAX(lpr,spr));
|
fprintf(logfile," RMS precision = %g\n",MAX(lpr,spr));
|
||||||
|
fprintf(logfile," using %s precision FFTs\n",fft_prec);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1036,7 +1051,7 @@ double PPPM::diffpr(double hx, double hy, double hz, double q2, double **acons)
|
|||||||
lprz = rms(hz,zprd*slab_volfactor,natoms,q2,acons);
|
lprz = rms(hz,zprd*slab_volfactor,natoms,q2,acons);
|
||||||
kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
|
kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
|
||||||
real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) /
|
real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) /
|
||||||
sqrt(natoms*cutoff*xprd*yprd*zprd);
|
sqrt(static_cast<double>(natoms)*cutoff*xprd*yprd*zprd);
|
||||||
double value = kspace_prec - real_prec;
|
double value = kspace_prec - real_prec;
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
@ -1113,8 +1128,8 @@ void PPPM::brick2fft()
|
|||||||
if (comm->procneigh[0][1] == me)
|
if (comm->procneigh[0][1] == me)
|
||||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||||
else {
|
else {
|
||||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][0],0,world,&request);
|
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
|
||||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][1],0,world);
|
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
|
||||||
MPI_Wait(&request,&status);
|
MPI_Wait(&request,&status);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1137,8 +1152,8 @@ void PPPM::brick2fft()
|
|||||||
if (comm->procneigh[0][0] == me)
|
if (comm->procneigh[0][0] == me)
|
||||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||||
else {
|
else {
|
||||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][1],0,world,&request);
|
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
|
||||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][0],0,world);
|
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
|
||||||
MPI_Wait(&request,&status);
|
MPI_Wait(&request,&status);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1161,8 +1176,8 @@ void PPPM::brick2fft()
|
|||||||
if (comm->procneigh[1][1] == me)
|
if (comm->procneigh[1][1] == me)
|
||||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||||
else {
|
else {
|
||||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][0],0,world,&request);
|
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
|
||||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][1],0,world);
|
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
|
||||||
MPI_Wait(&request,&status);
|
MPI_Wait(&request,&status);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1185,8 +1200,8 @@ void PPPM::brick2fft()
|
|||||||
if (comm->procneigh[1][0] == me)
|
if (comm->procneigh[1][0] == me)
|
||||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||||
else {
|
else {
|
||||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][1],0,world,&request);
|
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
|
||||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][0],0,world);
|
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
|
||||||
MPI_Wait(&request,&status);
|
MPI_Wait(&request,&status);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1209,8 +1224,8 @@ void PPPM::brick2fft()
|
|||||||
if (comm->procneigh[2][1] == me)
|
if (comm->procneigh[2][1] == me)
|
||||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||||
else {
|
else {
|
||||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][0],0,world,&request);
|
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
|
||||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][1],0,world);
|
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
|
||||||
MPI_Wait(&request,&status);
|
MPI_Wait(&request,&status);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1233,8 +1248,8 @@ void PPPM::brick2fft()
|
|||||||
if (comm->procneigh[2][0] == me)
|
if (comm->procneigh[2][0] == me)
|
||||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||||
else {
|
else {
|
||||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][1],0,world,&request);
|
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
|
||||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][0],0,world);
|
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
|
||||||
MPI_Wait(&request,&status);
|
MPI_Wait(&request,&status);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1284,8 +1299,8 @@ void PPPM::fillbrick()
|
|||||||
if (comm->procneigh[2][1] == me)
|
if (comm->procneigh[2][1] == me)
|
||||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||||
else {
|
else {
|
||||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][0],0,world,&request);
|
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
|
||||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][1],0,world);
|
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
|
||||||
MPI_Wait(&request,&status);
|
MPI_Wait(&request,&status);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1314,8 +1329,8 @@ void PPPM::fillbrick()
|
|||||||
if (comm->procneigh[2][0] == me)
|
if (comm->procneigh[2][0] == me)
|
||||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||||
else {
|
else {
|
||||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][1],0,world,&request);
|
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
|
||||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][0],0,world);
|
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
|
||||||
MPI_Wait(&request,&status);
|
MPI_Wait(&request,&status);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1344,8 +1359,8 @@ void PPPM::fillbrick()
|
|||||||
if (comm->procneigh[1][1] == me)
|
if (comm->procneigh[1][1] == me)
|
||||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||||
else {
|
else {
|
||||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][0],0,world,&request);
|
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
|
||||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][1],0,world);
|
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
|
||||||
MPI_Wait(&request,&status);
|
MPI_Wait(&request,&status);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1374,8 +1389,8 @@ void PPPM::fillbrick()
|
|||||||
if (comm->procneigh[1][0] == me)
|
if (comm->procneigh[1][0] == me)
|
||||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||||
else {
|
else {
|
||||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][1],0,world,&request);
|
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
|
||||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][0],0,world);
|
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
|
||||||
MPI_Wait(&request,&status);
|
MPI_Wait(&request,&status);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1404,8 +1419,8 @@ void PPPM::fillbrick()
|
|||||||
if (comm->procneigh[0][1] == me)
|
if (comm->procneigh[0][1] == me)
|
||||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||||
else {
|
else {
|
||||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][0],0,world,&request);
|
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
|
||||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][1],0,world);
|
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
|
||||||
MPI_Wait(&request,&status);
|
MPI_Wait(&request,&status);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1434,8 +1449,8 @@ void PPPM::fillbrick()
|
|||||||
if (comm->procneigh[0][0] == me)
|
if (comm->procneigh[0][0] == me)
|
||||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||||
else {
|
else {
|
||||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][1],0,world,&request);
|
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
|
||||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][0],0,world);
|
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
|
||||||
MPI_Wait(&request,&status);
|
MPI_Wait(&request,&status);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1497,12 +1512,12 @@ void PPPM::particle_map()
|
|||||||
void PPPM::make_rho()
|
void PPPM::make_rho()
|
||||||
{
|
{
|
||||||
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
||||||
double dx,dy,dz,x0,y0,z0;
|
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||||
|
|
||||||
// clear 3d density array
|
// clear 3d density array
|
||||||
|
|
||||||
double *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
|
FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
|
||||||
for (i = 0; i < ngrid; i++) vec[i] = 0.0;
|
for (i = 0; i < ngrid; i++) vec[i] = ZEROF;
|
||||||
|
|
||||||
// loop over my charges, add their contribution to nearby grid points
|
// loop over my charges, add their contribution to nearby grid points
|
||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
@ -1554,7 +1569,7 @@ void PPPM::poisson(int eflag, int vflag)
|
|||||||
n = 0;
|
n = 0;
|
||||||
for (i = 0; i < nfft; i++) {
|
for (i = 0; i < nfft; i++) {
|
||||||
work1[n++] = density_fft[i];
|
work1[n++] = density_fft[i];
|
||||||
work1[n++] = 0.0;
|
work1[n++] = ZEROF;
|
||||||
}
|
}
|
||||||
|
|
||||||
fft1->compute(work1,work1,1);
|
fft1->compute(work1,work1,1);
|
||||||
@ -1667,8 +1682,8 @@ void PPPM::poisson(int eflag, int vflag)
|
|||||||
void PPPM::fieldforce()
|
void PPPM::fieldforce()
|
||||||
{
|
{
|
||||||
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
||||||
double dx,dy,dz,x0,y0,z0;
|
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||||
double ek[3];
|
FFT_SCALAR ekx,eky,ekz;
|
||||||
|
|
||||||
// loop over my charges, interpolate electric field from nearby grid points
|
// loop over my charges, interpolate electric field from nearby grid points
|
||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
@ -1679,6 +1694,7 @@ void PPPM::fieldforce()
|
|||||||
double *q = atom->q;
|
double *q = atom->q;
|
||||||
double **x = atom->x;
|
double **x = atom->x;
|
||||||
double **f = atom->f;
|
double **f = atom->f;
|
||||||
|
|
||||||
int nlocal = atom->nlocal;
|
int nlocal = atom->nlocal;
|
||||||
|
|
||||||
for (i = 0; i < nlocal; i++) {
|
for (i = 0; i < nlocal; i++) {
|
||||||
@ -1691,7 +1707,7 @@ void PPPM::fieldforce()
|
|||||||
|
|
||||||
compute_rho1d(dx,dy,dz);
|
compute_rho1d(dx,dy,dz);
|
||||||
|
|
||||||
ek[0] = ek[1] = ek[2] = 0.0;
|
ekx = eky = ekz = ZEROF;
|
||||||
for (n = nlower; n <= nupper; n++) {
|
for (n = nlower; n <= nupper; n++) {
|
||||||
mz = n+nz;
|
mz = n+nz;
|
||||||
z0 = rho1d[2][n];
|
z0 = rho1d[2][n];
|
||||||
@ -1701,18 +1717,18 @@ void PPPM::fieldforce()
|
|||||||
for (l = nlower; l <= nupper; l++) {
|
for (l = nlower; l <= nupper; l++) {
|
||||||
mx = l+nx;
|
mx = l+nx;
|
||||||
x0 = y0*rho1d[0][l];
|
x0 = y0*rho1d[0][l];
|
||||||
ek[0] -= x0*vdx_brick[mz][my][mx];;
|
ekx -= x0*vdx_brick[mz][my][mx];
|
||||||
ek[1] -= x0*vdy_brick[mz][my][mx];;
|
eky -= x0*vdy_brick[mz][my][mx];
|
||||||
ek[2] -= x0*vdz_brick[mz][my][mx];;
|
ekz -= x0*vdz_brick[mz][my][mx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// convert E-field to force
|
// convert E-field to force
|
||||||
|
const double qfactor = qqrd2e*scale*q[i];
|
||||||
f[i][0] += qqrd2e*scale * q[i]*ek[0];
|
f[i][0] += qfactor*ekx;
|
||||||
f[i][1] += qqrd2e*scale * q[i]*ek[1];
|
f[i][1] += qfactor*eky;
|
||||||
f[i][2] += qqrd2e*scale * q[i]*ek[2];
|
f[i][2] += qfactor*ekz;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1758,15 +1774,16 @@ void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
|
|||||||
charge assignment into rho1d
|
charge assignment into rho1d
|
||||||
dx,dy,dz = distance of particle from "lower left" grid point
|
dx,dy,dz = distance of particle from "lower left" grid point
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
|
||||||
void PPPM::compute_rho1d(double dx, double dy, double dz)
|
const FFT_SCALAR &dz)
|
||||||
{
|
{
|
||||||
int k,l;
|
int k,l;
|
||||||
|
|
||||||
for (k = (1-order)/2; k <= order/2; k++) {
|
for (k = (1-order)/2; k <= order/2; k++) {
|
||||||
rho1d[0][k] = 0.0;
|
rho1d[0][k] = ZEROF;
|
||||||
rho1d[1][k] = 0.0;
|
rho1d[1][k] = ZEROF;
|
||||||
rho1d[2][k] = 0.0;
|
rho1d[2][k] = ZEROF;
|
||||||
|
|
||||||
for (l = order-1; l >= 0; l--) {
|
for (l = order-1; l >= 0; l--) {
|
||||||
rho1d[0][k] = rho_coeff[l][k] + rho1d[0][k]*dx;
|
rho1d[0][k] = rho_coeff[l][k] + rho1d[0][k]*dx;
|
||||||
rho1d[1][k] = rho_coeff[l][k] + rho1d[1][k]*dy;
|
rho1d[1][k] = rho_coeff[l][k] + rho1d[1][k]*dy;
|
||||||
@ -1797,9 +1814,9 @@ void PPPM::compute_rho1d(double dx, double dy, double dz)
|
|||||||
void PPPM::compute_rho_coeff()
|
void PPPM::compute_rho_coeff()
|
||||||
{
|
{
|
||||||
int j,k,l,m;
|
int j,k,l,m;
|
||||||
double s;
|
FFT_SCALAR s;
|
||||||
|
|
||||||
double **a;
|
FFT_SCALAR **a;
|
||||||
memory->create2d_offset(a,order,-order,order,"pppm:a");
|
memory->create2d_offset(a,order,-order,order,"pppm:a");
|
||||||
|
|
||||||
for (k = -order; k <= order; k++)
|
for (k = -order; k <= order; k++)
|
||||||
@ -1812,8 +1829,13 @@ void PPPM::compute_rho_coeff()
|
|||||||
s = 0.0;
|
s = 0.0;
|
||||||
for (l = 0; l < j; l++) {
|
for (l = 0; l < j; l++) {
|
||||||
a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
|
a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
|
||||||
|
#ifdef FFT_SINGLE
|
||||||
|
s += powf(0.5,(float) l+1) *
|
||||||
|
(a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
|
||||||
|
#else
|
||||||
s += pow(0.5,(double) l+1) *
|
s += pow(0.5,(double) l+1) *
|
||||||
(a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
|
(a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
a[0][k] = s;
|
a[0][k] = s;
|
||||||
}
|
}
|
||||||
@ -1874,7 +1896,7 @@ void PPPM::timing(int n, double &time3d, double &time1d)
|
|||||||
{
|
{
|
||||||
double time1,time2;
|
double time1,time2;
|
||||||
|
|
||||||
for (int i = 0; i < 2*nfft_both; i++) work1[i] = 0.0;
|
for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
|
||||||
|
|
||||||
MPI_Barrier(world);
|
MPI_Barrier(world);
|
||||||
time1 = MPI_Wtime();
|
time1 = MPI_Wtime();
|
||||||
@ -1914,9 +1936,10 @@ double PPPM::memory_usage()
|
|||||||
double bytes = nmax*3 * sizeof(double);
|
double bytes = nmax*3 * sizeof(double);
|
||||||
int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
|
int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
|
||||||
(nzhi_out-nzlo_out+1);
|
(nzhi_out-nzlo_out+1);
|
||||||
bytes += 4 * nbrick * sizeof(double);
|
bytes += 4 * nbrick * sizeof(FFT_SCALAR);
|
||||||
bytes += 6 * nfft_both * sizeof(double);
|
bytes += 6 * nfft_both * sizeof(double);
|
||||||
bytes += nfft_both*6 * sizeof(double);
|
bytes += nfft_both * sizeof(double);
|
||||||
bytes += 2 * nbuf * sizeof(double);
|
bytes += nfft_both*5 * sizeof(FFT_SCALAR);
|
||||||
|
bytes += 2 * nbuf * sizeof(FFT_SCALAR);
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
/* ----------------------------------------------------------------------
|
/* -*- c++ -*- ----------------------------------------------------------
|
||||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
http://lammps.sandia.gov, Sandia National Laboratories
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
Steve Plimpton, sjplimp@sandia.gov
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
@ -20,6 +20,17 @@ KSpaceStyle(pppm,PPPM)
|
|||||||
#ifndef LMP_PPPM_H
|
#ifndef LMP_PPPM_H
|
||||||
#define LMP_PPPM_H
|
#define LMP_PPPM_H
|
||||||
|
|
||||||
|
#include "lmptype.h"
|
||||||
|
#include "mpi.h"
|
||||||
|
|
||||||
|
#ifdef FFT_SINGLE
|
||||||
|
typedef float FFT_SCALAR;
|
||||||
|
#define MPI_FFT_SCALAR MPI_FLOAT
|
||||||
|
#else
|
||||||
|
typedef double FFT_SCALAR;
|
||||||
|
#define MPI_FFT_SCALAR MPI_DOUBLE
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "kspace.h"
|
#include "kspace.h"
|
||||||
|
|
||||||
namespace LAMMPS_NS {
|
namespace LAMMPS_NS {
|
||||||
@ -28,11 +39,11 @@ class PPPM : public KSpace {
|
|||||||
public:
|
public:
|
||||||
PPPM(class LAMMPS *, int, char **);
|
PPPM(class LAMMPS *, int, char **);
|
||||||
virtual ~PPPM();
|
virtual ~PPPM();
|
||||||
void init();
|
virtual void init();
|
||||||
void setup();
|
virtual void setup();
|
||||||
void compute(int, int);
|
virtual void compute(int, int);
|
||||||
void timing(int, double &, double &);
|
virtual void timing(int, double &, double &);
|
||||||
double memory_usage();
|
virtual double memory_usage();
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
int me,nprocs;
|
int me,nprocs;
|
||||||
@ -54,17 +65,17 @@ class PPPM : public KSpace {
|
|||||||
int nlower,nupper;
|
int nlower,nupper;
|
||||||
int ngrid,nfft,nbuf,nfft_both;
|
int ngrid,nfft,nbuf,nfft_both;
|
||||||
|
|
||||||
double ***density_brick;
|
FFT_SCALAR ***density_brick;
|
||||||
double ***vdx_brick,***vdy_brick,***vdz_brick;
|
FFT_SCALAR ***vdx_brick,***vdy_brick,***vdz_brick;
|
||||||
double *greensfn;
|
double *greensfn;
|
||||||
double **vg;
|
double **vg;
|
||||||
double *fkx,*fky,*fkz;
|
double *fkx,*fky,*fkz;
|
||||||
double *density_fft;
|
FFT_SCALAR *density_fft;
|
||||||
double *work1,*work2;
|
FFT_SCALAR *work1,*work2;
|
||||||
double *buf1,*buf2;
|
FFT_SCALAR *buf1,*buf2;
|
||||||
|
|
||||||
double *gf_b;
|
double *gf_b;
|
||||||
double **rho1d,**rho_coeff;
|
FFT_SCALAR **rho1d,**rho_coeff;
|
||||||
|
|
||||||
class FFT3d *fft1,*fft2;
|
class FFT3d *fft1,*fft2;
|
||||||
class Remap *remap;
|
class Remap *remap;
|
||||||
@ -80,8 +91,8 @@ class PPPM : public KSpace {
|
|||||||
double alpha; // geometric factor
|
double alpha; // geometric factor
|
||||||
|
|
||||||
void set_grid();
|
void set_grid();
|
||||||
void allocate();
|
virtual void allocate();
|
||||||
void deallocate();
|
virtual void deallocate();
|
||||||
int factorable(int);
|
int factorable(int);
|
||||||
double rms(double, double, bigint, double, double **);
|
double rms(double, double, bigint, double, double **);
|
||||||
double diffpr(double, double, double, double, double **);
|
double diffpr(double, double, double, double, double **);
|
||||||
@ -89,12 +100,13 @@ class PPPM : public KSpace {
|
|||||||
double gf_denom(double, double, double);
|
double gf_denom(double, double, double);
|
||||||
virtual void particle_map();
|
virtual void particle_map();
|
||||||
virtual void make_rho();
|
virtual void make_rho();
|
||||||
void brick2fft();
|
virtual void brick2fft();
|
||||||
void fillbrick();
|
virtual void fillbrick();
|
||||||
void poisson(int, int);
|
virtual void poisson(int, int);
|
||||||
virtual void fieldforce();
|
virtual void fieldforce();
|
||||||
void procs2grid2d(int,int,int,int *, int*);
|
void procs2grid2d(int,int,int,int *, int*);
|
||||||
void compute_rho1d(double, double, double);
|
void compute_rho1d(const FFT_SCALAR &, const FFT_SCALAR &,
|
||||||
|
const FFT_SCALAR &);
|
||||||
void compute_rho_coeff();
|
void compute_rho_coeff();
|
||||||
void slabcorr(int);
|
void slabcorr(int);
|
||||||
};
|
};
|
||||||
|
|||||||
@ -26,6 +26,14 @@ using namespace LAMMPS_NS;
|
|||||||
|
|
||||||
#define OFFSET 16384
|
#define OFFSET 16384
|
||||||
|
|
||||||
|
#ifdef FFT_SINGLE
|
||||||
|
#define ZEROF 0.0f
|
||||||
|
#define ONEF 1.0f
|
||||||
|
#else
|
||||||
|
#define ZEROF 0.0
|
||||||
|
#define ONEF 1.0
|
||||||
|
#endif
|
||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
PPPMTIP4P::PPPMTIP4P(LAMMPS *lmp, int narg, char **arg) :
|
PPPMTIP4P::PPPMTIP4P(LAMMPS *lmp, int narg, char **arg) :
|
||||||
@ -87,13 +95,13 @@ void PPPMTIP4P::particle_map()
|
|||||||
void PPPMTIP4P::make_rho()
|
void PPPMTIP4P::make_rho()
|
||||||
{
|
{
|
||||||
int i,l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2;
|
int i,l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2;
|
||||||
double dx,dy,dz,x0,y0,z0;
|
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||||
double *xi,xM[3];
|
double *xi,xM[3];
|
||||||
|
|
||||||
// clear 3d density array
|
// clear 3d density array
|
||||||
|
|
||||||
double *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
|
FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
|
||||||
for (i = 0; i < ngrid; i++) vec[i] = 0.0;
|
for (i = 0; i < ngrid; i++) vec[i] = ZEROF;
|
||||||
|
|
||||||
// loop over my charges, add their contribution to nearby grid points
|
// loop over my charges, add their contribution to nearby grid points
|
||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
@ -143,13 +151,13 @@ void PPPMTIP4P::make_rho()
|
|||||||
void PPPMTIP4P::fieldforce()
|
void PPPMTIP4P::fieldforce()
|
||||||
{
|
{
|
||||||
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
||||||
double dx,dy,dz,x0,y0,z0;
|
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||||
double ek[3];
|
FFT_SCALAR ekx,eky,ekz;
|
||||||
double *xi;
|
double *xi;
|
||||||
int iH1,iH2;
|
int iH1,iH2;
|
||||||
double xM[3];
|
double xM[3];
|
||||||
double fx,fy,fz;
|
double fx,fy,fz;
|
||||||
double ddotf, rOM[3], f1[3];
|
double ddotf, rOMx, rOMy, rOMz, f1x, f1y, f1z;
|
||||||
|
|
||||||
// loop over my charges, interpolate electric field from nearby grid points
|
// loop over my charges, interpolate electric field from nearby grid points
|
||||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||||
@ -160,6 +168,7 @@ void PPPMTIP4P::fieldforce()
|
|||||||
double *q = atom->q;
|
double *q = atom->q;
|
||||||
double **x = atom->x;
|
double **x = atom->x;
|
||||||
double **f = atom->f;
|
double **f = atom->f;
|
||||||
|
|
||||||
int *type = atom->type;
|
int *type = atom->type;
|
||||||
int nlocal = atom->nlocal;
|
int nlocal = atom->nlocal;
|
||||||
|
|
||||||
@ -178,7 +187,7 @@ void PPPMTIP4P::fieldforce()
|
|||||||
|
|
||||||
compute_rho1d(dx,dy,dz);
|
compute_rho1d(dx,dy,dz);
|
||||||
|
|
||||||
ek[0] = ek[1] = ek[2] = 0.0;
|
ekx = eky = ekz = ZEROF;
|
||||||
for (n = nlower; n <= nupper; n++) {
|
for (n = nlower; n <= nupper; n++) {
|
||||||
mz = n+nz;
|
mz = n+nz;
|
||||||
z0 = rho1d[2][n];
|
z0 = rho1d[2][n];
|
||||||
@ -188,47 +197,47 @@ void PPPMTIP4P::fieldforce()
|
|||||||
for (l = nlower; l <= nupper; l++) {
|
for (l = nlower; l <= nupper; l++) {
|
||||||
mx = l+nx;
|
mx = l+nx;
|
||||||
x0 = y0*rho1d[0][l];
|
x0 = y0*rho1d[0][l];
|
||||||
ek[0] -= x0*vdx_brick[mz][my][mx];
|
ekx -= x0*vdx_brick[mz][my][mx];
|
||||||
ek[1] -= x0*vdy_brick[mz][my][mx];
|
eky -= x0*vdy_brick[mz][my][mx];
|
||||||
ek[2] -= x0*vdz_brick[mz][my][mx];
|
ekz -= x0*vdz_brick[mz][my][mx];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// convert E-field to force
|
// convert E-field to force
|
||||||
|
const double qfactor = qqrd2e*scale*q[i];
|
||||||
if (type[i] != typeO) {
|
if (type[i] != typeO) {
|
||||||
f[i][0] += qqrd2e*q[i]*ek[0];
|
f[i][0] += qfactor*ekx;
|
||||||
f[i][1] += qqrd2e*q[i]*ek[1];
|
f[i][1] += qfactor*eky;
|
||||||
f[i][2] += qqrd2e*q[i]*ek[2];
|
f[i][2] += qfactor*ekz;
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
fx = qqrd2e * q[i] * ek[0];
|
fx = qfactor * ekx;
|
||||||
fy = qqrd2e * q[i] * ek[1];
|
fy = qfactor * eky;
|
||||||
fz = qqrd2e * q[i] * ek[2];
|
fz = qfactor * ekz;
|
||||||
find_M(i,iH1,iH2,xM);
|
find_M(i,iH1,iH2,xM);
|
||||||
|
|
||||||
rOM[0] = xM[0] - x[i][0];
|
rOMx = xM[0] - x[i][0];
|
||||||
rOM[1] = xM[1] - x[i][1];
|
rOMy = xM[1] - x[i][1];
|
||||||
rOM[2] = xM[2] - x[i][2];
|
rOMz = xM[2] - x[i][2];
|
||||||
|
|
||||||
ddotf = (rOM[0] * fx + rOM[1] * fy + rOM[2] * fz) / (qdist * qdist);
|
ddotf = (rOMx * fx + rOMy * fy + rOMz * fz) / (qdist * qdist);
|
||||||
|
|
||||||
f1[0] = ddotf * rOM[0];
|
f1x = ddotf * rOMx;
|
||||||
f1[1] = ddotf * rOM[1];
|
f1y = ddotf * rOMy;
|
||||||
f1[2] = ddotf * rOM[2];
|
f1z = ddotf * rOMz;
|
||||||
|
|
||||||
f[i][0] += fx - alpha * (fx - f1[0]);
|
f[i][0] += fx - alpha * (fx - f1x);
|
||||||
f[i][1] += fy - alpha * (fy - f1[1]);
|
f[i][1] += fy - alpha * (fy - f1y);
|
||||||
f[i][2] += fz - alpha * (fz - f1[2]);
|
f[i][2] += fz - alpha * (fz - f1z);
|
||||||
|
|
||||||
f[iH1][0] += 0.5*alpha*(fx - f1[0]);
|
f[iH1][0] += 0.5*alpha*(fx - f1x);
|
||||||
f[iH1][1] += 0.5*alpha*(fy - f1[1]);
|
f[iH1][1] += 0.5*alpha*(fy - f1y);
|
||||||
f[iH1][2] += 0.5*alpha*(fz - f1[2]);
|
f[iH1][2] += 0.5*alpha*(fz - f1z);
|
||||||
|
|
||||||
f[iH2][0] += 0.5*alpha*(fx - f1[0]);
|
f[iH2][0] += 0.5*alpha*(fx - f1x);
|
||||||
f[iH2][1] += 0.5*alpha*(fy - f1[1]);
|
f[iH2][1] += 0.5*alpha*(fy - f1y);
|
||||||
f[iH2][2] += 0.5*alpha*(fz - f1[2]);
|
f[iH2][2] += 0.5*alpha*(fz - f1z);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -27,12 +27,14 @@ namespace LAMMPS_NS {
|
|||||||
class PPPMTIP4P : public PPPM {
|
class PPPMTIP4P : public PPPM {
|
||||||
public:
|
public:
|
||||||
PPPMTIP4P(class LAMMPS *, int, char **);
|
PPPMTIP4P(class LAMMPS *, int, char **);
|
||||||
|
virtual ~PPPMTIP4P () {};
|
||||||
|
|
||||||
|
protected:
|
||||||
|
virtual void particle_map();
|
||||||
|
virtual void make_rho();
|
||||||
|
virtual void fieldforce();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void particle_map();
|
|
||||||
void make_rho();
|
|
||||||
void fieldforce();
|
|
||||||
|
|
||||||
void find_M(int, int &, int &, double *);
|
void find_M(int, int &, int &, double *);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -11,10 +11,12 @@
|
|||||||
See the README file in the top-level LAMMPS directory.
|
See the README file in the top-level LAMMPS directory.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
#include "mpi.h"
|
|
||||||
#include "stdio.h"
|
#include "stdio.h"
|
||||||
#include "stdlib.h"
|
#include "stdlib.h"
|
||||||
#include "remap.h"
|
#include "remap.h"
|
||||||
|
|
||||||
|
#define PACK_DATA FFT_SCALAR
|
||||||
|
|
||||||
#include "pack.h"
|
#include "pack.h"
|
||||||
|
|
||||||
#define MIN(A,B) ((A) < (B)) ? (A) : (B)
|
#define MIN(A,B) ((A) < (B)) ? (A) : (B)
|
||||||
@ -57,13 +59,13 @@
|
|||||||
plan plan returned by previous call to remap_3d_create_plan
|
plan plan returned by previous call to remap_3d_create_plan
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
void remap_3d(double *in, double *out, double *buf,
|
void remap_3d(FFT_SCALAR *in, FFT_SCALAR *out, FFT_SCALAR *buf,
|
||||||
struct remap_plan_3d *plan)
|
struct remap_plan_3d *plan)
|
||||||
|
|
||||||
{
|
{
|
||||||
MPI_Status status;
|
MPI_Status status;
|
||||||
int i,isend,irecv;
|
int i,isend,irecv;
|
||||||
double *scratch;
|
FFT_SCALAR *scratch;
|
||||||
|
|
||||||
if (plan->memory == 0)
|
if (plan->memory == 0)
|
||||||
scratch = buf;
|
scratch = buf;
|
||||||
@ -74,7 +76,7 @@ void remap_3d(double *in, double *out, double *buf,
|
|||||||
|
|
||||||
for (irecv = 0; irecv < plan->nrecv; irecv++)
|
for (irecv = 0; irecv < plan->nrecv; irecv++)
|
||||||
MPI_Irecv(&scratch[plan->recv_bufloc[irecv]],plan->recv_size[irecv],
|
MPI_Irecv(&scratch[plan->recv_bufloc[irecv]],plan->recv_size[irecv],
|
||||||
MPI_DOUBLE,plan->recv_proc[irecv],0,
|
MPI_FFT_SCALAR,plan->recv_proc[irecv],0,
|
||||||
plan->comm,&plan->request[irecv]);
|
plan->comm,&plan->request[irecv]);
|
||||||
|
|
||||||
// send all messages to other procs
|
// send all messages to other procs
|
||||||
@ -82,7 +84,7 @@ void remap_3d(double *in, double *out, double *buf,
|
|||||||
for (isend = 0; isend < plan->nsend; isend++) {
|
for (isend = 0; isend < plan->nsend; isend++) {
|
||||||
plan->pack(&in[plan->send_offset[isend]],
|
plan->pack(&in[plan->send_offset[isend]],
|
||||||
plan->sendbuf,&plan->packplan[isend]);
|
plan->sendbuf,&plan->packplan[isend]);
|
||||||
MPI_Send(plan->sendbuf,plan->send_size[isend],MPI_DOUBLE,
|
MPI_Send(plan->sendbuf,plan->send_size[isend],MPI_FFT_SCALAR,
|
||||||
plan->send_proc[isend],0,plan->comm);
|
plan->send_proc[isend],0,plan->comm);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -150,13 +152,6 @@ struct remap_plan_3d *remap_3d_create_plan(
|
|||||||
MPI_Comm_rank(comm,&me);
|
MPI_Comm_rank(comm,&me);
|
||||||
MPI_Comm_size(comm,&nprocs);
|
MPI_Comm_size(comm,&nprocs);
|
||||||
|
|
||||||
// single precision not yet supported
|
|
||||||
|
|
||||||
if (precision == 1) {
|
|
||||||
if (me == 0) printf("Single precision not supported\n");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
// allocate memory for plan data struct
|
// allocate memory for plan data struct
|
||||||
|
|
||||||
plan = (struct remap_plan_3d *) malloc(sizeof(struct remap_plan_3d));
|
plan = (struct remap_plan_3d *) malloc(sizeof(struct remap_plan_3d));
|
||||||
@ -209,10 +204,7 @@ struct remap_plan_3d *remap_3d_create_plan(
|
|||||||
// malloc space for send info
|
// malloc space for send info
|
||||||
|
|
||||||
if (nsend) {
|
if (nsend) {
|
||||||
if (precision == 1)
|
plan->pack = pack_3d;
|
||||||
plan->pack = NULL;
|
|
||||||
else
|
|
||||||
plan->pack = pack_3d;
|
|
||||||
|
|
||||||
plan->send_offset = (int *) malloc(nsend*sizeof(int));
|
plan->send_offset = (int *) malloc(nsend*sizeof(int));
|
||||||
plan->send_size = (int *) malloc(nsend*sizeof(int));
|
plan->send_size = (int *) malloc(nsend*sizeof(int));
|
||||||
@ -272,45 +264,23 @@ struct remap_plan_3d *remap_3d_create_plan(
|
|||||||
// malloc space for recv info
|
// malloc space for recv info
|
||||||
|
|
||||||
if (nrecv) {
|
if (nrecv) {
|
||||||
if (precision == 1) {
|
if (permute == 0)
|
||||||
if (permute == 0)
|
plan->unpack = unpack_3d;
|
||||||
plan->unpack = NULL;
|
else if (permute == 1) {
|
||||||
else if (permute == 1) {
|
if (nqty == 1)
|
||||||
if (nqty == 1)
|
plan->unpack = unpack_3d_permute1_1;
|
||||||
plan->unpack = NULL;
|
else if (nqty == 2)
|
||||||
else if (nqty == 2)
|
plan->unpack = unpack_3d_permute1_2;
|
||||||
plan->unpack = NULL;
|
else
|
||||||
else
|
plan->unpack = unpack_3d_permute1_n;
|
||||||
plan->unpack = NULL;
|
|
||||||
}
|
|
||||||
else if (permute == 2) {
|
|
||||||
if (nqty == 1)
|
|
||||||
plan->unpack = NULL;
|
|
||||||
else if (nqty == 2)
|
|
||||||
plan->unpack = NULL;
|
|
||||||
else
|
|
||||||
plan->unpack = NULL;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else if (precision == 2) {
|
else if (permute == 2) {
|
||||||
if (permute == 0)
|
if (nqty == 1)
|
||||||
plan->unpack = unpack_3d;
|
plan->unpack = unpack_3d_permute2_1;
|
||||||
else if (permute == 1) {
|
else if (nqty == 2)
|
||||||
if (nqty == 1)
|
plan->unpack = unpack_3d_permute2_2;
|
||||||
plan->unpack = unpack_3d_permute1_1;
|
else
|
||||||
else if (nqty == 2)
|
plan->unpack = unpack_3d_permute2_n;
|
||||||
plan->unpack = unpack_3d_permute1_2;
|
|
||||||
else
|
|
||||||
plan->unpack = unpack_3d_permute1_n;
|
|
||||||
}
|
|
||||||
else if (permute == 2) {
|
|
||||||
if (nqty == 1)
|
|
||||||
plan->unpack = unpack_3d_permute2_1;
|
|
||||||
else if (nqty == 2)
|
|
||||||
plan->unpack = unpack_3d_permute2_2;
|
|
||||||
else
|
|
||||||
plan->unpack = unpack_3d_permute2_n;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
plan->recv_offset = (int *) malloc(nrecv*sizeof(int));
|
plan->recv_offset = (int *) malloc(nrecv*sizeof(int));
|
||||||
@ -408,10 +378,7 @@ struct remap_plan_3d *remap_3d_create_plan(
|
|||||||
size = MAX(size,plan->send_size[nsend]);
|
size = MAX(size,plan->send_size[nsend]);
|
||||||
|
|
||||||
if (size) {
|
if (size) {
|
||||||
if (precision == 1)
|
plan->sendbuf = (FFT_SCALAR *) malloc(size*sizeof(FFT_SCALAR));
|
||||||
plan->sendbuf = NULL;
|
|
||||||
else
|
|
||||||
plan->sendbuf = (double *) malloc(size*sizeof(double));
|
|
||||||
if (plan->sendbuf == NULL) return NULL;
|
if (plan->sendbuf == NULL) return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -422,11 +389,8 @@ struct remap_plan_3d *remap_3d_create_plan(
|
|||||||
|
|
||||||
if (memory == 1) {
|
if (memory == 1) {
|
||||||
if (nrecv > 0) {
|
if (nrecv > 0) {
|
||||||
if (precision == 1)
|
plan->scratch =
|
||||||
plan->scratch = NULL;
|
(FFT_SCALAR *) malloc(nqty*out.isize*out.jsize*out.ksize*sizeof(FFT_SCALAR));
|
||||||
else
|
|
||||||
plan->scratch =
|
|
||||||
(double *) malloc(nqty*out.isize*out.jsize*out.ksize*sizeof(double));
|
|
||||||
if (plan->scratch == NULL) return NULL;
|
if (plan->scratch == NULL) return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
/* ----------------------------------------------------------------------
|
/* -*- c++ -*- ----------------------------------------------------------
|
||||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
http://lammps.sandia.gov, Sandia National Laboratories
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
Steve Plimpton, sjplimp@sandia.gov
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
@ -11,14 +11,24 @@
|
|||||||
See the README file in the top-level LAMMPS directory.
|
See the README file in the top-level LAMMPS directory.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#include <mpi.h>
|
||||||
|
|
||||||
|
#ifdef FFT_SINGLE
|
||||||
|
typedef float FFT_SCALAR;
|
||||||
|
#define MPI_FFT_SCALAR MPI_FLOAT
|
||||||
|
#else
|
||||||
|
typedef double FFT_SCALAR;
|
||||||
|
#define MPI_FFT_SCALAR MPI_DOUBLE
|
||||||
|
#endif
|
||||||
|
|
||||||
// details of how to do a 3d remap
|
// details of how to do a 3d remap
|
||||||
|
|
||||||
struct remap_plan_3d {
|
struct remap_plan_3d {
|
||||||
double *sendbuf; // buffer for MPI sends
|
FFT_SCALAR *sendbuf; // buffer for MPI sends
|
||||||
double *scratch; // scratch buffer for MPI recvs
|
FFT_SCALAR *scratch; // scratch buffer for MPI recvs
|
||||||
void (*pack)(double *, double *, struct pack_plan_3d *);
|
void (*pack)(FFT_SCALAR *, FFT_SCALAR *, struct pack_plan_3d *);
|
||||||
// which pack function to use
|
// which pack function to use
|
||||||
void (*unpack)(double *, double *, struct pack_plan_3d *);
|
void (*unpack)(FFT_SCALAR *, FFT_SCALAR *, struct pack_plan_3d *);
|
||||||
// which unpack function to use
|
// which unpack function to use
|
||||||
int *send_offset; // extraction loc for each send
|
int *send_offset; // extraction loc for each send
|
||||||
int *send_size; // size of each send message
|
int *send_size; // size of each send message
|
||||||
@ -47,7 +57,7 @@ struct extent_3d {
|
|||||||
|
|
||||||
// function prototypes
|
// function prototypes
|
||||||
|
|
||||||
void remap_3d(double *, double *, double *, struct remap_plan_3d *);
|
void remap_3d(FFT_SCALAR *, FFT_SCALAR *, FFT_SCALAR *, struct remap_plan_3d *);
|
||||||
struct remap_plan_3d *remap_3d_create_plan(MPI_Comm,
|
struct remap_plan_3d *remap_3d_create_plan(MPI_Comm,
|
||||||
int, int, int, int, int, int, int, int, int, int, int, int,
|
int, int, int, int, int, int, int, int, int, int, int, int,
|
||||||
int, int, int, int);
|
int, int, int, int);
|
||||||
|
|||||||
@ -42,7 +42,7 @@ Remap::~Remap()
|
|||||||
|
|
||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
void Remap::perform(double *in, double *out, double *buf)
|
void Remap::perform(FFT_SCALAR *in, FFT_SCALAR *out, FFT_SCALAR *buf)
|
||||||
{
|
{
|
||||||
remap_3d(in,out,buf,plan);
|
remap_3d(in,out,buf,plan);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
/* ----------------------------------------------------------------------
|
/* -*- c++ -*- ----------------------------------------------------------
|
||||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
http://lammps.sandia.gov, Sandia National Laboratories
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
Steve Plimpton, sjplimp@sandia.gov
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
@ -24,7 +24,7 @@ class Remap : protected Pointers {
|
|||||||
Remap(class LAMMPS *, MPI_Comm,int,int,int,int,int,int,
|
Remap(class LAMMPS *, MPI_Comm,int,int,int,int,int,int,
|
||||||
int,int,int,int,int,int,int,int,int,int);
|
int,int,int,int,int,int,int,int,int,int);
|
||||||
~Remap();
|
~Remap();
|
||||||
void perform(double *, double *, double *);
|
void perform(FFT_SCALAR *, FFT_SCALAR *, FFT_SCALAR *);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct remap_plan_3d *plan;
|
struct remap_plan_3d *plan;
|
||||||
|
|||||||
757
src/pack.cpp
757
src/pack.cpp
@ -1,757 +0,0 @@
|
|||||||
/* ----------------------------------------------------------------------
|
|
||||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
|
||||||
http://lammps.sandia.gov, Sandia National Laboratories
|
|
||||||
Steve Plimpton, sjplimp@sandia.gov
|
|
||||||
|
|
||||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
|
||||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
|
||||||
certain rights in this software. This software is distributed under
|
|
||||||
the GNU General Public License.
|
|
||||||
|
|
||||||
See the README file in the top-level LAMMPS directory.
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "pack.h"
|
|
||||||
|
|
||||||
#if !defined(PACK_POINTER) && !defined(PACK_MEMCPY)
|
|
||||||
#define PACK_ARRAY
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
Pack and unpack functions:
|
|
||||||
|
|
||||||
pack routines copy strided values from data into contiguous locs in buf
|
|
||||||
unpack routines copy contiguous values from buf into strided locs in data
|
|
||||||
different versions of unpack depending on permutation
|
|
||||||
and # of values/element
|
|
||||||
PACK_ARRAY routines work via array indices (default)
|
|
||||||
PACK_POINTER routines work via pointers
|
|
||||||
PACK_MEMCPY routines work via pointers and memcpy function
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
pack/unpack with array indices
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#ifdef PACK_ARRAY
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
pack from data -> buf
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void pack_3d(double *data, double *buf, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register int in,out,fast,mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
in = 0;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_plane;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
out = plane + mid*nstride_line;
|
|
||||||
for (fast = 0; fast < nfast; fast++)
|
|
||||||
buf[in++] = data[out++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register int in,out,fast,mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
out = 0;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_plane;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
in = plane + mid*nstride_line;
|
|
||||||
for (fast = 0; fast < nfast; fast++)
|
|
||||||
data[in++] = buf[out++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, one axis permutation, 1 value/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register int in,out,fast,mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
out = 0;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_line;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
in = plane + mid;
|
|
||||||
for (fast = 0; fast < nfast; fast++, in += nstride_plane)
|
|
||||||
data[in] = buf[out++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, one axis permutation, 2 values/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register int in,out,fast,mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
out = 0;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_line;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
in = plane + 2*mid;
|
|
||||||
for (fast = 0; fast < nfast; fast++, in += nstride_plane) {
|
|
||||||
data[in] = buf[out++];
|
|
||||||
data[in+1] = buf[out++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, one axis permutation, nqty values/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register int in,out,iqty,instart,fast,mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
nqty = plan->nqty;
|
|
||||||
|
|
||||||
out = 0;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_line;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
instart = plane + nqty*mid;
|
|
||||||
for (fast = 0; fast < nfast; fast++, instart += nstride_plane) {
|
|
||||||
in = instart;
|
|
||||||
for (iqty = 0; iqty < nqty; iqty++) data[in++] = buf[out++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, two axis permutation, 1 value/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register int in,out,fast,mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
out = 0;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
in = slow + mid*nstride_plane;
|
|
||||||
for (fast = 0; fast < nfast; fast++, in += nstride_line)
|
|
||||||
data[in] = buf[out++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, two axis permutation, 2 values/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register int in,out,fast,mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
out = 0;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
in = 2*slow + mid*nstride_plane;
|
|
||||||
for (fast = 0; fast < nfast; fast++, in += nstride_line) {
|
|
||||||
data[in] = buf[out++];
|
|
||||||
data[in+1] = buf[out++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, two axis permutation, nqty values/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register int in,out,iqty,instart,fast,mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
nqty = plan->nqty;
|
|
||||||
|
|
||||||
out = 0;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
instart = nqty*slow + mid*nstride_plane;
|
|
||||||
for (fast = 0; fast < nfast; fast++, instart += nstride_line) {
|
|
||||||
in = instart;
|
|
||||||
for (iqty = 0; iqty < nqty; iqty++) data[in++] = buf[out++];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
pack/unpack with pointers
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#ifdef PACK_POINTER
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
pack from data -> buf
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void pack_3d(double *data, double *buf, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out,*begin,*end;
|
|
||||||
register int mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
in = buf;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_plane;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
begin = &(data[plane+mid*nstride_line]);
|
|
||||||
end = begin + nfast;
|
|
||||||
for (out = begin; out < end; out++)
|
|
||||||
*(in++) = *out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out,*begin,*end;
|
|
||||||
register int mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
out = buf;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_plane;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
begin = &(data[plane+mid*nstride_line]);
|
|
||||||
end = begin + nfast;
|
|
||||||
for (in = begin; in < end; in++)
|
|
||||||
*in = *(out++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, one axis permutation, 1 value/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out,*begin,*end;
|
|
||||||
register int mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
out = buf;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_line;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
begin = &(data[plane+mid]);
|
|
||||||
end = begin + nfast*nstride_plane;
|
|
||||||
for (in = begin; in < end; in += nstride_plane)
|
|
||||||
*in = *(out++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, one axis permutation, 2 values/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out,*begin,*end;
|
|
||||||
register int mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
out = buf;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_line;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
begin = &(data[plane+2*mid]);
|
|
||||||
end = begin + nfast*nstride_plane;
|
|
||||||
for (in = begin; in < end; in += nstride_plane) {
|
|
||||||
*in = *(out++);
|
|
||||||
*(in+1) = *(out++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, one axis permutation, nqty values/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out,*instart,*begin,*end;
|
|
||||||
register int iqty,mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
nqty = plan->nqty;
|
|
||||||
|
|
||||||
out = buf;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_line;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
begin = &(data[plane+nqty*mid]);
|
|
||||||
end = begin + nfast*nstride_plane;
|
|
||||||
for (instart = begin; instart < end; instart += nstride_plane) {
|
|
||||||
in = instart;
|
|
||||||
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, two axis permutation, 1 value/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out,*begin,*end;
|
|
||||||
register int mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
out = buf;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
begin = &(data[slow+mid*nstride_plane]);
|
|
||||||
end = begin + nfast*nstride_line;
|
|
||||||
for (in = begin; in < end; in += nstride_line)
|
|
||||||
*in = *(out++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, two axis permutation, 2 values/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out,*begin,*end;
|
|
||||||
register int mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
out = buf;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
begin = &(data[2*slow+mid*nstride_plane]);
|
|
||||||
end = begin + nfast*nstride_line;
|
|
||||||
for (in = begin; in < end; in += nstride_line) {
|
|
||||||
*in = *(out++);
|
|
||||||
*(in+1) = *(out++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, two axis permutation, nqty values/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out,*instart,*begin,*end;
|
|
||||||
register int iqty,mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
nqty = plan->nqty;
|
|
||||||
|
|
||||||
out = buf;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
begin = &(data[nqty*slow+mid*nstride_plane]);
|
|
||||||
end = begin + nfast*nstride_line;
|
|
||||||
for (instart = begin; instart < end; instart += nstride_line) {
|
|
||||||
in = instart;
|
|
||||||
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
pack/unpack with pointers and memcpy function
|
|
||||||
no memcpy version of unpack_permute routines,
|
|
||||||
just use PACK_POINTER versions
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#ifdef PACK_MEMCPY
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
pack from data -> buf
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void pack_3d(double *data, double *buf, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out;
|
|
||||||
register int mid,slow,size;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,upto;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
size = nfast*sizeof(double);
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_plane;
|
|
||||||
upto = slow*nmid*nfast;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
in = &(buf[upto+mid*nfast]);
|
|
||||||
out = &(data[plane+mid*nstride_line]);
|
|
||||||
memcpy(in,out,size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out;
|
|
||||||
register int mid,slow,size;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,upto;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
size = nfast*sizeof(double);
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_plane;
|
|
||||||
upto = slow*nmid*nfast;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
in = &(data[plane+mid*nstride_line]);
|
|
||||||
out = &(buf[upto+mid*nfast]);
|
|
||||||
memcpy(in,out,size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, one axis permutation, 1 value/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out,*begin,*end;
|
|
||||||
register int mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
out = buf;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_line;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
begin = &(data[plane+mid]);
|
|
||||||
end = begin + nfast*nstride_plane;
|
|
||||||
for (in = begin; in < end; in += nstride_plane)
|
|
||||||
*in = *(out++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, one axis permutation, 2 values/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out,*begin,*end;
|
|
||||||
register int mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
out = buf;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_line;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
begin = &(data[plane+2*mid]);
|
|
||||||
end = begin + nfast*nstride_plane;
|
|
||||||
for (in = begin; in < end; in += nstride_plane) {
|
|
||||||
*in = *(out++);
|
|
||||||
*(in+1) = *(out++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, one axis permutation, nqty values/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out,*instart,*begin,*end;
|
|
||||||
register int iqty,mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
nqty = plan->nqty;
|
|
||||||
|
|
||||||
out = buf;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
plane = slow*nstride_line;
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
begin = &(data[plane+nqty*mid]);
|
|
||||||
end = begin + nfast*nstride_plane;
|
|
||||||
for (instart = begin; instart < end; instart += nstride_plane) {
|
|
||||||
in = instart;
|
|
||||||
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, two axis permutation, 1 value/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out,*begin,*end;
|
|
||||||
register int mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
out = buf;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
begin = &(data[slow+mid*nstride_plane]);
|
|
||||||
end = begin + nfast*nstride_line;
|
|
||||||
for (in = begin; in < end; in += nstride_line)
|
|
||||||
*in = *(out++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, two axis permutation, 2 values/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out,*begin,*end;
|
|
||||||
register int mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
|
|
||||||
out = buf;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
begin = &(data[2*slow+mid*nstride_plane]);
|
|
||||||
end = begin + nfast*nstride_line;
|
|
||||||
for (in = begin; in < end; in += nstride_line) {
|
|
||||||
*in = *(out++);
|
|
||||||
*(in+1) = *(out++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
unpack from buf -> data, two axis permutation, nqty values/element
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan)
|
|
||||||
|
|
||||||
{
|
|
||||||
register double *in,*out,*instart,*begin,*end;
|
|
||||||
register int iqty,mid,slow;
|
|
||||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
|
|
||||||
|
|
||||||
nfast = plan->nfast;
|
|
||||||
nmid = plan->nmid;
|
|
||||||
nslow = plan->nslow;
|
|
||||||
nstride_line = plan->nstride_line;
|
|
||||||
nstride_plane = plan->nstride_plane;
|
|
||||||
nqty = plan->nqty;
|
|
||||||
|
|
||||||
out = buf;
|
|
||||||
for (slow = 0; slow < nslow; slow++) {
|
|
||||||
for (mid = 0; mid < nmid; mid++) {
|
|
||||||
begin = &(data[nqty*slow+mid*nstride_plane]);
|
|
||||||
end = begin + nfast*nstride_line;
|
|
||||||
for (instart = begin; instart < end; instart += nstride_line) {
|
|
||||||
in = instart;
|
|
||||||
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
753
src/pack.h
753
src/pack.h
@ -1,4 +1,4 @@
|
|||||||
/* ----------------------------------------------------------------------
|
/* -*- c++ -*- ----------------------------------------------------------
|
||||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
http://lammps.sandia.gov, Sandia National Laboratories
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
Steve Plimpton, sjplimp@sandia.gov
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
@ -22,13 +22,746 @@ struct pack_plan_3d {
|
|||||||
int nqty; // # of values/element
|
int nqty; // # of values/element
|
||||||
};
|
};
|
||||||
|
|
||||||
// function prototypes
|
|
||||||
|
|
||||||
void pack_3d(double *, double *, struct pack_plan_3d *);
|
#if !defined(PACK_POINTER) && !defined(PACK_MEMCPY)
|
||||||
void unpack_3d(double *, double *, struct pack_plan_3d *);
|
#define PACK_ARRAY
|
||||||
void unpack_3d_permute1_1(double *, double *, struct pack_plan_3d *);
|
#endif
|
||||||
void unpack_3d_permute1_2(double *, double *, struct pack_plan_3d *);
|
|
||||||
void unpack_3d_permute1_n(double *, double *, struct pack_plan_3d *);
|
#ifndef PACK_DATA
|
||||||
void unpack_3d_permute2_1(double *, double *, struct pack_plan_3d *);
|
#define PACK_DATA double
|
||||||
void unpack_3d_permute2_2(double *, double *, struct pack_plan_3d *);
|
#endif
|
||||||
void unpack_3d_permute2_n(double *, double *, struct pack_plan_3d *);
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
Pack and unpack functions:
|
||||||
|
|
||||||
|
pack routines copy strided values from data into contiguous locs in buf
|
||||||
|
unpack routines copy contiguous values from buf into strided locs in data
|
||||||
|
different versions of unpack depending on permutation
|
||||||
|
and # of values/element
|
||||||
|
PACK_ARRAY routines work via array indices (default)
|
||||||
|
PACK_POINTER routines work via pointers
|
||||||
|
PACK_MEMCPY routines work via pointers and memcpy function
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
pack/unpack with array indices
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef PACK_ARRAY
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
pack from data -> buf
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Pack data -> buf with array indexing.
// For each (slow,mid) pair, nfast consecutive values starting at
//   data[slow*nstride_plane + mid*nstride_line]
// are appended to buf, so buf is filled contiguously from index 0.
// All counts and strides come from plan.
static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + mid*nstride_line;
      for (int fast = 0; fast < nfast; fast++)
        buf[ibuf++] = data[idata++];
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with array indexing.
// buf is read contiguously from index 0; for each (slow,mid) pair the next
// nfast values are written to consecutive locations starting at
//   data[slow*nstride_plane + mid*nstride_line].
// All counts and strides come from plan.
static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + mid*nstride_line;
      for (int fast = 0; fast < nfast; fast++)
        data[idata++] = buf[ibuf++];
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, one axis permutation, 1 value/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with array indexing, 1 value per element.
// buf is read contiguously from index 0; the value for loop indices
// (slow,mid,fast), fast innermost, is written to
//   data[slow*nstride_line + mid + fast*nstride_plane].
// All counts and strides come from plan.
static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + mid;
      for (int fast = 0; fast < nfast; fast++) {
        data[idata] = buf[ibuf++];
        idata += nstride_plane;
      }
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, one axis permutation, 2 values/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with array indexing, 2 values per element.
// buf is read contiguously from index 0; the pair for loop indices
// (slow,mid,fast), fast innermost, is written to data[k] and data[k+1] with
//   k = slow*nstride_line + 2*mid + fast*nstride_plane.
// All counts and strides come from plan.
static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + 2*mid;
      for (int fast = 0; fast < nfast; fast++) {
        data[idata] = buf[ibuf++];
        data[idata+1] = buf[ibuf++];
        idata += nstride_plane;
      }
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, one axis permutation, nqty values/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with array indexing, nqty values per element.
// buf is read contiguously from index 0; the nqty values for loop indices
// (slow,mid,fast), fast innermost, are written contiguously starting at
//   data[slow*nstride_line + nqty*mid + fast*nstride_plane].
// All counts, strides and nqty come from plan.
static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;
  const int nqty = plan->nqty;

  int ibuf = 0;
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      int ielem = plane + nqty*mid;
      for (int fast = 0; fast < nfast; fast++) {
        int idata = ielem;
        for (int iqty = 0; iqty < nqty; iqty++) data[idata++] = buf[ibuf++];
        ielem += nstride_plane;
      }
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, two axis permutation, 1 value/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with array indexing, 1 value per element.
// buf is read contiguously from index 0; the value for loop indices
// (slow,mid,fast), fast innermost, is written to
//   data[slow + mid*nstride_plane + fast*nstride_line].
// All counts and strides come from plan.
static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      int idata = slow + mid*nstride_plane;
      for (int fast = 0; fast < nfast; fast++) {
        data[idata] = buf[ibuf++];
        idata += nstride_line;
      }
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, two axis permutation, 2 values/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with array indexing, 2 values per element.
// buf is read contiguously from index 0; the pair for loop indices
// (slow,mid,fast), fast innermost, is written to data[k] and data[k+1] with
//   k = 2*slow + mid*nstride_plane + fast*nstride_line.
// All counts and strides come from plan.
static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      int idata = 2*slow + mid*nstride_plane;
      for (int fast = 0; fast < nfast; fast++) {
        data[idata] = buf[ibuf++];
        data[idata+1] = buf[ibuf++];
        idata += nstride_line;
      }
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, two axis permutation, nqty values/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with array indexing, nqty values per element.
// buf is read contiguously from index 0; the nqty values for loop indices
// (slow,mid,fast), fast innermost, are written contiguously starting at
//   data[nqty*slow + mid*nstride_plane + fast*nstride_line].
// All counts, strides and nqty come from plan.
static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;
  const int nqty = plan->nqty;

  int ibuf = 0;
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      int ielem = nqty*slow + mid*nstride_plane;
      for (int fast = 0; fast < nfast; fast++) {
        int idata = ielem;
        for (int iqty = 0; iqty < nqty; iqty++) data[idata++] = buf[ibuf++];
        ielem += nstride_line;
      }
    }
  }
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
pack/unpack with pointers
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef PACK_POINTER
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
pack from data -> buf
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Pack data -> buf with pointer traversal.
// For each (slow,mid) pair, nfast consecutive values starting at
//   data[slow*nstride_plane + mid*nstride_line]
// are appended to buf, so buf is filled contiguously from its start.
// All counts and strides come from plan.
static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  PACK_DATA *dst = buf;
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;
    for (int mid = 0; mid < nmid; mid++) {
      PACK_DATA *first = &(data[plane+mid*nstride_line]);
      PACK_DATA *last = first + nfast;
      for (PACK_DATA *src = first; src < last; src++)
        *(dst++) = *src;
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with pointer traversal.
// buf is read contiguously from its start; for each (slow,mid) pair the next
// nfast values are written to consecutive locations starting at
//   data[slow*nstride_plane + mid*nstride_line].
// All counts and strides come from plan.
static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  PACK_DATA *src = buf;
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;
    for (int mid = 0; mid < nmid; mid++) {
      PACK_DATA *first = &(data[plane+mid*nstride_line]);
      PACK_DATA *last = first + nfast;
      for (PACK_DATA *dst = first; dst < last; dst++)
        *dst = *(src++);
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, one axis permutation, 1 value/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with pointer traversal, 1 value per element.
// buf is read contiguously from its start; the value for loop indices
// (slow,mid,fast), fast innermost, is written to
//   data[slow*nstride_line + mid + fast*nstride_plane].
// All counts and strides come from plan.
static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  PACK_DATA *src = buf;
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      PACK_DATA *first = &(data[plane+mid]);
      PACK_DATA *last = first + nfast*nstride_plane;
      for (PACK_DATA *dst = first; dst < last; dst += nstride_plane)
        *dst = *(src++);
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, one axis permutation, 2 values/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with pointer traversal, 2 values per element.
// buf is read contiguously from its start; the pair for loop indices
// (slow,mid,fast), fast innermost, is written to data[k] and data[k+1] with
//   k = slow*nstride_line + 2*mid + fast*nstride_plane.
// All counts and strides come from plan.
static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  PACK_DATA *src = buf;
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      PACK_DATA *first = &(data[plane+2*mid]);
      PACK_DATA *last = first + nfast*nstride_plane;
      for (PACK_DATA *dst = first; dst < last; dst += nstride_plane) {
        dst[0] = *(src++);
        dst[1] = *(src++);
      }
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, one axis permutation, nqty values/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with pointer traversal, nqty values per element.
// buf is read contiguously from its start; the nqty values for loop indices
// (slow,mid,fast), fast innermost, are written contiguously starting at
//   data[slow*nstride_line + nqty*mid + fast*nstride_plane].
// All counts, strides and nqty come from plan.
static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;
  const int nqty = plan->nqty;

  PACK_DATA *src = buf;
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      PACK_DATA *first = &(data[plane+nqty*mid]);
      PACK_DATA *last = first + nfast*nstride_plane;
      for (PACK_DATA *elem = first; elem < last; elem += nstride_plane) {
        PACK_DATA *dst = elem;
        for (int iqty = 0; iqty < nqty; iqty++) *(dst++) = *(src++);
      }
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, two axis permutation, 1 value/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with pointer traversal, 1 value per element.
// buf is read contiguously from its start; the value for loop indices
// (slow,mid,fast), fast innermost, is written to
//   data[slow + mid*nstride_plane + fast*nstride_line].
// All counts and strides come from plan.
static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  PACK_DATA *src = buf;
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      PACK_DATA *first = &(data[slow+mid*nstride_plane]);
      PACK_DATA *last = first + nfast*nstride_line;
      for (PACK_DATA *dst = first; dst < last; dst += nstride_line)
        *dst = *(src++);
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, two axis permutation, 2 values/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with pointer traversal, 2 values per element.
// buf is read contiguously from its start; the pair for loop indices
// (slow,mid,fast), fast innermost, is written to data[k] and data[k+1] with
//   k = 2*slow + mid*nstride_plane + fast*nstride_line.
// All counts and strides come from plan.
static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  PACK_DATA *src = buf;
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      PACK_DATA *first = &(data[2*slow+mid*nstride_plane]);
      PACK_DATA *last = first + nfast*nstride_line;
      for (PACK_DATA *dst = first; dst < last; dst += nstride_line) {
        dst[0] = *(src++);
        dst[1] = *(src++);
      }
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, two axis permutation, nqty values/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with pointer traversal, nqty values per element.
// buf is read contiguously from its start; the nqty values for loop indices
// (slow,mid,fast), fast innermost, are written contiguously starting at
//   data[nqty*slow + mid*nstride_plane + fast*nstride_line].
// All counts, strides and nqty come from plan.
static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;
  const int nqty = plan->nqty;

  PACK_DATA *src = buf;
  for (int slow = 0; slow < nslow; slow++) {
    for (int mid = 0; mid < nmid; mid++) {
      PACK_DATA *first = &(data[nqty*slow+mid*nstride_plane]);
      PACK_DATA *last = first + nfast*nstride_line;
      for (PACK_DATA *elem = first; elem < last; elem += nstride_line) {
        PACK_DATA *dst = elem;
        for (int iqty = 0; iqty < nqty; iqty++) *(dst++) = *(src++);
      }
    }
  }
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
pack/unpack with pointers and memcpy function
|
||||||
|
no memcpy version of unpack_permute routines,
|
||||||
|
just use PACK_POINTER versions
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifdef PACK_MEMCPY
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
pack from data -> buf
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Pack data -> buf using memcpy, one contiguous run at a time.
//   data  source; run (slow,mid) starts at offset
//         slow*nstride_plane + mid*nstride_line
//   buf   destination; run (slow,mid) starts at offset
//         slow*nmid*nfast + mid*nfast
//   plan  supplies nfast, nmid, nslow, nstride_line, nstride_plane
// Each run is nfast values long.
static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  // bytes per run, kept as size_t so the product is not narrowed to int
  const size_t nbytes = nfast*sizeof(PACK_DATA);

  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;
    const int upto = slow*nmid*nfast;
    for (int mid = 0; mid < nmid; mid++) {
      PACK_DATA *dst = &(buf[upto+mid*nfast]);
      const PACK_DATA *src = &(data[plane+mid*nstride_line]);
      memcpy(dst,src,nbytes);
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data using memcpy, one contiguous run at a time.
//   buf   source; run (slow,mid) starts at offset slow*nmid*nfast + mid*nfast
//   data  destination; run (slow,mid) starts at offset
//         slow*nstride_plane + mid*nstride_line
//   plan  supplies nfast, nmid, nslow, nstride_line, nstride_plane
// Each run is nfast values long.
static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  // bytes per run, kept as size_t so the product is not narrowed to int
  const size_t nbytes = nfast*sizeof(PACK_DATA);

  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;
    const int upto = slow*nmid*nfast;
    for (int mid = 0; mid < nmid; mid++) {
      PACK_DATA *dst = &(data[plane+mid*nstride_line]);
      const PACK_DATA *src = &(buf[upto+mid*nfast]);
      memcpy(dst,src,nbytes);
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, one axis permutation, 1 value/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with pointer traversal (no memcpy is used here),
// 1 value per element.
// buf is read contiguously from its start; the value for loop indices
// (slow,mid,fast), fast innermost, is written to
//   data[slow*nstride_line + mid + fast*nstride_plane].
// All counts and strides come from plan.
static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  PACK_DATA *src = buf;
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      PACK_DATA *first = &(data[plane+mid]);
      PACK_DATA *last = first + nfast*nstride_plane;
      for (PACK_DATA *dst = first; dst < last; dst += nstride_plane)
        *dst = *(src++);
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, one axis permutation, 2 values/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
// Unpack buf -> data with pointer traversal (no memcpy is used here),
// 2 values per element.
// buf is read contiguously from its start; the pair for loop indices
// (slow,mid,fast), fast innermost, is written to data[k] and data[k+1] with
//   k = slow*nstride_line + 2*mid + fast*nstride_plane.
// All counts and strides come from plan.
static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  PACK_DATA *src = buf;
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;
    for (int mid = 0; mid < nmid; mid++) {
      PACK_DATA *first = &(data[plane+2*mid]);
      PACK_DATA *last = first + nfast*nstride_plane;
      for (PACK_DATA *dst = first; dst < last; dst += nstride_plane) {
        dst[0] = *(src++);
        dst[1] = *(src++);
      }
    }
  }
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, one axis permutation, nqty values/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||||
|
|
||||||
|
{
|
||||||
|
register PACK_DATA *in,*out,*instart,*begin,*end;
|
||||||
|
register int iqty,mid,slow;
|
||||||
|
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
|
||||||
|
|
||||||
|
nfast = plan->nfast;
|
||||||
|
nmid = plan->nmid;
|
||||||
|
nslow = plan->nslow;
|
||||||
|
nstride_line = plan->nstride_line;
|
||||||
|
nstride_plane = plan->nstride_plane;
|
||||||
|
nqty = plan->nqty;
|
||||||
|
|
||||||
|
out = buf;
|
||||||
|
for (slow = 0; slow < nslow; slow++) {
|
||||||
|
plane = slow*nstride_line;
|
||||||
|
for (mid = 0; mid < nmid; mid++) {
|
||||||
|
begin = &(data[plane+nqty*mid]);
|
||||||
|
end = begin + nfast*nstride_plane;
|
||||||
|
for (instart = begin; instart < end; instart += nstride_plane) {
|
||||||
|
in = instart;
|
||||||
|
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, two axis permutation, 1 value/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||||
|
|
||||||
|
{
|
||||||
|
register PACK_DATA *in,*out,*begin,*end;
|
||||||
|
register int mid,slow;
|
||||||
|
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
||||||
|
|
||||||
|
nfast = plan->nfast;
|
||||||
|
nmid = plan->nmid;
|
||||||
|
nslow = plan->nslow;
|
||||||
|
nstride_line = plan->nstride_line;
|
||||||
|
nstride_plane = plan->nstride_plane;
|
||||||
|
|
||||||
|
out = buf;
|
||||||
|
for (slow = 0; slow < nslow; slow++) {
|
||||||
|
for (mid = 0; mid < nmid; mid++) {
|
||||||
|
begin = &(data[slow+mid*nstride_plane]);
|
||||||
|
end = begin + nfast*nstride_line;
|
||||||
|
for (in = begin; in < end; in += nstride_line)
|
||||||
|
*in = *(out++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, two axis permutation, 2 values/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||||
|
|
||||||
|
{
|
||||||
|
register PACK_DATA *in,*out,*begin,*end;
|
||||||
|
register int mid,slow;
|
||||||
|
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
||||||
|
|
||||||
|
nfast = plan->nfast;
|
||||||
|
nmid = plan->nmid;
|
||||||
|
nslow = plan->nslow;
|
||||||
|
nstride_line = plan->nstride_line;
|
||||||
|
nstride_plane = plan->nstride_plane;
|
||||||
|
|
||||||
|
out = buf;
|
||||||
|
for (slow = 0; slow < nslow; slow++) {
|
||||||
|
for (mid = 0; mid < nmid; mid++) {
|
||||||
|
begin = &(data[2*slow+mid*nstride_plane]);
|
||||||
|
end = begin + nfast*nstride_line;
|
||||||
|
for (in = begin; in < end; in += nstride_line) {
|
||||||
|
*in = *(out++);
|
||||||
|
*(in+1) = *(out++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
unpack from buf -> data, two axis permutation, nqty values/element
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||||
|
|
||||||
|
{
|
||||||
|
register PACK_DATA *in,*out,*instart,*begin,*end;
|
||||||
|
register int iqty,mid,slow;
|
||||||
|
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
|
||||||
|
|
||||||
|
nfast = plan->nfast;
|
||||||
|
nmid = plan->nmid;
|
||||||
|
nslow = plan->nslow;
|
||||||
|
nstride_line = plan->nstride_line;
|
||||||
|
nstride_plane = plan->nstride_plane;
|
||||||
|
nqty = plan->nqty;
|
||||||
|
|
||||||
|
out = buf;
|
||||||
|
for (slow = 0; slow < nslow; slow++) {
|
||||||
|
for (mid = 0; mid < nmid; mid++) {
|
||||||
|
begin = &(data[nqty*slow+mid*nstride_plane]);
|
||||||
|
end = begin + nfast*nstride_line;
|
||||||
|
for (instart = begin; instart < end; instart += nstride_line) {
|
||||||
|
in = instart;
|
||||||
|
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|||||||
Reference in New Issue
Block a user