Replicate bbox from Chris Knight

This commit is contained in:
Stan Moore
2017-11-06 11:31:43 -07:00
parent 39df9f5d94
commit e337db4059
2 changed files with 418 additions and 76 deletions

View File

@ -10,9 +10,11 @@ replicate command :h3
[Syntax:] [Syntax:]
replicate nx ny nz :pre replicate nx ny nz {keyword} :pre
nx,ny,nz = replication factors in each dimension :ul nx,ny,nz = replication factors in each dimension :ulb
optional {keyword} = {bbox} :l
{bbox} = only check atoms in replicas that overlap with a processor's subdomain :ule
[Examples:] [Examples:]
@ -43,6 +45,12 @@ file that crosses a periodic boundary should be between two atoms with
image flags that differ by 1. This will allow the bond to be image flags that differ by 1. This will allow the bond to be
unwrapped appropriately. unwrapped appropriately.
The optional keyword {bbox} uses a bounding box to only check atoms
in replicas that overlap with a processor's subdomain when assigning
atoms to processors, and thus can result in substantial speedups for
calculations using a large number of processors. It does require
temporarily using more memory.
[Restrictions:] [Restrictions:]
A 2d simulation cannot be replicated in the z dimension. A 2d simulation cannot be replicated in the z dimension.

View File

@ -44,7 +44,7 @@ void Replicate::command(int narg, char **arg)
if (domain->box_exist == 0) if (domain->box_exist == 0)
error->all(FLERR,"Replicate command before simulation box is defined"); error->all(FLERR,"Replicate command before simulation box is defined");
if (narg != 3) error->all(FLERR,"Illegal replicate command"); if (narg < 3 || narg > 4) error->all(FLERR,"Illegal replicate command");
int me = comm->me; int me = comm->me;
int nprocs = comm->nprocs; int nprocs = comm->nprocs;
@ -58,6 +58,10 @@ void Replicate::command(int narg, char **arg)
int nz = force->inumeric(FLERR,arg[2]); int nz = force->inumeric(FLERR,arg[2]);
int nrep = nx*ny*nz; int nrep = nx*ny*nz;
int bbox_flag = 0;
if (narg == 4)
if (strcmp(arg[3],"bbox") == 0) bbox_flag = 1;
// error and warning checks // error and warning checks
if (nx <= 0 || ny <= 0 || nz <= 0) if (nx <= 0 || ny <= 0 || nz <= 0)
@ -99,6 +103,37 @@ void Replicate::command(int narg, char **arg)
maxmol = maxmol_all; maxmol = maxmol_all;
} }
// check image flags maximum extent; only efficient small image flags compared to new system
int _imagelo[3], _imagehi[3];
_imagelo[0] = 0;
_imagelo[1] = 0;
_imagelo[2] = 0;
_imagehi[0] = 0;
_imagehi[1] = 0;
_imagehi[2] = 0;
if (bbox_flag) {
for (i=0; i<atom->nlocal; ++i) {
imageint image = atom->image[i];
int xbox = (image & IMGMASK) - IMGMAX;
int ybox = (image >> IMGBITS & IMGMASK) - IMGMAX;
int zbox = (image >> IMG2BITS) - IMGMAX;
if (xbox < _imagelo[0]) _imagelo[0] = xbox;
if (ybox < _imagelo[1]) _imagelo[1] = ybox;
if (zbox < _imagelo[2]) _imagelo[2] = zbox;
if (xbox > _imagehi[0]) _imagehi[0] = xbox;
if (ybox > _imagehi[1]) _imagehi[1] = ybox;
if (zbox > _imagehi[2]) _imagehi[2] = zbox;
}
MPI_Allreduce(MPI_IN_PLACE, &(_imagelo[0]), 3, MPI_INT, MPI_MIN, world);
MPI_Allreduce(MPI_IN_PLACE, &(_imagehi[0]), 3, MPI_INT, MPI_MAX, world);
}
// unmap existing atoms via image flags // unmap existing atoms via image flags
for (i = 0; i < atom->nlocal; i++) for (i = 0; i < atom->nlocal; i++)
@ -280,93 +315,392 @@ void Replicate::command(int narg, char **arg)
double *coord; double *coord;
int tag_enable = atom->tag_enable; int tag_enable = atom->tag_enable;
for (int iproc = 0; iproc < nprocs; iproc++) { if (bbox_flag) {
if (me == iproc) {
n = 0; // allgather size of buf on each proc
for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]);
n = 0;
for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]);
int * size_buf_rnk;
memory->create(size_buf_rnk, nprocs, "replicate:size_buf_rnk");
MPI_Allgather(&n, 1, MPI_INT, size_buf_rnk, 1, MPI_INT, world);
// size of buf_all
int size_buf_all = 0;
MPI_Allreduce(&n, &size_buf_all, 1, MPI_INT, MPI_SUM, world);
if (me == 0 && screen) {
fprintf(screen," bounding box image = (%i %i %i) to (%i %i %i)\n",
_imagelo[0],_imagelo[1],_imagelo[2],_imagehi[0],_imagehi[1],_imagehi[2]);
fprintf(screen," bounding box extra memory = %.2f MB\n",
(double)size_buf_all*sizeof(double)/1024/1024);
} }
MPI_Bcast(&n,1,MPI_INT,iproc,world);
MPI_Bcast(buf,n,MPI_DOUBLE,iproc,world); // rnk offsets
int * disp_buf_rnk;
memory->create(disp_buf_rnk, nprocs, "replicate:disp_buf_rnk");
disp_buf_rnk[0] = 0;
for (i=1; i<nprocs; ++i) disp_buf_rnk[i] = disp_buf_rnk[i-1] + size_buf_rnk[i-1];
// allgather buf_all
double * buf_all;
memory->create(buf_all, size_buf_all, "replicate:buf_all");
MPI_Allgatherv(buf, n, MPI_DOUBLE, buf_all, size_buf_rnk, disp_buf_rnk, MPI_DOUBLE, world);
// bounding box of original unwrapped system
double _orig_lo[3], _orig_hi[3];
if (triclinic) {
_orig_lo[0] = domain->boxlo[0] + _imagelo[0] * old_xprd + _imagelo[1] * old_xy + _imagelo[2] * old_xz;
_orig_lo[1] = domain->boxlo[1] + _imagelo[1] * old_yprd + _imagelo[2] * old_yz;
_orig_lo[2] = domain->boxlo[2] + _imagelo[2] * old_zprd;
_orig_hi[0] = domain->boxlo[0] + (_imagehi[0]+1) * old_xprd + (_imagehi[1]+1) * old_xy + (_imagehi[2]+1) * old_xz;
_orig_hi[1] = domain->boxlo[1] + (_imagehi[1]+1) * old_yprd + (_imagehi[2]+1) * old_yz;
_orig_hi[2] = domain->boxlo[2] + (_imagehi[2]+1) * old_zprd;
} else {
_orig_lo[0] = domain->boxlo[0] + _imagelo[0] * old_xprd;
_orig_lo[1] = domain->boxlo[1] + _imagelo[1] * old_yprd;
_orig_lo[2] = domain->boxlo[2] + _imagelo[2] * old_zprd;
_orig_hi[0] = domain->boxlo[0] + (_imagehi[0]+1) * old_xprd;
_orig_hi[1] = domain->boxlo[1] + (_imagehi[1]+1) * old_yprd;
_orig_hi[2] = domain->boxlo[2] + (_imagehi[2]+1) * old_zprd;
}
double _lo[3], _hi[3];
int num_replicas_added = 0;
for (ix = 0; ix < nx; ix++) { for (ix = 0; ix < nx; ix++) {
for (iy = 0; iy < ny; iy++) { for (iy = 0; iy < ny; iy++) {
for (iz = 0; iz < nz; iz++) { for (iz = 0; iz < nz; iz++) {
// domain->remap() overwrites coordinates, so always recompute here
if (triclinic) {
_lo[0] = _orig_lo[0] + ix * old_xprd + iy * old_xy + iz * old_xz;
_hi[0] = _orig_hi[0] + ix * old_xprd + iy * old_xy + iz * old_xz;
_lo[1] = _orig_lo[1] + iy * old_yprd + iz * old_yz;
_hi[1] = _orig_hi[1] + iy * old_yprd + iz * old_yz;
_lo[2] = _orig_lo[2] + iz * old_zprd;
_hi[2] = _orig_hi[2] + iz * old_zprd;
} else {
_lo[0] = _orig_lo[0] + ix * old_xprd;
_hi[0] = _orig_hi[0] + ix * old_xprd;
_lo[1] = _orig_lo[1] + iy * old_yprd;
_hi[1] = _orig_hi[1] + iy * old_yprd;
_lo[2] = _orig_lo[2] + iz * old_zprd;
_hi[2] = _orig_hi[2] + iz * old_zprd;
}
// test if bounding box of shifted replica overlaps sub-domain of proc
// if not, then skip testing atoms
int xoverlap = 1;
int yoverlap = 1;
int zoverlap = 1;
if (triclinic) {
double _llo[3];
domain->x2lamda(_lo,_llo);
double _lhi[3];
domain->x2lamda(_hi,_lhi);
if (_llo[0] > (subhi[0] - EPSILON)
|| _lhi[0] < (sublo[0] + EPSILON) ) xoverlap = 0;
if (_llo[1] > (subhi[1] - EPSILON)
|| _lhi[1] < (sublo[1] + EPSILON) ) yoverlap = 0;
if (_llo[2] > (subhi[2] - EPSILON)
|| _lhi[2] < (sublo[2] + EPSILON) ) zoverlap = 0;
} else {
if (_lo[0] > (subhi[0] - EPSILON)
|| _hi[0] < (sublo[0] + EPSILON) ) xoverlap = 0;
if (_lo[1] > (subhi[1] - EPSILON)
|| _hi[1] < (sublo[1] + EPSILON) ) yoverlap = 0;
if (_lo[2] > (subhi[2] - EPSILON)
|| _hi[2] < (sublo[2] + EPSILON) ) zoverlap = 0;
}
int overlap = 0;
if (xoverlap && yoverlap && zoverlap) overlap = 1;
// if no overlap, test if bounding box wrapped back into new system
if (!overlap) {
// wrap back into cell
imageint imagelo = ((imageint) IMGMAX << IMG2BITS) |
((imageint) IMGMAX << IMGBITS) | IMGMAX;
domain->remap(&(_lo[0]), imagelo);
int xboxlo = (imagelo & IMGMASK) - IMGMAX;
int yboxlo = (imagelo >> IMGBITS & IMGMASK) - IMGMAX;
int zboxlo = (imagelo >> IMG2BITS) - IMGMAX;
imageint imagehi = ((imageint) IMGMAX << IMG2BITS) |
((imageint) IMGMAX << IMGBITS) | IMGMAX;
domain->remap(&(_hi[0]), imagehi);
int xboxhi = (imagehi & IMGMASK) - IMGMAX;
int yboxhi = (imagehi >> IMGBITS & IMGMASK) - IMGMAX;
int zboxhi = (imagehi >> IMG2BITS) - IMGMAX;
if (triclinic) {
double _llo[3];
_llo[0] = _lo[0]; _llo[1] = _lo[1]; _llo[2] = _lo[2];
domain->x2lamda(_llo,_lo);
double _lhi[3];
_lhi[0] = _hi[0]; _lhi[1] = _hi[1]; _lhi[2] = _hi[2];
domain->x2lamda(_lhi,_hi);
}
// test all fragments for any overlap; ok to include false positives
int _xoverlap1 = 0;
int _xoverlap2 = 0;
if (!xoverlap) {
if (xboxlo < 0) {
_xoverlap1 = 1;
if ( _lo[0] > (subhi[0] - EPSILON) ) _xoverlap1 = 0;
}
if (xboxhi > 0) {
_xoverlap2 = 1;
if ( _hi[0] < (sublo[0] + EPSILON) ) _xoverlap2 = 0;
}
if (_xoverlap1 || _xoverlap2) xoverlap = 1;
}
int _yoverlap1 = 0;
int _yoverlap2 = 0;
if (!yoverlap) {
if (yboxlo < 0) {
_yoverlap1 = 1;
if ( _lo[1] > (subhi[1] - EPSILON) ) _yoverlap1 = 0;
}
if (yboxhi > 0) {
_yoverlap2 = 1;
if ( _hi[1] < (sublo[1] + EPSILON) ) _yoverlap2 = 0;
}
if (_yoverlap1 || _yoverlap2) yoverlap = 1;
}
int _zoverlap1 = 0;
int _zoverlap2 = 0;
if (!zoverlap) {
if (zboxlo < 0) {
_zoverlap1 = 1;
if ( _lo[2] > (subhi[2] - EPSILON) ) _zoverlap1 = 0;
}
if (zboxhi > 0) {
_zoverlap2 = 1;
if ( _hi[2] < (sublo[2] + EPSILON) ) _zoverlap2 = 0;
}
if (_zoverlap1 || _zoverlap2) zoverlap = 1;
}
// does either fragment overlap w/ sub-domain
if (xoverlap && yoverlap && zoverlap) overlap = 1;
}
// while loop over one proc's atom list // while loop over one proc's atom list
m = 0; if (overlap) {
while (m < n) { num_replicas_added++;
image = ((imageint) IMGMAX << IMG2BITS) |
((imageint) IMGMAX << IMGBITS) | IMGMAX;
if (triclinic == 0) {
x[0] = buf[m+1] + ix*old_xprd;
x[1] = buf[m+2] + iy*old_yprd;
x[2] = buf[m+3] + iz*old_zprd;
} else {
x[0] = buf[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz;
x[1] = buf[m+2] + iy*old_yprd + iz*old_yz;
x[2] = buf[m+3] + iz*old_zprd;
}
domain->remap(x,image);
if (triclinic) {
domain->x2lamda(x,lamda);
coord = lamda;
} else coord = x;
if (coord[0] >= sublo[0] && coord[0] < subhi[0] && m = 0;
coord[1] >= sublo[1] && coord[1] < subhi[1] && while (m < size_buf_all) {
coord[2] >= sublo[2] && coord[2] < subhi[2]) { image = ((imageint) IMGMAX << IMG2BITS) |
((imageint) IMGMAX << IMGBITS) | IMGMAX;
m += avec->unpack_restart(&buf[m]); if (triclinic == 0) {
x[0] = buf_all[m+1] + ix*old_xprd;
i = atom->nlocal - 1; x[1] = buf_all[m+2] + iy*old_yprd;
if (tag_enable) x[2] = buf_all[m+3] + iz*old_zprd;
atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag; } else {
else atom_offset = 0; x[0] = buf_all[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz;
mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol; x[1] = buf_all[m+2] + iy*old_yprd + iz*old_yz;
x[2] = buf_all[m+3] + iz*old_zprd;
atom->x[i][0] = x[0];
atom->x[i][1] = x[1];
atom->x[i][2] = x[2];
atom->tag[i] += atom_offset;
atom->image[i] = image;
if (atom->molecular) {
if (atom->molecule[i] > 0)
atom->molecule[i] += mol_offset;
if (atom->molecular == 1) {
if (atom->avec->bonds_allow)
for (j = 0; j < atom->num_bond[i]; j++)
atom->bond_atom[i][j] += atom_offset;
if (atom->avec->angles_allow)
for (j = 0; j < atom->num_angle[i]; j++) {
atom->angle_atom1[i][j] += atom_offset;
atom->angle_atom2[i][j] += atom_offset;
atom->angle_atom3[i][j] += atom_offset;
}
if (atom->avec->dihedrals_allow)
for (j = 0; j < atom->num_dihedral[i]; j++) {
atom->dihedral_atom1[i][j] += atom_offset;
atom->dihedral_atom2[i][j] += atom_offset;
atom->dihedral_atom3[i][j] += atom_offset;
atom->dihedral_atom4[i][j] += atom_offset;
}
if (atom->avec->impropers_allow)
for (j = 0; j < atom->num_improper[i]; j++) {
atom->improper_atom1[i][j] += atom_offset;
atom->improper_atom2[i][j] += atom_offset;
atom->improper_atom3[i][j] += atom_offset;
atom->improper_atom4[i][j] += atom_offset;
}
}
} }
} else m += static_cast<int> (buf[m]); domain->remap(x,image);
if (triclinic) {
domain->x2lamda(x,lamda);
coord = lamda;
} else coord = x;
if (coord[0] >= sublo[0] && coord[0] < subhi[0] &&
coord[1] >= sublo[1] && coord[1] < subhi[1] &&
coord[2] >= sublo[2] && coord[2] < subhi[2]) {
m += avec->unpack_restart(&buf_all[m]);
i = atom->nlocal - 1;
if (tag_enable)
atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag;
else atom_offset = 0;
mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol;
atom->x[i][0] = x[0];
atom->x[i][1] = x[1];
atom->x[i][2] = x[2];
atom->tag[i] += atom_offset;
atom->image[i] = image;
if (atom->molecular) {
if (atom->molecule[i] > 0)
atom->molecule[i] += mol_offset;
if (atom->molecular == 1) {
if (atom->avec->bonds_allow)
for (j = 0; j < atom->num_bond[i]; j++)
atom->bond_atom[i][j] += atom_offset;
if (atom->avec->angles_allow)
for (j = 0; j < atom->num_angle[i]; j++) {
atom->angle_atom1[i][j] += atom_offset;
atom->angle_atom2[i][j] += atom_offset;
atom->angle_atom3[i][j] += atom_offset;
}
if (atom->avec->dihedrals_allow)
for (j = 0; j < atom->num_dihedral[i]; j++) {
atom->dihedral_atom1[i][j] += atom_offset;
atom->dihedral_atom2[i][j] += atom_offset;
atom->dihedral_atom3[i][j] += atom_offset;
atom->dihedral_atom4[i][j] += atom_offset;
}
if (atom->avec->impropers_allow)
for (j = 0; j < atom->num_improper[i]; j++) {
atom->improper_atom1[i][j] += atom_offset;
atom->improper_atom2[i][j] += atom_offset;
atom->improper_atom3[i][j] += atom_offset;
atom->improper_atom4[i][j] += atom_offset;
}
}
}
} else m += static_cast<int> (buf_all[m]);
}
} // if (overlap)
}
}
}
memory->destroy(size_buf_rnk);
memory->destroy(disp_buf_rnk);
memory->destroy(buf_all);
int sum = 0;
MPI_Reduce(&num_replicas_added, &sum, 1, MPI_INT, MPI_SUM, 0, world);
double avg = (double) sum / nprocs;
if (me == 0 && screen)
fprintf(screen," average # of replicas added to proc = %.2f out of %i (%.2f %%)\n",
avg,nx*ny*nz,avg/(nx*ny*nz)*100.0);
} else {
for (int iproc = 0; iproc < nprocs; iproc++) {
if (me == iproc) {
n = 0;
for (i = 0; i < old->nlocal; i++) n += old_avec->pack_restart(i,&buf[n]);
}
MPI_Bcast(&n,1,MPI_INT,iproc,world);
MPI_Bcast(buf,n,MPI_DOUBLE,iproc,world);
for (ix = 0; ix < nx; ix++) {
for (iy = 0; iy < ny; iy++) {
for (iz = 0; iz < nz; iz++) {
// while loop over one proc's atom list
m = 0;
while (m < n) {
image = ((imageint) IMGMAX << IMG2BITS) |
((imageint) IMGMAX << IMGBITS) | IMGMAX;
if (triclinic == 0) {
x[0] = buf[m+1] + ix*old_xprd;
x[1] = buf[m+2] + iy*old_yprd;
x[2] = buf[m+3] + iz*old_zprd;
} else {
x[0] = buf[m+1] + ix*old_xprd + iy*old_xy + iz*old_xz;
x[1] = buf[m+2] + iy*old_yprd + iz*old_yz;
x[2] = buf[m+3] + iz*old_zprd;
}
domain->remap(x,image);
if (triclinic) {
domain->x2lamda(x,lamda);
coord = lamda;
} else coord = x;
if (coord[0] >= sublo[0] && coord[0] < subhi[0] &&
coord[1] >= sublo[1] && coord[1] < subhi[1] &&
coord[2] >= sublo[2] && coord[2] < subhi[2]) {
m += avec->unpack_restart(&buf[m]);
i = atom->nlocal - 1;
if (tag_enable)
atom_offset = iz*ny*nx*maxtag + iy*nx*maxtag + ix*maxtag;
else atom_offset = 0;
mol_offset = iz*ny*nx*maxmol + iy*nx*maxmol + ix*maxmol;
atom->x[i][0] = x[0];
atom->x[i][1] = x[1];
atom->x[i][2] = x[2];
atom->tag[i] += atom_offset;
atom->image[i] = image;
if (atom->molecular) {
if (atom->molecule[i] > 0)
atom->molecule[i] += mol_offset;
if (atom->molecular == 1) {
if (atom->avec->bonds_allow)
for (j = 0; j < atom->num_bond[i]; j++)
atom->bond_atom[i][j] += atom_offset;
if (atom->avec->angles_allow)
for (j = 0; j < atom->num_angle[i]; j++) {
atom->angle_atom1[i][j] += atom_offset;
atom->angle_atom2[i][j] += atom_offset;
atom->angle_atom3[i][j] += atom_offset;
}
if (atom->avec->dihedrals_allow)
for (j = 0; j < atom->num_dihedral[i]; j++) {
atom->dihedral_atom1[i][j] += atom_offset;
atom->dihedral_atom2[i][j] += atom_offset;
atom->dihedral_atom3[i][j] += atom_offset;
atom->dihedral_atom4[i][j] += atom_offset;
}
if (atom->avec->impropers_allow)
for (j = 0; j < atom->num_improper[i]; j++) {
atom->improper_atom1[i][j] += atom_offset;
atom->improper_atom2[i][j] += atom_offset;
atom->improper_atom3[i][j] += atom_offset;
atom->improper_atom4[i][j] += atom_offset;
}
}
}
} else m += static_cast<int> (buf[m]);
}
} }
} }
} }
} }
} } // if (bbox_flag)
// free communication buffer and old atom class // free communication buffer and old atom class