patch for allowing prd command to work with sorted atoms

2016-09-28 16:33:30 -06:00
18 changed files with 18294 additions and 94 deletions
--- a/doc/src/Manual.txt
+++ b/doc/src/Manual.txt
@@ -1,7 +1,7 @@
 <!-- HTML_ONLY -->
 <HEAD>
 <TITLE>LAMMPS Users Manual</TITLE>
-<META NAME="docnumber" CONTENT="28 Sep 2016 version">
+<META NAME="docnumber" CONTENT="29 Sep 2016 version">
 <META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
 <META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation.  This software and manual is distributed under the GNU General Public License.">
 </HEAD>
@@ -21,7 +21,7 @@
 <H1></H1>

 LAMMPS Documentation :c,h3
-28 Sep 2016 version :c,h4
+29 Sep 2016 version :c,h4

 Version info: :h4

--- a/examples/prd/README
+++ b/examples/prd/README
@@ -1,5 +1,6 @@
 Run this example as:

-mpirun -np 4 lmp_linux -partition 4x1 -in in.prd
+mpirun -np 4 lmp_g++ -partition 4x1 -in in.prd
+mpirun -np 8 lmp_g++ -partition 4x2 -in in.prd

 You should be able to use any number of replicas >= 3.
--- a/examples/prd/log.15Feb16.prd.g++.4
+++ b/examples/prd/log.15Feb16.prd.g++.4
@@ -1,22 +0,0 @@
-LAMMPS (15 Feb 2016)
-Running on 4 partitions of processors
-Step CPU Clock Event Correlated Coincident Replica
-100 0.000 0 0 0 0 0
-100 0.539 0 0 0 0 0
-100 0.694 0 0 0 0 0
-100 0.850 0 0 0 0 0
-100 0.928 0 0 0 0 0
-200 1.555 400 1 0 4 1
-300 1.924 500 2 1 1 1
-500 3.495 772 3 0 2 2
-800 5.446 1328 4 0 2 2
-1000 7.037 1636 5 0 1 3
-1000 7.989 1636 5 0 1 3
-1200 8.838 1908 6 0 1 2
-1300 9.212 2008 7 1 1 2
-1900 12.291 4024 8 0 1 3
-1900 12.832 4024 8 0 1 3
-1900 13.157 4024 8 0 1 3
-1900 13.320 4024 8 0 1 3
-2100 14.090 4220 9 0 2 3
-Loop time of 14.0941 on 4 procs for 2000 steps with 511 atoms
--- a/examples/prd/log.29Sep16.prd.g++.4
+++ b/examples/prd/log.29Sep16.prd.g++.4
@@ -0,0 +1,22 @@
+LAMMPS (28 Sep 2016)
+Running on 4 partitions of processors
+Step CPU Clock Event Correlated Coincident Replica
+100 0.000 0 0 0 0 0
+100 0.521 0 0 0 0 0
+100 0.670 0 0 0 0 0
+100 0.822 0 0 0 0 0
+100 0.896 0 0 0 0 0
+200 1.555 400 1 0 4 1
+300 1.918 500 2 1 1 1
+500 3.476 772 3 0 2 2
+800 5.379 1328 4 0 2 2
+1000 6.914 1636 5 0 1 3
+1000 7.859 1636 5 0 1 3
+1200 8.658 1908 6 0 1 2
+1300 9.018 2008 7 1 1 2
+1900 12.005 4024 8 0 1 3
+1900 12.539 4024 8 0 1 3
+1900 12.861 4024 8 0 1 3
+1900 13.027 4024 8 0 1 3
+2100 13.798 4220 9 0 2 3
+Loop time of 13.8021 on 4 procs for 2000 steps with 511 atoms
--- a/examples/prd/log.29Sep16.prd.g++.8
+++ b/examples/prd/log.29Sep16.prd.g++.8
@@ -0,0 +1,22 @@
+LAMMPS (28 Sep 2016)
+Running on 4 partitions of processors
+Step CPU Clock Event Correlated Coincident Replica
+100 0.000 0 0 0 0 0
+100 0.289 0 0 0 0 0
+100 0.373 0 0 0 0 0
+100 0.458 0 0 0 0 0
+100 0.500 0 0 0 0 0
+200 0.800 400 1 0 4 1
+300 0.999 500 2 1 1 1
+500 1.834 772 3 0 2 2
+800 2.864 1328 4 0 2 2
+1000 3.706 1636 5 0 1 3
+1000 4.211 1636 5 0 1 3
+1200 4.655 1908 6 0 1 2
+1300 4.854 2008 7 1 1 2
+1900 6.503 4024 8 0 1 3
+1900 6.792 4024 8 0 1 3
+1900 6.968 4024 8 0 1 3
+1900 7.055 4024 8 0 1 3
+2100 7.463 4220 9 0 2 3
+Loop time of 7.46531 on 8 procs for 2000 steps with 511 atoms
--- a/examples/prd/screen.29Sep16.prd.g++.4.0
+++ b/examples/prd/screen.29Sep16.prd.g++.4.0
--- a/examples/prd/screen.29Sep16.prd.g++.4.1
+++ b/examples/prd/screen.29Sep16.prd.g++.4.1
--- a/examples/prd/screen.29Sep16.prd.g++.4.2
+++ b/examples/prd/screen.29Sep16.prd.g++.4.2
--- a/examples/prd/screen.29Sep16.prd.g++.4.3
+++ b/examples/prd/screen.29Sep16.prd.g++.4.3
--- a/examples/prd/screen.29Sep16.prd.g++.8.0
+++ b/examples/prd/screen.29Sep16.prd.g++.8.0
--- a/examples/prd/screen.29Sep16.prd.g++.8.1
+++ b/examples/prd/screen.29Sep16.prd.g++.8.1
--- a/examples/prd/screen.29Sep16.prd.g++.8.2
+++ b/examples/prd/screen.29Sep16.prd.g++.8.2
--- a/examples/prd/screen.29Sep16.prd.g++.8.3
+++ b/examples/prd/screen.29Sep16.prd.g++.8.3
--- a/src/REPLICA/fix_neb.cpp
+++ b/src/REPLICA/fix_neb.cpp
@@ -142,7 +142,7 @@ void FixNEB::init()
  if (count > MAXSMALLINT) error->all(FLERR,"Too many active NEB atoms");
  nebatoms = count;

-  // comm style for inter-replica exchange of coords
+  // comm mode for inter-replica exchange of coords

  if (nreplica == nprocs_universe &&
      nebatoms == atom->natoms && atom->sortfreq == 0) 
@@ -392,7 +392,7 @@ void FixNEB::inter_replica_comm()
  // -----------------------------------------------------

  // single proc per replica
-  // all atoms are NEB atoms and no atom sorting is enabled
+  // all atoms are NEB atoms and no atom sorting
  // direct comm of x -> xprev and x -> xnext

  if (cmode == SINGLE_PROC_DIRECT) {
@@ -414,7 +414,7 @@ void FixNEB::inter_replica_comm()
  // single proc per replica
  // but only some atoms are NEB atoms or atom sorting is enabled
  // send atom IDs and coords of only NEB atoms to prev/next proc
-  // recv proc uses atom->map() to match received coords to owned atoms
+  // recv procs use atom->map() to match received coords to owned atoms

  if (cmode == SINGLE_PROC_MAP) {
    m = 0;
--- a/src/REPLICA/prd.cpp
+++ b/src/REPLICA/prd.cpp
@@ -52,6 +52,8 @@

 using namespace LAMMPS_NS;

+enum{SINGLE_PROC_DIRECT,SINGLE_PROC_MAP,MULTI_PROC};
+
 /* ---------------------------------------------------------------------- */

 PRD::PRD(LAMMPS *lmp) : Pointers(lmp) {}
@@ -114,30 +116,35 @@ void PRD::command(int narg, char **arg)
  int color = me;
  MPI_Comm_split(universe->uworld,color,0,&comm_replica);

-  // equal_size_replicas = 1 if all replicas have same # of procs
-  // no longer used
+  // comm mode for inter-replica exchange of coords

-  //flag = 0;
-  //if (nreplica*nprocs == nprocs_universe) flag = 1;
-  //MPI_Allreduce(&flag,&equal_size_replicas,1,MPI_INT,MPI_MIN,
-  //              universe->uworld);
+  if (nreplica == nprocs_universe && atom->sortfreq == 0) 
+    cmode = SINGLE_PROC_DIRECT;
+  else if (nreplica == nprocs_universe) cmode = SINGLE_PROC_MAP;
+  else cmode = MULTI_PROC;

-  // workspace for inter-replica communication via gathers
+  // workspace for inter-replica communication

  natoms = atom->natoms;

-  displacements = NULL;
  tagall = NULL;
  xall = NULL;
  imageall = NULL;

-  if (nreplica != nprocs_universe) {
-    displacements = new int[nprocs];
+  if (cmode != SINGLE_PROC_DIRECT) {
    memory->create(tagall,natoms,"prd:tagall");
    memory->create(xall,natoms,3,"prd:xall");
    memory->create(imageall,natoms,"prd:imageall");
  }

+  counts = NULL;
+  displacements = NULL;
+
+  if (cmode == MULTI_PROC) {
+    memory->create(counts,nprocs,"prd:counts");
+    memory->create(displacements,nprocs,"prd:displacements");
+  }
+
  // random_select = same RNG for each replica, for multiple event selection
  // random_clock = same RNG for each replica, for clock updates
  // random_dephase = unique RNG for each replica, for dephasing
@@ -238,7 +245,7 @@ void PRD::command(int narg, char **arg)
  if (domain->box_change)
    error->all(FLERR,"Cannot use PRD with a changing box");

-  // cannot use PRD with time-dependent fixes or regions or atom sorting
+  // cannot use PRD with time-dependent fixes or regions

  for (int i = 0; i < modify->nfix; i++)
    if (modify->fix[i]->time_depend)
@@ -248,9 +255,6 @@ void PRD::command(int narg, char **arg)
    if (domain->regions[i]->dynamic_check())
      error->all(FLERR,"Cannot use PRD with a time-dependent region defined");

-  if (atom->sortfreq > 0)
-    error->all(FLERR,"Cannot use PRD with atom_modify sort enabled");
-
  // perform PRD simulation

  if (me_universe == 0 && universe->uscreen)
@@ -433,12 +437,14 @@ void PRD::command(int narg, char **arg)
      fprintf(universe->uscreen,
              "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT
              " atoms\n",
-              timer->get_wall(Timer::TOTAL),nprocs_universe,nsteps,atom->natoms);
+              timer->get_wall(Timer::TOTAL),nprocs_universe,
+              nsteps,atom->natoms);
    if (universe->ulogfile)
      fprintf(universe->ulogfile,
              "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT
              " atoms\n",
-              timer->get_wall(Timer::TOTAL),nprocs_universe,nsteps,atom->natoms);
+              timer->get_wall(Timer::TOTAL),nprocs_universe,
+              nsteps,atom->natoms);
  }

  if (me == 0) {
@@ -461,10 +467,11 @@ void PRD::command(int narg, char **arg)

  // clean up

-  delete [] displacements;
  memory->destroy(tagall);
  memory->destroy(xall);
  memory->destroy(imageall);
+  memory->destroy(counts);
+  memory->destroy(displacements);

  delete [] id_compute;
  MPI_Comm_free(&comm_replica);
@@ -780,53 +787,94 @@ void PRD::replicate(int ireplica)
  int nprocs_universe = universe->nprocs;
  int i,m;

-  if (nreplica == nprocs_universe) {
-    MPI_Bcast(atom->image,atom->nlocal,MPI_INT,ireplica,comm_replica);
+  // -----------------------------------------------------
+  // 3 cases: two for single proc per replica
+  //          one for multiple procs per replica
+  // -----------------------------------------------------
+
+  // single proc per replica, no atom sorting
+  // direct bcast of image and x
+
+  if (cmode == SINGLE_PROC_DIRECT) {
    MPI_Bcast(atom->x[0],3*atom->nlocal,MPI_DOUBLE,ireplica,comm_replica);
+    MPI_Bcast(atom->image,atom->nlocal,MPI_INT,ireplica,comm_replica);
+    return;
+  }

-  } else {
-    int *counts = new int[nprocs];
-
-    if (universe->iworld == ireplica) {
-      MPI_Gather(&atom->nlocal,1,MPI_INT,counts,1,MPI_INT,0,world);
-      displacements[0] = 0;
-      for (i = 0; i < nprocs-1; i++)
-        displacements[i+1] = displacements[i] + counts[i];
-      MPI_Gatherv(atom->tag,atom->nlocal,MPI_LMP_TAGINT,
-                  tagall,counts,displacements,MPI_LMP_TAGINT,0,world);
-      MPI_Gatherv(atom->image,atom->nlocal,MPI_INT,
-                        imageall,counts,displacements,MPI_INT,0,world);
-      for (i = 0; i < nprocs; i++) counts[i] *= 3;
-      for (i = 0; i < nprocs-1; i++)
-        displacements[i+1] = displacements[i] + counts[i];
-      MPI_Gatherv(atom->x[0],3*atom->nlocal,MPI_DOUBLE,
-                        xall[0],counts,displacements,MPI_DOUBLE,0,world);
-    }
-
-    if (me == 0) {
-      MPI_Bcast(tagall,natoms,MPI_INT,ireplica,comm_replica);
-      MPI_Bcast(imageall,natoms,MPI_INT,ireplica,comm_replica);
-      MPI_Bcast(xall[0],3*natoms,MPI_DOUBLE,ireplica,comm_replica);
-    }
-
-    MPI_Bcast(tagall,natoms,MPI_INT,0,world);
-    MPI_Bcast(imageall,natoms,MPI_INT,0,world);
-    MPI_Bcast(xall[0],3*natoms,MPI_DOUBLE,0,world);
+  // single proc per replica, atom sorting is enabled
+  // bcast atom IDs, x, image via tagall, xall, imageall
+  // recv procs use atom->map() to match received info to owned atoms

+  if (cmode == SINGLE_PROC_MAP) {
    double **x = atom->x;
+    tagint *tag = atom->tag;
+    imageint *image = atom->image;
    int nlocal = atom->nlocal;

-    for (i = 0; i < natoms; i++) {
-      m = atom->map(tagall[i]);
-      if (m >= 0 && m < nlocal) {
-        x[m][0] = xall[i][0];
-        x[m][1] = xall[i][1];
-        x[m][2] = xall[i][2];
-        atom->image[m] = imageall[i];
-      }
+    if (universe->iworld == ireplica) {
+      memcpy(tagall,tag,nlocal*sizeof(tagint));
+      memcpy(xall[0],x[0],3*nlocal*sizeof(double));
+      memcpy(imageall,image,nlocal*sizeof(imageint));
    }

-    delete [] counts;
+    MPI_Bcast(tagall,natoms,MPI_INT,ireplica,comm_replica);
+    MPI_Bcast(xall[0],3*natoms,MPI_DOUBLE,ireplica,comm_replica);
+    MPI_Bcast(imageall,natoms,MPI_INT,ireplica,comm_replica);
+
+    for (i = 0; i < nlocal; i++) {
+      m = atom->map(tagall[i]);
+      x[m][0] = xall[i][0];
+      x[m][1] = xall[i][1];
+      x[m][2] = xall[i][2];
+      atom->image[m] = imageall[i];
+    }
+
+    return;
+  }
+
+  // multiple procs per replica
+  // MPI_Gather all atom IDs, x, image to root proc of ireplica
+  // bcast to root of other replicas
+  // bcast within each replica
+  // each proc extracts info for atoms it owns via atom->map()
+  // NOTE: assumes imageint and tagint are always the same size
+
+  if (universe->iworld == ireplica) {
+    MPI_Gather(&atom->nlocal,1,MPI_INT,counts,1,MPI_INT,0,world);
+    displacements[0] = 0;
+    for (i = 0; i < nprocs-1; i++)
+      displacements[i+1] = displacements[i] + counts[i];
+    MPI_Gatherv(atom->tag,atom->nlocal,MPI_LMP_TAGINT,
+                tagall,counts,displacements,MPI_LMP_TAGINT,0,world);
+    MPI_Gatherv(atom->image,atom->nlocal,MPI_LMP_TAGINT,
+                imageall,counts,displacements,MPI_LMP_TAGINT,0,world);
+    for (i = 0; i < nprocs; i++) counts[i] *= 3;
+    for (i = 0; i < nprocs-1; i++)
+      displacements[i+1] = displacements[i] + counts[i];
+    MPI_Gatherv(atom->x[0],3*atom->nlocal,MPI_DOUBLE,
+                xall[0],counts,displacements,MPI_DOUBLE,0,world);
+  }
+  
+  if (me == 0) {
+    MPI_Bcast(tagall,natoms,MPI_INT,ireplica,comm_replica);
+    MPI_Bcast(imageall,natoms,MPI_INT,ireplica,comm_replica);
+    MPI_Bcast(xall[0],3*natoms,MPI_DOUBLE,ireplica,comm_replica);
+  }
+  
+  MPI_Bcast(tagall,natoms,MPI_INT,0,world);
+  MPI_Bcast(imageall,natoms,MPI_INT,0,world);
+  MPI_Bcast(xall[0],3*natoms,MPI_DOUBLE,0,world);
+  
+  double **x = atom->x;
+  int nlocal = atom->nlocal;
+  
+  for (i = 0; i < natoms; i++) {
+    m = atom->map(tagall[i]);
+    if (m < 0 || m >= nlocal) continue;
+    x[m][0] = xall[i][0];
+    x[m][1] = xall[i][1];
+    x[m][2] = xall[i][2];
+    atom->image[m] = imageall[i];
  }
 }

--- a/src/REPLICA/prd.h
+++ b/src/REPLICA/prd.h
@@ -34,7 +34,7 @@ class PRD : protected Pointers {
  int me,nprocs;
  int t_event,n_dephase,t_dephase,t_corr;
  double etol,ftol,temp_dephase;
-  int maxiter,maxeval,temp_flag,stepmode;
+  int maxiter,maxeval,temp_flag,stepmode,cmode;
  char *loop_setting,*dist_setting;

  int equal_size_replicas,natoms;
@@ -46,9 +46,10 @@ class PRD : protected Pointers {
  double time_start;

  MPI_Comm comm_replica;
+  int *counts,*displacements;
  tagint *tagall;
-  int *displacements,*imageall;
  double **xall;
+  imageint *imageall;

  int ncoincident;

--- a/src/REPLICA/tad.cpp
+++ b/src/REPLICA/tad.cpp
@@ -43,8 +43,6 @@
 #include "fix_store.h"
 #include "force.h"
 #include "pair.h"
-#include "random_park.h"
-#include "random_mars.h"
 #include "output.h"
 #include "dump.h"
 #include "finish.h"
@@ -249,7 +247,7 @@ void TAD::command(int narg, char **arg)
  // need this line if quench() does only setup_minimal()
  // update->minimize->setup();

-  // This should work with if uncommented, but does not
+  // this should work with the if statement uncommented, but does not
  // if (universe->iworld == 0) {

  fix_event->store_state_quench();
@@ -399,12 +397,14 @@ void TAD::command(int narg, char **arg)
      fprintf(universe->uscreen,
              "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT
              " atoms\n",
-              timer->get_wall(Timer::TOTAL),nprocs_universe,nsteps,atom->natoms);
+              timer->get_wall(Timer::TOTAL),nprocs_universe,
+              nsteps,atom->natoms);
    if (universe->ulogfile)
      fprintf(universe->ulogfile,
              "Loop time of %g on %d procs for %d steps with " BIGINT_FORMAT
              " atoms\n",
-              timer->get_wall(Timer::TOTAL),nprocs_universe,nsteps,atom->natoms);
+              timer->get_wall(Timer::TOTAL),nprocs_universe,
+              nsteps,atom->natoms);
  }

  if ((me_universe == 0) && ulogfile_neb) fclose(ulogfile_neb);
@@ -874,7 +874,7 @@ void TAD::revert_state()
 }

 /* ----------------------------------------------------------------------
-   Initialize list of possible events
+   initialize list of possible events
 ------------------------------------------------------------------------- */

 void TAD::initialize_event_list() {
@@ -890,7 +890,7 @@ void TAD::initialize_event_list() {
 }

 /* ----------------------------------------------------------------------
-   Delete list of possible events
+   delete list of possible events
 ------------------------------------------------------------------------- */

 void TAD::delete_event_list() {
--- a/src/version.h
+++ b/src/version.h
@ -1 +1 @@
-#define LAMMPS_VERSION "28 Sep 2016"
+#define LAMMPS_VERSION "29 Sep 2016"