diff --git a/doc/src/Manual.txt b/doc/src/Manual.txt
index 375a1c097b..bb7a1452bc 100644
--- a/doc/src/Manual.txt
+++ b/doc/src/Manual.txt
@@ -1,7 +1,7 @@
LAMMPS Users Manual
-
+
@@ -21,7 +21,7 @@
LAMMPS Documentation :c,h3
-7 Mar 2017 version :c,h4
+10 Mar 2017 version :c,h4
Version info: :h4
diff --git a/doc/src/balance.txt b/doc/src/balance.txt
index 9aeb03392f..79728d6569 100644
--- a/doc/src/balance.txt
+++ b/doc/src/balance.txt
@@ -286,24 +286,32 @@ above. It performs a recursive coordinate bisectioning (RCB) of the
simulation domain. The basic idea is as follows.
The simulation domain is cut into 2 boxes by an axis-aligned cut in
-the longest dimension, leaving one new box on either side of the cut.
-All the processors are also partitioned into 2 groups, half assigned
-to the box on the lower side of the cut, and half to the box on the
-upper side. (If the processor count is odd, one side gets an extra
-processor.) The cut is positioned so that the number of particles in
-the lower box is exactly the number that the processors assigned to
-that box should own for load balance to be perfect. This also makes
-load balance for the upper box perfect. The positioning is done
-iteratively, by a bisectioning method. Note that counting particles
-on either side of the cut requires communication between all
-processors at each iteration.
+one of the dimensions, leaving one new sub-box on either side of the
+cut. Which dimension is chosen for the cut depends on the particle
+(weight) distribution within the parent box. Normally the longest
+dimension of the box is cut, but if all (or most) of the particles are
+at one end of the box, a cut may be performed in another dimension to
+induce sub-boxes that are more cube-ish (3d) or square-ish (2d) in
+shape.
+
+After the cut is made, all the processors are also partitioned into 2
+groups, half assigned to the box on the lower side of the cut, and
+half to the box on the upper side. (If the processor count is odd,
+one side gets an extra processor.) The cut is positioned so that the
+number of (weighted) particles in the lower box is exactly the number
+that the processors assigned to that box should own for load balance
+to be perfect. This also makes load balance for the upper box
+perfect. The positioning of the cut is done iteratively, by a
+bisectioning method (median search). Note that counting particles on
+either side of the cut requires communication between all processors
+at each iteration.
That is the procedure for the first cut. Subsequent cuts are made
recursively, in exactly the same manner. The subset of processors
-assigned to each box make a new cut in the longest dimension of that
-box, splitting the box, the subset of processors, and the particles
-in the box in two. The recursion continues until every processor is
-assigned a sub-box of the entire simulation domain, and owns the
+assigned to each box make a new cut in one dimension of that box,
+splitting the box, the subset of processors, and the particles in the
+box in two. The recursion continues until every processor is assigned
+a sub-box of the entire simulation domain, and owns the (weighted)
particles in that sub-box.
:line
diff --git a/doc/src/change_box.txt b/doc/src/change_box.txt
index a41463baaf..2c7a890d4c 100644
--- a/doc/src/change_box.txt
+++ b/doc/src/change_box.txt
@@ -101,11 +101,11 @@ Instead you could do something like this, assuming the simulation box
is non-periodic and atoms extend from 0 to 20 in all dimensions:
change_box all x final -10 20
-create_atoms 1 single -5 5 5 # this will fail to insert an atom :pre
+create_atoms 1 single -5 5 5 # this will fail to insert an atom :pre
change_box all x final -10 20 boundary f s s
create_atoms 1 single -5 5 5
-change_box boundary s s s # this will work :pre
+change_box all boundary s s s # this will work :pre
NOTE: Unlike the earlier "displace_box" version of this command, atom
remapping is NOT performed by default. This command allows remapping
diff --git a/doc/src/create_atoms.txt b/doc/src/create_atoms.txt
index da9c8809d0..98c3c24a0b 100644
--- a/doc/src/create_atoms.txt
+++ b/doc/src/create_atoms.txt
@@ -134,6 +134,17 @@ not overlap existing atoms inappropriately, especially if molecules
are being added. The "delete_atoms"_delete_atoms.html command can be
used to remove overlapping atoms or molecules.
+NOTE: You cannot use any of the styles explained above to create atoms
+that are outside the simulation box; they will just be ignored by
+LAMMPS. This is true even if you are using shrink-wrapped box
+boundaries, as specified by the "boundary"_boundary.html command.
+However, you can first use the "change_box"_change_box.html command to
+temporarily expand the box, then add atoms via create_atoms, then
+finally use change_box command again if needed to re-shrink-wrap the
+new atoms. See the "change_box"_change_box.html doc page for an
+example of how to do this, using the create_atoms {single} style to
+insert a new atom outside the current simulation box.
+
:line
Individual atoms are inserted by this command, unless the {mol}
diff --git a/doc/src/fix_halt.txt b/doc/src/fix_halt.txt
index c9295eca69..3f7650466f 100644
--- a/doc/src/fix_halt.txt
+++ b/doc/src/fix_halt.txt
@@ -15,15 +15,16 @@ fix ID group-ID halt N attribute operator avalue keyword value ... :pre
ID, group-ID are documented in "fix"_fix.html command :ulb,l
halt = style name of this fix command :l
N = check halt condition every N steps :l
-attribute = hstyle or v_name :l
- hstyle = {bondmax}
+attribute = {bondmax} or {tlimit} or v_name :l
+ bondmax = length of longest bond in the system
+ tlimit = elapsed CPU time
v_name = name of "equal-style variable"_variable.html :pre
operator = "<" or "<=" or ">" or ">=" or "==" or "!=" or "|^" :l
avalue = numeric value to compare attribute to :l
-string = text string to print with optional variable names :l
zero or more keyword/value pairs may be appended :l
-keyword = {error} :l
- {error} value = {hard} or {soft} or {continue} :pre
+keyword = {error} or {message} :l
+ {error} value = {hard} or {soft} or {continue}
+ {message} value = {yes} or {no} :pre
:ule
[Examples:]
@@ -40,14 +41,33 @@ specified by the "run"_run.html or "minimize"_minimize.html command.
The specified group-ID is ignored by this fix.
-The specified {attribute} can be one of the {hstyle} options listed
-above, or an "equal-style variable"_variable.html referenced as
-{v_name}, where "name" is the name of a variable that has been defined
-previously in the input script.
+The specified {attribute} can be one of the options listed above,
+namely {bondmax} or {tlimit}, or an "equal-style
+variable"_variable.html referenced as {v_name}, where "name" is the
+name of a variable that has been defined previously in the input
+script.
-The only {hstyle} option currently implemented is {bondmax}. This
-will loop over all bonds in the system, compute their current
-lengths, and set {attribute} to the longest bond distance.
+The {bondmax} attribute will loop over all bonds in the system,
+compute their current lengths, and set {attribute} to the longest bond
+distance.
+
+The {tlimit} attribute queries the elapsed CPU time (in seconds) since
+the current run began, and sets {attribute} to that value. This is an
+alternative way to limit the length of a simulation run, similar to
+the "timer"_timer.html timeout command. There are two differences in
+using this method versus the timer command option. The first is that
+the clock starts at the beginning of the current run (not when the
+timer or fix command is specified), so that any setup time for the run
+is not included in the elapsed time. The second is that the timer
+invocation and syncing across all processors (via MPI_Allreduce) is
+not performed once every {N} steps by this command. Instead it is
+performed (typically) only a small number of times and the elapsed
+times are used to predict when the end-of-the-run will be. Both of
+these attributes can be useful when performing benchmark calculations
+for a desired length of time with minmimal overhead. For example, if
+a run is performing 1000s of timesteps/sec, the overhead for syncing
+the timer frequently across a large number of processors may be
+non-negligble.
Equal-style variables evaluate to a numeric value. See the
"variable"_variable.html command for a description. They calculate
@@ -100,6 +120,14 @@ Note that you may wish use the "unfix"_unfix.html command on the fix
halt ID, so that the same condition is not immediately triggered in a
subsequent run.
+The optional {message} keyword determines whether a message is printed
+to the screen and logfile when the half condition is triggered. If
+{message} is set to yes, a one line message with the values that
+triggered the halt is printed. If {message} is set to no, no message
+is printed; the run simply exits. The latter may be desirable for
+post-processing tools that extract thermodyanmic information from log
+files.
+
[Restart, fix_modify, output, run start/stop, minimize info:]
No information about this fix is written to "binary restart
@@ -118,4 +146,4 @@ This fix is not invoked during "energy minimization"_minimize.html.
[Default:]
-The option defaults are error = hard.
+The option defaults are error = hard and message = yes.
diff --git a/doc/src/quit.txt b/doc/src/quit.txt
index 809fb2e38b..843d3de7f3 100644
--- a/doc/src/quit.txt
+++ b/doc/src/quit.txt
@@ -17,7 +17,7 @@ status = numerical exit status (optional)
[Examples:]
quit
-if "$n > 10000" then "quit 1":pre
+if "$n > 10000" then "quit 1" :pre
[Description:]
diff --git a/doc/src/thermo_style.txt b/doc/src/thermo_style.txt
index e30e7023e4..36ec7bf12e 100644
--- a/doc/src/thermo_style.txt
+++ b/doc/src/thermo_style.txt
@@ -36,7 +36,7 @@ args = list of arguments for a particular style :l
elaplong = timesteps since start of initial run in a series of runs
dt = timestep size
time = simulation time
- cpu = elapsed CPU time in seconds
+ cpu = elapsed CPU time in seconds since start of this run
tpcpu = time per CPU second
spcpu = timesteps per CPU second
cpuremain = estimated CPU time remaining in run
diff --git a/src/balance.cpp b/src/balance.cpp
index 050f282dfe..52f6072a6c 100644
--- a/src/balance.cpp
+++ b/src/balance.cpp
@@ -456,6 +456,7 @@ void Balance::options(int iarg, int narg, char **arg)
wtflag = 0;
varflag = 0;
+ oldrcb = 0;
outflag = 0;
int outarg = 0;
fp = NULL;
@@ -491,6 +492,9 @@ void Balance::options(int iarg, int narg, char **arg)
}
iarg += 2+nopt;
+ } else if (strcmp(arg[iarg],"old") == 0) {
+ oldrcb = 1;
+ iarg++;
} else if (strcmp(arg[iarg],"out") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal (fix) balance command");
outflag = 1;
@@ -641,12 +645,21 @@ int *Balance::bisection(int sortflag)
// invoke RCB
// then invert() to create list of proc assignments for my atoms
+ // NOTE: (3/2017) can remove undocumented "old" option at some point
+ // ditto in rcb.cpp
- if (wtflag) {
- weight = fixstore->vstore;
- rcb->compute(dim,atom->nlocal,atom->x,weight,shrinklo,shrinkhi);
- } else rcb->compute(dim,atom->nlocal,atom->x,NULL,shrinklo,shrinkhi);
-
+ if (oldrcb) {
+ if (wtflag) {
+ weight = fixstore->vstore;
+ rcb->compute_old(dim,atom->nlocal,atom->x,weight,shrinklo,shrinkhi);
+ } else rcb->compute_old(dim,atom->nlocal,atom->x,NULL,shrinklo,shrinkhi);
+ } else {
+ if (wtflag) {
+ weight = fixstore->vstore;
+ rcb->compute(dim,atom->nlocal,atom->x,weight,shrinklo,shrinkhi);
+ } else rcb->compute(dim,atom->nlocal,atom->x,NULL,shrinklo,shrinkhi);
+ }
+
rcb->invert(sortflag);
// reset RCB lo/hi bounding box to full simulation box as needed
diff --git a/src/balance.h b/src/balance.h
index 82941b34c0..0f2f79bb15 100644
--- a/src/balance.h
+++ b/src/balance.h
@@ -53,6 +53,7 @@ class Balance : protected Pointers {
int style; // style of LB
int xflag,yflag,zflag; // xyz LB flags
double *user_xsplit,*user_ysplit,*user_zsplit; // params for xyz LB
+ int oldrcb; // use old-style RCB compute
int nitermax; // params for shift LB
double stopthresh;
diff --git a/src/fix_halt.cpp b/src/fix_halt.cpp
index ca74efa454..2a4af4dc66 100644
--- a/src/fix_halt.cpp
+++ b/src/fix_halt.cpp
@@ -30,9 +30,10 @@
using namespace LAMMPS_NS;
using namespace FixConst;
-enum{BONDMAX,VARIABLE};
+enum{BONDMAX,TLIMIT,VARIABLE};
enum{LT,LE,GT,GE,EQ,NEQ,XOR};
enum{HARD,SOFT,CONTINUE};
+enum{NOMSG,YESMSG};
/* ---------------------------------------------------------------------- */
@@ -47,7 +48,8 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
idvar = NULL;
- if (strcmp(arg[4],"bondmax") == 0) attribute = BONDMAX;
+ if (strcmp(arg[4],"tlimit") == 0) attribute = TLIMIT;
+ else if (strcmp(arg[4],"bondmax") == 0) attribute = BONDMAX;
else if (strncmp(arg[4],"v_",2) == 0) {
attribute = VARIABLE;
int n = strlen(arg[4]);
@@ -73,6 +75,7 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
// parse optional args
eflag = SOFT;
+ msgflag = YESMSG;
int iarg = 7;
while (iarg < narg) {
@@ -83,6 +86,12 @@ FixHalt::FixHalt(LAMMPS *lmp, int narg, char **arg) :
else if (strcmp(arg[iarg+1],"continue") == 0) eflag = CONTINUE;
else error->all(FLERR,"Illegal fix halt command");
iarg += 2;
+ } else if (strcmp(arg[iarg],"message") == 0) {
+ if (iarg+2 > narg) error->all(FLERR,"Illegal fix halt command");
+ if (strcmp(arg[iarg+1],"no") == 0) msgflag = NOMSG;
+ else if (strcmp(arg[iarg+1],"yes") == 0) msgflag = YESMSG;
+ else error->all(FLERR,"Illegal fix halt command");
+ iarg += 2;
} else error->all(FLERR,"Illegal fix halt command");
}
@@ -125,6 +134,11 @@ void FixHalt::init()
if (input->variable->equalstyle(ivar) == 0)
error->all(FLERR,"Fix halt variable is not equal-style variable");
}
+
+ // settings used by TLIMIT
+
+ nextstep = (update->ntimestep/nevery)*nevery + nevery;
+ tratio = 0.5;
}
/* ---------------------------------------------------------------------- */
@@ -135,8 +149,12 @@ void FixHalt::end_of_step()
double attvalue;
- if (attribute == BONDMAX) attvalue = bondmax();
- else {
+ if (attribute == TLIMIT) {
+ if (update->ntimestep != nextstep) return;
+ attvalue = tlimit();
+ } else if (attribute == BONDMAX) {
+ attvalue = bondmax();
+ } else {
modify->clearstep_compute();
attvalue = input->variable->compute_equal(ivar);
modify->addstep_compute(update->ntimestep + nevery);
@@ -169,9 +187,10 @@ void FixHalt::end_of_step()
sprintf(str,"Fix halt %s condition met on step %ld with value %g",
id,update->ntimestep,attvalue);
- if (eflag == HARD) error->all(FLERR,str);
- else if (eflag == SOFT || eflag == CONTINUE) {
- if (comm->me == 0) error->message(FLERR,str);
+ if (eflag == HARD) {
+ error->all(FLERR,str);
+ } else if (eflag == SOFT || eflag == CONTINUE) {
+ if (comm->me == 0 && msgflag == YESMSG) error->message(FLERR,str);
timer->force_timeout();
}
}
@@ -218,3 +237,27 @@ double FixHalt::bondmax()
return sqrt(maxall);
}
+
+/* ----------------------------------------------------------------------
+ compute synced elapsed time
+ reset nextstep = estimate of timestep when run will end
+ first project to 1/2 the run time, thereafter to end of run
+------------------------------------------------------------------------- */
+
+double FixHalt::tlimit()
+{
+ double cpu = timer->elapsed(Timer::TOTAL);
+ MPI_Bcast(&cpu,1,MPI_DOUBLE,0,world);
+
+ if (cpu < value) {
+ bigint elapsed = update->ntimestep - update->firststep;
+ bigint final = update->firststep +
+ static_cast (tratio*value/cpu * elapsed);
+ nextstep = (final/nevery)*nevery + nevery;
+ tratio = 1.0;
+ }
+
+ //printf("EVAL %ld %g %d\n",update->ntimestep,cpu,nevery);
+
+ return cpu;
+}
diff --git a/src/fix_halt.h b/src/fix_halt.h
index 156397a4b5..52d2f95d2f 100644
--- a/src/fix_halt.h
+++ b/src/fix_halt.h
@@ -35,11 +35,13 @@ class FixHalt : public Fix {
void post_run();
private:
- int attribute,operation,eflag,ivar;
- double value;
+ int attribute,operation,eflag,msgflag,ivar;
+ bigint nextstep;
+ double value,tratio;
char *idvar;
double bondmax();
+ double tlimit();
};
}
diff --git a/src/rcb.cpp b/src/rcb.cpp
index 7223e4616a..1aca015daf 100644
--- a/src/rcb.cpp
+++ b/src/rcb.cpp
@@ -42,7 +42,7 @@ RCB::RCB(LAMMPS *lmp) : Pointers(lmp)
dots = NULL;
nlist = maxlist = 0;
- dotlist = dotmark = NULL;
+ dotlist = dotmark = dotmark_select = NULL;
maxbuf = 0;
buf = NULL;
@@ -73,6 +73,7 @@ RCB::~RCB()
memory->sfree(dots);
memory->destroy(dotlist);
memory->destroy(dotmark);
+ memory->destroy(dotmark_select);
memory->sfree(buf);
memory->destroy(recvproc);
@@ -91,6 +92,9 @@ RCB::~RCB()
/* ----------------------------------------------------------------------
perform RCB balancing of N particles at coords X in bounding box LO/HI
+ NEW version: each RCB cut is tested in all dimensions
+ dimeension that produces 2 boxes with largest min size is selected
+ this is to prevent very narrow boxes from being produced
if wt = NULL, ignore per-particle weights
if wt defined, per-particle weights > 0.0
dimension = 2 or 3
@@ -103,6 +107,523 @@ RCB::~RCB()
void RCB::compute(int dimension, int n, double **x, double *wt,
double *bboxlo, double *bboxhi)
+{
+ int i,j,k;
+ int keep,outgoing,incoming,incoming2;
+ int dim,markactive;
+ int indexlo,indexhi;
+ int first_iteration,breakflag;
+ double wttot,wtlo,wthi,wtsum,wtok,wtupto,wtmax;
+ double targetlo,targethi;
+ double valuemin,valuemax,valuehalf,valuehalf_select,smaller;
+ double tolerance;
+ MPI_Comm comm,comm_half;
+ MPI_Request request,request2;
+ Median med,medme;
+
+ // create list of my Dots
+
+ ndot = nkeep = noriginal = n;
+
+ if (ndot > maxdot) {
+ maxdot = ndot;
+ memory->sfree(dots);
+ dots = (Dot *) memory->smalloc(ndot*sizeof(Dot),"RCB:dots");
+ }
+
+ for (i = 0; i < ndot; i++) {
+ dots[i].x[0] = x[i][0];
+ dots[i].x[1] = x[i][1];
+ dots[i].x[2] = x[i][2];
+ dots[i].proc = me;
+ dots[i].index = i;
+ }
+
+ if (wt)
+ for (i = 0; i < ndot; i++) dots[i].wt = wt[i];
+ else
+ for (i = 0; i < ndot; i++) dots[i].wt = 1.0;
+
+ // initial bounding box = simulation box
+ // includes periodic or shrink-wrapped boundaries
+
+ lo = bbox.lo;
+ hi = bbox.hi;
+
+ lo[0] = bboxlo[0];
+ lo[1] = bboxlo[1];
+ lo[2] = bboxlo[2];
+ hi[0] = bboxhi[0];
+ hi[1] = bboxhi[1];
+ hi[2] = bboxhi[2];
+
+ cut = 0.0;
+ cutdim = -1;
+
+ // initialize counters
+
+ counters[0] = 0;
+ counters[1] = 0;
+ counters[2] = 0;
+ counters[3] = ndot;
+ counters[4] = maxdot;
+ counters[5] = 0;
+ counters[6] = 0;
+
+ // create communicator for use in recursion
+
+ MPI_Comm_dup(world,&comm);
+
+ // recurse until partition is a single proc = me
+ // proclower,procupper = lower,upper procs in partition
+ // procmid = 1st proc in upper half of partition
+
+ int procpartner,procpartner2;
+
+ int procmid;
+ int proclower = 0;
+ int procupper = nprocs - 1;
+
+ while (proclower != procupper) {
+
+ // if odd # of procs, lower partition gets extra one
+
+ procmid = proclower + (procupper - proclower) / 2 + 1;
+
+ // determine communication partner(s)
+ // readnumber = # of proc partners to read from
+
+ if (me < procmid)
+ procpartner = me + (procmid - proclower);
+ else
+ procpartner = me - (procmid - proclower);
+
+ int readnumber = 1;
+ if (procpartner > procupper) {
+ readnumber = 0;
+ procpartner--;
+ }
+ if (me == procupper && procpartner != procmid - 1) {
+ readnumber = 2;
+ procpartner2 = procpartner + 1;
+ }
+
+ // wttot = summed weight of entire partition
+ // search tolerance = largest single weight (plus epsilon)
+ // targetlo = desired weight in lower half of partition
+ // targethi = desired weight in upper half of partition
+
+ wtmax = wtsum = 0.0;
+
+ if (wt) {
+ for (i = 0; i < ndot; i++) {
+ wtsum += dots[i].wt;
+ if (dots[i].wt > wtmax) wtmax = dots[i].wt;
+ }
+ } else {
+ for (i = 0; i < ndot; i++) wtsum += dots[i].wt;
+ wtmax = 1.0;
+ }
+
+ MPI_Allreduce(&wtsum,&wttot,1,MPI_DOUBLE,MPI_SUM,comm);
+ if (wt) MPI_Allreduce(&wtmax,&tolerance,1,MPI_DOUBLE,MPI_MAX,comm);
+ else tolerance = 1.0;
+
+ tolerance *= 1.0 + TINY;
+ targetlo = wttot * (procmid - proclower) / (procupper + 1 - proclower);
+ targethi = wttot - targetlo;
+
+ // attempt a cut in each dimension
+ // each cut produces 2 boxes, each with a reduced box length in that dim
+ // smaller = the smaller of the 2 reduced box lengths in that dimension
+ // choose to cut in dimension which produces largest smaller value
+ // this should induce final proc sub-boxes to be as cube-ish as possible
+ // dim_select = selected cut dimension
+ // valuehalf_select = valuehalf in that dimension
+ // dotmark_select = dot markings in that dimension
+
+ int dim_select = -1;
+ double largest = 0.0;
+
+ for (dim = 0; dim < dimension; dim++) {
+
+ // create active list and mark array for dots
+ // initialize active list to all dots
+
+ if (ndot > maxlist) {
+ memory->destroy(dotlist);
+ memory->destroy(dotmark);
+ memory->destroy(dotmark_select);
+ maxlist = maxdot;
+ memory->create(dotlist,maxlist,"RCB:dotlist");
+ memory->create(dotmark,maxlist,"RCB:dotmark");
+ memory->create(dotmark_select,maxlist,"RCB:dotmark_select");
+ }
+
+ nlist = ndot;
+ for (i = 0; i < nlist; i++) dotlist[i] = i;
+
+ // median iteration
+ // zoom in on bisector until correct # of dots in each half of partition
+ // as each iteration of median-loop begins, require:
+ // all non-active dots are marked with 0/1 in dotmark
+ // valuemin <= every active dot <= valuemax
+ // wtlo, wthi = total wt of non-active dots
+ // when leave median-loop, require only:
+ // valuehalf = correct cut position
+ // all dots <= valuehalf are marked with 0 in dotmark
+ // all dots >= valuehalf are marked with 1 in dotmark
+ // markactive = which side of cut is active = 0/1
+ // indexlo,indexhi = indices of dot closest to median
+
+ wtlo = wthi = 0.0;
+ valuemin = lo[dim];
+ valuemax = hi[dim];
+ first_iteration = 1;
+ indexlo = indexhi = 0;
+
+ while (1) {
+
+ // choose bisector value
+ // use old value on 1st iteration if old cut dimension is the same
+ // on 2nd option: could push valuehalf towards geometric center
+ // with "1.0-factor" to force overshoot
+
+ if (first_iteration && reuse && dim == tree[procmid].dim) {
+ counters[5]++;
+ valuehalf = tree[procmid].cut;
+ if (valuehalf < valuemin || valuehalf > valuemax)
+ valuehalf = 0.5 * (valuemin + valuemax);
+ } else if (wt)
+ valuehalf = valuemin + (targetlo - wtlo) /
+ (wttot - wtlo - wthi) * (valuemax - valuemin);
+ else
+ valuehalf = 0.5 * (valuemin + valuemax);
+
+ first_iteration = 0;
+
+ // initialize local median data structure
+
+ medme.totallo = medme.totalhi = 0.0;
+ medme.valuelo = -MYHUGE;
+ medme.valuehi = MYHUGE;
+ medme.wtlo = medme.wthi = 0.0;
+ medme.countlo = medme.counthi = 0;
+ medme.proclo = medme.prochi = me;
+
+ // mark all active dots on one side or other of bisector
+ // also set all fields in median data struct
+ // save indices of closest dots on either side
+
+ for (j = 0; j < nlist; j++) {
+ i = dotlist[j];
+ if (dots[i].x[dim] <= valuehalf) { // in lower part
+ medme.totallo += dots[i].wt;
+ dotmark[i] = 0;
+ if (dots[i].x[dim] > medme.valuelo) { // my closest dot
+ medme.valuelo = dots[i].x[dim];
+ medme.wtlo = dots[i].wt;
+ medme.countlo = 1;
+ indexlo = i;
+ } else if (dots[i].x[dim] == medme.valuelo) { // tied for closest
+ medme.wtlo += dots[i].wt;
+ medme.countlo++;
+ }
+ }
+ else { // in upper part
+ medme.totalhi += dots[i].wt;
+ dotmark[i] = 1;
+ if (dots[i].x[dim] < medme.valuehi) { // my closest dot
+ medme.valuehi = dots[i].x[dim];
+ medme.wthi = dots[i].wt;
+ medme.counthi = 1;
+ indexhi = i;
+ } else if (dots[i].x[dim] == medme.valuehi) { // tied for closest
+ medme.wthi += dots[i].wt;
+ medme.counthi++;
+ }
+ }
+ }
+
+ // combine median data struct across current subset of procs
+
+ counters[0]++;
+ MPI_Allreduce(&medme,&med,1,med_type,med_op,comm);
+
+ // test median guess for convergence
+ // move additional dots that are next to cut across it
+
+ if (wtlo + med.totallo < targetlo) { // lower half TOO SMALL
+
+ wtlo += med.totallo;
+ valuehalf = med.valuehi;
+
+ if (med.counthi == 1) { // only one dot to move
+ if (wtlo + med.wthi < targetlo) { // move it, keep iterating
+ if (me == med.prochi) dotmark[indexhi] = 0;
+ }
+ else { // only move if beneficial
+ if (wtlo + med.wthi - targetlo < targetlo - wtlo)
+ if (me == med.prochi) dotmark[indexhi] = 0;
+ break; // all done
+ }
+ }
+ else { // multiple dots to move
+ breakflag = 0;
+ wtok = 0.0;
+ if (medme.valuehi == med.valuehi) wtok = medme.wthi;
+ if (wtlo + med.wthi >= targetlo) { // all done
+ MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm);
+ wtmax = targetlo - wtlo;
+ if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax);
+ breakflag = 1;
+ } // wtok = most I can move
+ for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) {
+ i = dotlist[j];
+ if (dots[i].x[dim] == med.valuehi) { // only move if better
+ if (wtsum + dots[i].wt - wtok < wtok - wtsum)
+ dotmark[i] = 0;
+ wtsum += dots[i].wt;
+ }
+ }
+ if (breakflag) break; // done if moved enough
+ }
+
+ wtlo += med.wthi;
+ if (targetlo-wtlo <= tolerance) break; // close enough
+
+ valuemin = med.valuehi; // iterate again
+ markactive = 1;
+ }
+
+ else if (wthi + med.totalhi < targethi) { // upper half TOO SMALL
+
+ wthi += med.totalhi;
+ valuehalf = med.valuelo;
+
+ if (med.countlo == 1) { // only one dot to move
+ if (wthi + med.wtlo < targethi) { // move it, keep iterating
+ if (me == med.proclo) dotmark[indexlo] = 1;
+ }
+ else { // only move if beneficial
+ if (wthi + med.wtlo - targethi < targethi - wthi)
+ if (me == med.proclo) dotmark[indexlo] = 1;
+ break; // all done
+ }
+ }
+ else { // multiple dots to move
+ breakflag = 0;
+ wtok = 0.0;
+ if (medme.valuelo == med.valuelo) wtok = medme.wtlo;
+ if (wthi + med.wtlo >= targethi) { // all done
+ MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm);
+ wtmax = targethi - wthi;
+ if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax);
+ breakflag = 1;
+ } // wtok = most I can move
+ for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) {
+ i = dotlist[j];
+ if (dots[i].x[dim] == med.valuelo) { // only move if better
+ if (wtsum + dots[i].wt - wtok < wtok - wtsum)
+ dotmark[i] = 1;
+ wtsum += dots[i].wt;
+ }
+ }
+ if (breakflag) break; // done if moved enough
+ }
+
+ wthi += med.wtlo;
+ if (targethi-wthi <= tolerance) break; // close enough
+
+ valuemax = med.valuelo; // iterate again
+ markactive = 0;
+ }
+
+ else // Goldilocks result: both partitions just right
+ break;
+
+ // shrink the active list
+
+ k = 0;
+ for (j = 0; j < nlist; j++) {
+ i = dotlist[j];
+ if (dotmark[i] == markactive) dotlist[k++] = i;
+ }
+ nlist = k;
+ }
+
+ // cut produces 2 sub-boxes with reduced size in dim
+ // compare smaller of the 2 sizes to previous dims
+ // keep dim that has the largest smaller
+
+ smaller = MIN(valuehalf-lo[dim],hi[dim]-valuehalf);
+ if (smaller > largest) {
+ largest = smaller;
+ dim_select = dim;
+ valuehalf_select = valuehalf;
+ memcpy(dotmark_select,dotmark,ndot*sizeof(int));
+ }
+ }
+
+ // copy results for best dim cut into dim,valuehalf,dotmark
+
+ dim = dim_select;
+ valuehalf = valuehalf_select;
+ memcpy(dotmark,dotmark_select,ndot*sizeof(int));
+
+ // found median
+ // store cut info only if I am procmid
+
+ if (me == procmid) {
+ cut = valuehalf;
+ cutdim = dim;
+ }
+
+ // use cut to shrink my RCB bounding box
+
+ if (me < procmid) hi[dim] = valuehalf;
+ else lo[dim] = valuehalf;
+
+ // outgoing = number of dots to ship to partner
+ // nkeep = number of dots that have never migrated
+
+ markactive = (me < procpartner);
+ for (i = 0, keep = 0, outgoing = 0; i < ndot; i++)
+ if (dotmark[i] == markactive) outgoing++;
+ else if (i < nkeep) keep++;
+ nkeep = keep;
+
+ // alert partner how many dots I'll send, read how many I'll recv
+
+ MPI_Send(&outgoing,1,MPI_INT,procpartner,0,world);
+ incoming = 0;
+ if (readnumber) {
+ MPI_Recv(&incoming,1,MPI_INT,procpartner,0,world,MPI_STATUS_IGNORE);
+ if (readnumber == 2) {
+ MPI_Recv(&incoming2,1,MPI_INT,procpartner2,0,world,MPI_STATUS_IGNORE);
+ incoming += incoming2;
+ }
+ }
+
+ // check if need to alloc more space
+
+ int ndotnew = ndot - outgoing + incoming;
+ if (ndotnew > maxdot) {
+ while (maxdot < ndotnew) maxdot += DELTA;
+ dots = (Dot *) memory->srealloc(dots,maxdot*sizeof(Dot),"RCB::dots");
+ counters[6]++;
+ }
+
+ counters[1] += outgoing;
+ counters[2] += incoming;
+ if (ndotnew > counters[3]) counters[3] = ndotnew;
+ if (maxdot > counters[4]) counters[4] = maxdot;
+
+ // malloc comm send buffer
+
+ if (outgoing > maxbuf) {
+ memory->sfree(buf);
+ maxbuf = outgoing;
+ buf = (Dot *) memory->smalloc(maxbuf*sizeof(Dot),"RCB:buf");
+ }
+
+ // fill buffer with dots that are marked for sending
+ // pack down the unmarked ones
+
+ keep = outgoing = 0;
+ for (i = 0; i < ndot; i++) {
+ if (dotmark[i] == markactive)
+ memcpy(&buf[outgoing++],&dots[i],sizeof(Dot));
+ else
+ memcpy(&dots[keep++],&dots[i],sizeof(Dot));
+ }
+
+ // post receives for dots
+
+ if (readnumber > 0) {
+ MPI_Irecv(&dots[keep],incoming*sizeof(Dot),MPI_CHAR,
+ procpartner,1,world,&request);
+ if (readnumber == 2) {
+ keep += incoming - incoming2;
+ MPI_Irecv(&dots[keep],incoming2*sizeof(Dot),MPI_CHAR,
+ procpartner2,1,world,&request2);
+ }
+ }
+
+ // handshake before sending dots to insure recvs have been posted
+
+ if (readnumber > 0) {
+ MPI_Send(NULL,0,MPI_INT,procpartner,0,world);
+ if (readnumber == 2) MPI_Send(NULL,0,MPI_INT,procpartner2,0,world);
+ }
+ MPI_Recv(NULL,0,MPI_INT,procpartner,0,world,MPI_STATUS_IGNORE);
+
+ // send dots to partner
+
+ MPI_Rsend(buf,outgoing*sizeof(Dot),MPI_CHAR,procpartner,1,world);
+
+ // wait until all dots are received
+
+ if (readnumber > 0) {
+ MPI_Wait(&request,MPI_STATUS_IGNORE);
+ if (readnumber == 2) MPI_Wait(&request2,MPI_STATUS_IGNORE);
+ }
+
+ ndot = ndotnew;
+
+ // cut partition in half, create new communicators of 1/2 size
+
+ int split;
+ if (me < procmid) {
+ procupper = procmid - 1;
+ split = 0;
+ } else {
+ proclower = procmid;
+ split = 1;
+ }
+
+ MPI_Comm_split(comm,split,me,&comm_half);
+ MPI_Comm_free(&comm);
+ comm = comm_half;
+ }
+
+ // clean up
+
+ MPI_Comm_free(&comm);
+
+ // set public variables with results of rebalance
+
+ nfinal = ndot;
+
+ if (nfinal > maxrecv) {
+ memory->destroy(recvproc);
+ memory->destroy(recvindex);
+ maxrecv = nfinal;
+ memory->create(recvproc,maxrecv,"RCB:recvproc");
+ memory->create(recvindex,maxrecv,"RCB:recvindex");
+ }
+
+ for (i = 0; i < nfinal; i++) {
+ recvproc[i] = dots[i].proc;
+ recvindex[i] = dots[i].index;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ perform RCB balancing of N particles at coords X in bounding box LO/HI
+ OLD version: each RCB cut is made in longest dimension of sub-box
+ if wt = NULL, ignore per-particle weights
+ if wt defined, per-particle weights > 0.0
+ dimension = 2 or 3
+ as documented in rcb.h:
+ sets noriginal,nfinal,nkeep,recvproc,recvindex,lo,hi
+ all proc particles will be inside or on surface of 3-d box
+ defined by final lo/hi
+ // NOTE: worry about re-use of data structs for fix balance?
+------------------------------------------------------------------------- */
+
+void RCB::compute_old(int dimension, int n, double **x, double *wt,
+ double *bboxlo, double *bboxhi)
{
int i,j,k;
int keep,outgoing,incoming,incoming2;
diff --git a/src/rcb.h b/src/rcb.h
index 7a82d42f93..90b82f5952 100644
--- a/src/rcb.h
+++ b/src/rcb.h
@@ -42,6 +42,7 @@ class RCB : protected Pointers {
RCB(class LAMMPS *);
~RCB();
void compute(int, int, double **, double *, double *, double *);
+ void compute_old(int, int, double **, double *, double *, double *);
void invert(int sortflag = 0);
bigint memory_usage();
@@ -99,6 +100,7 @@ class RCB : protected Pointers {
int maxlist;
int *dotlist;
int *dotmark;
+ int *dotmark_select;
int maxbuf;
Dot *buf;
diff --git a/src/thermo.cpp b/src/thermo.cpp
index 75e72ada64..dbbeff4998 100644
--- a/src/thermo.cpp
+++ b/src/thermo.cpp
@@ -55,7 +55,8 @@ using namespace MathConst;
// customize a new keyword by adding to this list:
-// step, elapsed, elaplong, dt, time, cpu, tpcpu, spcpu, cpuremain, part, timeremain
+// step, elapsed, elaplong, dt, time, cpu, tpcpu, spcpu, cpuremain,
+// part, timeremain
// atoms, temp, press, pe, ke, etotal, enthalpy
// evdwl, ecoul, epair, ebond, eangle, edihed, eimp, emol, elong, etail
// vol, density, lx, ly, lz, xlo, xhi, ylo, yhi, zlo, zhi, xy, xz, yz,
@@ -394,6 +395,11 @@ void Thermo::compute(int flag)
if (flushflag) fflush(logfile);
}
}
+
+ // set to 1, so that subsequent invocations of CPU time will be non-zero
+ // e.g. via variables in print command
+
+ firststep = 1;
}
/* ----------------------------------------------------------------------
diff --git a/src/version.h b/src/version.h
index 703db28c26..e0aa18371c 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1 +1 @@
-#define LAMMPS_VERSION "7 Mar 2017"
+#define LAMMPS_VERSION "10 Mar 2017"