diff --git a/doc/JPG/sinusoid.jpg b/doc/JPG/sinusoid.jpg new file mode 100644 index 0000000000..00969281a1 Binary files /dev/null and b/doc/JPG/sinusoid.jpg differ diff --git a/doc/JPG/sinusoid_small.jpg b/doc/JPG/sinusoid_small.jpg new file mode 100644 index 0000000000..1643d81698 Binary files /dev/null and b/doc/JPG/sinusoid_small.jpg differ diff --git a/doc/Manual.html b/doc/Manual.html index 43e96bec91..fa49ba5bdf 100644 --- a/doc/Manual.html +++ b/doc/Manual.html @@ -1,7 +1,7 @@
mol value = template-ID seed
template-ID = ID of molecule template specified in a separate molecule command
@@ -40,6 +40,10 @@
M = which basis atom
itype = atom type (1-N) to assign to this basis atom
remap value = yes or no
+ var value = name = variable name to evaluate for test of atom creation
+ set values = dim vname
+ dim = x or y or z
+ name = name of variable to set with x,y,z atom position
units value = lattice or box
lattice = the geometry is defined in lattice units
box = the geometry is defined in simulation box units
@@ -51,6 +55,7 @@
create_atoms 1 box
create_atoms 3 region regsphere basis 2 3
create_atoms 3 single 0 0 5
+create_atoms 1 box var v set x xpos set y ypos
Description:
@@ -189,6 +194,45 @@ box, it will mapped back into the box, assuming the relevant
dimensions are periodic. If it is set to no, no remapping is done
and no particle is created if its position is outside the box.
+The var and set keywords can be used to provide a criterion for
+accepting or rejecting the addition of an individual atom, based on
+its coordinates. The vname specified for the var keyword is the
+name of an equal-style variable which should evaluate
+to a zero or non-zero value based on one or two or three variables
+which will store the x, y, or z coordinates of an atom (one variable
+per coordinate). These other variables must be equal-style
+variables defined in the input script, but their
+formula can by anything. The set keyword is used to identify the
+names of these other variables, one variable for the x-coordinate of a
+created atom, one for y, and one for z.
+
+When an atom is created, its x, y, or z coordinates override the
+formula for any set variable that is defined. The var variable is
+then evaluated. If the returned value is 0.0, the atom is not
+created. If it is non-zero, the atom is created.
+
+As an example, these commands can be used in a 2d simulation, to
+create a sinusoidal surface. Note that the surface is "rough" due to
+individual lattice points being "above" or "below" the mathematical
+expression for the sinusoidal curve. If a finer lattice were used,
+the sinusoid would appear to be "smoother". Also note the use of the
+"xlat" and "ylat" thermo_style keywords which
+converts lattice spacings to distance. Click on the image for a
+larger version.
+
+variable x equal 100
+variable y equal 25
+lattice hex 0.8442
+region box block 0 $x 0 $y -0.5 0.5
+create_box 1 box
+
+variable xx equal 0.0
+variable yy equal 0.0
+variable v equal "(0.2*v_y*ylat * cos(v_xx/xlat * 2.0*PI*4.0/v_x) + 0.5*v_y*ylat - v_yy) > 0.0"
+create_atoms 1 box var v set x xx set y yy
+
+
+
The units keyword determines the meaning of the distance units used
to specify the coordinates of the one particle created by the single
style. A box value selects standard distance units as defined by
diff --git a/doc/create_atoms.txt b/doc/create_atoms.txt
index 4552d43e9a..f395b87b32 100644
--- a/doc/create_atoms.txt
+++ b/doc/create_atoms.txt
@@ -24,7 +24,7 @@ style = {box} or {region} or {single} or {random} :l
seed = random # seed (positive integer)
region-ID = create atoms within this region, use NULL for entire simulation box :pre
zero or more keyword/value pairs may be appended :l
-keyword = {mol} or {basis} or {remap} or {units} :l
+keyword = {mol} or {basis} or {remap} or {var} or {set} or {units} :l
{mol} value = template-ID seed
template-ID = ID of molecule template specified in a separate "molecule"_molecule.html command
seed = random # seed (positive integer)
@@ -32,6 +32,10 @@ keyword = {mol} or {basis} or {remap} or {units} :l
M = which basis atom
itype = atom type (1-N) to assign to this basis atom
{remap} value = {yes} or {no}
+ {var} value = name = variable name to evaluate for test of atom creation
+ {set} values = dim vname
+ dim = {x} or {y} or {z}
+ name = name of variable to set with x,y,z atom position
{units} value = {lattice} or {box}
{lattice} = the geometry is defined in lattice units
{box} = the geometry is defined in simulation box units :pre
@@ -41,7 +45,8 @@ keyword = {mol} or {basis} or {remap} or {units} :l
create_atoms 1 box
create_atoms 3 region regsphere basis 2 3
-create_atoms 3 single 0 0 5 :pre
+create_atoms 3 single 0 0 5
+create_atoms 1 box var v set x xpos set y ypos :pre
[Description:]
@@ -180,6 +185,45 @@ box, it will mapped back into the box, assuming the relevant
dimensions are periodic. If it is set to {no}, no remapping is done
and no particle is created if its position is outside the box.
+The {var} and {set} keywords can be used to provide a criterion for
+accepting or rejecting the addition of an individual atom, based on
+its coordinates. The {vname} specified for the {var} keyword is the
+name of an "equal-style variable"_variable.html which should evaluate
+to a zero or non-zero value based on one or two or three variables
+which will store the x, y, or z coordinates of an atom (one variable
+per coordinate). These other variables must be "equal-style
+variables"_variable.html defined in the input script, but their
+formula can by anything. The {set} keyword is used to identify the
+names of these other variables, one variable for the x-coordinate of a
+created atom, one for y, and one for z.
+
+When an atom is created, its x, y, or z coordinates override the
+formula for any {set} variable that is defined. The {var} variable is
+then evaluated. If the returned value is 0.0, the atom is not
+created. If it is non-zero, the atom is created.
+
+As an example, these commands can be used in a 2d simulation, to
+create a sinusoidal surface. Note that the surface is "rough" due to
+individual lattice points being "above" or "below" the mathematical
+expression for the sinusoidal curve. If a finer lattice were used,
+the sinusoid would appear to be "smoother". Also note the use of the
+"xlat" and "ylat" "thermo_style"_thermo_style.html keywords which
+converts lattice spacings to distance. Click on the image for a
+larger version.
+
+variable x equal 100
+variable y equal 25
+lattice hex 0.8442
+region box block 0 $x 0 $y -0.5 0.5
+create_box 1 box :pre
+
+variable xx equal 0.0
+variable yy equal 0.0
+variable v equal "(0.2*v_y*ylat * cos(v_xx/xlat * 2.0*PI*4.0/v_x) + 0.5*v_y*ylat - v_yy) > 0.0"
+create_atoms 1 box var v set x xx set y yy :pre
+
+:c,image(JPG/sinusoid_small.jpg,JPG/sinusoid.jpg)
+
The {units} keyword determines the meaning of the distance units used
to specify the coordinates of the one particle created by the {single}
style. A {box} value selects standard distance units as defined by
diff --git a/doc/dump.html b/doc/dump.html
index e83d9f5c5f..c13777fd3e 100644
--- a/doc/dump.html
+++ b/doc/dump.html
@@ -57,7 +57,7 @@
f_ID[N] = Nth column of local array calculated by a fix with ID
custom of custom/mpiio args = list of atom attributes
- possible attributes = id, mol, type, element, mass,
+ possible attributes = id, mol, proc, procp1, type, element, mass,
x, y, z, xs, ys, zs, xu, yu, zu,
xsu, ysu, zsu, ix, iy, iz,
vx, vy, vz, fx, fy, fz,
@@ -69,6 +69,7 @@
id = atom ID
mol = molecule ID
proc = ID of processor that owns atom
+ procp1 = ID+1 of processor that owns atom
type = atom type
element = name of atom element, as defined by dump_modify command
mass = atom mass
@@ -460,18 +461,20 @@ dump 1 all local 1000 tmp.dump index c_1[1] c_1[2] c_1[3] c_2[1] c_2[2]
This section explains the atom attributes that can be specified as
part of the custom and cfg styles.
-The id, mol, proc, type, element, mass, vx, vy, vz,
-fx, fy, fz, q attributes are self-explanatory.
+
The id, mol, proc, procp1, type, element, mass, vx,
+vy, vz, fx, fy, fz, q attributes are self-explanatory.
Id is the atom ID. Mol is the molecule ID, included in the data
file for molecular systems. Proc is the ID of the processor (0 to
-Nprocs-1) that currently owns the atom. Type is the atom type.
-Element is typically the chemical name of an element, which you must
-assign to each type via the dump_modify element
-command. More generally, it can be any string you wish to associated
-with an atom type. Mass is the atom mass. Vx, vy, vz, fx,
-fy, fz, and q are components of atom velocity and force and
-atomic charge.
+Nprocs-1) that currently owns the atom. Procp1 is the proc ID+1,
+which can be convenient in place of a type attribute (1 to Ntypes)
+for coloring atoms in a visualization program. Type is the atom
+type (1 to Ntypes). Element is typically the chemical name of an
+element, which you must assign to each type via the dump_modify
+element command. More generally, it can be any
+string you wish to associated with an atom type. Mass is the atom
+mass. Vx, vy, vz, fx, fy, fz, and q are components of
+atom velocity and force and atomic charge.
There are several options for outputting atom coordinates. The x,
y, z attributes write atom coordinates "unscaled", in the
diff --git a/doc/dump.txt b/doc/dump.txt
index 3f83f217ca..da12e99ed7 100644
--- a/doc/dump.txt
+++ b/doc/dump.txt
@@ -44,7 +44,7 @@ args = list of arguments for a particular style :l
f_ID\[N\] = Nth column of local array calculated by a fix with ID :pre
{custom} of {custom/mpiio} args = list of atom attributes
- possible attributes = id, mol, type, element, mass,
+ possible attributes = id, mol, proc, procp1, type, element, mass,
x, y, z, xs, ys, zs, xu, yu, zu,
xsu, ysu, zsu, ix, iy, iz,
vx, vy, vz, fx, fy, fz,
@@ -56,6 +56,7 @@ args = list of arguments for a particular style :l
id = atom ID
mol = molecule ID
proc = ID of processor that owns atom
+ procp1 = ID+1 of processor that owns atom
type = atom type
element = name of atom element, as defined by "dump_modify"_dump_modify.html command
mass = atom mass
@@ -78,8 +79,8 @@ args = list of arguments for a particular style :l
f_ID = per-atom vector calculated by a fix with ID
f_ID\[N\] = Nth column of per-atom array calculated by a fix with ID
v_name = per-atom vector calculated by an atom-style variable with name
- d_name = per-atom floating point vector with name managed by fix property/atom
- i_name = per-atom integer vector with name managed by fix property/atom :pre
+ d_name = per-atom floating point vector with name, managed by fix property/atom
+ i_name = per-atom integer vector with name, managed by fix property/atom :pre
:ule
[Examples:]
@@ -112,7 +113,7 @@ options; see details below.
As described below, the filename determines the kind of output (text
or binary or gzipped, one big file or one per timestep, one big file
-or one per processor).
+or multiple smaller files).
IMPORTANT NOTE: Because periodic boundary conditions are enforced only
on timesteps when neighbor lists are rebuilt, the coordinates of an
@@ -127,7 +128,7 @@ if the "atom_modify sort"_atom_modify.html option is on, which it is
by default. In this case atoms are re-ordered periodically during a
simulation, due to spatial sorting. It is also true when running in
parallel, because data for a single snapshot is collected from
-multiple processors.
+multiple processors, each of which owns a subset of the atoms.
For the {atom}, {custom}, {cfg}, and {local} styles, sorting is off by
default. For the {dcd}, {xtc}, {xyz}, and {molfile} styles, sorting by
@@ -295,11 +296,12 @@ from using the (numerical) atom type to an element name (or some
other label). This will help many visualization programs to guess
bonds and colors.
-Note that {atom}, {custom}, {dcd}, {xtc}, and {xyz} style dump files can
-be read directly by "VMD"_http://www.ks.uiuc.edu/Research/vmd (a popular
-molecular viewing program). See "Section tools"_Section_tools.html#vmd
-of the manual and the tools/lmp2vmd/README.txt file for more information
-about support in VMD for reading and visualizing LAMMPS dump files.
+Note that {atom}, {custom}, {dcd}, {xtc}, and {xyz} style dump files
+can be read directly by "VMD"_http://www.ks.uiuc.edu/Research/vmd, a
+popular molecular viewing program. See "Section
+tools"_Section_tools.html#vmd of the manual and the
+tools/lmp2vmd/README.txt file for more information about support in
+VMD for reading and visualizing LAMMPS dump files.
:line
@@ -332,7 +334,7 @@ tmp.dump.20000, etc. This option is not available for the {dcd} and
{xtc} styles. Note that the "dump_modify pad"_dump_modify.html
command can be used to insure all timestep numbers are the same length
(e.g. 00010), which can make it easier to read a series of dump files
-in order by some post-processing tools.
+in order with some post-processing tools.
If a "%" character appears in the filename, then each of P processors
writes a portion of the dump file, and the "%" character is replaced
@@ -447,18 +449,20 @@ dump 1 all local 1000 tmp.dump index c_1\[1\] c_1\[2\] c_1\[3\] c_2\[1\] c_2\[2\
This section explains the atom attributes that can be specified as
part of the {custom} and {cfg} styles.
-The {id}, {mol}, {proc}, {type}, {element}, {mass}, {vx}, {vy}, {vz},
-{fx}, {fy}, {fz}, {q} attributes are self-explanatory.
+The {id}, {mol}, {proc}, {procp1}, {type}, {element}, {mass}, {vx},
+{vy}, {vz}, {fx}, {fy}, {fz}, {q} attributes are self-explanatory.
{Id} is the atom ID. {Mol} is the molecule ID, included in the data
file for molecular systems. {Proc} is the ID of the processor (0 to
-Nprocs-1) that currently owns the atom. {Type} is the atom type.
-{Element} is typically the chemical name of an element, which you must
-assign to each type via the "dump_modify element"_dump_modify.html
-command. More generally, it can be any string you wish to associated
-with an atom type. {Mass} is the atom mass. {Vx}, {vy}, {vz}, {fx},
-{fy}, {fz}, and {q} are components of atom velocity and force and
-atomic charge.
+Nprocs-1) that currently owns the atom. {Procp1} is the proc ID+1,
+which can be convenient in place of a {type} attribute (1 to Ntypes)
+for coloring atoms in a visualization program. {Type} is the atom
+type (1 to Ntypes). {Element} is typically the chemical name of an
+element, which you must assign to each type via the "dump_modify
+element"_dump_modify.html command. More generally, it can be any
+string you wish to associated with an atom type. {Mass} is the atom
+mass. {Vx}, {vy}, {vz}, {fx}, {fy}, {fz}, and {q} are components of
+atom velocity and force and atomic charge.
There are several options for outputting atom coordinates. The {x},
{y}, {z} attributes write atom coordinates "unscaled", in the
diff --git a/doc/fix_store_state.txt b/doc/fix_store_state.txt
index e40a0f49cc..01f73a3bbe 100644
--- a/doc/fix_store_state.txt
+++ b/doc/fix_store_state.txt
@@ -46,8 +46,8 @@ input = one or more atom attributes :l
f_ID = per-atom vector calculated by a fix with ID
f_ID\[I\] = Ith column of per-atom array calculated by a fix with ID
v_name = per-atom vector calculated by an atom-style variable with name
- d_name = per-atom floating point vector managed by fix property/atom
- i_name = per-atom integer vector managed by fix property/atom :pre
+ d_name = per-atom floating point vector name, managed by fix property/atom
+ i_name = per-atom integer vector name, managed by fix property/atom :pre
zero or more keyword/value pairs may be appended :l
keyword = {com} :l
diff --git a/doc/pair_table.html b/doc/pair_table.html
index 21d9fc9d1f..1919b40002 100644
--- a/doc/pair_table.html
+++ b/doc/pair_table.html
@@ -119,8 +119,8 @@ to very steep parts of the potential.
-The format of a tabulated file is as follows (without the
-parenthesized comments):
+
The format of a tabulated file is a series of one or more sections,
+defined as follows (without the parenthesized comments):
# Morse potential for Fe (one or more comment or blank lines)
diff --git a/doc/pair_table.txt b/doc/pair_table.txt
index 4b83d2a8f5..8938de20b7 100644
--- a/doc/pair_table.txt
+++ b/doc/pair_table.txt
@@ -113,8 +113,8 @@ to very steep parts of the potential. :l,ule
:line
-The format of a tabulated file is as follows (without the
-parenthesized comments):
+The format of a tabulated file is a series of one or more sections,
+defined as follows (without the parenthesized comments):
# Morse potential for Fe (one or more comment or blank lines) :pre
diff --git a/doc/variable.html b/doc/variable.html
index 51daea0bf2..5222302cbe 100644
--- a/doc/variable.html
+++ b/doc/variable.html
@@ -50,7 +50,7 @@
constants = PI
thermo keywords = vol, ke, press, etc from thermo_style
math operators = (), -x, x+y, x-y, x*y, x/y, x^y, x%y,
- x==y, x!=y, xy, x>=y, x&&y, x||y, !x
+ x == y, x != y, x < y, x <= y, x > y, x >= y, x && y, x || y, !x
math functions = sqrt(x), exp(x), ln(x), log(x), abs(x),
sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x),
random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x)
@@ -368,7 +368,8 @@ references to other variables.
Number 0.2, 100, 1.0e20, -15.4, etc
Constant PI
Thermo keywords vol, pe, ebond, etc
-Math operators (), -x, x+y, x-y, x*y, x/y, x^y, x%y, x==y, x!=y, xy, x>=y, x&&y, x||y, !x
+Math operators (), -x, x+y, x-y, x*y, x/y, x^y, x%y,
+Math operators (), -x, x+y, x-y, x*y, x/y, x^y, x%y, x == y, x != y, x < y, x <= y, x > y, x >= y, x && y, x || y, !x
Math functions sqrt(x), exp(x), ln(x), log(x), abs(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), ramp(x,y), stagger(x,y), logfreq(x,y,z), stride(x,y,z), vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z)
Group functions count(ID), mass(ID), charge(ID), xcm(ID,dim), vcm(ID,dim), fcm(ID,dim), bound(ID,dir), gyration(ID), ke(ID), angmom(ID,dim), torque(ID,dim), inertia(ID,dimdim), omega(ID,dim)
Region functions count(ID,IDR), mass(ID,IDR), charge(ID,IDR), xcm(ID,dim,IDR), vcm(ID,dim,IDR), fcm(ID,dim,IDR), bound(ID,dir,IDR), gyration(ID,IDR), ke(ID,IDR), angmom(ID,dim,IDR), torque(ID,dim,IDR), inertia(ID,dimdim,IDR), omega(ID,dim,IDR)
diff --git a/doc/variable.txt b/doc/variable.txt
index e9362de392..d7323b3efe 100644
--- a/doc/variable.txt
+++ b/doc/variable.txt
@@ -45,7 +45,7 @@ style = {delete} or {index} or {loop} or {world} or {universe} or {uloop} or {st
constants = PI
thermo keywords = vol, ke, press, etc from "thermo_style"_thermo_style.html
math operators = (), -x, x+y, x-y, x*y, x/y, x^y, x%y,
- x==y, x!=y, xy, x>=y, x&&y, x||y, !x
+ x == y, x != y, x < y, x <= y, x > y, x >= y, x && y, x || y, !x
math functions = sqrt(x), exp(x), ln(x), log(x), abs(x),
sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x),
random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x)
@@ -361,7 +361,8 @@ references to other variables.
Number: 0.2, 100, 1.0e20, -15.4, etc
Constant: PI
Thermo keywords: vol, pe, ebond, etc
-Math operators: (), -x, x+y, x-y, x*y, x/y, x^y, x%y, x==y, x!=y, xy, x>=y, x&&y, x||y, !x
+Math operators: (), -x, x+y, x-y, x*y, x/y, x^y, x%y,
+Math operators: (), -x, x+y, x-y, x*y, x/y, x^y, x%y, x == y, x != y, x < y, x <= y, x > y, x >= y, x && y, x || y, !x
Math functions: sqrt(x), exp(x), ln(x), log(x), abs(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), ramp(x,y), stagger(x,y), logfreq(x,y,z), stride(x,y,z), vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z)
Group functions: count(ID), mass(ID), charge(ID), xcm(ID,dim), \
vcm(ID,dim), fcm(ID,dim), bound(ID,dir), \
diff --git a/src/GRANULAR/fix_pour.cpp b/src/GRANULAR/fix_pour.cpp
index 73a13e2192..9947d9a84a 100644
--- a/src/GRANULAR/fix_pour.cpp
+++ b/src/GRANULAR/fix_pour.cpp
@@ -37,10 +37,11 @@ using namespace LAMMPS_NS;
using namespace FixConst;
using namespace MathConst;
-#define EPSILON 0.001
-
enum{ATOM,MOLECULE};
enum{ONE,RANGE,POLY};
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
+
+#define EPSILON 0.001
/* ---------------------------------------------------------------------- */
@@ -598,13 +599,25 @@ void FixPour::pre_exchange()
if (newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
newcoord[1] >= sublo[1] && newcoord[1] < subhi[1] &&
newcoord[2] >= sublo[2] && newcoord[2] < subhi[2]) flag = 1;
- else if (dimension == 3 && newcoord[2] >= domain->boxhi[2] &&
- comm->myloc[2] == comm->procgrid[2]-1 &&
- newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
- newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1;
- else if (dimension == 2 && newcoord[1] >= domain->boxhi[1] &&
- comm->myloc[1] == comm->procgrid[1]-1 &&
- newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1;
+ else if (dimension == 3 && newcoord[2] >= domain->boxhi[2]) {
+ if (comm->layout != LAYOUT_TILED) {
+ if (comm->myloc[2] == comm->procgrid[2]-1 &&
+ newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
+ newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1;
+ } else {
+ if (comm->mysplit[2][1] == 1.0 &&
+ newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
+ newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1;
+ }
+ } else if (dimension == 2 && newcoord[1] >= domain->boxhi[1]) {
+ if (comm->layout != LAYOUT_TILED) {
+ if (comm->myloc[1] == comm->procgrid[1]-1 &&
+ newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1;
+ } else {
+ if (comm->mysplit[1][1] == 1.0 &&
+ newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1;
+ }
+ }
if (flag) {
if (mode == ATOM) atom->avec->create_atom(ntype,coords[m]);
diff --git a/src/KSPACE/msm.cpp b/src/KSPACE/msm.cpp
index 5ce73ee41f..81dbc98b47 100644
--- a/src/KSPACE/msm.cpp
+++ b/src/KSPACE/msm.cpp
@@ -148,6 +148,9 @@ void MSM::init()
triclinic_check();
if (domain->dimension == 2)
error->all(FLERR,"Cannot (yet) use MSM with 2d simulation");
+ if (comm->style != 0)
+ error->universe_all(FLERR,"MSM can only currently be used with "
+ "comm_style brick");
if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp
index 4d45ef2227..ec61833f76 100644
--- a/src/KSPACE/pppm.cpp
+++ b/src/KSPACE/pppm.cpp
@@ -183,6 +183,9 @@ void PPPM::init()
"slab correction");
if (domain->dimension == 2) error->all(FLERR,
"Cannot use PPPM with 2d simulation");
+ if (comm->style != 0)
+ error->universe_all(FLERR,"PPPM can only currently be used with "
+ "comm_style brick");
if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp
index 195aa47af3..1dfea3bf77 100644
--- a/src/KSPACE/pppm_disp.cpp
+++ b/src/KSPACE/pppm_disp.cpp
@@ -213,6 +213,9 @@ void PPPMDisp::init()
triclinic_check();
if (domain->dimension == 2)
error->all(FLERR,"Cannot use PPPMDisp with 2d simulation");
+ if (comm->style != 0)
+ error->universe_all(FLERR,"PPPMDisp can only currently be used with "
+ "comm_style brick");
if (slabflag == 0 && domain->nonperiodic > 0)
error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp");
diff --git a/src/MISC/fix_deposit.cpp b/src/MISC/fix_deposit.cpp
index a804028397..213d39329b 100644
--- a/src/MISC/fix_deposit.cpp
+++ b/src/MISC/fix_deposit.cpp
@@ -37,6 +37,7 @@ using namespace FixConst;
using namespace MathConst;
enum{ATOM,MOLECULE};
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
#define EPSILON 1.0e6
@@ -452,13 +453,25 @@ void FixDeposit::pre_exchange()
if (newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
newcoord[1] >= sublo[1] && newcoord[1] < subhi[1] &&
newcoord[2] >= sublo[2] && newcoord[2] < subhi[2]) flag = 1;
- else if (dimension == 3 && newcoord[2] >= domain->boxhi[2] &&
- comm->myloc[2] == comm->procgrid[2]-1 &&
- newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
- newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1;
- else if (dimension == 2 && newcoord[1] >= domain->boxhi[1] &&
- comm->myloc[1] == comm->procgrid[1]-1 &&
- newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1;
+ else if (dimension == 3 && newcoord[2] >= domain->boxhi[2]) {
+ if (comm->layout != LAYOUT_TILED) {
+ if (comm->myloc[2] == comm->procgrid[2]-1 &&
+ newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
+ newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1;
+ } else {
+ if (comm->mysplit[2][1] == 1.0 &&
+ newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
+ newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1;
+ }
+ } else if (dimension == 2 && newcoord[1] >= domain->boxhi[1]) {
+ if (comm->layout != LAYOUT_TILED) {
+ if (comm->myloc[1] == comm->procgrid[1]-1 &&
+ newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1;
+ } else {
+ if (comm->mysplit[1][1] == 1.0 &&
+ newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1;
+ }
+ }
if (flag) {
if (mode == ATOM) atom->avec->create_atom(ntype,coords[m]);
diff --git a/src/REPLICA/verlet_split.cpp b/src/REPLICA/verlet_split.cpp
index ac45b4f56d..734b070541 100644
--- a/src/REPLICA/verlet_split.cpp
+++ b/src/REPLICA/verlet_split.cpp
@@ -52,6 +52,9 @@ VerletSplit::VerletSplit(LAMMPS *lmp, int narg, char **arg) :
if (universe->procs_per_world[0] % universe->procs_per_world[1])
error->universe_all(FLERR,"Verlet/split requires Rspace partition "
"size be multiple of Kspace partition size");
+ if (comm->style != 0)
+ error->universe_all(FLERR,"Verlet/split can only currently be used with "
+ "comm_style brick");
// master = 1 for Rspace procs, 0 for Kspace procs
@@ -214,6 +217,9 @@ VerletSplit::~VerletSplit()
void VerletSplit::init()
{
+ if (comm->style != 0)
+ error->universe_all(FLERR,"Verlet/split can only currently be used with "
+ "comm_style brick");
if (!force->kspace && comm->me == 0)
error->warning(FLERR,"No Kspace calculation with verlet/split");
diff --git a/src/SHOCK/fix_append_atoms.cpp b/src/SHOCK/fix_append_atoms.cpp
index 5e7af01f6e..571e77b129 100644
--- a/src/SHOCK/fix_append_atoms.cpp
+++ b/src/SHOCK/fix_append_atoms.cpp
@@ -29,6 +29,8 @@
using namespace LAMMPS_NS;
using namespace FixConst;
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
+
#define BIG 1.0e30
#define EPSILON 1.0e-6
@@ -410,7 +412,15 @@ void FixAppendAtoms::pre_exchange()
if (ntimestep % freq == 0) {
if (spatflag==1) if (get_spatial()==0) return;
- if (comm->myloc[2] == comm->procgrid[2]-1) {
+
+ int addflag = 0;
+ if (comm->layout != LAYOUT_TILED) {
+ if (comm->myloc[2] == comm->procgrid[2]-1) addflag = 1;
+ } else {
+ if (comm->mysplit[2][1] == 1.0) addflag = 1;
+ }
+
+ if (addflag) {
double bboxlo[3],bboxhi[3];
bboxlo[0] = domain->sublo[0]; bboxhi[0] = domain->subhi[0];
diff --git a/src/SRD/fix_srd.cpp b/src/SRD/fix_srd.cpp
index 3db73bab51..37dc068797 100644
--- a/src/SRD/fix_srd.cpp
+++ b/src/SRD/fix_srd.cpp
@@ -334,9 +334,12 @@ void FixSRD::init()
if (bigexist && comm->ghost_velocity == 0)
error->all(FLERR,"Fix srd requires ghost atoms store velocity");
if (bigexist && collidestyle == NOSLIP && !atom->torque_flag)
- error->all(FLERR,"Fix SRD no-slip requires atom attribute torque");
+ error->all(FLERR,"Fix srd no-slip requires atom attribute torque");
if (initflag && update->dt != dt_big)
error->all(FLERR,"Cannot change timestep once fix srd is setup");
+ if (comm->style != 0)
+ error->universe_all(FLERR,"Fix srd can only currently be used with "
+ "comm_style brick");
// orthogonal vs triclinic simulation box
// could be static or shearing box
diff --git a/src/STUBS/mpi.c b/src/STUBS/mpi.c
index 718e5fb475..987479bb7b 100644
--- a/src/STUBS/mpi.c
+++ b/src/STUBS/mpi.c
@@ -20,7 +20,7 @@
#include
#include "mpi.h"
-/* lo-level data structure */
+/* data structure for double/int */
struct _mpi_double_int {
double value;
@@ -28,6 +28,15 @@ struct _mpi_double_int {
};
typedef struct _mpi_double_int double_int;
+/* extra MPI_Datatypes registered by MPI_Type_contiguous */
+
+#define MAXEXTRA_DATATYPE 16
+
+int nextra_datatype;
+MPI_Datatype *ptr_datatype[MAXEXTRA_DATATYPE];
+int index_datatype[MAXEXTRA_DATATYPE];
+int size_datatype[MAXEXTRA_DATATYPE];
+
static int _mpi_is_initialized=0;
/* ---------------------------------------------------------------------- */
@@ -135,6 +144,8 @@ double MPI_Wtime()
/* ---------------------------------------------------------------------- */
+/* include sizes of user defined datatypes, stored in extra lists */
+
static int stubtypesize(MPI_Datatype datatype)
{
if (datatype == MPI_INT) return sizeof(int);
@@ -145,6 +156,12 @@ static int stubtypesize(MPI_Datatype datatype)
else if (datatype == MPI_LONG) return sizeof(long);
else if (datatype == MPI_LONG_LONG) return sizeof(uint64_t);
else if (datatype == MPI_DOUBLE_INT) return sizeof(double_int);
+ else {
+ int i;
+ for (i = 0; i < nextra_datatype; i++)
+ if (datatype == index_datatype[i]) return size_datatype[i];
+ }
+ return 0;
}
/* ---------------------------------------------------------------------- */
@@ -339,6 +356,66 @@ int MPI_Cart_rank(MPI_Comm comm, int *coords, int *rank)
/* ---------------------------------------------------------------------- */
+/* store size of user datatype in extra lists */
+
+int MPI_Type_contiguous(int count, MPI_Datatype oldtype,
+ MPI_Datatype *newtype)
+{
+ if (nextra_datatype = MAXEXTRA_DATATYPE) return -1;
+ ptr_datatype[nextra_datatype] = newtype;
+ index_datatype[nextra_datatype] = -(nextra_datatype + 1);
+ size_datatype[nextra_datatype] = count * stubtypesize(oldtype);
+ nextra_datatype++;
+ return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* set value of user datatype to internal negative index,
+ based on match of ptr */
+
+int MPI_Type_commit(MPI_Datatype *datatype)
+{
+ int i;
+ for (i = 0; i < nextra_datatype; i++)
+ if (datatype == ptr_datatype[i]) *datatype = index_datatype[i];
+ return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+/* remove user datatype from extra lists */
+
+int MPI_Type_free(MPI_Datatype *datatype)
+{
+ int i;
+ for (i = 0; i < nextra_datatype; i++)
+ if (datatype == ptr_datatype[i]) {
+ ptr_datatype[i] = ptr_datatype[nextra_datatype-1];
+ index_datatype[i] = index_datatype[nextra_datatype-1];
+ size_datatype[i] = size_datatype[nextra_datatype-1];
+ nextra_datatype--;
+ break;
+ }
+ return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int MPI_Op_create(MPI_User_function *function, int commute, MPI_Op *op)
+{
+ return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int MPI_Op_free(MPI_Op *op)
+{
+ return 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
int MPI_Barrier(MPI_Comm comm) {return 0;}
/* ---------------------------------------------------------------------- */
diff --git a/src/STUBS/mpi.h b/src/STUBS/mpi.h
index 0d93326b45..07ea1027eb 100644
--- a/src/STUBS/mpi.h
+++ b/src/STUBS/mpi.h
@@ -64,6 +64,9 @@ extern "C" {
#define MPI_MAX_PROCESSOR_NAME 128
+typedef void MPI_User_function(void *invec, void *inoutvec,
+ int *len, MPI_Datatype *datatype);
+
/* MPI data structs */
struct _MPI_Status {
@@ -123,6 +126,13 @@ int MPI_Cart_shift(MPI_Comm comm, int direction, int displ,
int *source, int *dest);
int MPI_Cart_rank(MPI_Comm comm, int *coords, int *rank);
+int MPI_Type_contiguous(int count, MPI_Datatype oldtype,
+ MPI_Datatype *newtype);
+int MPI_Type_commit(MPI_Datatype *datatype);
+int MPI_Type_free(MPI_Datatype *datatype);
+
+int MPI_Op_create(MPI_User_function *function, int commute, MPI_Op *op);
+int MPI_Op_free(MPI_Op *op);
int MPI_Barrier(MPI_Comm comm);
int MPI_Bcast(void *buf, int count, MPI_Datatype datatype,
diff --git a/src/USER-CUDA/pppm_cuda.cpp b/src/USER-CUDA/pppm_cuda.cpp
index 9754dd1f94..2a2973c900 100644
--- a/src/USER-CUDA/pppm_cuda.cpp
+++ b/src/USER-CUDA/pppm_cuda.cpp
@@ -211,6 +211,9 @@ void PPPMCuda::init()
// error check
if (domain->dimension == 2) error->all(FLERR,"Cannot use PPPMCuda with 2d simulation");
+ if (comm->style != 0)
+ error->universe_all(FLERR,"PPPMCuda can only currently be used with "
+ "comm_style brick");
if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
diff --git a/src/USER-LB/fix_lb_fluid.cpp b/src/USER-LB/fix_lb_fluid.cpp
index 0161fd658d..4706d576e4 100644
--- a/src/USER-LB/fix_lb_fluid.cpp
+++ b/src/USER-LB/fix_lb_fluid.cpp
@@ -87,6 +87,10 @@ FixLbFluid::FixLbFluid(LAMMPS *lmp, int narg, char **arg) :
if(narg <7) error->all(FLERR,"Illegal fix lb/fluid command");
+ if (comm->style != 0)
+ error->universe_all(FLERR,"Fix lb/fluid can only currently be used with "
+ "comm_style brick");
+
MPI_Comm_rank(world,&me);
MPI_Comm_size(world,&nprocs);
@@ -566,9 +570,12 @@ int FixLbFluid::setmask()
void FixLbFluid::init(void)
{
-
int i,j;
+ if (comm->style != 0)
+ error->universe_all(FLERR,"Fix lb/fluid can only currently be used with "
+ "comm_style brick");
+
//--------------------------------------------------------------------------
// Check to see if the MD timestep has changed between runs.
//--------------------------------------------------------------------------
diff --git a/src/atom.cpp b/src/atom.cpp
index ff2b15f4a0..78e317299c 100644
--- a/src/atom.cpp
+++ b/src/atom.cpp
@@ -47,6 +47,8 @@ using namespace MathConst;
#define CUDA_CHUNK 3000
#define MAXBODY 20 // max # of lines in one body, also in ReadData class
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
+
/* ---------------------------------------------------------------------- */
Atom::Atom(LAMMPS *lmp) : Pointers(lmp)
@@ -756,17 +758,33 @@ void Atom::data_atoms(int n, char *buf)
sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2];
}
- if (domain->xperiodic) {
- if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
- if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0];
- }
- if (domain->yperiodic) {
- if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
- if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1];
- }
- if (domain->zperiodic) {
- if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
- if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2];
+ if (comm->layout != LAYOUT_TILED) {
+ if (domain->xperiodic) {
+ if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
+ if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0];
+ }
+ if (domain->yperiodic) {
+ if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
+ if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1];
+ }
+ if (domain->zperiodic) {
+ if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
+ if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2];
+ }
+
+ } else {
+ if (domain->xperiodic) {
+ if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0];
+ if (comm->mysplit[0][1] == 1.0) subhi[0] += epsilon[0];
+ }
+ if (domain->yperiodic) {
+ if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1];
+ if (comm->mysplit[1][1] == 1.0) subhi[1] += epsilon[1];
+ }
+ if (domain->zperiodic) {
+ if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2];
+ if (comm->mysplit[2][1] == 1.0) subhi[2] += epsilon[2];
+ }
}
// xptr = which word in line starts xyz coords
diff --git a/src/balance.cpp b/src/balance.cpp
index a76a7ce364..8675d8568e 100644
--- a/src/balance.cpp
+++ b/src/balance.cpp
@@ -21,6 +21,7 @@
#include "balance.h"
#include "atom.h"
#include "comm.h"
+#include "rcb.h"
#include "irregular.h"
#include "domain.h"
#include "force.h"
@@ -30,9 +31,10 @@
using namespace LAMMPS_NS;
-enum{XYZ,SHIFT,RCB};
+enum{XYZ,SHIFT,BISECTION};
enum{NONE,UNIFORM,USER};
enum{X,Y,Z};
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
/* ---------------------------------------------------------------------- */
@@ -47,6 +49,8 @@ Balance::Balance(LAMMPS *lmp) : Pointers(lmp)
user_xsplit = user_ysplit = user_zsplit = NULL;
shift_allocate = 0;
+ rcb = NULL;
+
fp = NULL;
firststep = 1;
}
@@ -74,6 +78,8 @@ Balance::~Balance()
delete [] hisum;
}
+ delete rcb;
+
if (fp) fclose(fp);
}
@@ -176,7 +182,7 @@ void Balance::command(int narg, char **arg)
} else if (strcmp(arg[iarg],"rcb") == 0) {
if (style != -1) error->all(FLERR,"Illegal balance command");
- style = RCB;
+ style = BISECTION;
iarg++;
} else break;
@@ -232,6 +238,9 @@ void Balance::command(int narg, char **arg)
}
}
+ if (style == BISECTION && comm->style == 0)
+ error->all(FLERR,"Balance rcb cannot be used with comm_style brick");
+
// insure atoms are in current box & update box via shrink-wrap
// init entire system since comm->setup is done
// comm::init needs neighbor::init needs pair::init needs kspace::init, etc
@@ -251,8 +260,10 @@ void Balance::command(int narg, char **arg)
double imbinit = imbalance_nlocal(maxinit);
// no load-balance if imbalance doesn't exceed threshhold
+ // unless switching from tiled to non tiled layout, then force rebalance
- if (imbinit < thresh) return;
+ if (comm->layout == LAYOUT_TILED && style != BISECTION) {
+ } else if (imbinit < thresh) return;
// debug output of initial state
@@ -262,80 +273,65 @@ void Balance::command(int narg, char **arg)
int niter = 0;
- // NOTE: if using XYZ or SHIFT and current partition is TILING,
- // then need to create initial BRICK partition before performing LB
-
// perform load-balance
// style XYZ = explicit setting of cutting planes of logical 3d grid
if (style == XYZ) {
+ if (comm->layout == LAYOUT_UNIFORM) {
+ if (xflag == USER || yflag == USER || zflag == USER)
+ comm->layout = LAYOUT_NONUNIFORM;
+ } else if (comm->style == LAYOUT_NONUNIFORM) {
+ if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
+ comm->layout = LAYOUT_UNIFORM;
+ } else if (comm->style == LAYOUT_TILED) {
+ if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
+ comm->layout = LAYOUT_UNIFORM;
+ else comm->layout = LAYOUT_NONUNIFORM;
+ }
+
if (xflag == UNIFORM) {
for (int i = 0; i < procgrid[0]; i++)
comm->xsplit[i] = i * 1.0/procgrid[0];
comm->xsplit[procgrid[0]] = 1.0;
- }
+ } else if (xflag == USER)
+ for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
if (yflag == UNIFORM) {
for (int i = 0; i < procgrid[1]; i++)
comm->ysplit[i] = i * 1.0/procgrid[1];
comm->ysplit[procgrid[1]] = 1.0;
- }
+ } else if (yflag == USER)
+ for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
if (zflag == UNIFORM) {
for (int i = 0; i < procgrid[2]; i++)
comm->zsplit[i] = i * 1.0/procgrid[2];
comm->zsplit[procgrid[2]] = 1.0;
- }
-
- if (xflag == USER)
- for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
-
- if (yflag == USER)
- for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
-
- if (zflag == USER)
+ } else if (zflag == USER)
for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i];
}
// style SHIFT = adjust cutting planes of logical 3d grid
if (style == SHIFT) {
- static_setup(bstr);
+ comm->layout = LAYOUT_NONUNIFORM;
+ shift_setup_static(bstr);
niter = shift();
}
- // style RCB =
+ // style BISECTION = recursive coordinate bisectioning
- if (style == RCB) {
- error->all(FLERR,"Balance rcb is not yet supported");
-
- if (comm->style == 0)
- error->all(FLERR,"Cannot use balance rcb with comm_style brick");
+ if (style == BISECTION) {
+ comm->layout = LAYOUT_TILED;
+ bisection(1);
}
// output of final result
if (outflag && me == 0) dumpout(update->ntimestep,fp);
- // reset comm->uniform flag if necessary
-
- if (comm->uniform) {
- if (style == SHIFT) comm->uniform = 0;
- if (style == XYZ && xflag == USER) comm->uniform = 0;
- if (style == XYZ && yflag == USER) comm->uniform = 0;
- if (style == XYZ && zflag == USER) comm->uniform = 0;
- } else {
- if (dimension == 3) {
- if (style == XYZ &&
- xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
- comm->uniform = 1;
- } else {
- if (style == XYZ && xflag == UNIFORM && yflag == UNIFORM)
- comm->uniform = 1;
- }
- }
-
// reset proc sub-domains
+ // for either brick or tiled comm style
if (domain->triclinic) domain->set_lamda_box();
domain->set_local_box();
@@ -344,7 +340,8 @@ void Balance::command(int narg, char **arg)
if (domain->triclinic) domain->x2lamda(atom->nlocal);
Irregular *irregular = new Irregular(lmp);
- irregular->migrate_atoms(1);
+ if (style == BISECTION) irregular->migrate_atoms(1,rcb->sendproc);
+ else irregular->migrate_atoms(1);
delete irregular;
if (domain->triclinic) domain->lamda2x(atom->nlocal);
@@ -382,34 +379,36 @@ void Balance::command(int narg, char **arg)
}
}
- if (me == 0) {
- if (screen) {
- fprintf(screen," x cuts:");
- for (int i = 0; i <= comm->procgrid[0]; i++)
- fprintf(screen," %g",comm->xsplit[i]);
- fprintf(screen,"\n");
- fprintf(screen," y cuts:");
- for (int i = 0; i <= comm->procgrid[1]; i++)
- fprintf(screen," %g",comm->ysplit[i]);
- fprintf(screen,"\n");
- fprintf(screen," z cuts:");
- for (int i = 0; i <= comm->procgrid[2]; i++)
- fprintf(screen," %g",comm->zsplit[i]);
- fprintf(screen,"\n");
- }
- if (logfile) {
- fprintf(logfile," x cuts:");
- for (int i = 0; i <= comm->procgrid[0]; i++)
- fprintf(logfile," %g",comm->xsplit[i]);
- fprintf(logfile,"\n");
- fprintf(logfile," y cuts:");
- for (int i = 0; i <= comm->procgrid[1]; i++)
- fprintf(logfile," %g",comm->ysplit[i]);
- fprintf(logfile,"\n");
- fprintf(logfile," z cuts:");
- for (int i = 0; i <= comm->procgrid[2]; i++)
- fprintf(logfile," %g",comm->zsplit[i]);
- fprintf(logfile,"\n");
+ if (style != BISECTION) {
+ if (me == 0) {
+ if (screen) {
+ fprintf(screen," x cuts:");
+ for (int i = 0; i <= comm->procgrid[0]; i++)
+ fprintf(screen," %g",comm->xsplit[i]);
+ fprintf(screen,"\n");
+ fprintf(screen," y cuts:");
+ for (int i = 0; i <= comm->procgrid[1]; i++)
+ fprintf(screen," %g",comm->ysplit[i]);
+ fprintf(screen,"\n");
+ fprintf(screen," z cuts:");
+ for (int i = 0; i <= comm->procgrid[2]; i++)
+ fprintf(screen," %g",comm->zsplit[i]);
+ fprintf(screen,"\n");
+ }
+ if (logfile) {
+ fprintf(logfile," x cuts:");
+ for (int i = 0; i <= comm->procgrid[0]; i++)
+ fprintf(logfile," %g",comm->xsplit[i]);
+ fprintf(logfile,"\n");
+ fprintf(logfile," y cuts:");
+ for (int i = 0; i <= comm->procgrid[1]; i++)
+ fprintf(logfile," %g",comm->ysplit[i]);
+ fprintf(logfile,"\n");
+ fprintf(logfile," z cuts:");
+ for (int i = 0; i <= comm->procgrid[2]; i++)
+ fprintf(logfile," %g",comm->zsplit[i]);
+ fprintf(logfile,"\n");
+ }
}
}
}
@@ -467,13 +466,59 @@ double Balance::imbalance_splits(int &max)
return imbalance;
}
+/* ----------------------------------------------------------------------
+ perform balancing via RCB class
+ sortflag = flag for sorting order of received messages by proc ID
+------------------------------------------------------------------------- */
+
+int *Balance::bisection(int sortflag)
+{
+ if (!rcb) rcb = new RCB(lmp);
+
+ // NOTE: lo/hi args could be simulation box or particle bounding box
+ // if particle bbox, then mysplit needs to be reset to sim box
+ // NOTE: triclinic needs to be in lamda coords
+
+ int dim = domain->dimension;
+ double *boxlo = domain->boxlo;
+ double *boxhi = domain->boxhi;
+ double *prd = domain->prd;
+
+ rcb->compute(dim,atom->nlocal,atom->x,NULL,boxlo,boxhi);
+ rcb->invert(sortflag);
+
+ // NOTE: this logic is specific to orthogonal boxes, not triclinic
+
+ comm->rcbnew = 1;
+ comm->rcbcut = rcb->cut;
+ comm->rcbcutdim = rcb->cutdim;
+
+ double (*mysplit)[2] = comm->mysplit;
+
+ mysplit[0][0] = (rcb->lo[0] - boxlo[0]) / prd[0];
+ if (rcb->hi[0] == boxhi[0]) mysplit[0][1] = 1.0;
+ else mysplit[0][1] = (rcb->hi[0] - boxlo[0]) / prd[0];
+
+ mysplit[1][0] = (rcb->lo[1] - boxlo[1]) / prd[1];
+ if (rcb->hi[1] == boxhi[1]) mysplit[1][1] = 1.0;
+ else mysplit[1][1] = (rcb->hi[1] - boxlo[1]) / prd[1];
+
+ mysplit[2][0] = (rcb->lo[2] - boxlo[2]) / prd[2];
+ if (rcb->hi[2] == boxhi[2]) mysplit[2][1] = 1.0;
+ else mysplit[2][1] = (rcb->hi[2] - boxlo[2]) / prd[2];
+
+ // return list of procs to send my atoms to
+
+ return rcb->sendproc;
+}
+
/* ----------------------------------------------------------------------
setup static load balance operations
- called from command
+ called from command and indirectly initially from fix balance
set rho = 0 for static balancing
------------------------------------------------------------------------- */
-void Balance::static_setup(char *str)
+void Balance::shift_setup_static(char *str)
{
shift_allocate = 1;
@@ -498,21 +543,35 @@ void Balance::static_setup(char *str)
losum = new bigint[max+1];
hisum = new bigint[max+1];
+ // if current layout is TILED, set initial uniform splits in Comm
+ // this gives starting point to subsequent shift balancing
+
+ if (comm->layout == LAYOUT_TILED) {
+ int *procgrid = comm->procgrid;
+ double *xsplit = comm->xsplit;
+ double *ysplit = comm->ysplit;
+ double *zsplit = comm->zsplit;
+
+ for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0];
+ for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1];
+ for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2];
+ xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0;
+ }
+
rho = 0;
}
/* ----------------------------------------------------------------------
setup shift load balance operations
called from fix balance
- set rho = 1 for shift balancing after call to shift_setup()
+ set rho = 1 to do dynamic balancing after call to shift_setup_static()
------------------------------------------------------------------------- */
void Balance::shift_setup(char *str, int nitermax_in, double thresh_in)
{
- static_setup(str);
+ shift_setup_static(str);
nitermax = nitermax_in;
stopthresh = thresh_in;
-
rho = 1;
}
@@ -525,7 +584,7 @@ void Balance::shift_setup(char *str, int nitermax_in, double thresh_in)
int Balance::shift()
{
int i,j,k,m,np,max;
- double *split = NULL;
+ double *split;
// no balancing if no atoms
@@ -590,7 +649,7 @@ int Balance::shift()
// iterate until balanced
#ifdef BALANCE_DEBUG
- if (me == 0) debug_output(idim,0,np,split);
+ if (me == 0) debug_shift_output(idim,0,np,split);
#endif
int doneflag;
@@ -601,7 +660,7 @@ int Balance::shift()
niter++;
#ifdef BALANCE_DEBUG
- if (me == 0) debug_output(idim,m+1,np,split);
+ if (me == 0) debug_shift_output(idim,m+1,np,split);
if (me == 0 && fp) dumpout(update->ntimestep,fp);
#endif
@@ -827,7 +886,7 @@ void Balance::dumpout(bigint tstep, FILE *bfp)
int nx = comm->procgrid[0] + 1;
int ny = comm->procgrid[1] + 1;
- //int nz = comm->procgrid[2] + 1;
+ int nz = comm->procgrid[2] + 1;
if (dimension == 2) {
int m = 0;
@@ -914,7 +973,7 @@ void Balance::dumpout(bigint tstep, FILE *bfp)
------------------------------------------------------------------------- */
#ifdef BALANCE_DEBUG
-void Balance::debug_output(int idim, int m, int np, double *split)
+void Balance::debug_shift_output(int idim, int m, int np, double *split)
{
int i;
const char *dim = NULL;
diff --git a/src/balance.h b/src/balance.h
index 66ad6db721..69ff41abaf 100644
--- a/src/balance.h
+++ b/src/balance.h
@@ -27,11 +27,14 @@ namespace LAMMPS_NS {
class Balance : protected Pointers {
public:
+ class RCB *rcb;
+
Balance(class LAMMPS *);
~Balance();
void command(int, char **);
void shift_setup(char *, int, double);
int shift();
+ int *bisection(int sortflag = 0);
double imbalance_nlocal(int &);
void dumpout(bigint, FILE *);
@@ -66,13 +69,13 @@ class Balance : protected Pointers {
FILE *fp;
int firststep;
- void static_setup(char *);
double imbalance_splits(int &);
+ void shift_setup_static(char *);
void tally(int, int, double *);
int adjust(int, double *);
int binary(double, int, double *);
#ifdef BALANCE_DEBUG
- void debug_output(int, int, int, double *);
+ void debug_shift_output(int, int, int, double *);
#endif
};
diff --git a/src/comm.cpp b/src/comm.cpp
index a80db36d95..ad4b18786e 100644
--- a/src/comm.cpp
+++ b/src/comm.cpp
@@ -12,6 +12,7 @@
------------------------------------------------------------------------- */
#include "mpi.h"
+#include "stdlib.h"
#include "string.h"
#include "comm.h"
#include "universe.h"
@@ -20,9 +21,14 @@
#include "domain.h"
#include "group.h"
#include "procmap.h"
+#include "accelerator_kokkos.h"
#include "memory.h"
#include "error.h"
+#ifdef _OPENMP
+#include "omp.h"
+#endif
+
using namespace LAMMPS_NS;
enum{SINGLE,MULTI}; // same as in Comm sub-styles
@@ -47,25 +53,96 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
gridflag = ONELEVEL;
mapflag = CART;
customfile = NULL;
+ outfile = NULL;
recv_from_partition = send_to_partition = -1;
otherflag = 0;
- outfile = NULL;
+ maxexchange_atom = maxexchange_fix = 0;
+ grid2proc = NULL;
xsplit = ysplit = zsplit = NULL;
+ rcbnew = 0;
+
+ // use of OpenMP threads
+ // query OpenMP for number of threads/process set by user at run-time
+ // if the OMP_NUM_THREADS environment variable is not set, we default
+ // to using 1 thread. This follows the principle of the least surprise,
+ // while practically all OpenMP implementations violate it by using
+ // as many threads as there are (virtual) CPU cores by default.
+
+ nthreads = 1;
+#ifdef _OPENMP
+ if (lmp->kokkos) {
+ nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa;
+ } else if (getenv("OMP_NUM_THREADS") == NULL) {
+ nthreads = 1;
+ if (me == 0)
+ error->warning(FLERR,"OMP_NUM_THREADS environment is not set.");
+ } else {
+ nthreads = omp_get_max_threads();
+ }
+
+ // enforce consistent number of threads across all MPI tasks
+
+ MPI_Bcast(&nthreads,1,MPI_INT,0,world);
+ if (!lmp->kokkos) omp_set_num_threads(nthreads);
+
+ if (me == 0) {
+ if (screen)
+ fprintf(screen," using %d OpenMP thread(s) per MPI task\n",nthreads);
+ if (logfile)
+ fprintf(logfile," using %d OpenMP thread(s) per MPI task\n",nthreads);
+ }
+#endif
+
}
/* ---------------------------------------------------------------------- */
Comm::~Comm()
{
+ memory->destroy(grid2proc);
memory->destroy(xsplit);
memory->destroy(ysplit);
memory->destroy(zsplit);
-
delete [] customfile;
delete [] outfile;
}
+/* ----------------------------------------------------------------------
+ deep copy of arrays from old Comm class to new one
+ all public/protected vectors/arrays in parent Comm class must be copied
+ called from alternate constructor of child classes
+ when new comm style is created from Input
+------------------------------------------------------------------------- */
+
+void Comm::copy_arrays(Comm *oldcomm)
+{
+ if (oldcomm->grid2proc) {
+ memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2],
+ "comm:grid2proc");
+ memcpy(&grid2proc[0][0][0],&oldcomm->grid2proc[0][0][0],
+ (procgrid[0]*procgrid[1]*procgrid[2])*sizeof(int));
+
+ memory->create(xsplit,procgrid[0]+1,"comm:xsplit");
+ memory->create(ysplit,procgrid[1]+1,"comm:ysplit");
+ memory->create(zsplit,procgrid[2]+1,"comm:zsplit");
+ memcpy(xsplit,oldcomm->xsplit,(procgrid[0]+1)*sizeof(double));
+ memcpy(ysplit,oldcomm->ysplit,(procgrid[1]+1)*sizeof(double));
+ memcpy(zsplit,oldcomm->zsplit,(procgrid[2]+1)*sizeof(double));
+ }
+
+ if (customfile) {
+ int n = strlen(oldcomm->customfile) + 1;
+ customfile = new char[n];
+ strcpy(customfile,oldcomm->customfile);
+ }
+ if (outfile) {
+ int n = strlen(oldcomm->outfile) + 1;
+ outfile = new char[n];
+ strcpy(outfile,oldcomm->outfile);
+ }
+}
+
/* ----------------------------------------------------------------------
modify communication params
invoked from input script by comm_modify command
diff --git a/src/comm.h b/src/comm.h
index e50def8c53..353ea131d1 100644
--- a/src/comm.h
+++ b/src/comm.h
@@ -21,21 +21,15 @@ namespace LAMMPS_NS {
class Comm : protected Pointers {
public:
int style; // comm pattern: 0 = 6-way stencil, 1 = irregular tiling
- int layout; // current proc domains: 0 = logical bricks, 1 = general tiling
- // can do style=1 on layout=0, but not vice versa
- // NOTE: uniform needs to be subsumed into layout
- int uniform; // 1 = equal subdomains, 0 = load-balanced
+ int layout; // LAYOUT_UNIFORM = logical equal-sized bricks
+ // LAYOUT_NONUNIFORM = logical bricks,
+ // but different sizes due to LB
+ // LAYOUT_TILED = general tiling, due to RCB LB
int me,nprocs; // proc info
- int procgrid[3]; // procs assigned in each dim of 3d grid
- int user_procgrid[3]; // user request for procs in each dim
- int myloc[3]; // which proc I am in each dim
- int procneigh[3][2]; // my 6 neighboring procs, 0/1 = left/right
int ghost_velocity; // 1 if ghost atoms have velocity, 0 if not
- double *xsplit,*ysplit,*zsplit; // fractional (0-1) sub-domain sizes
double cutghost[3]; // cutoffs used for acquiring ghost atoms
double cutghostuser; // user-specified ghost cutoff
- int ***grid2proc; // which proc owns i,j,k loc in 3d grid
int recv_from_partition; // recv proc layout from this partition
int send_to_partition; // send my proc layout to this partition
// -1 if no recv or send
@@ -45,8 +39,27 @@ class Comm : protected Pointers {
int maxexchange_fix; // max contribution to exchange from Fixes
int nthreads; // OpenMP threads per MPI process
+ // public settings specific to layout = UNIFORM, NONUNIFORM
+
+ int procgrid[3]; // procs assigned in each dim of 3d grid
+ int user_procgrid[3]; // user request for procs in each dim
+ int myloc[3]; // which proc I am in each dim
+ int procneigh[3][2]; // my 6 neighboring procs, 0/1 = left/right
+ double *xsplit,*ysplit,*zsplit; // fractional (0-1) sub-domain sizes
+ int ***grid2proc; // which proc owns i,j,k loc in 3d grid
+
+ // public settings specific to layout = TILED
+
+ int rcbnew; // 1 if just reset by rebalance, else 0
+ double mysplit[3][2]; // fractional (0-1) bounds of my sub-domain
+ double rcbcut; // RCB cut by this proc
+ int rcbcutdim; // dimension of RCB cut
+
+ // methods
+
Comm(class LAMMPS *);
virtual ~Comm();
+ void copy_arrays(class Comm *);
void modify_params(int, char **);
void set_processors(int, char **); // set 3d processor grid attributes
diff --git a/src/comm_brick.cpp b/src/comm_brick.cpp
index 93a81c3ad2..70997725b0 100644
--- a/src/comm_brick.cpp
+++ b/src/comm_brick.cpp
@@ -22,6 +22,7 @@
#include "stdio.h"
#include "stdlib.h"
#include "comm_brick.h"
+#include "comm_tiled.h"
#include "universe.h"
#include "atom.h"
#include "atom_vec.h"
@@ -35,15 +36,10 @@
#include "compute.h"
#include "output.h"
#include "dump.h"
-#include "accelerator_kokkos.h"
#include "math_extra.h"
#include "error.h"
#include "memory.h"
-#ifdef _OPENMP
-#include "omp.h"
-#endif
-
using namespace LAMMPS_NS;
#define BUFFACTOR 1.5
@@ -52,58 +48,57 @@ using namespace LAMMPS_NS;
#define BIG 1.0e20
enum{SINGLE,MULTI}; // same as in Comm
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
/* ---------------------------------------------------------------------- */
CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp)
{
- style = layout = 0;
+ style = 0;
+ layout = LAYOUT_UNIFORM;
+ init_buffers();
+}
- recv_from_partition = send_to_partition = -1;
- otherflag = 0;
+/* ---------------------------------------------------------------------- */
- grid2proc = NULL;
+CommBrick::~CommBrick()
+{
+ free_swap();
+ if (mode == MULTI) {
+ free_multi();
+ memory->destroy(cutghostmulti);
+ }
- uniform = 1;
+ if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
+ memory->sfree(sendlist);
+ memory->destroy(maxsendlist);
+
+ memory->destroy(buf_send);
+ memory->destroy(buf_recv);
+}
+
+/* ---------------------------------------------------------------------- */
+
+CommBrick::CommBrick(LAMMPS *lmp, Comm *oldcomm) : Comm(*oldcomm)
+{
+ if (oldcomm->layout == LAYOUT_TILED)
+ error->all(FLERR,"Cannot change to comm_style brick from tiled layout");
+
+ style = 0;
+ layout = oldcomm->layout;
+ copy_arrays(oldcomm);
+ init_buffers();
+}
+
+/* ----------------------------------------------------------------------
+ initialize comm buffers and other data structs local to CommBrick
+------------------------------------------------------------------------- */
+
+void CommBrick::init_buffers()
+{
multilo = multihi = NULL;
cutghostmulti = NULL;
- // use of OpenMP threads
- // query OpenMP for number of threads/process set by user at run-time
- // if the OMP_NUM_THREADS environment variable is not set, we default
- // to using 1 thread. This follows the principle of the least surprise,
- // while practically all OpenMP implementations violate it by using
- // as many threads as there are (virtual) CPU cores by default.
-
- nthreads = 1;
-#ifdef _OPENMP
- if (lmp->kokkos) {
- nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa;
- } else if (getenv("OMP_NUM_THREADS") == NULL) {
- nthreads = 1;
- if (me == 0)
- error->warning(FLERR,"OMP_NUM_THREADS environment is not set.");
- } else {
- nthreads = omp_get_max_threads();
- }
-
- // enforce consistent number of threads across all MPI tasks
-
- MPI_Bcast(&nthreads,1,MPI_INT,0,world);
- if (!lmp->kokkos) omp_set_num_threads(nthreads);
-
- if (me == 0) {
- if (screen)
- fprintf(screen," using %d OpenMP thread(s) per MPI task\n",nthreads);
- if (logfile)
- fprintf(logfile," using %d OpenMP thread(s) per MPI task\n",nthreads);
- }
-#endif
-
- // initialize comm buffers & exchange memory
- // NOTE: allow for AtomVec to set maxexchange_atom, e.g. for atom_style body
-
- maxexchange_atom = maxexchange_fix = 0;
maxexchange = maxexchange_atom + maxexchange_fix;
bufextra = maxexchange + BUFEXTRA;
@@ -125,26 +120,6 @@ CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp)
/* ---------------------------------------------------------------------- */
-CommBrick::~CommBrick()
-{
- memory->destroy(grid2proc);
-
- free_swap();
- if (mode == MULTI) {
- free_multi();
- memory->destroy(cutghostmulti);
- }
-
- if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
- memory->sfree(sendlist);
- memory->destroy(maxsendlist);
-
- memory->destroy(buf_send);
- memory->destroy(buf_recv);
-}
-
-/* ---------------------------------------------------------------------- */
-
void CommBrick::init()
{
triclinic = domain->triclinic;
@@ -280,12 +255,12 @@ void CommBrick::setup()
// 0 = to left, 1 = to right
// set equal to recvneed[idim][1/0] of neighbor proc
// maxneed[idim] = max procs away any proc recvs atoms in either direction
- // uniform = 1 = uniform sized sub-domains:
+ // layout = UNIFORM = uniform sized sub-domains:
// maxneed is directly computable from sub-domain size
// limit to procgrid-1 for non-PBC
// recvneed = maxneed except for procs near non-PBC
// sendneed = recvneed of neighbor on each side
- // uniform = 0 = non-uniform sized sub-domains:
+ // layout = NONUNIFORM = non-uniform sized sub-domains:
// compute recvneed via updown() which accounts for non-PBC
// sendneed = recvneed of neighbor on each side
// maxneed via Allreduce() of recvneed
@@ -293,7 +268,7 @@ void CommBrick::setup()
int *periodicity = domain->periodicity;
int left,right;
- if (uniform) {
+ if (layout == LAYOUT_UNIFORM) {
maxneed[0] = static_cast (cutghost[0] * procgrid[0] / prd[0]) + 1;
maxneed[1] = static_cast (cutghost[1] * procgrid[1] / prd[1]) + 1;
maxneed[2] = static_cast (cutghost[2] * procgrid[2] / prd[2]) + 1;
@@ -561,16 +536,15 @@ void CommBrick::forward_comm(int dummy)
} else {
if (comm_x_only) {
if (sendnum[iswap])
- n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
- x[firstrecv[iswap]],pbc_flag[iswap],
- pbc[iswap]);
+ avec->pack_comm(sendnum[iswap],sendlist[iswap],
+ x[firstrecv[iswap]],pbc_flag[iswap],pbc[iswap]);
} else if (ghost_velocity) {
- n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
- buf_send,pbc_flag[iswap],pbc[iswap]);
+ avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
+ buf_send,pbc_flag[iswap],pbc[iswap]);
avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send);
} else {
- n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
- buf_send,pbc_flag[iswap],pbc[iswap]);
+ avec->pack_comm(sendnum[iswap],sendlist[iswap],
+ buf_send,pbc_flag[iswap],pbc[iswap]);
avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send);
}
}
@@ -620,10 +594,10 @@ void CommBrick::reverse_comm()
} else {
if (comm_f_only) {
if (sendnum[iswap])
- avec->unpack_reverse(sendnum[iswap],sendlist[iswap],
- f[firstrecv[iswap]]);
+ avec->unpack_reverse(sendnum[iswap],sendlist[iswap],
+ f[firstrecv[iswap]]);
} else {
- n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
+ avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_send);
}
}
diff --git a/src/comm_brick.h b/src/comm_brick.h
index 20592accab..8dafecfbe7 100644
--- a/src/comm_brick.h
+++ b/src/comm_brick.h
@@ -21,6 +21,7 @@ namespace LAMMPS_NS {
class CommBrick : public Comm {
public:
CommBrick(class LAMMPS *);
+ CommBrick(class LAMMPS *, class Comm *);
virtual ~CommBrick();
virtual void init();
@@ -80,6 +81,7 @@ class CommBrick : public Comm {
int maxexchange; // max # of datums/atom in exchange comm
int bufextra; // extra space beyond maxsend in send buffer
+ void init_buffers();
int updown(int, int, int, double, int, double *);
// compare cutoff to procs
virtual void grow_send(int, int); // reallocate send buffer
diff --git a/src/comm_tiled.cpp b/src/comm_tiled.cpp
index 9f111b999a..42f6efdb6c 100644
--- a/src/comm_tiled.cpp
+++ b/src/comm_tiled.cpp
@@ -11,15 +11,20 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
+#include "string.h"
#include "lmptype.h"
#include "comm_tiled.h"
+#include "comm_brick.h"
#include "atom.h"
#include "atom_vec.h"
#include "domain.h"
+#include "force.h"
#include "pair.h"
+#include "neighbor.h"
#include "modify.h"
#include "fix.h"
#include "compute.h"
+#include "output.h"
#include "dump.h"
#include "memory.h"
#include "error.h"
@@ -27,32 +32,88 @@
using namespace LAMMPS_NS;
#define BUFFACTOR 1.5
+#define BUFFACTOR 1.5
+#define BUFMIN 1000
+#define BUFEXTRA 1000
+
+// NOTE: change this to 16 after debugged
+
+#define DELTA_PROCS 1
enum{SINGLE,MULTI}; // same as in Comm
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
/* ---------------------------------------------------------------------- */
CommTiled::CommTiled(LAMMPS *lmp) : Comm(lmp)
{
- style = 1;
- layout = 0;
-
error->all(FLERR,"Comm_style tiled is not yet supported");
+
+ style = 1;
+ layout = LAYOUT_UNIFORM;
+ init_buffers();
+}
+
+/* ---------------------------------------------------------------------- */
+
+CommTiled::CommTiled(LAMMPS *lmp, Comm *oldcomm) : Comm(*oldcomm)
+{
+ error->all(FLERR,"Comm_style tiled is not yet supported");
+
+ style = 1;
+ layout = oldcomm->layout;
+ copy_arrays(oldcomm);
+ init_buffers();
}
/* ---------------------------------------------------------------------- */
CommTiled::~CommTiled()
{
+ memory->destroy(buf_send);
+ memory->destroy(buf_recv);
+ memory->destroy(overlap);
+ deallocate_swap(nswap);
}
-/* ---------------------------------------------------------------------- */
+/* ----------------------------------------------------------------------
+ initialize comm buffers and other data structs local to CommTiled
+ NOTE: if this is identical to CommBrick, put it into Comm ??
+------------------------------------------------------------------------- */
+
+void CommTiled::init_buffers()
+{
+ maxexchange = maxexchange_atom + maxexchange_fix;
+ bufextra = maxexchange + BUFEXTRA;
+
+ maxsend = BUFMIN;
+ memory->create(buf_send,maxsend+bufextra,"comm:buf_send");
+ maxrecv = BUFMIN;
+ memory->create(buf_recv,maxrecv,"comm:buf_recv");
+
+ maxoverlap = 0;
+ overlap = NULL;
+
+ nswap = 2 * domain->dimension;
+ allocate_swap(nswap);
+}
+
+/* ----------------------------------------------------------------------
+ NOTE: if this is nearly identical to CommBrick, put it into Comm ??
+------------------------------------------------------------------------- */
void CommTiled::init()
{
triclinic = domain->triclinic;
map_style = atom->map_style;
+ // temporary restrictions
+
+ if (triclinic)
+ error->all(FLERR,"Cannot yet use comm_style tiled with triclinic box");
+ if (mode == MULTI)
+ error->all(FLERR,"Cannot yet use comm_style tiled with multi-mode comm");
+
// comm_only = 1 if only x,f are exchanged in forward/reverse comm
// comm_x_only = 0 if ghost_velocity since velocities are added
@@ -72,51 +133,250 @@ void CommTiled::init()
for (int i = 0; i < modify->nfix; i++)
size_border += modify->fix[i]->comm_border;
+
+ // maxexchange = max # of datums/atom in exchange communication
+ // maxforward = # of datums in largest forward communication
+ // maxreverse = # of datums in largest reverse communication
+ // query pair,fix,compute,dump for their requirements
+ // pair style can force reverse comm even if newton off
+
+ maxexchange = BUFMIN + maxexchange_fix;
+ maxforward = MAX(size_forward,size_border);
+ maxreverse = size_reverse;
+
+ if (force->pair) maxforward = MAX(maxforward,force->pair->comm_forward);
+ if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse);
+
+ for (int i = 0; i < modify->nfix; i++) {
+ maxforward = MAX(maxforward,modify->fix[i]->comm_forward);
+ maxreverse = MAX(maxreverse,modify->fix[i]->comm_reverse);
+ }
+
+ for (int i = 0; i < modify->ncompute; i++) {
+ maxforward = MAX(maxforward,modify->compute[i]->comm_forward);
+ maxreverse = MAX(maxreverse,modify->compute[i]->comm_reverse);
+ }
+
+ for (int i = 0; i < output->ndump; i++) {
+ maxforward = MAX(maxforward,output->dump[i]->comm_forward);
+ maxreverse = MAX(maxreverse,output->dump[i]->comm_reverse);
+ }
+
+ if (force->newton == 0) maxreverse = 0;
+ if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse_off);
}
/* ----------------------------------------------------------------------
setup spatial-decomposition communication patterns
- function of neighbor cutoff(s) & cutghostuser & current box size
- single mode sets slab boundaries (slablo,slabhi) based on max cutoff
- multi mode sets type-dependent slab boundaries (multilo,multihi)
+ function of neighbor cutoff(s) & cutghostuser & current box size and tiling
------------------------------------------------------------------------- */
void CommTiled::setup()
{
- // error on triclinic or multi?
- // set nswap = 2*dim
- // setup neighbor proc info for exchange()
- // setup nsendproc and nrecvproc bounts
- // setup sendproc and recvproc lists
- // setup sendboxes
- // reallocate requests and statuses
+ int i;
- // check that cutoff is <= 1/2 of periodic box len?
+ // domain properties used in setup and methods it calls
- // loop over dims
- // left:
- // construct ghost boxes
- // differnet in x,y,z
- // account for ghost borders in y,z
- // account for PBC by shifting
- // split into multiple boxes if straddles PBC
- // drop boxes down RCB tree
- // count unique procs they cover
- // what about self if crosses PBC
- // for each proc they cover:
- // compute box I send it to left
- // is a message I will recv from right (don't care about box)
- // for ghost-extended boxes
- // do not count procs that do not overlap my owned box at all
- // only touching edge of my owned box does not count
- // in this case list I send to and recv from may be different?
- // same thing to right
+ prd = domain->prd;
+ boxlo = domain->boxlo;
+ boxhi = domain->boxhi;
+ sublo = domain->sublo;
+ subhi = domain->subhi;
- // what need from decomp (RCB):
- // dropbox: return list of procs with overlap and overlapping boxes
- // return n, proclist, boxlist
- // otherbox: bbox of another proc
- // dropatom: return what proc owns the atom coord
+ int dimension = domain->dimension;
+ int *periodicity = domain->periodicity;
+
+ // set function pointers
+
+ if (layout != LAYOUT_TILED) {
+ box_drop = &CommTiled::box_drop_brick;
+ box_other = &CommTiled::box_other_brick;
+ } else {
+ box_drop = &CommTiled::box_drop_tiled;
+ box_other = &CommTiled::box_other_tiled;
+ }
+
+ // if RCB decomp has just changed, gather needed global RCB info
+
+ if (rcbnew) {
+ rcbnew = 0;
+ memcpy(&rcbinfo[me].mysplit[0][0],&mysplit[0][0],6*sizeof(double));
+ rcbinfo[me].cut = rcbcut;
+ rcbinfo[me].dim = rcbcutdim;
+ MPI_Allgather(&rcbinfo[me],sizeof(RCBinfo),MPI_CHAR,
+ rcbinfo,sizeof(RCBinfo),MPI_CHAR,world);
+ }
+
+ // check that cutoff < any periodic box length
+
+ double cut = MAX(neighbor->cutneighmax,cutghostuser);
+ cutghost[0] = cutghost[1] = cutghost[2] = cut;
+
+ if ((periodicity[0] && cut > prd[0]) ||
+ (periodicity[1] && cut > prd[1]) ||
+ (dimension == 3 && periodicity[2] && cut > prd[2]))
+ error->all(FLERR,"Communication cutoff for comm_style tiled "
+ "cannot exceed periodic box length");
+
+ // loop over 6 swap directions
+ // determine which procs I will send to and receive from in each swap
+ // done by intersecting ghost box with all proc sub-boxes it overlaps
+ // sets nsendproc, nrecvproc, sendproc, recvproc
+ // sets sendother, sendself, pbc_flag, pbc, sendbox
+
+ int noverlap1,indexme;
+ double lo1[3],hi1[3],lo2[3],hi2[3];
+ int one,two;
+
+ nswap = 0;
+ for (int idim = 0; idim < dimension; idim++) {
+ for (int iswap = 0; iswap < 2; iswap++) {
+
+ // one = first ghost box in same periodic image
+ // two = second ghost box wrapped across periodic boundary
+ // either may not exist
+
+ one = 1;
+ lo1[0] = sublo[0]; lo1[1] = sublo[1]; lo1[2] = sublo[2];
+ hi1[0] = subhi[0]; hi1[1] = subhi[1]; hi1[2] = subhi[2];
+ if (iswap == 0) {
+ lo1[idim] = sublo[idim] - cut;
+ hi1[idim] = sublo[idim];
+ } else {
+ lo1[idim] = subhi[idim];
+ hi1[idim] = subhi[idim] + cut;
+ }
+
+ two = 0;
+ if (iswap == 0 && periodicity[idim] && lo1[idim] < boxlo[idim]) two = 1;
+ if (iswap == 1 && periodicity[idim] && hi1[idim] > boxhi[idim]) two = 1;
+
+ if (two) {
+ lo2[0] = sublo[0]; lo2[1] = sublo[1]; lo2[2] = sublo[2];
+ hi2[0] = subhi[0]; hi2[1] = subhi[1]; hi2[2] = subhi[2];
+ if (iswap == 0) {
+ lo2[idim] = lo1[idim] + prd[idim];
+ hi2[idim] = hi1[idim] + prd[idim];
+ if (sublo[idim] == boxlo[idim]) {
+ one = 0;
+ hi2[idim] = boxhi[idim];
+ }
+ } else {
+ lo2[idim] = lo1[idim] - prd[idim];
+ hi2[idim] = hi1[idim] - prd[idim];
+ if (subhi[idim] == boxhi[idim]) {
+ one = 0;
+ lo2[idim] = boxlo[idim];
+ }
+ }
+ }
+
+ // noverlap = # of overlaps of box1/2 with procs via box_drop()
+ // overlap = list of overlapping procs
+ // if overlap with self, indexme = index of me in list
+
+ indexme = -1;
+ noverlap = 0;
+ if (one) (this->*box_drop)(idim,lo1,hi1,indexme);
+ noverlap1 = noverlap;
+ if (two) (this->*box_drop)(idim,lo2,hi2,indexme);
+
+ // if self is in overlap list, move it to end of list
+
+ if (indexme >= 0) {
+ int tmp = overlap[noverlap-1];
+ overlap[noverlap-1] = overlap[indexme];
+ overlap[indexme] = tmp;
+ }
+
+ // reallocate 2nd dimensions of all send/recv arrays, based on noverlap
+ // # of sends of this swap = # of recvs of nswap +/- 1
+
+ if (noverlap > nprocmax[iswap]) {
+ int oldmax = nprocmax[iswap];
+ while (nprocmax[iswap] < noverlap) nprocmax[iswap] += DELTA_PROCS;
+ grow_swap_send(iswap,nprocmax[iswap],oldmax);
+ if (iswap == 0) grow_swap_recv(iswap+1,nprocmax[iswap]);
+ else grow_swap_recv(iswap-1,nprocmax[iswap]);
+ }
+
+ // overlap how has list of noverlap procs
+ // includes PBC effects
+
+ if (overlap[noverlap-1] == me) sendself[nswap] = 1;
+ else sendself[nswap] = 0;
+ if (noverlap-sendself[nswap]) sendother[nswap] = 1;
+ else sendother[nswap] = 0;
+
+ nsendproc[nswap] = noverlap;
+ for (i = 0; i < noverlap; i++) sendproc[nswap][i] = overlap[i];
+ if (iswap == 0) {
+ nrecvproc[nswap+1] = noverlap;
+ for (i = 0; i < noverlap; i++) recvproc[nswap+1][i] = overlap[i];
+ } else {
+ nrecvproc[nswap-1] = noverlap;
+ for (i = 0; i < noverlap; i++) recvproc[nswap-1][i] = overlap[i];
+ }
+
+ // compute sendbox for each of my sends
+ // obox = intersection of ghostbox with other proc's sub-domain
+ // sbox = what I need to send to other proc
+ // = sublo to MIN(sublo+cut,subhi) in idim, for iswap = 0
+ // = MIN(subhi-cut,sublo) to subhi in idim, for iswap = 1
+ // = obox in other 2 dims
+ // if sbox touches sub-box boundaries in lower dims,
+ // extend sbox in those lower dims to include ghost atoms
+
+ double oboxlo[3],oboxhi[3],sbox[6];
+
+ for (i = 0; i < noverlap; i++) {
+ pbc_flag[nswap][i] = 0;
+ pbc[nswap][i][0] = pbc[nswap][i][1] = pbc[nswap][i][2] =
+ pbc[nswap][i][3] = pbc[nswap][i][4] = pbc[nswap][i][5] = 0;
+
+ (this->*box_other)(idim,iswap,overlap[i],oboxlo,oboxhi);
+
+ if (i < noverlap1) {
+ sbox[0] = MAX(oboxlo[0],lo1[0]);
+ sbox[1] = MAX(oboxlo[1],lo1[1]);
+ sbox[2] = MAX(oboxlo[2],lo1[2]);
+ sbox[3] = MIN(oboxhi[0],hi1[0]);
+ sbox[4] = MIN(oboxhi[1],hi1[1]);
+ sbox[5] = MIN(oboxhi[2],hi1[2]);
+ } else {
+ pbc_flag[nswap][i] = 1;
+ pbc[nswap][i][idim] = 1;
+ sbox[0] = MAX(oboxlo[0],lo2[0]);
+ sbox[1] = MAX(oboxlo[1],lo2[1]);
+ sbox[2] = MAX(oboxlo[2],lo2[2]);
+ sbox[3] = MIN(oboxhi[0],hi2[0]);
+ sbox[4] = MIN(oboxhi[1],hi2[1]);
+ sbox[5] = MIN(oboxhi[2],hi2[2]);
+ }
+
+ if (iswap == 0) {
+ sbox[idim] = sublo[idim];
+ sbox[3+idim] = MIN(sublo[idim]+cut,subhi[idim]);
+ } else {
+ sbox[idim] = MAX(subhi[idim]-cut,sublo[idim]);
+ sbox[3+idim] = subhi[idim];
+ }
+
+ if (idim >= 1) {
+ if (sbox[0] == sublo[0]) sbox[0] -= cut;
+ if (sbox[4] == subhi[0]) sbox[4] += cut;
+ }
+ if (idim == 2) {
+ if (sbox[1] == sublo[1]) sbox[1] -= cut;
+ if (sbox[5] == subhi[1]) sbox[5] += cut;
+ }
+
+ memcpy(sendbox[nswap][i],sbox,6*sizeof(double));
+ }
+
+ nswap++;
+ }
+ }
}
/* ----------------------------------------------------------------------
@@ -126,46 +386,73 @@ void CommTiled::setup()
void CommTiled::forward_comm(int dummy)
{
- int i,irecv,n;
+ int i,irecv,n,nsend,nrecv;
MPI_Status status;
AtomVec *avec = atom->avec;
double **x = atom->x;
// exchange data with another set of procs in each swap
- // if first proc in set is self, then is just self across PBC, just copy
+ // post recvs from all procs except self
+ // send data to all procs except self
+ // copy data to self if sendself is set
+ // wait on all procs except self and unpack received data
// if comm_x_only set, exchange or copy directly to x, don't unpack
for (int iswap = 0; iswap < nswap; iswap++) {
- if (sendproc[iswap][0] != me) {
- if (comm_x_only) {
- for (i = 0; i < nrecvproc[iswap]; i++)
+ nsend = nsendproc[iswap] - sendself[iswap];
+ nrecv = nrecvproc[iswap] - sendself[iswap];
+
+ if (comm_x_only) {
+ if (sendother[iswap]) {
+ for (i = 0; i < nrecv; i++)
MPI_Irecv(x[firstrecv[iswap][i]],size_forward_recv[iswap][i],
MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
- for (i = 0; i < nsendproc[iswap]; i++) {
+ for (i = 0; i < nsend; i++) {
n = avec->pack_comm(sendnum[iswap][i],sendlist[iswap][i],
buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
}
- MPI_Waitall(nrecvproc[iswap],requests,statuses);
+ }
- } else if (ghost_velocity) {
- for (i = 0; i < nrecvproc[iswap]; i++)
+ if (sendself[iswap]) {
+ avec->pack_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
+ x[firstrecv[iswap][nrecv]],pbc_flag[iswap][nsend],
+ pbc[iswap][nsend]);
+ }
+
+ if (sendother[iswap]) MPI_Waitall(nrecv,requests,statuses);
+
+ } else if (ghost_velocity) {
+ if (sendother[iswap]) {
+ for (i = 0; i < nrecv; i++)
MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
size_forward_recv[iswap][i],
MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
- for (i = 0; i < nsendproc[iswap]; i++) {
+ for (i = 0; i < nsend; i++) {
n = avec->pack_comm_vel(sendnum[iswap][i],sendlist[iswap][i],
buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
}
- for (i = 0; i < nrecvproc[iswap]; i++) {
- MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status);
- avec->unpack_comm_vel(recvnum[iswap][i],firstrecv[iswap][irecv],
+ }
+
+ if (sendself[iswap]) {
+ avec->pack_comm_vel(sendnum[iswap][nsend],sendlist[iswap][nsend],
+ buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]);
+ avec->unpack_comm_vel(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
+ buf_send);
+ }
+
+ if (sendother[iswap]) {
+ for (i = 0; i < nrecv; i++) {
+ MPI_Waitany(nrecv,requests,&irecv,&status);
+ avec->unpack_comm_vel(recvnum[iswap][irecv],firstrecv[iswap][irecv],
&buf_recv[forward_recv_offset[iswap][irecv]]);
}
+ }
- } else {
- for (i = 0; i < nrecvproc[iswap]; i++)
+ } else {
+ if (sendother[iswap]) {
+ for (i = 0; i < nrecv; i++)
MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
size_forward_recv[iswap][i],
MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
@@ -174,27 +461,21 @@ void CommTiled::forward_comm(int dummy)
buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world);
}
- for (i = 0; i < nrecvproc[iswap]; i++) {
- MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status);
- avec->unpack_comm(recvnum[iswap][i],firstrecv[iswap][irecv],
- &buf_recv[forward_recv_offset[iswap][irecv]]);
- }
}
- } else {
- if (comm_x_only) {
- if (sendnum[iswap][0])
- n = avec->pack_comm(sendnum[iswap][0],sendlist[iswap][0],
- x[firstrecv[iswap][0]],pbc_flag[iswap][0],
- pbc[iswap][0]);
- } else if (ghost_velocity) {
- n = avec->pack_comm_vel(sendnum[iswap][0],sendlist[iswap][0],
- buf_send,pbc_flag[iswap][0],pbc[iswap][0]);
- avec->unpack_comm_vel(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
- } else {
- n = avec->pack_comm(sendnum[iswap][0],sendlist[iswap][0],
- buf_send,pbc_flag[iswap][0],pbc[iswap][0]);
- avec->unpack_comm(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
+ if (sendself[iswap]) {
+ avec->pack_comm(sendnum[iswap][nsend],sendlist[iswap][nsend],
+ buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]);
+ avec->unpack_comm(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
+ buf_send);
+ }
+
+ if (sendother[iswap]) {
+ for (i = 0; i < nrecv; i++) {
+ MPI_Waitany(nrecv,requests,&irecv,&status);
+ avec->unpack_comm(recvnum[iswap][irecv],firstrecv[iswap][irecv],
+ &buf_recv[forward_recv_offset[iswap][irecv]]);
+ }
}
}
}
@@ -207,57 +488,73 @@ void CommTiled::forward_comm(int dummy)
void CommTiled::reverse_comm()
{
- int i,irecv,n;
+ int i,irecv,n,nsend,nrecv;
MPI_Request request;
MPI_Status status;
AtomVec *avec = atom->avec;
double **f = atom->f;
// exchange data with another set of procs in each swap
- // if first proc in set is self, then is just self across PBC, just copy
+ // post recvs from all procs except self
+ // send data to all procs except self
+ // copy data to self if sendself is set
+ // wait on all procs except self and unpack received data
// if comm_f_only set, exchange or copy directly from f, don't pack
for (int iswap = nswap-1; iswap >= 0; iswap--) {
- if (sendproc[iswap][0] != me) {
- if (comm_f_only) {
- for (i = 0; i < nsendproc[iswap]; i++)
+ nsend = nsendproc[iswap] - sendself[iswap];
+ nrecv = nrecvproc[iswap] - sendself[iswap];
+
+ if (comm_f_only) {
+ if (sendother[iswap]) {
+ for (i = 0; i < nsend; i++)
MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]],
size_reverse_recv[iswap][i],MPI_DOUBLE,
sendproc[iswap][i],0,world,&requests[i]);
- for (i = 0; i < nrecvproc[iswap]; i++)
+ for (i = 0; i < nrecv; i++)
MPI_Send(f[firstrecv[iswap][i]],size_reverse_send[iswap][i],
MPI_DOUBLE,recvproc[iswap][i],0,world);
- for (i = 0; i < nsendproc[iswap]; i++) {
- MPI_Waitany(nsendproc[iswap],requests,&irecv,&status);
- avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv],
- &buf_recv[reverse_recv_offset[iswap][irecv]]);
- }
+ }
- } else {
- for (i = 0; i < nsendproc[iswap]; i++)
- MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]],
- size_reverse_recv[iswap][i],MPI_DOUBLE,
- sendproc[iswap][i],0,world,&requests[i]);
- for (i = 0; i < nrecvproc[iswap]; i++) {
- n = avec->pack_reverse(recvnum[iswap][i],firstrecv[iswap][i],
- buf_send);
- MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world);
- }
- for (i = 0; i < nsendproc[iswap]; i++) {
- MPI_Waitany(nsendproc[iswap],requests,&irecv,&status);
+ if (sendself[iswap]) {
+ avec->unpack_reverse(sendnum[iswap][nsend],sendlist[iswap][nsend],
+ f[firstrecv[iswap][nrecv]]);
+ }
+
+ if (sendother[iswap]) {
+ for (i = 0; i < nsend; i++) {
+ MPI_Waitany(nsend,requests,&irecv,&status);
avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv],
&buf_recv[reverse_recv_offset[iswap][irecv]]);
}
}
-
+
} else {
- if (comm_f_only) {
- if (sendnum[iswap][0])
- avec->unpack_reverse(sendnum[iswap][0],sendlist[iswap][0],
- f[firstrecv[iswap][0]]);
- } else {
- n = avec->pack_reverse(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
- avec->unpack_reverse(sendnum[iswap][0],sendlist[iswap][0],buf_send);
+ if (sendother[iswap]) {
+ for (i = 0; i < nsend; i++)
+ MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]],
+ size_reverse_recv[iswap][i],MPI_DOUBLE,
+ sendproc[iswap][i],0,world,&requests[i]);
+ for (i = 0; i < nrecv; i++) {
+ n = avec->pack_reverse(recvnum[iswap][i],firstrecv[iswap][i],
+ buf_send);
+ MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world);
+ }
+ }
+
+ if (sendself[iswap]) {
+ avec->pack_reverse(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
+ buf_send);
+ avec->unpack_reverse(sendnum[iswap][nsend],sendlist[iswap][nsend],
+ buf_send);
+ }
+
+ if (sendother[iswap]) {
+ for (i = 0; i < nsend; i++) {
+ MPI_Waitany(nsend,requests,&irecv,&status);
+ avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv],
+ &buf_recv[reverse_recv_offset[iswap][irecv]]);
+ }
}
}
}
@@ -298,7 +595,7 @@ void CommTiled::exchange()
void CommTiled::borders()
{
- int i,n,irecv,ngroup,nlast,nsend,rmaxswap;
+ int i,m,n,irecv,nlocal,nlast,nsend,nrecv,ncount,rmaxswap;
double xlo,xhi,ylo,yhi,zlo,zhi;
double *bbox;
double **x;
@@ -320,65 +617,69 @@ void CommTiled::borders()
// for yz-dim swaps, check owned and ghost atoms
// store sent atom indices in list for use in future timesteps
// NOTE: assume SINGLE mode, add back in logic for MULTI mode later
+ // and for ngroup when bordergroup is set
x = atom->x;
- for (i = 0; i < nsendproc[iswap]; i++) {
- bbox = sendbox[iswap][i];
- xlo = bbox[0]; xhi = bbox[1];
- ylo = bbox[2]; yhi = bbox[3];
- zlo = bbox[4]; zhi = bbox[5];
+ for (m = 0; m < nsendproc[iswap]; m++) {
+ bbox = sendbox[iswap][m];
+ xlo = bbox[0]; ylo = bbox[1]; zlo = bbox[2];
+ xhi = bbox[3]; yhi = bbox[4]; zhi = bbox[5];
- ngroup = atom->nfirst;
+ nlocal = atom->nlocal;
if (iswap < 2) nlast = atom->nlocal;
else nlast = atom->nlocal + atom->nghost;
- nsend = 0;
- for (i = 0; i < ngroup; i++)
+ ncount = 0;
+ for (i = 0; i < nlocal; i++)
if (x[i][0] >= xlo && x[i][0] <= xhi &&
x[i][1] >= ylo && x[i][1] <= yhi &&
x[i][2] >= zlo && x[i][2] <= zhi) {
- if (nsend == maxsendlist[iswap][i]) grow_list(iswap,i,nsend);
- sendlist[iswap][i][nsend++] = i;
+ if (ncount == maxsendlist[iswap][m]) grow_list(iswap,i,ncount);
+ sendlist[iswap][m][ncount++] = i;
}
for (i = atom->nlocal; i < nlast; i++)
if (x[i][0] >= xlo && x[i][0] <= xhi &&
x[i][1] >= ylo && x[i][1] <= yhi &&
x[i][2] >= zlo && x[i][2] <= zhi) {
- if (nsend == maxsendlist[iswap][i]) grow_list(iswap,i,nsend);
- sendlist[iswap][i][nsend++] = i;
+ if (ncount == maxsendlist[iswap][m]) grow_list(iswap,i,ncount);
+ sendlist[iswap][m][ncount++] = i;
}
- sendnum[iswap][i] = nsend;
- smax = MAX(smax,nsend);
+ sendnum[iswap][m] = ncount;
+ smax = MAX(smax,ncount);
}
- // send sendnum counts to procs who recv from me
+ // send sendnum counts to procs who recv from me except self
+ // copy data to self if sendself is set
- if (sendproc[iswap][0] != me) {
- for (i = 0; i < nrecvproc[iswap]; i++)
- MPI_Irecv(&recvnum[iswap][i],1,MPI_INT,
- recvproc[iswap][i],0,world,&requests[i]);
- for (i = 0; i < nsendproc[iswap]; i++)
- MPI_Send(&sendnum[iswap][i],1,MPI_INT,sendproc[iswap][i],0,world);
- MPI_Waitall(nrecvproc[iswap],requests,statuses);
+ nsend = nsendproc[iswap] - sendself[iswap];
+ nrecv = nrecvproc[iswap] - sendself[iswap];
- } else recvnum[iswap][0] = sendnum[iswap][0];
+ if (sendother[iswap]) {
+ for (m = 0; m < nrecv; m++)
+ MPI_Irecv(&recvnum[iswap][m],1,MPI_INT,
+ recvproc[iswap][m],0,world,&requests[m]);
+ for (m = 0; m < nsend; m++)
+ MPI_Send(&sendnum[iswap][m],1,MPI_INT,sendproc[iswap][m],0,world);
+ }
+ if (sendself[iswap]) recvnum[iswap][nrecv] = sendnum[iswap][nsend];
+ if (sendother[iswap]) MPI_Waitall(nrecv,requests,statuses);
// setup other per swap/proc values from sendnum and recvnum
rmaxswap = 0;
- for (i = 0; i < nrecvproc[iswap]; i++) {
- rmaxswap += recvnum[iswap][i];
- size_forward_recv[iswap][i] = recvnum[iswap][i]*size_forward;
- size_reverse_send[iswap][i] = recvnum[iswap][i]*size_reverse;
- size_reverse_recv[iswap][i] = sendnum[iswap][i]*size_reverse;
- if (i == 0) {
+ for (m = 0; m < nrecvproc[iswap]; m++) {
+ rmaxswap += recvnum[iswap][m];
+ size_forward_recv[iswap][m] = recvnum[iswap][m]*size_forward;
+ size_reverse_send[iswap][m] = recvnum[iswap][m]*size_reverse;
+ size_reverse_recv[iswap][m] = sendnum[iswap][m]*size_reverse;
+ if (m == 0) {
firstrecv[iswap][0] = atom->nlocal + atom->nghost;
forward_recv_offset[iswap][0] = 0;
} else {
- firstrecv[iswap][i] = firstrecv[iswap][i-1] + recvnum[iswap][i-1];
- forward_recv_offset[iswap][i] =
- forward_recv_offset[iswap][i-1] + recvnum[iswap][i-1];
+ firstrecv[iswap][m] = firstrecv[iswap][m-1] + recvnum[iswap][m-1];
+ forward_recv_offset[iswap][m] =
+ forward_recv_offset[iswap][m-1] + recvnum[iswap][m-1];
}
}
rmax = MAX(rmax,rmaxswap);
@@ -390,54 +691,64 @@ void CommTiled::borders()
// swap atoms with other procs using pack_border(), unpack_border()
- if (sendproc[iswap][0] != me) {
- for (i = 0; i < nsendproc[iswap]; i++) {
- if (ghost_velocity) {
- for (i = 0; i < nrecvproc[iswap]; i++)
- MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
- recvnum[iswap][i]*size_border,
- MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
- for (i = 0; i < nsendproc[iswap]; i++) {
- n = avec->pack_border_vel(sendnum[iswap][i],sendlist[iswap][i],
- buf_send,pbc_flag[iswap][i],
- pbc[iswap][i]);
- MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
- sendproc[iswap][i],0,world);
- }
- for (i = 0; i < nrecvproc[iswap]; i++) {
- MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status);
- avec->unpack_border(recvnum[iswap][i],firstrecv[iswap][irecv],
- &buf_recv[forward_recv_offset[iswap][irecv]]);
- }
-
- } else {
- for (i = 0; i < nrecvproc[iswap]; i++)
- MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]],
- recvnum[iswap][i]*size_border,
- MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]);
- for (i = 0; i < nsendproc[iswap]; i++) {
- n = avec->pack_border(sendnum[iswap][i],sendlist[iswap][i],
- buf_send,pbc_flag[iswap][i],pbc[iswap][i]);
- MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
- sendproc[iswap][i],0,world);
- }
- for (i = 0; i < nrecvproc[iswap]; i++) {
- MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status);
- avec->unpack_border(recvnum[iswap][i],firstrecv[iswap][irecv],
- &buf_recv[forward_recv_offset[iswap][irecv]]);
- }
+ if (ghost_velocity) {
+ if (sendother[iswap]) {
+ for (m = 0; m < nrecv; m++)
+ MPI_Irecv(&buf_recv[forward_recv_offset[iswap][m]],
+ recvnum[iswap][m]*size_border,
+ MPI_DOUBLE,recvproc[iswap][m],0,world,&requests[m]);
+ for (m = 0; m < nsend; m++) {
+ n = avec->pack_border_vel(sendnum[iswap][m],sendlist[iswap][m],
+ buf_send,pbc_flag[iswap][m],pbc[iswap][m]);
+ MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
+ sendproc[iswap][m],0,world);
+ }
+ }
+
+ if (sendself[iswap]) {
+ n = avec->pack_border_vel(sendnum[iswap][nsend],sendlist[iswap][nsend],
+ buf_send,pbc_flag[iswap][nsend],
+ pbc[iswap][nsend]);
+ avec->unpack_border_vel(recvnum[iswap][nrecv],firstrecv[iswap][nrecv],
+ buf_send);
+ }
+
+ if (sendother[iswap]) {
+ for (m = 0; m < nrecv; m++) {
+ MPI_Waitany(nrecv,requests,&irecv,&status);
+ avec->unpack_border(recvnum[iswap][irecv],firstrecv[iswap][irecv],
+ &buf_recv[forward_recv_offset[iswap][irecv]]);
}
}
} else {
- if (ghost_velocity) {
- n = avec->pack_border_vel(sendnum[iswap][0],sendlist[iswap][0],
- buf_send,pbc_flag[iswap][0],pbc[iswap][0]);
- avec->unpack_border_vel(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
- } else {
- n = avec->pack_border(sendnum[iswap][0],sendlist[iswap][0],
- buf_send,pbc_flag[iswap][0],pbc[iswap][0]);
- avec->unpack_border(recvnum[iswap][0],firstrecv[iswap][0],buf_send);
+ if (sendother[iswap]) {
+ for (m = 0; m < nrecv; m++)
+ MPI_Irecv(&buf_recv[forward_recv_offset[iswap][m]],
+ recvnum[iswap][m]*size_border,
+ MPI_DOUBLE,recvproc[iswap][m],0,world,&requests[m]);
+ for (m = 0; m < nsend; m++) {
+ n = avec->pack_border(sendnum[iswap][m],sendlist[iswap][m],
+ buf_send,pbc_flag[iswap][m],pbc[iswap][m]);
+ MPI_Send(buf_send,n*size_border,MPI_DOUBLE,
+ sendproc[iswap][m],0,world);
+ }
+ }
+
+ if (sendself[iswap]) {
+ n = avec->pack_border(sendnum[iswap][nsend],sendlist[iswap][nsend],
+ buf_send,pbc_flag[iswap][nsend],
+ pbc[iswap][nsend]);
+ avec->unpack_border(recvnum[iswap][nsend],firstrecv[iswap][nsend],
+ buf_send);
+ }
+
+ if (sendother[iswap]) {
+ for (m = 0; m < nrecv; m++) {
+ MPI_Waitany(nrecv,requests,&irecv,&status);
+ avec->unpack_border(recvnum[iswap][irecv],firstrecv[iswap][irecv],
+ &buf_recv[forward_recv_offset[iswap][irecv]]);
+ }
}
}
@@ -459,6 +770,8 @@ void CommTiled::borders()
if (map_style) atom->map_set();
}
+// NOTE: remaining forward/reverse methods still need to be updated
+
/* ----------------------------------------------------------------------
forward communication invoked by a Pair
n = constant number of datums per atom
@@ -786,6 +1099,186 @@ int CommTiled::exchange_variable(int n, double *inbuf, double *&outbuf)
return nrecv;
}
+/* ----------------------------------------------------------------------
+ determine overlap list of Noverlap procs the lo/hi box overlaps
+ overlap = non-zero area in common between box and proc sub-domain
+ box is owned by me and extends in dim
+------------------------------------------------------------------------- */
+
+void CommTiled::box_drop_brick(int idim, double *lo, double *hi, int &indexme)
+{
+ // NOTE: this is not triclinic compatible
+
+ int index,dir;
+ if (hi[idim] == sublo[idim]) {
+ index = myloc[idim] - 1;
+ dir = -1;
+ } else if (lo[idim] == subhi[idim]) {
+ index = myloc[idim] + 1;
+ dir = 1;
+ } else if (hi[idim] == boxhi[idim]) {
+ index = procgrid[idim] - 1;
+ dir = -1;
+ } else if (lo[idim] == boxlo[idim]) {
+ index = 0;
+ dir = 1;
+ }
+
+ int other1,other2,proc;
+ double lower,upper;
+ double *split;
+
+ if (idim == 0) {
+ other1 = myloc[1]; other2 = myloc[2];
+ split = xsplit;
+ } else if (idim == 1) {
+ other1 = myloc[0]; other2 = myloc[2];
+ split = ysplit;
+ } else {
+ other1 = myloc[0]; other2 = myloc[1];
+ split = zsplit;
+ }
+
+ while (1) {
+ lower = boxlo[idim] + prd[idim]*split[index];
+ if (index < procgrid[idim]-1)
+ upper = boxlo[idim] + prd[idim]*split[index+1];
+ else upper = boxhi[idim];
+ if (lower >= hi[idim] || upper <= lo[idim]) break;
+
+ if (idim == 0) proc = grid2proc[index][other1][other2];
+ else if (idim == 1) proc = grid2proc[other1][index][other2];
+ else proc = grid2proc[other1][other2][idim];
+
+ if (noverlap == maxoverlap) {
+ maxoverlap += DELTA_PROCS;
+ memory->grow(overlap,maxoverlap,"comm:overlap");
+ }
+
+ if (proc == me) indexme = noverlap;
+ overlap[noverlap++] = proc;
+ index += dir;
+ if (index < 0 || index >= procgrid[idim]) break;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ determine overlap list of Noverlap procs the lo/hi box overlaps
+ overlap = non-zero area in common between box and proc sub-domain
+ recursive method for traversing an RCB tree of cuts
+ no need to split lo/hi box as recurse b/c OK if box extends outside RCB box
+------------------------------------------------------------------------- */
+
+void CommTiled::box_drop_tiled(int idim, double *lo, double *hi, int &indexme)
+{
+ box_drop_tiled_recurse(lo,hi,0,nprocs-1,indexme);
+}
+
+void CommTiled::box_drop_tiled_recurse(double *lo, double *hi,
+ int proclower, int procupper,
+ int &indexme)
+{
+ // end recursion when partition is a single proc
+ // add proc to overlap list
+
+ if (proclower == procupper) {
+ if (noverlap == maxoverlap) {
+ maxoverlap += DELTA_PROCS;
+ memory->grow(overlap,maxoverlap,"comm:overlap");
+ }
+
+ if (proclower == me) indexme = noverlap;
+ overlap[noverlap++] = proclower;
+ return;
+ }
+
+ // drop box on each side of cut it extends beyond
+ // use > and < criteria so does not include a box it only touches
+ // procmid = 1st processor in upper half of partition
+ // = location in tree that stores this cut
+ // dim = 0,1,2 dimension of cut
+ // cut = position of cut
+
+ int procmid = proclower + (procupper - proclower) / 2 + 1;
+ double cut = rcbinfo[procmid].cut;
+ int idim = rcbinfo[procmid].dim;
+
+ if (lo[idim] < cut)
+ box_drop_tiled_recurse(lo,hi,proclower,procmid-1,indexme);
+ if (hi[idim] > cut)
+ box_drop_tiled_recurse(lo,hi,procmid,procupper,indexme);
+}
+
+/* ----------------------------------------------------------------------
+ return other box owned by proc as lo/hi corner pts
+------------------------------------------------------------------------- */
+
+void CommTiled::box_other_brick(int idim, int iswap,
+ int proc, double *lo, double *hi)
+{
+ lo[0] = sublo[0]; lo[1] = sublo[1]; lo[2] = sublo[2];
+ hi[0] = subhi[0]; hi[1] = subhi[1]; hi[2] = subhi[2];
+
+ int other1,other2,oproc;
+ double *split;
+
+ if (idim == 0) {
+ other1 = myloc[1]; other2 = myloc[2];
+ split = xsplit;
+ } else if (idim == 1) {
+ other1 = myloc[0]; other2 = myloc[2];
+ split = ysplit;
+ } else {
+ other1 = myloc[0]; other2 = myloc[1];
+ split = zsplit;
+ }
+
+ int dir = -1;
+ if (iswap) dir = 1;
+ int index = myloc[idim];
+ int n = procgrid[idim];
+
+ for (int i = 0; i < n; i++) {
+ index += dir;
+ if (index < 0) index = n-1;
+ else if (index >= n) index = 0;
+
+ if (idim == 0) oproc = grid2proc[index][other1][other2];
+ else if (idim == 1) oproc = grid2proc[other1][index][other2];
+ else oproc = grid2proc[other1][other2][idim];
+
+ if (proc == oproc) {
+ lo[idim] = boxlo[idim] + prd[idim]*split[index];
+ if (split[index+1] < 1.0)
+ hi[idim] = boxlo[idim] + prd[idim]*split[index+1];
+ else hi[idim] = boxhi[idim];
+ return;
+ }
+ }
+}
+
+/* ----------------------------------------------------------------------
+ return other box owned by proc as lo/hi corner pts
+------------------------------------------------------------------------- */
+
+void CommTiled::box_other_tiled(int idim, int iswap,
+ int proc, double *lo, double *hi)
+{
+ double (*split)[2] = rcbinfo[proc].mysplit;
+
+ lo[0] = boxlo[0] + prd[0]*split[0][0];
+ if (split[0][1] < 1.0) hi[0] = boxlo[0] + prd[0]*split[0][1];
+ else hi[0] = boxhi[0];
+
+ lo[1] = boxlo[1] + prd[1]*split[1][0];
+ if (split[1][1] < 1.0) hi[1] = boxlo[1] + prd[1]*split[1][1];
+ else hi[1] = boxhi[1];
+
+ lo[2] = boxlo[2] + prd[2]*split[2][0];
+ if (split[2][1] < 1.0) hi[2] = boxlo[2] + prd[2]*split[2][1];
+ else hi[2] = boxhi[2];
+}
+
/* ----------------------------------------------------------------------
realloc the size of the send buffer as needed with BUFFACTOR and bufextra
if flag = 1, realloc
@@ -822,7 +1315,178 @@ void CommTiled::grow_list(int iswap, int iwhich, int n)
{
maxsendlist[iswap][iwhich] = static_cast (BUFFACTOR * n);
memory->grow(sendlist[iswap][iwhich],maxsendlist[iswap][iwhich],
- "comm:sendlist[iswap]");
+ "comm:sendlist[i][j]");
+}
+
+/* ----------------------------------------------------------------------
+ allocation of swap info
+------------------------------------------------------------------------- */
+
+void CommTiled::allocate_swap(int n)
+{
+ nsendproc = new int[n];
+ nrecvproc = new int[n];
+ sendother = new int[n];
+ sendself = new int[n];
+ nprocmax = new int[n];
+
+ sendproc = new int*[n];
+ recvproc = new int*[n];
+ sendnum = new int*[n];
+ recvnum = new int*[n];
+ size_forward_recv = new int*[n];
+ firstrecv = new int*[n];
+ size_reverse_send = new int*[n];
+ size_reverse_recv = new int*[n];
+ forward_recv_offset = new int*[n];
+ reverse_recv_offset = new int*[n];
+
+ pbc_flag = new int*[n];
+ pbc = new int**[n];
+ sendbox = new double**[n];
+ maxsendlist = new int*[n];
+ sendlist = new int**[n];
+
+ for (int i = 0; i < n; i++) {
+ sendproc[i] = recvproc[i] = NULL;
+ sendnum[i] = recvnum[i] = NULL;
+ size_forward_recv[i] = firstrecv[i] = NULL;
+ size_reverse_send[i] = size_reverse_recv[i] = NULL;
+ forward_recv_offset[i] = reverse_recv_offset[i] = NULL;
+
+ pbc_flag[i] = NULL;
+ pbc[i] = NULL;
+ sendbox[i] = NULL;
+ maxsendlist[i] = NULL;
+ sendlist[i] = NULL;
+ }
+
+ maxreqstat = 0;
+ requests = NULL;
+ statuses = NULL;
+
+ for (int i = 0; i < n; i++) {
+ nprocmax[i] = DELTA_PROCS;
+ grow_swap_send(i,DELTA_PROCS,0);
+ grow_swap_recv(i,DELTA_PROCS);
+ }
+}
+
+/* ----------------------------------------------------------------------
+ grow info for swap I, to allow for N procs to communicate with
+ ditto for complementary recv for swap I+1 or I-1, as invoked by caller
+------------------------------------------------------------------------- */
+
+void CommTiled::grow_swap_send(int i, int n, int nold)
+{
+ delete [] sendproc[i];
+ sendproc[i] = new int[n];
+ delete [] sendnum[i];
+ sendnum[i] = new int[n];
+
+ delete [] size_reverse_recv[i];
+ size_reverse_recv[i] = new int[n];
+ delete [] reverse_recv_offset[i];
+ reverse_recv_offset[i] = new int[n];
+
+ delete [] pbc_flag[i];
+ pbc_flag[i] = new int[n];
+ memory->destroy(pbc[i]);
+ memory->create(pbc[i],n,6,"comm:pbc_flag");
+ memory->destroy(sendbox[i]);
+ memory->create(sendbox[i],n,6,"comm:sendbox");
+
+ delete [] maxsendlist[i];
+ maxsendlist[i] = new int[n];
+
+ for (int j = 0; j < nold; j++) memory->destroy(sendlist[i][j]);
+ delete [] sendlist[i];
+ sendlist[i] = new int*[n];
+ for (int j = 0; j < n; j++) {
+ maxsendlist[i][j] = BUFMIN;
+ memory->create(sendlist[i][j],BUFMIN,"comm:sendlist[i][j]");
+ }
+}
+
+void CommTiled::grow_swap_recv(int i, int n)
+{
+ delete [] recvproc[i];
+ recvproc[i] = new int[n];
+ delete [] recvnum[i];
+ recvnum[i] = new int[n];
+
+ delete [] size_forward_recv[i];
+ size_forward_recv[i] = new int[n];
+ delete [] firstrecv[i];
+ firstrecv[i] = new int[n];
+ delete [] forward_recv_offset[i];
+ forward_recv_offset[i] = new int[n];
+
+ delete [] size_reverse_send[i];
+ size_reverse_send[i] = new int[n];
+
+ if (n > maxreqstat) {
+ maxreqstat = n;
+ delete [] requests;
+ delete [] statuses;
+ requests = new MPI_Request[n];
+ statuses = new MPI_Status[n];
+ }
+}
+
+/* ----------------------------------------------------------------------
+ deallocate swap info
+------------------------------------------------------------------------- */
+
+void CommTiled::deallocate_swap(int n)
+{
+ delete [] nsendproc;
+ delete [] nrecvproc;
+ delete [] sendother;
+ delete [] sendself;
+
+ for (int i = 0; i < n; i++) {
+ delete [] sendproc[i];
+ delete [] recvproc[i];
+ delete [] sendnum[i];
+ delete [] recvnum[i];
+ delete [] size_forward_recv[i];
+ delete [] firstrecv[i];
+ delete [] size_reverse_send[i];
+ delete [] size_reverse_recv[i];
+ delete [] forward_recv_offset[i];
+ delete [] reverse_recv_offset[i];
+
+ delete [] pbc_flag[i];
+ memory->destroy(pbc[i]);
+ memory->destroy(sendbox[i]);
+ delete [] maxsendlist[i];
+
+ for (int j = 0; j < nprocmax[i]; j++) memory->destroy(sendlist[i][j]);
+ delete [] sendlist[i];
+ }
+
+ delete [] sendproc;
+ delete [] recvproc;
+ delete [] sendnum;
+ delete [] recvnum;
+ delete [] size_forward_recv;
+ delete [] firstrecv;
+ delete [] size_reverse_send;
+ delete [] size_reverse_recv;
+ delete [] forward_recv_offset;
+ delete [] reverse_recv_offset;
+
+ delete [] pbc_flag;
+ delete [] pbc;
+ delete [] sendbox;
+ delete [] maxsendlist;
+ delete [] sendlist;
+
+ delete [] requests;
+ delete [] statuses;
+
+ delete [] nprocmax;
}
/* ----------------------------------------------------------------------
diff --git a/src/comm_tiled.h b/src/comm_tiled.h
index 2c7a6e62df..fd27c4de41 100644
--- a/src/comm_tiled.h
+++ b/src/comm_tiled.h
@@ -21,6 +21,7 @@ namespace LAMMPS_NS {
class CommTiled : public Comm {
public:
CommTiled(class LAMMPS *);
+ CommTiled(class LAMMPS *, class Comm *);
virtual ~CommTiled();
void init();
@@ -46,22 +47,23 @@ class CommTiled : public Comm {
bigint memory_usage();
private:
- int nswap; // # of swaps to perform = 2*dim
- int *nsendproc,*nrecvproc; // # of procs to send/recv to/from in each swap
-
int triclinic; // 0 if domain is orthog, 1 if triclinic
int map_style; // non-0 if global->local mapping is done
int size_forward; // # of per-atom datums in forward comm
int size_reverse; // # of datums in reverse comm
int size_border; // # of datums in forward border comm
- int **sendnum,**recvnum; // # of atoms to send/recv per swap/proc
+ int nswap; // # of swaps to perform = 2*dim
+ int *nsendproc,*nrecvproc; // # of procs to send/recv to/from in each swap
+ int *sendother; // 1 if send to any other proc in each swap
+ int *sendself; // 1 if send to self in each swap
+ int *nprocmax; // current max # of send procs for each swap
int **sendproc,**recvproc; // proc to send/recv to/from per swap/proc
+ int **sendnum,**recvnum; // # of atoms to send/recv per swap/proc
int **size_forward_recv; // # of values to recv in each forward swap/proc
int **firstrecv; // where to put 1st recv atom per swap/proc
int **size_reverse_send; // # to send in each reverse comm per swap/proc
int **size_reverse_recv; // # to recv in each reverse comm per swap/proc
-
int **forward_recv_offset; // forward comm offsets in buf_recv per swap/proc
int **reverse_recv_offset; // reverse comm offsets in buf_recv per swap/proc
@@ -77,16 +79,53 @@ class CommTiled : public Comm {
int maxsend,maxrecv; // current size of send/recv buffer
int maxforward,maxreverse; // max # of datums in forward/reverse comm
+ int maxexchange; // max # of datums/atom in exchange comm
int bufextra; // extra space beyond maxsend in send buffer
+ int maxreqstat; // max size of Request and Status vectors
MPI_Request *requests;
MPI_Status *statuses;
int comm_x_only,comm_f_only; // 1 if only exchange x,f in for/rev comm
- void grow_send(int, int); // reallocate send buffer
- void grow_recv(int); // free/allocate recv buffer
- void grow_list(int, int, int); // reallocate sendlist for one swap/proc
+ struct RCBinfo {
+ double mysplit[3][2]; // fractional RCB bounding box for one proc
+ double cut; // position of cut this proc owns
+ int dim; // dimension = 0/1/2 of cut
+ };
+
+ int noverlap; // # of overlapping procs
+ int maxoverlap; // current max length of overlap
+ int *overlap; // list of overlapping procs
+
+ RCBinfo *rcbinfo; // list of RCB info for all procs
+
+ double *prd; // local ptrs to Domain attributes
+ double *boxlo,*boxhi;
+ double *sublo,*subhi;
+
+ void init_buffers();
+
+ // box drop and other functions
+
+ typedef void (CommTiled::*BoxDropPtr)(int, double *, double *, int &);
+ BoxDropPtr box_drop;
+ void box_drop_brick(int, double *, double *, int &);
+ void box_drop_tiled(int, double *, double *, int &);
+ void box_drop_tiled_recurse(double *, double *, int, int, int &);
+
+ typedef void (CommTiled::*BoxOtherPtr)(int, int, int, double *, double *);
+ BoxOtherPtr box_other;
+ void box_other_brick(int, int, int, double *, double *);
+ void box_other_tiled(int, int, int, double *, double *);
+
+ void grow_send(int, int); // reallocate send buffer
+ void grow_recv(int); // free/allocate recv buffer
+ void grow_list(int, int, int); // reallocate sendlist for one swap/proc
+ void allocate_swap(int); // allocate swap arrays
+ void grow_swap_send(int, int, int); // grow swap arrays for send and recv
+ void grow_swap_recv(int, int);
+ void deallocate_swap(int); // deallocate swap arrays
};
}
diff --git a/src/create_atoms.cpp b/src/create_atoms.cpp
index be5c479360..9777f23334 100644
--- a/src/create_atoms.cpp
+++ b/src/create_atoms.cpp
@@ -27,6 +27,8 @@
#include "domain.h"
#include "lattice.h"
#include "region.h"
+#include "input.h"
+#include "variable.h"
#include "random_park.h"
#include "random_mars.h"
#include "math_extra.h"
@@ -41,6 +43,7 @@ using namespace MathConst;
enum{BOX,REGION,SINGLE,RANDOM};
enum{ATOM,MOLECULE};
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
/* ---------------------------------------------------------------------- */
@@ -103,6 +106,8 @@ void CreateAtoms::command(int narg, char **arg)
remapflag = 0;
mode = ATOM;
int molseed;
+ varflag = 0;
+ vstr = xstr = ystr = zstr = NULL;
nbasis = domain->lattice->nbasis;
basistype = new int[nbasis];
@@ -141,6 +146,33 @@ void CreateAtoms::command(int narg, char **arg)
else if (strcmp(arg[iarg+1],"lattice") == 0) scaleflag = 1;
else error->all(FLERR,"Illegal create_atoms command");
iarg += 2;
+ } else if (strcmp(arg[iarg],"var") == 0) {
+ if (iarg+2 > narg) error->all(FLERR,"Illegal create_atoms command");
+ delete [] vstr;
+ int n = strlen(arg[iarg+1]) + 1;
+ vstr = new char[n];
+ strcpy(vstr,arg[iarg+1]);
+ varflag = 1;
+ iarg += 2;
+ } else if (strcmp(arg[iarg],"set") == 0) {
+ if (iarg+3 > narg) error->all(FLERR,"Illegal create_atoms command");
+ if (strcmp(arg[iarg+1],"x") == 0) {
+ delete [] xstr;
+ int n = strlen(arg[iarg+2]) + 1;
+ xstr = new char[n];
+ strcpy(xstr,arg[iarg+2]);
+ } else if (strcmp(arg[iarg+1],"y") == 0) {
+ delete [] ystr;
+ int n = strlen(arg[iarg+2]) + 1;
+ ystr = new char[n];
+ strcpy(ystr,arg[iarg+2]);
+ } else if (strcmp(arg[iarg+1],"z") == 0) {
+ delete [] zstr;
+ int n = strlen(arg[iarg+2]) + 1;
+ zstr = new char[n];
+ strcpy(zstr,arg[iarg+2]);
+ } else error->all(FLERR,"Illegal create_atoms command");
+ iarg += 3;
} else error->all(FLERR,"Illegal create_atoms command");
}
@@ -178,6 +210,47 @@ void CreateAtoms::command(int narg, char **arg)
ranmol = new RanMars(lmp,molseed+comm->me);
}
+ // error check and further setup for variable test
+ // save local copy of each equal variable string so can restore at end
+
+ if (!vstr && (xstr || ystr || zstr))
+ error->all(FLERR,"Incomplete use of variables in create_atoms command");
+ if (vstr && (!xstr && !ystr && !zstr))
+ error->all(FLERR,"Incomplete use of variables in create_atoms command");
+
+ if (varflag) {
+ vvar = input->variable->find(vstr);
+ if (vvar < 0)
+ error->all(FLERR,"Variable name for create_atoms does not exist");
+ if (!input->variable->equalstyle(vvar))
+ error->all(FLERR,"Variable for create_atoms is invalid style");
+
+ if (xstr) {
+ xvar = input->variable->find(xstr);
+ if (xvar < 0)
+ error->all(FLERR,"Variable name for create_atoms does not exist");
+ if (!input->variable->equalstyle(xvar))
+ error->all(FLERR,"Variable for create_atoms is invalid style");
+ input->variable->equal_save(xvar,xstr_copy);
+ }
+ if (ystr) {
+ yvar = input->variable->find(ystr);
+ if (yvar < 0)
+ error->all(FLERR,"Variable name for create_atoms does not exist");
+ if (!input->variable->equalstyle(yvar))
+ error->all(FLERR,"Variable for create_atoms is invalid style");
+ input->variable->equal_save(yvar,ystr_copy);
+ }
+ if (zstr) {
+ zvar = input->variable->find(zstr);
+ if (zvar < 0)
+ error->all(FLERR,"Variable name for create_atoms does not exist");
+ if (!input->variable->equalstyle(zvar))
+ error->all(FLERR,"Variable for create_atoms is invalid style");
+ input->variable->equal_save(zvar,zstr_copy);
+ }
+ }
+
// demand non-none lattice be defined for BOX and REGION
// else setup scaling for SINGLE and RANDOM
// could use domain->lattice->lattice2box() to do conversion of
@@ -226,17 +299,32 @@ void CreateAtoms::command(int narg, char **arg)
}
if (style == BOX || style == REGION) {
- if (domain->xperiodic) {
- if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
- if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] -= 2.0*epsilon[0];
- }
- if (domain->yperiodic) {
- if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
- if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] -= 2.0*epsilon[1];
- }
- if (domain->zperiodic) {
- if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
- if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] -= 2.0*epsilon[2];
+ if (comm->layout != LAYOUT_TILED) {
+ if (domain->xperiodic) {
+ if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
+ if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] -= 2.0*epsilon[0];
+ }
+ if (domain->yperiodic) {
+ if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
+ if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] -= 2.0*epsilon[1];
+ }
+ if (domain->zperiodic) {
+ if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
+ if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] -= 2.0*epsilon[2];
+ }
+ } else {
+ if (domain->xperiodic) {
+ if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0];
+ if (comm->mysplit[0][1] == 1.0) subhi[0] -= 2.0*epsilon[0];
+ }
+ if (domain->yperiodic) {
+ if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1];
+ if (comm->mysplit[1][1] == 1.0) subhi[1] -= 2.0*epsilon[1];
+ }
+ if (domain->zperiodic) {
+ if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2];
+ if (comm->mysplit[2][1] == 1.0) subhi[2] -= 2.0*epsilon[2];
+ }
}
}
@@ -259,6 +347,14 @@ void CreateAtoms::command(int narg, char **arg)
fix->set_arrays(i);
}
+ // restore each equal variable string previously saved
+
+ if (varflag) {
+ if (xstr) input->variable->equal_restore(xvar,xstr_copy);
+ if (ystr) input->variable->equal_restore(yvar,ystr_copy);
+ if (zstr) input->variable->equal_restore(zvar,zstr_copy);
+ }
+
// set new total # of atoms and error check
bigint nblocal = atom->nlocal;
@@ -409,6 +505,10 @@ void CreateAtoms::command(int narg, char **arg)
delete ranmol;
if (domain->lattice) delete [] basistype;
+ delete [] vstr;
+ delete [] xstr;
+ delete [] ystr;
+ delete [] zstr;
// print status
@@ -509,7 +609,7 @@ void CreateAtoms::add_random()
}
// generate random positions for each new atom/molecule within bounding box
- // iterate until atom is within region and triclinic simulation box
+ // iterate until atom is within region, variable, and triclinic simulation box
// if final atom position is in my subbox, create it
if (xlo > xhi || ylo > yhi || zlo > zhi)
@@ -527,6 +627,7 @@ void CreateAtoms::add_random()
if (nregion >= 0 &&
domain->regions[nregion]->match(xone[0],xone[1],xone[2]) == 0)
valid = 0;
+ if (varflag && vartest(xone) == 0) valid = 0;
if (triclinic) {
domain->x2lamda(xone,lamda);
coord = lamda;
@@ -642,6 +743,10 @@ void CreateAtoms::add_lattice()
if (style == REGION)
if (!domain->regions[nregion]->match(x[0],x[1],x[2])) continue;
+ // if variable test specified, eval variable
+
+ if (varflag && vartest(x) == 0) continue;
+
// test if atom/molecule position is in my subbox
if (triclinic) {
@@ -696,3 +801,19 @@ void CreateAtoms::add_molecule(double *center)
atom->add_molecule_atom(onemol,m,n,0);
}
}
+
+/* ----------------------------------------------------------------------
+ test a generated atom position against variable evaluation
+ first plug in x,y,z values as requested
+------------------------------------------------------------------------- */
+
+int CreateAtoms::vartest(double *x)
+{
+ if (xstr) input->variable->equal_override(xvar,x[0]);
+ if (ystr) input->variable->equal_override(yvar,x[1]);
+ if (zstr) input->variable->equal_override(zvar,x[2]);
+
+ double value = input->variable->compute_equal(vvar);
+ if (value == 0.0) return 0;
+ return 1;
+}
diff --git a/src/create_atoms.h b/src/create_atoms.h
index 26d3f475ef..a75a29d484 100644
--- a/src/create_atoms.h
+++ b/src/create_atoms.h
@@ -35,6 +35,10 @@ class CreateAtoms : protected Pointers {
double xone[3];
int remapflag;
+ int varflag,vvar,xvar,yvar,zvar;
+ char *vstr,*xstr,*ystr,*zstr;
+ char *xstr_copy,*ystr_copy,*zstr_copy;
+
class Molecule *onemol;
class RanMars *ranmol;
@@ -45,6 +49,7 @@ class CreateAtoms : protected Pointers {
void add_random();
void add_lattice();
void add_molecule(double *);
+ int vartest(double *); // evaluate a variable with new atom position
};
}
diff --git a/src/domain.cpp b/src/domain.cpp
index 5ffa35a82a..6f3344bb02 100644
--- a/src/domain.cpp
+++ b/src/domain.cpp
@@ -44,14 +44,15 @@
using namespace LAMMPS_NS;
using namespace MathConst;
+enum{NO_REMAP,X_REMAP,V_REMAP}; // same as fix_deform.cpp
+enum{IGNORE,WARN,ERROR}; // same as thermo.cpp
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
+
#define BIG 1.0e20
#define SMALL 1.0e-4
#define DELTAREGION 4
#define BONDSTRETCH 1.1
-enum{NO_REMAP,X_REMAP,V_REMAP}; // same as fix_deform.cpp
-enum{IGNORE,WARN,ERROR}; // same as thermo.cpp
-
/* ----------------------------------------------------------------------
default is periodic
------------------------------------------------------------------------- */
@@ -249,55 +250,81 @@ void Domain::set_global_box()
/* ----------------------------------------------------------------------
set lamda box params
assumes global box is defined and proc assignment has been made
- uses comm->xyz_split to define subbox boundaries in consistent manner
+ uses comm->xyz_split or comm->mysplit
+ to define subbox boundaries in consistent manner
------------------------------------------------------------------------- */
void Domain::set_lamda_box()
{
- int *myloc = comm->myloc;
- double *xsplit = comm->xsplit;
- double *ysplit = comm->ysplit;
- double *zsplit = comm->zsplit;
+ if (comm->layout != LAYOUT_TILED) {
+ int *myloc = comm->myloc;
+ double *xsplit = comm->xsplit;
+ double *ysplit = comm->ysplit;
+ double *zsplit = comm->zsplit;
- sublo_lamda[0] = xsplit[myloc[0]];
- subhi_lamda[0] = xsplit[myloc[0]+1];
+ sublo_lamda[0] = xsplit[myloc[0]];
+ subhi_lamda[0] = xsplit[myloc[0]+1];
+ sublo_lamda[1] = ysplit[myloc[1]];
+ subhi_lamda[1] = ysplit[myloc[1]+1];
+ sublo_lamda[2] = zsplit[myloc[2]];
+ subhi_lamda[2] = zsplit[myloc[2]+1];
- sublo_lamda[1] = ysplit[myloc[1]];
- subhi_lamda[1] = ysplit[myloc[1]+1];
+ } else {
+ double (*mysplit)[2] = comm->mysplit;
- sublo_lamda[2] = zsplit[myloc[2]];
- subhi_lamda[2] = zsplit[myloc[2]+1];
+ sublo_lamda[0] = mysplit[0][0];
+ subhi_lamda[0] = mysplit[0][1];
+ sublo_lamda[1] = mysplit[1][0];
+ subhi_lamda[1] = mysplit[1][1];
+ sublo_lamda[2] = mysplit[2][0];
+ subhi_lamda[2] = mysplit[2][1];
+ }
}
/* ----------------------------------------------------------------------
set local subbox params for orthogonal boxes
assumes global box is defined and proc assignment has been made
- uses comm->xyz_split to define subbox boundaries in consistent manner
+ uses comm->xyz_split or comm->mysplit
+ to define subbox boundaries in consistent manner
insure subhi[max] = boxhi
------------------------------------------------------------------------- */
void Domain::set_local_box()
{
- int *myloc = comm->myloc;
- int *procgrid = comm->procgrid;
- double *xsplit = comm->xsplit;
- double *ysplit = comm->ysplit;
- double *zsplit = comm->zsplit;
+ if (triclinic) return;
+
+ if (comm->layout != LAYOUT_TILED) {
+ int *myloc = comm->myloc;
+ int *procgrid = comm->procgrid;
+ double *xsplit = comm->xsplit;
+ double *ysplit = comm->ysplit;
+ double *zsplit = comm->zsplit;
- if (triclinic == 0) {
sublo[0] = boxlo[0] + xprd*xsplit[myloc[0]];
- if (myloc[0] < procgrid[0]-1)
- subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1];
+ if (myloc[0] < procgrid[0]-1) subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1];
else subhi[0] = boxhi[0];
sublo[1] = boxlo[1] + yprd*ysplit[myloc[1]];
- if (myloc[1] < procgrid[1]-1)
- subhi[1] = boxlo[1] + yprd*ysplit[myloc[1]+1];
+ if (myloc[1] < procgrid[1]-1) subhi[1] = boxlo[1] + yprd*ysplit[myloc[1]+1];
else subhi[1] = boxhi[1];
sublo[2] = boxlo[2] + zprd*zsplit[myloc[2]];
- if (myloc[2] < procgrid[2]-1)
- subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1];
+ if (myloc[2] < procgrid[2]-1) subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1];
+ else subhi[2] = boxhi[2];
+
+ } else {
+ double (*mysplit)[2] = comm->mysplit;
+
+ sublo[0] = boxlo[0] + xprd*mysplit[0][0];
+ if (mysplit[0][1] < 1.0) subhi[0] = boxlo[0] + xprd*mysplit[0][1];
+ else subhi[0] = boxhi[0];
+
+ sublo[1] = boxlo[1] + yprd*mysplit[1][0];
+ if (mysplit[1][1] < 1.0) subhi[1] = boxlo[1] + yprd*mysplit[1][1];
+ else subhi[1] = boxhi[1];
+
+ sublo[2] = boxlo[2] + zprd*mysplit[2][0];
+ if (mysplit[2][1] < 1.0) subhi[2] = boxlo[2] + zprd*mysplit[2][1];
else subhi[2] = boxhi[2];
}
}
diff --git a/src/dump_custom.cpp b/src/dump_custom.cpp
index f1cb9379fa..56a38e2402 100644
--- a/src/dump_custom.cpp
+++ b/src/dump_custom.cpp
@@ -34,7 +34,7 @@ using namespace LAMMPS_NS;
// customize by adding keyword
// also customize compute_atom_property.cpp
-enum{ID,MOL,PROC,TYPE,ELEMENT,MASS,
+enum{ID,MOL,PROC,PROCP1,TYPE,ELEMENT,MASS,
X,Y,Z,XS,YS,ZS,XSTRI,YSTRI,ZSTRI,XU,YU,ZU,XUTRI,YUTRI,ZUTRI,
XSU,YSU,ZSU,XSUTRI,YSUTRI,ZSUTRI,
IX,IY,IZ,
@@ -470,6 +470,10 @@ int DumpCustom::count()
for (i = 0; i < nlocal; i++) dchoose[i] = me;
ptr = dchoose;
nstride = 1;
+ } else if (thresh_array[ithresh] == PROCP1) {
+ for (i = 0; i < nlocal; i++) dchoose[i] = me;
+ ptr = dchoose;
+ nstride = 1;
} else if (thresh_array[ithresh] == TYPE) {
int *type = atom->type;
for (i = 0; i < nlocal; i++) dchoose[i] = type[i];
@@ -1018,6 +1022,9 @@ int DumpCustom::parse_fields(int narg, char **arg)
} else if (strcmp(arg[iarg],"proc") == 0) {
pack_choice[i] = &DumpCustom::pack_proc;
vtype[i] = INT;
+ } else if (strcmp(arg[iarg],"procp1") == 0) {
+ pack_choice[i] = &DumpCustom::pack_procp1;
+ vtype[i] = INT;
} else if (strcmp(arg[iarg],"type") == 0) {
pack_choice[i] = &DumpCustom::pack_type;
vtype[i] = INT;
@@ -1497,6 +1504,7 @@ int DumpCustom::modify_param(int narg, char **arg)
if (strcmp(arg[1],"id") == 0) thresh_array[nthresh] = ID;
else if (strcmp(arg[1],"mol") == 0) thresh_array[nthresh] = MOL;
else if (strcmp(arg[1],"proc") == 0) thresh_array[nthresh] = PROC;
+ else if (strcmp(arg[1],"procp1") == 0) thresh_array[nthresh] = PROCP1;
else if (strcmp(arg[1],"type") == 0) thresh_array[nthresh] = TYPE;
else if (strcmp(arg[1],"mass") == 0) thresh_array[nthresh] = MASS;
@@ -1876,6 +1884,16 @@ void DumpCustom::pack_proc(int n)
/* ---------------------------------------------------------------------- */
+void DumpCustom::pack_procp1(int n)
+{
+ for (int i = 0; i < nchoose; i++) {
+ buf[n] = me+1;
+ n += size_one;
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
void DumpCustom::pack_type(int n)
{
int *type = atom->type;
diff --git a/src/dump_custom.h b/src/dump_custom.h
index c483d5b7e1..ea3c5ecff8 100644
--- a/src/dump_custom.h
+++ b/src/dump_custom.h
@@ -124,6 +124,7 @@ class DumpCustom : public Dump {
void pack_id(int);
void pack_molecule(int);
void pack_proc(int);
+ void pack_procp1(int);
void pack_type(int);
void pack_mass(int);
diff --git a/src/fix_adapt.cpp b/src/fix_adapt.cpp
index 22e718dd76..246d828e7d 100644
--- a/src/fix_adapt.cpp
+++ b/src/fix_adapt.cpp
@@ -498,8 +498,6 @@ void FixAdapt::restore_settings()
} else if (ad->which == ATOM) {
if (diamflag) {
- int mflag = 0;
- if (atom->rmass_flag) mflag = 1;
double density;
double *vec = fix_diam->vstore;
diff --git a/src/fix_balance.cpp b/src/fix_balance.cpp
index e4b7b28b7d..c617d5d042 100644
--- a/src/fix_balance.cpp
+++ b/src/fix_balance.cpp
@@ -22,12 +22,14 @@
#include "irregular.h"
#include "force.h"
#include "kspace.h"
+#include "rcb.h"
#include "error.h"
using namespace LAMMPS_NS;
using namespace FixConst;
-enum{SHIFT,RCB};
+enum{SHIFT,BISECTION};
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
/* ---------------------------------------------------------------------- */
@@ -53,7 +55,7 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
thresh = force->numeric(FLERR,arg[4]);
if (strcmp(arg[5],"shift") == 0) lbstyle = SHIFT;
- else if (strcmp(arg[5],"rcb") == 0) lbstyle = RCB;
+ else if (strcmp(arg[5],"rcb") == 0) lbstyle = BISECTION;
else error->all(FLERR,"Illegal fix balance command");
int iarg = 5;
@@ -65,7 +67,7 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
stopthresh = force->numeric(FLERR,arg[iarg+3]);
if (stopthresh < 1.0) error->all(FLERR,"Illegal fix balance command");
iarg += 4;
- } else if (lbstyle == RCB) {
+ } else if (lbstyle == BISECTION) {
iarg++;
}
@@ -97,14 +99,16 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
}
}
- // create instance of Balance class and initialize it with params
- // NOTE: do I need Balance instance if RCB?
- // create instance of Irregular class
+ if (lbstyle == BISECTION && comm->style == 0)
+ error->all(FLERR,"Fix balance rcb cannot be used with comm_style brick");
+
+ // create instance of Balance class
+ // if SHIFT, initialize it with params
balance = new Balance(lmp);
-
if (lbstyle == SHIFT) balance->shift_setup(bstr,nitermax,thresh);
- if (lbstyle == RCB) error->all(FLERR,"Fix balance rcb is not yet supported");
+
+ // create instance of Irregular class
irregular = new Irregular(lmp);
@@ -238,32 +242,33 @@ void FixBalance::rebalance()
{
imbprev = imbnow;
+ // invoke balancer and reset comm->uniform flag
+
+ int *sendproc;
if (lbstyle == SHIFT) {
itercount = balance->shift();
- } else if (lbstyle == RCB) {
+ comm->layout = LAYOUT_NONUNIFORM;
+ } else if (lbstyle == BISECTION) {
+ sendproc = balance->bisection();
+ comm->layout = LAYOUT_TILED;
}
// output of final result
if (fp) balance->dumpout(update->ntimestep,fp);
- // reset comm->uniform flag
- // NOTE: this needs to change with RCB
-
- comm->uniform = 0;
-
// reset proc sub-domains
if (domain->triclinic) domain->set_lamda_box();
domain->set_local_box();
- // if splits moved further than neighboring processor
// move atoms to new processors via irregular()
- // only needed if migrate_check() says an atom moves to far,
+ // only needed if migrate_check() says an atom moves to far
// else allow caller's comm->exchange() to do it
if (domain->triclinic) domain->x2lamda(atom->nlocal);
- if (irregular->migrate_check()) irregular->migrate_atoms();
+ if (lbstyle == BISECTION) irregular->migrate_atoms(0,sendproc);
+ else if (irregular->migrate_check()) irregular->migrate_atoms();
if (domain->triclinic) domain->lamda2x(atom->nlocal);
// invoke KSpace setup_grid() to adjust to new proc sub-domains
@@ -303,5 +308,6 @@ double FixBalance::compute_vector(int i)
double FixBalance::memory_usage()
{
double bytes = irregular->memory_usage();
+ if (balance->rcb) bytes += balance->rcb->memory_usage();
return bytes;
}
diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp
index 3401c61a67..3d350c569a 100644
--- a/src/fix_deform.cpp
+++ b/src/fix_deform.cpp
@@ -987,3 +987,14 @@ void FixDeform::options(int narg, char **arg)
} else error->all(FLERR,"Illegal fix deform command");
}
}
+
+/* ----------------------------------------------------------------------
+ memory usage of Irregular
+------------------------------------------------------------------------- */
+
+double FixDeform::memory_usage()
+{
+ double bytes = 0.0;
+ if (irregular) bytes += irregular->memory_usage();
+ return bytes;
+}
diff --git a/src/fix_deform.h b/src/fix_deform.h
index 7a781629dc..469fa87761 100644
--- a/src/fix_deform.h
+++ b/src/fix_deform.h
@@ -35,6 +35,7 @@ class FixDeform : public Fix {
void init();
void pre_exchange();
void end_of_step();
+ double memory_usage();
private:
int triclinic,scaleflag,flipflag;
diff --git a/src/fix_nh.cpp b/src/fix_nh.cpp
index 1abc05c739..b254636111 100644
--- a/src/fix_nh.cpp
+++ b/src/fix_nh.cpp
@@ -2271,3 +2271,14 @@ void FixNH::pre_exchange()
domain->lamda2x(atom->nlocal);
}
}
+
+/* ----------------------------------------------------------------------
+ memory usage of Irregular
+------------------------------------------------------------------------- */
+
+double FixNH::memory_usage()
+{
+ double bytes = 0.0;
+ if (irregular) bytes += irregular->memory_usage();
+ return bytes;
+}
diff --git a/src/fix_nh.h b/src/fix_nh.h
index 630c3cd68f..0e676b4a0d 100644
--- a/src/fix_nh.h
+++ b/src/fix_nh.h
@@ -39,6 +39,7 @@ class FixNH : public Fix {
void reset_target(double);
void reset_dt();
virtual void *extract(const char*,int &);
+ double memory_usage();
protected:
int dimension,which;
diff --git a/src/input.cpp b/src/input.cpp
index b5affba078..f0bcfe7867 100644
--- a/src/input.cpp
+++ b/src/input.cpp
@@ -1161,19 +1161,15 @@ void Input::comm_style()
{
if (narg < 1) error->all(FLERR,"Illegal comm_style command");
if (strcmp(arg[0],"brick") == 0) {
- if (comm->layout)
- error->all(FLERR,
- "Cannot switch to comm style brick from "
- "irregular tiling of proc domains");
- comm = new CommBrick(lmp);
- // NOTE: this will lose load balancing info in old CommBrick
- if (domain->box_exist) {
- comm->set_proc_grid();
- domain->set_local_box();
- }
+ if (comm->style == 0) return;
+ Comm *oldcomm = comm;
+ comm = new CommBrick(lmp,oldcomm);
+ delete oldcomm;
} else if (strcmp(arg[0],"tiled") == 0) {
- error->all(FLERR,"Comm_style tiled not yet supported");
- comm = new CommTiled(lmp);
+ if (comm->style == 1) return;
+ Comm *oldcomm = comm;
+ comm = new CommTiled(lmp,oldcomm);
+ delete oldcomm;
} else error->all(FLERR,"Illegal comm_style command");
}
diff --git a/src/irregular.cpp b/src/irregular.cpp
index b55d964327..9fd478a9da 100644
--- a/src/irregular.cpp
+++ b/src/irregular.cpp
@@ -30,6 +30,8 @@ using namespace LAMMPS_NS;
int *Irregular::proc_recv_copy;
int compare_standalone(const void *, const void *);
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
+
#define BUFFACTOR 1.5
#define BUFMIN 1000
#define BUFEXTRA 1000
@@ -43,13 +45,26 @@ Irregular::Irregular(LAMMPS *lmp) : Pointers(lmp)
triclinic = domain->triclinic;
map_style = atom->map_style;
- procgrid = comm->procgrid;
- grid2proc = comm->grid2proc;
- aplan = NULL;
- dplan = NULL;
+ // migrate work vectors
- // initialize buffers for atom comm, not used for datum comm
+ maxlocal = 0;
+ mproclist = NULL;
+ msizes = NULL;
+
+ // send buffers
+
+ maxdbuf = 0;
+ dbuf = NULL;
+ maxbuf = 0;
+ buf = NULL;
+
+ // universal work vectors
+
+ memory->create(work1,nprocs,"irregular:work1");
+ memory->create(work2,nprocs,"irregular:work2");
+
+ // initialize buffers for migrate atoms, not used for datum comm
// these can persist for multiple irregular operations
maxsend = BUFMIN;
@@ -62,9 +77,12 @@ Irregular::Irregular(LAMMPS *lmp) : Pointers(lmp)
Irregular::~Irregular()
{
- if (aplan) destroy_atom();
- if (dplan) destroy_data();
-
+ memory->destroy(mproclist);
+ memory->destroy(msizes);
+ memory->destroy(dbuf);
+ memory->destroy(buf);
+ memory->destroy(work1);
+ memory->destroy(work2);
memory->destroy(buf_send);
memory->destroy(buf_recv);
}
@@ -74,11 +92,13 @@ Irregular::~Irregular()
can be used in place of comm->exchange()
unlike exchange(), allows atoms to have moved arbitrarily long distances
sets up irregular plan, invokes it, destroys it
+ sortflag = flag for sorting order of received messages by proc ID
+ procassign = non-NULL if already know procs atoms are assigned to (from RCB)
atoms MUST be remapped to be inside simulation box before this is called
for triclinic: atoms must be in lamda coords (0-1) before this is called
------------------------------------------------------------------------- */
-void Irregular::migrate_atoms(int sortflag)
+void Irregular::migrate_atoms(int sortflag, int *procassign)
{
// clear global->local map since atoms move to new procs
// clear old ghosts so map_set() at end will operate only on local atoms
@@ -101,13 +121,16 @@ void Irregular::migrate_atoms(int sortflag)
subhi = domain->subhi_lamda;
}
- uniform = comm->uniform;
+ layout = comm->layout;
xsplit = comm->xsplit;
ysplit = comm->ysplit;
zsplit = comm->zsplit;
boxlo = domain->boxlo;
prd = domain->prd;
+ procgrid = comm->procgrid;
+ grid2proc = comm->grid2proc;
+
// loop over atoms, flag any that are not in my sub-box
// fill buffer with atoms leaving my box, using < and >=
// assign which proc it belongs to via coord2proc()
@@ -119,41 +142,63 @@ void Irregular::migrate_atoms(int sortflag)
double **x = atom->x;
int nlocal = atom->nlocal;
+ if (nlocal > maxlocal) {
+ maxlocal = nlocal;
+ memory->destroy(mproclist);
+ memory->destroy(msizes);
+ memory->create(mproclist,maxlocal,"irregular:mproclist");
+ memory->create(msizes,maxlocal,"irregular:msizes");
+ }
+
+ int igx,igy,igz;
int nsend = 0;
int nsendatom = 0;
- int *sizes = new int[nlocal];
- int *proclist = new int[nlocal];
- int igx,igy,igz;
-
int i = 0;
- while (i < nlocal) {
- if (x[i][0] < sublo[0] || x[i][0] >= subhi[0] ||
- x[i][1] < sublo[1] || x[i][1] >= subhi[1] ||
- x[i][2] < sublo[2] || x[i][2] >= subhi[2]) {
- proclist[nsendatom] = coord2proc(x[i],igx,igy,igz);
- if (proclist[nsendatom] != me) {
+
+ if (procassign) {
+ while (i < nlocal) {
+ if (procassign[i] == me) i++;
+ else {
+ mproclist[nsendatom] = procassign[i];
if (nsend > maxsend) grow_send(nsend,1);
- sizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]);
- nsend += sizes[nsendatom];
+ msizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]);
+ nsend += msizes[nsendatom];
nsendatom++;
avec->copy(nlocal-1,i,1);
+ procassign[i] = procassign[nlocal-1];
nlocal--;
+ }
+ }
+
+ } else {
+ while (i < nlocal) {
+ if (x[i][0] < sublo[0] || x[i][0] >= subhi[0] ||
+ x[i][1] < sublo[1] || x[i][1] >= subhi[1] ||
+ x[i][2] < sublo[2] || x[i][2] >= subhi[2]) {
+ mproclist[nsendatom] = coord2proc(x[i],igx,igy,igz);
+ if (mproclist[nsendatom] == me) i++;
+ else {
+ if (nsend > maxsend) grow_send(nsend,1);
+ msizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]);
+ nsend += msizes[nsendatom];
+ nsendatom++;
+ avec->copy(nlocal-1,i,1);
+ nlocal--;
+ }
} else i++;
- } else i++;
+ }
}
+
atom->nlocal = nlocal;
// create irregular communication plan, perform comm, destroy plan
// returned nrecv = size of buffer needed for incoming atoms
- int nrecv = create_atom(nsendatom,sizes,proclist,sortflag);
+ int nrecv = create_atom(nsendatom,msizes,mproclist,sortflag);
if (nrecv > maxrecv) grow_recv(nrecv);
- exchange_atom(buf_send,sizes,buf_recv);
+ exchange_atom(buf_send,msizes,buf_recv);
destroy_atom();
- delete [] sizes;
- delete [] proclist;
-
// add received atoms to my list
int m = 0;
@@ -174,6 +219,11 @@ void Irregular::migrate_atoms(int sortflag)
int Irregular::migrate_check()
{
+ // migrate required if comm layout is tiled
+ // cannot use myloc[] logic below
+
+ if (comm->layout == LAYOUT_TILED) return 1;
+
// subbox bounds for orthogonal or triclinic box
// other comm/domain data used by coord2proc()
@@ -186,13 +236,16 @@ int Irregular::migrate_check()
subhi = domain->subhi_lamda;
}
- uniform = comm->uniform;
+ layout = comm->layout;
xsplit = comm->xsplit;
ysplit = comm->ysplit;
zsplit = comm->zsplit;
boxlo = domain->boxlo;
prd = domain->prd;
+ procgrid = comm->procgrid;
+ grid2proc = comm->grid2proc;
+
// loop over atoms, check for any that are not in my sub-box
// assign which proc it belongs to via coord2proc()
// if logical igx,igy,igz of newproc > one away from myloc, set flag = 1
@@ -250,6 +303,7 @@ int Irregular::migrate_check()
n = # of atoms to send
sizes = # of doubles for each atom
proclist = proc to send each atom to (not including self)
+ sortflag = flag for sorting order of received messages by proc ID
return total # of doubles I will recv (not including self)
------------------------------------------------------------------------- */
@@ -257,51 +311,60 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
{
int i;
- // allocate plan and work vectors
-
- if (aplan) destroy_atom();
- aplan = (PlanAtom *) memory->smalloc(sizeof(PlanAtom),"irregular:aplan");
- int *list = new int[nprocs];
- int *count = new int[nprocs];
-
- // nrecv = # of messages I receive
+ // setup for collective comm
+ // work1 = 1 for procs I send a message to, not including self
+ // work2 = 1 for all procs, used for ReduceScatter
for (i = 0; i < nprocs; i++) {
- list[i] = 0;
- count[i] = 1;
+ work1[i] = 0;
+ work2[i] = 1;
}
- for (i = 0; i < n; i++) list[proclist[i]] = 1;
+ for (i = 0; i < n; i++) work1[proclist[i]] = 1;
+ work1[me] = 0;
- int nrecv;
- MPI_Reduce_scatter(list,&nrecv,count,MPI_INT,MPI_SUM,world);
+ // nrecv_proc = # of procs I receive messages from, not including self
+ // options for performing ReduceScatter operation
+ // some are more efficient on some machines at big sizes
+
+#ifdef LAMMPS_RS_ALLREDUCE_INPLACE
+ MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world);
+ nrecv_proc = work1[me];
+#else
+#ifdef LAMMPS_RS_ALLREDUCE
+ MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world);
+ nrecv_proc = work2[me];
+#else
+ MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world);
+#endif
+#endif
// allocate receive arrays
- int *proc_recv = new int[nrecv];
- int *length_recv = new int[nrecv];
- MPI_Request *request = new MPI_Request[nrecv];
- MPI_Status *status = new MPI_Status[nrecv];
+ proc_recv = new int[nrecv_proc];
+ length_recv = new int[nrecv_proc];
+ request = new MPI_Request[nrecv_proc];
+ status = new MPI_Status[nrecv_proc];
- // nsend = # of messages I send
+ // nsend_proc = # of messages I send
- for (i = 0; i < nprocs; i++) list[i] = 0;
- for (i = 0; i < n; i++) list[proclist[i]] += sizes[i];
+ for (i = 0; i < nprocs; i++) work1[i] = 0;
+ for (i = 0; i < n; i++) work1[proclist[i]] += sizes[i];
- int nsend = 0;
+ nsend_proc = 0;
for (i = 0; i < nprocs; i++)
- if (list[i]) nsend++;
+ if (work1[i]) nsend_proc++;
// allocate send arrays
- int *proc_send = new int[nsend];
- int *length_send = new int[nsend];
- int *num_send = new int[nsend];
- int *index_send = new int[n];
- int *offset_send = new int[n];
+ proc_send = new int[nsend_proc];
+ length_send = new int[nsend_proc];
+ num_send = new int[nsend_proc];
+ index_send = new int[n];
+ offset_send = new int[n];
// list still stores size of message for procs I send to
// proc_send = procs I send to
- // length_send = total size of message I send to each proc
+ // length_send = # of doubles I send to each proc
// to balance pattern of send messages:
// each proc begins with iproc > me, continues until iproc = me
// reset list to store which send message each proc corresponds to
@@ -311,81 +374,81 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
for (i = 0; i < nprocs; i++) {
iproc++;
if (iproc == nprocs) iproc = 0;
- if (list[iproc] > 0) {
+ if (work1[iproc] > 0) {
proc_send[isend] = iproc;
- length_send[isend] = list[iproc];
- list[iproc] = isend;
+ length_send[isend] = work1[iproc];
+ work1[iproc] = isend;
isend++;
}
}
// num_send = # of atoms I send to each proc
- for (i = 0; i < nsend; i++) num_send[i] = 0;
+ for (i = 0; i < nsend_proc; i++) num_send[i] = 0;
for (i = 0; i < n; i++) {
- isend = list[proclist[i]];
+ isend = work1[proclist[i]];
num_send[isend]++;
}
- // count = offsets into index_send for each proc I send to
+ // work2 = offsets into index_send for each proc I send to
// index_send = list of which atoms to send to each proc
// 1st N1 values are atom indices for 1st proc,
// next N2 values are atom indices for 2nd proc, etc
// offset_send = where each atom starts in send buffer
- count[0] = 0;
- for (i = 1; i < nsend; i++) count[i] = count[i-1] + num_send[i-1];
+ work2[0] = 0;
+ for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1];
for (i = 0; i < n; i++) {
- isend = list[proclist[i]];
- index_send[count[isend]++] = i;
+ isend = work1[proclist[i]];
+ index_send[work2[isend]++] = i;
if (i) offset_send[i] = offset_send[i-1] + sizes[i-1];
else offset_send[i] = 0;
}
// tell receivers how much data I send
- // sendmax = largest # of doubles I send in a single message
+ // sendmax_proc = # of doubles I send in largest single message
- int sendmax = 0;
- for (i = 0; i < nsend; i++) {
+ sendmax_proc = 0;
+ for (i = 0; i < nsend_proc; i++) {
MPI_Send(&length_send[i],1,MPI_INT,proc_send[i],0,world);
- sendmax = MAX(sendmax,length_send[i]);
+ sendmax_proc = MAX(sendmax_proc,length_send[i]);
}
// receive incoming messages
// proc_recv = procs I recv from
- // length_recv = total size of message each proc sends me
- // nrecvsize = total size of data I recv
+ // length_recv = # of doubles each proc sends me
+ // nrecvsize = total size of atom data I recv
int nrecvsize = 0;
- for (i = 0; i < nrecv; i++) {
+ for (i = 0; i < nrecv_proc; i++) {
MPI_Recv(&length_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status);
proc_recv[i] = status->MPI_SOURCE;
nrecvsize += length_recv[i];
}
- // sort proc_recv and num_recv by proc ID if requested
+ // sort proc_recv and length_recv by proc ID if requested
// useful for debugging to insure reproducible ordering of received atoms
// invoke by adding final arg = 1 to create_atom() call in migrate_atoms()
if (sortflag) {
- int *order = new int[nrecv];
- int *proc_recv_ordered = new int[nrecv];
- int *length_recv_ordered = new int[nrecv];
+ int *order = new int[nrecv_proc];
+ int *proc_recv_ordered = new int[nrecv_proc];
+ int *length_recv_ordered = new int[nrecv_proc];
- for (i = 0; i < nrecv; i++) order[i] = i;
+ for (i = 0; i < nrecv_proc; i++) order[i] = i;
proc_recv_copy = proc_recv;
- qsort(order,nrecv,sizeof(int),compare_standalone);
+ qsort(order,nrecv_proc,sizeof(int),compare_standalone);
int j;
- for (i = 0; i < nrecv; i++) {
+ for (i = 0; i < nrecv_proc; i++) {
j = order[i];
proc_recv_ordered[i] = proc_recv[j];
length_recv_ordered[i] = length_recv[j];
}
- memcpy(proc_recv,proc_recv_ordered,nrecv*sizeof(int));
- memcpy(length_recv,length_recv_ordered,nrecv*sizeof(int));
+ memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int));
+ memcpy(length_recv,length_recv_ordered,nrecv_proc*sizeof(int));
delete [] order;
delete [] proc_recv_ordered;
delete [] length_recv_ordered;
@@ -396,27 +459,7 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
MPI_Barrier(world);
- // free work vectors
-
- delete [] count;
- delete [] list;
-
- // initialize plan
-
- aplan->nsend = nsend;
- aplan->nrecv = nrecv;
- aplan->sendmax = sendmax;
-
- aplan->proc_send = proc_send;
- aplan->length_send = length_send;
- aplan->num_send = num_send;
- aplan->index_send = index_send;
- aplan->offset_send = offset_send;
- aplan->proc_recv = proc_recv;
- aplan->length_recv = length_recv;
-
- aplan->request = request;
- aplan->status = status;
+ // return size of atom data I will receive
return nrecvsize;
}
@@ -445,217 +488,226 @@ int compare_standalone(const void *iptr, const void *jptr)
void Irregular::exchange_atom(double *sendbuf, int *sizes, double *recvbuf)
{
- int i,m,n,offset,num_send;
+ int i,m,n,offset,count;
// post all receives
offset = 0;
- for (int irecv = 0; irecv < aplan->nrecv; irecv++) {
- MPI_Irecv(&recvbuf[offset],aplan->length_recv[irecv],MPI_DOUBLE,
- aplan->proc_recv[irecv],0,world,&aplan->request[irecv]);
- offset += aplan->length_recv[irecv];
+ for (int irecv = 0; irecv < nrecv_proc; irecv++) {
+ MPI_Irecv(&recvbuf[offset],length_recv[irecv],MPI_DOUBLE,
+ proc_recv[irecv],0,world,&request[irecv]);
+ offset += length_recv[irecv];
}
- // allocate buf for largest send
+ // reallocate buf for largest send if necessary
- double *buf;
- memory->create(buf,aplan->sendmax,"irregular:buf");
+ if (sendmax_proc > maxdbuf) {
+ memory->destroy(dbuf);
+ maxdbuf = sendmax_proc;
+ memory->create(dbuf,maxdbuf,"irregular:dbuf");
+ }
// send each message
// pack buf with list of atoms
// m = index of atom in sendbuf
- int *index_send = aplan->index_send;
- int nsend = aplan->nsend;
n = 0;
-
- for (int isend = 0; isend < nsend; isend++) {
+ for (int isend = 0; isend < nsend_proc; isend++) {
offset = 0;
- num_send = aplan->num_send[isend];
- for (i = 0; i < num_send; i++) {
+ count = num_send[isend];
+ for (i = 0; i < count; i++) {
m = index_send[n++];
- memcpy(&buf[offset],&sendbuf[aplan->offset_send[m]],
- sizes[m]*sizeof(double));
+ memcpy(&dbuf[offset],&sendbuf[offset_send[m]],sizes[m]*sizeof(double));
offset += sizes[m];
}
- MPI_Send(buf,aplan->length_send[isend],MPI_DOUBLE,
- aplan->proc_send[isend],0,world);
+ MPI_Send(dbuf,length_send[isend],MPI_DOUBLE,proc_send[isend],0,world);
}
- // free temporary send buffer
-
- memory->destroy(buf);
-
// wait on all incoming messages
- if (aplan->nrecv) MPI_Waitall(aplan->nrecv,aplan->request,aplan->status);
+ if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status);
}
/* ----------------------------------------------------------------------
- destroy communication plan for atoms
+ destroy vectors in communication plan for atoms
------------------------------------------------------------------------- */
void Irregular::destroy_atom()
{
- delete [] aplan->proc_send;
- delete [] aplan->length_send;
- delete [] aplan->num_send;
- delete [] aplan->index_send;
- delete [] aplan->offset_send;
- delete [] aplan->proc_recv;
- delete [] aplan->length_recv;
- delete [] aplan->request;
- delete [] aplan->status;
- memory->sfree(aplan);
- aplan = NULL;
+ delete [] proc_send;
+ delete [] length_send;
+ delete [] num_send;
+ delete [] index_send;
+ delete [] offset_send;
+ delete [] proc_recv;
+ delete [] length_recv;
+ delete [] request;
+ delete [] status;
}
/* ----------------------------------------------------------------------
- create a communication plan for datums
+ create communication plan based on list of datums of uniform size
n = # of datums to send
- proclist = proc to send each datum to (including self)
- return total # of datums I will recv (including self)
+ proclist = proc to send each datum to, can include self
+ sortflag = flag for sorting order of received messages by proc ID
+ return total # of datums I will recv, including any to self
------------------------------------------------------------------------- */
-int Irregular::create_data(int n, int *proclist)
+int Irregular::create_data(int n, int *proclist, int sortflag)
{
int i,m;
- // allocate plan and work vectors
-
- dplan = (PlanData *) memory->smalloc(sizeof(PlanData),"irregular:dplan");
- int *list = new int[nprocs];
- int *count = new int[nprocs];
-
- // nrecv = # of messages I receive
+ // setup for collective comm
+ // work1 = 1 for procs I send a message to, not including self
+ // work2 = 1 for all procs, used for ReduceScatter
for (i = 0; i < nprocs; i++) {
- list[i] = 0;
- count[i] = 1;
+ work1[i] = 0;
+ work2[i] = 1;
}
- for (i = 0; i < n; i++) list[proclist[i]] = 1;
+ for (i = 0; i < n; i++) work1[proclist[i]] = 1;
+ work1[me] = 0;
- int nrecv;
- MPI_Reduce_scatter(list,&nrecv,count,MPI_INT,MPI_SUM,world);
- if (list[me]) nrecv--;
+ // nrecv_proc = # of procs I receive messages from, not including self
+ // options for performing ReduceScatter operation
+ // some are more efficient on some machines at big sizes
+
+#ifdef LAMMPS_RS_ALLREDUCE_INPLACE
+ MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world);
+ nrecv_proc = work1[me];
+#else
+#ifdef LAMMPS_RS_ALLREDUCE
+ MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world);
+ nrecv_proc = work2[me];
+#else
+ MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world);
+#endif
+#endif
// allocate receive arrays
- int *proc_recv = new int[nrecv];
- int *num_recv = new int[nrecv];
- MPI_Request *request = new MPI_Request[nrecv];
- MPI_Status *status = new MPI_Status[nrecv];
+ proc_recv = new int[nrecv_proc];
+ num_recv = new int[nrecv_proc];
+ request = new MPI_Request[nrecv_proc];
+ status = new MPI_Status[nrecv_proc];
- // nsend = # of messages I send
+ // work1 = # of datums I send to each proc, including self
+ // nsend_proc = # of procs I send messages to, not including self
- for (i = 0; i < nprocs; i++) list[i] = 0;
- for (i = 0; i < n; i++) list[proclist[i]]++;
+ for (i = 0; i < nprocs; i++) work1[i] = 0;
+ for (i = 0; i < n; i++) work1[proclist[i]]++;
- int nsend = 0;
+ nsend_proc = 0;
for (i = 0; i < nprocs; i++)
- if (list[i]) nsend++;
- if (list[me]) nsend--;
+ if (work1[i]) nsend_proc++;
+ if (work1[me]) nsend_proc--;
// allocate send and self arrays
- int *proc_send = new int[nsend];
- int *num_send = new int[nsend];
- int *index_send = new int[n-list[me]];
- int *index_self = new int[list[me]];
+ proc_send = new int[nsend_proc];
+ num_send = new int[nsend_proc];
+ index_send = new int[n-work1[me]];
+ index_self = new int[work1[me]];
// proc_send = procs I send to
// num_send = # of datums I send to each proc
// num_self = # of datums I copy to self
// to balance pattern of send messages:
// each proc begins with iproc > me, continues until iproc = me
- // reset list to store which send message each proc corresponds to
-
- int num_self;
+ // reset work1 to store which send message each proc corresponds to
int iproc = me;
int isend = 0;
for (i = 0; i < nprocs; i++) {
iproc++;
if (iproc == nprocs) iproc = 0;
- if (iproc == me) num_self = list[iproc];
- else if (list[iproc] > 0) {
+ if (iproc == me) {
+ num_self = work1[iproc];
+ work1[iproc] = 0;
+ } else if (work1[iproc] > 0) {
proc_send[isend] = iproc;
- num_send[isend] = list[iproc];
- list[iproc] = isend;
+ num_send[isend] = work1[iproc];
+ work1[iproc] = isend;
isend++;
}
}
- list[me] = 0;
- // count = offsets into index_send for each proc I send to
+ // work2 = offsets into index_send for each proc I send to
// m = ptr into index_self
// index_send = list of which datums to send to each proc
// 1st N1 values are datum indices for 1st proc,
// next N2 values are datum indices for 2nd proc, etc
+ // index_self = list of which datums to copy to self
- count[0] = 0;
- for (i = 1; i < nsend; i++) count[i] = count[i-1] + num_send[i-1];
+ work2[0] = 0;
+ for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1];
m = 0;
for (i = 0; i < n; i++) {
iproc = proclist[i];
if (iproc == me) index_self[m++] = i;
else {
- isend = list[iproc];
- index_send[count[isend]++] = i;
+ isend = work1[iproc];
+ index_send[work2[isend]++] = i;
}
}
// tell receivers how much data I send
- // sendmax = largest # of datums I send in a single message
+ // sendmax_proc = largest # of datums I send in a single message
- int sendmax = 0;
- for (i = 0; i < nsend; i++) {
+ sendmax_proc = 0;
+ for (i = 0; i < nsend_proc; i++) {
MPI_Send(&num_send[i],1,MPI_INT,proc_send[i],0,world);
- sendmax = MAX(sendmax,num_send[i]);
+ sendmax_proc = MAX(sendmax_proc,num_send[i]);
}
// receive incoming messages
// proc_recv = procs I recv from
// num_recv = total size of message each proc sends me
- // nrecvsize = total size of data I recv
+ // nrecvdatum = total size of data I recv
- int nrecvsize = 0;
- for (i = 0; i < nrecv; i++) {
+ int nrecvdatum = 0;
+ for (i = 0; i < nrecv_proc; i++) {
MPI_Recv(&num_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status);
proc_recv[i] = status->MPI_SOURCE;
- nrecvsize += num_recv[i];
+ nrecvdatum += num_recv[i];
+ }
+ nrecvdatum += num_self;
+
+ // sort proc_recv and num_recv by proc ID if requested
+ // useful for debugging to insure reproducible ordering of received datums
+
+ if (sortflag) {
+ int *order = new int[nrecv_proc];
+ int *proc_recv_ordered = new int[nrecv_proc];
+ int *num_recv_ordered = new int[nrecv_proc];
+
+ for (i = 0; i < nrecv_proc; i++) order[i] = i;
+ proc_recv_copy = proc_recv;
+ qsort(order,nrecv_proc,sizeof(int),compare_standalone);
+
+ int j;
+ for (i = 0; i < nrecv_proc; i++) {
+ j = order[i];
+ proc_recv_ordered[i] = proc_recv[j];
+ num_recv_ordered[i] = num_recv[j];
+ }
+
+ memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int));
+ memcpy(num_recv,num_recv_ordered,nrecv_proc*sizeof(int));
+ delete [] order;
+ delete [] proc_recv_ordered;
+ delete [] num_recv_ordered;
}
- nrecvsize += num_self;
// barrier to insure all MPI_ANY_SOURCE messages are received
// else another proc could proceed to exchange_data() and send to me
MPI_Barrier(world);
- // free work vectors
+ // return # of datums I will receive
- delete [] count;
- delete [] list;
-
- // initialize plan and return it
-
- dplan->nsend = nsend;
- dplan->nrecv = nrecv;
- dplan->sendmax = sendmax;
-
- dplan->proc_send = proc_send;
- dplan->num_send = num_send;
- dplan->index_send = index_send;
- dplan->proc_recv = proc_recv;
- dplan->num_recv = num_recv;
- dplan->num_self = num_self;
- dplan->index_self = index_self;
-
- dplan->request = request;
- dplan->status = status;
-
- return nrecvsize;
+ return nrecvdatum;
}
/* ----------------------------------------------------------------------
@@ -667,49 +719,41 @@ int Irregular::create_data(int n, int *proclist)
void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
{
- int i,m,n,offset,num_send;
+ int i,m,n,offset,count;
// post all receives, starting after self copies
- offset = dplan->num_self*nbytes;
- for (int irecv = 0; irecv < dplan->nrecv; irecv++) {
- MPI_Irecv(&recvbuf[offset],dplan->num_recv[irecv]*nbytes,MPI_CHAR,
- dplan->proc_recv[irecv],0,world,&dplan->request[irecv]);
- offset += dplan->num_recv[irecv]*nbytes;
+ offset = num_self*nbytes;
+ for (int irecv = 0; irecv < nrecv_proc; irecv++) {
+ MPI_Irecv(&recvbuf[offset],num_recv[irecv]*nbytes,MPI_CHAR,
+ proc_recv[irecv],0,world,&request[irecv]);
+ offset += num_recv[irecv]*nbytes;
}
- // allocate buf for largest send
+ // reallocate buf for largest send if necessary
- char *buf;
- memory->create(buf,dplan->sendmax*nbytes,"irregular:buf");
+ if (sendmax_proc*nbytes > maxbuf) {
+ memory->destroy(buf);
+ maxbuf = sendmax_proc*nbytes;
+ memory->create(buf,maxbuf,"irregular:buf");
+ }
// send each message
// pack buf with list of datums
// m = index of datum in sendbuf
- int *index_send = dplan->index_send;
- int nsend = dplan->nsend;
n = 0;
-
- for (int isend = 0; isend < nsend; isend++) {
- num_send = dplan->num_send[isend];
- for (i = 0; i < num_send; i++) {
+ for (int isend = 0; isend < nsend_proc; isend++) {
+ count = num_send[isend];
+ for (i = 0; i < count; i++) {
m = index_send[n++];
memcpy(&buf[i*nbytes],&sendbuf[m*nbytes],nbytes);
}
- MPI_Send(buf,dplan->num_send[isend]*nbytes,MPI_CHAR,
- dplan->proc_send[isend],0,world);
+ MPI_Send(buf,count*nbytes,MPI_CHAR,proc_send[isend],0,world);
}
- // free temporary send buffer
-
- memory->destroy(buf);
-
// copy datums to self, put at beginning of recvbuf
- int *index_self = dplan->index_self;
- int num_self = dplan->num_self;
-
for (i = 0; i < num_self; i++) {
m = index_self[i];
memcpy(&recvbuf[i*nbytes],&sendbuf[m*nbytes],nbytes);
@@ -717,39 +761,37 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
// wait on all incoming messages
- if (dplan->nrecv) MPI_Waitall(dplan->nrecv,dplan->request,dplan->status);
+ if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status);
}
/* ----------------------------------------------------------------------
- destroy communication plan for datums
+ destroy vectors in communication plan for datums
------------------------------------------------------------------------- */
void Irregular::destroy_data()
{
- delete [] dplan->proc_send;
- delete [] dplan->num_send;
- delete [] dplan->index_send;
- delete [] dplan->proc_recv;
- delete [] dplan->num_recv;
- delete [] dplan->index_self;
- delete [] dplan->request;
- delete [] dplan->status;
- memory->sfree(dplan);
- dplan = NULL;
+ delete [] proc_send;
+ delete [] num_send;
+ delete [] index_send;
+ delete [] proc_recv;
+ delete [] num_recv;
+ delete [] index_self;
+ delete [] request;
+ delete [] status;
}
/* ----------------------------------------------------------------------
determine which proc owns atom with coord x[3]
x will be in box (orthogonal) or lamda coords (triclinic)
- for uniform = 1, directly calculate owning proc
- for non-uniform, iteratively find owning proc via binary search
+ if layout = UNIFORM, calculate owning proc directly
+ else layout = NONUNIFORM, iteratively find owning proc via binary search
return owning proc ID via grid2proc
return igx,igy,igz = logical grid loc of owing proc within 3d grid of procs
------------------------------------------------------------------------- */
int Irregular::coord2proc(double *x, int &igx, int &igy, int &igz)
{
- if (uniform) {
+ if (layout == 0) {
if (triclinic == 0) {
igx = static_cast (procgrid[0] * (x[0]-boxlo[0]) / prd[0]);
igy = static_cast (procgrid[1] * (x[1]-boxlo[1]) / prd[1]);
@@ -846,7 +888,12 @@ void Irregular::grow_recv(int n)
bigint Irregular::memory_usage()
{
- bigint bytes = memory->usage(buf_send,maxsend);
- bytes += memory->usage(buf_recv,maxrecv);
+ bigint bytes = 0;
+ bytes += maxsend*sizeof(double); // buf_send
+ bytes += maxrecv*sizeof(double); // buf_recv
+ bytes += maxdbuf*sizeof(double); // dbuf
+ bytes += maxbuf; // buf
+ bytes += 2*maxlocal*sizeof(int); // mproclist,msizes
+ bytes += 2*nprocs*sizeof(int); // work1,work2
return bytes;
}
diff --git a/src/irregular.h b/src/irregular.h
index 576fad5c90..1fdbbe03f7 100644
--- a/src/irregular.h
+++ b/src/irregular.h
@@ -27,9 +27,9 @@ class Irregular : protected Pointers {
Irregular(class LAMMPS *);
~Irregular();
- void migrate_atoms(int sortflag = 0);
+ void migrate_atoms(int sortflag = 0, int *procassign = NULL);
int migrate_check();
- int create_data(int, int *);
+ int create_data(int, int *, int sortflag = 0);
void exchange_data(char *, int, char *);
void destroy_data();
bigint memory_usage();
@@ -38,58 +38,58 @@ class Irregular : protected Pointers {
int me,nprocs;
int triclinic;
int map_style;
- int uniform;
+ int layout;
double *xsplit,*ysplit,*zsplit; // ptrs to comm
int *procgrid; // ptr to comm
int ***grid2proc; // ptr to comm
double *boxlo; // ptr to domain
double *prd; // ptr to domain
- int maxsend,maxrecv; // size of buffers in # of doubles
- double *buf_send,*buf_recv;
+ int maxsend,maxrecv; // size of buf send/recv in # of doubles
+ double *buf_send,*buf_recv; // bufs used in migrate_atoms
+ int maxdbuf; // size of double buf in bytes
+ double *dbuf; // double buf for largest single atom send
+ int maxbuf; // size of char buf in bytes
+ char *buf; // char buf for largest single data send
- // plan for irregular communication of atoms
+ int *mproclist,*msizes; // persistent vectors in migrate_atoms
+ int maxlocal; // allocated size of mproclist and msizes
+
+ int *work1,*work2; // work vectors
+
+ // plan params for irregular communication of atoms or datums
+ // no params refer to atoms/data copied to self
+
+ int nsend_proc; // # of messages to send
+ int nrecv_proc; // # of messages to recv
+ int sendmax_proc; // # of doubles/datums in largest send message
+ int *proc_send; // list of procs to send to
+ int *num_send; // # of atoms/datums to send to each proc
+ int *index_send; // list of which atoms/datums to send to each proc
+ int *proc_recv; // list of procs to recv from
+ MPI_Request *request; // MPI requests for posted recvs
+ MPI_Status *status; // MPI statuses for WaitAll
+
+ // extra plan params plan for irregular communication of atoms
// no params refer to atoms copied to self
- struct PlanAtom {
- int nsend; // # of messages to send
- int nrecv; // # of messages to recv
- int sendmax; // # of doubles in largest send message
- int *proc_send; // procs to send to
- int *length_send; // # of doubles to send to each proc
- int *num_send; // # of atoms to send to each proc
- int *index_send; // list of which atoms to send to each proc
- int *offset_send; // where each atom starts in send buffer
- int *proc_recv; // procs to recv from
- int *length_recv; // # of doubles to recv from each proc
- MPI_Request *request; // MPI requests for posted recvs
- MPI_Status *status; // MPI statuses for WaitAll
- };
+ int *length_send; // # of doubles to send to each proc
+ int *length_recv; // # of doubles to recv from each proc
+ int *offset_send; // where each atom starts in send buffer
- // plan for irregular communication of datums
- // only 2 self params refer to atoms copied to self
+ // extra plan params plan for irregular communication of datums
+ // 2 self params refer to data copied to self
- struct PlanData { // plan for irregular communication of data
- int nsend; // # of messages to send
- int nrecv; // # of messages to recv
- int sendmax; // # of datums in largest send message
- int *proc_send; // procs to send to
- int *num_send; // # of datums to send to each proc
- int *index_send; // list of which datums to send to each proc
- int *proc_recv; // procs to recv from
- int *num_recv; // # of datums to recv from each proc
- int num_self; // # of datums to copy to self
- int *index_self; // list of which datums to copy to self
- MPI_Request *request; // MPI requests for posted recvs
- MPI_Status *status; // MPI statuses for WaitAll
- };
+ int *num_recv; // # of datums to recv from each proc
+ int num_self; // # of datums to copy to self
+ int *index_self; // list of which datums to copy to self
- PlanAtom *aplan;
- PlanData *dplan;
+ // private methods
int create_atom(int, int *, int *, int);
void exchange_atom(double *, int *, double *);
void destroy_atom();
+
int coord2proc(double *, int &, int &, int &);
int binary(double, int, double *);
diff --git a/src/rcb.cpp b/src/rcb.cpp
new file mode 100644
index 0000000000..de3f9f3ba6
--- /dev/null
+++ b/src/rcb.cpp
@@ -0,0 +1,926 @@
+/* ----------------------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "string.h"
+#include "rcb.h"
+#include "irregular.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+
+#define MYHUGE 1.0e30
+#define TINY 1.0e-6
+
+// set this to bigger number after debugging
+
+#define DELTA 10
+
+// prototypes for non-class functions
+
+void box_merge(void *, void *, int *, MPI_Datatype *);
+void median_merge(void *, void *, int *, MPI_Datatype *);
+
+// NOTE: if want to have reuse flag, need to sum Tree across procs
+
+/* ---------------------------------------------------------------------- */
+
+RCB::RCB(LAMMPS *lmp) : Pointers(lmp)
+{
+ MPI_Comm_rank(world,&me);
+ MPI_Comm_size(world,&nprocs);
+
+ ndot = maxdot = 0;
+ dots = NULL;
+
+ nlist = maxlist = 0;
+ dotlist = dotmark = NULL;
+
+ maxbuf = 0;
+ buf = NULL;
+
+ maxrecv = maxsend = 0;
+ recvproc = recvindex = sendproc = sendindex = NULL;
+
+ tree = NULL;
+ irregular = NULL;
+
+ // create MPI data and function types for box and median AllReduce ops
+
+ MPI_Type_contiguous(6,MPI_DOUBLE,&box_type);
+ MPI_Type_commit(&box_type);
+ MPI_Type_contiguous(sizeof(Median),MPI_CHAR,&med_type);
+ MPI_Type_commit(&med_type);
+
+ MPI_Op_create(box_merge,1,&box_op);
+ MPI_Op_create(median_merge,1,&med_op);
+
+ reuse = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+RCB::~RCB()
+{
+ memory->sfree(dots);
+ memory->destroy(dotlist);
+ memory->destroy(dotmark);
+ memory->sfree(buf);
+
+ memory->destroy(recvproc);
+ memory->destroy(recvindex);
+ memory->destroy(sendproc);
+ memory->destroy(sendindex);
+
+ memory->sfree(tree);
+ delete irregular;
+
+ MPI_Type_free(&med_type);
+ MPI_Type_free(&box_type);
+ MPI_Op_free(&box_op);
+ MPI_Op_free(&med_op);
+}
+
+/* ----------------------------------------------------------------------
+ perform RCB balancing of N particles at coords X in bounding box LO/HI
+ if wt = NULL, ignore per-particle weights
+ if wt defined, per-particle weights > 0.0
+ dimension = 2 or 3
+ as documented in rcb.h:
+ sets noriginal,nfinal,nkeep,recvproc,recvindex,lo,hi
+ all proc particles will be inside or on surface of 3-d box
+ defined by final lo/hi
+ // NOTE: worry about re-use of data structs for fix balance?
+ // NOTE: could get rid of wt all together, will it be used?
+------------------------------------------------------------------------- */
+
+void RCB::compute(int dimension, int n, double **x, double *wt,
+ double *bboxlo, double *bboxhi)
+{
+ int i,j,k;
+ int keep,outgoing,incoming,incoming2;
+ int dim,markactive;
+ int indexlo,indexhi;
+ int first_iteration,breakflag;
+ double wttot,wtlo,wthi,wtsum,wtok,wtupto,wtmax;
+ double targetlo,targethi;
+ double valuemin,valuemax,valuehalf;
+ double tolerance;
+ MPI_Comm comm,comm_half;
+ MPI_Request request,request2;
+ MPI_Status status;
+ Median med,medme;
+
+ // create list of my Dots
+
+ ndot = nkeep = noriginal = n;
+
+ if (ndot > maxdot) {
+ maxdot = ndot;
+ memory->sfree(dots);
+ dots = (Dot *) memory->smalloc(ndot*sizeof(Dot),"RCB:dots");
+ }
+
+ for (i = 0; i < ndot; i++) {
+ dots[i].x[0] = x[i][0];
+ dots[i].x[1] = x[i][1];
+ dots[i].x[2] = x[i][2];
+ dots[i].proc = me;
+ dots[i].index = i;
+ }
+
+ if (wt)
+ for (i = 0; i < ndot; i++) dots[i].wt = wt[i];
+ else
+ for (i = 0; i < ndot; i++) dots[i].wt = 1.0;
+
+ // initial bounding box = simulation box
+ // includes periodic or shrink-wrapped boundaries
+
+ lo = bbox.lo;
+ hi = bbox.hi;
+
+ lo[0] = bboxlo[0];
+ lo[1] = bboxlo[1];
+ lo[2] = bboxlo[2];
+ hi[0] = bboxhi[0];
+ hi[1] = bboxhi[1];
+ hi[2] = bboxhi[2];
+
+ // initialize counters
+
+ counters[0] = 0;
+ counters[1] = 0;
+ counters[2] = 0;
+ counters[3] = ndot;
+ counters[4] = maxdot;
+ counters[5] = 0;
+ counters[6] = 0;
+
+ // create communicator for use in recursion
+
+ MPI_Comm_dup(world,&comm);
+
+ // recurse until partition is a single proc = me
+ // proclower,procupper = lower,upper procs in partition
+ // procmid = 1st proc in upper half of partition
+
+ int procpartner,procpartner2;
+ int readnumber;
+
+ int procmid;
+ int proclower = 0;
+ int procupper = nprocs - 1;
+
+ while (proclower != procupper) {
+
+ // if odd # of procs, lower partition gets extra one
+
+ procmid = proclower + (procupper - proclower) / 2 + 1;
+
+ // determine communication partner(s)
+ // readnumber = # of proc partners to read from
+
+ if (me < procmid)
+ procpartner = me + (procmid - proclower);
+ else
+ procpartner = me - (procmid - proclower);
+
+ int readnumber = 1;
+ if (procpartner > procupper) {
+ readnumber = 0;
+ procpartner--;
+ }
+ if (me == procupper && procpartner != procmid - 1) {
+ readnumber = 2;
+ procpartner2 = procpartner + 1;
+ }
+
+ // wttot = summed weight of entire partition
+ // search tolerance = largest single weight (plus epsilon)
+ // targetlo = desired weight in lower half of partition
+ // targethi = desired weight in upper half of partition
+
+ wtmax = wtsum = 0.0;
+
+ if (wt) {
+ for (i = 0; i < ndot; i++) {
+ wtsum += dots[i].wt;
+ if (dots[i].wt > wtmax) wtmax = dots[i].wt;
+ }
+ } else {
+ for (i = 0; i < ndot; i++) wtsum += dots[i].wt;
+ wtmax = 1.0;
+ }
+
+ MPI_Allreduce(&wtsum,&wttot,1,MPI_DOUBLE,MPI_SUM,comm);
+ if (wt) MPI_Allreduce(&wtmax,&tolerance,1,MPI_DOUBLE,MPI_MAX,comm);
+ else tolerance = 1.0;
+
+ tolerance *= 1.0 + TINY;
+ targetlo = wttot * (procmid - proclower) / (procupper + 1 - proclower);
+ targethi = wttot - targetlo;
+
+ // dim = dimension to bisect on
+ // do not allow choice of z dimension for 2d system
+
+ dim = 0;
+ if (hi[1]-lo[1] > hi[0]-lo[0]) dim = 1;
+ if (dimension == 3) {
+ if (dim == 0 && hi[2]-lo[2] > hi[0]-lo[0]) dim = 2;
+ if (dim == 1 && hi[2]-lo[2] > hi[1]-lo[1]) dim = 2;
+ }
+
+ // create active list and mark array for dots
+ // initialize active list to all dots
+
+ if (ndot > maxlist) {
+ memory->destroy(dotlist);
+ memory->destroy(dotmark);
+ maxlist = maxdot;
+ memory->create(dotlist,maxlist,"RCB:dotlist");
+ memory->create(dotmark,maxlist,"RCB:dotmark");
+ }
+
+ nlist = ndot;
+ for (i = 0; i < nlist; i++) dotlist[i] = i;
+
+ // median iteration
+ // zoom in on bisector until correct # of dots in each half of partition
+ // as each iteration of median-loop begins, require:
+ // all non-active dots are marked with 0/1 in dotmark
+ // valuemin <= every active dot <= valuemax
+ // wtlo, wthi = total wt of non-active dots
+ // when leave median-loop, require only:
+ // valuehalf = correct cut position
+ // all dots <= valuehalf are marked with 0 in dotmark
+ // all dots >= valuehalf are marked with 1 in dotmark
+ // markactive = which side of cut is active = 0/1
+ // indexlo,indexhi = indices of dot closest to median
+
+ wtlo = wthi = 0.0;
+ valuemin = lo[dim];
+ valuemax = hi[dim];
+ first_iteration = 1;
+
+ while (1) {
+
+ // choose bisector value
+ // use old value on 1st iteration if old cut dimension is the same
+ // on 2nd option: could push valuehalf towards geometric center
+ // with "1.0-factor" to force overshoot
+
+ if (first_iteration && reuse && dim == tree[procmid].dim) {
+ counters[5]++;
+ valuehalf = tree[procmid].cut;
+ if (valuehalf < valuemin || valuehalf > valuemax)
+ valuehalf = 0.5 * (valuemin + valuemax);
+ } else if (wt)
+ valuehalf = valuemin + (targetlo - wtlo) /
+ (wttot - wtlo - wthi) * (valuemax - valuemin);
+ else
+ valuehalf = 0.5 * (valuemin + valuemax);
+
+ first_iteration = 0;
+
+ // initialize local median data structure
+
+ medme.totallo = medme.totalhi = 0.0;
+ medme.valuelo = -MYHUGE;
+ medme.valuehi = MYHUGE;
+ medme.wtlo = medme.wthi = 0.0;
+ medme.countlo = medme.counthi = 0;
+ medme.proclo = medme.prochi = me;
+
+ // mark all active dots on one side or other of bisector
+ // also set all fields in median data struct
+ // save indices of closest dots on either side
+
+ for (j = 0; j < nlist; j++) {
+ i = dotlist[j];
+ if (dots[i].x[dim] <= valuehalf) { // in lower part
+ medme.totallo += dots[i].wt;
+ dotmark[i] = 0;
+ if (dots[i].x[dim] > medme.valuelo) { // my closest dot
+ medme.valuelo = dots[i].x[dim];
+ medme.wtlo = dots[i].wt;
+ medme.countlo = 1;
+ indexlo = i;
+ } else if (dots[i].x[dim] == medme.valuelo) { // tied for closest
+ medme.wtlo += dots[i].wt;
+ medme.countlo++;
+ }
+ }
+ else { // in upper part
+ medme.totalhi += dots[i].wt;
+ dotmark[i] = 1;
+ if (dots[i].x[dim] < medme.valuehi) { // my closest dot
+ medme.valuehi = dots[i].x[dim];
+ medme.wthi = dots[i].wt;
+ medme.counthi = 1;
+ indexhi = i;
+ } else if (dots[i].x[dim] == medme.valuehi) { // tied for closest
+ medme.wthi += dots[i].wt;
+ medme.counthi++;
+ }
+ }
+ }
+
+ // combine median data struct across current subset of procs
+
+ counters[0]++;
+ MPI_Allreduce(&medme,&med,1,med_type,med_op,comm);
+
+ // test median guess for convergence
+ // move additional dots that are next to cut across it
+
+ if (wtlo + med.totallo < targetlo) { // lower half TOO SMALL
+
+ wtlo += med.totallo;
+ valuehalf = med.valuehi;
+
+ if (med.counthi == 1) { // only one dot to move
+ if (wtlo + med.wthi < targetlo) { // move it, keep iterating
+ if (me == med.prochi) dotmark[indexhi] = 0;
+ }
+ else { // only move if beneficial
+ if (wtlo + med.wthi - targetlo < targetlo - wtlo)
+ if (me == med.prochi) dotmark[indexhi] = 0;
+ break; // all done
+ }
+ }
+ else { // multiple dots to move
+ breakflag = 0;
+ wtok = 0.0;
+ if (medme.valuehi == med.valuehi) wtok = medme.wthi;
+ if (wtlo + med.wthi >= targetlo) { // all done
+ MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm);
+ wtmax = targetlo - wtlo;
+ if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax);
+ breakflag = 1;
+ } // wtok = most I can move
+ for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) {
+ i = dotlist[j];
+ if (dots[i].x[dim] == med.valuehi) { // only move if better
+ if (wtsum + dots[i].wt - wtok < wtok - wtsum)
+ dotmark[i] = 0;
+ wtsum += dots[i].wt;
+ }
+ }
+ if (breakflag) break; // done if moved enough
+ }
+
+ wtlo += med.wthi;
+ if (targetlo-wtlo <= tolerance) break; // close enough
+
+ valuemin = med.valuehi; // iterate again
+ markactive = 1;
+ }
+
+ else if (wthi + med.totalhi < targethi) { // upper half TOO SMALL
+
+ wthi += med.totalhi;
+ valuehalf = med.valuelo;
+
+ if (med.countlo == 1) { // only one dot to move
+ if (wthi + med.wtlo < targethi) { // move it, keep iterating
+ if (me == med.proclo) dotmark[indexlo] = 1;
+ }
+ else { // only move if beneficial
+ if (wthi + med.wtlo - targethi < targethi - wthi)
+ if (me == med.proclo) dotmark[indexlo] = 1;
+ break; // all done
+ }
+ }
+ else { // multiple dots to move
+ breakflag = 0;
+ wtok = 0.0;
+ if (medme.valuelo == med.valuelo) wtok = medme.wtlo;
+ if (wthi + med.wtlo >= targethi) { // all done
+ MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm);
+ wtmax = targethi - wthi;
+ if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax);
+ breakflag = 1;
+ } // wtok = most I can move
+ for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) {
+ i = dotlist[j];
+ if (dots[i].x[dim] == med.valuelo) { // only move if better
+ if (wtsum + dots[i].wt - wtok < wtok - wtsum)
+ dotmark[i] = 1;
+ wtsum += dots[i].wt;
+ }
+ }
+ if (breakflag) break; // done if moved enough
+ }
+
+ wthi += med.wtlo;
+ if (targethi-wthi <= tolerance) break; // close enough
+
+ valuemax = med.valuelo; // iterate again
+ markactive = 0;
+ }
+
+ else // Goldilocks result: both partitions just right
+ break;
+
+ // shrink the active list
+
+ k = 0;
+ for (j = 0; j < nlist; j++) {
+ i = dotlist[j];
+ if (dotmark[i] == markactive) dotlist[k++] = i;
+ }
+ nlist = k;
+ }
+
+ // found median
+ // store cut info only if I am procmid
+
+ if (me == procmid) {
+ cut = valuehalf;
+ cutdim = dim;
+ }
+
+ // use cut to shrink my RCB bounding box
+
+ if (me < procmid) hi[dim] = valuehalf;
+ else lo[dim] = valuehalf;
+
+ // outgoing = number of dots to ship to partner
+ // nkeep = number of dots that have never migrated
+
+ markactive = (me < procpartner);
+ for (i = 0, keep = 0, outgoing = 0; i < ndot; i++)
+ if (dotmark[i] == markactive) outgoing++;
+ else if (i < nkeep) keep++;
+ nkeep = keep;
+
+ // alert partner how many dots I'll send, read how many I'll recv
+
+ MPI_Send(&outgoing,1,MPI_INT,procpartner,0,world);
+ incoming = 0;
+ if (readnumber) {
+ MPI_Recv(&incoming,1,MPI_INT,procpartner,0,world,&status);
+ if (readnumber == 2) {
+ MPI_Recv(&incoming2,1,MPI_INT,procpartner2,0,world,&status);
+ incoming += incoming2;
+ }
+ }
+
+ // check if need to alloc more space
+
+ int ndotnew = ndot - outgoing + incoming;
+ if (ndotnew > maxdot) {
+ while (maxdot < ndotnew) maxdot += DELTA;
+ dots = (Dot *) memory->srealloc(dots,maxdot*sizeof(Dot),"RCB::dots");
+ counters[6]++;
+ }
+
+ counters[1] += outgoing;
+ counters[2] += incoming;
+ if (ndotnew > counters[3]) counters[3] = ndotnew;
+ if (maxdot > counters[4]) counters[4] = maxdot;
+
+ // malloc comm send buffer
+
+ if (outgoing > maxbuf) {
+ memory->sfree(buf);
+ maxbuf = outgoing;
+ buf = (Dot *) memory->smalloc(maxbuf*sizeof(Dot),"RCB:buf");
+ }
+
+ // fill buffer with dots that are marked for sending
+ // pack down the unmarked ones
+
+ keep = outgoing = 0;
+ for (i = 0; i < ndot; i++) {
+ if (dotmark[i] == markactive)
+ memcpy(&buf[outgoing++],&dots[i],sizeof(Dot));
+ else
+ memcpy(&dots[keep++],&dots[i],sizeof(Dot));
+ }
+
+ // post receives for dots
+
+ if (readnumber > 0) {
+ MPI_Irecv(&dots[keep],incoming*sizeof(Dot),MPI_CHAR,
+ procpartner,1,world,&request);
+ if (readnumber == 2) {
+ keep += incoming - incoming2;
+ MPI_Irecv(&dots[keep],incoming2*sizeof(Dot),MPI_CHAR,
+ procpartner2,1,world,&request2);
+ }
+ }
+
+ // handshake before sending dots to insure recvs have been posted
+
+ if (readnumber > 0) {
+ MPI_Send(NULL,0,MPI_INT,procpartner,0,world);
+ if (readnumber == 2) MPI_Send(NULL,0,MPI_INT,procpartner2,0,world);
+ }
+ MPI_Recv(NULL,0,MPI_INT,procpartner,0,world,&status);
+
+ // send dots to partner
+
+ MPI_Rsend(buf,outgoing*sizeof(Dot),MPI_CHAR,procpartner,1,world);
+
+ // wait until all dots are received
+
+ if (readnumber > 0) {
+ MPI_Wait(&request,&status);
+ if (readnumber == 2) MPI_Wait(&request2,&status);
+ }
+
+ ndot = ndotnew;
+
+ // cut partition in half, create new communicators of 1/2 size
+
+ int split;
+ if (me < procmid) {
+ procupper = procmid - 1;
+ split = 0;
+ } else {
+ proclower = procmid;
+ split = 1;
+ }
+
+ MPI_Comm_split(comm,split,me,&comm_half);
+ MPI_Comm_free(&comm);
+ comm = comm_half;
+ }
+
+ // clean up
+
+ MPI_Comm_free(&comm);
+
+ // set public variables with results of rebalance
+
+ nfinal = ndot;
+
+ if (nfinal > maxrecv) {
+ memory->destroy(recvproc);
+ memory->destroy(recvindex);
+ maxrecv = nfinal;
+ memory->create(recvproc,maxrecv,"RCB:recvproc");
+ memory->create(recvindex,maxrecv,"RCB:recvindex");
+ }
+
+ for (i = 0; i < nfinal; i++) {
+ recvproc[i] = dots[i].proc;
+ recvindex[i] = dots[i].index;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ custom MPI reduce operation
+ merge of each component of an RCB bounding box
+------------------------------------------------------------------------- */
+
+void box_merge(void *in, void *inout, int *len, MPI_Datatype *dptr)
+
+{
+ RCB::BBox *box1 = (RCB::BBox *) in;
+ RCB::BBox *box2 = (RCB::BBox *) inout;
+
+ for (int i = 0; i < 3; i++) {
+ if (box1->lo[i] < box2->lo[i]) box2->lo[i] = box1->lo[i];
+ if (box1->hi[i] > box2->hi[i]) box2->hi[i] = box1->hi[i];
+ }
+}
+
+/* ----------------------------------------------------------------------
+ custom MPI reduce operation
+ merge median data structure
+ on input:
+ in,inout->totallo, totalhi = weight in both partitions on this proc
+ valuelo, valuehi = pos of nearest dot(s) to cut on this proc
+ wtlo, wthi = total wt of dot(s) at that pos on this proc
+ countlo, counthi = # of dot(s) nearest to cut on this proc
+ proclo, prochi = not used
+ on exit:
+ inout-> totallo, totalhi = total # of active dots in both partitions
+ valuelo, valuehi = pos of nearest dot(s) to cut
+ wtlo, wthi = total wt of dot(s) at that position
+ countlo, counthi = total # of dot(s) nearest to cut
+ proclo, prochi = one unique proc who owns a nearest dot
+ all procs must get same proclo,prochi
+------------------------------------------------------------------------- */
+
+void median_merge(void *in, void *inout, int *len, MPI_Datatype *dptr)
+
+{
+ RCB::Median *med1 = (RCB::Median *) in;
+ RCB::Median *med2 = (RCB::Median *) inout;
+
+ med2->totallo += med1->totallo;
+ if (med1->valuelo > med2->valuelo) {
+ med2->valuelo = med1->valuelo;
+ med2->wtlo = med1->wtlo;
+ med2->countlo = med1->countlo;
+ med2->proclo = med1->proclo;
+ }
+ else if (med1->valuelo == med2->valuelo) {
+ med2->wtlo += med1->wtlo;
+ med2->countlo += med1->countlo;
+ if (med1->proclo < med2->proclo) med2->proclo = med1->proclo;
+ }
+
+ med2->totalhi += med1->totalhi;
+ if (med1->valuehi < med2->valuehi) {
+ med2->valuehi = med1->valuehi;
+ med2->wthi = med1->wthi;
+ med2->counthi = med1->counthi;
+ med2->prochi = med1->prochi;
+ }
+ else if (med1->valuehi == med2->valuehi) {
+ med2->wthi += med1->wthi;
+ med2->counthi += med1->counthi;
+ if (med1->prochi < med2->prochi) med2->prochi = med1->prochi;
+ }
+}
+
+/* ----------------------------------------------------------------------
+ invert the RCB rebalance result to convert receive info into send info
+ sortflag = flag for sorting order of received messages by proc ID
+------------------------------------------------------------------------- */
+
+void RCB::invert(int sortflag)
+{
+ Invert *sbuf,*rbuf;
+
+ // only create Irregular if not previously created
+ // allows Irregular to persist for multiple RCB calls by fix balance
+
+ if (!irregular) irregular = new Irregular(lmp);
+
+ // nsend = # of dots to request from other procs
+
+ int nsend = nfinal-nkeep;
+
+ int *proclist;
+ memory->create(proclist,nsend,"RCB:proclist");
+
+ Invert *sinvert =
+ (Invert *) memory->smalloc(nsend*sizeof(Invert),"RCB:sinvert");
+
+ int m = 0;
+ for (int i = nkeep; i < nfinal; i++) {
+ proclist[m] = recvproc[i];
+ sinvert[m].rindex = recvindex[i];
+ sinvert[m].sproc = me;
+ sinvert[m].sindex = i;
+ m++;
+ }
+
+ // perform inversion via irregular comm
+ // nrecv = # of my dots to send to other procs
+
+ int nrecv = irregular->create_data(nsend,proclist,sortflag);
+ Invert *rinvert =
+ (Invert *) memory->smalloc(nrecv*sizeof(Invert),"RCB:rinvert");
+ irregular->exchange_data((char *) sinvert,sizeof(Invert),(char *) rinvert);
+ irregular->destroy_data();
+
+ // set public variables from requests to send my dots
+
+ if (noriginal > maxsend) {
+ memory->destroy(sendproc);
+ memory->destroy(sendindex);
+ maxsend = noriginal;
+ memory->create(sendproc,maxsend,"RCB:sendproc");
+ memory->create(sendindex,maxsend,"RCB:sendindex");
+ }
+
+ for (int i = 0; i < nkeep; i++) {
+ sendproc[recvindex[i]] = me;
+ sendindex[recvindex[i]] = i;
+ }
+
+ for (int i = 0; i < nrecv; i++) {
+ m = rinvert[i].rindex;
+ sendproc[m] = rinvert[i].sproc;
+ sendindex[m] = rinvert[i].sindex;
+ }
+
+ // clean-up
+
+ memory->destroy(proclist);
+ memory->destroy(sinvert);
+ memory->destroy(rinvert);
+}
+
+/* ----------------------------------------------------------------------
+ memory use of Irregular
+------------------------------------------------------------------------- */
+
+bigint RCB::memory_usage()
+{
+ bigint bytes = 0;
+ if (irregular) bytes += irregular->memory_usage();
+ return bytes;
+}
+
+// -----------------------------------------------------------------------
+// DEBUG methods
+// -----------------------------------------------------------------------
+/*
+// consistency checks on RCB results
+
+void RCB::check()
+{
+ int i,iflag,total1,total2;
+ double weight,wtmax,wtmin,wtone,tolerance;
+
+ // check that total # of dots remained the same
+
+ MPI_Allreduce(&ndotorig,&total1,1,MPI_INT,MPI_SUM,world);
+ MPI_Allreduce(&ndot,&total2,1,MPI_INT,MPI_SUM,world);
+ if (total1 != total2) {
+ if (me == 0)
+ printf("ERROR: Points before RCB = %d, Points after RCB = %d\n",
+ total1,total2);
+ }
+
+ // check that result is load-balanced within log2(P)*max-wt
+
+ weight = wtone = 0.0;
+ for (i = 0; i < ndot; i++) {
+ weight += dots[i].wt;
+ if (dots[i].wt > wtone) wtone = dots[i].wt;
+ }
+
+ MPI_Allreduce(&weight,&wtmin,1,MPI_DOUBLE,MPI_MIN,world);
+ MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);
+ MPI_Allreduce(&wtone,&tolerance,1,MPI_DOUBLE,MPI_MAX,world);
+
+ // i = smallest power-of-2 >= nprocs
+ // tolerance = largest-single-weight*log2(nprocs)
+
+ for (i = 0; (nprocs >> i) != 0; i++);
+ tolerance = tolerance * i * (1.0 + TINY);
+
+ if (wtmax - wtmin > tolerance) {
+ if (me == 0)
+ printf("ERROR: Load-imbalance > tolerance of %g\n",tolerance);
+ MPI_Barrier(world);
+ if (weight == wtmin) printf(" Proc %d has weight = %g\n",me,weight);
+ if (weight == wtmax) printf(" Proc %d has weight = %g\n",me,weight);
+ }
+
+ MPI_Barrier(world);
+
+ // check that final set of points is inside RCB box of each proc
+
+ iflag = 0;
+ for (i = 0; i < ndot; i++) {
+ if (dots[i].x[0] < lo[0] || dots[i].x[0] > hi[0] ||
+ dots[i].x[1] < lo[1] || dots[i].x[1] > hi[1] ||
+ dots[i].x[2] < lo[2] || dots[i].x[2] > hi[2])
+ iflag++;
+ }
+ if (iflag > 0)
+ printf("ERROR: %d points are out-of-box on proc %d\n",iflag,me);
+}
+
+// stats for RCB decomposition
+
+void RCB::stats(int flag)
+{
+ int i,iflag,sum,min,max;
+ double ave,rsum,rmin,rmax;
+ double weight,wttot,wtmin,wtmax;
+
+ if (me == 0) printf("RCB Statistics:\n");
+
+ // distribution info
+
+ for (i = 0, weight = 0.0; i < ndot; i++) weight += dots[i].wt;
+ MPI_Allreduce(&weight,&wttot,1,MPI_DOUBLE,MPI_SUM,world);
+ MPI_Allreduce(&weight,&wtmin,1,MPI_DOUBLE,MPI_MIN,world);
+ MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);
+
+ if (me == 0) {
+ printf(" Total weight of dots = %g\n",wttot);
+ printf(" Weight on each proc: ave = %g, max = %g, min = %g\n",
+ wttot/nprocs,wtmax,wtmin);
+ }
+ if (flag) {
+ MPI_Barrier(world);
+ printf(" Proc %d has weight = %g\n",me,weight);
+ }
+
+ for (i = 0, weight = 0.0; i < ndot; i++)
+ if (dots[i].wt > weight) weight = dots[i].wt;
+ MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);
+
+ if (me == 0) printf(" Maximum weight of single dot = %g\n",wtmax);
+ if (flag) {
+ MPI_Barrier(world);
+ printf(" Proc %d max weight = %g\n",me,weight);
+ }
+
+ // counter info
+
+ MPI_Allreduce(&counters[0],&sum,1,MPI_INT,MPI_SUM,world);
+ MPI_Allreduce(&counters[0],&min,1,MPI_INT,MPI_MIN,world);
+ MPI_Allreduce(&counters[0],&max,1,MPI_INT,MPI_MAX,world);
+ ave = ((double) sum)/nprocs;
+ if (me == 0)
+ printf(" Median iter: ave = %g, min = %d, max = %d\n",ave,min,max);
+ if (flag) {
+ MPI_Barrier(world);
+ printf(" Proc %d median count = %d\n",me,counters[0]);
+ }
+
+ MPI_Allreduce(&counters[1],&sum,1,MPI_INT,MPI_SUM,world);
+ MPI_Allreduce(&counters[1],&min,1,MPI_INT,MPI_MIN,world);
+ MPI_Allreduce(&counters[1],&max,1,MPI_INT,MPI_MAX,world);
+ ave = ((double) sum)/nprocs;
+ if (me == 0)
+ printf(" Send count: ave = %g, min = %d, max = %d\n",ave,min,max);
+ if (flag) {
+ MPI_Barrier(world);
+ printf(" Proc %d send count = %d\n",me,counters[1]);
+ }
+
+ MPI_Allreduce(&counters[2],&sum,1,MPI_INT,MPI_SUM,world);
+ MPI_Allreduce(&counters[2],&min,1,MPI_INT,MPI_MIN,world);
+ MPI_Allreduce(&counters[2],&max,1,MPI_INT,MPI_MAX,world);
+ ave = ((double) sum)/nprocs;
+ if (me == 0)
+ printf(" Recv count: ave = %g, min = %d, max = %d\n",ave,min,max);
+ if (flag) {
+ MPI_Barrier(world);
+ printf(" Proc %d recv count = %d\n",me,counters[2]);
+ }
+
+ MPI_Allreduce(&counters[3],&sum,1,MPI_INT,MPI_SUM,world);
+ MPI_Allreduce(&counters[3],&min,1,MPI_INT,MPI_MIN,world);
+ MPI_Allreduce(&counters[3],&max,1,MPI_INT,MPI_MAX,world);
+ ave = ((double) sum)/nprocs;
+ if (me == 0)
+ printf(" Max dots: ave = %g, min = %d, max = %d\n",ave,min,max);
+ if (flag) {
+ MPI_Barrier(world);
+ printf(" Proc %d max dots = %d\n",me,counters[3]);
+ }
+
+ MPI_Allreduce(&counters[4],&sum,1,MPI_INT,MPI_SUM,world);
+ MPI_Allreduce(&counters[4],&min,1,MPI_INT,MPI_MIN,world);
+ MPI_Allreduce(&counters[4],&max,1,MPI_INT,MPI_MAX,world);
+ ave = ((double) sum)/nprocs;
+ if (me == 0)
+ printf(" Max memory: ave = %g, min = %d, max = %d\n",ave,min,max);
+ if (flag) {
+ MPI_Barrier(world);
+ printf(" Proc %d max memory = %d\n",me,counters[4]);
+ }
+
+ if (reuse) {
+ MPI_Allreduce(&counters[5],&sum,1,MPI_INT,MPI_SUM,world);
+ MPI_Allreduce(&counters[5],&min,1,MPI_INT,MPI_MIN,world);
+ MPI_Allreduce(&counters[5],&max,1,MPI_INT,MPI_MAX,world);
+ ave = ((double) sum)/nprocs;
+ if (me == 0)
+ printf(" # of Reuse: ave = %g, min = %d, max = %d\n",ave,min,max);
+ if (flag) {
+ MPI_Barrier(world);
+ printf(" Proc %d # of Reuse = %d\n",me,counters[5]);
+ }
+ }
+
+ MPI_Allreduce(&counters[6],&sum,1,MPI_INT,MPI_SUM,world);
+ MPI_Allreduce(&counters[6],&min,1,MPI_INT,MPI_MIN,world);
+ MPI_Allreduce(&counters[6],&max,1,MPI_INT,MPI_MAX,world);
+ ave = ((double) sum)/nprocs;
+ if (me == 0)
+ printf(" # of OverAlloc: ave = %g, min = %d, max = %d\n",ave,min,max);
+ if (flag) {
+ MPI_Barrier(world);
+ printf(" Proc %d # of OverAlloc = %d\n",me,counters[6]);
+ }
+
+ // RCB boxes for each proc
+
+ if (flag) {
+ if (me == 0) printf(" RCB sub-domain boxes:\n");
+ for (i = 0; i < 3; i++) {
+ MPI_Barrier(world);
+ if (me == 0) printf(" Dimension %d\n",i+1);
+ MPI_Barrier(world);
+ printf(" Proc = %d: Box = %g %g\n",me,lo[i],hi[i]);
+ }
+ }
+}
+*/
diff --git a/src/rcb.h b/src/rcb.h
new file mode 100644
index 0000000000..4c2e3809e9
--- /dev/null
+++ b/src/rcb.h
@@ -0,0 +1,131 @@
+/* ----------------------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LAMMPS_RCB_H
+#define LAMMPS_RCB_H
+
+#include "mpi.h"
+#include "pointers.h"
+
+namespace LAMMPS_NS {
+
+class RCB : protected Pointers {
+ public:
+ // set by compute()
+
+ int noriginal; // # of dots I own before balancing
+ int nfinal; // # of dots I own after balancing
+ int nkeep; // how many dots of noriginal I still own
+ // will be first nkept of nfinal list
+ int *recvproc; // proc IDs of nfinal dots
+ int *recvindex; // index of nfinal dots on owning procs
+ // based on input list for compute()
+ double *lo,*hi; // final bounding box of my RCB sub-domain
+ double cut; // single cut (in Tree) owned by this proc
+ int cutdim; // dimension (0,1,2) of the cut
+
+ // set by invert()
+
+ int *sendproc; // proc to send each of my noriginal dots to
+ int *sendindex; // index of dot in receiver's nfinal list
+
+ RCB(class LAMMPS *);
+ ~RCB();
+ void compute(int, int, double **, double *, double *, double *);
+ void invert(int sortflag = 0);
+ bigint memory_usage();
+
+ // DEBUG methods
+ //void check();
+ //void stats(int);
+
+ // RCB cut info
+
+ struct Median {
+ double totallo,totalhi; // weight in each half of active partition
+ double valuelo,valuehi; // position of dot(s) nearest to cut
+ double wtlo,wthi; // total weight of dot(s) at that position
+ int countlo,counthi; // # of dots at that position
+ int proclo,prochi; // unique proc who owns a nearest dot
+ };
+
+ struct BBox {
+ double lo[3],hi[3]; // corner points of a bounding box
+ };
+
+ private:
+ int me,nprocs;
+
+ // point to balance on
+
+ struct Dot {
+ double x[3]; // coord of point
+ double wt; // weight of point
+ int proc; // owning proc
+ int index; // index on owning proc
+ };
+
+ // tree of RCB cuts
+
+ struct Tree {
+ double cut; // position of cut
+ int dim; // dimension = 0/1/2 of cut
+ };
+
+ // inversion message
+
+ struct Invert {
+ int rindex; // index on receiving proc
+ int sproc; // sending proc
+ int sindex; // index on sending proc
+ };
+
+ Dot *dots; // dots on this proc
+ int ndot; // # of dots on this proc
+ int maxdot; // allocated size of dots
+ int ndotorig;
+
+ int nlist;
+ int maxlist;
+ int *dotlist;
+ int *dotmark;
+
+ int maxbuf;
+ Dot *buf;
+
+ int maxrecv,maxsend;
+
+ BBox bbox;
+ class Irregular *irregular;
+
+ MPI_Op box_op,med_op;
+ MPI_Datatype box_type,med_type;
+
+ int reuse; // 1/0 to use/not use previous cuts
+ int dottop; // dots >= this index are new
+ double bboxlo[3]; // bounding box of final RCB sub-domain
+ double bboxhi[3];
+ Tree *tree; // tree of RCB cuts, used by reuse()
+ int counters[7]; // diagnostic counts
+ // 0 = # of median iterations
+ // 1 = # of points sent
+ // 2 = # of points received
+ // 3 = most points this proc ever owns
+ // 4 = most point memory this proc ever allocs
+ // 5 = # of times a previous cut is re-used
+ // 6 = # of reallocs of point vector
+};
+
+}
+
+#endif
diff --git a/src/replicate.cpp b/src/replicate.cpp
index b759c47fa3..26f3fca7ed 100644
--- a/src/replicate.cpp
+++ b/src/replicate.cpp
@@ -29,6 +29,8 @@ using namespace LAMMPS_NS;
#define LB_FACTOR 1.1
#define EPSILON 1.0e-6
+enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
+
/* ---------------------------------------------------------------------- */
Replicate::Replicate(LAMMPS *lmp) : Pointers(lmp) {}
@@ -220,17 +222,33 @@ void Replicate::command(int narg, char **arg)
sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2];
}
- if (domain->xperiodic) {
- if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
- if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0];
- }
- if (domain->yperiodic) {
- if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
- if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1];
- }
- if (domain->zperiodic) {
- if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
- if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2];
+ if (comm->layout != LAYOUT_TILED) {
+ if (domain->xperiodic) {
+ if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
+ if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0];
+ }
+ if (domain->yperiodic) {
+ if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
+ if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1];
+ }
+ if (domain->zperiodic) {
+ if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
+ if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2];
+ }
+
+ } else {
+ if (domain->xperiodic) {
+ if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0];
+ if (comm->mysplit[0][1] == 1.0) subhi[0] += epsilon[0];
+ }
+ if (domain->yperiodic) {
+ if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1];
+ if (comm->mysplit[1][1] == 1.0) subhi[1] += epsilon[1];
+ }
+ if (domain->zperiodic) {
+ if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2];
+ if (comm->mysplit[2][1] == 1.0) subhi[2] += epsilon[2];
+ }
}
// loop over all procs
diff --git a/src/variable.cpp b/src/variable.cpp
index 6efe9ef3f9..f371512128 100644
--- a/src/variable.cpp
+++ b/src/variable.cpp
@@ -44,6 +44,7 @@ using namespace MathConst;
#define MAXLEVEL 4
#define MAXLINE 256
#define CHUNK 1024
+#define VALUELENGTH 64
#define MYROUND(a) (( a-floor(a) ) >= .5) ? ceil(a) : floor(a)
@@ -306,7 +307,7 @@ void Variable::set(int narg, char **arg)
pad[nvar] = 0;
data[nvar] = new char*[num[nvar]];
copy(1,&arg[2],data[nvar]);
- data[nvar][1] = NULL;
+ data[nvar][1] = new char[VALUELENGTH];
// SCALARFILE for strings or numbers
// which = 1st value
@@ -360,7 +361,7 @@ void Variable::set(int narg, char **arg)
pad[nvar] = 0;
data[nvar] = new char*[num[nvar]];
copy(2,&arg[2],data[nvar]);
- data[nvar][2] = NULL;
+ data[nvar][2] = new char[VALUELENGTH];
// EQUAL
// replace pre-existing var if also style EQUAL (allows it to be reset)
@@ -374,9 +375,7 @@ void Variable::set(int narg, char **arg)
if (style[ivar] != EQUAL)
error->all(FLERR,"Cannot redefine variable as a different style");
delete [] data[ivar][0];
- if (data[ivar][1]) delete [] data[ivar][1];
copy(1,&arg[2],data[ivar]);
- data[ivar][1] = NULL;
replaceflag = 1;
} else {
if (nvar == maxvar) grow();
@@ -386,7 +385,7 @@ void Variable::set(int narg, char **arg)
pad[nvar] = 0;
data[nvar] = new char*[num[nvar]];
copy(1,&arg[2],data[nvar]);
- data[nvar][1] = NULL;
+ data[nvar][1] = new char[VALUELENGTH];
}
// ATOM
@@ -625,32 +624,24 @@ char *Variable::retrieve(char *name)
strcpy(data[ivar][0],result);
str = data[ivar][0];
} else if (style[ivar] == EQUAL) {
- char result[64];
double answer = evaluate(data[ivar][0],NULL);
- sprintf(result,"%.15g",answer);
- int n = strlen(result) + 1;
- if (data[ivar][1]) delete [] data[ivar][1];
- data[ivar][1] = new char[n];
- strcpy(data[ivar][1],result);
+ sprintf(data[ivar][1],"%.15g",answer);
str = data[ivar][1];
} else if (style[ivar] == FORMAT) {
- char result[64];
int jvar = find(data[ivar][0]);
if (jvar == -1) return NULL;
if (!equalstyle(jvar)) return NULL;
double answer = evaluate(data[jvar][0],NULL);
- sprintf(result,data[ivar][1],answer);
- int n = strlen(result) + 1;
- if (data[ivar][2]) delete [] data[ivar][2];
- data[ivar][2] = new char[n];
- strcpy(data[ivar][2],result);
+ sprintf(data[ivar][2],data[ivar][1],answer);
str = data[ivar][2];
} else if (style[ivar] == GETENV) {
const char *result = getenv(data[ivar][0]);
- if (data[ivar][1]) delete [] data[ivar][1];
- if (result == NULL) result = (const char *)"";
+ if (result == NULL) result = (const char *) "";
int n = strlen(result) + 1;
- data[ivar][1] = new char[n];
+ if (n > VALUELENGTH) {
+ delete [] data[ivar][1];
+ data[ivar][1] = new char[n];
+ }
strcpy(data[ivar][1],result);
str = data[ivar][1];
} else if (style[ivar] == ATOM || style[ivar] == ATOMFILE) return NULL;
@@ -782,6 +773,45 @@ int Variable::atomstyle(int ivar)
return 0;
}
+/* ----------------------------------------------------------------------
+ save copy of EQUAL style ivar formula in copy
+ allocate copy here, later equal_restore() call will free it
+ insure data[ivar][0] is of VALUELENGTH since will be overridden
+------------------------------------------------------------------------- */
+
+void Variable::equal_save(int ivar, char *©)
+{
+ int n = strlen(data[ivar][0]) + 1;
+ copy = new char[n];
+ strcpy(copy,data[ivar][0]);
+ delete [] data[ivar][0];
+ data[ivar][0] = new char[VALUELENGTH];
+}
+
+/* ----------------------------------------------------------------------
+ restore formula string of EQUAL style ivar from copy
+ then free copy, allocated in equal_save()
+------------------------------------------------------------------------- */
+
+void Variable::equal_restore(int ivar, char *copy)
+{
+ delete [] data[ivar][0];
+ int n = strlen(copy) + 1;
+ data[ivar][0] = new char[n];
+ strcpy(data[ivar][0],copy);
+ delete [] copy;
+}
+
+/* ----------------------------------------------------------------------
+ override EQUAL style ivar formula with value converted to string
+ data[ivar][0] was set to length 64 in equal_save()
+------------------------------------------------------------------------- */
+
+void Variable::equal_override(int ivar, double value)
+{
+ sprintf(data[ivar][0],"%.15g",value);
+}
+
/* ----------------------------------------------------------------------
remove Nth variable from list and compact list
delete reader explicitly if it exists
diff --git a/src/variable.h b/src/variable.h
index bf6dcea656..f04a04f9d4 100644
--- a/src/variable.h
+++ b/src/variable.h
@@ -36,10 +36,15 @@ class Variable : protected Pointers {
int int_between_brackets(char *&, int);
double evaluate_boolean(char *);
+ void equal_save(int, char *&);
+ void equal_restore(int, char *);
+ void equal_override(int, double);
+
unsigned int data_mask(int ivar);
unsigned int data_mask(char *str);
private:
+ int me;
int nvar; // # of defined variables
int maxvar; // max # of variables following lists can hold
char **names; // name of each variable
@@ -57,7 +62,6 @@ class Variable : protected Pointers {
int precedence[17]; // precedence level of math operators
// set length to include up to OR in enum
- int me;
struct Tree { // parse tree for atom-style variables
double value; // single scalar
diff --git a/src/version.h b/src/version.h
index 25d90d9612..b4d684523a 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1 +1 @@
-#define LAMMPS_VERSION "22 Jul 2014"
+#define LAMMPS_VERSION "29 Jul 2014"