Merge branch 'lammps-icms' into cle-orig

This commit is contained in:
Axel Kohlmeyer
2014-07-30 15:02:14 -04:00
55 changed files with 3287 additions and 846 deletions

BIN
doc/JPG/sinusoid.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 126 KiB

BIN
doc/JPG/sinusoid_small.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

View File

@ -1,7 +1,7 @@
<HTML>
<HEAD>
<TITLE>LAMMPS-ICMS Users Manual</TITLE>
<META NAME="docnumber" CONTENT="22 Jul 2014 version">
<META NAME="docnumber" CONTENT="29 Jul 2014 version">
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
</HEAD>
@ -22,7 +22,7 @@
<CENTER><H3>LAMMPS-ICMS Documentation
</H3></CENTER>
<CENTER><H4>22 Jul 2014 version
<CENTER><H4>29 Jul 2014 version
</H4></CENTER>
<H4>Version info:
</H4>

View File

@ -1,6 +1,6 @@
<HEAD>
<TITLE>LAMMPS-ICMS Users Manual</TITLE>
<META NAME="docnumber" CONTENT="22 Jul 2014 version">
<META NAME="docnumber" CONTENT="29 Jul 2014 version">
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
</HEAD>
@ -18,7 +18,7 @@
<H1></H1>
LAMMPS-ICMS Documentation :c,h3
22 Jul 2014 version :c,h4
29 Jul 2014 version :c,h4
Version info: :h4

View File

@ -988,14 +988,6 @@ will assign the restart file info to actual atoms. :dd
This is a restriction due to the way atoms are organized in a list to
enable the atom_modify first command. :dd
{Support for writing images in JPEG format not included} :dt
LAMMPS was not built with the -DLAMMPS_JPEG switch in the Makefile. :dd
{Support for writing images in PNG format not included} :dt
LAMMPS was not built with the -DLAMMPS_PNG switch in the Makefile. :dd
{Cannot dump sort on atom IDs with no atom IDs defined} :dt
Self-explanatory. :dd

View File

@ -31,7 +31,7 @@
</PRE>
<LI>zero or more keyword/value pairs may be appended
<LI>keyword = <I>mol</I> or <I>basis</I> or <I>remap</I> or <I>units</I>
<LI>keyword = <I>mol</I> or <I>basis</I> or <I>remap</I> or <I>var</I> or <I>set</I> or <I>units</I>
<PRE> <I>mol</I> value = template-ID seed
template-ID = ID of molecule template specified in a separate <A HREF = "molecule.html">molecule</A> command
@ -40,6 +40,10 @@
M = which basis atom
itype = atom type (1-N) to assign to this basis atom
<I>remap</I> value = <I>yes</I> or <I>no</I>
<I>var</I> value = name = variable name to evaluate for test of atom creation
<I>set</I> values = dim vname
dim = <I>x</I> or <I>y</I> or <I>z</I>
name = name of variable to set with x,y,z atom position
<I>units</I> value = <I>lattice</I> or <I>box</I>
<I>lattice</I> = the geometry is defined in lattice units
<I>box</I> = the geometry is defined in simulation box units
@ -51,6 +55,7 @@
<PRE>create_atoms 1 box
create_atoms 3 region regsphere basis 2 3
create_atoms 3 single 0 0 5
create_atoms 1 box var v set x xpos set y ypos
</PRE>
<P><B>Description:</B>
</P>
@ -189,6 +194,45 @@ box, it will mapped back into the box, assuming the relevant
dimensions are periodic. If it is set to <I>no</I>, no remapping is done
and no particle is created if its position is outside the box.
</P>
<P>The <I>var</I> and <I>set</I> keywords can be used to provide a criterion for
accepting or rejecting the addition of an individual atom, based on
its coordinates. The <I>vname</I> specified for the <I>var</I> keyword is the
name of an <A HREF = "variable.html">equal-style variable</A> which should evaluate
to a zero or non-zero value based on one or two or three variables
which will store the x, y, or z coordinates of an atom (one variable
per coordinate). These other variables must be <A HREF = "variable.html">equal-style
variables</A> defined in the input script, but their
formula can by anything. The <I>set</I> keyword is used to identify the
names of these other variables, one variable for the x-coordinate of a
created atom, one for y, and one for z.
</P>
<P>When an atom is created, its x, y, or z coordinates override the
formula for any <I>set</I> variable that is defined. The <I>var</I> variable is
then evaluated. If the returned value is 0.0, the atom is not
created. If it is non-zero, the atom is created.
</P>
<P>As an example, these commands can be used in a 2d simulation, to
create a sinusoidal surface. Note that the surface is "rough" due to
individual lattice points being "above" or "below" the mathematical
expression for the sinusoidal curve. If a finer lattice were used,
the sinusoid would appear to be "smoother". Also note the use of the
"xlat" and "ylat" <A HREF = "thermo_style.html">thermo_style</A> keywords which
converts lattice spacings to distance. Click on the image for a
larger version.
</P>
<PRE>variable x equal 100
variable y equal 25
lattice hex 0.8442
region box block 0 $x 0 $y -0.5 0.5
create_box 1 box
</PRE>
<PRE>variable xx equal 0.0
variable yy equal 0.0
variable v equal "(0.2*v_y*ylat * cos(v_xx/xlat * 2.0*PI*4.0/v_x) + 0.5*v_y*ylat - v_yy) > 0.0"
create_atoms 1 box var v set x xx set y yy
</PRE>
<CENTER><A HREF = "JPG/sinusoid.jpg"><IMG SRC = "JPG/sinusoid_small.jpg"></A>
</CENTER>
<P>The <I>units</I> keyword determines the meaning of the distance units used
to specify the coordinates of the one particle created by the <I>single</I>
style. A <I>box</I> value selects standard distance units as defined by

View File

@ -24,7 +24,7 @@ style = {box} or {region} or {single} or {random} :l
seed = random # seed (positive integer)
region-ID = create atoms within this region, use NULL for entire simulation box :pre
zero or more keyword/value pairs may be appended :l
keyword = {mol} or {basis} or {remap} or {units} :l
keyword = {mol} or {basis} or {remap} or {var} or {set} or {units} :l
{mol} value = template-ID seed
template-ID = ID of molecule template specified in a separate "molecule"_molecule.html command
seed = random # seed (positive integer)
@ -32,6 +32,10 @@ keyword = {mol} or {basis} or {remap} or {units} :l
M = which basis atom
itype = atom type (1-N) to assign to this basis atom
{remap} value = {yes} or {no}
{var} value = name = variable name to evaluate for test of atom creation
{set} values = dim vname
dim = {x} or {y} or {z}
name = name of variable to set with x,y,z atom position
{units} value = {lattice} or {box}
{lattice} = the geometry is defined in lattice units
{box} = the geometry is defined in simulation box units :pre
@ -41,7 +45,8 @@ keyword = {mol} or {basis} or {remap} or {units} :l
create_atoms 1 box
create_atoms 3 region regsphere basis 2 3
create_atoms 3 single 0 0 5 :pre
create_atoms 3 single 0 0 5
create_atoms 1 box var v set x xpos set y ypos :pre
[Description:]
@ -180,6 +185,45 @@ box, it will mapped back into the box, assuming the relevant
dimensions are periodic. If it is set to {no}, no remapping is done
and no particle is created if its position is outside the box.
The {var} and {set} keywords can be used to provide a criterion for
accepting or rejecting the addition of an individual atom, based on
its coordinates. The {vname} specified for the {var} keyword is the
name of an "equal-style variable"_variable.html which should evaluate
to a zero or non-zero value based on one or two or three variables
which will store the x, y, or z coordinates of an atom (one variable
per coordinate). These other variables must be "equal-style
variables"_variable.html defined in the input script, but their
formula can by anything. The {set} keyword is used to identify the
names of these other variables, one variable for the x-coordinate of a
created atom, one for y, and one for z.
When an atom is created, its x, y, or z coordinates override the
formula for any {set} variable that is defined. The {var} variable is
then evaluated. If the returned value is 0.0, the atom is not
created. If it is non-zero, the atom is created.
As an example, these commands can be used in a 2d simulation, to
create a sinusoidal surface. Note that the surface is "rough" due to
individual lattice points being "above" or "below" the mathematical
expression for the sinusoidal curve. If a finer lattice were used,
the sinusoid would appear to be "smoother". Also note the use of the
"xlat" and "ylat" "thermo_style"_thermo_style.html keywords which
converts lattice spacings to distance. Click on the image for a
larger version.
variable x equal 100
variable y equal 25
lattice hex 0.8442
region box block 0 $x 0 $y -0.5 0.5
create_box 1 box :pre
variable xx equal 0.0
variable yy equal 0.0
variable v equal "(0.2*v_y*ylat * cos(v_xx/xlat * 2.0*PI*4.0/v_x) + 0.5*v_y*ylat - v_yy) > 0.0"
create_atoms 1 box var v set x xx set y yy :pre
:c,image(JPG/sinusoid_small.jpg,JPG/sinusoid.jpg)
The {units} keyword determines the meaning of the distance units used
to specify the coordinates of the one particle created by the {single}
style. A {box} value selects standard distance units as defined by

View File

@ -57,7 +57,7 @@
f_ID[N] = Nth column of local array calculated by a fix with ID
</PRE>
<PRE> <I>custom</I> of <I>custom/mpiio</I> args = list of atom attributes
possible attributes = id, mol, type, element, mass,
possible attributes = id, mol, proc, procp1, type, element, mass,
x, y, z, xs, ys, zs, xu, yu, zu,
xsu, ysu, zsu, ix, iy, iz,
vx, vy, vz, fx, fy, fz,
@ -69,6 +69,7 @@
<PRE> id = atom ID
mol = molecule ID
proc = ID of processor that owns atom
procp1 = ID+1 of processor that owns atom
type = atom type
element = name of atom element, as defined by <A HREF = "dump_modify.html">dump_modify</A> command
mass = atom mass
@ -460,18 +461,20 @@ dump 1 all local 1000 tmp.dump index c_1[1] c_1[2] c_1[3] c_2[1] c_2[2]
<P>This section explains the atom attributes that can be specified as
part of the <I>custom</I> and <I>cfg</I> styles.
</P>
<P>The <I>id</I>, <I>mol</I>, <I>proc</I>, <I>type</I>, <I>element</I>, <I>mass</I>, <I>vx</I>, <I>vy</I>, <I>vz</I>,
<I>fx</I>, <I>fy</I>, <I>fz</I>, <I>q</I> attributes are self-explanatory.
<P>The <I>id</I>, <I>mol</I>, <I>proc</I>, <I>procp1</I>, <I>type</I>, <I>element</I>, <I>mass</I>, <I>vx</I>,
<I>vy</I>, <I>vz</I>, <I>fx</I>, <I>fy</I>, <I>fz</I>, <I>q</I> attributes are self-explanatory.
</P>
<P><I>Id</I> is the atom ID. <I>Mol</I> is the molecule ID, included in the data
file for molecular systems. <I>Proc</I> is the ID of the processor (0 to
Nprocs-1) that currently owns the atom. <I>Type</I> is the atom type.
<I>Element</I> is typically the chemical name of an element, which you must
assign to each type via the <A HREF = "dump_modify.html">dump_modify element</A>
command. More generally, it can be any string you wish to associated
with an atom type. <I>Mass</I> is the atom mass. <I>Vx</I>, <I>vy</I>, <I>vz</I>, <I>fx</I>,
<I>fy</I>, <I>fz</I>, and <I>q</I> are components of atom velocity and force and
atomic charge.
Nprocs-1) that currently owns the atom. <I>Procp1</I> is the proc ID+1,
which can be convenient in place of a <I>type</I> attribute (1 to Ntypes)
for coloring atoms in a visualization program. <I>Type</I> is the atom
type (1 to Ntypes). <I>Element</I> is typically the chemical name of an
element, which you must assign to each type via the <A HREF = "dump_modify.html">dump_modify
element</A> command. More generally, it can be any
string you wish to associated with an atom type. <I>Mass</I> is the atom
mass. <I>Vx</I>, <I>vy</I>, <I>vz</I>, <I>fx</I>, <I>fy</I>, <I>fz</I>, and <I>q</I> are components of
atom velocity and force and atomic charge.
</P>
<P>There are several options for outputting atom coordinates. The <I>x</I>,
<I>y</I>, <I>z</I> attributes write atom coordinates "unscaled", in the

View File

@ -44,7 +44,7 @@ args = list of arguments for a particular style :l
f_ID\[N\] = Nth column of local array calculated by a fix with ID :pre
{custom} of {custom/mpiio} args = list of atom attributes
possible attributes = id, mol, type, element, mass,
possible attributes = id, mol, proc, procp1, type, element, mass,
x, y, z, xs, ys, zs, xu, yu, zu,
xsu, ysu, zsu, ix, iy, iz,
vx, vy, vz, fx, fy, fz,
@ -56,6 +56,7 @@ args = list of arguments for a particular style :l
id = atom ID
mol = molecule ID
proc = ID of processor that owns atom
procp1 = ID+1 of processor that owns atom
type = atom type
element = name of atom element, as defined by "dump_modify"_dump_modify.html command
mass = atom mass
@ -78,8 +79,8 @@ args = list of arguments for a particular style :l
f_ID = per-atom vector calculated by a fix with ID
f_ID\[N\] = Nth column of per-atom array calculated by a fix with ID
v_name = per-atom vector calculated by an atom-style variable with name
d_name = per-atom floating point vector with name managed by fix property/atom
i_name = per-atom integer vector with name managed by fix property/atom :pre
d_name = per-atom floating point vector with name, managed by fix property/atom
i_name = per-atom integer vector with name, managed by fix property/atom :pre
:ule
[Examples:]
@ -112,7 +113,7 @@ options; see details below.
As described below, the filename determines the kind of output (text
or binary or gzipped, one big file or one per timestep, one big file
or one per processor).
or multiple smaller files).
IMPORTANT NOTE: Because periodic boundary conditions are enforced only
on timesteps when neighbor lists are rebuilt, the coordinates of an
@ -127,7 +128,7 @@ if the "atom_modify sort"_atom_modify.html option is on, which it is
by default. In this case atoms are re-ordered periodically during a
simulation, due to spatial sorting. It is also true when running in
parallel, because data for a single snapshot is collected from
multiple processors.
multiple processors, each of which owns a subset of the atoms.
For the {atom}, {custom}, {cfg}, and {local} styles, sorting is off by
default. For the {dcd}, {xtc}, {xyz}, and {molfile} styles, sorting by
@ -295,11 +296,12 @@ from using the (numerical) atom type to an element name (or some
other label). This will help many visualization programs to guess
bonds and colors.
Note that {atom}, {custom}, {dcd}, {xtc}, and {xyz} style dump files can
be read directly by "VMD"_http://www.ks.uiuc.edu/Research/vmd (a popular
molecular viewing program). See "Section tools"_Section_tools.html#vmd
of the manual and the tools/lmp2vmd/README.txt file for more information
about support in VMD for reading and visualizing LAMMPS dump files.
Note that {atom}, {custom}, {dcd}, {xtc}, and {xyz} style dump files
can be read directly by "VMD"_http://www.ks.uiuc.edu/Research/vmd, a
popular molecular viewing program. See "Section
tools"_Section_tools.html#vmd of the manual and the
tools/lmp2vmd/README.txt file for more information about support in
VMD for reading and visualizing LAMMPS dump files.
:line
@ -332,7 +334,7 @@ tmp.dump.20000, etc. This option is not available for the {dcd} and
{xtc} styles. Note that the "dump_modify pad"_dump_modify.html
command can be used to insure all timestep numbers are the same length
(e.g. 00010), which can make it easier to read a series of dump files
in order by some post-processing tools.
in order with some post-processing tools.
If a "%" character appears in the filename, then each of P processors
writes a portion of the dump file, and the "%" character is replaced
@ -447,18 +449,20 @@ dump 1 all local 1000 tmp.dump index c_1\[1\] c_1\[2\] c_1\[3\] c_2\[1\] c_2\[2\
This section explains the atom attributes that can be specified as
part of the {custom} and {cfg} styles.
The {id}, {mol}, {proc}, {type}, {element}, {mass}, {vx}, {vy}, {vz},
{fx}, {fy}, {fz}, {q} attributes are self-explanatory.
The {id}, {mol}, {proc}, {procp1}, {type}, {element}, {mass}, {vx},
{vy}, {vz}, {fx}, {fy}, {fz}, {q} attributes are self-explanatory.
{Id} is the atom ID. {Mol} is the molecule ID, included in the data
file for molecular systems. {Proc} is the ID of the processor (0 to
Nprocs-1) that currently owns the atom. {Type} is the atom type.
{Element} is typically the chemical name of an element, which you must
assign to each type via the "dump_modify element"_dump_modify.html
command. More generally, it can be any string you wish to associated
with an atom type. {Mass} is the atom mass. {Vx}, {vy}, {vz}, {fx},
{fy}, {fz}, and {q} are components of atom velocity and force and
atomic charge.
Nprocs-1) that currently owns the atom. {Procp1} is the proc ID+1,
which can be convenient in place of a {type} attribute (1 to Ntypes)
for coloring atoms in a visualization program. {Type} is the atom
type (1 to Ntypes). {Element} is typically the chemical name of an
element, which you must assign to each type via the "dump_modify
element"_dump_modify.html command. More generally, it can be any
string you wish to associated with an atom type. {Mass} is the atom
mass. {Vx}, {vy}, {vz}, {fx}, {fy}, {fz}, and {q} are components of
atom velocity and force and atomic charge.
There are several options for outputting atom coordinates. The {x},
{y}, {z} attributes write atom coordinates "unscaled", in the

View File

@ -46,8 +46,8 @@ input = one or more atom attributes :l
f_ID = per-atom vector calculated by a fix with ID
f_ID\[I\] = Ith column of per-atom array calculated by a fix with ID
v_name = per-atom vector calculated by an atom-style variable with name
d_name = per-atom floating point vector managed by fix property/atom
i_name = per-atom integer vector managed by fix property/atom :pre
d_name = per-atom floating point vector name, managed by fix property/atom
i_name = per-atom integer vector name, managed by fix property/atom :pre
zero or more keyword/value pairs may be appended :l
keyword = {com} :l

View File

@ -119,8 +119,8 @@ to very steep parts of the potential.
</UL>
<HR>
<P>The format of a tabulated file is as follows (without the
parenthesized comments):
<P>The format of a tabulated file is a series of one or more sections,
defined as follows (without the parenthesized comments):
</P>
<PRE># Morse potential for Fe (one or more comment or blank lines)
</PRE>

View File

@ -113,8 +113,8 @@ to very steep parts of the potential. :l,ule
:line
The format of a tabulated file is as follows (without the
parenthesized comments):
The format of a tabulated file is a series of one or more sections,
defined as follows (without the parenthesized comments):
# Morse potential for Fe (one or more comment or blank lines) :pre

View File

@ -50,7 +50,7 @@
constants = PI
thermo keywords = vol, ke, press, etc from <A HREF = "thermo_style.html">thermo_style</A>
math operators = (), -x, x+y, x-y, x*y, x/y, x^y, x%y,
x==y, x!=y, x<y, x<=y, x>y, x>=y, x&&y, x||y, !x
x == y, x != y, x &lt y, x &lt= y, x &gt y, x &gt= y, x && y, x || y, !x
math functions = sqrt(x), exp(x), ln(x), log(x), abs(x),
sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x),
random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x)
@ -368,7 +368,8 @@ references to other variables.
<TR><TD >Number</TD><TD > 0.2, 100, 1.0e20, -15.4, etc</TD></TR>
<TR><TD >Constant</TD><TD > PI</TD></TR>
<TR><TD >Thermo keywords</TD><TD > vol, pe, ebond, etc</TD></TR>
<TR><TD >Math operators</TD><TD > (), -x, x+y, x-y, x*y, x/y, x^y, x%y, x==y, x!=y, x<y, x<=y, x>y, x>=y, x&&y, x||y, !x</TD></TR>
<TR><TD >Math operators</TD><TD > (), -x, x+y, x-y, x*y, x/y, x^y, x%y, </TD></TR>
<TR><TD >Math operators</TD><TD > (), -x, x+y, x-y, x*y, x/y, x^y, x%y, x == y, x != y, x &lt y, x &lt= y, x &gt y, x &gt= y, x && y, x || y, !x</TD></TR>
<TR><TD >Math functions</TD><TD > sqrt(x), exp(x), ln(x), log(x), abs(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), ramp(x,y), stagger(x,y), logfreq(x,y,z), stride(x,y,z), vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z)</TD></TR>
<TR><TD >Group functions</TD><TD > count(ID), mass(ID), charge(ID), xcm(ID,dim), vcm(ID,dim), fcm(ID,dim), bound(ID,dir), gyration(ID), ke(ID), angmom(ID,dim), torque(ID,dim), inertia(ID,dimdim), omega(ID,dim)</TD></TR>
<TR><TD >Region functions</TD><TD > count(ID,IDR), mass(ID,IDR), charge(ID,IDR), xcm(ID,dim,IDR), vcm(ID,dim,IDR), fcm(ID,dim,IDR), bound(ID,dir,IDR), gyration(ID,IDR), ke(ID,IDR), angmom(ID,dim,IDR), torque(ID,dim,IDR), inertia(ID,dimdim,IDR), omega(ID,dim,IDR)</TD></TR>

View File

@ -45,7 +45,7 @@ style = {delete} or {index} or {loop} or {world} or {universe} or {uloop} or {st
constants = PI
thermo keywords = vol, ke, press, etc from "thermo_style"_thermo_style.html
math operators = (), -x, x+y, x-y, x*y, x/y, x^y, x%y,
x==y, x!=y, x<y, x<=y, x>y, x>=y, x&&y, x||y, !x
x == y, x != y, x &lt y, x &lt= y, x &gt y, x &gt= y, x && y, x || y, !x
math functions = sqrt(x), exp(x), ln(x), log(x), abs(x),
sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x),
random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x)
@ -361,7 +361,8 @@ references to other variables.
Number: 0.2, 100, 1.0e20, -15.4, etc
Constant: PI
Thermo keywords: vol, pe, ebond, etc
Math operators: (), -x, x+y, x-y, x*y, x/y, x^y, x%y, x==y, x!=y, x<y, x<=y, x>y, x>=y, x&&y, x||y, !x
Math operators: (), -x, x+y, x-y, x*y, x/y, x^y, x%y,
Math operators: (), -x, x+y, x-y, x*y, x/y, x^y, x%y, x == y, x != y, x &lt y, x &lt= y, x &gt y, x &gt= y, x && y, x || y, !x
Math functions: sqrt(x), exp(x), ln(x), log(x), abs(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), ramp(x,y), stagger(x,y), logfreq(x,y,z), stride(x,y,z), vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z)
Group functions: count(ID), mass(ID), charge(ID), xcm(ID,dim), \
vcm(ID,dim), fcm(ID,dim), bound(ID,dir), \

View File

@ -37,10 +37,11 @@ using namespace LAMMPS_NS;
using namespace FixConst;
using namespace MathConst;
#define EPSILON 0.001
enum{ATOM,MOLECULE};
enum{ONE,RANGE,POLY};
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
#define EPSILON 0.001
/* ---------------------------------------------------------------------- */
@ -598,13 +599,25 @@ void FixPour::pre_exchange()
if (newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
newcoord[1] >= sublo[1] && newcoord[1] < subhi[1] &&
newcoord[2] >= sublo[2] && newcoord[2] < subhi[2]) flag = 1;
else if (dimension == 3 && newcoord[2] >= domain->boxhi[2] &&
comm->myloc[2] == comm->procgrid[2]-1 &&
newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1;
else if (dimension == 2 && newcoord[1] >= domain->boxhi[1] &&
comm->myloc[1] == comm->procgrid[1]-1 &&
newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1;
else if (dimension == 3 && newcoord[2] >= domain->boxhi[2]) {
if (comm->layout != LAYOUT_TILED) {
if (comm->myloc[2] == comm->procgrid[2]-1 &&
newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1;
} else {
if (comm->mysplit[2][1] == 1.0 &&
newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1;
}
} else if (dimension == 2 && newcoord[1] >= domain->boxhi[1]) {
if (comm->layout != LAYOUT_TILED) {
if (comm->myloc[1] == comm->procgrid[1]-1 &&
newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1;
} else {
if (comm->mysplit[1][1] == 1.0 &&
newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1;
}
}
if (flag) {
if (mode == ATOM) atom->avec->create_atom(ntype,coords[m]);

View File

@ -148,6 +148,9 @@ void MSM::init()
triclinic_check();
if (domain->dimension == 2)
error->all(FLERR,"Cannot (yet) use MSM with 2d simulation");
if (comm->style != 0)
error->universe_all(FLERR,"MSM can only currently be used with "
"comm_style brick");
if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");

View File

@ -183,6 +183,9 @@ void PPPM::init()
"slab correction");
if (domain->dimension == 2) error->all(FLERR,
"Cannot use PPPM with 2d simulation");
if (comm->style != 0)
error->universe_all(FLERR,"PPPM can only currently be used with "
"comm_style brick");
if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");

View File

@ -213,6 +213,9 @@ void PPPMDisp::init()
triclinic_check();
if (domain->dimension == 2)
error->all(FLERR,"Cannot use PPPMDisp with 2d simulation");
if (comm->style != 0)
error->universe_all(FLERR,"PPPMDisp can only currently be used with "
"comm_style brick");
if (slabflag == 0 && domain->nonperiodic > 0)
error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp");

View File

@ -37,6 +37,7 @@ using namespace FixConst;
using namespace MathConst;
enum{ATOM,MOLECULE};
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
#define EPSILON 1.0e6
@ -452,13 +453,25 @@ void FixDeposit::pre_exchange()
if (newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
newcoord[1] >= sublo[1] && newcoord[1] < subhi[1] &&
newcoord[2] >= sublo[2] && newcoord[2] < subhi[2]) flag = 1;
else if (dimension == 3 && newcoord[2] >= domain->boxhi[2] &&
comm->myloc[2] == comm->procgrid[2]-1 &&
newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1;
else if (dimension == 2 && newcoord[1] >= domain->boxhi[1] &&
comm->myloc[1] == comm->procgrid[1]-1 &&
newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1;
else if (dimension == 3 && newcoord[2] >= domain->boxhi[2]) {
if (comm->layout != LAYOUT_TILED) {
if (comm->myloc[2] == comm->procgrid[2]-1 &&
newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1;
} else {
if (comm->mysplit[2][1] == 1.0 &&
newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] &&
newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1;
}
} else if (dimension == 2 && newcoord[1] >= domain->boxhi[1]) {
if (comm->layout != LAYOUT_TILED) {
if (comm->myloc[1] == comm->procgrid[1]-1 &&
newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1;
} else {
if (comm->mysplit[1][1] == 1.0 &&
newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1;
}
}
if (flag) {
if (mode == ATOM) atom->avec->create_atom(ntype,coords[m]);

View File

@ -52,6 +52,9 @@ VerletSplit::VerletSplit(LAMMPS *lmp, int narg, char **arg) :
if (universe->procs_per_world[0] % universe->procs_per_world[1])
error->universe_all(FLERR,"Verlet/split requires Rspace partition "
"size be multiple of Kspace partition size");
if (comm->style != 0)
error->universe_all(FLERR,"Verlet/split can only currently be used with "
"comm_style brick");
// master = 1 for Rspace procs, 0 for Kspace procs
@ -214,6 +217,9 @@ VerletSplit::~VerletSplit()
void VerletSplit::init()
{
if (comm->style != 0)
error->universe_all(FLERR,"Verlet/split can only currently be used with "
"comm_style brick");
if (!force->kspace && comm->me == 0)
error->warning(FLERR,"No Kspace calculation with verlet/split");

View File

@ -29,6 +29,8 @@
using namespace LAMMPS_NS;
using namespace FixConst;
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
#define BIG 1.0e30
#define EPSILON 1.0e-6
@ -410,7 +412,15 @@ void FixAppendAtoms::pre_exchange()
if (ntimestep % freq == 0) {
if (spatflag==1) if (get_spatial()==0) return;
if (comm->myloc[2] == comm->procgrid[2]-1) {
int addflag = 0;
if (comm->layout != LAYOUT_TILED) {
if (comm->myloc[2] == comm->procgrid[2]-1) addflag = 1;
} else {
if (comm->mysplit[2][1] == 1.0) addflag = 1;
}
if (addflag) {
double bboxlo[3],bboxhi[3];
bboxlo[0] = domain->sublo[0]; bboxhi[0] = domain->subhi[0];

View File

@ -334,9 +334,12 @@ void FixSRD::init()
if (bigexist && comm->ghost_velocity == 0)
error->all(FLERR,"Fix srd requires ghost atoms store velocity");
if (bigexist && collidestyle == NOSLIP && !atom->torque_flag)
error->all(FLERR,"Fix SRD no-slip requires atom attribute torque");
error->all(FLERR,"Fix srd no-slip requires atom attribute torque");
if (initflag && update->dt != dt_big)
error->all(FLERR,"Cannot change timestep once fix srd is setup");
if (comm->style != 0)
error->universe_all(FLERR,"Fix srd can only currently be used with "
"comm_style brick");
// orthogonal vs triclinic simulation box
// could be static or shearing box

View File

@ -20,7 +20,7 @@
#include <sys/time.h>
#include "mpi.h"
/* lo-level data structure */
/* data structure for double/int */
struct _mpi_double_int {
double value;
@ -28,6 +28,15 @@ struct _mpi_double_int {
};
typedef struct _mpi_double_int double_int;
/* extra MPI_Datatypes registered by MPI_Type_contiguous */
#define MAXEXTRA_DATATYPE 16
int nextra_datatype;
MPI_Datatype *ptr_datatype[MAXEXTRA_DATATYPE];
int index_datatype[MAXEXTRA_DATATYPE];
int size_datatype[MAXEXTRA_DATATYPE];
static int _mpi_is_initialized=0;
/* ---------------------------------------------------------------------- */
@ -135,6 +144,8 @@ double MPI_Wtime()
/* ---------------------------------------------------------------------- */
/* include sizes of user defined datatypes, stored in extra lists */
static int stubtypesize(MPI_Datatype datatype)
{
if (datatype == MPI_INT) return sizeof(int);
@ -145,6 +156,12 @@ static int stubtypesize(MPI_Datatype datatype)
else if (datatype == MPI_LONG) return sizeof(long);
else if (datatype == MPI_LONG_LONG) return sizeof(uint64_t);
else if (datatype == MPI_DOUBLE_INT) return sizeof(double_int);
else {
int i;
for (i = 0; i < nextra_datatype; i++)
if (datatype == index_datatype[i]) return size_datatype[i];
}
return 0;
}
/* ---------------------------------------------------------------------- */
@ -339,6 +356,66 @@ int MPI_Cart_rank(MPI_Comm comm, int *coords, int *rank)
/* ---------------------------------------------------------------------- */
/* store size of user datatype in extra lists */
int MPI_Type_contiguous(int count, MPI_Datatype oldtype,
MPI_Datatype *newtype)
{
if (nextra_datatype = MAXEXTRA_DATATYPE) return -1;
ptr_datatype[nextra_datatype] = newtype;
index_datatype[nextra_datatype] = -(nextra_datatype + 1);
size_datatype[nextra_datatype] = count * stubtypesize(oldtype);
nextra_datatype++;
return 0;
}
/* ---------------------------------------------------------------------- */
/* set value of user datatype to internal negative index,
based on match of ptr */
int MPI_Type_commit(MPI_Datatype *datatype)
{
int i;
for (i = 0; i < nextra_datatype; i++)
if (datatype == ptr_datatype[i]) *datatype = index_datatype[i];
return 0;
}
/* ---------------------------------------------------------------------- */
/* remove user datatype from extra lists */
int MPI_Type_free(MPI_Datatype *datatype)
{
int i;
for (i = 0; i < nextra_datatype; i++)
if (datatype == ptr_datatype[i]) {
ptr_datatype[i] = ptr_datatype[nextra_datatype-1];
index_datatype[i] = index_datatype[nextra_datatype-1];
size_datatype[i] = size_datatype[nextra_datatype-1];
nextra_datatype--;
break;
}
return 0;
}
/* ---------------------------------------------------------------------- */
int MPI_Op_create(MPI_User_function *function, int commute, MPI_Op *op)
{
return 0;
}
/* ---------------------------------------------------------------------- */
int MPI_Op_free(MPI_Op *op)
{
return 0;
}
/* ---------------------------------------------------------------------- */
int MPI_Barrier(MPI_Comm comm) {return 0;}
/* ---------------------------------------------------------------------- */

View File

@ -64,6 +64,9 @@ extern "C" {
#define MPI_MAX_PROCESSOR_NAME 128
typedef void MPI_User_function(void *invec, void *inoutvec,
int *len, MPI_Datatype *datatype);
/* MPI data structs */
struct _MPI_Status {
@ -123,6 +126,13 @@ int MPI_Cart_shift(MPI_Comm comm, int direction, int displ,
int *source, int *dest);
int MPI_Cart_rank(MPI_Comm comm, int *coords, int *rank);
int MPI_Type_contiguous(int count, MPI_Datatype oldtype,
MPI_Datatype *newtype);
int MPI_Type_commit(MPI_Datatype *datatype);
int MPI_Type_free(MPI_Datatype *datatype);
int MPI_Op_create(MPI_User_function *function, int commute, MPI_Op *op);
int MPI_Op_free(MPI_Op *op);
int MPI_Barrier(MPI_Comm comm);
int MPI_Bcast(void *buf, int count, MPI_Datatype datatype,

View File

@ -211,6 +211,9 @@ void PPPMCuda::init()
// error check
if (domain->dimension == 2) error->all(FLERR,"Cannot use PPPMCuda with 2d simulation");
if (comm->style != 0)
error->universe_all(FLERR,"PPPMCuda can only currently be used with "
"comm_style brick");
if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");

View File

@ -87,6 +87,10 @@ FixLbFluid::FixLbFluid(LAMMPS *lmp, int narg, char **arg) :
if(narg <7) error->all(FLERR,"Illegal fix lb/fluid command");
if (comm->style != 0)
error->universe_all(FLERR,"Fix lb/fluid can only currently be used with "
"comm_style brick");
MPI_Comm_rank(world,&me);
MPI_Comm_size(world,&nprocs);
@ -566,9 +570,12 @@ int FixLbFluid::setmask()
void FixLbFluid::init(void)
{
int i,j;
if (comm->style != 0)
error->universe_all(FLERR,"Fix lb/fluid can only currently be used with "
"comm_style brick");
//--------------------------------------------------------------------------
// Check to see if the MD timestep has changed between runs.
//--------------------------------------------------------------------------

View File

@ -47,6 +47,8 @@ using namespace MathConst;
#define CUDA_CHUNK 3000
#define MAXBODY 20 // max # of lines in one body, also in ReadData class
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
/* ---------------------------------------------------------------------- */
Atom::Atom(LAMMPS *lmp) : Pointers(lmp)
@ -756,17 +758,33 @@ void Atom::data_atoms(int n, char *buf)
sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2];
}
if (domain->xperiodic) {
if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0];
}
if (domain->yperiodic) {
if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1];
}
if (domain->zperiodic) {
if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2];
if (comm->layout != LAYOUT_TILED) {
if (domain->xperiodic) {
if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0];
}
if (domain->yperiodic) {
if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1];
}
if (domain->zperiodic) {
if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2];
}
} else {
if (domain->xperiodic) {
if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0];
if (comm->mysplit[0][1] == 1.0) subhi[0] += epsilon[0];
}
if (domain->yperiodic) {
if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1];
if (comm->mysplit[1][1] == 1.0) subhi[1] += epsilon[1];
}
if (domain->zperiodic) {
if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2];
if (comm->mysplit[2][1] == 1.0) subhi[2] += epsilon[2];
}
}
// xptr = which word in line starts xyz coords

View File

@ -21,6 +21,7 @@
#include "balance.h"
#include "atom.h"
#include "comm.h"
#include "rcb.h"
#include "irregular.h"
#include "domain.h"
#include "force.h"
@ -30,9 +31,10 @@
using namespace LAMMPS_NS;
enum{XYZ,SHIFT,RCB};
enum{XYZ,SHIFT,BISECTION};
enum{NONE,UNIFORM,USER};
enum{X,Y,Z};
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
/* ---------------------------------------------------------------------- */
@ -47,6 +49,8 @@ Balance::Balance(LAMMPS *lmp) : Pointers(lmp)
user_xsplit = user_ysplit = user_zsplit = NULL;
shift_allocate = 0;
rcb = NULL;
fp = NULL;
firststep = 1;
}
@ -74,6 +78,8 @@ Balance::~Balance()
delete [] hisum;
}
delete rcb;
if (fp) fclose(fp);
}
@ -176,7 +182,7 @@ void Balance::command(int narg, char **arg)
} else if (strcmp(arg[iarg],"rcb") == 0) {
if (style != -1) error->all(FLERR,"Illegal balance command");
style = RCB;
style = BISECTION;
iarg++;
} else break;
@ -232,6 +238,9 @@ void Balance::command(int narg, char **arg)
}
}
if (style == BISECTION && comm->style == 0)
error->all(FLERR,"Balance rcb cannot be used with comm_style brick");
// insure atoms are in current box & update box via shrink-wrap
// init entire system since comm->setup is done
// comm::init needs neighbor::init needs pair::init needs kspace::init, etc
@ -251,8 +260,10 @@ void Balance::command(int narg, char **arg)
double imbinit = imbalance_nlocal(maxinit);
// no load-balance if imbalance doesn't exceed threshhold
// unless switching from tiled to non tiled layout, then force rebalance
if (imbinit < thresh) return;
if (comm->layout == LAYOUT_TILED && style != BISECTION) {
} else if (imbinit < thresh) return;
// debug output of initial state
@ -262,80 +273,65 @@ void Balance::command(int narg, char **arg)
int niter = 0;
// NOTE: if using XYZ or SHIFT and current partition is TILING,
// then need to create initial BRICK partition before performing LB
// perform load-balance
// style XYZ = explicit setting of cutting planes of logical 3d grid
if (style == XYZ) {
if (comm->layout == LAYOUT_UNIFORM) {
if (xflag == USER || yflag == USER || zflag == USER)
comm->layout = LAYOUT_NONUNIFORM;
} else if (comm->style == LAYOUT_NONUNIFORM) {
if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
comm->layout = LAYOUT_UNIFORM;
} else if (comm->style == LAYOUT_TILED) {
if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
comm->layout = LAYOUT_UNIFORM;
else comm->layout = LAYOUT_NONUNIFORM;
}
if (xflag == UNIFORM) {
for (int i = 0; i < procgrid[0]; i++)
comm->xsplit[i] = i * 1.0/procgrid[0];
comm->xsplit[procgrid[0]] = 1.0;
}
} else if (xflag == USER)
for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
if (yflag == UNIFORM) {
for (int i = 0; i < procgrid[1]; i++)
comm->ysplit[i] = i * 1.0/procgrid[1];
comm->ysplit[procgrid[1]] = 1.0;
}
} else if (yflag == USER)
for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
if (zflag == UNIFORM) {
for (int i = 0; i < procgrid[2]; i++)
comm->zsplit[i] = i * 1.0/procgrid[2];
comm->zsplit[procgrid[2]] = 1.0;
}
if (xflag == USER)
for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i];
if (yflag == USER)
for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i];
if (zflag == USER)
} else if (zflag == USER)
for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i];
}
// style SHIFT = adjust cutting planes of logical 3d grid
if (style == SHIFT) {
static_setup(bstr);
comm->layout = LAYOUT_NONUNIFORM;
shift_setup_static(bstr);
niter = shift();
}
// style RCB =
// style BISECTION = recursive coordinate bisectioning
if (style == RCB) {
error->all(FLERR,"Balance rcb is not yet supported");
if (comm->style == 0)
error->all(FLERR,"Cannot use balance rcb with comm_style brick");
if (style == BISECTION) {
comm->layout = LAYOUT_TILED;
bisection(1);
}
// output of final result
if (outflag && me == 0) dumpout(update->ntimestep,fp);
// reset comm->uniform flag if necessary
if (comm->uniform) {
if (style == SHIFT) comm->uniform = 0;
if (style == XYZ && xflag == USER) comm->uniform = 0;
if (style == XYZ && yflag == USER) comm->uniform = 0;
if (style == XYZ && zflag == USER) comm->uniform = 0;
} else {
if (dimension == 3) {
if (style == XYZ &&
xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM)
comm->uniform = 1;
} else {
if (style == XYZ && xflag == UNIFORM && yflag == UNIFORM)
comm->uniform = 1;
}
}
// reset proc sub-domains
// for either brick or tiled comm style
if (domain->triclinic) domain->set_lamda_box();
domain->set_local_box();
@ -344,7 +340,8 @@ void Balance::command(int narg, char **arg)
if (domain->triclinic) domain->x2lamda(atom->nlocal);
Irregular *irregular = new Irregular(lmp);
irregular->migrate_atoms(1);
if (style == BISECTION) irregular->migrate_atoms(1,rcb->sendproc);
else irregular->migrate_atoms(1);
delete irregular;
if (domain->triclinic) domain->lamda2x(atom->nlocal);
@ -382,34 +379,36 @@ void Balance::command(int narg, char **arg)
}
}
if (me == 0) {
if (screen) {
fprintf(screen," x cuts:");
for (int i = 0; i <= comm->procgrid[0]; i++)
fprintf(screen," %g",comm->xsplit[i]);
fprintf(screen,"\n");
fprintf(screen," y cuts:");
for (int i = 0; i <= comm->procgrid[1]; i++)
fprintf(screen," %g",comm->ysplit[i]);
fprintf(screen,"\n");
fprintf(screen," z cuts:");
for (int i = 0; i <= comm->procgrid[2]; i++)
fprintf(screen," %g",comm->zsplit[i]);
fprintf(screen,"\n");
}
if (logfile) {
fprintf(logfile," x cuts:");
for (int i = 0; i <= comm->procgrid[0]; i++)
fprintf(logfile," %g",comm->xsplit[i]);
fprintf(logfile,"\n");
fprintf(logfile," y cuts:");
for (int i = 0; i <= comm->procgrid[1]; i++)
fprintf(logfile," %g",comm->ysplit[i]);
fprintf(logfile,"\n");
fprintf(logfile," z cuts:");
for (int i = 0; i <= comm->procgrid[2]; i++)
fprintf(logfile," %g",comm->zsplit[i]);
fprintf(logfile,"\n");
if (style != BISECTION) {
if (me == 0) {
if (screen) {
fprintf(screen," x cuts:");
for (int i = 0; i <= comm->procgrid[0]; i++)
fprintf(screen," %g",comm->xsplit[i]);
fprintf(screen,"\n");
fprintf(screen," y cuts:");
for (int i = 0; i <= comm->procgrid[1]; i++)
fprintf(screen," %g",comm->ysplit[i]);
fprintf(screen,"\n");
fprintf(screen," z cuts:");
for (int i = 0; i <= comm->procgrid[2]; i++)
fprintf(screen," %g",comm->zsplit[i]);
fprintf(screen,"\n");
}
if (logfile) {
fprintf(logfile," x cuts:");
for (int i = 0; i <= comm->procgrid[0]; i++)
fprintf(logfile," %g",comm->xsplit[i]);
fprintf(logfile,"\n");
fprintf(logfile," y cuts:");
for (int i = 0; i <= comm->procgrid[1]; i++)
fprintf(logfile," %g",comm->ysplit[i]);
fprintf(logfile,"\n");
fprintf(logfile," z cuts:");
for (int i = 0; i <= comm->procgrid[2]; i++)
fprintf(logfile," %g",comm->zsplit[i]);
fprintf(logfile,"\n");
}
}
}
}
@ -467,13 +466,59 @@ double Balance::imbalance_splits(int &max)
return imbalance;
}
/* ----------------------------------------------------------------------
perform balancing via RCB class
sortflag = flag for sorting order of received messages by proc ID
------------------------------------------------------------------------- */
int *Balance::bisection(int sortflag)
{
if (!rcb) rcb = new RCB(lmp);
// NOTE: lo/hi args could be simulation box or particle bounding box
// if particle bbox, then mysplit needs to be reset to sim box
// NOTE: triclinic needs to be in lamda coords
int dim = domain->dimension;
double *boxlo = domain->boxlo;
double *boxhi = domain->boxhi;
double *prd = domain->prd;
rcb->compute(dim,atom->nlocal,atom->x,NULL,boxlo,boxhi);
rcb->invert(sortflag);
// NOTE: this logic is specific to orthogonal boxes, not triclinic
comm->rcbnew = 1;
comm->rcbcut = rcb->cut;
comm->rcbcutdim = rcb->cutdim;
double (*mysplit)[2] = comm->mysplit;
mysplit[0][0] = (rcb->lo[0] - boxlo[0]) / prd[0];
if (rcb->hi[0] == boxhi[0]) mysplit[0][1] = 1.0;
else mysplit[0][1] = (rcb->hi[0] - boxlo[0]) / prd[0];
mysplit[1][0] = (rcb->lo[1] - boxlo[1]) / prd[1];
if (rcb->hi[1] == boxhi[1]) mysplit[1][1] = 1.0;
else mysplit[1][1] = (rcb->hi[1] - boxlo[1]) / prd[1];
mysplit[2][0] = (rcb->lo[2] - boxlo[2]) / prd[2];
if (rcb->hi[2] == boxhi[2]) mysplit[2][1] = 1.0;
else mysplit[2][1] = (rcb->hi[2] - boxlo[2]) / prd[2];
// return list of procs to send my atoms to
return rcb->sendproc;
}
/* ----------------------------------------------------------------------
setup static load balance operations
called from command
called from command and indirectly initially from fix balance
set rho = 0 for static balancing
------------------------------------------------------------------------- */
void Balance::static_setup(char *str)
void Balance::shift_setup_static(char *str)
{
shift_allocate = 1;
@ -498,21 +543,35 @@ void Balance::static_setup(char *str)
losum = new bigint[max+1];
hisum = new bigint[max+1];
// if current layout is TILED, set initial uniform splits in Comm
// this gives starting point to subsequent shift balancing
if (comm->layout == LAYOUT_TILED) {
int *procgrid = comm->procgrid;
double *xsplit = comm->xsplit;
double *ysplit = comm->ysplit;
double *zsplit = comm->zsplit;
for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0];
for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1];
for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2];
xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0;
}
rho = 0;
}
/* ----------------------------------------------------------------------
setup shift load balance operations
called from fix balance
set rho = 1 for shift balancing after call to shift_setup()
set rho = 1 to do dynamic balancing after call to shift_setup_static()
------------------------------------------------------------------------- */
void Balance::shift_setup(char *str, int nitermax_in, double thresh_in)
{
static_setup(str);
shift_setup_static(str);
nitermax = nitermax_in;
stopthresh = thresh_in;
rho = 1;
}
@ -525,7 +584,7 @@ void Balance::shift_setup(char *str, int nitermax_in, double thresh_in)
int Balance::shift()
{
int i,j,k,m,np,max;
double *split = NULL;
double *split;
// no balancing if no atoms
@ -590,7 +649,7 @@ int Balance::shift()
// iterate until balanced
#ifdef BALANCE_DEBUG
if (me == 0) debug_output(idim,0,np,split);
if (me == 0) debug_shift_output(idim,0,np,split);
#endif
int doneflag;
@ -601,7 +660,7 @@ int Balance::shift()
niter++;
#ifdef BALANCE_DEBUG
if (me == 0) debug_output(idim,m+1,np,split);
if (me == 0) debug_shift_output(idim,m+1,np,split);
if (me == 0 && fp) dumpout(update->ntimestep,fp);
#endif
@ -827,7 +886,7 @@ void Balance::dumpout(bigint tstep, FILE *bfp)
int nx = comm->procgrid[0] + 1;
int ny = comm->procgrid[1] + 1;
//int nz = comm->procgrid[2] + 1;
int nz = comm->procgrid[2] + 1;
if (dimension == 2) {
int m = 0;
@ -914,7 +973,7 @@ void Balance::dumpout(bigint tstep, FILE *bfp)
------------------------------------------------------------------------- */
#ifdef BALANCE_DEBUG
void Balance::debug_output(int idim, int m, int np, double *split)
void Balance::debug_shift_output(int idim, int m, int np, double *split)
{
int i;
const char *dim = NULL;

View File

@ -27,11 +27,14 @@ namespace LAMMPS_NS {
class Balance : protected Pointers {
public:
class RCB *rcb;
Balance(class LAMMPS *);
~Balance();
void command(int, char **);
void shift_setup(char *, int, double);
int shift();
int *bisection(int sortflag = 0);
double imbalance_nlocal(int &);
void dumpout(bigint, FILE *);
@ -66,13 +69,13 @@ class Balance : protected Pointers {
FILE *fp;
int firststep;
void static_setup(char *);
double imbalance_splits(int &);
void shift_setup_static(char *);
void tally(int, int, double *);
int adjust(int, double *);
int binary(double, int, double *);
#ifdef BALANCE_DEBUG
void debug_output(int, int, int, double *);
void debug_shift_output(int, int, int, double *);
#endif
};

View File

@ -12,6 +12,7 @@
------------------------------------------------------------------------- */
#include "mpi.h"
#include "stdlib.h"
#include "string.h"
#include "comm.h"
#include "universe.h"
@ -20,9 +21,14 @@
#include "domain.h"
#include "group.h"
#include "procmap.h"
#include "accelerator_kokkos.h"
#include "memory.h"
#include "error.h"
#ifdef _OPENMP
#include "omp.h"
#endif
using namespace LAMMPS_NS;
enum{SINGLE,MULTI}; // same as in Comm sub-styles
@ -47,25 +53,96 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp)
gridflag = ONELEVEL;
mapflag = CART;
customfile = NULL;
outfile = NULL;
recv_from_partition = send_to_partition = -1;
otherflag = 0;
outfile = NULL;
maxexchange_atom = maxexchange_fix = 0;
grid2proc = NULL;
xsplit = ysplit = zsplit = NULL;
rcbnew = 0;
// use of OpenMP threads
// query OpenMP for number of threads/process set by user at run-time
// if the OMP_NUM_THREADS environment variable is not set, we default
// to using 1 thread. This follows the principle of the least surprise,
// while practically all OpenMP implementations violate it by using
// as many threads as there are (virtual) CPU cores by default.
nthreads = 1;
#ifdef _OPENMP
if (lmp->kokkos) {
nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa;
} else if (getenv("OMP_NUM_THREADS") == NULL) {
nthreads = 1;
if (me == 0)
error->warning(FLERR,"OMP_NUM_THREADS environment is not set.");
} else {
nthreads = omp_get_max_threads();
}
// enforce consistent number of threads across all MPI tasks
MPI_Bcast(&nthreads,1,MPI_INT,0,world);
if (!lmp->kokkos) omp_set_num_threads(nthreads);
if (me == 0) {
if (screen)
fprintf(screen," using %d OpenMP thread(s) per MPI task\n",nthreads);
if (logfile)
fprintf(logfile," using %d OpenMP thread(s) per MPI task\n",nthreads);
}
#endif
}
/* ---------------------------------------------------------------------- */
Comm::~Comm()
{
memory->destroy(grid2proc);
memory->destroy(xsplit);
memory->destroy(ysplit);
memory->destroy(zsplit);
delete [] customfile;
delete [] outfile;
}
/* ----------------------------------------------------------------------
deep copy of arrays from old Comm class to new one
all public/protected vectors/arrays in parent Comm class must be copied
called from alternate constructor of child classes
when new comm style is created from Input
------------------------------------------------------------------------- */
void Comm::copy_arrays(Comm *oldcomm)
{
if (oldcomm->grid2proc) {
memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2],
"comm:grid2proc");
memcpy(&grid2proc[0][0][0],&oldcomm->grid2proc[0][0][0],
(procgrid[0]*procgrid[1]*procgrid[2])*sizeof(int));
memory->create(xsplit,procgrid[0]+1,"comm:xsplit");
memory->create(ysplit,procgrid[1]+1,"comm:ysplit");
memory->create(zsplit,procgrid[2]+1,"comm:zsplit");
memcpy(xsplit,oldcomm->xsplit,(procgrid[0]+1)*sizeof(double));
memcpy(ysplit,oldcomm->ysplit,(procgrid[1]+1)*sizeof(double));
memcpy(zsplit,oldcomm->zsplit,(procgrid[2]+1)*sizeof(double));
}
if (customfile) {
int n = strlen(oldcomm->customfile) + 1;
customfile = new char[n];
strcpy(customfile,oldcomm->customfile);
}
if (outfile) {
int n = strlen(oldcomm->outfile) + 1;
outfile = new char[n];
strcpy(outfile,oldcomm->outfile);
}
}
/* ----------------------------------------------------------------------
modify communication params
invoked from input script by comm_modify command

View File

@ -21,21 +21,15 @@ namespace LAMMPS_NS {
class Comm : protected Pointers {
public:
int style; // comm pattern: 0 = 6-way stencil, 1 = irregular tiling
int layout; // current proc domains: 0 = logical bricks, 1 = general tiling
// can do style=1 on layout=0, but not vice versa
// NOTE: uniform needs to be subsumed into layout
int uniform; // 1 = equal subdomains, 0 = load-balanced
int layout; // LAYOUT_UNIFORM = logical equal-sized bricks
// LAYOUT_NONUNIFORM = logical bricks,
// but different sizes due to LB
// LAYOUT_TILED = general tiling, due to RCB LB
int me,nprocs; // proc info
int procgrid[3]; // procs assigned in each dim of 3d grid
int user_procgrid[3]; // user request for procs in each dim
int myloc[3]; // which proc I am in each dim
int procneigh[3][2]; // my 6 neighboring procs, 0/1 = left/right
int ghost_velocity; // 1 if ghost atoms have velocity, 0 if not
double *xsplit,*ysplit,*zsplit; // fractional (0-1) sub-domain sizes
double cutghost[3]; // cutoffs used for acquiring ghost atoms
double cutghostuser; // user-specified ghost cutoff
int ***grid2proc; // which proc owns i,j,k loc in 3d grid
int recv_from_partition; // recv proc layout from this partition
int send_to_partition; // send my proc layout to this partition
// -1 if no recv or send
@ -45,8 +39,27 @@ class Comm : protected Pointers {
int maxexchange_fix; // max contribution to exchange from Fixes
int nthreads; // OpenMP threads per MPI process
// public settings specific to layout = UNIFORM, NONUNIFORM
int procgrid[3]; // procs assigned in each dim of 3d grid
int user_procgrid[3]; // user request for procs in each dim
int myloc[3]; // which proc I am in each dim
int procneigh[3][2]; // my 6 neighboring procs, 0/1 = left/right
double *xsplit,*ysplit,*zsplit; // fractional (0-1) sub-domain sizes
int ***grid2proc; // which proc owns i,j,k loc in 3d grid
// public settings specific to layout = TILED
int rcbnew; // 1 if just reset by rebalance, else 0
double mysplit[3][2]; // fractional (0-1) bounds of my sub-domain
double rcbcut; // RCB cut by this proc
int rcbcutdim; // dimension of RCB cut
// methods
Comm(class LAMMPS *);
virtual ~Comm();
void copy_arrays(class Comm *);
void modify_params(int, char **);
void set_processors(int, char **); // set 3d processor grid attributes

View File

@ -22,6 +22,7 @@
#include "stdio.h"
#include "stdlib.h"
#include "comm_brick.h"
#include "comm_tiled.h"
#include "universe.h"
#include "atom.h"
#include "atom_vec.h"
@ -35,15 +36,10 @@
#include "compute.h"
#include "output.h"
#include "dump.h"
#include "accelerator_kokkos.h"
#include "math_extra.h"
#include "error.h"
#include "memory.h"
#ifdef _OPENMP
#include "omp.h"
#endif
using namespace LAMMPS_NS;
#define BUFFACTOR 1.5
@ -52,58 +48,57 @@ using namespace LAMMPS_NS;
#define BIG 1.0e20
enum{SINGLE,MULTI}; // same as in Comm
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
/* ---------------------------------------------------------------------- */
CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp)
{
style = layout = 0;
style = 0;
layout = LAYOUT_UNIFORM;
init_buffers();
}
recv_from_partition = send_to_partition = -1;
otherflag = 0;
/* ---------------------------------------------------------------------- */
grid2proc = NULL;
CommBrick::~CommBrick()
{
free_swap();
if (mode == MULTI) {
free_multi();
memory->destroy(cutghostmulti);
}
uniform = 1;
if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
memory->sfree(sendlist);
memory->destroy(maxsendlist);
memory->destroy(buf_send);
memory->destroy(buf_recv);
}
/* ---------------------------------------------------------------------- */
CommBrick::CommBrick(LAMMPS *lmp, Comm *oldcomm) : Comm(*oldcomm)
{
if (oldcomm->layout == LAYOUT_TILED)
error->all(FLERR,"Cannot change to comm_style brick from tiled layout");
style = 0;
layout = oldcomm->layout;
copy_arrays(oldcomm);
init_buffers();
}
/* ----------------------------------------------------------------------
initialize comm buffers and other data structs local to CommBrick
------------------------------------------------------------------------- */
void CommBrick::init_buffers()
{
multilo = multihi = NULL;
cutghostmulti = NULL;
// use of OpenMP threads
// query OpenMP for number of threads/process set by user at run-time
// if the OMP_NUM_THREADS environment variable is not set, we default
// to using 1 thread. This follows the principle of the least surprise,
// while practically all OpenMP implementations violate it by using
// as many threads as there are (virtual) CPU cores by default.
nthreads = 1;
#ifdef _OPENMP
if (lmp->kokkos) {
nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa;
} else if (getenv("OMP_NUM_THREADS") == NULL) {
nthreads = 1;
if (me == 0)
error->warning(FLERR,"OMP_NUM_THREADS environment is not set.");
} else {
nthreads = omp_get_max_threads();
}
// enforce consistent number of threads across all MPI tasks
MPI_Bcast(&nthreads,1,MPI_INT,0,world);
if (!lmp->kokkos) omp_set_num_threads(nthreads);
if (me == 0) {
if (screen)
fprintf(screen," using %d OpenMP thread(s) per MPI task\n",nthreads);
if (logfile)
fprintf(logfile," using %d OpenMP thread(s) per MPI task\n",nthreads);
}
#endif
// initialize comm buffers & exchange memory
// NOTE: allow for AtomVec to set maxexchange_atom, e.g. for atom_style body
maxexchange_atom = maxexchange_fix = 0;
maxexchange = maxexchange_atom + maxexchange_fix;
bufextra = maxexchange + BUFEXTRA;
@ -125,26 +120,6 @@ CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp)
/* ---------------------------------------------------------------------- */
CommBrick::~CommBrick()
{
memory->destroy(grid2proc);
free_swap();
if (mode == MULTI) {
free_multi();
memory->destroy(cutghostmulti);
}
if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]);
memory->sfree(sendlist);
memory->destroy(maxsendlist);
memory->destroy(buf_send);
memory->destroy(buf_recv);
}
/* ---------------------------------------------------------------------- */
void CommBrick::init()
{
triclinic = domain->triclinic;
@ -280,12 +255,12 @@ void CommBrick::setup()
// 0 = to left, 1 = to right
// set equal to recvneed[idim][1/0] of neighbor proc
// maxneed[idim] = max procs away any proc recvs atoms in either direction
// uniform = 1 = uniform sized sub-domains:
// layout = UNIFORM = uniform sized sub-domains:
// maxneed is directly computable from sub-domain size
// limit to procgrid-1 for non-PBC
// recvneed = maxneed except for procs near non-PBC
// sendneed = recvneed of neighbor on each side
// uniform = 0 = non-uniform sized sub-domains:
// layout = NONUNIFORM = non-uniform sized sub-domains:
// compute recvneed via updown() which accounts for non-PBC
// sendneed = recvneed of neighbor on each side
// maxneed via Allreduce() of recvneed
@ -293,7 +268,7 @@ void CommBrick::setup()
int *periodicity = domain->periodicity;
int left,right;
if (uniform) {
if (layout == LAYOUT_UNIFORM) {
maxneed[0] = static_cast<int> (cutghost[0] * procgrid[0] / prd[0]) + 1;
maxneed[1] = static_cast<int> (cutghost[1] * procgrid[1] / prd[1]) + 1;
maxneed[2] = static_cast<int> (cutghost[2] * procgrid[2] / prd[2]) + 1;
@ -561,16 +536,15 @@ void CommBrick::forward_comm(int dummy)
} else {
if (comm_x_only) {
if (sendnum[iswap])
n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
x[firstrecv[iswap]],pbc_flag[iswap],
pbc[iswap]);
avec->pack_comm(sendnum[iswap],sendlist[iswap],
x[firstrecv[iswap]],pbc_flag[iswap],pbc[iswap]);
} else if (ghost_velocity) {
n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
buf_send,pbc_flag[iswap],pbc[iswap]);
avec->pack_comm_vel(sendnum[iswap],sendlist[iswap],
buf_send,pbc_flag[iswap],pbc[iswap]);
avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send);
} else {
n = avec->pack_comm(sendnum[iswap],sendlist[iswap],
buf_send,pbc_flag[iswap],pbc[iswap]);
avec->pack_comm(sendnum[iswap],sendlist[iswap],
buf_send,pbc_flag[iswap],pbc[iswap]);
avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send);
}
}
@ -620,10 +594,10 @@ void CommBrick::reverse_comm()
} else {
if (comm_f_only) {
if (sendnum[iswap])
avec->unpack_reverse(sendnum[iswap],sendlist[iswap],
f[firstrecv[iswap]]);
avec->unpack_reverse(sendnum[iswap],sendlist[iswap],
f[firstrecv[iswap]]);
} else {
n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send);
avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_send);
}
}

View File

@ -21,6 +21,7 @@ namespace LAMMPS_NS {
class CommBrick : public Comm {
public:
CommBrick(class LAMMPS *);
CommBrick(class LAMMPS *, class Comm *);
virtual ~CommBrick();
virtual void init();
@ -80,6 +81,7 @@ class CommBrick : public Comm {
int maxexchange; // max # of datums/atom in exchange comm
int bufextra; // extra space beyond maxsend in send buffer
void init_buffers();
int updown(int, int, int, double, int, double *);
// compare cutoff to procs
virtual void grow_send(int, int); // reallocate send buffer

File diff suppressed because it is too large Load Diff

View File

@ -21,6 +21,7 @@ namespace LAMMPS_NS {
class CommTiled : public Comm {
public:
CommTiled(class LAMMPS *);
CommTiled(class LAMMPS *, class Comm *);
virtual ~CommTiled();
void init();
@ -46,22 +47,23 @@ class CommTiled : public Comm {
bigint memory_usage();
private:
int nswap; // # of swaps to perform = 2*dim
int *nsendproc,*nrecvproc; // # of procs to send/recv to/from in each swap
int triclinic; // 0 if domain is orthog, 1 if triclinic
int map_style; // non-0 if global->local mapping is done
int size_forward; // # of per-atom datums in forward comm
int size_reverse; // # of datums in reverse comm
int size_border; // # of datums in forward border comm
int **sendnum,**recvnum; // # of atoms to send/recv per swap/proc
int nswap; // # of swaps to perform = 2*dim
int *nsendproc,*nrecvproc; // # of procs to send/recv to/from in each swap
int *sendother; // 1 if send to any other proc in each swap
int *sendself; // 1 if send to self in each swap
int *nprocmax; // current max # of send procs for each swap
int **sendproc,**recvproc; // proc to send/recv to/from per swap/proc
int **sendnum,**recvnum; // # of atoms to send/recv per swap/proc
int **size_forward_recv; // # of values to recv in each forward swap/proc
int **firstrecv; // where to put 1st recv atom per swap/proc
int **size_reverse_send; // # to send in each reverse comm per swap/proc
int **size_reverse_recv; // # to recv in each reverse comm per swap/proc
int **forward_recv_offset; // forward comm offsets in buf_recv per swap/proc
int **reverse_recv_offset; // reverse comm offsets in buf_recv per swap/proc
@ -77,16 +79,53 @@ class CommTiled : public Comm {
int maxsend,maxrecv; // current size of send/recv buffer
int maxforward,maxreverse; // max # of datums in forward/reverse comm
int maxexchange; // max # of datums/atom in exchange comm
int bufextra; // extra space beyond maxsend in send buffer
int maxreqstat; // max size of Request and Status vectors
MPI_Request *requests;
MPI_Status *statuses;
int comm_x_only,comm_f_only; // 1 if only exchange x,f in for/rev comm
void grow_send(int, int); // reallocate send buffer
void grow_recv(int); // free/allocate recv buffer
void grow_list(int, int, int); // reallocate sendlist for one swap/proc
struct RCBinfo {
double mysplit[3][2]; // fractional RCB bounding box for one proc
double cut; // position of cut this proc owns
int dim; // dimension = 0/1/2 of cut
};
int noverlap; // # of overlapping procs
int maxoverlap; // current max length of overlap
int *overlap; // list of overlapping procs
RCBinfo *rcbinfo; // list of RCB info for all procs
double *prd; // local ptrs to Domain attributes
double *boxlo,*boxhi;
double *sublo,*subhi;
void init_buffers();
// box drop and other functions
typedef void (CommTiled::*BoxDropPtr)(int, double *, double *, int &);
BoxDropPtr box_drop;
void box_drop_brick(int, double *, double *, int &);
void box_drop_tiled(int, double *, double *, int &);
void box_drop_tiled_recurse(double *, double *, int, int, int &);
typedef void (CommTiled::*BoxOtherPtr)(int, int, int, double *, double *);
BoxOtherPtr box_other;
void box_other_brick(int, int, int, double *, double *);
void box_other_tiled(int, int, int, double *, double *);
void grow_send(int, int); // reallocate send buffer
void grow_recv(int); // free/allocate recv buffer
void grow_list(int, int, int); // reallocate sendlist for one swap/proc
void allocate_swap(int); // allocate swap arrays
void grow_swap_send(int, int, int); // grow swap arrays for send and recv
void grow_swap_recv(int, int);
void deallocate_swap(int); // deallocate swap arrays
};
}

View File

@ -27,6 +27,8 @@
#include "domain.h"
#include "lattice.h"
#include "region.h"
#include "input.h"
#include "variable.h"
#include "random_park.h"
#include "random_mars.h"
#include "math_extra.h"
@ -41,6 +43,7 @@ using namespace MathConst;
enum{BOX,REGION,SINGLE,RANDOM};
enum{ATOM,MOLECULE};
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
/* ---------------------------------------------------------------------- */
@ -103,6 +106,8 @@ void CreateAtoms::command(int narg, char **arg)
remapflag = 0;
mode = ATOM;
int molseed;
varflag = 0;
vstr = xstr = ystr = zstr = NULL;
nbasis = domain->lattice->nbasis;
basistype = new int[nbasis];
@ -141,6 +146,33 @@ void CreateAtoms::command(int narg, char **arg)
else if (strcmp(arg[iarg+1],"lattice") == 0) scaleflag = 1;
else error->all(FLERR,"Illegal create_atoms command");
iarg += 2;
} else if (strcmp(arg[iarg],"var") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal create_atoms command");
delete [] vstr;
int n = strlen(arg[iarg+1]) + 1;
vstr = new char[n];
strcpy(vstr,arg[iarg+1]);
varflag = 1;
iarg += 2;
} else if (strcmp(arg[iarg],"set") == 0) {
if (iarg+3 > narg) error->all(FLERR,"Illegal create_atoms command");
if (strcmp(arg[iarg+1],"x") == 0) {
delete [] xstr;
int n = strlen(arg[iarg+2]) + 1;
xstr = new char[n];
strcpy(xstr,arg[iarg+2]);
} else if (strcmp(arg[iarg+1],"y") == 0) {
delete [] ystr;
int n = strlen(arg[iarg+2]) + 1;
ystr = new char[n];
strcpy(ystr,arg[iarg+2]);
} else if (strcmp(arg[iarg+1],"z") == 0) {
delete [] zstr;
int n = strlen(arg[iarg+2]) + 1;
zstr = new char[n];
strcpy(zstr,arg[iarg+2]);
} else error->all(FLERR,"Illegal create_atoms command");
iarg += 3;
} else error->all(FLERR,"Illegal create_atoms command");
}
@ -178,6 +210,47 @@ void CreateAtoms::command(int narg, char **arg)
ranmol = new RanMars(lmp,molseed+comm->me);
}
// error check and further setup for variable test
// save local copy of each equal variable string so can restore at end
if (!vstr && (xstr || ystr || zstr))
error->all(FLERR,"Incomplete use of variables in create_atoms command");
if (vstr && (!xstr && !ystr && !zstr))
error->all(FLERR,"Incomplete use of variables in create_atoms command");
if (varflag) {
vvar = input->variable->find(vstr);
if (vvar < 0)
error->all(FLERR,"Variable name for create_atoms does not exist");
if (!input->variable->equalstyle(vvar))
error->all(FLERR,"Variable for create_atoms is invalid style");
if (xstr) {
xvar = input->variable->find(xstr);
if (xvar < 0)
error->all(FLERR,"Variable name for create_atoms does not exist");
if (!input->variable->equalstyle(xvar))
error->all(FLERR,"Variable for create_atoms is invalid style");
input->variable->equal_save(xvar,xstr_copy);
}
if (ystr) {
yvar = input->variable->find(ystr);
if (yvar < 0)
error->all(FLERR,"Variable name for create_atoms does not exist");
if (!input->variable->equalstyle(yvar))
error->all(FLERR,"Variable for create_atoms is invalid style");
input->variable->equal_save(yvar,ystr_copy);
}
if (zstr) {
zvar = input->variable->find(zstr);
if (zvar < 0)
error->all(FLERR,"Variable name for create_atoms does not exist");
if (!input->variable->equalstyle(zvar))
error->all(FLERR,"Variable for create_atoms is invalid style");
input->variable->equal_save(zvar,zstr_copy);
}
}
// demand non-none lattice be defined for BOX and REGION
// else setup scaling for SINGLE and RANDOM
// could use domain->lattice->lattice2box() to do conversion of
@ -226,17 +299,32 @@ void CreateAtoms::command(int narg, char **arg)
}
if (style == BOX || style == REGION) {
if (domain->xperiodic) {
if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] -= 2.0*epsilon[0];
}
if (domain->yperiodic) {
if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] -= 2.0*epsilon[1];
}
if (domain->zperiodic) {
if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] -= 2.0*epsilon[2];
if (comm->layout != LAYOUT_TILED) {
if (domain->xperiodic) {
if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] -= 2.0*epsilon[0];
}
if (domain->yperiodic) {
if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] -= 2.0*epsilon[1];
}
if (domain->zperiodic) {
if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] -= 2.0*epsilon[2];
}
} else {
if (domain->xperiodic) {
if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0];
if (comm->mysplit[0][1] == 1.0) subhi[0] -= 2.0*epsilon[0];
}
if (domain->yperiodic) {
if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1];
if (comm->mysplit[1][1] == 1.0) subhi[1] -= 2.0*epsilon[1];
}
if (domain->zperiodic) {
if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2];
if (comm->mysplit[2][1] == 1.0) subhi[2] -= 2.0*epsilon[2];
}
}
}
@ -259,6 +347,14 @@ void CreateAtoms::command(int narg, char **arg)
fix->set_arrays(i);
}
// restore each equal variable string previously saved
if (varflag) {
if (xstr) input->variable->equal_restore(xvar,xstr_copy);
if (ystr) input->variable->equal_restore(yvar,ystr_copy);
if (zstr) input->variable->equal_restore(zvar,zstr_copy);
}
// set new total # of atoms and error check
bigint nblocal = atom->nlocal;
@ -409,6 +505,10 @@ void CreateAtoms::command(int narg, char **arg)
delete ranmol;
if (domain->lattice) delete [] basistype;
delete [] vstr;
delete [] xstr;
delete [] ystr;
delete [] zstr;
// print status
@ -509,7 +609,7 @@ void CreateAtoms::add_random()
}
// generate random positions for each new atom/molecule within bounding box
// iterate until atom is within region and triclinic simulation box
// iterate until atom is within region, variable, and triclinic simulation box
// if final atom position is in my subbox, create it
if (xlo > xhi || ylo > yhi || zlo > zhi)
@ -527,6 +627,7 @@ void CreateAtoms::add_random()
if (nregion >= 0 &&
domain->regions[nregion]->match(xone[0],xone[1],xone[2]) == 0)
valid = 0;
if (varflag && vartest(xone) == 0) valid = 0;
if (triclinic) {
domain->x2lamda(xone,lamda);
coord = lamda;
@ -642,6 +743,10 @@ void CreateAtoms::add_lattice()
if (style == REGION)
if (!domain->regions[nregion]->match(x[0],x[1],x[2])) continue;
// if variable test specified, eval variable
if (varflag && vartest(x) == 0) continue;
// test if atom/molecule position is in my subbox
if (triclinic) {
@ -696,3 +801,19 @@ void CreateAtoms::add_molecule(double *center)
atom->add_molecule_atom(onemol,m,n,0);
}
}
/* ----------------------------------------------------------------------
test a generated atom position against variable evaluation
first plug in x,y,z values as requested
------------------------------------------------------------------------- */
int CreateAtoms::vartest(double *x)
{
if (xstr) input->variable->equal_override(xvar,x[0]);
if (ystr) input->variable->equal_override(yvar,x[1]);
if (zstr) input->variable->equal_override(zvar,x[2]);
double value = input->variable->compute_equal(vvar);
if (value == 0.0) return 0;
return 1;
}

View File

@ -35,6 +35,10 @@ class CreateAtoms : protected Pointers {
double xone[3];
int remapflag;
int varflag,vvar,xvar,yvar,zvar;
char *vstr,*xstr,*ystr,*zstr;
char *xstr_copy,*ystr_copy,*zstr_copy;
class Molecule *onemol;
class RanMars *ranmol;
@ -45,6 +49,7 @@ class CreateAtoms : protected Pointers {
void add_random();
void add_lattice();
void add_molecule(double *);
int vartest(double *); // evaluate a variable with new atom position
};
}

View File

@ -44,14 +44,15 @@
using namespace LAMMPS_NS;
using namespace MathConst;
enum{NO_REMAP,X_REMAP,V_REMAP}; // same as fix_deform.cpp
enum{IGNORE,WARN,ERROR}; // same as thermo.cpp
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
#define BIG 1.0e20
#define SMALL 1.0e-4
#define DELTAREGION 4
#define BONDSTRETCH 1.1
enum{NO_REMAP,X_REMAP,V_REMAP}; // same as fix_deform.cpp
enum{IGNORE,WARN,ERROR}; // same as thermo.cpp
/* ----------------------------------------------------------------------
default is periodic
------------------------------------------------------------------------- */
@ -249,55 +250,81 @@ void Domain::set_global_box()
/* ----------------------------------------------------------------------
set lamda box params
assumes global box is defined and proc assignment has been made
uses comm->xyz_split to define subbox boundaries in consistent manner
uses comm->xyz_split or comm->mysplit
to define subbox boundaries in consistent manner
------------------------------------------------------------------------- */
void Domain::set_lamda_box()
{
int *myloc = comm->myloc;
double *xsplit = comm->xsplit;
double *ysplit = comm->ysplit;
double *zsplit = comm->zsplit;
if (comm->layout != LAYOUT_TILED) {
int *myloc = comm->myloc;
double *xsplit = comm->xsplit;
double *ysplit = comm->ysplit;
double *zsplit = comm->zsplit;
sublo_lamda[0] = xsplit[myloc[0]];
subhi_lamda[0] = xsplit[myloc[0]+1];
sublo_lamda[0] = xsplit[myloc[0]];
subhi_lamda[0] = xsplit[myloc[0]+1];
sublo_lamda[1] = ysplit[myloc[1]];
subhi_lamda[1] = ysplit[myloc[1]+1];
sublo_lamda[2] = zsplit[myloc[2]];
subhi_lamda[2] = zsplit[myloc[2]+1];
sublo_lamda[1] = ysplit[myloc[1]];
subhi_lamda[1] = ysplit[myloc[1]+1];
} else {
double (*mysplit)[2] = comm->mysplit;
sublo_lamda[2] = zsplit[myloc[2]];
subhi_lamda[2] = zsplit[myloc[2]+1];
sublo_lamda[0] = mysplit[0][0];
subhi_lamda[0] = mysplit[0][1];
sublo_lamda[1] = mysplit[1][0];
subhi_lamda[1] = mysplit[1][1];
sublo_lamda[2] = mysplit[2][0];
subhi_lamda[2] = mysplit[2][1];
}
}
/* ----------------------------------------------------------------------
set local subbox params for orthogonal boxes
assumes global box is defined and proc assignment has been made
uses comm->xyz_split to define subbox boundaries in consistent manner
uses comm->xyz_split or comm->mysplit
to define subbox boundaries in consistent manner
insure subhi[max] = boxhi
------------------------------------------------------------------------- */
void Domain::set_local_box()
{
int *myloc = comm->myloc;
int *procgrid = comm->procgrid;
double *xsplit = comm->xsplit;
double *ysplit = comm->ysplit;
double *zsplit = comm->zsplit;
if (triclinic) return;
if (comm->layout != LAYOUT_TILED) {
int *myloc = comm->myloc;
int *procgrid = comm->procgrid;
double *xsplit = comm->xsplit;
double *ysplit = comm->ysplit;
double *zsplit = comm->zsplit;
if (triclinic == 0) {
sublo[0] = boxlo[0] + xprd*xsplit[myloc[0]];
if (myloc[0] < procgrid[0]-1)
subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1];
if (myloc[0] < procgrid[0]-1) subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1];
else subhi[0] = boxhi[0];
sublo[1] = boxlo[1] + yprd*ysplit[myloc[1]];
if (myloc[1] < procgrid[1]-1)
subhi[1] = boxlo[1] + yprd*ysplit[myloc[1]+1];
if (myloc[1] < procgrid[1]-1) subhi[1] = boxlo[1] + yprd*ysplit[myloc[1]+1];
else subhi[1] = boxhi[1];
sublo[2] = boxlo[2] + zprd*zsplit[myloc[2]];
if (myloc[2] < procgrid[2]-1)
subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1];
if (myloc[2] < procgrid[2]-1) subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1];
else subhi[2] = boxhi[2];
} else {
double (*mysplit)[2] = comm->mysplit;
sublo[0] = boxlo[0] + xprd*mysplit[0][0];
if (mysplit[0][1] < 1.0) subhi[0] = boxlo[0] + xprd*mysplit[0][1];
else subhi[0] = boxhi[0];
sublo[1] = boxlo[1] + yprd*mysplit[1][0];
if (mysplit[1][1] < 1.0) subhi[1] = boxlo[1] + yprd*mysplit[1][1];
else subhi[1] = boxhi[1];
sublo[2] = boxlo[2] + zprd*mysplit[2][0];
if (mysplit[2][1] < 1.0) subhi[2] = boxlo[2] + zprd*mysplit[2][1];
else subhi[2] = boxhi[2];
}
}

View File

@ -34,7 +34,7 @@ using namespace LAMMPS_NS;
// customize by adding keyword
// also customize compute_atom_property.cpp
enum{ID,MOL,PROC,TYPE,ELEMENT,MASS,
enum{ID,MOL,PROC,PROCP1,TYPE,ELEMENT,MASS,
X,Y,Z,XS,YS,ZS,XSTRI,YSTRI,ZSTRI,XU,YU,ZU,XUTRI,YUTRI,ZUTRI,
XSU,YSU,ZSU,XSUTRI,YSUTRI,ZSUTRI,
IX,IY,IZ,
@ -470,6 +470,10 @@ int DumpCustom::count()
for (i = 0; i < nlocal; i++) dchoose[i] = me;
ptr = dchoose;
nstride = 1;
} else if (thresh_array[ithresh] == PROCP1) {
for (i = 0; i < nlocal; i++) dchoose[i] = me;
ptr = dchoose;
nstride = 1;
} else if (thresh_array[ithresh] == TYPE) {
int *type = atom->type;
for (i = 0; i < nlocal; i++) dchoose[i] = type[i];
@ -1018,6 +1022,9 @@ int DumpCustom::parse_fields(int narg, char **arg)
} else if (strcmp(arg[iarg],"proc") == 0) {
pack_choice[i] = &DumpCustom::pack_proc;
vtype[i] = INT;
} else if (strcmp(arg[iarg],"procp1") == 0) {
pack_choice[i] = &DumpCustom::pack_procp1;
vtype[i] = INT;
} else if (strcmp(arg[iarg],"type") == 0) {
pack_choice[i] = &DumpCustom::pack_type;
vtype[i] = INT;
@ -1497,6 +1504,7 @@ int DumpCustom::modify_param(int narg, char **arg)
if (strcmp(arg[1],"id") == 0) thresh_array[nthresh] = ID;
else if (strcmp(arg[1],"mol") == 0) thresh_array[nthresh] = MOL;
else if (strcmp(arg[1],"proc") == 0) thresh_array[nthresh] = PROC;
else if (strcmp(arg[1],"procp1") == 0) thresh_array[nthresh] = PROCP1;
else if (strcmp(arg[1],"type") == 0) thresh_array[nthresh] = TYPE;
else if (strcmp(arg[1],"mass") == 0) thresh_array[nthresh] = MASS;
@ -1876,6 +1884,16 @@ void DumpCustom::pack_proc(int n)
/* ---------------------------------------------------------------------- */
void DumpCustom::pack_procp1(int n)
{
for (int i = 0; i < nchoose; i++) {
buf[n] = me+1;
n += size_one;
}
}
/* ---------------------------------------------------------------------- */
void DumpCustom::pack_type(int n)
{
int *type = atom->type;

View File

@ -124,6 +124,7 @@ class DumpCustom : public Dump {
void pack_id(int);
void pack_molecule(int);
void pack_proc(int);
void pack_procp1(int);
void pack_type(int);
void pack_mass(int);

View File

@ -498,8 +498,6 @@ void FixAdapt::restore_settings()
} else if (ad->which == ATOM) {
if (diamflag) {
int mflag = 0;
if (atom->rmass_flag) mflag = 1;
double density;
double *vec = fix_diam->vstore;

View File

@ -22,12 +22,14 @@
#include "irregular.h"
#include "force.h"
#include "kspace.h"
#include "rcb.h"
#include "error.h"
using namespace LAMMPS_NS;
using namespace FixConst;
enum{SHIFT,RCB};
enum{SHIFT,BISECTION};
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
/* ---------------------------------------------------------------------- */
@ -53,7 +55,7 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
thresh = force->numeric(FLERR,arg[4]);
if (strcmp(arg[5],"shift") == 0) lbstyle = SHIFT;
else if (strcmp(arg[5],"rcb") == 0) lbstyle = RCB;
else if (strcmp(arg[5],"rcb") == 0) lbstyle = BISECTION;
else error->all(FLERR,"Illegal fix balance command");
int iarg = 5;
@ -65,7 +67,7 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
stopthresh = force->numeric(FLERR,arg[iarg+3]);
if (stopthresh < 1.0) error->all(FLERR,"Illegal fix balance command");
iarg += 4;
} else if (lbstyle == RCB) {
} else if (lbstyle == BISECTION) {
iarg++;
}
@ -97,14 +99,16 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) :
}
}
// create instance of Balance class and initialize it with params
// NOTE: do I need Balance instance if RCB?
// create instance of Irregular class
if (lbstyle == BISECTION && comm->style == 0)
error->all(FLERR,"Fix balance rcb cannot be used with comm_style brick");
// create instance of Balance class
// if SHIFT, initialize it with params
balance = new Balance(lmp);
if (lbstyle == SHIFT) balance->shift_setup(bstr,nitermax,thresh);
if (lbstyle == RCB) error->all(FLERR,"Fix balance rcb is not yet supported");
// create instance of Irregular class
irregular = new Irregular(lmp);
@ -238,32 +242,33 @@ void FixBalance::rebalance()
{
imbprev = imbnow;
// invoke balancer and reset comm->uniform flag
int *sendproc;
if (lbstyle == SHIFT) {
itercount = balance->shift();
} else if (lbstyle == RCB) {
comm->layout = LAYOUT_NONUNIFORM;
} else if (lbstyle == BISECTION) {
sendproc = balance->bisection();
comm->layout = LAYOUT_TILED;
}
// output of final result
if (fp) balance->dumpout(update->ntimestep,fp);
// reset comm->uniform flag
// NOTE: this needs to change with RCB
comm->uniform = 0;
// reset proc sub-domains
if (domain->triclinic) domain->set_lamda_box();
domain->set_local_box();
// if splits moved further than neighboring processor
// move atoms to new processors via irregular()
// only needed if migrate_check() says an atom moves to far,
// only needed if migrate_check() says an atom moves to far
// else allow caller's comm->exchange() to do it
if (domain->triclinic) domain->x2lamda(atom->nlocal);
if (irregular->migrate_check()) irregular->migrate_atoms();
if (lbstyle == BISECTION) irregular->migrate_atoms(0,sendproc);
else if (irregular->migrate_check()) irregular->migrate_atoms();
if (domain->triclinic) domain->lamda2x(atom->nlocal);
// invoke KSpace setup_grid() to adjust to new proc sub-domains
@ -303,5 +308,6 @@ double FixBalance::compute_vector(int i)
double FixBalance::memory_usage()
{
double bytes = irregular->memory_usage();
if (balance->rcb) bytes += balance->rcb->memory_usage();
return bytes;
}

View File

@ -987,3 +987,14 @@ void FixDeform::options(int narg, char **arg)
} else error->all(FLERR,"Illegal fix deform command");
}
}
/* ----------------------------------------------------------------------
memory usage of Irregular
------------------------------------------------------------------------- */
double FixDeform::memory_usage()
{
double bytes = 0.0;
if (irregular) bytes += irregular->memory_usage();
return bytes;
}

View File

@ -35,6 +35,7 @@ class FixDeform : public Fix {
void init();
void pre_exchange();
void end_of_step();
double memory_usage();
private:
int triclinic,scaleflag,flipflag;

View File

@ -2271,3 +2271,14 @@ void FixNH::pre_exchange()
domain->lamda2x(atom->nlocal);
}
}
/* ----------------------------------------------------------------------
memory usage of Irregular
------------------------------------------------------------------------- */
double FixNH::memory_usage()
{
double bytes = 0.0;
if (irregular) bytes += irregular->memory_usage();
return bytes;
}

View File

@ -39,6 +39,7 @@ class FixNH : public Fix {
void reset_target(double);
void reset_dt();
virtual void *extract(const char*,int &);
double memory_usage();
protected:
int dimension,which;

View File

@ -1161,19 +1161,15 @@ void Input::comm_style()
{
if (narg < 1) error->all(FLERR,"Illegal comm_style command");
if (strcmp(arg[0],"brick") == 0) {
if (comm->layout)
error->all(FLERR,
"Cannot switch to comm style brick from "
"irregular tiling of proc domains");
comm = new CommBrick(lmp);
// NOTE: this will lose load balancing info in old CommBrick
if (domain->box_exist) {
comm->set_proc_grid();
domain->set_local_box();
}
if (comm->style == 0) return;
Comm *oldcomm = comm;
comm = new CommBrick(lmp,oldcomm);
delete oldcomm;
} else if (strcmp(arg[0],"tiled") == 0) {
error->all(FLERR,"Comm_style tiled not yet supported");
comm = new CommTiled(lmp);
if (comm->style == 1) return;
Comm *oldcomm = comm;
comm = new CommTiled(lmp,oldcomm);
delete oldcomm;
} else error->all(FLERR,"Illegal comm_style command");
}

View File

@ -30,6 +30,8 @@ using namespace LAMMPS_NS;
int *Irregular::proc_recv_copy;
int compare_standalone(const void *, const void *);
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
#define BUFFACTOR 1.5
#define BUFMIN 1000
#define BUFEXTRA 1000
@ -43,13 +45,26 @@ Irregular::Irregular(LAMMPS *lmp) : Pointers(lmp)
triclinic = domain->triclinic;
map_style = atom->map_style;
procgrid = comm->procgrid;
grid2proc = comm->grid2proc;
aplan = NULL;
dplan = NULL;
// migrate work vectors
// initialize buffers for atom comm, not used for datum comm
maxlocal = 0;
mproclist = NULL;
msizes = NULL;
// send buffers
maxdbuf = 0;
dbuf = NULL;
maxbuf = 0;
buf = NULL;
// universal work vectors
memory->create(work1,nprocs,"irregular:work1");
memory->create(work2,nprocs,"irregular:work2");
// initialize buffers for migrate atoms, not used for datum comm
// these can persist for multiple irregular operations
maxsend = BUFMIN;
@ -62,9 +77,12 @@ Irregular::Irregular(LAMMPS *lmp) : Pointers(lmp)
Irregular::~Irregular()
{
if (aplan) destroy_atom();
if (dplan) destroy_data();
memory->destroy(mproclist);
memory->destroy(msizes);
memory->destroy(dbuf);
memory->destroy(buf);
memory->destroy(work1);
memory->destroy(work2);
memory->destroy(buf_send);
memory->destroy(buf_recv);
}
@ -74,11 +92,13 @@ Irregular::~Irregular()
can be used in place of comm->exchange()
unlike exchange(), allows atoms to have moved arbitrarily long distances
sets up irregular plan, invokes it, destroys it
sortflag = flag for sorting order of received messages by proc ID
procassign = non-NULL if already know procs atoms are assigned to (from RCB)
atoms MUST be remapped to be inside simulation box before this is called
for triclinic: atoms must be in lamda coords (0-1) before this is called
------------------------------------------------------------------------- */
void Irregular::migrate_atoms(int sortflag)
void Irregular::migrate_atoms(int sortflag, int *procassign)
{
// clear global->local map since atoms move to new procs
// clear old ghosts so map_set() at end will operate only on local atoms
@ -101,13 +121,16 @@ void Irregular::migrate_atoms(int sortflag)
subhi = domain->subhi_lamda;
}
uniform = comm->uniform;
layout = comm->layout;
xsplit = comm->xsplit;
ysplit = comm->ysplit;
zsplit = comm->zsplit;
boxlo = domain->boxlo;
prd = domain->prd;
procgrid = comm->procgrid;
grid2proc = comm->grid2proc;
// loop over atoms, flag any that are not in my sub-box
// fill buffer with atoms leaving my box, using < and >=
// assign which proc it belongs to via coord2proc()
@ -119,41 +142,63 @@ void Irregular::migrate_atoms(int sortflag)
double **x = atom->x;
int nlocal = atom->nlocal;
if (nlocal > maxlocal) {
maxlocal = nlocal;
memory->destroy(mproclist);
memory->destroy(msizes);
memory->create(mproclist,maxlocal,"irregular:mproclist");
memory->create(msizes,maxlocal,"irregular:msizes");
}
int igx,igy,igz;
int nsend = 0;
int nsendatom = 0;
int *sizes = new int[nlocal];
int *proclist = new int[nlocal];
int igx,igy,igz;
int i = 0;
while (i < nlocal) {
if (x[i][0] < sublo[0] || x[i][0] >= subhi[0] ||
x[i][1] < sublo[1] || x[i][1] >= subhi[1] ||
x[i][2] < sublo[2] || x[i][2] >= subhi[2]) {
proclist[nsendatom] = coord2proc(x[i],igx,igy,igz);
if (proclist[nsendatom] != me) {
if (procassign) {
while (i < nlocal) {
if (procassign[i] == me) i++;
else {
mproclist[nsendatom] = procassign[i];
if (nsend > maxsend) grow_send(nsend,1);
sizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]);
nsend += sizes[nsendatom];
msizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]);
nsend += msizes[nsendatom];
nsendatom++;
avec->copy(nlocal-1,i,1);
procassign[i] = procassign[nlocal-1];
nlocal--;
}
}
} else {
while (i < nlocal) {
if (x[i][0] < sublo[0] || x[i][0] >= subhi[0] ||
x[i][1] < sublo[1] || x[i][1] >= subhi[1] ||
x[i][2] < sublo[2] || x[i][2] >= subhi[2]) {
mproclist[nsendatom] = coord2proc(x[i],igx,igy,igz);
if (mproclist[nsendatom] == me) i++;
else {
if (nsend > maxsend) grow_send(nsend,1);
msizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]);
nsend += msizes[nsendatom];
nsendatom++;
avec->copy(nlocal-1,i,1);
nlocal--;
}
} else i++;
} else i++;
}
}
atom->nlocal = nlocal;
// create irregular communication plan, perform comm, destroy plan
// returned nrecv = size of buffer needed for incoming atoms
int nrecv = create_atom(nsendatom,sizes,proclist,sortflag);
int nrecv = create_atom(nsendatom,msizes,mproclist,sortflag);
if (nrecv > maxrecv) grow_recv(nrecv);
exchange_atom(buf_send,sizes,buf_recv);
exchange_atom(buf_send,msizes,buf_recv);
destroy_atom();
delete [] sizes;
delete [] proclist;
// add received atoms to my list
int m = 0;
@ -174,6 +219,11 @@ void Irregular::migrate_atoms(int sortflag)
int Irregular::migrate_check()
{
// migrate required if comm layout is tiled
// cannot use myloc[] logic below
if (comm->layout == LAYOUT_TILED) return 1;
// subbox bounds for orthogonal or triclinic box
// other comm/domain data used by coord2proc()
@ -186,13 +236,16 @@ int Irregular::migrate_check()
subhi = domain->subhi_lamda;
}
uniform = comm->uniform;
layout = comm->layout;
xsplit = comm->xsplit;
ysplit = comm->ysplit;
zsplit = comm->zsplit;
boxlo = domain->boxlo;
prd = domain->prd;
procgrid = comm->procgrid;
grid2proc = comm->grid2proc;
// loop over atoms, check for any that are not in my sub-box
// assign which proc it belongs to via coord2proc()
// if logical igx,igy,igz of newproc > one away from myloc, set flag = 1
@ -250,6 +303,7 @@ int Irregular::migrate_check()
n = # of atoms to send
sizes = # of doubles for each atom
proclist = proc to send each atom to (not including self)
sortflag = flag for sorting order of received messages by proc ID
return total # of doubles I will recv (not including self)
------------------------------------------------------------------------- */
@ -257,51 +311,60 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
{
int i;
// allocate plan and work vectors
if (aplan) destroy_atom();
aplan = (PlanAtom *) memory->smalloc(sizeof(PlanAtom),"irregular:aplan");
int *list = new int[nprocs];
int *count = new int[nprocs];
// nrecv = # of messages I receive
// setup for collective comm
// work1 = 1 for procs I send a message to, not including self
// work2 = 1 for all procs, used for ReduceScatter
for (i = 0; i < nprocs; i++) {
list[i] = 0;
count[i] = 1;
work1[i] = 0;
work2[i] = 1;
}
for (i = 0; i < n; i++) list[proclist[i]] = 1;
for (i = 0; i < n; i++) work1[proclist[i]] = 1;
work1[me] = 0;
int nrecv;
MPI_Reduce_scatter(list,&nrecv,count,MPI_INT,MPI_SUM,world);
// nrecv_proc = # of procs I receive messages from, not including self
// options for performing ReduceScatter operation
// some are more efficient on some machines at big sizes
#ifdef LAMMPS_RS_ALLREDUCE_INPLACE
MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world);
nrecv_proc = work1[me];
#else
#ifdef LAMMPS_RS_ALLREDUCE
MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world);
nrecv_proc = work2[me];
#else
MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world);
#endif
#endif
// allocate receive arrays
int *proc_recv = new int[nrecv];
int *length_recv = new int[nrecv];
MPI_Request *request = new MPI_Request[nrecv];
MPI_Status *status = new MPI_Status[nrecv];
proc_recv = new int[nrecv_proc];
length_recv = new int[nrecv_proc];
request = new MPI_Request[nrecv_proc];
status = new MPI_Status[nrecv_proc];
// nsend = # of messages I send
// nsend_proc = # of messages I send
for (i = 0; i < nprocs; i++) list[i] = 0;
for (i = 0; i < n; i++) list[proclist[i]] += sizes[i];
for (i = 0; i < nprocs; i++) work1[i] = 0;
for (i = 0; i < n; i++) work1[proclist[i]] += sizes[i];
int nsend = 0;
nsend_proc = 0;
for (i = 0; i < nprocs; i++)
if (list[i]) nsend++;
if (work1[i]) nsend_proc++;
// allocate send arrays
int *proc_send = new int[nsend];
int *length_send = new int[nsend];
int *num_send = new int[nsend];
int *index_send = new int[n];
int *offset_send = new int[n];
proc_send = new int[nsend_proc];
length_send = new int[nsend_proc];
num_send = new int[nsend_proc];
index_send = new int[n];
offset_send = new int[n];
// list still stores size of message for procs I send to
// proc_send = procs I send to
// length_send = total size of message I send to each proc
// length_send = # of doubles I send to each proc
// to balance pattern of send messages:
// each proc begins with iproc > me, continues until iproc = me
// reset list to store which send message each proc corresponds to
@ -311,81 +374,81 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
for (i = 0; i < nprocs; i++) {
iproc++;
if (iproc == nprocs) iproc = 0;
if (list[iproc] > 0) {
if (work1[iproc] > 0) {
proc_send[isend] = iproc;
length_send[isend] = list[iproc];
list[iproc] = isend;
length_send[isend] = work1[iproc];
work1[iproc] = isend;
isend++;
}
}
// num_send = # of atoms I send to each proc
for (i = 0; i < nsend; i++) num_send[i] = 0;
for (i = 0; i < nsend_proc; i++) num_send[i] = 0;
for (i = 0; i < n; i++) {
isend = list[proclist[i]];
isend = work1[proclist[i]];
num_send[isend]++;
}
// count = offsets into index_send for each proc I send to
// work2 = offsets into index_send for each proc I send to
// index_send = list of which atoms to send to each proc
// 1st N1 values are atom indices for 1st proc,
// next N2 values are atom indices for 2nd proc, etc
// offset_send = where each atom starts in send buffer
count[0] = 0;
for (i = 1; i < nsend; i++) count[i] = count[i-1] + num_send[i-1];
work2[0] = 0;
for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1];
for (i = 0; i < n; i++) {
isend = list[proclist[i]];
index_send[count[isend]++] = i;
isend = work1[proclist[i]];
index_send[work2[isend]++] = i;
if (i) offset_send[i] = offset_send[i-1] + sizes[i-1];
else offset_send[i] = 0;
}
// tell receivers how much data I send
// sendmax = largest # of doubles I send in a single message
// sendmax_proc = # of doubles I send in largest single message
int sendmax = 0;
for (i = 0; i < nsend; i++) {
sendmax_proc = 0;
for (i = 0; i < nsend_proc; i++) {
MPI_Send(&length_send[i],1,MPI_INT,proc_send[i],0,world);
sendmax = MAX(sendmax,length_send[i]);
sendmax_proc = MAX(sendmax_proc,length_send[i]);
}
// receive incoming messages
// proc_recv = procs I recv from
// length_recv = total size of message each proc sends me
// nrecvsize = total size of data I recv
// length_recv = # of doubles each proc sends me
// nrecvsize = total size of atom data I recv
int nrecvsize = 0;
for (i = 0; i < nrecv; i++) {
for (i = 0; i < nrecv_proc; i++) {
MPI_Recv(&length_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status);
proc_recv[i] = status->MPI_SOURCE;
nrecvsize += length_recv[i];
}
// sort proc_recv and num_recv by proc ID if requested
// sort proc_recv and length_recv by proc ID if requested
// useful for debugging to insure reproducible ordering of received atoms
// invoke by adding final arg = 1 to create_atom() call in migrate_atoms()
if (sortflag) {
int *order = new int[nrecv];
int *proc_recv_ordered = new int[nrecv];
int *length_recv_ordered = new int[nrecv];
int *order = new int[nrecv_proc];
int *proc_recv_ordered = new int[nrecv_proc];
int *length_recv_ordered = new int[nrecv_proc];
for (i = 0; i < nrecv; i++) order[i] = i;
for (i = 0; i < nrecv_proc; i++) order[i] = i;
proc_recv_copy = proc_recv;
qsort(order,nrecv,sizeof(int),compare_standalone);
qsort(order,nrecv_proc,sizeof(int),compare_standalone);
int j;
for (i = 0; i < nrecv; i++) {
for (i = 0; i < nrecv_proc; i++) {
j = order[i];
proc_recv_ordered[i] = proc_recv[j];
length_recv_ordered[i] = length_recv[j];
}
memcpy(proc_recv,proc_recv_ordered,nrecv*sizeof(int));
memcpy(length_recv,length_recv_ordered,nrecv*sizeof(int));
memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int));
memcpy(length_recv,length_recv_ordered,nrecv_proc*sizeof(int));
delete [] order;
delete [] proc_recv_ordered;
delete [] length_recv_ordered;
@ -396,27 +459,7 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
MPI_Barrier(world);
// free work vectors
delete [] count;
delete [] list;
// initialize plan
aplan->nsend = nsend;
aplan->nrecv = nrecv;
aplan->sendmax = sendmax;
aplan->proc_send = proc_send;
aplan->length_send = length_send;
aplan->num_send = num_send;
aplan->index_send = index_send;
aplan->offset_send = offset_send;
aplan->proc_recv = proc_recv;
aplan->length_recv = length_recv;
aplan->request = request;
aplan->status = status;
// return size of atom data I will receive
return nrecvsize;
}
@ -445,217 +488,226 @@ int compare_standalone(const void *iptr, const void *jptr)
void Irregular::exchange_atom(double *sendbuf, int *sizes, double *recvbuf)
{
int i,m,n,offset,num_send;
int i,m,n,offset,count;
// post all receives
offset = 0;
for (int irecv = 0; irecv < aplan->nrecv; irecv++) {
MPI_Irecv(&recvbuf[offset],aplan->length_recv[irecv],MPI_DOUBLE,
aplan->proc_recv[irecv],0,world,&aplan->request[irecv]);
offset += aplan->length_recv[irecv];
for (int irecv = 0; irecv < nrecv_proc; irecv++) {
MPI_Irecv(&recvbuf[offset],length_recv[irecv],MPI_DOUBLE,
proc_recv[irecv],0,world,&request[irecv]);
offset += length_recv[irecv];
}
// allocate buf for largest send
// reallocate buf for largest send if necessary
double *buf;
memory->create(buf,aplan->sendmax,"irregular:buf");
if (sendmax_proc > maxdbuf) {
memory->destroy(dbuf);
maxdbuf = sendmax_proc;
memory->create(dbuf,maxdbuf,"irregular:dbuf");
}
// send each message
// pack buf with list of atoms
// m = index of atom in sendbuf
int *index_send = aplan->index_send;
int nsend = aplan->nsend;
n = 0;
for (int isend = 0; isend < nsend; isend++) {
for (int isend = 0; isend < nsend_proc; isend++) {
offset = 0;
num_send = aplan->num_send[isend];
for (i = 0; i < num_send; i++) {
count = num_send[isend];
for (i = 0; i < count; i++) {
m = index_send[n++];
memcpy(&buf[offset],&sendbuf[aplan->offset_send[m]],
sizes[m]*sizeof(double));
memcpy(&dbuf[offset],&sendbuf[offset_send[m]],sizes[m]*sizeof(double));
offset += sizes[m];
}
MPI_Send(buf,aplan->length_send[isend],MPI_DOUBLE,
aplan->proc_send[isend],0,world);
MPI_Send(dbuf,length_send[isend],MPI_DOUBLE,proc_send[isend],0,world);
}
// free temporary send buffer
memory->destroy(buf);
// wait on all incoming messages
if (aplan->nrecv) MPI_Waitall(aplan->nrecv,aplan->request,aplan->status);
if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status);
}
/* ----------------------------------------------------------------------
destroy communication plan for atoms
destroy vectors in communication plan for atoms
------------------------------------------------------------------------- */
void Irregular::destroy_atom()
{
delete [] aplan->proc_send;
delete [] aplan->length_send;
delete [] aplan->num_send;
delete [] aplan->index_send;
delete [] aplan->offset_send;
delete [] aplan->proc_recv;
delete [] aplan->length_recv;
delete [] aplan->request;
delete [] aplan->status;
memory->sfree(aplan);
aplan = NULL;
delete [] proc_send;
delete [] length_send;
delete [] num_send;
delete [] index_send;
delete [] offset_send;
delete [] proc_recv;
delete [] length_recv;
delete [] request;
delete [] status;
}
/* ----------------------------------------------------------------------
create a communication plan for datums
create communication plan based on list of datums of uniform size
n = # of datums to send
proclist = proc to send each datum to (including self)
return total # of datums I will recv (including self)
proclist = proc to send each datum to, can include self
sortflag = flag for sorting order of received messages by proc ID
return total # of datums I will recv, including any to self
------------------------------------------------------------------------- */
int Irregular::create_data(int n, int *proclist)
int Irregular::create_data(int n, int *proclist, int sortflag)
{
int i,m;
// allocate plan and work vectors
dplan = (PlanData *) memory->smalloc(sizeof(PlanData),"irregular:dplan");
int *list = new int[nprocs];
int *count = new int[nprocs];
// nrecv = # of messages I receive
// setup for collective comm
// work1 = 1 for procs I send a message to, not including self
// work2 = 1 for all procs, used for ReduceScatter
for (i = 0; i < nprocs; i++) {
list[i] = 0;
count[i] = 1;
work1[i] = 0;
work2[i] = 1;
}
for (i = 0; i < n; i++) list[proclist[i]] = 1;
for (i = 0; i < n; i++) work1[proclist[i]] = 1;
work1[me] = 0;
int nrecv;
MPI_Reduce_scatter(list,&nrecv,count,MPI_INT,MPI_SUM,world);
if (list[me]) nrecv--;
// nrecv_proc = # of procs I receive messages from, not including self
// options for performing ReduceScatter operation
// some are more efficient on some machines at big sizes
#ifdef LAMMPS_RS_ALLREDUCE_INPLACE
MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world);
nrecv_proc = work1[me];
#else
#ifdef LAMMPS_RS_ALLREDUCE
MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world);
nrecv_proc = work2[me];
#else
MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world);
#endif
#endif
// allocate receive arrays
int *proc_recv = new int[nrecv];
int *num_recv = new int[nrecv];
MPI_Request *request = new MPI_Request[nrecv];
MPI_Status *status = new MPI_Status[nrecv];
proc_recv = new int[nrecv_proc];
num_recv = new int[nrecv_proc];
request = new MPI_Request[nrecv_proc];
status = new MPI_Status[nrecv_proc];
// nsend = # of messages I send
// work1 = # of datums I send to each proc, including self
// nsend_proc = # of procs I send messages to, not including self
for (i = 0; i < nprocs; i++) list[i] = 0;
for (i = 0; i < n; i++) list[proclist[i]]++;
for (i = 0; i < nprocs; i++) work1[i] = 0;
for (i = 0; i < n; i++) work1[proclist[i]]++;
int nsend = 0;
nsend_proc = 0;
for (i = 0; i < nprocs; i++)
if (list[i]) nsend++;
if (list[me]) nsend--;
if (work1[i]) nsend_proc++;
if (work1[me]) nsend_proc--;
// allocate send and self arrays
int *proc_send = new int[nsend];
int *num_send = new int[nsend];
int *index_send = new int[n-list[me]];
int *index_self = new int[list[me]];
proc_send = new int[nsend_proc];
num_send = new int[nsend_proc];
index_send = new int[n-work1[me]];
index_self = new int[work1[me]];
// proc_send = procs I send to
// num_send = # of datums I send to each proc
// num_self = # of datums I copy to self
// to balance pattern of send messages:
// each proc begins with iproc > me, continues until iproc = me
// reset list to store which send message each proc corresponds to
int num_self;
// reset work1 to store which send message each proc corresponds to
int iproc = me;
int isend = 0;
for (i = 0; i < nprocs; i++) {
iproc++;
if (iproc == nprocs) iproc = 0;
if (iproc == me) num_self = list[iproc];
else if (list[iproc] > 0) {
if (iproc == me) {
num_self = work1[iproc];
work1[iproc] = 0;
} else if (work1[iproc] > 0) {
proc_send[isend] = iproc;
num_send[isend] = list[iproc];
list[iproc] = isend;
num_send[isend] = work1[iproc];
work1[iproc] = isend;
isend++;
}
}
list[me] = 0;
// count = offsets into index_send for each proc I send to
// work2 = offsets into index_send for each proc I send to
// m = ptr into index_self
// index_send = list of which datums to send to each proc
// 1st N1 values are datum indices for 1st proc,
// next N2 values are datum indices for 2nd proc, etc
// index_self = list of which datums to copy to self
count[0] = 0;
for (i = 1; i < nsend; i++) count[i] = count[i-1] + num_send[i-1];
work2[0] = 0;
for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1];
m = 0;
for (i = 0; i < n; i++) {
iproc = proclist[i];
if (iproc == me) index_self[m++] = i;
else {
isend = list[iproc];
index_send[count[isend]++] = i;
isend = work1[iproc];
index_send[work2[isend]++] = i;
}
}
// tell receivers how much data I send
// sendmax = largest # of datums I send in a single message
// sendmax_proc = largest # of datums I send in a single message
int sendmax = 0;
for (i = 0; i < nsend; i++) {
sendmax_proc = 0;
for (i = 0; i < nsend_proc; i++) {
MPI_Send(&num_send[i],1,MPI_INT,proc_send[i],0,world);
sendmax = MAX(sendmax,num_send[i]);
sendmax_proc = MAX(sendmax_proc,num_send[i]);
}
// receive incoming messages
// proc_recv = procs I recv from
// num_recv = total size of message each proc sends me
// nrecvsize = total size of data I recv
// nrecvdatum = total size of data I recv
int nrecvsize = 0;
for (i = 0; i < nrecv; i++) {
int nrecvdatum = 0;
for (i = 0; i < nrecv_proc; i++) {
MPI_Recv(&num_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status);
proc_recv[i] = status->MPI_SOURCE;
nrecvsize += num_recv[i];
nrecvdatum += num_recv[i];
}
nrecvdatum += num_self;
// sort proc_recv and num_recv by proc ID if requested
// useful for debugging to insure reproducible ordering of received datums
if (sortflag) {
int *order = new int[nrecv_proc];
int *proc_recv_ordered = new int[nrecv_proc];
int *num_recv_ordered = new int[nrecv_proc];
for (i = 0; i < nrecv_proc; i++) order[i] = i;
proc_recv_copy = proc_recv;
qsort(order,nrecv_proc,sizeof(int),compare_standalone);
int j;
for (i = 0; i < nrecv_proc; i++) {
j = order[i];
proc_recv_ordered[i] = proc_recv[j];
num_recv_ordered[i] = num_recv[j];
}
memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int));
memcpy(num_recv,num_recv_ordered,nrecv_proc*sizeof(int));
delete [] order;
delete [] proc_recv_ordered;
delete [] num_recv_ordered;
}
nrecvsize += num_self;
// barrier to insure all MPI_ANY_SOURCE messages are received
// else another proc could proceed to exchange_data() and send to me
MPI_Barrier(world);
// free work vectors
// return # of datums I will receive
delete [] count;
delete [] list;
// initialize plan and return it
dplan->nsend = nsend;
dplan->nrecv = nrecv;
dplan->sendmax = sendmax;
dplan->proc_send = proc_send;
dplan->num_send = num_send;
dplan->index_send = index_send;
dplan->proc_recv = proc_recv;
dplan->num_recv = num_recv;
dplan->num_self = num_self;
dplan->index_self = index_self;
dplan->request = request;
dplan->status = status;
return nrecvsize;
return nrecvdatum;
}
/* ----------------------------------------------------------------------
@ -667,49 +719,41 @@ int Irregular::create_data(int n, int *proclist)
void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
{
int i,m,n,offset,num_send;
int i,m,n,offset,count;
// post all receives, starting after self copies
offset = dplan->num_self*nbytes;
for (int irecv = 0; irecv < dplan->nrecv; irecv++) {
MPI_Irecv(&recvbuf[offset],dplan->num_recv[irecv]*nbytes,MPI_CHAR,
dplan->proc_recv[irecv],0,world,&dplan->request[irecv]);
offset += dplan->num_recv[irecv]*nbytes;
offset = num_self*nbytes;
for (int irecv = 0; irecv < nrecv_proc; irecv++) {
MPI_Irecv(&recvbuf[offset],num_recv[irecv]*nbytes,MPI_CHAR,
proc_recv[irecv],0,world,&request[irecv]);
offset += num_recv[irecv]*nbytes;
}
// allocate buf for largest send
// reallocate buf for largest send if necessary
char *buf;
memory->create(buf,dplan->sendmax*nbytes,"irregular:buf");
if (sendmax_proc*nbytes > maxbuf) {
memory->destroy(buf);
maxbuf = sendmax_proc*nbytes;
memory->create(buf,maxbuf,"irregular:buf");
}
// send each message
// pack buf with list of datums
// m = index of datum in sendbuf
int *index_send = dplan->index_send;
int nsend = dplan->nsend;
n = 0;
for (int isend = 0; isend < nsend; isend++) {
num_send = dplan->num_send[isend];
for (i = 0; i < num_send; i++) {
for (int isend = 0; isend < nsend_proc; isend++) {
count = num_send[isend];
for (i = 0; i < count; i++) {
m = index_send[n++];
memcpy(&buf[i*nbytes],&sendbuf[m*nbytes],nbytes);
}
MPI_Send(buf,dplan->num_send[isend]*nbytes,MPI_CHAR,
dplan->proc_send[isend],0,world);
MPI_Send(buf,count*nbytes,MPI_CHAR,proc_send[isend],0,world);
}
// free temporary send buffer
memory->destroy(buf);
// copy datums to self, put at beginning of recvbuf
int *index_self = dplan->index_self;
int num_self = dplan->num_self;
for (i = 0; i < num_self; i++) {
m = index_self[i];
memcpy(&recvbuf[i*nbytes],&sendbuf[m*nbytes],nbytes);
@ -717,39 +761,37 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf)
// wait on all incoming messages
if (dplan->nrecv) MPI_Waitall(dplan->nrecv,dplan->request,dplan->status);
if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status);
}
/* ----------------------------------------------------------------------
destroy communication plan for datums
destroy vectors in communication plan for datums
------------------------------------------------------------------------- */
void Irregular::destroy_data()
{
delete [] dplan->proc_send;
delete [] dplan->num_send;
delete [] dplan->index_send;
delete [] dplan->proc_recv;
delete [] dplan->num_recv;
delete [] dplan->index_self;
delete [] dplan->request;
delete [] dplan->status;
memory->sfree(dplan);
dplan = NULL;
delete [] proc_send;
delete [] num_send;
delete [] index_send;
delete [] proc_recv;
delete [] num_recv;
delete [] index_self;
delete [] request;
delete [] status;
}
/* ----------------------------------------------------------------------
determine which proc owns atom with coord x[3]
x will be in box (orthogonal) or lamda coords (triclinic)
for uniform = 1, directly calculate owning proc
for non-uniform, iteratively find owning proc via binary search
if layout = UNIFORM, calculate owning proc directly
else layout = NONUNIFORM, iteratively find owning proc via binary search
return owning proc ID via grid2proc
return igx,igy,igz = logical grid loc of owing proc within 3d grid of procs
------------------------------------------------------------------------- */
int Irregular::coord2proc(double *x, int &igx, int &igy, int &igz)
{
if (uniform) {
if (layout == 0) {
if (triclinic == 0) {
igx = static_cast<int> (procgrid[0] * (x[0]-boxlo[0]) / prd[0]);
igy = static_cast<int> (procgrid[1] * (x[1]-boxlo[1]) / prd[1]);
@ -846,7 +888,12 @@ void Irregular::grow_recv(int n)
bigint Irregular::memory_usage()
{
bigint bytes = memory->usage(buf_send,maxsend);
bytes += memory->usage(buf_recv,maxrecv);
bigint bytes = 0;
bytes += maxsend*sizeof(double); // buf_send
bytes += maxrecv*sizeof(double); // buf_recv
bytes += maxdbuf*sizeof(double); // dbuf
bytes += maxbuf; // buf
bytes += 2*maxlocal*sizeof(int); // mproclist,msizes
bytes += 2*nprocs*sizeof(int); // work1,work2
return bytes;
}

View File

@ -27,9 +27,9 @@ class Irregular : protected Pointers {
Irregular(class LAMMPS *);
~Irregular();
void migrate_atoms(int sortflag = 0);
void migrate_atoms(int sortflag = 0, int *procassign = NULL);
int migrate_check();
int create_data(int, int *);
int create_data(int, int *, int sortflag = 0);
void exchange_data(char *, int, char *);
void destroy_data();
bigint memory_usage();
@ -38,58 +38,58 @@ class Irregular : protected Pointers {
int me,nprocs;
int triclinic;
int map_style;
int uniform;
int layout;
double *xsplit,*ysplit,*zsplit; // ptrs to comm
int *procgrid; // ptr to comm
int ***grid2proc; // ptr to comm
double *boxlo; // ptr to domain
double *prd; // ptr to domain
int maxsend,maxrecv; // size of buffers in # of doubles
double *buf_send,*buf_recv;
int maxsend,maxrecv; // size of buf send/recv in # of doubles
double *buf_send,*buf_recv; // bufs used in migrate_atoms
int maxdbuf; // size of double buf in bytes
double *dbuf; // double buf for largest single atom send
int maxbuf; // size of char buf in bytes
char *buf; // char buf for largest single data send
// plan for irregular communication of atoms
int *mproclist,*msizes; // persistent vectors in migrate_atoms
int maxlocal; // allocated size of mproclist and msizes
int *work1,*work2; // work vectors
// plan params for irregular communication of atoms or datums
// no params refer to atoms/data copied to self
int nsend_proc; // # of messages to send
int nrecv_proc; // # of messages to recv
int sendmax_proc; // # of doubles/datums in largest send message
int *proc_send; // list of procs to send to
int *num_send; // # of atoms/datums to send to each proc
int *index_send; // list of which atoms/datums to send to each proc
int *proc_recv; // list of procs to recv from
MPI_Request *request; // MPI requests for posted recvs
MPI_Status *status; // MPI statuses for WaitAll
// extra plan params plan for irregular communication of atoms
// no params refer to atoms copied to self
struct PlanAtom {
int nsend; // # of messages to send
int nrecv; // # of messages to recv
int sendmax; // # of doubles in largest send message
int *proc_send; // procs to send to
int *length_send; // # of doubles to send to each proc
int *num_send; // # of atoms to send to each proc
int *index_send; // list of which atoms to send to each proc
int *offset_send; // where each atom starts in send buffer
int *proc_recv; // procs to recv from
int *length_recv; // # of doubles to recv from each proc
MPI_Request *request; // MPI requests for posted recvs
MPI_Status *status; // MPI statuses for WaitAll
};
int *length_send; // # of doubles to send to each proc
int *length_recv; // # of doubles to recv from each proc
int *offset_send; // where each atom starts in send buffer
// plan for irregular communication of datums
// only 2 self params refer to atoms copied to self
// extra plan params plan for irregular communication of datums
// 2 self params refer to data copied to self
struct PlanData { // plan for irregular communication of data
int nsend; // # of messages to send
int nrecv; // # of messages to recv
int sendmax; // # of datums in largest send message
int *proc_send; // procs to send to
int *num_send; // # of datums to send to each proc
int *index_send; // list of which datums to send to each proc
int *proc_recv; // procs to recv from
int *num_recv; // # of datums to recv from each proc
int num_self; // # of datums to copy to self
int *index_self; // list of which datums to copy to self
MPI_Request *request; // MPI requests for posted recvs
MPI_Status *status; // MPI statuses for WaitAll
};
int *num_recv; // # of datums to recv from each proc
int num_self; // # of datums to copy to self
int *index_self; // list of which datums to copy to self
PlanAtom *aplan;
PlanData *dplan;
// private methods
int create_atom(int, int *, int *, int);
void exchange_atom(double *, int *, double *);
void destroy_atom();
int coord2proc(double *, int &, int &, int &);
int binary(double, int, double *);

926
src/rcb.cpp Normal file
View File

@ -0,0 +1,926 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "mpi.h"
#include "string.h"
#include "rcb.h"
#include "irregular.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
#define MYHUGE 1.0e30
#define TINY 1.0e-6
// set this to bigger number after debugging
#define DELTA 10
// prototypes for non-class functions
void box_merge(void *, void *, int *, MPI_Datatype *);
void median_merge(void *, void *, int *, MPI_Datatype *);
// NOTE: if want to have reuse flag, need to sum Tree across procs
/* ---------------------------------------------------------------------- */
RCB::RCB(LAMMPS *lmp) : Pointers(lmp)
{
MPI_Comm_rank(world,&me);
MPI_Comm_size(world,&nprocs);
ndot = maxdot = 0;
dots = NULL;
nlist = maxlist = 0;
dotlist = dotmark = NULL;
maxbuf = 0;
buf = NULL;
maxrecv = maxsend = 0;
recvproc = recvindex = sendproc = sendindex = NULL;
tree = NULL;
irregular = NULL;
// create MPI data and function types for box and median AllReduce ops
MPI_Type_contiguous(6,MPI_DOUBLE,&box_type);
MPI_Type_commit(&box_type);
MPI_Type_contiguous(sizeof(Median),MPI_CHAR,&med_type);
MPI_Type_commit(&med_type);
MPI_Op_create(box_merge,1,&box_op);
MPI_Op_create(median_merge,1,&med_op);
reuse = 0;
}
/* ---------------------------------------------------------------------- */
RCB::~RCB()
{
memory->sfree(dots);
memory->destroy(dotlist);
memory->destroy(dotmark);
memory->sfree(buf);
memory->destroy(recvproc);
memory->destroy(recvindex);
memory->destroy(sendproc);
memory->destroy(sendindex);
memory->sfree(tree);
delete irregular;
MPI_Type_free(&med_type);
MPI_Type_free(&box_type);
MPI_Op_free(&box_op);
MPI_Op_free(&med_op);
}
/* ----------------------------------------------------------------------
perform RCB balancing of N particles at coords X in bounding box LO/HI
if wt = NULL, ignore per-particle weights
if wt defined, per-particle weights > 0.0
dimension = 2 or 3
as documented in rcb.h:
sets noriginal,nfinal,nkeep,recvproc,recvindex,lo,hi
all proc particles will be inside or on surface of 3-d box
defined by final lo/hi
// NOTE: worry about re-use of data structs for fix balance?
// NOTE: could get rid of wt all together, will it be used?
------------------------------------------------------------------------- */
void RCB::compute(int dimension, int n, double **x, double *wt,
double *bboxlo, double *bboxhi)
{
int i,j,k;
int keep,outgoing,incoming,incoming2;
int dim,markactive;
int indexlo,indexhi;
int first_iteration,breakflag;
double wttot,wtlo,wthi,wtsum,wtok,wtupto,wtmax;
double targetlo,targethi;
double valuemin,valuemax,valuehalf;
double tolerance;
MPI_Comm comm,comm_half;
MPI_Request request,request2;
MPI_Status status;
Median med,medme;
// create list of my Dots
ndot = nkeep = noriginal = n;
if (ndot > maxdot) {
maxdot = ndot;
memory->sfree(dots);
dots = (Dot *) memory->smalloc(ndot*sizeof(Dot),"RCB:dots");
}
for (i = 0; i < ndot; i++) {
dots[i].x[0] = x[i][0];
dots[i].x[1] = x[i][1];
dots[i].x[2] = x[i][2];
dots[i].proc = me;
dots[i].index = i;
}
if (wt)
for (i = 0; i < ndot; i++) dots[i].wt = wt[i];
else
for (i = 0; i < ndot; i++) dots[i].wt = 1.0;
// initial bounding box = simulation box
// includes periodic or shrink-wrapped boundaries
lo = bbox.lo;
hi = bbox.hi;
lo[0] = bboxlo[0];
lo[1] = bboxlo[1];
lo[2] = bboxlo[2];
hi[0] = bboxhi[0];
hi[1] = bboxhi[1];
hi[2] = bboxhi[2];
// initialize counters
counters[0] = 0;
counters[1] = 0;
counters[2] = 0;
counters[3] = ndot;
counters[4] = maxdot;
counters[5] = 0;
counters[6] = 0;
// create communicator for use in recursion
MPI_Comm_dup(world,&comm);
// recurse until partition is a single proc = me
// proclower,procupper = lower,upper procs in partition
// procmid = 1st proc in upper half of partition
int procpartner,procpartner2;
int readnumber;
int procmid;
int proclower = 0;
int procupper = nprocs - 1;
while (proclower != procupper) {
// if odd # of procs, lower partition gets extra one
procmid = proclower + (procupper - proclower) / 2 + 1;
// determine communication partner(s)
// readnumber = # of proc partners to read from
if (me < procmid)
procpartner = me + (procmid - proclower);
else
procpartner = me - (procmid - proclower);
int readnumber = 1;
if (procpartner > procupper) {
readnumber = 0;
procpartner--;
}
if (me == procupper && procpartner != procmid - 1) {
readnumber = 2;
procpartner2 = procpartner + 1;
}
// wttot = summed weight of entire partition
// search tolerance = largest single weight (plus epsilon)
// targetlo = desired weight in lower half of partition
// targethi = desired weight in upper half of partition
wtmax = wtsum = 0.0;
if (wt) {
for (i = 0; i < ndot; i++) {
wtsum += dots[i].wt;
if (dots[i].wt > wtmax) wtmax = dots[i].wt;
}
} else {
for (i = 0; i < ndot; i++) wtsum += dots[i].wt;
wtmax = 1.0;
}
MPI_Allreduce(&wtsum,&wttot,1,MPI_DOUBLE,MPI_SUM,comm);
if (wt) MPI_Allreduce(&wtmax,&tolerance,1,MPI_DOUBLE,MPI_MAX,comm);
else tolerance = 1.0;
tolerance *= 1.0 + TINY;
targetlo = wttot * (procmid - proclower) / (procupper + 1 - proclower);
targethi = wttot - targetlo;
// dim = dimension to bisect on
// do not allow choice of z dimension for 2d system
dim = 0;
if (hi[1]-lo[1] > hi[0]-lo[0]) dim = 1;
if (dimension == 3) {
if (dim == 0 && hi[2]-lo[2] > hi[0]-lo[0]) dim = 2;
if (dim == 1 && hi[2]-lo[2] > hi[1]-lo[1]) dim = 2;
}
// create active list and mark array for dots
// initialize active list to all dots
if (ndot > maxlist) {
memory->destroy(dotlist);
memory->destroy(dotmark);
maxlist = maxdot;
memory->create(dotlist,maxlist,"RCB:dotlist");
memory->create(dotmark,maxlist,"RCB:dotmark");
}
nlist = ndot;
for (i = 0; i < nlist; i++) dotlist[i] = i;
// median iteration
// zoom in on bisector until correct # of dots in each half of partition
// as each iteration of median-loop begins, require:
// all non-active dots are marked with 0/1 in dotmark
// valuemin <= every active dot <= valuemax
// wtlo, wthi = total wt of non-active dots
// when leave median-loop, require only:
// valuehalf = correct cut position
// all dots <= valuehalf are marked with 0 in dotmark
// all dots >= valuehalf are marked with 1 in dotmark
// markactive = which side of cut is active = 0/1
// indexlo,indexhi = indices of dot closest to median
wtlo = wthi = 0.0;
valuemin = lo[dim];
valuemax = hi[dim];
first_iteration = 1;
while (1) {
// choose bisector value
// use old value on 1st iteration if old cut dimension is the same
// on 2nd option: could push valuehalf towards geometric center
// with "1.0-factor" to force overshoot
if (first_iteration && reuse && dim == tree[procmid].dim) {
counters[5]++;
valuehalf = tree[procmid].cut;
if (valuehalf < valuemin || valuehalf > valuemax)
valuehalf = 0.5 * (valuemin + valuemax);
} else if (wt)
valuehalf = valuemin + (targetlo - wtlo) /
(wttot - wtlo - wthi) * (valuemax - valuemin);
else
valuehalf = 0.5 * (valuemin + valuemax);
first_iteration = 0;
// initialize local median data structure
medme.totallo = medme.totalhi = 0.0;
medme.valuelo = -MYHUGE;
medme.valuehi = MYHUGE;
medme.wtlo = medme.wthi = 0.0;
medme.countlo = medme.counthi = 0;
medme.proclo = medme.prochi = me;
// mark all active dots on one side or other of bisector
// also set all fields in median data struct
// save indices of closest dots on either side
for (j = 0; j < nlist; j++) {
i = dotlist[j];
if (dots[i].x[dim] <= valuehalf) { // in lower part
medme.totallo += dots[i].wt;
dotmark[i] = 0;
if (dots[i].x[dim] > medme.valuelo) { // my closest dot
medme.valuelo = dots[i].x[dim];
medme.wtlo = dots[i].wt;
medme.countlo = 1;
indexlo = i;
} else if (dots[i].x[dim] == medme.valuelo) { // tied for closest
medme.wtlo += dots[i].wt;
medme.countlo++;
}
}
else { // in upper part
medme.totalhi += dots[i].wt;
dotmark[i] = 1;
if (dots[i].x[dim] < medme.valuehi) { // my closest dot
medme.valuehi = dots[i].x[dim];
medme.wthi = dots[i].wt;
medme.counthi = 1;
indexhi = i;
} else if (dots[i].x[dim] == medme.valuehi) { // tied for closest
medme.wthi += dots[i].wt;
medme.counthi++;
}
}
}
// combine median data struct across current subset of procs
counters[0]++;
MPI_Allreduce(&medme,&med,1,med_type,med_op,comm);
// test median guess for convergence
// move additional dots that are next to cut across it
if (wtlo + med.totallo < targetlo) { // lower half TOO SMALL
wtlo += med.totallo;
valuehalf = med.valuehi;
if (med.counthi == 1) { // only one dot to move
if (wtlo + med.wthi < targetlo) { // move it, keep iterating
if (me == med.prochi) dotmark[indexhi] = 0;
}
else { // only move if beneficial
if (wtlo + med.wthi - targetlo < targetlo - wtlo)
if (me == med.prochi) dotmark[indexhi] = 0;
break; // all done
}
}
else { // multiple dots to move
breakflag = 0;
wtok = 0.0;
if (medme.valuehi == med.valuehi) wtok = medme.wthi;
if (wtlo + med.wthi >= targetlo) { // all done
MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm);
wtmax = targetlo - wtlo;
if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax);
breakflag = 1;
} // wtok = most I can move
for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) {
i = dotlist[j];
if (dots[i].x[dim] == med.valuehi) { // only move if better
if (wtsum + dots[i].wt - wtok < wtok - wtsum)
dotmark[i] = 0;
wtsum += dots[i].wt;
}
}
if (breakflag) break; // done if moved enough
}
wtlo += med.wthi;
if (targetlo-wtlo <= tolerance) break; // close enough
valuemin = med.valuehi; // iterate again
markactive = 1;
}
else if (wthi + med.totalhi < targethi) { // upper half TOO SMALL
wthi += med.totalhi;
valuehalf = med.valuelo;
if (med.countlo == 1) { // only one dot to move
if (wthi + med.wtlo < targethi) { // move it, keep iterating
if (me == med.proclo) dotmark[indexlo] = 1;
}
else { // only move if beneficial
if (wthi + med.wtlo - targethi < targethi - wthi)
if (me == med.proclo) dotmark[indexlo] = 1;
break; // all done
}
}
else { // multiple dots to move
breakflag = 0;
wtok = 0.0;
if (medme.valuelo == med.valuelo) wtok = medme.wtlo;
if (wthi + med.wtlo >= targethi) { // all done
MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm);
wtmax = targethi - wthi;
if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax);
breakflag = 1;
} // wtok = most I can move
for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) {
i = dotlist[j];
if (dots[i].x[dim] == med.valuelo) { // only move if better
if (wtsum + dots[i].wt - wtok < wtok - wtsum)
dotmark[i] = 1;
wtsum += dots[i].wt;
}
}
if (breakflag) break; // done if moved enough
}
wthi += med.wtlo;
if (targethi-wthi <= tolerance) break; // close enough
valuemax = med.valuelo; // iterate again
markactive = 0;
}
else // Goldilocks result: both partitions just right
break;
// shrink the active list
k = 0;
for (j = 0; j < nlist; j++) {
i = dotlist[j];
if (dotmark[i] == markactive) dotlist[k++] = i;
}
nlist = k;
}
// found median
// store cut info only if I am procmid
if (me == procmid) {
cut = valuehalf;
cutdim = dim;
}
// use cut to shrink my RCB bounding box
if (me < procmid) hi[dim] = valuehalf;
else lo[dim] = valuehalf;
// outgoing = number of dots to ship to partner
// nkeep = number of dots that have never migrated
markactive = (me < procpartner);
for (i = 0, keep = 0, outgoing = 0; i < ndot; i++)
if (dotmark[i] == markactive) outgoing++;
else if (i < nkeep) keep++;
nkeep = keep;
// alert partner how many dots I'll send, read how many I'll recv
MPI_Send(&outgoing,1,MPI_INT,procpartner,0,world);
incoming = 0;
if (readnumber) {
MPI_Recv(&incoming,1,MPI_INT,procpartner,0,world,&status);
if (readnumber == 2) {
MPI_Recv(&incoming2,1,MPI_INT,procpartner2,0,world,&status);
incoming += incoming2;
}
}
// check if need to alloc more space
int ndotnew = ndot - outgoing + incoming;
if (ndotnew > maxdot) {
while (maxdot < ndotnew) maxdot += DELTA;
dots = (Dot *) memory->srealloc(dots,maxdot*sizeof(Dot),"RCB::dots");
counters[6]++;
}
counters[1] += outgoing;
counters[2] += incoming;
if (ndotnew > counters[3]) counters[3] = ndotnew;
if (maxdot > counters[4]) counters[4] = maxdot;
// malloc comm send buffer
if (outgoing > maxbuf) {
memory->sfree(buf);
maxbuf = outgoing;
buf = (Dot *) memory->smalloc(maxbuf*sizeof(Dot),"RCB:buf");
}
// fill buffer with dots that are marked for sending
// pack down the unmarked ones
keep = outgoing = 0;
for (i = 0; i < ndot; i++) {
if (dotmark[i] == markactive)
memcpy(&buf[outgoing++],&dots[i],sizeof(Dot));
else
memcpy(&dots[keep++],&dots[i],sizeof(Dot));
}
// post receives for dots
if (readnumber > 0) {
MPI_Irecv(&dots[keep],incoming*sizeof(Dot),MPI_CHAR,
procpartner,1,world,&request);
if (readnumber == 2) {
keep += incoming - incoming2;
MPI_Irecv(&dots[keep],incoming2*sizeof(Dot),MPI_CHAR,
procpartner2,1,world,&request2);
}
}
// handshake before sending dots to insure recvs have been posted
if (readnumber > 0) {
MPI_Send(NULL,0,MPI_INT,procpartner,0,world);
if (readnumber == 2) MPI_Send(NULL,0,MPI_INT,procpartner2,0,world);
}
MPI_Recv(NULL,0,MPI_INT,procpartner,0,world,&status);
// send dots to partner
MPI_Rsend(buf,outgoing*sizeof(Dot),MPI_CHAR,procpartner,1,world);
// wait until all dots are received
if (readnumber > 0) {
MPI_Wait(&request,&status);
if (readnumber == 2) MPI_Wait(&request2,&status);
}
ndot = ndotnew;
// cut partition in half, create new communicators of 1/2 size
int split;
if (me < procmid) {
procupper = procmid - 1;
split = 0;
} else {
proclower = procmid;
split = 1;
}
MPI_Comm_split(comm,split,me,&comm_half);
MPI_Comm_free(&comm);
comm = comm_half;
}
// clean up
MPI_Comm_free(&comm);
// set public variables with results of rebalance
nfinal = ndot;
if (nfinal > maxrecv) {
memory->destroy(recvproc);
memory->destroy(recvindex);
maxrecv = nfinal;
memory->create(recvproc,maxrecv,"RCB:recvproc");
memory->create(recvindex,maxrecv,"RCB:recvindex");
}
for (i = 0; i < nfinal; i++) {
recvproc[i] = dots[i].proc;
recvindex[i] = dots[i].index;
}
}
/* ----------------------------------------------------------------------
custom MPI reduce operation
merge of each component of an RCB bounding box
------------------------------------------------------------------------- */
void box_merge(void *in, void *inout, int *len, MPI_Datatype *dptr)
{
RCB::BBox *box1 = (RCB::BBox *) in;
RCB::BBox *box2 = (RCB::BBox *) inout;
for (int i = 0; i < 3; i++) {
if (box1->lo[i] < box2->lo[i]) box2->lo[i] = box1->lo[i];
if (box1->hi[i] > box2->hi[i]) box2->hi[i] = box1->hi[i];
}
}
/* ----------------------------------------------------------------------
custom MPI reduce operation
merge median data structure
on input:
in,inout->totallo, totalhi = weight in both partitions on this proc
valuelo, valuehi = pos of nearest dot(s) to cut on this proc
wtlo, wthi = total wt of dot(s) at that pos on this proc
countlo, counthi = # of dot(s) nearest to cut on this proc
proclo, prochi = not used
on exit:
inout-> totallo, totalhi = total # of active dots in both partitions
valuelo, valuehi = pos of nearest dot(s) to cut
wtlo, wthi = total wt of dot(s) at that position
countlo, counthi = total # of dot(s) nearest to cut
proclo, prochi = one unique proc who owns a nearest dot
all procs must get same proclo,prochi
------------------------------------------------------------------------- */
void median_merge(void *in, void *inout, int *len, MPI_Datatype *dptr)
{
RCB::Median *med1 = (RCB::Median *) in;
RCB::Median *med2 = (RCB::Median *) inout;
med2->totallo += med1->totallo;
if (med1->valuelo > med2->valuelo) {
med2->valuelo = med1->valuelo;
med2->wtlo = med1->wtlo;
med2->countlo = med1->countlo;
med2->proclo = med1->proclo;
}
else if (med1->valuelo == med2->valuelo) {
med2->wtlo += med1->wtlo;
med2->countlo += med1->countlo;
if (med1->proclo < med2->proclo) med2->proclo = med1->proclo;
}
med2->totalhi += med1->totalhi;
if (med1->valuehi < med2->valuehi) {
med2->valuehi = med1->valuehi;
med2->wthi = med1->wthi;
med2->counthi = med1->counthi;
med2->prochi = med1->prochi;
}
else if (med1->valuehi == med2->valuehi) {
med2->wthi += med1->wthi;
med2->counthi += med1->counthi;
if (med1->prochi < med2->prochi) med2->prochi = med1->prochi;
}
}
/* ----------------------------------------------------------------------
invert the RCB rebalance result to convert receive info into send info
sortflag = flag for sorting order of received messages by proc ID
------------------------------------------------------------------------- */
void RCB::invert(int sortflag)
{
Invert *sbuf,*rbuf;
// only create Irregular if not previously created
// allows Irregular to persist for multiple RCB calls by fix balance
if (!irregular) irregular = new Irregular(lmp);
// nsend = # of dots to request from other procs
int nsend = nfinal-nkeep;
int *proclist;
memory->create(proclist,nsend,"RCB:proclist");
Invert *sinvert =
(Invert *) memory->smalloc(nsend*sizeof(Invert),"RCB:sinvert");
int m = 0;
for (int i = nkeep; i < nfinal; i++) {
proclist[m] = recvproc[i];
sinvert[m].rindex = recvindex[i];
sinvert[m].sproc = me;
sinvert[m].sindex = i;
m++;
}
// perform inversion via irregular comm
// nrecv = # of my dots to send to other procs
int nrecv = irregular->create_data(nsend,proclist,sortflag);
Invert *rinvert =
(Invert *) memory->smalloc(nrecv*sizeof(Invert),"RCB:rinvert");
irregular->exchange_data((char *) sinvert,sizeof(Invert),(char *) rinvert);
irregular->destroy_data();
// set public variables from requests to send my dots
if (noriginal > maxsend) {
memory->destroy(sendproc);
memory->destroy(sendindex);
maxsend = noriginal;
memory->create(sendproc,maxsend,"RCB:sendproc");
memory->create(sendindex,maxsend,"RCB:sendindex");
}
for (int i = 0; i < nkeep; i++) {
sendproc[recvindex[i]] = me;
sendindex[recvindex[i]] = i;
}
for (int i = 0; i < nrecv; i++) {
m = rinvert[i].rindex;
sendproc[m] = rinvert[i].sproc;
sendindex[m] = rinvert[i].sindex;
}
// clean-up
memory->destroy(proclist);
memory->destroy(sinvert);
memory->destroy(rinvert);
}
/* ----------------------------------------------------------------------
memory use of Irregular
------------------------------------------------------------------------- */
bigint RCB::memory_usage()
{
bigint bytes = 0;
if (irregular) bytes += irregular->memory_usage();
return bytes;
}
// -----------------------------------------------------------------------
// DEBUG methods
// -----------------------------------------------------------------------
/*
// consistency checks on RCB results
void RCB::check()
{
int i,iflag,total1,total2;
double weight,wtmax,wtmin,wtone,tolerance;
// check that total # of dots remained the same
MPI_Allreduce(&ndotorig,&total1,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&ndot,&total2,1,MPI_INT,MPI_SUM,world);
if (total1 != total2) {
if (me == 0)
printf("ERROR: Points before RCB = %d, Points after RCB = %d\n",
total1,total2);
}
// check that result is load-balanced within log2(P)*max-wt
weight = wtone = 0.0;
for (i = 0; i < ndot; i++) {
weight += dots[i].wt;
if (dots[i].wt > wtone) wtone = dots[i].wt;
}
MPI_Allreduce(&weight,&wtmin,1,MPI_DOUBLE,MPI_MIN,world);
MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);
MPI_Allreduce(&wtone,&tolerance,1,MPI_DOUBLE,MPI_MAX,world);
// i = smallest power-of-2 >= nprocs
// tolerance = largest-single-weight*log2(nprocs)
for (i = 0; (nprocs >> i) != 0; i++);
tolerance = tolerance * i * (1.0 + TINY);
if (wtmax - wtmin > tolerance) {
if (me == 0)
printf("ERROR: Load-imbalance > tolerance of %g\n",tolerance);
MPI_Barrier(world);
if (weight == wtmin) printf(" Proc %d has weight = %g\n",me,weight);
if (weight == wtmax) printf(" Proc %d has weight = %g\n",me,weight);
}
MPI_Barrier(world);
// check that final set of points is inside RCB box of each proc
iflag = 0;
for (i = 0; i < ndot; i++) {
if (dots[i].x[0] < lo[0] || dots[i].x[0] > hi[0] ||
dots[i].x[1] < lo[1] || dots[i].x[1] > hi[1] ||
dots[i].x[2] < lo[2] || dots[i].x[2] > hi[2])
iflag++;
}
if (iflag > 0)
printf("ERROR: %d points are out-of-box on proc %d\n",iflag,me);
}
// stats for RCB decomposition
void RCB::stats(int flag)
{
int i,iflag,sum,min,max;
double ave,rsum,rmin,rmax;
double weight,wttot,wtmin,wtmax;
if (me == 0) printf("RCB Statistics:\n");
// distribution info
for (i = 0, weight = 0.0; i < ndot; i++) weight += dots[i].wt;
MPI_Allreduce(&weight,&wttot,1,MPI_DOUBLE,MPI_SUM,world);
MPI_Allreduce(&weight,&wtmin,1,MPI_DOUBLE,MPI_MIN,world);
MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);
if (me == 0) {
printf(" Total weight of dots = %g\n",wttot);
printf(" Weight on each proc: ave = %g, max = %g, min = %g\n",
wttot/nprocs,wtmax,wtmin);
}
if (flag) {
MPI_Barrier(world);
printf(" Proc %d has weight = %g\n",me,weight);
}
for (i = 0, weight = 0.0; i < ndot; i++)
if (dots[i].wt > weight) weight = dots[i].wt;
MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world);
if (me == 0) printf(" Maximum weight of single dot = %g\n",wtmax);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d max weight = %g\n",me,weight);
}
// counter info
MPI_Allreduce(&counters[0],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[0],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[0],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Median iter: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d median count = %d\n",me,counters[0]);
}
MPI_Allreduce(&counters[1],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[1],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[1],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Send count: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d send count = %d\n",me,counters[1]);
}
MPI_Allreduce(&counters[2],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[2],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[2],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Recv count: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d recv count = %d\n",me,counters[2]);
}
MPI_Allreduce(&counters[3],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[3],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[3],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Max dots: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d max dots = %d\n",me,counters[3]);
}
MPI_Allreduce(&counters[4],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[4],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[4],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" Max memory: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d max memory = %d\n",me,counters[4]);
}
if (reuse) {
MPI_Allreduce(&counters[5],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[5],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[5],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" # of Reuse: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d # of Reuse = %d\n",me,counters[5]);
}
}
MPI_Allreduce(&counters[6],&sum,1,MPI_INT,MPI_SUM,world);
MPI_Allreduce(&counters[6],&min,1,MPI_INT,MPI_MIN,world);
MPI_Allreduce(&counters[6],&max,1,MPI_INT,MPI_MAX,world);
ave = ((double) sum)/nprocs;
if (me == 0)
printf(" # of OverAlloc: ave = %g, min = %d, max = %d\n",ave,min,max);
if (flag) {
MPI_Barrier(world);
printf(" Proc %d # of OverAlloc = %d\n",me,counters[6]);
}
// RCB boxes for each proc
if (flag) {
if (me == 0) printf(" RCB sub-domain boxes:\n");
for (i = 0; i < 3; i++) {
MPI_Barrier(world);
if (me == 0) printf(" Dimension %d\n",i+1);
MPI_Barrier(world);
printf(" Proc = %d: Box = %g %g\n",me,lo[i],hi[i]);
}
}
}
*/

131
src/rcb.h Normal file
View File

@ -0,0 +1,131 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LAMMPS_RCB_H
#define LAMMPS_RCB_H
#include "mpi.h"
#include "pointers.h"
namespace LAMMPS_NS {
class RCB : protected Pointers {
public:
// set by compute()
int noriginal; // # of dots I own before balancing
int nfinal; // # of dots I own after balancing
int nkeep; // how many dots of noriginal I still own
// will be first nkept of nfinal list
int *recvproc; // proc IDs of nfinal dots
int *recvindex; // index of nfinal dots on owning procs
// based on input list for compute()
double *lo,*hi; // final bounding box of my RCB sub-domain
double cut; // single cut (in Tree) owned by this proc
int cutdim; // dimension (0,1,2) of the cut
// set by invert()
int *sendproc; // proc to send each of my noriginal dots to
int *sendindex; // index of dot in receiver's nfinal list
RCB(class LAMMPS *);
~RCB();
void compute(int, int, double **, double *, double *, double *);
void invert(int sortflag = 0);
bigint memory_usage();
// DEBUG methods
//void check();
//void stats(int);
// RCB cut info
struct Median {
double totallo,totalhi; // weight in each half of active partition
double valuelo,valuehi; // position of dot(s) nearest to cut
double wtlo,wthi; // total weight of dot(s) at that position
int countlo,counthi; // # of dots at that position
int proclo,prochi; // unique proc who owns a nearest dot
};
struct BBox {
double lo[3],hi[3]; // corner points of a bounding box
};
private:
int me,nprocs;
// point to balance on
struct Dot {
double x[3]; // coord of point
double wt; // weight of point
int proc; // owning proc
int index; // index on owning proc
};
// tree of RCB cuts
struct Tree {
double cut; // position of cut
int dim; // dimension = 0/1/2 of cut
};
// inversion message
struct Invert {
int rindex; // index on receiving proc
int sproc; // sending proc
int sindex; // index on sending proc
};
Dot *dots; // dots on this proc
int ndot; // # of dots on this proc
int maxdot; // allocated size of dots
int ndotorig;
int nlist;
int maxlist;
int *dotlist;
int *dotmark;
int maxbuf;
Dot *buf;
int maxrecv,maxsend;
BBox bbox;
class Irregular *irregular;
MPI_Op box_op,med_op;
MPI_Datatype box_type,med_type;
int reuse; // 1/0 to use/not use previous cuts
int dottop; // dots >= this index are new
double bboxlo[3]; // bounding box of final RCB sub-domain
double bboxhi[3];
Tree *tree; // tree of RCB cuts, used by reuse()
int counters[7]; // diagnostic counts
// 0 = # of median iterations
// 1 = # of points sent
// 2 = # of points received
// 3 = most points this proc ever owns
// 4 = most point memory this proc ever allocs
// 5 = # of times a previous cut is re-used
// 6 = # of reallocs of point vector
};
}
#endif

View File

@ -29,6 +29,8 @@ using namespace LAMMPS_NS;
#define LB_FACTOR 1.1
#define EPSILON 1.0e-6
enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files
/* ---------------------------------------------------------------------- */
Replicate::Replicate(LAMMPS *lmp) : Pointers(lmp) {}
@ -220,17 +222,33 @@ void Replicate::command(int narg, char **arg)
sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2];
}
if (domain->xperiodic) {
if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0];
}
if (domain->yperiodic) {
if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1];
}
if (domain->zperiodic) {
if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2];
if (comm->layout != LAYOUT_TILED) {
if (domain->xperiodic) {
if (comm->myloc[0] == 0) sublo[0] -= epsilon[0];
if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0];
}
if (domain->yperiodic) {
if (comm->myloc[1] == 0) sublo[1] -= epsilon[1];
if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1];
}
if (domain->zperiodic) {
if (comm->myloc[2] == 0) sublo[2] -= epsilon[2];
if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2];
}
} else {
if (domain->xperiodic) {
if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0];
if (comm->mysplit[0][1] == 1.0) subhi[0] += epsilon[0];
}
if (domain->yperiodic) {
if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1];
if (comm->mysplit[1][1] == 1.0) subhi[1] += epsilon[1];
}
if (domain->zperiodic) {
if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2];
if (comm->mysplit[2][1] == 1.0) subhi[2] += epsilon[2];
}
}
// loop over all procs

View File

@ -44,6 +44,7 @@ using namespace MathConst;
#define MAXLEVEL 4
#define MAXLINE 256
#define CHUNK 1024
#define VALUELENGTH 64
#define MYROUND(a) (( a-floor(a) ) >= .5) ? ceil(a) : floor(a)
@ -306,7 +307,7 @@ void Variable::set(int narg, char **arg)
pad[nvar] = 0;
data[nvar] = new char*[num[nvar]];
copy(1,&arg[2],data[nvar]);
data[nvar][1] = NULL;
data[nvar][1] = new char[VALUELENGTH];
// SCALARFILE for strings or numbers
// which = 1st value
@ -360,7 +361,7 @@ void Variable::set(int narg, char **arg)
pad[nvar] = 0;
data[nvar] = new char*[num[nvar]];
copy(2,&arg[2],data[nvar]);
data[nvar][2] = NULL;
data[nvar][2] = new char[VALUELENGTH];
// EQUAL
// replace pre-existing var if also style EQUAL (allows it to be reset)
@ -374,9 +375,7 @@ void Variable::set(int narg, char **arg)
if (style[ivar] != EQUAL)
error->all(FLERR,"Cannot redefine variable as a different style");
delete [] data[ivar][0];
if (data[ivar][1]) delete [] data[ivar][1];
copy(1,&arg[2],data[ivar]);
data[ivar][1] = NULL;
replaceflag = 1;
} else {
if (nvar == maxvar) grow();
@ -386,7 +385,7 @@ void Variable::set(int narg, char **arg)
pad[nvar] = 0;
data[nvar] = new char*[num[nvar]];
copy(1,&arg[2],data[nvar]);
data[nvar][1] = NULL;
data[nvar][1] = new char[VALUELENGTH];
}
// ATOM
@ -625,32 +624,24 @@ char *Variable::retrieve(char *name)
strcpy(data[ivar][0],result);
str = data[ivar][0];
} else if (style[ivar] == EQUAL) {
char result[64];
double answer = evaluate(data[ivar][0],NULL);
sprintf(result,"%.15g",answer);
int n = strlen(result) + 1;
if (data[ivar][1]) delete [] data[ivar][1];
data[ivar][1] = new char[n];
strcpy(data[ivar][1],result);
sprintf(data[ivar][1],"%.15g",answer);
str = data[ivar][1];
} else if (style[ivar] == FORMAT) {
char result[64];
int jvar = find(data[ivar][0]);
if (jvar == -1) return NULL;
if (!equalstyle(jvar)) return NULL;
double answer = evaluate(data[jvar][0],NULL);
sprintf(result,data[ivar][1],answer);
int n = strlen(result) + 1;
if (data[ivar][2]) delete [] data[ivar][2];
data[ivar][2] = new char[n];
strcpy(data[ivar][2],result);
sprintf(data[ivar][2],data[ivar][1],answer);
str = data[ivar][2];
} else if (style[ivar] == GETENV) {
const char *result = getenv(data[ivar][0]);
if (data[ivar][1]) delete [] data[ivar][1];
if (result == NULL) result = (const char *)"";
if (result == NULL) result = (const char *) "";
int n = strlen(result) + 1;
data[ivar][1] = new char[n];
if (n > VALUELENGTH) {
delete [] data[ivar][1];
data[ivar][1] = new char[n];
}
strcpy(data[ivar][1],result);
str = data[ivar][1];
} else if (style[ivar] == ATOM || style[ivar] == ATOMFILE) return NULL;
@ -782,6 +773,45 @@ int Variable::atomstyle(int ivar)
return 0;
}
/* ----------------------------------------------------------------------
save copy of EQUAL style ivar formula in copy
allocate copy here, later equal_restore() call will free it
insure data[ivar][0] is of VALUELENGTH since will be overridden
------------------------------------------------------------------------- */
void Variable::equal_save(int ivar, char *&copy)
{
int n = strlen(data[ivar][0]) + 1;
copy = new char[n];
strcpy(copy,data[ivar][0]);
delete [] data[ivar][0];
data[ivar][0] = new char[VALUELENGTH];
}
/* ----------------------------------------------------------------------
restore formula string of EQUAL style ivar from copy
then free copy, allocated in equal_save()
------------------------------------------------------------------------- */
void Variable::equal_restore(int ivar, char *copy)
{
delete [] data[ivar][0];
int n = strlen(copy) + 1;
data[ivar][0] = new char[n];
strcpy(data[ivar][0],copy);
delete [] copy;
}
/* ----------------------------------------------------------------------
override EQUAL style ivar formula with value converted to string
data[ivar][0] was set to length 64 in equal_save()
------------------------------------------------------------------------- */
void Variable::equal_override(int ivar, double value)
{
sprintf(data[ivar][0],"%.15g",value);
}
/* ----------------------------------------------------------------------
remove Nth variable from list and compact list
delete reader explicitly if it exists

View File

@ -36,10 +36,15 @@ class Variable : protected Pointers {
int int_between_brackets(char *&, int);
double evaluate_boolean(char *);
void equal_save(int, char *&);
void equal_restore(int, char *);
void equal_override(int, double);
unsigned int data_mask(int ivar);
unsigned int data_mask(char *str);
private:
int me;
int nvar; // # of defined variables
int maxvar; // max # of variables following lists can hold
char **names; // name of each variable
@ -57,7 +62,6 @@ class Variable : protected Pointers {
int precedence[17]; // precedence level of math operators
// set length to include up to OR in enum
int me;
struct Tree { // parse tree for atom-style variables
double value; // single scalar

View File

@ -1 +1 @@
#define LAMMPS_VERSION "22 Jul 2014"
#define LAMMPS_VERSION "29 Jul 2014"