diff --git a/doc/JPG/sinusoid.jpg b/doc/JPG/sinusoid.jpg new file mode 100644 index 0000000000..00969281a1 Binary files /dev/null and b/doc/JPG/sinusoid.jpg differ diff --git a/doc/JPG/sinusoid_small.jpg b/doc/JPG/sinusoid_small.jpg new file mode 100644 index 0000000000..1643d81698 Binary files /dev/null and b/doc/JPG/sinusoid_small.jpg differ diff --git a/doc/Manual.html b/doc/Manual.html index 43e96bec91..fa49ba5bdf 100644 --- a/doc/Manual.html +++ b/doc/Manual.html @@ -1,7 +1,7 @@ LAMMPS-ICMS Users Manual - + @@ -22,7 +22,7 @@

LAMMPS-ICMS Documentation

-

22 Jul 2014 version +

29 Jul 2014 version

Version info:

diff --git a/doc/Manual.txt b/doc/Manual.txt index 28755def04..b2edaf4851 100644 --- a/doc/Manual.txt +++ b/doc/Manual.txt @@ -1,6 +1,6 @@ LAMMPS-ICMS Users Manual - + @@ -18,7 +18,7 @@

LAMMPS-ICMS Documentation :c,h3 -22 Jul 2014 version :c,h4 +29 Jul 2014 version :c,h4 Version info: :h4 diff --git a/doc/Section_errors.txt b/doc/Section_errors.txt index 39c73b8831..91878bc777 100644 --- a/doc/Section_errors.txt +++ b/doc/Section_errors.txt @@ -988,14 +988,6 @@ will assign the restart file info to actual atoms. :dd This is a restriction due to the way atoms are organized in a list to enable the atom_modify first command. :dd -{Support for writing images in JPEG format not included} :dt - -LAMMPS was not built with the -DLAMMPS_JPEG switch in the Makefile. :dd - -{Support for writing images in PNG format not included} :dt - -LAMMPS was not built with the -DLAMMPS_PNG switch in the Makefile. :dd - {Cannot dump sort on atom IDs with no atom IDs defined} :dt Self-explanatory. :dd diff --git a/doc/create_atoms.html b/doc/create_atoms.html index 6f06935377..eff378a418 100644 --- a/doc/create_atoms.html +++ b/doc/create_atoms.html @@ -31,7 +31,7 @@
  • zero or more keyword/value pairs may be appended -
  • keyword = mol or basis or remap or units +
  • keyword = mol or basis or remap or var or set or units
      mol value = template-ID seed
         template-ID = ID of molecule template specified in a separate molecule command
    @@ -40,6 +40,10 @@
         M = which basis atom
         itype = atom type (1-N) to assign to this basis atom
       remap value = yes or no
    +  var value = name = variable name to evaluate for test of atom creation
    +  set values = dim vname
    +    dim = x or y or z
    +    name = name of variable to set with x,y,z atom position
       units value = lattice or box
         lattice = the geometry is defined in lattice units
         box = the geometry is defined in simulation box units 
    @@ -51,6 +55,7 @@
     
    create_atoms 1 box
     create_atoms 3 region regsphere basis 2 3
     create_atoms 3 single 0 0 5 
    +create_atoms 1 box var v set x xpos set y ypos 
     

    Description:

    @@ -189,6 +194,45 @@ box, it will mapped back into the box, assuming the relevant dimensions are periodic. If it is set to no, no remapping is done and no particle is created if its position is outside the box.

    +

    The var and set keywords can be used to provide a criterion for +accepting or rejecting the addition of an individual atom, based on +its coordinates. The vname specified for the var keyword is the +name of an equal-style variable which should evaluate +to a zero or non-zero value based on one or two or three variables +which will store the x, y, or z coordinates of an atom (one variable +per coordinate). These other variables must be equal-style +variables defined in the input script, but their +formula can by anything. The set keyword is used to identify the +names of these other variables, one variable for the x-coordinate of a +created atom, one for y, and one for z. +

    +

    When an atom is created, its x, y, or z coordinates override the +formula for any set variable that is defined. The var variable is +then evaluated. If the returned value is 0.0, the atom is not +created. If it is non-zero, the atom is created. +

    +

    As an example, these commands can be used in a 2d simulation, to +create a sinusoidal surface. Note that the surface is "rough" due to +individual lattice points being "above" or "below" the mathematical +expression for the sinusoidal curve. If a finer lattice were used, +the sinusoid would appear to be "smoother". Also note the use of the +"xlat" and "ylat" thermo_style keywords which +converts lattice spacings to distance. Click on the image for a +larger version. +

    +
    variable        x equal 100
    +variable        y equal 25
    +lattice		hex 0.8442
    +region		box block 0 $x 0 $y -0.5 0.5
    +create_box	1 box 
    +
    +
    variable        xx equal 0.0
    +variable        yy equal 0.0
    +variable        v equal "(0.2*v_y*ylat * cos(v_xx/xlat * 2.0*PI*4.0/v_x) + 0.5*v_y*ylat - v_yy) > 0.0"
    +create_atoms	1 box var v set x xx set y yy 
    +
    +
    +

    The units keyword determines the meaning of the distance units used to specify the coordinates of the one particle created by the single style. A box value selects standard distance units as defined by diff --git a/doc/create_atoms.txt b/doc/create_atoms.txt index 4552d43e9a..f395b87b32 100644 --- a/doc/create_atoms.txt +++ b/doc/create_atoms.txt @@ -24,7 +24,7 @@ style = {box} or {region} or {single} or {random} :l seed = random # seed (positive integer) region-ID = create atoms within this region, use NULL for entire simulation box :pre zero or more keyword/value pairs may be appended :l -keyword = {mol} or {basis} or {remap} or {units} :l +keyword = {mol} or {basis} or {remap} or {var} or {set} or {units} :l {mol} value = template-ID seed template-ID = ID of molecule template specified in a separate "molecule"_molecule.html command seed = random # seed (positive integer) @@ -32,6 +32,10 @@ keyword = {mol} or {basis} or {remap} or {units} :l M = which basis atom itype = atom type (1-N) to assign to this basis atom {remap} value = {yes} or {no} + {var} value = name = variable name to evaluate for test of atom creation + {set} values = dim vname + dim = {x} or {y} or {z} + name = name of variable to set with x,y,z atom position {units} value = {lattice} or {box} {lattice} = the geometry is defined in lattice units {box} = the geometry is defined in simulation box units :pre @@ -41,7 +45,8 @@ keyword = {mol} or {basis} or {remap} or {units} :l create_atoms 1 box create_atoms 3 region regsphere basis 2 3 -create_atoms 3 single 0 0 5 :pre +create_atoms 3 single 0 0 5 +create_atoms 1 box var v set x xpos set y ypos :pre [Description:] @@ -180,6 +185,45 @@ box, it will mapped back into the box, assuming the relevant dimensions are periodic. If it is set to {no}, no remapping is done and no particle is created if its position is outside the box. +The {var} and {set} keywords can be used to provide a criterion for +accepting or rejecting the addition of an individual atom, based on +its coordinates. The {vname} specified for the {var} keyword is the +name of an "equal-style variable"_variable.html which should evaluate +to a zero or non-zero value based on one or two or three variables +which will store the x, y, or z coordinates of an atom (one variable +per coordinate). These other variables must be "equal-style +variables"_variable.html defined in the input script, but their +formula can by anything. The {set} keyword is used to identify the +names of these other variables, one variable for the x-coordinate of a +created atom, one for y, and one for z. + +When an atom is created, its x, y, or z coordinates override the +formula for any {set} variable that is defined. The {var} variable is +then evaluated. If the returned value is 0.0, the atom is not +created. If it is non-zero, the atom is created. + +As an example, these commands can be used in a 2d simulation, to +create a sinusoidal surface. Note that the surface is "rough" due to +individual lattice points being "above" or "below" the mathematical +expression for the sinusoidal curve. If a finer lattice were used, +the sinusoid would appear to be "smoother". Also note the use of the +"xlat" and "ylat" "thermo_style"_thermo_style.html keywords which +converts lattice spacings to distance. Click on the image for a +larger version. + +variable x equal 100 +variable y equal 25 +lattice hex 0.8442 +region box block 0 $x 0 $y -0.5 0.5 +create_box 1 box :pre + +variable xx equal 0.0 +variable yy equal 0.0 +variable v equal "(0.2*v_y*ylat * cos(v_xx/xlat * 2.0*PI*4.0/v_x) + 0.5*v_y*ylat - v_yy) > 0.0" +create_atoms 1 box var v set x xx set y yy :pre + +:c,image(JPG/sinusoid_small.jpg,JPG/sinusoid.jpg) + The {units} keyword determines the meaning of the distance units used to specify the coordinates of the one particle created by the {single} style. A {box} value selects standard distance units as defined by diff --git a/doc/dump.html b/doc/dump.html index e83d9f5c5f..c13777fd3e 100644 --- a/doc/dump.html +++ b/doc/dump.html @@ -57,7 +57,7 @@ f_ID[N] = Nth column of local array calculated by a fix with ID

      custom of custom/mpiio args = list of atom attributes
    -    possible attributes = id, mol, type, element, mass,
    +    possible attributes = id, mol, proc, procp1, type, element, mass,
     			  x, y, z, xs, ys, zs, xu, yu, zu, 
     			  xsu, ysu, zsu, ix, iy, iz,
     			  vx, vy, vz, fx, fy, fz,
    @@ -69,6 +69,7 @@
     
          id = atom ID
           mol = molecule ID
           proc = ID of processor that owns atom
    +      procp1 = ID+1 of processor that owns atom
           type = atom type
           element = name of atom element, as defined by dump_modify command
           mass = atom mass
    @@ -460,18 +461,20 @@ dump 1 all local 1000 tmp.dump index c_1[1] c_1[2] c_1[3] c_2[1] c_2[2]
     

    This section explains the atom attributes that can be specified as part of the custom and cfg styles.

    -

    The id, mol, proc, type, element, mass, vx, vy, vz, -fx, fy, fz, q attributes are self-explanatory. +

    The id, mol, proc, procp1, type, element, mass, vx, +vy, vz, fx, fy, fz, q attributes are self-explanatory.

    Id is the atom ID. Mol is the molecule ID, included in the data file for molecular systems. Proc is the ID of the processor (0 to -Nprocs-1) that currently owns the atom. Type is the atom type. -Element is typically the chemical name of an element, which you must -assign to each type via the dump_modify element -command. More generally, it can be any string you wish to associated -with an atom type. Mass is the atom mass. Vx, vy, vz, fx, -fy, fz, and q are components of atom velocity and force and -atomic charge. +Nprocs-1) that currently owns the atom. Procp1 is the proc ID+1, +which can be convenient in place of a type attribute (1 to Ntypes) +for coloring atoms in a visualization program. Type is the atom +type (1 to Ntypes). Element is typically the chemical name of an +element, which you must assign to each type via the dump_modify +element command. More generally, it can be any +string you wish to associated with an atom type. Mass is the atom +mass. Vx, vy, vz, fx, fy, fz, and q are components of +atom velocity and force and atomic charge.

    There are several options for outputting atom coordinates. The x, y, z attributes write atom coordinates "unscaled", in the diff --git a/doc/dump.txt b/doc/dump.txt index 3f83f217ca..da12e99ed7 100644 --- a/doc/dump.txt +++ b/doc/dump.txt @@ -44,7 +44,7 @@ args = list of arguments for a particular style :l f_ID\[N\] = Nth column of local array calculated by a fix with ID :pre {custom} of {custom/mpiio} args = list of atom attributes - possible attributes = id, mol, type, element, mass, + possible attributes = id, mol, proc, procp1, type, element, mass, x, y, z, xs, ys, zs, xu, yu, zu, xsu, ysu, zsu, ix, iy, iz, vx, vy, vz, fx, fy, fz, @@ -56,6 +56,7 @@ args = list of arguments for a particular style :l id = atom ID mol = molecule ID proc = ID of processor that owns atom + procp1 = ID+1 of processor that owns atom type = atom type element = name of atom element, as defined by "dump_modify"_dump_modify.html command mass = atom mass @@ -78,8 +79,8 @@ args = list of arguments for a particular style :l f_ID = per-atom vector calculated by a fix with ID f_ID\[N\] = Nth column of per-atom array calculated by a fix with ID v_name = per-atom vector calculated by an atom-style variable with name - d_name = per-atom floating point vector with name managed by fix property/atom - i_name = per-atom integer vector with name managed by fix property/atom :pre + d_name = per-atom floating point vector with name, managed by fix property/atom + i_name = per-atom integer vector with name, managed by fix property/atom :pre :ule [Examples:] @@ -112,7 +113,7 @@ options; see details below. As described below, the filename determines the kind of output (text or binary or gzipped, one big file or one per timestep, one big file -or one per processor). +or multiple smaller files). IMPORTANT NOTE: Because periodic boundary conditions are enforced only on timesteps when neighbor lists are rebuilt, the coordinates of an @@ -127,7 +128,7 @@ if the "atom_modify sort"_atom_modify.html option is on, which it is by default. In this case atoms are re-ordered periodically during a simulation, due to spatial sorting. It is also true when running in parallel, because data for a single snapshot is collected from -multiple processors. +multiple processors, each of which owns a subset of the atoms. For the {atom}, {custom}, {cfg}, and {local} styles, sorting is off by default. For the {dcd}, {xtc}, {xyz}, and {molfile} styles, sorting by @@ -295,11 +296,12 @@ from using the (numerical) atom type to an element name (or some other label). This will help many visualization programs to guess bonds and colors. -Note that {atom}, {custom}, {dcd}, {xtc}, and {xyz} style dump files can -be read directly by "VMD"_http://www.ks.uiuc.edu/Research/vmd (a popular -molecular viewing program). See "Section tools"_Section_tools.html#vmd -of the manual and the tools/lmp2vmd/README.txt file for more information -about support in VMD for reading and visualizing LAMMPS dump files. +Note that {atom}, {custom}, {dcd}, {xtc}, and {xyz} style dump files +can be read directly by "VMD"_http://www.ks.uiuc.edu/Research/vmd, a +popular molecular viewing program. See "Section +tools"_Section_tools.html#vmd of the manual and the +tools/lmp2vmd/README.txt file for more information about support in +VMD for reading and visualizing LAMMPS dump files. :line @@ -332,7 +334,7 @@ tmp.dump.20000, etc. This option is not available for the {dcd} and {xtc} styles. Note that the "dump_modify pad"_dump_modify.html command can be used to insure all timestep numbers are the same length (e.g. 00010), which can make it easier to read a series of dump files -in order by some post-processing tools. +in order with some post-processing tools. If a "%" character appears in the filename, then each of P processors writes a portion of the dump file, and the "%" character is replaced @@ -447,18 +449,20 @@ dump 1 all local 1000 tmp.dump index c_1\[1\] c_1\[2\] c_1\[3\] c_2\[1\] c_2\[2\ This section explains the atom attributes that can be specified as part of the {custom} and {cfg} styles. -The {id}, {mol}, {proc}, {type}, {element}, {mass}, {vx}, {vy}, {vz}, -{fx}, {fy}, {fz}, {q} attributes are self-explanatory. +The {id}, {mol}, {proc}, {procp1}, {type}, {element}, {mass}, {vx}, +{vy}, {vz}, {fx}, {fy}, {fz}, {q} attributes are self-explanatory. {Id} is the atom ID. {Mol} is the molecule ID, included in the data file for molecular systems. {Proc} is the ID of the processor (0 to -Nprocs-1) that currently owns the atom. {Type} is the atom type. -{Element} is typically the chemical name of an element, which you must -assign to each type via the "dump_modify element"_dump_modify.html -command. More generally, it can be any string you wish to associated -with an atom type. {Mass} is the atom mass. {Vx}, {vy}, {vz}, {fx}, -{fy}, {fz}, and {q} are components of atom velocity and force and -atomic charge. +Nprocs-1) that currently owns the atom. {Procp1} is the proc ID+1, +which can be convenient in place of a {type} attribute (1 to Ntypes) +for coloring atoms in a visualization program. {Type} is the atom +type (1 to Ntypes). {Element} is typically the chemical name of an +element, which you must assign to each type via the "dump_modify +element"_dump_modify.html command. More generally, it can be any +string you wish to associated with an atom type. {Mass} is the atom +mass. {Vx}, {vy}, {vz}, {fx}, {fy}, {fz}, and {q} are components of +atom velocity and force and atomic charge. There are several options for outputting atom coordinates. The {x}, {y}, {z} attributes write atom coordinates "unscaled", in the diff --git a/doc/fix_store_state.txt b/doc/fix_store_state.txt index e40a0f49cc..01f73a3bbe 100644 --- a/doc/fix_store_state.txt +++ b/doc/fix_store_state.txt @@ -46,8 +46,8 @@ input = one or more atom attributes :l f_ID = per-atom vector calculated by a fix with ID f_ID\[I\] = Ith column of per-atom array calculated by a fix with ID v_name = per-atom vector calculated by an atom-style variable with name - d_name = per-atom floating point vector managed by fix property/atom - i_name = per-atom integer vector managed by fix property/atom :pre + d_name = per-atom floating point vector name, managed by fix property/atom + i_name = per-atom integer vector name, managed by fix property/atom :pre zero or more keyword/value pairs may be appended :l keyword = {com} :l diff --git a/doc/pair_table.html b/doc/pair_table.html index 21d9fc9d1f..1919b40002 100644 --- a/doc/pair_table.html +++ b/doc/pair_table.html @@ -119,8 +119,8 @@ to very steep parts of the potential.


    -

    The format of a tabulated file is as follows (without the -parenthesized comments): +

    The format of a tabulated file is a series of one or more sections, +defined as follows (without the parenthesized comments):

    # Morse potential for Fe   (one or more comment or blank lines) 
     
    diff --git a/doc/pair_table.txt b/doc/pair_table.txt index 4b83d2a8f5..8938de20b7 100644 --- a/doc/pair_table.txt +++ b/doc/pair_table.txt @@ -113,8 +113,8 @@ to very steep parts of the potential. :l,ule :line -The format of a tabulated file is as follows (without the -parenthesized comments): +The format of a tabulated file is a series of one or more sections, +defined as follows (without the parenthesized comments): # Morse potential for Fe (one or more comment or blank lines) :pre diff --git a/doc/variable.html b/doc/variable.html index 51daea0bf2..5222302cbe 100644 --- a/doc/variable.html +++ b/doc/variable.html @@ -50,7 +50,7 @@ constants = PI thermo keywords = vol, ke, press, etc from thermo_style math operators = (), -x, x+y, x-y, x*y, x/y, x^y, x%y, - x==y, x!=y, xy, x>=y, x&&y, x||y, !x + x == y, x != y, x < y, x <= y, x > y, x >= y, x && y, x || y, !x math functions = sqrt(x), exp(x), ln(x), log(x), abs(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x) @@ -368,7 +368,8 @@ references to other variables. Number 0.2, 100, 1.0e20, -15.4, etc Constant PI Thermo keywords vol, pe, ebond, etc -Math operators (), -x, x+y, x-y, x*y, x/y, x^y, x%y, x==y, x!=y, xy, x>=y, x&&y, x||y, !x +Math operators (), -x, x+y, x-y, x*y, x/y, x^y, x%y, +Math operators (), -x, x+y, x-y, x*y, x/y, x^y, x%y, x == y, x != y, x < y, x <= y, x > y, x >= y, x && y, x || y, !x Math functions sqrt(x), exp(x), ln(x), log(x), abs(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), ramp(x,y), stagger(x,y), logfreq(x,y,z), stride(x,y,z), vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z) Group functions count(ID), mass(ID), charge(ID), xcm(ID,dim), vcm(ID,dim), fcm(ID,dim), bound(ID,dir), gyration(ID), ke(ID), angmom(ID,dim), torque(ID,dim), inertia(ID,dimdim), omega(ID,dim) Region functions count(ID,IDR), mass(ID,IDR), charge(ID,IDR), xcm(ID,dim,IDR), vcm(ID,dim,IDR), fcm(ID,dim,IDR), bound(ID,dir,IDR), gyration(ID,IDR), ke(ID,IDR), angmom(ID,dim,IDR), torque(ID,dim,IDR), inertia(ID,dimdim,IDR), omega(ID,dim,IDR) diff --git a/doc/variable.txt b/doc/variable.txt index e9362de392..d7323b3efe 100644 --- a/doc/variable.txt +++ b/doc/variable.txt @@ -45,7 +45,7 @@ style = {delete} or {index} or {loop} or {world} or {universe} or {uloop} or {st constants = PI thermo keywords = vol, ke, press, etc from "thermo_style"_thermo_style.html math operators = (), -x, x+y, x-y, x*y, x/y, x^y, x%y, - x==y, x!=y, xy, x>=y, x&&y, x||y, !x + x == y, x != y, x < y, x <= y, x > y, x >= y, x && y, x || y, !x math functions = sqrt(x), exp(x), ln(x), log(x), abs(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x) @@ -361,7 +361,8 @@ references to other variables. Number: 0.2, 100, 1.0e20, -15.4, etc Constant: PI Thermo keywords: vol, pe, ebond, etc -Math operators: (), -x, x+y, x-y, x*y, x/y, x^y, x%y, x==y, x!=y, xy, x>=y, x&&y, x||y, !x +Math operators: (), -x, x+y, x-y, x*y, x/y, x^y, x%y, +Math operators: (), -x, x+y, x-y, x*y, x/y, x^y, x%y, x == y, x != y, x < y, x <= y, x > y, x >= y, x && y, x || y, !x Math functions: sqrt(x), exp(x), ln(x), log(x), abs(x), sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), ramp(x,y), stagger(x,y), logfreq(x,y,z), stride(x,y,z), vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z) Group functions: count(ID), mass(ID), charge(ID), xcm(ID,dim), \ vcm(ID,dim), fcm(ID,dim), bound(ID,dir), \ diff --git a/src/GRANULAR/fix_pour.cpp b/src/GRANULAR/fix_pour.cpp index 73a13e2192..9947d9a84a 100644 --- a/src/GRANULAR/fix_pour.cpp +++ b/src/GRANULAR/fix_pour.cpp @@ -37,10 +37,11 @@ using namespace LAMMPS_NS; using namespace FixConst; using namespace MathConst; -#define EPSILON 0.001 - enum{ATOM,MOLECULE}; enum{ONE,RANGE,POLY}; +enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files + +#define EPSILON 0.001 /* ---------------------------------------------------------------------- */ @@ -598,13 +599,25 @@ void FixPour::pre_exchange() if (newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] && newcoord[1] >= sublo[1] && newcoord[1] < subhi[1] && newcoord[2] >= sublo[2] && newcoord[2] < subhi[2]) flag = 1; - else if (dimension == 3 && newcoord[2] >= domain->boxhi[2] && - comm->myloc[2] == comm->procgrid[2]-1 && - newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] && - newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1; - else if (dimension == 2 && newcoord[1] >= domain->boxhi[1] && - comm->myloc[1] == comm->procgrid[1]-1 && - newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1; + else if (dimension == 3 && newcoord[2] >= domain->boxhi[2]) { + if (comm->layout != LAYOUT_TILED) { + if (comm->myloc[2] == comm->procgrid[2]-1 && + newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] && + newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1; + } else { + if (comm->mysplit[2][1] == 1.0 && + newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] && + newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1; + } + } else if (dimension == 2 && newcoord[1] >= domain->boxhi[1]) { + if (comm->layout != LAYOUT_TILED) { + if (comm->myloc[1] == comm->procgrid[1]-1 && + newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1; + } else { + if (comm->mysplit[1][1] == 1.0 && + newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1; + } + } if (flag) { if (mode == ATOM) atom->avec->create_atom(ntype,coords[m]); diff --git a/src/KSPACE/msm.cpp b/src/KSPACE/msm.cpp index 5ce73ee41f..81dbc98b47 100644 --- a/src/KSPACE/msm.cpp +++ b/src/KSPACE/msm.cpp @@ -148,6 +148,9 @@ void MSM::init() triclinic_check(); if (domain->dimension == 2) error->all(FLERR,"Cannot (yet) use MSM with 2d simulation"); + if (comm->style != 0) + error->universe_all(FLERR,"MSM can only currently be used with " + "comm_style brick"); if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); diff --git a/src/KSPACE/pppm.cpp b/src/KSPACE/pppm.cpp index 4d45ef2227..ec61833f76 100644 --- a/src/KSPACE/pppm.cpp +++ b/src/KSPACE/pppm.cpp @@ -183,6 +183,9 @@ void PPPM::init() "slab correction"); if (domain->dimension == 2) error->all(FLERR, "Cannot use PPPM with 2d simulation"); + if (comm->style != 0) + error->universe_all(FLERR,"PPPM can only currently be used with " + "comm_style brick"); if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp index 195aa47af3..1dfea3bf77 100644 --- a/src/KSPACE/pppm_disp.cpp +++ b/src/KSPACE/pppm_disp.cpp @@ -213,6 +213,9 @@ void PPPMDisp::init() triclinic_check(); if (domain->dimension == 2) error->all(FLERR,"Cannot use PPPMDisp with 2d simulation"); + if (comm->style != 0) + error->universe_all(FLERR,"PPPMDisp can only currently be used with " + "comm_style brick"); if (slabflag == 0 && domain->nonperiodic > 0) error->all(FLERR,"Cannot use nonperiodic boundaries with PPPMDisp"); diff --git a/src/MISC/fix_deposit.cpp b/src/MISC/fix_deposit.cpp index a804028397..213d39329b 100644 --- a/src/MISC/fix_deposit.cpp +++ b/src/MISC/fix_deposit.cpp @@ -37,6 +37,7 @@ using namespace FixConst; using namespace MathConst; enum{ATOM,MOLECULE}; +enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files #define EPSILON 1.0e6 @@ -452,13 +453,25 @@ void FixDeposit::pre_exchange() if (newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] && newcoord[1] >= sublo[1] && newcoord[1] < subhi[1] && newcoord[2] >= sublo[2] && newcoord[2] < subhi[2]) flag = 1; - else if (dimension == 3 && newcoord[2] >= domain->boxhi[2] && - comm->myloc[2] == comm->procgrid[2]-1 && - newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] && - newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1; - else if (dimension == 2 && newcoord[1] >= domain->boxhi[1] && - comm->myloc[1] == comm->procgrid[1]-1 && - newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1; + else if (dimension == 3 && newcoord[2] >= domain->boxhi[2]) { + if (comm->layout != LAYOUT_TILED) { + if (comm->myloc[2] == comm->procgrid[2]-1 && + newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] && + newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1; + } else { + if (comm->mysplit[2][1] == 1.0 && + newcoord[0] >= sublo[0] && newcoord[0] < subhi[0] && + newcoord[1] >= sublo[1] && newcoord[1] < subhi[1]) flag = 1; + } + } else if (dimension == 2 && newcoord[1] >= domain->boxhi[1]) { + if (comm->layout != LAYOUT_TILED) { + if (comm->myloc[1] == comm->procgrid[1]-1 && + newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1; + } else { + if (comm->mysplit[1][1] == 1.0 && + newcoord[0] >= sublo[0] && newcoord[0] < subhi[0]) flag = 1; + } + } if (flag) { if (mode == ATOM) atom->avec->create_atom(ntype,coords[m]); diff --git a/src/REPLICA/verlet_split.cpp b/src/REPLICA/verlet_split.cpp index ac45b4f56d..734b070541 100644 --- a/src/REPLICA/verlet_split.cpp +++ b/src/REPLICA/verlet_split.cpp @@ -52,6 +52,9 @@ VerletSplit::VerletSplit(LAMMPS *lmp, int narg, char **arg) : if (universe->procs_per_world[0] % universe->procs_per_world[1]) error->universe_all(FLERR,"Verlet/split requires Rspace partition " "size be multiple of Kspace partition size"); + if (comm->style != 0) + error->universe_all(FLERR,"Verlet/split can only currently be used with " + "comm_style brick"); // master = 1 for Rspace procs, 0 for Kspace procs @@ -214,6 +217,9 @@ VerletSplit::~VerletSplit() void VerletSplit::init() { + if (comm->style != 0) + error->universe_all(FLERR,"Verlet/split can only currently be used with " + "comm_style brick"); if (!force->kspace && comm->me == 0) error->warning(FLERR,"No Kspace calculation with verlet/split"); diff --git a/src/SHOCK/fix_append_atoms.cpp b/src/SHOCK/fix_append_atoms.cpp index 5e7af01f6e..571e77b129 100644 --- a/src/SHOCK/fix_append_atoms.cpp +++ b/src/SHOCK/fix_append_atoms.cpp @@ -29,6 +29,8 @@ using namespace LAMMPS_NS; using namespace FixConst; +enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files + #define BIG 1.0e30 #define EPSILON 1.0e-6 @@ -410,7 +412,15 @@ void FixAppendAtoms::pre_exchange() if (ntimestep % freq == 0) { if (spatflag==1) if (get_spatial()==0) return; - if (comm->myloc[2] == comm->procgrid[2]-1) { + + int addflag = 0; + if (comm->layout != LAYOUT_TILED) { + if (comm->myloc[2] == comm->procgrid[2]-1) addflag = 1; + } else { + if (comm->mysplit[2][1] == 1.0) addflag = 1; + } + + if (addflag) { double bboxlo[3],bboxhi[3]; bboxlo[0] = domain->sublo[0]; bboxhi[0] = domain->subhi[0]; diff --git a/src/SRD/fix_srd.cpp b/src/SRD/fix_srd.cpp index 3db73bab51..37dc068797 100644 --- a/src/SRD/fix_srd.cpp +++ b/src/SRD/fix_srd.cpp @@ -334,9 +334,12 @@ void FixSRD::init() if (bigexist && comm->ghost_velocity == 0) error->all(FLERR,"Fix srd requires ghost atoms store velocity"); if (bigexist && collidestyle == NOSLIP && !atom->torque_flag) - error->all(FLERR,"Fix SRD no-slip requires atom attribute torque"); + error->all(FLERR,"Fix srd no-slip requires atom attribute torque"); if (initflag && update->dt != dt_big) error->all(FLERR,"Cannot change timestep once fix srd is setup"); + if (comm->style != 0) + error->universe_all(FLERR,"Fix srd can only currently be used with " + "comm_style brick"); // orthogonal vs triclinic simulation box // could be static or shearing box diff --git a/src/STUBS/mpi.c b/src/STUBS/mpi.c index 718e5fb475..987479bb7b 100644 --- a/src/STUBS/mpi.c +++ b/src/STUBS/mpi.c @@ -20,7 +20,7 @@ #include #include "mpi.h" -/* lo-level data structure */ +/* data structure for double/int */ struct _mpi_double_int { double value; @@ -28,6 +28,15 @@ struct _mpi_double_int { }; typedef struct _mpi_double_int double_int; +/* extra MPI_Datatypes registered by MPI_Type_contiguous */ + +#define MAXEXTRA_DATATYPE 16 + +int nextra_datatype; +MPI_Datatype *ptr_datatype[MAXEXTRA_DATATYPE]; +int index_datatype[MAXEXTRA_DATATYPE]; +int size_datatype[MAXEXTRA_DATATYPE]; + static int _mpi_is_initialized=0; /* ---------------------------------------------------------------------- */ @@ -135,6 +144,8 @@ double MPI_Wtime() /* ---------------------------------------------------------------------- */ +/* include sizes of user defined datatypes, stored in extra lists */ + static int stubtypesize(MPI_Datatype datatype) { if (datatype == MPI_INT) return sizeof(int); @@ -145,6 +156,12 @@ static int stubtypesize(MPI_Datatype datatype) else if (datatype == MPI_LONG) return sizeof(long); else if (datatype == MPI_LONG_LONG) return sizeof(uint64_t); else if (datatype == MPI_DOUBLE_INT) return sizeof(double_int); + else { + int i; + for (i = 0; i < nextra_datatype; i++) + if (datatype == index_datatype[i]) return size_datatype[i]; + } + return 0; } /* ---------------------------------------------------------------------- */ @@ -339,6 +356,66 @@ int MPI_Cart_rank(MPI_Comm comm, int *coords, int *rank) /* ---------------------------------------------------------------------- */ +/* store size of user datatype in extra lists */ + +int MPI_Type_contiguous(int count, MPI_Datatype oldtype, + MPI_Datatype *newtype) +{ + if (nextra_datatype = MAXEXTRA_DATATYPE) return -1; + ptr_datatype[nextra_datatype] = newtype; + index_datatype[nextra_datatype] = -(nextra_datatype + 1); + size_datatype[nextra_datatype] = count * stubtypesize(oldtype); + nextra_datatype++; + return 0; +} + +/* ---------------------------------------------------------------------- */ + +/* set value of user datatype to internal negative index, + based on match of ptr */ + +int MPI_Type_commit(MPI_Datatype *datatype) +{ + int i; + for (i = 0; i < nextra_datatype; i++) + if (datatype == ptr_datatype[i]) *datatype = index_datatype[i]; + return 0; +} + +/* ---------------------------------------------------------------------- */ + +/* remove user datatype from extra lists */ + +int MPI_Type_free(MPI_Datatype *datatype) +{ + int i; + for (i = 0; i < nextra_datatype; i++) + if (datatype == ptr_datatype[i]) { + ptr_datatype[i] = ptr_datatype[nextra_datatype-1]; + index_datatype[i] = index_datatype[nextra_datatype-1]; + size_datatype[i] = size_datatype[nextra_datatype-1]; + nextra_datatype--; + break; + } + return 0; +} + +/* ---------------------------------------------------------------------- */ + +int MPI_Op_create(MPI_User_function *function, int commute, MPI_Op *op) +{ + return 0; +} + +/* ---------------------------------------------------------------------- */ + +int MPI_Op_free(MPI_Op *op) +{ + return 0; +} + +/* ---------------------------------------------------------------------- */ + int MPI_Barrier(MPI_Comm comm) {return 0;} /* ---------------------------------------------------------------------- */ diff --git a/src/STUBS/mpi.h b/src/STUBS/mpi.h index 0d93326b45..07ea1027eb 100644 --- a/src/STUBS/mpi.h +++ b/src/STUBS/mpi.h @@ -64,6 +64,9 @@ extern "C" { #define MPI_MAX_PROCESSOR_NAME 128 +typedef void MPI_User_function(void *invec, void *inoutvec, + int *len, MPI_Datatype *datatype); + /* MPI data structs */ struct _MPI_Status { @@ -123,6 +126,13 @@ int MPI_Cart_shift(MPI_Comm comm, int direction, int displ, int *source, int *dest); int MPI_Cart_rank(MPI_Comm comm, int *coords, int *rank); +int MPI_Type_contiguous(int count, MPI_Datatype oldtype, + MPI_Datatype *newtype); +int MPI_Type_commit(MPI_Datatype *datatype); +int MPI_Type_free(MPI_Datatype *datatype); + +int MPI_Op_create(MPI_User_function *function, int commute, MPI_Op *op); +int MPI_Op_free(MPI_Op *op); int MPI_Barrier(MPI_Comm comm); int MPI_Bcast(void *buf, int count, MPI_Datatype datatype, diff --git a/src/USER-CUDA/pppm_cuda.cpp b/src/USER-CUDA/pppm_cuda.cpp index 9754dd1f94..2a2973c900 100644 --- a/src/USER-CUDA/pppm_cuda.cpp +++ b/src/USER-CUDA/pppm_cuda.cpp @@ -211,6 +211,9 @@ void PPPMCuda::init() // error check if (domain->dimension == 2) error->all(FLERR,"Cannot use PPPMCuda with 2d simulation"); + if (comm->style != 0) + error->universe_all(FLERR,"PPPMCuda can only currently be used with " + "comm_style brick"); if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q"); diff --git a/src/USER-LB/fix_lb_fluid.cpp b/src/USER-LB/fix_lb_fluid.cpp index 0161fd658d..4706d576e4 100644 --- a/src/USER-LB/fix_lb_fluid.cpp +++ b/src/USER-LB/fix_lb_fluid.cpp @@ -87,6 +87,10 @@ FixLbFluid::FixLbFluid(LAMMPS *lmp, int narg, char **arg) : if(narg <7) error->all(FLERR,"Illegal fix lb/fluid command"); + if (comm->style != 0) + error->universe_all(FLERR,"Fix lb/fluid can only currently be used with " + "comm_style brick"); + MPI_Comm_rank(world,&me); MPI_Comm_size(world,&nprocs); @@ -566,9 +570,12 @@ int FixLbFluid::setmask() void FixLbFluid::init(void) { - int i,j; + if (comm->style != 0) + error->universe_all(FLERR,"Fix lb/fluid can only currently be used with " + "comm_style brick"); + //-------------------------------------------------------------------------- // Check to see if the MD timestep has changed between runs. //-------------------------------------------------------------------------- diff --git a/src/atom.cpp b/src/atom.cpp index ff2b15f4a0..78e317299c 100644 --- a/src/atom.cpp +++ b/src/atom.cpp @@ -47,6 +47,8 @@ using namespace MathConst; #define CUDA_CHUNK 3000 #define MAXBODY 20 // max # of lines in one body, also in ReadData class +enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files + /* ---------------------------------------------------------------------- */ Atom::Atom(LAMMPS *lmp) : Pointers(lmp) @@ -756,17 +758,33 @@ void Atom::data_atoms(int n, char *buf) sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2]; } - if (domain->xperiodic) { - if (comm->myloc[0] == 0) sublo[0] -= epsilon[0]; - if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0]; - } - if (domain->yperiodic) { - if (comm->myloc[1] == 0) sublo[1] -= epsilon[1]; - if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1]; - } - if (domain->zperiodic) { - if (comm->myloc[2] == 0) sublo[2] -= epsilon[2]; - if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2]; + if (comm->layout != LAYOUT_TILED) { + if (domain->xperiodic) { + if (comm->myloc[0] == 0) sublo[0] -= epsilon[0]; + if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0]; + } + if (domain->yperiodic) { + if (comm->myloc[1] == 0) sublo[1] -= epsilon[1]; + if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1]; + } + if (domain->zperiodic) { + if (comm->myloc[2] == 0) sublo[2] -= epsilon[2]; + if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2]; + } + + } else { + if (domain->xperiodic) { + if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0]; + if (comm->mysplit[0][1] == 1.0) subhi[0] += epsilon[0]; + } + if (domain->yperiodic) { + if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1]; + if (comm->mysplit[1][1] == 1.0) subhi[1] += epsilon[1]; + } + if (domain->zperiodic) { + if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2]; + if (comm->mysplit[2][1] == 1.0) subhi[2] += epsilon[2]; + } } // xptr = which word in line starts xyz coords diff --git a/src/balance.cpp b/src/balance.cpp index a76a7ce364..8675d8568e 100644 --- a/src/balance.cpp +++ b/src/balance.cpp @@ -21,6 +21,7 @@ #include "balance.h" #include "atom.h" #include "comm.h" +#include "rcb.h" #include "irregular.h" #include "domain.h" #include "force.h" @@ -30,9 +31,10 @@ using namespace LAMMPS_NS; -enum{XYZ,SHIFT,RCB}; +enum{XYZ,SHIFT,BISECTION}; enum{NONE,UNIFORM,USER}; enum{X,Y,Z}; +enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files /* ---------------------------------------------------------------------- */ @@ -47,6 +49,8 @@ Balance::Balance(LAMMPS *lmp) : Pointers(lmp) user_xsplit = user_ysplit = user_zsplit = NULL; shift_allocate = 0; + rcb = NULL; + fp = NULL; firststep = 1; } @@ -74,6 +78,8 @@ Balance::~Balance() delete [] hisum; } + delete rcb; + if (fp) fclose(fp); } @@ -176,7 +182,7 @@ void Balance::command(int narg, char **arg) } else if (strcmp(arg[iarg],"rcb") == 0) { if (style != -1) error->all(FLERR,"Illegal balance command"); - style = RCB; + style = BISECTION; iarg++; } else break; @@ -232,6 +238,9 @@ void Balance::command(int narg, char **arg) } } + if (style == BISECTION && comm->style == 0) + error->all(FLERR,"Balance rcb cannot be used with comm_style brick"); + // insure atoms are in current box & update box via shrink-wrap // init entire system since comm->setup is done // comm::init needs neighbor::init needs pair::init needs kspace::init, etc @@ -251,8 +260,10 @@ void Balance::command(int narg, char **arg) double imbinit = imbalance_nlocal(maxinit); // no load-balance if imbalance doesn't exceed threshhold + // unless switching from tiled to non tiled layout, then force rebalance - if (imbinit < thresh) return; + if (comm->layout == LAYOUT_TILED && style != BISECTION) { + } else if (imbinit < thresh) return; // debug output of initial state @@ -262,80 +273,65 @@ void Balance::command(int narg, char **arg) int niter = 0; - // NOTE: if using XYZ or SHIFT and current partition is TILING, - // then need to create initial BRICK partition before performing LB - // perform load-balance // style XYZ = explicit setting of cutting planes of logical 3d grid if (style == XYZ) { + if (comm->layout == LAYOUT_UNIFORM) { + if (xflag == USER || yflag == USER || zflag == USER) + comm->layout = LAYOUT_NONUNIFORM; + } else if (comm->style == LAYOUT_NONUNIFORM) { + if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM) + comm->layout = LAYOUT_UNIFORM; + } else if (comm->style == LAYOUT_TILED) { + if (xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM) + comm->layout = LAYOUT_UNIFORM; + else comm->layout = LAYOUT_NONUNIFORM; + } + if (xflag == UNIFORM) { for (int i = 0; i < procgrid[0]; i++) comm->xsplit[i] = i * 1.0/procgrid[0]; comm->xsplit[procgrid[0]] = 1.0; - } + } else if (xflag == USER) + for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i]; if (yflag == UNIFORM) { for (int i = 0; i < procgrid[1]; i++) comm->ysplit[i] = i * 1.0/procgrid[1]; comm->ysplit[procgrid[1]] = 1.0; - } + } else if (yflag == USER) + for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i]; if (zflag == UNIFORM) { for (int i = 0; i < procgrid[2]; i++) comm->zsplit[i] = i * 1.0/procgrid[2]; comm->zsplit[procgrid[2]] = 1.0; - } - - if (xflag == USER) - for (int i = 0; i <= procgrid[0]; i++) comm->xsplit[i] = user_xsplit[i]; - - if (yflag == USER) - for (int i = 0; i <= procgrid[1]; i++) comm->ysplit[i] = user_ysplit[i]; - - if (zflag == USER) + } else if (zflag == USER) for (int i = 0; i <= procgrid[2]; i++) comm->zsplit[i] = user_zsplit[i]; } // style SHIFT = adjust cutting planes of logical 3d grid if (style == SHIFT) { - static_setup(bstr); + comm->layout = LAYOUT_NONUNIFORM; + shift_setup_static(bstr); niter = shift(); } - // style RCB = + // style BISECTION = recursive coordinate bisectioning - if (style == RCB) { - error->all(FLERR,"Balance rcb is not yet supported"); - - if (comm->style == 0) - error->all(FLERR,"Cannot use balance rcb with comm_style brick"); + if (style == BISECTION) { + comm->layout = LAYOUT_TILED; + bisection(1); } // output of final result if (outflag && me == 0) dumpout(update->ntimestep,fp); - // reset comm->uniform flag if necessary - - if (comm->uniform) { - if (style == SHIFT) comm->uniform = 0; - if (style == XYZ && xflag == USER) comm->uniform = 0; - if (style == XYZ && yflag == USER) comm->uniform = 0; - if (style == XYZ && zflag == USER) comm->uniform = 0; - } else { - if (dimension == 3) { - if (style == XYZ && - xflag == UNIFORM && yflag == UNIFORM && zflag == UNIFORM) - comm->uniform = 1; - } else { - if (style == XYZ && xflag == UNIFORM && yflag == UNIFORM) - comm->uniform = 1; - } - } - // reset proc sub-domains + // for either brick or tiled comm style if (domain->triclinic) domain->set_lamda_box(); domain->set_local_box(); @@ -344,7 +340,8 @@ void Balance::command(int narg, char **arg) if (domain->triclinic) domain->x2lamda(atom->nlocal); Irregular *irregular = new Irregular(lmp); - irregular->migrate_atoms(1); + if (style == BISECTION) irregular->migrate_atoms(1,rcb->sendproc); + else irregular->migrate_atoms(1); delete irregular; if (domain->triclinic) domain->lamda2x(atom->nlocal); @@ -382,34 +379,36 @@ void Balance::command(int narg, char **arg) } } - if (me == 0) { - if (screen) { - fprintf(screen," x cuts:"); - for (int i = 0; i <= comm->procgrid[0]; i++) - fprintf(screen," %g",comm->xsplit[i]); - fprintf(screen,"\n"); - fprintf(screen," y cuts:"); - for (int i = 0; i <= comm->procgrid[1]; i++) - fprintf(screen," %g",comm->ysplit[i]); - fprintf(screen,"\n"); - fprintf(screen," z cuts:"); - for (int i = 0; i <= comm->procgrid[2]; i++) - fprintf(screen," %g",comm->zsplit[i]); - fprintf(screen,"\n"); - } - if (logfile) { - fprintf(logfile," x cuts:"); - for (int i = 0; i <= comm->procgrid[0]; i++) - fprintf(logfile," %g",comm->xsplit[i]); - fprintf(logfile,"\n"); - fprintf(logfile," y cuts:"); - for (int i = 0; i <= comm->procgrid[1]; i++) - fprintf(logfile," %g",comm->ysplit[i]); - fprintf(logfile,"\n"); - fprintf(logfile," z cuts:"); - for (int i = 0; i <= comm->procgrid[2]; i++) - fprintf(logfile," %g",comm->zsplit[i]); - fprintf(logfile,"\n"); + if (style != BISECTION) { + if (me == 0) { + if (screen) { + fprintf(screen," x cuts:"); + for (int i = 0; i <= comm->procgrid[0]; i++) + fprintf(screen," %g",comm->xsplit[i]); + fprintf(screen,"\n"); + fprintf(screen," y cuts:"); + for (int i = 0; i <= comm->procgrid[1]; i++) + fprintf(screen," %g",comm->ysplit[i]); + fprintf(screen,"\n"); + fprintf(screen," z cuts:"); + for (int i = 0; i <= comm->procgrid[2]; i++) + fprintf(screen," %g",comm->zsplit[i]); + fprintf(screen,"\n"); + } + if (logfile) { + fprintf(logfile," x cuts:"); + for (int i = 0; i <= comm->procgrid[0]; i++) + fprintf(logfile," %g",comm->xsplit[i]); + fprintf(logfile,"\n"); + fprintf(logfile," y cuts:"); + for (int i = 0; i <= comm->procgrid[1]; i++) + fprintf(logfile," %g",comm->ysplit[i]); + fprintf(logfile,"\n"); + fprintf(logfile," z cuts:"); + for (int i = 0; i <= comm->procgrid[2]; i++) + fprintf(logfile," %g",comm->zsplit[i]); + fprintf(logfile,"\n"); + } } } } @@ -467,13 +466,59 @@ double Balance::imbalance_splits(int &max) return imbalance; } +/* ---------------------------------------------------------------------- + perform balancing via RCB class + sortflag = flag for sorting order of received messages by proc ID +------------------------------------------------------------------------- */ + +int *Balance::bisection(int sortflag) +{ + if (!rcb) rcb = new RCB(lmp); + + // NOTE: lo/hi args could be simulation box or particle bounding box + // if particle bbox, then mysplit needs to be reset to sim box + // NOTE: triclinic needs to be in lamda coords + + int dim = domain->dimension; + double *boxlo = domain->boxlo; + double *boxhi = domain->boxhi; + double *prd = domain->prd; + + rcb->compute(dim,atom->nlocal,atom->x,NULL,boxlo,boxhi); + rcb->invert(sortflag); + + // NOTE: this logic is specific to orthogonal boxes, not triclinic + + comm->rcbnew = 1; + comm->rcbcut = rcb->cut; + comm->rcbcutdim = rcb->cutdim; + + double (*mysplit)[2] = comm->mysplit; + + mysplit[0][0] = (rcb->lo[0] - boxlo[0]) / prd[0]; + if (rcb->hi[0] == boxhi[0]) mysplit[0][1] = 1.0; + else mysplit[0][1] = (rcb->hi[0] - boxlo[0]) / prd[0]; + + mysplit[1][0] = (rcb->lo[1] - boxlo[1]) / prd[1]; + if (rcb->hi[1] == boxhi[1]) mysplit[1][1] = 1.0; + else mysplit[1][1] = (rcb->hi[1] - boxlo[1]) / prd[1]; + + mysplit[2][0] = (rcb->lo[2] - boxlo[2]) / prd[2]; + if (rcb->hi[2] == boxhi[2]) mysplit[2][1] = 1.0; + else mysplit[2][1] = (rcb->hi[2] - boxlo[2]) / prd[2]; + + // return list of procs to send my atoms to + + return rcb->sendproc; +} + /* ---------------------------------------------------------------------- setup static load balance operations - called from command + called from command and indirectly initially from fix balance set rho = 0 for static balancing ------------------------------------------------------------------------- */ -void Balance::static_setup(char *str) +void Balance::shift_setup_static(char *str) { shift_allocate = 1; @@ -498,21 +543,35 @@ void Balance::static_setup(char *str) losum = new bigint[max+1]; hisum = new bigint[max+1]; + // if current layout is TILED, set initial uniform splits in Comm + // this gives starting point to subsequent shift balancing + + if (comm->layout == LAYOUT_TILED) { + int *procgrid = comm->procgrid; + double *xsplit = comm->xsplit; + double *ysplit = comm->ysplit; + double *zsplit = comm->zsplit; + + for (int i = 0; i < procgrid[0]; i++) xsplit[i] = i * 1.0/procgrid[0]; + for (int i = 0; i < procgrid[1]; i++) ysplit[i] = i * 1.0/procgrid[1]; + for (int i = 0; i < procgrid[2]; i++) zsplit[i] = i * 1.0/procgrid[2]; + xsplit[procgrid[0]] = ysplit[procgrid[1]] = zsplit[procgrid[2]] = 1.0; + } + rho = 0; } /* ---------------------------------------------------------------------- setup shift load balance operations called from fix balance - set rho = 1 for shift balancing after call to shift_setup() + set rho = 1 to do dynamic balancing after call to shift_setup_static() ------------------------------------------------------------------------- */ void Balance::shift_setup(char *str, int nitermax_in, double thresh_in) { - static_setup(str); + shift_setup_static(str); nitermax = nitermax_in; stopthresh = thresh_in; - rho = 1; } @@ -525,7 +584,7 @@ void Balance::shift_setup(char *str, int nitermax_in, double thresh_in) int Balance::shift() { int i,j,k,m,np,max; - double *split = NULL; + double *split; // no balancing if no atoms @@ -590,7 +649,7 @@ int Balance::shift() // iterate until balanced #ifdef BALANCE_DEBUG - if (me == 0) debug_output(idim,0,np,split); + if (me == 0) debug_shift_output(idim,0,np,split); #endif int doneflag; @@ -601,7 +660,7 @@ int Balance::shift() niter++; #ifdef BALANCE_DEBUG - if (me == 0) debug_output(idim,m+1,np,split); + if (me == 0) debug_shift_output(idim,m+1,np,split); if (me == 0 && fp) dumpout(update->ntimestep,fp); #endif @@ -827,7 +886,7 @@ void Balance::dumpout(bigint tstep, FILE *bfp) int nx = comm->procgrid[0] + 1; int ny = comm->procgrid[1] + 1; - //int nz = comm->procgrid[2] + 1; + int nz = comm->procgrid[2] + 1; if (dimension == 2) { int m = 0; @@ -914,7 +973,7 @@ void Balance::dumpout(bigint tstep, FILE *bfp) ------------------------------------------------------------------------- */ #ifdef BALANCE_DEBUG -void Balance::debug_output(int idim, int m, int np, double *split) +void Balance::debug_shift_output(int idim, int m, int np, double *split) { int i; const char *dim = NULL; diff --git a/src/balance.h b/src/balance.h index 66ad6db721..69ff41abaf 100644 --- a/src/balance.h +++ b/src/balance.h @@ -27,11 +27,14 @@ namespace LAMMPS_NS { class Balance : protected Pointers { public: + class RCB *rcb; + Balance(class LAMMPS *); ~Balance(); void command(int, char **); void shift_setup(char *, int, double); int shift(); + int *bisection(int sortflag = 0); double imbalance_nlocal(int &); void dumpout(bigint, FILE *); @@ -66,13 +69,13 @@ class Balance : protected Pointers { FILE *fp; int firststep; - void static_setup(char *); double imbalance_splits(int &); + void shift_setup_static(char *); void tally(int, int, double *); int adjust(int, double *); int binary(double, int, double *); #ifdef BALANCE_DEBUG - void debug_output(int, int, int, double *); + void debug_shift_output(int, int, int, double *); #endif }; diff --git a/src/comm.cpp b/src/comm.cpp index a80db36d95..ad4b18786e 100644 --- a/src/comm.cpp +++ b/src/comm.cpp @@ -12,6 +12,7 @@ ------------------------------------------------------------------------- */ #include "mpi.h" +#include "stdlib.h" #include "string.h" #include "comm.h" #include "universe.h" @@ -20,9 +21,14 @@ #include "domain.h" #include "group.h" #include "procmap.h" +#include "accelerator_kokkos.h" #include "memory.h" #include "error.h" +#ifdef _OPENMP +#include "omp.h" +#endif + using namespace LAMMPS_NS; enum{SINGLE,MULTI}; // same as in Comm sub-styles @@ -47,25 +53,96 @@ Comm::Comm(LAMMPS *lmp) : Pointers(lmp) gridflag = ONELEVEL; mapflag = CART; customfile = NULL; + outfile = NULL; recv_from_partition = send_to_partition = -1; otherflag = 0; - outfile = NULL; + maxexchange_atom = maxexchange_fix = 0; + grid2proc = NULL; xsplit = ysplit = zsplit = NULL; + rcbnew = 0; + + // use of OpenMP threads + // query OpenMP for number of threads/process set by user at run-time + // if the OMP_NUM_THREADS environment variable is not set, we default + // to using 1 thread. This follows the principle of the least surprise, + // while practically all OpenMP implementations violate it by using + // as many threads as there are (virtual) CPU cores by default. + + nthreads = 1; +#ifdef _OPENMP + if (lmp->kokkos) { + nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa; + } else if (getenv("OMP_NUM_THREADS") == NULL) { + nthreads = 1; + if (me == 0) + error->warning(FLERR,"OMP_NUM_THREADS environment is not set."); + } else { + nthreads = omp_get_max_threads(); + } + + // enforce consistent number of threads across all MPI tasks + + MPI_Bcast(&nthreads,1,MPI_INT,0,world); + if (!lmp->kokkos) omp_set_num_threads(nthreads); + + if (me == 0) { + if (screen) + fprintf(screen," using %d OpenMP thread(s) per MPI task\n",nthreads); + if (logfile) + fprintf(logfile," using %d OpenMP thread(s) per MPI task\n",nthreads); + } +#endif + } /* ---------------------------------------------------------------------- */ Comm::~Comm() { + memory->destroy(grid2proc); memory->destroy(xsplit); memory->destroy(ysplit); memory->destroy(zsplit); - delete [] customfile; delete [] outfile; } +/* ---------------------------------------------------------------------- + deep copy of arrays from old Comm class to new one + all public/protected vectors/arrays in parent Comm class must be copied + called from alternate constructor of child classes + when new comm style is created from Input +------------------------------------------------------------------------- */ + +void Comm::copy_arrays(Comm *oldcomm) +{ + if (oldcomm->grid2proc) { + memory->create(grid2proc,procgrid[0],procgrid[1],procgrid[2], + "comm:grid2proc"); + memcpy(&grid2proc[0][0][0],&oldcomm->grid2proc[0][0][0], + (procgrid[0]*procgrid[1]*procgrid[2])*sizeof(int)); + + memory->create(xsplit,procgrid[0]+1,"comm:xsplit"); + memory->create(ysplit,procgrid[1]+1,"comm:ysplit"); + memory->create(zsplit,procgrid[2]+1,"comm:zsplit"); + memcpy(xsplit,oldcomm->xsplit,(procgrid[0]+1)*sizeof(double)); + memcpy(ysplit,oldcomm->ysplit,(procgrid[1]+1)*sizeof(double)); + memcpy(zsplit,oldcomm->zsplit,(procgrid[2]+1)*sizeof(double)); + } + + if (customfile) { + int n = strlen(oldcomm->customfile) + 1; + customfile = new char[n]; + strcpy(customfile,oldcomm->customfile); + } + if (outfile) { + int n = strlen(oldcomm->outfile) + 1; + outfile = new char[n]; + strcpy(outfile,oldcomm->outfile); + } +} + /* ---------------------------------------------------------------------- modify communication params invoked from input script by comm_modify command diff --git a/src/comm.h b/src/comm.h index e50def8c53..353ea131d1 100644 --- a/src/comm.h +++ b/src/comm.h @@ -21,21 +21,15 @@ namespace LAMMPS_NS { class Comm : protected Pointers { public: int style; // comm pattern: 0 = 6-way stencil, 1 = irregular tiling - int layout; // current proc domains: 0 = logical bricks, 1 = general tiling - // can do style=1 on layout=0, but not vice versa - // NOTE: uniform needs to be subsumed into layout - int uniform; // 1 = equal subdomains, 0 = load-balanced + int layout; // LAYOUT_UNIFORM = logical equal-sized bricks + // LAYOUT_NONUNIFORM = logical bricks, + // but different sizes due to LB + // LAYOUT_TILED = general tiling, due to RCB LB int me,nprocs; // proc info - int procgrid[3]; // procs assigned in each dim of 3d grid - int user_procgrid[3]; // user request for procs in each dim - int myloc[3]; // which proc I am in each dim - int procneigh[3][2]; // my 6 neighboring procs, 0/1 = left/right int ghost_velocity; // 1 if ghost atoms have velocity, 0 if not - double *xsplit,*ysplit,*zsplit; // fractional (0-1) sub-domain sizes double cutghost[3]; // cutoffs used for acquiring ghost atoms double cutghostuser; // user-specified ghost cutoff - int ***grid2proc; // which proc owns i,j,k loc in 3d grid int recv_from_partition; // recv proc layout from this partition int send_to_partition; // send my proc layout to this partition // -1 if no recv or send @@ -45,8 +39,27 @@ class Comm : protected Pointers { int maxexchange_fix; // max contribution to exchange from Fixes int nthreads; // OpenMP threads per MPI process + // public settings specific to layout = UNIFORM, NONUNIFORM + + int procgrid[3]; // procs assigned in each dim of 3d grid + int user_procgrid[3]; // user request for procs in each dim + int myloc[3]; // which proc I am in each dim + int procneigh[3][2]; // my 6 neighboring procs, 0/1 = left/right + double *xsplit,*ysplit,*zsplit; // fractional (0-1) sub-domain sizes + int ***grid2proc; // which proc owns i,j,k loc in 3d grid + + // public settings specific to layout = TILED + + int rcbnew; // 1 if just reset by rebalance, else 0 + double mysplit[3][2]; // fractional (0-1) bounds of my sub-domain + double rcbcut; // RCB cut by this proc + int rcbcutdim; // dimension of RCB cut + + // methods + Comm(class LAMMPS *); virtual ~Comm(); + void copy_arrays(class Comm *); void modify_params(int, char **); void set_processors(int, char **); // set 3d processor grid attributes diff --git a/src/comm_brick.cpp b/src/comm_brick.cpp index 93a81c3ad2..70997725b0 100644 --- a/src/comm_brick.cpp +++ b/src/comm_brick.cpp @@ -22,6 +22,7 @@ #include "stdio.h" #include "stdlib.h" #include "comm_brick.h" +#include "comm_tiled.h" #include "universe.h" #include "atom.h" #include "atom_vec.h" @@ -35,15 +36,10 @@ #include "compute.h" #include "output.h" #include "dump.h" -#include "accelerator_kokkos.h" #include "math_extra.h" #include "error.h" #include "memory.h" -#ifdef _OPENMP -#include "omp.h" -#endif - using namespace LAMMPS_NS; #define BUFFACTOR 1.5 @@ -52,58 +48,57 @@ using namespace LAMMPS_NS; #define BIG 1.0e20 enum{SINGLE,MULTI}; // same as in Comm +enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files /* ---------------------------------------------------------------------- */ CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp) { - style = layout = 0; + style = 0; + layout = LAYOUT_UNIFORM; + init_buffers(); +} - recv_from_partition = send_to_partition = -1; - otherflag = 0; +/* ---------------------------------------------------------------------- */ - grid2proc = NULL; +CommBrick::~CommBrick() +{ + free_swap(); + if (mode == MULTI) { + free_multi(); + memory->destroy(cutghostmulti); + } - uniform = 1; + if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]); + memory->sfree(sendlist); + memory->destroy(maxsendlist); + + memory->destroy(buf_send); + memory->destroy(buf_recv); +} + +/* ---------------------------------------------------------------------- */ + +CommBrick::CommBrick(LAMMPS *lmp, Comm *oldcomm) : Comm(*oldcomm) +{ + if (oldcomm->layout == LAYOUT_TILED) + error->all(FLERR,"Cannot change to comm_style brick from tiled layout"); + + style = 0; + layout = oldcomm->layout; + copy_arrays(oldcomm); + init_buffers(); +} + +/* ---------------------------------------------------------------------- + initialize comm buffers and other data structs local to CommBrick +------------------------------------------------------------------------- */ + +void CommBrick::init_buffers() +{ multilo = multihi = NULL; cutghostmulti = NULL; - // use of OpenMP threads - // query OpenMP for number of threads/process set by user at run-time - // if the OMP_NUM_THREADS environment variable is not set, we default - // to using 1 thread. This follows the principle of the least surprise, - // while practically all OpenMP implementations violate it by using - // as many threads as there are (virtual) CPU cores by default. - - nthreads = 1; -#ifdef _OPENMP - if (lmp->kokkos) { - nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa; - } else if (getenv("OMP_NUM_THREADS") == NULL) { - nthreads = 1; - if (me == 0) - error->warning(FLERR,"OMP_NUM_THREADS environment is not set."); - } else { - nthreads = omp_get_max_threads(); - } - - // enforce consistent number of threads across all MPI tasks - - MPI_Bcast(&nthreads,1,MPI_INT,0,world); - if (!lmp->kokkos) omp_set_num_threads(nthreads); - - if (me == 0) { - if (screen) - fprintf(screen," using %d OpenMP thread(s) per MPI task\n",nthreads); - if (logfile) - fprintf(logfile," using %d OpenMP thread(s) per MPI task\n",nthreads); - } -#endif - - // initialize comm buffers & exchange memory - // NOTE: allow for AtomVec to set maxexchange_atom, e.g. for atom_style body - - maxexchange_atom = maxexchange_fix = 0; maxexchange = maxexchange_atom + maxexchange_fix; bufextra = maxexchange + BUFEXTRA; @@ -125,26 +120,6 @@ CommBrick::CommBrick(LAMMPS *lmp) : Comm(lmp) /* ---------------------------------------------------------------------- */ -CommBrick::~CommBrick() -{ - memory->destroy(grid2proc); - - free_swap(); - if (mode == MULTI) { - free_multi(); - memory->destroy(cutghostmulti); - } - - if (sendlist) for (int i = 0; i < maxswap; i++) memory->destroy(sendlist[i]); - memory->sfree(sendlist); - memory->destroy(maxsendlist); - - memory->destroy(buf_send); - memory->destroy(buf_recv); -} - -/* ---------------------------------------------------------------------- */ - void CommBrick::init() { triclinic = domain->triclinic; @@ -280,12 +255,12 @@ void CommBrick::setup() // 0 = to left, 1 = to right // set equal to recvneed[idim][1/0] of neighbor proc // maxneed[idim] = max procs away any proc recvs atoms in either direction - // uniform = 1 = uniform sized sub-domains: + // layout = UNIFORM = uniform sized sub-domains: // maxneed is directly computable from sub-domain size // limit to procgrid-1 for non-PBC // recvneed = maxneed except for procs near non-PBC // sendneed = recvneed of neighbor on each side - // uniform = 0 = non-uniform sized sub-domains: + // layout = NONUNIFORM = non-uniform sized sub-domains: // compute recvneed via updown() which accounts for non-PBC // sendneed = recvneed of neighbor on each side // maxneed via Allreduce() of recvneed @@ -293,7 +268,7 @@ void CommBrick::setup() int *periodicity = domain->periodicity; int left,right; - if (uniform) { + if (layout == LAYOUT_UNIFORM) { maxneed[0] = static_cast (cutghost[0] * procgrid[0] / prd[0]) + 1; maxneed[1] = static_cast (cutghost[1] * procgrid[1] / prd[1]) + 1; maxneed[2] = static_cast (cutghost[2] * procgrid[2] / prd[2]) + 1; @@ -561,16 +536,15 @@ void CommBrick::forward_comm(int dummy) } else { if (comm_x_only) { if (sendnum[iswap]) - n = avec->pack_comm(sendnum[iswap],sendlist[iswap], - x[firstrecv[iswap]],pbc_flag[iswap], - pbc[iswap]); + avec->pack_comm(sendnum[iswap],sendlist[iswap], + x[firstrecv[iswap]],pbc_flag[iswap],pbc[iswap]); } else if (ghost_velocity) { - n = avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); + avec->pack_comm_vel(sendnum[iswap],sendlist[iswap], + buf_send,pbc_flag[iswap],pbc[iswap]); avec->unpack_comm_vel(recvnum[iswap],firstrecv[iswap],buf_send); } else { - n = avec->pack_comm(sendnum[iswap],sendlist[iswap], - buf_send,pbc_flag[iswap],pbc[iswap]); + avec->pack_comm(sendnum[iswap],sendlist[iswap], + buf_send,pbc_flag[iswap],pbc[iswap]); avec->unpack_comm(recvnum[iswap],firstrecv[iswap],buf_send); } } @@ -620,10 +594,10 @@ void CommBrick::reverse_comm() } else { if (comm_f_only) { if (sendnum[iswap]) - avec->unpack_reverse(sendnum[iswap],sendlist[iswap], - f[firstrecv[iswap]]); + avec->unpack_reverse(sendnum[iswap],sendlist[iswap], + f[firstrecv[iswap]]); } else { - n = avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send); + avec->pack_reverse(recvnum[iswap],firstrecv[iswap],buf_send); avec->unpack_reverse(sendnum[iswap],sendlist[iswap],buf_send); } } diff --git a/src/comm_brick.h b/src/comm_brick.h index 20592accab..8dafecfbe7 100644 --- a/src/comm_brick.h +++ b/src/comm_brick.h @@ -21,6 +21,7 @@ namespace LAMMPS_NS { class CommBrick : public Comm { public: CommBrick(class LAMMPS *); + CommBrick(class LAMMPS *, class Comm *); virtual ~CommBrick(); virtual void init(); @@ -80,6 +81,7 @@ class CommBrick : public Comm { int maxexchange; // max # of datums/atom in exchange comm int bufextra; // extra space beyond maxsend in send buffer + void init_buffers(); int updown(int, int, int, double, int, double *); // compare cutoff to procs virtual void grow_send(int, int); // reallocate send buffer diff --git a/src/comm_tiled.cpp b/src/comm_tiled.cpp index 9f111b999a..42f6efdb6c 100644 --- a/src/comm_tiled.cpp +++ b/src/comm_tiled.cpp @@ -11,15 +11,20 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "string.h" #include "lmptype.h" #include "comm_tiled.h" +#include "comm_brick.h" #include "atom.h" #include "atom_vec.h" #include "domain.h" +#include "force.h" #include "pair.h" +#include "neighbor.h" #include "modify.h" #include "fix.h" #include "compute.h" +#include "output.h" #include "dump.h" #include "memory.h" #include "error.h" @@ -27,32 +32,88 @@ using namespace LAMMPS_NS; #define BUFFACTOR 1.5 +#define BUFFACTOR 1.5 +#define BUFMIN 1000 +#define BUFEXTRA 1000 + +// NOTE: change this to 16 after debugged + +#define DELTA_PROCS 1 enum{SINGLE,MULTI}; // same as in Comm +enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files /* ---------------------------------------------------------------------- */ CommTiled::CommTiled(LAMMPS *lmp) : Comm(lmp) { - style = 1; - layout = 0; - error->all(FLERR,"Comm_style tiled is not yet supported"); + + style = 1; + layout = LAYOUT_UNIFORM; + init_buffers(); +} + +/* ---------------------------------------------------------------------- */ + +CommTiled::CommTiled(LAMMPS *lmp, Comm *oldcomm) : Comm(*oldcomm) +{ + error->all(FLERR,"Comm_style tiled is not yet supported"); + + style = 1; + layout = oldcomm->layout; + copy_arrays(oldcomm); + init_buffers(); } /* ---------------------------------------------------------------------- */ CommTiled::~CommTiled() { + memory->destroy(buf_send); + memory->destroy(buf_recv); + memory->destroy(overlap); + deallocate_swap(nswap); } -/* ---------------------------------------------------------------------- */ +/* ---------------------------------------------------------------------- + initialize comm buffers and other data structs local to CommTiled + NOTE: if this is identical to CommBrick, put it into Comm ?? +------------------------------------------------------------------------- */ + +void CommTiled::init_buffers() +{ + maxexchange = maxexchange_atom + maxexchange_fix; + bufextra = maxexchange + BUFEXTRA; + + maxsend = BUFMIN; + memory->create(buf_send,maxsend+bufextra,"comm:buf_send"); + maxrecv = BUFMIN; + memory->create(buf_recv,maxrecv,"comm:buf_recv"); + + maxoverlap = 0; + overlap = NULL; + + nswap = 2 * domain->dimension; + allocate_swap(nswap); +} + +/* ---------------------------------------------------------------------- + NOTE: if this is nearly identical to CommBrick, put it into Comm ?? +------------------------------------------------------------------------- */ void CommTiled::init() { triclinic = domain->triclinic; map_style = atom->map_style; + // temporary restrictions + + if (triclinic) + error->all(FLERR,"Cannot yet use comm_style tiled with triclinic box"); + if (mode == MULTI) + error->all(FLERR,"Cannot yet use comm_style tiled with multi-mode comm"); + // comm_only = 1 if only x,f are exchanged in forward/reverse comm // comm_x_only = 0 if ghost_velocity since velocities are added @@ -72,51 +133,250 @@ void CommTiled::init() for (int i = 0; i < modify->nfix; i++) size_border += modify->fix[i]->comm_border; + + // maxexchange = max # of datums/atom in exchange communication + // maxforward = # of datums in largest forward communication + // maxreverse = # of datums in largest reverse communication + // query pair,fix,compute,dump for their requirements + // pair style can force reverse comm even if newton off + + maxexchange = BUFMIN + maxexchange_fix; + maxforward = MAX(size_forward,size_border); + maxreverse = size_reverse; + + if (force->pair) maxforward = MAX(maxforward,force->pair->comm_forward); + if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse); + + for (int i = 0; i < modify->nfix; i++) { + maxforward = MAX(maxforward,modify->fix[i]->comm_forward); + maxreverse = MAX(maxreverse,modify->fix[i]->comm_reverse); + } + + for (int i = 0; i < modify->ncompute; i++) { + maxforward = MAX(maxforward,modify->compute[i]->comm_forward); + maxreverse = MAX(maxreverse,modify->compute[i]->comm_reverse); + } + + for (int i = 0; i < output->ndump; i++) { + maxforward = MAX(maxforward,output->dump[i]->comm_forward); + maxreverse = MAX(maxreverse,output->dump[i]->comm_reverse); + } + + if (force->newton == 0) maxreverse = 0; + if (force->pair) maxreverse = MAX(maxreverse,force->pair->comm_reverse_off); } /* ---------------------------------------------------------------------- setup spatial-decomposition communication patterns - function of neighbor cutoff(s) & cutghostuser & current box size - single mode sets slab boundaries (slablo,slabhi) based on max cutoff - multi mode sets type-dependent slab boundaries (multilo,multihi) + function of neighbor cutoff(s) & cutghostuser & current box size and tiling ------------------------------------------------------------------------- */ void CommTiled::setup() { - // error on triclinic or multi? - // set nswap = 2*dim - // setup neighbor proc info for exchange() - // setup nsendproc and nrecvproc bounts - // setup sendproc and recvproc lists - // setup sendboxes - // reallocate requests and statuses + int i; - // check that cutoff is <= 1/2 of periodic box len? + // domain properties used in setup and methods it calls - // loop over dims - // left: - // construct ghost boxes - // differnet in x,y,z - // account for ghost borders in y,z - // account for PBC by shifting - // split into multiple boxes if straddles PBC - // drop boxes down RCB tree - // count unique procs they cover - // what about self if crosses PBC - // for each proc they cover: - // compute box I send it to left - // is a message I will recv from right (don't care about box) - // for ghost-extended boxes - // do not count procs that do not overlap my owned box at all - // only touching edge of my owned box does not count - // in this case list I send to and recv from may be different? - // same thing to right + prd = domain->prd; + boxlo = domain->boxlo; + boxhi = domain->boxhi; + sublo = domain->sublo; + subhi = domain->subhi; - // what need from decomp (RCB): - // dropbox: return list of procs with overlap and overlapping boxes - // return n, proclist, boxlist - // otherbox: bbox of another proc - // dropatom: return what proc owns the atom coord + int dimension = domain->dimension; + int *periodicity = domain->periodicity; + + // set function pointers + + if (layout != LAYOUT_TILED) { + box_drop = &CommTiled::box_drop_brick; + box_other = &CommTiled::box_other_brick; + } else { + box_drop = &CommTiled::box_drop_tiled; + box_other = &CommTiled::box_other_tiled; + } + + // if RCB decomp has just changed, gather needed global RCB info + + if (rcbnew) { + rcbnew = 0; + memcpy(&rcbinfo[me].mysplit[0][0],&mysplit[0][0],6*sizeof(double)); + rcbinfo[me].cut = rcbcut; + rcbinfo[me].dim = rcbcutdim; + MPI_Allgather(&rcbinfo[me],sizeof(RCBinfo),MPI_CHAR, + rcbinfo,sizeof(RCBinfo),MPI_CHAR,world); + } + + // check that cutoff < any periodic box length + + double cut = MAX(neighbor->cutneighmax,cutghostuser); + cutghost[0] = cutghost[1] = cutghost[2] = cut; + + if ((periodicity[0] && cut > prd[0]) || + (periodicity[1] && cut > prd[1]) || + (dimension == 3 && periodicity[2] && cut > prd[2])) + error->all(FLERR,"Communication cutoff for comm_style tiled " + "cannot exceed periodic box length"); + + // loop over 6 swap directions + // determine which procs I will send to and receive from in each swap + // done by intersecting ghost box with all proc sub-boxes it overlaps + // sets nsendproc, nrecvproc, sendproc, recvproc + // sets sendother, sendself, pbc_flag, pbc, sendbox + + int noverlap1,indexme; + double lo1[3],hi1[3],lo2[3],hi2[3]; + int one,two; + + nswap = 0; + for (int idim = 0; idim < dimension; idim++) { + for (int iswap = 0; iswap < 2; iswap++) { + + // one = first ghost box in same periodic image + // two = second ghost box wrapped across periodic boundary + // either may not exist + + one = 1; + lo1[0] = sublo[0]; lo1[1] = sublo[1]; lo1[2] = sublo[2]; + hi1[0] = subhi[0]; hi1[1] = subhi[1]; hi1[2] = subhi[2]; + if (iswap == 0) { + lo1[idim] = sublo[idim] - cut; + hi1[idim] = sublo[idim]; + } else { + lo1[idim] = subhi[idim]; + hi1[idim] = subhi[idim] + cut; + } + + two = 0; + if (iswap == 0 && periodicity[idim] && lo1[idim] < boxlo[idim]) two = 1; + if (iswap == 1 && periodicity[idim] && hi1[idim] > boxhi[idim]) two = 1; + + if (two) { + lo2[0] = sublo[0]; lo2[1] = sublo[1]; lo2[2] = sublo[2]; + hi2[0] = subhi[0]; hi2[1] = subhi[1]; hi2[2] = subhi[2]; + if (iswap == 0) { + lo2[idim] = lo1[idim] + prd[idim]; + hi2[idim] = hi1[idim] + prd[idim]; + if (sublo[idim] == boxlo[idim]) { + one = 0; + hi2[idim] = boxhi[idim]; + } + } else { + lo2[idim] = lo1[idim] - prd[idim]; + hi2[idim] = hi1[idim] - prd[idim]; + if (subhi[idim] == boxhi[idim]) { + one = 0; + lo2[idim] = boxlo[idim]; + } + } + } + + // noverlap = # of overlaps of box1/2 with procs via box_drop() + // overlap = list of overlapping procs + // if overlap with self, indexme = index of me in list + + indexme = -1; + noverlap = 0; + if (one) (this->*box_drop)(idim,lo1,hi1,indexme); + noverlap1 = noverlap; + if (two) (this->*box_drop)(idim,lo2,hi2,indexme); + + // if self is in overlap list, move it to end of list + + if (indexme >= 0) { + int tmp = overlap[noverlap-1]; + overlap[noverlap-1] = overlap[indexme]; + overlap[indexme] = tmp; + } + + // reallocate 2nd dimensions of all send/recv arrays, based on noverlap + // # of sends of this swap = # of recvs of nswap +/- 1 + + if (noverlap > nprocmax[iswap]) { + int oldmax = nprocmax[iswap]; + while (nprocmax[iswap] < noverlap) nprocmax[iswap] += DELTA_PROCS; + grow_swap_send(iswap,nprocmax[iswap],oldmax); + if (iswap == 0) grow_swap_recv(iswap+1,nprocmax[iswap]); + else grow_swap_recv(iswap-1,nprocmax[iswap]); + } + + // overlap how has list of noverlap procs + // includes PBC effects + + if (overlap[noverlap-1] == me) sendself[nswap] = 1; + else sendself[nswap] = 0; + if (noverlap-sendself[nswap]) sendother[nswap] = 1; + else sendother[nswap] = 0; + + nsendproc[nswap] = noverlap; + for (i = 0; i < noverlap; i++) sendproc[nswap][i] = overlap[i]; + if (iswap == 0) { + nrecvproc[nswap+1] = noverlap; + for (i = 0; i < noverlap; i++) recvproc[nswap+1][i] = overlap[i]; + } else { + nrecvproc[nswap-1] = noverlap; + for (i = 0; i < noverlap; i++) recvproc[nswap-1][i] = overlap[i]; + } + + // compute sendbox for each of my sends + // obox = intersection of ghostbox with other proc's sub-domain + // sbox = what I need to send to other proc + // = sublo to MIN(sublo+cut,subhi) in idim, for iswap = 0 + // = MIN(subhi-cut,sublo) to subhi in idim, for iswap = 1 + // = obox in other 2 dims + // if sbox touches sub-box boundaries in lower dims, + // extend sbox in those lower dims to include ghost atoms + + double oboxlo[3],oboxhi[3],sbox[6]; + + for (i = 0; i < noverlap; i++) { + pbc_flag[nswap][i] = 0; + pbc[nswap][i][0] = pbc[nswap][i][1] = pbc[nswap][i][2] = + pbc[nswap][i][3] = pbc[nswap][i][4] = pbc[nswap][i][5] = 0; + + (this->*box_other)(idim,iswap,overlap[i],oboxlo,oboxhi); + + if (i < noverlap1) { + sbox[0] = MAX(oboxlo[0],lo1[0]); + sbox[1] = MAX(oboxlo[1],lo1[1]); + sbox[2] = MAX(oboxlo[2],lo1[2]); + sbox[3] = MIN(oboxhi[0],hi1[0]); + sbox[4] = MIN(oboxhi[1],hi1[1]); + sbox[5] = MIN(oboxhi[2],hi1[2]); + } else { + pbc_flag[nswap][i] = 1; + pbc[nswap][i][idim] = 1; + sbox[0] = MAX(oboxlo[0],lo2[0]); + sbox[1] = MAX(oboxlo[1],lo2[1]); + sbox[2] = MAX(oboxlo[2],lo2[2]); + sbox[3] = MIN(oboxhi[0],hi2[0]); + sbox[4] = MIN(oboxhi[1],hi2[1]); + sbox[5] = MIN(oboxhi[2],hi2[2]); + } + + if (iswap == 0) { + sbox[idim] = sublo[idim]; + sbox[3+idim] = MIN(sublo[idim]+cut,subhi[idim]); + } else { + sbox[idim] = MAX(subhi[idim]-cut,sublo[idim]); + sbox[3+idim] = subhi[idim]; + } + + if (idim >= 1) { + if (sbox[0] == sublo[0]) sbox[0] -= cut; + if (sbox[4] == subhi[0]) sbox[4] += cut; + } + if (idim == 2) { + if (sbox[1] == sublo[1]) sbox[1] -= cut; + if (sbox[5] == subhi[1]) sbox[5] += cut; + } + + memcpy(sendbox[nswap][i],sbox,6*sizeof(double)); + } + + nswap++; + } + } } /* ---------------------------------------------------------------------- @@ -126,46 +386,73 @@ void CommTiled::setup() void CommTiled::forward_comm(int dummy) { - int i,irecv,n; + int i,irecv,n,nsend,nrecv; MPI_Status status; AtomVec *avec = atom->avec; double **x = atom->x; // exchange data with another set of procs in each swap - // if first proc in set is self, then is just self across PBC, just copy + // post recvs from all procs except self + // send data to all procs except self + // copy data to self if sendself is set + // wait on all procs except self and unpack received data // if comm_x_only set, exchange or copy directly to x, don't unpack for (int iswap = 0; iswap < nswap; iswap++) { - if (sendproc[iswap][0] != me) { - if (comm_x_only) { - for (i = 0; i < nrecvproc[iswap]; i++) + nsend = nsendproc[iswap] - sendself[iswap]; + nrecv = nrecvproc[iswap] - sendself[iswap]; + + if (comm_x_only) { + if (sendother[iswap]) { + for (i = 0; i < nrecv; i++) MPI_Irecv(x[firstrecv[iswap][i]],size_forward_recv[iswap][i], MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]); - for (i = 0; i < nsendproc[iswap]; i++) { + for (i = 0; i < nsend; i++) { n = avec->pack_comm(sendnum[iswap][i],sendlist[iswap][i], buf_send,pbc_flag[iswap][i],pbc[iswap][i]); MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world); } - MPI_Waitall(nrecvproc[iswap],requests,statuses); + } - } else if (ghost_velocity) { - for (i = 0; i < nrecvproc[iswap]; i++) + if (sendself[iswap]) { + avec->pack_comm(sendnum[iswap][nsend],sendlist[iswap][nsend], + x[firstrecv[iswap][nrecv]],pbc_flag[iswap][nsend], + pbc[iswap][nsend]); + } + + if (sendother[iswap]) MPI_Waitall(nrecv,requests,statuses); + + } else if (ghost_velocity) { + if (sendother[iswap]) { + for (i = 0; i < nrecv; i++) MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]], size_forward_recv[iswap][i], MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]); - for (i = 0; i < nsendproc[iswap]; i++) { + for (i = 0; i < nsend; i++) { n = avec->pack_comm_vel(sendnum[iswap][i],sendlist[iswap][i], buf_send,pbc_flag[iswap][i],pbc[iswap][i]); MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world); } - for (i = 0; i < nrecvproc[iswap]; i++) { - MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status); - avec->unpack_comm_vel(recvnum[iswap][i],firstrecv[iswap][irecv], + } + + if (sendself[iswap]) { + avec->pack_comm_vel(sendnum[iswap][nsend],sendlist[iswap][nsend], + buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]); + avec->unpack_comm_vel(recvnum[iswap][nrecv],firstrecv[iswap][nrecv], + buf_send); + } + + if (sendother[iswap]) { + for (i = 0; i < nrecv; i++) { + MPI_Waitany(nrecv,requests,&irecv,&status); + avec->unpack_comm_vel(recvnum[iswap][irecv],firstrecv[iswap][irecv], &buf_recv[forward_recv_offset[iswap][irecv]]); } + } - } else { - for (i = 0; i < nrecvproc[iswap]; i++) + } else { + if (sendother[iswap]) { + for (i = 0; i < nrecv; i++) MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]], size_forward_recv[iswap][i], MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]); @@ -174,27 +461,21 @@ void CommTiled::forward_comm(int dummy) buf_send,pbc_flag[iswap][i],pbc[iswap][i]); MPI_Send(buf_send,n,MPI_DOUBLE,sendproc[iswap][i],0,world); } - for (i = 0; i < nrecvproc[iswap]; i++) { - MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status); - avec->unpack_comm(recvnum[iswap][i],firstrecv[iswap][irecv], - &buf_recv[forward_recv_offset[iswap][irecv]]); - } } - } else { - if (comm_x_only) { - if (sendnum[iswap][0]) - n = avec->pack_comm(sendnum[iswap][0],sendlist[iswap][0], - x[firstrecv[iswap][0]],pbc_flag[iswap][0], - pbc[iswap][0]); - } else if (ghost_velocity) { - n = avec->pack_comm_vel(sendnum[iswap][0],sendlist[iswap][0], - buf_send,pbc_flag[iswap][0],pbc[iswap][0]); - avec->unpack_comm_vel(recvnum[iswap][0],firstrecv[iswap][0],buf_send); - } else { - n = avec->pack_comm(sendnum[iswap][0],sendlist[iswap][0], - buf_send,pbc_flag[iswap][0],pbc[iswap][0]); - avec->unpack_comm(recvnum[iswap][0],firstrecv[iswap][0],buf_send); + if (sendself[iswap]) { + avec->pack_comm(sendnum[iswap][nsend],sendlist[iswap][nsend], + buf_send,pbc_flag[iswap][nsend],pbc[iswap][nsend]); + avec->unpack_comm(recvnum[iswap][nrecv],firstrecv[iswap][nrecv], + buf_send); + } + + if (sendother[iswap]) { + for (i = 0; i < nrecv; i++) { + MPI_Waitany(nrecv,requests,&irecv,&status); + avec->unpack_comm(recvnum[iswap][irecv],firstrecv[iswap][irecv], + &buf_recv[forward_recv_offset[iswap][irecv]]); + } } } } @@ -207,57 +488,73 @@ void CommTiled::forward_comm(int dummy) void CommTiled::reverse_comm() { - int i,irecv,n; + int i,irecv,n,nsend,nrecv; MPI_Request request; MPI_Status status; AtomVec *avec = atom->avec; double **f = atom->f; // exchange data with another set of procs in each swap - // if first proc in set is self, then is just self across PBC, just copy + // post recvs from all procs except self + // send data to all procs except self + // copy data to self if sendself is set + // wait on all procs except self and unpack received data // if comm_f_only set, exchange or copy directly from f, don't pack for (int iswap = nswap-1; iswap >= 0; iswap--) { - if (sendproc[iswap][0] != me) { - if (comm_f_only) { - for (i = 0; i < nsendproc[iswap]; i++) + nsend = nsendproc[iswap] - sendself[iswap]; + nrecv = nrecvproc[iswap] - sendself[iswap]; + + if (comm_f_only) { + if (sendother[iswap]) { + for (i = 0; i < nsend; i++) MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]], size_reverse_recv[iswap][i],MPI_DOUBLE, sendproc[iswap][i],0,world,&requests[i]); - for (i = 0; i < nrecvproc[iswap]; i++) + for (i = 0; i < nrecv; i++) MPI_Send(f[firstrecv[iswap][i]],size_reverse_send[iswap][i], MPI_DOUBLE,recvproc[iswap][i],0,world); - for (i = 0; i < nsendproc[iswap]; i++) { - MPI_Waitany(nsendproc[iswap],requests,&irecv,&status); - avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv], - &buf_recv[reverse_recv_offset[iswap][irecv]]); - } + } - } else { - for (i = 0; i < nsendproc[iswap]; i++) - MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]], - size_reverse_recv[iswap][i],MPI_DOUBLE, - sendproc[iswap][i],0,world,&requests[i]); - for (i = 0; i < nrecvproc[iswap]; i++) { - n = avec->pack_reverse(recvnum[iswap][i],firstrecv[iswap][i], - buf_send); - MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world); - } - for (i = 0; i < nsendproc[iswap]; i++) { - MPI_Waitany(nsendproc[iswap],requests,&irecv,&status); + if (sendself[iswap]) { + avec->unpack_reverse(sendnum[iswap][nsend],sendlist[iswap][nsend], + f[firstrecv[iswap][nrecv]]); + } + + if (sendother[iswap]) { + for (i = 0; i < nsend; i++) { + MPI_Waitany(nsend,requests,&irecv,&status); avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv], &buf_recv[reverse_recv_offset[iswap][irecv]]); } } - + } else { - if (comm_f_only) { - if (sendnum[iswap][0]) - avec->unpack_reverse(sendnum[iswap][0],sendlist[iswap][0], - f[firstrecv[iswap][0]]); - } else { - n = avec->pack_reverse(recvnum[iswap][0],firstrecv[iswap][0],buf_send); - avec->unpack_reverse(sendnum[iswap][0],sendlist[iswap][0],buf_send); + if (sendother[iswap]) { + for (i = 0; i < nsend; i++) + MPI_Irecv(&buf_recv[reverse_recv_offset[iswap][i]], + size_reverse_recv[iswap][i],MPI_DOUBLE, + sendproc[iswap][i],0,world,&requests[i]); + for (i = 0; i < nrecv; i++) { + n = avec->pack_reverse(recvnum[iswap][i],firstrecv[iswap][i], + buf_send); + MPI_Send(buf_send,n,MPI_DOUBLE,recvproc[iswap][i],0,world); + } + } + + if (sendself[iswap]) { + avec->pack_reverse(recvnum[iswap][nrecv],firstrecv[iswap][nrecv], + buf_send); + avec->unpack_reverse(sendnum[iswap][nsend],sendlist[iswap][nsend], + buf_send); + } + + if (sendother[iswap]) { + for (i = 0; i < nsend; i++) { + MPI_Waitany(nsend,requests,&irecv,&status); + avec->unpack_reverse(sendnum[iswap][irecv],sendlist[iswap][irecv], + &buf_recv[reverse_recv_offset[iswap][irecv]]); + } } } } @@ -298,7 +595,7 @@ void CommTiled::exchange() void CommTiled::borders() { - int i,n,irecv,ngroup,nlast,nsend,rmaxswap; + int i,m,n,irecv,nlocal,nlast,nsend,nrecv,ncount,rmaxswap; double xlo,xhi,ylo,yhi,zlo,zhi; double *bbox; double **x; @@ -320,65 +617,69 @@ void CommTiled::borders() // for yz-dim swaps, check owned and ghost atoms // store sent atom indices in list for use in future timesteps // NOTE: assume SINGLE mode, add back in logic for MULTI mode later + // and for ngroup when bordergroup is set x = atom->x; - for (i = 0; i < nsendproc[iswap]; i++) { - bbox = sendbox[iswap][i]; - xlo = bbox[0]; xhi = bbox[1]; - ylo = bbox[2]; yhi = bbox[3]; - zlo = bbox[4]; zhi = bbox[5]; + for (m = 0; m < nsendproc[iswap]; m++) { + bbox = sendbox[iswap][m]; + xlo = bbox[0]; ylo = bbox[1]; zlo = bbox[2]; + xhi = bbox[3]; yhi = bbox[4]; zhi = bbox[5]; - ngroup = atom->nfirst; + nlocal = atom->nlocal; if (iswap < 2) nlast = atom->nlocal; else nlast = atom->nlocal + atom->nghost; - nsend = 0; - for (i = 0; i < ngroup; i++) + ncount = 0; + for (i = 0; i < nlocal; i++) if (x[i][0] >= xlo && x[i][0] <= xhi && x[i][1] >= ylo && x[i][1] <= yhi && x[i][2] >= zlo && x[i][2] <= zhi) { - if (nsend == maxsendlist[iswap][i]) grow_list(iswap,i,nsend); - sendlist[iswap][i][nsend++] = i; + if (ncount == maxsendlist[iswap][m]) grow_list(iswap,i,ncount); + sendlist[iswap][m][ncount++] = i; } for (i = atom->nlocal; i < nlast; i++) if (x[i][0] >= xlo && x[i][0] <= xhi && x[i][1] >= ylo && x[i][1] <= yhi && x[i][2] >= zlo && x[i][2] <= zhi) { - if (nsend == maxsendlist[iswap][i]) grow_list(iswap,i,nsend); - sendlist[iswap][i][nsend++] = i; + if (ncount == maxsendlist[iswap][m]) grow_list(iswap,i,ncount); + sendlist[iswap][m][ncount++] = i; } - sendnum[iswap][i] = nsend; - smax = MAX(smax,nsend); + sendnum[iswap][m] = ncount; + smax = MAX(smax,ncount); } - // send sendnum counts to procs who recv from me + // send sendnum counts to procs who recv from me except self + // copy data to self if sendself is set - if (sendproc[iswap][0] != me) { - for (i = 0; i < nrecvproc[iswap]; i++) - MPI_Irecv(&recvnum[iswap][i],1,MPI_INT, - recvproc[iswap][i],0,world,&requests[i]); - for (i = 0; i < nsendproc[iswap]; i++) - MPI_Send(&sendnum[iswap][i],1,MPI_INT,sendproc[iswap][i],0,world); - MPI_Waitall(nrecvproc[iswap],requests,statuses); + nsend = nsendproc[iswap] - sendself[iswap]; + nrecv = nrecvproc[iswap] - sendself[iswap]; - } else recvnum[iswap][0] = sendnum[iswap][0]; + if (sendother[iswap]) { + for (m = 0; m < nrecv; m++) + MPI_Irecv(&recvnum[iswap][m],1,MPI_INT, + recvproc[iswap][m],0,world,&requests[m]); + for (m = 0; m < nsend; m++) + MPI_Send(&sendnum[iswap][m],1,MPI_INT,sendproc[iswap][m],0,world); + } + if (sendself[iswap]) recvnum[iswap][nrecv] = sendnum[iswap][nsend]; + if (sendother[iswap]) MPI_Waitall(nrecv,requests,statuses); // setup other per swap/proc values from sendnum and recvnum rmaxswap = 0; - for (i = 0; i < nrecvproc[iswap]; i++) { - rmaxswap += recvnum[iswap][i]; - size_forward_recv[iswap][i] = recvnum[iswap][i]*size_forward; - size_reverse_send[iswap][i] = recvnum[iswap][i]*size_reverse; - size_reverse_recv[iswap][i] = sendnum[iswap][i]*size_reverse; - if (i == 0) { + for (m = 0; m < nrecvproc[iswap]; m++) { + rmaxswap += recvnum[iswap][m]; + size_forward_recv[iswap][m] = recvnum[iswap][m]*size_forward; + size_reverse_send[iswap][m] = recvnum[iswap][m]*size_reverse; + size_reverse_recv[iswap][m] = sendnum[iswap][m]*size_reverse; + if (m == 0) { firstrecv[iswap][0] = atom->nlocal + atom->nghost; forward_recv_offset[iswap][0] = 0; } else { - firstrecv[iswap][i] = firstrecv[iswap][i-1] + recvnum[iswap][i-1]; - forward_recv_offset[iswap][i] = - forward_recv_offset[iswap][i-1] + recvnum[iswap][i-1]; + firstrecv[iswap][m] = firstrecv[iswap][m-1] + recvnum[iswap][m-1]; + forward_recv_offset[iswap][m] = + forward_recv_offset[iswap][m-1] + recvnum[iswap][m-1]; } } rmax = MAX(rmax,rmaxswap); @@ -390,54 +691,64 @@ void CommTiled::borders() // swap atoms with other procs using pack_border(), unpack_border() - if (sendproc[iswap][0] != me) { - for (i = 0; i < nsendproc[iswap]; i++) { - if (ghost_velocity) { - for (i = 0; i < nrecvproc[iswap]; i++) - MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]], - recvnum[iswap][i]*size_border, - MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]); - for (i = 0; i < nsendproc[iswap]; i++) { - n = avec->pack_border_vel(sendnum[iswap][i],sendlist[iswap][i], - buf_send,pbc_flag[iswap][i], - pbc[iswap][i]); - MPI_Send(buf_send,n*size_border,MPI_DOUBLE, - sendproc[iswap][i],0,world); - } - for (i = 0; i < nrecvproc[iswap]; i++) { - MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status); - avec->unpack_border(recvnum[iswap][i],firstrecv[iswap][irecv], - &buf_recv[forward_recv_offset[iswap][irecv]]); - } - - } else { - for (i = 0; i < nrecvproc[iswap]; i++) - MPI_Irecv(&buf_recv[forward_recv_offset[iswap][i]], - recvnum[iswap][i]*size_border, - MPI_DOUBLE,recvproc[iswap][i],0,world,&requests[i]); - for (i = 0; i < nsendproc[iswap]; i++) { - n = avec->pack_border(sendnum[iswap][i],sendlist[iswap][i], - buf_send,pbc_flag[iswap][i],pbc[iswap][i]); - MPI_Send(buf_send,n*size_border,MPI_DOUBLE, - sendproc[iswap][i],0,world); - } - for (i = 0; i < nrecvproc[iswap]; i++) { - MPI_Waitany(nrecvproc[iswap],requests,&irecv,&status); - avec->unpack_border(recvnum[iswap][i],firstrecv[iswap][irecv], - &buf_recv[forward_recv_offset[iswap][irecv]]); - } + if (ghost_velocity) { + if (sendother[iswap]) { + for (m = 0; m < nrecv; m++) + MPI_Irecv(&buf_recv[forward_recv_offset[iswap][m]], + recvnum[iswap][m]*size_border, + MPI_DOUBLE,recvproc[iswap][m],0,world,&requests[m]); + for (m = 0; m < nsend; m++) { + n = avec->pack_border_vel(sendnum[iswap][m],sendlist[iswap][m], + buf_send,pbc_flag[iswap][m],pbc[iswap][m]); + MPI_Send(buf_send,n*size_border,MPI_DOUBLE, + sendproc[iswap][m],0,world); + } + } + + if (sendself[iswap]) { + n = avec->pack_border_vel(sendnum[iswap][nsend],sendlist[iswap][nsend], + buf_send,pbc_flag[iswap][nsend], + pbc[iswap][nsend]); + avec->unpack_border_vel(recvnum[iswap][nrecv],firstrecv[iswap][nrecv], + buf_send); + } + + if (sendother[iswap]) { + for (m = 0; m < nrecv; m++) { + MPI_Waitany(nrecv,requests,&irecv,&status); + avec->unpack_border(recvnum[iswap][irecv],firstrecv[iswap][irecv], + &buf_recv[forward_recv_offset[iswap][irecv]]); } } } else { - if (ghost_velocity) { - n = avec->pack_border_vel(sendnum[iswap][0],sendlist[iswap][0], - buf_send,pbc_flag[iswap][0],pbc[iswap][0]); - avec->unpack_border_vel(recvnum[iswap][0],firstrecv[iswap][0],buf_send); - } else { - n = avec->pack_border(sendnum[iswap][0],sendlist[iswap][0], - buf_send,pbc_flag[iswap][0],pbc[iswap][0]); - avec->unpack_border(recvnum[iswap][0],firstrecv[iswap][0],buf_send); + if (sendother[iswap]) { + for (m = 0; m < nrecv; m++) + MPI_Irecv(&buf_recv[forward_recv_offset[iswap][m]], + recvnum[iswap][m]*size_border, + MPI_DOUBLE,recvproc[iswap][m],0,world,&requests[m]); + for (m = 0; m < nsend; m++) { + n = avec->pack_border(sendnum[iswap][m],sendlist[iswap][m], + buf_send,pbc_flag[iswap][m],pbc[iswap][m]); + MPI_Send(buf_send,n*size_border,MPI_DOUBLE, + sendproc[iswap][m],0,world); + } + } + + if (sendself[iswap]) { + n = avec->pack_border(sendnum[iswap][nsend],sendlist[iswap][nsend], + buf_send,pbc_flag[iswap][nsend], + pbc[iswap][nsend]); + avec->unpack_border(recvnum[iswap][nsend],firstrecv[iswap][nsend], + buf_send); + } + + if (sendother[iswap]) { + for (m = 0; m < nrecv; m++) { + MPI_Waitany(nrecv,requests,&irecv,&status); + avec->unpack_border(recvnum[iswap][irecv],firstrecv[iswap][irecv], + &buf_recv[forward_recv_offset[iswap][irecv]]); + } } } @@ -459,6 +770,8 @@ void CommTiled::borders() if (map_style) atom->map_set(); } +// NOTE: remaining forward/reverse methods still need to be updated + /* ---------------------------------------------------------------------- forward communication invoked by a Pair n = constant number of datums per atom @@ -786,6 +1099,186 @@ int CommTiled::exchange_variable(int n, double *inbuf, double *&outbuf) return nrecv; } +/* ---------------------------------------------------------------------- + determine overlap list of Noverlap procs the lo/hi box overlaps + overlap = non-zero area in common between box and proc sub-domain + box is owned by me and extends in dim +------------------------------------------------------------------------- */ + +void CommTiled::box_drop_brick(int idim, double *lo, double *hi, int &indexme) +{ + // NOTE: this is not triclinic compatible + + int index,dir; + if (hi[idim] == sublo[idim]) { + index = myloc[idim] - 1; + dir = -1; + } else if (lo[idim] == subhi[idim]) { + index = myloc[idim] + 1; + dir = 1; + } else if (hi[idim] == boxhi[idim]) { + index = procgrid[idim] - 1; + dir = -1; + } else if (lo[idim] == boxlo[idim]) { + index = 0; + dir = 1; + } + + int other1,other2,proc; + double lower,upper; + double *split; + + if (idim == 0) { + other1 = myloc[1]; other2 = myloc[2]; + split = xsplit; + } else if (idim == 1) { + other1 = myloc[0]; other2 = myloc[2]; + split = ysplit; + } else { + other1 = myloc[0]; other2 = myloc[1]; + split = zsplit; + } + + while (1) { + lower = boxlo[idim] + prd[idim]*split[index]; + if (index < procgrid[idim]-1) + upper = boxlo[idim] + prd[idim]*split[index+1]; + else upper = boxhi[idim]; + if (lower >= hi[idim] || upper <= lo[idim]) break; + + if (idim == 0) proc = grid2proc[index][other1][other2]; + else if (idim == 1) proc = grid2proc[other1][index][other2]; + else proc = grid2proc[other1][other2][idim]; + + if (noverlap == maxoverlap) { + maxoverlap += DELTA_PROCS; + memory->grow(overlap,maxoverlap,"comm:overlap"); + } + + if (proc == me) indexme = noverlap; + overlap[noverlap++] = proc; + index += dir; + if (index < 0 || index >= procgrid[idim]) break; + } +} + +/* ---------------------------------------------------------------------- + determine overlap list of Noverlap procs the lo/hi box overlaps + overlap = non-zero area in common between box and proc sub-domain + recursive method for traversing an RCB tree of cuts + no need to split lo/hi box as recurse b/c OK if box extends outside RCB box +------------------------------------------------------------------------- */ + +void CommTiled::box_drop_tiled(int idim, double *lo, double *hi, int &indexme) +{ + box_drop_tiled_recurse(lo,hi,0,nprocs-1,indexme); +} + +void CommTiled::box_drop_tiled_recurse(double *lo, double *hi, + int proclower, int procupper, + int &indexme) +{ + // end recursion when partition is a single proc + // add proc to overlap list + + if (proclower == procupper) { + if (noverlap == maxoverlap) { + maxoverlap += DELTA_PROCS; + memory->grow(overlap,maxoverlap,"comm:overlap"); + } + + if (proclower == me) indexme = noverlap; + overlap[noverlap++] = proclower; + return; + } + + // drop box on each side of cut it extends beyond + // use > and < criteria so does not include a box it only touches + // procmid = 1st processor in upper half of partition + // = location in tree that stores this cut + // dim = 0,1,2 dimension of cut + // cut = position of cut + + int procmid = proclower + (procupper - proclower) / 2 + 1; + double cut = rcbinfo[procmid].cut; + int idim = rcbinfo[procmid].dim; + + if (lo[idim] < cut) + box_drop_tiled_recurse(lo,hi,proclower,procmid-1,indexme); + if (hi[idim] > cut) + box_drop_tiled_recurse(lo,hi,procmid,procupper,indexme); +} + +/* ---------------------------------------------------------------------- + return other box owned by proc as lo/hi corner pts +------------------------------------------------------------------------- */ + +void CommTiled::box_other_brick(int idim, int iswap, + int proc, double *lo, double *hi) +{ + lo[0] = sublo[0]; lo[1] = sublo[1]; lo[2] = sublo[2]; + hi[0] = subhi[0]; hi[1] = subhi[1]; hi[2] = subhi[2]; + + int other1,other2,oproc; + double *split; + + if (idim == 0) { + other1 = myloc[1]; other2 = myloc[2]; + split = xsplit; + } else if (idim == 1) { + other1 = myloc[0]; other2 = myloc[2]; + split = ysplit; + } else { + other1 = myloc[0]; other2 = myloc[1]; + split = zsplit; + } + + int dir = -1; + if (iswap) dir = 1; + int index = myloc[idim]; + int n = procgrid[idim]; + + for (int i = 0; i < n; i++) { + index += dir; + if (index < 0) index = n-1; + else if (index >= n) index = 0; + + if (idim == 0) oproc = grid2proc[index][other1][other2]; + else if (idim == 1) oproc = grid2proc[other1][index][other2]; + else oproc = grid2proc[other1][other2][idim]; + + if (proc == oproc) { + lo[idim] = boxlo[idim] + prd[idim]*split[index]; + if (split[index+1] < 1.0) + hi[idim] = boxlo[idim] + prd[idim]*split[index+1]; + else hi[idim] = boxhi[idim]; + return; + } + } +} + +/* ---------------------------------------------------------------------- + return other box owned by proc as lo/hi corner pts +------------------------------------------------------------------------- */ + +void CommTiled::box_other_tiled(int idim, int iswap, + int proc, double *lo, double *hi) +{ + double (*split)[2] = rcbinfo[proc].mysplit; + + lo[0] = boxlo[0] + prd[0]*split[0][0]; + if (split[0][1] < 1.0) hi[0] = boxlo[0] + prd[0]*split[0][1]; + else hi[0] = boxhi[0]; + + lo[1] = boxlo[1] + prd[1]*split[1][0]; + if (split[1][1] < 1.0) hi[1] = boxlo[1] + prd[1]*split[1][1]; + else hi[1] = boxhi[1]; + + lo[2] = boxlo[2] + prd[2]*split[2][0]; + if (split[2][1] < 1.0) hi[2] = boxlo[2] + prd[2]*split[2][1]; + else hi[2] = boxhi[2]; +} + /* ---------------------------------------------------------------------- realloc the size of the send buffer as needed with BUFFACTOR and bufextra if flag = 1, realloc @@ -822,7 +1315,178 @@ void CommTiled::grow_list(int iswap, int iwhich, int n) { maxsendlist[iswap][iwhich] = static_cast (BUFFACTOR * n); memory->grow(sendlist[iswap][iwhich],maxsendlist[iswap][iwhich], - "comm:sendlist[iswap]"); + "comm:sendlist[i][j]"); +} + +/* ---------------------------------------------------------------------- + allocation of swap info +------------------------------------------------------------------------- */ + +void CommTiled::allocate_swap(int n) +{ + nsendproc = new int[n]; + nrecvproc = new int[n]; + sendother = new int[n]; + sendself = new int[n]; + nprocmax = new int[n]; + + sendproc = new int*[n]; + recvproc = new int*[n]; + sendnum = new int*[n]; + recvnum = new int*[n]; + size_forward_recv = new int*[n]; + firstrecv = new int*[n]; + size_reverse_send = new int*[n]; + size_reverse_recv = new int*[n]; + forward_recv_offset = new int*[n]; + reverse_recv_offset = new int*[n]; + + pbc_flag = new int*[n]; + pbc = new int**[n]; + sendbox = new double**[n]; + maxsendlist = new int*[n]; + sendlist = new int**[n]; + + for (int i = 0; i < n; i++) { + sendproc[i] = recvproc[i] = NULL; + sendnum[i] = recvnum[i] = NULL; + size_forward_recv[i] = firstrecv[i] = NULL; + size_reverse_send[i] = size_reverse_recv[i] = NULL; + forward_recv_offset[i] = reverse_recv_offset[i] = NULL; + + pbc_flag[i] = NULL; + pbc[i] = NULL; + sendbox[i] = NULL; + maxsendlist[i] = NULL; + sendlist[i] = NULL; + } + + maxreqstat = 0; + requests = NULL; + statuses = NULL; + + for (int i = 0; i < n; i++) { + nprocmax[i] = DELTA_PROCS; + grow_swap_send(i,DELTA_PROCS,0); + grow_swap_recv(i,DELTA_PROCS); + } +} + +/* ---------------------------------------------------------------------- + grow info for swap I, to allow for N procs to communicate with + ditto for complementary recv for swap I+1 or I-1, as invoked by caller +------------------------------------------------------------------------- */ + +void CommTiled::grow_swap_send(int i, int n, int nold) +{ + delete [] sendproc[i]; + sendproc[i] = new int[n]; + delete [] sendnum[i]; + sendnum[i] = new int[n]; + + delete [] size_reverse_recv[i]; + size_reverse_recv[i] = new int[n]; + delete [] reverse_recv_offset[i]; + reverse_recv_offset[i] = new int[n]; + + delete [] pbc_flag[i]; + pbc_flag[i] = new int[n]; + memory->destroy(pbc[i]); + memory->create(pbc[i],n,6,"comm:pbc_flag"); + memory->destroy(sendbox[i]); + memory->create(sendbox[i],n,6,"comm:sendbox"); + + delete [] maxsendlist[i]; + maxsendlist[i] = new int[n]; + + for (int j = 0; j < nold; j++) memory->destroy(sendlist[i][j]); + delete [] sendlist[i]; + sendlist[i] = new int*[n]; + for (int j = 0; j < n; j++) { + maxsendlist[i][j] = BUFMIN; + memory->create(sendlist[i][j],BUFMIN,"comm:sendlist[i][j]"); + } +} + +void CommTiled::grow_swap_recv(int i, int n) +{ + delete [] recvproc[i]; + recvproc[i] = new int[n]; + delete [] recvnum[i]; + recvnum[i] = new int[n]; + + delete [] size_forward_recv[i]; + size_forward_recv[i] = new int[n]; + delete [] firstrecv[i]; + firstrecv[i] = new int[n]; + delete [] forward_recv_offset[i]; + forward_recv_offset[i] = new int[n]; + + delete [] size_reverse_send[i]; + size_reverse_send[i] = new int[n]; + + if (n > maxreqstat) { + maxreqstat = n; + delete [] requests; + delete [] statuses; + requests = new MPI_Request[n]; + statuses = new MPI_Status[n]; + } +} + +/* ---------------------------------------------------------------------- + deallocate swap info +------------------------------------------------------------------------- */ + +void CommTiled::deallocate_swap(int n) +{ + delete [] nsendproc; + delete [] nrecvproc; + delete [] sendother; + delete [] sendself; + + for (int i = 0; i < n; i++) { + delete [] sendproc[i]; + delete [] recvproc[i]; + delete [] sendnum[i]; + delete [] recvnum[i]; + delete [] size_forward_recv[i]; + delete [] firstrecv[i]; + delete [] size_reverse_send[i]; + delete [] size_reverse_recv[i]; + delete [] forward_recv_offset[i]; + delete [] reverse_recv_offset[i]; + + delete [] pbc_flag[i]; + memory->destroy(pbc[i]); + memory->destroy(sendbox[i]); + delete [] maxsendlist[i]; + + for (int j = 0; j < nprocmax[i]; j++) memory->destroy(sendlist[i][j]); + delete [] sendlist[i]; + } + + delete [] sendproc; + delete [] recvproc; + delete [] sendnum; + delete [] recvnum; + delete [] size_forward_recv; + delete [] firstrecv; + delete [] size_reverse_send; + delete [] size_reverse_recv; + delete [] forward_recv_offset; + delete [] reverse_recv_offset; + + delete [] pbc_flag; + delete [] pbc; + delete [] sendbox; + delete [] maxsendlist; + delete [] sendlist; + + delete [] requests; + delete [] statuses; + + delete [] nprocmax; } /* ---------------------------------------------------------------------- diff --git a/src/comm_tiled.h b/src/comm_tiled.h index 2c7a6e62df..fd27c4de41 100644 --- a/src/comm_tiled.h +++ b/src/comm_tiled.h @@ -21,6 +21,7 @@ namespace LAMMPS_NS { class CommTiled : public Comm { public: CommTiled(class LAMMPS *); + CommTiled(class LAMMPS *, class Comm *); virtual ~CommTiled(); void init(); @@ -46,22 +47,23 @@ class CommTiled : public Comm { bigint memory_usage(); private: - int nswap; // # of swaps to perform = 2*dim - int *nsendproc,*nrecvproc; // # of procs to send/recv to/from in each swap - int triclinic; // 0 if domain is orthog, 1 if triclinic int map_style; // non-0 if global->local mapping is done int size_forward; // # of per-atom datums in forward comm int size_reverse; // # of datums in reverse comm int size_border; // # of datums in forward border comm - int **sendnum,**recvnum; // # of atoms to send/recv per swap/proc + int nswap; // # of swaps to perform = 2*dim + int *nsendproc,*nrecvproc; // # of procs to send/recv to/from in each swap + int *sendother; // 1 if send to any other proc in each swap + int *sendself; // 1 if send to self in each swap + int *nprocmax; // current max # of send procs for each swap int **sendproc,**recvproc; // proc to send/recv to/from per swap/proc + int **sendnum,**recvnum; // # of atoms to send/recv per swap/proc int **size_forward_recv; // # of values to recv in each forward swap/proc int **firstrecv; // where to put 1st recv atom per swap/proc int **size_reverse_send; // # to send in each reverse comm per swap/proc int **size_reverse_recv; // # to recv in each reverse comm per swap/proc - int **forward_recv_offset; // forward comm offsets in buf_recv per swap/proc int **reverse_recv_offset; // reverse comm offsets in buf_recv per swap/proc @@ -77,16 +79,53 @@ class CommTiled : public Comm { int maxsend,maxrecv; // current size of send/recv buffer int maxforward,maxreverse; // max # of datums in forward/reverse comm + int maxexchange; // max # of datums/atom in exchange comm int bufextra; // extra space beyond maxsend in send buffer + int maxreqstat; // max size of Request and Status vectors MPI_Request *requests; MPI_Status *statuses; int comm_x_only,comm_f_only; // 1 if only exchange x,f in for/rev comm - void grow_send(int, int); // reallocate send buffer - void grow_recv(int); // free/allocate recv buffer - void grow_list(int, int, int); // reallocate sendlist for one swap/proc + struct RCBinfo { + double mysplit[3][2]; // fractional RCB bounding box for one proc + double cut; // position of cut this proc owns + int dim; // dimension = 0/1/2 of cut + }; + + int noverlap; // # of overlapping procs + int maxoverlap; // current max length of overlap + int *overlap; // list of overlapping procs + + RCBinfo *rcbinfo; // list of RCB info for all procs + + double *prd; // local ptrs to Domain attributes + double *boxlo,*boxhi; + double *sublo,*subhi; + + void init_buffers(); + + // box drop and other functions + + typedef void (CommTiled::*BoxDropPtr)(int, double *, double *, int &); + BoxDropPtr box_drop; + void box_drop_brick(int, double *, double *, int &); + void box_drop_tiled(int, double *, double *, int &); + void box_drop_tiled_recurse(double *, double *, int, int, int &); + + typedef void (CommTiled::*BoxOtherPtr)(int, int, int, double *, double *); + BoxOtherPtr box_other; + void box_other_brick(int, int, int, double *, double *); + void box_other_tiled(int, int, int, double *, double *); + + void grow_send(int, int); // reallocate send buffer + void grow_recv(int); // free/allocate recv buffer + void grow_list(int, int, int); // reallocate sendlist for one swap/proc + void allocate_swap(int); // allocate swap arrays + void grow_swap_send(int, int, int); // grow swap arrays for send and recv + void grow_swap_recv(int, int); + void deallocate_swap(int); // deallocate swap arrays }; } diff --git a/src/create_atoms.cpp b/src/create_atoms.cpp index be5c479360..9777f23334 100644 --- a/src/create_atoms.cpp +++ b/src/create_atoms.cpp @@ -27,6 +27,8 @@ #include "domain.h" #include "lattice.h" #include "region.h" +#include "input.h" +#include "variable.h" #include "random_park.h" #include "random_mars.h" #include "math_extra.h" @@ -41,6 +43,7 @@ using namespace MathConst; enum{BOX,REGION,SINGLE,RANDOM}; enum{ATOM,MOLECULE}; +enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files /* ---------------------------------------------------------------------- */ @@ -103,6 +106,8 @@ void CreateAtoms::command(int narg, char **arg) remapflag = 0; mode = ATOM; int molseed; + varflag = 0; + vstr = xstr = ystr = zstr = NULL; nbasis = domain->lattice->nbasis; basistype = new int[nbasis]; @@ -141,6 +146,33 @@ void CreateAtoms::command(int narg, char **arg) else if (strcmp(arg[iarg+1],"lattice") == 0) scaleflag = 1; else error->all(FLERR,"Illegal create_atoms command"); iarg += 2; + } else if (strcmp(arg[iarg],"var") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal create_atoms command"); + delete [] vstr; + int n = strlen(arg[iarg+1]) + 1; + vstr = new char[n]; + strcpy(vstr,arg[iarg+1]); + varflag = 1; + iarg += 2; + } else if (strcmp(arg[iarg],"set") == 0) { + if (iarg+3 > narg) error->all(FLERR,"Illegal create_atoms command"); + if (strcmp(arg[iarg+1],"x") == 0) { + delete [] xstr; + int n = strlen(arg[iarg+2]) + 1; + xstr = new char[n]; + strcpy(xstr,arg[iarg+2]); + } else if (strcmp(arg[iarg+1],"y") == 0) { + delete [] ystr; + int n = strlen(arg[iarg+2]) + 1; + ystr = new char[n]; + strcpy(ystr,arg[iarg+2]); + } else if (strcmp(arg[iarg+1],"z") == 0) { + delete [] zstr; + int n = strlen(arg[iarg+2]) + 1; + zstr = new char[n]; + strcpy(zstr,arg[iarg+2]); + } else error->all(FLERR,"Illegal create_atoms command"); + iarg += 3; } else error->all(FLERR,"Illegal create_atoms command"); } @@ -178,6 +210,47 @@ void CreateAtoms::command(int narg, char **arg) ranmol = new RanMars(lmp,molseed+comm->me); } + // error check and further setup for variable test + // save local copy of each equal variable string so can restore at end + + if (!vstr && (xstr || ystr || zstr)) + error->all(FLERR,"Incomplete use of variables in create_atoms command"); + if (vstr && (!xstr && !ystr && !zstr)) + error->all(FLERR,"Incomplete use of variables in create_atoms command"); + + if (varflag) { + vvar = input->variable->find(vstr); + if (vvar < 0) + error->all(FLERR,"Variable name for create_atoms does not exist"); + if (!input->variable->equalstyle(vvar)) + error->all(FLERR,"Variable for create_atoms is invalid style"); + + if (xstr) { + xvar = input->variable->find(xstr); + if (xvar < 0) + error->all(FLERR,"Variable name for create_atoms does not exist"); + if (!input->variable->equalstyle(xvar)) + error->all(FLERR,"Variable for create_atoms is invalid style"); + input->variable->equal_save(xvar,xstr_copy); + } + if (ystr) { + yvar = input->variable->find(ystr); + if (yvar < 0) + error->all(FLERR,"Variable name for create_atoms does not exist"); + if (!input->variable->equalstyle(yvar)) + error->all(FLERR,"Variable for create_atoms is invalid style"); + input->variable->equal_save(yvar,ystr_copy); + } + if (zstr) { + zvar = input->variable->find(zstr); + if (zvar < 0) + error->all(FLERR,"Variable name for create_atoms does not exist"); + if (!input->variable->equalstyle(zvar)) + error->all(FLERR,"Variable for create_atoms is invalid style"); + input->variable->equal_save(zvar,zstr_copy); + } + } + // demand non-none lattice be defined for BOX and REGION // else setup scaling for SINGLE and RANDOM // could use domain->lattice->lattice2box() to do conversion of @@ -226,17 +299,32 @@ void CreateAtoms::command(int narg, char **arg) } if (style == BOX || style == REGION) { - if (domain->xperiodic) { - if (comm->myloc[0] == 0) sublo[0] -= epsilon[0]; - if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] -= 2.0*epsilon[0]; - } - if (domain->yperiodic) { - if (comm->myloc[1] == 0) sublo[1] -= epsilon[1]; - if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] -= 2.0*epsilon[1]; - } - if (domain->zperiodic) { - if (comm->myloc[2] == 0) sublo[2] -= epsilon[2]; - if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] -= 2.0*epsilon[2]; + if (comm->layout != LAYOUT_TILED) { + if (domain->xperiodic) { + if (comm->myloc[0] == 0) sublo[0] -= epsilon[0]; + if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] -= 2.0*epsilon[0]; + } + if (domain->yperiodic) { + if (comm->myloc[1] == 0) sublo[1] -= epsilon[1]; + if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] -= 2.0*epsilon[1]; + } + if (domain->zperiodic) { + if (comm->myloc[2] == 0) sublo[2] -= epsilon[2]; + if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] -= 2.0*epsilon[2]; + } + } else { + if (domain->xperiodic) { + if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0]; + if (comm->mysplit[0][1] == 1.0) subhi[0] -= 2.0*epsilon[0]; + } + if (domain->yperiodic) { + if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1]; + if (comm->mysplit[1][1] == 1.0) subhi[1] -= 2.0*epsilon[1]; + } + if (domain->zperiodic) { + if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2]; + if (comm->mysplit[2][1] == 1.0) subhi[2] -= 2.0*epsilon[2]; + } } } @@ -259,6 +347,14 @@ void CreateAtoms::command(int narg, char **arg) fix->set_arrays(i); } + // restore each equal variable string previously saved + + if (varflag) { + if (xstr) input->variable->equal_restore(xvar,xstr_copy); + if (ystr) input->variable->equal_restore(yvar,ystr_copy); + if (zstr) input->variable->equal_restore(zvar,zstr_copy); + } + // set new total # of atoms and error check bigint nblocal = atom->nlocal; @@ -409,6 +505,10 @@ void CreateAtoms::command(int narg, char **arg) delete ranmol; if (domain->lattice) delete [] basistype; + delete [] vstr; + delete [] xstr; + delete [] ystr; + delete [] zstr; // print status @@ -509,7 +609,7 @@ void CreateAtoms::add_random() } // generate random positions for each new atom/molecule within bounding box - // iterate until atom is within region and triclinic simulation box + // iterate until atom is within region, variable, and triclinic simulation box // if final atom position is in my subbox, create it if (xlo > xhi || ylo > yhi || zlo > zhi) @@ -527,6 +627,7 @@ void CreateAtoms::add_random() if (nregion >= 0 && domain->regions[nregion]->match(xone[0],xone[1],xone[2]) == 0) valid = 0; + if (varflag && vartest(xone) == 0) valid = 0; if (triclinic) { domain->x2lamda(xone,lamda); coord = lamda; @@ -642,6 +743,10 @@ void CreateAtoms::add_lattice() if (style == REGION) if (!domain->regions[nregion]->match(x[0],x[1],x[2])) continue; + // if variable test specified, eval variable + + if (varflag && vartest(x) == 0) continue; + // test if atom/molecule position is in my subbox if (triclinic) { @@ -696,3 +801,19 @@ void CreateAtoms::add_molecule(double *center) atom->add_molecule_atom(onemol,m,n,0); } } + +/* ---------------------------------------------------------------------- + test a generated atom position against variable evaluation + first plug in x,y,z values as requested +------------------------------------------------------------------------- */ + +int CreateAtoms::vartest(double *x) +{ + if (xstr) input->variable->equal_override(xvar,x[0]); + if (ystr) input->variable->equal_override(yvar,x[1]); + if (zstr) input->variable->equal_override(zvar,x[2]); + + double value = input->variable->compute_equal(vvar); + if (value == 0.0) return 0; + return 1; +} diff --git a/src/create_atoms.h b/src/create_atoms.h index 26d3f475ef..a75a29d484 100644 --- a/src/create_atoms.h +++ b/src/create_atoms.h @@ -35,6 +35,10 @@ class CreateAtoms : protected Pointers { double xone[3]; int remapflag; + int varflag,vvar,xvar,yvar,zvar; + char *vstr,*xstr,*ystr,*zstr; + char *xstr_copy,*ystr_copy,*zstr_copy; + class Molecule *onemol; class RanMars *ranmol; @@ -45,6 +49,7 @@ class CreateAtoms : protected Pointers { void add_random(); void add_lattice(); void add_molecule(double *); + int vartest(double *); // evaluate a variable with new atom position }; } diff --git a/src/domain.cpp b/src/domain.cpp index 5ffa35a82a..6f3344bb02 100644 --- a/src/domain.cpp +++ b/src/domain.cpp @@ -44,14 +44,15 @@ using namespace LAMMPS_NS; using namespace MathConst; +enum{NO_REMAP,X_REMAP,V_REMAP}; // same as fix_deform.cpp +enum{IGNORE,WARN,ERROR}; // same as thermo.cpp +enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files + #define BIG 1.0e20 #define SMALL 1.0e-4 #define DELTAREGION 4 #define BONDSTRETCH 1.1 -enum{NO_REMAP,X_REMAP,V_REMAP}; // same as fix_deform.cpp -enum{IGNORE,WARN,ERROR}; // same as thermo.cpp - /* ---------------------------------------------------------------------- default is periodic ------------------------------------------------------------------------- */ @@ -249,55 +250,81 @@ void Domain::set_global_box() /* ---------------------------------------------------------------------- set lamda box params assumes global box is defined and proc assignment has been made - uses comm->xyz_split to define subbox boundaries in consistent manner + uses comm->xyz_split or comm->mysplit + to define subbox boundaries in consistent manner ------------------------------------------------------------------------- */ void Domain::set_lamda_box() { - int *myloc = comm->myloc; - double *xsplit = comm->xsplit; - double *ysplit = comm->ysplit; - double *zsplit = comm->zsplit; + if (comm->layout != LAYOUT_TILED) { + int *myloc = comm->myloc; + double *xsplit = comm->xsplit; + double *ysplit = comm->ysplit; + double *zsplit = comm->zsplit; - sublo_lamda[0] = xsplit[myloc[0]]; - subhi_lamda[0] = xsplit[myloc[0]+1]; + sublo_lamda[0] = xsplit[myloc[0]]; + subhi_lamda[0] = xsplit[myloc[0]+1]; + sublo_lamda[1] = ysplit[myloc[1]]; + subhi_lamda[1] = ysplit[myloc[1]+1]; + sublo_lamda[2] = zsplit[myloc[2]]; + subhi_lamda[2] = zsplit[myloc[2]+1]; - sublo_lamda[1] = ysplit[myloc[1]]; - subhi_lamda[1] = ysplit[myloc[1]+1]; + } else { + double (*mysplit)[2] = comm->mysplit; - sublo_lamda[2] = zsplit[myloc[2]]; - subhi_lamda[2] = zsplit[myloc[2]+1]; + sublo_lamda[0] = mysplit[0][0]; + subhi_lamda[0] = mysplit[0][1]; + sublo_lamda[1] = mysplit[1][0]; + subhi_lamda[1] = mysplit[1][1]; + sublo_lamda[2] = mysplit[2][0]; + subhi_lamda[2] = mysplit[2][1]; + } } /* ---------------------------------------------------------------------- set local subbox params for orthogonal boxes assumes global box is defined and proc assignment has been made - uses comm->xyz_split to define subbox boundaries in consistent manner + uses comm->xyz_split or comm->mysplit + to define subbox boundaries in consistent manner insure subhi[max] = boxhi ------------------------------------------------------------------------- */ void Domain::set_local_box() { - int *myloc = comm->myloc; - int *procgrid = comm->procgrid; - double *xsplit = comm->xsplit; - double *ysplit = comm->ysplit; - double *zsplit = comm->zsplit; + if (triclinic) return; + + if (comm->layout != LAYOUT_TILED) { + int *myloc = comm->myloc; + int *procgrid = comm->procgrid; + double *xsplit = comm->xsplit; + double *ysplit = comm->ysplit; + double *zsplit = comm->zsplit; - if (triclinic == 0) { sublo[0] = boxlo[0] + xprd*xsplit[myloc[0]]; - if (myloc[0] < procgrid[0]-1) - subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1]; + if (myloc[0] < procgrid[0]-1) subhi[0] = boxlo[0] + xprd*xsplit[myloc[0]+1]; else subhi[0] = boxhi[0]; sublo[1] = boxlo[1] + yprd*ysplit[myloc[1]]; - if (myloc[1] < procgrid[1]-1) - subhi[1] = boxlo[1] + yprd*ysplit[myloc[1]+1]; + if (myloc[1] < procgrid[1]-1) subhi[1] = boxlo[1] + yprd*ysplit[myloc[1]+1]; else subhi[1] = boxhi[1]; sublo[2] = boxlo[2] + zprd*zsplit[myloc[2]]; - if (myloc[2] < procgrid[2]-1) - subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1]; + if (myloc[2] < procgrid[2]-1) subhi[2] = boxlo[2] + zprd*zsplit[myloc[2]+1]; + else subhi[2] = boxhi[2]; + + } else { + double (*mysplit)[2] = comm->mysplit; + + sublo[0] = boxlo[0] + xprd*mysplit[0][0]; + if (mysplit[0][1] < 1.0) subhi[0] = boxlo[0] + xprd*mysplit[0][1]; + else subhi[0] = boxhi[0]; + + sublo[1] = boxlo[1] + yprd*mysplit[1][0]; + if (mysplit[1][1] < 1.0) subhi[1] = boxlo[1] + yprd*mysplit[1][1]; + else subhi[1] = boxhi[1]; + + sublo[2] = boxlo[2] + zprd*mysplit[2][0]; + if (mysplit[2][1] < 1.0) subhi[2] = boxlo[2] + zprd*mysplit[2][1]; else subhi[2] = boxhi[2]; } } diff --git a/src/dump_custom.cpp b/src/dump_custom.cpp index f1cb9379fa..56a38e2402 100644 --- a/src/dump_custom.cpp +++ b/src/dump_custom.cpp @@ -34,7 +34,7 @@ using namespace LAMMPS_NS; // customize by adding keyword // also customize compute_atom_property.cpp -enum{ID,MOL,PROC,TYPE,ELEMENT,MASS, +enum{ID,MOL,PROC,PROCP1,TYPE,ELEMENT,MASS, X,Y,Z,XS,YS,ZS,XSTRI,YSTRI,ZSTRI,XU,YU,ZU,XUTRI,YUTRI,ZUTRI, XSU,YSU,ZSU,XSUTRI,YSUTRI,ZSUTRI, IX,IY,IZ, @@ -470,6 +470,10 @@ int DumpCustom::count() for (i = 0; i < nlocal; i++) dchoose[i] = me; ptr = dchoose; nstride = 1; + } else if (thresh_array[ithresh] == PROCP1) { + for (i = 0; i < nlocal; i++) dchoose[i] = me; + ptr = dchoose; + nstride = 1; } else if (thresh_array[ithresh] == TYPE) { int *type = atom->type; for (i = 0; i < nlocal; i++) dchoose[i] = type[i]; @@ -1018,6 +1022,9 @@ int DumpCustom::parse_fields(int narg, char **arg) } else if (strcmp(arg[iarg],"proc") == 0) { pack_choice[i] = &DumpCustom::pack_proc; vtype[i] = INT; + } else if (strcmp(arg[iarg],"procp1") == 0) { + pack_choice[i] = &DumpCustom::pack_procp1; + vtype[i] = INT; } else if (strcmp(arg[iarg],"type") == 0) { pack_choice[i] = &DumpCustom::pack_type; vtype[i] = INT; @@ -1497,6 +1504,7 @@ int DumpCustom::modify_param(int narg, char **arg) if (strcmp(arg[1],"id") == 0) thresh_array[nthresh] = ID; else if (strcmp(arg[1],"mol") == 0) thresh_array[nthresh] = MOL; else if (strcmp(arg[1],"proc") == 0) thresh_array[nthresh] = PROC; + else if (strcmp(arg[1],"procp1") == 0) thresh_array[nthresh] = PROCP1; else if (strcmp(arg[1],"type") == 0) thresh_array[nthresh] = TYPE; else if (strcmp(arg[1],"mass") == 0) thresh_array[nthresh] = MASS; @@ -1876,6 +1884,16 @@ void DumpCustom::pack_proc(int n) /* ---------------------------------------------------------------------- */ +void DumpCustom::pack_procp1(int n) +{ + for (int i = 0; i < nchoose; i++) { + buf[n] = me+1; + n += size_one; + } +} + +/* ---------------------------------------------------------------------- */ + void DumpCustom::pack_type(int n) { int *type = atom->type; diff --git a/src/dump_custom.h b/src/dump_custom.h index c483d5b7e1..ea3c5ecff8 100644 --- a/src/dump_custom.h +++ b/src/dump_custom.h @@ -124,6 +124,7 @@ class DumpCustom : public Dump { void pack_id(int); void pack_molecule(int); void pack_proc(int); + void pack_procp1(int); void pack_type(int); void pack_mass(int); diff --git a/src/fix_adapt.cpp b/src/fix_adapt.cpp index 22e718dd76..246d828e7d 100644 --- a/src/fix_adapt.cpp +++ b/src/fix_adapt.cpp @@ -498,8 +498,6 @@ void FixAdapt::restore_settings() } else if (ad->which == ATOM) { if (diamflag) { - int mflag = 0; - if (atom->rmass_flag) mflag = 1; double density; double *vec = fix_diam->vstore; diff --git a/src/fix_balance.cpp b/src/fix_balance.cpp index e4b7b28b7d..c617d5d042 100644 --- a/src/fix_balance.cpp +++ b/src/fix_balance.cpp @@ -22,12 +22,14 @@ #include "irregular.h" #include "force.h" #include "kspace.h" +#include "rcb.h" #include "error.h" using namespace LAMMPS_NS; using namespace FixConst; -enum{SHIFT,RCB}; +enum{SHIFT,BISECTION}; +enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files /* ---------------------------------------------------------------------- */ @@ -53,7 +55,7 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) : thresh = force->numeric(FLERR,arg[4]); if (strcmp(arg[5],"shift") == 0) lbstyle = SHIFT; - else if (strcmp(arg[5],"rcb") == 0) lbstyle = RCB; + else if (strcmp(arg[5],"rcb") == 0) lbstyle = BISECTION; else error->all(FLERR,"Illegal fix balance command"); int iarg = 5; @@ -65,7 +67,7 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) : stopthresh = force->numeric(FLERR,arg[iarg+3]); if (stopthresh < 1.0) error->all(FLERR,"Illegal fix balance command"); iarg += 4; - } else if (lbstyle == RCB) { + } else if (lbstyle == BISECTION) { iarg++; } @@ -97,14 +99,16 @@ FixBalance::FixBalance(LAMMPS *lmp, int narg, char **arg) : } } - // create instance of Balance class and initialize it with params - // NOTE: do I need Balance instance if RCB? - // create instance of Irregular class + if (lbstyle == BISECTION && comm->style == 0) + error->all(FLERR,"Fix balance rcb cannot be used with comm_style brick"); + + // create instance of Balance class + // if SHIFT, initialize it with params balance = new Balance(lmp); - if (lbstyle == SHIFT) balance->shift_setup(bstr,nitermax,thresh); - if (lbstyle == RCB) error->all(FLERR,"Fix balance rcb is not yet supported"); + + // create instance of Irregular class irregular = new Irregular(lmp); @@ -238,32 +242,33 @@ void FixBalance::rebalance() { imbprev = imbnow; + // invoke balancer and reset comm->uniform flag + + int *sendproc; if (lbstyle == SHIFT) { itercount = balance->shift(); - } else if (lbstyle == RCB) { + comm->layout = LAYOUT_NONUNIFORM; + } else if (lbstyle == BISECTION) { + sendproc = balance->bisection(); + comm->layout = LAYOUT_TILED; } // output of final result if (fp) balance->dumpout(update->ntimestep,fp); - // reset comm->uniform flag - // NOTE: this needs to change with RCB - - comm->uniform = 0; - // reset proc sub-domains if (domain->triclinic) domain->set_lamda_box(); domain->set_local_box(); - // if splits moved further than neighboring processor // move atoms to new processors via irregular() - // only needed if migrate_check() says an atom moves to far, + // only needed if migrate_check() says an atom moves to far // else allow caller's comm->exchange() to do it if (domain->triclinic) domain->x2lamda(atom->nlocal); - if (irregular->migrate_check()) irregular->migrate_atoms(); + if (lbstyle == BISECTION) irregular->migrate_atoms(0,sendproc); + else if (irregular->migrate_check()) irregular->migrate_atoms(); if (domain->triclinic) domain->lamda2x(atom->nlocal); // invoke KSpace setup_grid() to adjust to new proc sub-domains @@ -303,5 +308,6 @@ double FixBalance::compute_vector(int i) double FixBalance::memory_usage() { double bytes = irregular->memory_usage(); + if (balance->rcb) bytes += balance->rcb->memory_usage(); return bytes; } diff --git a/src/fix_deform.cpp b/src/fix_deform.cpp index 3401c61a67..3d350c569a 100644 --- a/src/fix_deform.cpp +++ b/src/fix_deform.cpp @@ -987,3 +987,14 @@ void FixDeform::options(int narg, char **arg) } else error->all(FLERR,"Illegal fix deform command"); } } + +/* ---------------------------------------------------------------------- + memory usage of Irregular +------------------------------------------------------------------------- */ + +double FixDeform::memory_usage() +{ + double bytes = 0.0; + if (irregular) bytes += irregular->memory_usage(); + return bytes; +} diff --git a/src/fix_deform.h b/src/fix_deform.h index 7a781629dc..469fa87761 100644 --- a/src/fix_deform.h +++ b/src/fix_deform.h @@ -35,6 +35,7 @@ class FixDeform : public Fix { void init(); void pre_exchange(); void end_of_step(); + double memory_usage(); private: int triclinic,scaleflag,flipflag; diff --git a/src/fix_nh.cpp b/src/fix_nh.cpp index 1abc05c739..b254636111 100644 --- a/src/fix_nh.cpp +++ b/src/fix_nh.cpp @@ -2271,3 +2271,14 @@ void FixNH::pre_exchange() domain->lamda2x(atom->nlocal); } } + +/* ---------------------------------------------------------------------- + memory usage of Irregular +------------------------------------------------------------------------- */ + +double FixNH::memory_usage() +{ + double bytes = 0.0; + if (irregular) bytes += irregular->memory_usage(); + return bytes; +} diff --git a/src/fix_nh.h b/src/fix_nh.h index 630c3cd68f..0e676b4a0d 100644 --- a/src/fix_nh.h +++ b/src/fix_nh.h @@ -39,6 +39,7 @@ class FixNH : public Fix { void reset_target(double); void reset_dt(); virtual void *extract(const char*,int &); + double memory_usage(); protected: int dimension,which; diff --git a/src/input.cpp b/src/input.cpp index b5affba078..f0bcfe7867 100644 --- a/src/input.cpp +++ b/src/input.cpp @@ -1161,19 +1161,15 @@ void Input::comm_style() { if (narg < 1) error->all(FLERR,"Illegal comm_style command"); if (strcmp(arg[0],"brick") == 0) { - if (comm->layout) - error->all(FLERR, - "Cannot switch to comm style brick from " - "irregular tiling of proc domains"); - comm = new CommBrick(lmp); - // NOTE: this will lose load balancing info in old CommBrick - if (domain->box_exist) { - comm->set_proc_grid(); - domain->set_local_box(); - } + if (comm->style == 0) return; + Comm *oldcomm = comm; + comm = new CommBrick(lmp,oldcomm); + delete oldcomm; } else if (strcmp(arg[0],"tiled") == 0) { - error->all(FLERR,"Comm_style tiled not yet supported"); - comm = new CommTiled(lmp); + if (comm->style == 1) return; + Comm *oldcomm = comm; + comm = new CommTiled(lmp,oldcomm); + delete oldcomm; } else error->all(FLERR,"Illegal comm_style command"); } diff --git a/src/irregular.cpp b/src/irregular.cpp index b55d964327..9fd478a9da 100644 --- a/src/irregular.cpp +++ b/src/irregular.cpp @@ -30,6 +30,8 @@ using namespace LAMMPS_NS; int *Irregular::proc_recv_copy; int compare_standalone(const void *, const void *); +enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files + #define BUFFACTOR 1.5 #define BUFMIN 1000 #define BUFEXTRA 1000 @@ -43,13 +45,26 @@ Irregular::Irregular(LAMMPS *lmp) : Pointers(lmp) triclinic = domain->triclinic; map_style = atom->map_style; - procgrid = comm->procgrid; - grid2proc = comm->grid2proc; - aplan = NULL; - dplan = NULL; + // migrate work vectors - // initialize buffers for atom comm, not used for datum comm + maxlocal = 0; + mproclist = NULL; + msizes = NULL; + + // send buffers + + maxdbuf = 0; + dbuf = NULL; + maxbuf = 0; + buf = NULL; + + // universal work vectors + + memory->create(work1,nprocs,"irregular:work1"); + memory->create(work2,nprocs,"irregular:work2"); + + // initialize buffers for migrate atoms, not used for datum comm // these can persist for multiple irregular operations maxsend = BUFMIN; @@ -62,9 +77,12 @@ Irregular::Irregular(LAMMPS *lmp) : Pointers(lmp) Irregular::~Irregular() { - if (aplan) destroy_atom(); - if (dplan) destroy_data(); - + memory->destroy(mproclist); + memory->destroy(msizes); + memory->destroy(dbuf); + memory->destroy(buf); + memory->destroy(work1); + memory->destroy(work2); memory->destroy(buf_send); memory->destroy(buf_recv); } @@ -74,11 +92,13 @@ Irregular::~Irregular() can be used in place of comm->exchange() unlike exchange(), allows atoms to have moved arbitrarily long distances sets up irregular plan, invokes it, destroys it + sortflag = flag for sorting order of received messages by proc ID + procassign = non-NULL if already know procs atoms are assigned to (from RCB) atoms MUST be remapped to be inside simulation box before this is called for triclinic: atoms must be in lamda coords (0-1) before this is called ------------------------------------------------------------------------- */ -void Irregular::migrate_atoms(int sortflag) +void Irregular::migrate_atoms(int sortflag, int *procassign) { // clear global->local map since atoms move to new procs // clear old ghosts so map_set() at end will operate only on local atoms @@ -101,13 +121,16 @@ void Irregular::migrate_atoms(int sortflag) subhi = domain->subhi_lamda; } - uniform = comm->uniform; + layout = comm->layout; xsplit = comm->xsplit; ysplit = comm->ysplit; zsplit = comm->zsplit; boxlo = domain->boxlo; prd = domain->prd; + procgrid = comm->procgrid; + grid2proc = comm->grid2proc; + // loop over atoms, flag any that are not in my sub-box // fill buffer with atoms leaving my box, using < and >= // assign which proc it belongs to via coord2proc() @@ -119,41 +142,63 @@ void Irregular::migrate_atoms(int sortflag) double **x = atom->x; int nlocal = atom->nlocal; + if (nlocal > maxlocal) { + maxlocal = nlocal; + memory->destroy(mproclist); + memory->destroy(msizes); + memory->create(mproclist,maxlocal,"irregular:mproclist"); + memory->create(msizes,maxlocal,"irregular:msizes"); + } + + int igx,igy,igz; int nsend = 0; int nsendatom = 0; - int *sizes = new int[nlocal]; - int *proclist = new int[nlocal]; - int igx,igy,igz; - int i = 0; - while (i < nlocal) { - if (x[i][0] < sublo[0] || x[i][0] >= subhi[0] || - x[i][1] < sublo[1] || x[i][1] >= subhi[1] || - x[i][2] < sublo[2] || x[i][2] >= subhi[2]) { - proclist[nsendatom] = coord2proc(x[i],igx,igy,igz); - if (proclist[nsendatom] != me) { + + if (procassign) { + while (i < nlocal) { + if (procassign[i] == me) i++; + else { + mproclist[nsendatom] = procassign[i]; if (nsend > maxsend) grow_send(nsend,1); - sizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]); - nsend += sizes[nsendatom]; + msizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]); + nsend += msizes[nsendatom]; nsendatom++; avec->copy(nlocal-1,i,1); + procassign[i] = procassign[nlocal-1]; nlocal--; + } + } + + } else { + while (i < nlocal) { + if (x[i][0] < sublo[0] || x[i][0] >= subhi[0] || + x[i][1] < sublo[1] || x[i][1] >= subhi[1] || + x[i][2] < sublo[2] || x[i][2] >= subhi[2]) { + mproclist[nsendatom] = coord2proc(x[i],igx,igy,igz); + if (mproclist[nsendatom] == me) i++; + else { + if (nsend > maxsend) grow_send(nsend,1); + msizes[nsendatom] = avec->pack_exchange(i,&buf_send[nsend]); + nsend += msizes[nsendatom]; + nsendatom++; + avec->copy(nlocal-1,i,1); + nlocal--; + } } else i++; - } else i++; + } } + atom->nlocal = nlocal; // create irregular communication plan, perform comm, destroy plan // returned nrecv = size of buffer needed for incoming atoms - int nrecv = create_atom(nsendatom,sizes,proclist,sortflag); + int nrecv = create_atom(nsendatom,msizes,mproclist,sortflag); if (nrecv > maxrecv) grow_recv(nrecv); - exchange_atom(buf_send,sizes,buf_recv); + exchange_atom(buf_send,msizes,buf_recv); destroy_atom(); - delete [] sizes; - delete [] proclist; - // add received atoms to my list int m = 0; @@ -174,6 +219,11 @@ void Irregular::migrate_atoms(int sortflag) int Irregular::migrate_check() { + // migrate required if comm layout is tiled + // cannot use myloc[] logic below + + if (comm->layout == LAYOUT_TILED) return 1; + // subbox bounds for orthogonal or triclinic box // other comm/domain data used by coord2proc() @@ -186,13 +236,16 @@ int Irregular::migrate_check() subhi = domain->subhi_lamda; } - uniform = comm->uniform; + layout = comm->layout; xsplit = comm->xsplit; ysplit = comm->ysplit; zsplit = comm->zsplit; boxlo = domain->boxlo; prd = domain->prd; + procgrid = comm->procgrid; + grid2proc = comm->grid2proc; + // loop over atoms, check for any that are not in my sub-box // assign which proc it belongs to via coord2proc() // if logical igx,igy,igz of newproc > one away from myloc, set flag = 1 @@ -250,6 +303,7 @@ int Irregular::migrate_check() n = # of atoms to send sizes = # of doubles for each atom proclist = proc to send each atom to (not including self) + sortflag = flag for sorting order of received messages by proc ID return total # of doubles I will recv (not including self) ------------------------------------------------------------------------- */ @@ -257,51 +311,60 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag) { int i; - // allocate plan and work vectors - - if (aplan) destroy_atom(); - aplan = (PlanAtom *) memory->smalloc(sizeof(PlanAtom),"irregular:aplan"); - int *list = new int[nprocs]; - int *count = new int[nprocs]; - - // nrecv = # of messages I receive + // setup for collective comm + // work1 = 1 for procs I send a message to, not including self + // work2 = 1 for all procs, used for ReduceScatter for (i = 0; i < nprocs; i++) { - list[i] = 0; - count[i] = 1; + work1[i] = 0; + work2[i] = 1; } - for (i = 0; i < n; i++) list[proclist[i]] = 1; + for (i = 0; i < n; i++) work1[proclist[i]] = 1; + work1[me] = 0; - int nrecv; - MPI_Reduce_scatter(list,&nrecv,count,MPI_INT,MPI_SUM,world); + // nrecv_proc = # of procs I receive messages from, not including self + // options for performing ReduceScatter operation + // some are more efficient on some machines at big sizes + +#ifdef LAMMPS_RS_ALLREDUCE_INPLACE + MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world); + nrecv_proc = work1[me]; +#else +#ifdef LAMMPS_RS_ALLREDUCE + MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world); + nrecv_proc = work2[me]; +#else + MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world); +#endif +#endif // allocate receive arrays - int *proc_recv = new int[nrecv]; - int *length_recv = new int[nrecv]; - MPI_Request *request = new MPI_Request[nrecv]; - MPI_Status *status = new MPI_Status[nrecv]; + proc_recv = new int[nrecv_proc]; + length_recv = new int[nrecv_proc]; + request = new MPI_Request[nrecv_proc]; + status = new MPI_Status[nrecv_proc]; - // nsend = # of messages I send + // nsend_proc = # of messages I send - for (i = 0; i < nprocs; i++) list[i] = 0; - for (i = 0; i < n; i++) list[proclist[i]] += sizes[i]; + for (i = 0; i < nprocs; i++) work1[i] = 0; + for (i = 0; i < n; i++) work1[proclist[i]] += sizes[i]; - int nsend = 0; + nsend_proc = 0; for (i = 0; i < nprocs; i++) - if (list[i]) nsend++; + if (work1[i]) nsend_proc++; // allocate send arrays - int *proc_send = new int[nsend]; - int *length_send = new int[nsend]; - int *num_send = new int[nsend]; - int *index_send = new int[n]; - int *offset_send = new int[n]; + proc_send = new int[nsend_proc]; + length_send = new int[nsend_proc]; + num_send = new int[nsend_proc]; + index_send = new int[n]; + offset_send = new int[n]; // list still stores size of message for procs I send to // proc_send = procs I send to - // length_send = total size of message I send to each proc + // length_send = # of doubles I send to each proc // to balance pattern of send messages: // each proc begins with iproc > me, continues until iproc = me // reset list to store which send message each proc corresponds to @@ -311,81 +374,81 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag) for (i = 0; i < nprocs; i++) { iproc++; if (iproc == nprocs) iproc = 0; - if (list[iproc] > 0) { + if (work1[iproc] > 0) { proc_send[isend] = iproc; - length_send[isend] = list[iproc]; - list[iproc] = isend; + length_send[isend] = work1[iproc]; + work1[iproc] = isend; isend++; } } // num_send = # of atoms I send to each proc - for (i = 0; i < nsend; i++) num_send[i] = 0; + for (i = 0; i < nsend_proc; i++) num_send[i] = 0; for (i = 0; i < n; i++) { - isend = list[proclist[i]]; + isend = work1[proclist[i]]; num_send[isend]++; } - // count = offsets into index_send for each proc I send to + // work2 = offsets into index_send for each proc I send to // index_send = list of which atoms to send to each proc // 1st N1 values are atom indices for 1st proc, // next N2 values are atom indices for 2nd proc, etc // offset_send = where each atom starts in send buffer - count[0] = 0; - for (i = 1; i < nsend; i++) count[i] = count[i-1] + num_send[i-1]; + work2[0] = 0; + for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1]; for (i = 0; i < n; i++) { - isend = list[proclist[i]]; - index_send[count[isend]++] = i; + isend = work1[proclist[i]]; + index_send[work2[isend]++] = i; if (i) offset_send[i] = offset_send[i-1] + sizes[i-1]; else offset_send[i] = 0; } // tell receivers how much data I send - // sendmax = largest # of doubles I send in a single message + // sendmax_proc = # of doubles I send in largest single message - int sendmax = 0; - for (i = 0; i < nsend; i++) { + sendmax_proc = 0; + for (i = 0; i < nsend_proc; i++) { MPI_Send(&length_send[i],1,MPI_INT,proc_send[i],0,world); - sendmax = MAX(sendmax,length_send[i]); + sendmax_proc = MAX(sendmax_proc,length_send[i]); } // receive incoming messages // proc_recv = procs I recv from - // length_recv = total size of message each proc sends me - // nrecvsize = total size of data I recv + // length_recv = # of doubles each proc sends me + // nrecvsize = total size of atom data I recv int nrecvsize = 0; - for (i = 0; i < nrecv; i++) { + for (i = 0; i < nrecv_proc; i++) { MPI_Recv(&length_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status); proc_recv[i] = status->MPI_SOURCE; nrecvsize += length_recv[i]; } - // sort proc_recv and num_recv by proc ID if requested + // sort proc_recv and length_recv by proc ID if requested // useful for debugging to insure reproducible ordering of received atoms // invoke by adding final arg = 1 to create_atom() call in migrate_atoms() if (sortflag) { - int *order = new int[nrecv]; - int *proc_recv_ordered = new int[nrecv]; - int *length_recv_ordered = new int[nrecv]; + int *order = new int[nrecv_proc]; + int *proc_recv_ordered = new int[nrecv_proc]; + int *length_recv_ordered = new int[nrecv_proc]; - for (i = 0; i < nrecv; i++) order[i] = i; + for (i = 0; i < nrecv_proc; i++) order[i] = i; proc_recv_copy = proc_recv; - qsort(order,nrecv,sizeof(int),compare_standalone); + qsort(order,nrecv_proc,sizeof(int),compare_standalone); int j; - for (i = 0; i < nrecv; i++) { + for (i = 0; i < nrecv_proc; i++) { j = order[i]; proc_recv_ordered[i] = proc_recv[j]; length_recv_ordered[i] = length_recv[j]; } - memcpy(proc_recv,proc_recv_ordered,nrecv*sizeof(int)); - memcpy(length_recv,length_recv_ordered,nrecv*sizeof(int)); + memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int)); + memcpy(length_recv,length_recv_ordered,nrecv_proc*sizeof(int)); delete [] order; delete [] proc_recv_ordered; delete [] length_recv_ordered; @@ -396,27 +459,7 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag) MPI_Barrier(world); - // free work vectors - - delete [] count; - delete [] list; - - // initialize plan - - aplan->nsend = nsend; - aplan->nrecv = nrecv; - aplan->sendmax = sendmax; - - aplan->proc_send = proc_send; - aplan->length_send = length_send; - aplan->num_send = num_send; - aplan->index_send = index_send; - aplan->offset_send = offset_send; - aplan->proc_recv = proc_recv; - aplan->length_recv = length_recv; - - aplan->request = request; - aplan->status = status; + // return size of atom data I will receive return nrecvsize; } @@ -445,217 +488,226 @@ int compare_standalone(const void *iptr, const void *jptr) void Irregular::exchange_atom(double *sendbuf, int *sizes, double *recvbuf) { - int i,m,n,offset,num_send; + int i,m,n,offset,count; // post all receives offset = 0; - for (int irecv = 0; irecv < aplan->nrecv; irecv++) { - MPI_Irecv(&recvbuf[offset],aplan->length_recv[irecv],MPI_DOUBLE, - aplan->proc_recv[irecv],0,world,&aplan->request[irecv]); - offset += aplan->length_recv[irecv]; + for (int irecv = 0; irecv < nrecv_proc; irecv++) { + MPI_Irecv(&recvbuf[offset],length_recv[irecv],MPI_DOUBLE, + proc_recv[irecv],0,world,&request[irecv]); + offset += length_recv[irecv]; } - // allocate buf for largest send + // reallocate buf for largest send if necessary - double *buf; - memory->create(buf,aplan->sendmax,"irregular:buf"); + if (sendmax_proc > maxdbuf) { + memory->destroy(dbuf); + maxdbuf = sendmax_proc; + memory->create(dbuf,maxdbuf,"irregular:dbuf"); + } // send each message // pack buf with list of atoms // m = index of atom in sendbuf - int *index_send = aplan->index_send; - int nsend = aplan->nsend; n = 0; - - for (int isend = 0; isend < nsend; isend++) { + for (int isend = 0; isend < nsend_proc; isend++) { offset = 0; - num_send = aplan->num_send[isend]; - for (i = 0; i < num_send; i++) { + count = num_send[isend]; + for (i = 0; i < count; i++) { m = index_send[n++]; - memcpy(&buf[offset],&sendbuf[aplan->offset_send[m]], - sizes[m]*sizeof(double)); + memcpy(&dbuf[offset],&sendbuf[offset_send[m]],sizes[m]*sizeof(double)); offset += sizes[m]; } - MPI_Send(buf,aplan->length_send[isend],MPI_DOUBLE, - aplan->proc_send[isend],0,world); + MPI_Send(dbuf,length_send[isend],MPI_DOUBLE,proc_send[isend],0,world); } - // free temporary send buffer - - memory->destroy(buf); - // wait on all incoming messages - if (aplan->nrecv) MPI_Waitall(aplan->nrecv,aplan->request,aplan->status); + if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status); } /* ---------------------------------------------------------------------- - destroy communication plan for atoms + destroy vectors in communication plan for atoms ------------------------------------------------------------------------- */ void Irregular::destroy_atom() { - delete [] aplan->proc_send; - delete [] aplan->length_send; - delete [] aplan->num_send; - delete [] aplan->index_send; - delete [] aplan->offset_send; - delete [] aplan->proc_recv; - delete [] aplan->length_recv; - delete [] aplan->request; - delete [] aplan->status; - memory->sfree(aplan); - aplan = NULL; + delete [] proc_send; + delete [] length_send; + delete [] num_send; + delete [] index_send; + delete [] offset_send; + delete [] proc_recv; + delete [] length_recv; + delete [] request; + delete [] status; } /* ---------------------------------------------------------------------- - create a communication plan for datums + create communication plan based on list of datums of uniform size n = # of datums to send - proclist = proc to send each datum to (including self) - return total # of datums I will recv (including self) + proclist = proc to send each datum to, can include self + sortflag = flag for sorting order of received messages by proc ID + return total # of datums I will recv, including any to self ------------------------------------------------------------------------- */ -int Irregular::create_data(int n, int *proclist) +int Irregular::create_data(int n, int *proclist, int sortflag) { int i,m; - // allocate plan and work vectors - - dplan = (PlanData *) memory->smalloc(sizeof(PlanData),"irregular:dplan"); - int *list = new int[nprocs]; - int *count = new int[nprocs]; - - // nrecv = # of messages I receive + // setup for collective comm + // work1 = 1 for procs I send a message to, not including self + // work2 = 1 for all procs, used for ReduceScatter for (i = 0; i < nprocs; i++) { - list[i] = 0; - count[i] = 1; + work1[i] = 0; + work2[i] = 1; } - for (i = 0; i < n; i++) list[proclist[i]] = 1; + for (i = 0; i < n; i++) work1[proclist[i]] = 1; + work1[me] = 0; - int nrecv; - MPI_Reduce_scatter(list,&nrecv,count,MPI_INT,MPI_SUM,world); - if (list[me]) nrecv--; + // nrecv_proc = # of procs I receive messages from, not including self + // options for performing ReduceScatter operation + // some are more efficient on some machines at big sizes + +#ifdef LAMMPS_RS_ALLREDUCE_INPLACE + MPI_Allreduce(MPI_IN_PLACE,work1,nprocs,MPI_INT,MPI_SUM,world); + nrecv_proc = work1[me]; +#else +#ifdef LAMMPS_RS_ALLREDUCE + MPI_Allreduce(work1,work2,nprocs,MPI_INT,MPI_SUM,world); + nrecv_proc = work2[me]; +#else + MPI_Reduce_scatter(work1,&nrecv_proc,work2,MPI_INT,MPI_SUM,world); +#endif +#endif // allocate receive arrays - int *proc_recv = new int[nrecv]; - int *num_recv = new int[nrecv]; - MPI_Request *request = new MPI_Request[nrecv]; - MPI_Status *status = new MPI_Status[nrecv]; + proc_recv = new int[nrecv_proc]; + num_recv = new int[nrecv_proc]; + request = new MPI_Request[nrecv_proc]; + status = new MPI_Status[nrecv_proc]; - // nsend = # of messages I send + // work1 = # of datums I send to each proc, including self + // nsend_proc = # of procs I send messages to, not including self - for (i = 0; i < nprocs; i++) list[i] = 0; - for (i = 0; i < n; i++) list[proclist[i]]++; + for (i = 0; i < nprocs; i++) work1[i] = 0; + for (i = 0; i < n; i++) work1[proclist[i]]++; - int nsend = 0; + nsend_proc = 0; for (i = 0; i < nprocs; i++) - if (list[i]) nsend++; - if (list[me]) nsend--; + if (work1[i]) nsend_proc++; + if (work1[me]) nsend_proc--; // allocate send and self arrays - int *proc_send = new int[nsend]; - int *num_send = new int[nsend]; - int *index_send = new int[n-list[me]]; - int *index_self = new int[list[me]]; + proc_send = new int[nsend_proc]; + num_send = new int[nsend_proc]; + index_send = new int[n-work1[me]]; + index_self = new int[work1[me]]; // proc_send = procs I send to // num_send = # of datums I send to each proc // num_self = # of datums I copy to self // to balance pattern of send messages: // each proc begins with iproc > me, continues until iproc = me - // reset list to store which send message each proc corresponds to - - int num_self; + // reset work1 to store which send message each proc corresponds to int iproc = me; int isend = 0; for (i = 0; i < nprocs; i++) { iproc++; if (iproc == nprocs) iproc = 0; - if (iproc == me) num_self = list[iproc]; - else if (list[iproc] > 0) { + if (iproc == me) { + num_self = work1[iproc]; + work1[iproc] = 0; + } else if (work1[iproc] > 0) { proc_send[isend] = iproc; - num_send[isend] = list[iproc]; - list[iproc] = isend; + num_send[isend] = work1[iproc]; + work1[iproc] = isend; isend++; } } - list[me] = 0; - // count = offsets into index_send for each proc I send to + // work2 = offsets into index_send for each proc I send to // m = ptr into index_self // index_send = list of which datums to send to each proc // 1st N1 values are datum indices for 1st proc, // next N2 values are datum indices for 2nd proc, etc + // index_self = list of which datums to copy to self - count[0] = 0; - for (i = 1; i < nsend; i++) count[i] = count[i-1] + num_send[i-1]; + work2[0] = 0; + for (i = 1; i < nsend_proc; i++) work2[i] = work2[i-1] + num_send[i-1]; m = 0; for (i = 0; i < n; i++) { iproc = proclist[i]; if (iproc == me) index_self[m++] = i; else { - isend = list[iproc]; - index_send[count[isend]++] = i; + isend = work1[iproc]; + index_send[work2[isend]++] = i; } } // tell receivers how much data I send - // sendmax = largest # of datums I send in a single message + // sendmax_proc = largest # of datums I send in a single message - int sendmax = 0; - for (i = 0; i < nsend; i++) { + sendmax_proc = 0; + for (i = 0; i < nsend_proc; i++) { MPI_Send(&num_send[i],1,MPI_INT,proc_send[i],0,world); - sendmax = MAX(sendmax,num_send[i]); + sendmax_proc = MAX(sendmax_proc,num_send[i]); } // receive incoming messages // proc_recv = procs I recv from // num_recv = total size of message each proc sends me - // nrecvsize = total size of data I recv + // nrecvdatum = total size of data I recv - int nrecvsize = 0; - for (i = 0; i < nrecv; i++) { + int nrecvdatum = 0; + for (i = 0; i < nrecv_proc; i++) { MPI_Recv(&num_recv[i],1,MPI_INT,MPI_ANY_SOURCE,0,world,status); proc_recv[i] = status->MPI_SOURCE; - nrecvsize += num_recv[i]; + nrecvdatum += num_recv[i]; + } + nrecvdatum += num_self; + + // sort proc_recv and num_recv by proc ID if requested + // useful for debugging to insure reproducible ordering of received datums + + if (sortflag) { + int *order = new int[nrecv_proc]; + int *proc_recv_ordered = new int[nrecv_proc]; + int *num_recv_ordered = new int[nrecv_proc]; + + for (i = 0; i < nrecv_proc; i++) order[i] = i; + proc_recv_copy = proc_recv; + qsort(order,nrecv_proc,sizeof(int),compare_standalone); + + int j; + for (i = 0; i < nrecv_proc; i++) { + j = order[i]; + proc_recv_ordered[i] = proc_recv[j]; + num_recv_ordered[i] = num_recv[j]; + } + + memcpy(proc_recv,proc_recv_ordered,nrecv_proc*sizeof(int)); + memcpy(num_recv,num_recv_ordered,nrecv_proc*sizeof(int)); + delete [] order; + delete [] proc_recv_ordered; + delete [] num_recv_ordered; } - nrecvsize += num_self; // barrier to insure all MPI_ANY_SOURCE messages are received // else another proc could proceed to exchange_data() and send to me MPI_Barrier(world); - // free work vectors + // return # of datums I will receive - delete [] count; - delete [] list; - - // initialize plan and return it - - dplan->nsend = nsend; - dplan->nrecv = nrecv; - dplan->sendmax = sendmax; - - dplan->proc_send = proc_send; - dplan->num_send = num_send; - dplan->index_send = index_send; - dplan->proc_recv = proc_recv; - dplan->num_recv = num_recv; - dplan->num_self = num_self; - dplan->index_self = index_self; - - dplan->request = request; - dplan->status = status; - - return nrecvsize; + return nrecvdatum; } /* ---------------------------------------------------------------------- @@ -667,49 +719,41 @@ int Irregular::create_data(int n, int *proclist) void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf) { - int i,m,n,offset,num_send; + int i,m,n,offset,count; // post all receives, starting after self copies - offset = dplan->num_self*nbytes; - for (int irecv = 0; irecv < dplan->nrecv; irecv++) { - MPI_Irecv(&recvbuf[offset],dplan->num_recv[irecv]*nbytes,MPI_CHAR, - dplan->proc_recv[irecv],0,world,&dplan->request[irecv]); - offset += dplan->num_recv[irecv]*nbytes; + offset = num_self*nbytes; + for (int irecv = 0; irecv < nrecv_proc; irecv++) { + MPI_Irecv(&recvbuf[offset],num_recv[irecv]*nbytes,MPI_CHAR, + proc_recv[irecv],0,world,&request[irecv]); + offset += num_recv[irecv]*nbytes; } - // allocate buf for largest send + // reallocate buf for largest send if necessary - char *buf; - memory->create(buf,dplan->sendmax*nbytes,"irregular:buf"); + if (sendmax_proc*nbytes > maxbuf) { + memory->destroy(buf); + maxbuf = sendmax_proc*nbytes; + memory->create(buf,maxbuf,"irregular:buf"); + } // send each message // pack buf with list of datums // m = index of datum in sendbuf - int *index_send = dplan->index_send; - int nsend = dplan->nsend; n = 0; - - for (int isend = 0; isend < nsend; isend++) { - num_send = dplan->num_send[isend]; - for (i = 0; i < num_send; i++) { + for (int isend = 0; isend < nsend_proc; isend++) { + count = num_send[isend]; + for (i = 0; i < count; i++) { m = index_send[n++]; memcpy(&buf[i*nbytes],&sendbuf[m*nbytes],nbytes); } - MPI_Send(buf,dplan->num_send[isend]*nbytes,MPI_CHAR, - dplan->proc_send[isend],0,world); + MPI_Send(buf,count*nbytes,MPI_CHAR,proc_send[isend],0,world); } - // free temporary send buffer - - memory->destroy(buf); - // copy datums to self, put at beginning of recvbuf - int *index_self = dplan->index_self; - int num_self = dplan->num_self; - for (i = 0; i < num_self; i++) { m = index_self[i]; memcpy(&recvbuf[i*nbytes],&sendbuf[m*nbytes],nbytes); @@ -717,39 +761,37 @@ void Irregular::exchange_data(char *sendbuf, int nbytes, char *recvbuf) // wait on all incoming messages - if (dplan->nrecv) MPI_Waitall(dplan->nrecv,dplan->request,dplan->status); + if (nrecv_proc) MPI_Waitall(nrecv_proc,request,status); } /* ---------------------------------------------------------------------- - destroy communication plan for datums + destroy vectors in communication plan for datums ------------------------------------------------------------------------- */ void Irregular::destroy_data() { - delete [] dplan->proc_send; - delete [] dplan->num_send; - delete [] dplan->index_send; - delete [] dplan->proc_recv; - delete [] dplan->num_recv; - delete [] dplan->index_self; - delete [] dplan->request; - delete [] dplan->status; - memory->sfree(dplan); - dplan = NULL; + delete [] proc_send; + delete [] num_send; + delete [] index_send; + delete [] proc_recv; + delete [] num_recv; + delete [] index_self; + delete [] request; + delete [] status; } /* ---------------------------------------------------------------------- determine which proc owns atom with coord x[3] x will be in box (orthogonal) or lamda coords (triclinic) - for uniform = 1, directly calculate owning proc - for non-uniform, iteratively find owning proc via binary search + if layout = UNIFORM, calculate owning proc directly + else layout = NONUNIFORM, iteratively find owning proc via binary search return owning proc ID via grid2proc return igx,igy,igz = logical grid loc of owing proc within 3d grid of procs ------------------------------------------------------------------------- */ int Irregular::coord2proc(double *x, int &igx, int &igy, int &igz) { - if (uniform) { + if (layout == 0) { if (triclinic == 0) { igx = static_cast (procgrid[0] * (x[0]-boxlo[0]) / prd[0]); igy = static_cast (procgrid[1] * (x[1]-boxlo[1]) / prd[1]); @@ -846,7 +888,12 @@ void Irregular::grow_recv(int n) bigint Irregular::memory_usage() { - bigint bytes = memory->usage(buf_send,maxsend); - bytes += memory->usage(buf_recv,maxrecv); + bigint bytes = 0; + bytes += maxsend*sizeof(double); // buf_send + bytes += maxrecv*sizeof(double); // buf_recv + bytes += maxdbuf*sizeof(double); // dbuf + bytes += maxbuf; // buf + bytes += 2*maxlocal*sizeof(int); // mproclist,msizes + bytes += 2*nprocs*sizeof(int); // work1,work2 return bytes; } diff --git a/src/irregular.h b/src/irregular.h index 576fad5c90..1fdbbe03f7 100644 --- a/src/irregular.h +++ b/src/irregular.h @@ -27,9 +27,9 @@ class Irregular : protected Pointers { Irregular(class LAMMPS *); ~Irregular(); - void migrate_atoms(int sortflag = 0); + void migrate_atoms(int sortflag = 0, int *procassign = NULL); int migrate_check(); - int create_data(int, int *); + int create_data(int, int *, int sortflag = 0); void exchange_data(char *, int, char *); void destroy_data(); bigint memory_usage(); @@ -38,58 +38,58 @@ class Irregular : protected Pointers { int me,nprocs; int triclinic; int map_style; - int uniform; + int layout; double *xsplit,*ysplit,*zsplit; // ptrs to comm int *procgrid; // ptr to comm int ***grid2proc; // ptr to comm double *boxlo; // ptr to domain double *prd; // ptr to domain - int maxsend,maxrecv; // size of buffers in # of doubles - double *buf_send,*buf_recv; + int maxsend,maxrecv; // size of buf send/recv in # of doubles + double *buf_send,*buf_recv; // bufs used in migrate_atoms + int maxdbuf; // size of double buf in bytes + double *dbuf; // double buf for largest single atom send + int maxbuf; // size of char buf in bytes + char *buf; // char buf for largest single data send - // plan for irregular communication of atoms + int *mproclist,*msizes; // persistent vectors in migrate_atoms + int maxlocal; // allocated size of mproclist and msizes + + int *work1,*work2; // work vectors + + // plan params for irregular communication of atoms or datums + // no params refer to atoms/data copied to self + + int nsend_proc; // # of messages to send + int nrecv_proc; // # of messages to recv + int sendmax_proc; // # of doubles/datums in largest send message + int *proc_send; // list of procs to send to + int *num_send; // # of atoms/datums to send to each proc + int *index_send; // list of which atoms/datums to send to each proc + int *proc_recv; // list of procs to recv from + MPI_Request *request; // MPI requests for posted recvs + MPI_Status *status; // MPI statuses for WaitAll + + // extra plan params plan for irregular communication of atoms // no params refer to atoms copied to self - struct PlanAtom { - int nsend; // # of messages to send - int nrecv; // # of messages to recv - int sendmax; // # of doubles in largest send message - int *proc_send; // procs to send to - int *length_send; // # of doubles to send to each proc - int *num_send; // # of atoms to send to each proc - int *index_send; // list of which atoms to send to each proc - int *offset_send; // where each atom starts in send buffer - int *proc_recv; // procs to recv from - int *length_recv; // # of doubles to recv from each proc - MPI_Request *request; // MPI requests for posted recvs - MPI_Status *status; // MPI statuses for WaitAll - }; + int *length_send; // # of doubles to send to each proc + int *length_recv; // # of doubles to recv from each proc + int *offset_send; // where each atom starts in send buffer - // plan for irregular communication of datums - // only 2 self params refer to atoms copied to self + // extra plan params plan for irregular communication of datums + // 2 self params refer to data copied to self - struct PlanData { // plan for irregular communication of data - int nsend; // # of messages to send - int nrecv; // # of messages to recv - int sendmax; // # of datums in largest send message - int *proc_send; // procs to send to - int *num_send; // # of datums to send to each proc - int *index_send; // list of which datums to send to each proc - int *proc_recv; // procs to recv from - int *num_recv; // # of datums to recv from each proc - int num_self; // # of datums to copy to self - int *index_self; // list of which datums to copy to self - MPI_Request *request; // MPI requests for posted recvs - MPI_Status *status; // MPI statuses for WaitAll - }; + int *num_recv; // # of datums to recv from each proc + int num_self; // # of datums to copy to self + int *index_self; // list of which datums to copy to self - PlanAtom *aplan; - PlanData *dplan; + // private methods int create_atom(int, int *, int *, int); void exchange_atom(double *, int *, double *); void destroy_atom(); + int coord2proc(double *, int &, int &, int &); int binary(double, int, double *); diff --git a/src/rcb.cpp b/src/rcb.cpp new file mode 100644 index 0000000000..de3f9f3ba6 --- /dev/null +++ b/src/rcb.cpp @@ -0,0 +1,926 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#include "mpi.h" +#include "string.h" +#include "rcb.h" +#include "irregular.h" +#include "memory.h" +#include "error.h" + +using namespace LAMMPS_NS; + +#define MYHUGE 1.0e30 +#define TINY 1.0e-6 + +// set this to bigger number after debugging + +#define DELTA 10 + +// prototypes for non-class functions + +void box_merge(void *, void *, int *, MPI_Datatype *); +void median_merge(void *, void *, int *, MPI_Datatype *); + +// NOTE: if want to have reuse flag, need to sum Tree across procs + +/* ---------------------------------------------------------------------- */ + +RCB::RCB(LAMMPS *lmp) : Pointers(lmp) +{ + MPI_Comm_rank(world,&me); + MPI_Comm_size(world,&nprocs); + + ndot = maxdot = 0; + dots = NULL; + + nlist = maxlist = 0; + dotlist = dotmark = NULL; + + maxbuf = 0; + buf = NULL; + + maxrecv = maxsend = 0; + recvproc = recvindex = sendproc = sendindex = NULL; + + tree = NULL; + irregular = NULL; + + // create MPI data and function types for box and median AllReduce ops + + MPI_Type_contiguous(6,MPI_DOUBLE,&box_type); + MPI_Type_commit(&box_type); + MPI_Type_contiguous(sizeof(Median),MPI_CHAR,&med_type); + MPI_Type_commit(&med_type); + + MPI_Op_create(box_merge,1,&box_op); + MPI_Op_create(median_merge,1,&med_op); + + reuse = 0; +} + +/* ---------------------------------------------------------------------- */ + +RCB::~RCB() +{ + memory->sfree(dots); + memory->destroy(dotlist); + memory->destroy(dotmark); + memory->sfree(buf); + + memory->destroy(recvproc); + memory->destroy(recvindex); + memory->destroy(sendproc); + memory->destroy(sendindex); + + memory->sfree(tree); + delete irregular; + + MPI_Type_free(&med_type); + MPI_Type_free(&box_type); + MPI_Op_free(&box_op); + MPI_Op_free(&med_op); +} + +/* ---------------------------------------------------------------------- + perform RCB balancing of N particles at coords X in bounding box LO/HI + if wt = NULL, ignore per-particle weights + if wt defined, per-particle weights > 0.0 + dimension = 2 or 3 + as documented in rcb.h: + sets noriginal,nfinal,nkeep,recvproc,recvindex,lo,hi + all proc particles will be inside or on surface of 3-d box + defined by final lo/hi + // NOTE: worry about re-use of data structs for fix balance? + // NOTE: could get rid of wt all together, will it be used? +------------------------------------------------------------------------- */ + +void RCB::compute(int dimension, int n, double **x, double *wt, + double *bboxlo, double *bboxhi) +{ + int i,j,k; + int keep,outgoing,incoming,incoming2; + int dim,markactive; + int indexlo,indexhi; + int first_iteration,breakflag; + double wttot,wtlo,wthi,wtsum,wtok,wtupto,wtmax; + double targetlo,targethi; + double valuemin,valuemax,valuehalf; + double tolerance; + MPI_Comm comm,comm_half; + MPI_Request request,request2; + MPI_Status status; + Median med,medme; + + // create list of my Dots + + ndot = nkeep = noriginal = n; + + if (ndot > maxdot) { + maxdot = ndot; + memory->sfree(dots); + dots = (Dot *) memory->smalloc(ndot*sizeof(Dot),"RCB:dots"); + } + + for (i = 0; i < ndot; i++) { + dots[i].x[0] = x[i][0]; + dots[i].x[1] = x[i][1]; + dots[i].x[2] = x[i][2]; + dots[i].proc = me; + dots[i].index = i; + } + + if (wt) + for (i = 0; i < ndot; i++) dots[i].wt = wt[i]; + else + for (i = 0; i < ndot; i++) dots[i].wt = 1.0; + + // initial bounding box = simulation box + // includes periodic or shrink-wrapped boundaries + + lo = bbox.lo; + hi = bbox.hi; + + lo[0] = bboxlo[0]; + lo[1] = bboxlo[1]; + lo[2] = bboxlo[2]; + hi[0] = bboxhi[0]; + hi[1] = bboxhi[1]; + hi[2] = bboxhi[2]; + + // initialize counters + + counters[0] = 0; + counters[1] = 0; + counters[2] = 0; + counters[3] = ndot; + counters[4] = maxdot; + counters[5] = 0; + counters[6] = 0; + + // create communicator for use in recursion + + MPI_Comm_dup(world,&comm); + + // recurse until partition is a single proc = me + // proclower,procupper = lower,upper procs in partition + // procmid = 1st proc in upper half of partition + + int procpartner,procpartner2; + int readnumber; + + int procmid; + int proclower = 0; + int procupper = nprocs - 1; + + while (proclower != procupper) { + + // if odd # of procs, lower partition gets extra one + + procmid = proclower + (procupper - proclower) / 2 + 1; + + // determine communication partner(s) + // readnumber = # of proc partners to read from + + if (me < procmid) + procpartner = me + (procmid - proclower); + else + procpartner = me - (procmid - proclower); + + int readnumber = 1; + if (procpartner > procupper) { + readnumber = 0; + procpartner--; + } + if (me == procupper && procpartner != procmid - 1) { + readnumber = 2; + procpartner2 = procpartner + 1; + } + + // wttot = summed weight of entire partition + // search tolerance = largest single weight (plus epsilon) + // targetlo = desired weight in lower half of partition + // targethi = desired weight in upper half of partition + + wtmax = wtsum = 0.0; + + if (wt) { + for (i = 0; i < ndot; i++) { + wtsum += dots[i].wt; + if (dots[i].wt > wtmax) wtmax = dots[i].wt; + } + } else { + for (i = 0; i < ndot; i++) wtsum += dots[i].wt; + wtmax = 1.0; + } + + MPI_Allreduce(&wtsum,&wttot,1,MPI_DOUBLE,MPI_SUM,comm); + if (wt) MPI_Allreduce(&wtmax,&tolerance,1,MPI_DOUBLE,MPI_MAX,comm); + else tolerance = 1.0; + + tolerance *= 1.0 + TINY; + targetlo = wttot * (procmid - proclower) / (procupper + 1 - proclower); + targethi = wttot - targetlo; + + // dim = dimension to bisect on + // do not allow choice of z dimension for 2d system + + dim = 0; + if (hi[1]-lo[1] > hi[0]-lo[0]) dim = 1; + if (dimension == 3) { + if (dim == 0 && hi[2]-lo[2] > hi[0]-lo[0]) dim = 2; + if (dim == 1 && hi[2]-lo[2] > hi[1]-lo[1]) dim = 2; + } + + // create active list and mark array for dots + // initialize active list to all dots + + if (ndot > maxlist) { + memory->destroy(dotlist); + memory->destroy(dotmark); + maxlist = maxdot; + memory->create(dotlist,maxlist,"RCB:dotlist"); + memory->create(dotmark,maxlist,"RCB:dotmark"); + } + + nlist = ndot; + for (i = 0; i < nlist; i++) dotlist[i] = i; + + // median iteration + // zoom in on bisector until correct # of dots in each half of partition + // as each iteration of median-loop begins, require: + // all non-active dots are marked with 0/1 in dotmark + // valuemin <= every active dot <= valuemax + // wtlo, wthi = total wt of non-active dots + // when leave median-loop, require only: + // valuehalf = correct cut position + // all dots <= valuehalf are marked with 0 in dotmark + // all dots >= valuehalf are marked with 1 in dotmark + // markactive = which side of cut is active = 0/1 + // indexlo,indexhi = indices of dot closest to median + + wtlo = wthi = 0.0; + valuemin = lo[dim]; + valuemax = hi[dim]; + first_iteration = 1; + + while (1) { + + // choose bisector value + // use old value on 1st iteration if old cut dimension is the same + // on 2nd option: could push valuehalf towards geometric center + // with "1.0-factor" to force overshoot + + if (first_iteration && reuse && dim == tree[procmid].dim) { + counters[5]++; + valuehalf = tree[procmid].cut; + if (valuehalf < valuemin || valuehalf > valuemax) + valuehalf = 0.5 * (valuemin + valuemax); + } else if (wt) + valuehalf = valuemin + (targetlo - wtlo) / + (wttot - wtlo - wthi) * (valuemax - valuemin); + else + valuehalf = 0.5 * (valuemin + valuemax); + + first_iteration = 0; + + // initialize local median data structure + + medme.totallo = medme.totalhi = 0.0; + medme.valuelo = -MYHUGE; + medme.valuehi = MYHUGE; + medme.wtlo = medme.wthi = 0.0; + medme.countlo = medme.counthi = 0; + medme.proclo = medme.prochi = me; + + // mark all active dots on one side or other of bisector + // also set all fields in median data struct + // save indices of closest dots on either side + + for (j = 0; j < nlist; j++) { + i = dotlist[j]; + if (dots[i].x[dim] <= valuehalf) { // in lower part + medme.totallo += dots[i].wt; + dotmark[i] = 0; + if (dots[i].x[dim] > medme.valuelo) { // my closest dot + medme.valuelo = dots[i].x[dim]; + medme.wtlo = dots[i].wt; + medme.countlo = 1; + indexlo = i; + } else if (dots[i].x[dim] == medme.valuelo) { // tied for closest + medme.wtlo += dots[i].wt; + medme.countlo++; + } + } + else { // in upper part + medme.totalhi += dots[i].wt; + dotmark[i] = 1; + if (dots[i].x[dim] < medme.valuehi) { // my closest dot + medme.valuehi = dots[i].x[dim]; + medme.wthi = dots[i].wt; + medme.counthi = 1; + indexhi = i; + } else if (dots[i].x[dim] == medme.valuehi) { // tied for closest + medme.wthi += dots[i].wt; + medme.counthi++; + } + } + } + + // combine median data struct across current subset of procs + + counters[0]++; + MPI_Allreduce(&medme,&med,1,med_type,med_op,comm); + + // test median guess for convergence + // move additional dots that are next to cut across it + + if (wtlo + med.totallo < targetlo) { // lower half TOO SMALL + + wtlo += med.totallo; + valuehalf = med.valuehi; + + if (med.counthi == 1) { // only one dot to move + if (wtlo + med.wthi < targetlo) { // move it, keep iterating + if (me == med.prochi) dotmark[indexhi] = 0; + } + else { // only move if beneficial + if (wtlo + med.wthi - targetlo < targetlo - wtlo) + if (me == med.prochi) dotmark[indexhi] = 0; + break; // all done + } + } + else { // multiple dots to move + breakflag = 0; + wtok = 0.0; + if (medme.valuehi == med.valuehi) wtok = medme.wthi; + if (wtlo + med.wthi >= targetlo) { // all done + MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm); + wtmax = targetlo - wtlo; + if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax); + breakflag = 1; + } // wtok = most I can move + for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) { + i = dotlist[j]; + if (dots[i].x[dim] == med.valuehi) { // only move if better + if (wtsum + dots[i].wt - wtok < wtok - wtsum) + dotmark[i] = 0; + wtsum += dots[i].wt; + } + } + if (breakflag) break; // done if moved enough + } + + wtlo += med.wthi; + if (targetlo-wtlo <= tolerance) break; // close enough + + valuemin = med.valuehi; // iterate again + markactive = 1; + } + + else if (wthi + med.totalhi < targethi) { // upper half TOO SMALL + + wthi += med.totalhi; + valuehalf = med.valuelo; + + if (med.countlo == 1) { // only one dot to move + if (wthi + med.wtlo < targethi) { // move it, keep iterating + if (me == med.proclo) dotmark[indexlo] = 1; + } + else { // only move if beneficial + if (wthi + med.wtlo - targethi < targethi - wthi) + if (me == med.proclo) dotmark[indexlo] = 1; + break; // all done + } + } + else { // multiple dots to move + breakflag = 0; + wtok = 0.0; + if (medme.valuelo == med.valuelo) wtok = medme.wtlo; + if (wthi + med.wtlo >= targethi) { // all done + MPI_Scan(&wtok,&wtupto,1,MPI_DOUBLE,MPI_SUM,comm); + wtmax = targethi - wthi; + if (wtupto > wtmax) wtok = wtok - (wtupto - wtmax); + breakflag = 1; + } // wtok = most I can move + for (j = 0, wtsum = 0.0; j < nlist && wtsum < wtok; j++) { + i = dotlist[j]; + if (dots[i].x[dim] == med.valuelo) { // only move if better + if (wtsum + dots[i].wt - wtok < wtok - wtsum) + dotmark[i] = 1; + wtsum += dots[i].wt; + } + } + if (breakflag) break; // done if moved enough + } + + wthi += med.wtlo; + if (targethi-wthi <= tolerance) break; // close enough + + valuemax = med.valuelo; // iterate again + markactive = 0; + } + + else // Goldilocks result: both partitions just right + break; + + // shrink the active list + + k = 0; + for (j = 0; j < nlist; j++) { + i = dotlist[j]; + if (dotmark[i] == markactive) dotlist[k++] = i; + } + nlist = k; + } + + // found median + // store cut info only if I am procmid + + if (me == procmid) { + cut = valuehalf; + cutdim = dim; + } + + // use cut to shrink my RCB bounding box + + if (me < procmid) hi[dim] = valuehalf; + else lo[dim] = valuehalf; + + // outgoing = number of dots to ship to partner + // nkeep = number of dots that have never migrated + + markactive = (me < procpartner); + for (i = 0, keep = 0, outgoing = 0; i < ndot; i++) + if (dotmark[i] == markactive) outgoing++; + else if (i < nkeep) keep++; + nkeep = keep; + + // alert partner how many dots I'll send, read how many I'll recv + + MPI_Send(&outgoing,1,MPI_INT,procpartner,0,world); + incoming = 0; + if (readnumber) { + MPI_Recv(&incoming,1,MPI_INT,procpartner,0,world,&status); + if (readnumber == 2) { + MPI_Recv(&incoming2,1,MPI_INT,procpartner2,0,world,&status); + incoming += incoming2; + } + } + + // check if need to alloc more space + + int ndotnew = ndot - outgoing + incoming; + if (ndotnew > maxdot) { + while (maxdot < ndotnew) maxdot += DELTA; + dots = (Dot *) memory->srealloc(dots,maxdot*sizeof(Dot),"RCB::dots"); + counters[6]++; + } + + counters[1] += outgoing; + counters[2] += incoming; + if (ndotnew > counters[3]) counters[3] = ndotnew; + if (maxdot > counters[4]) counters[4] = maxdot; + + // malloc comm send buffer + + if (outgoing > maxbuf) { + memory->sfree(buf); + maxbuf = outgoing; + buf = (Dot *) memory->smalloc(maxbuf*sizeof(Dot),"RCB:buf"); + } + + // fill buffer with dots that are marked for sending + // pack down the unmarked ones + + keep = outgoing = 0; + for (i = 0; i < ndot; i++) { + if (dotmark[i] == markactive) + memcpy(&buf[outgoing++],&dots[i],sizeof(Dot)); + else + memcpy(&dots[keep++],&dots[i],sizeof(Dot)); + } + + // post receives for dots + + if (readnumber > 0) { + MPI_Irecv(&dots[keep],incoming*sizeof(Dot),MPI_CHAR, + procpartner,1,world,&request); + if (readnumber == 2) { + keep += incoming - incoming2; + MPI_Irecv(&dots[keep],incoming2*sizeof(Dot),MPI_CHAR, + procpartner2,1,world,&request2); + } + } + + // handshake before sending dots to insure recvs have been posted + + if (readnumber > 0) { + MPI_Send(NULL,0,MPI_INT,procpartner,0,world); + if (readnumber == 2) MPI_Send(NULL,0,MPI_INT,procpartner2,0,world); + } + MPI_Recv(NULL,0,MPI_INT,procpartner,0,world,&status); + + // send dots to partner + + MPI_Rsend(buf,outgoing*sizeof(Dot),MPI_CHAR,procpartner,1,world); + + // wait until all dots are received + + if (readnumber > 0) { + MPI_Wait(&request,&status); + if (readnumber == 2) MPI_Wait(&request2,&status); + } + + ndot = ndotnew; + + // cut partition in half, create new communicators of 1/2 size + + int split; + if (me < procmid) { + procupper = procmid - 1; + split = 0; + } else { + proclower = procmid; + split = 1; + } + + MPI_Comm_split(comm,split,me,&comm_half); + MPI_Comm_free(&comm); + comm = comm_half; + } + + // clean up + + MPI_Comm_free(&comm); + + // set public variables with results of rebalance + + nfinal = ndot; + + if (nfinal > maxrecv) { + memory->destroy(recvproc); + memory->destroy(recvindex); + maxrecv = nfinal; + memory->create(recvproc,maxrecv,"RCB:recvproc"); + memory->create(recvindex,maxrecv,"RCB:recvindex"); + } + + for (i = 0; i < nfinal; i++) { + recvproc[i] = dots[i].proc; + recvindex[i] = dots[i].index; + } +} + +/* ---------------------------------------------------------------------- + custom MPI reduce operation + merge of each component of an RCB bounding box +------------------------------------------------------------------------- */ + +void box_merge(void *in, void *inout, int *len, MPI_Datatype *dptr) + +{ + RCB::BBox *box1 = (RCB::BBox *) in; + RCB::BBox *box2 = (RCB::BBox *) inout; + + for (int i = 0; i < 3; i++) { + if (box1->lo[i] < box2->lo[i]) box2->lo[i] = box1->lo[i]; + if (box1->hi[i] > box2->hi[i]) box2->hi[i] = box1->hi[i]; + } +} + +/* ---------------------------------------------------------------------- + custom MPI reduce operation + merge median data structure + on input: + in,inout->totallo, totalhi = weight in both partitions on this proc + valuelo, valuehi = pos of nearest dot(s) to cut on this proc + wtlo, wthi = total wt of dot(s) at that pos on this proc + countlo, counthi = # of dot(s) nearest to cut on this proc + proclo, prochi = not used + on exit: + inout-> totallo, totalhi = total # of active dots in both partitions + valuelo, valuehi = pos of nearest dot(s) to cut + wtlo, wthi = total wt of dot(s) at that position + countlo, counthi = total # of dot(s) nearest to cut + proclo, prochi = one unique proc who owns a nearest dot + all procs must get same proclo,prochi +------------------------------------------------------------------------- */ + +void median_merge(void *in, void *inout, int *len, MPI_Datatype *dptr) + +{ + RCB::Median *med1 = (RCB::Median *) in; + RCB::Median *med2 = (RCB::Median *) inout; + + med2->totallo += med1->totallo; + if (med1->valuelo > med2->valuelo) { + med2->valuelo = med1->valuelo; + med2->wtlo = med1->wtlo; + med2->countlo = med1->countlo; + med2->proclo = med1->proclo; + } + else if (med1->valuelo == med2->valuelo) { + med2->wtlo += med1->wtlo; + med2->countlo += med1->countlo; + if (med1->proclo < med2->proclo) med2->proclo = med1->proclo; + } + + med2->totalhi += med1->totalhi; + if (med1->valuehi < med2->valuehi) { + med2->valuehi = med1->valuehi; + med2->wthi = med1->wthi; + med2->counthi = med1->counthi; + med2->prochi = med1->prochi; + } + else if (med1->valuehi == med2->valuehi) { + med2->wthi += med1->wthi; + med2->counthi += med1->counthi; + if (med1->prochi < med2->prochi) med2->prochi = med1->prochi; + } +} + +/* ---------------------------------------------------------------------- + invert the RCB rebalance result to convert receive info into send info + sortflag = flag for sorting order of received messages by proc ID +------------------------------------------------------------------------- */ + +void RCB::invert(int sortflag) +{ + Invert *sbuf,*rbuf; + + // only create Irregular if not previously created + // allows Irregular to persist for multiple RCB calls by fix balance + + if (!irregular) irregular = new Irregular(lmp); + + // nsend = # of dots to request from other procs + + int nsend = nfinal-nkeep; + + int *proclist; + memory->create(proclist,nsend,"RCB:proclist"); + + Invert *sinvert = + (Invert *) memory->smalloc(nsend*sizeof(Invert),"RCB:sinvert"); + + int m = 0; + for (int i = nkeep; i < nfinal; i++) { + proclist[m] = recvproc[i]; + sinvert[m].rindex = recvindex[i]; + sinvert[m].sproc = me; + sinvert[m].sindex = i; + m++; + } + + // perform inversion via irregular comm + // nrecv = # of my dots to send to other procs + + int nrecv = irregular->create_data(nsend,proclist,sortflag); + Invert *rinvert = + (Invert *) memory->smalloc(nrecv*sizeof(Invert),"RCB:rinvert"); + irregular->exchange_data((char *) sinvert,sizeof(Invert),(char *) rinvert); + irregular->destroy_data(); + + // set public variables from requests to send my dots + + if (noriginal > maxsend) { + memory->destroy(sendproc); + memory->destroy(sendindex); + maxsend = noriginal; + memory->create(sendproc,maxsend,"RCB:sendproc"); + memory->create(sendindex,maxsend,"RCB:sendindex"); + } + + for (int i = 0; i < nkeep; i++) { + sendproc[recvindex[i]] = me; + sendindex[recvindex[i]] = i; + } + + for (int i = 0; i < nrecv; i++) { + m = rinvert[i].rindex; + sendproc[m] = rinvert[i].sproc; + sendindex[m] = rinvert[i].sindex; + } + + // clean-up + + memory->destroy(proclist); + memory->destroy(sinvert); + memory->destroy(rinvert); +} + +/* ---------------------------------------------------------------------- + memory use of Irregular +------------------------------------------------------------------------- */ + +bigint RCB::memory_usage() +{ + bigint bytes = 0; + if (irregular) bytes += irregular->memory_usage(); + return bytes; +} + +// ----------------------------------------------------------------------- +// DEBUG methods +// ----------------------------------------------------------------------- +/* +// consistency checks on RCB results + +void RCB::check() +{ + int i,iflag,total1,total2; + double weight,wtmax,wtmin,wtone,tolerance; + + // check that total # of dots remained the same + + MPI_Allreduce(&ndotorig,&total1,1,MPI_INT,MPI_SUM,world); + MPI_Allreduce(&ndot,&total2,1,MPI_INT,MPI_SUM,world); + if (total1 != total2) { + if (me == 0) + printf("ERROR: Points before RCB = %d, Points after RCB = %d\n", + total1,total2); + } + + // check that result is load-balanced within log2(P)*max-wt + + weight = wtone = 0.0; + for (i = 0; i < ndot; i++) { + weight += dots[i].wt; + if (dots[i].wt > wtone) wtone = dots[i].wt; + } + + MPI_Allreduce(&weight,&wtmin,1,MPI_DOUBLE,MPI_MIN,world); + MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world); + MPI_Allreduce(&wtone,&tolerance,1,MPI_DOUBLE,MPI_MAX,world); + + // i = smallest power-of-2 >= nprocs + // tolerance = largest-single-weight*log2(nprocs) + + for (i = 0; (nprocs >> i) != 0; i++); + tolerance = tolerance * i * (1.0 + TINY); + + if (wtmax - wtmin > tolerance) { + if (me == 0) + printf("ERROR: Load-imbalance > tolerance of %g\n",tolerance); + MPI_Barrier(world); + if (weight == wtmin) printf(" Proc %d has weight = %g\n",me,weight); + if (weight == wtmax) printf(" Proc %d has weight = %g\n",me,weight); + } + + MPI_Barrier(world); + + // check that final set of points is inside RCB box of each proc + + iflag = 0; + for (i = 0; i < ndot; i++) { + if (dots[i].x[0] < lo[0] || dots[i].x[0] > hi[0] || + dots[i].x[1] < lo[1] || dots[i].x[1] > hi[1] || + dots[i].x[2] < lo[2] || dots[i].x[2] > hi[2]) + iflag++; + } + if (iflag > 0) + printf("ERROR: %d points are out-of-box on proc %d\n",iflag,me); +} + +// stats for RCB decomposition + +void RCB::stats(int flag) +{ + int i,iflag,sum,min,max; + double ave,rsum,rmin,rmax; + double weight,wttot,wtmin,wtmax; + + if (me == 0) printf("RCB Statistics:\n"); + + // distribution info + + for (i = 0, weight = 0.0; i < ndot; i++) weight += dots[i].wt; + MPI_Allreduce(&weight,&wttot,1,MPI_DOUBLE,MPI_SUM,world); + MPI_Allreduce(&weight,&wtmin,1,MPI_DOUBLE,MPI_MIN,world); + MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world); + + if (me == 0) { + printf(" Total weight of dots = %g\n",wttot); + printf(" Weight on each proc: ave = %g, max = %g, min = %g\n", + wttot/nprocs,wtmax,wtmin); + } + if (flag) { + MPI_Barrier(world); + printf(" Proc %d has weight = %g\n",me,weight); + } + + for (i = 0, weight = 0.0; i < ndot; i++) + if (dots[i].wt > weight) weight = dots[i].wt; + MPI_Allreduce(&weight,&wtmax,1,MPI_DOUBLE,MPI_MAX,world); + + if (me == 0) printf(" Maximum weight of single dot = %g\n",wtmax); + if (flag) { + MPI_Barrier(world); + printf(" Proc %d max weight = %g\n",me,weight); + } + + // counter info + + MPI_Allreduce(&counters[0],&sum,1,MPI_INT,MPI_SUM,world); + MPI_Allreduce(&counters[0],&min,1,MPI_INT,MPI_MIN,world); + MPI_Allreduce(&counters[0],&max,1,MPI_INT,MPI_MAX,world); + ave = ((double) sum)/nprocs; + if (me == 0) + printf(" Median iter: ave = %g, min = %d, max = %d\n",ave,min,max); + if (flag) { + MPI_Barrier(world); + printf(" Proc %d median count = %d\n",me,counters[0]); + } + + MPI_Allreduce(&counters[1],&sum,1,MPI_INT,MPI_SUM,world); + MPI_Allreduce(&counters[1],&min,1,MPI_INT,MPI_MIN,world); + MPI_Allreduce(&counters[1],&max,1,MPI_INT,MPI_MAX,world); + ave = ((double) sum)/nprocs; + if (me == 0) + printf(" Send count: ave = %g, min = %d, max = %d\n",ave,min,max); + if (flag) { + MPI_Barrier(world); + printf(" Proc %d send count = %d\n",me,counters[1]); + } + + MPI_Allreduce(&counters[2],&sum,1,MPI_INT,MPI_SUM,world); + MPI_Allreduce(&counters[2],&min,1,MPI_INT,MPI_MIN,world); + MPI_Allreduce(&counters[2],&max,1,MPI_INT,MPI_MAX,world); + ave = ((double) sum)/nprocs; + if (me == 0) + printf(" Recv count: ave = %g, min = %d, max = %d\n",ave,min,max); + if (flag) { + MPI_Barrier(world); + printf(" Proc %d recv count = %d\n",me,counters[2]); + } + + MPI_Allreduce(&counters[3],&sum,1,MPI_INT,MPI_SUM,world); + MPI_Allreduce(&counters[3],&min,1,MPI_INT,MPI_MIN,world); + MPI_Allreduce(&counters[3],&max,1,MPI_INT,MPI_MAX,world); + ave = ((double) sum)/nprocs; + if (me == 0) + printf(" Max dots: ave = %g, min = %d, max = %d\n",ave,min,max); + if (flag) { + MPI_Barrier(world); + printf(" Proc %d max dots = %d\n",me,counters[3]); + } + + MPI_Allreduce(&counters[4],&sum,1,MPI_INT,MPI_SUM,world); + MPI_Allreduce(&counters[4],&min,1,MPI_INT,MPI_MIN,world); + MPI_Allreduce(&counters[4],&max,1,MPI_INT,MPI_MAX,world); + ave = ((double) sum)/nprocs; + if (me == 0) + printf(" Max memory: ave = %g, min = %d, max = %d\n",ave,min,max); + if (flag) { + MPI_Barrier(world); + printf(" Proc %d max memory = %d\n",me,counters[4]); + } + + if (reuse) { + MPI_Allreduce(&counters[5],&sum,1,MPI_INT,MPI_SUM,world); + MPI_Allreduce(&counters[5],&min,1,MPI_INT,MPI_MIN,world); + MPI_Allreduce(&counters[5],&max,1,MPI_INT,MPI_MAX,world); + ave = ((double) sum)/nprocs; + if (me == 0) + printf(" # of Reuse: ave = %g, min = %d, max = %d\n",ave,min,max); + if (flag) { + MPI_Barrier(world); + printf(" Proc %d # of Reuse = %d\n",me,counters[5]); + } + } + + MPI_Allreduce(&counters[6],&sum,1,MPI_INT,MPI_SUM,world); + MPI_Allreduce(&counters[6],&min,1,MPI_INT,MPI_MIN,world); + MPI_Allreduce(&counters[6],&max,1,MPI_INT,MPI_MAX,world); + ave = ((double) sum)/nprocs; + if (me == 0) + printf(" # of OverAlloc: ave = %g, min = %d, max = %d\n",ave,min,max); + if (flag) { + MPI_Barrier(world); + printf(" Proc %d # of OverAlloc = %d\n",me,counters[6]); + } + + // RCB boxes for each proc + + if (flag) { + if (me == 0) printf(" RCB sub-domain boxes:\n"); + for (i = 0; i < 3; i++) { + MPI_Barrier(world); + if (me == 0) printf(" Dimension %d\n",i+1); + MPI_Barrier(world); + printf(" Proc = %d: Box = %g %g\n",me,lo[i],hi[i]); + } + } +} +*/ diff --git a/src/rcb.h b/src/rcb.h new file mode 100644 index 0000000000..4c2e3809e9 --- /dev/null +++ b/src/rcb.h @@ -0,0 +1,131 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef LAMMPS_RCB_H +#define LAMMPS_RCB_H + +#include "mpi.h" +#include "pointers.h" + +namespace LAMMPS_NS { + +class RCB : protected Pointers { + public: + // set by compute() + + int noriginal; // # of dots I own before balancing + int nfinal; // # of dots I own after balancing + int nkeep; // how many dots of noriginal I still own + // will be first nkept of nfinal list + int *recvproc; // proc IDs of nfinal dots + int *recvindex; // index of nfinal dots on owning procs + // based on input list for compute() + double *lo,*hi; // final bounding box of my RCB sub-domain + double cut; // single cut (in Tree) owned by this proc + int cutdim; // dimension (0,1,2) of the cut + + // set by invert() + + int *sendproc; // proc to send each of my noriginal dots to + int *sendindex; // index of dot in receiver's nfinal list + + RCB(class LAMMPS *); + ~RCB(); + void compute(int, int, double **, double *, double *, double *); + void invert(int sortflag = 0); + bigint memory_usage(); + + // DEBUG methods + //void check(); + //void stats(int); + + // RCB cut info + + struct Median { + double totallo,totalhi; // weight in each half of active partition + double valuelo,valuehi; // position of dot(s) nearest to cut + double wtlo,wthi; // total weight of dot(s) at that position + int countlo,counthi; // # of dots at that position + int proclo,prochi; // unique proc who owns a nearest dot + }; + + struct BBox { + double lo[3],hi[3]; // corner points of a bounding box + }; + + private: + int me,nprocs; + + // point to balance on + + struct Dot { + double x[3]; // coord of point + double wt; // weight of point + int proc; // owning proc + int index; // index on owning proc + }; + + // tree of RCB cuts + + struct Tree { + double cut; // position of cut + int dim; // dimension = 0/1/2 of cut + }; + + // inversion message + + struct Invert { + int rindex; // index on receiving proc + int sproc; // sending proc + int sindex; // index on sending proc + }; + + Dot *dots; // dots on this proc + int ndot; // # of dots on this proc + int maxdot; // allocated size of dots + int ndotorig; + + int nlist; + int maxlist; + int *dotlist; + int *dotmark; + + int maxbuf; + Dot *buf; + + int maxrecv,maxsend; + + BBox bbox; + class Irregular *irregular; + + MPI_Op box_op,med_op; + MPI_Datatype box_type,med_type; + + int reuse; // 1/0 to use/not use previous cuts + int dottop; // dots >= this index are new + double bboxlo[3]; // bounding box of final RCB sub-domain + double bboxhi[3]; + Tree *tree; // tree of RCB cuts, used by reuse() + int counters[7]; // diagnostic counts + // 0 = # of median iterations + // 1 = # of points sent + // 2 = # of points received + // 3 = most points this proc ever owns + // 4 = most point memory this proc ever allocs + // 5 = # of times a previous cut is re-used + // 6 = # of reallocs of point vector +}; + +} + +#endif diff --git a/src/replicate.cpp b/src/replicate.cpp index b759c47fa3..26f3fca7ed 100644 --- a/src/replicate.cpp +++ b/src/replicate.cpp @@ -29,6 +29,8 @@ using namespace LAMMPS_NS; #define LB_FACTOR 1.1 #define EPSILON 1.0e-6 +enum{LAYOUT_UNIFORM,LAYOUT_NONUNIFORM,LAYOUT_TILED}; // several files + /* ---------------------------------------------------------------------- */ Replicate::Replicate(LAMMPS *lmp) : Pointers(lmp) {} @@ -220,17 +222,33 @@ void Replicate::command(int narg, char **arg) sublo[2] = domain->sublo_lamda[2]; subhi[2] = domain->subhi_lamda[2]; } - if (domain->xperiodic) { - if (comm->myloc[0] == 0) sublo[0] -= epsilon[0]; - if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0]; - } - if (domain->yperiodic) { - if (comm->myloc[1] == 0) sublo[1] -= epsilon[1]; - if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1]; - } - if (domain->zperiodic) { - if (comm->myloc[2] == 0) sublo[2] -= epsilon[2]; - if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2]; + if (comm->layout != LAYOUT_TILED) { + if (domain->xperiodic) { + if (comm->myloc[0] == 0) sublo[0] -= epsilon[0]; + if (comm->myloc[0] == comm->procgrid[0]-1) subhi[0] += epsilon[0]; + } + if (domain->yperiodic) { + if (comm->myloc[1] == 0) sublo[1] -= epsilon[1]; + if (comm->myloc[1] == comm->procgrid[1]-1) subhi[1] += epsilon[1]; + } + if (domain->zperiodic) { + if (comm->myloc[2] == 0) sublo[2] -= epsilon[2]; + if (comm->myloc[2] == comm->procgrid[2]-1) subhi[2] += epsilon[2]; + } + + } else { + if (domain->xperiodic) { + if (comm->mysplit[0][0] == 0.0) sublo[0] -= epsilon[0]; + if (comm->mysplit[0][1] == 1.0) subhi[0] += epsilon[0]; + } + if (domain->yperiodic) { + if (comm->mysplit[1][0] == 0.0) sublo[1] -= epsilon[1]; + if (comm->mysplit[1][1] == 1.0) subhi[1] += epsilon[1]; + } + if (domain->zperiodic) { + if (comm->mysplit[2][0] == 0.0) sublo[2] -= epsilon[2]; + if (comm->mysplit[2][1] == 1.0) subhi[2] += epsilon[2]; + } } // loop over all procs diff --git a/src/variable.cpp b/src/variable.cpp index 6efe9ef3f9..f371512128 100644 --- a/src/variable.cpp +++ b/src/variable.cpp @@ -44,6 +44,7 @@ using namespace MathConst; #define MAXLEVEL 4 #define MAXLINE 256 #define CHUNK 1024 +#define VALUELENGTH 64 #define MYROUND(a) (( a-floor(a) ) >= .5) ? ceil(a) : floor(a) @@ -306,7 +307,7 @@ void Variable::set(int narg, char **arg) pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(1,&arg[2],data[nvar]); - data[nvar][1] = NULL; + data[nvar][1] = new char[VALUELENGTH]; // SCALARFILE for strings or numbers // which = 1st value @@ -360,7 +361,7 @@ void Variable::set(int narg, char **arg) pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(2,&arg[2],data[nvar]); - data[nvar][2] = NULL; + data[nvar][2] = new char[VALUELENGTH]; // EQUAL // replace pre-existing var if also style EQUAL (allows it to be reset) @@ -374,9 +375,7 @@ void Variable::set(int narg, char **arg) if (style[ivar] != EQUAL) error->all(FLERR,"Cannot redefine variable as a different style"); delete [] data[ivar][0]; - if (data[ivar][1]) delete [] data[ivar][1]; copy(1,&arg[2],data[ivar]); - data[ivar][1] = NULL; replaceflag = 1; } else { if (nvar == maxvar) grow(); @@ -386,7 +385,7 @@ void Variable::set(int narg, char **arg) pad[nvar] = 0; data[nvar] = new char*[num[nvar]]; copy(1,&arg[2],data[nvar]); - data[nvar][1] = NULL; + data[nvar][1] = new char[VALUELENGTH]; } // ATOM @@ -625,32 +624,24 @@ char *Variable::retrieve(char *name) strcpy(data[ivar][0],result); str = data[ivar][0]; } else if (style[ivar] == EQUAL) { - char result[64]; double answer = evaluate(data[ivar][0],NULL); - sprintf(result,"%.15g",answer); - int n = strlen(result) + 1; - if (data[ivar][1]) delete [] data[ivar][1]; - data[ivar][1] = new char[n]; - strcpy(data[ivar][1],result); + sprintf(data[ivar][1],"%.15g",answer); str = data[ivar][1]; } else if (style[ivar] == FORMAT) { - char result[64]; int jvar = find(data[ivar][0]); if (jvar == -1) return NULL; if (!equalstyle(jvar)) return NULL; double answer = evaluate(data[jvar][0],NULL); - sprintf(result,data[ivar][1],answer); - int n = strlen(result) + 1; - if (data[ivar][2]) delete [] data[ivar][2]; - data[ivar][2] = new char[n]; - strcpy(data[ivar][2],result); + sprintf(data[ivar][2],data[ivar][1],answer); str = data[ivar][2]; } else if (style[ivar] == GETENV) { const char *result = getenv(data[ivar][0]); - if (data[ivar][1]) delete [] data[ivar][1]; - if (result == NULL) result = (const char *)""; + if (result == NULL) result = (const char *) ""; int n = strlen(result) + 1; - data[ivar][1] = new char[n]; + if (n > VALUELENGTH) { + delete [] data[ivar][1]; + data[ivar][1] = new char[n]; + } strcpy(data[ivar][1],result); str = data[ivar][1]; } else if (style[ivar] == ATOM || style[ivar] == ATOMFILE) return NULL; @@ -782,6 +773,45 @@ int Variable::atomstyle(int ivar) return 0; } +/* ---------------------------------------------------------------------- + save copy of EQUAL style ivar formula in copy + allocate copy here, later equal_restore() call will free it + insure data[ivar][0] is of VALUELENGTH since will be overridden +------------------------------------------------------------------------- */ + +void Variable::equal_save(int ivar, char *©) +{ + int n = strlen(data[ivar][0]) + 1; + copy = new char[n]; + strcpy(copy,data[ivar][0]); + delete [] data[ivar][0]; + data[ivar][0] = new char[VALUELENGTH]; +} + +/* ---------------------------------------------------------------------- + restore formula string of EQUAL style ivar from copy + then free copy, allocated in equal_save() +------------------------------------------------------------------------- */ + +void Variable::equal_restore(int ivar, char *copy) +{ + delete [] data[ivar][0]; + int n = strlen(copy) + 1; + data[ivar][0] = new char[n]; + strcpy(data[ivar][0],copy); + delete [] copy; +} + +/* ---------------------------------------------------------------------- + override EQUAL style ivar formula with value converted to string + data[ivar][0] was set to length 64 in equal_save() +------------------------------------------------------------------------- */ + +void Variable::equal_override(int ivar, double value) +{ + sprintf(data[ivar][0],"%.15g",value); +} + /* ---------------------------------------------------------------------- remove Nth variable from list and compact list delete reader explicitly if it exists diff --git a/src/variable.h b/src/variable.h index bf6dcea656..f04a04f9d4 100644 --- a/src/variable.h +++ b/src/variable.h @@ -36,10 +36,15 @@ class Variable : protected Pointers { int int_between_brackets(char *&, int); double evaluate_boolean(char *); + void equal_save(int, char *&); + void equal_restore(int, char *); + void equal_override(int, double); + unsigned int data_mask(int ivar); unsigned int data_mask(char *str); private: + int me; int nvar; // # of defined variables int maxvar; // max # of variables following lists can hold char **names; // name of each variable @@ -57,7 +62,6 @@ class Variable : protected Pointers { int precedence[17]; // precedence level of math operators // set length to include up to OR in enum - int me; struct Tree { // parse tree for atom-style variables double value; // single scalar diff --git a/src/version.h b/src/version.h index 25d90d9612..b4d684523a 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define LAMMPS_VERSION "22 Jul 2014" +#define LAMMPS_VERSION "29 Jul 2014"