Compare commits
74 Commits
patch_22No
...
patch_17De
| Author | SHA1 | Date | |
|---|---|---|---|
| 81a2db8a0c | |||
| 0a176841e7 | |||
| 3027ac9250 | |||
| fc54ab5cea | |||
| e364b80724 | |||
| 830c9e8661 | |||
| 4907b29ad2 | |||
| eff7238ff2 | |||
| 126fb22e93 | |||
| 0a90492c44 | |||
| fed629c23e | |||
| 925481c3f4 | |||
| da2ad5b6e0 | |||
| bfcab72268 | |||
| f509f133af | |||
| 624c57e9da | |||
| f3b355bcbe | |||
| ae5764beac | |||
| fda43c00fd | |||
| b34be30be6 | |||
| 13b6196b82 | |||
| baf55c90f4 | |||
| 770f5d0bf7 | |||
| a31b00965a | |||
| a5e46e3e6a | |||
| 31be0da590 | |||
| 0f3b2544a1 | |||
| 586514e05c | |||
| 43c459ba56 | |||
| b5c3d2f66c | |||
| 5187cb97e5 | |||
| eff503e56c | |||
| cdcebab3bd | |||
| ddf678da51 | |||
| 435421301b | |||
| 9b48c49f83 | |||
| d3d5ac17bf | |||
| 8318c67816 | |||
| 7c61dbf5e2 | |||
| 39a12b15d7 | |||
| fb3f597f41 | |||
| d14814ae2e | |||
| beb5a30f67 | |||
| 7ddb6670c0 | |||
| 789e62388f | |||
| 7d098bff90 | |||
| 1d970d3cdf | |||
| 42d430168b | |||
| 5ff5bc2a6c | |||
| 02ae2d218a | |||
| 470908fc93 | |||
| 6759630c16 | |||
| 87781771ba | |||
| df46b9aa38 | |||
| 647c6f00ce | |||
| 237307eda2 | |||
| d58dd4f159 | |||
| ae70f1090f | |||
| 114926a488 | |||
| 5eb9dd0c5d | |||
| ebabc8f0bc | |||
| 232abf8534 | |||
| d22caf2658 | |||
| 3842aa6095 | |||
| 32c240978a | |||
| 212c2617f6 | |||
| 40f85c93ba | |||
| 2f02d98469 | |||
| 4553881fc2 | |||
| 81fcbcd99c | |||
| 82c6eb4675 | |||
| 8ed3f4226e | |||
| 9b7a0d7e1c | |||
| c9c2ae6c61 |
@ -1,7 +1,7 @@
|
||||
<!-- HTML_ONLY -->
|
||||
<HEAD>
|
||||
<TITLE>LAMMPS Users Manual</TITLE>
|
||||
<META NAME="docnumber" CONTENT="22 Nov 2016 version">
|
||||
<META NAME="docnumber" CONTENT="17 Dec 2016 version">
|
||||
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
|
||||
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
|
||||
</HEAD>
|
||||
@ -21,7 +21,7 @@
|
||||
<H1></H1>
|
||||
|
||||
LAMMPS Documentation :c,h3
|
||||
22 Nov 2016 version :c,h4
|
||||
17 Dec 2016 version :c,h4
|
||||
|
||||
Version info: :h4
|
||||
|
||||
|
||||
@ -632,11 +632,11 @@ USER-INTEL, k = KOKKOS, o = USER-OMP, t = OPT.
|
||||
"rigid/npt (o)"_fix_rigid.html,
|
||||
"rigid/nve (o)"_fix_rigid.html,
|
||||
"rigid/nvt (o)"_fix_rigid.html,
|
||||
<"rigid/small (o)"_fix_rigid.html,
|
||||
"rigid/small/nph"_fix_rigid.html,
|
||||
"rigid/small/npt"_fix_rigid.html,
|
||||
"rigid/small/nve"_fix_rigid.html,
|
||||
"rigid/small/nvt"_fix_rigid.html,
|
||||
"rigid/small (o)"_fix_rigid.html,
|
||||
"rigid/small/nph (o)"_fix_rigid.html,
|
||||
"rigid/small/npt (o)"_fix_rigid.html,
|
||||
"rigid/small/nve (o)"_fix_rigid.html,
|
||||
"rigid/small/nvt (o)"_fix_rigid.html,
|
||||
"setforce (k)"_fix_setforce.html,
|
||||
"shake"_fix_shake.html,
|
||||
"spring"_fix_spring.html,
|
||||
@ -767,6 +767,7 @@ KOKKOS, o = USER-OMP, t = OPT.
|
||||
"erotate/sphere"_compute_erotate_sphere.html,
|
||||
"erotate/sphere/atom"_compute_erotate_sphere_atom.html,
|
||||
"event/displace"_compute_event_displace.html,
|
||||
"global/atom"_compute_global_atom.html,
|
||||
"group/group"_compute_group_group.html,
|
||||
"gyration"_compute_gyration.html,
|
||||
"gyration/chunk"_compute_gyration_chunk.html,
|
||||
|
||||
@ -1936,18 +1936,22 @@ documentation in the src/library.cpp file for details, including
|
||||
which quantities can be queried by name:
|
||||
|
||||
void *lammps_extract_global(void *, char *)
|
||||
void lammps_extract_box(void *, double *, double *,
|
||||
double *, double *, double *, int *, int *)
|
||||
void *lammps_extract_atom(void *, char *)
|
||||
void *lammps_extract_compute(void *, char *, int, int)
|
||||
void *lammps_extract_fix(void *, char *, int, int, int, int)
|
||||
void *lammps_extract_variable(void *, char *, char *) :pre
|
||||
|
||||
int lammps_set_variable(void *, char *, char *)
|
||||
double lammps_get_thermo(void *, char *) :pre
|
||||
void lammps_reset_box(void *, double *, double *, double, double, double)
|
||||
int lammps_set_variable(void *, char *, char *) :pre
|
||||
|
||||
double lammps_get_thermo(void *, char *)
|
||||
int lammps_get_natoms(void *)
|
||||
void lammps_gather_atoms(void *, double *)
|
||||
void lammps_scatter_atoms(void *, double *) :pre
|
||||
void lammps_create_atoms(void *, int, tagint *, int *, double *, double *) :pre
|
||||
void lammps_create_atoms(void *, int, tagint *, int *, double *, double *,
|
||||
imageint *, int) :pre
|
||||
|
||||
The extract functions return a pointer to various global or per-atom
|
||||
quantities stored in LAMMPS or to values calculated by a compute, fix,
|
||||
@ -1957,10 +1961,16 @@ the other extract functions, the underlying storage may be reallocated
|
||||
as LAMMPS runs, so you need to re-call the function to assure a
|
||||
current pointer or returned value(s).
|
||||
|
||||
The lammps_reset_box() function resets the size and shape of the
|
||||
simulation box, e.g. as part of restoring a previously extracted and
|
||||
saved state of a simulation.
|
||||
|
||||
The lammps_set_variable() function can set an existing string-style
|
||||
variable to a new string value, so that subsequent LAMMPS commands can
|
||||
access the variable. The lammps_get_thermo() function returns the
|
||||
current value of a thermo keyword as a double.
|
||||
access the variable.
|
||||
|
||||
The lammps_get_thermo() function returns the current value of a thermo
|
||||
keyword as a double precision value.
|
||||
|
||||
The lammps_get_natoms() function returns the total number of atoms in
|
||||
the system and can be used by the caller to allocate space for the
|
||||
@ -1973,10 +1983,13 @@ passed by the caller, to each atom owned by individual processors.
|
||||
|
||||
The lammps_create_atoms() function takes a list of N atoms as input
|
||||
with atom types and coords (required), an optionally atom IDs and
|
||||
velocities. It uses the coords of each atom to assign it as a new
|
||||
atom to the processor that owns it. Additional properties for the new
|
||||
atoms can be assigned via the lammps_scatter_atoms() or
|
||||
lammps_extract_atom() functions.
|
||||
velocities and image flags. It uses the coords of each atom to assign
|
||||
it as a new atom to the processor that owns it. This function is
|
||||
useful to add atoms to a simulation or (in tandem with
|
||||
lammps_reset_box()) to restore a previously extracted and saved state
|
||||
of a simulation. Additional properties for the new atoms can then be
|
||||
assigned via the lammps_scatter_atoms() or lammps_extract_atom()
|
||||
functions.
|
||||
|
||||
The examples/COUPLE and python directories have example C++ and C and
|
||||
Python codes which show how a driver code can link to LAMMPS as a
|
||||
|
||||
@ -51,12 +51,12 @@ relative to the center of mass (COM) velocity of the 2 atoms in the
|
||||
bond.
|
||||
|
||||
The value {engvib} is the vibrational kinetic energy of the two atoms
|
||||
in the bond, which is simply 1/2 m1 v1^2 + 1/2 m1 v2^2, where v1 and
|
||||
in the bond, which is simply 1/2 m1 v1^2 + 1/2 m2 v2^2, where v1 and
|
||||
v2 are the magnitude of the velocity of the 2 atoms along the bond
|
||||
direction, after the COM velocity has been subtracted from each.
|
||||
|
||||
The value {engrot} is the rotationsl kinetic energy of the two atoms
|
||||
in the bond, which is simply 1/2 m1 v1^2 + 1/2 m1 v2^2, where v1 and
|
||||
in the bond, which is simply 1/2 m1 v1^2 + 1/2 m2 v2^2, where v1 and
|
||||
v2 are the magnitude of the velocity of the 2 atoms perpendicular to
|
||||
the bond direction, after the COM velocity has been subtracted from
|
||||
each.
|
||||
@ -67,7 +67,7 @@ Vcm^2 where Vcm = magnitude of the velocity of the COM.
|
||||
|
||||
Note that these 3 kinetic energy terms are simply a partitioning of
|
||||
the summed kinetic energy of the 2 atoms themselves. I.e. total KE =
|
||||
1/2 m1 v1^2 + 1/2 m2 v3^2 = engvib + engrot + engtrans, where v1,v2
|
||||
1/2 m1 v1^2 + 1/2 m2 v2^2 = engvib + engrot + engtrans, where v1,v2
|
||||
are the magnitude of the velocities of the 2 atoms, without any
|
||||
adjustment for the COM velocity.
|
||||
|
||||
|
||||
@ -641,7 +641,8 @@ the restarted simulation begins.
|
||||
|
||||
[Related commands:]
|
||||
|
||||
"fix ave/chunk"_fix_ave_chunk.html
|
||||
"fix ave/chunk"_fix_ave_chunk.html,
|
||||
"compute global/atom"_compute_global_atom.html
|
||||
|
||||
[Default:]
|
||||
|
||||
|
||||
@ -37,7 +37,7 @@ The neighbor list needed to compute this quantity is constructed each
|
||||
time the calculation is performed (i.e. each time a snapshot of atoms
|
||||
is dumped). Thus it can be inefficient to compute/dump this quantity
|
||||
too frequently or to have multiple compute/dump commands, each of a
|
||||
{clsuter/atom} style.
|
||||
{cluster/atom} style.
|
||||
|
||||
NOTE: If you have a bonded system, then the settings of
|
||||
"special_bonds"_special_bonds.html command can remove pairwise
|
||||
|
||||
220
doc/src/compute_global_atom.txt
Normal file
220
doc/src/compute_global_atom.txt
Normal file
@ -0,0 +1,220 @@
|
||||
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
|
||||
|
||||
:link(lws,http://lammps.sandia.gov)
|
||||
:link(ld,Manual.html)
|
||||
:link(lc,Section_commands.html#comm)
|
||||
|
||||
:line
|
||||
|
||||
compute global/atom command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
compute ID group-ID style index input1 input2 ... :pre
|
||||
|
||||
ID, group-ID are documented in "compute"_compute.html command :ulb,l
|
||||
global/atom = style name of this compute command :l
|
||||
index = c_ID, c_ID\[N\], f_ID, f_ID\[N\], v_name :l
|
||||
c_ID = per-atom vector calculated by a compute with ID
|
||||
c_ID\[I\] = Ith column of per-atom array calculated by a compute with ID
|
||||
f_ID = per-atom vector calculated by a fix with ID
|
||||
f_ID\[I\] = Ith column of per-atom array calculated by a fix with ID
|
||||
v_name = per-atom vector calculated by an atom-style variable with name :pre
|
||||
one or more inputs can be listed :l
|
||||
input = c_ID, c_ID\[N\], f_ID, f_ID\[N\], v_name :l
|
||||
c_ID = global vector calculated by a compute with ID
|
||||
c_ID\[I\] = Ith column of global array calculated by a compute with ID, I can include wildcard (see below)
|
||||
f_ID = global vector calculated by a fix with ID
|
||||
f_ID\[I\] = Ith column of global array calculated by a fix with ID, I can include wildcard (see below)
|
||||
v_name = global vector calculated by a vector-style variable with name :pre
|
||||
:ule
|
||||
|
||||
[Examples:]
|
||||
|
||||
compute 1 all global/atom c_chunk c_com\[1\\] c_com\[2\\] c_com\[3\\]
|
||||
compute 1 all global/atom c_chunk c_com\[*\\] :pre
|
||||
|
||||
[Description:]
|
||||
|
||||
Define a calculation that assigns global values to each atom from
|
||||
vectors or arrays of global values. The specified {index} parameter
|
||||
is used to determine which global value is assigned to each atom.
|
||||
|
||||
The {index} parameter must reference a per-atom vector or array from a
|
||||
"compute"_compute.html or "fix"_fix.html or the evaluation of an
|
||||
atom-style "variable"_variable.html. Each {input} value must
|
||||
reference a global vector or array from a "compute"_compute.html or
|
||||
"fix"_fix.html or the evaluation of an vector-style
|
||||
"variable"_variable.html. Details are given below.
|
||||
|
||||
The {index} value for an atom is used as a index I (from 1 to N) into
|
||||
the vector associated with each of the input values. The Ith value
|
||||
from the input vector becomes one output value for that atom. If the
|
||||
atom is not in the specified group, or the index I < 1 or I > M, where
|
||||
M is the actual length of the input vector, then an output value of
|
||||
0.0 is assigned to the atom.
|
||||
|
||||
An example of how this command is useful, is in the context of
|
||||
"chunks" which are static or dyanmic subsets of atoms. The "compute
|
||||
chunk/atom"_compute_chunk_atom.html command assigns unique chunk IDs
|
||||
to each atom. It's output can be used as the {index} parameter for
|
||||
this command. Various other computes with "chunk" in their style
|
||||
name, such as "compute com/chunk"_compute_com_chunk.html or "compute
|
||||
msd/chunk"_compute_msd_chunk.html, calculate properties for each
|
||||
chunk. The output of these commands are global vectors or arrays,
|
||||
with one or more values per chunk, and can be used as input values for
|
||||
this command. This command will then assign the global chunk value to
|
||||
each atom in the chunk, producing a per-atom vector or per-atom array
|
||||
as output. The per-atom values can then be output to a dump file or
|
||||
used by any command that uses per-atom values from a compute as input,
|
||||
as discussed in "Section 6.15"_Section_howto.html#howto_15.
|
||||
|
||||
As a concrete example, these commands will calculate the displacement
|
||||
of each atom from the center-of-mass of the molecule it is in, and
|
||||
dump those values to a dump file. In this case, each molecule is a
|
||||
chunk.
|
||||
|
||||
compute cc1 all chunk/atom molecule
|
||||
compute myChunk all com/chunk cc1
|
||||
compute prop all property/atom xu yu zu
|
||||
compute glob all global/atom c_cc1 c_myChunk\[*\]
|
||||
variable dx atom c_prop\[1\]-c_glob\[1\]
|
||||
variable dy atom c_prop\[2\]-c_glob\[2\]
|
||||
variable dz atom c_prop\[3\]-c_glob\[3\]
|
||||
variable dist atom sqrt(v_dx*v_dx+v_dy*v_dy+v_dz*v_dz)
|
||||
dump 1 all custom 100 tmp.dump id xu yu zu c_glob\[1\] c_glob\[2\] c_glob\[3\] &
|
||||
v_dx v_dy v_dz v_dist
|
||||
dump_modify 1 sort id :pre
|
||||
|
||||
You can add these commands to the bench/in.chain script to see how
|
||||
they work.
|
||||
|
||||
:line
|
||||
|
||||
Note that for input values from a compute or fix, the bracketed index
|
||||
I can be specified using a wildcard asterisk with the index to
|
||||
effectively specify multiple values. This takes the form "*" or "*n"
|
||||
or "n*" or "m*n". If N = the size of the vector (for {mode} = scalar)
|
||||
or the number of columns in the array (for {mode} = vector), then an
|
||||
asterisk with no numeric values means all indices from 1 to N. A
|
||||
leading asterisk means all indices from 1 to n (inclusive). A
|
||||
trailing asterisk means all indices from n to N (inclusive). A middle
|
||||
asterisk means all indices from m to n (inclusive).
|
||||
|
||||
Using a wildcard is the same as if the individual columns of the array
|
||||
had been listed one by one. E.g. these 2 compute global/atom commands
|
||||
are equivalent, since the "compute com/chunk"_compute_com_chunk.html
|
||||
command creates a global array with 3 columns:
|
||||
|
||||
compute cc1 all chunk/atom molecule
|
||||
compute com all com/chunk cc1
|
||||
compute 1 all global/atom c_cc1 c_com\[1\] c_com\[2\] c_com\[3\]
|
||||
compute 1 all global/atom c_cc1 c_com\[*\] :pre
|
||||
|
||||
:line
|
||||
|
||||
This section explains the {index} parameter. Note that it must
|
||||
reference per-atom values, as contrasted with the {input} values which
|
||||
must reference global values.
|
||||
|
||||
Note that all of these options generate floating point values. When
|
||||
they are used as an index into the specified input vectors, they
|
||||
simple rounded down to convert the value to integer indices. The
|
||||
final values should range from 1 to N (inclusive), since they are used
|
||||
to access values from N-length vectors.
|
||||
|
||||
If {index} begins with "c_", a compute ID must follow which has been
|
||||
previously defined in the input script. The compute must generate
|
||||
per-atom quantities. See the individual "compute"_compute.html doc
|
||||
page for details. If no bracketed integer is appended, the per-atom
|
||||
vector calculated by the compute is used. If a bracketed integer is
|
||||
appended, the Ith column of the per-atom array calculated by the
|
||||
compute is used. Users can also write code for their own compute
|
||||
styles and "add them to LAMMPS"_Section_modify.html. See the
|
||||
discussion above for how I can be specified with a wildcard asterisk
|
||||
to effectively specify multiple values.
|
||||
|
||||
If {index} begins with "f_", a fix ID must follow which has been
|
||||
previously defined in the input script. The Fix must generate
|
||||
per-atom quantities. See the individual "fix"_fix.html doc page for
|
||||
details. Note that some fixes only produce their values on certain
|
||||
timesteps, which must be compatible with when compute global/atom
|
||||
references the values, else an error results. If no bracketed integer
|
||||
is appended, the per-atom vector calculated by the fix is used. If a
|
||||
bracketed integer is appended, the Ith column of the per-atom array
|
||||
calculated by the fix is used. Users can also write code for their
|
||||
own fix style and "add them to LAMMPS"_Section_modify.html. See the
|
||||
discussion above for how I can be specified with a wildcard asterisk
|
||||
to effectively specify multiple values.
|
||||
|
||||
If {index} begins with "v_", a variable name must follow which has
|
||||
been previously defined in the input script. It must be an
|
||||
"atom-style variable"_variable.html. Atom-style variables can
|
||||
reference thermodynamic keywords and various per-atom attributes, or
|
||||
invoke other computes, fixes, or variables when they are evaluated, so
|
||||
this is a very general means of generating per-atom quantities to use
|
||||
as {index}.
|
||||
|
||||
:line
|
||||
|
||||
This section explains the kinds of {input} values that can be used.
|
||||
Note that inputs reference global values, as contrasted with the
|
||||
{index} parameter which must reference per-atom values.
|
||||
|
||||
If a value begins with "c_", a compute ID must follow which has been
|
||||
previously defined in the input script. The compute must generate a
|
||||
global vector or array. See the individual "compute"_compute.html doc
|
||||
page for details. If no bracketed integer is appended, the vector
|
||||
calculated by the compute is used. If a bracketed integer is
|
||||
appended, the Ith column of the array calculated by the compute is
|
||||
used. Users can also write code for their own compute styles and "add
|
||||
them to LAMMPS"_Section_modify.html. See the discussion above for how
|
||||
I can be specified with a wildcard asterisk to effectively specify
|
||||
multiple values.
|
||||
|
||||
If a value begins with "f_", a fix ID must follow which has been
|
||||
previously defined in the input script. The fix must generate a
|
||||
global vector or array. See the individual "fix"_fix.html doc page
|
||||
for details. Note that some fixes only produce their values on
|
||||
certain timesteps, which must be compatible with when compute
|
||||
global/atom references the values, else an error results. If no
|
||||
bracketed integer is appended, the vector calculated by the fix is
|
||||
used. If a bracketed integer is appended, the Ith column of the array
|
||||
calculated by the fix is used. Users can also write code for their
|
||||
own fix style and "add them to LAMMPS"_Section_modify.html. See the
|
||||
discussion above for how I can be specified with a wildcard asterisk
|
||||
to effectively specify multiple values.
|
||||
|
||||
If a value begins with "v_", a variable name must follow which has
|
||||
been previously defined in the input script. It must be a
|
||||
"vector-style variable"_variable.html. Vector-style variables can
|
||||
reference thermodynamic keywords and various other attributes of
|
||||
atoms, or invoke other computes, fixes, or variables when they are
|
||||
evaluated, so this is a very general means of generating a vector of
|
||||
global quantities which the {index} parameter will reference for
|
||||
assignement of global values to atoms.
|
||||
|
||||
:line
|
||||
|
||||
[Output info:]
|
||||
|
||||
If a single input is specified this compute produces a per-atom
|
||||
vector. If multiple inputs are specified, this compute produces a
|
||||
per-atom array values, where the number of columns is equal to the
|
||||
number of inputs specified. These values can be used by any command
|
||||
that uses per-atom vector or array values from a compute as input.
|
||||
See "Section 6.15"_Section_howto.html#howto_15 for an overview of
|
||||
LAMMPS output options.
|
||||
|
||||
The per-atom vector or array values will be in whatever units the
|
||||
corresponsing input values are in.
|
||||
|
||||
[Restrictions:] none
|
||||
|
||||
[Related commands:]
|
||||
|
||||
"compute"_compute.html, "fix"_fix.html, "variable"_variable.html,
|
||||
"compute chunk/atom"_compute_chunk_atom.html, "compute
|
||||
reduce"_compute_reduce.html
|
||||
|
||||
[Default:] none
|
||||
@ -27,7 +27,7 @@ contact radius is used only to prevent particles belonging to
|
||||
different physical bodies from penetrating each other. It is used by
|
||||
the contact pair styles, e.g., smd/hertz and smd/tri_surface.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
The value of the contact radius will be 0.0 for particles not in the
|
||||
|
||||
@ -24,7 +24,7 @@ compute 1 all smd/damage :pre
|
||||
Define a computation that calculates the damage status of SPH particles
|
||||
according to the damage model which is defined via the SMD SPH pair styles, e.g., the maximum plastic strain failure criterion.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth Mach Dynamics in LAMMPS.
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output Info:]
|
||||
|
||||
|
||||
@ -32,7 +32,7 @@ configuration. This compute is only really useful for debugging the
|
||||
hourglass control mechanim which is part of the Total-Lagrangian SPH
|
||||
pair style.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output Info:]
|
||||
|
||||
@ -24,7 +24,7 @@ compute 1 all smd/internal/energy :pre
|
||||
Define a computation which outputs the per-particle enthalpy, i.e.,
|
||||
the sum of potential energy and heat.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output Info:]
|
||||
|
||||
@ -25,7 +25,7 @@ Define a computation that outputs the equivalent plastic strain per
|
||||
particle. This command is only meaningful if a material model with
|
||||
plasticity is defined.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output Info:]
|
||||
|
||||
@ -25,7 +25,7 @@ Define a computation that outputs the time rate of the equivalent
|
||||
plastic strain. This command is only meaningful if a material model
|
||||
with plasticity is defined.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output Info:]
|
||||
|
||||
@ -26,7 +26,7 @@ The mass density is the mass of a particle which is constant during
|
||||
the course of a simulation, divided by its volume, which can change
|
||||
due to mechanical deformation.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output info:]
|
||||
|
||||
@ -25,7 +25,7 @@ Define a computation that calculates the deformation gradient. It is
|
||||
only meaningful for particles which interact according to the
|
||||
Total-Lagrangian SPH pair style.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output info:]
|
||||
|
||||
@ -30,7 +30,7 @@ time step. This calculation is performed automatically in the
|
||||
relevant SPH pair styles and this compute only serves to make the
|
||||
stable time increment accessible for output purposes.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output info:]
|
||||
|
||||
@ -25,7 +25,7 @@ Define a computation that calculates the number of particles inside of
|
||||
the smoothing kernel radius for particles interacting via the
|
||||
Total-Lagrangian SPH pair style.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output info:]
|
||||
|
||||
@ -26,7 +26,7 @@ associated with a particle as a rotated ellipsoid. It is only
|
||||
meaningful for particles which interact according to the
|
||||
Total-Lagrangian SPH pair style.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output info:]
|
||||
|
||||
@ -24,7 +24,7 @@ compute 1 all smd/tlsph/strain :pre
|
||||
Define a computation that calculates the Green-Lagrange strain tensor
|
||||
for particles interacting via the Total-Lagrangian SPH pair style.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output info:]
|
||||
|
||||
@ -24,7 +24,7 @@ compute 1 all smd/tlsph/strain/rate :pre
|
||||
Define a computation that calculates the rate of the strain tensor for
|
||||
particles interacting via the Total-Lagrangian SPH pair style.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output info:]
|
||||
|
||||
@ -24,7 +24,7 @@ compute 1 all smd/tlsph/stress :pre
|
||||
Define a computation that outputs the Cauchy stress tensor for
|
||||
particles interacting via the Total-Lagrangian SPH pair style.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output info:]
|
||||
|
||||
@ -25,7 +25,7 @@ Define a computation that returns the coordinates of the vertices
|
||||
corresponding to the triangle-elements of a mesh created by the "fix
|
||||
smd/wall_surface"_fix_smd_wall_surface.html.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output info:]
|
||||
|
||||
@ -25,7 +25,7 @@ Define a computation that returns the number of neighbor particles
|
||||
inside of the smoothing kernel radius for particles interacting via
|
||||
the updated Lagrangian SPH pair style.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output info:]
|
||||
|
||||
@ -24,7 +24,7 @@ compute 1 all smd/ulsph/strain :pre
|
||||
Define a computation that outputs the logarithmic strain tensor. for
|
||||
particles interacting via the updated Lagrangian SPH pair style.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output info:]
|
||||
|
||||
@ -25,7 +25,7 @@ Define a computation that outputs the rate of the logarithmic strain
|
||||
tensor for particles interacting via the updated Lagrangian SPH pair
|
||||
style.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output info:]
|
||||
|
||||
@ -23,7 +23,7 @@ compute 1 all smd/ulsph/stress :pre
|
||||
|
||||
Define a computation that outputs the Cauchy stress tensor.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output info:]
|
||||
|
||||
@ -24,7 +24,7 @@ compute 1 all smd/vol :pre
|
||||
Define a computation that provides the per-particle volume and the sum
|
||||
of the per-particle volumes of the group for which the fix is defined.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
|
||||
Mach Dynamics in LAMMPS.
|
||||
|
||||
[Output info:]
|
||||
|
||||
@ -21,7 +21,7 @@ type = atom type for inserted atoms (must be 0 if mol keyword used) :l
|
||||
seed = random # seed (positive integer) :l
|
||||
T = temperature of the ideal gas reservoir (temperature units) :l
|
||||
mu = chemical potential of the ideal gas reservoir (energy units) :l
|
||||
translate = maximum Monte Carlo translation distance (length units) :l
|
||||
displace = maximum Monte Carlo translation distance (length units) :l
|
||||
zero or more keyword/value pairs may be appended to args :l
|
||||
keyword = {mol}, {region}, {maxangle}, {pressure}, {fugacity_coeff}, {full_energy}, {charge}, {group}, {grouptype}, {intra_energy}, or {tfac_insert}
|
||||
{mol} value = template-ID
|
||||
|
||||
@ -36,7 +36,7 @@ stable maximum time step.
|
||||
This fix inquires the minimum stable time increment across all particles contained in the group for which this
|
||||
fix is defined. An additional safety factor {s_fact} is applied to the time increment.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth Mach Dynamics in LAMMPS.
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth Mach Dynamics in LAMMPS.
|
||||
|
||||
[Restart, fix_modify, output, run start/stop, minimize info:]
|
||||
|
||||
|
||||
@ -32,7 +32,7 @@ fix 1 all smd/integrate_tlsph limit_velocity 1000 :pre
|
||||
|
||||
The fix performs explicit time integration for particles which interact according with the Total-Lagrangian SPH pair style.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth Mach Dynamics in LAMMPS.
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth Mach Dynamics in LAMMPS.
|
||||
|
||||
The {limit_velocity} keyword will control the velocity, scaling the norm of
|
||||
the velocity vector to max_vel in case it exceeds this velocity limit.
|
||||
|
||||
@ -34,7 +34,7 @@ fix 1 all smd/integrate_ulsph limit_velocity 1000 :pre
|
||||
[Description:]
|
||||
|
||||
The fix performs explicit time integration for particles which interact with the updated Lagrangian SPH pair style.
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth Mach Dynamics in LAMMPS.
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth Mach Dynamics in LAMMPS.
|
||||
|
||||
The {adjust_radius} keyword activates dynamic adjustment of the per-particle SPH smoothing kernel radius such that the number of neighbors per particles remains
|
||||
within the interval {min_nn} to {max_nn}. The parameter {adjust_radius_factor} determines the amount of adjustment per timestep. Typical values are
|
||||
|
||||
@ -55,7 +55,7 @@ specified. This style also sets the velocity of each particle to (omega cross
|
||||
Rperp) where omega is its angular velocity around the rotation axis and
|
||||
Rperp is a perpendicular vector from the rotation axis to the particle.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth Mach Dynamics in LAMMPS.
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth Mach Dynamics in LAMMPS.
|
||||
|
||||
[Restart, fix_modify, output, run start/stop, minimize info:]
|
||||
|
||||
|
||||
@ -37,7 +37,7 @@ It is possible to move the triangulated surface via the "smd/move_tri_surf"_fix_
|
||||
Immediately after a .STL file has been read, the simulation needs to be run for 0 timesteps in order to properly register the new particles
|
||||
in the system. See the "funnel_flow" example in the USER-SMD examples directory.
|
||||
|
||||
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth Mach Dynamics in LAMMPS.
|
||||
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth Mach Dynamics in LAMMPS.
|
||||
|
||||
[Restart, fix_modify, output, run start/stop, minimize info:]
|
||||
|
||||
|
||||
@ -311,6 +311,7 @@ compute_erotate_sphere.html
|
||||
compute_erotate_sphere_atom.html
|
||||
compute_event_displace.html
|
||||
compute_fep.html
|
||||
compute_global_atom.html
|
||||
compute_group_group.html
|
||||
compute_gyration.html
|
||||
compute_gyration_chunk.html
|
||||
|
||||
@ -39,7 +39,7 @@ invocation of the {tlsph} for a solid body would consist of an equation of state
|
||||
the pressure (the diagonal components of the stress tensor), and a material model to compute shear
|
||||
stresses (the off-diagonal components of the stress tensor). Damage and failure models can also be added.
|
||||
|
||||
Please see the "SMD user guide"_USER/smd/SMD_LAMMPS_userguide.pdf for a complete listing of the possible keywords and material models.
|
||||
Please see the "SMD user guide"_PDF/SMD_LAMMPS_userguide.pdf for a complete listing of the possible keywords and material models.
|
||||
|
||||
:line
|
||||
|
||||
|
||||
@ -43,7 +43,7 @@ stresses (the off-diagonal components of the stress tensor).
|
||||
|
||||
Note that the use of *GRADIENT_CORRECTION can lead to severe numerical instabilities. For a general fluid simulation, *NO_GRADIENT_CORRECTION is recommended.
|
||||
|
||||
Please see the "SMD user guide"_USER/smd/SMD_LAMMPS_userguide.pdf for a complete listing of the possible keywords and material models.
|
||||
Please see the "SMD user guide"_PDF/SMD_LAMMPS_userguide.pdf for a complete listing of the possible keywords and material models.
|
||||
|
||||
:line
|
||||
|
||||
|
||||
@ -14,7 +14,7 @@ python func keyword args ... :pre
|
||||
|
||||
func = name of Python function :ulb,l
|
||||
one or more keyword/args pairs must be appended :l
|
||||
keyword = {invoke} or {input} or {return} or {format} or {file} or {here} or {exists}
|
||||
keyword = {invoke} or {input} or {return} or {format} or {length} or {file} or {here} or {exists}
|
||||
{invoke} arg = none = invoke the previously defined Python function
|
||||
{input} args = N i1 i2 ... iN
|
||||
N = # of inputs to function
|
||||
@ -29,6 +29,8 @@ keyword = {invoke} or {input} or {return} or {format} or {file} or {here} or {ex
|
||||
M = N+1 if there is a return value
|
||||
fstring = each character (i,f,s,p) corresponds in order to an input or return value
|
||||
'i' = integer, 'f' = floating point, 's' = string, 'p' = SELF
|
||||
{length} arg = Nlen
|
||||
Nlen = max length of string returned from Python function
|
||||
{file} arg = filename
|
||||
filename = file of Python code, which defines func
|
||||
{here} arg = inline
|
||||
@ -165,6 +167,17 @@ equal-style variable as an argument, but only if the output of the
|
||||
Python function is flagged as a numeric value ("i" or "f") via the
|
||||
{format} keyword.
|
||||
|
||||
If the {return} keyword is used and the {format} keyword specifies the
|
||||
output as a string, then the default maximum length of that string is
|
||||
63 characters (64-1 for the string terminator). If you want to return
|
||||
a longer string, the {length} keyword can be specified with its {Nlen}
|
||||
value set to a larger number (the code allocates space for Nlen+1 to
|
||||
include the string terminator). If the Python function generates a
|
||||
string longer than the default 63 or the specified {Nlen}, it will be
|
||||
trunctated.
|
||||
|
||||
:line
|
||||
|
||||
Either the {file}, {here}, or {exists} keyword must be used, but only
|
||||
one of them. These keywords specify what Python code to load into the
|
||||
Python interpreter. The {file} keyword gives the name of a file,
|
||||
|
||||
81
examples/USER/misc/grem/README
Normal file
81
examples/USER/misc/grem/README
Normal file
@ -0,0 +1,81 @@
|
||||
Generalized Replica Exchange Method (gREM) examples
|
||||
===================================================
|
||||
|
||||
Examples:
|
||||
---------------------------------------------------
|
||||
|
||||
lj-single:
|
||||
This example is the simplest case scenario utilizing the generalized
|
||||
ensemble defined by fix_grem. It utilizes only 1 replica and requires
|
||||
the LAMMPS executable to be run as usual:
|
||||
|
||||
mpirun -np 4 lmp_mpi -in in.gREM-npt
|
||||
./lmp_serial -in in.gREM-nvt
|
||||
|
||||
While this does not obtain any information about Ts(H), it is most similar to
|
||||
a microcanonical simulation and "single-replica gREM" can be useful for
|
||||
studying non-equilibrium processes as well.
|
||||
|
||||
lj-6rep:
|
||||
This example utilizes an external python script to handle swaps between
|
||||
replicas. Included is run.sh, which requires the path to your LAMMPS
|
||||
executable. The python script is fragile as it relies on parsing output files
|
||||
from the LAMMPS run and moving LAMMPS data files between directories. Use
|
||||
caution if modifying this example further. If complied with mpi, multiple
|
||||
processors can be used as:
|
||||
|
||||
./run.sh $NUM_PROCS
|
||||
|
||||
a serial run is completed simply as
|
||||
|
||||
./run.sh 1
|
||||
|
||||
where the executable provided must be serial if "1" is provided as the number
|
||||
of procs. While this external replica exchange module is quite slow and
|
||||
inefficient, it allows for many replicas to be used on a single processor.
|
||||
While here there are only 6 replicas, this example could be extended to >100
|
||||
replicas while still using a serial compilation. This is also beneficial for
|
||||
running on high performance nodes with few cores to complete a full-scale gREM
|
||||
simulation with a large number of replicas.
|
||||
|
||||
A quick note on efficiency: frequent exchanges slow down this script
|
||||
substantially because LAMMPS is restarted every exchange attempt. The script
|
||||
works best for large systems with infrequent exchanges.
|
||||
|
||||
lj-temper:
|
||||
This is an example using the internal replica exchange module. While fast
|
||||
in comparison to the python version, it requires substantial resources
|
||||
(at least 1 proc per replica). Instead of stopping LAMMPS every exchange
|
||||
attempt, all replicas are run concurrently, and exchanges take place
|
||||
internally. This requires use of LAMMPS partition mode, via the command
|
||||
line using the -p flag. Input files require world type variables defining
|
||||
the parameters of each replica. The included example with 4 replicas must
|
||||
run on at least 4 procs, in that case LAMMPS could be initiated as:
|
||||
|
||||
mpirun -np 4 lmp_mpi -p 4x1 -in in.gREM-temper
|
||||
|
||||
spawning 4 partitions with 1 replica each. Multiple procs per replica could
|
||||
be used. In the case of a 16 system with 4 replicas, the
|
||||
following logic could be used:
|
||||
|
||||
mpirun -np 16 lmp_mpi -p 4x4 -in in.gREM-temper
|
||||
|
||||
Once started, a universe log file will be created as well as log files for
|
||||
each replica. The universe (log.lammps) contains exchange information, while
|
||||
the replicas (*/log.lammps.*) contains the thermo_output as usual. In this
|
||||
example, in.gREM-temper moves the log files to their respective folders.
|
||||
|
||||
|
||||
Closing Notes:
|
||||
---------------------------------------------------
|
||||
|
||||
Of significant difference between lj-6rep and lj-temper is the format of data.
|
||||
In lj-6rep, data is stored as 'replicas' meaning discontinuous trajectories, as
|
||||
files are moved between directories labeled by the 'lambda' of the replica. In
|
||||
lj-temper, data is stored as 'walkers' with continuous trajectories, but
|
||||
discontinuous parameters. The later is significantly more efficient, but
|
||||
requires post-processing to obtain per-replica information.
|
||||
|
||||
|
||||
Any problems/questions should be directed to <dstelter@bu.edu>.
|
||||
|
||||
@ -51,7 +51,7 @@ for exchange in arange(starting_ex,max_exchange):
|
||||
os.chdir(path+"/%s" % lambdas[l])
|
||||
#os.system("cp restart_file restart_file%d" % exchange)
|
||||
if (nproc > 1):
|
||||
os.system("mpirun -np %d " + lmp + " -in ../" + inp + " -var lambda %g -var eta %g -var enthalpy %g > output" % (nproc, lambdas[l], eta, H))
|
||||
os.system("mpirun -np %d " % (nproc) + lmp + " -in ../" + inp + " -var lambda %g -var eta %g -var enthalpy %g > output" % (lambdas[l], eta, H))
|
||||
if (nproc == 1):
|
||||
os.system(lmp + " -in ../" + inp + " -var lambda %g -var eta %g -var enthalpy %g > output" % (lambdas[l], eta, H))
|
||||
os.system("grep -v '[a-zA-Z]' output | awk '{if(NF==6 && NR>19)print $0}' | awk '{print $3}' >ent")
|
||||
@ -60,7 +60,6 @@ for exchange in arange(starting_ex,max_exchange):
|
||||
aver_enthalpy[l] = mean(ee[-1])
|
||||
# os.system("mv dump.dcd dump%d.dcd" % exchange)
|
||||
os.system("mv log.lammps log%d.lammps" % exchange)
|
||||
# os.system("rm output")
|
||||
os.system("mv final_restart_file final_restart_file%d" % exchange)
|
||||
os.system("mv ent ent%d" % exchange)
|
||||
os.system("bzip2 log%d.lammps ent%d" % (exchange,exchange))
|
||||
|
||||
2
src/.gitignore
vendored
2
src/.gitignore
vendored
@ -18,6 +18,8 @@
|
||||
/*_tally.cpp
|
||||
/*_rx.h
|
||||
/*_rx.cpp
|
||||
/*_ssa.h
|
||||
/*_ssa.cpp
|
||||
|
||||
/kokkos.cpp
|
||||
/kokkos.h
|
||||
|
||||
@ -105,11 +105,16 @@ action modify_kokkos.cpp
|
||||
action modify_kokkos.h
|
||||
action neigh_bond_kokkos.cpp
|
||||
action neigh_bond_kokkos.h
|
||||
action neigh_full_kokkos.h
|
||||
action neigh_list_kokkos.cpp
|
||||
action neigh_list_kokkos.h
|
||||
action neighbor_kokkos.cpp
|
||||
action neighbor_kokkos.h
|
||||
action npair_copy_kokkos.cpp
|
||||
action npair_copy_kokkos.h
|
||||
action npair_kokkos.cpp
|
||||
action npair_kokkos.h
|
||||
action nbin_kokkos.cpp
|
||||
action nbin_kokkos.h
|
||||
action math_special_kokkos.cpp
|
||||
action math_special_kokkos.h
|
||||
action pair_buck_coul_cut_kokkos.cpp
|
||||
|
||||
@ -125,12 +125,10 @@ void FixQEqReaxKokkos<DeviceType>::init()
|
||||
neighbor->requests[irequest]->pair = 0;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else { //if (neighflag == HALF || neighflag == HALFTHREAD)
|
||||
neighbor->requests[irequest]->fix = 1;
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -168,7 +168,6 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
else
|
||||
neighflag = HALF;
|
||||
} else if (strcmp(arg[iarg+1],"n2") == 0) neighflag = N2;
|
||||
else if (strcmp(arg[iarg+1],"full/cluster") == 0) neighflag = FULLCLUSTER;
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"binsize") == 0) {
|
||||
@ -232,20 +231,6 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
called by Finish
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int KokkosLMP::neigh_list_kokkos(int m)
|
||||
{
|
||||
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
|
||||
if (nk->lists_host[m] && nk->lists_host[m]->d_numneigh.dimension_0())
|
||||
return 1;
|
||||
if (nk->lists_device[m] && nk->lists_device[m]->d_numneigh.dimension_0())
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
called by Finish
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
int KokkosLMP::neigh_count(int m)
|
||||
{
|
||||
int inum;
|
||||
@ -255,28 +240,30 @@ int KokkosLMP::neigh_count(int m)
|
||||
ArrayTypes<LMPHostType>::t_int_1d h_numneigh;
|
||||
|
||||
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
|
||||
if (nk->lists_host[m]) {
|
||||
inum = nk->lists_host[m]->inum;
|
||||
if (nk->lists[m]->execution_space == Host) {
|
||||
NeighListKokkos<LMPHostType>* nlistKK = (NeighListKokkos<LMPHostType>*) nk->lists[m];
|
||||
inum = nlistKK->inum;
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_ilist = Kokkos::create_mirror_view(nk->lists_host[m]->d_ilist);
|
||||
h_numneigh = Kokkos::create_mirror_view(nk->lists_host[m]->d_numneigh);
|
||||
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
|
||||
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
|
||||
#else
|
||||
h_ilist = nk->lists_host[m]->d_ilist;
|
||||
h_numneigh = nk->lists_host[m]->d_numneigh;
|
||||
h_ilist = nlistKK->d_ilist;
|
||||
h_numneigh = nlistKK->d_numneigh;
|
||||
#endif
|
||||
Kokkos::deep_copy(h_ilist,nk->lists_host[m]->d_ilist);
|
||||
Kokkos::deep_copy(h_numneigh,nk->lists_host[m]->d_numneigh);
|
||||
} else if (nk->lists_device[m]) {
|
||||
inum = nk->lists_device[m]->inum;
|
||||
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
|
||||
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
|
||||
} else if (nk->lists[m]->execution_space == Device) {
|
||||
NeighListKokkos<LMPDeviceType>* nlistKK = (NeighListKokkos<LMPDeviceType>*) nk->lists[m];
|
||||
inum = nlistKK->inum;
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_ilist = Kokkos::create_mirror_view(nk->lists_device[m]->d_ilist);
|
||||
h_numneigh = Kokkos::create_mirror_view(nk->lists_device[m]->d_numneigh);
|
||||
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
|
||||
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
|
||||
#else
|
||||
h_ilist = nk->lists_device[m]->d_ilist;
|
||||
h_numneigh = nk->lists_device[m]->d_numneigh;
|
||||
h_ilist = nlistKK->d_ilist;
|
||||
h_numneigh = nlistKK->d_numneigh;
|
||||
#endif
|
||||
Kokkos::deep_copy(h_ilist,nk->lists_device[m]->d_ilist);
|
||||
Kokkos::deep_copy(h_numneigh,nk->lists_device[m]->d_numneigh);
|
||||
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
|
||||
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
|
||||
}
|
||||
|
||||
for (int i = 0; i < inum; i++) nneigh += h_numneigh[h_ilist[i]];
|
||||
|
||||
@ -34,7 +34,6 @@ class KokkosLMP : protected Pointers {
|
||||
KokkosLMP(class LAMMPS *, int, char **);
|
||||
~KokkosLMP();
|
||||
void accelerator(int, char **);
|
||||
int neigh_list_kokkos(int);
|
||||
int neigh_count(int);
|
||||
private:
|
||||
static void my_signal_handler(int);
|
||||
|
||||
145
src/KOKKOS/nbin_kokkos.cpp
Normal file
145
src/KOKKOS/nbin_kokkos.cpp
Normal file
@ -0,0 +1,145 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "nbin_kokkos.h"
|
||||
#include "neighbor.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "group.h"
|
||||
#include "domain.h"
|
||||
#include "comm.h"
|
||||
#include "update.h"
|
||||
#include "error.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
enum{NSQ,BIN,MULTI}; // also in Neighbor
|
||||
|
||||
#define SMALL 1.0e-6
|
||||
#define CUT2BIN_RATIO 100
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
NBinKokkos<DeviceType>::NBinKokkos(LAMMPS *lmp) : NBinStandard(lmp) {
|
||||
atoms_per_bin = 16;
|
||||
|
||||
d_resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_resize = Kokkos::create_mirror_view(d_resize);
|
||||
#else
|
||||
h_resize = d_resize;
|
||||
#endif
|
||||
h_resize() = 1;
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
setup neighbor binning geometry
|
||||
bin numbering in each dimension is global:
|
||||
0 = 0.0 to binsize, 1 = binsize to 2*binsize, etc
|
||||
nbin-1,nbin,etc = bbox-binsize to bbox, bbox to bbox+binsize, etc
|
||||
-1,-2,etc = -binsize to 0.0, -2*binsize to -binsize, etc
|
||||
code will work for any binsize
|
||||
since next(xyz) and stencil extend as far as necessary
|
||||
binsize = 1/2 of cutoff is roughly optimal
|
||||
for orthogonal boxes:
|
||||
a dim must be filled exactly by integer # of bins
|
||||
in periodic, procs on both sides of PBC must see same bin boundary
|
||||
in non-periodic, coord2bin() still assumes this by use of nbin xyz
|
||||
for triclinic boxes:
|
||||
tilted simulation box cannot contain integer # of bins
|
||||
stencil & neigh list built differently to account for this
|
||||
mbinlo = lowest global bin any of my ghost atoms could fall into
|
||||
mbinhi = highest global bin any of my ghost atoms could fall into
|
||||
mbin = number of bins I need in a dimension
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NBinKokkos<DeviceType>::bin_atoms_setup(int nall)
|
||||
{
|
||||
if (mbins > k_bins.d_view.dimension_0()) {
|
||||
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",mbins,atoms_per_bin);
|
||||
bins = k_bins.view<DeviceType>();
|
||||
|
||||
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",mbins);
|
||||
bincount = k_bincount.view<DeviceType>();
|
||||
last_bin_memory = update->ntimestep;
|
||||
}
|
||||
|
||||
last_bin = update->ntimestep;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
bin owned and ghost atoms
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NBinKokkos<DeviceType>::bin_atoms()
|
||||
{
|
||||
h_resize() = 1;
|
||||
|
||||
while(h_resize() > 0) {
|
||||
h_resize() = 0;
|
||||
deep_copy(d_resize, h_resize);
|
||||
|
||||
MemsetZeroFunctor<DeviceType> f_zero;
|
||||
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
|
||||
Kokkos::parallel_for(mbins, f_zero);
|
||||
DeviceType::fence();
|
||||
|
||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
|
||||
x = atomKK->k_x.view<DeviceType>();
|
||||
|
||||
bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2];
|
||||
bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2];
|
||||
|
||||
NPairKokkosBinAtomsFunctor<DeviceType> f(*this);
|
||||
|
||||
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
|
||||
DeviceType::fence();
|
||||
|
||||
deep_copy(h_resize, d_resize);
|
||||
if(h_resize()) {
|
||||
|
||||
atoms_per_bin += 16;
|
||||
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
|
||||
bins = k_bins.view<DeviceType>();
|
||||
c_bins = bins;
|
||||
last_bin_memory = update->ntimestep;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void NBinKokkos<DeviceType>::binatomsItem(const int &i) const
|
||||
{
|
||||
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2));
|
||||
|
||||
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
|
||||
if(ac < bins.dimension_1()) {
|
||||
bins(ibin, ac) = i;
|
||||
} else {
|
||||
d_resize() = 1;
|
||||
}
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class NBinKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class NBinKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
||||
153
src/KOKKOS/nbin_kokkos.h
Normal file
153
src/KOKKOS/nbin_kokkos.h
Normal file
@ -0,0 +1,153 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NBIN_CLASS
|
||||
|
||||
NBinStyle(kk/host,
|
||||
NBinKokkos<LMPHostType>,
|
||||
NB_KOKKOS_HOST)
|
||||
|
||||
NBinStyle(kk/device,
|
||||
NBinKokkos<LMPDeviceType>,
|
||||
NB_KOKKOS_DEVICE)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NBIN_KOKKOS_H
|
||||
#define LMP_NBIN_KOKKOS_H
|
||||
|
||||
#include "nbin_standard.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class NBinKokkos : public NBinStandard {
|
||||
public:
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
NBinKokkos(class LAMMPS *);
|
||||
~NBinKokkos() {}
|
||||
void bin_atoms_setup(int);
|
||||
void bin_atoms();
|
||||
|
||||
int atoms_per_bin;
|
||||
DAT::tdual_int_1d k_bincount;
|
||||
DAT::tdual_int_2d k_bins;
|
||||
|
||||
typename AT::t_int_1d bincount;
|
||||
const typename AT::t_int_1d_const c_bincount;
|
||||
typename AT::t_int_2d bins;
|
||||
typename AT::t_int_2d_const c_bins;
|
||||
typename AT::t_int_scalar d_resize;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
|
||||
typename AT::t_x_array_randomread x;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void binatomsItem(const int &i) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi_[0])
|
||||
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo_[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi_[1])
|
||||
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo_[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi_[2])
|
||||
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo_[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi_[0])
|
||||
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo_[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi_[1])
|
||||
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo_[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi_[2])
|
||||
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo_[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
|
||||
|
||||
i[0] = ix - mbinxlo;
|
||||
i[1] = iy - mbinylo;
|
||||
i[2] = iz - mbinzlo;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
private:
|
||||
double bboxlo_[3],bboxhi_[3];
|
||||
};
|
||||
|
||||
template<class DeviceType>
|
||||
struct NPairKokkosBinAtomsFunctor {
|
||||
typedef DeviceType device_type;
|
||||
|
||||
const NBinKokkos<DeviceType> c;
|
||||
|
||||
NPairKokkosBinAtomsFunctor(const NBinKokkos<DeviceType> &_c):
|
||||
c(_c) {};
|
||||
~NPairKokkosBinAtomsFunctor() {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.binatomsItem(i);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
@ -34,9 +34,8 @@ void NeighListKokkos<Device>::clean_copy()
|
||||
|
||||
ipage = NULL;
|
||||
dpage = NULL;
|
||||
maxstencil = 0;
|
||||
ghostflag = 0;
|
||||
maxstencil_multi = 0;
|
||||
|
||||
maxatoms = 0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -70,49 +69,6 @@ void NeighListKokkos<Device>::grow(int nmax)
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device>
|
||||
void NeighListKokkos<Device>::stencil_allocate(int smax, int style)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (style == BIN) {
|
||||
if (smax > maxstencil) {
|
||||
maxstencil = smax;
|
||||
d_stencil =
|
||||
memory->create_kokkos(d_stencil,h_stencil,stencil,maxstencil,
|
||||
"neighlist:stencil");
|
||||
if (ghostflag) {
|
||||
memory->create_kokkos(d_stencilxyz,h_stencilxyz,stencilxyz,maxstencil,
|
||||
3,"neighlist:stencilxyz");
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
int n = atom->ntypes;
|
||||
if (maxstencil_multi == 0) {
|
||||
nstencil_multi = new int[n+1];
|
||||
stencil_multi = new int*[n+1];
|
||||
distsq_multi = new double*[n+1];
|
||||
for (i = 1; i <= n; i++) {
|
||||
nstencil_multi[i] = 0;
|
||||
stencil_multi[i] = NULL;
|
||||
distsq_multi[i] = NULL;
|
||||
}
|
||||
}
|
||||
if (smax > maxstencil_multi) {
|
||||
maxstencil_multi = smax;
|
||||
for (i = 1; i <= n; i++) {
|
||||
memory->destroy(stencil_multi[i]);
|
||||
memory->destroy(distsq_multi[i]);
|
||||
memory->create(stencil_multi[i],maxstencil_multi,
|
||||
"neighlist:stencil_multi");
|
||||
memory->create(distsq_multi[i],maxstencil_multi,
|
||||
"neighlist:distsq_multi");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class NeighListKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
|
||||
@ -20,7 +20,7 @@
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u,FULLCLUSTER=16u};
|
||||
enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u};
|
||||
|
||||
class AtomNeighbors
|
||||
{
|
||||
@ -74,14 +74,12 @@ public:
|
||||
typename DAT::tdual_int_1d k_ilist; // local indices of I atoms
|
||||
typename ArrayTypes<Device>::t_int_1d d_ilist;
|
||||
typename ArrayTypes<Device>::t_int_1d d_numneigh; // # of J neighs for each I
|
||||
typename ArrayTypes<Device>::t_int_1d d_stencil; // # of J neighs for each I
|
||||
typename ArrayTypes<LMPHostType>::t_int_1d h_stencil; // # of J neighs per I
|
||||
typename ArrayTypes<Device>::t_int_1d_3 d_stencilxyz;
|
||||
typename ArrayTypes<LMPHostType>::t_int_1d_3 h_stencilxyz;
|
||||
|
||||
NeighListKokkos(class LAMMPS *lmp):
|
||||
NeighList(lmp) {_stride = 1; maxneighs = 16;};
|
||||
~NeighListKokkos() {stencil = NULL; numneigh = NULL; ilist = NULL;};
|
||||
NeighList(lmp) {_stride = 1; maxneighs = 16; kokkos = 1; maxatoms = 0;
|
||||
execution_space = ExecutionSpaceFromDevice<Device>::space;
|
||||
};
|
||||
~NeighListKokkos() {numneigh = NULL; ilist = NULL;};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
AtomNeighbors get_neighbors(const int &i) const {
|
||||
@ -99,7 +97,8 @@ public:
|
||||
int& num_neighs(const int & i) const {
|
||||
return d_numneigh(i);
|
||||
}
|
||||
void stencil_allocate(int smax, int style);
|
||||
private:
|
||||
int maxatoms;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
;/* ----------------------------------------------------------------------
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
@ -26,6 +26,10 @@
|
||||
#include "angle.h"
|
||||
#include "dihedral.h"
|
||||
#include "improper.h"
|
||||
#include "style_nbin.h"
|
||||
#include "style_nstencil.h"
|
||||
#include "style_npair.h"
|
||||
#include "style_ntopo.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
@ -36,18 +40,11 @@ enum{NSQ,BIN,MULTI}; // also in neigh_list.cpp
|
||||
NeighborKokkos::NeighborKokkos(LAMMPS *lmp) : Neighbor(lmp),
|
||||
neighbond_host(lmp),neighbond_device(lmp)
|
||||
{
|
||||
atoms_per_bin = 16;
|
||||
|
||||
nlist_host = 0;
|
||||
lists_host = NULL;
|
||||
pair_build_host = NULL;
|
||||
stencil_create_host = NULL;
|
||||
nlist_device = 0;
|
||||
lists_device = NULL;
|
||||
pair_build_device = NULL;
|
||||
stencil_create_device = NULL;
|
||||
|
||||
device_flag = 0;
|
||||
bondlist = NULL;
|
||||
anglelist = NULL;
|
||||
dihedrallist = NULL;
|
||||
improperlist = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -58,14 +55,6 @@ NeighborKokkos::~NeighborKokkos()
|
||||
memory->destroy_kokkos(k_cutneighsq,cutneighsq);
|
||||
cutneighsq = NULL;
|
||||
|
||||
for (int i = 0; i < nlist_host; i++) delete lists_host[i];
|
||||
delete [] lists_host;
|
||||
for (int i = 0; i < nlist_device; i++) delete lists_device[i];
|
||||
delete [] lists_device;
|
||||
|
||||
delete [] pair_build_device;
|
||||
delete [] pair_build_host;
|
||||
|
||||
memory->destroy_kokkos(k_ex_type,ex_type);
|
||||
memory->destroy_kokkos(k_ex1_type,ex1_type);
|
||||
memory->destroy_kokkos(k_ex2_type,ex2_type);
|
||||
@ -89,6 +78,11 @@ void NeighborKokkos::init()
|
||||
{
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
Neighbor::init();
|
||||
|
||||
// 1st time allocation of xhold
|
||||
|
||||
if (dist_check)
|
||||
xhold = DAT::tdual_x_array("neigh:xhold",maxhold);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -101,158 +95,16 @@ void NeighborKokkos::init_cutneighsq_kokkos(int n)
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int NeighborKokkos::init_lists_kokkos()
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nlist_host; i++) delete lists_host[i];
|
||||
delete [] lists_host;
|
||||
delete [] pair_build_host;
|
||||
delete [] stencil_create_host;
|
||||
nlist_host = 0;
|
||||
|
||||
for (i = 0; i < nlist_device; i++) delete lists_device[i];
|
||||
delete [] lists_device;
|
||||
delete [] pair_build_device;
|
||||
delete [] stencil_create_device;
|
||||
nlist_device = 0;
|
||||
|
||||
nlist = 0;
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
if (requests[i]->kokkos_device) nlist_device++;
|
||||
else if (requests[i]->kokkos_host) nlist_host++;
|
||||
else nlist++;
|
||||
}
|
||||
|
||||
lists_host = new NeighListKokkos<LMPHostType>*[nrequest];
|
||||
pair_build_host = new PairPtrHost[nrequest];
|
||||
stencil_create_host = new StencilPtrHost[nrequest];
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
lists_host[i] = NULL;
|
||||
pair_build_host[i] = NULL;
|
||||
stencil_create_host[i] = NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
if (!requests[i]->kokkos_host) continue;
|
||||
lists_host[i] = new NeighListKokkos<LMPHostType>(lmp);
|
||||
lists_host[i]->index = i;
|
||||
lists_host[i]->dnum = requests[i]->dnum;
|
||||
if (requests[i]->pair) {
|
||||
Pair *pair = (Pair *) requests[i]->requestor;
|
||||
pair->init_list(requests[i]->id,lists_host[i]);
|
||||
}
|
||||
if (requests[i]->fix) {
|
||||
Fix *fix = (Fix *) requests[i]->requestor;
|
||||
fix->init_list(requests[i]->id,lists_host[i]);
|
||||
}
|
||||
}
|
||||
|
||||
lists_device = new NeighListKokkos<LMPDeviceType>*[nrequest];
|
||||
pair_build_device = new PairPtrDevice[nrequest];
|
||||
stencil_create_device = new StencilPtrDevice[nrequest];
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
lists_device[i] = NULL;
|
||||
pair_build_device[i] = NULL;
|
||||
stencil_create_device[i] = NULL;
|
||||
}
|
||||
|
||||
for (i = 0; i < nrequest; i++) {
|
||||
if (!requests[i]->kokkos_device) continue;
|
||||
lists_device[i] = new NeighListKokkos<LMPDeviceType>(lmp);
|
||||
lists_device[i]->index = i;
|
||||
lists_device[i]->dnum = requests[i]->dnum;
|
||||
if (requests[i]->pair) {
|
||||
Pair *pair = (Pair *) requests[i]->requestor;
|
||||
pair->init_list(requests[i]->id,lists_device[i]);
|
||||
}
|
||||
if (requests[i]->fix) {
|
||||
Fix *fix = (Fix *) requests[i]->requestor;
|
||||
fix->init_list(requests[i]->id,lists_device[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// 1st time allocation of xhold
|
||||
|
||||
if (dist_check)
|
||||
xhold = DAT::tdual_x_array("neigh:xhold",maxhold);
|
||||
|
||||
// return # of non-Kokkos lists
|
||||
|
||||
return nlist;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_list_flags1_kokkos(int i)
|
||||
void NeighborKokkos::create_kokkos_list(int i)
|
||||
{
|
||||
if (style != BIN)
|
||||
error->all(FLERR,"KOKKOS package only supports 'bin' neighbor lists");
|
||||
|
||||
if (lists_host[i]) {
|
||||
lists_host[i]->buildflag = 1;
|
||||
if (pair_build_host[i] == NULL) lists_host[i]->buildflag = 0;
|
||||
if (requests[i]->occasional) lists_host[i]->buildflag = 0;
|
||||
|
||||
lists_host[i]->growflag = 1;
|
||||
if (requests[i]->copy) lists_host[i]->growflag = 0;
|
||||
|
||||
lists_host[i]->stencilflag = 1;
|
||||
if (style == NSQ) lists_host[i]->stencilflag = 0;
|
||||
if (stencil_create[i] == NULL) lists_host[i]->stencilflag = 0;
|
||||
|
||||
lists_host[i]->ghostflag = 0;
|
||||
if (requests[i]->ghost) lists_host[i]->ghostflag = 1;
|
||||
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
|
||||
}
|
||||
|
||||
if (lists_device[i]) {
|
||||
lists_device[i]->buildflag = 1;
|
||||
if (pair_build_device[i] == NULL) lists_device[i]->buildflag = 0;
|
||||
if (requests[i]->occasional) lists_device[i]->buildflag = 0;
|
||||
|
||||
lists_device[i]->growflag = 1;
|
||||
if (requests[i]->copy) lists_device[i]->growflag = 0;
|
||||
|
||||
lists_device[i]->stencilflag = 1;
|
||||
if (style == NSQ) lists_device[i]->stencilflag = 0;
|
||||
if (stencil_create[i] == NULL) lists_device[i]->stencilflag = 0;
|
||||
|
||||
lists_device[i]->ghostflag = 0;
|
||||
if (requests[i]->ghost) lists_device[i]->ghostflag = 1;
|
||||
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_list_flags2_kokkos(int i)
|
||||
{
|
||||
if (lists_host[i]) {
|
||||
if (lists_host[i]->buildflag) blist[nblist++] = i;
|
||||
if (lists_host[i]->growflag && requests[i]->occasional == 0)
|
||||
glist[nglist++] = i;
|
||||
if (lists_host[i]->stencilflag && requests[i]->occasional == 0)
|
||||
slist[nslist++] = i;
|
||||
}
|
||||
|
||||
if (lists_device[i]) {
|
||||
if (lists_device[i]->buildflag) blist[nblist++] = i;
|
||||
if (lists_device[i]->growflag && requests[i]->occasional == 0)
|
||||
glist[nglist++] = i;
|
||||
if (lists_device[i]->stencilflag && requests[i]->occasional == 0)
|
||||
slist[nslist++] = i;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_list_grow_kokkos(int i)
|
||||
{
|
||||
if (lists_host[i]!=NULL && lists_host[i]->growflag)
|
||||
lists_host[i]->grow(maxatom);
|
||||
if (lists_device[i]!=NULL && lists_device[i]->growflag)
|
||||
lists_device[i]->grow(maxatom);
|
||||
if (requests[i]->kokkos_device) {
|
||||
lists[i] = new NeighListKokkos<LMPDeviceType>(lmp);
|
||||
device_flag = 1;
|
||||
} else if (requests[i]->kokkos_host)
|
||||
lists[i] = new NeighListKokkos<LMPHostType>(lmp);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -281,49 +133,6 @@ void NeighborKokkos::init_ex_mol_bit_kokkos()
|
||||
k_ex_mol_bit.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::choose_build(int index, NeighRequest *rq)
|
||||
{
|
||||
if (rq->kokkos_host != 0) {
|
||||
PairPtrHost pb = NULL;
|
||||
if (rq->ghost) {
|
||||
if (rq->full) {
|
||||
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPHostType>;
|
||||
else pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,0,1>;
|
||||
}
|
||||
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,1,1>;
|
||||
} else {
|
||||
if (rq->full) {
|
||||
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPHostType>;
|
||||
else pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,0,0>;
|
||||
}
|
||||
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,1,0>;
|
||||
}
|
||||
pair_build_host[index] = pb;
|
||||
}
|
||||
if (rq->kokkos_device != 0) {
|
||||
PairPtrDevice pb = NULL;
|
||||
if (rq->ghost) {
|
||||
if (rq->full) {
|
||||
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
|
||||
else pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0,1>;
|
||||
}
|
||||
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1,1>;
|
||||
} else {
|
||||
if (rq->full) {
|
||||
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
|
||||
else pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0,0>;
|
||||
}
|
||||
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1,0>;
|
||||
}
|
||||
pair_build_device[index] = pb;
|
||||
return;
|
||||
}
|
||||
|
||||
Neighbor::choose_build(index,rq);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
if any atom moved trigger distance (half of neighbor skin) return 1
|
||||
shrink trigger distance if box size has changed
|
||||
@ -337,7 +146,7 @@ void NeighborKokkos::choose_build(int index, NeighRequest *rq)
|
||||
|
||||
int NeighborKokkos::check_distance()
|
||||
{
|
||||
if (nlist_device)
|
||||
if (device_flag)
|
||||
check_distance_kokkos<LMPDeviceType>();
|
||||
else
|
||||
check_distance_kokkos<LMPHostType>();
|
||||
@ -417,7 +226,7 @@ void NeighborKokkos::operator()(TagNeighborCheckDistance<DeviceType>, const int
|
||||
|
||||
void NeighborKokkos::build(int topoflag)
|
||||
{
|
||||
if (nlist_device)
|
||||
if (device_flag)
|
||||
build_kokkos<LMPDeviceType>(topoflag);
|
||||
else
|
||||
build_kokkos<LMPHostType>(topoflag);
|
||||
@ -428,18 +237,25 @@ void NeighborKokkos::build_kokkos(int topoflag)
|
||||
{
|
||||
typedef DeviceType device_type;
|
||||
|
||||
int i;
|
||||
int i,m;
|
||||
|
||||
ago = 0;
|
||||
ncalls++;
|
||||
lastcall = update->ntimestep;
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
int nall = nlocal + atom->nghost;
|
||||
|
||||
// check that using special bond flags will not overflow neigh lists
|
||||
|
||||
if (nall > NEIGHMASK)
|
||||
error->one(FLERR,"Too many local+ghost atoms for neighbor list");
|
||||
|
||||
// store current atom positions and box size if needed
|
||||
|
||||
if (dist_check) {
|
||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
|
||||
x = atomKK->k_x;
|
||||
int nlocal = atom->nlocal;
|
||||
if (includegroup) nlocal = atom->nfirst;
|
||||
int maxhold_kokkos = xhold.view<DeviceType>().dimension_0();
|
||||
if (atom->nmax > maxhold || maxhold_kokkos < maxhold) {
|
||||
@ -471,54 +287,33 @@ void NeighborKokkos::build_kokkos(int topoflag)
|
||||
}
|
||||
}
|
||||
|
||||
// if any lists store neighbors of ghosts:
|
||||
// invoke grow() if nlocal+nghost exceeds previous list size
|
||||
// else only invoke grow() if nlocal exceeds previous list size
|
||||
// only for lists with growflag set and which are perpetual (glist)
|
||||
// bin atoms for all NBin instances
|
||||
// not just NBin associated with perpetual lists
|
||||
// b/c cannot wait to bin occasional lists in build_one() call
|
||||
// if bin then, atoms may have moved outside of proc domain & bin extent,
|
||||
// leading to errors or even a crash
|
||||
|
||||
if (anyghostlist && atom->nmax > maxatom) {
|
||||
maxatom = atom->nmax;
|
||||
for (i = 0; i < nglist; i++)
|
||||
if (lists[glist[i]]) lists[glist[i]]->grow(maxatom);
|
||||
else init_list_grow_kokkos(glist[i]);
|
||||
} else if (atom->nmax > maxatom) {
|
||||
maxatom = atom->nmax;
|
||||
for (i = 0; i < nglist; i++)
|
||||
if (lists[glist[i]]) lists[glist[i]]->grow(maxatom);
|
||||
else init_list_grow_kokkos(glist[i]);
|
||||
}
|
||||
|
||||
// extend atom bin list if necessary
|
||||
|
||||
if (style != NSQ && atom->nmax > maxbin) {
|
||||
maxbin = atom->nmax;
|
||||
memory->destroy(bins);
|
||||
memory->create(bins,maxbin,"bins");
|
||||
}
|
||||
|
||||
// check that using special bond flags will not overflow neigh lists
|
||||
|
||||
if (atom->nlocal+atom->nghost > NEIGHMASK)
|
||||
error->one(FLERR,"Too many local+ghost atoms for neighbor list");
|
||||
|
||||
// invoke building of pair and molecular topology neighbor lists
|
||||
// only for pairwise lists with buildflag set
|
||||
// blist is for standard neigh lists, otherwise is a Kokkos list
|
||||
|
||||
for (i = 0; i < nblist; i++) {
|
||||
if (lists[blist[i]]) {
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
(this->*pair_build[blist[i]])(lists[blist[i]]);
|
||||
} else {
|
||||
if (lists_host[blist[i]])
|
||||
(this->*pair_build_host[blist[i]])(lists_host[blist[i]]);
|
||||
else if (lists_device[blist[i]])
|
||||
(this->*pair_build_device[blist[i]])(lists_device[blist[i]]);
|
||||
if (style != NSQ) {
|
||||
for (int i = 0; i < nbin; i++) {
|
||||
neigh_bin[i]->bin_atoms_setup(nall);
|
||||
neigh_bin[i]->bin_atoms();
|
||||
}
|
||||
}
|
||||
|
||||
if (atom->molecular && topoflag)
|
||||
build_topology_kokkos();
|
||||
// build pairwise lists for all perpetual NPair/NeighList
|
||||
// grow() with nlocal/nall args so that only realloc if have to
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
for (i = 0; i < npair_perpetual; i++) {
|
||||
m = plist[i];
|
||||
lists[m]->grow(nlocal,nall);
|
||||
neigh_pair[m]->build_setup();
|
||||
neigh_pair[m]->build(lists[m]);
|
||||
}
|
||||
|
||||
// build topology lists for bonds/angles/etc
|
||||
|
||||
if (atom->molecular && topoflag) build_topology();
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
@ -532,26 +327,6 @@ void NeighborKokkos::operator()(TagNeighborXhold<DeviceType>, const int &i) cons
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::setup_bins_kokkos(int i)
|
||||
{
|
||||
if (lists_host[slist[i]]) {
|
||||
lists_host[slist[i]]->stencil_allocate(smax,style);
|
||||
(this->*stencil_create[slist[i]])(lists_host[slist[i]],sx,sy,sz);
|
||||
} else if (lists_device[slist[i]]) {
|
||||
lists_device[slist[i]]->stencil_allocate(smax,style);
|
||||
(this->*stencil_create[slist[i]])(lists_device[slist[i]],sx,sy,sz);
|
||||
}
|
||||
|
||||
//if (i < nslist-1) return; // this won't work if a non-kokkos neighbor list is last
|
||||
|
||||
if (maxhead > k_bins.d_view.dimension_0()) {
|
||||
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",maxhead,atoms_per_bin);
|
||||
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",maxhead);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::modify_ex_type_grow_kokkos(){
|
||||
memory->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type");
|
||||
k_ex1_type.modify<LMPHostType>();
|
||||
@ -575,8 +350,8 @@ void NeighborKokkos::modify_mol_group_grow_kokkos(){
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::init_topology_kokkos() {
|
||||
if (nlist_device) {
|
||||
void NeighborKokkos::init_topology() {
|
||||
if (device_flag) {
|
||||
neighbond_device.init_topology_kk();
|
||||
} else {
|
||||
neighbond_host.init_topology_kk();
|
||||
@ -588,8 +363,8 @@ void NeighborKokkos::init_topology_kokkos() {
|
||||
normally built with pair lists, but USER-CUDA separates them
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::build_topology_kokkos() {
|
||||
if (nlist_device) {
|
||||
void NeighborKokkos::build_topology() {
|
||||
if (device_flag) {
|
||||
neighbond_device.build_topology_kk();
|
||||
|
||||
k_bondlist = neighbond_device.k_bondlist;
|
||||
@ -637,7 +412,3 @@ void NeighborKokkos::build_topology_kokkos() {
|
||||
k_improperlist.modify<LMPHostType>();
|
||||
}
|
||||
}
|
||||
|
||||
// include to trigger instantiation of templated functions
|
||||
|
||||
#include "neigh_full_kokkos.h"
|
||||
|
||||
@ -22,316 +22,6 @@
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class Device>
|
||||
class NeighborKokkosExecute
|
||||
{
|
||||
typedef ArrayTypes<Device> AT;
|
||||
|
||||
public:
|
||||
NeighListKokkos<Device> neigh_list;
|
||||
const typename AT::t_xfloat_2d_randomread cutneighsq;
|
||||
const typename AT::t_int_1d bincount;
|
||||
const typename AT::t_int_1d_const c_bincount;
|
||||
typename AT::t_int_2d bins;
|
||||
typename AT::t_int_2d_const c_bins;
|
||||
const typename AT::t_x_array_randomread x;
|
||||
const typename AT::t_int_1d_const type,mask,molecule;
|
||||
|
||||
const typename AT::t_tagint_1d_const tag;
|
||||
const typename AT::t_tagint_2d_const special;
|
||||
const typename AT::t_int_2d_const nspecial;
|
||||
const int molecular;
|
||||
int moltemplate;
|
||||
|
||||
int special_flag[4];
|
||||
|
||||
const int nbinx,nbiny,nbinz;
|
||||
const int mbinx,mbiny,mbinz;
|
||||
const int mbinxlo,mbinylo,mbinzlo;
|
||||
const X_FLOAT bininvx,bininvy,bininvz;
|
||||
X_FLOAT bboxhi[3],bboxlo[3];
|
||||
|
||||
const int nlocal;
|
||||
|
||||
const int exclude;
|
||||
|
||||
const int nex_type;
|
||||
const int maxex_type;
|
||||
const typename AT::t_int_1d_const ex1_type,ex2_type;
|
||||
const typename AT::t_int_2d_const ex_type;
|
||||
|
||||
const int nex_group;
|
||||
const int maxex_group;
|
||||
const typename AT::t_int_1d_const ex1_group,ex2_group;
|
||||
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
|
||||
|
||||
const int nex_mol;
|
||||
const int maxex_mol;
|
||||
const typename AT::t_int_1d_const ex_mol_group;
|
||||
const typename AT::t_int_1d_const ex_mol_bit;
|
||||
|
||||
typename AT::t_int_scalar resize;
|
||||
typename AT::t_int_scalar new_maxneighs;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
|
||||
|
||||
const int xperiodic, yperiodic, zperiodic;
|
||||
const int xprd_half, yprd_half, zprd_half;
|
||||
|
||||
NeighborKokkosExecute(
|
||||
const NeighListKokkos<Device> &_neigh_list,
|
||||
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
|
||||
const typename AT::t_int_1d &_bincount,
|
||||
const typename AT::t_int_2d &_bins,
|
||||
const int _nlocal,
|
||||
const typename AT::t_x_array_randomread &_x,
|
||||
const typename AT::t_int_1d_const &_type,
|
||||
const typename AT::t_int_1d_const &_mask,
|
||||
const typename AT::t_int_1d_const &_molecule,
|
||||
const typename AT::t_tagint_1d_const &_tag,
|
||||
const typename AT::t_tagint_2d_const &_special,
|
||||
const typename AT::t_int_2d_const &_nspecial,
|
||||
const int &_molecular,
|
||||
const int & _nbinx,const int & _nbiny,const int & _nbinz,
|
||||
const int & _mbinx,const int & _mbiny,const int & _mbinz,
|
||||
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
|
||||
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
|
||||
const int & _exclude,const int & _nex_type,const int & _maxex_type,
|
||||
const typename AT::t_int_1d_const & _ex1_type,
|
||||
const typename AT::t_int_1d_const & _ex2_type,
|
||||
const typename AT::t_int_2d_const & _ex_type,
|
||||
const int & _nex_group,const int & _maxex_group,
|
||||
const typename AT::t_int_1d_const & _ex1_group,
|
||||
const typename AT::t_int_1d_const & _ex2_group,
|
||||
const typename AT::t_int_1d_const & _ex1_bit,
|
||||
const typename AT::t_int_1d_const & _ex2_bit,
|
||||
const int & _nex_mol,const int & _maxex_mol,
|
||||
const typename AT::t_int_1d_const & _ex_mol_group,
|
||||
const typename AT::t_int_1d_const & _ex_mol_bit,
|
||||
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
|
||||
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
|
||||
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
|
||||
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
|
||||
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
|
||||
nlocal(_nlocal),
|
||||
x(_x),type(_type),mask(_mask),molecule(_molecule),
|
||||
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
|
||||
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
|
||||
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
|
||||
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
|
||||
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
|
||||
exclude(_exclude),nex_type(_nex_type),maxex_type(_maxex_type),
|
||||
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
|
||||
nex_group(_nex_group),maxex_group(_maxex_group),
|
||||
ex1_group(_ex1_group),ex2_group(_ex2_group),
|
||||
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),maxex_mol(_maxex_mol),
|
||||
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
|
||||
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
|
||||
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half){
|
||||
|
||||
if (molecular == 2) moltemplate = 1;
|
||||
else moltemplate = 0;
|
||||
|
||||
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
|
||||
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
|
||||
|
||||
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_resize = Kokkos::create_mirror_view(resize);
|
||||
#else
|
||||
h_resize = resize;
|
||||
#endif
|
||||
h_resize() = 1;
|
||||
new_maxneighs = typename AT::
|
||||
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
|
||||
#else
|
||||
h_new_maxneighs = new_maxneighs;
|
||||
#endif
|
||||
h_new_maxneighs() = neigh_list.maxneighs;
|
||||
};
|
||||
|
||||
~NeighborKokkosExecute() {neigh_list.clean_copy();};
|
||||
|
||||
template<int HalfNeigh, int GhostNewton>
|
||||
KOKKOS_FUNCTION
|
||||
void build_Item(const int &i) const;
|
||||
|
||||
template<int HalfNeigh>
|
||||
KOKKOS_FUNCTION
|
||||
void build_Item_Ghost(const int &i) const;
|
||||
|
||||
template<int ClusterSize>
|
||||
KOKKOS_FUNCTION
|
||||
void build_cluster_Item(const int &i) const;
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template<int HalfNeigh, int GhostNewton>
|
||||
__device__ inline
|
||||
void build_ItemCuda(typename Kokkos::TeamPolicy<Device>::member_type dev) const;
|
||||
#endif
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void binatomsItem(const int &i) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi[0])
|
||||
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi[1])
|
||||
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi[2])
|
||||
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi[0])
|
||||
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi[1])
|
||||
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi[2])
|
||||
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
|
||||
|
||||
i[0] = ix - mbinxlo;
|
||||
i[1] = iy - mbinylo;
|
||||
i[2] = iz - mbinzlo;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int find_special(const int &i, const int &j) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int minimum_image_check(double dx, double dy, double dz) const {
|
||||
if (xperiodic && fabs(dx) > xprd_half) return 1;
|
||||
if (yperiodic && fabs(dy) > yprd_half) return 1;
|
||||
if (zperiodic && fabs(dz) > zprd_half) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<class Device>
|
||||
struct NeighborKokkosBinAtomsFunctor {
|
||||
typedef Device device_type;
|
||||
|
||||
const NeighborKokkosExecute<Device> c;
|
||||
|
||||
NeighborKokkosBinAtomsFunctor(const NeighborKokkosExecute<Device> &_c):
|
||||
c(_c) {};
|
||||
~NeighborKokkosBinAtomsFunctor() {}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.binatomsItem(i);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Device,int HALF_NEIGH,int GHOST_NEWTON>
|
||||
struct NeighborKokkosBuildFunctor {
|
||||
typedef Device device_type;
|
||||
|
||||
const NeighborKokkosExecute<Device> c;
|
||||
const size_t sharedsize;
|
||||
|
||||
NeighborKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
|
||||
const size_t _sharedsize):c(_c),
|
||||
sharedsize(_sharedsize) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
|
||||
}
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (typename Kokkos::TeamPolicy<Device>::member_type dev) const {
|
||||
c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
|
||||
}
|
||||
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
|
||||
#endif
|
||||
};
|
||||
|
||||
template<class Device,int HALF_NEIGH>
|
||||
struct NeighborKokkosBuildFunctorGhost {
|
||||
typedef Device device_type;
|
||||
|
||||
const NeighborKokkosExecute<Device> c;
|
||||
const size_t sharedsize;
|
||||
|
||||
NeighborKokkosBuildFunctorGhost(const NeighborKokkosExecute<Device> &_c,
|
||||
const size_t _sharedsize):c(_c),
|
||||
sharedsize(_sharedsize) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.template build_Item_Ghost<HALF_NEIGH>(i);
|
||||
}
|
||||
};
|
||||
|
||||
template<class Device,int ClusterSize>
|
||||
struct NeighborClusterKokkosBuildFunctor {
|
||||
typedef Device device_type;
|
||||
|
||||
const NeighborKokkosExecute<Device> c;
|
||||
const size_t sharedsize;
|
||||
|
||||
NeighborClusterKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
|
||||
const size_t _sharedsize):c(_c),
|
||||
sharedsize(_sharedsize) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.template build_cluster_Item<ClusterSize>(i);
|
||||
}
|
||||
};
|
||||
|
||||
template<class DeviceType>
|
||||
struct TagNeighborCheckDistance{};
|
||||
|
||||
@ -342,24 +32,11 @@ class NeighborKokkos : public Neighbor {
|
||||
public:
|
||||
typedef int value_type;
|
||||
|
||||
|
||||
|
||||
int nlist_host; // pairwise neighbor lists on Host
|
||||
NeighListKokkos<LMPHostType> **lists_host;
|
||||
int nlist_device; // pairwise neighbor lists on Device
|
||||
NeighListKokkos<LMPDeviceType> **lists_device;
|
||||
|
||||
NeighBondKokkos<LMPHostType> neighbond_host;
|
||||
NeighBondKokkos<LMPDeviceType> neighbond_device;
|
||||
|
||||
DAT::tdual_int_2d k_bondlist;
|
||||
DAT::tdual_int_2d k_anglelist;
|
||||
DAT::tdual_int_2d k_dihedrallist;
|
||||
DAT::tdual_int_2d k_improperlist;
|
||||
|
||||
NeighborKokkos(class LAMMPS *);
|
||||
~NeighborKokkos();
|
||||
void init();
|
||||
void init_topology();
|
||||
void build_topology();
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -369,11 +46,7 @@ class NeighborKokkos : public Neighbor {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagNeighborXhold<DeviceType>, const int&) const;
|
||||
|
||||
private:
|
||||
int atoms_per_bin;
|
||||
DAT::tdual_xfloat_2d k_cutneighsq;
|
||||
DAT::tdual_int_1d k_bincount;
|
||||
DAT::tdual_int_2d k_bins;
|
||||
|
||||
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
|
||||
DAT::tdual_int_2d k_ex_type;
|
||||
@ -382,6 +55,16 @@ class NeighborKokkos : public Neighbor {
|
||||
DAT::tdual_int_1d k_ex_mol_group;
|
||||
DAT::tdual_int_1d k_ex_mol_bit;
|
||||
|
||||
NeighBondKokkos<LMPHostType> neighbond_host;
|
||||
NeighBondKokkos<LMPDeviceType> neighbond_device;
|
||||
|
||||
DAT::tdual_int_2d k_bondlist;
|
||||
DAT::tdual_int_2d k_anglelist;
|
||||
DAT::tdual_int_2d k_dihedrallist;
|
||||
DAT::tdual_int_2d k_improperlist;
|
||||
|
||||
private:
|
||||
|
||||
DAT::tdual_x_array x;
|
||||
DAT::tdual_x_array xhold;
|
||||
|
||||
@ -389,14 +72,10 @@ class NeighborKokkos : public Neighbor {
|
||||
int device_flag;
|
||||
|
||||
void init_cutneighsq_kokkos(int);
|
||||
int init_lists_kokkos();
|
||||
void init_list_flags1_kokkos(int);
|
||||
void init_list_flags2_kokkos(int);
|
||||
void init_list_grow_kokkos(int);
|
||||
void create_kokkos_list(int);
|
||||
void init_ex_type_kokkos(int);
|
||||
void init_ex_bit_kokkos();
|
||||
void init_ex_mol_bit_kokkos();
|
||||
void choose_build(int, NeighRequest *);
|
||||
virtual int check_distance();
|
||||
template<class DeviceType> int check_distance_kokkos();
|
||||
virtual void build(int);
|
||||
@ -405,27 +84,6 @@ class NeighborKokkos : public Neighbor {
|
||||
void modify_ex_type_grow_kokkos();
|
||||
void modify_ex_group_grow_kokkos();
|
||||
void modify_mol_group_grow_kokkos();
|
||||
void init_topology_kokkos();
|
||||
void build_topology_kokkos();
|
||||
|
||||
typedef void (NeighborKokkos::*PairPtrHost)
|
||||
(class NeighListKokkos<LMPHostType> *);
|
||||
PairPtrHost *pair_build_host;
|
||||
typedef void (NeighborKokkos::*PairPtrDevice)
|
||||
(class NeighListKokkos<LMPDeviceType> *);
|
||||
PairPtrDevice *pair_build_device;
|
||||
|
||||
template<class DeviceType,int HALF_NEIGH, int GHOST>
|
||||
void full_bin_kokkos(NeighListKokkos<DeviceType> *list);
|
||||
template<class DeviceType>
|
||||
void full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list);
|
||||
|
||||
typedef void (NeighborKokkos::*StencilPtrHost)
|
||||
(class NeighListKokkos<LMPHostType> *, int, int, int);
|
||||
StencilPtrHost *stencil_create_host;
|
||||
typedef void (NeighborKokkos::*StencilPtrDevice)
|
||||
(class NeighListKokkos<LMPDeviceType> *, int, int, int);
|
||||
StencilPtrDevice *stencil_create_device;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
62
src/KOKKOS/npair_copy_kokkos.cpp
Normal file
62
src/KOKKOS/npair_copy_kokkos.cpp
Normal file
@ -0,0 +1,62 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_copy_kokkos.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
#include "domain.h"
|
||||
#include "my_page.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
NPairCopyKokkos<DeviceType>::NPairCopyKokkos(LAMMPS *lmp) : NPair(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create list which is simply a copy of parent list
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void NPairCopyKokkos<DeviceType>::build(NeighList *list)
|
||||
{
|
||||
NeighList *listcopy = list->listcopy;
|
||||
|
||||
list->inum = listcopy->inum;
|
||||
list->gnum = listcopy->gnum;
|
||||
list->ilist = listcopy->ilist;
|
||||
list->numneigh = listcopy->numneigh;
|
||||
list->firstneigh = listcopy->firstneigh;
|
||||
list->firstdouble = listcopy->firstdouble;
|
||||
list->ipage = listcopy->ipage;
|
||||
list->dpage = listcopy->dpage;
|
||||
|
||||
NeighListKokkos<DeviceType>* list_kk = (NeighListKokkos<DeviceType>*) list;
|
||||
NeighListKokkos<DeviceType>* listcopy_kk = (NeighListKokkos<DeviceType>*) list->listcopy;
|
||||
|
||||
list_kk->d_ilist = listcopy_kk->d_ilist;
|
||||
list_kk->d_numneigh = listcopy_kk->d_numneigh;
|
||||
list_kk->d_neighbors = listcopy_kk->d_neighbors;
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class NPairCopyKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class NPairCopyKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
||||
48
src/KOKKOS/npair_copy_kokkos.h
Normal file
48
src/KOKKOS/npair_copy_kokkos.h
Normal file
@ -0,0 +1,48 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
NPairStyle(copy/kk/device,
|
||||
NPairCopyKokkos<LMPDeviceType>,
|
||||
NP_COPY | NP_KOKKOS_DEVICE)
|
||||
|
||||
NPairStyle(copy/kk/host,
|
||||
NPairCopyKokkos<LMPHostType>,
|
||||
NP_COPY | NP_KOKKOS_HOST)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_COPY_KOKKOS_H
|
||||
#define LMP_NPAIR_COPY_KOKKOS_H
|
||||
|
||||
#include "npair.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class NPairCopyKokkos : public NPair {
|
||||
public:
|
||||
NPairCopyKokkos(class LAMMPS *);
|
||||
~NPairCopyKokkos() {}
|
||||
void build(class NeighList *);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
@ -11,17 +11,105 @@
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_kokkos.h"
|
||||
#include "atom_kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
#include "domain_kokkos.h"
|
||||
#include "neighbor_kokkos.h"
|
||||
#include "nbin_kokkos.h"
|
||||
#include "nstencil.h"
|
||||
#include "force.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, int HALF_NEIGH, int GHOST>
|
||||
void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
|
||||
NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) {
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy needed info from Neighbor class to this build class
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, int HALF_NEIGH, int GHOST>
|
||||
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_neighbor_info()
|
||||
{
|
||||
NPair::copy_neighbor_info();
|
||||
|
||||
NeighborKokkos* neighborKK = (NeighborKokkos*) neighbor;
|
||||
|
||||
// general params
|
||||
|
||||
newton_pair = force->newton_pair;
|
||||
k_cutneighsq = neighborKK->k_cutneighsq;
|
||||
|
||||
// exclusion info
|
||||
|
||||
k_ex1_type = neighborKK->k_ex1_type;
|
||||
k_ex2_type = neighborKK->k_ex2_type;
|
||||
k_ex_type = neighborKK->k_ex_type;
|
||||
k_ex1_group = neighborKK->k_ex1_group;
|
||||
k_ex2_group = neighborKK->k_ex2_group;
|
||||
k_ex1_bit = neighborKK->k_ex1_bit;
|
||||
k_ex2_bit = neighborKK->k_ex2_bit;
|
||||
k_ex_mol_group = neighborKK->k_ex_mol_group;
|
||||
k_ex_mol_bit = neighborKK->k_ex_mol_bit;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy per-atom and per-bin vectors from NBin class to this build class
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, int HALF_NEIGH, int GHOST>
|
||||
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_bin_info()
|
||||
{
|
||||
NPair::copy_bin_info();
|
||||
|
||||
NBinKokkos<DeviceType>* nbKK = (NBinKokkos<DeviceType>*) nb;
|
||||
|
||||
atoms_per_bin = nbKK->atoms_per_bin;
|
||||
k_bincount = nbKK->k_bincount;
|
||||
k_bins = nbKK->k_bins;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy needed info from NStencil class to this build class
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, int HALF_NEIGH, int GHOST>
|
||||
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_stencil_info()
|
||||
{
|
||||
NPair::copy_stencil_info();
|
||||
|
||||
nstencil = ns->nstencil;
|
||||
|
||||
int maxstencil = ns->get_maxstencil();
|
||||
|
||||
k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil);
|
||||
for (int k = 0; k < maxstencil; k++)
|
||||
k_stencil.h_view(k) = ns->stencil[k];
|
||||
k_stencil.modify<LMPHostType>();
|
||||
k_stencil.sync<DeviceType>();
|
||||
if (GHOST) {
|
||||
k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil);
|
||||
for (int k = 0; k < maxstencil; k++) {
|
||||
k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0];
|
||||
k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1];
|
||||
k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2];
|
||||
}
|
||||
k_stencilxyz.modify<LMPHostType>();
|
||||
k_stencilxyz.sync<DeviceType>();
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType, int HALF_NEIGH, int GHOST>
|
||||
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::build(NeighList *list_)
|
||||
{
|
||||
NeighListKokkos<DeviceType>* list = (NeighListKokkos<DeviceType>*) list_;
|
||||
const int nlocal = includegroup?atom->nfirst:atom->nlocal;
|
||||
int nall = nlocal;
|
||||
if (GHOST)
|
||||
@ -32,7 +120,11 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
|
||||
data(*list,
|
||||
k_cutneighsq.view<DeviceType>(),
|
||||
k_bincount.view<DeviceType>(),
|
||||
k_bins.view<DeviceType>(),nlocal,
|
||||
k_bins.view<DeviceType>(),
|
||||
nstencil,
|
||||
k_stencil.view<DeviceType>(),
|
||||
k_stencilxyz.view<DeviceType>(),
|
||||
nlocal,
|
||||
atomKK->k_x.view<DeviceType>(),
|
||||
atomKK->k_type.view<DeviceType>(),
|
||||
atomKK->k_mask.view<DeviceType>(),
|
||||
@ -43,16 +135,16 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
|
||||
atomKK->molecular,
|
||||
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
|
||||
bininvx,bininvy,bininvz,
|
||||
exclude, nex_type,maxex_type,
|
||||
exclude, nex_type,
|
||||
k_ex1_type.view<DeviceType>(),
|
||||
k_ex2_type.view<DeviceType>(),
|
||||
k_ex_type.view<DeviceType>(),
|
||||
nex_group,maxex_group,
|
||||
nex_group,
|
||||
k_ex1_group.view<DeviceType>(),
|
||||
k_ex2_group.view<DeviceType>(),
|
||||
k_ex1_bit.view<DeviceType>(),
|
||||
k_ex2_bit.view<DeviceType>(),
|
||||
nex_mol, maxex_mol,
|
||||
nex_mol,
|
||||
k_ex_mol_group.view<DeviceType>(),
|
||||
k_ex_mol_bit.view<DeviceType>(),
|
||||
bboxhi,bboxlo,
|
||||
@ -69,40 +161,15 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
|
||||
k_ex2_bit.sync<DeviceType>();
|
||||
k_ex_mol_group.sync<DeviceType>();
|
||||
k_ex_mol_bit.sync<DeviceType>();
|
||||
k_bincount.sync<DeviceType>(),
|
||||
k_bins.sync<DeviceType>(),
|
||||
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
|
||||
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
|
||||
if (GHOST)
|
||||
Kokkos::deep_copy(list->d_stencilxyz,list->h_stencilxyz);
|
||||
|
||||
data.special_flag[0] = special_flag[0];
|
||||
data.special_flag[1] = special_flag[1];
|
||||
data.special_flag[2] = special_flag[2];
|
||||
data.special_flag[3] = special_flag[3];
|
||||
|
||||
while(data.h_resize() > 0) {
|
||||
data.h_resize() = 0;
|
||||
deep_copy(data.resize, data.h_resize);
|
||||
|
||||
MemsetZeroFunctor<DeviceType> f_zero;
|
||||
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
|
||||
Kokkos::parallel_for(mbins, f_zero);
|
||||
DeviceType::fence();
|
||||
|
||||
NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
|
||||
|
||||
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
|
||||
DeviceType::fence();
|
||||
|
||||
deep_copy(data.h_resize, data.resize);
|
||||
if(data.h_resize()) {
|
||||
|
||||
atoms_per_bin += 16;
|
||||
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
|
||||
data.bins = k_bins.view<DeviceType>();
|
||||
data.c_bins = data.bins;
|
||||
}
|
||||
}
|
||||
|
||||
if(list->d_neighbors.dimension_0()<nall) {
|
||||
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
|
||||
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
|
||||
@ -125,18 +192,18 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
|
||||
#endif
|
||||
|
||||
if (GHOST) {
|
||||
NeighborKokkosBuildFunctorGhost<DeviceType,HALF_NEIGH> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
NPairKokkosBuildFunctorGhost<DeviceType,HALF_NEIGH> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
Kokkos::parallel_for(nall, f);
|
||||
} else {
|
||||
if(newton_pair) {
|
||||
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
if (newton_pair) {
|
||||
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
Kokkos::parallel_for(config, f);
|
||||
#else
|
||||
Kokkos::parallel_for(nall, f);
|
||||
#endif
|
||||
} else {
|
||||
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
Kokkos::parallel_for(config, f);
|
||||
#else
|
||||
@ -169,24 +236,9 @@ if (GHOST) {
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device>
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void NeighborKokkosExecute<Device>::binatomsItem(const int &i) const
|
||||
{
|
||||
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2));
|
||||
|
||||
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
|
||||
if(ac < bins.dimension_1()) {
|
||||
bins(ibin, ac) = i;
|
||||
} else {
|
||||
resize() = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
template<class Device>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int NeighborKokkosExecute<Device>::find_special(const int &i, const int &j) const
|
||||
int NeighborKokkosExecute<DeviceType>::find_special(const int &i, const int &j) const
|
||||
{
|
||||
const int n1 = nspecial(i,0);
|
||||
const int n2 = nspecial(i,1);
|
||||
@ -214,9 +266,9 @@ int NeighborKokkosExecute<Device>::find_special(const int &i, const int &j) cons
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device>
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int NeighborKokkosExecute<Device>::exclusion(const int &i,const int &j,
|
||||
int NeighborKokkosExecute<DeviceType>::exclusion(const int &i,const int &j,
|
||||
const int &itype,const int &jtype) const
|
||||
{
|
||||
int m;
|
||||
@ -241,8 +293,8 @@ int NeighborKokkosExecute<Device>::exclusion(const int &i,const int &j,
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device> template<int HalfNeigh,int GhostNewton>
|
||||
void NeighborKokkosExecute<Device>::
|
||||
template<class DeviceType> template<int HalfNeigh,int Newton>
|
||||
void NeighborKokkosExecute<DeviceType>::
|
||||
build_Item(const int &i) const
|
||||
{
|
||||
/* if necessary, goto next page and add pages */
|
||||
@ -261,9 +313,8 @@ void NeighborKokkosExecute<Device>::
|
||||
|
||||
const int ibin = coord2bin(xtmp, ytmp, ztmp);
|
||||
|
||||
const int nstencil = neigh_list.nstencil;
|
||||
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
|
||||
= neigh_list.d_stencil;
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
|
||||
= d_stencil;
|
||||
|
||||
// loop over all bins in neighborhood (includes ibin)
|
||||
if(HalfNeigh)
|
||||
@ -272,8 +323,8 @@ void NeighborKokkosExecute<Device>::
|
||||
const int jtype = type(j);
|
||||
|
||||
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
|
||||
if((j == i) || (HalfNeigh && !GhostNewton && (j < i)) ||
|
||||
(HalfNeigh && GhostNewton && ((j < i) || ((j >= nlocal) &&
|
||||
if((j == i) || (HalfNeigh && !Newton && (j < i)) ||
|
||||
(HalfNeigh && Newton && ((j < i) || ((j >= nlocal) &&
|
||||
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
|
||||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
|
||||
) continue;
|
||||
@ -312,14 +363,16 @@ void NeighborKokkosExecute<Device>::
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
|
||||
// get subview of jbin
|
||||
if(HalfNeigh&&(ibin==jbin)) continue;
|
||||
//const ArrayTypes<Device>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
|
||||
//const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
|
||||
for(int m = 0; m < c_bincount(jbin); m++) {
|
||||
|
||||
const int j = c_bins(jbin,m);
|
||||
const int jtype = type(j);
|
||||
|
||||
if(HalfNeigh && !GhostNewton && (j < i)) continue;
|
||||
if(HalfNeigh && !Newton && (j < i)) continue;
|
||||
if(!HalfNeigh && j==i) continue;
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
@ -331,7 +384,7 @@ void NeighborKokkosExecute<Device>::
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if (molecular) {
|
||||
if (!moltemplate)
|
||||
which = find_special(i,j);
|
||||
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
@ -364,15 +417,18 @@ void NeighborKokkosExecute<Device>::
|
||||
|
||||
if(n >= new_maxneighs()) new_maxneighs() = n;
|
||||
}
|
||||
|
||||
neigh_list.d_ilist(i) = i;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
extern __shared__ X_FLOAT sharedmem[];
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType> template<int HalfNeigh,int GhostNewton>
|
||||
template<class DeviceType> template<int HalfNeigh,int Newton>
|
||||
__device__ inline
|
||||
void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const
|
||||
{
|
||||
@ -429,8 +485,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
|
||||
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
|
||||
if((j == i) ||
|
||||
(HalfNeigh && !GhostNewton && (j < i)) ||
|
||||
(HalfNeigh && GhostNewton &&
|
||||
(HalfNeigh && !Newton && (j < i)) ||
|
||||
(HalfNeigh && Newton &&
|
||||
((j < i) ||
|
||||
((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
|
||||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
|
||||
@ -445,7 +501,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
if (molecular) {
|
||||
int which = 0;
|
||||
if (!moltemplate)
|
||||
which = find_special(i,j);
|
||||
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
@ -472,9 +528,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
const int nstencil = neigh_list.nstencil;
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
|
||||
= neigh_list.d_stencil;
|
||||
= d_stencil;
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
|
||||
@ -501,7 +556,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
const int jtype = other_x[m + 3 * atoms_per_bin];
|
||||
|
||||
//if(HalfNeigh && (j < i)) continue;
|
||||
if(HalfNeigh && !GhostNewton && (j < i)) continue;
|
||||
if(HalfNeigh && !Newton && (j < i)) continue;
|
||||
if(!HalfNeigh && j==i) continue;
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
@ -514,7 +569,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
if (molecular) {
|
||||
int which = 0;
|
||||
if (!moltemplate)
|
||||
which = find_special(i,j);
|
||||
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
|
||||
/* else if (imol >= 0) */
|
||||
/* which = find_special(onemols[imol]->special[iatom], */
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
@ -558,8 +613,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device> template<int HalfNeigh>
|
||||
void NeighborKokkosExecute<Device>::
|
||||
template<class DeviceType> template<int HalfNeigh>
|
||||
void NeighborKokkosExecute<DeviceType>::
|
||||
build_Item_Ghost(const int &i) const
|
||||
{
|
||||
/* if necessary, goto next page and add pages */
|
||||
@ -576,11 +631,10 @@ void NeighborKokkosExecute<Device>::
|
||||
const X_FLOAT ztmp = x(i, 2);
|
||||
const int itype = type(i);
|
||||
|
||||
const int nstencil = neigh_list.nstencil;
|
||||
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
|
||||
= neigh_list.d_stencil;
|
||||
const typename ArrayTypes<Device>::t_int_1d_3_const_um stencilxyz
|
||||
= neigh_list.d_stencilxyz;
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
|
||||
= d_stencil;
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_3_const_um stencilxyz
|
||||
= d_stencilxyz;
|
||||
|
||||
// loop over all atoms in surrounding bins in stencil including self
|
||||
// when i is a ghost atom, must check if stencil bin is out of bounds
|
||||
@ -679,197 +733,17 @@ void NeighborKokkosExecute<Device>::
|
||||
neigh_list.d_ilist(i) = i;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list)
|
||||
{
|
||||
const int nall = includegroup?atom->nfirst:atom->nlocal;
|
||||
list->grow(nall);
|
||||
}
|
||||
|
||||
NeighborKokkosExecute<DeviceType>
|
||||
data(*list,
|
||||
k_cutneighsq.view<DeviceType>(),
|
||||
k_bincount.view<DeviceType>(),
|
||||
k_bins.view<DeviceType>(),nall,
|
||||
atomKK->k_x.view<DeviceType>(),
|
||||
atomKK->k_type.view<DeviceType>(),
|
||||
atomKK->k_mask.view<DeviceType>(),
|
||||
atomKK->k_molecule.view<DeviceType>(),
|
||||
atomKK->k_tag.view<DeviceType>(),
|
||||
atomKK->k_special.view<DeviceType>(),
|
||||
atomKK->k_nspecial.view<DeviceType>(),
|
||||
atomKK->molecular,
|
||||
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
|
||||
bininvx,bininvy,bininvz,
|
||||
exclude, nex_type,maxex_type,
|
||||
k_ex1_type.view<DeviceType>(),
|
||||
k_ex2_type.view<DeviceType>(),
|
||||
k_ex_type.view<DeviceType>(),
|
||||
nex_group,maxex_group,
|
||||
k_ex1_group.view<DeviceType>(),
|
||||
k_ex2_group.view<DeviceType>(),
|
||||
k_ex1_bit.view<DeviceType>(),
|
||||
k_ex2_bit.view<DeviceType>(),
|
||||
nex_mol, maxex_mol,
|
||||
k_ex_mol_group.view<DeviceType>(),
|
||||
k_ex_mol_bit.view<DeviceType>(),
|
||||
bboxhi,bboxlo,
|
||||
domain->xperiodic,domain->yperiodic,domain->zperiodic,
|
||||
domain->xprd_half,domain->yprd_half,domain->zprd_half);
|
||||
|
||||
k_cutneighsq.sync<DeviceType>();
|
||||
k_ex1_type.sync<DeviceType>();
|
||||
k_ex2_type.sync<DeviceType>();
|
||||
k_ex_type.sync<DeviceType>();
|
||||
k_ex1_group.sync<DeviceType>();
|
||||
k_ex2_group.sync<DeviceType>();
|
||||
k_ex1_bit.sync<DeviceType>();
|
||||
k_ex2_bit.sync<DeviceType>();
|
||||
k_ex_mol_group.sync<DeviceType>();
|
||||
k_ex_mol_bit.sync<DeviceType>();
|
||||
|
||||
data.special_flag[0] = special_flag[0];
|
||||
data.special_flag[1] = special_flag[1];
|
||||
data.special_flag[2] = special_flag[2];
|
||||
data.special_flag[3] = special_flag[3];
|
||||
|
||||
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
|
||||
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
|
||||
DeviceType::fence();
|
||||
|
||||
while(data.h_resize() > 0) {
|
||||
data.h_resize() = 0;
|
||||
deep_copy(data.resize, data.h_resize);
|
||||
|
||||
MemsetZeroFunctor<DeviceType> f_zero;
|
||||
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
|
||||
Kokkos::parallel_for(mbins, f_zero);
|
||||
DeviceType::fence();
|
||||
|
||||
NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
|
||||
|
||||
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
|
||||
DeviceType::fence();
|
||||
|
||||
deep_copy(data.h_resize, data.resize);
|
||||
if(data.h_resize()) {
|
||||
|
||||
atoms_per_bin += 16;
|
||||
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
|
||||
data.bins = k_bins.view<DeviceType>();
|
||||
data.c_bins = data.bins;
|
||||
}
|
||||
}
|
||||
|
||||
if(list->d_neighbors.dimension_0()<nall) {
|
||||
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
|
||||
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
|
||||
data.neigh_list.d_neighbors = list->d_neighbors;
|
||||
data.neigh_list.d_numneigh = list->d_numneigh;
|
||||
}
|
||||
data.h_resize()=1;
|
||||
while(data.h_resize()) {
|
||||
data.h_new_maxneighs() = list->maxneighs;
|
||||
data.h_resize() = 0;
|
||||
|
||||
Kokkos::deep_copy(data.resize, data.h_resize);
|
||||
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
|
||||
namespace LAMMPS_NS {
|
||||
template class NPairKokkos<LMPDeviceType,0,0>;
|
||||
template class NPairKokkos<LMPDeviceType,0,1>;
|
||||
template class NPairKokkos<LMPDeviceType,1,0>;
|
||||
template class NPairKokkos<LMPDeviceType,1,1>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
#define BINS_PER_BLOCK 2
|
||||
const int factor = atoms_per_bin<64?2:1;
|
||||
Kokkos::TeamPolicy<DeviceType> config((mbins+factor-1)/factor,atoms_per_bin*factor);
|
||||
#else
|
||||
const int factor = 1;
|
||||
template class NPairKokkos<LMPHostType,0,0>;
|
||||
template class NPairKokkos<LMPHostType,0,1>;
|
||||
template class NPairKokkos<LMPHostType,1,0>;
|
||||
template class NPairKokkos<LMPHostType,1,1>;
|
||||
#endif
|
||||
|
||||
if(newton_pair) {
|
||||
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
//#ifdef KOKKOS_HAVE_CUDA
|
||||
// Kokkos::parallel_for(config, f);
|
||||
//#else
|
||||
Kokkos::parallel_for(nall, f);
|
||||
//#endif
|
||||
} else {
|
||||
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
|
||||
//#ifdef KOKKOS_HAVE_CUDA
|
||||
// Kokkos::parallel_for(config, f);
|
||||
//#else
|
||||
Kokkos::parallel_for(nall, f);
|
||||
//#endif
|
||||
}
|
||||
DeviceType::fence();
|
||||
deep_copy(data.h_resize, data.resize);
|
||||
|
||||
if(data.h_resize()) {
|
||||
deep_copy(data.h_new_maxneighs, data.new_maxneighs);
|
||||
list->maxneighs = data.h_new_maxneighs() * 1.2;
|
||||
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs);
|
||||
data.neigh_list.d_neighbors = list->d_neighbors;
|
||||
data.neigh_list.maxneighs = list->maxneighs;
|
||||
}
|
||||
}
|
||||
|
||||
list->inum = nall;
|
||||
list->gnum = 0;
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class Device> template<int ClusterSize>
|
||||
void NeighborKokkosExecute<Device>::
|
||||
build_cluster_Item(const int &i) const
|
||||
{
|
||||
/* if necessary, goto next page and add pages */
|
||||
int n = 0;
|
||||
|
||||
// get subview of neighbors of i
|
||||
|
||||
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
|
||||
const X_FLOAT xtmp = x(i, 0);
|
||||
const X_FLOAT ytmp = x(i, 1);
|
||||
const X_FLOAT ztmp = x(i, 2);
|
||||
const int itype = type(i);
|
||||
|
||||
const int ibin = coord2bin(xtmp, ytmp, ztmp);
|
||||
|
||||
const int nstencil = neigh_list.nstencil;
|
||||
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
|
||||
= neigh_list.d_stencil;
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
for(int m = 0; m < c_bincount(jbin); m++) {
|
||||
const int j = c_bins(jbin,m);
|
||||
bool skip = i == j;
|
||||
for(int k = 0; k< (n<neigh_list.maxneighs?n:neigh_list.maxneighs); k++)
|
||||
if((j-(j%ClusterSize)) == neighbors_i(k)) {skip=true;};//{m += ClusterSize - j&(ClusterSize-1)-1; skip=true;}
|
||||
|
||||
if(!skip) {
|
||||
const int jtype = type(j);
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n) = (j-(j%ClusterSize));
|
||||
n++;
|
||||
//m += ClusterSize - j&(ClusterSize-1)-1;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
neigh_list.d_numneigh(i) = n;
|
||||
|
||||
if(n >= neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
|
||||
if(n >= new_maxneighs()) new_maxneighs() = n;
|
||||
}
|
||||
neigh_list.d_ilist(i) = i;
|
||||
}
|
||||
|
||||
}
|
||||
424
src/KOKKOS/npair_kokkos.h
Normal file
424
src/KOKKOS/npair_kokkos.h
Normal file
@ -0,0 +1,424 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
typedef NPairKokkos<LMPHostType,0,0> NPairKokkosFullBinHost;
|
||||
NPairStyle(full/bin/kk/host,
|
||||
NPairKokkosFullBinHost,
|
||||
NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
|
||||
|
||||
typedef NPairKokkos<LMPDeviceType,0,0> NPairKokkosFullBinDevice;
|
||||
NPairStyle(full/bin/kk/device,
|
||||
NPairKokkosFullBinDevice,
|
||||
NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
|
||||
|
||||
typedef NPairKokkos<LMPHostType,0,1> NPairKokkosFullBinGhostHost;
|
||||
NPairStyle(full/bin/ghost/kk/host,
|
||||
NPairKokkosFullBinGhostHost,
|
||||
NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
|
||||
|
||||
typedef NPairKokkos<LMPDeviceType,0,1> NPairKokkosFullBinGhostDevice;
|
||||
NPairStyle(full/bin/ghost/kk/device,
|
||||
NPairKokkosFullBinGhostDevice,
|
||||
NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
|
||||
|
||||
typedef NPairKokkos<LMPHostType,1,0> NPairKokkosHalfBinHost;
|
||||
NPairStyle(half/bin/kk/host,
|
||||
NPairKokkosHalfBinHost,
|
||||
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
|
||||
|
||||
typedef NPairKokkos<LMPDeviceType,1,0> NPairKokkosHalfBinDevice;
|
||||
NPairStyle(half/bin/kk/device,
|
||||
NPairKokkosHalfBinDevice,
|
||||
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
|
||||
|
||||
typedef NPairKokkos<LMPHostType,1,1> NPairKokkosHalfBinGhostHost;
|
||||
NPairStyle(half/bin/ghost/kk/host,
|
||||
NPairKokkosHalfBinGhostHost,
|
||||
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
|
||||
|
||||
typedef NPairKokkos<LMPDeviceType,1,1> NPairKokkosHalfBinGhostDevice;
|
||||
NPairStyle(half/bin/ghost/kk/device,
|
||||
NPairKokkosHalfBinGhostDevice,
|
||||
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_KOKKOS_H
|
||||
#define LMP_NPAIR_KOKKOS_H
|
||||
|
||||
#include "npair.h"
|
||||
#include "neigh_list_kokkos.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType, int HALF_NEIGH, int GHOST>
|
||||
class NPairKokkos : public NPair {
|
||||
public:
|
||||
NPairKokkos(class LAMMPS *);
|
||||
~NPairKokkos() {}
|
||||
void copy_neighbor_info();
|
||||
void copy_bin_info();
|
||||
void copy_stencil_info();
|
||||
void build(class NeighList *);
|
||||
|
||||
private:
|
||||
int newton_pair;
|
||||
|
||||
// data from Neighbor class
|
||||
|
||||
DAT::tdual_xfloat_2d k_cutneighsq;
|
||||
|
||||
// exclusion data from Neighbor class
|
||||
|
||||
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
|
||||
DAT::tdual_int_2d k_ex_type;
|
||||
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
|
||||
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
|
||||
DAT::tdual_int_1d k_ex_mol_group;
|
||||
DAT::tdual_int_1d k_ex_mol_bit;
|
||||
|
||||
// data from NBin class
|
||||
|
||||
int atoms_per_bin;
|
||||
DAT::tdual_int_1d k_bincount;
|
||||
DAT::tdual_int_2d k_bins;
|
||||
|
||||
// data from NStencil class
|
||||
|
||||
int nstencil;
|
||||
DAT::tdual_int_1d k_stencil; // # of J neighs for each I
|
||||
DAT::tdual_int_1d_3 k_stencilxyz;
|
||||
};
|
||||
|
||||
template<class DeviceType>
|
||||
class NeighborKokkosExecute
|
||||
{
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
public:
|
||||
NeighListKokkos<DeviceType> neigh_list;
|
||||
|
||||
// data from Neighbor class
|
||||
|
||||
const typename AT::t_xfloat_2d_randomread cutneighsq;
|
||||
|
||||
// exclusion data from Neighbor class
|
||||
|
||||
const int exclude;
|
||||
|
||||
const int nex_type;
|
||||
const typename AT::t_int_1d_const ex1_type,ex2_type;
|
||||
const typename AT::t_int_2d_const ex_type;
|
||||
|
||||
const int nex_group;
|
||||
const typename AT::t_int_1d_const ex1_group,ex2_group;
|
||||
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
|
||||
|
||||
const int nex_mol;
|
||||
const typename AT::t_int_1d_const ex_mol_group;
|
||||
const typename AT::t_int_1d_const ex_mol_bit;
|
||||
|
||||
// data from NBin class
|
||||
|
||||
const typename AT::t_int_1d bincount;
|
||||
const typename AT::t_int_1d_const c_bincount;
|
||||
typename AT::t_int_2d bins;
|
||||
typename AT::t_int_2d_const c_bins;
|
||||
|
||||
|
||||
// data from NStencil class
|
||||
|
||||
int nstencil;
|
||||
typename AT::t_int_1d d_stencil; // # of J neighs for each I
|
||||
typename AT::t_int_1d_3 d_stencilxyz;
|
||||
|
||||
// data from Atom class
|
||||
|
||||
const typename AT::t_x_array_randomread x;
|
||||
const typename AT::t_int_1d_const type,mask,molecule;
|
||||
const typename AT::t_tagint_1d_const tag;
|
||||
const typename AT::t_tagint_2d_const special;
|
||||
const typename AT::t_int_2d_const nspecial;
|
||||
const int molecular;
|
||||
int moltemplate;
|
||||
|
||||
int special_flag[4];
|
||||
|
||||
const int nbinx,nbiny,nbinz;
|
||||
const int mbinx,mbiny,mbinz;
|
||||
const int mbinxlo,mbinylo,mbinzlo;
|
||||
const X_FLOAT bininvx,bininvy,bininvz;
|
||||
X_FLOAT bboxhi[3],bboxlo[3];
|
||||
|
||||
const int nlocal;
|
||||
|
||||
typename AT::t_int_scalar resize;
|
||||
typename AT::t_int_scalar new_maxneighs;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
|
||||
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
|
||||
|
||||
const int xperiodic, yperiodic, zperiodic;
|
||||
const int xprd_half, yprd_half, zprd_half;
|
||||
|
||||
NeighborKokkosExecute(
|
||||
const NeighListKokkos<DeviceType> &_neigh_list,
|
||||
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
|
||||
const typename AT::t_int_1d &_bincount,
|
||||
const typename AT::t_int_2d &_bins,
|
||||
const int _nstencil,
|
||||
const typename AT::t_int_1d &_d_stencil,
|
||||
const typename AT::t_int_1d_3 &_d_stencilxyz,
|
||||
const int _nlocal,
|
||||
const typename AT::t_x_array_randomread &_x,
|
||||
const typename AT::t_int_1d_const &_type,
|
||||
const typename AT::t_int_1d_const &_mask,
|
||||
const typename AT::t_int_1d_const &_molecule,
|
||||
const typename AT::t_tagint_1d_const &_tag,
|
||||
const typename AT::t_tagint_2d_const &_special,
|
||||
const typename AT::t_int_2d_const &_nspecial,
|
||||
const int &_molecular,
|
||||
const int & _nbinx,const int & _nbiny,const int & _nbinz,
|
||||
const int & _mbinx,const int & _mbiny,const int & _mbinz,
|
||||
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
|
||||
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
|
||||
const int & _exclude,const int & _nex_type,
|
||||
const typename AT::t_int_1d_const & _ex1_type,
|
||||
const typename AT::t_int_1d_const & _ex2_type,
|
||||
const typename AT::t_int_2d_const & _ex_type,
|
||||
const int & _nex_group,
|
||||
const typename AT::t_int_1d_const & _ex1_group,
|
||||
const typename AT::t_int_1d_const & _ex2_group,
|
||||
const typename AT::t_int_1d_const & _ex1_bit,
|
||||
const typename AT::t_int_1d_const & _ex2_bit,
|
||||
const int & _nex_mol,
|
||||
const typename AT::t_int_1d_const & _ex_mol_group,
|
||||
const typename AT::t_int_1d_const & _ex_mol_bit,
|
||||
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
|
||||
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
|
||||
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
|
||||
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
|
||||
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
|
||||
nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),
|
||||
nlocal(_nlocal),
|
||||
x(_x),type(_type),mask(_mask),molecule(_molecule),
|
||||
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
|
||||
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
|
||||
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
|
||||
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
|
||||
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
|
||||
exclude(_exclude),nex_type(_nex_type),
|
||||
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
|
||||
nex_group(_nex_group),
|
||||
ex1_group(_ex1_group),ex2_group(_ex2_group),
|
||||
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),
|
||||
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
|
||||
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
|
||||
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) {
|
||||
|
||||
if (molecular == 2) moltemplate = 1;
|
||||
else moltemplate = 0;
|
||||
|
||||
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
|
||||
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
|
||||
|
||||
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_resize = Kokkos::create_mirror_view(resize);
|
||||
#else
|
||||
h_resize = resize;
|
||||
#endif
|
||||
h_resize() = 1;
|
||||
new_maxneighs = typename AT::
|
||||
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
|
||||
#else
|
||||
h_new_maxneighs = new_maxneighs;
|
||||
#endif
|
||||
h_new_maxneighs() = neigh_list.maxneighs;
|
||||
};
|
||||
|
||||
~NeighborKokkosExecute() {neigh_list.clean_copy();};
|
||||
|
||||
template<int HalfNeigh, int Newton>
|
||||
KOKKOS_FUNCTION
|
||||
void build_Item(const int &i) const;
|
||||
|
||||
template<int HalfNeigh>
|
||||
KOKKOS_FUNCTION
|
||||
void build_Item_Ghost(const int &i) const;
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template<int HalfNeigh, int Newton>
|
||||
__device__ inline
|
||||
void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const;
|
||||
#endif
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void binatomsItem(const int &i) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi[0])
|
||||
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi[1])
|
||||
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi[2])
|
||||
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
|
||||
{
|
||||
int ix,iy,iz;
|
||||
|
||||
if (x >= bboxhi[0])
|
||||
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
|
||||
else if (x >= bboxlo[0]) {
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
|
||||
ix = MIN(ix,nbinx-1);
|
||||
} else
|
||||
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
|
||||
|
||||
if (y >= bboxhi[1])
|
||||
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
|
||||
else if (y >= bboxlo[1]) {
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
|
||||
iy = MIN(iy,nbiny-1);
|
||||
} else
|
||||
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
|
||||
|
||||
if (z >= bboxhi[2])
|
||||
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
|
||||
else if (z >= bboxlo[2]) {
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
|
||||
iz = MIN(iz,nbinz-1);
|
||||
} else
|
||||
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
|
||||
|
||||
i[0] = ix - mbinxlo;
|
||||
i[1] = iy - mbinylo;
|
||||
i[2] = iz - mbinzlo;
|
||||
|
||||
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int find_special(const int &i, const int &j) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
int minimum_image_check(double dx, double dy, double dz) const {
|
||||
if (xperiodic && fabs(dx) > xprd_half) return 1;
|
||||
if (yperiodic && fabs(dy) > yprd_half) return 1;
|
||||
if (zperiodic && fabs(dz) > zprd_half) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<class DeviceType,int HALF_NEIGH,int GHOST_NEWTON>
|
||||
struct NPairKokkosBuildFunctor {
|
||||
typedef DeviceType device_type;
|
||||
|
||||
const NeighborKokkosExecute<DeviceType> c;
|
||||
const size_t sharedsize;
|
||||
|
||||
NPairKokkosBuildFunctor(const NeighborKokkosExecute<DeviceType> &_c,
|
||||
const size_t _sharedsize):c(_c),
|
||||
sharedsize(_sharedsize) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
|
||||
}
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
__device__ inline
|
||||
|
||||
void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const {
|
||||
c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
|
||||
}
|
||||
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
|
||||
#endif
|
||||
};
|
||||
|
||||
template<int HALF_NEIGH,int GHOST_NEWTON>
|
||||
struct NPairKokkosBuildFunctor<LMPHostType,HALF_NEIGH,GHOST_NEWTON> {
|
||||
typedef LMPHostType device_type;
|
||||
|
||||
const NeighborKokkosExecute<LMPHostType> c;
|
||||
const size_t sharedsize;
|
||||
|
||||
NPairKokkosBuildFunctor(const NeighborKokkosExecute<LMPHostType> &_c,
|
||||
const size_t _sharedsize):c(_c),
|
||||
sharedsize(_sharedsize) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
|
||||
}
|
||||
|
||||
void operator() (typename Kokkos::TeamPolicy<LMPHostType>::member_type dev) const {}
|
||||
};
|
||||
|
||||
template<class DeviceType,int HALF_NEIGH>
|
||||
struct NPairKokkosBuildFunctorGhost {
|
||||
typedef DeviceType device_type;
|
||||
|
||||
const NeighborKokkosExecute<DeviceType> c;
|
||||
const size_t sharedsize;
|
||||
|
||||
NPairKokkosBuildFunctorGhost(const NeighborKokkosExecute<DeviceType> &_c,
|
||||
const size_t _sharedsize):c(_c),
|
||||
sharedsize(_sharedsize) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int & i) const {
|
||||
c.template build_Item_Ghost<HALF_NEIGH>(i);
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
@ -90,7 +90,7 @@ void PairBuckCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -309,19 +309,12 @@ void PairBuckCoulCutKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/cut/kk");
|
||||
}
|
||||
|
||||
@ -109,7 +109,7 @@ void PairBuckCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -458,11 +458,9 @@ void PairBuckCoulLongKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/long/kk");
|
||||
}
|
||||
|
||||
@ -79,7 +79,7 @@ void PairBuckKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -233,19 +233,12 @@ void PairBuckKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with buck/kk");
|
||||
}
|
||||
|
||||
@ -31,7 +31,7 @@ namespace LAMMPS_NS {
|
||||
template<class DeviceType>
|
||||
class PairBuckKokkos : public PairBuck {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
PairBuckKokkos(class LAMMPS *);
|
||||
@ -96,17 +96,14 @@ class PairBuckKokkos : public PairBuck {
|
||||
friend class PairComputeFunctor<PairBuckKokkos,HALF,true>;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,HALFTHREAD,true>;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,N2,true>;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,FULLCLUSTER,true >;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,FULL,false>;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,HALF,false>;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,HALFTHREAD,false>;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,N2,false>;
|
||||
friend class PairComputeFunctor<PairBuckKokkos,FULLCLUSTER,false >;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALF,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALFTHREAD,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,N2,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_fullcluster<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend void pair_virial_fdotr_compute<PairBuckKokkos>(PairBuckKokkos*);
|
||||
};
|
||||
|
||||
@ -78,7 +78,7 @@ void PairCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
vflag = vflag_in;
|
||||
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -215,11 +215,9 @@ void PairCoulCutKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with coul/cut/kk");
|
||||
}
|
||||
|
||||
@ -85,7 +85,7 @@ void PairCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -257,19 +257,12 @@ void PairCoulDebyeKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with coul/debye/kk");
|
||||
}
|
||||
|
||||
@ -221,11 +221,9 @@ void PairCoulDSFKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with coul/dsf/kk");
|
||||
}
|
||||
|
||||
@ -102,7 +102,7 @@ void PairCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -408,11 +408,9 @@ void PairCoulLongKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/long/kk");
|
||||
}
|
||||
|
||||
@ -222,11 +222,9 @@ void PairCoulWolfKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with coul/wolf/kk");
|
||||
}
|
||||
|
||||
@ -286,11 +286,9 @@ void PairEAMAlloyKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/alloy");
|
||||
}
|
||||
|
||||
@ -291,11 +291,9 @@ void PairEAMFSKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/fs");
|
||||
}
|
||||
|
||||
@ -281,11 +281,9 @@ void PairEAMKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk");
|
||||
}
|
||||
|
||||
@ -333,145 +333,6 @@ struct PairComputeFunctor {
|
||||
}
|
||||
};
|
||||
|
||||
template <class PairStyle, bool STACKPARAMS, class Specialisation>
|
||||
struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
|
||||
typedef typename PairStyle::device_type device_type ;
|
||||
typedef EV_FLOAT value_type;
|
||||
|
||||
PairStyle c;
|
||||
NeighListKokkos<device_type> list;
|
||||
|
||||
PairComputeFunctor(PairStyle* c_ptr,
|
||||
NeighListKokkos<device_type>* list_ptr):
|
||||
c(*c_ptr),list(*list_ptr) {};
|
||||
~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
|
||||
return j >> SBBITS & 3;
|
||||
}
|
||||
|
||||
template<int EVFLAG, int NEWTON_PAIR>
|
||||
KOKKOS_FUNCTION
|
||||
EV_FLOAT compute_item(const typename Kokkos::TeamPolicy<device_type>::member_type& dev,
|
||||
const NeighListKokkos<device_type> &list, const NoCoulTag& ) const {
|
||||
EV_FLOAT ev;
|
||||
int i = dev.league_rank()*dev.team_size() + dev.team_rank();
|
||||
|
||||
const X_FLOAT xtmp = c.c_x(i,0);
|
||||
const X_FLOAT ytmp = c.c_x(i,1);
|
||||
const X_FLOAT ztmp = c.c_x(i,2);
|
||||
int itype = c.type(i);
|
||||
|
||||
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
|
||||
const int jnum = list.d_numneigh[i];
|
||||
|
||||
F_FLOAT3 ftmp;
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int jjj = neighbors_i(jj);
|
||||
|
||||
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(dev,NeighClusterSize),[&] (const int& k, F_FLOAT3& fftmp) {
|
||||
const F_FLOAT factor_lj = c.special_lj[sbmask(jjj+k)];
|
||||
const int j = (jjj + k)&NEIGHMASK;
|
||||
if((j==i)||(j>=c.nall)) return;
|
||||
const X_FLOAT delx = xtmp - c.c_x(j,0);
|
||||
const X_FLOAT dely = ytmp - c.c_x(j,1);
|
||||
const X_FLOAT delz = ztmp - c.c_x(j,2);
|
||||
const int jtype = c.type(j);
|
||||
const F_FLOAT rsq = (delx*delx + dely*dely + delz*delz);
|
||||
|
||||
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
||||
|
||||
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
fftmp.x += delx*fpair;
|
||||
fftmp.y += dely*fpair;
|
||||
fftmp.z += delz*fpair;
|
||||
|
||||
if (EVFLAG) {
|
||||
F_FLOAT evdwl = 0.0;
|
||||
if (c.eflag) {
|
||||
evdwl = 0.5*
|
||||
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
|
||||
ev.evdwl += evdwl;
|
||||
}
|
||||
|
||||
if (c.vflag_either || c.eflag_atom) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
},ftmp);
|
||||
}
|
||||
|
||||
Kokkos::single(Kokkos::PerThread(dev), [&]() {
|
||||
c.f(i,0) += ftmp.x;
|
||||
c.f(i,1) += ftmp.y;
|
||||
c.f(i,2) += ftmp.z;
|
||||
});
|
||||
|
||||
return ev;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
const F_FLOAT &dely, const F_FLOAT &delz) const
|
||||
{
|
||||
const int EFLAG = c.eflag;
|
||||
const int NEWTON_PAIR = c.newton_pair;
|
||||
const int VFLAG = c.vflag_either;
|
||||
|
||||
if (EFLAG) {
|
||||
if (c.eflag_atom) {
|
||||
const E_FLOAT epairhalf = 0.5 * epair;
|
||||
if (NEWTON_PAIR || i < c.nlocal) c.d_eatom[i] += epairhalf;
|
||||
if (NEWTON_PAIR || j < c.nlocal) c.d_eatom[j] += epairhalf;
|
||||
}
|
||||
}
|
||||
|
||||
if (VFLAG) {
|
||||
const E_FLOAT v0 = delx*delx*fpair;
|
||||
const E_FLOAT v1 = dely*dely*fpair;
|
||||
const E_FLOAT v2 = delz*delz*fpair;
|
||||
const E_FLOAT v3 = delx*dely*fpair;
|
||||
const E_FLOAT v4 = delx*delz*fpair;
|
||||
const E_FLOAT v5 = dely*delz*fpair;
|
||||
|
||||
if (c.vflag_global) {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
|
||||
if (c.vflag_atom) {
|
||||
if (i < c.nlocal) {
|
||||
c.d_vatom(i,0) += 0.5*v0;
|
||||
c.d_vatom(i,1) += 0.5*v1;
|
||||
c.d_vatom(i,2) += 0.5*v2;
|
||||
c.d_vatom(i,3) += 0.5*v3;
|
||||
c.d_vatom(i,4) += 0.5*v4;
|
||||
c.d_vatom(i,5) += 0.5*v5;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev) const {
|
||||
if (c.newton_pair) compute_item<0,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
else compute_item<0,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev, value_type &energy_virial) const {
|
||||
if (c.newton_pair)
|
||||
energy_virial += compute_item<1,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
else
|
||||
energy_virial += compute_item<1,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
|
||||
}
|
||||
};
|
||||
|
||||
template <class PairStyle, bool STACKPARAMS, class Specialisation>
|
||||
struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
|
||||
typedef typename PairStyle::device_type device_type ;
|
||||
@ -607,8 +468,8 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
|
||||
// The enable_if clause will invalidate the last parameter of the function, so that
|
||||
// a match is only achieved, if PairStyle supports the specific neighborlist variant.
|
||||
// This uses the fact that failure to match template parameters is not an error.
|
||||
// By having the enable_if with a ! and without it, exactly one of the two versions of the functions
|
||||
// pair_compute_neighlist and pair_compute_fullcluster will match - either the dummy version
|
||||
// By having the enable_if with a ! and without it, exactly one of the functions
|
||||
// pair_compute_neighlist will match - either the dummy version
|
||||
// or the real one further below.
|
||||
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
|
||||
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<!((NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
||||
@ -619,15 +480,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
|
||||
return ev;
|
||||
}
|
||||
|
||||
template<class PairStyle, class Specialisation>
|
||||
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<!((FULLCLUSTER&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
||||
EV_FLOAT ev;
|
||||
(void) fpair;
|
||||
(void) list;
|
||||
printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i available %i \n",FULLCLUSTER,PairStyle::EnabledNeighFlags);
|
||||
return ev;
|
||||
}
|
||||
|
||||
// Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2
|
||||
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
|
||||
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
||||
@ -644,41 +496,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
|
||||
return ev;
|
||||
}
|
||||
|
||||
// Submit ParallelFor for NEIGHFLAG=FULLCLUSTER
|
||||
template<class PairStyle, class Specialisation>
|
||||
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<(FULLCLUSTER&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
|
||||
EV_FLOAT ev;
|
||||
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
|
||||
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation >
|
||||
f_type;
|
||||
f_type ff(fpair, list);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 32 : 1;
|
||||
#else
|
||||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*+teamsize-1)/teamsize;
|
||||
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize,NeighClusterSize);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
|
||||
else Kokkos::parallel_for(config,ff);
|
||||
} else {
|
||||
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation >
|
||||
f_type;
|
||||
f_type ff(fpair, list);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 32 : 1;
|
||||
#else
|
||||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*+teamsize-1)/teamsize;
|
||||
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize,NeighClusterSize);
|
||||
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
|
||||
else Kokkos::parallel_for(config,ff);
|
||||
}
|
||||
return ev;
|
||||
}
|
||||
|
||||
|
||||
template<class PairStyle, class Specialisation>
|
||||
EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
|
||||
EV_FLOAT ev;
|
||||
@ -690,8 +507,6 @@ EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::dev
|
||||
ev = pair_compute_neighlist<PairStyle,HALF,Specialisation> (fpair,list);
|
||||
} else if (fpair->neighflag == N2) {
|
||||
ev = pair_compute_neighlist<PairStyle,N2,Specialisation> (fpair,list);
|
||||
} else if (fpair->neighflag == FULLCLUSTER) {
|
||||
ev = pair_compute_fullcluster<PairStyle,Specialisation> (fpair,list);
|
||||
}
|
||||
return ev;
|
||||
}
|
||||
|
||||
@ -110,7 +110,7 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::compute(int eflag_in, int
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -455,11 +455,9 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/charmm/implicit/kk");
|
||||
}
|
||||
|
||||
@ -110,7 +110,7 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_i
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -456,11 +456,9 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/charmm/kk");
|
||||
}
|
||||
|
||||
@ -110,7 +110,7 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -486,11 +486,9 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/long/kk");
|
||||
}
|
||||
|
||||
@ -87,7 +87,7 @@ void PairLJClass2CoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -289,19 +289,12 @@ void PairLJClass2CoulCutKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/cut/kk");
|
||||
}
|
||||
|
||||
@ -95,7 +95,7 @@ void PairLJClass2CoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -445,11 +445,9 @@ void PairLJClass2CoulLongKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/long/kk");
|
||||
}
|
||||
|
||||
@ -87,7 +87,7 @@ void PairLJClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
vflag = vflag_in;
|
||||
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -227,19 +227,12 @@ void PairLJClass2Kokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/kk");
|
||||
}
|
||||
|
||||
@ -31,7 +31,7 @@ namespace LAMMPS_NS {
|
||||
template<class DeviceType>
|
||||
class PairLJClass2Kokkos : public PairLJClass2 {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
PairLJClass2Kokkos(class LAMMPS *);
|
||||
@ -99,17 +99,14 @@ class PairLJClass2Kokkos : public PairLJClass2 {
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,HALF,true>;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,true>;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,N2,true>;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,FULLCLUSTER,true >;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,FULL,false>;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,HALF,false>;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,false>;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,N2,false>;
|
||||
friend class PairComputeFunctor<PairLJClass2Kokkos,FULLCLUSTER,false >;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALF,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALFTHREAD,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,N2,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_fullcluster<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend void pair_virial_fdotr_compute<PairLJClass2Kokkos>(PairLJClass2Kokkos*);
|
||||
};
|
||||
|
||||
@ -87,7 +87,7 @@ void PairLJCutCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -280,19 +280,12 @@ void PairLJCutCoulCutKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
|
||||
}
|
||||
|
||||
@ -91,7 +91,7 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -310,19 +310,12 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/debye/kk");
|
||||
}
|
||||
|
||||
@ -99,7 +99,7 @@ void PairLJCutCoulDSFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -301,19 +301,12 @@ void PairLJCutCoulDSFKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
|
||||
}
|
||||
|
||||
@ -99,7 +99,7 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -464,11 +464,9 @@ void PairLJCutCoulLongKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/long/kk");
|
||||
}
|
||||
|
||||
@ -87,7 +87,7 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
vflag = vflag_in;
|
||||
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -245,19 +245,12 @@ void PairLJCutKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
|
||||
}
|
||||
|
||||
@ -31,7 +31,7 @@ namespace LAMMPS_NS {
|
||||
template<class DeviceType>
|
||||
class PairLJCutKokkos : public PairLJCut {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
PairLJCutKokkos(class LAMMPS *);
|
||||
@ -99,17 +99,14 @@ class PairLJCutKokkos : public PairLJCut {
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,HALF,true>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,true>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,N2,true>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,true >;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,FULL,false>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,HALF,false>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,N2,false>;
|
||||
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,false >;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALF,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALFTHREAD,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,N2,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_fullcluster<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend void pair_virial_fdotr_compute<PairLJCutKokkos>(PairLJCutKokkos*);
|
||||
};
|
||||
|
||||
@ -86,7 +86,7 @@ void PairLJExpandKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -230,19 +230,12 @@ void PairLJExpandKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/expand/kk");
|
||||
}
|
||||
|
||||
@ -31,7 +31,7 @@ namespace LAMMPS_NS {
|
||||
template<class DeviceType>
|
||||
class PairLJExpandKokkos : public PairLJExpand {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
PairLJExpandKokkos(class LAMMPS *);
|
||||
@ -100,17 +100,14 @@ class PairLJExpandKokkos : public PairLJExpand {
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,HALF,true>;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,true>;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,N2,true>;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,FULLCLUSTER,true >;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,FULL,false>;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,HALF,false>;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,false>;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,N2,false>;
|
||||
friend class PairComputeFunctor<PairLJExpandKokkos,FULLCLUSTER,false >;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,FULL,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALF,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALFTHREAD,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,N2,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_fullcluster<PairLJExpandKokkos,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairLJExpandKokkos,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend void pair_virial_fdotr_compute<PairLJExpandKokkos>(PairLJExpandKokkos*);
|
||||
};
|
||||
|
||||
@ -101,7 +101,7 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -439,11 +439,9 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/coul/gromacs/kk");
|
||||
}
|
||||
|
||||
@ -98,7 +98,7 @@ void PairLJGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -277,11 +277,9 @@ void PairLJGromacsKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/kk");
|
||||
}
|
||||
|
||||
@ -86,7 +86,7 @@ void PairLJSDKKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
vflag = vflag_in;
|
||||
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -258,19 +258,12 @@ void PairLJSDKKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/sdk/kk");
|
||||
}
|
||||
|
||||
@ -31,7 +31,7 @@ namespace LAMMPS_NS {
|
||||
template<class DeviceType>
|
||||
class PairLJSDKKokkos : public PairLJSDK {
|
||||
public:
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
PairLJSDKKokkos(class LAMMPS *);
|
||||
@ -97,17 +97,14 @@ class PairLJSDKKokkos : public PairLJSDK {
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,HALF,true>;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,HALFTHREAD,true>;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,N2,true>;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,FULLCLUSTER,true >;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,FULL,false>;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,HALF,false>;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,HALFTHREAD,false>;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,N2,false>;
|
||||
friend class PairComputeFunctor<PairLJSDKKokkos,FULLCLUSTER,false >;
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,FULL,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,HALF,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,HALFTHREAD,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,N2,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute_fullcluster<PairLJSDKKokkos,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend EV_FLOAT pair_compute<PairLJSDKKokkos,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
|
||||
friend void pair_virial_fdotr_compute<PairLJSDKKokkos>(PairLJSDKKokkos*);
|
||||
};
|
||||
|
||||
@ -146,12 +146,10 @@ void PairReaxCKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk");
|
||||
|
||||
@ -601,7 +601,6 @@ void PairSWKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
if (neighflag == FULL)
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
else
|
||||
|
||||
@ -96,7 +96,7 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
|
||||
eflag = eflag_in;
|
||||
vflag = vflag_in;
|
||||
|
||||
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
|
||||
if (neighflag == FULL) no_virial_fdotr_compute = 1;
|
||||
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
@ -142,19 +142,6 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
|
||||
f(this,(NeighListKokkos<DeviceType>*) list);
|
||||
if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev);
|
||||
else Kokkos::parallel_for(nlocal,f);
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,false,S_TableCompute<DeviceType,TABSTYLE> >
|
||||
f_type;
|
||||
f_type f(this,(NeighListKokkos<DeviceType>*) list);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
const int teamsize = Kokkos::Impl::is_same<DeviceType, Kokkos::Cuda>::value ? 32 : 1;
|
||||
#else
|
||||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*+teamsize-1)/teamsize;
|
||||
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize,NeighClusterSize);
|
||||
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
|
||||
else Kokkos::parallel_for(config,f);
|
||||
}
|
||||
} else {
|
||||
if (neighflag == FULL) {
|
||||
@ -177,19 +164,6 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
|
||||
f(this,(NeighListKokkos<DeviceType>*) list);
|
||||
if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev);
|
||||
else Kokkos::parallel_for(nlocal,f);
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,true,S_TableCompute<DeviceType,TABSTYLE> >
|
||||
f_type;
|
||||
f_type f(this,(NeighListKokkos<DeviceType>*) list);
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
const int teamsize = Kokkos::Impl::is_same<DeviceType, Kokkos::Cuda>::value ? 32 : 1;
|
||||
#else
|
||||
const int teamsize = 1;
|
||||
#endif
|
||||
const int nteams = (list->inum*+teamsize-1)/teamsize;
|
||||
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize,NeighClusterSize);
|
||||
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
|
||||
else Kokkos::parallel_for(config,f);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1261,19 +1235,12 @@ void PairTableKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 1;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == N2) {
|
||||
neighbor->requests[irequest]->full = 0;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
} else if (neighflag == FULLCLUSTER) {
|
||||
neighbor->requests[irequest]->full_cluster = 1;
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
} else {
|
||||
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
|
||||
}
|
||||
|
||||
@ -41,7 +41,7 @@ template<class DeviceType>
|
||||
class PairTableKokkos : public Pair {
|
||||
public:
|
||||
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
|
||||
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
|
||||
enum {COUL_FLAG=0};
|
||||
typedef DeviceType device_type;
|
||||
|
||||
@ -170,45 +170,37 @@ class PairTableKokkos : public Pair {
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LOOKUP> >;
|
||||
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LINEAR> >;
|
||||
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,SPLINE> >;
|
||||
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,BITMAP> >;
|
||||
|
||||
friend void pair_virial_fdotr_compute<PairTableKokkos>(PairTableKokkos*);
|
||||
};
|
||||
|
||||
@ -103,7 +103,6 @@ void PairTersoffKokkos<DeviceType>::init_style()
|
||||
//if (neighflag == FULL || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
if (neighflag == FULL)
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
else
|
||||
|
||||
@ -102,7 +102,6 @@ void PairTersoffMODKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
if (neighflag == FULL)
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
else
|
||||
|
||||
@ -113,7 +113,6 @@ void PairTersoffZBLKokkos<DeviceType>::init_style()
|
||||
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
|
||||
neighbor->requests[irequest]->full = 1;
|
||||
neighbor->requests[irequest]->half = 0;
|
||||
neighbor->requests[irequest]->full_cluster = 0;
|
||||
if (neighflag == FULL)
|
||||
neighbor->requests[irequest]->ghost = 1;
|
||||
else
|
||||
|
||||
@ -195,17 +195,6 @@ void EwaldDisp::init()
|
||||
g_ewald = accuracy*sqrt(natoms*(*cutoff)*shape_det(domain->h)) / (2.0*q2);
|
||||
if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/(*cutoff);
|
||||
else g_ewald = sqrt(-log(g_ewald)) / (*cutoff);
|
||||
}
|
||||
else if (function[1] || function[2]) {
|
||||
//Try Newton Solver
|
||||
//Use old method to get guess
|
||||
g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff;
|
||||
|
||||
double g_ewald_new =
|
||||
NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2);
|
||||
if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
|
||||
else error->warning(FLERR,"Ewald/disp Newton solver failed, "
|
||||
"using old method to estimate g_ewald");
|
||||
} else if (function[3]) {
|
||||
//Try Newton Solver
|
||||
//Use old method to get guess
|
||||
@ -215,6 +204,16 @@ void EwaldDisp::init()
|
||||
if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
|
||||
else error->warning(FLERR,"Ewald/disp Newton solver failed, "
|
||||
"using old method to estimate g_ewald");
|
||||
} else if (function[1] || function[2]) {
|
||||
//Try Newton Solver
|
||||
//Use old method to get guess
|
||||
g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff;
|
||||
|
||||
double g_ewald_new =
|
||||
NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2);
|
||||
if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
|
||||
else error->warning(FLERR,"Ewald/disp Newton solver failed, "
|
||||
"using old method to estimate g_ewald");
|
||||
}
|
||||
}
|
||||
|
||||
@ -708,6 +707,8 @@ void EwaldDisp::compute(int eflag, int vflag)
|
||||
compute_virial();
|
||||
compute_virial_dipole();
|
||||
compute_virial_peratom();
|
||||
|
||||
if (slabflag) compute_slabcorr();
|
||||
}
|
||||
|
||||
|
||||
@ -974,7 +975,6 @@ void EwaldDisp::compute_energy()
|
||||
}
|
||||
}
|
||||
for (int k=0; k<EWALD_NFUNCS; ++k) energy += c[k]*sum[k]-energy_self[k];
|
||||
if (slabflag) compute_slabcorr();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -1480,10 +1480,7 @@ double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2)
|
||||
double a = Rc*x;
|
||||
double f = 0.0;
|
||||
|
||||
if (function[1] || function[2]) { // LJ
|
||||
f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) *
|
||||
(6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy);
|
||||
} else { // dipole
|
||||
if (function[3]) { // dipole
|
||||
double rg2 = a*a;
|
||||
double rg4 = rg2*rg2;
|
||||
double rg6 = rg4*rg2;
|
||||
@ -1492,7 +1489,10 @@ double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2)
|
||||
f = (b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms)) *
|
||||
sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) *
|
||||
exp(-rg2)) - accuracy;
|
||||
}
|
||||
} else if (function[1] || function[2]) { // LJ
|
||||
f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) *
|
||||
(6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy);
|
||||
}
|
||||
|
||||
return f;
|
||||
}
|
||||
|
||||
127
src/MAKE/MACHINES/Makefile.cori2
Executable file
127
src/MAKE/MACHINES/Makefile.cori2
Executable file
@ -0,0 +1,127 @@
|
||||
# cori2 = NERSC Cori II KNL, static build, FFTW (single precision)
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# module swap craype-haswell craype-mic-knl
|
||||
# module load fftw
|
||||
# module load craype-hugepages2M
|
||||
# Recommend using #SBATCH -S 2 for core specialization
|
||||
# ---------------------------------------------------------------------
|
||||
|
||||
SHELL = /bin/sh
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# compiler/linker settings
|
||||
# specify flags and libraries needed for your compiler
|
||||
|
||||
CC = CC
|
||||
OPTFLAGS = -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
|
||||
CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -qno-offload \
|
||||
-fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_NO_TBB
|
||||
SHFLAGS = -fPIC
|
||||
DEPFLAGS = -M
|
||||
|
||||
LINK = CC
|
||||
LINKFLAGS = -g -qopenmp $(OPTFLAGS)
|
||||
LIB =
|
||||
SIZE = size
|
||||
|
||||
ARCHIVE = ar
|
||||
ARFLAGS = -rc
|
||||
SHLIBFLAGS = -shared
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# LAMMPS-specific settings, all OPTIONAL
|
||||
# specify settings for LAMMPS features you will use
|
||||
# if you change any -D setting, do full re-compile after "make clean"
|
||||
|
||||
# LAMMPS ifdef settings
|
||||
# see possible settings in Section 2.2 (step 4) of manual
|
||||
|
||||
LMP_INC = #-DLAMMPS_GZIP -DLAMMPS_JPEG
|
||||
|
||||
# MPI library
|
||||
# see discussion in Section 2.2 (step 5) of manual
|
||||
# MPI wrapper compiler/linker can provide this info
|
||||
# can point to dummy MPI library in src/STUBS as in Makefile.serial
|
||||
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
|
||||
# INC = path for mpi.h, MPI compiler settings
|
||||
# PATH = path for MPI library
|
||||
# LIB = name of MPI library
|
||||
|
||||
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
|
||||
MPI_PATH =
|
||||
MPI_LIB =
|
||||
|
||||
# FFT library
|
||||
# see discussion in Section 2.2 (step 6) of manaul
|
||||
# can be left blank to use provided KISS FFT library
|
||||
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
|
||||
# PATH = path for FFT library
|
||||
# LIB = name of FFT library
|
||||
|
||||
FFT_INC = -DFFT_FFTW3 -DFFT_SINGLE
|
||||
FFT_PATH =
|
||||
FFT_LIB = -lfftw3f
|
||||
|
||||
# JPEG and/or PNG library
|
||||
# see discussion in Section 2.2 (step 7) of manual
|
||||
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
|
||||
# INC = path(s) for jpeglib.h and/or png.h
|
||||
# PATH = path(s) for JPEG library and/or PNG library
|
||||
# LIB = name(s) of JPEG library and/or PNG library
|
||||
|
||||
JPG_INC =
|
||||
JPG_PATH =
|
||||
JPG_LIB = #-ljpeg
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# build rules and dependencies
|
||||
# do not edit this section
|
||||
|
||||
include Makefile.package.settings
|
||||
include Makefile.package
|
||||
|
||||
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
|
||||
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
|
||||
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
|
||||
|
||||
# Path to src files
|
||||
|
||||
vpath %.cpp ..
|
||||
vpath %.h ..
|
||||
|
||||
# Link target
|
||||
|
||||
$(EXE): $(OBJ)
|
||||
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
|
||||
$(SIZE) $(EXE)
|
||||
|
||||
# Library targets
|
||||
|
||||
lib: $(OBJ)
|
||||
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
|
||||
|
||||
shlib: $(OBJ)
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
|
||||
$(OBJ) $(EXTRA_LIB) $(LIB)
|
||||
|
||||
# Compilation rules
|
||||
|
||||
%.o:%.cpp
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
%.d:%.cpp
|
||||
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
|
||||
|
||||
%.o:%.cu
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
# Individual dependencies
|
||||
|
||||
depend : fastdep.exe $(SRC)
|
||||
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
|
||||
|
||||
fastdep.exe: ../DEPEND/fastdep.c
|
||||
cc -O -o $@ $<
|
||||
|
||||
sinclude .depend
|
||||
123
src/MAKE/OPTIONS/Makefile.intel_cpu
Executable file
123
src/MAKE/OPTIONS/Makefile.intel_cpu
Executable file
@ -0,0 +1,123 @@
|
||||
# intel_cpu_intelmpi = USER-INTEL package, Intel MPI, MKL FFT
|
||||
|
||||
SHELL = /bin/sh
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# compiler/linker settings
|
||||
# specify flags and libraries needed for your compiler
|
||||
|
||||
CC = mpiicpc
|
||||
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
|
||||
CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
|
||||
-fno-alias -ansi-alias -restrict $(OPTFLAGS)
|
||||
SHFLAGS = -fPIC
|
||||
DEPFLAGS = -M
|
||||
|
||||
LINK = mpiicpc
|
||||
LINKFLAGS = -g -qopenmp $(OPTFLAGS)
|
||||
LIB = -ltbbmalloc -ltbbmalloc_proxy
|
||||
SIZE = size
|
||||
|
||||
ARCHIVE = ar
|
||||
ARFLAGS = -rc
|
||||
SHLIBFLAGS = -shared
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# LAMMPS-specific settings, all OPTIONAL
|
||||
# specify settings for LAMMPS features you will use
|
||||
# if you change any -D setting, do full re-compile after "make clean"
|
||||
|
||||
# LAMMPS ifdef settings
|
||||
# see possible settings in Section 2.2 (step 4) of manual
|
||||
|
||||
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
|
||||
|
||||
# MPI library
|
||||
# see discussion in Section 2.2 (step 5) of manual
|
||||
# MPI wrapper compiler/linker can provide this info
|
||||
# can point to dummy MPI library in src/STUBS as in Makefile.serial
|
||||
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
|
||||
# INC = path for mpi.h, MPI compiler settings
|
||||
# PATH = path for MPI library
|
||||
# LIB = name of MPI library
|
||||
|
||||
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
|
||||
MPI_PATH =
|
||||
MPI_LIB =
|
||||
|
||||
# FFT library
|
||||
# see discussion in Section 2.2 (step 6) of manaul
|
||||
# can be left blank to use provided KISS FFT library
|
||||
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
|
||||
# PATH = path for FFT library
|
||||
# LIB = name of FFT library
|
||||
|
||||
FFT_INC = -DFFT_MKL -DFFT_SINGLE
|
||||
FFT_PATH =
|
||||
FFT_LIB = -L$MKLROOT/lib/intel64/ -lmkl_intel_ilp64 \
|
||||
-lmkl_sequential -lmkl_core
|
||||
|
||||
# JPEG and/or PNG library
|
||||
# see discussion in Section 2.2 (step 7) of manual
|
||||
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
|
||||
# INC = path(s) for jpeglib.h and/or png.h
|
||||
# PATH = path(s) for JPEG library and/or PNG library
|
||||
# LIB = name(s) of JPEG library and/or PNG library
|
||||
|
||||
JPG_INC =
|
||||
JPG_PATH =
|
||||
JPG_LIB = -ljpeg
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# build rules and dependencies
|
||||
# do not edit this section
|
||||
|
||||
include Makefile.package.settings
|
||||
include Makefile.package
|
||||
|
||||
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
|
||||
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
|
||||
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
|
||||
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
|
||||
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
|
||||
|
||||
# Path to src files
|
||||
|
||||
vpath %.cpp ..
|
||||
vpath %.h ..
|
||||
|
||||
# Link target
|
||||
|
||||
$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS)
|
||||
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
|
||||
$(SIZE) $(EXE)
|
||||
|
||||
# Library targets
|
||||
|
||||
lib: $(OBJ) $(EXTRA_LINK_DEPENDS)
|
||||
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
|
||||
|
||||
shlib: $(OBJ) $(EXTRA_LINK_DEPENDS)
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
|
||||
$(OBJ) $(EXTRA_LIB) $(LIB)
|
||||
|
||||
# Compilation rules
|
||||
|
||||
%.o:%.cpp $(EXTRA_CPP_DEPENDS)
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
%.d:%.cpp $(EXTRA_CPP_DEPENDS)
|
||||
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
|
||||
|
||||
%.o:%.cu $(EXTRA_CPP_DEPENDS)
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
# Individual dependencies
|
||||
|
||||
depend : fastdep.exe $(SRC)
|
||||
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
|
||||
|
||||
fastdep.exe: ../DEPEND/fastdep.c
|
||||
cc -O -o $@ $<
|
||||
|
||||
sinclude .depend
|
||||
123
src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor
Normal file
123
src/MAKE/OPTIONS/Makefile.intel_knl_coprocessor
Normal file
@ -0,0 +1,123 @@
|
||||
# intel_phi = USER-INTEL with Phi x200 (KNL) offload support,Intel MPI,MKL FFT
|
||||
|
||||
SHELL = /bin/sh
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# compiler/linker settings
|
||||
# specify flags and libraries needed for your compiler
|
||||
|
||||
CC = mpiicpc
|
||||
MIC_OPT = -qoffload-arch=mic-avx512 -fp-model fast=2
|
||||
CCFLAGS = -g -O3 -qopenmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 \
|
||||
-xHost -fno-alias -ansi-alias -restrict \
|
||||
-qoverride-limits $(MIC_OPT)
|
||||
SHFLAGS = -fPIC
|
||||
DEPFLAGS = -M
|
||||
|
||||
LINK = mpiicpc
|
||||
LINKFLAGS = -g -O3 -xHost -qopenmp -qoffload $(MIC_OPT)
|
||||
LIB = -ltbbmalloc
|
||||
SIZE = size
|
||||
|
||||
ARCHIVE = ar
|
||||
ARFLAGS = -rc
|
||||
SHLIBFLAGS = -shared
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# LAMMPS-specific settings, all OPTIONAL
|
||||
# specify settings for LAMMPS features you will use
|
||||
# if you change any -D setting, do full re-compile after "make clean"
|
||||
|
||||
# LAMMPS ifdef settings
|
||||
# see possible settings in Section 2.2 (step 4) of manual
|
||||
|
||||
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
|
||||
|
||||
# MPI library
|
||||
# see discussion in Section 2.2 (step 5) of manual
|
||||
# MPI wrapper compiler/linker can provide this info
|
||||
# can point to dummy MPI library in src/STUBS as in Makefile.serial
|
||||
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
|
||||
# INC = path for mpi.h, MPI compiler settings
|
||||
# PATH = path for MPI library
|
||||
# LIB = name of MPI library
|
||||
|
||||
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
|
||||
MPI_PATH =
|
||||
MPI_LIB =
|
||||
|
||||
# FFT library
|
||||
# see discussion in Section 2.2 (step 6) of manaul
|
||||
# can be left blank to use provided KISS FFT library
|
||||
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
|
||||
# PATH = path for FFT library
|
||||
# LIB = name of FFT library
|
||||
|
||||
FFT_INC = -DFFT_MKL -DFFT_SINGLE
|
||||
FFT_PATH =
|
||||
FFT_LIB = -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core
|
||||
|
||||
# JPEG and/or PNG library
|
||||
# see discussion in Section 2.2 (step 7) of manual
|
||||
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
|
||||
# INC = path(s) for jpeglib.h and/or png.h
|
||||
# PATH = path(s) for JPEG library and/or PNG library
|
||||
# LIB = name(s) of JPEG library and/or PNG library
|
||||
|
||||
JPG_INC =
|
||||
JPG_PATH =
|
||||
JPG_LIB = -ljpeg
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
# build rules and dependencies
|
||||
# do not edit this section
|
||||
|
||||
include Makefile.package.settings
|
||||
include Makefile.package
|
||||
|
||||
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
|
||||
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
|
||||
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
|
||||
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
|
||||
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
|
||||
|
||||
# Path to src files
|
||||
|
||||
vpath %.cpp ..
|
||||
vpath %.h ..
|
||||
|
||||
# Link target
|
||||
|
||||
$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS)
|
||||
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
|
||||
$(SIZE) $(EXE)
|
||||
|
||||
# Library targets
|
||||
|
||||
lib: $(OBJ) $(EXTRA_LINK_DEPENDS)
|
||||
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
|
||||
|
||||
shlib: $(OBJ) $(EXTRA_LINK_DEPENDS)
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
|
||||
$(OBJ) $(EXTRA_LIB) $(LIB)
|
||||
|
||||
# Compilation rules
|
||||
|
||||
%.o:%.cpp $(EXTRA_CPP_DEPENDS)
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
%.d:%.cpp $(EXTRA_CPP_DEPENDS)
|
||||
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
|
||||
|
||||
%.o:%.cu $(EXTRA_CPP_DEPENDS)
|
||||
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
|
||||
|
||||
# Individual dependencies
|
||||
|
||||
depend : fastdep.exe $(SRC)
|
||||
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
|
||||
|
||||
fastdep.exe: ../DEPEND/fastdep.c
|
||||
cc -O -o $@ $<
|
||||
|
||||
sinclude .depend
|
||||
11
src/Make.sh
11
src/Make.sh
@ -59,8 +59,9 @@ style () {
|
||||
# called by "make machine"
|
||||
# col 1 = string to search for
|
||||
# col 2 = search in *.h files starting with this name
|
||||
# col 3 = prefix of style file
|
||||
# col 4
|
||||
# col 3 = name of style file
|
||||
# col 4 = file that includes the style file
|
||||
# col 5 = optional 2nd file that includes the style file
|
||||
|
||||
if (test $1 = "style") then
|
||||
|
||||
@ -69,7 +70,7 @@ if (test $1 = "style") then
|
||||
style BODY_CLASS body_ body atom_vec_body
|
||||
style BOND_CLASS bond_ bond force
|
||||
style COMMAND_CLASS "" command input
|
||||
style COMPUTE_CLASS compute_ compute modify modify_cuda
|
||||
style COMPUTE_CLASS compute_ compute modify
|
||||
style DIHEDRAL_CLASS dihedral_ dihedral force
|
||||
style DUMP_CLASS dump_ dump output write_dump
|
||||
style FIX_CLASS fix_ fix modify
|
||||
@ -77,6 +78,10 @@ if (test $1 = "style") then
|
||||
style INTEGRATE_CLASS "" integrate update
|
||||
style KSPACE_CLASS "" kspace force
|
||||
style MINIMIZE_CLASS min_ minimize update
|
||||
style NBIN_CLASS nbin_ nbin neighbor
|
||||
style NPAIR_CLASS npair_ npair neighbor
|
||||
style NSTENCIL_CLASS nstencil_ nstencil neighbor
|
||||
style NTOPO_CLASS ntopo_ ntopo neighbor
|
||||
style PAIR_CLASS pair_ pair force
|
||||
style READER_CLASS reader_ reader read_dump
|
||||
style REGION_CLASS region_ region domain
|
||||
|
||||
@ -89,6 +89,7 @@ void Python::command(int narg, char **arg)
|
||||
istr = NULL;
|
||||
ostr = NULL;
|
||||
format = NULL;
|
||||
length_longstr = 0;
|
||||
char *pyfile = NULL;
|
||||
char *herestr = NULL;
|
||||
int existflag = 0;
|
||||
@ -115,6 +116,11 @@ void Python::command(int narg, char **arg)
|
||||
format = new char[n];
|
||||
strcpy(format,arg[iarg+1]);
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"length") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Invalid python command");
|
||||
length_longstr = force->inumeric(FLERR,arg[iarg+1]);
|
||||
if (length_longstr <= 0) error->all(FLERR,"Invalid python command");
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"file") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Invalid python command");
|
||||
delete[] pyfile;
|
||||
@ -249,6 +255,7 @@ void Python::invoke_function(int ifunc, char *result)
|
||||
|
||||
// function returned a value
|
||||
// assign it to result string stored by python-style variable
|
||||
// or if user specified a length, assign it to longstr
|
||||
|
||||
if (pfuncs[ifunc].noutput) {
|
||||
int otype = pfuncs[ifunc].otype;
|
||||
@ -258,7 +265,9 @@ void Python::invoke_function(int ifunc, char *result)
|
||||
sprintf(result,"%.15g",PyFloat_AsDouble(pValue));
|
||||
} else if (otype == STRING) {
|
||||
char *pystr = PyString_AsString(pValue);
|
||||
strncpy(result,pystr,VALUELENGTH-1);
|
||||
if (pfuncs[ifunc].longstr)
|
||||
strncpy(pfuncs[ifunc].longstr,pystr,pfuncs[ifunc].length_longstr);
|
||||
else strncpy(result,pystr,VALUELENGTH-1);
|
||||
}
|
||||
Py_DECREF(pValue);
|
||||
}
|
||||
@ -287,6 +296,13 @@ int Python::variable_match(char *name, char *varname, int numeric)
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
char *Python::long_string(int ifunc)
|
||||
{
|
||||
return pfuncs[ifunc].longstr;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
int Python::create_entry(char *name)
|
||||
{
|
||||
// ifunc = index to entry by name in pfuncs vector, can be old or new
|
||||
@ -370,6 +386,7 @@ int Python::create_entry(char *name)
|
||||
// process output as value or variable
|
||||
|
||||
pfuncs[ifunc].ovarname = NULL;
|
||||
pfuncs[ifunc].longstr = NULL;
|
||||
if (!noutput) return ifunc;
|
||||
|
||||
char type = format[ninput];
|
||||
@ -378,6 +395,14 @@ int Python::create_entry(char *name)
|
||||
else if (type == 's') pfuncs[ifunc].otype = STRING;
|
||||
else error->all(FLERR,"Invalid python command");
|
||||
|
||||
if (length_longstr) {
|
||||
if (pfuncs[ifunc].otype != STRING)
|
||||
error->all(FLERR,"Python command length keyword "
|
||||
"cannot be used unless output is a string");
|
||||
pfuncs[ifunc].length_longstr = length_longstr;
|
||||
pfuncs[ifunc].longstr = new char[length_longstr+1];
|
||||
}
|
||||
|
||||
if (strstr(ostr,"v_") != ostr) error->all(FLERR,"Invalid python command");
|
||||
int n = strlen(&ostr[2]) + 1;
|
||||
pfuncs[ifunc].ovarname = new char[n];
|
||||
@ -398,4 +423,5 @@ void Python::deallocate(int i)
|
||||
delete [] pfuncs[i].svalue[j];
|
||||
delete [] pfuncs[i].svalue;
|
||||
delete [] pfuncs[i].ovarname;
|
||||
delete [] pfuncs[i].longstr;
|
||||
}
|
||||
|
||||
@ -28,9 +28,10 @@ class Python : protected Pointers {
|
||||
void invoke_function(int, char *);
|
||||
int find(char *);
|
||||
int variable_match(char *, char *, int);
|
||||
char *long_string(int);
|
||||
|
||||
private:
|
||||
int ninput,noutput;
|
||||
int ninput,noutput,length_longstr;
|
||||
char **istr;
|
||||
char *ostr,*format;
|
||||
void *pyMain;
|
||||
@ -44,6 +45,8 @@ class Python : protected Pointers {
|
||||
char **svalue;
|
||||
int otype;
|
||||
char *ovarname;
|
||||
char *longstr;
|
||||
int length_longstr;
|
||||
void *pFunc;
|
||||
};
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user