Compare commits

...

74 Commits

Author SHA1 Message Date
81a2db8a0c 17Dec16 patch 2016-12-16 11:36:54 -07:00
0a176841e7 extra python_wrapper change needed for last patch 2016-12-16 11:35:42 -07:00
3027ac9250 patch 16Dec16 2016-12-16 10:30:57 -07:00
fc54ab5cea Merge pull request #301 from akohlmey/corrections-and-bugfixes
Collected corrections and bugfixes
2016-12-16 10:25:29 -07:00
e364b80724 added length keyword to python command 2016-12-16 10:24:25 -07:00
830c9e8661 Merge branch 'USER-DPD_internal_energy' of https://github.com/timattox/lammps_USER-DPD into corrections-and-bugfixes
This closes #303
2016-12-16 11:22:25 -05:00
4907b29ad2 Merge branch 'USER-DPD_bugfixes' of https://github.com/timattox/lammps_USER-DPD into corrections-and-bugfixes
This closes #302
2016-12-16 11:21:15 -05:00
eff7238ff2 USER-DPD: fix_eos*: partition all internal energy into the uMech term only
This makes our results more closely match a vetted serial implementation.
NOTE: This does make the output different from any previous versions.
Patch by Jim Larentzos.  Applied by Tim Mattox.
2016-12-16 10:25:12 -05:00
126fb22e93 USER-DPD: Fix #define typo in pair_multi_lucy.h and pair_multi_lucy_rx.h 2016-12-16 10:08:30 -05:00
0a90492c44 USER-DPD: Update the header files to properly document all error statements
Patch by Jim Larentzos.  Applied by Tim Mattox.
2016-12-15 17:39:15 -05:00
fed629c23e USER-DPD: Bugfix for fix_rx and fix_eos_table_rx to handle restart files.
Patch by Jim Larentzos.  Applied by Tim Mattox.
2016-12-15 17:10:13 -05:00
925481c3f4 USER-DPD: Fix hard-wall force interaction bug, and ensure fraction is >= 0
pair_exp6_rx.cpp patch by Jim Larentzos.  Applied by Tim Mattox.
2016-12-15 16:46:25 -05:00
da2ad5b6e0 update FixIntel code for new neighbor list code 2016-12-14 15:51:12 -05:00
bfcab72268 restore change to make -DLAMMPS_MEMALIGN=64 default when USER-INTEL package is installed (which requires it) 2016-12-14 15:24:55 -05:00
f509f133af patch 13Dec16: neighbor refactor, Stan pppm/disp bug fix, M Brown INTEL package updates 2016-12-13 17:14:28 -07:00
624c57e9da Merge pull request #185 from akohlmey/new-neighbor
New neighbor list code with updates for USER-OMP and USER-DPD
2016-12-13 16:24:41 -07:00
f3b355bcbe Merge pull request #298 from akohlmey/collected-small-fixes
Collected small fixes
2016-12-13 16:23:23 -07:00
ae5764beac added functionity to lib interface 2016-12-13 16:22:17 -07:00
fda43c00fd add deleted file in package to purge list 2016-12-12 13:22:54 -05:00
b34be30be6 Merge pull request #53 from stanmoore1/new-neighbor
New neighbor Kokkos
2016-12-12 13:18:03 -05:00
13b6196b82 Fixing Kokkos compile error 2016-12-12 10:47:39 -07:00
baf55c90f4 Whitespace change 2016-12-12 09:25:41 -07:00
770f5d0bf7 Whitespace change 2016-12-12 09:24:37 -07:00
a31b00965a Updating to master 2016-12-12 09:18:20 -07:00
a5e46e3e6a Merging 2016-12-09 16:20:42 -07:00
31be0da590 Merging pull request 2016-12-09 16:17:35 -07:00
0f3b2544a1 Merge pull request #1 from timattox/new-neighbor
USER-DPD workaround for neighbor list issues
2016-12-09 16:08:31 -07:00
586514e05c Merge branch 'new-neighbor' into new-neighbor 2016-12-09 16:08:08 -07:00
43c459ba56 More changes for Kokkos neighbor 2016-12-09 15:56:55 -07:00
b5c3d2f66c Merge pull request #52 from timattox/new-neighbor
USER-DPD workaround for neighbor list issues
2016-12-09 17:51:35 -05:00
5187cb97e5 USER-DPD: Make fix_shardlow request its own SSA-specific neighbor list,
instead of having pair_dpd_fdt* make the SSA-neighbor list request for it.
Forces an "extra" list to be built, but now skip lists work properly.
Maybe we can detect if skip lists won't be used, and squash the extra list.
2016-12-09 15:42:27 -06:00
eff503e56c Prevent neighbor list copies between SSA and non-SSA neighbor list requests. 2016-12-09 15:39:46 -06:00
cdcebab3bd make the output of the %CPU/OpenMP threads line consistent with compiling in OpenMP support, not having USER-OMP installed 2016-12-09 14:43:56 -05:00
ddf678da51 make fix gcmc command overview consistent
this closes #296
2016-12-09 14:30:27 -05:00
435421301b Small tweaks to Kokkos neighbor 2016-12-09 08:37:01 -07:00
9b48c49f83 Removing used Kokkos file 2016-12-08 09:18:55 -07:00
d3d5ac17bf Fixed small typos in doc 2016-12-07 19:37:51 -08:00
8318c67816 Kokkos neighbor refactor 2016-12-07 13:00:27 -07:00
7c61dbf5e2 Merge branch 'new-neighbor' of github.com:akohlmey/lammps into new-neighbor 2016-12-07 13:43:04 -05:00
39a12b15d7 Merge branch 'master' into new-neighbor
Resolved Conflicts:
	src/Purge.list
	src/neigh_derive.cpp
2016-12-07 13:40:14 -05:00
fb3f597f41 30Nov16 patch 2016-11-30 14:04:41 -07:00
d14814ae2e Merge pull request #289 from akohlmey/collected-updates-and-bugfixes
Collected updates and bugfixes
2016-11-30 14:02:41 -07:00
beb5a30f67 new compute global/atom command, also bug fix for descending dump sorts 2016-11-30 14:01:27 -07:00
7ddb6670c0 fix typo 2016-11-30 00:12:35 -05:00
789e62388f simplify code 2016-11-29 09:03:53 -05:00
7d098bff90 update format
(cherry picked from commit 2597185afb)
2016-11-29 09:01:36 -05:00
1d970d3cdf dihedral_nharmonic: added writing coefficient by write_data
(cherry picked from commit 618f5c6aa5)
2016-11-29 09:01:36 -05:00
42d430168b fix typo in compute cluster/atom docs
this closes #292
2016-11-29 07:24:09 -05:00
5ff5bc2a6c avoid issues detected by coverity scan 2016-11-28 21:34:35 -05:00
02ae2d218a correct broken link to USER-SMD docs PDF 2016-11-28 11:34:22 -05:00
470908fc93 explicitly disallow dynamic groups with compute rdf 2016-11-24 05:46:15 -05:00
6759630c16 bug fix for dump sort in descending order 2016-11-23 17:08:36 -07:00
87781771ba fix typo and USER-OMP support omission 2016-11-23 09:02:32 -05:00
df46b9aa38 rename compute pressure/grem to compute PRESSURE/GREM 2016-11-22 15:25:59 -05:00
647c6f00ce Merge branch 'grem-feature' of https://github.com/dstelter92/lammps into collected-updates-and-bugfixes 2016-11-22 14:51:19 -05:00
237307eda2 small typo and changes 2016-11-22 12:16:00 -05:00
d58dd4f159 bugfix when parsing mpirun 2016-11-22 12:13:20 -05:00
ae70f1090f added readme for grem examples 2016-11-22 12:05:14 -05:00
114926a488 Merge branch 'master' into new-neighbor 2016-10-02 00:26:56 -04:00
5eb9dd0c5d Merge branch 'master' into new-neighbor 2016-09-29 23:14:28 -04:00
ebabc8f0bc Merge remote-tracking branch 'lammps-rw/integration' into update-neighbor 2016-09-09 15:46:20 -04:00
232abf8534 restore locale and enforce grep option squashing 2016-09-09 15:42:14 -04:00
d22caf2658 Merge pull request #29 from timattox/new-neighbor
USER-DPD: remove several files from src that came from src/USER-DPD/
2016-09-09 15:28:56 -04:00
3842aa6095 forward skip lists /omp neighbor list builds to non-omp implementations 2016-09-09 15:23:40 -04:00
32c240978a USER-DPD: remove several files from src that came from src/USER-DPD/
These were accidentally added to git in c9c2ae6.
2016-09-09 15:17:42 -04:00
212c2617f6 delete a couple more files, that don't belong into src/ 2016-09-09 14:56:23 -04:00
40f85c93ba corrected mismatched free() vs. delete[] 2016-09-09 14:10:42 -04:00
2f02d98469 remove USER-DPD files that should not be where they are 2016-09-09 13:53:07 -04:00
4553881fc2 Merge pull request #28 from timattox/new-neighbor
New neighbor, USER-DPD updates
2016-09-09 13:11:55 -04:00
81fcbcd99c USER-DPD: move nstencil_ssa out of core LAMMPS into USER-DPD 2016-09-09 12:19:54 -04:00
82c6eb4675 USER-DPD: Set missing NP_HALF flag in npair_half_bin_newton_ssa.h 2016-09-09 12:19:36 -04:00
8ed3f4226e USER-DPD: move custom binning stuff to a NBinSSA child class.
Removes most SSA specific fields from class NeighList.
2016-09-09 12:19:06 -04:00
9b7a0d7e1c Update gitignore for the new USER-DPD source files. 2016-09-09 12:18:51 -04:00
c9c2ae6c61 new neighbor list changes 2016-09-07 13:42:58 -06:00
422 changed files with 25749 additions and 16640 deletions

View File

@ -1,7 +1,7 @@
<!-- HTML_ONLY -->
<HEAD>
<TITLE>LAMMPS Users Manual</TITLE>
<META NAME="docnumber" CONTENT="22 Nov 2016 version">
<META NAME="docnumber" CONTENT="17 Dec 2016 version">
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
</HEAD>
@ -21,7 +21,7 @@
<H1></H1>
LAMMPS Documentation :c,h3
22 Nov 2016 version :c,h4
17 Dec 2016 version :c,h4
Version info: :h4

View File

@ -632,11 +632,11 @@ USER-INTEL, k = KOKKOS, o = USER-OMP, t = OPT.
"rigid/npt (o)"_fix_rigid.html,
"rigid/nve (o)"_fix_rigid.html,
"rigid/nvt (o)"_fix_rigid.html,
<"rigid/small (o)"_fix_rigid.html,
"rigid/small/nph"_fix_rigid.html,
"rigid/small/npt"_fix_rigid.html,
"rigid/small/nve"_fix_rigid.html,
"rigid/small/nvt"_fix_rigid.html,
"rigid/small (o)"_fix_rigid.html,
"rigid/small/nph (o)"_fix_rigid.html,
"rigid/small/npt (o)"_fix_rigid.html,
"rigid/small/nve (o)"_fix_rigid.html,
"rigid/small/nvt (o)"_fix_rigid.html,
"setforce (k)"_fix_setforce.html,
"shake"_fix_shake.html,
"spring"_fix_spring.html,
@ -767,6 +767,7 @@ KOKKOS, o = USER-OMP, t = OPT.
"erotate/sphere"_compute_erotate_sphere.html,
"erotate/sphere/atom"_compute_erotate_sphere_atom.html,
"event/displace"_compute_event_displace.html,
"global/atom"_compute_global_atom.html,
"group/group"_compute_group_group.html,
"gyration"_compute_gyration.html,
"gyration/chunk"_compute_gyration_chunk.html,

View File

@ -1936,18 +1936,22 @@ documentation in the src/library.cpp file for details, including
which quantities can be queried by name:
void *lammps_extract_global(void *, char *)
void lammps_extract_box(void *, double *, double *,
double *, double *, double *, int *, int *)
void *lammps_extract_atom(void *, char *)
void *lammps_extract_compute(void *, char *, int, int)
void *lammps_extract_fix(void *, char *, int, int, int, int)
void *lammps_extract_variable(void *, char *, char *) :pre
int lammps_set_variable(void *, char *, char *)
double lammps_get_thermo(void *, char *) :pre
void lammps_reset_box(void *, double *, double *, double, double, double)
int lammps_set_variable(void *, char *, char *) :pre
double lammps_get_thermo(void *, char *)
int lammps_get_natoms(void *)
void lammps_gather_atoms(void *, double *)
void lammps_scatter_atoms(void *, double *) :pre
void lammps_create_atoms(void *, int, tagint *, int *, double *, double *) :pre
void lammps_create_atoms(void *, int, tagint *, int *, double *, double *,
imageint *, int) :pre
The extract functions return a pointer to various global or per-atom
quantities stored in LAMMPS or to values calculated by a compute, fix,
@ -1957,10 +1961,16 @@ the other extract functions, the underlying storage may be reallocated
as LAMMPS runs, so you need to re-call the function to assure a
current pointer or returned value(s).
The lammps_reset_box() function resets the size and shape of the
simulation box, e.g. as part of restoring a previously extracted and
saved state of a simulation.
The lammps_set_variable() function can set an existing string-style
variable to a new string value, so that subsequent LAMMPS commands can
access the variable. The lammps_get_thermo() function returns the
current value of a thermo keyword as a double.
access the variable.
The lammps_get_thermo() function returns the current value of a thermo
keyword as a double precision value.
The lammps_get_natoms() function returns the total number of atoms in
the system and can be used by the caller to allocate space for the
@ -1973,10 +1983,13 @@ passed by the caller, to each atom owned by individual processors.
The lammps_create_atoms() function takes a list of N atoms as input
with atom types and coords (required), an optionally atom IDs and
velocities. It uses the coords of each atom to assign it as a new
atom to the processor that owns it. Additional properties for the new
atoms can be assigned via the lammps_scatter_atoms() or
lammps_extract_atom() functions.
velocities and image flags. It uses the coords of each atom to assign
it as a new atom to the processor that owns it. This function is
useful to add atoms to a simulation or (in tandem with
lammps_reset_box()) to restore a previously extracted and saved state
of a simulation. Additional properties for the new atoms can then be
assigned via the lammps_scatter_atoms() or lammps_extract_atom()
functions.
The examples/COUPLE and python directories have example C++ and C and
Python codes which show how a driver code can link to LAMMPS as a

View File

@ -51,12 +51,12 @@ relative to the center of mass (COM) velocity of the 2 atoms in the
bond.
The value {engvib} is the vibrational kinetic energy of the two atoms
in the bond, which is simply 1/2 m1 v1^2 + 1/2 m1 v2^2, where v1 and
in the bond, which is simply 1/2 m1 v1^2 + 1/2 m2 v2^2, where v1 and
v2 are the magnitude of the velocity of the 2 atoms along the bond
direction, after the COM velocity has been subtracted from each.
The value {engrot} is the rotationsl kinetic energy of the two atoms
in the bond, which is simply 1/2 m1 v1^2 + 1/2 m1 v2^2, where v1 and
in the bond, which is simply 1/2 m1 v1^2 + 1/2 m2 v2^2, where v1 and
v2 are the magnitude of the velocity of the 2 atoms perpendicular to
the bond direction, after the COM velocity has been subtracted from
each.
@ -67,7 +67,7 @@ Vcm^2 where Vcm = magnitude of the velocity of the COM.
Note that these 3 kinetic energy terms are simply a partitioning of
the summed kinetic energy of the 2 atoms themselves. I.e. total KE =
1/2 m1 v1^2 + 1/2 m2 v3^2 = engvib + engrot + engtrans, where v1,v2
1/2 m1 v1^2 + 1/2 m2 v2^2 = engvib + engrot + engtrans, where v1,v2
are the magnitude of the velocities of the 2 atoms, without any
adjustment for the COM velocity.

View File

@ -641,7 +641,8 @@ the restarted simulation begins.
[Related commands:]
"fix ave/chunk"_fix_ave_chunk.html
"fix ave/chunk"_fix_ave_chunk.html,
"compute global/atom"_compute_global_atom.html
[Default:]

View File

@ -37,7 +37,7 @@ The neighbor list needed to compute this quantity is constructed each
time the calculation is performed (i.e. each time a snapshot of atoms
is dumped). Thus it can be inefficient to compute/dump this quantity
too frequently or to have multiple compute/dump commands, each of a
{clsuter/atom} style.
{cluster/atom} style.
NOTE: If you have a bonded system, then the settings of
"special_bonds"_special_bonds.html command can remove pairwise

View File

@ -0,0 +1,220 @@
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
:link(lws,http://lammps.sandia.gov)
:link(ld,Manual.html)
:link(lc,Section_commands.html#comm)
:line
compute global/atom command :h3
[Syntax:]
compute ID group-ID style index input1 input2 ... :pre
ID, group-ID are documented in "compute"_compute.html command :ulb,l
global/atom = style name of this compute command :l
index = c_ID, c_ID\[N\], f_ID, f_ID\[N\], v_name :l
c_ID = per-atom vector calculated by a compute with ID
c_ID\[I\] = Ith column of per-atom array calculated by a compute with ID
f_ID = per-atom vector calculated by a fix with ID
f_ID\[I\] = Ith column of per-atom array calculated by a fix with ID
v_name = per-atom vector calculated by an atom-style variable with name :pre
one or more inputs can be listed :l
input = c_ID, c_ID\[N\], f_ID, f_ID\[N\], v_name :l
c_ID = global vector calculated by a compute with ID
c_ID\[I\] = Ith column of global array calculated by a compute with ID, I can include wildcard (see below)
f_ID = global vector calculated by a fix with ID
f_ID\[I\] = Ith column of global array calculated by a fix with ID, I can include wildcard (see below)
v_name = global vector calculated by a vector-style variable with name :pre
:ule
[Examples:]
compute 1 all global/atom c_chunk c_com\[1\\] c_com\[2\\] c_com\[3\\]
compute 1 all global/atom c_chunk c_com\[*\\] :pre
[Description:]
Define a calculation that assigns global values to each atom from
vectors or arrays of global values. The specified {index} parameter
is used to determine which global value is assigned to each atom.
The {index} parameter must reference a per-atom vector or array from a
"compute"_compute.html or "fix"_fix.html or the evaluation of an
atom-style "variable"_variable.html. Each {input} value must
reference a global vector or array from a "compute"_compute.html or
"fix"_fix.html or the evaluation of an vector-style
"variable"_variable.html. Details are given below.
The {index} value for an atom is used as a index I (from 1 to N) into
the vector associated with each of the input values. The Ith value
from the input vector becomes one output value for that atom. If the
atom is not in the specified group, or the index I < 1 or I > M, where
M is the actual length of the input vector, then an output value of
0.0 is assigned to the atom.
An example of how this command is useful, is in the context of
"chunks" which are static or dyanmic subsets of atoms. The "compute
chunk/atom"_compute_chunk_atom.html command assigns unique chunk IDs
to each atom. It's output can be used as the {index} parameter for
this command. Various other computes with "chunk" in their style
name, such as "compute com/chunk"_compute_com_chunk.html or "compute
msd/chunk"_compute_msd_chunk.html, calculate properties for each
chunk. The output of these commands are global vectors or arrays,
with one or more values per chunk, and can be used as input values for
this command. This command will then assign the global chunk value to
each atom in the chunk, producing a per-atom vector or per-atom array
as output. The per-atom values can then be output to a dump file or
used by any command that uses per-atom values from a compute as input,
as discussed in "Section 6.15"_Section_howto.html#howto_15.
As a concrete example, these commands will calculate the displacement
of each atom from the center-of-mass of the molecule it is in, and
dump those values to a dump file. In this case, each molecule is a
chunk.
compute cc1 all chunk/atom molecule
compute myChunk all com/chunk cc1
compute prop all property/atom xu yu zu
compute glob all global/atom c_cc1 c_myChunk\[*\]
variable dx atom c_prop\[1\]-c_glob\[1\]
variable dy atom c_prop\[2\]-c_glob\[2\]
variable dz atom c_prop\[3\]-c_glob\[3\]
variable dist atom sqrt(v_dx*v_dx+v_dy*v_dy+v_dz*v_dz)
dump 1 all custom 100 tmp.dump id xu yu zu c_glob\[1\] c_glob\[2\] c_glob\[3\] &
v_dx v_dy v_dz v_dist
dump_modify 1 sort id :pre
You can add these commands to the bench/in.chain script to see how
they work.
:line
Note that for input values from a compute or fix, the bracketed index
I can be specified using a wildcard asterisk with the index to
effectively specify multiple values. This takes the form "*" or "*n"
or "n*" or "m*n". If N = the size of the vector (for {mode} = scalar)
or the number of columns in the array (for {mode} = vector), then an
asterisk with no numeric values means all indices from 1 to N. A
leading asterisk means all indices from 1 to n (inclusive). A
trailing asterisk means all indices from n to N (inclusive). A middle
asterisk means all indices from m to n (inclusive).
Using a wildcard is the same as if the individual columns of the array
had been listed one by one. E.g. these 2 compute global/atom commands
are equivalent, since the "compute com/chunk"_compute_com_chunk.html
command creates a global array with 3 columns:
compute cc1 all chunk/atom molecule
compute com all com/chunk cc1
compute 1 all global/atom c_cc1 c_com\[1\] c_com\[2\] c_com\[3\]
compute 1 all global/atom c_cc1 c_com\[*\] :pre
:line
This section explains the {index} parameter. Note that it must
reference per-atom values, as contrasted with the {input} values which
must reference global values.
Note that all of these options generate floating point values. When
they are used as an index into the specified input vectors, they
simple rounded down to convert the value to integer indices. The
final values should range from 1 to N (inclusive), since they are used
to access values from N-length vectors.
If {index} begins with "c_", a compute ID must follow which has been
previously defined in the input script. The compute must generate
per-atom quantities. See the individual "compute"_compute.html doc
page for details. If no bracketed integer is appended, the per-atom
vector calculated by the compute is used. If a bracketed integer is
appended, the Ith column of the per-atom array calculated by the
compute is used. Users can also write code for their own compute
styles and "add them to LAMMPS"_Section_modify.html. See the
discussion above for how I can be specified with a wildcard asterisk
to effectively specify multiple values.
If {index} begins with "f_", a fix ID must follow which has been
previously defined in the input script. The Fix must generate
per-atom quantities. See the individual "fix"_fix.html doc page for
details. Note that some fixes only produce their values on certain
timesteps, which must be compatible with when compute global/atom
references the values, else an error results. If no bracketed integer
is appended, the per-atom vector calculated by the fix is used. If a
bracketed integer is appended, the Ith column of the per-atom array
calculated by the fix is used. Users can also write code for their
own fix style and "add them to LAMMPS"_Section_modify.html. See the
discussion above for how I can be specified with a wildcard asterisk
to effectively specify multiple values.
If {index} begins with "v_", a variable name must follow which has
been previously defined in the input script. It must be an
"atom-style variable"_variable.html. Atom-style variables can
reference thermodynamic keywords and various per-atom attributes, or
invoke other computes, fixes, or variables when they are evaluated, so
this is a very general means of generating per-atom quantities to use
as {index}.
:line
This section explains the kinds of {input} values that can be used.
Note that inputs reference global values, as contrasted with the
{index} parameter which must reference per-atom values.
If a value begins with "c_", a compute ID must follow which has been
previously defined in the input script. The compute must generate a
global vector or array. See the individual "compute"_compute.html doc
page for details. If no bracketed integer is appended, the vector
calculated by the compute is used. If a bracketed integer is
appended, the Ith column of the array calculated by the compute is
used. Users can also write code for their own compute styles and "add
them to LAMMPS"_Section_modify.html. See the discussion above for how
I can be specified with a wildcard asterisk to effectively specify
multiple values.
If a value begins with "f_", a fix ID must follow which has been
previously defined in the input script. The fix must generate a
global vector or array. See the individual "fix"_fix.html doc page
for details. Note that some fixes only produce their values on
certain timesteps, which must be compatible with when compute
global/atom references the values, else an error results. If no
bracketed integer is appended, the vector calculated by the fix is
used. If a bracketed integer is appended, the Ith column of the array
calculated by the fix is used. Users can also write code for their
own fix style and "add them to LAMMPS"_Section_modify.html. See the
discussion above for how I can be specified with a wildcard asterisk
to effectively specify multiple values.
If a value begins with "v_", a variable name must follow which has
been previously defined in the input script. It must be a
"vector-style variable"_variable.html. Vector-style variables can
reference thermodynamic keywords and various other attributes of
atoms, or invoke other computes, fixes, or variables when they are
evaluated, so this is a very general means of generating a vector of
global quantities which the {index} parameter will reference for
assignement of global values to atoms.
:line
[Output info:]
If a single input is specified this compute produces a per-atom
vector. If multiple inputs are specified, this compute produces a
per-atom array values, where the number of columns is equal to the
number of inputs specified. These values can be used by any command
that uses per-atom vector or array values from a compute as input.
See "Section 6.15"_Section_howto.html#howto_15 for an overview of
LAMMPS output options.
The per-atom vector or array values will be in whatever units the
corresponsing input values are in.
[Restrictions:] none
[Related commands:]
"compute"_compute.html, "fix"_fix.html, "variable"_variable.html,
"compute chunk/atom"_compute_chunk_atom.html, "compute
reduce"_compute_reduce.html
[Default:] none

View File

@ -27,7 +27,7 @@ contact radius is used only to prevent particles belonging to
different physical bodies from penetrating each other. It is used by
the contact pair styles, e.g., smd/hertz and smd/tri_surface.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
Mach Dynamics in LAMMPS.
The value of the contact radius will be 0.0 for particles not in the

View File

@ -24,7 +24,7 @@ compute 1 all smd/damage :pre
Define a computation that calculates the damage status of SPH particles
according to the damage model which is defined via the SMD SPH pair styles, e.g., the maximum plastic strain failure criterion.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth Mach Dynamics in LAMMPS.
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth Mach Dynamics in LAMMPS.
[Output Info:]

View File

@ -32,7 +32,7 @@ configuration. This compute is only really useful for debugging the
hourglass control mechanim which is part of the Total-Lagrangian SPH
pair style.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth
Mach Dynamics in LAMMPS.
[Output Info:]

View File

@ -24,7 +24,7 @@ compute 1 all smd/internal/energy :pre
Define a computation which outputs the per-particle enthalpy, i.e.,
the sum of potential energy and heat.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth
Mach Dynamics in LAMMPS.
[Output Info:]

View File

@ -25,7 +25,7 @@ Define a computation that outputs the equivalent plastic strain per
particle. This command is only meaningful if a material model with
plasticity is defined.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth
Mach Dynamics in LAMMPS.
[Output Info:]

View File

@ -25,7 +25,7 @@ Define a computation that outputs the time rate of the equivalent
plastic strain. This command is only meaningful if a material model
with plasticity is defined.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth
Mach Dynamics in LAMMPS.
[Output Info:]

View File

@ -26,7 +26,7 @@ The mass density is the mass of a particle which is constant during
the course of a simulation, divided by its volume, which can change
due to mechanical deformation.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth
Mach Dynamics in LAMMPS.
[Output info:]

View File

@ -25,7 +25,7 @@ Define a computation that calculates the deformation gradient. It is
only meaningful for particles which interact according to the
Total-Lagrangian SPH pair style.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth
Mach Dynamics in LAMMPS.
[Output info:]

View File

@ -30,7 +30,7 @@ time step. This calculation is performed automatically in the
relevant SPH pair styles and this compute only serves to make the
stable time increment accessible for output purposes.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
Mach Dynamics in LAMMPS.
[Output info:]

View File

@ -25,7 +25,7 @@ Define a computation that calculates the number of particles inside of
the smoothing kernel radius for particles interacting via the
Total-Lagrangian SPH pair style.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
Mach Dynamics in LAMMPS.
[Output info:]

View File

@ -26,7 +26,7 @@ associated with a particle as a rotated ellipsoid. It is only
meaningful for particles which interact according to the
Total-Lagrangian SPH pair style.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth
Mach Dynamics in LAMMPS.
[Output info:]

View File

@ -24,7 +24,7 @@ compute 1 all smd/tlsph/strain :pre
Define a computation that calculates the Green-Lagrange strain tensor
for particles interacting via the Total-Lagrangian SPH pair style.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
Mach Dynamics in LAMMPS.
[Output info:]

View File

@ -24,7 +24,7 @@ compute 1 all smd/tlsph/strain/rate :pre
Define a computation that calculates the rate of the strain tensor for
particles interacting via the Total-Lagrangian SPH pair style.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
Mach Dynamics in LAMMPS.
[Output info:]

View File

@ -24,7 +24,7 @@ compute 1 all smd/tlsph/stress :pre
Define a computation that outputs the Cauchy stress tensor for
particles interacting via the Total-Lagrangian SPH pair style.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
Mach Dynamics in LAMMPS.
[Output info:]

View File

@ -25,7 +25,7 @@ Define a computation that returns the coordinates of the vertices
corresponding to the triangle-elements of a mesh created by the "fix
smd/wall_surface"_fix_smd_wall_surface.html.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
Mach Dynamics in LAMMPS.
[Output info:]

View File

@ -25,7 +25,7 @@ Define a computation that returns the number of neighbor particles
inside of the smoothing kernel radius for particles interacting via
the updated Lagrangian SPH pair style.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
Mach Dynamics in LAMMPS.
[Output info:]

View File

@ -24,7 +24,7 @@ compute 1 all smd/ulsph/strain :pre
Define a computation that outputs the logarithmic strain tensor. for
particles interacting via the updated Lagrangian SPH pair style.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
Mach Dynamics in LAMMPS.
[Output info:]

View File

@ -25,7 +25,7 @@ Define a computation that outputs the rate of the logarithmic strain
tensor for particles interacting via the updated Lagrangian SPH pair
style.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
Mach Dynamics in LAMMPS.
[Output info:]

View File

@ -23,7 +23,7 @@ compute 1 all smd/ulsph/stress :pre
Define a computation that outputs the Cauchy stress tensor.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
Mach Dynamics in LAMMPS.
[Output info:]

View File

@ -24,7 +24,7 @@ compute 1 all smd/vol :pre
Define a computation that provides the per-particle volume and the sum
of the per-particle volumes of the group for which the fix is defined.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth
Mach Dynamics in LAMMPS.
[Output info:]

View File

@ -21,7 +21,7 @@ type = atom type for inserted atoms (must be 0 if mol keyword used) :l
seed = random # seed (positive integer) :l
T = temperature of the ideal gas reservoir (temperature units) :l
mu = chemical potential of the ideal gas reservoir (energy units) :l
translate = maximum Monte Carlo translation distance (length units) :l
displace = maximum Monte Carlo translation distance (length units) :l
zero or more keyword/value pairs may be appended to args :l
keyword = {mol}, {region}, {maxangle}, {pressure}, {fugacity_coeff}, {full_energy}, {charge}, {group}, {grouptype}, {intra_energy}, or {tfac_insert}
{mol} value = template-ID

View File

@ -36,7 +36,7 @@ stable maximum time step.
This fix inquires the minimum stable time increment across all particles contained in the group for which this
fix is defined. An additional safety factor {s_fact} is applied to the time increment.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth Mach Dynamics in LAMMPS.
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth Mach Dynamics in LAMMPS.
[Restart, fix_modify, output, run start/stop, minimize info:]

View File

@ -32,7 +32,7 @@ fix 1 all smd/integrate_tlsph limit_velocity 1000 :pre
The fix performs explicit time integration for particles which interact according with the Total-Lagrangian SPH pair style.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth Mach Dynamics in LAMMPS.
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth Mach Dynamics in LAMMPS.
The {limit_velocity} keyword will control the velocity, scaling the norm of
the velocity vector to max_vel in case it exceeds this velocity limit.

View File

@ -34,7 +34,7 @@ fix 1 all smd/integrate_ulsph limit_velocity 1000 :pre
[Description:]
The fix performs explicit time integration for particles which interact with the updated Lagrangian SPH pair style.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth Mach Dynamics in LAMMPS.
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth Mach Dynamics in LAMMPS.
The {adjust_radius} keyword activates dynamic adjustment of the per-particle SPH smoothing kernel radius such that the number of neighbors per particles remains
within the interval {min_nn} to {max_nn}. The parameter {adjust_radius_factor} determines the amount of adjustment per timestep. Typical values are

View File

@ -55,7 +55,7 @@ specified. This style also sets the velocity of each particle to (omega cross
Rperp) where omega is its angular velocity around the rotation axis and
Rperp is a perpendicular vector from the rotation axis to the particle.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to using Smooth Mach Dynamics in LAMMPS.
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to using Smooth Mach Dynamics in LAMMPS.
[Restart, fix_modify, output, run start/stop, minimize info:]

View File

@ -37,7 +37,7 @@ It is possible to move the triangulated surface via the "smd/move_tri_surf"_fix_
Immediately after a .STL file has been read, the simulation needs to be run for 0 timesteps in order to properly register the new particles
in the system. See the "funnel_flow" example in the USER-SMD examples directory.
See "this PDF guide"_USER/smd/SMD_LAMMPS_userguide.pdf to use Smooth Mach Dynamics in LAMMPS.
See "this PDF guide"_PDF/SMD_LAMMPS_userguide.pdf to use Smooth Mach Dynamics in LAMMPS.
[Restart, fix_modify, output, run start/stop, minimize info:]

View File

@ -311,6 +311,7 @@ compute_erotate_sphere.html
compute_erotate_sphere_atom.html
compute_event_displace.html
compute_fep.html
compute_global_atom.html
compute_group_group.html
compute_gyration.html
compute_gyration_chunk.html

View File

@ -39,7 +39,7 @@ invocation of the {tlsph} for a solid body would consist of an equation of state
the pressure (the diagonal components of the stress tensor), and a material model to compute shear
stresses (the off-diagonal components of the stress tensor). Damage and failure models can also be added.
Please see the "SMD user guide"_USER/smd/SMD_LAMMPS_userguide.pdf for a complete listing of the possible keywords and material models.
Please see the "SMD user guide"_PDF/SMD_LAMMPS_userguide.pdf for a complete listing of the possible keywords and material models.
:line

View File

@ -43,7 +43,7 @@ stresses (the off-diagonal components of the stress tensor).
Note that the use of *GRADIENT_CORRECTION can lead to severe numerical instabilities. For a general fluid simulation, *NO_GRADIENT_CORRECTION is recommended.
Please see the "SMD user guide"_USER/smd/SMD_LAMMPS_userguide.pdf for a complete listing of the possible keywords and material models.
Please see the "SMD user guide"_PDF/SMD_LAMMPS_userguide.pdf for a complete listing of the possible keywords and material models.
:line

View File

@ -14,7 +14,7 @@ python func keyword args ... :pre
func = name of Python function :ulb,l
one or more keyword/args pairs must be appended :l
keyword = {invoke} or {input} or {return} or {format} or {file} or {here} or {exists}
keyword = {invoke} or {input} or {return} or {format} or {length} or {file} or {here} or {exists}
{invoke} arg = none = invoke the previously defined Python function
{input} args = N i1 i2 ... iN
N = # of inputs to function
@ -29,6 +29,8 @@ keyword = {invoke} or {input} or {return} or {format} or {file} or {here} or {ex
M = N+1 if there is a return value
fstring = each character (i,f,s,p) corresponds in order to an input or return value
'i' = integer, 'f' = floating point, 's' = string, 'p' = SELF
{length} arg = Nlen
Nlen = max length of string returned from Python function
{file} arg = filename
filename = file of Python code, which defines func
{here} arg = inline
@ -165,6 +167,17 @@ equal-style variable as an argument, but only if the output of the
Python function is flagged as a numeric value ("i" or "f") via the
{format} keyword.
If the {return} keyword is used and the {format} keyword specifies the
output as a string, then the default maximum length of that string is
63 characters (64-1 for the string terminator). If you want to return
a longer string, the {length} keyword can be specified with its {Nlen}
value set to a larger number (the code allocates space for Nlen+1 to
include the string terminator). If the Python function generates a
string longer than the default 63 or the specified {Nlen}, it will be
trunctated.
:line
Either the {file}, {here}, or {exists} keyword must be used, but only
one of them. These keywords specify what Python code to load into the
Python interpreter. The {file} keyword gives the name of a file,

View File

@ -0,0 +1,81 @@
Generalized Replica Exchange Method (gREM) examples
===================================================
Examples:
---------------------------------------------------
lj-single:
This example is the simplest case scenario utilizing the generalized
ensemble defined by fix_grem. It utilizes only 1 replica and requires
the LAMMPS executable to be run as usual:
mpirun -np 4 lmp_mpi -in in.gREM-npt
./lmp_serial -in in.gREM-nvt
While this does not obtain any information about Ts(H), it is most similar to
a microcanonical simulation and "single-replica gREM" can be useful for
studying non-equilibrium processes as well.
lj-6rep:
This example utilizes an external python script to handle swaps between
replicas. Included is run.sh, which requires the path to your LAMMPS
executable. The python script is fragile as it relies on parsing output files
from the LAMMPS run and moving LAMMPS data files between directories. Use
caution if modifying this example further. If complied with mpi, multiple
processors can be used as:
./run.sh $NUM_PROCS
a serial run is completed simply as
./run.sh 1
where the executable provided must be serial if "1" is provided as the number
of procs. While this external replica exchange module is quite slow and
inefficient, it allows for many replicas to be used on a single processor.
While here there are only 6 replicas, this example could be extended to >100
replicas while still using a serial compilation. This is also beneficial for
running on high performance nodes with few cores to complete a full-scale gREM
simulation with a large number of replicas.
A quick note on efficiency: frequent exchanges slow down this script
substantially because LAMMPS is restarted every exchange attempt. The script
works best for large systems with infrequent exchanges.
lj-temper:
This is an example using the internal replica exchange module. While fast
in comparison to the python version, it requires substantial resources
(at least 1 proc per replica). Instead of stopping LAMMPS every exchange
attempt, all replicas are run concurrently, and exchanges take place
internally. This requires use of LAMMPS partition mode, via the command
line using the -p flag. Input files require world type variables defining
the parameters of each replica. The included example with 4 replicas must
run on at least 4 procs, in that case LAMMPS could be initiated as:
mpirun -np 4 lmp_mpi -p 4x1 -in in.gREM-temper
spawning 4 partitions with 1 replica each. Multiple procs per replica could
be used. In the case of a 16 system with 4 replicas, the
following logic could be used:
mpirun -np 16 lmp_mpi -p 4x4 -in in.gREM-temper
Once started, a universe log file will be created as well as log files for
each replica. The universe (log.lammps) contains exchange information, while
the replicas (*/log.lammps.*) contains the thermo_output as usual. In this
example, in.gREM-temper moves the log files to their respective folders.
Closing Notes:
---------------------------------------------------
Of significant difference between lj-6rep and lj-temper is the format of data.
In lj-6rep, data is stored as 'replicas' meaning discontinuous trajectories, as
files are moved between directories labeled by the 'lambda' of the replica. In
lj-temper, data is stored as 'walkers' with continuous trajectories, but
discontinuous parameters. The later is significantly more efficient, but
requires post-processing to obtain per-replica information.
Any problems/questions should be directed to <dstelter@bu.edu>.

View File

@ -51,7 +51,7 @@ for exchange in arange(starting_ex,max_exchange):
os.chdir(path+"/%s" % lambdas[l])
#os.system("cp restart_file restart_file%d" % exchange)
if (nproc > 1):
os.system("mpirun -np %d " + lmp + " -in ../" + inp + " -var lambda %g -var eta %g -var enthalpy %g > output" % (nproc, lambdas[l], eta, H))
os.system("mpirun -np %d " % (nproc) + lmp + " -in ../" + inp + " -var lambda %g -var eta %g -var enthalpy %g > output" % (lambdas[l], eta, H))
if (nproc == 1):
os.system(lmp + " -in ../" + inp + " -var lambda %g -var eta %g -var enthalpy %g > output" % (lambdas[l], eta, H))
os.system("grep -v '[a-zA-Z]' output | awk '{if(NF==6 && NR>19)print $0}' | awk '{print $3}' >ent")
@ -60,7 +60,6 @@ for exchange in arange(starting_ex,max_exchange):
aver_enthalpy[l] = mean(ee[-1])
# os.system("mv dump.dcd dump%d.dcd" % exchange)
os.system("mv log.lammps log%d.lammps" % exchange)
# os.system("rm output")
os.system("mv final_restart_file final_restart_file%d" % exchange)
os.system("mv ent ent%d" % exchange)
os.system("bzip2 log%d.lammps ent%d" % (exchange,exchange))

2
src/.gitignore vendored
View File

@ -18,6 +18,8 @@
/*_tally.cpp
/*_rx.h
/*_rx.cpp
/*_ssa.h
/*_ssa.cpp
/kokkos.cpp
/kokkos.h

View File

@ -105,11 +105,16 @@ action modify_kokkos.cpp
action modify_kokkos.h
action neigh_bond_kokkos.cpp
action neigh_bond_kokkos.h
action neigh_full_kokkos.h
action neigh_list_kokkos.cpp
action neigh_list_kokkos.h
action neighbor_kokkos.cpp
action neighbor_kokkos.h
action npair_copy_kokkos.cpp
action npair_copy_kokkos.h
action npair_kokkos.cpp
action npair_kokkos.h
action nbin_kokkos.cpp
action nbin_kokkos.h
action math_special_kokkos.cpp
action math_special_kokkos.h
action pair_buck_coul_cut_kokkos.cpp

View File

@ -125,12 +125,10 @@ void FixQEqReaxKokkos<DeviceType>::init()
neighbor->requests[irequest]->pair = 0;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else { //if (neighflag == HALF || neighflag == HALFTHREAD)
neighbor->requests[irequest]->fix = 1;
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->ghost = 1;
}
}

View File

@ -168,7 +168,6 @@ void KokkosLMP::accelerator(int narg, char **arg)
else
neighflag = HALF;
} else if (strcmp(arg[iarg+1],"n2") == 0) neighflag = N2;
else if (strcmp(arg[iarg+1],"full/cluster") == 0) neighflag = FULLCLUSTER;
else error->all(FLERR,"Illegal package kokkos command");
iarg += 2;
} else if (strcmp(arg[iarg],"binsize") == 0) {
@ -232,20 +231,6 @@ void KokkosLMP::accelerator(int narg, char **arg)
called by Finish
------------------------------------------------------------------------- */
int KokkosLMP::neigh_list_kokkos(int m)
{
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
if (nk->lists_host[m] && nk->lists_host[m]->d_numneigh.dimension_0())
return 1;
if (nk->lists_device[m] && nk->lists_device[m]->d_numneigh.dimension_0())
return 1;
return 0;
}
/* ----------------------------------------------------------------------
called by Finish
------------------------------------------------------------------------- */
int KokkosLMP::neigh_count(int m)
{
int inum;
@ -255,28 +240,30 @@ int KokkosLMP::neigh_count(int m)
ArrayTypes<LMPHostType>::t_int_1d h_numneigh;
NeighborKokkos *nk = (NeighborKokkos *) neighbor;
if (nk->lists_host[m]) {
inum = nk->lists_host[m]->inum;
if (nk->lists[m]->execution_space == Host) {
NeighListKokkos<LMPHostType>* nlistKK = (NeighListKokkos<LMPHostType>*) nk->lists[m];
inum = nlistKK->inum;
#ifndef KOKKOS_USE_CUDA_UVM
h_ilist = Kokkos::create_mirror_view(nk->lists_host[m]->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nk->lists_host[m]->d_numneigh);
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
#else
h_ilist = nk->lists_host[m]->d_ilist;
h_numneigh = nk->lists_host[m]->d_numneigh;
h_ilist = nlistKK->d_ilist;
h_numneigh = nlistKK->d_numneigh;
#endif
Kokkos::deep_copy(h_ilist,nk->lists_host[m]->d_ilist);
Kokkos::deep_copy(h_numneigh,nk->lists_host[m]->d_numneigh);
} else if (nk->lists_device[m]) {
inum = nk->lists_device[m]->inum;
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
} else if (nk->lists[m]->execution_space == Device) {
NeighListKokkos<LMPDeviceType>* nlistKK = (NeighListKokkos<LMPDeviceType>*) nk->lists[m];
inum = nlistKK->inum;
#ifndef KOKKOS_USE_CUDA_UVM
h_ilist = Kokkos::create_mirror_view(nk->lists_device[m]->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nk->lists_device[m]->d_numneigh);
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
#else
h_ilist = nk->lists_device[m]->d_ilist;
h_numneigh = nk->lists_device[m]->d_numneigh;
h_ilist = nlistKK->d_ilist;
h_numneigh = nlistKK->d_numneigh;
#endif
Kokkos::deep_copy(h_ilist,nk->lists_device[m]->d_ilist);
Kokkos::deep_copy(h_numneigh,nk->lists_device[m]->d_numneigh);
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
}
for (int i = 0; i < inum; i++) nneigh += h_numneigh[h_ilist[i]];

View File

@ -34,7 +34,6 @@ class KokkosLMP : protected Pointers {
KokkosLMP(class LAMMPS *, int, char **);
~KokkosLMP();
void accelerator(int, char **);
int neigh_list_kokkos(int);
int neigh_count(int);
private:
static void my_signal_handler(int);

145
src/KOKKOS/nbin_kokkos.cpp Normal file
View File

@ -0,0 +1,145 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "nbin_kokkos.h"
#include "neighbor.h"
#include "atom_kokkos.h"
#include "group.h"
#include "domain.h"
#include "comm.h"
#include "update.h"
#include "error.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
enum{NSQ,BIN,MULTI}; // also in Neighbor
#define SMALL 1.0e-6
#define CUT2BIN_RATIO 100
/* ---------------------------------------------------------------------- */
template<class DeviceType>
NBinKokkos<DeviceType>::NBinKokkos(LAMMPS *lmp) : NBinStandard(lmp) {
atoms_per_bin = 16;
d_resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(d_resize);
#else
h_resize = d_resize;
#endif
h_resize() = 1;
}
/* ----------------------------------------------------------------------
setup neighbor binning geometry
bin numbering in each dimension is global:
0 = 0.0 to binsize, 1 = binsize to 2*binsize, etc
nbin-1,nbin,etc = bbox-binsize to bbox, bbox to bbox+binsize, etc
-1,-2,etc = -binsize to 0.0, -2*binsize to -binsize, etc
code will work for any binsize
since next(xyz) and stencil extend as far as necessary
binsize = 1/2 of cutoff is roughly optimal
for orthogonal boxes:
a dim must be filled exactly by integer # of bins
in periodic, procs on both sides of PBC must see same bin boundary
in non-periodic, coord2bin() still assumes this by use of nbin xyz
for triclinic boxes:
tilted simulation box cannot contain integer # of bins
stencil & neigh list built differently to account for this
mbinlo = lowest global bin any of my ghost atoms could fall into
mbinhi = highest global bin any of my ghost atoms could fall into
mbin = number of bins I need in a dimension
------------------------------------------------------------------------- */
template<class DeviceType>
void NBinKokkos<DeviceType>::bin_atoms_setup(int nall)
{
if (mbins > k_bins.d_view.dimension_0()) {
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",mbins,atoms_per_bin);
bins = k_bins.view<DeviceType>();
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",mbins);
bincount = k_bincount.view<DeviceType>();
last_bin_memory = update->ntimestep;
}
last_bin = update->ntimestep;
}
/* ----------------------------------------------------------------------
bin owned and ghost atoms
------------------------------------------------------------------------- */
template<class DeviceType>
void NBinKokkos<DeviceType>::bin_atoms()
{
h_resize() = 1;
while(h_resize() > 0) {
h_resize() = 0;
deep_copy(d_resize, h_resize);
MemsetZeroFunctor<DeviceType> f_zero;
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
Kokkos::parallel_for(mbins, f_zero);
DeviceType::fence();
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
x = atomKK->k_x.view<DeviceType>();
bboxlo_[0] = bboxlo[0]; bboxlo_[1] = bboxlo[1]; bboxlo_[2] = bboxlo[2];
bboxhi_[0] = bboxhi[0]; bboxhi_[1] = bboxhi[1]; bboxhi_[2] = bboxhi[2];
NPairKokkosBinAtomsFunctor<DeviceType> f(*this);
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
DeviceType::fence();
deep_copy(h_resize, d_resize);
if(h_resize()) {
atoms_per_bin += 16;
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
bins = k_bins.view<DeviceType>();
c_bins = bins;
last_bin_memory = update->ntimestep;
}
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void NBinKokkos<DeviceType>::binatomsItem(const int &i) const
{
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2));
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
if(ac < bins.dimension_1()) {
bins(ibin, ac) = i;
} else {
d_resize() = 1;
}
}
namespace LAMMPS_NS {
template class NBinKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class NBinKokkos<LMPHostType>;
#endif
}

153
src/KOKKOS/nbin_kokkos.h Normal file
View File

@ -0,0 +1,153 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NBIN_CLASS
NBinStyle(kk/host,
NBinKokkos<LMPHostType>,
NB_KOKKOS_HOST)
NBinStyle(kk/device,
NBinKokkos<LMPDeviceType>,
NB_KOKKOS_DEVICE)
#else
#ifndef LMP_NBIN_KOKKOS_H
#define LMP_NBIN_KOKKOS_H
#include "nbin_standard.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
template<class DeviceType>
class NBinKokkos : public NBinStandard {
public:
typedef ArrayTypes<DeviceType> AT;
NBinKokkos(class LAMMPS *);
~NBinKokkos() {}
void bin_atoms_setup(int);
void bin_atoms();
int atoms_per_bin;
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
typename AT::t_int_1d bincount;
const typename AT::t_int_1d_const c_bincount;
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
typename AT::t_int_scalar d_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename AT::t_x_array_randomread x;
KOKKOS_INLINE_FUNCTION
void binatomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{
int ix,iy,iz;
if (x >= bboxhi_[0])
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
else if (x >= bboxlo_[0]) {
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
if (y >= bboxhi_[1])
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
else if (y >= bboxlo_[1]) {
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
if (z >= bboxhi_[2])
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
else if (z >= bboxlo_[2]) {
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
{
int ix,iy,iz;
if (x >= bboxhi_[0])
ix = static_cast<int> ((x-bboxhi_[0])*bininvx) + nbinx;
else if (x >= bboxlo_[0]) {
ix = static_cast<int> ((x-bboxlo_[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo_[0])*bininvx) - 1;
if (y >= bboxhi_[1])
iy = static_cast<int> ((y-bboxhi_[1])*bininvy) + nbiny;
else if (y >= bboxlo_[1]) {
iy = static_cast<int> ((y-bboxlo_[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo_[1])*bininvy) - 1;
if (z >= bboxhi_[2])
iz = static_cast<int> ((z-bboxhi_[2])*bininvz) + nbinz;
else if (z >= bboxlo_[2]) {
iz = static_cast<int> ((z-bboxlo_[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo_[2])*bininvz) - 1;
i[0] = ix - mbinxlo;
i[1] = iy - mbinylo;
i[2] = iz - mbinzlo;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
private:
double bboxlo_[3],bboxhi_[3];
};
template<class DeviceType>
struct NPairKokkosBinAtomsFunctor {
typedef DeviceType device_type;
const NBinKokkos<DeviceType> c;
NPairKokkosBinAtomsFunctor(const NBinKokkos<DeviceType> &_c):
c(_c) {};
~NPairKokkosBinAtomsFunctor() {}
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.binatomsItem(i);
}
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -34,9 +34,8 @@ void NeighListKokkos<Device>::clean_copy()
ipage = NULL;
dpage = NULL;
maxstencil = 0;
ghostflag = 0;
maxstencil_multi = 0;
maxatoms = 0;
}
/* ---------------------------------------------------------------------- */
@ -70,49 +69,6 @@ void NeighListKokkos<Device>::grow(int nmax)
/* ---------------------------------------------------------------------- */
template<class Device>
void NeighListKokkos<Device>::stencil_allocate(int smax, int style)
{
int i;
if (style == BIN) {
if (smax > maxstencil) {
maxstencil = smax;
d_stencil =
memory->create_kokkos(d_stencil,h_stencil,stencil,maxstencil,
"neighlist:stencil");
if (ghostflag) {
memory->create_kokkos(d_stencilxyz,h_stencilxyz,stencilxyz,maxstencil,
3,"neighlist:stencilxyz");
}
}
} else {
int n = atom->ntypes;
if (maxstencil_multi == 0) {
nstencil_multi = new int[n+1];
stencil_multi = new int*[n+1];
distsq_multi = new double*[n+1];
for (i = 1; i <= n; i++) {
nstencil_multi[i] = 0;
stencil_multi[i] = NULL;
distsq_multi[i] = NULL;
}
}
if (smax > maxstencil_multi) {
maxstencil_multi = smax;
for (i = 1; i <= n; i++) {
memory->destroy(stencil_multi[i]);
memory->destroy(distsq_multi[i]);
memory->create(stencil_multi[i],maxstencil_multi,
"neighlist:stencil_multi");
memory->create(distsq_multi[i],maxstencil_multi,
"neighlist:distsq_multi");
}
}
}
}
namespace LAMMPS_NS {
template class NeighListKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA

View File

@ -20,7 +20,7 @@
namespace LAMMPS_NS {
enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u,FULLCLUSTER=16u};
enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u};
class AtomNeighbors
{
@ -74,14 +74,12 @@ public:
typename DAT::tdual_int_1d k_ilist; // local indices of I atoms
typename ArrayTypes<Device>::t_int_1d d_ilist;
typename ArrayTypes<Device>::t_int_1d d_numneigh; // # of J neighs for each I
typename ArrayTypes<Device>::t_int_1d d_stencil; // # of J neighs for each I
typename ArrayTypes<LMPHostType>::t_int_1d h_stencil; // # of J neighs per I
typename ArrayTypes<Device>::t_int_1d_3 d_stencilxyz;
typename ArrayTypes<LMPHostType>::t_int_1d_3 h_stencilxyz;
NeighListKokkos(class LAMMPS *lmp):
NeighList(lmp) {_stride = 1; maxneighs = 16;};
~NeighListKokkos() {stencil = NULL; numneigh = NULL; ilist = NULL;};
NeighList(lmp) {_stride = 1; maxneighs = 16; kokkos = 1; maxatoms = 0;
execution_space = ExecutionSpaceFromDevice<Device>::space;
};
~NeighListKokkos() {numneigh = NULL; ilist = NULL;};
KOKKOS_INLINE_FUNCTION
AtomNeighbors get_neighbors(const int &i) const {
@ -99,7 +97,8 @@ public:
int& num_neighs(const int & i) const {
return d_numneigh(i);
}
void stencil_allocate(int smax, int style);
private:
int maxatoms;
};
}

View File

@ -1,4 +1,4 @@
;/* ----------------------------------------------------------------------
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
@ -26,6 +26,10 @@
#include "angle.h"
#include "dihedral.h"
#include "improper.h"
#include "style_nbin.h"
#include "style_nstencil.h"
#include "style_npair.h"
#include "style_ntopo.h"
using namespace LAMMPS_NS;
@ -36,18 +40,11 @@ enum{NSQ,BIN,MULTI}; // also in neigh_list.cpp
NeighborKokkos::NeighborKokkos(LAMMPS *lmp) : Neighbor(lmp),
neighbond_host(lmp),neighbond_device(lmp)
{
atoms_per_bin = 16;
nlist_host = 0;
lists_host = NULL;
pair_build_host = NULL;
stencil_create_host = NULL;
nlist_device = 0;
lists_device = NULL;
pair_build_device = NULL;
stencil_create_device = NULL;
device_flag = 0;
bondlist = NULL;
anglelist = NULL;
dihedrallist = NULL;
improperlist = NULL;
}
/* ---------------------------------------------------------------------- */
@ -58,14 +55,6 @@ NeighborKokkos::~NeighborKokkos()
memory->destroy_kokkos(k_cutneighsq,cutneighsq);
cutneighsq = NULL;
for (int i = 0; i < nlist_host; i++) delete lists_host[i];
delete [] lists_host;
for (int i = 0; i < nlist_device; i++) delete lists_device[i];
delete [] lists_device;
delete [] pair_build_device;
delete [] pair_build_host;
memory->destroy_kokkos(k_ex_type,ex_type);
memory->destroy_kokkos(k_ex1_type,ex1_type);
memory->destroy_kokkos(k_ex2_type,ex2_type);
@ -89,6 +78,11 @@ void NeighborKokkos::init()
{
atomKK = (AtomKokkos *) atom;
Neighbor::init();
// 1st time allocation of xhold
if (dist_check)
xhold = DAT::tdual_x_array("neigh:xhold",maxhold);
}
/* ---------------------------------------------------------------------- */
@ -101,158 +95,16 @@ void NeighborKokkos::init_cutneighsq_kokkos(int n)
/* ---------------------------------------------------------------------- */
int NeighborKokkos::init_lists_kokkos()
{
int i;
for (i = 0; i < nlist_host; i++) delete lists_host[i];
delete [] lists_host;
delete [] pair_build_host;
delete [] stencil_create_host;
nlist_host = 0;
for (i = 0; i < nlist_device; i++) delete lists_device[i];
delete [] lists_device;
delete [] pair_build_device;
delete [] stencil_create_device;
nlist_device = 0;
nlist = 0;
for (i = 0; i < nrequest; i++) {
if (requests[i]->kokkos_device) nlist_device++;
else if (requests[i]->kokkos_host) nlist_host++;
else nlist++;
}
lists_host = new NeighListKokkos<LMPHostType>*[nrequest];
pair_build_host = new PairPtrHost[nrequest];
stencil_create_host = new StencilPtrHost[nrequest];
for (i = 0; i < nrequest; i++) {
lists_host[i] = NULL;
pair_build_host[i] = NULL;
stencil_create_host[i] = NULL;
}
for (i = 0; i < nrequest; i++) {
if (!requests[i]->kokkos_host) continue;
lists_host[i] = new NeighListKokkos<LMPHostType>(lmp);
lists_host[i]->index = i;
lists_host[i]->dnum = requests[i]->dnum;
if (requests[i]->pair) {
Pair *pair = (Pair *) requests[i]->requestor;
pair->init_list(requests[i]->id,lists_host[i]);
}
if (requests[i]->fix) {
Fix *fix = (Fix *) requests[i]->requestor;
fix->init_list(requests[i]->id,lists_host[i]);
}
}
lists_device = new NeighListKokkos<LMPDeviceType>*[nrequest];
pair_build_device = new PairPtrDevice[nrequest];
stencil_create_device = new StencilPtrDevice[nrequest];
for (i = 0; i < nrequest; i++) {
lists_device[i] = NULL;
pair_build_device[i] = NULL;
stencil_create_device[i] = NULL;
}
for (i = 0; i < nrequest; i++) {
if (!requests[i]->kokkos_device) continue;
lists_device[i] = new NeighListKokkos<LMPDeviceType>(lmp);
lists_device[i]->index = i;
lists_device[i]->dnum = requests[i]->dnum;
if (requests[i]->pair) {
Pair *pair = (Pair *) requests[i]->requestor;
pair->init_list(requests[i]->id,lists_device[i]);
}
if (requests[i]->fix) {
Fix *fix = (Fix *) requests[i]->requestor;
fix->init_list(requests[i]->id,lists_device[i]);
}
}
// 1st time allocation of xhold
if (dist_check)
xhold = DAT::tdual_x_array("neigh:xhold",maxhold);
// return # of non-Kokkos lists
return nlist;
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_flags1_kokkos(int i)
void NeighborKokkos::create_kokkos_list(int i)
{
if (style != BIN)
error->all(FLERR,"KOKKOS package only supports 'bin' neighbor lists");
if (lists_host[i]) {
lists_host[i]->buildflag = 1;
if (pair_build_host[i] == NULL) lists_host[i]->buildflag = 0;
if (requests[i]->occasional) lists_host[i]->buildflag = 0;
lists_host[i]->growflag = 1;
if (requests[i]->copy) lists_host[i]->growflag = 0;
lists_host[i]->stencilflag = 1;
if (style == NSQ) lists_host[i]->stencilflag = 0;
if (stencil_create[i] == NULL) lists_host[i]->stencilflag = 0;
lists_host[i]->ghostflag = 0;
if (requests[i]->ghost) lists_host[i]->ghostflag = 1;
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
}
if (lists_device[i]) {
lists_device[i]->buildflag = 1;
if (pair_build_device[i] == NULL) lists_device[i]->buildflag = 0;
if (requests[i]->occasional) lists_device[i]->buildflag = 0;
lists_device[i]->growflag = 1;
if (requests[i]->copy) lists_device[i]->growflag = 0;
lists_device[i]->stencilflag = 1;
if (style == NSQ) lists_device[i]->stencilflag = 0;
if (stencil_create[i] == NULL) lists_device[i]->stencilflag = 0;
lists_device[i]->ghostflag = 0;
if (requests[i]->ghost) lists_device[i]->ghostflag = 1;
if (requests[i]->ghost && !requests[i]->occasional) anyghostlist = 1;
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_flags2_kokkos(int i)
{
if (lists_host[i]) {
if (lists_host[i]->buildflag) blist[nblist++] = i;
if (lists_host[i]->growflag && requests[i]->occasional == 0)
glist[nglist++] = i;
if (lists_host[i]->stencilflag && requests[i]->occasional == 0)
slist[nslist++] = i;
}
if (lists_device[i]) {
if (lists_device[i]->buildflag) blist[nblist++] = i;
if (lists_device[i]->growflag && requests[i]->occasional == 0)
glist[nglist++] = i;
if (lists_device[i]->stencilflag && requests[i]->occasional == 0)
slist[nslist++] = i;
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_list_grow_kokkos(int i)
{
if (lists_host[i]!=NULL && lists_host[i]->growflag)
lists_host[i]->grow(maxatom);
if (lists_device[i]!=NULL && lists_device[i]->growflag)
lists_device[i]->grow(maxatom);
if (requests[i]->kokkos_device) {
lists[i] = new NeighListKokkos<LMPDeviceType>(lmp);
device_flag = 1;
} else if (requests[i]->kokkos_host)
lists[i] = new NeighListKokkos<LMPHostType>(lmp);
}
/* ---------------------------------------------------------------------- */
@ -281,49 +133,6 @@ void NeighborKokkos::init_ex_mol_bit_kokkos()
k_ex_mol_bit.modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::choose_build(int index, NeighRequest *rq)
{
if (rq->kokkos_host != 0) {
PairPtrHost pb = NULL;
if (rq->ghost) {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPHostType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,0,1>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,1,1>;
} else {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPHostType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,0,0>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPHostType,1,0>;
}
pair_build_host[index] = pb;
}
if (rq->kokkos_device != 0) {
PairPtrDevice pb = NULL;
if (rq->ghost) {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0,1>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1,1>;
} else {
if (rq->full) {
if (rq->full_cluster) pb = &NeighborKokkos::full_bin_cluster_kokkos<LMPDeviceType>;
else pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,0,0>;
}
else if (rq->half) pb = &NeighborKokkos::full_bin_kokkos<LMPDeviceType,1,0>;
}
pair_build_device[index] = pb;
return;
}
Neighbor::choose_build(index,rq);
}
/* ----------------------------------------------------------------------
if any atom moved trigger distance (half of neighbor skin) return 1
shrink trigger distance if box size has changed
@ -337,7 +146,7 @@ void NeighborKokkos::choose_build(int index, NeighRequest *rq)
int NeighborKokkos::check_distance()
{
if (nlist_device)
if (device_flag)
check_distance_kokkos<LMPDeviceType>();
else
check_distance_kokkos<LMPHostType>();
@ -417,7 +226,7 @@ void NeighborKokkos::operator()(TagNeighborCheckDistance<DeviceType>, const int
void NeighborKokkos::build(int topoflag)
{
if (nlist_device)
if (device_flag)
build_kokkos<LMPDeviceType>(topoflag);
else
build_kokkos<LMPHostType>(topoflag);
@ -428,18 +237,25 @@ void NeighborKokkos::build_kokkos(int topoflag)
{
typedef DeviceType device_type;
int i;
int i,m;
ago = 0;
ncalls++;
lastcall = update->ntimestep;
int nlocal = atom->nlocal;
int nall = nlocal + atom->nghost;
// check that using special bond flags will not overflow neigh lists
if (nall > NEIGHMASK)
error->one(FLERR,"Too many local+ghost atoms for neighbor list");
// store current atom positions and box size if needed
if (dist_check) {
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
x = atomKK->k_x;
int nlocal = atom->nlocal;
if (includegroup) nlocal = atom->nfirst;
int maxhold_kokkos = xhold.view<DeviceType>().dimension_0();
if (atom->nmax > maxhold || maxhold_kokkos < maxhold) {
@ -471,54 +287,33 @@ void NeighborKokkos::build_kokkos(int topoflag)
}
}
// if any lists store neighbors of ghosts:
// invoke grow() if nlocal+nghost exceeds previous list size
// else only invoke grow() if nlocal exceeds previous list size
// only for lists with growflag set and which are perpetual (glist)
// bin atoms for all NBin instances
// not just NBin associated with perpetual lists
// b/c cannot wait to bin occasional lists in build_one() call
// if bin then, atoms may have moved outside of proc domain & bin extent,
// leading to errors or even a crash
if (anyghostlist && atom->nmax > maxatom) {
maxatom = atom->nmax;
for (i = 0; i < nglist; i++)
if (lists[glist[i]]) lists[glist[i]]->grow(maxatom);
else init_list_grow_kokkos(glist[i]);
} else if (atom->nmax > maxatom) {
maxatom = atom->nmax;
for (i = 0; i < nglist; i++)
if (lists[glist[i]]) lists[glist[i]]->grow(maxatom);
else init_list_grow_kokkos(glist[i]);
}
// extend atom bin list if necessary
if (style != NSQ && atom->nmax > maxbin) {
maxbin = atom->nmax;
memory->destroy(bins);
memory->create(bins,maxbin,"bins");
}
// check that using special bond flags will not overflow neigh lists
if (atom->nlocal+atom->nghost > NEIGHMASK)
error->one(FLERR,"Too many local+ghost atoms for neighbor list");
// invoke building of pair and molecular topology neighbor lists
// only for pairwise lists with buildflag set
// blist is for standard neigh lists, otherwise is a Kokkos list
for (i = 0; i < nblist; i++) {
if (lists[blist[i]]) {
atomKK->sync(Host,ALL_MASK);
(this->*pair_build[blist[i]])(lists[blist[i]]);
} else {
if (lists_host[blist[i]])
(this->*pair_build_host[blist[i]])(lists_host[blist[i]]);
else if (lists_device[blist[i]])
(this->*pair_build_device[blist[i]])(lists_device[blist[i]]);
if (style != NSQ) {
for (int i = 0; i < nbin; i++) {
neigh_bin[i]->bin_atoms_setup(nall);
neigh_bin[i]->bin_atoms();
}
}
if (atom->molecular && topoflag)
build_topology_kokkos();
// build pairwise lists for all perpetual NPair/NeighList
// grow() with nlocal/nall args so that only realloc if have to
atomKK->sync(Host,ALL_MASK);
for (i = 0; i < npair_perpetual; i++) {
m = plist[i];
lists[m]->grow(nlocal,nall);
neigh_pair[m]->build_setup();
neigh_pair[m]->build(lists[m]);
}
// build topology lists for bonds/angles/etc
if (atom->molecular && topoflag) build_topology();
}
template<class DeviceType>
@ -532,26 +327,6 @@ void NeighborKokkos::operator()(TagNeighborXhold<DeviceType>, const int &i) cons
/* ---------------------------------------------------------------------- */
void NeighborKokkos::setup_bins_kokkos(int i)
{
if (lists_host[slist[i]]) {
lists_host[slist[i]]->stencil_allocate(smax,style);
(this->*stencil_create[slist[i]])(lists_host[slist[i]],sx,sy,sz);
} else if (lists_device[slist[i]]) {
lists_device[slist[i]]->stencil_allocate(smax,style);
(this->*stencil_create[slist[i]])(lists_device[slist[i]],sx,sy,sz);
}
//if (i < nslist-1) return; // this won't work if a non-kokkos neighbor list is last
if (maxhead > k_bins.d_view.dimension_0()) {
k_bins = DAT::tdual_int_2d("Neighbor::d_bins",maxhead,atoms_per_bin);
k_bincount = DAT::tdual_int_1d("Neighbor::d_bincount",maxhead);
}
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::modify_ex_type_grow_kokkos(){
memory->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type");
k_ex1_type.modify<LMPHostType>();
@ -575,8 +350,8 @@ void NeighborKokkos::modify_mol_group_grow_kokkos(){
/* ---------------------------------------------------------------------- */
void NeighborKokkos::init_topology_kokkos() {
if (nlist_device) {
void NeighborKokkos::init_topology() {
if (device_flag) {
neighbond_device.init_topology_kk();
} else {
neighbond_host.init_topology_kk();
@ -588,8 +363,8 @@ void NeighborKokkos::init_topology_kokkos() {
normally built with pair lists, but USER-CUDA separates them
------------------------------------------------------------------------- */
void NeighborKokkos::build_topology_kokkos() {
if (nlist_device) {
void NeighborKokkos::build_topology() {
if (device_flag) {
neighbond_device.build_topology_kk();
k_bondlist = neighbond_device.k_bondlist;
@ -637,7 +412,3 @@ void NeighborKokkos::build_topology_kokkos() {
k_improperlist.modify<LMPHostType>();
}
}
// include to trigger instantiation of templated functions
#include "neigh_full_kokkos.h"

View File

@ -22,316 +22,6 @@
namespace LAMMPS_NS {
template<class Device>
class NeighborKokkosExecute
{
typedef ArrayTypes<Device> AT;
public:
NeighListKokkos<Device> neigh_list;
const typename AT::t_xfloat_2d_randomread cutneighsq;
const typename AT::t_int_1d bincount;
const typename AT::t_int_1d_const c_bincount;
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
const typename AT::t_x_array_randomread x;
const typename AT::t_int_1d_const type,mask,molecule;
const typename AT::t_tagint_1d_const tag;
const typename AT::t_tagint_2d_const special;
const typename AT::t_int_2d_const nspecial;
const int molecular;
int moltemplate;
int special_flag[4];
const int nbinx,nbiny,nbinz;
const int mbinx,mbiny,mbinz;
const int mbinxlo,mbinylo,mbinzlo;
const X_FLOAT bininvx,bininvy,bininvz;
X_FLOAT bboxhi[3],bboxlo[3];
const int nlocal;
const int exclude;
const int nex_type;
const int maxex_type;
const typename AT::t_int_1d_const ex1_type,ex2_type;
const typename AT::t_int_2d_const ex_type;
const int nex_group;
const int maxex_group;
const typename AT::t_int_1d_const ex1_group,ex2_group;
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
const int nex_mol;
const int maxex_mol;
const typename AT::t_int_1d_const ex_mol_group;
const typename AT::t_int_1d_const ex_mol_bit;
typename AT::t_int_scalar resize;
typename AT::t_int_scalar new_maxneighs;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
const int xperiodic, yperiodic, zperiodic;
const int xprd_half, yprd_half, zprd_half;
NeighborKokkosExecute(
const NeighListKokkos<Device> &_neigh_list,
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
const typename AT::t_int_1d &_bincount,
const typename AT::t_int_2d &_bins,
const int _nlocal,
const typename AT::t_x_array_randomread &_x,
const typename AT::t_int_1d_const &_type,
const typename AT::t_int_1d_const &_mask,
const typename AT::t_int_1d_const &_molecule,
const typename AT::t_tagint_1d_const &_tag,
const typename AT::t_tagint_2d_const &_special,
const typename AT::t_int_2d_const &_nspecial,
const int &_molecular,
const int & _nbinx,const int & _nbiny,const int & _nbinz,
const int & _mbinx,const int & _mbiny,const int & _mbinz,
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
const int & _exclude,const int & _nex_type,const int & _maxex_type,
const typename AT::t_int_1d_const & _ex1_type,
const typename AT::t_int_1d_const & _ex2_type,
const typename AT::t_int_2d_const & _ex_type,
const int & _nex_group,const int & _maxex_group,
const typename AT::t_int_1d_const & _ex1_group,
const typename AT::t_int_1d_const & _ex2_group,
const typename AT::t_int_1d_const & _ex1_bit,
const typename AT::t_int_1d_const & _ex2_bit,
const int & _nex_mol,const int & _maxex_mol,
const typename AT::t_int_1d_const & _ex_mol_group,
const typename AT::t_int_1d_const & _ex_mol_bit,
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
nlocal(_nlocal),
x(_x),type(_type),mask(_mask),molecule(_molecule),
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
exclude(_exclude),nex_type(_nex_type),maxex_type(_maxex_type),
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
nex_group(_nex_group),maxex_group(_maxex_group),
ex1_group(_ex1_group),ex2_group(_ex2_group),
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),maxex_mol(_maxex_mol),
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half){
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(resize);
#else
h_resize = resize;
#endif
h_resize() = 1;
new_maxneighs = typename AT::
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
#ifndef KOKKOS_USE_CUDA_UVM
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
#else
h_new_maxneighs = new_maxneighs;
#endif
h_new_maxneighs() = neigh_list.maxneighs;
};
~NeighborKokkosExecute() {neigh_list.clean_copy();};
template<int HalfNeigh, int GhostNewton>
KOKKOS_FUNCTION
void build_Item(const int &i) const;
template<int HalfNeigh>
KOKKOS_FUNCTION
void build_Item_Ghost(const int &i) const;
template<int ClusterSize>
KOKKOS_FUNCTION
void build_cluster_Item(const int &i) const;
#ifdef KOKKOS_HAVE_CUDA
template<int HalfNeigh, int GhostNewton>
__device__ inline
void build_ItemCuda(typename Kokkos::TeamPolicy<Device>::member_type dev) const;
#endif
KOKKOS_INLINE_FUNCTION
void binatomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
i[0] = ix - mbinxlo;
i[1] = iy - mbinylo;
i[2] = iz - mbinzlo;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
KOKKOS_INLINE_FUNCTION
int find_special(const int &i, const int &j) const;
KOKKOS_INLINE_FUNCTION
int minimum_image_check(double dx, double dy, double dz) const {
if (xperiodic && fabs(dx) > xprd_half) return 1;
if (yperiodic && fabs(dy) > yprd_half) return 1;
if (zperiodic && fabs(dz) > zprd_half) return 1;
return 0;
}
};
template<class Device>
struct NeighborKokkosBinAtomsFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
NeighborKokkosBinAtomsFunctor(const NeighborKokkosExecute<Device> &_c):
c(_c) {};
~NeighborKokkosBinAtomsFunctor() {}
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.binatomsItem(i);
}
};
template<class Device,int HALF_NEIGH,int GHOST_NEWTON>
struct NeighborKokkosBuildFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
const size_t sharedsize;
NeighborKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
}
#ifdef KOKKOS_HAVE_CUDA
KOKKOS_INLINE_FUNCTION
void operator() (typename Kokkos::TeamPolicy<Device>::member_type dev) const {
c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
}
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
#endif
};
template<class Device,int HALF_NEIGH>
struct NeighborKokkosBuildFunctorGhost {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
const size_t sharedsize;
NeighborKokkosBuildFunctorGhost(const NeighborKokkosExecute<Device> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item_Ghost<HALF_NEIGH>(i);
}
};
template<class Device,int ClusterSize>
struct NeighborClusterKokkosBuildFunctor {
typedef Device device_type;
const NeighborKokkosExecute<Device> c;
const size_t sharedsize;
NeighborClusterKokkosBuildFunctor(const NeighborKokkosExecute<Device> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_cluster_Item<ClusterSize>(i);
}
};
template<class DeviceType>
struct TagNeighborCheckDistance{};
@ -342,24 +32,11 @@ class NeighborKokkos : public Neighbor {
public:
typedef int value_type;
int nlist_host; // pairwise neighbor lists on Host
NeighListKokkos<LMPHostType> **lists_host;
int nlist_device; // pairwise neighbor lists on Device
NeighListKokkos<LMPDeviceType> **lists_device;
NeighBondKokkos<LMPHostType> neighbond_host;
NeighBondKokkos<LMPDeviceType> neighbond_device;
DAT::tdual_int_2d k_bondlist;
DAT::tdual_int_2d k_anglelist;
DAT::tdual_int_2d k_dihedrallist;
DAT::tdual_int_2d k_improperlist;
NeighborKokkos(class LAMMPS *);
~NeighborKokkos();
void init();
void init_topology();
void build_topology();
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
@ -369,11 +46,7 @@ class NeighborKokkos : public Neighbor {
KOKKOS_INLINE_FUNCTION
void operator()(TagNeighborXhold<DeviceType>, const int&) const;
private:
int atoms_per_bin;
DAT::tdual_xfloat_2d k_cutneighsq;
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
DAT::tdual_int_2d k_ex_type;
@ -382,6 +55,16 @@ class NeighborKokkos : public Neighbor {
DAT::tdual_int_1d k_ex_mol_group;
DAT::tdual_int_1d k_ex_mol_bit;
NeighBondKokkos<LMPHostType> neighbond_host;
NeighBondKokkos<LMPDeviceType> neighbond_device;
DAT::tdual_int_2d k_bondlist;
DAT::tdual_int_2d k_anglelist;
DAT::tdual_int_2d k_dihedrallist;
DAT::tdual_int_2d k_improperlist;
private:
DAT::tdual_x_array x;
DAT::tdual_x_array xhold;
@ -389,14 +72,10 @@ class NeighborKokkos : public Neighbor {
int device_flag;
void init_cutneighsq_kokkos(int);
int init_lists_kokkos();
void init_list_flags1_kokkos(int);
void init_list_flags2_kokkos(int);
void init_list_grow_kokkos(int);
void create_kokkos_list(int);
void init_ex_type_kokkos(int);
void init_ex_bit_kokkos();
void init_ex_mol_bit_kokkos();
void choose_build(int, NeighRequest *);
virtual int check_distance();
template<class DeviceType> int check_distance_kokkos();
virtual void build(int);
@ -405,27 +84,6 @@ class NeighborKokkos : public Neighbor {
void modify_ex_type_grow_kokkos();
void modify_ex_group_grow_kokkos();
void modify_mol_group_grow_kokkos();
void init_topology_kokkos();
void build_topology_kokkos();
typedef void (NeighborKokkos::*PairPtrHost)
(class NeighListKokkos<LMPHostType> *);
PairPtrHost *pair_build_host;
typedef void (NeighborKokkos::*PairPtrDevice)
(class NeighListKokkos<LMPDeviceType> *);
PairPtrDevice *pair_build_device;
template<class DeviceType,int HALF_NEIGH, int GHOST>
void full_bin_kokkos(NeighListKokkos<DeviceType> *list);
template<class DeviceType>
void full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list);
typedef void (NeighborKokkos::*StencilPtrHost)
(class NeighListKokkos<LMPHostType> *, int, int, int);
StencilPtrHost *stencil_create_host;
typedef void (NeighborKokkos::*StencilPtrDevice)
(class NeighListKokkos<LMPDeviceType> *, int, int, int);
StencilPtrDevice *stencil_create_device;
};
}

View File

@ -0,0 +1,62 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "npair_copy_kokkos.h"
#include "neighbor.h"
#include "neigh_list_kokkos.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "domain.h"
#include "my_page.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
template<class DeviceType>
NPairCopyKokkos<DeviceType>::NPairCopyKokkos(LAMMPS *lmp) : NPair(lmp) {}
/* ----------------------------------------------------------------------
create list which is simply a copy of parent list
------------------------------------------------------------------------- */
template<class DeviceType>
void NPairCopyKokkos<DeviceType>::build(NeighList *list)
{
NeighList *listcopy = list->listcopy;
list->inum = listcopy->inum;
list->gnum = listcopy->gnum;
list->ilist = listcopy->ilist;
list->numneigh = listcopy->numneigh;
list->firstneigh = listcopy->firstneigh;
list->firstdouble = listcopy->firstdouble;
list->ipage = listcopy->ipage;
list->dpage = listcopy->dpage;
NeighListKokkos<DeviceType>* list_kk = (NeighListKokkos<DeviceType>*) list;
NeighListKokkos<DeviceType>* listcopy_kk = (NeighListKokkos<DeviceType>*) list->listcopy;
list_kk->d_ilist = listcopy_kk->d_ilist;
list_kk->d_numneigh = listcopy_kk->d_numneigh;
list_kk->d_neighbors = listcopy_kk->d_neighbors;
}
namespace LAMMPS_NS {
template class NPairCopyKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class NPairCopyKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,48 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(copy/kk/device,
NPairCopyKokkos<LMPDeviceType>,
NP_COPY | NP_KOKKOS_DEVICE)
NPairStyle(copy/kk/host,
NPairCopyKokkos<LMPHostType>,
NP_COPY | NP_KOKKOS_HOST)
#else
#ifndef LMP_NPAIR_COPY_KOKKOS_H
#define LMP_NPAIR_COPY_KOKKOS_H
#include "npair.h"
namespace LAMMPS_NS {
template<class DeviceType>
class NPairCopyKokkos : public NPair {
public:
NPairCopyKokkos(class LAMMPS *);
~NPairCopyKokkos() {}
void build(class NeighList *);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -11,17 +11,105 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "npair_kokkos.h"
#include "atom_kokkos.h"
#include "atom_masks.h"
#include "domain_kokkos.h"
#include "neighbor_kokkos.h"
#include "nbin_kokkos.h"
#include "nstencil.h"
#include "force.h"
namespace LAMMPS_NS {
/* ---------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::NPairKokkos(LAMMPS *lmp) : NPair(lmp) {
}
/* ----------------------------------------------------------------------
copy needed info from Neighbor class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_neighbor_info()
{
NPair::copy_neighbor_info();
NeighborKokkos* neighborKK = (NeighborKokkos*) neighbor;
// general params
newton_pair = force->newton_pair;
k_cutneighsq = neighborKK->k_cutneighsq;
// exclusion info
k_ex1_type = neighborKK->k_ex1_type;
k_ex2_type = neighborKK->k_ex2_type;
k_ex_type = neighborKK->k_ex_type;
k_ex1_group = neighborKK->k_ex1_group;
k_ex2_group = neighborKK->k_ex2_group;
k_ex1_bit = neighborKK->k_ex1_bit;
k_ex2_bit = neighborKK->k_ex2_bit;
k_ex_mol_group = neighborKK->k_ex_mol_group;
k_ex_mol_bit = neighborKK->k_ex_mol_bit;
}
/* ----------------------------------------------------------------------
copy per-atom and per-bin vectors from NBin class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_bin_info()
{
NPair::copy_bin_info();
NBinKokkos<DeviceType>* nbKK = (NBinKokkos<DeviceType>*) nb;
atoms_per_bin = nbKK->atoms_per_bin;
k_bincount = nbKK->k_bincount;
k_bins = nbKK->k_bins;
}
/* ----------------------------------------------------------------------
copy needed info from NStencil class to this build class
------------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::copy_stencil_info()
{
NPair::copy_stencil_info();
nstencil = ns->nstencil;
int maxstencil = ns->get_maxstencil();
k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil);
for (int k = 0; k < maxstencil; k++)
k_stencil.h_view(k) = ns->stencil[k];
k_stencil.modify<LMPHostType>();
k_stencil.sync<DeviceType>();
if (GHOST) {
k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil);
for (int k = 0; k < maxstencil; k++) {
k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0];
k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1];
k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2];
}
k_stencilxyz.modify<LMPHostType>();
k_stencilxyz.sync<DeviceType>();
}
}
/* ---------------------------------------------------------------------- */
template<class DeviceType, int HALF_NEIGH, int GHOST>
void NPairKokkos<DeviceType,HALF_NEIGH,GHOST>::build(NeighList *list_)
{
NeighListKokkos<DeviceType>* list = (NeighListKokkos<DeviceType>*) list_;
const int nlocal = includegroup?atom->nfirst:atom->nlocal;
int nall = nlocal;
if (GHOST)
@ -32,7 +120,11 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
data(*list,
k_cutneighsq.view<DeviceType>(),
k_bincount.view<DeviceType>(),
k_bins.view<DeviceType>(),nlocal,
k_bins.view<DeviceType>(),
nstencil,
k_stencil.view<DeviceType>(),
k_stencilxyz.view<DeviceType>(),
nlocal,
atomKK->k_x.view<DeviceType>(),
atomKK->k_type.view<DeviceType>(),
atomKK->k_mask.view<DeviceType>(),
@ -43,16 +135,16 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
atomKK->molecular,
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
bininvx,bininvy,bininvz,
exclude, nex_type,maxex_type,
exclude, nex_type,
k_ex1_type.view<DeviceType>(),
k_ex2_type.view<DeviceType>(),
k_ex_type.view<DeviceType>(),
nex_group,maxex_group,
nex_group,
k_ex1_group.view<DeviceType>(),
k_ex2_group.view<DeviceType>(),
k_ex1_bit.view<DeviceType>(),
k_ex2_bit.view<DeviceType>(),
nex_mol, maxex_mol,
nex_mol,
k_ex_mol_group.view<DeviceType>(),
k_ex_mol_bit.view<DeviceType>(),
bboxhi,bboxlo,
@ -69,40 +161,15 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
k_ex2_bit.sync<DeviceType>();
k_ex_mol_group.sync<DeviceType>();
k_ex_mol_bit.sync<DeviceType>();
k_bincount.sync<DeviceType>(),
k_bins.sync<DeviceType>(),
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
if (GHOST)
Kokkos::deep_copy(list->d_stencilxyz,list->h_stencilxyz);
data.special_flag[0] = special_flag[0];
data.special_flag[1] = special_flag[1];
data.special_flag[2] = special_flag[2];
data.special_flag[3] = special_flag[3];
while(data.h_resize() > 0) {
data.h_resize() = 0;
deep_copy(data.resize, data.h_resize);
MemsetZeroFunctor<DeviceType> f_zero;
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
Kokkos::parallel_for(mbins, f_zero);
DeviceType::fence();
NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
atoms_per_bin += 16;
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
data.bins = k_bins.view<DeviceType>();
data.c_bins = data.bins;
}
}
if(list->d_neighbors.dimension_0()<nall) {
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
@ -125,18 +192,18 @@ void NeighborKokkos::full_bin_kokkos(NeighListKokkos<DeviceType> *list)
#endif
if (GHOST) {
NeighborKokkosBuildFunctorGhost<DeviceType,HALF_NEIGH> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
NPairKokkosBuildFunctorGhost<DeviceType,HALF_NEIGH> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
Kokkos::parallel_for(nall, f);
} else {
if(newton_pair) {
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
if (newton_pair) {
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,1> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_HAVE_CUDA
Kokkos::parallel_for(config, f);
#else
Kokkos::parallel_for(nall, f);
#endif
} else {
NeighborKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
NPairKokkosBuildFunctor<DeviceType,HALF_NEIGH,0> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
#ifdef KOKKOS_HAVE_CUDA
Kokkos::parallel_for(config, f);
#else
@ -169,24 +236,9 @@ if (GHOST) {
/* ---------------------------------------------------------------------- */
template<class Device>
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void NeighborKokkosExecute<Device>::binatomsItem(const int &i) const
{
const int ibin = coord2bin(x(i, 0), x(i, 1), x(i, 2));
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
if(ac < bins.dimension_1()) {
bins(ibin, ac) = i;
} else {
resize() = 1;
}
}
/* ---------------------------------------------------------------------- */
template<class Device>
KOKKOS_INLINE_FUNCTION
int NeighborKokkosExecute<Device>::find_special(const int &i, const int &j) const
int NeighborKokkosExecute<DeviceType>::find_special(const int &i, const int &j) const
{
const int n1 = nspecial(i,0);
const int n2 = nspecial(i,1);
@ -214,9 +266,9 @@ int NeighborKokkosExecute<Device>::find_special(const int &i, const int &j) cons
/* ---------------------------------------------------------------------- */
template<class Device>
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
int NeighborKokkosExecute<Device>::exclusion(const int &i,const int &j,
int NeighborKokkosExecute<DeviceType>::exclusion(const int &i,const int &j,
const int &itype,const int &jtype) const
{
int m;
@ -241,8 +293,8 @@ int NeighborKokkosExecute<Device>::exclusion(const int &i,const int &j,
/* ---------------------------------------------------------------------- */
template<class Device> template<int HalfNeigh,int GhostNewton>
void NeighborKokkosExecute<Device>::
template<class DeviceType> template<int HalfNeigh,int Newton>
void NeighborKokkosExecute<DeviceType>::
build_Item(const int &i) const
{
/* if necessary, goto next page and add pages */
@ -261,9 +313,8 @@ void NeighborKokkosExecute<Device>::
const int ibin = coord2bin(xtmp, ytmp, ztmp);
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
// loop over all bins in neighborhood (includes ibin)
if(HalfNeigh)
@ -272,8 +323,8 @@ void NeighborKokkosExecute<Device>::
const int jtype = type(j);
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
if((j == i) || (HalfNeigh && !GhostNewton && (j < i)) ||
(HalfNeigh && GhostNewton && ((j < i) || ((j >= nlocal) &&
if((j == i) || (HalfNeigh && !Newton && (j < i)) ||
(HalfNeigh && Newton && ((j < i) || ((j >= nlocal) &&
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
) continue;
@ -312,14 +363,16 @@ void NeighborKokkosExecute<Device>::
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
// get subview of jbin
if(HalfNeigh&&(ibin==jbin)) continue;
//const ArrayTypes<Device>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
//const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
for(int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
const int jtype = type(j);
if(HalfNeigh && !GhostNewton && (j < i)) continue;
if(HalfNeigh && !Newton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
@ -331,7 +384,7 @@ void NeighborKokkosExecute<Device>::
if(rsq <= cutneighsq(itype,jtype)) {
if (molecular) {
if (!moltemplate)
which = find_special(i,j);
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
@ -364,15 +417,18 @@ void NeighborKokkosExecute<Device>::
if(n >= new_maxneighs()) new_maxneighs() = n;
}
neigh_list.d_ilist(i) = i;
}
/* ---------------------------------------------------------------------- */
#ifdef KOKKOS_HAVE_CUDA
extern __shared__ X_FLOAT sharedmem[];
/* ---------------------------------------------------------------------- */
template<class DeviceType> template<int HalfNeigh,int GhostNewton>
template<class DeviceType> template<int HalfNeigh,int Newton>
__device__ inline
void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const
{
@ -429,8 +485,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
if((j == i) ||
(HalfNeigh && !GhostNewton && (j < i)) ||
(HalfNeigh && GhostNewton &&
(HalfNeigh && !Newton && (j < i)) ||
(HalfNeigh && Newton &&
((j < i) ||
((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
@ -445,7 +501,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
if (molecular) {
int which = 0;
if (!moltemplate)
which = find_special(i,j);
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
@ -472,9 +528,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
}
__syncthreads();
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
= d_stencil;
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
@ -501,7 +556,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
const int jtype = other_x[m + 3 * atoms_per_bin];
//if(HalfNeigh && (j < i)) continue;
if(HalfNeigh && !GhostNewton && (j < i)) continue;
if(HalfNeigh && !Newton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
@ -514,7 +569,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
if (molecular) {
int which = 0;
if (!moltemplate)
which = find_special(i,j);
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
/* else if (imol >= 0) */
/* which = find_special(onemols[imol]->special[iatom], */
/* onemols[imol]->nspecial[iatom], */
@ -558,8 +613,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
/* ---------------------------------------------------------------------- */
template<class Device> template<int HalfNeigh>
void NeighborKokkosExecute<Device>::
template<class DeviceType> template<int HalfNeigh>
void NeighborKokkosExecute<DeviceType>::
build_Item_Ghost(const int &i) const
{
/* if necessary, goto next page and add pages */
@ -576,11 +631,10 @@ void NeighborKokkosExecute<Device>::
const X_FLOAT ztmp = x(i, 2);
const int itype = type(i);
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
const typename ArrayTypes<Device>::t_int_1d_3_const_um stencilxyz
= neigh_list.d_stencilxyz;
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
const typename ArrayTypes<DeviceType>::t_int_1d_3_const_um stencilxyz
= d_stencilxyz;
// loop over all atoms in surrounding bins in stencil including self
// when i is a ghost atom, must check if stencil bin is out of bounds
@ -679,197 +733,17 @@ void NeighborKokkosExecute<Device>::
neigh_list.d_ilist(i) = i;
}
template<class DeviceType>
void NeighborKokkos::full_bin_cluster_kokkos(NeighListKokkos<DeviceType> *list)
{
const int nall = includegroup?atom->nfirst:atom->nlocal;
list->grow(nall);
}
NeighborKokkosExecute<DeviceType>
data(*list,
k_cutneighsq.view<DeviceType>(),
k_bincount.view<DeviceType>(),
k_bins.view<DeviceType>(),nall,
atomKK->k_x.view<DeviceType>(),
atomKK->k_type.view<DeviceType>(),
atomKK->k_mask.view<DeviceType>(),
atomKK->k_molecule.view<DeviceType>(),
atomKK->k_tag.view<DeviceType>(),
atomKK->k_special.view<DeviceType>(),
atomKK->k_nspecial.view<DeviceType>(),
atomKK->molecular,
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
bininvx,bininvy,bininvz,
exclude, nex_type,maxex_type,
k_ex1_type.view<DeviceType>(),
k_ex2_type.view<DeviceType>(),
k_ex_type.view<DeviceType>(),
nex_group,maxex_group,
k_ex1_group.view<DeviceType>(),
k_ex2_group.view<DeviceType>(),
k_ex1_bit.view<DeviceType>(),
k_ex2_bit.view<DeviceType>(),
nex_mol, maxex_mol,
k_ex_mol_group.view<DeviceType>(),
k_ex_mol_bit.view<DeviceType>(),
bboxhi,bboxlo,
domain->xperiodic,domain->yperiodic,domain->zperiodic,
domain->xprd_half,domain->yprd_half,domain->zprd_half);
k_cutneighsq.sync<DeviceType>();
k_ex1_type.sync<DeviceType>();
k_ex2_type.sync<DeviceType>();
k_ex_type.sync<DeviceType>();
k_ex1_group.sync<DeviceType>();
k_ex2_group.sync<DeviceType>();
k_ex1_bit.sync<DeviceType>();
k_ex2_bit.sync<DeviceType>();
k_ex_mol_group.sync<DeviceType>();
k_ex_mol_bit.sync<DeviceType>();
data.special_flag[0] = special_flag[0];
data.special_flag[1] = special_flag[1];
data.special_flag[2] = special_flag[2];
data.special_flag[3] = special_flag[3];
atomKK->sync(Device,X_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK);
Kokkos::deep_copy(list->d_stencil,list->h_stencil);
DeviceType::fence();
while(data.h_resize() > 0) {
data.h_resize() = 0;
deep_copy(data.resize, data.h_resize);
MemsetZeroFunctor<DeviceType> f_zero;
f_zero.ptr = (void*) k_bincount.view<DeviceType>().ptr_on_device();
Kokkos::parallel_for(mbins, f_zero);
DeviceType::fence();
NeighborKokkosBinAtomsFunctor<DeviceType> f(data);
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
atoms_per_bin += 16;
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
data.bins = k_bins.view<DeviceType>();
data.c_bins = data.bins;
}
}
if(list->d_neighbors.dimension_0()<nall) {
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", nall*1.1, list->maxneighs);
list->d_numneigh = typename ArrayTypes<DeviceType>::t_int_1d("numneigh", nall*1.1);
data.neigh_list.d_neighbors = list->d_neighbors;
data.neigh_list.d_numneigh = list->d_numneigh;
}
data.h_resize()=1;
while(data.h_resize()) {
data.h_new_maxneighs() = list->maxneighs;
data.h_resize() = 0;
Kokkos::deep_copy(data.resize, data.h_resize);
Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs);
namespace LAMMPS_NS {
template class NPairKokkos<LMPDeviceType,0,0>;
template class NPairKokkos<LMPDeviceType,0,1>;
template class NPairKokkos<LMPDeviceType,1,0>;
template class NPairKokkos<LMPDeviceType,1,1>;
#ifdef KOKKOS_HAVE_CUDA
#define BINS_PER_BLOCK 2
const int factor = atoms_per_bin<64?2:1;
Kokkos::TeamPolicy<DeviceType> config((mbins+factor-1)/factor,atoms_per_bin*factor);
#else
const int factor = 1;
template class NPairKokkos<LMPHostType,0,0>;
template class NPairKokkos<LMPHostType,0,1>;
template class NPairKokkos<LMPHostType,1,0>;
template class NPairKokkos<LMPHostType,1,1>;
#endif
if(newton_pair) {
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
//#ifdef KOKKOS_HAVE_CUDA
// Kokkos::parallel_for(config, f);
//#else
Kokkos::parallel_for(nall, f);
//#endif
} else {
NeighborClusterKokkosBuildFunctor<DeviceType,NeighClusterSize> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
//#ifdef KOKKOS_HAVE_CUDA
// Kokkos::parallel_for(config, f);
//#else
Kokkos::parallel_for(nall, f);
//#endif
}
DeviceType::fence();
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
deep_copy(data.h_new_maxneighs, data.new_maxneighs);
list->maxneighs = data.h_new_maxneighs() * 1.2;
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.dimension_0(), list->maxneighs);
data.neigh_list.d_neighbors = list->d_neighbors;
data.neigh_list.maxneighs = list->maxneighs;
}
}
list->inum = nall;
list->gnum = 0;
}
/* ---------------------------------------------------------------------- */
template<class Device> template<int ClusterSize>
void NeighborKokkosExecute<Device>::
build_cluster_Item(const int &i) const
{
/* if necessary, goto next page and add pages */
int n = 0;
// get subview of neighbors of i
const AtomNeighbors neighbors_i = neigh_list.get_neighbors(i);
const X_FLOAT xtmp = x(i, 0);
const X_FLOAT ytmp = x(i, 1);
const X_FLOAT ztmp = x(i, 2);
const int itype = type(i);
const int ibin = coord2bin(xtmp, ytmp, ztmp);
const int nstencil = neigh_list.nstencil;
const typename ArrayTypes<Device>::t_int_1d_const_um stencil
= neigh_list.d_stencil;
for(int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
for(int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
bool skip = i == j;
for(int k = 0; k< (n<neigh_list.maxneighs?n:neigh_list.maxneighs); k++)
if((j-(j%ClusterSize)) == neighbors_i(k)) {skip=true;};//{m += ClusterSize - j&(ClusterSize-1)-1; skip=true;}
if(!skip) {
const int jtype = type(j);
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= cutneighsq(itype,jtype)) {
if(n<neigh_list.maxneighs) neighbors_i(n) = (j-(j%ClusterSize));
n++;
//m += ClusterSize - j&(ClusterSize-1)-1;
}
}
}
}
neigh_list.d_numneigh(i) = n;
if(n >= neigh_list.maxneighs) {
resize() = 1;
if(n >= new_maxneighs()) new_maxneighs() = n;
}
neigh_list.d_ilist(i) = i;
}
}

424
src/KOKKOS/npair_kokkos.h Normal file
View File

@ -0,0 +1,424 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
typedef NPairKokkos<LMPHostType,0,0> NPairKokkosFullBinHost;
NPairStyle(full/bin/kk/host,
NPairKokkosFullBinHost,
NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,0,0> NPairKokkosFullBinDevice;
NPairStyle(full/bin/kk/device,
NPairKokkosFullBinDevice,
NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,0,1> NPairKokkosFullBinGhostHost;
NPairStyle(full/bin/ghost/kk/host,
NPairKokkosFullBinGhostHost,
NP_FULL | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,0,1> NPairKokkosFullBinGhostDevice;
NPairStyle(full/bin/ghost/kk/device,
NPairKokkosFullBinGhostDevice,
NP_FULL | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,1,0> NPairKokkosHalfBinHost;
NPairStyle(half/bin/kk/host,
NPairKokkosHalfBinHost,
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,1,0> NPairKokkosHalfBinDevice;
NPairStyle(half/bin/kk/device,
NPairKokkosHalfBinDevice,
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPHostType,1,1> NPairKokkosHalfBinGhostHost;
NPairStyle(half/bin/ghost/kk/host,
NPairKokkosHalfBinGhostHost,
NP_HALF | NP_BIN | NP_KOKKOS_HOST | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
typedef NPairKokkos<LMPDeviceType,1,1> NPairKokkosHalfBinGhostDevice;
NPairStyle(half/bin/ghost/kk/device,
NPairKokkosHalfBinGhostDevice,
NP_HALF | NP_BIN | NP_KOKKOS_DEVICE | NP_NEWTON | NP_NEWTOFF | NP_GHOST | NP_ORTHO | NP_TRI)
#else
#ifndef LMP_NPAIR_KOKKOS_H
#define LMP_NPAIR_KOKKOS_H
#include "npair.h"
#include "neigh_list_kokkos.h"
namespace LAMMPS_NS {
template<class DeviceType, int HALF_NEIGH, int GHOST>
class NPairKokkos : public NPair {
public:
NPairKokkos(class LAMMPS *);
~NPairKokkos() {}
void copy_neighbor_info();
void copy_bin_info();
void copy_stencil_info();
void build(class NeighList *);
private:
int newton_pair;
// data from Neighbor class
DAT::tdual_xfloat_2d k_cutneighsq;
// exclusion data from Neighbor class
DAT::tdual_int_1d k_ex1_type,k_ex2_type;
DAT::tdual_int_2d k_ex_type;
DAT::tdual_int_1d k_ex1_group,k_ex2_group;
DAT::tdual_int_1d k_ex1_bit,k_ex2_bit;
DAT::tdual_int_1d k_ex_mol_group;
DAT::tdual_int_1d k_ex_mol_bit;
// data from NBin class
int atoms_per_bin;
DAT::tdual_int_1d k_bincount;
DAT::tdual_int_2d k_bins;
// data from NStencil class
int nstencil;
DAT::tdual_int_1d k_stencil; // # of J neighs for each I
DAT::tdual_int_1d_3 k_stencilxyz;
};
template<class DeviceType>
class NeighborKokkosExecute
{
typedef ArrayTypes<DeviceType> AT;
public:
NeighListKokkos<DeviceType> neigh_list;
// data from Neighbor class
const typename AT::t_xfloat_2d_randomread cutneighsq;
// exclusion data from Neighbor class
const int exclude;
const int nex_type;
const typename AT::t_int_1d_const ex1_type,ex2_type;
const typename AT::t_int_2d_const ex_type;
const int nex_group;
const typename AT::t_int_1d_const ex1_group,ex2_group;
const typename AT::t_int_1d_const ex1_bit,ex2_bit;
const int nex_mol;
const typename AT::t_int_1d_const ex_mol_group;
const typename AT::t_int_1d_const ex_mol_bit;
// data from NBin class
const typename AT::t_int_1d bincount;
const typename AT::t_int_1d_const c_bincount;
typename AT::t_int_2d bins;
typename AT::t_int_2d_const c_bins;
// data from NStencil class
int nstencil;
typename AT::t_int_1d d_stencil; // # of J neighs for each I
typename AT::t_int_1d_3 d_stencilxyz;
// data from Atom class
const typename AT::t_x_array_randomread x;
const typename AT::t_int_1d_const type,mask,molecule;
const typename AT::t_tagint_1d_const tag;
const typename AT::t_tagint_2d_const special;
const typename AT::t_int_2d_const nspecial;
const int molecular;
int moltemplate;
int special_flag[4];
const int nbinx,nbiny,nbinz;
const int mbinx,mbiny,mbinz;
const int mbinxlo,mbinylo,mbinzlo;
const X_FLOAT bininvx,bininvy,bininvz;
X_FLOAT bboxhi[3],bboxlo[3];
const int nlocal;
typename AT::t_int_scalar resize;
typename AT::t_int_scalar new_maxneighs;
typename ArrayTypes<LMPHostType>::t_int_scalar h_resize;
typename ArrayTypes<LMPHostType>::t_int_scalar h_new_maxneighs;
const int xperiodic, yperiodic, zperiodic;
const int xprd_half, yprd_half, zprd_half;
NeighborKokkosExecute(
const NeighListKokkos<DeviceType> &_neigh_list,
const typename AT::t_xfloat_2d_randomread &_cutneighsq,
const typename AT::t_int_1d &_bincount,
const typename AT::t_int_2d &_bins,
const int _nstencil,
const typename AT::t_int_1d &_d_stencil,
const typename AT::t_int_1d_3 &_d_stencilxyz,
const int _nlocal,
const typename AT::t_x_array_randomread &_x,
const typename AT::t_int_1d_const &_type,
const typename AT::t_int_1d_const &_mask,
const typename AT::t_int_1d_const &_molecule,
const typename AT::t_tagint_1d_const &_tag,
const typename AT::t_tagint_2d_const &_special,
const typename AT::t_int_2d_const &_nspecial,
const int &_molecular,
const int & _nbinx,const int & _nbiny,const int & _nbinz,
const int & _mbinx,const int & _mbiny,const int & _mbinz,
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
const int & _exclude,const int & _nex_type,
const typename AT::t_int_1d_const & _ex1_type,
const typename AT::t_int_1d_const & _ex2_type,
const typename AT::t_int_2d_const & _ex_type,
const int & _nex_group,
const typename AT::t_int_1d_const & _ex1_group,
const typename AT::t_int_1d_const & _ex2_group,
const typename AT::t_int_1d_const & _ex1_bit,
const typename AT::t_int_1d_const & _ex2_bit,
const int & _nex_mol,
const typename AT::t_int_1d_const & _ex_mol_group,
const typename AT::t_int_1d_const & _ex_mol_bit,
const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo,
const int & _xperiodic, const int & _yperiodic, const int & _zperiodic,
const int & _xprd_half, const int & _yprd_half, const int & _zprd_half):
neigh_list(_neigh_list), cutneighsq(_cutneighsq),
bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins),
nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz),
nlocal(_nlocal),
x(_x),type(_type),mask(_mask),molecule(_molecule),
tag(_tag),special(_special),nspecial(_nspecial),molecular(_molecular),
nbinx(_nbinx),nbiny(_nbiny),nbinz(_nbinz),
mbinx(_mbinx),mbiny(_mbiny),mbinz(_mbinz),
mbinxlo(_mbinxlo),mbinylo(_mbinylo),mbinzlo(_mbinzlo),
bininvx(_bininvx),bininvy(_bininvy),bininvz(_bininvz),
exclude(_exclude),nex_type(_nex_type),
ex1_type(_ex1_type),ex2_type(_ex2_type),ex_type(_ex_type),
nex_group(_nex_group),
ex1_group(_ex1_group),ex2_group(_ex2_group),
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),nex_mol(_nex_mol),
ex_mol_group(_ex_mol_group),ex_mol_bit(_ex_mol_bit),
xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic),
xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half) {
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2];
bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2];
resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(resize);
#else
h_resize = resize;
#endif
h_resize() = 1;
new_maxneighs = typename AT::
t_int_scalar("NeighborKokkosFunctor::new_maxneighs");
#ifndef KOKKOS_USE_CUDA_UVM
h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs);
#else
h_new_maxneighs = new_maxneighs;
#endif
h_new_maxneighs() = neigh_list.maxneighs;
};
~NeighborKokkosExecute() {neigh_list.clean_copy();};
template<int HalfNeigh, int Newton>
KOKKOS_FUNCTION
void build_Item(const int &i) const;
template<int HalfNeigh>
KOKKOS_FUNCTION
void build_Item_Ghost(const int &i) const;
#ifdef KOKKOS_HAVE_CUDA
template<int HalfNeigh, int Newton>
__device__ inline
void build_ItemCuda(typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const;
#endif
KOKKOS_INLINE_FUNCTION
void binatomsItem(const int &i) const;
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int coord2bin(const X_FLOAT & x,const X_FLOAT & y,const X_FLOAT & z, int* i) const
{
int ix,iy,iz;
if (x >= bboxhi[0])
ix = static_cast<int> ((x-bboxhi[0])*bininvx) + nbinx;
else if (x >= bboxlo[0]) {
ix = static_cast<int> ((x-bboxlo[0])*bininvx);
ix = MIN(ix,nbinx-1);
} else
ix = static_cast<int> ((x-bboxlo[0])*bininvx) - 1;
if (y >= bboxhi[1])
iy = static_cast<int> ((y-bboxhi[1])*bininvy) + nbiny;
else if (y >= bboxlo[1]) {
iy = static_cast<int> ((y-bboxlo[1])*bininvy);
iy = MIN(iy,nbiny-1);
} else
iy = static_cast<int> ((y-bboxlo[1])*bininvy) - 1;
if (z >= bboxhi[2])
iz = static_cast<int> ((z-bboxhi[2])*bininvz) + nbinz;
else if (z >= bboxlo[2]) {
iz = static_cast<int> ((z-bboxlo[2])*bininvz);
iz = MIN(iz,nbinz-1);
} else
iz = static_cast<int> ((z-bboxlo[2])*bininvz) - 1;
i[0] = ix - mbinxlo;
i[1] = iy - mbinylo;
i[2] = iz - mbinzlo;
return (iz-mbinzlo)*mbiny*mbinx + (iy-mbinylo)*mbinx + (ix-mbinxlo);
}
KOKKOS_INLINE_FUNCTION
int exclusion(const int &i,const int &j, const int &itype,const int &jtype) const;
KOKKOS_INLINE_FUNCTION
int find_special(const int &i, const int &j) const;
KOKKOS_INLINE_FUNCTION
int minimum_image_check(double dx, double dy, double dz) const {
if (xperiodic && fabs(dx) > xprd_half) return 1;
if (yperiodic && fabs(dy) > yprd_half) return 1;
if (zperiodic && fabs(dz) > zprd_half) return 1;
return 0;
}
};
template<class DeviceType,int HALF_NEIGH,int GHOST_NEWTON>
struct NPairKokkosBuildFunctor {
typedef DeviceType device_type;
const NeighborKokkosExecute<DeviceType> c;
const size_t sharedsize;
NPairKokkosBuildFunctor(const NeighborKokkosExecute<DeviceType> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
}
#ifdef KOKKOS_HAVE_CUDA
__device__ inline
void operator() (typename Kokkos::TeamPolicy<DeviceType>::member_type dev) const {
c.template build_ItemCuda<HALF_NEIGH,GHOST_NEWTON>(dev);
}
size_t shmem_size(const int team_size) const { (void) team_size; return sharedsize; }
#endif
};
template<int HALF_NEIGH,int GHOST_NEWTON>
struct NPairKokkosBuildFunctor<LMPHostType,HALF_NEIGH,GHOST_NEWTON> {
typedef LMPHostType device_type;
const NeighborKokkosExecute<LMPHostType> c;
const size_t sharedsize;
NPairKokkosBuildFunctor(const NeighborKokkosExecute<LMPHostType> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item<HALF_NEIGH,GHOST_NEWTON>(i);
}
void operator() (typename Kokkos::TeamPolicy<LMPHostType>::member_type dev) const {}
};
template<class DeviceType,int HALF_NEIGH>
struct NPairKokkosBuildFunctorGhost {
typedef DeviceType device_type;
const NeighborKokkosExecute<DeviceType> c;
const size_t sharedsize;
NPairKokkosBuildFunctorGhost(const NeighborKokkosExecute<DeviceType> &_c,
const size_t _sharedsize):c(_c),
sharedsize(_sharedsize) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int & i) const {
c.template build_Item_Ghost<HALF_NEIGH>(i);
}
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -90,7 +90,7 @@ void PairBuckCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -309,19 +309,12 @@ void PairBuckCoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 1;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/cut/kk");
}

View File

@ -109,7 +109,7 @@ void PairBuckCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -458,11 +458,9 @@ void PairBuckCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/long/kk");
}

View File

@ -79,7 +79,7 @@ void PairBuckKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -233,19 +233,12 @@ void PairBuckKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairBuckKokkos : public PairBuck {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairBuckKokkos(class LAMMPS *);
@ -96,17 +96,14 @@ class PairBuckKokkos : public PairBuck {
friend class PairComputeFunctor<PairBuckKokkos,HALF,true>;
friend class PairComputeFunctor<PairBuckKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairBuckKokkos,N2,true>;
friend class PairComputeFunctor<PairBuckKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairBuckKokkos,FULL,false>;
friend class PairComputeFunctor<PairBuckKokkos,HALF,false>;
friend class PairComputeFunctor<PairBuckKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairBuckKokkos,N2,false>;
friend class PairComputeFunctor<PairBuckKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALF,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALFTHREAD,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,N2,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairBuckKokkos>(PairBuckKokkos*);
};

View File

@ -78,7 +78,7 @@ void PairCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -215,11 +215,9 @@ void PairCoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/cut/kk");
}

View File

@ -85,7 +85,7 @@ void PairCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -257,19 +257,12 @@ void PairCoulDebyeKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/debye/kk");
}

View File

@ -221,11 +221,9 @@ void PairCoulDSFKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/dsf/kk");
}

View File

@ -102,7 +102,7 @@ void PairCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -408,11 +408,9 @@ void PairCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with buck/coul/long/kk");
}

View File

@ -222,11 +222,9 @@ void PairCoulWolfKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with coul/wolf/kk");
}

View File

@ -286,11 +286,9 @@ void PairEAMAlloyKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/alloy");
}

View File

@ -291,11 +291,9 @@ void PairEAMFSKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk/fs");
}

View File

@ -281,11 +281,9 @@ void PairEAMKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with pair eam/kk");
}

View File

@ -333,145 +333,6 @@ struct PairComputeFunctor {
}
};
template <class PairStyle, bool STACKPARAMS, class Specialisation>
struct PairComputeFunctor<PairStyle,FULLCLUSTER,STACKPARAMS,Specialisation> {
typedef typename PairStyle::device_type device_type ;
typedef EV_FLOAT value_type;
PairStyle c;
NeighListKokkos<device_type> list;
PairComputeFunctor(PairStyle* c_ptr,
NeighListKokkos<device_type>* list_ptr):
c(*c_ptr),list(*list_ptr) {};
~PairComputeFunctor() {c.cleanup_copy();list.clean_copy();};
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
return j >> SBBITS & 3;
}
template<int EVFLAG, int NEWTON_PAIR>
KOKKOS_FUNCTION
EV_FLOAT compute_item(const typename Kokkos::TeamPolicy<device_type>::member_type& dev,
const NeighListKokkos<device_type> &list, const NoCoulTag& ) const {
EV_FLOAT ev;
int i = dev.league_rank()*dev.team_size() + dev.team_rank();
const X_FLOAT xtmp = c.c_x(i,0);
const X_FLOAT ytmp = c.c_x(i,1);
const X_FLOAT ztmp = c.c_x(i,2);
int itype = c.type(i);
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
const int jnum = list.d_numneigh[i];
F_FLOAT3 ftmp;
for (int jj = 0; jj < jnum; jj++) {
int jjj = neighbors_i(jj);
Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(dev,NeighClusterSize),[&] (const int& k, F_FLOAT3& fftmp) {
const F_FLOAT factor_lj = c.special_lj[sbmask(jjj+k)];
const int j = (jjj + k)&NEIGHMASK;
if((j==i)||(j>=c.nall)) return;
const X_FLOAT delx = xtmp - c.c_x(j,0);
const X_FLOAT dely = ytmp - c.c_x(j,1);
const X_FLOAT delz = ztmp - c.c_x(j,2);
const int jtype = c.type(j);
const F_FLOAT rsq = (delx*delx + dely*dely + delz*delz);
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
fftmp.x += delx*fpair;
fftmp.y += dely*fpair;
fftmp.z += delz*fpair;
if (EVFLAG) {
F_FLOAT evdwl = 0.0;
if (c.eflag) {
evdwl = 0.5*
factor_lj * c.template compute_evdwl<STACKPARAMS,Specialisation>(rsq,i,j,itype,jtype);
ev.evdwl += evdwl;
}
if (c.vflag_either || c.eflag_atom) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz);
}
}
},ftmp);
}
Kokkos::single(Kokkos::PerThread(dev), [&]() {
c.f(i,0) += ftmp.x;
c.f(i,1) += ftmp.y;
c.f(i,2) += ftmp.z;
});
return ev;
}
KOKKOS_INLINE_FUNCTION
void ev_tally(EV_FLOAT &ev, const int &i, const int &j,
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
const F_FLOAT &dely, const F_FLOAT &delz) const
{
const int EFLAG = c.eflag;
const int NEWTON_PAIR = c.newton_pair;
const int VFLAG = c.vflag_either;
if (EFLAG) {
if (c.eflag_atom) {
const E_FLOAT epairhalf = 0.5 * epair;
if (NEWTON_PAIR || i < c.nlocal) c.d_eatom[i] += epairhalf;
if (NEWTON_PAIR || j < c.nlocal) c.d_eatom[j] += epairhalf;
}
}
if (VFLAG) {
const E_FLOAT v0 = delx*delx*fpair;
const E_FLOAT v1 = dely*dely*fpair;
const E_FLOAT v2 = delz*delz*fpair;
const E_FLOAT v3 = delx*dely*fpair;
const E_FLOAT v4 = delx*delz*fpair;
const E_FLOAT v5 = dely*delz*fpair;
if (c.vflag_global) {
ev.v[0] += 0.5*v0;
ev.v[1] += 0.5*v1;
ev.v[2] += 0.5*v2;
ev.v[3] += 0.5*v3;
ev.v[4] += 0.5*v4;
ev.v[5] += 0.5*v5;
}
if (c.vflag_atom) {
if (i < c.nlocal) {
c.d_vatom(i,0) += 0.5*v0;
c.d_vatom(i,1) += 0.5*v1;
c.d_vatom(i,2) += 0.5*v2;
c.d_vatom(i,3) += 0.5*v3;
c.d_vatom(i,4) += 0.5*v4;
c.d_vatom(i,5) += 0.5*v5;
}
}
}
}
KOKKOS_INLINE_FUNCTION
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev) const {
if (c.newton_pair) compute_item<0,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
else compute_item<0,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
}
KOKKOS_INLINE_FUNCTION
void operator()(const typename Kokkos::TeamPolicy<device_type>::member_type& dev, value_type &energy_virial) const {
if (c.newton_pair)
energy_virial += compute_item<1,1>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
else
energy_virial += compute_item<1,0>(dev,list,typename DoCoul<PairStyle::COUL_FLAG>::type());
}
};
template <class PairStyle, bool STACKPARAMS, class Specialisation>
struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
typedef typename PairStyle::device_type device_type ;
@ -607,8 +468,8 @@ struct PairComputeFunctor<PairStyle,N2,STACKPARAMS,Specialisation> {
// The enable_if clause will invalidate the last parameter of the function, so that
// a match is only achieved, if PairStyle supports the specific neighborlist variant.
// This uses the fact that failure to match template parameters is not an error.
// By having the enable_if with a ! and without it, exactly one of the two versions of the functions
// pair_compute_neighlist and pair_compute_fullcluster will match - either the dummy version
// By having the enable_if with a ! and without it, exactly one of the functions
// pair_compute_neighlist will match - either the dummy version
// or the real one further below.
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<!((NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
@ -619,15 +480,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
return ev;
}
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<!((FULLCLUSTER&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*>::type list) {
EV_FLOAT ev;
(void) fpair;
(void) list;
printf("ERROR: calling pair_compute with invalid neighbor list style: requested %i available %i \n",FULLCLUSTER,PairStyle::EnabledNeighFlags);
return ev;
}
// Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
@ -644,41 +496,6 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable
return ev;
}
// Submit ParallelFor for NEIGHFLAG=FULLCLUSTER
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute_fullcluster (PairStyle* fpair, typename Kokkos::Impl::enable_if<(FULLCLUSTER&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*>::type list) {
EV_FLOAT ev;
if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,false,Specialisation >
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize,NeighClusterSize);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
else Kokkos::parallel_for(config,ff);
} else {
typedef PairComputeFunctor<PairStyle,FULLCLUSTER,true,Specialisation >
f_type;
f_type ff(fpair, list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<typename f_type::device_type, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<typename f_type::device_type> config(nteams,teamsize,NeighClusterSize);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(config,ff,ev);
else Kokkos::parallel_for(config,ff);
}
return ev;
}
template<class PairStyle, class Specialisation>
EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
EV_FLOAT ev;
@ -690,8 +507,6 @@ EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::dev
ev = pair_compute_neighlist<PairStyle,HALF,Specialisation> (fpair,list);
} else if (fpair->neighflag == N2) {
ev = pair_compute_neighlist<PairStyle,N2,Specialisation> (fpair,list);
} else if (fpair->neighflag == FULLCLUSTER) {
ev = pair_compute_fullcluster<PairStyle,Specialisation> (fpair,list);
}
return ev;
}

View File

@ -110,7 +110,7 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::compute(int eflag_in, int
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -455,11 +455,9 @@ void PairLJCharmmCoulCharmmImplicitKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/charmm/implicit/kk");
}

View File

@ -110,7 +110,7 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::compute(int eflag_in, int vflag_i
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -456,11 +456,9 @@ void PairLJCharmmCoulCharmmKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/charmm/kk");
}

View File

@ -110,7 +110,7 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -486,11 +486,9 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/charmm/coul/long/kk");
}

View File

@ -87,7 +87,7 @@ void PairLJClass2CoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -289,19 +289,12 @@ void PairLJClass2CoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/cut/kk");
}

View File

@ -95,7 +95,7 @@ void PairLJClass2CoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -445,11 +445,9 @@ void PairLJClass2CoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/coul/long/kk");
}

View File

@ -87,7 +87,7 @@ void PairLJClass2Kokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -227,19 +227,12 @@ void PairLJClass2Kokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/class2/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJClass2Kokkos : public PairLJClass2 {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJClass2Kokkos(class LAMMPS *);
@ -99,17 +99,14 @@ class PairLJClass2Kokkos : public PairLJClass2 {
friend class PairComputeFunctor<PairLJClass2Kokkos,HALF,true>;
friend class PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJClass2Kokkos,N2,true>;
friend class PairComputeFunctor<PairLJClass2Kokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJClass2Kokkos,FULL,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,HALF,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,N2,false>;
friend class PairComputeFunctor<PairLJClass2Kokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALF,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALFTHREAD,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,N2,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJClass2Kokkos>(PairLJClass2Kokkos*);
};

View File

@ -87,7 +87,7 @@ void PairLJCutCoulCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -280,19 +280,12 @@ void PairLJCutCoulCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
}

View File

@ -91,7 +91,7 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -310,19 +310,12 @@ void PairLJCutCoulDebyeKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/debye/kk");
}

View File

@ -99,7 +99,7 @@ void PairLJCutCoulDSFKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -301,19 +301,12 @@ void PairLJCutCoulDSFKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/cut/kk");
}

View File

@ -99,7 +99,7 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -464,11 +464,9 @@ void PairLJCutCoulLongKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/coul/long/kk");
}

View File

@ -87,7 +87,7 @@ void PairLJCutKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -245,19 +245,12 @@ void PairLJCutKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJCutKokkos : public PairLJCut {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJCutKokkos(class LAMMPS *);
@ -99,17 +99,14 @@ class PairLJCutKokkos : public PairLJCut {
friend class PairComputeFunctor<PairLJCutKokkos,HALF,true>;
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJCutKokkos,N2,true>;
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJCutKokkos,FULL,false>;
friend class PairComputeFunctor<PairLJCutKokkos,HALF,false>;
friend class PairComputeFunctor<PairLJCutKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJCutKokkos,N2,false>;
friend class PairComputeFunctor<PairLJCutKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,FULL,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALF,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,HALFTHREAD,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCutKokkos,N2,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCutKokkos,void>(PairLJCutKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJCutKokkos>(PairLJCutKokkos*);
};

View File

@ -86,7 +86,7 @@ void PairLJExpandKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -230,19 +230,12 @@ void PairLJExpandKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/expand/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJExpandKokkos : public PairLJExpand {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJExpandKokkos(class LAMMPS *);
@ -100,17 +100,14 @@ class PairLJExpandKokkos : public PairLJExpand {
friend class PairComputeFunctor<PairLJExpandKokkos,HALF,true>;
friend class PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJExpandKokkos,N2,true>;
friend class PairComputeFunctor<PairLJExpandKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJExpandKokkos,FULL,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,HALF,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,N2,false>;
friend class PairComputeFunctor<PairLJExpandKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,FULL,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALF,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,HALFTHREAD,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJExpandKokkos,N2,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJExpandKokkos,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJExpandKokkos,void>(PairLJExpandKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJExpandKokkos>(PairLJExpandKokkos*);
};

View File

@ -101,7 +101,7 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -439,11 +439,9 @@ void PairLJGromacsCoulGromacsKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/coul/gromacs/kk");
}

View File

@ -98,7 +98,7 @@ void PairLJGromacsKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -277,11 +277,9 @@ void PairLJGromacsKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/gromacs/kk");
}

View File

@ -86,7 +86,7 @@ void PairLJSDKKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -258,19 +258,12 @@ void PairLJSDKKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/sdk/kk");
}

View File

@ -31,7 +31,7 @@ namespace LAMMPS_NS {
template<class DeviceType>
class PairLJSDKKokkos : public PairLJSDK {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
PairLJSDKKokkos(class LAMMPS *);
@ -97,17 +97,14 @@ class PairLJSDKKokkos : public PairLJSDK {
friend class PairComputeFunctor<PairLJSDKKokkos,HALF,true>;
friend class PairComputeFunctor<PairLJSDKKokkos,HALFTHREAD,true>;
friend class PairComputeFunctor<PairLJSDKKokkos,N2,true>;
friend class PairComputeFunctor<PairLJSDKKokkos,FULLCLUSTER,true >;
friend class PairComputeFunctor<PairLJSDKKokkos,FULL,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,HALF,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,HALFTHREAD,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,N2,false>;
friend class PairComputeFunctor<PairLJSDKKokkos,FULLCLUSTER,false >;
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,FULL,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,HALF,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,HALFTHREAD,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJSDKKokkos,N2,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_fullcluster<PairLJSDKKokkos,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJSDKKokkos,void>(PairLJSDKKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJSDKKokkos>(PairLJSDKKokkos*);
};

View File

@ -146,12 +146,10 @@ void PairReaxCKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->ghost = 1;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
neighbor->requests[irequest]->ghost = 1;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with reax/c/kk");

View File

@ -601,7 +601,6 @@ void PairSWKokkos<DeviceType>::init_style()
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else

View File

@ -96,7 +96,7 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
eflag = eflag_in;
vflag = vflag_in;
if (neighflag == FULL || neighflag == FULLCLUSTER) no_virial_fdotr_compute = 1;
if (neighflag == FULL) no_virial_fdotr_compute = 1;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
@ -142,19 +142,6 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
f(this,(NeighListKokkos<DeviceType>*) list);
if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev);
else Kokkos::parallel_for(nlocal,f);
} else if (neighflag == FULLCLUSTER) {
typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,false,S_TableCompute<DeviceType,TABSTYLE> >
f_type;
f_type f(this,(NeighListKokkos<DeviceType>*) list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<DeviceType, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize,NeighClusterSize);
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
else Kokkos::parallel_for(config,f);
}
} else {
if (neighflag == FULL) {
@ -177,19 +164,6 @@ void PairTableKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
f(this,(NeighListKokkos<DeviceType>*) list);
if (eflag || vflag) Kokkos::parallel_reduce(nlocal,f,ev);
else Kokkos::parallel_for(nlocal,f);
} else if (neighflag == FULLCLUSTER) {
typedef PairComputeFunctor<PairTableKokkos<DeviceType>,FULLCLUSTER,true,S_TableCompute<DeviceType,TABSTYLE> >
f_type;
f_type f(this,(NeighListKokkos<DeviceType>*) list);
#ifdef KOKKOS_HAVE_CUDA
const int teamsize = Kokkos::Impl::is_same<DeviceType, Kokkos::Cuda>::value ? 32 : 1;
#else
const int teamsize = 1;
#endif
const int nteams = (list->inum*+teamsize-1)/teamsize;
Kokkos::TeamPolicy<DeviceType> config(nteams,teamsize,NeighClusterSize);
if (eflag || vflag) Kokkos::parallel_reduce(config,f,ev);
else Kokkos::parallel_for(config,f);
}
}
@ -1261,19 +1235,12 @@ void PairTableKokkos<DeviceType>::init_style()
if (neighflag == FULL) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 1;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == N2) {
neighbor->requests[irequest]->full = 0;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
} else if (neighflag == FULLCLUSTER) {
neighbor->requests[irequest]->full_cluster = 1;
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
} else {
error->all(FLERR,"Cannot use chosen neighbor list style with lj/cut/kk");
}

View File

@ -41,7 +41,7 @@ template<class DeviceType>
class PairTableKokkos : public Pair {
public:
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2|FULLCLUSTER};
enum {EnabledNeighFlags=FULL|HALFTHREAD|HALF|N2};
enum {COUL_FLAG=0};
typedef DeviceType device_type;
@ -170,45 +170,37 @@ class PairTableKokkos : public Pair {
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LOOKUP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,LINEAR> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,SPLINE> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,true,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULL,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALF,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,HALFTHREAD,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,N2,false,S_TableCompute<DeviceType,BITMAP> >;
friend class PairComputeFunctor<PairTableKokkos,FULLCLUSTER,false,S_TableCompute<DeviceType,BITMAP> >;
friend void pair_virial_fdotr_compute<PairTableKokkos>(PairTableKokkos*);
};

View File

@ -103,7 +103,6 @@ void PairTersoffKokkos<DeviceType>::init_style()
//if (neighflag == FULL || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else

View File

@ -102,7 +102,6 @@ void PairTersoffMODKokkos<DeviceType>::init_style()
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else

View File

@ -113,7 +113,6 @@ void PairTersoffZBLKokkos<DeviceType>::init_style()
if (neighflag == FULL || neighflag == HALF || neighflag == HALFTHREAD) {
neighbor->requests[irequest]->full = 1;
neighbor->requests[irequest]->half = 0;
neighbor->requests[irequest]->full_cluster = 0;
if (neighflag == FULL)
neighbor->requests[irequest]->ghost = 1;
else

View File

@ -195,17 +195,6 @@ void EwaldDisp::init()
g_ewald = accuracy*sqrt(natoms*(*cutoff)*shape_det(domain->h)) / (2.0*q2);
if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15*log(accuracy))/(*cutoff);
else g_ewald = sqrt(-log(g_ewald)) / (*cutoff);
}
else if (function[1] || function[2]) {
//Try Newton Solver
//Use old method to get guess
g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff;
double g_ewald_new =
NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2);
if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
else error->warning(FLERR,"Ewald/disp Newton solver failed, "
"using old method to estimate g_ewald");
} else if (function[3]) {
//Try Newton Solver
//Use old method to get guess
@ -215,6 +204,16 @@ void EwaldDisp::init()
if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
else error->warning(FLERR,"Ewald/disp Newton solver failed, "
"using old method to estimate g_ewald");
} else if (function[1] || function[2]) {
//Try Newton Solver
//Use old method to get guess
g_ewald = (1.35 - 0.15*log(accuracy))/ *cutoff;
double g_ewald_new =
NewtonSolve(g_ewald,(*cutoff),natoms,shape_det(domain->h),b2);
if (g_ewald_new > 0.0) g_ewald = g_ewald_new;
else error->warning(FLERR,"Ewald/disp Newton solver failed, "
"using old method to estimate g_ewald");
}
}
@ -708,6 +707,8 @@ void EwaldDisp::compute(int eflag, int vflag)
compute_virial();
compute_virial_dipole();
compute_virial_peratom();
if (slabflag) compute_slabcorr();
}
@ -974,7 +975,6 @@ void EwaldDisp::compute_energy()
}
}
for (int k=0; k<EWALD_NFUNCS; ++k) energy += c[k]*sum[k]-energy_self[k];
if (slabflag) compute_slabcorr();
}
/* ---------------------------------------------------------------------- */
@ -1480,10 +1480,7 @@ double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2)
double a = Rc*x;
double f = 0.0;
if (function[1] || function[2]) { // LJ
f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) *
(6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy);
} else { // dipole
if (function[3]) { // dipole
double rg2 = a*a;
double rg4 = rg2*rg2;
double rg6 = rg4*rg2;
@ -1492,7 +1489,10 @@ double EwaldDisp::f(double x, double Rc, bigint natoms, double vol, double b2)
f = (b2/(sqrt(vol*powint(x,4)*powint(Rc,9)*natoms)) *
sqrt(13.0/6.0*Cc*Cc + 2.0/15.0*Dc*Dc - 13.0/15.0*Cc*Dc) *
exp(-rg2)) - accuracy;
}
} else if (function[1] || function[2]) { // LJ
f = (4.0*MY_PI*b2*powint(x,4)/vol/sqrt((double)natoms)*erfc(a) *
(6.0*powint(a,-5) + 6.0*powint(a,-3) + 3.0/a + a) - accuracy);
}
return f;
}

127
src/MAKE/MACHINES/Makefile.cori2 Executable file
View File

@ -0,0 +1,127 @@
# cori2 = NERSC Cori II KNL, static build, FFTW (single precision)
# ---------------------------------------------------------------------
# module swap craype-haswell craype-mic-knl
# module load fftw
# module load craype-hugepages2M
# Recommend using #SBATCH -S 2 for core specialization
# ---------------------------------------------------------------------
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
CC = CC
OPTFLAGS = -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -qno-offload \
-fno-alias -ansi-alias -restrict $(OPTFLAGS) -DLMP_INTEL_NO_TBB
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = CC
LINKFLAGS = -g -qopenmp $(OPTFLAGS)
LIB =
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
# ---------------------------------------------------------------------
# LAMMPS-specific settings, all OPTIONAL
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings
# see possible settings in Section 2.2 (step 4) of manual
LMP_INC = #-DLAMMPS_GZIP -DLAMMPS_JPEG
# MPI library
# see discussion in Section 2.2 (step 5) of manual
# MPI wrapper compiler/linker can provide this info
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
MPI_PATH =
MPI_LIB =
# FFT library
# see discussion in Section 2.2 (step 6) of manaul
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC = -DFFT_FFTW3 -DFFT_SINGLE
FFT_PATH =
FFT_LIB = -lfftw3f
# JPEG and/or PNG library
# see discussion in Section 2.2 (step 7) of manual
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB = #-ljpeg
# ---------------------------------------------------------------------
# build rules and dependencies
# do not edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
%.o:%.cu
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
# Individual dependencies
depend : fastdep.exe $(SRC)
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
fastdep.exe: ../DEPEND/fastdep.c
cc -O -o $@ $<
sinclude .depend

View File

@ -0,0 +1,123 @@
# intel_cpu_intelmpi = USER-INTEL package, Intel MPI, MKL FFT
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
CC = mpiicpc
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
-fno-alias -ansi-alias -restrict $(OPTFLAGS)
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = mpiicpc
LINKFLAGS = -g -qopenmp $(OPTFLAGS)
LIB = -ltbbmalloc -ltbbmalloc_proxy
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
# ---------------------------------------------------------------------
# LAMMPS-specific settings, all OPTIONAL
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings
# see possible settings in Section 2.2 (step 4) of manual
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
# MPI library
# see discussion in Section 2.2 (step 5) of manual
# MPI wrapper compiler/linker can provide this info
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
MPI_PATH =
MPI_LIB =
# FFT library
# see discussion in Section 2.2 (step 6) of manaul
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC = -DFFT_MKL -DFFT_SINGLE
FFT_PATH =
FFT_LIB = -L$MKLROOT/lib/intel64/ -lmkl_intel_ilp64 \
-lmkl_sequential -lmkl_core
# JPEG and/or PNG library
# see discussion in Section 2.2 (step 7) of manual
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB = -ljpeg
# ---------------------------------------------------------------------
# build rules and dependencies
# do not edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
%.o:%.cu $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
# Individual dependencies
depend : fastdep.exe $(SRC)
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
fastdep.exe: ../DEPEND/fastdep.c
cc -O -o $@ $<
sinclude .depend

View File

@ -0,0 +1,123 @@
# intel_phi = USER-INTEL with Phi x200 (KNL) offload support,Intel MPI,MKL FFT
SHELL = /bin/sh
# ---------------------------------------------------------------------
# compiler/linker settings
# specify flags and libraries needed for your compiler
CC = mpiicpc
MIC_OPT = -qoffload-arch=mic-avx512 -fp-model fast=2
CCFLAGS = -g -O3 -qopenmp -DLMP_INTEL_OFFLOAD -DLAMMPS_MEMALIGN=64 \
-xHost -fno-alias -ansi-alias -restrict \
-qoverride-limits $(MIC_OPT)
SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = mpiicpc
LINKFLAGS = -g -O3 -xHost -qopenmp -qoffload $(MIC_OPT)
LIB = -ltbbmalloc
SIZE = size
ARCHIVE = ar
ARFLAGS = -rc
SHLIBFLAGS = -shared
# ---------------------------------------------------------------------
# LAMMPS-specific settings, all OPTIONAL
# specify settings for LAMMPS features you will use
# if you change any -D setting, do full re-compile after "make clean"
# LAMMPS ifdef settings
# see possible settings in Section 2.2 (step 4) of manual
LMP_INC = -DLAMMPS_GZIP -DLAMMPS_JPEG
# MPI library
# see discussion in Section 2.2 (step 5) of manual
# MPI wrapper compiler/linker can provide this info
# can point to dummy MPI library in src/STUBS as in Makefile.serial
# use -D MPICH and OMPI settings in INC to avoid C++ lib conflicts
# INC = path for mpi.h, MPI compiler settings
# PATH = path for MPI library
# LIB = name of MPI library
MPI_INC = -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1
MPI_PATH =
MPI_LIB =
# FFT library
# see discussion in Section 2.2 (step 6) of manaul
# can be left blank to use provided KISS FFT library
# INC = -DFFT setting, e.g. -DFFT_FFTW, FFT compiler settings
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC = -DFFT_MKL -DFFT_SINGLE
FFT_PATH =
FFT_LIB = -L$(MKLROOT)/lib/intel64/ -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core
# JPEG and/or PNG library
# see discussion in Section 2.2 (step 7) of manual
# only needed if -DLAMMPS_JPEG or -DLAMMPS_PNG listed with LMP_INC
# INC = path(s) for jpeglib.h and/or png.h
# PATH = path(s) for JPEG library and/or PNG library
# LIB = name(s) of JPEG library and/or PNG library
JPG_INC =
JPG_PATH =
JPG_LIB = -ljpeg
# ---------------------------------------------------------------------
# build rules and dependencies
# do not edit this section
include Makefile.package.settings
include Makefile.package
EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)
EXTRA_CPP_DEPENDS = $(PKG_CPP_DEPENDS)
EXTRA_LINK_DEPENDS = $(PKG_LINK_DEPENDS)
# Path to src files
vpath %.cpp ..
vpath %.h ..
# Link target
$(EXE): $(OBJ) $(EXTRA_LINK_DEPENDS)
$(LINK) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(EXTRA_LIB) $(LIB) -o $(EXE)
$(SIZE) $(EXE)
# Library targets
lib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(ARCHIVE) $(ARFLAGS) $(EXE) $(OBJ)
shlib: $(OBJ) $(EXTRA_LINK_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(SHLIBFLAGS) $(EXTRA_PATH) -o $(EXE) \
$(OBJ) $(EXTRA_LIB) $(LIB)
# Compilation rules
%.o:%.cpp $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
%.d:%.cpp $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(EXTRA_INC) $(DEPFLAGS) $< > $@
%.o:%.cu $(EXTRA_CPP_DEPENDS)
$(CC) $(CCFLAGS) $(SHFLAGS) $(EXTRA_INC) -c $<
# Individual dependencies
depend : fastdep.exe $(SRC)
@./fastdep.exe $(EXTRA_INC) -- $^ > .depend || exit 1
fastdep.exe: ../DEPEND/fastdep.c
cc -O -o $@ $<
sinclude .depend

View File

@ -59,8 +59,9 @@ style () {
# called by "make machine"
# col 1 = string to search for
# col 2 = search in *.h files starting with this name
# col 3 = prefix of style file
# col 4
# col 3 = name of style file
# col 4 = file that includes the style file
# col 5 = optional 2nd file that includes the style file
if (test $1 = "style") then
@ -69,7 +70,7 @@ if (test $1 = "style") then
style BODY_CLASS body_ body atom_vec_body
style BOND_CLASS bond_ bond force
style COMMAND_CLASS "" command input
style COMPUTE_CLASS compute_ compute modify modify_cuda
style COMPUTE_CLASS compute_ compute modify
style DIHEDRAL_CLASS dihedral_ dihedral force
style DUMP_CLASS dump_ dump output write_dump
style FIX_CLASS fix_ fix modify
@ -77,6 +78,10 @@ if (test $1 = "style") then
style INTEGRATE_CLASS "" integrate update
style KSPACE_CLASS "" kspace force
style MINIMIZE_CLASS min_ minimize update
style NBIN_CLASS nbin_ nbin neighbor
style NPAIR_CLASS npair_ npair neighbor
style NSTENCIL_CLASS nstencil_ nstencil neighbor
style NTOPO_CLASS ntopo_ ntopo neighbor
style PAIR_CLASS pair_ pair force
style READER_CLASS reader_ reader read_dump
style REGION_CLASS region_ region domain

View File

@ -89,6 +89,7 @@ void Python::command(int narg, char **arg)
istr = NULL;
ostr = NULL;
format = NULL;
length_longstr = 0;
char *pyfile = NULL;
char *herestr = NULL;
int existflag = 0;
@ -115,6 +116,11 @@ void Python::command(int narg, char **arg)
format = new char[n];
strcpy(format,arg[iarg+1]);
iarg += 2;
} else if (strcmp(arg[iarg],"length") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Invalid python command");
length_longstr = force->inumeric(FLERR,arg[iarg+1]);
if (length_longstr <= 0) error->all(FLERR,"Invalid python command");
iarg += 2;
} else if (strcmp(arg[iarg],"file") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Invalid python command");
delete[] pyfile;
@ -249,6 +255,7 @@ void Python::invoke_function(int ifunc, char *result)
// function returned a value
// assign it to result string stored by python-style variable
// or if user specified a length, assign it to longstr
if (pfuncs[ifunc].noutput) {
int otype = pfuncs[ifunc].otype;
@ -258,7 +265,9 @@ void Python::invoke_function(int ifunc, char *result)
sprintf(result,"%.15g",PyFloat_AsDouble(pValue));
} else if (otype == STRING) {
char *pystr = PyString_AsString(pValue);
strncpy(result,pystr,VALUELENGTH-1);
if (pfuncs[ifunc].longstr)
strncpy(pfuncs[ifunc].longstr,pystr,pfuncs[ifunc].length_longstr);
else strncpy(result,pystr,VALUELENGTH-1);
}
Py_DECREF(pValue);
}
@ -287,6 +296,13 @@ int Python::variable_match(char *name, char *varname, int numeric)
/* ------------------------------------------------------------------ */
char *Python::long_string(int ifunc)
{
return pfuncs[ifunc].longstr;
}
/* ------------------------------------------------------------------ */
int Python::create_entry(char *name)
{
// ifunc = index to entry by name in pfuncs vector, can be old or new
@ -370,6 +386,7 @@ int Python::create_entry(char *name)
// process output as value or variable
pfuncs[ifunc].ovarname = NULL;
pfuncs[ifunc].longstr = NULL;
if (!noutput) return ifunc;
char type = format[ninput];
@ -378,6 +395,14 @@ int Python::create_entry(char *name)
else if (type == 's') pfuncs[ifunc].otype = STRING;
else error->all(FLERR,"Invalid python command");
if (length_longstr) {
if (pfuncs[ifunc].otype != STRING)
error->all(FLERR,"Python command length keyword "
"cannot be used unless output is a string");
pfuncs[ifunc].length_longstr = length_longstr;
pfuncs[ifunc].longstr = new char[length_longstr+1];
}
if (strstr(ostr,"v_") != ostr) error->all(FLERR,"Invalid python command");
int n = strlen(&ostr[2]) + 1;
pfuncs[ifunc].ovarname = new char[n];
@ -398,4 +423,5 @@ void Python::deallocate(int i)
delete [] pfuncs[i].svalue[j];
delete [] pfuncs[i].svalue;
delete [] pfuncs[i].ovarname;
delete [] pfuncs[i].longstr;
}

View File

@ -28,9 +28,10 @@ class Python : protected Pointers {
void invoke_function(int, char *);
int find(char *);
int variable_match(char *, char *, int);
char *long_string(int);
private:
int ninput,noutput;
int ninput,noutput,length_longstr;
char **istr;
char *ostr,*format;
void *pyMain;
@ -44,6 +45,8 @@ class Python : protected Pointers {
char **svalue;
int otype;
char *ovarname;
char *longstr;
int length_longstr;
void *pFunc;
};

Some files were not shown because too many files have changed in this diff Show More